fix: v0.2.1 — chunked ChatGPT cookies and Claude project path

- Support __Secure-next-auth.session-token.0/.1 split cookies; ChatGPT now
  issues tokens that exceed the 4KB per-cookie limit and must be sent as two
  named chunks or the auth endpoint returns no accessToken. Add
  CHATGPT_SESSION_TOKEN_1 env var; update auth wizard instructions.
- Fix Claude conversations exported to wrong directory when project name is
  present in the listing but absent from the detail endpoint response.
  Explicitly propagate "project" alongside _-prefixed annotation keys.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-13 22:32:14 -04:00
parent 4ccd918eb1
commit 19bfdaecbe
5 changed files with 49 additions and 15 deletions
--- a/.env.example
+++ b/.env.example
@@ -6,9 +6,12 @@
 # --- ChatGPT ---
 # How to get: open chatgpt.com in Chrome → F12 → Application tab
-# → Cookies → https://chatgpt.com → find "__Secure-next-auth.session-token" → copy Value
+# → Cookies → https://chatgpt.com → find the two cookie chunks:
-# Token type: JWT (starts with "eyJ"). Typically valid for ~7 days.
+#   __Secure-next-auth.session-token.0  (starts with "eyJ") → CHATGPT_SESSION_TOKEN
 #   __Secure-next-auth.session-token.1  (the remainder)     → CHATGPT_SESSION_TOKEN_1
 # Token type: JWE. Typically valid for ~7 days.
 CHATGPT_SESSION_TOKEN=
 CHATGPT_SESSION_TOKEN_1=
 # ChatGPT Projects (optional): comma-separated list of project gizmo IDs.
 # Project conversations are NOT included in the default /conversations listing.
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "ai-chat-exporter"
-version = "0.2.0"
+version = "0.2.1"
 description = "Export ChatGPT and Claude conversation history to Markdown for personal archival in Joplin"
 requires-python = ">=3.11"
 dependencies = [
--- a/src/config.py
+++ b/src/config.py
@@ -28,6 +28,7 @@ class ConfigError(Exception):
@dataclass
 class Config:
    chatgpt_session_token: str | None
    chatgpt_session_token_1: str | None
    claude_session_key: str | None
    export_dir: Path
    output_structure: str
@@ -55,6 +56,7 @@ def load_config() -> Config:
    load_dotenv(override=False)
    chatgpt_token = os.getenv("CHATGPT_SESSION_TOKEN", "").strip() or None
    chatgpt_token_1 = os.getenv("CHATGPT_SESSION_TOKEN_1", "").strip() or None
    claude_key = os.getenv("CLAUDE_SESSION_KEY", "").strip() or None
    export_dir = Path(os.getenv("EXPORT_DIR", "./exports")).expanduser()
    output_structure = os.getenv("OUTPUT_STRUCTURE", "provider/project/year").strip()
@@ -127,6 +129,7 @@ def load_config() -> Config:
    config = Config(
        chatgpt_session_token=chatgpt_token,
        chatgpt_session_token_1=chatgpt_token_1,
        claude_session_key=claude_key,
        export_dir=export_dir,
        output_structure=output_structure,
--- a/src/main.py
+++ b/src/main.py
@@ -153,15 +153,19 @@ def _auth_chatgpt(os_name: str) -> None:
    else:
        console.print("2. Press [bold]F12[/bold] to open DevTools → Application tab.")
    console.print("3. Expand [bold]Cookies[/bold] → [bold]https://chatgpt.com[/bold]")
-    console.print("4. Find [bold]__Secure-next-auth.session-token[/bold] → copy the Value.")
+    console.print("4. ChatGPT splits the session token across two cookies:")
-    console.print("   (Token starts with 'eyJ...' — it is a long JWT string)")
+    console.print("     [bold]__Secure-next-auth.session-token.0[/bold]  (starts with 'eyJ')")
-    console.print("5. Paste it below (input is hidden).\n")
+    console.print("     [bold]__Secure-next-auth.session-token.1[/bold]  (the remainder)")
    console.print("   Copy each Value in turn and paste below.")
    console.print("   (If you only see one cookie without a .0/.1 suffix, paste it for .0 and leave .1 blank.)\n")
-    token = click.prompt("ChatGPT session token", hide_input=True, default="", show_default=False).strip()
+    token = click.prompt("ChatGPT session token (.0)", hide_input=True, default="", show_default=False).strip()
    if not token:
        console.print("[yellow]Skipped ChatGPT token.[/yellow]")
        return
    token_1 = click.prompt("ChatGPT session token (.1, leave blank if absent)", hide_input=True, default="", show_default=False).strip() or None
    # Validate
    if not token.startswith("eyJ"):
        console.print("[yellow]Warning: token doesn't look like a JWT (expected 'eyJ...').[/yellow]")
@@ -184,7 +188,7 @@ def _auth_chatgpt(os_name: str) -> None:
    with console.status("[dim]Validating token with ChatGPT API…[/dim]"):
        try:
            from src.providers.chatgpt import ChatGPTProvider
-            _prov = ChatGPTProvider(session_token=token)
+            _prov = ChatGPTProvider(session_token=token, session_token_1=token_1)
            _prov._fetch_access_token()
            _valid = True
        except ProviderError as e:
@@ -198,6 +202,8 @@ def _auth_chatgpt(os_name: str) -> None:
        console.print(f"[red]✗ Token validation failed: {_error}[/red]")
    _write_token_to_env("CHATGPT_SESSION_TOKEN", token)
    if token_1:
        _write_token_to_env("CHATGPT_SESSION_TOKEN_1", token_1)
    # --- ChatGPT Projects ---
    console.print("\n[bold]ChatGPT Projects (optional)[/bold]")
@@ -402,7 +408,8 @@ def _run_doctor_checks() -> list[dict]:
    if chatgpt_token:
        try:
            from src.providers.chatgpt import ChatGPTProvider
-            p = ChatGPTProvider(chatgpt_token)
+            chatgpt_token_1 = os.getenv("CHATGPT_SESSION_TOKEN_1", "").strip() or None
            p = ChatGPTProvider(chatgpt_token, session_token_1=chatgpt_token_1)
            results = p.list_conversations(offset=0, limit=1)
            add("ChatGPT API reachable", True, f"Got {len(results)} result(s)")
        except ProviderError as e:
@@ -594,11 +601,15 @@ def export(
                conv_id = raw_conv.get("id") or raw_conv.get("uuid", "unknown")
                try:
                    full_raw = prov_instance.get_conversation(conv_id)
-                    # Propagate provider annotations from the listing summary
+                    # Propagate metadata from the listing summary into the full
-                    # (e.g. _project_name set by ChatGPT project fetching) into
+                    # detail so normalize_conversation can use it.
-                    # the full detail so normalize_conversation can use them.
+                    # - Keys starting with "_" are provider annotations
                    #   (e.g. _project_name injected by ChatGPT project fetching).
                    # - "project" is included explicitly because Claude's detail
                    #   endpoint omits it even though the listing returns it.
                    _PROPAGATE_KEYS = {"project"}
                    for key, val in raw_conv.items():
-                        if key.startswith("_") and key not in full_raw:
+                        if (key.startswith("_") or key in _PROPAGATE_KEYS) and key not in full_raw:
                            full_raw[key] = val
                    normalized = prov_instance.normalize_conversation(full_raw)
@@ -661,6 +672,7 @@ def _resolve_providers(provider: str, cfg) -> list[tuple[str, object]]:
                    "chatgpt",
                    ChatGPTProvider(
                        session_token=cfg.chatgpt_session_token,
                        session_token_1=cfg.chatgpt_session_token_1,
                        project_ids=cfg.chatgpt_project_ids,
                    ),
                ))
--- a/src/providers/chatgpt.py
+++ b/src/providers/chatgpt.py
@@ -56,6 +56,7 @@ class ChatGPTProvider(BaseProvider):
    def __init__(
        self,
        session_token: str | None = None,
        session_token_1: str | None = None,
        project_ids: list[str] | None = None,
    ) -> None:
        # Pass a curl_cffi session to the base class instead of a requests.Session.
@@ -82,6 +83,10 @@ class ChatGPTProvider(BaseProvider):
            )
        self._session_token = token
        # Second chunk of the session token (ChatGPT splits large cookies into
        # __Secure-next-auth.session-token.0 and .1 to stay under the 4KB limit).
        token_1 = session_token_1 or os.getenv("CHATGPT_SESSION_TOKEN_1", "").strip() or None
        # Project gizmo IDs (g-p-xxx) whose conversations we'll fetch.
        # ChatGPT project conversations do not appear in the default
        # /conversations listing — they require explicit project IDs.
@@ -93,13 +98,24 @@ class ChatGPTProvider(BaseProvider):
        # Cache of project_id → display name (avoids re-fetching gizmo details)
        self._project_name_cache: dict[str, str] = {}
-        # Set the session cookie in the cookie jar
+        # ChatGPT now splits large session cookies into .0 / .1 chunks.
        # Always send chunk .0; send chunk .1 only when one is configured. The server reassembles them.
        self._session.cookies.set(
-            "__Secure-next-auth.session-token",
+            "__Secure-next-auth.session-token.0",
            token,
            domain="chatgpt.com",
            path="/",
        )
        if token_1:
            self._session.cookies.set(
                "__Secure-next-auth.session-token.1",
                token_1,
                domain="chatgpt.com",
                path="/",
            )
            logger.debug("[chatgpt] Set both session cookie chunks (.0 and .1)")
        else:
            logger.debug("[chatgpt] Set session cookie chunk .0 only (no .1 configured)")
        # Set only Referer and sec-fetch-* headers for the auth exchange.
        # Origin is intentionally omitted: Chrome does not send Origin on