diff --git a/.env.example b/.env.example index adc7885..cb62497 100644 --- a/.env.example +++ b/.env.example @@ -6,9 +6,12 @@ # --- ChatGPT --- # How to get: open chatgpt.com in Chrome → F12 → Application tab -# → Cookies → https://chatgpt.com → find "__Secure-next-auth.session-token" → copy Value -# Token type: JWT (starts with "eyJ"). Typically valid for ~7 days. +# → Cookies → https://chatgpt.com → find the two cookie chunks: +# __Secure-next-auth.session-token.0 (starts with "eyJ") → CHATGPT_SESSION_TOKEN +# __Secure-next-auth.session-token.1 (the remainder) → CHATGPT_SESSION_TOKEN_1 +# Token type: JWE. Typically valid for ~7 days. CHATGPT_SESSION_TOKEN= +CHATGPT_SESSION_TOKEN_1= # ChatGPT Projects (optional): comma-separated list of project gizmo IDs. # Project conversations are NOT included in the default /conversations listing. diff --git a/pyproject.toml b/pyproject.toml index f0ca2a3..dcf27a8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "ai-chat-exporter" -version = "0.2.0" +version = "0.2.1" description = "Export ChatGPT and Claude conversation history to Markdown for personal archival in Joplin" requires-python = ">=3.11" dependencies = [ diff --git a/src/config.py b/src/config.py index 13f4ffb..a27c8f1 100644 --- a/src/config.py +++ b/src/config.py @@ -28,6 +28,7 @@ class ConfigError(Exception): @dataclass class Config: chatgpt_session_token: str | None + chatgpt_session_token_1: str | None claude_session_key: str | None export_dir: Path output_structure: str @@ -55,6 +56,7 @@ def load_config() -> Config: load_dotenv(override=False) chatgpt_token = os.getenv("CHATGPT_SESSION_TOKEN", "").strip() or None + chatgpt_token_1 = os.getenv("CHATGPT_SESSION_TOKEN_1", "").strip() or None claude_key = os.getenv("CLAUDE_SESSION_KEY", "").strip() or None export_dir = Path(os.getenv("EXPORT_DIR", "./exports")).expanduser() output_structure = os.getenv("OUTPUT_STRUCTURE", 
"provider/project/year").strip() @@ -127,6 +129,7 @@ def load_config() -> Config: config = Config( chatgpt_session_token=chatgpt_token, + chatgpt_session_token_1=chatgpt_token_1, claude_session_key=claude_key, export_dir=export_dir, output_structure=output_structure, diff --git a/src/main.py b/src/main.py index 9a3da7e..0dd522e 100644 --- a/src/main.py +++ b/src/main.py @@ -153,15 +153,19 @@ def _auth_chatgpt(os_name: str) -> None: else: console.print("2. Press [bold]F12[/bold] to open DevTools → Application tab.") console.print("3. Expand [bold]Cookies[/bold] → [bold]https://chatgpt.com[/bold]") - console.print("4. Find [bold]__Secure-next-auth.session-token[/bold] → copy the Value.") - console.print(" (Token starts with 'eyJ...' — it is a long JWT string)") - console.print("5. Paste it below (input is hidden).\n") + console.print("4. ChatGPT splits the session token across two cookies:") + console.print(" [bold]__Secure-next-auth.session-token.0[/bold] (starts with 'eyJ')") + console.print(" [bold]__Secure-next-auth.session-token.1[/bold] (the remainder)") + console.print(" Copy each Value in turn and paste below.") + console.print(" (If you only see one cookie without a .0/.1 suffix, paste it for .0 and leave .1 blank.)\n") - token = click.prompt("ChatGPT session token", hide_input=True, default="", show_default=False).strip() + token = click.prompt("ChatGPT session token (.0)", hide_input=True, default="", show_default=False).strip() if not token: console.print("[yellow]Skipped ChatGPT token.[/yellow]") return + token_1 = click.prompt("ChatGPT session token (.1, leave blank if absent)", hide_input=True, default="", show_default=False).strip() or None + # Validate if not token.startswith("eyJ"): console.print("[yellow]Warning: token doesn't look like a JWT (expected 'eyJ...').[/yellow]") @@ -184,7 +188,7 @@ def _auth_chatgpt(os_name: str) -> None: with console.status("[dim]Validating token with ChatGPT API…[/dim]"): try: from src.providers.chatgpt import 
ChatGPTProvider - _prov = ChatGPTProvider(session_token=token) + _prov = ChatGPTProvider(session_token=token, session_token_1=token_1) _prov._fetch_access_token() _valid = True except ProviderError as e: @@ -198,6 +202,8 @@ def _auth_chatgpt(os_name: str) -> None: console.print(f"[red]✗ Token validation failed: {_error}[/red]") _write_token_to_env("CHATGPT_SESSION_TOKEN", token) + if token_1: + _write_token_to_env("CHATGPT_SESSION_TOKEN_1", token_1) # --- ChatGPT Projects --- console.print("\n[bold]ChatGPT Projects (optional)[/bold]") @@ -402,7 +408,8 @@ def _run_doctor_checks() -> list[dict]: if chatgpt_token: try: from src.providers.chatgpt import ChatGPTProvider - p = ChatGPTProvider(chatgpt_token) + chatgpt_token_1 = os.getenv("CHATGPT_SESSION_TOKEN_1", "").strip() or None + p = ChatGPTProvider(chatgpt_token, session_token_1=chatgpt_token_1) results = p.list_conversations(offset=0, limit=1) add("ChatGPT API reachable", True, f"Got {len(results)} result(s)") except ProviderError as e: @@ -594,11 +601,15 @@ def export( conv_id = raw_conv.get("id") or raw_conv.get("uuid", "unknown") try: full_raw = prov_instance.get_conversation(conv_id) - # Propagate provider annotations from the listing summary - # (e.g. _project_name set by ChatGPT project fetching) into - # the full detail so normalize_conversation can use them. + # Propagate metadata from the listing summary into the full + # detail so normalize_conversation can use it. + # - Keys starting with "_" are provider annotations + # (e.g. _project_name injected by ChatGPT project fetching). + # - "project" is included explicitly because Claude's detail + # endpoint omits it even though the listing returns it. 
+ _PROPAGATE_KEYS = {"project"} for key, val in raw_conv.items(): - if key.startswith("_") and key not in full_raw: + if (key.startswith("_") or key in _PROPAGATE_KEYS) and key not in full_raw: full_raw[key] = val normalized = prov_instance.normalize_conversation(full_raw) @@ -661,6 +672,7 @@ def _resolve_providers(provider: str, cfg) -> list[tuple[str, object]]: "chatgpt", ChatGPTProvider( session_token=cfg.chatgpt_session_token, + session_token_1=cfg.chatgpt_session_token_1, project_ids=cfg.chatgpt_project_ids, ), )) diff --git a/src/providers/chatgpt.py b/src/providers/chatgpt.py index f6e0f42..6a6ca12 100644 --- a/src/providers/chatgpt.py +++ b/src/providers/chatgpt.py @@ -56,6 +56,7 @@ class ChatGPTProvider(BaseProvider): def __init__( self, session_token: str | None = None, + session_token_1: str | None = None, project_ids: list[str] | None = None, ) -> None: # Pass a curl_cffi session to the base class instead of a requests.Session. @@ -82,6 +83,10 @@ class ChatGPTProvider(BaseProvider): ) self._session_token = token + # Second chunk of the session token (ChatGPT splits large cookies into + # __Secure-next-auth.session-token.0 and .1 to stay under the 4KB limit). + token_1 = session_token_1 or os.getenv("CHATGPT_SESSION_TOKEN_1", "").strip() or None + # Project gizmo IDs (g-p-xxx) whose conversations we'll fetch. # ChatGPT project conversations do not appear in the default # /conversations listing — they require explicit project IDs. @@ -93,13 +98,24 @@ class ChatGPTProvider(BaseProvider): # Cache of project_id → display name (avoids re-fetching gizmo details) self._project_name_cache: dict[str, str] = {} - # Set the session cookie in the cookie jar + # ChatGPT now splits large session cookies into .0 / .1 chunks. + # Always send chunk .0, plus chunk .1 when one is configured; the server reassembles them.
self._session.cookies.set( - "__Secure-next-auth.session-token", + "__Secure-next-auth.session-token.0", token, domain="chatgpt.com", path="/", ) + if token_1: + self._session.cookies.set( + "__Secure-next-auth.session-token.1", + token_1, + domain="chatgpt.com", + path="/", + ) + logger.debug("[chatgpt] Set both session cookie chunks (.0 and .1)") + else: + logger.debug("[chatgpt] Set session cookie chunk .0 only (no .1 configured)") # Set only Referer and sec-fetch-* headers for the auth exchange. # Origin is intentionally omitted: Chrome does not send Origin on