diff --git a/debug_auth.py b/debug_auth.py
new file mode 100644
index 0000000..2c1fa7b
--- /dev/null
+++ b/debug_auth.py
@@ -0,0 +1,49 @@
+"""Debug script — checks what /api/auth/session returns using curl_cffi Chrome impersonation.
+
+The accessToken value in the response body is masked before printing.
+"""
+import os
+import re
+
+from curl_cffi import requests as curl_requests
+from dotenv import load_dotenv
+
+AUTH_SESSION_URL = "https://chatgpt.com/api/auth/session"
+# Keep in sync with IMPERSONATE in src/providers/chatgpt.py.
+IMPERSONATE = "chrome120"
+# The session endpoint hands back a live accessToken; mask it so it never
+# ends up in terminal scrollback or a pasted bug report.
+_ACCESS_TOKEN_FIELD = re.compile(r'"accessToken"\s*:\s*"[^"]*"')
+
+
+def main() -> None:
+    """Call the auth session endpoint once and print a redacted summary."""
+    load_dotenv()
+    # Strip whitespace the same way ChatGPTProvider does, so a stray space or
+    # newline in .env is not sent as part of the cookie value.
+    token = os.getenv("CHATGPT_SESSION_TOKEN", "").strip()
+    if not token:
+        print("ERROR: CHATGPT_SESSION_TOKEN not found in .env")
+        raise SystemExit(1)
+
+    # The context manager closes the session once the request is finished.
+    with curl_requests.Session(impersonate=IMPERSONATE) as s:
+        s.cookies.set("__Secure-next-auth.session-token", token, domain="chatgpt.com", path="/")
+        s.headers.update({
+            "Referer": "https://chatgpt.com/",
+            "Accept": "*/*",
+            "sec-fetch-dest": "empty",
+            "sec-fetch-mode": "cors",
+            "sec-fetch-site": "same-origin",
+        })
+
+        print("Calling /api/auth/session (with Chrome TLS impersonation) ...")
+        r = s.get(AUTH_SESSION_URL, timeout=15)
+        body = _ACCESS_TOKEN_FIELD.sub('"accessToken":"[REDACTED]"', r.text)
+        print(f"Status: {r.status_code}")
+        print(f"Content-Type: {r.headers.get('content-type', '(none)')}")
+        print(f"Response body (first 500 chars):\n{body[:500]}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/pyproject.toml b/pyproject.toml
index a432976..fe20338 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -9,6 +9,7 @@ description = "Export ChatGPT and Claude conversation history to Markdown for pe
 requires-python = ">=3.11"
 dependencies = [
     "requests==2.31.0",
+    "curl_cffi==0.14.0",
     "click==8.1.7",
     "python-dotenv==1.0.1",
     "rich==13.7.1",
diff --git a/requirements.txt b/requirements.txt
index 722a304..dab0d28 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,14 +1,17 @@
 # Editable Git install with no remote (ai-chat-exporter==0.1.0)
 -e /home/jesse/services/ai-chatexport
 certifi==2026.2.25
+cffi==2.0.0
 charset-normalizer==3.4.4
 click==8.1.7
+curl_cffi==0.14.0
 idna==3.11
 iniconfig==2.3.0
 markdown-it-py==4.0.0
 mdurl==0.1.2
 packaging==26.0
 pluggy==1.6.0
+pycparser==3.0
 Pygments==2.19.2
 PyJWT==2.8.0
 pytest==8.1.1
diff --git a/src/providers/chatgpt.py
b/src/providers/chatgpt.py index d45270d..57c02e0 100644 --- a/src/providers/chatgpt.py +++ b/src/providers/chatgpt.py @@ -4,6 +4,8 @@ import logging import os from typing import Any +from curl_cffi import requests as curl_requests + from src.providers.base import BaseProvider, ProviderError, REQUEST_TIMEOUT logger = logging.getLogger(__name__) @@ -11,13 +13,20 @@ logger = logging.getLogger(__name__) BASE_URL = "https://chatgpt.com/backend-api" AUTH_SESSION_URL = "https://chatgpt.com/api/auth/session" +# Chrome version to impersonate — must match a version curl_cffi supports. +# Run: python -c "from curl_cffi.requests import BrowserType; print(list(BrowserType))" +IMPERSONATE = "chrome120" + class ChatGPTProvider(BaseProvider): """Provider for ChatGPT conversations via the internal web API. + Uses curl_cffi to impersonate Chrome's TLS fingerprint, bypassing + Cloudflare's bot detection which blocks standard Python requests. + Authentication is a two-step process: - 1. Send __Secure-next-auth.session-token as a Cookie header to - /api/auth/session to obtain a short-lived accessToken. + 1. Send __Secure-next-auth.session-token as a Cookie to /api/auth/session + to obtain a short-lived accessToken. 2. Use that accessToken as the Bearer token for all backend-api calls. Token: __Secure-next-auth.session-token cookie (~7 day lifetime). @@ -26,7 +35,11 @@ class ChatGPTProvider(BaseProvider): provider_name = "chatgpt" def __init__(self, session_token: str | None = None) -> None: - super().__init__() + # Pass a curl_cffi session to the base class instead of a requests.Session. + # curl_cffi.requests.Session is API-compatible with requests.Session. 
+ cf_session = curl_requests.Session(impersonate=IMPERSONATE) + super().__init__(session=cf_session) # type: ignore[arg-type] + token = session_token or os.getenv("CHATGPT_SESSION_TOKEN", "").strip() if not token: raise ProviderError( @@ -39,7 +52,7 @@ class ChatGPTProvider(BaseProvider): ) self._session_token = token - # Set the session cookie in the cookie jar (proper cookie handling, not a raw header) + # Set the session cookie in the cookie jar self._session.cookies.set( "__Secure-next-auth.session-token", token, @@ -47,7 +60,6 @@ class ChatGPTProvider(BaseProvider): path="/", ) - # Additional browser-like headers required by chatgpt.com self._session.headers.update( { "Referer": "https://chatgpt.com/", @@ -61,7 +73,7 @@ class ChatGPTProvider(BaseProvider): # Exchange the session cookie for an access token self._access_token: str = self._fetch_access_token() self._session.headers["Authorization"] = f"Bearer {self._access_token}" - logger.debug("[chatgpt] Session initialised — access token obtained (token: [REDACTED])") + logger.debug("[chatgpt] Session initialised with Chrome TLS impersonation (token: [REDACTED])") def _fetch_access_token(self) -> str: """Exchange the session cookie for a Bearer access token.