fix: use curl_cffi Chrome TLS impersonation to bypass Cloudflare

chatgpt.com uses Cloudflare's TLS fingerprinting (JA3/JA4), which
blocks requests made with Python's `requests` library regardless of
cookies. curl_cffi impersonates Chrome's exact TLS handshake, making
our requests indistinguishable from a real browser's at the transport layer.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
JesseMarkowitz
2026-02-28 05:20:52 -05:00
parent d236fdb21a
commit 5c6dcafa34
4 changed files with 48 additions and 6 deletions

26
debug_auth.py Normal file
View File

@@ -0,0 +1,26 @@
"""Debug script — checks what /api/auth/session returns using curl_cffi Chrome impersonation."""
import os

from curl_cffi import requests as curl_requests
from dotenv import load_dotenv

# Endpoint that exchanges the session cookie for session details.
AUTH_SESSION_URL = "https://chatgpt.com/api/auth/session"

# Browser profile whose TLS handshake curl_cffi should imitate.
IMPERSONATE = "chrome120"

# Seconds to wait for the endpoint before giving up.
TIMEOUT_SECONDS = 15


def main() -> None:
    """Call /api/auth/session once and print the status, content type and body.

    Reads CHATGPT_SESSION_TOKEN from the environment (after loading .env),
    sends it as the ``__Secure-next-auth.session-token`` cookie over a
    Chrome-impersonating curl_cffi session, and prints the first 500
    characters of the response.

    Raises:
        SystemExit: with exit code 1 when the token is missing or blank.
    """
    load_dotenv()

    # Strip surrounding whitespace so a stray space/newline in .env does not
    # end up inside the cookie value.
    token = os.getenv("CHATGPT_SESSION_TOKEN", "").strip()
    if not token:
        print("ERROR: CHATGPT_SESSION_TOKEN not found in .env")
        raise SystemExit(1)

    # The context manager closes the underlying curl handle even if the
    # request raises (timeout, DNS failure, ...).
    with curl_requests.Session(impersonate=IMPERSONATE) as session:
        session.cookies.set(
            "__Secure-next-auth.session-token",
            token,
            domain="chatgpt.com",
            path="/",
        )
        # Browser-like headers for a same-origin fetch from chatgpt.com.
        session.headers.update(
            {
                "Referer": "https://chatgpt.com/",
                "Accept": "*/*",
                "sec-fetch-dest": "empty",
                "sec-fetch-mode": "cors",
                "sec-fetch-site": "same-origin",
            }
        )

        print("Calling /api/auth/session (with Chrome TLS impersonation) ...")
        response = session.get(AUTH_SESSION_URL, timeout=TIMEOUT_SECONDS)

        print(f"Status: {response.status_code}")
        print(f"Content-Type: {response.headers.get('content-type', '(none)')}")
        # NOTE(review): on success this body presumably includes the
        # short-lived access token -- treat the output as sensitive and do
        # not paste it into tickets or chat.
        print(f"Response body (first 500 chars):\n{response.text[:500]}")


if __name__ == "__main__":
    main()

View File

@@ -9,6 +9,7 @@ description = "Export ChatGPT and Claude conversation history to Markdown for pe
requires-python = ">=3.11" requires-python = ">=3.11"
dependencies = [ dependencies = [
"requests==2.31.0", "requests==2.31.0",
"curl_cffi==0.14.0",
"click==8.1.7", "click==8.1.7",
"python-dotenv==1.0.1", "python-dotenv==1.0.1",
"rich==13.7.1", "rich==13.7.1",

View File

@@ -1,14 +1,17 @@
# Editable Git install with no remote (ai-chat-exporter==0.1.0) # Editable Git install with no remote (ai-chat-exporter==0.1.0)
-e /home/jesse/services/ai-chatexport -e /home/jesse/services/ai-chatexport
certifi==2026.2.25 certifi==2026.2.25
cffi==2.0.0
charset-normalizer==3.4.4 charset-normalizer==3.4.4
click==8.1.7 click==8.1.7
curl_cffi==0.14.0
idna==3.11 idna==3.11
iniconfig==2.3.0 iniconfig==2.3.0
markdown-it-py==4.0.0 markdown-it-py==4.0.0
mdurl==0.1.2 mdurl==0.1.2
packaging==26.0 packaging==26.0
pluggy==1.6.0 pluggy==1.6.0
pycparser==3.0
Pygments==2.19.2 Pygments==2.19.2
PyJWT==2.8.0 PyJWT==2.8.0
pytest==8.1.1 pytest==8.1.1

View File

@@ -4,6 +4,8 @@ import logging
import os import os
from typing import Any from typing import Any
from curl_cffi import requests as curl_requests
from src.providers.base import BaseProvider, ProviderError, REQUEST_TIMEOUT from src.providers.base import BaseProvider, ProviderError, REQUEST_TIMEOUT
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@@ -11,13 +13,20 @@ logger = logging.getLogger(__name__)
BASE_URL = "https://chatgpt.com/backend-api" BASE_URL = "https://chatgpt.com/backend-api"
AUTH_SESSION_URL = "https://chatgpt.com/api/auth/session" AUTH_SESSION_URL = "https://chatgpt.com/api/auth/session"
# Chrome version to impersonate — must match a version curl_cffi supports.
# Run: python -c "from curl_cffi.requests import BrowserType; print(list(BrowserType))"
IMPERSONATE = "chrome120"
class ChatGPTProvider(BaseProvider): class ChatGPTProvider(BaseProvider):
"""Provider for ChatGPT conversations via the internal web API. """Provider for ChatGPT conversations via the internal web API.
Uses curl_cffi to impersonate Chrome's TLS fingerprint, bypassing
Cloudflare's bot detection which blocks standard Python requests.
Authentication is a two-step process: Authentication is a two-step process:
1. Send __Secure-next-auth.session-token as a Cookie header to 1. Send __Secure-next-auth.session-token as a Cookie to /api/auth/session
/api/auth/session to obtain a short-lived accessToken. to obtain a short-lived accessToken.
2. Use that accessToken as the Bearer token for all backend-api calls. 2. Use that accessToken as the Bearer token for all backend-api calls.
Token: __Secure-next-auth.session-token cookie (~7 day lifetime). Token: __Secure-next-auth.session-token cookie (~7 day lifetime).
@@ -26,7 +35,11 @@ class ChatGPTProvider(BaseProvider):
provider_name = "chatgpt" provider_name = "chatgpt"
def __init__(self, session_token: str | None = None) -> None: def __init__(self, session_token: str | None = None) -> None:
super().__init__() # Pass a curl_cffi session to the base class instead of a requests.Session.
# curl_cffi.requests.Session is API-compatible with requests.Session.
cf_session = curl_requests.Session(impersonate=IMPERSONATE)
super().__init__(session=cf_session) # type: ignore[arg-type]
token = session_token or os.getenv("CHATGPT_SESSION_TOKEN", "").strip() token = session_token or os.getenv("CHATGPT_SESSION_TOKEN", "").strip()
if not token: if not token:
raise ProviderError( raise ProviderError(
@@ -39,7 +52,7 @@ class ChatGPTProvider(BaseProvider):
) )
self._session_token = token self._session_token = token
# Set the session cookie in the cookie jar (proper cookie handling, not a raw header) # Set the session cookie in the cookie jar
self._session.cookies.set( self._session.cookies.set(
"__Secure-next-auth.session-token", "__Secure-next-auth.session-token",
token, token,
@@ -47,7 +60,6 @@ class ChatGPTProvider(BaseProvider):
path="/", path="/",
) )
# Additional browser-like headers required by chatgpt.com
self._session.headers.update( self._session.headers.update(
{ {
"Referer": "https://chatgpt.com/", "Referer": "https://chatgpt.com/",
@@ -61,7 +73,7 @@ class ChatGPTProvider(BaseProvider):
# Exchange the session cookie for an access token # Exchange the session cookie for an access token
self._access_token: str = self._fetch_access_token() self._access_token: str = self._fetch_access_token()
self._session.headers["Authorization"] = f"Bearer {self._access_token}" self._session.headers["Authorization"] = f"Bearer {self._access_token}"
logger.debug("[chatgpt] Session initialised — access token obtained (token: [REDACTED])") logger.debug("[chatgpt] Session initialised with Chrome TLS impersonation (token: [REDACTED])")
def _fetch_access_token(self) -> str: def _fetch_access_token(self) -> str:
"""Exchange the session cookie for a Bearer access token. """Exchange the session cookie for a Bearer access token.