fix: use curl_cffi Chrome TLS impersonation to bypass Cloudflare
chatgpt.com uses Cloudflare's TLS fingerprinting (JA3/JA4), which blocks the Python requests library regardless of which cookies are sent. curl_cffi impersonates Chrome's exact TLS handshake, making requests indistinguishable from a real browser at the transport layer.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
26
debug_auth.py
Normal file
26
debug_auth.py
Normal file
@@ -0,0 +1,26 @@
|
||||
"""Debug script — checks what /api/auth/session returns using curl_cffi Chrome impersonation."""
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
from curl_cffi import requests as curl_requests
|
||||
|
||||
def redact_access_token(body: str) -> str:
    """Return *body* with the value of any ``accessToken`` JSON field masked.

    The session endpoint is the one the exporter calls to obtain its bearer
    accessToken, so the raw response body can contain a live credential.
    Masking it keeps the debug output safe to paste into a bug report.
    Text that has no ``accessToken`` field (for example an HTML challenge
    page) is returned unchanged.
    """
    # Function-scope import so the script's top-level import block stays as is.
    import re

    return re.sub(r'("accessToken"\s*:\s*")[^"]*', r"\1[REDACTED]", body)


def main() -> None:
    """Call /api/auth/session with Chrome TLS impersonation and print the result.

    Reads CHATGPT_SESSION_TOKEN from the environment (after loading .env),
    sends it as the __Secure-next-auth.session-token cookie, and prints the
    status code, content type and the first 500 characters of the body.

    Raises:
        SystemExit: with code 1 when CHATGPT_SESSION_TOKEN is not set.
    """
    load_dotenv()
    token = os.getenv("CHATGPT_SESSION_TOKEN")
    if not token:
        print("ERROR: CHATGPT_SESSION_TOKEN not found in .env")
        raise SystemExit(1)

    # The context manager closes the underlying curl handle even if the
    # request raises.
    with curl_requests.Session(impersonate="chrome120") as s:
        s.cookies.set(
            "__Secure-next-auth.session-token",
            token,
            domain="chatgpt.com",
            path="/",
        )
        s.headers.update(
            {
                "Referer": "https://chatgpt.com/",
                "Accept": "*/*",
                "sec-fetch-dest": "empty",
                "sec-fetch-mode": "cors",
                "sec-fetch-site": "same-origin",
            }
        )

        print("Calling /api/auth/session (with Chrome TLS impersonation) ...")
        r = s.get("https://chatgpt.com/api/auth/session", timeout=15)
        print(f"Status: {r.status_code}")
        print(f"Content-Type: {r.headers.get('content-type', '(none)')}")
        # Redact before truncating so a token that straddles the 500-char
        # cut-off is still masked in full.
        body = redact_access_token(r.text)
        print(f"Response body (first 500 chars):\n{body[:500]}")


if __name__ == "__main__":
    main()
|
||||
@@ -9,6 +9,7 @@ description = "Export ChatGPT and Claude conversation history to Markdown for pe
|
||||
requires-python = ">=3.11"
|
||||
dependencies = [
|
||||
"requests==2.31.0",
|
||||
"curl_cffi==0.14.0",
|
||||
"click==8.1.7",
|
||||
"python-dotenv==1.0.1",
|
||||
"rich==13.7.1",
|
||||
|
||||
@@ -1,14 +1,17 @@
|
||||
# Editable Git install with no remote (ai-chat-exporter==0.1.0)
|
||||
-e /home/jesse/services/ai-chatexport
|
||||
certifi==2026.2.25
|
||||
cffi==2.0.0
|
||||
charset-normalizer==3.4.4
|
||||
click==8.1.7
|
||||
curl_cffi==0.14.0
|
||||
idna==3.11
|
||||
iniconfig==2.3.0
|
||||
markdown-it-py==4.0.0
|
||||
mdurl==0.1.2
|
||||
packaging==26.0
|
||||
pluggy==1.6.0
|
||||
pycparser==3.0
|
||||
Pygments==2.19.2
|
||||
PyJWT==2.8.0
|
||||
pytest==8.1.1
|
||||
|
||||
@@ -4,6 +4,8 @@ import logging
|
||||
import os
|
||||
from typing import Any
|
||||
|
||||
from curl_cffi import requests as curl_requests
|
||||
|
||||
from src.providers.base import BaseProvider, ProviderError, REQUEST_TIMEOUT
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -11,13 +13,20 @@ logger = logging.getLogger(__name__)
|
||||
BASE_URL = "https://chatgpt.com/backend-api"
|
||||
AUTH_SESSION_URL = "https://chatgpt.com/api/auth/session"
|
||||
|
||||
# Chrome version to impersonate — must match a version curl_cffi supports.
|
||||
# Run: python -c "from curl_cffi.requests import BrowserType; print(list(BrowserType))"
|
||||
IMPERSONATE = "chrome120"
|
||||
|
||||
|
||||
class ChatGPTProvider(BaseProvider):
|
||||
"""Provider for ChatGPT conversations via the internal web API.
|
||||
|
||||
Uses curl_cffi to impersonate Chrome's TLS fingerprint, bypassing
|
||||
Cloudflare's bot detection which blocks standard Python requests.
|
||||
|
||||
Authentication is a two-step process:
|
||||
1. Send __Secure-next-auth.session-token as a Cookie header to
|
||||
/api/auth/session to obtain a short-lived accessToken.
|
||||
1. Send __Secure-next-auth.session-token as a Cookie to /api/auth/session
|
||||
to obtain a short-lived accessToken.
|
||||
2. Use that accessToken as the Bearer token for all backend-api calls.
|
||||
|
||||
Token: __Secure-next-auth.session-token cookie (~7 day lifetime).
|
||||
@@ -26,7 +35,11 @@ class ChatGPTProvider(BaseProvider):
|
||||
provider_name = "chatgpt"
|
||||
|
||||
def __init__(self, session_token: str | None = None) -> None:
|
||||
super().__init__()
|
||||
# Pass a curl_cffi session to the base class instead of a requests.Session.
|
||||
# curl_cffi.requests.Session is API-compatible with requests.Session.
|
||||
cf_session = curl_requests.Session(impersonate=IMPERSONATE)
|
||||
super().__init__(session=cf_session) # type: ignore[arg-type]
|
||||
|
||||
token = session_token or os.getenv("CHATGPT_SESSION_TOKEN", "").strip()
|
||||
if not token:
|
||||
raise ProviderError(
|
||||
@@ -39,7 +52,7 @@ class ChatGPTProvider(BaseProvider):
|
||||
)
|
||||
self._session_token = token
|
||||
|
||||
# Set the session cookie in the cookie jar (proper cookie handling, not a raw header)
|
||||
# Set the session cookie in the cookie jar
|
||||
self._session.cookies.set(
|
||||
"__Secure-next-auth.session-token",
|
||||
token,
|
||||
@@ -47,7 +60,6 @@ class ChatGPTProvider(BaseProvider):
|
||||
path="/",
|
||||
)
|
||||
|
||||
# Additional browser-like headers required by chatgpt.com
|
||||
self._session.headers.update(
|
||||
{
|
||||
"Referer": "https://chatgpt.com/",
|
||||
@@ -61,7 +73,7 @@ class ChatGPTProvider(BaseProvider):
|
||||
# Exchange the session cookie for an access token
|
||||
self._access_token: str = self._fetch_access_token()
|
||||
self._session.headers["Authorization"] = f"Bearer {self._access_token}"
|
||||
logger.debug("[chatgpt] Session initialised — access token obtained (token: [REDACTED])")
|
||||
logger.debug("[chatgpt] Session initialised with Chrome TLS impersonation (token: [REDACTED])")
|
||||
|
||||
def _fetch_access_token(self) -> str:
|
||||
"""Exchange the session cookie for a Bearer access token.
|
||||
|
||||
Reference in New Issue
Block a user