fix: use curl_cffi Chrome TLS impersonation to bypass Cloudflare
chatgpt.com sits behind Cloudflare, whose TLS fingerprinting (JA3/JA4) blocks the Python requests library regardless of which cookies are sent. curl_cffi impersonates Chrome's exact TLS handshake, making our requests indistinguishable from a real browser's at the transport layer.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
26
debug_auth.py
Normal file
26
debug_auth.py
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
"""Debug script — checks what /api/auth/session returns using curl_cffi Chrome impersonation."""
|
||||||
|
import os
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
from curl_cffi import requests as curl_requests
|
||||||
|
|
||||||
|
# Load variables from a local .env file into the process environment so that
# CHATGPT_SESSION_TOKEN can be read with os.getenv below.
load_dotenv()

# Strip surrounding whitespace so a padded .env value is not sent verbatim as
# the cookie, and so a whitespace-only value is reported as missing.
token = (os.getenv("CHATGPT_SESSION_TOKEN") or "").strip()
if not token:
    print("ERROR: CHATGPT_SESSION_TOKEN not found in .env")
    raise SystemExit(1)

# The context manager closes the underlying curl session even if the request
# raises (timeout, connection error, ...).
with curl_requests.Session(impersonate="chrome120") as s:
    # Put the token in the cookie jar, scoped to chatgpt.com.
    s.cookies.set("__Secure-next-auth.session-token", token, domain="chatgpt.com", path="/")

    # Headers sent alongside the cookie on the same-origin session request.
    s.headers.update(
        {
            "Referer": "https://chatgpt.com/",
            "Accept": "*/*",
            "sec-fetch-dest": "empty",
            "sec-fetch-mode": "cors",
            "sec-fetch-site": "same-origin",
        }
    )

    print("Calling /api/auth/session (with Chrome TLS impersonation) ...")
    r = s.get("https://chatgpt.com/api/auth/session", timeout=15)

    print(f"Status: {r.status_code}")
    print(f"Content-Type: {r.headers.get('content-type', '(none)')}")
    # NOTE: a successful response is expected to carry the short-lived
    # accessToken, so treat this output as sensitive and do not paste it publicly.
    print(f"Response body (first 500 chars):\n{r.text[:500]}")
|
||||||
@@ -9,6 +9,7 @@ description = "Export ChatGPT and Claude conversation history to Markdown for pe
|
|||||||
requires-python = ">=3.11"
|
requires-python = ">=3.11"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"requests==2.31.0",
|
"requests==2.31.0",
|
||||||
|
"curl_cffi==0.14.0",
|
||||||
"click==8.1.7",
|
"click==8.1.7",
|
||||||
"python-dotenv==1.0.1",
|
"python-dotenv==1.0.1",
|
||||||
"rich==13.7.1",
|
"rich==13.7.1",
|
||||||
|
|||||||
@@ -1,14 +1,17 @@
|
|||||||
# Editable Git install with no remote (ai-chat-exporter==0.1.0)
|
# Editable Git install with no remote (ai-chat-exporter==0.1.0)
|
||||||
-e /home/jesse/services/ai-chatexport
|
-e /home/jesse/services/ai-chatexport
|
||||||
certifi==2026.2.25
|
certifi==2026.2.25
|
||||||
|
cffi==2.0.0
|
||||||
charset-normalizer==3.4.4
|
charset-normalizer==3.4.4
|
||||||
click==8.1.7
|
click==8.1.7
|
||||||
|
curl_cffi==0.14.0
|
||||||
idna==3.11
|
idna==3.11
|
||||||
iniconfig==2.3.0
|
iniconfig==2.3.0
|
||||||
markdown-it-py==4.0.0
|
markdown-it-py==4.0.0
|
||||||
mdurl==0.1.2
|
mdurl==0.1.2
|
||||||
packaging==26.0
|
packaging==26.0
|
||||||
pluggy==1.6.0
|
pluggy==1.6.0
|
||||||
|
pycparser==3.0
|
||||||
Pygments==2.19.2
|
Pygments==2.19.2
|
||||||
PyJWT==2.8.0
|
PyJWT==2.8.0
|
||||||
pytest==8.1.1
|
pytest==8.1.1
|
||||||
|
|||||||
@@ -4,6 +4,8 @@ import logging
|
|||||||
import os
|
import os
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
|
from curl_cffi import requests as curl_requests
|
||||||
|
|
||||||
from src.providers.base import BaseProvider, ProviderError, REQUEST_TIMEOUT
|
from src.providers.base import BaseProvider, ProviderError, REQUEST_TIMEOUT
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
@@ -11,13 +13,20 @@ logger = logging.getLogger(__name__)
|
|||||||
BASE_URL = "https://chatgpt.com/backend-api"
|
BASE_URL = "https://chatgpt.com/backend-api"
|
||||||
AUTH_SESSION_URL = "https://chatgpt.com/api/auth/session"
|
AUTH_SESSION_URL = "https://chatgpt.com/api/auth/session"
|
||||||
|
|
||||||
|
# Chrome version to impersonate — must match a version curl_cffi supports.
|
||||||
|
# Run: python -c "from curl_cffi.requests import BrowserType; print(list(BrowserType))"
|
||||||
|
IMPERSONATE = "chrome120"
|
||||||
|
|
||||||
|
|
||||||
class ChatGPTProvider(BaseProvider):
|
class ChatGPTProvider(BaseProvider):
|
||||||
"""Provider for ChatGPT conversations via the internal web API.
|
"""Provider for ChatGPT conversations via the internal web API.
|
||||||
|
|
||||||
|
Uses curl_cffi to impersonate Chrome's TLS fingerprint, bypassing
|
||||||
|
Cloudflare's bot detection which blocks standard Python requests.
|
||||||
|
|
||||||
Authentication is a two-step process:
|
Authentication is a two-step process:
|
||||||
1. Send __Secure-next-auth.session-token as a Cookie header to
|
1. Send __Secure-next-auth.session-token as a Cookie to /api/auth/session
|
||||||
/api/auth/session to obtain a short-lived accessToken.
|
to obtain a short-lived accessToken.
|
||||||
2. Use that accessToken as the Bearer token for all backend-api calls.
|
2. Use that accessToken as the Bearer token for all backend-api calls.
|
||||||
|
|
||||||
Token: __Secure-next-auth.session-token cookie (~7 day lifetime).
|
Token: __Secure-next-auth.session-token cookie (~7 day lifetime).
|
||||||
@@ -26,7 +35,11 @@ class ChatGPTProvider(BaseProvider):
|
|||||||
provider_name = "chatgpt"
|
provider_name = "chatgpt"
|
||||||
|
|
||||||
def __init__(self, session_token: str | None = None) -> None:
|
def __init__(self, session_token: str | None = None) -> None:
|
||||||
super().__init__()
|
# Pass a curl_cffi session to the base class instead of a requests.Session.
|
||||||
|
# curl_cffi.requests.Session is API-compatible with requests.Session.
|
||||||
|
cf_session = curl_requests.Session(impersonate=IMPERSONATE)
|
||||||
|
super().__init__(session=cf_session) # type: ignore[arg-type]
|
||||||
|
|
||||||
token = session_token or os.getenv("CHATGPT_SESSION_TOKEN", "").strip()
|
token = session_token or os.getenv("CHATGPT_SESSION_TOKEN", "").strip()
|
||||||
if not token:
|
if not token:
|
||||||
raise ProviderError(
|
raise ProviderError(
|
||||||
@@ -39,7 +52,7 @@ class ChatGPTProvider(BaseProvider):
|
|||||||
)
|
)
|
||||||
self._session_token = token
|
self._session_token = token
|
||||||
|
|
||||||
# Set the session cookie in the cookie jar (proper cookie handling, not a raw header)
|
# Set the session cookie in the cookie jar
|
||||||
self._session.cookies.set(
|
self._session.cookies.set(
|
||||||
"__Secure-next-auth.session-token",
|
"__Secure-next-auth.session-token",
|
||||||
token,
|
token,
|
||||||
@@ -47,7 +60,6 @@ class ChatGPTProvider(BaseProvider):
|
|||||||
path="/",
|
path="/",
|
||||||
)
|
)
|
||||||
|
|
||||||
# Additional browser-like headers required by chatgpt.com
|
|
||||||
self._session.headers.update(
|
self._session.headers.update(
|
||||||
{
|
{
|
||||||
"Referer": "https://chatgpt.com/",
|
"Referer": "https://chatgpt.com/",
|
||||||
@@ -61,7 +73,7 @@ class ChatGPTProvider(BaseProvider):
|
|||||||
# Exchange the session cookie for an access token
|
# Exchange the session cookie for an access token
|
||||||
self._access_token: str = self._fetch_access_token()
|
self._access_token: str = self._fetch_access_token()
|
||||||
self._session.headers["Authorization"] = f"Bearer {self._access_token}"
|
self._session.headers["Authorization"] = f"Bearer {self._access_token}"
|
||||||
logger.debug("[chatgpt] Session initialised — access token obtained (token: [REDACTED])")
|
logger.debug("[chatgpt] Session initialised with Chrome TLS impersonation (token: [REDACTED])")
|
||||||
|
|
||||||
def _fetch_access_token(self) -> str:
|
def _fetch_access_token(self) -> str:
|
||||||
"""Exchange the session cookie for a Bearer access token.
|
"""Exchange the session cookie for a Bearer access token.
|
||||||
|
|||||||
Reference in New Issue
Block a user