fix: use curl_cffi Chrome TLS impersonation to bypass Cloudflare

chatgpt.com uses Cloudflare's TLS fingerprinting (JA3/JA4), which
blocks Python's `requests` library regardless of cookies. curl_cffi impersonates
Chrome's exact TLS handshake, making requests indistinguishable from
a real browser at the transport layer.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
JesseMarkowitz
2026-02-28 05:20:52 -05:00
parent d236fdb21a
commit 5c6dcafa34
4 changed files with 48 additions and 6 deletions

View File

@@ -4,6 +4,8 @@ import logging
import os
from typing import Any
from curl_cffi import requests as curl_requests
from src.providers.base import BaseProvider, ProviderError, REQUEST_TIMEOUT
logger = logging.getLogger(__name__)
@@ -11,13 +13,20 @@ logger = logging.getLogger(__name__)
# Base URL of the internal backend API. Per the provider's docstring, calls to
# backend-api are authorised with the Bearer access token.
BASE_URL = "https://chatgpt.com/backend-api"
# Endpoint that receives the session-token cookie and returns a short-lived
# accessToken.
AUTH_SESSION_URL = "https://chatgpt.com/api/auth/session"
# Chrome version to impersonate — must match a version curl_cffi supports.
# Run: python -c "from curl_cffi.requests import BrowserType; print(list(BrowserType))"
# Passed as the `impersonate=` argument when the curl_cffi session is created.
IMPERSONATE = "chrome120"
class ChatGPTProvider(BaseProvider):
"""Provider for ChatGPT conversations via the internal web API.
Uses curl_cffi to impersonate Chrome's TLS fingerprint, bypassing
Cloudflare's bot detection which blocks standard Python requests.
Authentication is a two-step process:
1. Send __Secure-next-auth.session-token as a Cookie header to
/api/auth/session to obtain a short-lived accessToken.
1. Send __Secure-next-auth.session-token as a Cookie to /api/auth/session
to obtain a short-lived accessToken.
2. Use that accessToken as the Bearer token for all backend-api calls.
Token: __Secure-next-auth.session-token cookie (~7 day lifetime).
@@ -26,7 +35,11 @@ class ChatGPTProvider(BaseProvider):
provider_name = "chatgpt"
def __init__(self, session_token: str | None = None) -> None:
super().__init__()
# Pass a curl_cffi session to the base class instead of a requests.Session.
# curl_cffi.requests.Session is API-compatible with requests.Session.
cf_session = curl_requests.Session(impersonate=IMPERSONATE)
super().__init__(session=cf_session) # type: ignore[arg-type]
token = session_token or os.getenv("CHATGPT_SESSION_TOKEN", "").strip()
if not token:
raise ProviderError(
@@ -39,7 +52,7 @@ class ChatGPTProvider(BaseProvider):
)
self._session_token = token
# Set the session cookie in the cookie jar (proper cookie handling, not a raw header)
# Set the session cookie in the cookie jar
self._session.cookies.set(
"__Secure-next-auth.session-token",
token,
@@ -47,7 +60,6 @@ class ChatGPTProvider(BaseProvider):
path="/",
)
# Additional browser-like headers required by chatgpt.com
self._session.headers.update(
{
"Referer": "https://chatgpt.com/",
@@ -61,7 +73,7 @@ class ChatGPTProvider(BaseProvider):
# Exchange the session cookie for an access token
self._access_token: str = self._fetch_access_token()
self._session.headers["Authorization"] = f"Bearer {self._access_token}"
logger.debug("[chatgpt] Session initialised — access token obtained (token: [REDACTED])")
logger.debug("[chatgpt] Session initialised with Chrome TLS impersonation (token: [REDACTED])")
def _fetch_access_token(self) -> str:
"""Exchange the session cookie for a Bearer access token.