fix: use curl_cffi Chrome TLS impersonation to bypass Cloudflare
chatgpt.com uses Cloudflare's TLS fingerprinting (JA3/JA4), which blocks Python's `requests` library regardless of which cookies are sent. curl_cffi impersonates Chrome's exact TLS handshake, making its requests indistinguishable from a real browser's at the transport layer.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -4,6 +4,8 @@ import logging
|
||||
import os
|
||||
from typing import Any
|
||||
|
||||
from curl_cffi import requests as curl_requests
|
||||
|
||||
from src.providers.base import BaseProvider, ProviderError, REQUEST_TIMEOUT
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -11,13 +13,20 @@ logger = logging.getLogger(__name__)
|
||||
BASE_URL = "https://chatgpt.com/backend-api"
|
||||
AUTH_SESSION_URL = "https://chatgpt.com/api/auth/session"
|
||||
|
||||
# Chrome version to impersonate — must match a version curl_cffi supports.
|
||||
# To list the supported values, run: python -c "from curl_cffi.requests import BrowserType; print(list(BrowserType))"
|
||||
IMPERSONATE = "chrome120"
|
||||
|
||||
|
||||
class ChatGPTProvider(BaseProvider):
|
||||
"""Provider for ChatGPT conversations via the internal web API.
|
||||
|
||||
Uses curl_cffi to impersonate Chrome's TLS fingerprint, bypassing
|
||||
Cloudflare's bot detection which blocks standard Python requests.
|
||||
|
||||
Authentication is a two-step process:
|
||||
1. Send __Secure-next-auth.session-token as a Cookie header to
|
||||
/api/auth/session to obtain a short-lived accessToken.
|
||||
1. Send __Secure-next-auth.session-token as a Cookie to /api/auth/session
|
||||
to obtain a short-lived accessToken.
|
||||
2. Use that accessToken as the Bearer token for all backend-api calls.
|
||||
|
||||
Token: __Secure-next-auth.session-token cookie (~7 day lifetime).
|
||||
@@ -26,7 +35,11 @@ class ChatGPTProvider(BaseProvider):
|
||||
provider_name = "chatgpt"
|
||||
|
||||
def __init__(self, session_token: str | None = None) -> None:
|
||||
super().__init__()
|
||||
# Pass a curl_cffi session to the base class instead of a requests.Session.
|
||||
# curl_cffi.requests.Session is API-compatible with requests.Session.
|
||||
cf_session = curl_requests.Session(impersonate=IMPERSONATE)
|
||||
super().__init__(session=cf_session) # type: ignore[arg-type]
|
||||
|
||||
token = session_token or os.getenv("CHATGPT_SESSION_TOKEN", "").strip()
|
||||
if not token:
|
||||
raise ProviderError(
|
||||
@@ -39,7 +52,7 @@ class ChatGPTProvider(BaseProvider):
|
||||
)
|
||||
self._session_token = token
|
||||
|
||||
# Set the session cookie in the cookie jar (proper cookie handling, not a raw header)
|
||||
# Set the session cookie in the cookie jar
|
||||
self._session.cookies.set(
|
||||
"__Secure-next-auth.session-token",
|
||||
token,
|
||||
@@ -47,7 +60,6 @@ class ChatGPTProvider(BaseProvider):
|
||||
path="/",
|
||||
)
|
||||
|
||||
# Additional browser-like headers required by chatgpt.com
|
||||
self._session.headers.update(
|
||||
{
|
||||
"Referer": "https://chatgpt.com/",
|
||||
@@ -61,7 +73,7 @@ class ChatGPTProvider(BaseProvider):
|
||||
# Exchange the session cookie for an access token
|
||||
self._access_token: str = self._fetch_access_token()
|
||||
self._session.headers["Authorization"] = f"Bearer {self._access_token}"
|
||||
logger.debug("[chatgpt] Session initialised — access token obtained (token: [REDACTED])")
|
||||
logger.debug("[chatgpt] Session initialised with Chrome TLS impersonation (token: [REDACTED])")
|
||||
|
||||
def _fetch_access_token(self) -> str:
|
||||
"""Exchange the session cookie for a Bearer access token.
|
||||
|
||||
Reference in New Issue
Block a user