"""Configuration loader and validation for ai-chat-exporter."""

import logging
import os
import tempfile
from dataclasses import dataclass, field
from datetime import datetime, timezone
from pathlib import Path

import jwt
from dotenv import load_dotenv

from src.utils import format_token_status

logger = logging.getLogger(__name__)

# Placeholder values from .env.example — reject if still set
# NOTE(review): both constants are empty and nothing in this module reads
# them; presumably the original placeholder text was lost — confirm against
# .env.example before wiring them into the validators.
_CHATGPT_PLACEHOLDER = ""
_CLAUDE_PLACEHOLDER = ""

# Valid OUTPUT_STRUCTURE values
VALID_STRUCTURES = {"provider/project/year", "provider/project", "provider/year"}

# Tokens expiring within this many seconds trigger an "expires soon" warning.
_EXPIRY_WARNING_SECONDS = 86400


class ConfigError(Exception):
    """Raised when required configuration is missing or invalid."""


@dataclass
class Config:
    """Validated runtime configuration, as assembled by ``load_config()``."""

    # Session credentials; None when the variable is unset or blank.
    # Excluded from repr() so that printing/logging a Config cannot leak them.
    chatgpt_session_token: str | None = field(repr=False)
    claude_session_key: str | None = field(repr=False)
    # Directory exports are written to (created and write-checked on load).
    export_dir: Path
    # One of VALID_STRUCTURES.
    output_structure: str
    # Cache directory (created and write-checked on load).
    cache_dir: Path
    # Log file path with a leading "~" already expanded.
    log_file: str
    # Decoded ChatGPT JWT expiry (None if token absent or not a JWT)
    chatgpt_token_expiry: datetime | None = field(default=None, repr=False)


def _env_or_default(name: str, default: str) -> str:
    """Return env var *name* stripped of whitespace, or *default* if unset/blank."""
    return os.getenv(name, "").strip() or default


def load_config() -> Config:
    """Load configuration from environment / .env file.

    Validates all values and logs a startup summary. Values already present
    in the process environment take precedence over the .env file. Blank
    values are treated the same as unset ones.

    Returns:
        The populated Config.

    Raises:
        ConfigError: If a critical config value is missing or invalid.
    """
    load_dotenv(override=False)

    chatgpt_token = os.getenv("CHATGPT_SESSION_TOKEN", "").strip() or None
    claude_key = os.getenv("CLAUDE_SESSION_KEY", "").strip() or None
    export_dir = Path(_env_or_default("EXPORT_DIR", "./exports")).expanduser()
    output_structure = _env_or_default("OUTPUT_STRUCTURE", "provider/project/year")
    cache_dir = Path(_env_or_default("CACHE_DIR", "~/.ai-chat-exporter")).expanduser()
    # Expand "~" here as well, so the default does not end up as a literal
    # "~" directory when the path is handed to a file handler.
    log_file = os.path.expanduser(
        _env_or_default("LOG_FILE", "~/.ai-chat-exporter/logs/exporter.log")
    )

    errors: list[str] = []

    # Validate output structure
    if output_structure not in VALID_STRUCTURES:
        errors.append(
            f"OUTPUT_STRUCTURE '{output_structure}' is invalid. "
            f"Must be one of: {', '.join(sorted(VALID_STRUCTURES))}"
        )

    # Validate and decode ChatGPT JWT (problems are warnings, not errors)
    chatgpt_expiry: datetime | None = None
    if chatgpt_token:
        chatgpt_expiry = _validate_chatgpt_token(chatgpt_token)

    # Validate Claude key
    if claude_key:
        _validate_claude_key(claude_key)

    # Ensure at least one provider is configured (warning only)
    if not chatgpt_token and not claude_key:
        logger.warning(
            "Neither CHATGPT_SESSION_TOKEN nor CLAUDE_SESSION_KEY is set. "
            "Run 'python -m src.main auth' to configure credentials."
        )

    # Create and validate the output and cache directories. All problems are
    # collected so the user sees every failure in a single ConfigError.
    for var_name, directory in (("EXPORT_DIR", export_dir), ("CACHE_DIR", cache_dir)):
        try:
            directory.mkdir(parents=True, exist_ok=True)
            _check_writable(directory)
        except OSError as e:  # PermissionError is a subclass of OSError
            errors.append(f"Cannot create/write to {var_name} '{directory}': {e}")

    if errors:
        for err in errors:
            logger.critical(err)
        raise ConfigError(
            "Configuration errors found:\n" + "\n".join(f"  - {e}" for e in errors)
        )

    config = Config(
        chatgpt_session_token=chatgpt_token,
        claude_session_key=claude_key,
        export_dir=export_dir,
        output_structure=output_structure,
        cache_dir=cache_dir,
        log_file=log_file,
        chatgpt_token_expiry=chatgpt_expiry,
    )

    _log_startup_summary(config)
    return config


def _validate_chatgpt_token(token: str) -> datetime | None:
    """Validate ChatGPT session token (JWT). Returns expiry or None.

    The token is decoded without signature verification purely to read its
    'exp' claim. Every problem is reported as a log warning; this function
    does not raise for malformed tokens.

    Returns:
        The timezone-aware (UTC) expiry, including when it is already in the
        past, or None if the token is not a decodable JWT or has no usable
        'exp' claim.
    """
    if not token.startswith("eyJ"):
        logger.warning(
            "CHATGPT_SESSION_TOKEN does not look like a JWT (expected 'eyJ...'). "
            "It may be expired or incorrectly copied."
        )
        return None

    try:
        # verify_exp is disabled explicitly: expiry is evaluated below so an
        # expired token still yields its expiry instead of a decode failure.
        payload = jwt.decode(
            token,
            options={"verify_signature": False, "verify_exp": False},
        )
    except jwt.InvalidTokenError as e:
        # InvalidTokenError is the base class of DecodeError, so this also
        # covers claim-level failures that would otherwise escape.
        logger.warning("CHATGPT_SESSION_TOKEN could not be decoded as JWT: %s", e)
        return None

    exp = payload.get("exp")
    if exp is None:
        logger.warning("CHATGPT_SESSION_TOKEN JWT has no 'exp' claim.")
        return None

    try:
        expiry = datetime.fromtimestamp(exp, tz=timezone.utc)
    except (TypeError, ValueError, OverflowError, OSError):
        # The payload is unverified, so 'exp' may be non-numeric or out of range.
        logger.warning("CHATGPT_SESSION_TOKEN JWT has an invalid 'exp' claim: %r", exp)
        return None

    remaining = (expiry - datetime.now(tz=timezone.utc)).total_seconds()

    if remaining < 0:
        logger.warning(
            "CHATGPT_SESSION_TOKEN expired at %s. "
            "Run 'python -m src.main auth' to refresh it.",
            expiry.strftime("%Y-%m-%d %H:%M UTC"),
        )
    elif remaining < _EXPIRY_WARNING_SECONDS:
        logger.warning(
            "CHATGPT_SESSION_TOKEN expires in less than 24 hours (%s). "
            "Consider refreshing it soon.",
            expiry.strftime("%Y-%m-%d %H:%M UTC"),
        )

    return expiry


def _validate_claude_key(key: str) -> None:
    """Validate Claude session key (opaque string).

    Only logs a warning; never raises.
    """
    # Reject if it's the placeholder text from .env.example
    if not key or key.startswith("CLAUDE_SESSION_KEY="):
        logger.warning(
            "CLAUDE_SESSION_KEY appears to be a placeholder. "
            "Set it to the actual sessionKey cookie value from claude.ai."
        )


def _check_writable(path: Path) -> None:
    """Raise PermissionError if path is not writable.

    Probes by creating a uniquely named temporary file inside *path*, so an
    existing file is never touched or deleted and no probe file is left over.
    """
    try:
        with tempfile.TemporaryFile(dir=path):
            pass
    except OSError as e:
        raise PermissionError(f"Directory '{path}' is not writable: {e}") from e


def _log_startup_summary(cfg: Config) -> None:
    """Log a single INFO line summarising the active configuration."""
    chatgpt_status = format_token_status(cfg.chatgpt_session_token, cfg.chatgpt_token_expiry)
    claude_status = format_token_status(cfg.claude_session_key)

    logger.info(
        "Config loaded | "
        "ChatGPT: %s | "
        "Claude: %s | "
        "export_dir=%s | "
        "structure=%s | "
        "cache_dir=%s",
        chatgpt_status,
        claude_status,
        cfg.export_dir,
        cfg.output_structure,
        cfg.cache_dir,
    )