From 050cd491242b726e78c7b325cab9f1d6463845b7 Mon Sep 17 00:00:00 2001 From: JesseMarkowitz Date: Mon, 30 Mar 2026 11:08:05 -0400 Subject: [PATCH] updated to run on Windows and add test capabilities --- .env.example | 6 +- .gitignore | 4 ++ README.md | 99 +++++++++++++++++--------- src/config.py | 8 +-- src/main.py | 49 +++++++++++-- src/providers/base.py | 2 +- src/providers/chatgpt.py | 13 ++-- src/providers/claude.py | 4 +- tests/test_cli.py | 129 ++++++++++++++++++++++++++++++++++ tests/test_config.py | 56 +++++++++++++++ tests/test_exporters.py | 28 ++++++++ tests/test_providers.py | 33 +++++++++ tests/test_utils.py | 147 +++++++++++++++++++++++++++++++++++++++ 13 files changed, 524 insertions(+), 54 deletions(-) create mode 100644 tests/test_cli.py create mode 100644 tests/test_config.py create mode 100644 tests/test_utils.py diff --git a/.env.example b/.env.example index 24756a6..adc7885 100644 --- a/.env.example +++ b/.env.example @@ -46,9 +46,9 @@ JOPLIN_API_URL=http://localhost:41184 # JOPLIN_REQUEST_TIMEOUT=30 # --- Cache --- -# Where the sync manifest and logs are stored (default: ~/.ai-chat-exporter) -CACHE_DIR=~/.ai-chat-exporter +# Where the sync manifest is stored (default: ./cache, inside the install directory) +CACHE_DIR=./cache # --- Logging --- # Log file path. Set to "none" to disable file logging. -LOG_FILE=~/.ai-chat-exporter/logs/exporter.log +LOG_FILE=./cache/logs/exporter.log diff --git a/.gitignore b/.gitignore index f0e2e6f..37dc0ef 100644 --- a/.gitignore +++ b/.gitignore @@ -25,10 +25,14 @@ exports/ !CHANGELOG.md # Cache and logs +cache/ .ai-chat-exporter/ logs/ *.log +# Test tracking +test-plan.csv + # Editor / OS .DS_Store .idea/ diff --git a/README.md b/README.md index 2b68716..db4fb75 100644 --- a/README.md +++ b/README.md @@ -28,6 +28,8 @@ This tool is designed for a single user backing up their own conversations. 
Do n ## Installation +### Linux / macOS + ```bash git clone cd ai-chat-exporter @@ -36,6 +38,37 @@ source .venv/bin/activate pip install -e ".[dev]" ``` +### Windows + +No admin access required. Run these in **Command Prompt** (`cmd.exe`) — it's the simplest option on Windows because it doesn't have PowerShell's script execution policy restrictions. + +```bat +git clone REPOSITORY_URL +cd ai-chat-exporter +python -m venv .venv +.venv\Scripts\activate +pip install -e ".[dev]" +``` + +All `ai-chat-exporter` commands work identically in Command Prompt. + +**Using PowerShell instead?** If you prefer PowerShell, you may need to allow script execution first (one-time, current user only): + +```powershell +Set-ExecutionPolicy RemoteSigned -Scope CurrentUser +``` + +Then activate the venv and run commands the same way. + +**Prerequisites:** +- Python 3.11 or later — install from [python.org](https://www.python.org/downloads/windows/). During installation, tick **"Add Python to PATH"**. +- Git — install from [git-scm.com](https://git-scm.com/) if not already present. + +**Notes:** +- By default the cache manifest and logs are stored in `cache\` relative to the directory you run the tool from (normally the install directory) — the same default as on Linux. Set `CACHE_DIR` and `LOG_FILE` to absolute paths if you run the tool from other directories. +- File permission hardening (`chmod 600`) is silently ignored on Windows — not a concern for single-user desktop use. +- Joplin Web Clipper runs on `localhost:41184` on all platforms; no configuration changes needed. + --- ## First Run: Run Doctor @@ -43,7 +76,7 @@ pip install -e ".[dev]" Before anything else, validate your setup: ```bash -python -m src.main doctor +ai-chat-exporter doctor ``` This checks token presence, format, expiry, directory permissions, disk space, and live API connectivity. Fix any failures before proceeding. @@ -76,7 +109,7 @@ Session tokens are how your browser stays logged in. This tool uses them to acce ### When Tokens Expire When a token expires you'll see a `401 Unauthorized` error. 
To refresh: -- Re-run the `auth` wizard: `python -m src.main auth` +- Re-run the `auth` wizard: `ai-chat-exporter auth` - Or manually update the value in your `.env` file --- @@ -86,7 +119,7 @@ When a token expires you'll see a `401 Unauthorized` error. To refresh: The easiest way to configure tokens is the interactive wizard: ```bash -python -m src.main auth +ai-chat-exporter auth ``` This walks you through finding your token, validates it, shows the expiry date (ChatGPT only), and offers to write it to your `.env` automatically. Tokens are never echoed to the terminal. @@ -128,8 +161,8 @@ cp .env.example .env | Variable | Default | Description | |----------|---------|-------------| -| `CACHE_DIR` | `~/.ai-chat-exporter` | Where to store the sync manifest | -| `LOG_FILE` | `~/.ai-chat-exporter/logs/exporter.log` | Log file path (`none` to disable) | +| `CACHE_DIR` | `./cache` | Where to store the sync manifest | +| `LOG_FILE` | `./cache/logs/exporter.log` | Log file path (`none` to disable) | --- @@ -218,7 +251,7 @@ Each provider+project combination maps to a flat Joplin notebook created automat ### `auth` — Interactive token setup ```bash -python -m src.main auth +ai-chat-exporter auth ``` Guided wizard to find and save session tokens and ChatGPT project IDs. Detects OS and shows the correct DevTools shortcut. @@ -226,7 +259,7 @@ Guided wizard to find and save session tokens and ChatGPT project IDs. Detects O ### `doctor` — Health check ```bash -python -m src.main doctor +ai-chat-exporter doctor ``` Checks: token presence, JWT validity and expiry, directory permissions, disk space, live API reachability. Exits with code 0 if all pass, 1 if any fail. 
@@ -235,31 +268,31 @@ Checks: token presence, JWT validity and expiry, directory permissions, disk spa ```bash # Export everything (new/updated only) -python -m src.main export +ai-chat-exporter export # Single provider -python -m src.main export --provider claude +ai-chat-exporter export --provider claude # JSON output -python -m src.main export --format json +ai-chat-exporter export --format json # Both Markdown and JSON -python -m src.main export --format both +ai-chat-exporter export --format both # Only conversations updated since a date -python -m src.main export --since 2024-06-01 +ai-chat-exporter export --since 2024-06-01 # Only conversations in a specific project (case-insensitive substring) -python -m src.main export --project "learning python" +ai-chat-exporter export --project "learning python" # Only conversations outside any project -python -m src.main export --project none +ai-chat-exporter export --project none # Write to a custom directory -python -m src.main export --output /path/to/my/notes +ai-chat-exporter export --output /path/to/my/notes # Preview without writing anything -python -m src.main export --dry-run +ai-chat-exporter export --dry-run ``` Options: `--provider [chatgpt|claude|all]`, `--format [markdown|json|both]`, `--output PATH`, `--since YYYY-MM-DD`, `--project NAME`, `--dry-run` @@ -268,16 +301,16 @@ Options: `--provider [chatgpt|claude|all]`, `--format [markdown|json|both]`, `-- ```bash # List all conversations for all providers -python -m src.main list +ai-chat-exporter list # Single provider -python -m src.main list --provider chatgpt +ai-chat-exporter list --provider chatgpt # Filter by project -python -m src.main list --project "learning python" +ai-chat-exporter list --project "learning python" # Only conversations outside any project -python -m src.main list --project none +ai-chat-exporter list --project none ``` Fetches and displays all conversations without exporting them. 
Useful for verifying what the tool can see before running an export. @@ -286,19 +319,19 @@ Fetches and displays all conversations without exporting them. Useful for verify ```bash # Sync all pending conversations to Joplin -python -m src.main joplin +ai-chat-exporter joplin # Preview what would be synced without sending anything -python -m src.main joplin --dry-run +ai-chat-exporter joplin --dry-run # Sync a single provider -python -m src.main joplin --provider chatgpt +ai-chat-exporter joplin --provider chatgpt # Sync only conversations in a specific project -python -m src.main joplin --project "learning python" +ai-chat-exporter joplin --project "learning python" # Sync only conversations outside any project -python -m src.main joplin --project none +ai-chat-exporter joplin --project none ``` Reads the local export cache and pushes each exported Markdown file to Joplin as a note. Notebooks are created automatically. Re-running is safe — notes are updated (not duplicated). @@ -315,20 +348,20 @@ Options: `--provider [chatgpt|claude|all]`, `--project NAME`, `--dry-run` ```bash # Show statistics -python -m src.main cache --show +ai-chat-exporter cache --show # Clear all cached entries (forces full re-export next run) -python -m src.main cache --clear +ai-chat-exporter cache --clear # Clear a single provider -python -m src.main cache --clear --provider claude +ai-chat-exporter cache --clear --provider claude ``` --- ## How the Cache Works -The cache manifest lives at `~/.ai-chat-exporter/manifest.json` and records every exported conversation: its title, project, `updated_at` timestamp, output file path, and (after Joplin sync) the Joplin note ID. +The cache manifest lives at `cache/manifest.json` by default (relative to the directory you run the tool from, normally the install directory; override with `CACHE_DIR`) and records every exported conversation: its title, project, `updated_at` timestamp, output file path, and (after Joplin sync) the Joplin note ID. On every `export` run: 1. 
Fetch the full conversation list from the provider @@ -343,7 +376,7 @@ On every `joplin` run: **This design makes every run inherently resumable.** If the tool is interrupted for any reason — rate limit, network drop, Ctrl+C, crash — simply re-run the same command. It will skip already-processed conversations and continue from where it stopped. -To force a full re-export: `python -m src.main cache --clear` then re-run export. +To force a full re-export: `ai-chat-exporter cache --clear` then re-run export. --- @@ -351,7 +384,7 @@ To force a full re-export: `python -m src.main cache --clear` then re-run export ### `401 Unauthorized` Your session token has expired. -- Run `python -m src.main auth` to get a new token interactively +- Run `ai-chat-exporter auth` to get a new token interactively - Or manually copy a fresh cookie value into your `.env` file Note: Claude's `sessionKey` is an opaque string — the only way to know it's expired is the 401 error. ChatGPT JWTs have an `exp` claim that the `doctor` command can decode and display. @@ -391,10 +424,10 @@ The provider's internal API may have changed. Run with `--debug`, sanitize the o Images, code interpreter outputs, DALL-E generations, and Claude artifacts are not exported in v0.2.0. A WARNING is logged for each skipped item. See `FUTURE.md` for the roadmap. ### Empty export / all conversations skipped -No new or updated conversations since your last run. To verify: `python -m src.main cache --show`. To force a full re-export: `python -m src.main cache --clear`. +No new or updated conversations since your last run. To verify: `ai-chat-exporter cache --show`. To force a full re-export: `ai-chat-exporter cache --clear`. ### Filing a bug report -1. Run with `--debug`: `python -m src.main export --debug 2>&1 | tee debug.log` +1. Run with `--debug`: `ai-chat-exporter export --debug 2>&1 | tee debug.log` 2. Remove any personal conversation content from `debug.log` 3. 
Open a GitHub Issue with the sanitized log and the exact command you ran diff --git a/src/config.py b/src/config.py index daffcb0..13f4ffb 100644 --- a/src/config.py +++ b/src/config.py @@ -58,8 +58,8 @@ def load_config() -> Config: claude_key = os.getenv("CLAUDE_SESSION_KEY", "").strip() or None export_dir = Path(os.getenv("EXPORT_DIR", "./exports")).expanduser() output_structure = os.getenv("OUTPUT_STRUCTURE", "provider/project/year").strip() - cache_dir = Path(os.getenv("CACHE_DIR", "~/.ai-chat-exporter")).expanduser() - log_file = os.getenv("LOG_FILE", "~/.ai-chat-exporter/logs/exporter.log").strip() + cache_dir = Path(os.getenv("CACHE_DIR", "./cache")).expanduser() + log_file = os.getenv("LOG_FILE", "./cache/logs/exporter.log").strip() # Joplin joplin_token = os.getenv("JOPLIN_API_TOKEN", "").strip() or None @@ -101,7 +101,7 @@ def load_config() -> Config: if not chatgpt_token and not claude_key: logger.warning( "Neither CHATGPT_SESSION_TOKEN nor CLAUDE_SESSION_KEY is set. " - "Run 'python -m src.main auth' to configure credentials." + "Run 'ai-chat-exporter auth' to configure credentials." ) # Create and validate output directory @@ -173,7 +173,7 @@ def _validate_chatgpt_token(token: str) -> datetime | None: if delta.total_seconds() < 0: logger.warning( "CHATGPT_SESSION_TOKEN expired at %s. 
" - "Run 'python -m src.main auth' to refresh it.", + "Run 'ai-chat-exporter auth' to refresh it.", expiry.strftime("%Y-%m-%d %H:%M UTC"), ) elif delta.total_seconds() < 86400: diff --git a/src/main.py b/src/main.py index 0939dae..b650ab1 100644 --- a/src/main.py +++ b/src/main.py @@ -70,7 +70,7 @@ def cli(ctx: click.Context, verbose: bool, quiet: bool, debug: bool, no_log_file # Determine log file path from env (setup_logging handles "none") import os - log_file = os.getenv("LOG_FILE", "~/.ai-chat-exporter/logs/exporter.log") + log_file = os.getenv("LOG_FILE", "./cache/logs/exporter.log") setup_logging(level=level, log_file=log_file, no_log_file=no_log_file) @@ -79,7 +79,7 @@ def cli(ctx: click.Context, verbose: bool, quiet: bool, debug: bool, no_log_file # Initialise cache (needed for ToS gate on every command) import os - cache_dir = Path(os.getenv("CACHE_DIR", "~/.ai-chat-exporter")).expanduser() + cache_dir = Path(os.getenv("CACHE_DIR", "./cache")).expanduser() try: cache = Cache(cache_dir) except CacheError as e: @@ -140,7 +140,7 @@ def auth(ctx: click.Context) -> None: if configure_claude: _auth_claude(os_name) - console.print("\n[green]Done! Run 'python -m src.main doctor' to verify your setup.[/green]") + console.print("\n[green]Done! 
Run 'ai-chat-exporter doctor' to verify your setup.[/green]") def _auth_chatgpt(os_name: str) -> None: @@ -178,6 +178,25 @@ def _auth_chatgpt(os_name: str) -> None: except Exception: console.print("[yellow]Could not decode token expiry.[/yellow]") + # Live validation — exchange session token for an access token + _valid = False + _error: str | None = None + with console.status("[dim]Validating token with ChatGPT API…[/dim]"): + try: + from src.providers.chatgpt import ChatGPTProvider + _prov = ChatGPTProvider(session_token=token) + _prov._fetch_access_token() + _valid = True + except ProviderError as e: + _error = str(e.original) + except Exception as e: + _error = str(e) + + if _valid: + console.print("[green]✓ Token verified — connected to ChatGPT API.[/green]") + else: + console.print(f"[red]✗ Token validation failed: {_error}[/red]") + _write_token_to_env("CHATGPT_SESSION_TOKEN", token) # --- ChatGPT Projects --- @@ -231,7 +250,25 @@ def _auth_claude(os_name: str) -> None: console.print("[yellow]Skipped Claude token.[/yellow]") return - console.print("[green]Claude session key saved.[/green]") + # Live validation — fetch org ID (the first call any Claude operation makes) + _valid = False + _error: str | None = None + with console.status("[dim]Validating token with Claude API…[/dim]"): + try: + from src.providers.claude import ClaudeProvider + _prov = ClaudeProvider(session_key=key) + _prov._get_org_id() + _valid = True + except ProviderError as e: + _error = str(e.original) + except Exception as e: + _error = str(e) + + if _valid: + console.print("[green]✓ Token verified — connected to Claude API.[/green]") + else: + console.print(f"[red]✗ Token validation failed: {_error}[/red]") + _write_token_to_env("CLAUDE_SESSION_KEY", key) @@ -341,7 +378,7 @@ def _run_doctor_checks() -> list[dict]: # Directories export_dir = Path(os.getenv("EXPORT_DIR", "./exports")).expanduser() - cache_dir = Path(os.getenv("CACHE_DIR", "~/.ai-chat-exporter")).expanduser() + cache_dir = 
Path(os.getenv("CACHE_DIR", "./cache")).expanduser() for label, dirpath in [("Export dir writable", export_dir), ("Cache dir writable", cache_dir)]: try: @@ -496,7 +533,7 @@ def export( providers_to_run = _resolve_providers(provider, cfg) if not providers_to_run: err_console.print( - "[red]No providers configured. Run 'python -m src.main auth' to set up tokens.[/red]" + "[red]No providers configured. Run 'ai-chat-exporter auth' to set up tokens.[/red]" ) sys.exit(1) diff --git a/src/providers/base.py b/src/providers/base.py index 34dde89..db17e18 100644 --- a/src/providers/base.py +++ b/src/providers/base.py @@ -326,7 +326,7 @@ class BaseProvider(ABC): msg = ( f"[{self.provider_name}] Authentication failed (401 Unauthorized). " "Your session token has likely expired. " - "Run 'python -m src.main auth' to refresh your token." + "Run 'ai-chat-exporter auth' to refresh your token." ) logger.error(msg) raise ProviderError( diff --git a/src/providers/chatgpt.py b/src/providers/chatgpt.py index fd9f8c3..2990559 100644 --- a/src/providers/chatgpt.py +++ b/src/providers/chatgpt.py @@ -77,7 +77,7 @@ class ChatGPTProvider(BaseProvider): "init", RuntimeError( "CHATGPT_SESSION_TOKEN is not set. " - "Run 'python -m src.main auth' to configure it." + "Run 'ai-chat-exporter auth' to configure it." ), ) self._session_token = token @@ -157,7 +157,7 @@ class ChatGPTProvider(BaseProvider): "fetch_access_token", RuntimeError( "No accessToken in /api/auth/session response. " - "Your session token may be expired — run 'python -m src.main auth' to refresh." + "Your session token may be expired — run 'ai-chat-exporter auth' to refresh." ), ) return access_token @@ -169,7 +169,7 @@ class ChatGPTProvider(BaseProvider): "The session token is used to obtain a short-lived access token via /api/auth/session. " "To refresh: open chatgpt.com in Chrome → F12 → Application → Cookies " "→ find '__Secure-next-auth.session-token' → copy the value. 
" - "Then run 'python -m src.main auth' or update CHATGPT_SESSION_TOKEN in .env." + "Then run 'ai-chat-exporter auth' or update CHATGPT_SESSION_TOKEN in .env." ) logger.error(msg) raise ProviderError( @@ -369,7 +369,7 @@ class ChatGPTProvider(BaseProvider): logger.info( "[chatgpt] No project IDs configured — skipping project conversations. " "To include projects, set CHATGPT_PROJECT_IDS in .env " - "(see 'python -m src.main auth' for instructions)." + "(see 'ai-chat-exporter auth' for instructions)." ) return self._apply_since_filter(default_convs, since) @@ -624,7 +624,10 @@ def _extract_messages( content_type = content_obj.get("content_type", "text") text = _extract_text(content_obj, conv_id, node_id) - if content_type != "text": + # model_editable_context carries project instructions as plain text parts + _TEXT_EXTRACTABLE = {"text", "model_editable_context"} + + if content_type not in _TEXT_EXTRACTABLE: logger.warning( "[chatgpt] Skipping %s content in conversation %s message %s " "— rich content not yet supported (see FUTURE.md)", diff --git a/src/providers/claude.py b/src/providers/claude.py index 47ae18b..01fcbc4 100644 --- a/src/providers/claude.py +++ b/src/providers/claude.py @@ -39,7 +39,7 @@ class ClaudeProvider(BaseProvider): "init", RuntimeError( "CLAUDE_SESSION_KEY is not set. " - "Run 'python -m src.main auth' to configure it." + "Run 'ai-chat-exporter auth' to configure it." ), ) # Set sessionKey in the cookie jar @@ -60,7 +60,7 @@ class ClaudeProvider(BaseProvider): "Note: Claude session keys are opaque — a 401 is the only expiry signal. " "To refresh: open claude.ai in Chrome → F12 → Application → Cookies " "→ find 'sessionKey' → copy the value. " - "Then run 'python -m src.main auth' or update CLAUDE_SESSION_KEY in .env." + "Then run 'ai-chat-exporter auth' or update CLAUDE_SESSION_KEY in .env." 
) logger.error(msg) raise ProviderError( diff --git a/tests/test_cli.py b/tests/test_cli.py new file mode 100644 index 0000000..9ea4794 --- /dev/null +++ b/tests/test_cli.py @@ -0,0 +1,129 @@ +"""CLI-level tests using Click's CliRunner — no live API calls required.""" + +import pytest +from click.testing import CliRunner + +from src.cache import Cache +from src.main import _filter_by_project, cli + + +# --------------------------------------------------------------------------- +# _filter_by_project (T-27) +# --------------------------------------------------------------------------- + + +class TestFilterByProject: + """Unit tests for the project filter logic used by export/list/joplin.""" + + # ChatGPT conversations use the _project_name annotation key + def _chatgpt(self, conv_id, project_name): + return {"id": conv_id, "_project_name": project_name} + + # Claude conversations use the project dict key + def _claude(self, conv_id, project_name): + proj = {"name": project_name} if project_name else None + return {"id": conv_id, "project": proj} + + def test_none_filter_keeps_no_project_chatgpt(self): + convs = [self._chatgpt("a", None), self._chatgpt("b", "Python Course")] + result = _filter_by_project(convs, "none") + assert len(result) == 1 + assert result[0]["id"] == "a" + + def test_none_filter_keeps_no_project_claude(self): + convs = [self._claude("a", None), self._claude("b", "Python Course")] + result = _filter_by_project(convs, "none") + assert len(result) == 1 + assert result[0]["id"] == "a" + + def test_name_filter_case_insensitive(self): + convs = [ + self._chatgpt("a", "Python Course"), + self._chatgpt("b", "Java Course"), + self._chatgpt("c", None), + ] + result = _filter_by_project(convs, "PYTHON") + assert len(result) == 1 + assert result[0]["id"] == "a" + + def test_name_filter_substring_match(self): + convs = [ + self._chatgpt("a", "Python Advanced Course"), + self._chatgpt("b", "Python Basics"), + self._chatgpt("c", "JavaScript"), + ] + result = 
_filter_by_project(convs, "python") + assert len(result) == 2 + assert {c["id"] for c in result} == {"a", "b"} + + def test_no_matches_returns_empty(self): + convs = [self._chatgpt("a", "Python Course"), self._chatgpt("b", None)] + result = _filter_by_project(convs, "ruby") + assert result == [] + + def test_none_filter_excludes_all_with_projects(self): + convs = [self._chatgpt("a", "Project A"), self._chatgpt("b", "Project B")] + result = _filter_by_project(convs, "none") + assert result == [] + + def test_empty_string_project_treated_as_no_project(self): + convs = [{"id": "a", "_project_name": ""}, {"id": "b", "_project_name": "Real"}] + result = _filter_by_project(convs, "none") + assert len(result) == 1 + assert result[0]["id"] == "a" + + def test_claude_project_string_matched(self): + # Claude can also have project as a plain string + convs = [{"id": "a", "project": "python-course"}, {"id": "b", "project": None}] + result = _filter_by_project(convs, "python") + assert len(result) == 1 + assert result[0]["id"] == "a" + + +# --------------------------------------------------------------------------- +# export --since validation (T-25) +# --------------------------------------------------------------------------- + + +class TestExportSinceValidation: + """Test that --since with an invalid date exits cleanly with an error message.""" + + def _pre_populated_cache(self, tmp_path) -> Cache: + """Create a cache that passes the ToS gate and first-run doctor check.""" + cache = Cache(tmp_path) + cache.acknowledge_tos() + cache.mark_exported("chatgpt", "dummy-conv", {"updated_at": "2024-01-01T00:00:00Z"}) + return cache + + def test_invalid_since_date_exits_with_error(self, tmp_path): + self._pre_populated_cache(tmp_path) + + runner = CliRunner(mix_stderr=True) + result = runner.invoke( + cli, + ["--no-log-file", "export", "--since", "notadate"], + env={ + "CHATGPT_SESSION_TOKEN": "eyJtesttoken", + "CACHE_DIR": str(tmp_path), + "EXPORT_DIR": str(tmp_path / "exports"), + 
}, + ) + assert result.exit_code == 1 + assert "Invalid --since date" in result.output + assert "YYYY-MM-DD" in result.output + + def test_valid_since_date_does_not_error(self, tmp_path): + """A valid date should not produce the invalid-date error (may fail later on API).""" + self._pre_populated_cache(tmp_path) + + runner = CliRunner(mix_stderr=True) + result = runner.invoke( + cli, + ["--no-log-file", "export", "--since", "2024-01-01"], + env={ + "CHATGPT_SESSION_TOKEN": "eyJtesttoken", + "CACHE_DIR": str(tmp_path), + "EXPORT_DIR": str(tmp_path / "exports"), + }, + ) + assert "Invalid --since date" not in result.output diff --git a/tests/test_config.py b/tests/test_config.py new file mode 100644 index 0000000..e2e8098 --- /dev/null +++ b/tests/test_config.py @@ -0,0 +1,56 @@ +"""Tests for src/config.py — token validation logic (T-14).""" + +import logging +import time + +import jwt +import pytest + +from src.config import _validate_chatgpt_token + + +class TestValidateChatGPTToken: + def test_expired_token_logs_warning(self, caplog): + # T-14: expired JWT must produce a clear warning + payload = {"exp": int(time.time()) - 3600} # expired 1 hour ago + token = jwt.encode(payload, "secret", algorithm="HS256") + with caplog.at_level(logging.WARNING, logger="src.config"): + result = _validate_chatgpt_token(token) + assert any("expired" in r.message.lower() for r in caplog.records) + assert result is not None # still returns the expiry datetime + + def test_expiring_within_24h_logs_warning(self, caplog): + payload = {"exp": int(time.time()) + 3600} # expires in 1 hour + token = jwt.encode(payload, "secret", algorithm="HS256") + with caplog.at_level(logging.WARNING, logger="src.config"): + _validate_chatgpt_token(token) + assert any("less than 24 hours" in r.message for r in caplog.records) + + def test_valid_token_no_expiry_warning(self, caplog): + payload = {"exp": int(time.time()) + 86400 * 5} # valid for 5 days + token = jwt.encode(payload, "secret", 
algorithm="HS256") + with caplog.at_level(logging.WARNING, logger="src.config"): + result = _validate_chatgpt_token(token) + assert not any("expired" in r.message.lower() for r in caplog.records) + assert result is not None + + def test_token_without_exp_claim_logs_warning(self, caplog): + payload = {"sub": "user123"} # no exp + token = jwt.encode(payload, "secret", algorithm="HS256") + with caplog.at_level(logging.WARNING, logger="src.config"): + result = _validate_chatgpt_token(token) + assert any("'exp'" in r.message or "no 'exp'" in r.message for r in caplog.records) + assert result is None + + def test_jwe_encrypted_token_returns_none(self, caplog): + # JWE tokens (alg=dir) cannot be decoded client-side — this is normal for ChatGPT + jwe_like = "eyJhbGciOiJkaXIiLCJlbmMiOiJBMjU2R0NNIn0.fake.token.data.here" + with caplog.at_level(logging.DEBUG, logger="src.config"): + result = _validate_chatgpt_token(jwe_like) + assert result is None # cannot decode, but not an error + + def test_non_jwt_string_logs_warning(self, caplog): + with caplog.at_level(logging.WARNING, logger="src.config"): + result = _validate_chatgpt_token("notajwttoken") + assert any("does not look like a JWT" in r.message for r in caplog.records) + assert result is None diff --git a/tests/test_exporters.py b/tests/test_exporters.py index 5c32cee..1536482 100644 --- a/tests/test_exporters.py +++ b/tests/test_exporters.py @@ -199,6 +199,34 @@ class TestJSONExporter: assert " " in raw +class TestBothFormats: + """T-38: Markdown and JSON exporters produce matching filenames for the same conversation.""" + + def test_both_formats_produce_files(self, tmp_path): + md_exp = MarkdownExporter(tmp_path) + json_exp = JSONExporter(tmp_path) + md_path = md_exp.export(SAMPLE_CONV) + json_path = json_exp.export(SAMPLE_CONV) + assert md_path.exists() + assert json_path.exists() + + def test_both_formats_have_matching_stems(self, tmp_path): + md_exp = MarkdownExporter(tmp_path) + json_exp = JSONExporter(tmp_path) + 
md_path = md_exp.export(SAMPLE_CONV) + json_path = json_exp.export(SAMPLE_CONV) + assert md_path.suffix == ".md" + assert json_path.suffix == ".json" + assert md_path.stem == json_path.stem + + def test_both_formats_same_directory(self, tmp_path): + md_exp = MarkdownExporter(tmp_path) + json_exp = JSONExporter(tmp_path) + md_path = md_exp.export(SAMPLE_CONV) + json_path = json_exp.export(SAMPLE_CONV) + assert md_path.parent == json_path.parent + + class TestYamlEscape: def test_escapes_double_quotes(self): assert _yaml_escape('Say "hello"') == 'Say \\"hello\\"' diff --git a/tests/test_providers.py b/tests/test_providers.py index efbcc21..8097610 100644 --- a/tests/test_providers.py +++ b/tests/test_providers.py @@ -75,6 +75,39 @@ class TestChatGPTNormalization: for r in caplog.records ) + def test_model_editable_context_included_without_warning(self, caplog): + """model_editable_context messages (project instructions) should be included, not warned about.""" + import logging + conv = { + "id": "test-conv-mec", + "title": "Test", + "create_time": 1700000000.0, + "update_time": 1700000001.0, + "mapping": { + "root": {"id": "root", "message": None, "parent": None, "children": ["msg1"]}, + "msg1": { + "id": "msg1", + "message": { + "id": "msg1", + "author": {"role": "user"}, + "content": { + "content_type": "model_editable_context", + "parts": ["These are the project instructions."], + }, + "create_time": 1700000001.0, + "status": "finished_successfully", + }, + "parent": "root", + "children": [], + }, + }, + } + p = self._get_provider() + with caplog.at_level(logging.WARNING): + result = p.normalize_conversation(conv) + assert any(m["content"] == "These are the project instructions." 
for m in result["messages"]) + assert not any("model_editable_context" in r.message for r in caplog.records) + def test_message_roles_are_valid(self): raw = json.loads((FIXTURES / "chatgpt_conversation.json").read_text()) p = self._get_provider() diff --git a/tests/test_utils.py b/tests/test_utils.py new file mode 100644 index 0000000..99bd381 --- /dev/null +++ b/tests/test_utils.py @@ -0,0 +1,147 @@ +"""Tests for src/utils.py — filename generation, path building, redaction.""" + +from pathlib import Path + +import pytest + +from src.utils import ( + build_export_path, + format_token_status, + generate_filename, + redact_secrets, +) + + +class TestGenerateFilename: + def test_basic_format(self): + name = generate_filename("Hello World", "abc12345def", "2024-06-10T14:00:00Z") + assert name == "2024-06-10_hello-world_abc12345.md" + + def test_special_chars_slugified(self): + # T-36: titles with punctuation must produce safe, OS-compatible filenames + name = generate_filename("What's this?! A test.", "abc12345", "2024-06-01T00:00:00Z") + assert "?" not in name + assert "!" 
not in name + assert "'" not in name + assert " " not in name + assert name.startswith("2024-06-01_") + assert name.endswith("_abc12345.md") + + def test_unicode_chars_handled(self): + name = generate_filename("Héllo Wörld", "abc12345", "2024-06-01T00:00:00Z") + assert " " not in name + assert name.endswith("_abc12345.md") + + def test_empty_title_becomes_untitled(self): + name = generate_filename("", "abc12345", "2024-06-01T00:00:00Z") + assert "untitled" in name + + def test_id_truncated_to_8_chars(self): + name = generate_filename("Test", "abcdefghijklmnop", "2024-06-01T00:00:00Z") + assert name.endswith("_abcdefgh.md") + + def test_long_title_truncated(self): + long_title = "a" * 200 + name = generate_filename(long_title, "abc12345", "2024-06-01T00:00:00Z") + # Slug is capped at 60 chars by max_length + slug_part = name.split("_")[1] + assert len(slug_part) <= 60 + + def test_date_comes_from_created_at(self): + name = generate_filename("Test", "abc12345", "2023-11-25T00:00:00Z") + assert name.startswith("2023-11-25_") + + +class TestBuildExportPath: + def test_default_structure_provider_project_year(self): + path = build_export_path( + Path("/exports"), "claude", "my-project", "2024-06-01T00:00:00Z", "file.md" + ) + assert str(path) == "/exports/claude/my-project/2024/file.md" + + def test_no_project_uses_no_project_slug(self): + path = build_export_path( + Path("/exports"), "chatgpt", None, "2024-06-01T00:00:00Z", "file.md" + ) + assert "no-project" in str(path) + + def test_provider_project_structure_omits_year(self): + path = build_export_path( + Path("/exports"), "claude", "proj", "2024-06-01T00:00:00Z", "file.md", + structure="provider/project", + ) + assert "2024" not in str(path) + assert "proj" in str(path) + + def test_provider_year_structure_omits_project(self): + path = build_export_path( + Path("/exports"), "claude", "proj", "2024-06-01T00:00:00Z", "file.md", + structure="provider/year", + ) + assert "proj" not in str(path) + assert "2024" in 
str(path) + + def test_project_name_with_spaces_is_slugified(self): + path = build_export_path( + Path("/exports"), "claude", "My Project Name!", "2024-06-01T00:00:00Z", "file.md" + ) + assert " " not in str(path) + assert "!" not in str(path) + + +class TestRedactSecrets: + def test_token_value_redacted(self): + data = {"token": "supersecret"} + result = redact_secrets(data) + assert result["token"] == "[REDACTED]" + + def test_session_key_redacted(self): + result = redact_secrets({"sessionKey": "abc123"}) + assert result["sessionKey"] == "[REDACTED]" + + def test_non_sensitive_key_unchanged(self): + result = redact_secrets({"title": "My Chat", "id": "abc123"}) + assert result["title"] == "My Chat" + assert result["id"] == "abc123" + + def test_nested_dict_redacted(self): + data = {"user": {"token": "secret", "name": "Alice"}} + result = redact_secrets(data) + assert result["user"]["token"] == "[REDACTED]" + assert result["user"]["name"] == "Alice" + + def test_list_of_dicts(self): + data = [{"password": "p@ss"}, {"title": "chat"}] + result = redact_secrets(data) + assert result[0]["password"] == "[REDACTED]" + assert result[1]["title"] == "chat" + + +class TestFormatTokenStatus: + def test_none_token_returns_not_set(self): + assert format_token_status(None) == "[NOT SET]" + + def test_empty_token_returns_not_set(self): + assert format_token_status("") == "[NOT SET]" + + def test_set_token_no_expiry(self): + assert format_token_status("sometoken") == "[SET]" + + def test_expired_token(self): + from datetime import datetime, timezone, timedelta + expiry = datetime.now(tz=timezone.utc) - timedelta(days=1) + result = format_token_status("tok", expiry) + assert "EXPIRED" in result + + def test_expiring_today_shows_hours(self): + from datetime import datetime, timezone, timedelta + expiry = datetime.now(tz=timezone.utc) + timedelta(hours=3) + result = format_token_status("tok", expiry) + assert "expires in" in result + assert "h" in result + + def 
test_expiring_in_days(self): + from datetime import datetime, timezone, timedelta + expiry = datetime.now(tz=timezone.utc) + timedelta(days=10, hours=12) + result = format_token_status("tok", expiry) + assert "10 days" in result