updated to run on Windows and add test capabilities

2026-03-30 11:08:05 -04:00
parent 304cf4fde4
commit 050cd49124
13 changed files with 524 additions and 54 deletions
--- a/.env.example
+++ b/.env.example
@@ -46,9 +46,9 @@ JOPLIN_API_URL=http://localhost:41184
 # JOPLIN_REQUEST_TIMEOUT=30
 # --- Cache ---
-# Where the sync manifest and logs are stored (default: ~/.ai-chat-exporter)
+# Where the sync manifest is stored (default: ./cache, resolved relative to the directory the tool is run from — normally the install directory)
-CACHE_DIR=~/.ai-chat-exporter
+CACHE_DIR=./cache
 # --- Logging ---
 # Log file path. Set to "none" to disable file logging.
-LOG_FILE=~/.ai-chat-exporter/logs/exporter.log
+LOG_FILE=./cache/logs/exporter.log
--- a/.gitignore
+++ b/.gitignore
@@ -25,10 +25,14 @@ exports/
 !CHANGELOG.md
 # Cache and logs
 cache/
 .ai-chat-exporter/
 logs/
 *.log
 # Test tracking
 test-plan.csv
 # Editor / OS
 .DS_Store
 .idea/
--- a/README.md
+++ b/README.md
@@ -28,6 +28,8 @@ This tool is designed for a single user backing up their own conversations. Do n
 ## Installation
 ### Linux / macOS
 ```bash
 git clone <repo-url>
 cd ai-chat-exporter
@@ -36,6 +38,37 @@ source .venv/bin/activate
 pip install -e ".[dev]"
 ```
 ### Windows
 No admin access required. Run these in **Command Prompt** (`cmd.exe`) — it's the simplest option on Windows because it doesn't have PowerShell's script execution policy restrictions.
 ```bat
 git clone <repo-url>
 cd ai-chat-exporter
 python -m venv .venv
 .venv\Scripts\activate
 pip install -e ".[dev]"
 ```
 All `ai-chat-exporter` commands work identically in Command Prompt.
 **Using PowerShell instead?** If you prefer PowerShell, you may need to allow script execution first (one-time, current user only):
 ```powershell
 Set-ExecutionPolicy RemoteSigned -Scope CurrentUser
 ```
 Then activate the venv and run commands the same way.
 **Prerequisites:**
 - Python 3.11 or later — install from [python.org](https://www.python.org/downloads/windows/). During installation, tick **"Add Python to PATH"**.
 - Git — install from [git-scm.com](https://git-scm.com/) if not already present.
 **Notes:**
 - The cache manifest and logs are stored in `cache\` relative to the directory you run the tool from (normally the install directory) — the same as on Linux.
 - File permission hardening (`chmod 600`) is silently ignored on Windows — not a concern for single-user desktop use.
 - Joplin Web Clipper runs on `localhost:41184` on all platforms; no configuration changes needed.
 ---
 ## First Run: Run Doctor
@@ -43,7 +76,7 @@ pip install -e ".[dev]"
 Before anything else, validate your setup:
 ```bash
-python -m src.main doctor
+ai-chat-exporter doctor
 ```
 This checks token presence, format, expiry, directory permissions, disk space, and live API connectivity. Fix any failures before proceeding.
@@ -76,7 +109,7 @@ Session tokens are how your browser stays logged in. This tool uses them to acce
 ### When Tokens Expire
 When a token expires you'll see a `401 Unauthorized` error. To refresh:
- Re-run the `auth` wizard: `python -m src.main auth`
+- Re-run the `auth` wizard: `ai-chat-exporter auth`
 - Or manually update the value in your `.env` file
 ---
@@ -86,7 +119,7 @@ When a token expires you'll see a `401 Unauthorized` error. To refresh:
 The easiest way to configure tokens is the interactive wizard:
 ```bash
-python -m src.main auth
+ai-chat-exporter auth
 ```
 This walks you through finding your token, validates it, shows the expiry date (ChatGPT only), and offers to write it to your `.env` automatically. Tokens are never echoed to the terminal.
@@ -128,8 +161,8 @@ cp .env.example .env
 | Variable | Default | Description |
 |----------|---------|-------------|
-| `CACHE_DIR` | `~/.ai-chat-exporter` | Where to store the sync manifest |
+| `CACHE_DIR` | `./cache` | Where to store the sync manifest |
-| `LOG_FILE` | `~/.ai-chat-exporter/logs/exporter.log` | Log file path (`none` to disable) |
+| `LOG_FILE` | `./cache/logs/exporter.log` | Log file path (`none` to disable) |
 ---
@@ -218,7 +251,7 @@ Each provider+project combination maps to a flat Joplin notebook created automat
 ### `auth` — Interactive token setup
 ```bash
-python -m src.main auth
+ai-chat-exporter auth
 ```
 Guided wizard to find and save session tokens and ChatGPT project IDs. Detects OS and shows the correct DevTools shortcut.
@@ -226,7 +259,7 @@ Guided wizard to find and save session tokens and ChatGPT project IDs. Detects O
 ### `doctor` — Health check
 ```bash
-python -m src.main doctor
+ai-chat-exporter doctor
 ```
 Checks: token presence, JWT validity and expiry, directory permissions, disk space, live API reachability. Exits with code 0 if all pass, 1 if any fail.
@@ -235,31 +268,31 @@ Checks: token presence, JWT validity and expiry, directory permissions, disk spa
 ```bash
 # Export everything (new/updated only)
-python -m src.main export
+ai-chat-exporter export
 # Single provider
-python -m src.main export --provider claude
+ai-chat-exporter export --provider claude
 # JSON output
-python -m src.main export --format json
+ai-chat-exporter export --format json
 # Both Markdown and JSON
-python -m src.main export --format both
+ai-chat-exporter export --format both
 # Only conversations updated since a date
-python -m src.main export --since 2024-06-01
+ai-chat-exporter export --since 2024-06-01
 # Only conversations in a specific project (case-insensitive substring)
-python -m src.main export --project "learning python"
+ai-chat-exporter export --project "learning python"
 # Only conversations outside any project
-python -m src.main export --project none
+ai-chat-exporter export --project none
 # Write to a custom directory
-python -m src.main export --output /path/to/my/notes
+ai-chat-exporter export --output /path/to/my/notes
 # Preview without writing anything
-python -m src.main export --dry-run
+ai-chat-exporter export --dry-run
 ```
 Options: `--provider [chatgpt|claude|all]`, `--format [markdown|json|both]`, `--output PATH`, `--since YYYY-MM-DD`, `--project NAME`, `--dry-run`
@@ -268,16 +301,16 @@ Options: `--provider [chatgpt|claude|all]`, `--format [markdown|json|both]`, `--
 ```bash
 # List all conversations for all providers
-python -m src.main list
+ai-chat-exporter list
 # Single provider
-python -m src.main list --provider chatgpt
+ai-chat-exporter list --provider chatgpt
 # Filter by project
-python -m src.main list --project "learning python"
+ai-chat-exporter list --project "learning python"
 # Only conversations outside any project
-python -m src.main list --project none
+ai-chat-exporter list --project none
 ```
 Fetches and displays all conversations without exporting them. Useful for verifying what the tool can see before running an export.
@@ -286,19 +319,19 @@ Fetches and displays all conversations without exporting them. Useful for verify
 ```bash
 # Sync all pending conversations to Joplin
-python -m src.main joplin
+ai-chat-exporter joplin
 # Preview what would be synced without sending anything
-python -m src.main joplin --dry-run
+ai-chat-exporter joplin --dry-run
 # Sync a single provider
-python -m src.main joplin --provider chatgpt
+ai-chat-exporter joplin --provider chatgpt
 # Sync only conversations in a specific project
-python -m src.main joplin --project "learning python"
+ai-chat-exporter joplin --project "learning python"
 # Sync only conversations outside any project
-python -m src.main joplin --project none
+ai-chat-exporter joplin --project none
 ```
 Reads the local export cache and pushes each exported Markdown file to Joplin as a note. Notebooks are created automatically. Re-running is safe — notes are updated (not duplicated).
@@ -315,20 +348,20 @@ Options: `--provider [chatgpt|claude|all]`, `--project NAME`, `--dry-run`
 ```bash
 # Show statistics
-python -m src.main cache --show
+ai-chat-exporter cache --show
 # Clear all cached entries (forces full re-export next run)
-python -m src.main cache --clear
+ai-chat-exporter cache --clear
 # Clear a single provider
-python -m src.main cache --clear --provider claude
+ai-chat-exporter cache --clear --provider claude
 ```
 ---
 ## How the Cache Works
-The cache manifest lives at `~/.ai-chat-exporter/manifest.json` and records every exported conversation: its title, project, `updated_at` timestamp, output file path, and (after Joplin sync) the Joplin note ID.
+The cache manifest lives at `cache/manifest.json` by default (a relative path, resolved against the directory the tool is run from — normally the install directory) and records every exported conversation: its title, project, `updated_at` timestamp, output file path, and (after Joplin sync) the Joplin note ID.
 On every `export` run:
 1. Fetch the full conversation list from the provider
@@ -343,7 +376,7 @@ On every `joplin` run:
 **This design makes every run inherently resumable.** If the tool is interrupted for any reason — rate limit, network drop, Ctrl+C, crash — simply re-run the same command. It will skip already-processed conversations and continue from where it stopped.
-To force a full re-export: `python -m src.main cache --clear` then re-run export.
+To force a full re-export: `ai-chat-exporter cache --clear` then re-run export.
 ---
@@ -351,7 +384,7 @@ To force a full re-export: `python -m src.main cache --clear` then re-run export
 ### `401 Unauthorized`
 Your session token has expired.
- Run `python -m src.main auth` to get a new token interactively
+- Run `ai-chat-exporter auth` to get a new token interactively
 - Or manually copy a fresh cookie value into your `.env` file
 Note: Claude's `sessionKey` is an opaque string — the only way to know it's expired is the 401 error. ChatGPT JWTs have an `exp` claim that the `doctor` command can decode and display.
@@ -391,10 +424,10 @@ The provider's internal API may have changed. Run with `--debug`, sanitize the o
 Images, code interpreter outputs, DALL-E generations, and Claude artifacts are not exported in v0.2.0. A WARNING is logged for each skipped item. See `FUTURE.md` for the roadmap.
 ### Empty export / all conversations skipped
-No new or updated conversations since your last run. To verify: `python -m src.main cache --show`. To force a full re-export: `python -m src.main cache --clear`.
+No new or updated conversations since your last run. To verify: `ai-chat-exporter cache --show`. To force a full re-export: `ai-chat-exporter cache --clear`.
 ### Filing a bug report
-1. Run with `--debug`: `python -m src.main export --debug 2>&1 | tee debug.log`
+1. Run with `--debug`: `ai-chat-exporter export --debug 2>&1 | tee debug.log`
 2. Remove any personal conversation content from `debug.log`
 3. Open a GitHub Issue with the sanitized log and the exact command you ran
--- a/src/config.py
+++ b/src/config.py
@@ -58,8 +58,8 @@ def load_config() -> Config:
    claude_key = os.getenv("CLAUDE_SESSION_KEY", "").strip() or None
    export_dir = Path(os.getenv("EXPORT_DIR", "./exports")).expanduser()
    output_structure = os.getenv("OUTPUT_STRUCTURE", "provider/project/year").strip()
-    cache_dir = Path(os.getenv("CACHE_DIR", "~/.ai-chat-exporter")).expanduser()
+    cache_dir = Path(os.getenv("CACHE_DIR", "./cache")).expanduser()
-    log_file = os.getenv("LOG_FILE", "~/.ai-chat-exporter/logs/exporter.log").strip()
+    log_file = os.getenv("LOG_FILE", "./cache/logs/exporter.log").strip()
    # Joplin
    joplin_token = os.getenv("JOPLIN_API_TOKEN", "").strip() or None
@@ -101,7 +101,7 @@ def load_config() -> Config:
    if not chatgpt_token and not claude_key:
        logger.warning(
            "Neither CHATGPT_SESSION_TOKEN nor CLAUDE_SESSION_KEY is set. "
-            "Run 'python -m src.main auth' to configure credentials."
+            "Run 'ai-chat-exporter auth' to configure credentials."
        )
    # Create and validate output directory
@@ -173,7 +173,7 @@ def _validate_chatgpt_token(token: str) -> datetime | None:
    if delta.total_seconds() < 0:
        logger.warning(
            "CHATGPT_SESSION_TOKEN expired at %s. "
-            "Run 'python -m src.main auth' to refresh it.",
+            "Run 'ai-chat-exporter auth' to refresh it.",
            expiry.strftime("%Y-%m-%d %H:%M UTC"),
        )
    elif delta.total_seconds() < 86400:
--- a/src/main.py
+++ b/src/main.py
@@ -70,7 +70,7 @@ def cli(ctx: click.Context, verbose: bool, quiet: bool, debug: bool, no_log_file
    # Determine log file path from env (setup_logging handles "none")
    import os
-    log_file = os.getenv("LOG_FILE", "~/.ai-chat-exporter/logs/exporter.log")
+    log_file = os.getenv("LOG_FILE", "./cache/logs/exporter.log")
    setup_logging(level=level, log_file=log_file, no_log_file=no_log_file)
@@ -79,7 +79,7 @@ def cli(ctx: click.Context, verbose: bool, quiet: bool, debug: bool, no_log_file
    # Initialise cache (needed for ToS gate on every command)
    import os
-    cache_dir = Path(os.getenv("CACHE_DIR", "~/.ai-chat-exporter")).expanduser()
+    cache_dir = Path(os.getenv("CACHE_DIR", "./cache")).expanduser()
    try:
        cache = Cache(cache_dir)
    except CacheError as e:
@@ -140,7 +140,7 @@ def auth(ctx: click.Context) -> None:
    if configure_claude:
        _auth_claude(os_name)
-    console.print("\n[green]Done! Run 'python -m src.main doctor' to verify your setup.[/green]")
+    console.print("\n[green]Done! Run 'ai-chat-exporter doctor' to verify your setup.[/green]")
 def _auth_chatgpt(os_name: str) -> None:
@@ -178,6 +178,25 @@ def _auth_chatgpt(os_name: str) -> None:
    except Exception:
        console.print("[yellow]Could not decode token expiry.[/yellow]")
    # Live validation — exchange session token for an access token
    _valid = False
    _error: str | None = None
    with console.status("[dim]Validating token with ChatGPT API…[/dim]"):
        try:
            from src.providers.chatgpt import ChatGPTProvider
            _prov = ChatGPTProvider(session_token=token)
            _prov._fetch_access_token()
            _valid = True
        except ProviderError as e:
            _error = str(e.original)
        except Exception as e:
            _error = str(e)
    if _valid:
        console.print("[green]✓ Token verified — connected to ChatGPT API.[/green]")
    else:
        console.print(f"[red]✗ Token validation failed: {_error}[/red]")
    _write_token_to_env("CHATGPT_SESSION_TOKEN", token)
    # --- ChatGPT Projects ---
@@ -231,7 +250,25 @@ def _auth_claude(os_name: str) -> None:
        console.print("[yellow]Skipped Claude token.[/yellow]")
        return
-    console.print("[green]Claude session key saved.[/green]")
+    # Live validation — fetch org ID (the first call any Claude operation makes)
    _valid = False
    _error: str | None = None
    with console.status("[dim]Validating token with Claude API…[/dim]"):
        try:
            from src.providers.claude import ClaudeProvider
            _prov = ClaudeProvider(session_key=key)
            _prov._get_org_id()
            _valid = True
        except ProviderError as e:
            _error = str(e.original)
        except Exception as e:
            _error = str(e)
    if _valid:
        console.print("[green]✓ Token verified — connected to Claude API.[/green]")
    else:
        console.print(f"[red]✗ Token validation failed: {_error}[/red]")
    _write_token_to_env("CLAUDE_SESSION_KEY", key)
@@ -341,7 +378,7 @@ def _run_doctor_checks() -> list[dict]:
    # Directories
    export_dir = Path(os.getenv("EXPORT_DIR", "./exports")).expanduser()
-    cache_dir = Path(os.getenv("CACHE_DIR", "~/.ai-chat-exporter")).expanduser()
+    cache_dir = Path(os.getenv("CACHE_DIR", "./cache")).expanduser()
    for label, dirpath in [("Export dir writable", export_dir), ("Cache dir writable", cache_dir)]:
        try:
@@ -496,7 +533,7 @@ def export(
    providers_to_run = _resolve_providers(provider, cfg)
    if not providers_to_run:
        err_console.print(
-            "[red]No providers configured. Run 'python -m src.main auth' to set up tokens.[/red]"
+            "[red]No providers configured. Run 'ai-chat-exporter auth' to set up tokens.[/red]"
        )
        sys.exit(1)
--- a/src/providers/base.py
+++ b/src/providers/base.py
@@ -326,7 +326,7 @@ class BaseProvider(ABC):
        msg = (
            f"[{self.provider_name}] Authentication failed (401 Unauthorized). "
            "Your session token has likely expired. "
-            "Run 'python -m src.main auth' to refresh your token."
+            "Run 'ai-chat-exporter auth' to refresh your token."
        )
        logger.error(msg)
        raise ProviderError(
--- a/src/providers/chatgpt.py
+++ b/src/providers/chatgpt.py
@@ -77,7 +77,7 @@ class ChatGPTProvider(BaseProvider):
                "init",
                RuntimeError(
                    "CHATGPT_SESSION_TOKEN is not set. "
-                    "Run 'python -m src.main auth' to configure it."
+                    "Run 'ai-chat-exporter auth' to configure it."
                ),
            )
        self._session_token = token
@@ -157,7 +157,7 @@ class ChatGPTProvider(BaseProvider):
                "fetch_access_token",
                RuntimeError(
                    "No accessToken in /api/auth/session response. "
-                    "Your session token may be expired — run 'python -m src.main auth' to refresh."
+                    "Your session token may be expired — run 'ai-chat-exporter auth' to refresh."
                ),
            )
        return access_token
@@ -169,7 +169,7 @@ class ChatGPTProvider(BaseProvider):
            "The session token is used to obtain a short-lived access token via /api/auth/session. "
            "To refresh: open chatgpt.com in Chrome → F12 → Application → Cookies "
            "→ find '__Secure-next-auth.session-token' → copy the value. "
-            "Then run 'python -m src.main auth' or update CHATGPT_SESSION_TOKEN in .env."
+            "Then run 'ai-chat-exporter auth' or update CHATGPT_SESSION_TOKEN in .env."
        )
        logger.error(msg)
        raise ProviderError(
@@ -369,7 +369,7 @@ class ChatGPTProvider(BaseProvider):
            logger.info(
                "[chatgpt] No project IDs configured — skipping project conversations. "
                "To include projects, set CHATGPT_PROJECT_IDS in .env "
-                "(see 'python -m src.main auth' for instructions)."
+                "(see 'ai-chat-exporter auth' for instructions)."
            )
            return self._apply_since_filter(default_convs, since)
@@ -624,7 +624,10 @@ def _extract_messages(
                content_type = content_obj.get("content_type", "text")
                text = _extract_text(content_obj, conv_id, node_id)
-                if content_type != "text":
+                # model_editable_context carries project instructions as plain text parts
                _TEXT_EXTRACTABLE = {"text", "model_editable_context"}
                if content_type not in _TEXT_EXTRACTABLE:
                    logger.warning(
                        "[chatgpt] Skipping %s content in conversation %s message %s "
                        "— rich content not yet supported (see FUTURE.md)",
--- a/src/providers/claude.py
+++ b/src/providers/claude.py
@@ -39,7 +39,7 @@ class ClaudeProvider(BaseProvider):
                "init",
                RuntimeError(
                    "CLAUDE_SESSION_KEY is not set. "
-                    "Run 'python -m src.main auth' to configure it."
+                    "Run 'ai-chat-exporter auth' to configure it."
                ),
            )
        # Set sessionKey in the cookie jar
@@ -60,7 +60,7 @@ class ClaudeProvider(BaseProvider):
            "Note: Claude session keys are opaque — a 401 is the only expiry signal. "
            "To refresh: open claude.ai in Chrome → F12 → Application → Cookies "
            "→ find 'sessionKey' → copy the value. "
-            "Then run 'python -m src.main auth' or update CLAUDE_SESSION_KEY in .env."
+            "Then run 'ai-chat-exporter auth' or update CLAUDE_SESSION_KEY in .env."
        )
        logger.error(msg)
        raise ProviderError(
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -0,0 +1,129 @@
 """CLI-level tests using Click's CliRunner — no live API calls required."""
 import pytest
 from click.testing import CliRunner
 from src.cache import Cache
 from src.main import _filter_by_project, cli
 # ---------------------------------------------------------------------------
 # _filter_by_project  (T-27)
 # ---------------------------------------------------------------------------
 class TestFilterByProject:
    """Unit tests for the project filter logic used by export/list/joplin."""
    # ChatGPT conversations use the _project_name annotation key
    def _chatgpt(self, conv_id, project_name):
        """Build a minimal ChatGPT-shaped conversation dict for the filter."""
        return {"id": conv_id, "_project_name": project_name}
    # Claude conversations use the project dict key
    def _claude(self, conv_id, project_name):
        """Build a minimal Claude-shaped conversation dict; a falsy name yields ``project: None``."""
        proj = {"name": project_name} if project_name else None
        return {"id": conv_id, "project": proj}
    def test_none_filter_keeps_no_project_chatgpt(self):
        """The literal filter "none" keeps only the ChatGPT conversation without a project."""
        convs = [self._chatgpt("a", None), self._chatgpt("b", "Python Course")]
        result = _filter_by_project(convs, "none")
        assert len(result) == 1
        assert result[0]["id"] == "a"
    def test_none_filter_keeps_no_project_claude(self):
        """The literal filter "none" keeps only the Claude conversation whose project is None."""
        convs = [self._claude("a", None), self._claude("b", "Python Course")]
        result = _filter_by_project(convs, "none")
        assert len(result) == 1
        assert result[0]["id"] == "a"
    def test_name_filter_case_insensitive(self):
        """An upper-case filter still matches a mixed-case project name."""
        convs = [
            self._chatgpt("a", "Python Course"),
            self._chatgpt("b", "Java Course"),
            self._chatgpt("c", None),
        ]
        result = _filter_by_project(convs, "PYTHON")
        assert len(result) == 1
        assert result[0]["id"] == "a"
    def test_name_filter_substring_match(self):
        """The filter matches any project whose name contains the given text."""
        convs = [
            self._chatgpt("a", "Python Advanced Course"),
            self._chatgpt("b", "Python Basics"),
            self._chatgpt("c", "JavaScript"),
        ]
        result = _filter_by_project(convs, "python")
        assert len(result) == 2
        # Compare as a set so the test does not depend on result ordering.
        assert {c["id"] for c in result} == {"a", "b"}
    def test_no_matches_returns_empty(self):
        """A filter that matches no project name yields an empty list."""
        convs = [self._chatgpt("a", "Python Course"), self._chatgpt("b", None)]
        result = _filter_by_project(convs, "ruby")
        assert result == []
    def test_none_filter_excludes_all_with_projects(self):
        """The "none" filter yields an empty list when every conversation has a project."""
        convs = [self._chatgpt("a", "Project A"), self._chatgpt("b", "Project B")]
        result = _filter_by_project(convs, "none")
        assert result == []
    def test_empty_string_project_treated_as_no_project(self):
        """An empty-string project name counts as "no project" for the "none" filter."""
        convs = [{"id": "a", "_project_name": ""}, {"id": "b", "_project_name": "Real"}]
        result = _filter_by_project(convs, "none")
        assert len(result) == 1
        assert result[0]["id"] == "a"
    def test_claude_project_string_matched(self):
        """A ``project`` value given as a plain string is matched by substring too."""
        # Claude can also have project as a plain string
        convs = [{"id": "a", "project": "python-course"}, {"id": "b", "project": None}]
        result = _filter_by_project(convs, "python")
        assert len(result) == 1
        assert result[0]["id"] == "a"
 # ---------------------------------------------------------------------------
 # export --since validation  (T-25)
 # ---------------------------------------------------------------------------
 class TestExportSinceValidation:
    """Check how ``export --since`` reacts to malformed and well-formed dates.

    Both tests drive the real ``cli`` group through Click's ``CliRunner``,
    pointing ``CACHE_DIR`` and ``EXPORT_DIR`` at locations under ``tmp_path``.
    """
    def _pre_populated_cache(self, tmp_path) -> Cache:
        """Create a cache that passes the ToS gate and first-run doctor check."""
        cache = Cache(tmp_path)
        cache.acknowledge_tos()
        cache.mark_exported("chatgpt", "dummy-conv", {"updated_at": "2024-01-01T00:00:00Z"})
        return cache
    def _invoke_export(self, tmp_path, since):
        """Run ``export --since <since>`` against a pre-populated cache.

        Returns the ``click.testing.Result`` of the invocation so each test
        can assert on ``exit_code`` and ``output``.
        """
        self._pre_populated_cache(tmp_path)
        # Do not pass ``mix_stderr``: Click 8.2 removed that parameter, so
        # ``CliRunner(mix_stderr=True)`` raises TypeError there. On older
        # Click versions mixing is the default, and on 8.2+ ``result.output``
        # always interleaves stdout and stderr, so the assertions below see
        # error messages written to stderr on every supported version.
        runner = CliRunner()
        return runner.invoke(
            cli,
            ["--no-log-file", "export", "--since", since],
            env={
                "CHATGPT_SESSION_TOKEN": "eyJtesttoken",
                "CACHE_DIR": str(tmp_path),
                "EXPORT_DIR": str(tmp_path / "exports"),
            },
        )
    def test_invalid_since_date_exits_with_error(self, tmp_path):
        """An unparseable date must exit with code 1 and name the expected format."""
        result = self._invoke_export(tmp_path, "notadate")
        assert result.exit_code == 1
        assert "Invalid --since date" in result.output
        assert "YYYY-MM-DD" in result.output
    def test_valid_since_date_does_not_error(self, tmp_path):
        """A valid date should not produce the invalid-date error (may fail later on API)."""
        result = self._invoke_export(tmp_path, "2024-01-01")
        assert "Invalid --since date" not in result.output
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -0,0 +1,56 @@
 """Tests for src/config.py — token validation logic (T-14)."""
 import logging
 import time
 import jwt
 import pytest
 from src.config import _validate_chatgpt_token
 class TestValidateChatGPTToken:
    """Exercise ``_validate_chatgpt_token`` with signed, unsigned-looking and malformed tokens."""
    @staticmethod
    def _signed(claims):
        """Return an HS256-signed JWT carrying ``claims``."""
        return jwt.encode(claims, "secret", algorithm="HS256")
    @staticmethod
    def _validate(caplog, token, level=logging.WARNING):
        """Call the validator while capturing ``src.config`` log records at ``level``."""
        with caplog.at_level(level, logger="src.config"):
            return _validate_chatgpt_token(token)
    def test_expired_token_logs_warning(self, caplog):
        """T-14: an expired JWT must produce a clear warning."""
        one_hour_ago = int(time.time()) - 3600
        expiry = self._validate(caplog, self._signed({"exp": one_hour_ago}))
        logged = [rec.message.lower() for rec in caplog.records]
        assert any("expired" in text for text in logged)
        # The expiry datetime is still returned for an expired token.
        assert expiry is not None
    def test_expiring_within_24h_logs_warning(self, caplog):
        """A token expiring in one hour triggers the "less than 24 hours" warning."""
        in_one_hour = int(time.time()) + 3600
        self._validate(caplog, self._signed({"exp": in_one_hour}))
        logged = [rec.message for rec in caplog.records]
        assert any("less than 24 hours" in text for text in logged)
    def test_valid_token_no_expiry_warning(self, caplog):
        """A token valid for five more days logs no "expired" message."""
        in_five_days = int(time.time()) + 86400 * 5
        expiry = self._validate(caplog, self._signed({"exp": in_five_days}))
        logged = [rec.message.lower() for rec in caplog.records]
        assert not any("expired" in text for text in logged)
        assert expiry is not None
    def test_token_without_exp_claim_logs_warning(self, caplog):
        """A JWT with no ``exp`` claim logs a warning and returns None."""
        expiry = self._validate(caplog, self._signed({"sub": "user123"}))
        assert any(
            "'exp'" in rec.message or "no 'exp'" in rec.message
            for rec in caplog.records
        )
        assert expiry is None
    def test_jwe_encrypted_token_returns_none(self, caplog):
        """JWE tokens (alg=dir) cannot be decoded client-side — this is normal for ChatGPT."""
        opaque = "eyJhbGciOiJkaXIiLCJlbmMiOiJBMjU2R0NNIn0.fake.token.data.here"
        expiry = self._validate(caplog, opaque, level=logging.DEBUG)
        # Undecodable, but not treated as an error.
        assert expiry is None
    def test_non_jwt_string_logs_warning(self, caplog):
        """A string with no JWT structure logs a warning and returns None."""
        expiry = self._validate(caplog, "notajwttoken")
        logged = [rec.message for rec in caplog.records]
        assert any("does not look like a JWT" in text for text in logged)
        assert expiry is None
--- a/tests/test_exporters.py
+++ b/tests/test_exporters.py
@@ -199,6 +199,34 @@ class TestJSONExporter:
        assert "  " in raw
 class TestBothFormats:
    """T-38: Markdown and JSON exporters produce matching filenames for the same conversation."""
    @staticmethod
    def _export_pair(root):
        """Export SAMPLE_CONV with both exporters rooted at ``root``; return (md_path, json_path)."""
        # Build both exporters before exporting, in Markdown-then-JSON order.
        markdown_exporter = MarkdownExporter(root)
        json_exporter = JSONExporter(root)
        markdown_file = markdown_exporter.export(SAMPLE_CONV)
        json_file = json_exporter.export(SAMPLE_CONV)
        return markdown_file, json_file
    def test_both_formats_produce_files(self, tmp_path):
        """Each exporter writes a file that exists on disk."""
        markdown_file, json_file = self._export_pair(tmp_path)
        assert markdown_file.exists()
        assert json_file.exists()
    def test_both_formats_have_matching_stems(self, tmp_path):
        """The two outputs differ only in extension (.md vs .json)."""
        markdown_file, json_file = self._export_pair(tmp_path)
        assert markdown_file.suffix == ".md"
        assert json_file.suffix == ".json"
        assert markdown_file.stem == json_file.stem
    def test_both_formats_same_directory(self, tmp_path):
        """Both outputs land in the same directory."""
        markdown_file, json_file = self._export_pair(tmp_path)
        assert markdown_file.parent == json_file.parent
 class TestYamlEscape:
    def test_escapes_double_quotes(self):
        assert _yaml_escape('Say "hello"') == 'Say \\"hello\\"'
--- a/tests/test_providers.py
+++ b/tests/test_providers.py
@@ -75,6 +75,39 @@ class TestChatGPTNormalization:
            for r in caplog.records
        )
    def test_model_editable_context_included_without_warning(self, caplog):
        """model_editable_context messages (project instructions) should be included, not warned about."""
        import logging
        # Minimal raw conversation: a message-less root node whose single child
        # carries one user message with content_type "model_editable_context".
        conv = {
            "id": "test-conv-mec",
            "title": "Test",
            "create_time": 1700000000.0,
            "update_time": 1700000001.0,
            "mapping": {
                "root": {"id": "root", "message": None, "parent": None, "children": ["msg1"]},
                "msg1": {
                    "id": "msg1",
                    "message": {
                        "id": "msg1",
                        "author": {"role": "user"},
                        "content": {
                            "content_type": "model_editable_context",
                            "parts": ["These are the project instructions."],
                        },
                        "create_time": 1700000001.0,
                        "status": "finished_successfully",
                    },
                    "parent": "root",
                    "children": [],
                },
            },
        }
        p = self._get_provider()
        # Capture WARNING and above from all loggers while normalizing.
        with caplog.at_level(logging.WARNING):
            result = p.normalize_conversation(conv)
        # The text part must survive normalization as a message's content...
        assert any(m["content"] == "These are the project instructions." for m in result["messages"])
        # ...and no captured record may mention the content type by name.
        assert not any("model_editable_context" in r.message for r in caplog.records)
    def test_message_roles_are_valid(self):
        raw = json.loads((FIXTURES / "chatgpt_conversation.json").read_text())
        p = self._get_provider()
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -0,0 +1,147 @@
 """Tests for src/utils.py — filename generation, path building, redaction."""
 from pathlib import Path
 import pytest
 from src.utils import (
    build_export_path,
    format_token_status,
    generate_filename,
    redact_secrets,
 )
 class TestGenerateFilename:
    """Tests for ``generate_filename``.

    The expected shape, as pinned by ``test_basic_format``, is
    ``<date>_<slugified title>_<first 8 chars of id>.md``.
    """

    def test_basic_format(self):
        """A plain title yields date, lowercase hyphenated slug and short id."""
        expected = "2024-06-10_hello-world_abc12345.md"
        result = generate_filename("Hello World", "abc12345def", "2024-06-10T14:00:00Z")
        assert result == expected

    def test_special_chars_slugified(self):
        """Punctuation and spaces never reach the generated filename."""
        # T-36: titles with punctuation must produce safe, OS-compatible filenames
        result = generate_filename("What's this?! A test.", "abc12345", "2024-06-01T00:00:00Z")
        for unsafe in ("?", "!", "'", " "):
            assert unsafe not in result
        assert result.startswith("2024-06-01_")
        assert result.endswith("_abc12345.md")

    def test_unicode_chars_handled(self):
        """Accented characters still produce a space-free name with the id suffix."""
        result = generate_filename("Héllo Wörld", "abc12345", "2024-06-01T00:00:00Z")
        assert " " not in result
        assert result.endswith("_abc12345.md")

    def test_empty_title_becomes_untitled(self):
        """An empty title falls back to an 'untitled' slug."""
        result = generate_filename("", "abc12345", "2024-06-01T00:00:00Z")
        assert "untitled" in result

    def test_id_truncated_to_8_chars(self):
        """Only the first eight characters of the id appear in the name."""
        result = generate_filename("Test", "abcdefghijklmnop", "2024-06-01T00:00:00Z")
        assert result.endswith("_abcdefgh.md")

    def test_long_title_truncated(self):
        """A 200-character title is cut down to a bounded slug."""
        result = generate_filename("a" * 200, "abc12345", "2024-06-01T00:00:00Z")
        # Slug is capped at 60 chars by max_length
        pieces = result.split("_")
        assert len(pieces[1]) <= 60

    def test_date_comes_from_created_at(self):
        """The date prefix is taken from the timestamp argument."""
        result = generate_filename("Test", "abc12345", "2023-11-25T00:00:00Z")
        assert result.startswith("2023-11-25_")
 class TestBuildExportPath:
    """Tests for ``build_export_path`` directory layouts.

    The exact-path assertion is written to be separator-agnostic so the
    suite passes on both POSIX and Windows, where ``str(Path)`` renders
    backslashes instead of forward slashes.
    """

    def test_default_structure_provider_project_year(self):
        """With no explicit structure the layout is provider/project/year/file."""
        path = build_export_path(
            Path("/exports"), "claude", "my-project", "2024-06-01T00:00:00Z", "file.md"
        )
        # as_posix() always uses forward slashes; comparing str(path) directly
        # would fail on Windows, where the separators are backslashes.
        assert Path(path).as_posix() == "/exports/claude/my-project/2024/file.md"

    def test_no_project_uses_no_project_slug(self):
        """A missing project (None) is replaced by a 'no-project' path segment."""
        path = build_export_path(
            Path("/exports"), "chatgpt", None, "2024-06-01T00:00:00Z", "file.md"
        )
        assert "no-project" in str(path)

    def test_provider_project_structure_omits_year(self):
        """structure='provider/project' keeps the project and drops the year."""
        path = build_export_path(
            Path("/exports"), "claude", "proj", "2024-06-01T00:00:00Z", "file.md",
            structure="provider/project",
        )
        assert "2024" not in str(path)
        assert "proj" in str(path)

    def test_provider_year_structure_omits_project(self):
        """structure='provider/year' keeps the year and drops the project."""
        path = build_export_path(
            Path("/exports"), "claude", "proj", "2024-06-01T00:00:00Z", "file.md",
            structure="provider/year",
        )
        assert "proj" not in str(path)
        assert "2024" in str(path)

    def test_project_name_with_spaces_is_slugified(self):
        """Spaces and punctuation in the project name never reach the path."""
        path = build_export_path(
            Path("/exports"), "claude", "My Project Name!", "2024-06-01T00:00:00Z", "file.md"
        )
        assert " " not in str(path)
        assert "!" not in str(path)
 class TestRedactSecrets:
    """Tests for ``redact_secrets``: sensitive keys are masked, others pass through."""

    def test_token_value_redacted(self):
        """A top-level 'token' value is replaced by the redaction marker."""
        redacted = redact_secrets({"token": "supersecret"})
        assert redacted["token"] == "[REDACTED]"

    def test_session_key_redacted(self):
        """The camelCase 'sessionKey' key is treated as sensitive too."""
        redacted = redact_secrets({"sessionKey": "abc123"})
        assert redacted["sessionKey"] == "[REDACTED]"

    def test_non_sensitive_key_unchanged(self):
        """Ordinary keys such as 'title' and 'id' keep their values."""
        payload = {"title": "My Chat", "id": "abc123"}
        redacted = redact_secrets(payload)
        assert redacted["title"] == "My Chat"
        assert redacted["id"] == "abc123"

    def test_nested_dict_redacted(self):
        """Redaction reaches into nested dicts without touching sibling keys."""
        payload = {"user": {"token": "secret", "name": "Alice"}}
        inner = redact_secrets(payload)["user"]
        assert inner["token"] == "[REDACTED]"
        assert inner["name"] == "Alice"

    def test_list_of_dicts(self):
        """A list input is processed element by element."""
        payload = [{"password": "p@ss"}, {"title": "chat"}]
        redacted = redact_secrets(payload)
        assert redacted[0]["password"] == "[REDACTED]"
        assert redacted[1]["title"] == "chat"
 class TestFormatTokenStatus:
    """Tests for ``format_token_status`` with and without an expiry timestamp."""

    def test_none_token_returns_not_set(self):
        """A None token is reported as not set."""
        status = format_token_status(None)
        assert status == "[NOT SET]"

    def test_empty_token_returns_not_set(self):
        """An empty-string token is reported as not set."""
        status = format_token_status("")
        assert status == "[NOT SET]"

    def test_set_token_no_expiry(self):
        """A token without expiry information is simply reported as set."""
        status = format_token_status("sometoken")
        assert status == "[SET]"

    def test_expired_token(self):
        """An expiry one day in the past is flagged as expired."""
        from datetime import datetime, timezone, timedelta

        yesterday = datetime.now(tz=timezone.utc) - timedelta(days=1)
        status = format_token_status("tok", yesterday)
        assert "EXPIRED" in status

    def test_expiring_today_shows_hours(self):
        """An expiry a few hours away is reported with an hour figure."""
        from datetime import datetime, timezone, timedelta

        in_three_hours = datetime.now(tz=timezone.utc) + timedelta(hours=3)
        status = format_token_status("tok", in_three_hours)
        assert "expires in" in status
        assert "h" in status

    def test_expiring_in_days(self):
        """An expiry ten and a half days away is reported as 10 days."""
        from datetime import datetime, timezone, timedelta

        later = datetime.now(tz=timezone.utc) + timedelta(days=10, hours=12)
        status = format_token_status("tok", later)
        assert "10 days" in status