From 62445c7c0c1090ecae42e065c9c995ddad164e27 Mon Sep 17 00:00:00 2001 From: JesseMarkowitz Date: Fri, 27 Feb 2026 22:45:46 -0500 Subject: [PATCH] chore: initialize project scaffold Co-Authored-By: Claude Sonnet 4.6 --- .env.example | 35 +++++++++++++++++++ .gitignore | 38 +++++++++++++++++++++ CHANGELOG.md | 11 ++++++ FUTURE.md | 72 +++++++++++++++++++++++++++++++++++++++ pyproject.toml | 37 ++++++++++++++++++++ requirements.txt | 25 ++++++++++++++ src/__init__.py | 0 src/exporters/__init__.py | 0 src/providers/__init__.py | 0 tests/__init__.py | 0 10 files changed, 218 insertions(+) create mode 100644 .env.example create mode 100644 .gitignore create mode 100644 CHANGELOG.md create mode 100644 FUTURE.md create mode 100644 pyproject.toml create mode 100644 requirements.txt create mode 100644 src/__init__.py create mode 100644 src/exporters/__init__.py create mode 100644 src/providers/__init__.py create mode 100644 tests/__init__.py diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..97b0a63 --- /dev/null +++ b/.env.example @@ -0,0 +1,35 @@ +# ============================================================ +# AI Chat Exporter — Configuration +# ============================================================ +# Copy this file to .env and fill in your values. +# NEVER commit .env to git. It contains secrets. + +# --- ChatGPT --- +# How to get: open chatgpt.com in Chrome → F12 → Application tab +# → Cookies → https://chatgpt.com → find "__Secure-next-auth.session-token" → copy Value +# Token type: JWT (starts with "eyJ"). Typically valid for ~7 days. +CHATGPT_SESSION_TOKEN= + +# --- Claude --- +# How to get: open claude.ai in Chrome → F12 → Application tab +# → Cookies → https://claude.ai → find "sessionKey" → copy Value +# Token type: opaque string. Typically valid for ~30 days. +CLAUDE_SESSION_KEY= + +# --- Output --- +# Where exported Markdown files are written (default: ./exports) +EXPORT_DIR=./exports + +# Output folder structure. 
Options: +# provider/project/year (default) → exports/claude/my-project/2024/file.md +# provider/project → exports/claude/my-project/file.md +# provider/year → exports/claude/2024/file.md (ignores projects) +OUTPUT_STRUCTURE=provider/project/year + +# --- Cache --- +# Where the sync manifest and logs are stored (default: ~/.ai-chat-exporter) +CACHE_DIR=~/.ai-chat-exporter + +# --- Logging --- +# Log file path. Set to "none" to disable file logging. +LOG_FILE=~/.ai-chat-exporter/logs/exporter.log diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..064d8a2 --- /dev/null +++ b/.gitignore @@ -0,0 +1,38 @@ +.venv/ +__pycache__/ +*.pyc +*.pyo +*.pyd +.Python +*.egg-info/ +dist/ +build/ +.eggs/ + +# Secrets — never commit these +.env +*.env +.env.* +!.env.example + +# Exported conversation data — may contain private content +exports/ +*.md +*.json +!tests/fixtures/*.json +!README.md +!FUTURE.md +!CHANGELOG.md + +# Cache and logs +.ai-chat-exporter/ +logs/ +*.log + +# Editor / OS +.DS_Store +.idea/ +.vscode/ +*.swp +*.swo +Thumbs.db diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..3ae6ba7 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,11 @@ +# Changelog + +All notable changes to this project will be documented here. +Format follows [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). + +## [0.1.0] - Unreleased +### Added +- Initial implementation: ChatGPT and Claude export via internal web APIs +- Markdown and JSON exporters +- Local cache/manifest for incremental sync +- CLI with export, list, cache, doctor, and auth commands diff --git a/FUTURE.md b/FUTURE.md new file mode 100644 index 0000000..dbd0c29 --- /dev/null +++ b/FUTURE.md @@ -0,0 +1,72 @@ +# Planned Future Work + +These items are explicitly out of scope for v0.1.0, but the design anticipates them. +The codebase is structured to make each of these additions straightforward. 
+ +## Export --force Flag (v0.1.x) +Add `--force` to the `export` command to re-export already-cached conversations +without permanently clearing the entire manifest. Useful for re-generating files +after changing the Markdown template or output structure. + +Implementation: pass `force=True` to `cache.get_new_or_updated()`, which +returns all conversations regardless of cache state when force is True. + +Current workaround: `python -m src.main cache --clear` then re-run export. + +## Joplin Integration (v0.2.0) +Automate importing exported Markdown files into Joplin as new notes. +Joplin exposes a local REST API (requires Joplin desktop running with Web Clipper enabled). + +Approach: after export, iterate exported files and POST each to +`http://localhost:41184/notes` with the appropriate notebook ID. + +The output folder structure maps directly to Joplin notebooks: +- exports/chatgpt/my-project/ → Joplin notebook "ChatGPT - My Project" +- exports/claude/my-project/ → Joplin notebook "Claude - My Project" +- exports/chatgpt/no-project/ → Joplin notebook "ChatGPT - No Project" +- exports/claude/no-project/ → Joplin notebook "Claude - No Project" + +Prerequisites: +- Joplin desktop must be running with Web Clipper enabled +- `JOPLIN_API_TOKEN` env var (get from Joplin → Tools → Web Clipper Options) +- The Joplin import script will need to create notebooks if they don't exist, + then POST each note into the correct notebook + +Note: The import script will assume the default OUTPUT_STRUCTURE of +provider/project/year. If the user has changed OUTPUT_STRUCTURE, +the import script will need updating accordingly. 
+ +## Official Export File Fallback (v0.3.0) +If the unofficial internal web API approach breaks, fall back to parsing the +official data-export files instead: +- ChatGPT: parse `conversations.json` from Settings → Export Data +- Claude: parse `conversations.json` from Settings → Privacy → Export Data + +The `BaseProvider` abstract class is intentionally designed so that a +`FileProvider` subclass can implement the same interface +(list_conversations, get_conversation, normalize_conversation) +without any changes to cache, exporters, or CLI code. + +To add this: implement `src/providers/file_chatgpt.py` and +`src/providers/file_claude.py`, then add an `--input-file` flag to the +export command to accept a pre-downloaded export ZIP or JSON. + +## Rich Content Support (v0.4.0) +Currently only text content is exported. Future versions should handle: + +### Claude +- Artifacts (code, documents, HTML) — export as separate files, link from Markdown +- Uploaded images — download and embed or link +- Extended thinking/reasoning blocks — include as collapsible sections +- Tool call results and web search citations — include as footnotes or appendices + +### ChatGPT +- DALL-E generated images — download and embed or link +- Code Interpreter outputs — export code and results +- File attachments — download and reference +- Voice transcripts — include as text + +Implementation note: the normalized message schema already includes a +`content_type` field placeholder. When this work begins, extend the schema +rather than replacing it. In v0.1.0, log a WARNING whenever non-text content +is encountered so users know what was skipped. 
diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..a432976 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,37 @@ +[build-system] +requires = ["setuptools>=68", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "ai-chat-exporter" +version = "0.1.0" +description = "Export ChatGPT and Claude conversation history to Markdown for personal archival in Joplin" +requires-python = ">=3.11" +dependencies = [ + "requests==2.31.0", + "click==8.1.7", + "python-dotenv==1.0.1", + "rich==13.7.1", + "python-slugify==8.0.4", + "PyJWT==2.8.0", +] + +[project.optional-dependencies] +dev = [ + "pytest==8.1.1", + "pytest-mock==3.14.0", + "responses==0.25.3", +] + +[project.scripts] +ai-chat-exporter = "src.main:cli" + +[tool.setuptools.packages.find] +where = ["."] +include = ["src*"] + +[tool.pytest.ini_options] +testpaths = ["tests"] +python_files = ["test_*.py"] +python_classes = ["Test*"] +python_functions = ["test_*"] diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..722a304 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,25 @@ +# Editable Git install with no remote (ai-chat-exporter==0.1.0) +-e /home/jesse/services/ai-chatexport +certifi==2026.2.25 +charset-normalizer==3.4.4 +click==8.1.7 +idna==3.11 +iniconfig==2.3.0 +markdown-it-py==4.0.0 +mdurl==0.1.2 +packaging==26.0 +pluggy==1.6.0 +Pygments==2.19.2 +PyJWT==2.8.0 +pytest==8.1.1 +pytest-mock==3.14.0 +python-dotenv==1.0.1 +python-slugify==8.0.4 +PyYAML==6.0.3 +requests==2.31.0 +responses==0.25.3 +rich==13.7.1 +setuptools==82.0.0 +text-unidecode==1.3 +urllib3==2.6.3 +wheel==0.46.3 diff --git a/src/__init__.py b/src/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/exporters/__init__.py b/src/exporters/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/providers/__init__.py b/src/providers/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/__init__.py b/tests/__init__.py new file 
mode 100644 index 0000000..e69de29