Files
ai-chatexport/tests/test_providers.py
JesseMarkowitz 304cf4fde4 feat: v0.2.0 — Joplin import, ChatGPT Projects, --project filter
Core features:
- Add `joplin` command: syncs exported Markdown to Joplin via local REST API
- Notebooks auto-created per provider+project (e.g. "ChatGPT - My Project")
- Idempotent: notes updated (not duplicated) on re-run; note ID tracked in manifest
- Add `--project` filter to `export` and `list` commands (substring or 'none')
- Add ChatGPT Projects support via CHATGPT_PROJECT_IDS env var

Config:
- Add JOPLIN_API_TOKEN, JOPLIN_API_URL, JOPLIN_REQUEST_TIMEOUT
- Version now read from importlib.metadata (single source of truth: pyproject.toml)
- Bump version to 0.2.0

Quality:
- Explicit Timeout handling in JoplinClient with actionable error messages
- Token validation (validate_token) separate from connectivity (ping)
- Remove debug_auth.py, debug_claude.py, and untracked .har file
- Add *.har to .gitignore (may contain auth cookies/session tokens)
- Update README, CHANGELOG, FUTURE.md to reflect v0.2.0

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-01 06:04:03 -05:00

177 lines
6.8 KiB
Python

"""Unit tests for src/providers/ using fixture files."""
import json
from pathlib import Path
import pytest
FIXTURES = Path(__file__).parent / "fixtures"
class TestChatGPTNormalization:
    """Test ChatGPTProvider.normalize_conversation() using fixture data."""

    @staticmethod
    def _load(name):
        """Parse a JSON fixture from tests/fixtures/.

        Uses an explicit UTF-8 encoding so parsing does not depend on the
        platform default (e.g. cp1252 on Windows).
        """
        return json.loads((FIXTURES / name).read_text(encoding="utf-8"))

    def _get_provider(self):
        """Build a ChatGPTProvider without running __init__.

        __init__ validates the auth token, which would fail in CI; bypass it
        with __new__ and set only the attributes normalize_conversation reads.
        """
        from src.providers.chatgpt import ChatGPTProvider
        import requests

        p = ChatGPTProvider.__new__(ChatGPTProvider)
        p._session = requests.Session()
        p._org_id = None
        p._project_ids = []
        p._project_map = {}
        p._project_name_cache = {}
        return p

    def test_normalizes_conversation(self):
        """Core fields of a normalized conversation are populated correctly."""
        raw = self._load("chatgpt_conversation.json")
        p = self._get_provider()
        result = p.normalize_conversation(raw)
        assert result["id"] == "chatgpt-conv-001"
        assert result["title"] == "Python Async Tutorial"
        assert result["provider"] == "chatgpt"
        # No entry in _project_map → project is None
        assert result["project"] is None
        assert result["created_at"] != ""
        assert result["updated_at"] != ""
        assert isinstance(result["messages"], list)

    def test_normalizes_without_project(self):
        """A conversation with no project yields project=None."""
        raw = self._load("chatgpt_no_project.json")
        p = self._get_provider()
        result = p.normalize_conversation(raw)
        assert result["project"] is None
        assert result["id"] == "chatgpt-conv-002"

    def test_normalizes_with_project_from_map(self):
        """Project name from _project_map (populated by fetch_all_conversations) flows through."""
        raw = self._load("chatgpt_conversation.json")
        p = self._get_provider()
        p._project_map["chatgpt-conv-001"] = "My Research Project"
        result = p.normalize_conversation(raw)
        assert result["project"] == "My Research Project"

    def test_extracts_text_messages(self):
        """User text content is extracted into the messages list."""
        raw = self._load("chatgpt_conversation.json")
        p = self._get_provider()
        result = p.normalize_conversation(raw)
        assert len(result["messages"]) >= 2
        user_msgs = [m for m in result["messages"] if m["role"] == "user"]
        assert any("async" in m["content"].lower() for m in user_msgs)

    def test_skips_non_text_content_with_warning(self, caplog):
        """Non-text content parts are skipped and a warning is logged."""
        import logging
        raw = self._load("chatgpt_conversation.json")
        p = self._get_provider()
        with caplog.at_level(logging.WARNING):
            result = p.normalize_conversation(raw)
        # The fixture has an image_asset_pointer node — should be warned about
        assert any(
            "image_asset_pointer" in r.message or "rich content" in r.message
            for r in caplog.records
        )

    def test_message_roles_are_valid(self):
        """Every normalized message uses one of the three canonical roles."""
        raw = self._load("chatgpt_conversation.json")
        p = self._get_provider()
        result = p.normalize_conversation(raw)
        for msg in result["messages"]:
            assert msg["role"] in ("user", "assistant", "system")

    def test_message_count_matches(self):
        """message_count agrees with the length of the messages list."""
        raw = self._load("chatgpt_conversation.json")
        p = self._get_provider()
        result = p.normalize_conversation(raw)
        assert result["message_count"] == len(result["messages"])

    def test_code_fence_preserved(self):
        """Markdown code fences survive normalization intact."""
        raw = self._load("chatgpt_conversation.json")
        p = self._get_provider()
        result = p.normalize_conversation(raw)
        all_content = " ".join(m["content"] for m in result["messages"])
        assert "```python" in all_content
class TestClaudeNormalization:
    """Test ClaudeProvider.normalize_conversation() using fixture data."""

    @staticmethod
    def _load(name):
        """Parse a JSON fixture from tests/fixtures/.

        Uses an explicit UTF-8 encoding so parsing does not depend on the
        platform default (e.g. cp1252 on Windows).
        """
        return json.loads((FIXTURES / name).read_text(encoding="utf-8"))

    def _get_provider(self):
        """Build a ClaudeProvider without running __init__.

        __new__ skips the token check in __init__; only the attributes that
        normalize_conversation reads are set here.
        """
        from src.providers.claude import ClaudeProvider
        import requests

        p = ClaudeProvider.__new__(ClaudeProvider)
        p._session = requests.Session()
        p._org_id = None
        return p

    def test_normalizes_with_project(self):
        """Core fields, including the project name, normalize correctly."""
        raw = self._load("claude_conversation.json")
        p = self._get_provider()
        result = p.normalize_conversation(raw)
        assert result["id"] == "claude-conv-001"
        assert result["title"] == "StartOS Service Packaging"
        assert result["provider"] == "claude"
        assert result["project"] == "StarTOS Packaging"
        assert result["created_at"] == "2024-06-10T14:32:00.000Z"
        assert isinstance(result["messages"], list)

    def test_normalizes_without_project(self):
        """A conversation with no project yields project=None."""
        raw = self._load("claude_no_project.json")
        p = self._get_provider()
        result = p.normalize_conversation(raw)
        assert result["project"] is None
        assert result["id"] == "claude-conv-002"

    def test_string_content_extracted(self):
        """Plain string message content is carried through."""
        raw = self._load("claude_no_project.json")
        p = self._get_provider()
        result = p.normalize_conversation(raw)
        assert any("Docker" in m["content"] for m in result["messages"])

    def test_list_content_extracted(self):
        """Text extracted from list-of-blocks content reaches the output."""
        raw = self._load("claude_conversation.json")
        p = self._get_provider()
        result = p.normalize_conversation(raw)
        assistant_msgs = [m for m in result["messages"] if m["role"] == "assistant"]
        assert any("manifest" in m["content"].lower() for m in assistant_msgs)

    def test_non_text_blocks_skipped_with_warning(self, caplog):
        """Non-text content blocks are skipped and a warning is logged."""
        import logging
        raw = self._load("claude_conversation.json")
        p = self._get_provider()
        with caplog.at_level(logging.WARNING):
            result = p.normalize_conversation(raw)
        # The fixture has a tool_use block — should warn
        assert any(
            "tool_use" in r.message or "rich content" in r.message
            for r in caplog.records
        )

    def test_message_count_matches(self):
        """message_count agrees with the length of the messages list."""
        raw = self._load("claude_conversation.json")
        p = self._get_provider()
        result = p.normalize_conversation(raw)
        assert result["message_count"] == len(result["messages"])

    def test_roles_normalized(self):
        """Every normalized message uses one of the three canonical roles."""
        raw = self._load("claude_conversation.json")
        p = self._get_provider()
        result = p.normalize_conversation(raw)
        for msg in result["messages"]:
            assert msg["role"] in ("user", "assistant", "system")

    def test_human_sender_maps_to_user(self):
        """Claude's 'human' sender is mapped to the canonical 'user' role."""
        raw = self._load("claude_conversation.json")
        p = self._get_provider()
        result = p.normalize_conversation(raw)
        roles = {m["role"] for m in result["messages"]}
        assert "user" in roles
        assert "human" not in roles