Core features: - Add `joplin` command: syncs exported Markdown to Joplin via local REST API - Notebooks auto-created per provider+project (e.g. "ChatGPT - My Project") - Idempotent: notes updated (not duplicated) on re-run; note ID tracked in manifest - Add `--project` filter to `export` and `list` commands (substring or 'none') - Add ChatGPT Projects support via CHATGPT_PROJECT_IDS env var Config: - Add JOPLIN_API_TOKEN, JOPLIN_API_URL, JOPLIN_REQUEST_TIMEOUT - Version now read from importlib.metadata (single source of truth: pyproject.toml) - Bump version to 0.2.0 Quality: - Explicit Timeout handling in JoplinClient with actionable error messages - token validation (validate_token) separate from connectivity (ping) - Remove debug_auth.py, debug_claude.py, and untracked .har file - Add *.har to .gitignore (may contain auth cookies/session tokens) - Update README, CHANGELOG, FUTURE.md to reflect v0.2.0 Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
177 lines
6.8 KiB
Python
177 lines
6.8 KiB
Python
"""Unit tests for src/providers/ using fixture files."""
|
|
|
|
import json
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
FIXTURES = Path(__file__).parent / "fixtures"
|
|
|
|
|
|
class TestChatGPTNormalization:
    """Test ChatGPTProvider.normalize_conversation() using fixture data."""

    def _get_provider(self):
        """Build a ChatGPTProvider without running __init__ (which checks a token)."""
        from src.providers.chatgpt import ChatGPTProvider
        import requests

        provider = ChatGPTProvider.__new__(ChatGPTProvider)
        provider._session = requests.Session()
        provider._org_id = None
        provider._project_ids = []
        provider._project_map = {}
        provider._project_name_cache = {}
        return provider

    def _normalize(self, fixture_name, provider=None):
        """Load a JSON fixture and run it through normalize_conversation()."""
        raw = json.loads((FIXTURES / fixture_name).read_text())
        if provider is None:
            provider = self._get_provider()
        return provider.normalize_conversation(raw)

    def test_normalizes_conversation(self):
        result = self._normalize("chatgpt_conversation.json")

        assert result["id"] == "chatgpt-conv-001"
        assert result["title"] == "Python Async Tutorial"
        assert result["provider"] == "chatgpt"
        # No entry in _project_map → project is None
        assert result["project"] is None
        assert result["created_at"] != ""
        assert result["updated_at"] != ""
        assert isinstance(result["messages"], list)

    def test_normalizes_without_project(self):
        result = self._normalize("chatgpt_no_project.json")

        assert result["project"] is None
        assert result["id"] == "chatgpt-conv-002"

    def test_normalizes_with_project_from_map(self):
        """Project name from _project_map (populated by fetch_all_conversations) flows through."""
        provider = self._get_provider()
        provider._project_map["chatgpt-conv-001"] = "My Research Project"
        result = self._normalize("chatgpt_conversation.json", provider=provider)

        assert result["project"] == "My Research Project"

    def test_extracts_text_messages(self):
        result = self._normalize("chatgpt_conversation.json")

        assert len(result["messages"]) >= 2
        user_msgs = [m for m in result["messages"] if m["role"] == "user"]
        assert any("async" in m["content"].lower() for m in user_msgs)

    def test_skips_non_text_content_with_warning(self, caplog):
        import logging

        with caplog.at_level(logging.WARNING):
            self._normalize("chatgpt_conversation.json")

        # The fixture has an image_asset_pointer node — should be warned about
        warnings = [
            r
            for r in caplog.records
            if "image_asset_pointer" in r.message or "rich content" in r.message
        ]
        assert warnings

    def test_message_roles_are_valid(self):
        result = self._normalize("chatgpt_conversation.json")

        for msg in result["messages"]:
            assert msg["role"] in ("user", "assistant", "system")

    def test_message_count_matches(self):
        result = self._normalize("chatgpt_conversation.json")

        assert result["message_count"] == len(result["messages"])

    def test_code_fence_preserved(self):
        result = self._normalize("chatgpt_conversation.json")

        all_content = " ".join(m["content"] for m in result["messages"])
        assert "```python" in all_content
|
|
|
|
|
|
class TestClaudeNormalization:
    """Test ClaudeProvider.normalize_conversation() using fixture data."""

    def _get_provider(self):
        """Build a ClaudeProvider without running __init__ (bypasses token check)."""
        from src.providers.claude import ClaudeProvider
        import requests

        provider = ClaudeProvider.__new__(ClaudeProvider)
        provider._session = requests.Session()
        provider._org_id = None
        return provider

    def _normalize(self, fixture_name):
        """Load a JSON fixture and run it through normalize_conversation()."""
        raw = json.loads((FIXTURES / fixture_name).read_text())
        return self._get_provider().normalize_conversation(raw)

    def test_normalizes_with_project(self):
        result = self._normalize("claude_conversation.json")

        assert result["id"] == "claude-conv-001"
        assert result["title"] == "StartOS Service Packaging"
        assert result["provider"] == "claude"
        assert result["project"] == "StarTOS Packaging"
        assert result["created_at"] == "2024-06-10T14:32:00.000Z"
        assert isinstance(result["messages"], list)

    def test_normalizes_without_project(self):
        result = self._normalize("claude_no_project.json")

        assert result["project"] is None
        assert result["id"] == "claude-conv-002"

    def test_string_content_extracted(self):
        result = self._normalize("claude_no_project.json")

        assert any("Docker" in m["content"] for m in result["messages"])

    def test_list_content_extracted(self):
        result = self._normalize("claude_conversation.json")

        assistant_msgs = [m for m in result["messages"] if m["role"] == "assistant"]
        assert any("manifest" in m["content"].lower() for m in assistant_msgs)

    def test_non_text_blocks_skipped_with_warning(self, caplog):
        import logging

        with caplog.at_level(logging.WARNING):
            self._normalize("claude_conversation.json")

        # The fixture has a tool_use block — should warn
        warnings = [
            r
            for r in caplog.records
            if "tool_use" in r.message or "rich content" in r.message
        ]
        assert warnings

    def test_message_count_matches(self):
        result = self._normalize("claude_conversation.json")

        assert result["message_count"] == len(result["messages"])

    def test_roles_normalized(self):
        result = self._normalize("claude_conversation.json")

        for msg in result["messages"]:
            assert msg["role"] in ("user", "assistant", "system")

    def test_human_sender_maps_to_user(self):
        result = self._normalize("claude_conversation.json")

        roles = {m["role"] for m in result["messages"]}
        assert "user" in roles
        assert "human" not in roles
|