diff --git a/tests/fixtures/chatgpt_conversation.json b/tests/fixtures/chatgpt_conversation.json new file mode 100644 index 0000000..e8de3d8 --- /dev/null +++ b/tests/fixtures/chatgpt_conversation.json @@ -0,0 +1,57 @@ +{ + "id": "chatgpt-conv-001", + "title": "Python Async Tutorial", + "create_time": 1704067200.0, + "update_time": 1704153600.0, + "project_title": "Learning Python", + "mapping": { + "node-root": { + "id": "node-root", + "parent": null, + "children": ["node-1"], + "message": null + }, + "node-1": { + "id": "node-1", + "parent": "node-root", + "children": ["node-2"], + "message": { + "id": "node-1", + "author": {"role": "user"}, + "create_time": 1704067200.0, + "content": { + "content_type": "text", + "parts": ["How does async/await work in Python?"] + } + } + }, + "node-2": { + "id": "node-2", + "parent": "node-1", + "children": ["node-3"], + "message": { + "id": "node-2", + "author": {"role": "assistant"}, + "create_time": 1704067210.0, + "content": { + "content_type": "text", + "parts": ["Async/await in Python allows you to write concurrent code using coroutines.\n\n```python\nimport asyncio\n\nasync def main():\n await asyncio.sleep(1)\n print('Done!')\n\nasyncio.run(main())\n```"] + } + } + }, + "node-3": { + "id": "node-3", + "parent": "node-2", + "children": [], + "message": { + "id": "node-3", + "author": {"role": "user"}, + "create_time": 1704067300.0, + "content": { + "content_type": "image_asset_pointer", + "parts": [{"content_type": "image_asset_pointer", "asset_pointer": "file://some-image"}] + } + } + } + } +} diff --git a/tests/fixtures/chatgpt_no_project.json b/tests/fixtures/chatgpt_no_project.json new file mode 100644 index 0000000..e12a648 --- /dev/null +++ b/tests/fixtures/chatgpt_no_project.json @@ -0,0 +1,43 @@ +{ + "id": "chatgpt-conv-002", + "title": "Quick Question", + "create_time": 1706745600.0, + "update_time": 1706745900.0, + "project_title": null, + "mapping": { + "node-root": { + "id": "node-root", + "parent": null, + "children": ["msg-1"], + "message": null + }, + "msg-1": { + "id": "msg-1", + "parent": "node-root", + "children": ["msg-2"], + "message": { + "id": "msg-1", + "author": {"role": "user"}, + "create_time": 1706745600.0, + "content": { + "content_type": "text", + "parts": ["What is the capital of France?"] + } + } + }, + "msg-2": { + "id": "msg-2", + "parent": "msg-1", + "children": [], + "message": { + "id": "msg-2", + "author": {"role": "assistant"}, + "create_time": 1706745610.0, + "content": { + "content_type": "text", + "parts": ["The capital of France is Paris."] + } + } + } + } +} diff --git a/tests/fixtures/claude_conversation.json b/tests/fixtures/claude_conversation.json new file mode 100644 index 0000000..291021e --- /dev/null +++ b/tests/fixtures/claude_conversation.json @@ -0,0 +1,35 @@ +{ + "uuid": "claude-conv-001", + "name": "StartOS Service Packaging", + "created_at": "2024-06-10T14:32:00.000Z", + "updated_at": "2024-06-10T15:00:00.000Z", + "project": { + "uuid": "proj-001", + "name": "StarTOS Packaging" + }, + "chat_messages": [ + { + "uuid": "msg-001", + "sender": "human", + "created_at": "2024-06-10T14:32:00.000Z", + "content": [ + {"type": "text", "text": "How do I create a manifest.ts for a StartOS service?"} + ] + }, + { + "uuid": "msg-002", + "sender": "assistant", + "created_at": "2024-06-10T14:32:10.000Z", + "content": [ + {"type": "text", "text": "To create a manifest.ts for a StartOS service, you need to define the package metadata:\n\n```typescript\nimport { sdk } from './sdk'\n\nexport const manifest = sdk.Manifest.of({\n id: 'my-service',\n title: 'My Service',\n version: '1.0.0',\n})\n```"}, + {"type": "tool_use", "id": "tool-001", "name": "search", "input": {"query": "startOS docs"}} + ] + }, + { + "uuid": "msg-003", + "sender": "human", + "created_at": "2024-06-10T14:45:00.000Z", + "content": "Thank you, that helped!" + } + ] +} diff --git a/tests/fixtures/claude_no_project.json b/tests/fixtures/claude_no_project.json new file mode 100644 index 0000000..5736b18 --- /dev/null +++ b/tests/fixtures/claude_no_project.json @@ -0,0 +1,23 @@ +{ + "uuid": "claude-conv-002", + "name": "Explain Docker", + "created_at": "2024-08-01T09:00:00.000Z", + "updated_at": "2024-08-01T09:05:00.000Z", + "project": null, + "chat_messages": [ + { + "uuid": "msg-004", + "sender": "human", + "created_at": "2024-08-01T09:00:00.000Z", + "content": "Can you explain Docker containers briefly?" + }, + { + "uuid": "msg-005", + "sender": "assistant", + "created_at": "2024-08-01T09:00:10.000Z", + "content": [ + {"type": "text", "text": "Docker containers are lightweight, isolated environments that package an application and its dependencies together, ensuring consistent behaviour across different systems."} + ] + } + ] +} diff --git a/tests/test_cache.py b/tests/test_cache.py new file mode 100644 index 0000000..6ce3fc3 --- /dev/null +++ b/tests/test_cache.py @@ -0,0 +1,153 @@ +"""Unit tests for src/cache.py.""" + +import json +import os +import tempfile +from pathlib import Path + +import pytest + +from src.cache import Cache, CacheError, MANIFEST_VERSION + + +@pytest.fixture +def tmp_cache(tmp_path): + return Cache(tmp_path) + + +class TestIsCached: + def test_miss_when_no_entry(self, tmp_cache): + assert tmp_cache.is_cached("claude", "conv-abc", "2024-01-01T00:00:00Z") is False + + def test_hit_after_mark_exported(self, tmp_cache): + tmp_cache.mark_exported("claude", "conv-abc", {"updated_at": "2024-01-01T00:00:00Z"}) + assert tmp_cache.is_cached("claude", "conv-abc", "2024-01-01T00:00:00Z") is True + + def test_stale_when_provider_has_newer_date(self, tmp_cache): + tmp_cache.mark_exported("claude", "conv-abc", {"updated_at": "2024-01-01T00:00:00Z"}) + assert tmp_cache.is_cached("claude", "conv-abc", "2024-06-01T00:00:00Z") is False + + def test_hit_when_provider_has_same_date(self, tmp_cache): + tmp_cache.mark_exported("chatgpt", "conv-xyz", {"updated_at": "2024-06-01T00:00:00Z"}) + assert tmp_cache.is_cached("chatgpt", "conv-xyz", "2024-06-01T00:00:00Z") is True + + def test_miss_for_different_provider(self, tmp_cache): + tmp_cache.mark_exported("claude", "conv-abc", {"updated_at": "2024-01-01T00:00:00Z"}) + assert tmp_cache.is_cached("chatgpt", "conv-abc", "2024-01-01T00:00:00Z") is False + + +class TestAtomicWrite: + def test_manifest_has_600_permissions(self, tmp_path): + c = Cache(tmp_path) + c.mark_exported("claude", "x", {"updated_at": "2024-01-01"}) + manifest = tmp_path / "manifest.json" + mode = oct(os.stat(manifest).st_mode)[-3:] + assert mode == "600" + + def test_no_tmp_file_left_after_write(self, tmp_path): + c = Cache(tmp_path) + c.mark_exported("claude", "x", {"updated_at": "2024-01-01"}) + tmp_files = list(tmp_path.glob("*.tmp")) + assert tmp_files == [] + + def test_manifest_is_valid_json(self, tmp_path): + c = Cache(tmp_path) + c.mark_exported("claude", "x", {}) + manifest = tmp_path / "manifest.json" + data = json.loads(manifest.read_text()) + assert isinstance(data, dict) + assert "claude" in data + + +class TestStats: + def test_empty_stats(self, tmp_cache): + stats = tmp_cache.stats() + assert stats["chatgpt"] == 0 + assert stats["claude"] == 0 + + def test_stats_after_exports(self, tmp_cache): + tmp_cache.mark_exported("claude", "c1", {}) + tmp_cache.mark_exported("claude", "c2", {}) + tmp_cache.mark_exported("chatgpt", "g1", {}) + stats = tmp_cache.stats() + assert stats["claude"] == 2 + assert stats["chatgpt"] == 1 + + +class TestClear: + def test_clear_single_provider(self, tmp_cache): + tmp_cache.mark_exported("claude", "c1", {}) + tmp_cache.mark_exported("chatgpt", "g1", {}) + tmp_cache.clear("claude") + assert tmp_cache.stats()["claude"] == 0 + assert tmp_cache.stats()["chatgpt"] == 1 + + def test_clear_all(self, tmp_cache): + tmp_cache.mark_exported("claude", "c1", {}) + tmp_cache.mark_exported("chatgpt", "g1", {}) + tmp_cache.clear() + assert tmp_cache.stats()["claude"] == 0 + assert tmp_cache.stats()["chatgpt"] == 0 + + +class TestCorruptManifestRecovery: + def test_recovers_from_invalid_json(self, tmp_path): + manifest = tmp_path / "manifest.json" + manifest.write_text("{invalid json!!!", encoding="utf-8") + # Should not raise, should start fresh + c = Cache(tmp_path) + assert c.stats()["claude"] == 0 + # Backup should exist + backup = tmp_path / "manifest.json.bak" + assert backup.exists() + assert backup.read_text() == "{invalid json!!!" + + def test_raises_on_future_version(self, tmp_path): + manifest = tmp_path / "manifest.json" + manifest.write_text( + json.dumps({"version": MANIFEST_VERSION + 99, "chatgpt": {}, "claude": {}}), + encoding="utf-8", + ) + with pytest.raises(CacheError, match="Unsupported manifest version"): + Cache(tmp_path) + + +class TestTosAcknowledgement: + def test_not_acknowledged_by_default(self, tmp_cache): + assert tmp_cache.is_tos_acknowledged() is False + + def test_acknowledged_after_call(self, tmp_cache): + tmp_cache.acknowledge_tos() + assert tmp_cache.is_tos_acknowledged() is True + + def test_acknowledgement_persists_across_instances(self, tmp_path): + c1 = Cache(tmp_path) + c1.acknowledge_tos() + c2 = Cache(tmp_path) + assert c2.is_tos_acknowledged() is True + + +class TestGetNewOrUpdated: + def test_returns_all_when_cache_empty(self, tmp_cache): + convs = [ + {"id": "a", "updated_at": "2024-01-01T00:00:00Z"}, + {"id": "b", "updated_at": "2024-01-02T00:00:00Z"}, + ] + result = tmp_cache.get_new_or_updated("claude", convs) + assert len(result) == 2 + + def test_skips_cached_unchanged(self, tmp_cache): + tmp_cache.mark_exported("claude", "a", {"updated_at": "2024-01-01T00:00:00Z"}) + convs = [ + {"id": "a", "updated_at": "2024-01-01T00:00:00Z"}, + {"id": "b", "updated_at": "2024-01-02T00:00:00Z"}, + ] + result = tmp_cache.get_new_or_updated("claude", convs) + assert len(result) == 1 + assert result[0]["id"] == "b" + + def test_includes_stale_conversations(self, tmp_cache): + tmp_cache.mark_exported("claude", "a", {"updated_at": "2024-01-01T00:00:00Z"}) + convs = [{"id": "a", "updated_at": "2024-06-01T00:00:00Z"}] + result = tmp_cache.get_new_or_updated("claude", convs) + assert len(result) == 1 diff --git a/tests/test_exporters.py b/tests/test_exporters.py new file mode 100644 index 0000000..5c32cee --- /dev/null +++ b/tests/test_exporters.py @@ -0,0 +1,224 @@ +"""Unit tests for src/exporters/.""" + +import json +import os +import tempfile +from pathlib import Path + +import pytest + +from src.exporters.markdown import MarkdownExporter, _yaml_escape, _format_timestamp +from src.exporters.json_export import JSONExporter + + +SAMPLE_CONV = { + "id": "abc12345def67890", + "title": "Test Conversation", + "provider": "claude", + "project": "my-project", + "created_at": "2024-06-10T14:32:00Z", + "updated_at": "2024-06-10T15:00:00Z", + "message_count": 2, + "messages": [ + { + "role": "user", + "content": "Hello, how are you?", + "content_type": "text", + "timestamp": "2024-06-10T14:32:00Z", + }, + { + "role": "assistant", + "content": "I'm doing well, thank you! How can I help?", + "content_type": "text", + "timestamp": "2024-06-10T14:32:10Z", + }, + ], +} + +NO_PROJECT_CONV = { + **SAMPLE_CONV, + "id": "noproj12345", + "project": None, + "title": "No Project Chat", +} + +CODE_CONV = { + **SAMPLE_CONV, + "id": "code12345", + "messages": [ + { + "role": "user", + "content": "Here is some code:\n```python\nprint('hello')\n```", + "content_type": "text", + "timestamp": None, + } + ], +} + + +class TestMarkdownFrontmatter: + def test_yaml_frontmatter_present(self, tmp_path): + exp = MarkdownExporter(tmp_path) + path = exp.export(SAMPLE_CONV) + content = path.read_text() + assert content.startswith("---\n") + assert "title: " in content + assert "provider: claude" in content + assert "conversation_id: abc12345def67890" in content + assert "created_at: 2024-06-10T14:32:00Z" in content + assert "exported_at: " in content + assert "message_count: 2" in content + assert "tags: [claude, my-project]" in content + + def test_no_project_uses_no_project_label(self, tmp_path): + exp = MarkdownExporter(tmp_path) + path = exp.export(NO_PROJECT_CONV) + content = path.read_text() + assert "project: no-project" in content + assert "tags: [claude]" in content + + def test_metadata_table_present(self, tmp_path): + exp = MarkdownExporter(tmp_path) + path = exp.export(SAMPLE_CONV) + content = path.read_text() + assert "| Provider | Claude |" in content + assert "| Project | my-project |" in content + assert "| Date | 2024-06-10 |" in content + assert "| Messages | 2 |" in content + + def test_messages_rendered(self, tmp_path): + exp = MarkdownExporter(tmp_path) + path = exp.export(SAMPLE_CONV) + content = path.read_text() + assert "Hello, how are you?" in content + assert "I'm doing well" in content + assert "🧑 Human" in content + assert "🤖 Assistant" in content + + def test_code_fences_preserved(self, tmp_path): + exp = MarkdownExporter(tmp_path) + path = exp.export(CODE_CONV) + content = path.read_text() + assert "```python" in content + assert "print('hello')" in content + + +class TestMarkdownFilenameGeneration: + def test_filename_format(self, tmp_path): + exp = MarkdownExporter(tmp_path) + path = exp.export(SAMPLE_CONV) + assert path.name == "2024-06-10_test-conversation_abc12345.md" + + def test_no_project_goes_to_no_project_dir(self, tmp_path): + exp = MarkdownExporter(tmp_path) + path = exp.export(NO_PROJECT_CONV) + assert "no-project" in str(path) + + def test_project_slug_in_path(self, tmp_path): + exp = MarkdownExporter(tmp_path) + path = exp.export(SAMPLE_CONV) + assert "my-project" in str(path) + + def test_year_in_path(self, tmp_path): + exp = MarkdownExporter(tmp_path) + path = exp.export(SAMPLE_CONV) + assert "/2024/" in str(path) + + def test_output_structure_provider_project(self, tmp_path): + exp = MarkdownExporter(tmp_path, output_structure="provider/project") + path = exp.export(SAMPLE_CONV) + # Should NOT have year subdirectory + parts = path.parts + assert "2024" not in parts + + +class TestMarkdownEmptyMessages: + def test_empty_message_skipped(self, tmp_path, caplog): + import logging + conv = { + **SAMPLE_CONV, + "messages": [ + {"role": "user", "content": " ", "content_type": "text", "timestamp": None}, + {"role": "assistant", "content": "Real response", "content_type": "text", "timestamp": None}, + ], + } + exp = MarkdownExporter(tmp_path) + with caplog.at_level(logging.WARNING, logger="src.exporters.markdown"): + path = exp.export(conv) + content = path.read_text() + assert "Real response" in content + assert any("empty" in r.message.lower() for r in caplog.records) + + +class TestMarkdownAtomicWrite: + def test_permissions_600(self, tmp_path): + exp = MarkdownExporter(tmp_path) + path = exp.export(SAMPLE_CONV) + mode = oct(os.stat(path).st_mode)[-3:] + assert mode == "600" + + def test_no_tmp_files_left(self, tmp_path): + exp = MarkdownExporter(tmp_path) + exp.export(SAMPLE_CONV) + tmp_files = list(tmp_path.rglob("*.tmp")) + assert tmp_files == [] + + +class TestJSONExporter: + def test_produces_valid_json(self, tmp_path): + exp = JSONExporter(tmp_path) + path = exp.export(SAMPLE_CONV) + data = json.loads(path.read_text()) + assert data["id"] == "abc12345def67890" + assert data["title"] == "Test Conversation" + assert len(data["messages"]) == 2 + + def test_includes_exported_at(self, tmp_path): + exp = JSONExporter(tmp_path) + path = exp.export(SAMPLE_CONV) + data = json.loads(path.read_text()) + assert "exported_at" in data + + def test_permissions_600(self, tmp_path): + exp = JSONExporter(tmp_path) + path = exp.export(SAMPLE_CONV) + mode = oct(os.stat(path).st_mode)[-3:] + assert mode == "600" + + def test_json_extension(self, tmp_path): + exp = JSONExporter(tmp_path) + path = exp.export(SAMPLE_CONV) + assert path.suffix == ".json" + + def test_pretty_printed(self, tmp_path): + exp = JSONExporter(tmp_path) + path = exp.export(SAMPLE_CONV) + raw = path.read_text() + # Pretty-printed JSON has newlines and indentation + assert "\n" in raw + assert " " in raw + + +class TestYamlEscape: + def test_escapes_double_quotes(self): + assert _yaml_escape('Say "hello"') == 'Say \\"hello\\"' + + def test_escapes_backslash(self): + assert _yaml_escape("path\\to\\file") == "path\\\\to\\\\file" + + def test_no_change_for_plain_string(self): + assert _yaml_escape("Hello World") == "Hello World" + + +class TestFormatTimestamp: + def test_strips_fractional_seconds(self): + result = _format_timestamp("2024-06-10T14:32:00.123456Z") + assert "." not in result + + def test_replaces_T_with_space(self): + result = _format_timestamp("2024-06-10T14:32:00Z") + assert "T" not in result + assert "2024-06-10 14:32:00" == result + + def test_empty_string(self): + assert _format_timestamp("") == "" diff --git a/tests/test_providers.py b/tests/test_providers.py new file mode 100644 index 0000000..c2b3c8e --- /dev/null +++ b/tests/test_providers.py @@ -0,0 +1,164 @@ +"""Unit tests for src/providers/ using fixture files.""" + +import json +from pathlib import Path + +import pytest + +FIXTURES = Path(__file__).parent / "fixtures" + + +class TestChatGPTNormalization: + """Test ChatGPTProvider.normalize_conversation() using fixture data.""" + + def _get_provider(self): + from src.providers.chatgpt import ChatGPTProvider + import unittest.mock as mock + # Bypass __init__ token check + p = ChatGPTProvider.__new__(ChatGPTProvider) + import requests + p._session = requests.Session() + p._org_id = None + return p + + def test_normalizes_with_project(self): + raw = json.loads((FIXTURES / "chatgpt_conversation.json").read_text()) + p = self._get_provider() + result = p.normalize_conversation(raw) + + assert result["id"] == "chatgpt-conv-001" + assert result["title"] == "Python Async Tutorial" + assert result["provider"] == "chatgpt" + assert result["project"] == "Learning Python" + assert result["created_at"] != "" + assert result["updated_at"] != "" + assert isinstance(result["messages"], list) + + def test_normalizes_without_project(self): + raw = json.loads((FIXTURES / "chatgpt_no_project.json").read_text()) + p = self._get_provider() + result = p.normalize_conversation(raw) + + assert result["project"] is None + assert result["id"] == "chatgpt-conv-002" + + def test_extracts_text_messages(self): + raw = json.loads((FIXTURES / "chatgpt_conversation.json").read_text()) + p = self._get_provider() + result = p.normalize_conversation(raw) + + assert len(result["messages"]) >= 2 + user_msgs = [m for m in result["messages"] if m["role"] == "user"] + assert any("async" in m["content"].lower() for m in user_msgs) + + def test_skips_non_text_content_with_warning(self, caplog): + import logging + raw = json.loads((FIXTURES / "chatgpt_conversation.json").read_text()) + p = self._get_provider() + with caplog.at_level(logging.WARNING): + result = p.normalize_conversation(raw) + # The fixture has an image_asset_pointer node — should be warned about + assert any( + "image_asset_pointer" in r.message or "rich content" in r.message + for r in caplog.records + ) + + def test_message_roles_are_valid(self): + raw = json.loads((FIXTURES / "chatgpt_conversation.json").read_text()) + p = self._get_provider() + result = p.normalize_conversation(raw) + for msg in result["messages"]: + assert msg["role"] in ("user", "assistant", "system") + + def test_message_count_matches(self): + raw = json.loads((FIXTURES / "chatgpt_conversation.json").read_text()) + p = self._get_provider() + result = p.normalize_conversation(raw) + assert result["message_count"] == len(result["messages"]) + + def test_code_fence_preserved(self): + raw = json.loads((FIXTURES / "chatgpt_conversation.json").read_text()) + p = self._get_provider() + result = p.normalize_conversation(raw) + all_content = " ".join(m["content"] for m in result["messages"]) + assert "```python" in all_content + + +class TestClaudeNormalization: + """Test ClaudeProvider.normalize_conversation() using fixture data.""" + + def _get_provider(self): + from src.providers.claude import ClaudeProvider + import requests + p = ClaudeProvider.__new__(ClaudeProvider) + p._session = requests.Session() + p._org_id = None + return p + + def test_normalizes_with_project(self): + raw = json.loads((FIXTURES / "claude_conversation.json").read_text()) + p = self._get_provider() + result = p.normalize_conversation(raw) + + assert result["id"] == "claude-conv-001" + assert result["title"] == "StartOS Service Packaging" + assert result["provider"] == "claude" + assert result["project"] == "StarTOS Packaging" + assert result["created_at"] == "2024-06-10T14:32:00.000Z" + assert isinstance(result["messages"], list) + + def test_normalizes_without_project(self): + raw = json.loads((FIXTURES / "claude_no_project.json").read_text()) + p = self._get_provider() + result = p.normalize_conversation(raw) + + assert result["project"] is None + assert result["id"] == "claude-conv-002" + + def test_string_content_extracted(self): + raw = json.loads((FIXTURES / "claude_no_project.json").read_text()) + p = self._get_provider() + result = p.normalize_conversation(raw) + + assert any("Docker" in m["content"] for m in result["messages"]) + + def test_list_content_extracted(self): + raw = json.loads((FIXTURES / "claude_conversation.json").read_text()) + p = self._get_provider() + result = p.normalize_conversation(raw) + + assistant_msgs = [m for m in result["messages"] if m["role"] == "assistant"] + assert any("manifest" in m["content"].lower() for m in assistant_msgs) + + def test_non_text_blocks_skipped_with_warning(self, caplog): + import logging + raw = json.loads((FIXTURES / "claude_conversation.json").read_text()) + p = self._get_provider() + with caplog.at_level(logging.WARNING): + result = p.normalize_conversation(raw) + # The fixture has a tool_use block — should warn + assert any( + "tool_use" in r.message or "rich content" in r.message + for r in caplog.records + ) + + def test_message_count_matches(self): + raw = json.loads((FIXTURES / "claude_conversation.json").read_text()) + p = self._get_provider() + result = p.normalize_conversation(raw) + assert result["message_count"] == len(result["messages"]) + + def test_roles_normalized(self): + raw = json.loads((FIXTURES / "claude_conversation.json").read_text()) + p = self._get_provider() + result = p.normalize_conversation(raw) + for msg in result["messages"]: + assert msg["role"] in ("user", "assistant", "system") + + def test_human_sender_maps_to_user(self): + raw = json.loads((FIXTURES / "claude_conversation.json").read_text()) + p = self._get_provider() + result = p.normalize_conversation(raw) + roles = {m["role"] for m in result["messages"]} + assert "user" in roles + assert "human" not in roles