Files
ai-chatexport/tests/test_providers.py
2026-03-30 11:08:05 -04:00

210 lines
8.3 KiB
Python

"""Unit tests for src/providers/ using fixture files."""
import json
from pathlib import Path
import pytest
# Directory, next to this test module, holding the JSON fixture files the tests load.
FIXTURES = Path(__file__).parent.joinpath("fixtures")
class TestChatGPTNormalization:
    """Test ChatGPTProvider.normalize_conversation() using fixture data."""

    @staticmethod
    def _load_fixture(name):
        """Return the parsed JSON fixture file *name* from ``FIXTURES``.

        The file is decoded explicitly as UTF-8 so the parsed content does
        not depend on the platform's default locale encoding.
        """
        return json.loads((FIXTURES / name).read_text(encoding="utf-8"))

    def _get_provider(self):
        """Return a ChatGPTProvider instance built without calling __init__.

        Only the private attributes assigned below are initialised.
        """
        # Imported lazily so importing this test module does not itself
        # require the provider package or requests.
        import requests
        from src.providers.chatgpt import ChatGPTProvider

        # Bypass __init__ token check
        p = ChatGPTProvider.__new__(ChatGPTProvider)
        p._session = requests.Session()
        p._org_id = None
        p._project_ids = []
        p._project_map = {}
        p._project_name_cache = {}
        return p

    def test_normalizes_conversation(self):
        """Top-level fields of the normalized conversation are populated."""
        raw = self._load_fixture("chatgpt_conversation.json")
        p = self._get_provider()
        result = p.normalize_conversation(raw)
        assert result["id"] == "chatgpt-conv-001"
        assert result["title"] == "Python Async Tutorial"
        assert result["provider"] == "chatgpt"
        # No entry in _project_map → project is None
        assert result["project"] is None
        assert result["created_at"] != ""
        assert result["updated_at"] != ""
        assert isinstance(result["messages"], list)

    def test_normalizes_without_project(self):
        """A conversation with no project normalizes with project=None."""
        raw = self._load_fixture("chatgpt_no_project.json")
        p = self._get_provider()
        result = p.normalize_conversation(raw)
        assert result["project"] is None
        assert result["id"] == "chatgpt-conv-002"

    def test_normalizes_with_project_from_map(self):
        """Project name from _project_map (populated by fetch_all_conversations) flows through."""
        raw = self._load_fixture("chatgpt_conversation.json")
        p = self._get_provider()
        p._project_map["chatgpt-conv-001"] = "My Research Project"
        result = p.normalize_conversation(raw)
        assert result["project"] == "My Research Project"

    def test_extracts_text_messages(self):
        """Text messages are extracted, including a user message about async."""
        raw = self._load_fixture("chatgpt_conversation.json")
        p = self._get_provider()
        result = p.normalize_conversation(raw)
        assert len(result["messages"]) >= 2
        user_msgs = [m for m in result["messages"] if m["role"] == "user"]
        assert any("async" in m["content"].lower() for m in user_msgs)

    def test_skips_non_text_content_with_warning(self, caplog):
        """Normalizing the fixture logs a warning about non-text content."""
        import logging

        raw = self._load_fixture("chatgpt_conversation.json")
        p = self._get_provider()
        with caplog.at_level(logging.WARNING):
            # Only the emitted log records matter here; the return value is unused.
            p.normalize_conversation(raw)
        # The fixture has an image_asset_pointer node — should be warned about
        assert any(
            "image_asset_pointer" in r.message or "rich content" in r.message
            for r in caplog.records
        )

    def test_model_editable_context_included_without_warning(self, caplog):
        """model_editable_context messages (project instructions) should be included, not warned about."""
        import logging

        conv = {
            "id": "test-conv-mec",
            "title": "Test",
            "create_time": 1700000000.0,
            "update_time": 1700000001.0,
            "mapping": {
                "root": {"id": "root", "message": None, "parent": None, "children": ["msg1"]},
                "msg1": {
                    "id": "msg1",
                    "message": {
                        "id": "msg1",
                        "author": {"role": "user"},
                        "content": {
                            "content_type": "model_editable_context",
                            "parts": ["These are the project instructions."],
                        },
                        "create_time": 1700000001.0,
                        "status": "finished_successfully",
                    },
                    "parent": "root",
                    "children": [],
                },
            },
        }
        p = self._get_provider()
        with caplog.at_level(logging.WARNING):
            result = p.normalize_conversation(conv)
        assert any(m["content"] == "These are the project instructions." for m in result["messages"])
        assert not any("model_editable_context" in r.message for r in caplog.records)

    def test_message_roles_are_valid(self):
        """Every normalized message has one of the three allowed roles."""
        raw = self._load_fixture("chatgpt_conversation.json")
        p = self._get_provider()
        result = p.normalize_conversation(raw)
        for msg in result["messages"]:
            assert msg["role"] in ("user", "assistant", "system")

    def test_message_count_matches(self):
        """message_count equals the length of the messages list."""
        raw = self._load_fixture("chatgpt_conversation.json")
        p = self._get_provider()
        result = p.normalize_conversation(raw)
        assert result["message_count"] == len(result["messages"])

    def test_code_fence_preserved(self):
        """A ```python code fence survives normalization."""
        raw = self._load_fixture("chatgpt_conversation.json")
        p = self._get_provider()
        result = p.normalize_conversation(raw)
        all_content = " ".join(m["content"] for m in result["messages"])
        assert "```python" in all_content
class TestClaudeNormalization:
    """Test ClaudeProvider.normalize_conversation() using fixture data."""

    @staticmethod
    def _load_fixture(name):
        """Return the parsed JSON fixture file *name* from ``FIXTURES``.

        The file is decoded explicitly as UTF-8 so the parsed content does
        not depend on the platform's default locale encoding.
        """
        return json.loads((FIXTURES / name).read_text(encoding="utf-8"))

    def _get_provider(self):
        """Return a ClaudeProvider instance built without calling __init__.

        Only the private attributes assigned below are initialised.
        """
        # Imported lazily so importing this test module does not itself
        # require the provider package or requests.
        import requests
        from src.providers.claude import ClaudeProvider

        p = ClaudeProvider.__new__(ClaudeProvider)
        p._session = requests.Session()
        p._org_id = None
        return p

    def test_normalizes_with_project(self):
        """Top-level fields, including the project name, are populated."""
        raw = self._load_fixture("claude_conversation.json")
        p = self._get_provider()
        result = p.normalize_conversation(raw)
        assert result["id"] == "claude-conv-001"
        assert result["title"] == "StartOS Service Packaging"
        assert result["provider"] == "claude"
        assert result["project"] == "StarTOS Packaging"
        assert result["created_at"] == "2024-06-10T14:32:00.000Z"
        assert isinstance(result["messages"], list)

    def test_normalizes_without_project(self):
        """A conversation with no project normalizes with project=None."""
        raw = self._load_fixture("claude_no_project.json")
        p = self._get_provider()
        result = p.normalize_conversation(raw)
        assert result["project"] is None
        assert result["id"] == "claude-conv-002"

    def test_string_content_extracted(self):
        """Message text mentioning Docker is present after normalization."""
        raw = self._load_fixture("claude_no_project.json")
        p = self._get_provider()
        result = p.normalize_conversation(raw)
        assert any("Docker" in m["content"] for m in result["messages"])

    def test_list_content_extracted(self):
        """An assistant message mentioning "manifest" is present after normalization."""
        raw = self._load_fixture("claude_conversation.json")
        p = self._get_provider()
        result = p.normalize_conversation(raw)
        assistant_msgs = [m for m in result["messages"] if m["role"] == "assistant"]
        assert any("manifest" in m["content"].lower() for m in assistant_msgs)

    def test_non_text_blocks_skipped_with_warning(self, caplog):
        """Normalizing the fixture logs a warning about non-text blocks."""
        import logging

        raw = self._load_fixture("claude_conversation.json")
        p = self._get_provider()
        with caplog.at_level(logging.WARNING):
            # Only the emitted log records matter here; the return value is unused.
            p.normalize_conversation(raw)
        # The fixture has a tool_use block — should warn
        assert any(
            "tool_use" in r.message or "rich content" in r.message
            for r in caplog.records
        )

    def test_message_count_matches(self):
        """message_count equals the length of the messages list."""
        raw = self._load_fixture("claude_conversation.json")
        p = self._get_provider()
        result = p.normalize_conversation(raw)
        assert result["message_count"] == len(result["messages"])

    def test_roles_normalized(self):
        """Every normalized message has one of the three allowed roles."""
        raw = self._load_fixture("claude_conversation.json")
        p = self._get_provider()
        result = p.normalize_conversation(raw)
        for msg in result["messages"]:
            assert msg["role"] in ("user", "assistant", "system")

    def test_human_sender_maps_to_user(self):
        """The normalized roles contain "user" and never "human"."""
        raw = self._load_fixture("claude_conversation.json")
        p = self._get_provider()
        result = p.normalize_conversation(raw)
        roles = {m["role"] for m in result["messages"]}
        assert "user" in roles
        assert "human" not in roles