210 lines
8.3 KiB
Python
210 lines
8.3 KiB
Python
"""Unit tests for src/providers/ using fixture files."""
|
|
|
|
import json
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
# Directory holding the JSON fixture files, located next to this test module.
FIXTURES = Path(__file__).parent.joinpath("fixtures")
|
|
|
|
|
|
class TestChatGPTNormalization:
    """Test ChatGPTProvider.normalize_conversation() using fixture data."""

    @staticmethod
    def _load_fixture(name: str):
        """Return the parsed JSON content of fixture file *name*.

        The file is decoded explicitly as UTF-8 so the parsed result does not
        depend on the platform's default locale encoding.
        """
        return json.loads((FIXTURES / name).read_text(encoding="utf-8"))

    def _get_provider(self):
        """Return a ChatGPTProvider instance created without running __init__."""
        from src.providers.chatgpt import ChatGPTProvider

        # Bypass __init__ token check
        p = ChatGPTProvider.__new__(ChatGPTProvider)

        import requests

        # Attributes are assigned by hand because __init__ was skipped.
        p._session = requests.Session()
        p._org_id = None
        p._project_ids = []
        p._project_map = {}
        p._project_name_cache = {}
        return p

    def _normalize_fixture(self, name: str):
        """Normalize fixture *name* with a fresh provider and return the result."""
        return self._get_provider().normalize_conversation(self._load_fixture(name))

    def test_normalizes_conversation(self):
        """Top-level fields of the fixture conversation appear in the result."""
        result = self._normalize_fixture("chatgpt_conversation.json")

        assert result["id"] == "chatgpt-conv-001"
        assert result["title"] == "Python Async Tutorial"
        assert result["provider"] == "chatgpt"
        # No entry in _project_map → project is None
        assert result["project"] is None
        assert result["created_at"] != ""
        assert result["updated_at"] != ""
        assert isinstance(result["messages"], list)

    def test_normalizes_without_project(self):
        """The no-project fixture normalizes with project set to None."""
        result = self._normalize_fixture("chatgpt_no_project.json")

        assert result["project"] is None
        assert result["id"] == "chatgpt-conv-002"

    def test_normalizes_with_project_from_map(self):
        """Project name from _project_map (populated by fetch_all_conversations) flows through."""
        raw = self._load_fixture("chatgpt_conversation.json")
        p = self._get_provider()
        p._project_map["chatgpt-conv-001"] = "My Research Project"
        result = p.normalize_conversation(raw)

        assert result["project"] == "My Research Project"

    def test_extracts_text_messages(self):
        """At least two messages come out, one of them a user message about async."""
        result = self._normalize_fixture("chatgpt_conversation.json")

        assert len(result["messages"]) >= 2
        user_msgs = [m for m in result["messages"] if m["role"] == "user"]
        assert any("async" in m["content"].lower() for m in user_msgs)

    def test_skips_non_text_content_with_warning(self, caplog):
        """Normalizing the fixture logs a warning about its non-text content."""
        import logging

        raw = self._load_fixture("chatgpt_conversation.json")
        p = self._get_provider()
        with caplog.at_level(logging.WARNING):
            # Only the emitted log records matter here; the result is not needed.
            p.normalize_conversation(raw)

        # The fixture has an image_asset_pointer node — should be warned about
        assert any(
            "image_asset_pointer" in text or "rich content" in text
            for text in (record.getMessage() for record in caplog.records)
        )

    def test_model_editable_context_included_without_warning(self, caplog):
        """model_editable_context messages (project instructions) should be included, not warned about."""
        import logging

        conv = {
            "id": "test-conv-mec",
            "title": "Test",
            "create_time": 1700000000.0,
            "update_time": 1700000001.0,
            "mapping": {
                "root": {"id": "root", "message": None, "parent": None, "children": ["msg1"]},
                "msg1": {
                    "id": "msg1",
                    "message": {
                        "id": "msg1",
                        "author": {"role": "user"},
                        "content": {
                            "content_type": "model_editable_context",
                            "parts": ["These are the project instructions."],
                        },
                        "create_time": 1700000001.0,
                        "status": "finished_successfully",
                    },
                    "parent": "root",
                    "children": [],
                },
            },
        }
        p = self._get_provider()
        with caplog.at_level(logging.WARNING):
            result = p.normalize_conversation(conv)

        assert any(m["content"] == "These are the project instructions." for m in result["messages"])
        assert not any("model_editable_context" in record.getMessage() for record in caplog.records)

    def test_message_roles_are_valid(self):
        """Every normalized message has role user, assistant, or system."""
        result = self._normalize_fixture("chatgpt_conversation.json")

        for msg in result["messages"]:
            assert msg["role"] in ("user", "assistant", "system")

    def test_message_count_matches(self):
        """message_count equals the length of the messages list."""
        result = self._normalize_fixture("chatgpt_conversation.json")

        assert result["message_count"] == len(result["messages"])

    def test_code_fence_preserved(self):
        """The ```python fence marker is still present in the message content."""
        result = self._normalize_fixture("chatgpt_conversation.json")

        all_content = " ".join(m["content"] for m in result["messages"])
        assert "```python" in all_content
|
|
|
|
|
|
class TestClaudeNormalization:
    """Test ClaudeProvider.normalize_conversation() using fixture data."""

    @staticmethod
    def _load_fixture(name: str):
        """Return the parsed JSON content of fixture file *name*.

        The file is decoded explicitly as UTF-8 so the parsed result does not
        depend on the platform's default locale encoding.
        """
        return json.loads((FIXTURES / name).read_text(encoding="utf-8"))

    def _get_provider(self):
        """Return a ClaudeProvider instance created without running __init__."""
        from src.providers.claude import ClaudeProvider

        import requests

        # __new__ skips __init__, so the attributes are assigned by hand.
        p = ClaudeProvider.__new__(ClaudeProvider)
        p._session = requests.Session()
        p._org_id = None
        return p

    def _normalize_fixture(self, name: str):
        """Normalize fixture *name* with a fresh provider and return the result."""
        return self._get_provider().normalize_conversation(self._load_fixture(name))

    def test_normalizes_with_project(self):
        """Top-level fields, including the project name, appear in the result."""
        result = self._normalize_fixture("claude_conversation.json")

        assert result["id"] == "claude-conv-001"
        assert result["title"] == "StartOS Service Packaging"
        assert result["provider"] == "claude"
        assert result["project"] == "StarTOS Packaging"
        assert result["created_at"] == "2024-06-10T14:32:00.000Z"
        assert isinstance(result["messages"], list)

    def test_normalizes_without_project(self):
        """The no-project fixture normalizes with project set to None."""
        result = self._normalize_fixture("claude_no_project.json")

        assert result["project"] is None
        assert result["id"] == "claude-conv-002"

    def test_string_content_extracted(self):
        """Some message in the no-project fixture result mentions Docker."""
        result = self._normalize_fixture("claude_no_project.json")

        assert any("Docker" in m["content"] for m in result["messages"])

    def test_list_content_extracted(self):
        """Some assistant message in the project fixture result mentions manifest."""
        result = self._normalize_fixture("claude_conversation.json")

        assistant_msgs = [m for m in result["messages"] if m["role"] == "assistant"]
        assert any("manifest" in m["content"].lower() for m in assistant_msgs)

    def test_non_text_blocks_skipped_with_warning(self, caplog):
        """Normalizing the fixture logs a warning about its non-text block."""
        import logging

        raw = self._load_fixture("claude_conversation.json")
        p = self._get_provider()
        with caplog.at_level(logging.WARNING):
            # Only the emitted log records matter here; the result is not needed.
            p.normalize_conversation(raw)

        # The fixture has a tool_use block — should warn
        assert any(
            "tool_use" in text or "rich content" in text
            for text in (record.getMessage() for record in caplog.records)
        )

    def test_message_count_matches(self):
        """message_count equals the length of the messages list."""
        result = self._normalize_fixture("claude_conversation.json")

        assert result["message_count"] == len(result["messages"])

    def test_roles_normalized(self):
        """Every normalized message has role user, assistant, or system."""
        result = self._normalize_fixture("claude_conversation.json")

        for msg in result["messages"]:
            assert msg["role"] in ("user", "assistant", "system")

    def test_human_sender_maps_to_user(self):
        """The result contains the "user" role and never the "human" role."""
        result = self._normalize_fixture("claude_conversation.json")

        roles = {m["role"] for m in result["messages"]}
        assert "user" in roles
        assert "human" not in roles
|