"""Unit tests for src/providers/ using fixture files.""" import json from pathlib import Path import pytest FIXTURES = Path(__file__).parent / "fixtures" class TestChatGPTNormalization: """Test ChatGPTProvider.normalize_conversation() using fixture data.""" def _get_provider(self): from src.providers.chatgpt import ChatGPTProvider import unittest.mock as mock # Bypass __init__ token check p = ChatGPTProvider.__new__(ChatGPTProvider) import requests p._session = requests.Session() p._org_id = None return p def test_normalizes_with_project(self): raw = json.loads((FIXTURES / "chatgpt_conversation.json").read_text()) p = self._get_provider() result = p.normalize_conversation(raw) assert result["id"] == "chatgpt-conv-001" assert result["title"] == "Python Async Tutorial" assert result["provider"] == "chatgpt" assert result["project"] == "Learning Python" assert result["created_at"] != "" assert result["updated_at"] != "" assert isinstance(result["messages"], list) def test_normalizes_without_project(self): raw = json.loads((FIXTURES / "chatgpt_no_project.json").read_text()) p = self._get_provider() result = p.normalize_conversation(raw) assert result["project"] is None assert result["id"] == "chatgpt-conv-002" def test_extracts_text_messages(self): raw = json.loads((FIXTURES / "chatgpt_conversation.json").read_text()) p = self._get_provider() result = p.normalize_conversation(raw) assert len(result["messages"]) >= 2 user_msgs = [m for m in result["messages"] if m["role"] == "user"] assert any("async" in m["content"].lower() for m in user_msgs) def test_skips_non_text_content_with_warning(self, caplog): import logging raw = json.loads((FIXTURES / "chatgpt_conversation.json").read_text()) p = self._get_provider() with caplog.at_level(logging.WARNING): result = p.normalize_conversation(raw) # The fixture has an image_asset_pointer node — should be warned about assert any( "image_asset_pointer" in r.message or "rich content" in r.message for r in caplog.records ) def test_message_roles_are_valid(self): raw = json.loads((FIXTURES / "chatgpt_conversation.json").read_text()) p = self._get_provider() result = p.normalize_conversation(raw) for msg in result["messages"]: assert msg["role"] in ("user", "assistant", "system") def test_message_count_matches(self): raw = json.loads((FIXTURES / "chatgpt_conversation.json").read_text()) p = self._get_provider() result = p.normalize_conversation(raw) assert result["message_count"] == len(result["messages"]) def test_code_fence_preserved(self): raw = json.loads((FIXTURES / "chatgpt_conversation.json").read_text()) p = self._get_provider() result = p.normalize_conversation(raw) all_content = " ".join(m["content"] for m in result["messages"]) assert "```python" in all_content class TestClaudeNormalization: """Test ClaudeProvider.normalize_conversation() using fixture data.""" def _get_provider(self): from src.providers.claude import ClaudeProvider import requests p = ClaudeProvider.__new__(ClaudeProvider) p._session = requests.Session() p._org_id = None return p def test_normalizes_with_project(self): raw = json.loads((FIXTURES / "claude_conversation.json").read_text()) p = self._get_provider() result = p.normalize_conversation(raw) assert result["id"] == "claude-conv-001" assert result["title"] == "StartOS Service Packaging" assert result["provider"] == "claude" assert result["project"] == "StarTOS Packaging" assert result["created_at"] == "2024-06-10T14:32:00.000Z" assert isinstance(result["messages"], list) def test_normalizes_without_project(self): raw = json.loads((FIXTURES / "claude_no_project.json").read_text()) p = self._get_provider() result = p.normalize_conversation(raw) assert result["project"] is None assert result["id"] == "claude-conv-002" def test_string_content_extracted(self): raw = json.loads((FIXTURES / "claude_no_project.json").read_text()) p = self._get_provider() result = p.normalize_conversation(raw) assert any("Docker" in m["content"] for m in result["messages"]) def test_list_content_extracted(self): raw = json.loads((FIXTURES / "claude_conversation.json").read_text()) p = self._get_provider() result = p.normalize_conversation(raw) assistant_msgs = [m for m in result["messages"] if m["role"] == "assistant"] assert any("manifest" in m["content"].lower() for m in assistant_msgs) def test_non_text_blocks_skipped_with_warning(self, caplog): import logging raw = json.loads((FIXTURES / "claude_conversation.json").read_text()) p = self._get_provider() with caplog.at_level(logging.WARNING): result = p.normalize_conversation(raw) # The fixture has a tool_use block — should warn assert any( "tool_use" in r.message or "rich content" in r.message for r in caplog.records ) def test_message_count_matches(self): raw = json.loads((FIXTURES / "claude_conversation.json").read_text()) p = self._get_provider() result = p.normalize_conversation(raw) assert result["message_count"] == len(result["messages"]) def test_roles_normalized(self): raw = json.loads((FIXTURES / "claude_conversation.json").read_text()) p = self._get_provider() result = p.normalize_conversation(raw) for msg in result["messages"]: assert msg["role"] in ("user", "assistant", "system") def test_human_sender_maps_to_user(self): raw = json.loads((FIXTURES / "claude_conversation.json").read_text()) p = self._get_provider() result = p.normalize_conversation(raw) roles = {m["role"] for m in result["messages"]} assert "user" in roles assert "human" not in roles