"""Unit tests for src/exporters/ and src/blocks.py."""

import json
import os
import tempfile
from pathlib import Path

import pytest

from src.blocks import (
    BLOCK_TYPE_TEXT,
    UNKNOWN_REASON_EXTRACTION_FAILED,
    UNKNOWN_REASON_UNKNOWN_TYPE,
    _blockquote_prefix,
    _safe_fence,
    make_code_block,
    make_file_placeholder,
    make_hidden_context_marker,
    make_image_placeholder,
    make_text_block,
    make_thinking_block,
    make_tool_result_block,
    make_tool_use_block,
    make_unknown_block,
    render_blocks_to_markdown,
)
from src.exporters.markdown import MarkdownExporter, _yaml_escape, _format_timestamp
from src.exporters.json_export import JSONExporter


# Baseline two-message conversation (content only, no `blocks`) shared by
# most exporter tests below.
SAMPLE_CONV = {
    "id": "abc12345def67890",
    "title": "Test Conversation",
    "provider": "claude",
    "project": "my-project",
    "created_at": "2024-06-10T14:32:00Z",
    "updated_at": "2024-06-10T15:00:00Z",
    "message_count": 2,
    "messages": [
        {
            "role": "user",
            "content": "Hello, how are you?",
            "content_type": "text",
            "timestamp": "2024-06-10T14:32:00Z",
        },
        {
            "role": "assistant",
            "content": "I'm doing well, thank you! How can I help?",
            "content_type": "text",
            "timestamp": "2024-06-10T14:32:10Z",
        },
    ],
}

# Same conversation with `project` set to None.
NO_PROJECT_CONV = {
    **SAMPLE_CONV,
    "id": "noproj12345",
    "project": None,
    "title": "No Project Chat",
}

# Single user message whose content already contains a fenced code block.
CODE_CONV = {
    **SAMPLE_CONV,
    "id": "code12345",
    "messages": [
        {
            "role": "user",
            "content": "Here is some code:\n```python\nprint('hello')\n```",
            "content_type": "text",
            "timestamp": None,
        }
    ],
}


class TestMarkdownFrontmatter:
    """Frontmatter, metadata table and message body of the Markdown export."""

    def test_yaml_frontmatter_present(self, tmp_path):
        exp = MarkdownExporter(tmp_path)
        path = exp.export(SAMPLE_CONV)
        content = path.read_text(encoding="utf-8")
        assert content.startswith("---\n")
        assert "title: " in content
        assert "provider: claude" in content
        assert "conversation_id: abc12345def67890" in content
        assert "created_at: 2024-06-10T14:32:00Z" in content
        assert "exported_at: " in content
        assert "message_count: 2" in content
        assert "tags: [claude, my-project]" in content

    def test_no_project_uses_no_project_label(self, tmp_path):
        exp = MarkdownExporter(tmp_path)
        path = exp.export(NO_PROJECT_CONV)
        content = path.read_text(encoding="utf-8")
        assert "project: no-project" in content
        assert "tags: [claude]" in content

    def test_metadata_table_present(self, tmp_path):
        exp = MarkdownExporter(tmp_path)
        path = exp.export(SAMPLE_CONV)
        content = path.read_text(encoding="utf-8")
        assert "| Provider | Claude |" in content
        assert "| Project | my-project |" in content
        assert "| Date | 2024-06-10 |" in content
        assert "| Messages | 2 |" in content

    def test_messages_rendered(self, tmp_path):
        exp = MarkdownExporter(tmp_path)
        path = exp.export(SAMPLE_CONV)
        content = path.read_text(encoding="utf-8")
        assert "Hello, how are you?" in content
        assert "I'm doing well" in content
        assert "🧑 Human" in content
        assert "🤖 Assistant" in content

    def test_code_fences_preserved(self, tmp_path):
        exp = MarkdownExporter(tmp_path)
        path = exp.export(CODE_CONV)
        content = path.read_text(encoding="utf-8")
        assert "```python" in content
        assert "print('hello')" in content


class TestMarkdownFilenameGeneration:
    """File name and directory layout of the path returned by export()."""

    def test_filename_format(self, tmp_path):
        exp = MarkdownExporter(tmp_path)
        path = exp.export(SAMPLE_CONV)
        assert path.name == "2024-06-10_test-conversation_abc12345.md"

    def test_no_project_goes_to_no_project_dir(self, tmp_path):
        exp = MarkdownExporter(tmp_path)
        path = exp.export(NO_PROJECT_CONV)
        assert "no-project" in str(path)

    def test_project_slug_in_path(self, tmp_path):
        exp = MarkdownExporter(tmp_path)
        path = exp.export(SAMPLE_CONV)
        assert "my-project" in str(path)

    def test_year_in_path(self, tmp_path):
        exp = MarkdownExporter(tmp_path)
        path = exp.export(SAMPLE_CONV)
        # Compare path components rather than a "/2024/" substring so the
        # check does not depend on the platform's path separator.
        assert "2024" in path.parts

    def test_output_structure_provider_project(self, tmp_path):
        exp = MarkdownExporter(tmp_path, output_structure="provider/project")
        path = exp.export(SAMPLE_CONV)
        # Should NOT have year subdirectory
        parts = path.parts
        assert "2024" not in parts


class TestMarkdownEmptyMessages:
    """Whitespace-only messages in the Markdown export."""

    def test_empty_message_skipped(self, tmp_path, caplog):
        import logging

        conv = {
            **SAMPLE_CONV,
            "messages": [
                {"role": "user", "content": " ", "content_type": "text", "timestamp": None},
                {
                    "role": "assistant",
                    "content": "Real response",
                    "content_type": "text",
                    "timestamp": None,
                },
            ],
        }
        exp = MarkdownExporter(tmp_path)
        with caplog.at_level(logging.WARNING, logger="src.exporters.markdown"):
            path = exp.export(conv)
        content = path.read_text(encoding="utf-8")
        assert "Real response" in content
        # The exporter is expected to log something mentioning "empty".
        assert any("empty" in r.message.lower() for r in caplog.records)


class TestMarkdownAtomicWrite:
    """File mode and leftover temp files after a Markdown export."""

    def test_permissions_600(self, tmp_path):
        exp = MarkdownExporter(tmp_path)
        path = exp.export(SAMPLE_CONV)
        # Last three octal digits of st_mode are the user/group/other bits.
        mode = oct(os.stat(path).st_mode)[-3:]
        assert mode == "600"

    def test_no_tmp_files_left(self, tmp_path):
        exp = MarkdownExporter(tmp_path)
        exp.export(SAMPLE_CONV)
        tmp_files = list(tmp_path.rglob("*.tmp"))
        assert tmp_files == []


class TestJSONExporter:
    """Content, extension, file mode and formatting of the JSON export."""

    def test_produces_valid_json(self, tmp_path):
        exp = JSONExporter(tmp_path)
        path = exp.export(SAMPLE_CONV)
        data = json.loads(path.read_text(encoding="utf-8"))
        assert data["id"] == "abc12345def67890"
        assert data["title"] == "Test Conversation"
        assert len(data["messages"]) == 2

    def test_includes_exported_at(self, tmp_path):
        exp = JSONExporter(tmp_path)
        path = exp.export(SAMPLE_CONV)
        data = json.loads(path.read_text(encoding="utf-8"))
        assert "exported_at" in data

    def test_permissions_600(self, tmp_path):
        exp = JSONExporter(tmp_path)
        path = exp.export(SAMPLE_CONV)
        mode = oct(os.stat(path).st_mode)[-3:]
        assert mode == "600"

    def test_json_extension(self, tmp_path):
        exp = JSONExporter(tmp_path)
        path = exp.export(SAMPLE_CONV)
        assert path.suffix == ".json"

    def test_pretty_printed(self, tmp_path):
        exp = JSONExporter(tmp_path)
        path = exp.export(SAMPLE_CONV)
        raw = path.read_text(encoding="utf-8")
        # Pretty-printed JSON has newlines and indentation
        assert "\n" in raw
        # A bare `" " in raw` would pass on compact JSON too, because the
        # title "Test Conversation" contains a space; require a line that
        # actually starts with whitespace.
        assert any(line.startswith((" ", "\t")) for line in raw.splitlines())


class TestBothFormats:
    """T-38: Markdown and JSON exporters produce matching filenames for the same conversation."""

    def test_both_formats_produce_files(self, tmp_path):
        md_exp = MarkdownExporter(tmp_path)
        json_exp = JSONExporter(tmp_path)
        md_path = md_exp.export(SAMPLE_CONV)
        json_path = json_exp.export(SAMPLE_CONV)
        assert md_path.exists()
        assert json_path.exists()

    def test_both_formats_have_matching_stems(self, tmp_path):
        md_exp = MarkdownExporter(tmp_path)
        json_exp = JSONExporter(tmp_path)
        md_path = md_exp.export(SAMPLE_CONV)
        json_path = json_exp.export(SAMPLE_CONV)
        assert md_path.suffix == ".md"
        assert json_path.suffix == ".json"
        assert md_path.stem == json_path.stem

    def test_both_formats_same_directory(self, tmp_path):
        md_exp = MarkdownExporter(tmp_path)
        json_exp = JSONExporter(tmp_path)
        md_path = md_exp.export(SAMPLE_CONV)
        json_path = json_exp.export(SAMPLE_CONV)
        assert md_path.parent == json_path.parent


class TestYamlEscape:
    """Escaping of double quotes and backslashes by _yaml_escape."""

    def test_escapes_double_quotes(self):
        assert _yaml_escape('Say "hello"') == 'Say \\"hello\\"'

    def test_escapes_backslash(self):
        assert _yaml_escape("path\\to\\file") == "path\\\\to\\\\file"

    def test_no_change_for_plain_string(self):
        assert _yaml_escape("Hello World") == "Hello World"


class TestFormatTimestamp:
    """Normalisation of ISO timestamps by _format_timestamp."""

    def test_strips_fractional_seconds(self):
        result = _format_timestamp("2024-06-10T14:32:00.123456Z")
        assert "." not in result

    def test_replaces_T_with_space(self):
        result = _format_timestamp("2024-06-10T14:32:00Z")
        assert "T" not in result
        assert result == "2024-06-10 14:32:00"

    def test_empty_string(self):
        assert _format_timestamp("") == ""


# ---------------------------------------------------------------------------
# Block helpers and rendering
# ---------------------------------------------------------------------------


class TestSafeFence:
    """_safe_fence returns a fence one backtick longer than any run in the content."""

    def test_minimum_three_backticks(self):
        assert _safe_fence("plain text") == "```"

    def test_four_backticks_when_three_in_content(self):
        assert _safe_fence("here ``` is a fence") == "````"

    def test_five_backticks_when_four_in_content(self):
        assert _safe_fence("here ```` is four") == "`````"

    def test_handles_empty_string(self):
        assert _safe_fence("") == "```"

    def test_handles_run_at_end(self):
        # Trailing run still counted
        assert _safe_fence("text ending in ```") == "````"


class TestBlockquotePrefix:
    """_blockquote_prefix prepends a quote marker to every line."""

    def test_single_line(self):
        assert _blockquote_prefix("hello") == "> hello"

    def test_multi_line(self):
        assert _blockquote_prefix("a\nb\nc") == "> a\n> b\n> c"

    def test_empty_lines_become_naked_quote_marker(self):
        assert _blockquote_prefix("a\n\nb") == "> a\n>\n> b"

    def test_empty_string(self):
        assert _blockquote_prefix("") == ">"


class TestBlockConstructors:
    """Block constructors return None for empty input and a dict otherwise."""

    def test_make_text_block_returns_none_for_empty(self):
        assert make_text_block("") is None
        assert make_text_block(" ") is None

    def test_make_text_block_returns_dict(self):
        b = make_text_block("hello")
        assert b == {"type": "text", "text": "hello"}

    def test_make_code_block_returns_none_for_empty(self):
        assert make_code_block("") is None

    def test_make_thinking_block_returns_none_for_empty(self):
        assert make_thinking_block("") is None


class TestRenderBlocks:
    """Markdown produced by render_blocks_to_markdown for each block type."""

    def test_text_block_renders_as_paragraph(self):
        out = render_blocks_to_markdown([make_text_block("Hello world")])
        assert out == "Hello world"

    def test_blocks_separated_by_blank_line(self):
        out = render_blocks_to_markdown(
            [make_text_block("first"), make_text_block("second")]
        )
        assert out == "first\n\nsecond"

    def test_code_block_with_language(self):
        out = render_blocks_to_markdown([make_code_block("print(1)", language="python")])
        assert "```python" in out
        assert "print(1)" in out

    def test_thinking_block_uses_blockquote(self):
        out = render_blocks_to_markdown([make_thinking_block("step 1\nstep 2")])
        assert "**💭 Reasoning**" in out
        assert "> step 1" in out
        assert "> step 2" in out

    def test_tool_use_renders_as_blockquote_with_safe_fence(self):
        out = render_blocks_to_markdown(
            [make_tool_use_block("search", {"query": "test"})]
        )
        assert "> 🔧 **Tool: search**" in out
        # Every line of the body is blockquote-prefixed
        assert "> ```json" in out
        assert "> }" in out

    def test_tool_use_with_multiline_input(self):
        out = render_blocks_to_markdown(
            [make_tool_use_block("complex", {"a": 1, "b": [{"x": "y"}]})]
        )
        # Prefix every line of multi-line JSON
        for line in out.split("\n"):
            assert line.startswith(">") or line == ""

    def test_tool_result_success_uses_outbox_icon(self):
        out = render_blocks_to_markdown([make_tool_result_block("OK")])
        assert "📤 **Result**" in out
        assert "❌" not in out

    def test_tool_result_error_uses_x_icon(self):
        out = render_blocks_to_markdown([make_tool_result_block("oops", is_error=True)])
        assert "❌ **Result (error)**" in out
        assert "📤" not in out

    def test_tool_result_with_tool_name_in_header(self):
        out = render_blocks_to_markdown(
            [make_tool_result_block("done", tool_name="container.exec")]
        )
        assert "📤 **Result: container.exec**" in out

    def test_tool_result_error_with_tool_name(self):
        out = render_blocks_to_markdown(
            [make_tool_result_block("503", tool_name="web", is_error=True)]
        )
        assert "❌ **Result (error): web**" in out

    def test_tool_result_summary_renders_as_italic_line(self):
        out = render_blocks_to_markdown(
            [
                make_tool_result_block(
                    "output",
                    tool_name="container.exec",
                    summary="Reading skill documentation",
                )
            ]
        )
        # Summary line is italic, lives between header and fence,
        # all inside the blockquote prefix.
        assert "> *Reading skill documentation*" in out
        # Order: header before summary before fence
        header_idx = out.index("Result: container.exec")
        summary_idx = out.index("Reading skill documentation")
        fence_idx = out.index("output")
        assert header_idx < summary_idx < fence_idx

    def test_image_placeholder_rendering(self):
        out = render_blocks_to_markdown(
            [make_image_placeholder(ref="file-123", source="user_upload")]
        )
        assert "🖼️ **Image attached**" in out
        assert "`file-123`" in out
        assert "user_upload" in out
        assert "content not preserved" in out

    def test_file_placeholder_with_metadata(self):
        out = render_blocks_to_markdown(
            [
                make_file_placeholder(
                    ref="sediment://x",
                    mime="audio/wav",
                    size_bytes=10240,
                    duration_seconds=2.5,
                )
            ]
        )
        assert "📎 **File attached**" in out
        assert "audio/wav" in out
        assert "KB" in out
        assert "2.50s" in out

    def test_unknown_block_renders_with_keys(self):
        out = render_blocks_to_markdown(
            [
                make_unknown_block(
                    raw_type="future_x",
                    observed_keys=["foo", "bar"],
                    reason=UNKNOWN_REASON_UNKNOWN_TYPE,
                )
            ]
        )
        assert "⚠️ **Unsupported content**" in out
        assert "future_x" in out
        assert "`foo`" in out
        assert "`bar`" in out

    def test_unknown_extraction_failed_includes_summary(self):
        out = render_blocks_to_markdown(
            [
                make_unknown_block(
                    raw_type="audio_transcription",
                    observed_keys=["asset_pointer"],
                    reason=UNKNOWN_REASON_EXTRACTION_FAILED,
                    summary="expected key 'text' not found",
                )
            ]
        )
        assert "extraction_failed" in out
        assert "expected key 'text' not found" in out

    def test_hidden_context_marker(self):
        out = render_blocks_to_markdown(
            [make_hidden_context_marker("user_editable_context")]
        )
        assert "ℹ️ **Hidden context**" in out
        assert "`user_editable_context`" in out

    def test_safe_fence_prevents_runaway_code_block(self):
        # Content contains an unbalanced opening fence — without _safe_fence
        # this would corrupt downstream rendering.
        evil_content = "before\n```Follow\ntext\nraw is: \"```"
        block = make_code_block(evil_content)
        out = render_blocks_to_markdown([block, make_text_block("after")])
        # The 4-backtick wrap should be present
        assert "````" in out
        # The "after" text should appear OUTSIDE any code block — it follows
        # the closing ```` fence.
        assert out.endswith("after")

    def test_block_order_preserved(self):
        blocks = [
            make_text_block("a"),
            make_image_placeholder(ref="r1", source="user_upload"),
            make_text_block("b"),
        ]
        out = render_blocks_to_markdown(blocks)
        assert out.index("a") < out.index("Image attached")
        assert out.index("Image attached") < out.index("b")


# ---------------------------------------------------------------------------
# Markdown exporter with blocks
# ---------------------------------------------------------------------------

# One assistant message that carries `blocks` and no `content` key.
SAMPLE_CONV_BLOCKS = {
    "id": "blocks12345",
    "title": "Blocks Conversation",
    "provider": "claude",
    "project": None,
    "created_at": "2024-06-10T14:32:00Z",
    "updated_at": "2024-06-10T15:00:00Z",
    "message_count": 1,
    "messages": [
        {
            "role": "assistant",
            "content_type": "text",
            "timestamp": None,
            "blocks": [
                {"type": "text", "text": "Here is the answer."},
                {"type": "tool_use", "name": "search", "input": {"q": "x"}, "tool_id": "t1"},
            ],
        }
    ],
}


class TestMarkdownExporterWithBlocks:
    """Markdown export of messages that carry `blocks` instead of `content`."""

    def test_renders_blocks(self, tmp_path):
        exp = MarkdownExporter(tmp_path)
        path = exp.export(SAMPLE_CONV_BLOCKS)
        body = path.read_text(encoding="utf-8")
        assert "Here is the answer." in body
        assert "🔧 **Tool: search**" in body

    def test_falls_back_to_content_when_blocks_missing(self, tmp_path):
        # Backward-compat: messages with `content` only (no `blocks`) still render.
        exp = MarkdownExporter(tmp_path)
        path = exp.export(SAMPLE_CONV)  # SAMPLE_CONV has content only, no blocks
        body = path.read_text(encoding="utf-8")
        assert "Hello, how are you?" in body

    def test_skips_messages_with_neither_blocks_nor_content(self, tmp_path):
        conv = {
            **SAMPLE_CONV_BLOCKS,
            "messages": [
                {"role": "user", "content_type": "text", "timestamp": None, "blocks": []},
                {
                    "role": "assistant",
                    "content_type": "text",
                    "timestamp": None,
                    "blocks": [{"type": "text", "text": "I am here."}],
                },
            ],
        }
        exp = MarkdownExporter(tmp_path)
        path = exp.export(conv)
        body = path.read_text(encoding="utf-8")
        # NOTE(review): this only proves the export succeeds and keeps the
        # non-empty message; the absence of the empty user turn is not
        # asserted here — confirm whether a negative check is wanted.
        assert "I am here." in body