First real-data export against v0.4.0 surfaced 66 unknown blocks across three content types — captured live and added.

Added:
- execution_output (Code Interpreter / container.exec / python tool output) → tool_result block. output=content.text, tool_name=author.name, is_error=metadata.aggregate_result.status, summary=metadata.reasoning_title
- system_error → error tool_result with tool_name=author.name
- tether_browsing_display: spinner placeholders (empty result+summary) skip silently with DEBUG log; defensive populated-case branch maps to tool_result (untested in real data)
- tool_result block schema: optional `summary` field rendered as italic line between header and fence
- tool_result rendering: tool_name appears in header when present (e.g. `📤 Result: container.exec`); existing tool_name=None calls unchanged
- _ROLE_LABELS["tool"] = ("🔧 Tool", "tool")

Fixed:
- chatgpt.normalize_conversation reads `conversation_id` as fallback for `id`. Live API uses conversation_id; fixtures use id. Pre-fix: empty id in YAML frontmatter and missing context in WARNING logs.

Tests: 11 new (192 total, 0 failures). Fixture extended with 4 tool-output cases (execution_output success, empty execution_output that should skip, system_error, tether_browsing_display spinner).

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
538 lines
18 KiB
Python
538 lines
18 KiB
Python
"""Unit tests for src/exporters/ and src/blocks.py."""
|
||
|
||
import json
|
||
import os
|
||
import tempfile
|
||
from pathlib import Path
|
||
|
||
import pytest
|
||
|
||
from src.blocks import (
|
||
BLOCK_TYPE_TEXT,
|
||
UNKNOWN_REASON_EXTRACTION_FAILED,
|
||
UNKNOWN_REASON_UNKNOWN_TYPE,
|
||
_blockquote_prefix,
|
||
_safe_fence,
|
||
make_code_block,
|
||
make_file_placeholder,
|
||
make_hidden_context_marker,
|
||
make_image_placeholder,
|
||
make_text_block,
|
||
make_thinking_block,
|
||
make_tool_result_block,
|
||
make_tool_use_block,
|
||
make_unknown_block,
|
||
render_blocks_to_markdown,
|
||
)
|
||
from src.exporters.markdown import MarkdownExporter, _yaml_escape, _format_timestamp
|
||
from src.exporters.json_export import JSONExporter
|
||
|
||
|
||
# Baseline fixture: a two-message, plain-text conversation inside a project.
SAMPLE_CONV = dict(
    id="abc12345def67890",
    title="Test Conversation",
    provider="claude",
    project="my-project",
    created_at="2024-06-10T14:32:00Z",
    updated_at="2024-06-10T15:00:00Z",
    message_count=2,
    messages=[
        dict(
            role="user",
            content="Hello, how are you?",
            content_type="text",
            timestamp="2024-06-10T14:32:00Z",
        ),
        dict(
            role="assistant",
            content="I'm doing well, thank you! How can I help?",
            content_type="text",
            timestamp="2024-06-10T14:32:10Z",
        ),
    ],
)

# Variant of the baseline with the project cleared (shares the same messages).
NO_PROJECT_CONV = dict(
    SAMPLE_CONV,
    id="noproj12345",
    project=None,
    title="No Project Chat",
)

# Variant whose only message embeds a fenced python snippet in its content.
CODE_CONV = dict(
    SAMPLE_CONV,
    id="code12345",
    messages=[
        dict(
            role="user",
            content="Here is some code:\n```python\nprint('hello')\n```",
            content_type="text",
            timestamp=None,
        ),
    ],
)
|
||
|
||
|
||
class TestMarkdownFrontmatter:
    """Content checks on the Markdown file written for a conversation."""

    @staticmethod
    def _exported_text(conv, out_dir):
        """Export *conv* into *out_dir* and return the written file's text.

        The file is decoded as UTF-8 explicitly rather than with the platform
        default encoding, because the assertions below look for emoji labels.
        NOTE(review): assumes the exporter writes UTF-8 — confirm against
        src/exporters/markdown.py.
        """
        path = MarkdownExporter(out_dir).export(conv)
        return path.read_text(encoding="utf-8")

    def test_yaml_frontmatter_present(self, tmp_path):
        """The file opens with a ``---`` line and carries the expected keys."""
        content = self._exported_text(SAMPLE_CONV, tmp_path)
        assert content.startswith("---\n")
        assert "title: " in content
        assert "provider: claude" in content
        assert "conversation_id: abc12345def67890" in content
        assert "created_at: 2024-06-10T14:32:00Z" in content
        assert "exported_at: " in content
        assert "message_count: 2" in content
        assert "tags: [claude, my-project]" in content

    def test_no_project_uses_no_project_label(self, tmp_path):
        """A conversation with ``project=None`` is labelled ``no-project``."""
        content = self._exported_text(NO_PROJECT_CONV, tmp_path)
        assert "project: no-project" in content
        assert "tags: [claude]" in content

    def test_metadata_table_present(self, tmp_path):
        """The metadata table rows for provider, project, date and count exist."""
        content = self._exported_text(SAMPLE_CONV, tmp_path)
        assert "| Provider | Claude |" in content
        assert "| Project | my-project |" in content
        assert "| Date | 2024-06-10 |" in content
        assert "| Messages | 2 |" in content

    def test_messages_rendered(self, tmp_path):
        """Both message bodies and both role labels appear in the output."""
        content = self._exported_text(SAMPLE_CONV, tmp_path)
        assert "Hello, how are you?" in content
        assert "I'm doing well" in content
        assert "🧑 Human" in content
        assert "🤖 Assistant" in content

    def test_code_fences_preserved(self, tmp_path):
        """A fenced snippet inside message content survives the export."""
        content = self._exported_text(CODE_CONV, tmp_path)
        assert "```python" in content
        assert "print('hello')" in content
|
||
|
||
|
||
class TestMarkdownFilenameGeneration:
    """Checks on the file name and directory layout chosen by the exporter."""

    def test_filename_format(self, tmp_path):
        """File name is ``<date>_<title-slug>_<first 8 id chars>.md``."""
        path = MarkdownExporter(tmp_path).export(SAMPLE_CONV)
        assert path.name == "2024-06-10_test-conversation_abc12345.md"

    def test_no_project_goes_to_no_project_dir(self, tmp_path):
        """A conversation without a project lands under a ``no-project`` path."""
        path = MarkdownExporter(tmp_path).export(NO_PROJECT_CONV)
        assert "no-project" in str(path)

    def test_project_slug_in_path(self, tmp_path):
        """The project name appears in the output path."""
        path = MarkdownExporter(tmp_path).export(SAMPLE_CONV)
        assert "my-project" in str(path)

    def test_year_in_path(self, tmp_path):
        """The default layout contains a directory named after the year."""
        path = MarkdownExporter(tmp_path).export(SAMPLE_CONV)
        # Compare path components instead of searching for "/2024/" so the
        # check does not depend on the platform's path separator.
        assert "2024" in path.parts

    def test_output_structure_provider_project(self, tmp_path):
        """``output_structure="provider/project"`` omits the year directory."""
        exporter = MarkdownExporter(tmp_path, output_structure="provider/project")
        path = exporter.export(SAMPLE_CONV)
        # Should NOT have year subdirectory
        assert "2024" not in path.parts
|
||
|
||
|
||
class TestMarkdownEmptyMessages:
    """Behaviour of the Markdown exporter when a message has blank content."""

    def test_empty_message_skipped(self, tmp_path, caplog):
        """A whitespace-only message triggers an "empty" log record.

        The non-empty message must still be rendered in the output file.
        """
        import logging

        conv = {
            **SAMPLE_CONV,
            "messages": [
                {"role": "user", "content": " ", "content_type": "text", "timestamp": None},
                {
                    "role": "assistant",
                    "content": "Real response",
                    "content_type": "text",
                    "timestamp": None,
                },
            ],
        }
        exp = MarkdownExporter(tmp_path)
        with caplog.at_level(logging.WARNING, logger="src.exporters.markdown"):
            path = exp.export(conv)

        # Decode explicitly: the platform default encoding is not UTF-8
        # everywhere. NOTE(review): assumes the exporter writes UTF-8.
        content = path.read_text(encoding="utf-8")
        assert "Real response" in content
        # getMessage() builds the text from the record itself, so it does not
        # rely on a handler having formatted the record first.
        assert any("empty" in r.getMessage().lower() for r in caplog.records)
|
||
|
||
|
||
class TestMarkdownAtomicWrite:
    """File-mode and leftover-temp-file checks for the Markdown exporter."""

    def test_permissions_600(self, tmp_path):
        """The exported file's permission bits are exactly ``600``."""
        written = MarkdownExporter(tmp_path).export(SAMPLE_CONV)
        # Keep only the rwx bits for user/group/other, rendered as three
        # octal digits.
        permission_digits = format(os.stat(written).st_mode & 0o777, "03o")
        assert permission_digits == "600"

    def test_no_tmp_files_left(self, tmp_path):
        """No ``*.tmp`` file remains anywhere under the output directory."""
        exporter = MarkdownExporter(tmp_path)
        exporter.export(SAMPLE_CONV)
        leftovers = [entry for entry in tmp_path.rglob("*.tmp")]
        assert leftovers == []
|
||
|
||
|
||
class TestJSONExporter:
    """Checks on the file written by ``JSONExporter.export``."""

    @staticmethod
    def _exported_text(out_dir):
        """Export SAMPLE_CONV into *out_dir* and return ``(path, text)``.

        The text is decoded as UTF-8 explicitly instead of relying on the
        platform default encoding.
        NOTE(review): assumes the exporter writes UTF-8 (or pure ASCII).
        """
        path = JSONExporter(out_dir).export(SAMPLE_CONV)
        return path, path.read_text(encoding="utf-8")

    def test_produces_valid_json(self, tmp_path):
        """The output parses as JSON and keeps id, title and both messages."""
        _, raw = self._exported_text(tmp_path)
        data = json.loads(raw)
        assert data["id"] == "abc12345def67890"
        assert data["title"] == "Test Conversation"
        assert len(data["messages"]) == 2

    def test_includes_exported_at(self, tmp_path):
        """The exported document has an ``exported_at`` key."""
        _, raw = self._exported_text(tmp_path)
        data = json.loads(raw)
        assert "exported_at" in data

    def test_permissions_600(self, tmp_path):
        """The exported file's permission bits are exactly ``600``."""
        exp = JSONExporter(tmp_path)
        path = exp.export(SAMPLE_CONV)
        mode = oct(os.stat(path).st_mode)[-3:]
        assert mode == "600"

    def test_json_extension(self, tmp_path):
        """The exported file uses the ``.json`` suffix."""
        exp = JSONExporter(tmp_path)
        path = exp.export(SAMPLE_CONV)
        assert path.suffix == ".json"

    def test_pretty_printed(self, tmp_path):
        """The output spans several lines and at least one is indented."""
        _, raw = self._exported_text(tmp_path)
        # Pretty-printed JSON has newlines and indentation
        assert "\n" in raw
        # A bare `" " in raw` would also pass for compact JSON, because the
        # title itself contains a space. Require real leading whitespace.
        assert any(line[:1] in (" ", "\t") for line in raw.splitlines())
|
||
|
||
|
||
class TestBothFormats:
    """T-38: Markdown and JSON exporters produce matching filenames for the same conversation."""

    @staticmethod
    def _export_pair(out_dir):
        """Export SAMPLE_CONV with both exporters; return ``(md_path, json_path)``."""
        markdown_exporter = MarkdownExporter(out_dir)
        json_exporter = JSONExporter(out_dir)
        markdown_path = markdown_exporter.export(SAMPLE_CONV)
        return markdown_path, json_exporter.export(SAMPLE_CONV)

    def test_both_formats_produce_files(self, tmp_path):
        """Each exporter leaves a file on disk."""
        markdown_path, json_path = self._export_pair(tmp_path)
        assert markdown_path.exists()
        assert json_path.exists()

    def test_both_formats_have_matching_stems(self, tmp_path):
        """The two files differ only in suffix (``.md`` vs ``.json``)."""
        markdown_path, json_path = self._export_pair(tmp_path)
        assert markdown_path.suffix == ".md"
        assert json_path.suffix == ".json"
        assert markdown_path.stem == json_path.stem

    def test_both_formats_same_directory(self, tmp_path):
        """Both files are written into the same directory."""
        markdown_path, json_path = self._export_pair(tmp_path)
        assert markdown_path.parent == json_path.parent
|
||
|
||
|
||
class TestYamlEscape:
    """Expected outputs of ``_yaml_escape`` for quotes, backslashes and plain text."""

    def test_escapes_double_quotes(self):
        """Each double quote gains a leading backslash."""
        escaped = _yaml_escape('Say "hello"')
        assert escaped == r'Say \"hello\"'

    def test_escapes_backslash(self):
        """Each backslash is doubled."""
        escaped = _yaml_escape(r"path\to\file")
        assert escaped == r"path\\to\\file"

    def test_no_change_for_plain_string(self):
        """Text without quotes or backslashes comes back unchanged."""
        plain = "Hello World"
        assert _yaml_escape(plain) == plain
|
||
|
||
|
||
class TestFormatTimestamp:
    """Expected outputs of ``_format_timestamp`` for ISO-style inputs."""

    def test_strips_fractional_seconds(self):
        """No ``.`` remains when the input carries fractional seconds."""
        formatted = _format_timestamp("2024-06-10T14:32:00.123456Z")
        assert "." not in formatted

    def test_replaces_T_with_space(self):
        """The ``T`` separator becomes a space and the trailing ``Z`` is gone."""
        formatted = _format_timestamp("2024-06-10T14:32:00Z")
        assert "T" not in formatted
        assert formatted == "2024-06-10 14:32:00"

    def test_empty_string(self):
        """An empty input yields an empty output."""
        formatted = _format_timestamp("")
        assert formatted == ""
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Block helpers and rendering
|
||
# ---------------------------------------------------------------------------
|
||
|
||
|
||
class TestSafeFence:
    """Expected fence lengths returned by ``_safe_fence`` for various contents."""

    def test_minimum_three_backticks(self):
        """Content without backticks gets a three-backtick fence."""
        fence = _safe_fence("plain text")
        assert fence == "`" * 3

    def test_four_backticks_when_three_in_content(self):
        """A run of three backticks in the content yields a four-backtick fence."""
        fence = _safe_fence("here ``` is a fence")
        assert fence == "`" * 4

    def test_five_backticks_when_four_in_content(self):
        """A run of four backticks in the content yields a five-backtick fence."""
        fence = _safe_fence("here ```` is four")
        assert fence == "`" * 5

    def test_handles_empty_string(self):
        """Empty content gets a three-backtick fence."""
        fence = _safe_fence("")
        assert fence == "`" * 3

    def test_handles_run_at_end(self):
        """A backtick run at the very end of the content is still counted."""
        # Trailing run still counted
        fence = _safe_fence("text ending in ```")
        assert fence == "`" * 4
|
||
|
||
|
||
class TestBlockquotePrefix:
    """Expected outputs of ``_blockquote_prefix`` for single, multi and empty lines."""

    def test_single_line(self):
        """One line is prefixed with ``> ``."""
        quoted = _blockquote_prefix("hello")
        assert quoted == "> hello"

    def test_multi_line(self):
        """Every line of a multi-line input is prefixed."""
        quoted = _blockquote_prefix("a\nb\nc")
        expected = "\n".join(["> a", "> b", "> c"])
        assert quoted == expected

    def test_empty_lines_become_naked_quote_marker(self):
        """A blank line becomes a bare ``>`` with no trailing space."""
        quoted = _blockquote_prefix("a\n\nb")
        expected = "\n".join(["> a", ">", "> b"])
        assert quoted == expected

    def test_empty_string(self):
        """Empty input yields a single bare ``>``."""
        quoted = _blockquote_prefix("")
        assert quoted == ">"
|
||
|
||
|
||
class TestBlockConstructors:
    """Return values of the ``make_*_block`` constructors for empty and plain input."""

    def test_make_text_block_returns_none_for_empty(self):
        """Empty and whitespace-only text produce no block."""
        for blank in ("", " "):
            assert make_text_block(blank) is None

    def test_make_text_block_returns_dict(self):
        """Non-empty text produces a ``text`` block dict."""
        block = make_text_block("hello")
        assert block == dict(type="text", text="hello")

    def test_make_code_block_returns_none_for_empty(self):
        """Empty code produces no block."""
        block = make_code_block("")
        assert block is None

    def test_make_thinking_block_returns_none_for_empty(self):
        """Empty thinking text produces no block."""
        block = make_thinking_block("")
        assert block is None
|
||
|
||
|
||
class TestRenderBlocks:
    """Rendering checks for ``render_blocks_to_markdown`` across block types."""

    def test_text_block_renders_as_paragraph(self):
        """A lone text block renders as its text, with nothing added."""
        out = render_blocks_to_markdown([make_text_block("Hello world")])
        assert out == "Hello world"

    def test_blocks_separated_by_blank_line(self):
        """Consecutive blocks are joined by exactly one blank line."""
        out = render_blocks_to_markdown(
            [make_text_block("first"), make_text_block("second")]
        )
        assert out == "first\n\nsecond"

    def test_code_block_with_language(self):
        """The language tag follows the opening fence and the code is kept."""
        out = render_blocks_to_markdown([make_code_block("print(1)", language="python")])
        assert "```python" in out
        assert "print(1)" in out

    def test_thinking_block_uses_blockquote(self):
        """Thinking text gets a reasoning header and blockquoted lines."""
        out = render_blocks_to_markdown([make_thinking_block("step 1\nstep 2")])
        assert "**💭 Reasoning**" in out
        assert "> step 1" in out
        assert "> step 2" in out

    def test_tool_use_renders_as_blockquote_with_safe_fence(self):
        """A tool_use block renders a quoted header and a quoted JSON fence."""
        out = render_blocks_to_markdown(
            [make_tool_use_block("search", {"query": "test"})]
        )
        assert "> 🔧 **Tool: search**" in out
        # Every line of the body is blockquote-prefixed
        assert "> ```json" in out
        assert "> }" in out

    def test_tool_use_with_multiline_input(self):
        """Every non-empty output line of a nested input starts with ``>``."""
        out = render_blocks_to_markdown(
            [make_tool_use_block("complex", {"a": 1, "b": [{"x": "y"}]})]
        )
        # Prefix every line of multi-line JSON
        for line in out.split("\n"):
            assert line.startswith(">") or line == ""

    def test_tool_result_success_uses_outbox_icon(self):
        """A non-error result uses the 📤 header and no ❌."""
        out = render_blocks_to_markdown([make_tool_result_block("OK")])
        assert "📤 **Result**" in out
        assert "❌" not in out

    def test_tool_result_error_uses_x_icon(self):
        """An error result uses the ❌ header and no 📤."""
        out = render_blocks_to_markdown([make_tool_result_block("oops", is_error=True)])
        assert "❌ **Result (error)**" in out
        assert "📤" not in out

    def test_tool_result_with_tool_name_in_header(self):
        """A tool name, when given, is appended to the result header."""
        out = render_blocks_to_markdown(
            [make_tool_result_block("done", tool_name="container.exec")]
        )
        assert "📤 **Result: container.exec**" in out

    def test_tool_result_error_with_tool_name(self):
        """The tool name is also appended to the error header."""
        out = render_blocks_to_markdown(
            [make_tool_result_block("503", tool_name="web", is_error=True)]
        )
        assert "❌ **Result (error): web**" in out

    def test_tool_result_summary_renders_as_italic_line(self):
        """The summary is an italic quoted line between header and body."""
        out = render_blocks_to_markdown(
            [
                make_tool_result_block(
                    "output",
                    tool_name="container.exec",
                    summary="Reading skill documentation",
                )
            ]
        )
        # Summary line is italic, lives between header and fence,
        # all inside the blockquote prefix.
        assert "> *Reading skill documentation*" in out
        # Order: header before summary before the fenced result body
        header_idx = out.index("Result: container.exec")
        summary_idx = out.index("Reading skill documentation")
        body_idx = out.index("output")
        assert header_idx < summary_idx < body_idx

    def test_image_placeholder_rendering(self):
        """An image placeholder shows its ref, its source and a notice."""
        out = render_blocks_to_markdown(
            [make_image_placeholder(ref="file-123", source="user_upload")]
        )
        assert "🖼️ **Image attached**" in out
        assert "`file-123`" in out
        assert "user_upload" in out
        assert "content not preserved" in out

    def test_file_placeholder_with_metadata(self):
        """A file placeholder shows mime type, a KB size and the duration."""
        out = render_blocks_to_markdown(
            [
                make_file_placeholder(
                    ref="sediment://x",
                    mime="audio/wav",
                    size_bytes=10240,
                    duration_seconds=2.5,
                )
            ]
        )
        assert "📎 **File attached**" in out
        assert "audio/wav" in out
        assert "KB" in out
        assert "2.50s" in out

    def test_unknown_block_renders_with_keys(self):
        """An unknown block lists its raw type and each observed key."""
        out = render_blocks_to_markdown(
            [
                make_unknown_block(
                    raw_type="future_x",
                    observed_keys=["foo", "bar"],
                    reason=UNKNOWN_REASON_UNKNOWN_TYPE,
                )
            ]
        )
        assert "⚠️ **Unsupported content**" in out
        assert "future_x" in out
        assert "`foo`" in out
        assert "`bar`" in out

    def test_unknown_extraction_failed_includes_summary(self):
        """An extraction failure shows the reason and the summary text."""
        out = render_blocks_to_markdown(
            [
                make_unknown_block(
                    raw_type="audio_transcription",
                    observed_keys=["asset_pointer"],
                    reason=UNKNOWN_REASON_EXTRACTION_FAILED,
                    summary="expected key 'text' not found",
                )
            ]
        )
        assert "extraction_failed" in out
        assert "expected key 'text' not found" in out

    def test_hidden_context_marker(self):
        """A hidden-context marker shows its header and the quoted type."""
        out = render_blocks_to_markdown(
            [make_hidden_context_marker("user_editable_context")]
        )
        assert "ℹ️ **Hidden context**" in out
        assert "`user_editable_context`" in out

    def test_safe_fence_prevents_runaway_code_block(self):
        """Content with its own backtick fences gets a longer outer fence."""
        # Content contains an unbalanced opening fence — without _safe_fence
        # this would corrupt downstream rendering.
        evil_content = "before\n```Follow\ntext\nraw is: \"```"
        block = make_code_block(evil_content)
        out = render_blocks_to_markdown([block, make_text_block("after")])
        # The 4-backtick wrap should be present
        assert "````" in out
        # The "after" text should appear OUTSIDE any code block — it follows
        # the closing ```` fence.
        assert out.endswith("after")

    def test_block_order_preserved(self):
        """Blocks are rendered in the order they were supplied."""
        # Distinctive markers are used instead of single letters such as
        # "a"/"b", which could match inside the placeholder's own wording and
        # make the ordering check pass or fail for the wrong reason.
        first_marker = "FIRST-TEXT-BLOCK"
        last_marker = "SECOND-TEXT-BLOCK"
        blocks = [
            make_text_block(first_marker),
            make_image_placeholder(ref="r1", source="user_upload"),
            make_text_block(last_marker),
        ]
        out = render_blocks_to_markdown(blocks)
        first_idx = out.index(first_marker)
        image_idx = out.index("Image attached")
        last_idx = out.index(last_marker)
        assert first_idx < image_idx < last_idx
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Markdown exporter with blocks
|
||
# ---------------------------------------------------------------------------
|
||
|
||
|
||
# Fixture: one assistant message that carries `blocks` (a text block followed
# by a tool_use block) and has no `content` key.
SAMPLE_CONV_BLOCKS = dict(
    id="blocks12345",
    title="Blocks Conversation",
    provider="claude",
    project=None,
    created_at="2024-06-10T14:32:00Z",
    updated_at="2024-06-10T15:00:00Z",
    message_count=1,
    messages=[
        dict(
            role="assistant",
            content_type="text",
            timestamp=None,
            blocks=[
                {"type": "text", "text": "Here is the answer."},
                {
                    "type": "tool_use",
                    "name": "search",
                    "input": {"q": "x"},
                    "tool_id": "t1",
                },
            ],
        ),
    ],
)
|
||
|
||
|
||
class TestMarkdownExporterWithBlocks:
    """Markdown export of messages that carry ``blocks`` instead of ``content``."""

    @staticmethod
    def _exported_text(conv, out_dir):
        """Export *conv* into *out_dir* and return the written file's text.

        The file is decoded as UTF-8 explicitly rather than with the platform
        default encoding, because the assertions look for an emoji header.
        NOTE(review): assumes the exporter writes UTF-8 — confirm against
        src/exporters/markdown.py.
        """
        path = MarkdownExporter(out_dir).export(conv)
        return path.read_text(encoding="utf-8")

    def test_renders_blocks(self, tmp_path):
        """Text and tool_use blocks both appear in the exported body."""
        body = self._exported_text(SAMPLE_CONV_BLOCKS, tmp_path)
        assert "Here is the answer." in body
        assert "🔧 **Tool: search**" in body

    def test_falls_back_to_content_when_blocks_missing(self, tmp_path):
        """Messages that only have ``content`` are still rendered."""
        # Backward-compat: messages with `content` only (no `blocks`) still render.
        # SAMPLE_CONV has content only, no blocks
        body = self._exported_text(SAMPLE_CONV, tmp_path)
        assert "Hello, how are you?" in body

    def test_skips_messages_with_neither_blocks_nor_content(self, tmp_path):
        """An empty-blocks message does not stop later messages rendering."""
        conv = {
            **SAMPLE_CONV_BLOCKS,
            "messages": [
                {"role": "user", "content_type": "text", "timestamp": None, "blocks": []},
                {
                    "role": "assistant",
                    "content_type": "text",
                    "timestamp": None,
                    "blocks": [{"type": "text", "text": "I am here."}],
                },
            ],
        }
        body = self._exported_text(conv, tmp_path)
        assert "I am here." in body
|