Files
ai-chatexport/tests/test_exporters.py
JesseMarkowitz e9b2e42893 feat: v0.5.0 — nested Joplin notebooks, date-prefixed note titles, flat year folders
Joplin notebooks now use a two-level hierarchy: AI-ChatGPT / <project> and
AI-Claude / <project> instead of a single flat title. Note titles are prefixed
with the conversation created_at date (YYYY-MM-DD). Export folders collapse
provider/project/year into a single provider/project.year directory.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-05 11:05:39 -04:00

538 lines
18 KiB
Python
Raw Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Unit tests for src/exporters/ and src/blocks.py."""
import json
import os
import tempfile
from pathlib import Path
import pytest
from src.blocks import (
BLOCK_TYPE_TEXT,
UNKNOWN_REASON_EXTRACTION_FAILED,
UNKNOWN_REASON_UNKNOWN_TYPE,
_blockquote_prefix,
_safe_fence,
make_code_block,
make_file_placeholder,
make_hidden_context_marker,
make_image_placeholder,
make_text_block,
make_thinking_block,
make_tool_result_block,
make_tool_use_block,
make_unknown_block,
render_blocks_to_markdown,
)
from src.exporters.markdown import MarkdownExporter, _yaml_escape, _format_timestamp
from src.exporters.json_export import JSONExporter
# Baseline conversation fixture: provider "claude", project "my-project",
# one user turn followed by one assistant turn, both plain text.
SAMPLE_CONV = dict(
    id="abc12345def67890",
    title="Test Conversation",
    provider="claude",
    project="my-project",
    created_at="2024-06-10T14:32:00Z",
    updated_at="2024-06-10T15:00:00Z",
    message_count=2,
    messages=[
        dict(
            role="user",
            content="Hello, how are you?",
            content_type="text",
            timestamp="2024-06-10T14:32:00Z",
        ),
        dict(
            role="assistant",
            content="I'm doing well, thank you! How can I help?",
            content_type="text",
            timestamp="2024-06-10T14:32:10Z",
        ),
    ],
)

# Same conversation with the project cleared. This is a shallow copy, so the
# "messages" list object is shared with SAMPLE_CONV.
NO_PROJECT_CONV = dict(
    SAMPLE_CONV,
    id="noproj12345",
    project=None,
    title="No Project Chat",
)

# Same metadata, but the only message embeds a fenced Python snippet and has
# no timestamp.
CODE_CONV = dict(
    SAMPLE_CONV,
    id="code12345",
    messages=[
        dict(
            role="user",
            content="Here is some code:\n```python\nprint('hello')\n```",
            content_type="text",
            timestamp=None,
        )
    ],
)
class TestMarkdownFrontmatter:
    """Content checks on the text that MarkdownExporter writes to disk."""

    @staticmethod
    def _exported_text(out_dir, conversation):
        """Export *conversation* under *out_dir* and return the file's text."""
        return MarkdownExporter(out_dir).export(conversation).read_text()

    def test_yaml_frontmatter_present(self, tmp_path):
        text = self._exported_text(tmp_path, SAMPLE_CONV)
        # The document must open with the front-matter delimiter.
        assert text.startswith("---\n")
        expected_fragments = (
            "title: ",
            "provider: claude",
            "conversation_id: abc12345def67890",
            "created_at: 2024-06-10T14:32:00Z",
            "exported_at: ",
            "message_count: 2",
            "tags: [claude, my-project]",
        )
        for fragment in expected_fragments:
            assert fragment in text

    def test_no_project_uses_no_project_label(self, tmp_path):
        text = self._exported_text(tmp_path, NO_PROJECT_CONV)
        # A missing project is labelled "no-project" and left out of the tags.
        assert "project: no-project" in text
        assert "tags: [claude]" in text

    def test_metadata_table_present(self, tmp_path):
        text = self._exported_text(tmp_path, SAMPLE_CONV)
        table_rows = (
            "| Provider | Claude |",
            "| Project | my-project |",
            "| Date | 2024-06-10 |",
            "| Messages | 2 |",
        )
        for row in table_rows:
            assert row in text

    def test_messages_rendered(self, tmp_path):
        text = self._exported_text(tmp_path, SAMPLE_CONV)
        # Both message bodies and both role headings must be present.
        for fragment in (
            "Hello, how are you?",
            "I'm doing well",
            "🧑 Human",
            "🤖 Assistant",
        ):
            assert fragment in text

    def test_code_fences_preserved(self, tmp_path):
        text = self._exported_text(tmp_path, CODE_CONV)
        assert "```python" in text
        assert "print('hello')" in text
class TestMarkdownFilenameGeneration:
    """Checks on the file name and directory layout chosen by MarkdownExporter."""

    def test_filename_format(self, tmp_path):
        """File name is `<date>_<title slug>_<first 8 chars of id>.md`."""
        exp = MarkdownExporter(tmp_path)
        path = exp.export(SAMPLE_CONV)
        assert path.name == "2024-06-10_test-conversation_abc12345.md"

    def test_no_project_goes_to_no_project_dir(self, tmp_path):
        """A conversation without a project lands under a "no-project" path."""
        exp = MarkdownExporter(tmp_path)
        path = exp.export(NO_PROJECT_CONV)
        assert "no-project" in str(path)

    def test_project_slug_in_path(self, tmp_path):
        """The project name appears somewhere in the output path."""
        exp = MarkdownExporter(tmp_path)
        path = exp.export(SAMPLE_CONV)
        assert "my-project" in str(path)

    def test_year_in_path(self, tmp_path):
        """By default a directory name ends with `.2024`."""
        exp = MarkdownExporter(tmp_path)
        path = exp.export(SAMPLE_CONV)
        # Compare against the POSIX form of the path so the "/" in the
        # expected fragment also matches where the native separator is "\".
        assert ".2024/" in path.as_posix()

    def test_output_structure_provider_project(self, tmp_path):
        """With output_structure="provider/project" no path part is "2024"."""
        exp = MarkdownExporter(tmp_path, output_structure="provider/project")
        path = exp.export(SAMPLE_CONV)
        # Should NOT have year subdirectory
        # NOTE(review): this only rejects a part that is exactly "2024"; a
        # directory named like "my-project.2024" would not be caught. Confirm
        # whether that form should be rejected here too.
        parts = path.parts
        assert "2024" not in parts
class TestMarkdownEmptyMessages:
    """Behaviour of MarkdownExporter when a message body is only whitespace."""

    def test_empty_message_skipped(self, tmp_path, caplog):
        import logging

        blank_turn = {"role": "user", "content": " ", "content_type": "text", "timestamp": None}
        real_turn = {"role": "assistant", "content": "Real response", "content_type": "text", "timestamp": None}
        conversation = dict(SAMPLE_CONV, messages=[blank_turn, real_turn])

        exporter = MarkdownExporter(tmp_path)
        # Capture warnings from the exporter's logger while the export runs.
        with caplog.at_level(logging.WARNING, logger="src.exporters.markdown"):
            exported = exporter.export(conversation)
        text = exported.read_text()

        assert "Real response" in text
        # At least one captured record must mention "empty" (any casing).
        logged = [record.message.lower() for record in caplog.records]
        assert any("empty" in message for message in logged)
class TestMarkdownAtomicWrite:
    """File-mode and leftover-file checks for MarkdownExporter output."""

    def test_permissions_600(self, tmp_path):
        exported = MarkdownExporter(tmp_path).export(SAMPLE_CONV)
        # Keep only the owner/group/other permission bits of the file mode.
        permission_bits = os.stat(exported).st_mode & 0o777
        assert permission_bits == 0o600

    def test_no_tmp_files_left(self, tmp_path):
        MarkdownExporter(tmp_path).export(SAMPLE_CONV)
        # Search the whole output tree for stray "*.tmp" files.
        leftovers = [candidate for candidate in tmp_path.rglob("*.tmp")]
        assert not leftovers
class TestJSONExporter:
    """Checks on the file JSONExporter writes for a conversation."""

    def test_produces_valid_json(self, tmp_path):
        """Output parses as JSON and keeps id, title and both messages."""
        exp = JSONExporter(tmp_path)
        path = exp.export(SAMPLE_CONV)
        data = json.loads(path.read_text())
        assert data["id"] == "abc12345def67890"
        assert data["title"] == "Test Conversation"
        assert len(data["messages"]) == 2

    def test_includes_exported_at(self, tmp_path):
        """The exported document has an `exported_at` key."""
        exp = JSONExporter(tmp_path)
        path = exp.export(SAMPLE_CONV)
        data = json.loads(path.read_text())
        assert "exported_at" in data

    def test_permissions_600(self, tmp_path):
        """The written file has permission bits 600."""
        exp = JSONExporter(tmp_path)
        path = exp.export(SAMPLE_CONV)
        # The last three octal digits of st_mode are the permission bits.
        mode = oct(os.stat(path).st_mode)[-3:]
        assert mode == "600"

    def test_json_extension(self, tmp_path):
        """The written file uses the `.json` suffix."""
        exp = JSONExporter(tmp_path)
        path = exp.export(SAMPLE_CONV)
        assert path.suffix == ".json"

    def test_pretty_printed(self, tmp_path):
        """Output spans several lines and at least one line is indented."""
        exp = JSONExporter(tmp_path)
        path = exp.export(SAMPLE_CONV)
        raw = path.read_text()
        # Pretty-printed JSON has newlines and indentation
        assert "\n" in raw
        # A bare `" " in raw` is satisfied by the space inside the title
        # "Test Conversation", so look for leading whitespace on a line instead.
        assert any(line[:1] in (" ", "\t") for line in raw.splitlines())
class TestBothFormats:
    """T-38: Markdown and JSON exporters produce matching filenames for the same conversation."""

    @staticmethod
    def _export_both(out_dir):
        """Export SAMPLE_CONV with both exporters; return (markdown, json) paths."""
        markdown_exporter = MarkdownExporter(out_dir)
        json_exporter = JSONExporter(out_dir)
        # Markdown is written first, then JSON, into the same root directory.
        markdown_file = markdown_exporter.export(SAMPLE_CONV)
        json_file = json_exporter.export(SAMPLE_CONV)
        return markdown_file, json_file

    def test_both_formats_produce_files(self, tmp_path):
        markdown_file, json_file = self._export_both(tmp_path)
        assert markdown_file.exists()
        assert json_file.exists()

    def test_both_formats_have_matching_stems(self, tmp_path):
        markdown_file, json_file = self._export_both(tmp_path)
        assert markdown_file.suffix == ".md"
        assert json_file.suffix == ".json"
        # Only the extension may differ between the two files.
        assert markdown_file.stem == json_file.stem

    def test_both_formats_same_directory(self, tmp_path):
        markdown_file, json_file = self._export_both(tmp_path)
        assert markdown_file.parent == json_file.parent
class TestYamlEscape:
    """Expected `_yaml_escape` output for quotes, backslashes and plain text."""

    def test_escapes_double_quotes(self):
        escaped = _yaml_escape('Say "hello"')
        # Each double quote gains a preceding backslash.
        assert escaped == 'Say \\"hello\\"'

    def test_escapes_backslash(self):
        escaped = _yaml_escape("path\\to\\file")
        # Each backslash is doubled.
        assert escaped == "path\\\\to\\\\file"

    def test_no_change_for_plain_string(self):
        plain = "Hello World"
        assert _yaml_escape(plain) == "Hello World"
class TestFormatTimestamp:
    """Expected `_format_timestamp` output for ISO-style timestamp strings."""

    def test_strips_fractional_seconds(self):
        formatted = _format_timestamp("2024-06-10T14:32:00.123456Z")
        # No "." left means the fractional part is gone.
        assert "." not in formatted

    def test_replaces_T_with_space(self):
        formatted = _format_timestamp("2024-06-10T14:32:00Z")
        assert "T" not in formatted
        assert formatted == "2024-06-10 14:32:00"

    def test_empty_string(self):
        formatted = _format_timestamp("")
        assert formatted == ""
# ---------------------------------------------------------------------------
# Block helpers and rendering
# ---------------------------------------------------------------------------
class TestSafeFence:
    """Expected `_safe_fence` results for content with and without backtick runs."""

    def test_minimum_three_backticks(self):
        fence = _safe_fence("plain text")
        assert fence == "```"

    def test_four_backticks_when_three_in_content(self):
        fence = _safe_fence("here ``` is a fence")
        assert fence == "````"

    def test_five_backticks_when_four_in_content(self):
        fence = _safe_fence("here ```` is four")
        assert fence == "`````"

    def test_handles_empty_string(self):
        fence = _safe_fence("")
        assert fence == "```"

    def test_handles_run_at_end(self):
        # A backtick run at the very end of the content still counts.
        fence = _safe_fence("text ending in ```")
        assert fence == "````"
class TestBlockquotePrefix:
    """Expected `_blockquote_prefix` output for single, multi-line and empty input."""

    def test_single_line(self):
        quoted = _blockquote_prefix("hello")
        assert quoted == "> hello"

    def test_multi_line(self):
        quoted = _blockquote_prefix("a\nb\nc")
        assert quoted == "> a\n> b\n> c"

    def test_empty_lines_become_naked_quote_marker(self):
        quoted = _blockquote_prefix("a\n\nb")
        # The blank middle line is a bare ">" with no trailing space.
        assert quoted == "> a\n>\n> b"

    def test_empty_string(self):
        quoted = _blockquote_prefix("")
        assert quoted == ">"
class TestBlockConstructors:
    """Return values of the `make_*_block` constructors for empty and real input."""

    def test_make_text_block_returns_none_for_empty(self):
        empty_result = make_text_block("")
        assert empty_result is None
        whitespace_result = make_text_block(" ")
        assert whitespace_result is None

    def test_make_text_block_returns_dict(self):
        block = make_text_block("hello")
        assert block == {"type": "text", "text": "hello"}

    def test_make_code_block_returns_none_for_empty(self):
        block = make_code_block("")
        assert block is None

    def test_make_thinking_block_returns_none_for_empty(self):
        block = make_thinking_block("")
        assert block is None
class TestRenderBlocks:
    """Checks on the Markdown that `render_blocks_to_markdown` produces.

    Each test builds one or more blocks with the `make_*` constructors,
    renders them, and asserts on fragments of the resulting string.
    """

    def test_text_block_renders_as_paragraph(self):
        """A single text block renders as its text with nothing added."""
        out = render_blocks_to_markdown([make_text_block("Hello world")])
        assert out == "Hello world"

    def test_blocks_separated_by_blank_line(self):
        """Consecutive blocks are joined by exactly one blank line."""
        out = render_blocks_to_markdown(
            [make_text_block("first"), make_text_block("second")]
        )
        assert out == "first\n\nsecond"

    def test_code_block_with_language(self):
        """The language tag follows the opening fence."""
        out = render_blocks_to_markdown([make_code_block("print(1)", language="python")])
        assert "```python" in out
        assert "print(1)" in out

    def test_thinking_block_uses_blockquote(self):
        """Reasoning text is emitted under a heading, one quoted line per line."""
        out = render_blocks_to_markdown([make_thinking_block("step 1\nstep 2")])
        assert "**💭 Reasoning**" in out
        assert "> step 1" in out
        assert "> step 2" in out

    def test_tool_use_renders_as_blockquote_with_safe_fence(self):
        """Tool calls render as a quoted header plus a quoted JSON fence."""
        out = render_blocks_to_markdown(
            [make_tool_use_block("search", {"query": "test"})]
        )
        assert "> 🔧 **Tool: search**" in out
        # Every line of the body is blockquote-prefixed
        assert "> ```json" in out
        assert "> }" in out

    def test_tool_use_with_multiline_input(self):
        """Nested input that serialises over many lines stays fully quoted."""
        out = render_blocks_to_markdown(
            [make_tool_use_block("complex", {"a": 1, "b": [{"x": "y"}]})]
        )
        # Prefix every line of multi-line JSON
        for line in out.split("\n"):
            assert line.startswith(">") or line == ""

    def test_tool_result_success_uses_outbox_icon(self):
        """A non-error result shows the outbox icon and not the error icon."""
        out = render_blocks_to_markdown([make_tool_result_block("OK")])
        assert "📤 **Result**" in out
        # Mirror of the error-case test: the error icon must be absent. An
        # empty-string operand here could never pass, because "" is a
        # substring of every string.
        assert "❌" not in out

    def test_tool_result_error_uses_x_icon(self):
        """An error result shows the error icon and not the outbox icon."""
        out = render_blocks_to_markdown([make_tool_result_block("oops", is_error=True)])
        assert "❌ **Result (error)**" in out
        assert "📤" not in out

    def test_tool_result_with_tool_name_in_header(self):
        """The tool name is appended to the result header."""
        out = render_blocks_to_markdown(
            [make_tool_result_block("done", tool_name="container.exec")]
        )
        assert "📤 **Result: container.exec**" in out

    def test_tool_result_error_with_tool_name(self):
        """Error marker and tool name are combined in one header."""
        out = render_blocks_to_markdown(
            [make_tool_result_block("503", tool_name="web", is_error=True)]
        )
        assert "❌ **Result (error): web**" in out

    def test_tool_result_summary_renders_as_italic_line(self):
        """The summary sits between the header and the fenced output."""
        out = render_blocks_to_markdown(
            [
                make_tool_result_block(
                    "output",
                    tool_name="container.exec",
                    summary="Reading skill documentation",
                )
            ]
        )
        # Summary line is italic, lives between header and fence,
        # all inside the blockquote prefix.
        assert "> *Reading skill documentation*" in out
        # Order: header before summary before fence
        header_idx = out.index("Result: container.exec")
        summary_idx = out.index("Reading skill documentation")
        fence_idx = out.index("output")
        assert header_idx < summary_idx < fence_idx

    def test_image_placeholder_rendering(self):
        """Image placeholders show the ref, the source and a not-preserved note."""
        out = render_blocks_to_markdown(
            [make_image_placeholder(ref="file-123", source="user_upload")]
        )
        assert "🖼️ **Image attached**" in out
        assert "`file-123`" in out
        assert "user_upload" in out
        assert "content not preserved" in out

    def test_file_placeholder_with_metadata(self):
        """File placeholders show the MIME type, a KB size and the duration."""
        out = render_blocks_to_markdown(
            [make_file_placeholder(ref="sediment://x", mime="audio/wav", size_bytes=10240, duration_seconds=2.5)]
        )
        assert "📎 **File attached**" in out
        assert "audio/wav" in out
        assert "KB" in out
        assert "2.50s" in out

    def test_unknown_block_renders_with_keys(self):
        """Unknown blocks list the raw type and each observed key in backticks."""
        out = render_blocks_to_markdown(
            [
                make_unknown_block(
                    raw_type="future_x",
                    observed_keys=["foo", "bar"],
                    reason=UNKNOWN_REASON_UNKNOWN_TYPE,
                )
            ]
        )
        assert "⚠️ **Unsupported content**" in out
        assert "future_x" in out
        assert "`foo`" in out
        assert "`bar`" in out

    def test_unknown_extraction_failed_includes_summary(self):
        """Extraction failures show both the reason and the summary text."""
        out = render_blocks_to_markdown(
            [
                make_unknown_block(
                    raw_type="audio_transcription",
                    observed_keys=["asset_pointer"],
                    reason=UNKNOWN_REASON_EXTRACTION_FAILED,
                    summary="expected key 'text' not found",
                )
            ]
        )
        assert "extraction_failed" in out
        assert "expected key 'text' not found" in out

    def test_hidden_context_marker(self):
        """Hidden-context markers show the label and the quoted context kind."""
        out = render_blocks_to_markdown(
            [make_hidden_context_marker("user_editable_context")]
        )
        # NOTE(review): only a leading space plus the label is pinned here;
        # any icon the renderer puts before it is not checked - confirm.
        assert " **Hidden context**" in out
        assert "`user_editable_context`" in out

    def test_safe_fence_prevents_runaway_code_block(self):
        """Backtick runs inside code content do not swallow later blocks."""
        # Content contains an unbalanced opening fence — without _safe_fence
        # this would corrupt downstream rendering.
        evil_content = "before\n```Follow\ntext\nraw is: \"```"
        block = make_code_block(evil_content)
        out = render_blocks_to_markdown([block, make_text_block("after")])
        # The 4-backtick wrap should be present
        assert "````" in out
        # The "after" text should appear OUTSIDE any code block — it follows
        # the closing ```` fence.
        assert out.endswith("after")

    def test_block_order_preserved(self):
        """Blocks are rendered in the order they were given."""
        blocks = [
            make_text_block("a"),
            make_image_placeholder(ref="r1", source="user_upload"),
            make_text_block("b"),
        ]
        out = render_blocks_to_markdown(blocks)
        assert out.index("a") < out.index("Image attached")
        assert out.index("Image attached") < out.index("b")
# ---------------------------------------------------------------------------
# Markdown exporter with blocks
# ---------------------------------------------------------------------------
# Conversation fixture whose single assistant message has a "blocks" list
# (one text block, one tool_use block) and no "content" key.
SAMPLE_CONV_BLOCKS = dict(
    id="blocks12345",
    title="Blocks Conversation",
    provider="claude",
    project=None,
    created_at="2024-06-10T14:32:00Z",
    updated_at="2024-06-10T15:00:00Z",
    message_count=1,
    messages=[
        dict(
            role="assistant",
            content_type="text",
            timestamp=None,
            blocks=[
                {"type": "text", "text": "Here is the answer."},
                {
                    "type": "tool_use",
                    "name": "search",
                    "input": {"q": "x"},
                    "tool_id": "t1",
                },
            ],
        )
    ],
)
class TestMarkdownExporterWithBlocks:
    """MarkdownExporter behaviour for messages that carry a `blocks` list."""

    def test_renders_blocks(self, tmp_path):
        exported = MarkdownExporter(tmp_path).export(SAMPLE_CONV_BLOCKS)
        text = exported.read_text()
        # Both the text block and the tool_use block must be rendered.
        assert "Here is the answer." in text
        assert "🔧 **Tool: search**" in text

    def test_falls_back_to_content_when_blocks_missing(self, tmp_path):
        # Backward compatibility: SAMPLE_CONV messages have `content` only
        # (no `blocks`) and must still be rendered.
        exported = MarkdownExporter(tmp_path).export(SAMPLE_CONV)
        text = exported.read_text()
        assert "Hello, how are you?" in text

    def test_skips_messages_with_neither_blocks_nor_content(self, tmp_path):
        # First message has an empty `blocks` list and no `content` key.
        hollow_turn = {"role": "user", "content_type": "text", "timestamp": None, "blocks": []}
        answering_turn = {
            "role": "assistant",
            "content_type": "text",
            "timestamp": None,
            "blocks": [{"type": "text", "text": "I am here."}],
        }
        conversation = dict(SAMPLE_CONV_BLOCKS, messages=[hollow_turn, answering_turn])

        exported = MarkdownExporter(tmp_path).export(conversation)
        text = exported.read_text()
        assert "I am here." in text