feat: v0.4.0 — rich content support with typed blocks and loss visibility

Extracts per-message content into a typed `blocks` list (text, code, thinking, tool_use, tool_result, image_placeholder, file_placeholder, unknown) and renders them at exporter write time. Voice transcripts, Custom Instructions, and image references now appear in exports instead of being silently dropped.

Foundation:
- src/blocks.py: pure block constructors, _safe_fence (fence-corruption defense, verified live in Joplin), _blockquote_prefix, render
- src/loss_report.py: per-run tally surfaced as INFO summary at end of export so silently-dropped data becomes visible

Providers:
- ChatGPT: dispatch on content_type produces typed blocks; voice shapes (audio_transcription, audio_asset_pointer, real_time_user_audio_video_asset_pointer) locked from live DevTools capture; Custom Instructions bug fix (parts-vs-direct-fields); role filter lifted; hidden-context marker driven by is_visually_hidden_from_conversation flag
- Claude: defensive dispatch for text/thinking/tool_use/tool_result/image with recursive nested-block flattening; untested against real rich-content data — fix-forward in v0.4.1

Exporter:
- Markdown renders from blocks at write time via render_blocks_to_markdown; backward-compat fallback to content for any pre-v0.4.0 cached data

Tests:
- 27 new tests across providers, exporters, CLI; fixtures rebuilt with real-shape ChatGPT voice + Custom Instructions cases
- 181/181 pass

Behavior changes (intentional):
- JSON output omits content; consumers should read blocks
- Per-conversation message counts increase (Custom Instructions, image-only, tool-only messages now appear)
- Existing exports not auto-re-rendered; users wanting fresh output run cache --clear then export

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-04 23:17:18 -04:00
parent 4798edcea7
commit 473d02f71a
16 changed files with 1786 additions and 232 deletions
--- a/tests/test_exporters.py
+++ b/tests/test_exporters.py
@@ -1,4 +1,4 @@
-"""Unit tests for src/exporters/."""
+"""Unit tests for src/exporters/ and src/blocks.py."""

 import json
 import os
@@ -7,6 +7,23 @@ from pathlib import Path

 import pytest

+from src.blocks import (
+    BLOCK_TYPE_TEXT,
+    UNKNOWN_REASON_EXTRACTION_FAILED,
+    UNKNOWN_REASON_UNKNOWN_TYPE,
+    _blockquote_prefix,
+    _safe_fence,
+    make_code_block,
+    make_file_placeholder,
+    make_hidden_context_marker,
+    make_image_placeholder,
+    make_text_block,
+    make_thinking_block,
+    make_tool_result_block,
+    make_tool_use_block,
+    make_unknown_block,
+    render_blocks_to_markdown,
+)
 from src.exporters.markdown import MarkdownExporter, _yaml_escape, _format_timestamp
 from src.exporters.json_export import JSONExporter

@@ -250,3 +267,240 @@ class TestFormatTimestamp:

    def test_empty_string(self):
        assert _format_timestamp("") == ""
+
+
+# ---------------------------------------------------------------------------
+# Block helpers and rendering
+# ---------------------------------------------------------------------------
+
+
+class TestSafeFence:
+    """Pin the fence string returned by `_safe_fence` for given content.
+
+    The cases assert a floor of three backticks, and one backtick more
+    than the backtick run that appears in the content (3 -> 4, 4 -> 5).
+    """
+
+    def test_minimum_three_backticks(self):
+        # Content without any backticks gets the plain three-backtick fence.
+        assert _safe_fence("plain text") == "```"
+
+    def test_four_backticks_when_three_in_content(self):
+        assert _safe_fence("here ``` is a fence") == "````"
+
+    def test_five_backticks_when_four_in_content(self):
+        assert _safe_fence("here ```` is four") == "`````"
+
+    def test_handles_empty_string(self):
+        assert _safe_fence("") == "```"
+
+    def test_handles_run_at_end(self):
+        # Trailing run still counted
+        assert _safe_fence("text ending in ```") == "````"
+
+
+class TestBlockquotePrefix:
+    """Pin the output of `_blockquote_prefix`.
+
+    The cases assert that each non-empty line is prefixed with "> " and
+    that an empty line (or the empty string) becomes a bare ">" with no
+    trailing space.
+    """
+
+    def test_single_line(self):
+        assert _blockquote_prefix("hello") == "> hello"
+
+    def test_multi_line(self):
+        assert _blockquote_prefix("a\nb\nc") == "> a\n> b\n> c"
+
+    def test_empty_lines_become_naked_quote_marker(self):
+        # The blank middle line is expected as ">" rather than "> ".
+        assert _blockquote_prefix("a\n\nb") == "> a\n>\n> b"
+
+    def test_empty_string(self):
+        assert _blockquote_prefix("") == ">"
+
+
+class TestBlockConstructors:
+    """Check the return values of the `make_*_block` constructors.
+
+    Empty input is expected to yield None for text, code and thinking
+    blocks; non-empty text is expected to yield a plain dict.
+    """
+
+    def test_make_text_block_returns_none_for_empty(self):
+        # Whitespace-only text is treated the same as an empty string.
+        assert make_text_block("") is None
+        assert make_text_block("   ") is None
+
+    def test_make_text_block_returns_dict(self):
+        b = make_text_block("hello")
+        assert b == {"type": "text", "text": "hello"}
+
+    def test_make_code_block_returns_none_for_empty(self):
+        assert make_code_block("") is None
+
+    def test_make_thinking_block_returns_none_for_empty(self):
+        assert make_thinking_block("") is None
+
+
+class TestRenderBlocks:
+    """Exercise `render_blocks_to_markdown` for each block type.
+
+    Most cases build one block with a `make_*` constructor, render it, and
+    check the returned Markdown string for expected markers (headings,
+    icons, blockquote prefixes, fences). Two cases render several blocks
+    to check separators and ordering.
+    """
+
+    def test_text_block_renders_as_paragraph(self):
+        out = render_blocks_to_markdown([make_text_block("Hello world")])
+        assert out == "Hello world"
+
+    def test_blocks_separated_by_blank_line(self):
+        out = render_blocks_to_markdown(
+            [make_text_block("first"), make_text_block("second")]
+        )
+        assert out == "first\n\nsecond"
+
+    def test_code_block_with_language(self):
+        out = render_blocks_to_markdown([make_code_block("print(1)", language="python")])
+        assert "```python" in out
+        assert "print(1)" in out
+
+    def test_thinking_block_uses_blockquote(self):
+        out = render_blocks_to_markdown([make_thinking_block("step 1\nstep 2")])
+        assert "**💭 Reasoning**" in out
+        assert "> step 1" in out
+        assert "> step 2" in out
+
+    def test_tool_use_renders_as_blockquote_with_safe_fence(self):
+        out = render_blocks_to_markdown(
+            [make_tool_use_block("search", {"query": "test"})]
+        )
+        assert "> 🔧 **Tool: search**" in out
+        # Every line of the body is blockquote-prefixed
+        assert "> ```json" in out
+        assert "> }" in out
+
+    def test_tool_use_with_multiline_input(self):
+        out = render_blocks_to_markdown(
+            [make_tool_use_block("complex", {"a": 1, "b": [{"x": "y"}]})]
+        )
+        # Prefix every line of multi-line JSON
+        for line in out.split("\n"):
+            assert line.startswith(">") or line == ""
+
+    def test_tool_result_success_uses_outbox_icon(self):
+        out = render_blocks_to_markdown([make_tool_result_block("OK")])
+        assert "📤 **Result**" in out
+        assert "❌" not in out
+
+    def test_tool_result_error_uses_x_icon(self):
+        out = render_blocks_to_markdown([make_tool_result_block("oops", is_error=True)])
+        assert "❌ **Result (error)**" in out
+        assert "📤" not in out
+
+    def test_image_placeholder_rendering(self):
+        out = render_blocks_to_markdown(
+            [make_image_placeholder(ref="file-123", source="user_upload")]
+        )
+        assert "🖼️ **Image attached**" in out
+        assert "`file-123`" in out
+        assert "user_upload" in out
+        assert "content not preserved" in out
+
+    def test_file_placeholder_with_metadata(self):
+        out = render_blocks_to_markdown(
+            [make_file_placeholder(ref="sediment://x", mime="audio/wav", size_bytes=10240, duration_seconds=2.5)]
+        )
+        assert "📎 **File attached**" in out
+        assert "audio/wav" in out
+        # Only the unit and the two-decimal duration are pinned, not the
+        # exact size figure.
+        assert "KB" in out
+        assert "2.50s" in out
+
+    def test_unknown_block_renders_with_keys(self):
+        out = render_blocks_to_markdown(
+            [
+                make_unknown_block(
+                    raw_type="future_x",
+                    observed_keys=["foo", "bar"],
+                    reason=UNKNOWN_REASON_UNKNOWN_TYPE,
+                )
+            ]
+        )
+        assert "⚠️ **Unsupported content**" in out
+        assert "future_x" in out
+        assert "`foo`" in out
+        assert "`bar`" in out
+
+    def test_unknown_extraction_failed_includes_summary(self):
+        out = render_blocks_to_markdown(
+            [
+                make_unknown_block(
+                    raw_type="audio_transcription",
+                    observed_keys=["asset_pointer"],
+                    reason=UNKNOWN_REASON_EXTRACTION_FAILED,
+                    summary="expected key 'text' not found",
+                )
+            ]
+        )
+        assert "extraction_failed" in out
+        assert "expected key 'text' not found" in out
+
+    def test_hidden_context_marker(self):
+        out = render_blocks_to_markdown(
+            [make_hidden_context_marker("user_editable_context")]
+        )
+        assert "ℹ️ **Hidden context**" in out
+        assert "`user_editable_context`" in out
+
+    def test_safe_fence_prevents_runaway_code_block(self):
+        # Content contains an unbalanced opening fence — without _safe_fence
+        # this would corrupt downstream rendering.
+        evil_content = "before\n```Follow\ntext\nraw is: \"```"
+        block = make_code_block(evil_content)
+        out = render_blocks_to_markdown([block, make_text_block("after")])
+        # The 4-backtick wrap should be present
+        assert "````" in out
+        # The "after" text should appear OUTSIDE any code block — it follows
+        # the closing ```` fence.
+        assert out.endswith("after")
+
+    def test_block_order_preserved(self):
+        blocks = [
+            make_text_block("a"),
+            make_image_placeholder(ref="r1", source="user_upload"),
+            make_text_block("b"),
+        ]
+        out = render_blocks_to_markdown(blocks)
+        # NOTE(review): str.index returns the FIRST occurrence, so these
+        # single-letter markers assume the rendered image placeholder does
+        # not itself contain an earlier "b" — confirm, or use longer markers.
+        assert out.index("a") < out.index("Image attached")
+        assert out.index("Image attached") < out.index("b")
+
+
+# ---------------------------------------------------------------------------
+# Markdown exporter with blocks
+# ---------------------------------------------------------------------------
+
+
+# Fixture: a one-message conversation dict whose single assistant message
+# carries a `blocks` list (one text block, one tool_use block) and has no
+# `content` key.
+SAMPLE_CONV_BLOCKS = {
+    "id": "blocks12345",
+    "title": "Blocks Conversation",
+    "provider": "claude",
+    "project": None,
+    "created_at": "2024-06-10T14:32:00Z",
+    "updated_at": "2024-06-10T15:00:00Z",
+    "message_count": 1,
+    "messages": [
+        {
+            "role": "assistant",
+            "content_type": "text",
+            "timestamp": None,
+            "blocks": [
+                {"type": "text", "text": "Here is the answer."},
+                {"type": "tool_use", "name": "search", "input": {"q": "x"}, "tool_id": "t1"},
+            ],
+        }
+    ],
+}
+
+
+class TestMarkdownExporterWithBlocks:
+    """Check `MarkdownExporter.export` on conversations that carry `blocks`.
+
+    Each test exports into pytest's `tmp_path`, reads the written file
+    back, and looks for expected substrings in the body.
+    """
+
+    # NOTE(review): read_text() is called without an explicit encoding and
+    # the expected body contains non-ASCII characters (emoji) — confirm the
+    # exporter's write encoding matches the platform default on all CI hosts.
+
+    def test_renders_blocks(self, tmp_path):
+        exp = MarkdownExporter(tmp_path)
+        path = exp.export(SAMPLE_CONV_BLOCKS)
+        body = path.read_text()
+        assert "Here is the answer." in body
+        assert "🔧 **Tool: search**" in body
+
+    def test_falls_back_to_content_when_blocks_missing(self, tmp_path):
+        # Backward-compat: messages with `content` only (no `blocks`) still render.
+        exp = MarkdownExporter(tmp_path)
+        path = exp.export(SAMPLE_CONV)  # SAMPLE_CONV has content only, no blocks
+        body = path.read_text()
+        assert "Hello, how are you?" in body
+
+    def test_skips_messages_with_neither_blocks_nor_content(self, tmp_path):
+        # Override only `messages`: the first has an empty `blocks` list and
+        # no `content`; the second has one text block.
+        conv = {
+            **SAMPLE_CONV_BLOCKS,
+            "messages": [
+                {"role": "user", "content_type": "text", "timestamp": None, "blocks": []},
+                {"role": "assistant", "content_type": "text", "timestamp": None, "blocks": [
+                    {"type": "text", "text": "I am here."}
+                ]},
+            ],
+        }
+        exp = MarkdownExporter(tmp_path)
+        path = exp.export(conv)
+        body = path.read_text()
+        # NOTE(review): this only verifies that the non-empty message is
+        # rendered; nothing here asserts that the empty message was skipped.
+        assert "I am here." in body