Extracts per-message content into a typed `blocks` list (text, code, thinking, tool_use, tool_result, image_placeholder, file_placeholder, unknown) and renders them at exporter write time. Voice transcripts, Custom Instructions, and image references now appear in exports instead of being silently dropped. Foundation: - src/blocks.py: pure block constructors, _safe_fence (fence-corruption defense, verified live in Joplin), _blockquote_prefix, render - src/loss_report.py: per-run tally surfaced as INFO summary at end of export so silently-dropped data becomes visible Providers: - ChatGPT: dispatch on content_type produces typed blocks; voice shapes (audio_transcription, audio_asset_pointer, real_time_user_audio_video_asset_pointer) locked from live DevTools capture; Custom Instructions bug fix (parts-vs-direct-fields); role filter lifted; hidden-context marker driven by is_visually_hidden_from_conversation flag - Claude: defensive dispatch for text/thinking/tool_use/tool_result/image with recursive nested-block flattening; untested against real rich-content data — fix-forward in v0.4.1 Exporter: - Markdown renders from blocks at write time via render_blocks_to_markdown; backward-compat fallback to content for any pre-v0.4.0 cached data Tests: - 27 new tests across providers, exporters, CLI; fixtures rebuilt with real-shape ChatGPT voice + Custom Instructions cases - 181/181 pass Behavior changes (intentional): - JSON output omits content; consumers should read blocks - Per-conversation message counts increase (Custom Instructions, image-only, tool-only messages now appear) - Existing exports not auto-re-rendered; users wanting fresh output run cache --clear then export Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
177 lines
6.5 KiB
Python
177 lines
6.5 KiB
Python
"""CLI-level tests using Click's CliRunner — no live API calls required."""
|
|
|
|
import pytest
|
|
from click.testing import CliRunner
|
|
|
|
from src.cache import Cache
|
|
from src.main import _filter_by_project, cli
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# _filter_by_project (T-27)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestFilterByProject:
    """Exercise ``_filter_by_project`` against ChatGPT- and Claude-shaped records."""

    def _chatgpt(self, conv_id, project_name):
        """Build a ChatGPT-style record; the project lives under ``_project_name``."""
        record = {"id": conv_id}
        record["_project_name"] = project_name
        return record

    def _claude(self, conv_id, project_name):
        """Build a Claude-style record; the project is a ``{"name": ...}`` dict or None."""
        if project_name:
            return {"id": conv_id, "project": {"name": project_name}}
        return {"id": conv_id, "project": None}

    def test_none_filter_keeps_no_project_chatgpt(self):
        """The "none" filter keeps only the ChatGPT record without a project."""
        without_project = self._chatgpt("a", None)
        with_project = self._chatgpt("b", "Python Course")
        kept = _filter_by_project([without_project, with_project], "none")
        assert len(kept) == 1
        assert kept[0]["id"] == "a"

    def test_none_filter_keeps_no_project_claude(self):
        """The "none" filter keeps only the Claude record without a project."""
        without_project = self._claude("a", None)
        with_project = self._claude("b", "Python Course")
        kept = _filter_by_project([without_project, with_project], "none")
        assert len(kept) == 1
        assert kept[0]["id"] == "a"

    def test_name_filter_case_insensitive(self):
        """An upper-case filter still matches a mixed-case project name."""
        names_by_id = (("a", "Python Course"), ("b", "Java Course"), ("c", None))
        records = [self._chatgpt(conv_id, name) for conv_id, name in names_by_id]
        kept = _filter_by_project(records, "PYTHON")
        assert len(kept) == 1
        assert kept[0]["id"] == "a"

    def test_name_filter_substring_match(self):
        """Every project whose name contains the filter text is kept."""
        names_by_id = (
            ("a", "Python Advanced Course"),
            ("b", "Python Basics"),
            ("c", "JavaScript"),
        )
        records = [self._chatgpt(conv_id, name) for conv_id, name in names_by_id]
        kept = _filter_by_project(records, "python")
        assert len(kept) == 2
        kept_ids = {record["id"] for record in kept}
        assert kept_ids == {"a", "b"}

    def test_no_matches_returns_empty(self):
        """A filter that matches nothing yields an empty list."""
        records = [
            self._chatgpt("a", "Python Course"),
            self._chatgpt("b", None),
        ]
        assert _filter_by_project(records, "ruby") == []

    def test_none_filter_excludes_all_with_projects(self):
        """The "none" filter yields an empty list when every record has a project."""
        records = [
            self._chatgpt("a", "Project A"),
            self._chatgpt("b", "Project B"),
        ]
        assert _filter_by_project(records, "none") == []

    def test_empty_string_project_treated_as_no_project(self):
        """An empty-string project name counts as "no project" for the "none" filter."""
        blank = {"id": "a", "_project_name": ""}
        named = {"id": "b", "_project_name": "Real"}
        kept = _filter_by_project([blank, named], "none")
        assert len(kept) == 1
        assert kept[0]["id"] == "a"

    def test_claude_project_string_matched(self):
        """A Claude record whose ``project`` is a plain string is matched by name."""
        # Claude can also have project as a plain string
        as_string = {"id": "a", "project": "python-course"}
        missing = {"id": "b", "project": None}
        kept = _filter_by_project([as_string, missing], "python")
        assert len(kept) == 1
        assert kept[0]["id"] == "a"
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# export --since validation (T-25)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestExportSinceValidation:
    """Check how ``export --since`` reacts to malformed and well-formed dates.

    The CLI is driven in-process through Click's ``CliRunner``; the cache and
    export directories are pointed at pytest's ``tmp_path`` via environment
    variables.
    """

    def _pre_populated_cache(self, tmp_path) -> Cache:
        """Create a cache that passes the ToS gate and first-run doctor check."""
        cache = Cache(tmp_path)
        cache.acknowledge_tos()
        cache.mark_exported("chatgpt", "dummy-conv", {"updated_at": "2024-01-01T00:00:00Z"})
        return cache

    def _invoke_export_since(self, tmp_path, since):
        """Run ``export --since <since>`` against a pre-populated cache.

        Args:
            tmp_path: Directory used for both the cache and the export output.
            since: Raw value passed to the ``--since`` option.

        Returns:
            The Click ``Result`` of the invocation.
        """
        self._pre_populated_cache(tmp_path)

        # Deliberately no ``mix_stderr=True``: Click 8.2 removed that keyword
        # (passing it raises TypeError), while older releases already default
        # to mixing. Either way ``result.output`` carries stdout and stderr.
        runner = CliRunner()
        return runner.invoke(
            cli,
            ["--no-log-file", "export", "--since", since],
            env={
                "CHATGPT_SESSION_TOKEN": "eyJtesttoken",
                "CACHE_DIR": str(tmp_path),
                "EXPORT_DIR": str(tmp_path / "exports"),
            },
        )

    def test_invalid_since_date_exits_with_error(self, tmp_path):
        """An unparseable date exits with code 1 and names the expected format."""
        result = self._invoke_export_since(tmp_path, "notadate")
        assert result.exit_code == 1
        assert "Invalid --since date" in result.output
        assert "YYYY-MM-DD" in result.output

    def test_valid_since_date_does_not_error(self, tmp_path):
        """A valid date should not produce the invalid-date error (may fail later on API)."""
        result = self._invoke_export_since(tmp_path, "2024-01-01")
        # Only the date validation is under test; the exit code is not checked
        # because the command may still fail after validation.
        assert "Invalid --since date" not in result.output
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# LossReport summary
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestLossReportSummary:
    """Pin the text produced by ``LossReport.format_summary()``.

    Covers the empty report, the top-5 breakdown with its overflow line, and
    the conversation/message counters.
    """

    def test_zero_summary_uses_none_sentinel(self):
        """A fresh report prints zero counts and two "(none)" placeholders."""
        from src.loss_report import LossReport

        summary = LossReport().format_summary()
        for expected in (
            "[export] Run summary:",
            "conversations: 0",
            "messages rendered: 0",
        ):
            assert expected in summary
        # Both "(none)" sentinels present — never empty parens
        assert summary.count("(none)") == 2

    def test_top_5_breakdown(self):
        """Seven unknown types show the most common one plus a "+ 2 more types" line."""
        from src.loss_report import LossReport

        tally = LossReport()
        # 'a' is recorded five times so it is the most common type
        for _ in range(5):
            tally.record_unknown("a")
        # The other six types are recorded once each
        for rare_type in "bcdefg":
            tally.record_unknown(rare_type)

        summary = tally.format_summary()
        # Top entry shown
        assert "a=5" in summary
        # Overflow line present (7 types, top 5 + 2 more)
        assert "+ 2 more types" in summary

    def test_messages_and_conversations_recorded(self):
        """One conversation and two messages are reflected in the summary."""
        from src.loss_report import LossReport

        tally = LossReport()
        tally.record_conversation()
        for _ in range(2):
            tally.record_message()

        summary = tally.format_summary()
        assert "conversations: 1" in summary
        assert "messages rendered: 2" in summary
|