feat: add markdown and JSON exporters

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
JesseMarkowitz
2026-02-27 23:03:58 -05:00
parent f4ef937aa1
commit d1cac3ce04
2 changed files with 272 additions and 0 deletions

View File

@@ -0,0 +1,77 @@
"""JSON exporter — writes the normalized conversation schema as pretty-printed JSON."""
import json
import logging
import os
from datetime import datetime, timezone
from pathlib import Path
from src.utils import build_export_path, generate_filename
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
class JSONExporter:
    """Export a normalized conversation dict to a pretty-printed JSON file.

    The output is a shallow copy of the conversation with an added
    ``exported_at`` UTC timestamp, serialized with ``indent=2``. Values the
    ``json`` module cannot serialize natively are stringified (``default=str``).
    All writes are atomic (tmp → os.replace) with 600 permissions.
    """

    def __init__(self, export_dir: Path, output_structure: str = "provider/project/year") -> None:
        """Store the export root and the directory layout.

        Args:
            export_dir: Root directory under which exports are written.
            output_structure: Layout string forwarded to ``build_export_path``.
        """
        self._export_dir = Path(export_dir)
        self._structure = output_structure

    @staticmethod
    def _json_filename(md_filename: str) -> str:
        """Swap a trailing ``.md`` extension for ``.json``.

        Only the final extension is rewritten, so a ``.md`` that appears
        earlier in the name (for example inside a title slug) is preserved.
        A name without a trailing ``.md`` is returned unchanged.
        """
        if md_filename.endswith(".md"):
            return md_filename[: -len(".md")] + ".json"
        return md_filename

    def export(self, conversation: dict) -> Path:
        """Write a conversation to a JSON file and return the output path.

        Args:
            conversation: Normalized conversation dict.

        Returns:
            Path to the written file.

        Raises:
            OSError: If the target directory or file cannot be written.
        """
        # ``or`` rather than a .get() default so that keys present with a
        # None/empty value fall back the same way missing keys do.
        conv_id = conversation.get("id") or "unknown"
        title = conversation.get("title") or "Untitled"
        provider = conversation.get("provider") or "unknown"
        project = conversation.get("project") or None

        # Resolve the fallback timestamp exactly once: the filename and the
        # directory must be derived from the same instant, otherwise a call
        # straddling a clock boundary could disagree between the two.
        path_timestamp = conversation.get("created_at") or datetime.now().isoformat()

        # Same filename pattern as the Markdown export, with a .json extension.
        # NOTE(review): assumes generate_filename returns a name ending in
        # ".md" — confirm against src.utils.
        json_filename = self._json_filename(
            generate_filename(title, conv_id, path_timestamp)
        )
        output_path = build_export_path(
            self._export_dir,
            provider,
            project,
            path_timestamp,
            json_filename,
            self._structure,
        )
        output_path.parent.mkdir(parents=True, exist_ok=True)

        # Copy before adding exported_at so the caller's dict is not mutated.
        output_data = dict(conversation)
        output_data["exported_at"] = datetime.now(tz=timezone.utc).isoformat()

        _write_atomic(output_path, json.dumps(output_data, indent=2, default=str))
        logger.debug("[json] Wrote %s", output_path)
        return output_path
def _write_atomic(path: Path, content: str) -> None:
"""Write content atomically: .tmp → chmod 600 → os.replace()."""
tmp = path.with_suffix(".json.tmp")
try:
tmp.write_text(content, encoding="utf-8")
os.chmod(tmp, 0o600)
os.replace(tmp, path)
except OSError as e:
logger.error("[json] Failed to write %s: %s", path, e)
try:
tmp.unlink(missing_ok=True)
except OSError:
pass
raise

195
src/exporters/markdown.py Normal file
View File

@@ -0,0 +1,195 @@
"""Markdown exporter — writes conversations as Markdown with YAML frontmatter."""
import logging
import os
from datetime import datetime, timezone
from pathlib import Path
from typing import Any
from src.utils import build_export_path, generate_filename
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
# Role display labels and icons
_ROLE_LABELS = {
"user": ("🧑 Human", "user"),
"assistant": ("🤖 Assistant", "assistant"),
"system": ("⚙️ System", "system"),
}
class MarkdownExporter:
"""Exports a normalized conversation dict to a Markdown file.
Output format:
- YAML frontmatter (Joplin-compatible)
- Human-readable metadata table
- Messages with role headers and timestamps
All writes are atomic (tmp → os.replace) with 600 permissions.
"""
def __init__(self, export_dir: Path, output_structure: str = "provider/project/year") -> None:
self._export_dir = Path(export_dir)
self._structure = output_structure
def export(self, conversation: dict) -> Path:
"""Write a conversation to a Markdown file and return the output path.
Args:
conversation: Normalized conversation dict (from provider.normalize_conversation).
Returns:
Path to the written file.
Raises:
OSError: If the file cannot be written.
"""
conv_id = conversation.get("id", "unknown")
title = conversation.get("title") or "Untitled"
provider = conversation.get("provider", "unknown")
project = conversation.get("project") or None
created_at = conversation.get("created_at", "")
updated_at = conversation.get("updated_at", "")
messages = conversation.get("messages", [])
filename = generate_filename(title, conv_id, created_at or datetime.now().isoformat())
output_path = build_export_path(
self._export_dir, provider, project, created_at or datetime.now().isoformat(),
filename, self._structure
)
output_path.parent.mkdir(parents=True, exist_ok=True)
content = self._render(conversation, title, provider, project, created_at, updated_at, messages, conv_id)
_write_atomic(output_path, content)
logger.debug("[markdown] Wrote %s (%d messages)", output_path, len(messages))
return output_path
# ------------------------------------------------------------------
# Rendering
# ------------------------------------------------------------------
def _render(
self,
conversation: dict,
title: str,
provider: str,
project: str | None,
created_at: str,
updated_at: str,
messages: list[dict],
conv_id: str,
) -> str:
exported_at = datetime.now(tz=timezone.utc).isoformat()
message_count = len(messages)
date_str = _format_date(created_at)
project_display = project or "no-project"
provider_display = provider.capitalize()
# Tags: provider always; project only if set
tags = [provider]
if project:
tags.append(project)
lines: list[str] = []
# YAML frontmatter
lines += [
"---",
f'title: "{_yaml_escape(title)}"',
f"provider: {provider}",
f"project: {project_display}",
f"conversation_id: {conv_id}",
f"created_at: {created_at}",
f"exported_at: {exported_at}",
f"message_count: {message_count}",
f"tags: [{', '.join(tags)}]",
"---",
"",
f"# {title}",
"",
"| Field | Value |",
"|-------|-------|",
f"| Provider | {provider_display} |",
f"| Project | {project_display} |",
f"| Date | {date_str} |",
f"| Messages | {message_count} |",
f"| ID | {conv_id} |",
"",
"---",
"",
]
# Messages
for msg in messages:
role = msg.get("role", "user")
content = msg.get("content", "")
timestamp = msg.get("timestamp")
if not content or not content.strip():
logger.warning(
"[markdown] Skipping empty/whitespace message in conversation %s",
conv_id[:8],
)
continue
label, _ = _ROLE_LABELS.get(role, (f"💬 {role.capitalize()}", role))
lines.append(f"### {label}")
if timestamp:
lines.append(f"*{_format_timestamp(timestamp)}*")
lines.append("")
else:
lines.append("")
lines.append(content)
lines.append("")
lines.append("---")
lines.append("")
return "\n".join(lines)
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _write_atomic(path: Path, content: str) -> None:
"""Write content atomically: .tmp → chmod 600 → os.replace()."""
tmp = path.with_suffix(".md.tmp")
try:
tmp.write_text(content, encoding="utf-8")
os.chmod(tmp, 0o600)
os.replace(tmp, path)
except OSError as e:
logger.error("[markdown] Failed to write %s: %s", path, e)
try:
tmp.unlink(missing_ok=True)
except OSError:
pass
raise
def _format_date(ts: str) -> str:
"""Extract YYYY-MM-DD from an ISO8601 string."""
if not ts:
return "unknown"
return ts[:10]
def _format_timestamp(ts: str) -> str:
"""Format an ISO8601 timestamp for display, stripping sub-second precision."""
if not ts:
return ""
# Replace T with space, strip fractional seconds and timezone for readability
ts = ts.replace("T", " ")
if "." in ts:
ts = ts[: ts.index(".")]
return ts.rstrip("Z").strip()
def _yaml_escape(s: str) -> str:
"""Escape a string for use in YAML double-quoted scalar."""
return s.replace("\\", "\\\\").replace('"', '\\"')