feat: add markdown and JSON exporters
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
77
src/exporters/json_export.py
Normal file
77
src/exporters/json_export.py
Normal file
@@ -0,0 +1,77 @@
|
||||
"""JSON exporter — writes the normalized conversation schema as pretty-printed JSON."""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
from src.utils import build_export_path, generate_filename
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class JSONExporter:
    """Exports a normalized conversation dict to a JSON file.

    The file contains the normalized schema as-is (pretty-printed, indent=2)
    plus an ``exported_at`` UTC timestamp.
    All writes are atomic (tmp → os.replace) with 600 permissions.
    """

    def __init__(self, export_dir: Path, output_structure: str = "provider/project/year") -> None:
        """Store the export root and the layout string.

        Args:
            export_dir: Root directory handed to ``build_export_path``.
            output_structure: Layout string forwarded to ``build_export_path``.
        """
        self._export_dir = Path(export_dir)
        self._structure = output_structure

    @staticmethod
    def _json_filename(md_filename: str) -> str:
        """Swap a trailing ``.md`` extension for ``.json``.

        Only the final extension is touched, so a title that itself contains
        ".md" is left intact. A name without a ``.md`` suffix is returned
        unchanged.
        """
        if md_filename.endswith(".md"):
            return md_filename[: -len(".md")] + ".json"
        return md_filename

    def export(self, conversation: dict) -> Path:
        """Write a conversation to a JSON file and return the output path.

        Args:
            conversation: Normalized conversation dict.

        Returns:
            Path to the written file.

        Raises:
            OSError: If the file cannot be written.
        """
        # ``or`` (rather than a .get default) also covers keys present with None/"".
        conv_id = conversation.get("id") or "unknown"
        title = conversation.get("title") or "Untitled"
        provider = conversation.get("provider") or "unknown"
        project = conversation.get("project") or None

        # Resolve the fallback once so the filename and the directory are
        # derived from the same instant.
        path_timestamp = conversation.get("created_at") or datetime.now().isoformat()

        # Use .json extension but same filename pattern
        json_filename = self._json_filename(generate_filename(title, conv_id, path_timestamp))

        output_path = build_export_path(
            self._export_dir,
            provider,
            project,
            path_timestamp,
            json_filename,
            self._structure,
        )
        output_path.parent.mkdir(parents=True, exist_ok=True)

        # Add exported_at to a shallow copy so the caller's dict is not mutated.
        output_data = dict(conversation)
        output_data["exported_at"] = datetime.now(tz=timezone.utc).isoformat()

        # default=str stringifies any value json cannot encode natively.
        _write_atomic(output_path, json.dumps(output_data, indent=2, default=str))
        logger.debug("[json] Wrote %s", output_path)
        return output_path
|
||||
|
||||
|
||||
def _write_atomic(path: Path, content: str) -> None:
|
||||
"""Write content atomically: .tmp → chmod 600 → os.replace()."""
|
||||
tmp = path.with_suffix(".json.tmp")
|
||||
try:
|
||||
tmp.write_text(content, encoding="utf-8")
|
||||
os.chmod(tmp, 0o600)
|
||||
os.replace(tmp, path)
|
||||
except OSError as e:
|
||||
logger.error("[json] Failed to write %s: %s", path, e)
|
||||
try:
|
||||
tmp.unlink(missing_ok=True)
|
||||
except OSError:
|
||||
pass
|
||||
raise
|
||||
195
src/exporters/markdown.py
Normal file
195
src/exporters/markdown.py
Normal file
@@ -0,0 +1,195 @@
|
||||
"""Markdown exporter — writes conversations as Markdown with YAML frontmatter."""
|
||||
|
||||
import logging
|
||||
import os
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from src.utils import build_export_path, generate_filename
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Role display labels and icons
|
||||
_ROLE_LABELS = {
|
||||
"user": ("🧑 Human", "user"),
|
||||
"assistant": ("🤖 Assistant", "assistant"),
|
||||
"system": ("⚙️ System", "system"),
|
||||
}
|
||||
|
||||
|
||||
class MarkdownExporter:
|
||||
"""Exports a normalized conversation dict to a Markdown file.
|
||||
|
||||
Output format:
|
||||
- YAML frontmatter (Joplin-compatible)
|
||||
- Human-readable metadata table
|
||||
- Messages with role headers and timestamps
|
||||
|
||||
All writes are atomic (tmp → os.replace) with 600 permissions.
|
||||
"""
|
||||
|
||||
def __init__(self, export_dir: Path, output_structure: str = "provider/project/year") -> None:
|
||||
self._export_dir = Path(export_dir)
|
||||
self._structure = output_structure
|
||||
|
||||
def export(self, conversation: dict) -> Path:
|
||||
"""Write a conversation to a Markdown file and return the output path.
|
||||
|
||||
Args:
|
||||
conversation: Normalized conversation dict (from provider.normalize_conversation).
|
||||
|
||||
Returns:
|
||||
Path to the written file.
|
||||
|
||||
Raises:
|
||||
OSError: If the file cannot be written.
|
||||
"""
|
||||
conv_id = conversation.get("id", "unknown")
|
||||
title = conversation.get("title") or "Untitled"
|
||||
provider = conversation.get("provider", "unknown")
|
||||
project = conversation.get("project") or None
|
||||
created_at = conversation.get("created_at", "")
|
||||
updated_at = conversation.get("updated_at", "")
|
||||
messages = conversation.get("messages", [])
|
||||
|
||||
filename = generate_filename(title, conv_id, created_at or datetime.now().isoformat())
|
||||
output_path = build_export_path(
|
||||
self._export_dir, provider, project, created_at or datetime.now().isoformat(),
|
||||
filename, self._structure
|
||||
)
|
||||
|
||||
output_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
content = self._render(conversation, title, provider, project, created_at, updated_at, messages, conv_id)
|
||||
|
||||
_write_atomic(output_path, content)
|
||||
logger.debug("[markdown] Wrote %s (%d messages)", output_path, len(messages))
|
||||
return output_path
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Rendering
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _render(
|
||||
self,
|
||||
conversation: dict,
|
||||
title: str,
|
||||
provider: str,
|
||||
project: str | None,
|
||||
created_at: str,
|
||||
updated_at: str,
|
||||
messages: list[dict],
|
||||
conv_id: str,
|
||||
) -> str:
|
||||
exported_at = datetime.now(tz=timezone.utc).isoformat()
|
||||
message_count = len(messages)
|
||||
date_str = _format_date(created_at)
|
||||
project_display = project or "no-project"
|
||||
provider_display = provider.capitalize()
|
||||
|
||||
# Tags: provider always; project only if set
|
||||
tags = [provider]
|
||||
if project:
|
||||
tags.append(project)
|
||||
|
||||
lines: list[str] = []
|
||||
|
||||
# YAML frontmatter
|
||||
lines += [
|
||||
"---",
|
||||
f'title: "{_yaml_escape(title)}"',
|
||||
f"provider: {provider}",
|
||||
f"project: {project_display}",
|
||||
f"conversation_id: {conv_id}",
|
||||
f"created_at: {created_at}",
|
||||
f"exported_at: {exported_at}",
|
||||
f"message_count: {message_count}",
|
||||
f"tags: [{', '.join(tags)}]",
|
||||
"---",
|
||||
"",
|
||||
f"# {title}",
|
||||
"",
|
||||
"| Field | Value |",
|
||||
"|-------|-------|",
|
||||
f"| Provider | {provider_display} |",
|
||||
f"| Project | {project_display} |",
|
||||
f"| Date | {date_str} |",
|
||||
f"| Messages | {message_count} |",
|
||||
f"| ID | {conv_id} |",
|
||||
"",
|
||||
"---",
|
||||
"",
|
||||
]
|
||||
|
||||
# Messages
|
||||
for msg in messages:
|
||||
role = msg.get("role", "user")
|
||||
content = msg.get("content", "")
|
||||
timestamp = msg.get("timestamp")
|
||||
|
||||
if not content or not content.strip():
|
||||
logger.warning(
|
||||
"[markdown] Skipping empty/whitespace message in conversation %s",
|
||||
conv_id[:8],
|
||||
)
|
||||
continue
|
||||
|
||||
label, _ = _ROLE_LABELS.get(role, (f"💬 {role.capitalize()}", role))
|
||||
lines.append(f"### {label}")
|
||||
if timestamp:
|
||||
lines.append(f"*{_format_timestamp(timestamp)}*")
|
||||
lines.append("")
|
||||
else:
|
||||
lines.append("")
|
||||
|
||||
lines.append(content)
|
||||
lines.append("")
|
||||
lines.append("---")
|
||||
lines.append("")
|
||||
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _write_atomic(path: Path, content: str) -> None:
|
||||
"""Write content atomically: .tmp → chmod 600 → os.replace()."""
|
||||
tmp = path.with_suffix(".md.tmp")
|
||||
try:
|
||||
tmp.write_text(content, encoding="utf-8")
|
||||
os.chmod(tmp, 0o600)
|
||||
os.replace(tmp, path)
|
||||
except OSError as e:
|
||||
logger.error("[markdown] Failed to write %s: %s", path, e)
|
||||
try:
|
||||
tmp.unlink(missing_ok=True)
|
||||
except OSError:
|
||||
pass
|
||||
raise
|
||||
|
||||
|
||||
def _format_date(ts: str) -> str:
|
||||
"""Extract YYYY-MM-DD from an ISO8601 string."""
|
||||
if not ts:
|
||||
return "unknown"
|
||||
return ts[:10]
|
||||
|
||||
|
||||
def _format_timestamp(ts: str) -> str:
|
||||
"""Format an ISO8601 timestamp for display, stripping sub-second precision."""
|
||||
if not ts:
|
||||
return ""
|
||||
# Replace T with space, strip fractional seconds and timezone for readability
|
||||
ts = ts.replace("T", " ")
|
||||
if "." in ts:
|
||||
ts = ts[: ts.index(".")]
|
||||
return ts.rstrip("Z").strip()
|
||||
|
||||
|
||||
def _yaml_escape(s: str) -> str:
|
||||
"""Escape a string for use in YAML double-quoted scalar."""
|
||||
return s.replace("\\", "\\\\").replace('"', '\\"')
|
||||
Reference in New Issue
Block a user