feat: add markdown and JSON exporters
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
77
src/exporters/json_export.py
Normal file
77
src/exporters/json_export.py
Normal file
@@ -0,0 +1,77 @@
|
|||||||
|
"""JSON exporter — writes the normalized conversation schema as pretty-printed JSON."""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from src.utils import build_export_path, generate_filename
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class JSONExporter:
    """Exports a normalized conversation dict to a JSON file.

    The file contains the normalized schema (pretty-printed, indent=2) plus an
    ``exported_at`` UTC timestamp added at export time. The actual write is
    delegated to the module-level ``_write_atomic`` helper.
    """

    def __init__(self, export_dir: Path, output_structure: str = "provider/project/year") -> None:
        """Remember the export root and the directory layout to use.

        Args:
            export_dir: Root directory exports are written under.
            output_structure: Layout string forwarded unchanged to
                ``build_export_path``.
        """
        self._export_dir = Path(export_dir)
        self._structure = output_structure

    @staticmethod
    def _json_filename(md_filename: str) -> str:
        """Turn a generated Markdown filename into its ``.json`` counterpart.

        Only a trailing ``.md`` extension is swapped. A blanket
        ``str.replace(".md", ".json")`` would also rewrite ``.md`` occurring
        inside the title part of the name.
        """
        stem = md_filename[:-3] if md_filename.endswith(".md") else md_filename
        return f"{stem}.json"

    def export(self, conversation: dict) -> Path:
        """Write a conversation to a JSON file and return the output path.

        Args:
            conversation: Normalized conversation dict. The keys read here are
                ``id``, ``title``, ``provider``, ``project`` and ``created_at``;
                the whole dict is serialized.

        Returns:
            Path to the written file.

        Raises:
            OSError: Propagated from directory creation or the file write.
        """
        conv_id = conversation.get("id", "unknown")
        title = conversation.get("title") or "Untitled"
        # `or` also covers an explicit None, matching how title is handled.
        provider = conversation.get("provider") or "unknown"
        project = conversation.get("project") or None
        created_at = conversation.get("created_at") or ""

        # Resolve the fallback timestamp once so the filename and the
        # directory are always derived from the same instant.
        path_timestamp = created_at or datetime.now().isoformat()

        # Same filename pattern as the Markdown export, with a .json extension.
        # NOTE(review): assumes generate_filename returns a name ending in ".md".
        json_filename = self._json_filename(generate_filename(title, conv_id, path_timestamp))

        output_path = build_export_path(
            self._export_dir,
            provider,
            project,
            path_timestamp,
            json_filename,
            self._structure,
        )
        output_path.parent.mkdir(parents=True, exist_ok=True)

        # Shallow copy so the caller's dict is not mutated by the extra key.
        output_data = {**conversation, "exported_at": datetime.now(tz=timezone.utc).isoformat()}

        # default=str stringifies any value json cannot serialize natively.
        _write_atomic(output_path, json.dumps(output_data, indent=2, default=str))
        logger.debug("[json] Wrote %s", output_path)
        return output_path
|
||||||
|
|
||||||
|
|
||||||
|
def _write_atomic(path: Path, content: str) -> None:
    """Write ``content`` to ``path`` via a sibling temp file and ``os.replace``.

    The temp file (``<name>.tmp`` next to ``path``) is created with mode 0o600
    and re-chmodded before any content is written, so the data is never
    exposed through looser default-umask permissions. On any failure the temp
    file is removed and the exception is re-raised; ``path`` is only touched
    by the final ``os.replace``.

    Args:
        path: Final destination file.
        content: Text to write, encoded as UTF-8.

    Raises:
        OSError: If creating, writing, or replacing the file fails.
        UnicodeEncodeError: If ``content`` cannot be encoded as UTF-8.
    """
    tmp = path.with_name(path.name + ".tmp")
    try:
        fd = os.open(tmp, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
        with os.fdopen(fd, "w", encoding="utf-8") as handle:
            # The mode given to os.open is masked by the umask and ignored
            # when a stale temp file already exists, so enforce it explicitly.
            os.chmod(tmp, 0o600)
            handle.write(content)
        os.replace(tmp, path)
    except Exception as e:
        # Catch more than OSError: an encoding failure must not leave a
        # half-written temp file behind either.
        logger.error("[json] Failed to write %s: %s", path, e)
        try:
            tmp.unlink(missing_ok=True)
        except OSError:
            # Best-effort cleanup; the original error below matters more.
            pass
        raise
|
||||||
195
src/exporters/markdown.py
Normal file
195
src/exporters/markdown.py
Normal file
@@ -0,0 +1,195 @@
|
|||||||
|
"""Markdown exporter — writes conversations as Markdown with YAML frontmatter."""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from src.utils import build_export_path, generate_filename
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Maps a message role to (heading label with icon, role name).
# The renderer in this file only consumes the label half of each tuple.
_ROLE_LABELS = {
    role: (label, role)
    for role, label in (
        ("user", "🧑 Human"),
        ("assistant", "🤖 Assistant"),
        ("system", "⚙️ System"),
    )
}
|
||||||
|
|
||||||
|
|
||||||
|
class MarkdownExporter:
|
||||||
|
"""Exports a normalized conversation dict to a Markdown file.
|
||||||
|
|
||||||
|
Output format:
|
||||||
|
- YAML frontmatter (Joplin-compatible)
|
||||||
|
- Human-readable metadata table
|
||||||
|
- Messages with role headers and timestamps
|
||||||
|
|
||||||
|
All writes are atomic (tmp → os.replace) with 600 permissions.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, export_dir: Path, output_structure: str = "provider/project/year") -> None:
|
||||||
|
self._export_dir = Path(export_dir)
|
||||||
|
self._structure = output_structure
|
||||||
|
|
||||||
|
def export(self, conversation: dict) -> Path:
|
||||||
|
"""Write a conversation to a Markdown file and return the output path.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
conversation: Normalized conversation dict (from provider.normalize_conversation).
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Path to the written file.
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
OSError: If the file cannot be written.
|
||||||
|
"""
|
||||||
|
conv_id = conversation.get("id", "unknown")
|
||||||
|
title = conversation.get("title") or "Untitled"
|
||||||
|
provider = conversation.get("provider", "unknown")
|
||||||
|
project = conversation.get("project") or None
|
||||||
|
created_at = conversation.get("created_at", "")
|
||||||
|
updated_at = conversation.get("updated_at", "")
|
||||||
|
messages = conversation.get("messages", [])
|
||||||
|
|
||||||
|
filename = generate_filename(title, conv_id, created_at or datetime.now().isoformat())
|
||||||
|
output_path = build_export_path(
|
||||||
|
self._export_dir, provider, project, created_at or datetime.now().isoformat(),
|
||||||
|
filename, self._structure
|
||||||
|
)
|
||||||
|
|
||||||
|
output_path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
content = self._render(conversation, title, provider, project, created_at, updated_at, messages, conv_id)
|
||||||
|
|
||||||
|
_write_atomic(output_path, content)
|
||||||
|
logger.debug("[markdown] Wrote %s (%d messages)", output_path, len(messages))
|
||||||
|
return output_path
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Rendering
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _render(
|
||||||
|
self,
|
||||||
|
conversation: dict,
|
||||||
|
title: str,
|
||||||
|
provider: str,
|
||||||
|
project: str | None,
|
||||||
|
created_at: str,
|
||||||
|
updated_at: str,
|
||||||
|
messages: list[dict],
|
||||||
|
conv_id: str,
|
||||||
|
) -> str:
|
||||||
|
exported_at = datetime.now(tz=timezone.utc).isoformat()
|
||||||
|
message_count = len(messages)
|
||||||
|
date_str = _format_date(created_at)
|
||||||
|
project_display = project or "no-project"
|
||||||
|
provider_display = provider.capitalize()
|
||||||
|
|
||||||
|
# Tags: provider always; project only if set
|
||||||
|
tags = [provider]
|
||||||
|
if project:
|
||||||
|
tags.append(project)
|
||||||
|
|
||||||
|
lines: list[str] = []
|
||||||
|
|
||||||
|
# YAML frontmatter
|
||||||
|
lines += [
|
||||||
|
"---",
|
||||||
|
f'title: "{_yaml_escape(title)}"',
|
||||||
|
f"provider: {provider}",
|
||||||
|
f"project: {project_display}",
|
||||||
|
f"conversation_id: {conv_id}",
|
||||||
|
f"created_at: {created_at}",
|
||||||
|
f"exported_at: {exported_at}",
|
||||||
|
f"message_count: {message_count}",
|
||||||
|
f"tags: [{', '.join(tags)}]",
|
||||||
|
"---",
|
||||||
|
"",
|
||||||
|
f"# {title}",
|
||||||
|
"",
|
||||||
|
"| Field | Value |",
|
||||||
|
"|-------|-------|",
|
||||||
|
f"| Provider | {provider_display} |",
|
||||||
|
f"| Project | {project_display} |",
|
||||||
|
f"| Date | {date_str} |",
|
||||||
|
f"| Messages | {message_count} |",
|
||||||
|
f"| ID | {conv_id} |",
|
||||||
|
"",
|
||||||
|
"---",
|
||||||
|
"",
|
||||||
|
]
|
||||||
|
|
||||||
|
# Messages
|
||||||
|
for msg in messages:
|
||||||
|
role = msg.get("role", "user")
|
||||||
|
content = msg.get("content", "")
|
||||||
|
timestamp = msg.get("timestamp")
|
||||||
|
|
||||||
|
if not content or not content.strip():
|
||||||
|
logger.warning(
|
||||||
|
"[markdown] Skipping empty/whitespace message in conversation %s",
|
||||||
|
conv_id[:8],
|
||||||
|
)
|
||||||
|
continue
|
||||||
|
|
||||||
|
label, _ = _ROLE_LABELS.get(role, (f"💬 {role.capitalize()}", role))
|
||||||
|
lines.append(f"### {label}")
|
||||||
|
if timestamp:
|
||||||
|
lines.append(f"*{_format_timestamp(timestamp)}*")
|
||||||
|
lines.append("")
|
||||||
|
else:
|
||||||
|
lines.append("")
|
||||||
|
|
||||||
|
lines.append(content)
|
||||||
|
lines.append("")
|
||||||
|
lines.append("---")
|
||||||
|
lines.append("")
|
||||||
|
|
||||||
|
return "\n".join(lines)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Helpers
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def _write_atomic(path: Path, content: str) -> None:
    """Write ``content`` to ``path`` via a sibling temp file and ``os.replace``.

    The temp file (``<name>.tmp`` next to ``path``) is created with mode 0o600
    and re-chmodded before any content is written, so the data is never
    exposed through looser default-umask permissions. On any failure the temp
    file is removed and the exception is re-raised; ``path`` is only touched
    by the final ``os.replace``.

    Args:
        path: Final destination file.
        content: Text to write, encoded as UTF-8.

    Raises:
        OSError: If creating, writing, or replacing the file fails.
        UnicodeEncodeError: If ``content`` cannot be encoded as UTF-8.
    """
    tmp = path.with_name(path.name + ".tmp")
    try:
        fd = os.open(tmp, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
        with os.fdopen(fd, "w", encoding="utf-8") as handle:
            # The mode given to os.open is masked by the umask and ignored
            # when a stale temp file already exists, so enforce it explicitly.
            os.chmod(tmp, 0o600)
            handle.write(content)
        os.replace(tmp, path)
    except Exception as e:
        # Catch more than OSError: an encoding failure must not leave a
        # half-written temp file behind either.
        logger.error("[markdown] Failed to write %s: %s", path, e)
        try:
            tmp.unlink(missing_ok=True)
        except OSError:
            # Best-effort cleanup; the original error below matters more.
            pass
        raise
|
||||||
|
|
||||||
|
|
||||||
|
def _format_date(ts: str) -> str:
    """Extract YYYY-MM-DD from an ISO8601 string.

    Args:
        ts: Timestamp whose first ten characters are expected to be a
            calendar date in ``YYYY-MM-DD`` form.

    Returns:
        The date portion, or ``"unknown"`` when ``ts`` is empty or does not
        start with a valid date (instead of echoing an arbitrary 10-character
        prefix into the metadata table).
    """
    if not ts:
        return "unknown"
    # str() keeps a non-string value from raising on the slice.
    candidate = str(ts).strip()[:10]
    try:
        parsed = datetime.strptime(candidate, "%Y-%m-%d")
    except ValueError:
        return "unknown"
    return parsed.date().isoformat()
|
||||||
|
|
||||||
|
|
||||||
|
def _format_timestamp(ts: str) -> str:
    """Format an ISO8601 timestamp for display as ``YYYY-MM-DD HH:MM:SS``.

    The ``T`` separator becomes a space, and everything after the clock time
    (fractional seconds, a ``Z`` suffix, or a numeric UTC offset) is dropped.
    Offsets are now removed whether or not fractional seconds precede them;
    previously ``...10:00:00+02:00`` kept its offset while
    ``...10:00:00.5+02:00`` lost it.

    Args:
        ts: ISO8601 timestamp string.

    Returns:
        The shortened timestamp, or ``""`` when ``ts`` is empty.
    """
    if not ts:
        return ""
    date_part, _, time_part = str(ts).strip().replace("T", " ", 1).partition(" ")
    # Keep only the leading HH:MM:SS run; the first other character starts
    # the fraction, the 'Z' marker, or the +/- offset.
    for idx, ch in enumerate(time_part):
        if ch not in "0123456789:":
            time_part = time_part[:idx]
            break
    return f"{date_part} {time_part}".strip()
|
||||||
|
|
||||||
|
|
||||||
|
def _yaml_escape(s: str) -> str:
    """Escape a string for use in a YAML double-quoted scalar.

    Besides backslash and double quote, line breaks and other control
    characters are escaped so the value always stays on a single line of the
    frontmatter.

    Args:
        s: Raw text to embed between double quotes.

    Returns:
        The text with YAML escape sequences substituted.
    """
    # Generic \xNN form for C0 controls and DEL ...
    table = {cp: f"\\x{cp:02x}" for cp in (*range(0x20), 0x7F)}
    # ... overridden by the dedicated short escapes where YAML defines one.
    table.update(
        {
            ord("\\"): "\\\\",
            ord('"'): '\\"',
            ord("\n"): "\\n",
            ord("\r"): "\\r",
            ord("\t"): "\\t",
            0x85: "\\N",  # next line
            0x2028: "\\L",  # line separator
            0x2029: "\\P",  # paragraph separator
        }
    )
    # translate() is a single pass, so inserted backslashes are never
    # escaped a second time.
    return s.translate(table)
|
||||||
Reference in New Issue
Block a user