From d1cac3ce049a49a41fed2f48102df89bd807af2d Mon Sep 17 00:00:00 2001
From: JesseMarkowitz
Date: Fri, 27 Feb 2026 23:03:58 -0500
Subject: [PATCH] feat: add markdown and JSON exporters

Co-Authored-By: Claude Sonnet 4.6
---
 src/exporters/json_export.py | 100 ++++++++++++++
 src/exporters/markdown.py    | 253 +++++++++++++++++++++++++++++++++++
 2 files changed, 353 insertions(+)
 create mode 100644 src/exporters/json_export.py
 create mode 100644 src/exporters/markdown.py

diff --git a/src/exporters/json_export.py b/src/exporters/json_export.py
new file mode 100644
index 0000000..300b73b
--- /dev/null
+++ b/src/exporters/json_export.py
@@ -0,0 +1,100 @@
+"""JSON exporter — writes the normalized conversation schema as pretty-printed JSON."""
+
+import json
+import logging
+import os
+from datetime import datetime, timezone
+from pathlib import Path
+
+from src.utils import build_export_path, generate_filename
+
+logger = logging.getLogger(__name__)
+
+
+class JSONExporter:
+    """Exports a normalized conversation dict to a JSON file.
+
+    The file contains the normalized schema as-is (pretty-printed, indent=2)
+    plus an ``exported_at`` UTC timestamp added at write time.
+    All writes are atomic (tmp → os.replace) with 600 permissions.
+    """
+
+    def __init__(self, export_dir: Path, output_structure: str = "provider/project/year") -> None:
+        self._export_dir = Path(export_dir)
+        self._structure = output_structure
+
+    def export(self, conversation: dict) -> Path:
+        """Write a conversation to a JSON file and return the output path.
+
+        Args:
+            conversation: Normalized conversation dict.
+
+        Returns:
+            Path to the written file.
+
+        Raises:
+            OSError: If the file cannot be written.
+        """
+        conv_id = conversation.get("id") or "unknown"
+        title = conversation.get("title") or "Untitled"
+        provider = conversation.get("provider") or "unknown"
+        project = conversation.get("project") or None
+        created_at = conversation.get("created_at") or ""
+
+        # Resolve the fallback timestamp once so the filename and the directory
+        # layout are always derived from the same instant.
+        timestamp = created_at or datetime.now().isoformat()
+
+        # Same filename pattern as the Markdown export, swapping only the trailing
+        # extension: str.replace() would also rewrite a ".md" inside the name.
+        md_filename = generate_filename(title, conv_id, timestamp)
+        json_filename = md_filename.removesuffix(".md") + ".json"
+
+        output_path = build_export_path(
+            self._export_dir,
+            provider,
+            project,
+            timestamp,
+            json_filename,
+            self._structure,
+        )
+
+        output_path.parent.mkdir(parents=True, exist_ok=True)
+
+        # Shallow copy so adding exported_at does not mutate the caller's dict.
+        output_data = dict(conversation)
+        output_data["exported_at"] = datetime.now(tz=timezone.utc).isoformat()
+
+        _write_atomic(output_path, json.dumps(output_data, indent=2, default=str))
+        logger.debug("[json] Wrote %s", output_path)
+        return output_path
+
+
+def _write_atomic(path: Path, content: str) -> None:
+    """Write content atomically: .tmp (created with mode 600) → os.replace().
+
+    Args:
+        path: Final destination of the file.
+        content: Text to write, encoded as UTF-8.
+
+    Raises:
+        OSError: If writing or replacing fails; the temp file is removed
+            (best effort) before the error is re-raised.
+    """
+    tmp = path.with_suffix(".json.tmp")
+    try:
+        # Create the temp file as 600 up front: write_text() followed by chmod()
+        # leaves the content readable under the default umask for a moment.
+        fd = os.open(tmp, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
+        with os.fdopen(fd, "w", encoding="utf-8") as fh:
+            fh.write(content)
+        # A stale temp file left by an earlier run keeps its old mode; enforce 600.
+        os.chmod(tmp, 0o600)
+        os.replace(tmp, path)
+    except OSError as e:
+        logger.error("[json] Failed to write %s: %s", path, e)
+        try:
+            tmp.unlink(missing_ok=True)
+        except OSError:
+            pass
+        raise
diff --git a/src/exporters/markdown.py b/src/exporters/markdown.py
new file mode 100644
index 0000000..3c37fac
--- /dev/null
+++ b/src/exporters/markdown.py
@@ -0,0 +1,253 @@
+"""Markdown exporter — writes conversations as Markdown with YAML frontmatter."""
+
+import logging
+import os
+import re
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any
+
+from src.utils import build_export_path, generate_filename
+
+logger = logging.getLogger(__name__)
+
+# Role display labels and icons
+_ROLE_LABELS = {
+    "user": ("🧑 Human", "user"),
+    "assistant": ("🤖 Assistant", "assistant"),
+    "system": ("⚙️ System", "system"),
+}
+
+# Trailing "Z" or numeric UTC offset that directly follows a clock time.
+_TZ_SUFFIX_RE = re.compile(r"(?<=:\d{2})\s*(?:Z|[+-]\d{2}(?::?\d{2})?)$")
+
+
+class MarkdownExporter:
+    """Exports a normalized conversation dict to a Markdown file.
+
+    Output format:
+    - YAML frontmatter (Joplin-compatible)
+    - Human-readable metadata table
+    - Messages with role headers and timestamps
+
+    All writes are atomic (tmp → os.replace) with 600 permissions.
+    """
+
+    def __init__(self, export_dir: Path, output_structure: str = "provider/project/year") -> None:
+        self._export_dir = Path(export_dir)
+        self._structure = output_structure
+
+    def export(self, conversation: dict) -> Path:
+        """Write a conversation to a Markdown file and return the output path.
+
+        Args:
+            conversation: Normalized conversation dict (from provider.normalize_conversation).
+
+        Returns:
+            Path to the written file.
+
+        Raises:
+            OSError: If the file cannot be written.
+        """
+        conv_id = conversation.get("id") or "unknown"
+        title = conversation.get("title") or "Untitled"
+        provider = conversation.get("provider") or "unknown"
+        project = conversation.get("project") or None
+        created_at = conversation.get("created_at") or ""
+        updated_at = conversation.get("updated_at") or ""
+        messages = conversation.get("messages") or []
+
+        # Resolve the fallback timestamp once so the filename and the directory
+        # layout are always derived from the same instant.
+        timestamp = created_at or datetime.now().isoformat()
+        filename = generate_filename(title, conv_id, timestamp)
+        output_path = build_export_path(
+            self._export_dir, provider, project, timestamp, filename, self._structure
+        )
+
+        output_path.parent.mkdir(parents=True, exist_ok=True)
+
+        content = self._render(
+            conversation, title, provider, project, created_at, updated_at, messages, conv_id
+        )
+
+        _write_atomic(output_path, content)
+        logger.debug("[markdown] Wrote %s (%d messages)", output_path, len(messages))
+        return output_path
+
+    # ------------------------------------------------------------------
+    # Rendering
+    # ------------------------------------------------------------------
+
+    def _render(
+        self,
+        conversation: dict,
+        title: str,
+        provider: str,
+        project: str | None,
+        created_at: str,
+        updated_at: str,
+        messages: list[dict],
+        conv_id: str,
+    ) -> str:
+        """Build the complete Markdown document for one conversation.
+
+        Args:
+            conversation: Full normalized dict (not read here; kept for callers).
+            title: Conversation title, used in the frontmatter and the heading.
+            provider: Provider identifier; also emitted as the first tag.
+            project: Project name, or None to display "no-project".
+            created_at: ISO8601 creation timestamp ("" when unknown).
+            updated_at: ISO8601 update timestamp (not read here; kept for callers).
+            messages: Message dicts with "role", "content" and optional "timestamp".
+            conv_id: Conversation identifier.
+
+        Returns:
+            The document text with lines joined by newlines.
+        """
+        exported_at = datetime.now(tz=timezone.utc).isoformat()
+        message_count = len(messages)
+        date_str = _format_date(created_at)
+        project_display = project or "no-project"
+        provider_display = provider.capitalize()
+
+        # Tags: provider always; project only if set
+        tags = [provider]
+        if project:
+            tags.append(project)
+        tag_list = ", ".join(_yaml_quote(tag) for tag in tags)
+
+        # Frontmatter strings are double-quoted so values containing ":", "#",
+        # "," or brackets cannot break the YAML; table cells and the heading are
+        # flattened to one line for the same reason on the Markdown side.
+        lines: list[str] = [
+            "---",
+            f"title: {_yaml_quote(title)}",
+            f"provider: {_yaml_quote(provider)}",
+            f"project: {_yaml_quote(project_display)}",
+            f"conversation_id: {_yaml_quote(conv_id)}",
+            f"created_at: {created_at}",
+            f"exported_at: {exported_at}",
+            f"message_count: {message_count}",
+            f"tags: [{tag_list}]",
+            "---",
+            "",
+            f"# {_single_line(title)}",
+            "",
+            "| Field | Value |",
+            "|-------|-------|",
+            f"| Provider | {_table_cell(provider_display)} |",
+            f"| Project | {_table_cell(project_display)} |",
+            f"| Date | {_table_cell(date_str)} |",
+            f"| Messages | {message_count} |",
+            f"| ID | {_table_cell(conv_id)} |",
+            "",
+            "---",
+            "",
+        ]
+
+        # Messages
+        for msg in messages:
+            role = msg.get("role") or "user"
+            content = msg.get("content", "")
+            timestamp = msg.get("timestamp")
+
+            if not content or not content.strip():
+                logger.warning(
+                    "[markdown] Skipping empty/whitespace message in conversation %s",
+                    str(conv_id)[:8],
+                )
+                continue
+
+            label, _ = _ROLE_LABELS.get(role, (f"💬 {role.capitalize()}", role))
+            lines.append(f"### {label}")
+            if timestamp:
+                lines.append(f"*{_format_timestamp(timestamp)}*")
+            lines += ["", content, "", "---", ""]
+
+        return "\n".join(lines)
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _write_atomic(path: Path, content: str) -> None:
+    """Write content atomically: .tmp (created with mode 600) → os.replace().
+
+    Args:
+        path: Final destination of the file.
+        content: Text to write, encoded as UTF-8.
+
+    Raises:
+        OSError: If writing or replacing fails; the temp file is removed
+            (best effort) before the error is re-raised.
+    """
+    tmp = path.with_suffix(".md.tmp")
+    try:
+        # Create the temp file as 600 up front: write_text() followed by chmod()
+        # leaves the content readable under the default umask for a moment.
+        fd = os.open(tmp, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
+        with os.fdopen(fd, "w", encoding="utf-8") as fh:
+            fh.write(content)
+        # A stale temp file left by an earlier run keeps its old mode; enforce 600.
+        os.chmod(tmp, 0o600)
+        os.replace(tmp, path)
+    except OSError as e:
+        logger.error("[markdown] Failed to write %s: %s", path, e)
+        try:
+            tmp.unlink(missing_ok=True)
+        except OSError:
+            pass
+        raise
+
+
+def _format_date(ts: str) -> str:
+    """Extract YYYY-MM-DD from an ISO8601 string."""
+    if not ts:
+        return "unknown"
+    return ts[:10]
+
+
+def _format_timestamp(ts: str) -> str:
+    """Format an ISO8601 timestamp for display.
+
+    Replaces the "T" separator with a space and drops fractional seconds and
+    the timezone designator ("Z" or a numeric offset) for readability.
+    """
+    if not ts:
+        return ""
+    text = ts.replace("T", " ")
+    if "." in text:
+        text = text[: text.index(".")]
+    # Without fractional seconds the cut above never reaches the offset, so
+    # "...23:03:58+00:00" needs the suffix stripped explicitly.
+    text = _TZ_SUFFIX_RE.sub("", text.strip())
+    return text.rstrip("Z").strip()
+
+
+def _single_line(text: Any) -> str:
+    """Collapse every whitespace run (including newlines) to a single space."""
+    return " ".join(str(text).split())
+
+
+def _table_cell(text: Any) -> str:
+    """Render text as one Markdown table cell: single line, pipes escaped."""
+    return _single_line(text).replace("|", "\\|")
+
+
+def _yaml_quote(value: Any) -> str:
+    """Return value as a YAML double-quoted scalar, quotes included."""
+    return f'"{_yaml_escape(str(value))}"'
+
+
+def _yaml_escape(s: str) -> str:
+    """Escape a string for use in YAML double-quoted scalar."""
+    return (
+        s.replace("\\", "\\\\")
+        .replace('"', '\\"')
+        .replace("\n", "\\n")
+        .replace("\r", "\\r")
+        .replace("\t", "\\t")
+    )