# ai-chatexport/tests/test_cli.py

"""CLI-level tests using Click's CliRunner — no live API calls required."""

import pytest
from click.testing import CliRunner

from src.cache import Cache
from src.main import _filter_by_project, cli


# ---------------------------------------------------------------------------
# _filter_by_project  (T-27)
# ---------------------------------------------------------------------------


class TestFilterByProject:
    """Exercise the project filter logic used by export/list/joplin.

    Fixtures come in two shapes: ChatGPT-style conversations carry the project
    under the ``_project_name`` key, while Claude-style conversations carry it
    under ``project`` (a ``{"name": ...}`` dict, a plain string, or ``None``).
    """

    def _chatgpt(self, conv_id, project_name):
        """Build a ChatGPT-style conversation annotated with ``_project_name``."""
        conversation = {"id": conv_id, "_project_name": project_name}
        return conversation

    def _claude(self, conv_id, project_name):
        """Build a Claude-style conversation; a falsy name yields ``project=None``."""
        if project_name:
            return {"id": conv_id, "project": {"name": project_name}}
        return {"id": conv_id, "project": None}

    def test_none_filter_keeps_no_project_chatgpt(self):
        """The ``"none"`` filter keeps only the ChatGPT conversation without a project."""
        unassigned = self._chatgpt("a", None)
        assigned = self._chatgpt("b", "Python Course")
        kept = _filter_by_project([unassigned, assigned], "none")
        assert len(kept) == 1
        assert kept[0]["id"] == "a"

    def test_none_filter_keeps_no_project_claude(self):
        """The ``"none"`` filter keeps only the Claude conversation without a project."""
        unassigned = self._claude("a", None)
        assigned = self._claude("b", "Python Course")
        kept = _filter_by_project([unassigned, assigned], "none")
        assert len(kept) == 1
        assert kept[0]["id"] == "a"

    def test_name_filter_case_insensitive(self):
        """An upper-case filter still matches a mixed-case project name."""
        projects_by_id = (("a", "Python Course"), ("b", "Java Course"), ("c", None))
        conversations = [self._chatgpt(cid, name) for cid, name in projects_by_id]
        kept = _filter_by_project(conversations, "PYTHON")
        assert len(kept) == 1
        assert kept[0]["id"] == "a"

    def test_name_filter_substring_match(self):
        """Every project whose name contains the filter text is kept."""
        projects_by_id = (
            ("a", "Python Advanced Course"),
            ("b", "Python Basics"),
            ("c", "JavaScript"),
        )
        conversations = [self._chatgpt(cid, name) for cid, name in projects_by_id]
        kept = _filter_by_project(conversations, "python")
        assert len(kept) == 2
        kept_ids = {conv["id"] for conv in kept}
        assert kept_ids == {"a", "b"}

    def test_no_matches_returns_empty(self):
        """A filter that matches no project name yields an empty list."""
        conversations = [
            self._chatgpt("a", "Python Course"),
            self._chatgpt("b", None),
        ]
        kept = _filter_by_project(conversations, "ruby")
        assert kept == []

    def test_none_filter_excludes_all_with_projects(self):
        """The ``"none"`` filter drops everything when every conversation has a project."""
        conversations = [
            self._chatgpt(cid, name)
            for cid, name in (("a", "Project A"), ("b", "Project B"))
        ]
        kept = _filter_by_project(conversations, "none")
        assert kept == []

    def test_empty_string_project_treated_as_no_project(self):
        """An empty-string ``_project_name`` counts as "no project" for ``"none"``."""
        blank_project = {"id": "a", "_project_name": ""}
        real_project = {"id": "b", "_project_name": "Real"}
        kept = _filter_by_project([blank_project, real_project], "none")
        assert len(kept) == 1
        assert kept[0]["id"] == "a"

    def test_claude_project_string_matched(self):
        """A Claude ``project`` given as a plain string is matched by name too."""
        string_project = {"id": "a", "project": "python-course"}
        no_project = {"id": "b", "project": None}
        kept = _filter_by_project([string_project, no_project], "python")
        assert len(kept) == 1
        assert kept[0]["id"] == "a"


# ---------------------------------------------------------------------------
# export --since validation  (T-25)
# ---------------------------------------------------------------------------


class TestExportSinceValidation:
    """Test that --since with an invalid date exits cleanly with an error message.

    A companion test checks that a well-formed date does not trigger that
    error message.
    """

    def _pre_populated_cache(self, tmp_path) -> Cache:
        """Create a cache that passes the ToS gate and first-run doctor check."""
        cache = Cache(tmp_path)
        cache.acknowledge_tos()
        cache.mark_exported("chatgpt", "dummy-conv", {"updated_at": "2024-01-01T00:00:00Z"})
        return cache

    def _invoke_export_since(self, tmp_path, since):
        """Run ``export --since <since>`` with cache and export dirs under ``tmp_path``.

        Args:
            tmp_path: Temporary directory used as ``CACHE_DIR``; exports go to
                its ``exports`` subdirectory.
            since: Raw value passed to the ``--since`` option.

        Returns:
            The Click ``Result`` of the invocation.
        """
        self._pre_populated_cache(tmp_path)

        # Deliberately no ``mix_stderr=True``: Click 8.2 removed that parameter
        # (passing it raises TypeError). On older Click it was already the
        # default, and on newer Click ``result.output`` always holds the
        # combined stdout/stderr stream, so the assertions see the same text.
        runner = CliRunner()
        return runner.invoke(
            cli,
            ["--no-log-file", "export", "--since", since],
            env={
                "CHATGPT_SESSION_TOKEN": "eyJtesttoken",
                "CACHE_DIR": str(tmp_path),
                "EXPORT_DIR": str(tmp_path / "exports"),
            },
        )

    def test_invalid_since_date_exits_with_error(self, tmp_path):
        """An unparseable date exits with code 1 and names the expected format."""
        result = self._invoke_export_since(tmp_path, "notadate")
        assert result.exit_code == 1
        assert "Invalid --since date" in result.output
        assert "YYYY-MM-DD" in result.output

    def test_valid_since_date_does_not_error(self, tmp_path):
        """A valid date should not produce the invalid-date error (may fail later on API)."""
        result = self._invoke_export_since(tmp_path, "2024-01-01")
        assert "Invalid --since date" not in result.output


# ---------------------------------------------------------------------------
# LossReport summary
# ---------------------------------------------------------------------------


class TestLossReportSummary:
    """Pin the text produced by ``LossReport.format_summary()``.

    Covers the zero case, the top-5 breakdown with its overflow line, and the
    conversation/message counters.
    """

    def _new_report(self):
        """Return a fresh ``LossReport``, importing it lazily at test time."""
        from src.loss_report import LossReport

        return LossReport()

    def test_zero_summary_uses_none_sentinel(self):
        """An untouched report prints zero counts and two ``(none)`` sentinels."""
        summary = self._new_report().format_summary()
        assert "[export] Run summary:" in summary
        assert "conversations:        0" in summary
        assert "messages rendered:    0" in summary
        # Both "(none)" sentinels present — never empty parens
        sentinel_count = summary.count("(none)")
        assert sentinel_count == 2

    def test_top_5_breakdown(self):
        """Seven unknown types yield the top entry plus a two-type overflow line."""
        report = self._new_report()
        # Record 'a' five times up front so it is the most common type,
        # then every remaining type exactly once.
        for _ in range(5):
            report.record_unknown("a")
        for raw_type in "bcdefg":
            report.record_unknown(raw_type)
        summary = report.format_summary()
        # Top entry shown
        assert "a=5" in summary
        # Overflow line present (7 types, top 5 + 2 more)
        assert "+ 2 more types" in summary

    def test_messages_and_conversations_recorded(self):
        """One recorded conversation and two recorded messages show up in the summary."""
        report = self._new_report()
        report.record_conversation()
        for _ in range(2):
            report.record_message()
        summary = report.format_summary()
        assert "conversations:        1" in summary
        assert "messages rendered:    2" in summary