"""Source-level audit that the checkpoint module has zero network code.

Per SPEC §7.4, the checkpoint module never pushes, never opens a
remote, and never instantiates pygit2 callback types that would
deliver remote credentials. The cheapest way to keep that invariant
honest is a grep over the package: a future contributor who tries to
add `repo.remotes.create_anonymous(...)` will fail this test before
the change lands.

We also assert that no `requests`/`http.client`/`urllib.request`
modules end up in `sys.modules` after importing the package. That
catches transitive bugs — say, a future helper that pulled in `httpx`
for "just one log upload" — without inspecting every file.
"""

import ast
import json
import re
import subprocess
import sys
import tokenize
from io import StringIO
from pathlib import Path

import attractor.checkpoint as cp_pkg

# Forbidden source patterns. Each entry is a compiled regex so we can
# express word-boundary intent. They match function names or
# identifiers that would only appear if the module shipped network
# code; comments like `# no push allowed` are stripped before
# matching to keep the audit clean.
#
# Python allows whitespace between any two tokens (`push (`,
# `repo . remotes`), and the text being scanned may carry such
# whitespace, so every multi-token pattern tolerates it with `\s*`.
# Without that tolerance a spaced-out call would slip past the audit.
_FORBIDDEN_PATTERNS: list[re.Pattern[str]] = [
    re.compile(r"\bpush\s*\("),  # repo.remotes[*].push(...)
    re.compile(r"\bRemoteCallbacks\b"),
    re.compile(r"\.\s*remotes\b"),  # any remote enumeration
    re.compile(r"\bfetch\s*\("),
    re.compile(r"\bclone_repository\b"),
    # URLs in source. NOTE(review): string literals are stripped before
    # matching, so a URL that lives inside a string is not seen by this
    # pattern on the tokenized path — confirm that is intended.
    re.compile(r"\bhttps?://"),
    re.compile(r"\bGitHub API\b", re.IGNORECASE),
]

# Modules that must NOT show up in `sys.modules` once
# `attractor.checkpoint` has been imported. Finding one there means
# the package pulled it in — directly or transitively — which
# violates §7.4. Order is kept stable because the tuple is rendered
# verbatim into the import probe.
_FORBIDDEN_MODULES: tuple[str, ...] = (
    # Third-party HTTP clients.
    "requests",
    "httpx",
    "aiohttp",
    "urllib3",
    # Standard-library HTTP entry points.
    "http.client",
    "urllib.request",
)


def _code_lines(source: str) -> list[tuple[int, str]]:
    """Return `(lineno, code)` pairs from `source` with comments AND
    docstrings/string literals stripped.

    Uses the stdlib tokenizer so we don't fight with multiline strings
    or f-strings by hand. Docstrings and ordinary string literals are
    NOT code — they're commentary — so a SPEC reference in a docstring
    must not trip the audit.

    Surviving tokens keep their original adjacency: `repo.remotes`
    stays `repo.remotes` instead of becoming `repo . remotes`. A single
    space is emitted only where the source had a gap between two kept
    tokens (whitespace, or a stripped string/comment). Regexes written
    against code as typed therefore keep working on the result.

    Lines that contain nothing but stripped tokens are omitted.

    If `source` cannot be tokenized, every raw line is returned
    unfiltered (comments and strings included) so the audit still runs.
    """
    try:
        tokens = list(tokenize.generate_tokens(StringIO(source).readline))
    except (tokenize.TokenError, SyntaxError):
        # Fallback: if the file won't tokenize (shouldn't happen for
        # our own source), surface every line so the audit at least
        # runs. The tokenizer reports failures as `tokenize.TokenError`
        # or as `SyntaxError` (including `IndentationError`) depending
        # on the Python version.
        return list(enumerate(source.splitlines(), start=1))

    skipped = {
        tokenize.COMMENT,
        tokenize.STRING,
        tokenize.NEWLINE,
        tokenize.NL,
        tokenize.INDENT,
        tokenize.DEDENT,
        tokenize.ENCODING,
        tokenize.ENDMARKER,
    }
    # f-string (3.12+) and t-string (3.14+) pieces are separate token
    # types that older interpreters do not define; look them up by name
    # so this helper imports and runs on every supported version.
    for type_name in (
        "FSTRING_START",
        "FSTRING_MIDDLE",
        "FSTRING_END",
        "TSTRING_START",
        "TSTRING_MIDDLE",
        "TSTRING_END",
    ):
        tok_type = getattr(tokenize, type_name, None)
        if tok_type is not None:
            skipped.add(tok_type)

    pieces_by_line: dict[int, list[str]] = {}
    prev_end = (0, 0)  # (row, col) where the last kept token ended
    for tok in tokens:
        if tok.type in skipped:
            continue
        row, col = tok.start
        parts = pieces_by_line.setdefault(row, [])
        # Re-insert one space where the source separated two kept
        # tokens; tokens that touched in the source stay touching.
        if parts and prev_end[0] == row and col > prev_end[1]:
            parts.append(" ")
        parts.append(tok.string)
        prev_end = tok.end
    return [(ln, "".join(parts)) for ln, parts in sorted(pieces_by_line.items())]


# [unit->REQ-STORE-NO-NETWORK]
class TestSourceAudit:
    """Scan `attractor/checkpoint/` source files for forbidden patterns."""

    def test_no_network_tokens_in_source(self) -> None:
        """Fail if any code line in the package matches a forbidden pattern.

        Every `*.py` file under the package directory is parsed, reduced
        to its code lines via `_code_lines`, and checked against
        `_FORBIDDEN_PATTERNS`. Each offending line is reported once,
        with its package-relative path and line number.
        """
        package_dir = Path(cp_pkg.__file__).parent
        offenders: list[tuple[str, int, str]] = []
        # `rglob` order is unspecified; sort so the report is stable.
        for py_file in sorted(package_dir.rglob("*.py")):
            # A bare file name is ambiguous once sub-packages exist
            # (several `__init__.py`), so report the relative path.
            rel_path = py_file.relative_to(package_dir).as_posix()
            text = py_file.read_text(encoding="utf-8")
            # Fail loudly on a file that does not parse rather than
            # auditing whatever can be salvaged from broken source.
            ast.parse(text, filename=str(py_file))
            # `any` keeps a line that trips several patterns from being
            # listed more than once.
            offenders.extend(
                (rel_path, lineno, code)
                for lineno, code in _code_lines(text)
                if any(pattern.search(code) for pattern in _FORBIDDEN_PATTERNS)
            )
        assert not offenders, (
            "Network code detected in attractor.checkpoint — "
            "see SPEC §7.4. Offending lines:\n"
            + "\n".join(f"  {f}:{ln}: {src!r}" for f, ln, src in offenders)
        )


# [unit->REQ-STORE-NO-NETWORK]
class TestImportSurface:
    """Importing the package must not transitively pull in network libs."""

    def test_no_forbidden_modules_after_import(self) -> None:
        """Fail if importing the package loads any forbidden module.

        The check runs in a fresh interpreter and compares
        `sys.modules` before and after `import attractor.checkpoint`,
        so only modules loaded by that import are reported.
        """
        # Spawn a clean Python subprocess so we measure what
        # `attractor.checkpoint` actually imports — not state polluted
        # by other tests in the same pytest run. The baseline snapshot
        # excludes anything interpreter start-up (site hooks) already
        # loaded before the package was touched.
        probe = (
            "import sys, json; "
            "preloaded = set(sys.modules); "
            "import attractor.checkpoint; "
            f"print(json.dumps([m for m in {_FORBIDDEN_MODULES!r} "
            "if m in sys.modules and m not in preloaded]))"
        )
        result = subprocess.run(
            [sys.executable, "-c", probe],
            capture_output=True,
            text=True,
            check=False,
            timeout=30,
        )
        # `check=True` would raise without showing the captured stderr;
        # assert explicitly so an import failure is diagnosable.
        assert result.returncode == 0, (
            "Import probe for attractor.checkpoint exited with status "
            f"{result.returncode}; stderr:\n{result.stderr}"
        )
        # The package may print while importing; the probe's JSON is
        # always the final line of stdout.
        stdout_lines = result.stdout.strip().splitlines()
        assert stdout_lines, "Import probe produced no output"
        present = json.loads(stdout_lines[-1])
        assert not present, (
            "Forbidden network modules imported transitively by "
            f"attractor.checkpoint: {present}"
        )
