From 7ac6183b6057d2371dd7ecf7308281c0347450f7 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Sat, 28 Mar 2026 16:30:52 +0500 Subject: [PATCH 01/15] feat(mcp): add optional read-only MCP server for deterministic baseline-aware agent workflows --- AGENTS.md | 40 +- CHANGELOG.md | 8 + README.md | 31 +- codeclone/mcp_server.py | 383 ++++++++ codeclone/mcp_service.py | 976 +++++++++++++++++++ docs/README.md | 2 + docs/architecture.md | 41 + docs/book/01-architecture-map.md | 5 + docs/book/08-report.md | 5 +- docs/book/09-cli.md | 1 + docs/book/14-compatibility-and-versioning.md | 4 + docs/book/20-mcp-interface.md | 150 +++ docs/book/README.md | 1 + docs/book/appendix/b-schema-layouts.md | 8 +- docs/book/appendix/c-error-catalog.md | 15 + docs/mcp.md | 359 +++++++ mkdocs.yml | 2 + pyproject.toml | 6 +- tests/test_mcp_server.py | 273 ++++++ tests/test_mcp_service.py | 547 +++++++++++ uv.lock | 600 +++++++++++- 21 files changed, 3439 insertions(+), 18 deletions(-) create mode 100644 codeclone/mcp_server.py create mode 100644 codeclone/mcp_service.py create mode 100644 docs/book/20-mcp-interface.md create mode 100644 docs/mcp.md create mode 100644 tests/test_mcp_server.py create mode 100644 tests/test_mcp_service.py diff --git a/AGENTS.md b/AGENTS.md index 91c606c..b871f7f 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -60,6 +60,7 @@ Key artifacts: - `codeclone.baseline.json` — trusted baseline snapshot (for CI comparisons) - `.cache/codeclone/cache.json` — analysis cache (integrity-checked) - `.cache/codeclone/report.html|report.json|report.md|report.sarif|report.txt` — reports +- `codeclone-mcp` — optional read-only MCP server (install via `codeclone[mcp]`) - `docs/`, `mkdocs.yml`, `.github/workflows/docs.yml` — published documentation site and docs build pipeline --- @@ -79,6 +80,12 @@ If you touched `docs/`, `mkdocs.yml`, docs publishing workflow, or sample-report uv run --with mkdocs --with mkdocs-material mkdocs build --strict ``` +If you touched the MCP surface, 
also run: + +```bash +uv run pytest -q tests/test_mcp_service.py tests/test_mcp_server.py +``` + --- ## 4) Baseline contract (v2, stable) @@ -161,6 +168,9 @@ Reports come in: - SARIF (`--sarif`) - Text (`--text`) +MCP is a separate optional interface, not a report format. It must remain a +read-only agent layer over the same canonical report/baseline/cache contracts. + ### Report invariants - Ordering must be deterministic (stable sort keys). @@ -273,6 +283,8 @@ Before cutting a release: - Don’t add project-root hashes or unstable machine-local fields to baseline. - Don’t embed suppressions into baseline unless explicitly designed as a versioned contract. - Don’t introduce nondeterministic ordering (dict iteration, set ordering, filesystem traversal without sort). +- Don’t make the base `codeclone` install depend on optional MCP runtime packages. +- Don’t let MCP mutate baselines, source files, or repo state. --- @@ -296,6 +308,8 @@ Architecture is layered, but grounded in current code (not aspirational diagrams `codeclone/templates.py`) renders views from report/meta facts. - **Documentation/publishing surface** (`docs/`, `mkdocs.yml`, `.github/workflows/docs.yml`, `scripts/build_docs_example_report.py`) publishes contract docs and the live sample report. +- **MCP agent interface** (`codeclone/mcp_service.py`, `codeclone/mcp_server.py`) exposes the current pipeline as a + deterministic, read-only MCP server for AI agents and MCP-capable clients. - **Tests-as-spec** (`tests/`) lock behavior, contracts, determinism, and architecture boundaries. Non-negotiable interpretation: @@ -303,6 +317,7 @@ Non-negotiable interpretation: - Core produces facts; renderers present facts. - Baseline/cache are persistence contracts, not analysis truth. - UI/report must not invent gating semantics. +- MCP reuses pipeline/report contracts and must not create a second analysis truth path. ## 13) Module map @@ -333,6 +348,10 @@ Use this map to route changes to the right owner module. 
change belongs here. - `codeclone/report/*.py` (other modules) — deterministic projections/format transforms ( text/markdown/sarif/derived/findings/suggestions); avoid injecting new analysis heuristics here. +- `codeclone/mcp_service.py` — typed, in-process MCP service adapter over the current pipeline/report contracts; keep + it read-only and deterministic; do not move shell UX or `sys.exit` behavior here. +- `codeclone/mcp_server.py` — optional MCP launcher/server wiring, transport config, and MCP tool/resource + registration; keep dependency loading lazy so base installs/CI do not require MCP runtime packages. - `codeclone/html_report.py` — public HTML facade/re-export surface; preserve backward-compatible imports here; do not grow section/layout logic in this module. - `codeclone/_html_report/*` — actual HTML assembly, context shaping, tabs, sections, and overview/navigation behavior; @@ -365,6 +384,7 @@ Operational rules: - CLI helper modules (`_cli_*`) must orchestrate/format, not own domain semantics. - Persistence semantics (baseline/cache trust/integrity) must stay in persistence/domain modules, not in render/UI layers. +- MCP may depend on pipeline/report/contracts, but core/persistence/report layers must not depend on MCP modules. ## 15) Suppression policy @@ -389,15 +409,16 @@ Prefer explicit inline suppressions for runtime/dynamic false positives instead If you change a contract-sensitive zone, route docs/tests/approval deliberately. 
-| Change zone | Must update docs | Must update tests | Explicit approval required when | Contract-change trigger | -|-------------------------------------------------------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------|------------------------------------------------------------------------------------| -| Baseline schema/trust/integrity (`codeclone/baseline.py`) | `docs/book/06-baseline.md`, `docs/book/14-compatibility-and-versioning.md`, `docs/book/appendix/b-schema-layouts.md`, `CHANGELOG.md` | `tests/test_baseline.py`, CI/CLI behavior tests (`tests/test_cli_inprocess.py`, `tests/test_cli_unit.py`) | schema/trust semantics, compatibility windows, payload integrity logic change | baseline key layout/status semantics/compat rules change | -| Cache schema/profile/integrity (`codeclone/cache.py`) | `docs/book/07-cache.md`, `docs/book/appendix/b-schema-layouts.md`, `CHANGELOG.md` | `tests/test_cache.py`, pipeline/CLI cache integration tests | cache schema/status/profile compatibility semantics change | cache payload/version/status semantics change | -| Canonical report JSON shape (`codeclone/report/json_contract.py`, report projections) | `docs/book/08-report.md` (+ `docs/book/10-html-render.md` if rendering contract impacted), `docs/sarif.md` when SARIF changes, `CHANGELOG.md` | `tests/test_report.py`, `tests/test_report_contract_coverage.py`, `tests/test_report_branch_invariants.py`, relevant report-format tests | finding/meta/summary schema changes | stable JSON fields/meaning/order guarantees change | -| CLI flags/help/exit 
behavior (`codeclone/cli.py`, `_cli_*`, `contracts.py`) | `docs/book/09-cli.md`, `docs/book/03-contracts-exit-codes.md`, `README.md`, `CHANGELOG.md` | `tests/test_cli_unit.py`, `tests/test_cli_inprocess.py`, `tests/test_cli_smoke.py` | exit-code semantics, script-facing behavior, flag contracts change | user-visible CLI contract changes | -| Fingerprint-adjacent analysis (`extractor/cfg/normalize/grouping`) | `docs/book/05-core-pipeline.md`, `docs/cfg.md`, `docs/book/14-compatibility-and-versioning.md`, `CHANGELOG.md` | `tests/test_fingerprint.py`, `tests/test_extractor.py`, `tests/test_cfg.py`, golden tests (`tests/test_detector_golden.py`, `tests/test_golden_v2.py`) | always (see Section 1.6) | clone identity / NEW-vs-KNOWN / fingerprint inputs change | -| Suppression semantics/reporting (`suppressions`, extractor dead-code wiring, report/UI counters) | `docs/book/19-inline-suppressions.md`, `docs/book/16-dead-code-contract.md`, `docs/book/08-report.md`, and interface docs if surfaced (`09-cli`, `10-html-render`) | `tests/test_suppressions.py`, `tests/test_extractor.py`, `tests/test_metrics_modules.py`, `tests/test_pipeline_metrics.py`, report/html/cli tests | declaration scope semantics, rule effect, or contract-visible counters/fields change | suppression changes alter active finding output or contract-visible report payload | -| Docs site / sample report publication (`docs/`, `mkdocs.yml`, `.github/workflows/docs.yml`, `scripts/build_docs_example_report.py`) | `docs/README.md`, `docs/publishing.md`, `docs/examples/report.md`, and any contract pages surfaced by the change, `CHANGELOG.md` when user-visible behavior changes | `mkdocs build --strict`, sample-report generation smoke path, and relevant report/html tests if generated examples or embeds change | published docs navigation, sample-report generation, or Pages workflow semantics change | published documentation behavior or sample-report generation contract changes | +| Change zone | Must update docs | 
Must update tests | Explicit approval required when | Contract-change trigger | +|-------------------------------------------------------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------| +| Baseline schema/trust/integrity (`codeclone/baseline.py`) | `docs/book/06-baseline.md`, `docs/book/14-compatibility-and-versioning.md`, `docs/book/appendix/b-schema-layouts.md`, `CHANGELOG.md` | `tests/test_baseline.py`, CI/CLI behavior tests (`tests/test_cli_inprocess.py`, `tests/test_cli_unit.py`) | schema/trust semantics, compatibility windows, payload integrity logic change | baseline key layout/status semantics/compat rules change | +| Cache schema/profile/integrity (`codeclone/cache.py`) | `docs/book/07-cache.md`, `docs/book/appendix/b-schema-layouts.md`, `CHANGELOG.md` | `tests/test_cache.py`, pipeline/CLI cache integration tests | cache schema/status/profile compatibility semantics change | cache payload/version/status semantics change | +| Canonical report JSON shape (`codeclone/report/json_contract.py`, report projections) | `docs/book/08-report.md` (+ `docs/book/10-html-render.md` if rendering contract impacted), `docs/sarif.md` when SARIF changes, `CHANGELOG.md` | `tests/test_report.py`, `tests/test_report_contract_coverage.py`, `tests/test_report_branch_invariants.py`, relevant report-format tests | finding/meta/summary schema changes | stable JSON fields/meaning/order guarantees change | +| CLI flags/help/exit behavior (`codeclone/cli.py`, 
`_cli_*`, `contracts.py`) | `docs/book/09-cli.md`, `docs/book/03-contracts-exit-codes.md`, `README.md`, `CHANGELOG.md` | `tests/test_cli_unit.py`, `tests/test_cli_inprocess.py`, `tests/test_cli_smoke.py` | exit-code semantics, script-facing behavior, flag contracts change | user-visible CLI contract changes | +| Fingerprint-adjacent analysis (`extractor/cfg/normalize/grouping`) | `docs/book/05-core-pipeline.md`, `docs/cfg.md`, `docs/book/14-compatibility-and-versioning.md`, `CHANGELOG.md` | `tests/test_fingerprint.py`, `tests/test_extractor.py`, `tests/test_cfg.py`, golden tests (`tests/test_detector_golden.py`, `tests/test_golden_v2.py`) | always (see Section 1.6) | clone identity / NEW-vs-KNOWN / fingerprint inputs change | +| Suppression semantics/reporting (`suppressions`, extractor dead-code wiring, report/UI counters) | `docs/book/19-inline-suppressions.md`, `docs/book/16-dead-code-contract.md`, `docs/book/08-report.md`, and interface docs if surfaced (`09-cli`, `10-html-render`) | `tests/test_suppressions.py`, `tests/test_extractor.py`, `tests/test_metrics_modules.py`, `tests/test_pipeline_metrics.py`, report/html/cli tests | declaration scope semantics, rule effect, or contract-visible counters/fields change | suppression changes alter active finding output or contract-visible report payload | +| MCP interface (`codeclone/mcp_service.py`, `codeclone/mcp_server.py`, packaging extra/launcher) | `README.md`, `docs/book/20-mcp-interface.md`, `docs/mcp.md`, `docs/book/01-architecture-map.md`, `docs/book/14-compatibility-and-versioning.md`, `CHANGELOG.md` | `tests/test_mcp_service.py`, `tests/test_mcp_server.py`, plus CLI/package tests if launcher/install semantics change | tool/resource shapes, read-only semantics, optional-dependency packaging behavior change | public MCP tool names, resource URIs, launcher/install behavior, or response semantics change | +| Docs site / sample report publication (`docs/`, `mkdocs.yml`, `.github/workflows/docs.yml`, 
`scripts/build_docs_example_report.py`) | `docs/README.md`, `docs/publishing.md`, `docs/examples/report.md`, and any contract pages surfaced by the change, `CHANGELOG.md` when user-visible behavior changes | `mkdocs build --strict`, sample-report generation smoke path, and relevant report/html tests if generated examples or embeds change | published docs navigation, sample-report generation, or Pages workflow semantics change | published documentation behavior or sample-report generation contract changes | Golden rule: do not “fix” failures by snapshot refresh unless the underlying contract change is intentional, documented, and approved. @@ -431,6 +452,7 @@ Policy: - Cache schema/status/profile compatibility/integrity (`CACHE_VERSION` contract family). - Canonical report JSON schema/payload semantics (`REPORT_SCHEMA_VERSION` contract family). - Documented report projections and their machine/user-facing semantics (HTML/Markdown/SARIF/Text). +- Documented MCP launcher/install behavior, tool names, resource URIs, and read-only semantics. - Documented finding families/kinds/ids and suppression-facing report fields. - Metrics baseline schema/compatibility where used by CI/gating. - Benchmark schema/outputs if consumed as a reproducible contract surface. diff --git a/CHANGELOG.md b/CHANGELOG.md index 6b97621..00bfb5b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,13 @@ # Changelog +## [2.0.0b3] + +### MCP + +- Add optional `codeclone[mcp]` extra and `codeclone-mcp` launcher. +- Add a deterministic, read-only MCP server over the canonical pipeline and report contracts. +- Expose MCP tools/resources for repository analysis, run summaries, report sections, findings, hotlists, and gate previews. 
+ ## [2.0.0b2] ### Dependencies diff --git a/README.md b/README.md index 38bcb00..2c52577 100644 --- a/README.md +++ b/README.md @@ -34,6 +34,7 @@ Live sample report: score - **Baseline governance** — known debt stays accepted; CI blocks only new clones and metric regressions - **Reports** — interactive HTML, deterministic JSON/TXT plus Markdown and SARIF projections from one canonical report +- **MCP server** — optional read-only MCP surface for AI agents, IDEs, and MCP-capable clients - **CI-first** — deterministic output, stable ordering, exit code contract, pre-commit support - **Fast*** — incremental caching, parallel processing, warm-run optimization, and reproducible benchmark coverage @@ -59,6 +60,34 @@ uvx codeclone@latest . +## MCP Server + +Install MCP support only when you need the agent interface: + +```bash +pip install "codeclone[mcp]" +``` + +Then run the optional MCP launcher: + +```bash +codeclone-mcp --transport stdio +# or +codeclone-mcp --transport streamable-http --port 8000 +``` + +For local command-based clients, prefer `stdio`. Use `streamable-http` only +when the client expects a remote MCP endpoint. + +CodeClone MCP is read-only and baseline-aware. It exposes deterministic tools +for analysis, summaries, findings, hotspots, report sections, and gate previews +without mutating source files or baselines. 
+ +Docs: +[MCP interface contract](https://orenlab.github.io/codeclone/book/20-mcp-interface/) +· +[MCP usage guide](https://orenlab.github.io/codeclone/mcp/) + ## CI Integration ```bash @@ -197,7 +226,7 @@ Dynamic/runtime false positives are resolved via explicit inline suppressions, n { "report_schema_version": "2.1", "meta": { - "codeclone_version": "2.0.0b2", + "codeclone_version": "2.0.0b3", "project_name": "...", "scan_root": ".", "report_mode": "full", diff --git a/codeclone/mcp_server.py b/codeclone/mcp_server.py new file mode 100644 index 0000000..e507787 --- /dev/null +++ b/codeclone/mcp_server.py @@ -0,0 +1,383 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import argparse +import sys +from typing import TYPE_CHECKING, Literal, cast + +from . import __version__ +from .contracts import DOCS_URL +from .mcp_service import ( + CodeCloneMCPService, + MCPAnalysisRequest, + MCPGateRequest, +) + +if TYPE_CHECKING: + from mcp.server.fastmcp import FastMCP + from mcp.types import ToolAnnotations + +_SERVER_INSTRUCTIONS = ( + "CodeClone MCP is a deterministic, baseline-aware, read-only analysis server " + "for Python repositories. Use analyze_repository first, then query the latest " + "or a specific run with summary, finding, hotspot, gate, and report-section " + "tools. This server never updates baselines and never mutates source files." +) +_MCP_INSTALL_HINT = ( + "CodeClone MCP support requires the optional 'mcp' extra. 
" + "Install it with: pip install 'codeclone[mcp]'" +) + + +class MCPDependencyError(RuntimeError): + """Raised when the optional MCP runtime dependency is unavailable.""" + + +def _load_mcp_runtime() -> tuple[type[FastMCP], ToolAnnotations]: + try: + from mcp.server.fastmcp import FastMCP as runtime_fastmcp + from mcp.types import ToolAnnotations as runtime_tool_annotations + except ImportError as exc: + raise MCPDependencyError(_MCP_INSTALL_HINT) from exc + return ( + cast("type[FastMCP]", runtime_fastmcp), + runtime_tool_annotations( + readOnlyHint=True, + destructiveHint=False, + idempotentHint=True, + openWorldHint=False, + ), + ) + + +def build_mcp_server( + *, + history_limit: int = 16, + host: str = "127.0.0.1", + port: int = 8000, + json_response: bool = False, + stateless_http: bool = False, + debug: bool = False, + log_level: Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] = "INFO", +) -> FastMCP: + runtime_fastmcp, read_only_tool = _load_mcp_runtime() + service = CodeCloneMCPService(history_limit=history_limit) + mcp = runtime_fastmcp( + name="CodeClone", + instructions=_SERVER_INSTRUCTIONS, + website_url=DOCS_URL, + host=host, + port=port, + json_response=json_response, + stateless_http=stateless_http, + debug=debug, + log_level=log_level, + dependencies=(f"codeclone=={__version__}",), + ) + + @mcp.tool( + title="Analyze Repository", + description=( + "Run a deterministic CodeClone analysis for a repository and register " + "the result as the latest MCP run." 
+ ), + annotations=read_only_tool, + structured_output=True, + ) + def analyze_repository( + root: str = ".", + analysis_mode: str = "full", + respect_pyproject: bool = True, + processes: int | None = None, + min_loc: int | None = None, + min_stmt: int | None = None, + block_min_loc: int | None = None, + block_min_stmt: int | None = None, + segment_min_loc: int | None = None, + segment_min_stmt: int | None = None, + baseline_path: str | None = None, + metrics_baseline_path: str | None = None, + max_baseline_size_mb: int | None = None, + cache_policy: str = "reuse", + cache_path: str | None = None, + max_cache_size_mb: int | None = None, + ) -> dict[str, object]: + return service.analyze_repository( + MCPAnalysisRequest( + root=root, + analysis_mode=analysis_mode, # type: ignore[arg-type] + respect_pyproject=respect_pyproject, + processes=processes, + min_loc=min_loc, + min_stmt=min_stmt, + block_min_loc=block_min_loc, + block_min_stmt=block_min_stmt, + segment_min_loc=segment_min_loc, + segment_min_stmt=segment_min_stmt, + baseline_path=baseline_path, + metrics_baseline_path=metrics_baseline_path, + max_baseline_size_mb=max_baseline_size_mb, + cache_policy=cache_policy, # type: ignore[arg-type] + cache_path=cache_path, + max_cache_size_mb=max_cache_size_mb, + ) + ) + + @mcp.tool( + title="Get Run Summary", + description="Return the stored summary for the latest or specified MCP run.", + annotations=read_only_tool, + structured_output=True, + ) + def get_run_summary(run_id: str | None = None) -> dict[str, object]: + return service.get_run_summary(run_id) + + @mcp.tool( + title="Evaluate Gates", + description=( + "Evaluate CodeClone gate conditions against an existing MCP run without " + "modifying baselines or exiting the process." 
+ ), + annotations=read_only_tool, + structured_output=True, + ) + def evaluate_gates( + run_id: str | None = None, + fail_on_new: bool = False, + fail_threshold: int = -1, + fail_complexity: int = -1, + fail_coupling: int = -1, + fail_cohesion: int = -1, + fail_cycles: bool = False, + fail_dead_code: bool = False, + fail_health: int = -1, + fail_on_new_metrics: bool = False, + ) -> dict[str, object]: + return service.evaluate_gates( + MCPGateRequest( + run_id=run_id, + fail_on_new=fail_on_new, + fail_threshold=fail_threshold, + fail_complexity=fail_complexity, + fail_coupling=fail_coupling, + fail_cohesion=fail_cohesion, + fail_cycles=fail_cycles, + fail_dead_code=fail_dead_code, + fail_health=fail_health, + fail_on_new_metrics=fail_on_new_metrics, + ) + ) + + @mcp.tool( + title="Get Report Section", + description=( + "Return a canonical CodeClone report section for the latest or " + "specified MCP run." + ), + annotations=read_only_tool, + structured_output=True, + ) + def get_report_section( + run_id: str | None = None, + section: str = "all", + ) -> dict[str, object]: + return service.get_report_section( + run_id=run_id, + section=section, # type: ignore[arg-type] + ) + + @mcp.tool( + title="List Findings", + description=( + "List canonical finding groups with deterministic ordering, optional " + "filters, and pagination." 
+ ), + annotations=read_only_tool, + structured_output=True, + ) + def list_findings( + run_id: str | None = None, + family: str = "all", + severity: str | None = None, + source_kind: str | None = None, + novelty: str = "all", + offset: int = 0, + limit: int = 50, + ) -> dict[str, object]: + return service.list_findings( + run_id=run_id, + family=family, # type: ignore[arg-type] + severity=severity, + source_kind=source_kind, + novelty=novelty, # type: ignore[arg-type] + offset=offset, + limit=limit, + ) + + @mcp.tool( + title="Get Finding", + description="Return a single canonical finding group by id.", + annotations=read_only_tool, + structured_output=True, + ) + def get_finding( + finding_id: str, + run_id: str | None = None, + ) -> dict[str, object]: + return service.get_finding(finding_id=finding_id, run_id=run_id) + + @mcp.tool( + title="List Hotspots", + description=( + "Return one of the derived CodeClone hotlists for the latest or " + "specified MCP run." + ), + annotations=read_only_tool, + structured_output=True, + ) + def list_hotspots( + kind: str, + run_id: str | None = None, + limit: int = 10, + ) -> dict[str, object]: + return service.list_hotspots( + kind=kind, # type: ignore[arg-type] + run_id=run_id, + limit=limit, + ) + + @mcp.resource( + "codeclone://latest/summary", + title="Latest Run Summary", + description="Canonical JSON summary for the latest CodeClone MCP run.", + mime_type="application/json", + ) + def latest_summary_resource() -> str: + return service.read_resource("codeclone://latest/summary") + + @mcp.resource( + "codeclone://latest/report.json", + title="Latest Canonical Report", + description="Canonical JSON report for the latest CodeClone MCP run.", + mime_type="application/json", + ) + def latest_report_resource() -> str: + return service.read_resource("codeclone://latest/report.json") + + @mcp.resource( + "codeclone://runs/{run_id}/summary", + title="Run Summary", + description="Canonical JSON summary for a specific CodeClone 
MCP run.", + mime_type="application/json", + ) + def run_summary_resource(run_id: str) -> str: + return service.read_resource(f"codeclone://runs/{run_id}/summary") + + @mcp.resource( + "codeclone://runs/{run_id}/report.json", + title="Run Canonical Report", + description="Canonical JSON report for a specific CodeClone MCP run.", + mime_type="application/json", + ) + def run_report_resource(run_id: str) -> str: + return service.read_resource(f"codeclone://runs/{run_id}/report.json") + + @mcp.resource( + "codeclone://runs/{run_id}/findings/{finding_id}", + title="Run Finding", + description="Canonical JSON finding group for a specific CodeClone MCP run.", + mime_type="application/json", + ) + def run_finding_resource(run_id: str, finding_id: str) -> str: + return service.read_resource(f"codeclone://runs/{run_id}/findings/{finding_id}") + + return mcp + + +def build_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser( + prog="codeclone-mcp", + description=( + "CodeClone MCP server for deterministic, baseline-aware, read-only " + "analysis of Python repositories." + ), + ) + parser.add_argument( + "--transport", + choices=("stdio", "streamable-http"), + default="stdio", + help="MCP transport to run. 
Defaults to stdio.", + ) + parser.add_argument( + "--host", + default="127.0.0.1", + help="Host to bind when using streamable-http.", + ) + parser.add_argument( + "--port", + type=int, + default=8000, + help="Port to bind when using streamable-http.", + ) + parser.add_argument( + "--history-limit", + type=int, + default=16, + help="Maximum number of in-memory analysis runs retained by the server.", + ) + parser.add_argument( + "--json-response", + action=argparse.BooleanOptionalAction, + default=True, + help="Use JSON responses for streamable-http transport.", + ) + parser.add_argument( + "--stateless-http", + action=argparse.BooleanOptionalAction, + default=True, + help="Use stateless Streamable HTTP mode when transport is streamable-http.", + ) + parser.add_argument( + "--debug", + action=argparse.BooleanOptionalAction, + default=False, + help="Enable FastMCP debug mode.", + ) + parser.add_argument( + "--log-level", + choices=("DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"), + default="INFO", + help="FastMCP server log level.", + ) + return parser + + +def main() -> None: + args = build_parser().parse_args() + try: + server = build_mcp_server( + history_limit=args.history_limit, + host=args.host, + port=args.port, + json_response=args.json_response, + stateless_http=args.stateless_http, + debug=args.debug, + log_level=args.log_level, + ) + except MCPDependencyError as exc: + print(str(exc), file=sys.stderr) + raise SystemExit(2) from exc + try: + server.run(transport=args.transport) + except KeyboardInterrupt: + return + + +__all__ = [ + "MCPDependencyError", + "build_mcp_server", + "build_parser", + "main", +] diff --git a/codeclone/mcp_service.py b/codeclone/mcp_service.py new file mode 100644 index 0000000..c642498 --- /dev/null +++ b/codeclone/mcp_service.py @@ -0,0 +1,976 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import json +from argparse import Namespace +from collections import 
OrderedDict +from collections.abc import Mapping, Sequence +from dataclasses import dataclass +from pathlib import Path +from threading import RLock +from typing import Literal, cast + +from . import __version__ +from ._cli_args import ( + DEFAULT_BASELINE_PATH, + DEFAULT_BLOCK_MIN_LOC, + DEFAULT_BLOCK_MIN_STMT, + DEFAULT_MAX_BASELINE_SIZE_MB, + DEFAULT_MAX_CACHE_SIZE_MB, + DEFAULT_MIN_LOC, + DEFAULT_MIN_STMT, + DEFAULT_PROCESSES, + DEFAULT_ROOT, + DEFAULT_SEGMENT_MIN_LOC, + DEFAULT_SEGMENT_MIN_STMT, +) +from ._cli_baselines import ( + CloneBaselineState, + MetricsBaselineState, + probe_metrics_baseline_section, + resolve_clone_baseline_state, + resolve_metrics_baseline_state, +) +from ._cli_config import ConfigValidationError, load_pyproject_config +from ._cli_meta import _build_report_meta, _current_report_timestamp_utc +from ._cli_runtime import ( + resolve_cache_path, + resolve_cache_status, + validate_numeric_args, +) +from .baseline import Baseline +from .cache import Cache, CacheStatus, build_segment_report_projection +from .contracts import REPORT_SCHEMA_VERSION +from .errors import CacheError +from .models import MetricsDiff +from .normalize import NormalizationConfig +from .pipeline import ( + AnalysisResult, + BootstrapResult, + OutputPaths, + analyze, + bootstrap, + discover, + gate, + process, + report, +) +from .report.overview import materialize_report_overview + +AnalysisMode = Literal["full", "clones_only"] +CachePolicy = Literal["reuse", "refresh", "off"] +HotlistKind = Literal[ + "most_actionable", + "highest_spread", + "production_hotspots", + "test_fixture_hotspots", +] +FindingFamilyFilter = Literal["all", "clone", "structural", "dead_code", "design"] +FindingNoveltyFilter = Literal["all", "new", "known"] +ReportSection = Literal[ + "all", + "meta", + "inventory", + "findings", + "metrics", + "derived", + "integrity", +] + +_LEGACY_CACHE_PATH = Path("~/.cache/codeclone/cache.json").expanduser() +_REPORT_DUMMY_PATH = 
Path(".cache/codeclone/report.json") +_MCP_CONFIG_KEYS = frozenset( + { + "min_loc", + "min_stmt", + "block_min_loc", + "block_min_stmt", + "segment_min_loc", + "segment_min_stmt", + "processes", + "cache_path", + "max_cache_size_mb", + "baseline", + "max_baseline_size_mb", + "metrics_baseline", + } +) +_RESOURCE_SECTION_MAP: dict[str, ReportSection] = { + "report.json": "all", + "summary": "meta", + "overview": "derived", +} + + +class MCPServiceError(RuntimeError): + """Base class for CodeClone MCP service errors.""" + + +class MCPServiceContractError(MCPServiceError): + """Raised when an MCP request violates the CodeClone service contract.""" + + +class MCPRunNotFoundError(MCPServiceError): + """Raised when a requested MCP run is not available in the in-memory registry.""" + + +class MCPFindingNotFoundError(MCPServiceError): + """Raised when a requested finding id is not present in the selected run.""" + + +class _BufferConsole: + def __init__(self) -> None: + self.messages: list[str] = [] + + def print(self, *objects: object, **_kwargs: object) -> None: + text = " ".join(str(obj) for obj in objects).strip() + if text: + self.messages.append(text) + + +@dataclass(frozen=True, slots=True) +class MCPAnalysisRequest: + root: str = DEFAULT_ROOT + analysis_mode: AnalysisMode = "full" + respect_pyproject: bool = True + processes: int | None = None + min_loc: int | None = None + min_stmt: int | None = None + block_min_loc: int | None = None + block_min_stmt: int | None = None + segment_min_loc: int | None = None + segment_min_stmt: int | None = None + baseline_path: str | None = None + metrics_baseline_path: str | None = None + max_baseline_size_mb: int | None = None + cache_policy: CachePolicy = "reuse" + cache_path: str | None = None + max_cache_size_mb: int | None = None + + +@dataclass(frozen=True, slots=True) +class MCPGateRequest: + run_id: str | None = None + fail_on_new: bool = False + fail_threshold: int = -1 + fail_complexity: int = -1 + fail_coupling: int = 
-1 + fail_cohesion: int = -1 + fail_cycles: bool = False + fail_dead_code: bool = False + fail_health: int = -1 + fail_on_new_metrics: bool = False + + +@dataclass(frozen=True, slots=True) +class MCPRunRecord: + run_id: str + root: Path + request: MCPAnalysisRequest + report_document: dict[str, object] + report_json: str + summary: dict[str, object] + warnings: tuple[str, ...] + failures: tuple[str, ...] + analysis: AnalysisResult + new_func: frozenset[str] + new_block: frozenset[str] + metrics_diff: MetricsDiff | None + + +class CodeCloneMCPRunStore: + def __init__(self, *, history_limit: int = 16) -> None: + self._history_limit = max(1, history_limit) + self._lock = RLock() + self._records: OrderedDict[str, MCPRunRecord] = OrderedDict() + self._latest_run_id: str | None = None + + def register(self, record: MCPRunRecord) -> MCPRunRecord: + with self._lock: + self._records.pop(record.run_id, None) + self._records[record.run_id] = record + self._records.move_to_end(record.run_id) + self._latest_run_id = record.run_id + while len(self._records) > self._history_limit: + self._records.popitem(last=False) + return record + + def get(self, run_id: str | None = None) -> MCPRunRecord: + with self._lock: + resolved_run_id = run_id or self._latest_run_id + if resolved_run_id is None or resolved_run_id not in self._records: + raise MCPRunNotFoundError("No matching MCP analysis run is available.") + return self._records[resolved_run_id] + + +class CodeCloneMCPService: + def __init__(self, *, history_limit: int = 16) -> None: + self._runs = CodeCloneMCPRunStore(history_limit=history_limit) + + def analyze_repository(self, request: MCPAnalysisRequest) -> dict[str, object]: + root_path = self._resolve_root(request.root) + args = self._build_args(root_path=root_path, request=request) + ( + baseline_path, + baseline_exists, + metrics_baseline_path, + metrics_baseline_exists, + shared_baseline_payload, + ) = self._resolve_baseline_inputs(root_path=root_path, args=args) + cache_path 
= self._resolve_cache_path(root_path=root_path, args=args) + cache = self._build_cache( + root_path=root_path, + args=args, + cache_path=cache_path, + policy=request.cache_policy, + ) + console = _BufferConsole() + + boot = bootstrap( + args=args, + root=root_path, + output_paths=OutputPaths(json=_REPORT_DUMMY_PATH), + cache_path=cache_path, + ) + discovery_result = discover(boot=boot, cache=cache) + processing_result = process(boot=boot, discovery=discovery_result, cache=cache) + analysis_result = analyze( + boot=boot, + discovery=discovery_result, + processing=processing_result, + ) + + if request.cache_policy == "refresh": + self._refresh_cache_projection(cache=cache, analysis=analysis_result) + try: + cache.save() + except CacheError as exc: + console.print(f"Cache save failed: {exc}") + + clone_baseline_state = resolve_clone_baseline_state( + args=args, + baseline_path=baseline_path, + baseline_exists=baseline_exists, + func_groups=analysis_result.func_groups, + block_groups=analysis_result.block_groups, + codeclone_version=__version__, + console=console, + shared_baseline_payload=( + shared_baseline_payload + if metrics_baseline_path == baseline_path + else None + ), + ) + metrics_baseline_state = resolve_metrics_baseline_state( + args=args, + metrics_baseline_path=metrics_baseline_path, + metrics_baseline_exists=metrics_baseline_exists, + baseline_updated_path=clone_baseline_state.updated_path, + project_metrics=analysis_result.project_metrics, + console=console, + shared_baseline_payload=( + shared_baseline_payload + if metrics_baseline_path == baseline_path + else None + ), + ) + + cache_status, cache_schema_version = resolve_cache_status(cache) + report_meta = _build_report_meta( + codeclone_version=__version__, + scan_root=root_path, + baseline_path=baseline_path, + baseline=clone_baseline_state.baseline, + baseline_loaded=clone_baseline_state.loaded, + baseline_status=clone_baseline_state.status.value, + cache_path=cache_path, + cache_used=cache_status 
== CacheStatus.OK, + cache_status=cache_status.value, + cache_schema_version=cache_schema_version, + files_skipped_source_io=len(processing_result.source_read_failures), + metrics_baseline_path=metrics_baseline_path, + metrics_baseline=metrics_baseline_state.baseline, + metrics_baseline_loaded=metrics_baseline_state.loaded, + metrics_baseline_status=metrics_baseline_state.status.value, + health_score=( + analysis_result.project_metrics.health.total + if analysis_result.project_metrics is not None + else None + ), + health_grade=( + analysis_result.project_metrics.health.grade + if analysis_result.project_metrics is not None + else None + ), + analysis_mode=request.analysis_mode, + metrics_computed=self._metrics_computed(request.analysis_mode), + report_generated_at_utc=_current_report_timestamp_utc(), + ) + + baseline_for_diff = ( + clone_baseline_state.baseline + if clone_baseline_state.trusted_for_diff + else Baseline(baseline_path) + ) + new_func, new_block = baseline_for_diff.diff( + analysis_result.func_groups, + analysis_result.block_groups, + ) + metrics_diff = None + if ( + analysis_result.project_metrics is not None + and metrics_baseline_state.trusted_for_diff + ): + metrics_diff = metrics_baseline_state.baseline.diff( + analysis_result.project_metrics + ) + + report_artifacts = report( + boot=boot, + discovery=discovery_result, + processing=processing_result, + analysis=analysis_result, + report_meta=report_meta, + new_func=new_func, + new_block=new_block, + metrics_diff=metrics_diff, + ) + report_json = report_artifacts.json + if report_json is None: + raise MCPServiceError("CodeClone MCP expected a canonical JSON report.") + report_document = self._load_report_document(report_json) + run_id = self._report_digest(report_document) + + warning_items = set(console.messages) + if cache.load_warning: + warning_items.add(cache.load_warning) + warning_items.update(discovery_result.skipped_warnings) + warnings = tuple(sorted(warning_items)) + failures = tuple( 
+ sorted( + { + *processing_result.failed_files, + *processing_result.source_read_failures, + } + ) + ) + + summary = self._build_run_summary_payload( + run_id=run_id, + root_path=root_path, + request=request, + report_document=report_document, + baseline_state=clone_baseline_state, + metrics_baseline_state=metrics_baseline_state, + cache_status=cache_status, + new_func=new_func, + new_block=new_block, + metrics_diff=metrics_diff, + warnings=warnings, + failures=failures, + ) + record = MCPRunRecord( + run_id=run_id, + root=root_path, + request=request, + report_document=report_document, + report_json=report_json, + summary=summary, + warnings=warnings, + failures=failures, + analysis=analysis_result, + new_func=frozenset(new_func), + new_block=frozenset(new_block), + metrics_diff=metrics_diff, + ) + self._runs.register(record) + return summary + + def get_run_summary(self, run_id: str | None = None) -> dict[str, object]: + return dict(self._runs.get(run_id).summary) + + def evaluate_gates(self, request: MCPGateRequest) -> dict[str, object]: + record = self._runs.get(request.run_id) + gate_args = Namespace( + fail_on_new=request.fail_on_new, + fail_threshold=request.fail_threshold, + fail_complexity=request.fail_complexity, + fail_coupling=request.fail_coupling, + fail_cohesion=request.fail_cohesion, + fail_cycles=request.fail_cycles, + fail_dead_code=request.fail_dead_code, + fail_health=request.fail_health, + fail_on_new_metrics=request.fail_on_new_metrics, + ) + boot = BootstrapResult( + root=record.root, + config=NormalizationConfig(), + args=gate_args, + output_paths=OutputPaths(), + cache_path=_REPORT_DUMMY_PATH, + ) + gate_result = gate( + boot=boot, + analysis=record.analysis, + new_func=record.new_func, + new_block=record.new_block, + metrics_diff=record.metrics_diff, + ) + return { + "run_id": record.run_id, + "would_fail": gate_result.exit_code != 0, + "exit_code": gate_result.exit_code, + "reasons": list(gate_result.reasons), + "config": { + 
"fail_on_new": request.fail_on_new, + "fail_threshold": request.fail_threshold, + "fail_complexity": request.fail_complexity, + "fail_coupling": request.fail_coupling, + "fail_cohesion": request.fail_cohesion, + "fail_cycles": request.fail_cycles, + "fail_dead_code": request.fail_dead_code, + "fail_health": request.fail_health, + "fail_on_new_metrics": request.fail_on_new_metrics, + }, + } + + def get_report_section( + self, + *, + run_id: str | None = None, + section: ReportSection = "all", + ) -> dict[str, object]: + report_document = self._runs.get(run_id).report_document + if section == "all": + return dict(report_document) + payload = report_document.get(section) + if not isinstance(payload, Mapping): + raise MCPServiceContractError( + f"Report section '{section}' is not available in this run." + ) + return dict(payload) + + def list_findings( + self, + *, + run_id: str | None = None, + family: FindingFamilyFilter = "all", + severity: str | None = None, + source_kind: str | None = None, + novelty: FindingNoveltyFilter = "all", + offset: int = 0, + limit: int = 50, + ) -> dict[str, object]: + record = self._runs.get(run_id) + findings = self._flatten_findings(record.report_document) + filtered = [ + finding + for finding in findings + if self._matches_finding_filters( + finding=finding, + family=family, + severity=severity, + source_kind=source_kind, + novelty=novelty, + ) + ] + total = len(filtered) + normalized_offset = max(0, offset) + normalized_limit = max(1, min(limit, 200)) + items = filtered[normalized_offset : normalized_offset + normalized_limit] + next_offset = normalized_offset + len(items) + return { + "run_id": record.run_id, + "offset": normalized_offset, + "limit": normalized_limit, + "returned": len(items), + "total": total, + "next_offset": next_offset if next_offset < total else None, + "items": items, + } + + def get_finding( + self, + *, + finding_id: str, + run_id: str | None = None, + ) -> dict[str, object]: + record = 
self._runs.get(run_id) + for finding in self._flatten_findings(record.report_document): + if str(finding.get("id")) == finding_id: + return finding + raise MCPFindingNotFoundError( + f"Finding id '{finding_id}' was not found in run '{record.run_id}'." + ) + + def list_hotspots( + self, + *, + kind: HotlistKind, + run_id: str | None = None, + limit: int = 10, + ) -> dict[str, object]: + record = self._runs.get(run_id) + derived = self._as_mapping(record.report_document.get("derived")) + materialized = materialize_report_overview( + overview=self._as_mapping(derived.get("overview")), + hotlists=self._as_mapping(derived.get("hotlists")), + findings=self._as_mapping(record.report_document.get("findings")), + ) + rows = self._as_sequence(materialized.get(kind)) + normalized_limit = max(1, min(limit, 50)) + return { + "run_id": record.run_id, + "kind": kind, + "returned": min(len(rows), normalized_limit), + "total": len(rows), + "items": [dict(self._as_mapping(item)) for item in rows[:normalized_limit]], + } + + def read_resource(self, uri: str) -> str: + latest_prefix = "codeclone://latest/" + run_prefix = "codeclone://runs/" + if uri.startswith(latest_prefix): + latest = self._runs.get() + suffix = uri[len(latest_prefix) :] + return self._render_resource(latest, suffix) + if not uri.startswith(run_prefix): + raise MCPServiceContractError(f"Unsupported CodeClone resource URI: {uri}") + remainder = uri[len(run_prefix) :] + run_id, sep, suffix = remainder.partition("/") + if not sep: + raise MCPServiceContractError(f"Unsupported CodeClone resource URI: {uri}") + record = self._runs.get(run_id) + return self._render_resource(record, suffix) + + def _render_resource(self, record: MCPRunRecord, suffix: str) -> str: + if suffix == "summary": + return json.dumps( + record.summary, + ensure_ascii=False, + indent=2, + sort_keys=True, + ) + if suffix == "report.json": + return record.report_json + if suffix == "overview": + return json.dumps( + 
self.list_hotspots(kind="highest_spread", run_id=record.run_id), + ensure_ascii=False, + indent=2, + sort_keys=True, + ) + finding_prefix = "findings/" + if suffix.startswith(finding_prefix): + finding_id = suffix[len(finding_prefix) :] + return json.dumps( + self.get_finding(run_id=record.run_id, finding_id=finding_id), + ensure_ascii=False, + indent=2, + sort_keys=True, + ) + raise MCPServiceContractError( + f"Unsupported CodeClone resource suffix '{suffix}'." + ) + + def _resolve_root(self, root: str) -> Path: + try: + root_path = Path(root).expanduser().resolve() + except OSError as exc: + raise MCPServiceContractError(f"Invalid root path '{root}': {exc}") from exc + if not root_path.exists(): + raise MCPServiceContractError(f"Root path does not exist: {root_path}") + if not root_path.is_dir(): + raise MCPServiceContractError(f"Root path is not a directory: {root_path}") + return root_path + + def _build_args(self, *, root_path: Path, request: MCPAnalysisRequest) -> Namespace: + args = Namespace( + root=str(root_path), + min_loc=DEFAULT_MIN_LOC, + min_stmt=DEFAULT_MIN_STMT, + block_min_loc=DEFAULT_BLOCK_MIN_LOC, + block_min_stmt=DEFAULT_BLOCK_MIN_STMT, + segment_min_loc=DEFAULT_SEGMENT_MIN_LOC, + segment_min_stmt=DEFAULT_SEGMENT_MIN_STMT, + processes=DEFAULT_PROCESSES, + cache_path=None, + max_cache_size_mb=DEFAULT_MAX_CACHE_SIZE_MB, + baseline=DEFAULT_BASELINE_PATH, + max_baseline_size_mb=DEFAULT_MAX_BASELINE_SIZE_MB, + update_baseline=False, + fail_on_new=False, + fail_threshold=-1, + ci=False, + fail_complexity=-1, + fail_coupling=-1, + fail_cohesion=-1, + fail_cycles=False, + fail_dead_code=False, + fail_health=-1, + fail_on_new_metrics=False, + update_metrics_baseline=False, + metrics_baseline=DEFAULT_BASELINE_PATH, + skip_metrics=False, + skip_dead_code=False, + skip_dependencies=False, + html_out=None, + json_out=None, + md_out=None, + sarif_out=None, + text_out=None, + no_progress=True, + no_color=True, + quiet=True, + verbose=False, + debug=False, + 
open_html_report=False, + timestamped_report_paths=False, + ) + if request.respect_pyproject: + try: + config_values = load_pyproject_config(root_path) + except ConfigValidationError as exc: + raise MCPServiceContractError(str(exc)) from exc + for key in sorted(_MCP_CONFIG_KEYS.intersection(config_values)): + setattr(args, key, config_values[key]) + + self._apply_request_overrides(args=args, root_path=root_path, request=request) + + if request.analysis_mode == "clones_only": + args.skip_metrics = True + args.skip_dead_code = True + args.skip_dependencies = True + else: + args.skip_metrics = False + args.skip_dead_code = False + args.skip_dependencies = False + + if not validate_numeric_args(args): + raise MCPServiceContractError( + "Numeric analysis settings must be non-negative and thresholds " + "must be >= -1." + ) + + return args + + def _apply_request_overrides( + self, + *, + args: Namespace, + root_path: Path, + request: MCPAnalysisRequest, + ) -> None: + override_map: dict[str, object | None] = { + "processes": request.processes, + "min_loc": request.min_loc, + "min_stmt": request.min_stmt, + "block_min_loc": request.block_min_loc, + "block_min_stmt": request.block_min_stmt, + "segment_min_loc": request.segment_min_loc, + "segment_min_stmt": request.segment_min_stmt, + "max_baseline_size_mb": request.max_baseline_size_mb, + "max_cache_size_mb": request.max_cache_size_mb, + } + for key, value in override_map.items(): + if value is not None: + setattr(args, key, value) + + if request.baseline_path is not None: + args.baseline = str( + self._resolve_optional_path(request.baseline_path, root_path) + ) + if request.metrics_baseline_path is not None: + args.metrics_baseline = str( + self._resolve_optional_path(request.metrics_baseline_path, root_path) + ) + if request.cache_path is not None: + args.cache_path = str( + self._resolve_optional_path(request.cache_path, root_path) + ) + + def _resolve_optional_path(self, value: str, root_path: Path) -> Path: + 
candidate = Path(value).expanduser() + resolved = candidate if candidate.is_absolute() else root_path / candidate + try: + return resolved.resolve() + except OSError as exc: + raise MCPServiceContractError( + f"Invalid path '{value}' relative to '{root_path}': {exc}" + ) from exc + + def _resolve_baseline_inputs( + self, + *, + root_path: Path, + args: Namespace, + ) -> tuple[Path, bool, Path, bool, dict[str, object] | None]: + baseline_path = self._resolve_optional_path(str(args.baseline), root_path) + baseline_exists = baseline_path.exists() + + metrics_baseline_arg_path = self._resolve_optional_path( + str(args.metrics_baseline), + root_path, + ) + shared_baseline_payload: dict[str, object] | None = None + if metrics_baseline_arg_path == baseline_path: + probe = probe_metrics_baseline_section(metrics_baseline_arg_path) + metrics_baseline_exists = probe.has_metrics_section + shared_baseline_payload = probe.payload + else: + metrics_baseline_exists = metrics_baseline_arg_path.exists() + + return ( + baseline_path, + baseline_exists, + metrics_baseline_arg_path, + metrics_baseline_exists, + shared_baseline_payload, + ) + + def _resolve_cache_path(self, *, root_path: Path, args: Namespace) -> Path: + return resolve_cache_path( + root_path=root_path, + args=args, + from_args=bool(args.cache_path), + legacy_cache_path=_LEGACY_CACHE_PATH, + console=_BufferConsole(), + ) + + def _build_cache( + self, + *, + root_path: Path, + args: Namespace, + cache_path: Path, + policy: CachePolicy, + ) -> Cache: + cache = Cache( + cache_path, + root=root_path, + max_size_bytes=int(args.max_cache_size_mb) * 1024 * 1024, + min_loc=int(args.min_loc), + min_stmt=int(args.min_stmt), + block_min_loc=int(args.block_min_loc), + block_min_stmt=int(args.block_min_stmt), + segment_min_loc=int(args.segment_min_loc), + segment_min_stmt=int(args.segment_min_stmt), + ) + if policy != "off": + cache.load() + return cache + + def _refresh_cache_projection( + self, + *, + cache: Cache, + analysis: 
AnalysisResult, + ) -> None: + if not hasattr(cache, "segment_report_projection"): + return + new_projection = build_segment_report_projection( + suppressed=analysis.suppressed_segment_groups, + digest=analysis.segment_groups_raw_digest, + groups=analysis.segment_groups, + ) + if new_projection != cache.segment_report_projection: + cache.segment_report_projection = new_projection + + def _metrics_computed(self, analysis_mode: AnalysisMode) -> tuple[str, ...]: + return ( + () + if analysis_mode == "clones_only" + else ( + "complexity", + "coupling", + "cohesion", + "health", + "dependencies", + "dead_code", + ) + ) + + def _load_report_document(self, report_json: str) -> dict[str, object]: + try: + payload = json.loads(report_json) + except json.JSONDecodeError as exc: + raise MCPServiceError( + f"Generated canonical report is not valid JSON: {exc}" + ) from exc + if not isinstance(payload, dict): + raise MCPServiceError("Generated canonical report must be a JSON object.") + return dict(payload) + + def _report_digest(self, report_document: Mapping[str, object]) -> str: + integrity = self._as_mapping(report_document.get("integrity")) + digest = self._as_mapping(integrity.get("digest")) + value = digest.get("value") + if not isinstance(value, str) or not value: + raise MCPServiceError("Canonical report digest is missing.") + return value + + def _build_run_summary_payload( + self, + *, + run_id: str, + root_path: Path, + request: MCPAnalysisRequest, + report_document: Mapping[str, object], + baseline_state: CloneBaselineState, + metrics_baseline_state: MetricsBaselineState, + cache_status: CacheStatus, + new_func: Sequence[str] | set[str], + new_block: Sequence[str] | set[str], + metrics_diff: MetricsDiff | None, + warnings: Sequence[str], + failures: Sequence[str], + ) -> dict[str, object]: + meta = self._as_mapping(report_document.get("meta")) + meta_baseline = self._as_mapping(meta.get("baseline")) + meta_metrics_baseline = 
self._as_mapping(meta.get("metrics_baseline")) + meta_cache = self._as_mapping(meta.get("cache")) + inventory = self._as_mapping(report_document.get("inventory")) + findings = self._as_mapping(report_document.get("findings")) + metrics = self._as_mapping(report_document.get("metrics")) + metrics_summary = self._as_mapping(metrics.get("summary")) + summary = self._as_mapping(findings.get("summary")) + return { + "run_id": run_id, + "root": str(root_path), + "analysis_mode": request.analysis_mode, + "codeclone_version": meta.get("codeclone_version", __version__), + "report_schema_version": report_document.get( + "report_schema_version", + REPORT_SCHEMA_VERSION, + ), + "baseline": { + "path": meta_baseline.get( + "path", + str(root_path / DEFAULT_BASELINE_PATH), + ), + "loaded": bool(meta_baseline.get("loaded", baseline_state.loaded)), + "status": str(meta_baseline.get("status", baseline_state.status.value)), + "trusted_for_diff": baseline_state.trusted_for_diff, + }, + "metrics_baseline": { + "path": meta_metrics_baseline.get( + "path", + str(root_path / DEFAULT_BASELINE_PATH), + ), + "loaded": bool( + meta_metrics_baseline.get( + "loaded", + metrics_baseline_state.loaded, + ) + ), + "status": str( + meta_metrics_baseline.get( + "status", + metrics_baseline_state.status.value, + ) + ), + "trusted_for_diff": metrics_baseline_state.trusted_for_diff, + }, + "cache": { + "path": meta_cache.get("path"), + "status": str(meta_cache.get("status", cache_status.value)), + "used": bool(meta_cache.get("used", False)), + "schema_version": meta_cache.get("schema_version"), + }, + "inventory": dict(inventory), + "findings_summary": dict(summary), + "health": dict(self._as_mapping(metrics_summary.get("health"))), + "baseline_diff": { + "new_function_clone_groups": len(new_func), + "new_block_clone_groups": len(new_block), + "new_clone_groups_total": len(new_func) + len(new_block), + }, + "metrics_diff": self._metrics_diff_payload(metrics_diff), + "warnings": list(warnings), + 
"failures": list(failures), + } + + def _metrics_diff_payload( + self, + metrics_diff: MetricsDiff | None, + ) -> dict[str, object] | None: + if metrics_diff is None: + return None + new_high_risk_functions = tuple( + cast(Sequence[str], getattr(metrics_diff, "new_high_risk_functions", ())) + ) + new_high_coupling_classes = tuple( + cast(Sequence[str], getattr(metrics_diff, "new_high_coupling_classes", ())) + ) + new_cycles = tuple( + cast(Sequence[object], getattr(metrics_diff, "new_cycles", ())) + ) + new_dead_code = tuple( + cast(Sequence[str], getattr(metrics_diff, "new_dead_code", ())) + ) + health_delta = getattr(metrics_diff, "health_delta", 0) + return { + "new_high_risk_functions": len(new_high_risk_functions), + "new_high_coupling_classes": len(new_high_coupling_classes), + "new_cycles": len(new_cycles), + "new_dead_code": len(new_dead_code), + "health_delta": int(health_delta), + } + + def _flatten_findings( + self, + report_document: Mapping[str, object], + ) -> list[dict[str, object]]: + findings = self._as_mapping(report_document.get("findings")) + groups = self._as_mapping(findings.get("groups")) + clone_groups = self._as_mapping(groups.get("clones")) + return [ + *self._dict_list(clone_groups.get("functions")), + *self._dict_list(clone_groups.get("blocks")), + *self._dict_list(clone_groups.get("segments")), + *self._dict_list(self._as_mapping(groups.get("structural")).get("groups")), + *self._dict_list(self._as_mapping(groups.get("dead_code")).get("groups")), + *self._dict_list(self._as_mapping(groups.get("design")).get("groups")), + ] + + def _matches_finding_filters( + self, + *, + finding: Mapping[str, object], + family: FindingFamilyFilter, + severity: str | None, + source_kind: str | None, + novelty: FindingNoveltyFilter, + ) -> bool: + finding_family = str(finding.get("family", "")).strip() + if family != "all" and finding_family != family: + return False + if ( + severity is not None + and str(finding.get("severity", "")).strip() != severity 
+ ): + return False + dominant_kind = str( + self._as_mapping(finding.get("source_scope")).get("dominant_kind", "") + ).strip() + if source_kind is not None and dominant_kind != source_kind: + return False + return novelty == "all" or str(finding.get("novelty", "")).strip() == novelty + + def _dict_list(self, value: object) -> list[dict[str, object]]: + return [dict(self._as_mapping(item)) for item in self._as_sequence(value)] + + @staticmethod + def _as_mapping(value: object) -> Mapping[str, object]: + return value if isinstance(value, Mapping) else {} + + @staticmethod + def _as_sequence(value: object) -> Sequence[object]: + if isinstance(value, Sequence) and not isinstance( + value, + (str, bytes, bytearray), + ): + return value + return () diff --git a/docs/README.md b/docs/README.md index b46ffd7..fe96524 100644 --- a/docs/README.md +++ b/docs/README.md @@ -37,6 +37,7 @@ repository build: ## Interfaces - [CLI behavior, modes, and UX](book/09-cli.md) +- [MCP interface contract](book/20-mcp-interface.md) - [HTML report rendering contract](book/10-html-render.md) ## System Properties @@ -58,6 +59,7 @@ repository build: - [Architecture narrative](architecture.md) - [CFG design and semantics](cfg.md) +- [MCP integration for AI agents and clients](mcp.md) - [SARIF integration for IDE/code-scanning use](sarif.md) - [Docs publishing and Pages workflow](publishing.md) diff --git a/docs/architecture.md b/docs/architecture.md index 1cce692..08b5ec7 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -171,6 +171,47 @@ Explainability contract (v1): --- +## 9. MCP Agent Interface + +CodeClone also exposes an optional MCP layer for AI agents and MCP-capable +clients. 
+ +Current shape: + +- install via the optional `codeclone[mcp]` extra +- launch via `codeclone-mcp` +- transports: + - `stdio` + - `streamable-http` +- semantics: + - read-only + - baseline-aware + - built on the same pipeline/report contracts as the CLI + - bounded in-memory run history + +Operational note: + +- `codeclone/mcp_server.py` is only a thin launcher/registration layer. +- The optional MCP runtime is imported lazily so the base `codeclone` install + and normal CI paths do not require MCP packages. +- `codeclone/mcp_service.py` is the in-process adapter over the existing + pipeline/report contracts. + +The MCP layer is intentionally thin. It does not add a separate analysis engine; +it adapts the existing pipeline into tools/resources such as: + +- analyze repository +- get run summary +- list findings +- inspect one finding +- list hotspots +- preview gate outcomes + +This keeps agent integrations deterministic and aligned with the same canonical +report document used by JSON/HTML/SARIF. + +--- + ## CI Integration Baseline comparison allows CI to fail **only on new clones**, diff --git a/docs/book/01-architecture-map.md b/docs/book/01-architecture-map.md index a7c0114..ecc5cff 100644 --- a/docs/book/01-architecture-map.md +++ b/docs/book/01-architecture-map.md @@ -12,6 +12,7 @@ Main ownership layers: - Quality metrics pipeline: complexity/coupling/cohesion/dependencies/dead-code/health. - Contracts and persistence: baseline, metrics baseline, cache, exit semantics. - Report model and serialization: deterministic JSON/TXT + explainability facts. +- MCP agent surface: read-only server layer over the same pipeline/report contracts. - Render layer: HTML rendering and template assets. 
## Data model @@ -27,6 +28,7 @@ Main ownership layers: | Report core | `codeclone/report/*`, `codeclone/_cli_meta.py` | Merge windows, explainability facts, deterministic JSON/TXT schema + shared metadata | | Persistence | `codeclone/baseline.py`, `codeclone/metrics_baseline.py`, `codeclone/cache.py` | Baseline/cache trust/compat/integrity and atomic persistence | | Runtime orchestration | `codeclone/pipeline.py`, `codeclone/cli.py`, `codeclone/_cli_args.py`, `codeclone/_cli_paths.py`, `codeclone/_cli_summary.py`, `codeclone/_cli_config.py`, `codeclone/ui_messages.py` | CLI UX, stage orchestration, status handling, outputs, error markers | +| MCP agent interface | `codeclone/mcp_service.py`, `codeclone/mcp_server.py` | Read-only MCP tools/resources over canonical analysis and report layers | | Rendering | `codeclone/html_report.py`, `codeclone/_html_report/*`, `codeclone/_html_badges.py`, `codeclone/_html_js.py`, `codeclone/_html_escape.py`, `codeclone/_html_snippets.py`, `codeclone/templates.py` | HTML-only view layer over report data | Refs: @@ -39,6 +41,8 @@ Refs: - Core analysis modules do not depend on render/UI modules. - HTML renderer receives already-computed report data/facts and does not recompute detection semantics. +- MCP layer reuses current pipeline/report semantics and must not introduce a + separate analysis truth path. - Baseline, metrics baseline, and cache are validated before being trusted. 
Refs: @@ -106,6 +110,7 @@ Refs: | Clone baseline trust/compat/integrity | [06-baseline.md](06-baseline.md) | | Cache trust and fail-open behavior | [07-cache.md](07-cache.md) | | Report schema and provenance | [08-report.md](08-report.md), [10-html-render.md](10-html-render.md) | +| MCP agent surface | [20-mcp-interface.md](20-mcp-interface.md) | | Metrics gates and metrics baseline | [15-metrics-and-quality-gates.md](15-metrics-and-quality-gates.md) | | Dead-code liveness policy | [16-dead-code-contract.md](16-dead-code-contract.md) | | Suggestions and clone typing | [17-suggestions-and-clone-typing.md](17-suggestions-and-clone-typing.md) | diff --git a/docs/book/08-report.md b/docs/book/08-report.md index 3f37d6e..24b7a14 100644 --- a/docs/book/08-report.md +++ b/docs/book/08-report.md @@ -2,7 +2,7 @@ ## Purpose -Define report contracts in `2.0.0b2`: canonical JSON (`report_schema_version=2.1`) +Define report contracts in `2.0.0b3`: canonical JSON (`report_schema_version=2.1`) plus deterministic TXT/Markdown/SARIF projections. ## Public surface @@ -73,6 +73,8 @@ Per-group common axes (family-specific fields may extend): - JSON is source of truth for report semantics. - Markdown and SARIF are deterministic projections from the same report document. +- MCP summary/finding/hotlist/report-section queries are deterministic views over + the same canonical report document. 
- SARIF is an IDE/code-scanning-oriented projection: - repo-relative result paths are anchored via `%SRCROOT%` - referenced files are listed under `run.artifacts` @@ -148,6 +150,7 @@ Refs: - [07-cache.md](07-cache.md) - [09-cli.md](09-cli.md) - [10-html-render.md](10-html-render.md) +- [20-mcp-interface.md](20-mcp-interface.md) - [17-suggestions-and-clone-typing.md](17-suggestions-and-clone-typing.md) - [../sarif.md](../sarif.md) - [../examples/report.md](../examples/report.md) diff --git a/docs/book/09-cli.md b/docs/book/09-cli.md index f5b15c1..10b5e3e 100644 --- a/docs/book/09-cli.md +++ b/docs/book/09-cli.md @@ -115,5 +115,6 @@ Refs: ## See also - [04-config-and-defaults.md](04-config-and-defaults.md) +- [20-mcp-interface.md](20-mcp-interface.md) - [15-metrics-and-quality-gates.md](15-metrics-and-quality-gates.md) - [16-dead-code-contract.md](16-dead-code-contract.md) diff --git a/docs/book/14-compatibility-and-versioning.md b/docs/book/14-compatibility-and-versioning.md index 563ff7d..31f7740 100644 --- a/docs/book/14-compatibility-and-versioning.md +++ b/docs/book/14-compatibility-and-versioning.md @@ -12,6 +12,7 @@ compatibility is enforced. - Metrics baseline compatibility checks: `codeclone/metrics_baseline.py:MetricsBaseline.verify_compatibility` - Cache compatibility checks: `codeclone/cache.py:Cache.load` - Report schema assignment: `codeclone/report/json_contract.py:build_report_document` +- MCP public surface: `codeclone/mcp_server.py`, `codeclone/mcp_service.py` ## Data model @@ -37,6 +38,9 @@ Version bump rules: - Bump **report schema** for canonical report document contract changes (`report_schema_version`, consumed by JSON/TXT/Markdown/SARIF and HTML provenance/view). - Bump **metrics-baseline schema** only for standalone metrics-baseline payload changes. 
+- MCP does not currently define a separate schema/version constant; tool names, + resource shapes, and documented request/response semantics are therefore + package-versioned public surface and must be documented/tested when changed. Baseline compatibility rules: diff --git a/docs/book/20-mcp-interface.md b/docs/book/20-mcp-interface.md new file mode 100644 index 0000000..3c2dcc1 --- /dev/null +++ b/docs/book/20-mcp-interface.md @@ -0,0 +1,150 @@ +# 20. MCP Interface + +## Purpose + +Define the current public MCP surface in `2.0.0b3`. + +This interface is **optional** and is installed via the `mcp` extra. It does +not replace the CLI or the canonical JSON report contract. Instead, it exposes +the existing deterministic analysis pipeline as a **read-only MCP server** for +AI agents and MCP-capable clients. + +## Public surface + +- Package extra: `codeclone[mcp]` +- MCP launcher: `codeclone-mcp` +- MCP server: `codeclone/mcp_server.py` +- MCP service adapter: `codeclone/mcp_service.py` + +## Data model + +Current server characteristics: + +- optional dependency; base `codeclone` install does not require `mcp` +- transports: + - `stdio` + - `streamable-http` +- run storage: + - in-memory only + - bounded history (`--history-limit`, default `16`) + - latest-run pointer for `codeclone://latest/...` resources +- run identity: + - `run_id` is derived from the canonical report integrity digest +- analysis modes: + - `full` + - `clones_only` +- cache policies: + - `reuse` + - `refresh` + - `off` +- summary payload: + - `run_id`, `root`, `analysis_mode` + - `baseline`, `metrics_baseline`, `cache` + - `inventory`, `findings_summary`, `health` + - `baseline_diff`, `metrics_diff` + - `warnings`, `failures` + +The MCP layer does not introduce a separate analysis engine. It calls the +current CodeClone pipeline and reuses the canonical report document already +produced by the report contract. 
+ +## Tools + +Current tool set: + +| Tool | Purpose | +|----------------------|------------------------------------------------------------------------------------------------------------------| +| `analyze_repository` | Run deterministic CodeClone analysis and register the result as the latest MCP run | +| `get_run_summary` | Return the stored summary for the latest or specified run | +| `evaluate_gates` | Evaluate CI/gating conditions against an existing run without exiting the process | +| `get_report_section` | Return a canonical report section (`meta`, `inventory`, `findings`, `metrics`, `derived`, `integrity`, or `all`) | +| `list_findings` | Return deterministically ordered finding groups with filters and pagination | +| `get_finding` | Return one canonical finding group by id | +| `list_hotspots` | Return one derived hotlist (`most_actionable`, `highest_spread`, `production_hotspots`, `test_fixture_hotspots`) | + +All current tools are registered as read-only MCP tools. + +## Resources + +Current resources: + +- `codeclone://latest/summary` +- `codeclone://latest/report.json` +- `codeclone://runs/{run_id}/summary` +- `codeclone://runs/{run_id}/report.json` +- `codeclone://runs/{run_id}/findings/{finding_id}` + +Resources are convenience views over already registered runs. They do not +trigger fresh analysis by themselves. + +## Contracts + +- MCP is **read-only**: + - no source-file mutation + - no baseline update + - no metrics-baseline update +- MCP must reuse current: + - pipeline stages + - baseline trust semantics + - cache semantics + - canonical report contract +- `get_run_summary` is a deterministic convenience projection derived from the + canonical report (`meta`, `inventory`, `findings.summary`, + `metrics.summary.health`) plus baseline-diff/gate context. +- Canonical JSON remains the source of truth for report semantics. 
+- `list_findings` and `list_hotspots` are deterministic projections over the + canonical report, not a separate analysis branch. +- `analysis_mode="clones_only"` must mirror the same metric/dependency + skip-semantics as the regular pipeline. +- Missing optional MCP dependency is handled explicitly by the launcher with a + user-facing install hint and exit code `2`. + +## Invariants (MUST) + +- Tool names are stable public surface. +- Resource URI shapes are stable public surface. +- Read-only tool annotations remain accurate. +- `analyze_repository` always registers exactly one latest run. +- `get_run_summary` with no `run_id` resolves to the latest stored run. +- `get_report_section(section="all")` returns the full canonical report document. +- `run_id` must equal the canonical report digest for that run. + +## Failure modes + +| Condition | Behavior | +|--------------------------------------------|---------------------------------------------------| +| `mcp` extra not installed | `codeclone-mcp` prints install hint and exits `2` | +| Invalid root path / invalid numeric config | service raises contract error | +| Requested run missing | service raises run-not-found error | +| Requested finding missing | service raises finding-not-found error | +| Unsupported report section/resource suffix | service raises contract error | + +## Determinism / canonicalization + +- MCP run identity is derived from canonical report integrity digest. +- Finding order is inherited from canonical report ordering. +- Hotlists are derived from canonical report data and deterministic derived ids. +- No MCP-only heuristics may change analysis or gating semantics. 
+ +## Locked by tests + +- `tests/test_mcp_service.py::test_mcp_service_analyze_repository_registers_latest_run` +- `tests/test_mcp_service.py::test_mcp_service_lists_findings_and_hotspots` +- `tests/test_mcp_service.py::test_mcp_service_evaluate_gates_on_existing_run` +- `tests/test_mcp_service.py::test_mcp_service_resources_expose_latest_summary_and_report` +- `tests/test_mcp_server.py::test_mcp_server_exposes_expected_read_only_tools` +- `tests/test_mcp_server.py::test_mcp_server_tool_roundtrip_and_resources` +- `tests/test_mcp_server.py::test_mcp_server_main_reports_missing_optional_dependency` + +## Non-guarantees + +- There is currently no standalone `mcp_api_version` constant. +- In-memory run history does not survive process restart. +- Client-specific UI/approval behavior is not part of the CodeClone contract. + +## See also + +- [09-cli.md](09-cli.md) +- [08-report.md](08-report.md) +- [14-compatibility-and-versioning.md](14-compatibility-and-versioning.md) +- [../mcp.md](../mcp.md) diff --git a/docs/book/README.md b/docs/book/README.md index d2024cd..e995d74 100644 --- a/docs/book/README.md +++ b/docs/book/README.md @@ -29,6 +29,7 @@ If a statement is not enforced by code/tests, it is explicitly marked as non-con ### Interfaces - [09-cli.md](09-cli.md) +- [20-mcp-interface.md](20-mcp-interface.md) - [10-html-render.md](10-html-render.md) ### System properties diff --git a/docs/book/appendix/b-schema-layouts.md b/docs/book/appendix/b-schema-layouts.md index fcb2388..e40c73f 100644 --- a/docs/book/appendix/b-schema-layouts.md +++ b/docs/book/appendix/b-schema-layouts.md @@ -2,14 +2,14 @@ ## Purpose -Compact structural layouts for baseline/cache/report contracts in `2.0.0b2`. +Compact structural layouts for baseline/cache/report contracts in `2.0.0b3`. 
## Baseline schema (`2.0`) ```json { "meta": { - "generator": { "name": "codeclone", "version": "2.0.0b2" }, + "generator": { "name": "codeclone", "version": "2.0.0b3" }, "schema_version": "2.0", "fingerprint_version": "1", "python_tag": "cp313", @@ -83,7 +83,7 @@ Notes: { "report_schema_version": "2.1", "meta": { - "codeclone_version": "2.0.0b2", + "codeclone_version": "2.0.0b3", "project_name": "codeclone", "scan_root": ".", "analysis_mode": "full", @@ -264,7 +264,7 @@ Notes: "tool": { "driver": { "name": "codeclone", - "version": "2.0.0b2", + "version": "2.0.0b3", "rules": [ { "id": "CCLONE001", diff --git a/docs/book/appendix/c-error-catalog.md b/docs/book/appendix/c-error-catalog.md index 24115c7..f9545dd 100644 --- a/docs/book/appendix/c-error-catalog.md +++ b/docs/book/appendix/c-error-catalog.md @@ -71,9 +71,24 @@ Refs: - `codeclone/cli.py:_main_impl` +## MCP interface errors + +| Condition | Behavior | +|-----------|----------| +| Optional `mcp` extra missing | `codeclone-mcp` prints install hint and exits `2` | +| Invalid root path / invalid numeric config | MCP service contract error | +| Missing run or finding id | MCP service run-not-found / finding-not-found error | +| Unsupported MCP resource URI / report section | MCP service contract error | + +Refs: + +- `codeclone/mcp_server.py:main` +- `codeclone/mcp_service.py` + ## Locked by tests - `tests/test_cli_inprocess.py::test_cli_report_write_error_is_contract_error` - `tests/test_cli_inprocess.py::test_cli_update_baseline_write_error_is_contract_error` - `tests/test_cli_inprocess.py::test_cli_unreadable_source_fails_in_ci_with_contract_error` - `tests/test_cli_unit.py::test_cli_internal_error_marker` +- `tests/test_mcp_server.py::test_mcp_server_main_reports_missing_optional_dependency` diff --git a/docs/mcp.md b/docs/mcp.md new file mode 100644 index 0000000..baea3e8 --- /dev/null +++ b/docs/mcp.md @@ -0,0 +1,359 @@ +# MCP for AI Agents and IDE Clients + +## Purpose + +Explain how to use CodeClone as an MCP server in real
agent workflows. + +Important framing: MCP is primarily a **client integration surface**, not a +model-specific trick. CodeClone does not care whether the backend model is +GPT-5.x, Claude, Gemini, or something else. What matters is whether the +client/application you use can talk to MCP and which transport it expects. + +## Install + +Base install stays lean: + +```bash +pip install codeclone +``` + +Install MCP support only when you need it: + +```bash +pip install "codeclone[mcp]" +``` + +Tool install example: + +```bash +uv tool install "codeclone[mcp]" +``` + +## Start the server + +### Local agent workflows: prefer `stdio` + +```bash +codeclone-mcp --transport stdio +``` + +This is the best default when the MCP-capable client runs on the same machine +and needs access to the local repository. + +### Remote or HTTP-only clients: use `streamable-http` + +```bash +codeclone-mcp --transport streamable-http --host 127.0.0.1 --port 8000 +``` + +With current FastMCP defaults, clients usually connect to the streamable HTTP +endpoint at: + +```text +http://127.0.0.1:8000/mcp +``` + +Use this mode when the client only supports remote MCP endpoints or when you +want to expose CodeClone from a controlled local/remote service boundary. 
+ +## What agents get + +CodeClone MCP is designed as a **read-only structural governance layer**: + +- run CodeClone analysis against a repository +- get a compact run summary +- list clone / structural / dead-code / design findings +- inspect one finding by id +- retrieve derived hotlists +- preview gate decisions without exiting the process +- read the canonical JSON report for a stored run + +It does **not**: + +- update baselines +- mutate source files +- add suppressions automatically + +Current tool surface: + +| Tool | Typical use | +|------|-------------| +| `analyze_repository` | Run a fresh analysis and register it as the latest in-memory run | +| `get_run_summary` | Get the compact baseline/cache/health/findings snapshot for the latest or selected run | +| `list_findings` | Browse findings with filters and pagination | +| `get_finding` | Inspect one finding group deeply by id | +| `list_hotspots` | Jump to high-signal derived views such as `highest_spread` or `production_hotspots` | +| `get_report_section` | Read a canonical section (`meta`, `findings`, `metrics`, `derived`, etc.) | +| `evaluate_gates` | Preview CI/gating outcomes without exiting the process | + +## Recommended agent workflow + +For agentic coding and review loops, the clean sequence is: + +1. `analyze_repository` +2. `get_run_summary` +3. `list_hotspots` or `list_findings` +4. `get_finding` for the specific item the agent should inspect +5. `evaluate_gates` before finalizing the change + +That pattern works especially well for AI-generated code because CodeClone is +baseline-aware: it helps separate accepted legacy debt from new structural +regressions introduced by the latest change set. + +## Prompt patterns for real agent workflows + +The most effective way to use CodeClone MCP is to ask the agent for a +**specific analysis task**, not just "run CodeClone". 
+ +Good prompts usually include: + +- the scope: + - full repository + - clones only + - production findings only +- the goal: + - review + - triage + - safe cleanup plan + - gate preview +- the constraint: + - do not mutate code yet + - do not add suppressions automatically + - prioritize runtime-facing findings + +Use prompts like these. + +### 1. Full repository health check + +```text +Use codeclone MCP to analyze this repository and give me a concise structural health summary. +Prioritize the highest-signal findings and explain what is worth looking at first. +``` + +### 2. Clone-focused review only + +```text +Use codeclone MCP in clones-only mode and show me the most important clone findings. +Separate production findings from test/fixture noise and suggest which clone group is the safest first cleanup target. +``` + +### 3. Production-only clone triage + +```text +Analyze this repository through codeclone MCP, filter to clone findings in production code only, +and show me the top 3 clone groups worth fixing first. +If there are no production clones, say that explicitly. +``` + +### 4. Structural hotspot review + +```text +Use codeclone MCP to find the most important production structural findings. +Focus on duplicated branches, cohesion, coupling, and complexity hotspots. +Give me a safe cleanup plan ordered by ROI. +``` + +### 5. Dead-code triage + +```text +Use codeclone MCP to review dead-code findings in this repository. +Separate actionable items from likely framework/runtime false positives and explain what should actually be cleaned up. +Do not add suppressions automatically. +``` + +### 6. Gate preview before CI + +```text +Run codeclone through MCP and tell me whether this repository would fail stricter gating. +Preview the result for fail_on_new plus a zero clone threshold, and explain the exact reasons. +Do not change any files. +``` + +### 7. AI-generated code review + +```text +I added a lot of code with an AI agent. 
Use codeclone MCP to check whether we introduced structural drift: +new clone groups, dead code, duplicated branches, or design hotspots. +Prioritize what is genuinely new or risky, not accepted baseline debt. +``` + +### 8. Safe refactor planning + +```text +Use codeclone MCP as the source of truth for structural findings. +Pick one production issue that looks safe to refactor, explain why it is a good candidate, +and outline a minimal plan that should not change behavior. +``` + +### 9. Explain one finding deeply + +```text +Use codeclone MCP to find the highest-priority production finding, then inspect it in detail. +Explain what triggered it, where it lives, how risky it is, and what refactoring shape would address it. +Do not make code changes yet. +``` + +### 10. Review after a change + +```text +Use codeclone MCP to analyze the repository after my latest changes. +Tell me whether the structural picture got better, worse, or stayed flat relative to baseline, +and summarize only the findings that are worth acting on. +``` + +## Prompting tips + +- Prefer "production-only" when you care about runtime code. +- Prefer "clones-only mode" when you want the cheapest focused pass on duplication. +- Ask for "safe first candidate" when you want the agent to move from triage to refactor planning. +- If your broader agent also has shell or file-editing tools, you can still say + "do not update baseline" as a workflow constraint. CodeClone MCP itself is + read-only and never updates baseline. +- For AI-generated code, explicitly ask the agent to separate: + - accepted baseline debt + - from new structural regressions + +## Client recipes + +Client UX changes fast, so prefer official client documentation for the exact +setup screens. The integration shape below is the stable part on the CodeClone +side. 
+ +### Codex / local command-based OpenAI clients + +Recommended mode: `stdio` + +```bash +codeclone-mcp --transport stdio +``` + +A typical command-based registration looks like: + +```toml +[mcp_servers.codeclone] +enabled = true +command = "codeclone-mcp" +args = ["--transport", "stdio"] +``` + +Use command-based MCP registration when the client can spawn a local server +process. If `codeclone-mcp` is not on `PATH`, use an absolute path to the +launcher. + +Official docs: + +- [OpenAI: Connectors and MCP servers](https://platform.openai.com/docs/guides/tools-connectors-mcp?lang=javascript) +- [OpenAI Responses API reference (`mcp` tool)](https://platform.openai.com/docs/api-reference/responses/compact?api-mode=responses) + +### OpenAI Responses API / remote MCP-capable OpenAI clients + +Recommended mode: `streamable-http` + +```bash +codeclone-mcp --transport streamable-http --host 127.0.0.1 --port 8000 +``` + +Then register the remote MCP endpoint in the client or API flow that expects an +HTTP MCP server. Prefer allowing only the CodeClone tools you need for the +current workflow. + +### Claude Code / Anthropic MCP-capable clients + +Recommended mode: `stdio` + +Generic command-based configuration: + +```json +{ + "mcpServers": { + "codeclone": { + "command": "codeclone-mcp", + "args": ["--transport", "stdio"] + } + } +} +``` + +This is the best fit when Claude runs on the same machine and should analyze +the local checkout directly. 
+ +Official docs: + +- [Anthropic: Model Context Protocol (MCP)](https://docs.anthropic.com/en/docs/build-with-claude/mcp) +- [Anthropic: MCP with Claude Code](https://docs.anthropic.com/en/docs/claude-code/mcp) + +### GitHub Copilot Chat / IDE MCP clients + +Recommended mode: `stdio` + +Use the same local command registration pattern: + +```json +{ + "mcpServers": { + "codeclone": { + "command": "codeclone-mcp", + "args": ["--transport", "stdio"] + } + } +} +``` + +Then configure the MCP server in the IDE/client that hosts Copilot Chat. + +Official docs: + +- [GitHub Docs: Extending GitHub Copilot Chat with MCP](https://docs.github.com/en/copilot/how-tos/provide-context/use-mcp/extend-copilot-chat-with-mcp?tool=visualstudio) + +### Other MCP-capable clients + +Use the same transport rule: + +- `stdio` for local repository analysis +- `streamable-http` for remote-only or hosted MCP clients + +The CodeClone server surface itself stays the same. + +## Security and operations + +- CodeClone MCP is read-only by design: it never modifies source files or baselines (the analysis cache file may still be written, depending on the cache policy). +- It stores run history in memory only. +- Repository access is limited to what the server process can read locally. +- Baseline/cache/report semantics remain owned by the normal CodeClone contracts. + +## Troubleshooting + +### `CodeClone MCP support requires the optional 'mcp' extra` + +Install the extra: + +```bash +pip install "codeclone[mcp]" +``` + +### The client cannot find `codeclone-mcp` + +Either install it as a tool: + +```bash +uv tool install "codeclone[mcp]" +``` + +or point the client at the absolute path to the launcher from the environment +where CodeClone was installed. + +### The client only accepts remote MCP servers + +Run CodeClone in `streamable-http` mode and point the client at the MCP +endpoint instead of using `stdio`. + +### The agent is reading stale results + +Run `analyze_repository` again. Runs are stored in memory per server process and +`latest` always points at the most recently analyzed run in that process.
+ +## See also + +- [book/20-mcp-interface.md](book/20-mcp-interface.md) +- [book/08-report.md](book/08-report.md) +- [book/09-cli.md](book/09-cli.md) diff --git a/mkdocs.yml b/mkdocs.yml index fae6e1d..cd68046 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -70,6 +70,7 @@ nav: - Report: book/08-report.md - Interfaces: - CLI: book/09-cli.md + - MCP Interface: book/20-mcp-interface.md - HTML Render: book/10-html-render.md - System Properties: - Security Model: book/11-security-model.md @@ -89,6 +90,7 @@ nav: - Deep Dives: - Architecture Narrative: architecture.md - CFG Semantics: cfg.md + - MCP for AI Agents: mcp.md - SARIF for IDEs: sarif.md - Publishing and Docs Site: publishing.md - Examples: diff --git a/pyproject.toml b/pyproject.toml index 28e465b..1218afc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "codeclone" -version = "2.0.0b2" +version = "2.0.0b3" description = "Structural code quality analysis for Python" readme = { file = "README.md", content-type = "text/markdown" } license = "MIT" @@ -61,6 +61,9 @@ Changelog = "https://github.com/orenlab/codeclone/releases" Documentation = "https://orenlab.github.io/codeclone/" [project.optional-dependencies] +mcp = [ + "mcp>=1.26.0,<2", +] dev = [ "pytest>=9.0.0", "pytest-cov>=7.1.0", @@ -73,6 +76,7 @@ dev = [ [project.scripts] codeclone = "codeclone.cli:main" +codeclone-mcp = "codeclone.mcp_server:main" [tool.setuptools] packages = [ diff --git a/tests/test_mcp_server.py b/tests/test_mcp_server.py new file mode 100644 index 0000000..8046788 --- /dev/null +++ b/tests/test_mcp_server.py @@ -0,0 +1,273 @@ +# SPDX-License-Identifier: MIT + +from __future__ import annotations + +import asyncio +import builtins +import json +import sys +from collections.abc import Mapping +from pathlib import Path +from typing import cast + +import pytest + +from codeclone import mcp_server +from codeclone.mcp_server import MCPDependencyError, build_mcp_server + + 
+def _structured_tool_result(result: object) -> dict[str, object]: + if isinstance(result, dict): + return result + assert isinstance(result, tuple) + assert len(result) == 2 + payload = result[1] + assert isinstance(payload, dict) + return cast("dict[str, object]", payload) + + +def _require_mcp_runtime() -> None: + pytest.importorskip("mcp.server.fastmcp") + + +def _write_clone_fixture(root: Path) -> None: + root.joinpath("pkg").mkdir() + root.joinpath("pkg", "__init__.py").write_text("", "utf-8") + root.joinpath("pkg", "dup.py").write_text( + ( + "def alpha(value: int) -> int:\n" + " total = value + 1\n" + " total += 2\n" + " total += 3\n" + " total += 4\n" + " total += 5\n" + " total += 6\n" + " total += 7\n" + " total += 8\n" + " return total\n\n" + "def beta(value: int) -> int:\n" + " total = value + 1\n" + " total += 2\n" + " total += 3\n" + " total += 4\n" + " total += 5\n" + " total += 6\n" + " total += 7\n" + " total += 8\n" + " return total\n" + ), + "utf-8", + ) + + +def test_mcp_server_exposes_expected_read_only_tools() -> None: + _require_mcp_runtime() + server = build_mcp_server(history_limit=4) + + tools = {tool.name: tool for tool in asyncio.run(server.list_tools())} + assert set(tools) == { + "analyze_repository", + "get_run_summary", + "evaluate_gates", + "get_report_section", + "list_findings", + "get_finding", + "list_hotspots", + } + for tool in tools.values(): + assert tool.annotations is not None + assert tool.annotations.readOnlyHint is True + assert tool.annotations.destructiveHint is False + assert tool.annotations.idempotentHint is True + + +def test_mcp_server_tool_roundtrip_and_resources(tmp_path: Path) -> None: + _require_mcp_runtime() + _write_clone_fixture(tmp_path) + server = build_mcp_server(history_limit=4) + + summary = _structured_tool_result( + asyncio.run( + server.call_tool( + "analyze_repository", + { + "root": str(tmp_path), + "respect_pyproject": False, + "cache_policy": "off", + }, + ) + ) + ) + run_id = 
str(summary["run_id"]) + + latest = _structured_tool_result( + asyncio.run(server.call_tool("get_run_summary", {})) + ) + assert latest["run_id"] == run_id + + findings_result = _structured_tool_result( + asyncio.run(server.call_tool("list_findings", {"family": "clone"})) + ) + assert cast(int, findings_result["total"]) >= 1 + + latest_summary_resource = list( + asyncio.run(server.read_resource("codeclone://latest/summary")) + ) + assert latest_summary_resource + latest_summary_text = latest_summary_resource[0].content + latest_summary = json.loads(latest_summary_text) + assert latest_summary["run_id"] == run_id + + latest_report_resource = list( + asyncio.run(server.read_resource("codeclone://latest/report.json")) + ) + assert ( + json.loads(latest_report_resource[0].content)["report_schema_version"] == "2.1" + ) + + report_resource = list( + asyncio.run(server.read_resource(f"codeclone://runs/{run_id}/report.json")) + ) + assert report_resource + report_payload = json.loads(report_resource[0].content) + assert report_payload["report_schema_version"] == "2.1" + + finding_items = cast("list[dict[str, object]]", findings_result["items"]) + first_finding_id = str(finding_items[0]["id"]) + + gate_result = _structured_tool_result( + asyncio.run(server.call_tool("evaluate_gates", {"fail_threshold": 0})) + ) + assert gate_result["would_fail"] is True + + report_section = _structured_tool_result( + asyncio.run(server.call_tool("get_report_section", {"section": "meta"})) + ) + assert report_section["codeclone_version"] + + finding = _structured_tool_result( + asyncio.run(server.call_tool("get_finding", {"finding_id": first_finding_id})) + ) + assert finding["id"] == first_finding_id + + hotspots = _structured_tool_result( + asyncio.run(server.call_tool("list_hotspots", {"kind": "highest_spread"})) + ) + assert cast(int, hotspots["total"]) >= 1 + + run_summary_resource = list( + asyncio.run(server.read_resource(f"codeclone://runs/{run_id}/summary")) + ) + assert 
json.loads(run_summary_resource[0].content)["run_id"] == run_id + + finding_resource = list( + asyncio.run( + server.read_resource( + f"codeclone://runs/{run_id}/findings/{first_finding_id}" + ) + ) + ) + assert json.loads(finding_resource[0].content)["id"] == first_finding_id + + +def test_mcp_server_parser_defaults_and_main_success( + monkeypatch: pytest.MonkeyPatch, +) -> None: + parser = mcp_server.build_parser() + args = parser.parse_args([]) + assert args.transport == "stdio" + assert args.history_limit == 16 + assert args.json_response is True + assert args.stateless_http is True + assert args.log_level == "INFO" + + captured: dict[str, object] = {} + + class _FakeServer: + def run(self, *, transport: str) -> None: + captured["transport"] = transport + + def _fake_build_mcp_server(**kwargs: object) -> _FakeServer: + captured["kwargs"] = kwargs + return _FakeServer() + + monkeypatch.setattr(mcp_server, "build_mcp_server", _fake_build_mcp_server) + monkeypatch.setattr( + sys, + "argv", + [ + "codeclone-mcp", + "--transport", + "streamable-http", + "--port", + "9000", + "--history-limit", + "8", + ], + ) + + mcp_server.main() + + assert captured["transport"] == "streamable-http" + kwargs = cast("dict[str, object]", captured["kwargs"]) + assert kwargs["port"] == 9000 + assert kwargs["history_limit"] == 8 + + +def test_mcp_server_main_reports_missing_optional_dependency( + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], +) -> None: + def _boom() -> tuple[object, object]: + raise MCPDependencyError("install codeclone[mcp]") + + monkeypatch.setattr(mcp_server, "_load_mcp_runtime", _boom) + monkeypatch.setattr(sys, "argv", ["codeclone-mcp"]) + + with pytest.raises(SystemExit) as exc_info: + mcp_server.main() + + assert exc_info.value.code == 2 + err = capsys.readouterr().err + assert "codeclone[mcp]" in err + + +def test_mcp_server_load_runtime_wraps_import_error( + monkeypatch: pytest.MonkeyPatch, +) -> None: + original_import = 
builtins.__import__ + + def _fake_import( + name: str, + globals: Mapping[str, object] | None = None, + locals: Mapping[str, object] | None = None, + fromlist: tuple[str, ...] = (), + level: int = 0, + ) -> object: + if name.startswith("mcp.server.fastmcp"): + raise ImportError("missing mcp") + return original_import(name, globals, locals, fromlist, level) + + monkeypatch.setattr(builtins, "__import__", _fake_import) + + with pytest.raises(MCPDependencyError): + mcp_server._load_mcp_runtime() + + +def test_mcp_server_main_swallows_keyboard_interrupt( + monkeypatch: pytest.MonkeyPatch, +) -> None: + class _FakeServer: + def run(self, *, transport: str) -> None: + raise KeyboardInterrupt() + + monkeypatch.setattr( + mcp_server, + "build_mcp_server", + lambda **_kwargs: _FakeServer(), + ) + monkeypatch.setattr( + sys, "argv", ["codeclone-mcp", "--transport", "streamable-http"] + ) + + mcp_server.main() diff --git a/tests/test_mcp_service.py b/tests/test_mcp_service.py new file mode 100644 index 0000000..7662416 --- /dev/null +++ b/tests/test_mcp_service.py @@ -0,0 +1,547 @@ +# SPDX-License-Identifier: MIT + +from __future__ import annotations + +import importlib +import json +from pathlib import Path +from types import SimpleNamespace +from typing import Any, cast + +import pytest + +from codeclone import mcp_service as mcp_service_mod +from codeclone._cli_config import ConfigValidationError +from codeclone.cache import Cache +from codeclone.errors import CacheError +from codeclone.mcp_service import ( + CodeCloneMCPService, + MCPAnalysisRequest, + MCPFindingNotFoundError, + MCPGateRequest, + MCPRunNotFoundError, + MCPServiceContractError, + MCPServiceError, +) +from codeclone.models import MetricsDiff + + +def _write_clone_fixture(root: Path) -> None: + root.joinpath("pkg").mkdir() + root.joinpath("pkg", "__init__.py").write_text("", "utf-8") + root.joinpath("pkg", "dup.py").write_text( + ( + "def alpha(value: int) -> int:\n" + " total = value + 1\n" + " total += 
2\n" + " total += 3\n" + " total += 4\n" + " total += 5\n" + " total += 6\n" + " total += 7\n" + " total += 8\n" + " return total\n\n" + "def beta(value: int) -> int:\n" + " total = value + 1\n" + " total += 2\n" + " total += 3\n" + " total += 4\n" + " total += 5\n" + " total += 6\n" + " total += 7\n" + " total += 8\n" + " return total\n" + ), + "utf-8", + ) + + +def test_mcp_service_analyze_repository_registers_latest_run(tmp_path: Path) -> None: + _write_clone_fixture(tmp_path) + service = CodeCloneMCPService(history_limit=4) + + summary = service.analyze_repository( + MCPAnalysisRequest( + root=str(tmp_path), + respect_pyproject=False, + cache_policy="off", + ) + ) + + latest = service.get_run_summary() + assert summary["run_id"] == latest["run_id"] + assert summary["root"] == str(tmp_path) + assert summary["analysis_mode"] == "full" + assert summary["report_schema_version"] == "2.1" + latest_baseline = cast("dict[str, object]", latest["baseline"]) + latest_cache = cast("dict[str, object]", latest["cache"]) + assert latest_baseline["status"] == "missing" + assert latest_cache["used"] is False + assert latest_cache["path"] == ".cache/codeclone/cache.json" + latest_health = cast("dict[str, object]", latest["health"]) + assert isinstance(latest_health["score"], int) + assert latest_health["grade"] + + +def test_mcp_service_lists_findings_and_hotspots(tmp_path: Path) -> None: + _write_clone_fixture(tmp_path) + service = CodeCloneMCPService(history_limit=4) + summary = service.analyze_repository( + MCPAnalysisRequest( + root=str(tmp_path), + respect_pyproject=False, + cache_policy="off", + ) + ) + + findings = service.list_findings(family="clone") + assert findings["run_id"] == summary["run_id"] + findings_total = cast(int, findings["total"]) + assert findings_total >= 1 + first = cast("list[dict[str, object]]", findings["items"])[0] + assert str(first["id"]).startswith("clone:") + + finding = service.get_finding(finding_id=str(first["id"])) + assert finding["id"] == 
first["id"] + + hotspots = service.list_hotspots(kind="highest_spread") + assert hotspots["run_id"] == summary["run_id"] + assert cast(int, hotspots["total"]) >= 1 + + +def test_mcp_service_summary_reuses_canonical_meta_for_cache_and_health( + tmp_path: Path, +) -> None: + _write_clone_fixture(tmp_path) + service = CodeCloneMCPService(history_limit=4) + + summary = service.analyze_repository( + MCPAnalysisRequest( + root=str(tmp_path), + respect_pyproject=False, + cache_policy="reuse", + ) + ) + + report_meta = service.get_report_section( + run_id=str(summary["run_id"]), + section="meta", + ) + report_metrics = service.get_report_section( + run_id=str(summary["run_id"]), + section="metrics", + ) + cache_summary = cast("dict[str, object]", summary["cache"]) + cache_meta = cast("dict[str, object]", report_meta["cache"]) + health_summary = cast("dict[str, object]", summary["health"]) + metrics_summary = cast("dict[str, object]", report_metrics["summary"]) + metrics_health = cast("dict[str, object]", metrics_summary["health"]) + + assert cache_summary["path"] == cache_meta["path"] + assert cache_summary["status"] == cache_meta["status"] + assert cache_summary["used"] == cache_meta["used"] + assert cache_summary["schema_version"] == cache_meta["schema_version"] + assert health_summary == metrics_health + + +def test_mcp_service_evaluate_gates_on_existing_run(tmp_path: Path) -> None: + _write_clone_fixture(tmp_path) + service = CodeCloneMCPService(history_limit=4) + summary = service.analyze_repository( + MCPAnalysisRequest( + root=str(tmp_path), + respect_pyproject=False, + cache_policy="off", + ) + ) + + gate_result = service.evaluate_gates( + MCPGateRequest(run_id=str(summary["run_id"]), fail_threshold=0) + ) + + assert gate_result["run_id"] == summary["run_id"] + assert gate_result["would_fail"] is True + assert gate_result["exit_code"] == 3 + assert gate_result["reasons"] == ["clone:threshold:1:0"] + + +def 
test_mcp_service_resources_expose_latest_summary_and_report(tmp_path: Path) -> None: + _write_clone_fixture(tmp_path) + service = CodeCloneMCPService(history_limit=4) + summary = service.analyze_repository( + MCPAnalysisRequest( + root=str(tmp_path), + respect_pyproject=False, + cache_policy="off", + ) + ) + + latest_summary = json.loads(service.read_resource("codeclone://latest/summary")) + latest_report = json.loads(service.read_resource("codeclone://latest/report.json")) + + assert latest_summary["run_id"] == summary["run_id"] + assert latest_report["report_schema_version"] == "2.1" + + +def test_mcp_service_run_store_evicts_old_runs(tmp_path: Path) -> None: + first_root = tmp_path / "first" + second_root = tmp_path / "second" + first_root.mkdir() + second_root.mkdir() + _write_clone_fixture(first_root) + _write_clone_fixture(second_root) + service = CodeCloneMCPService(history_limit=1) + + first = service.analyze_repository( + MCPAnalysisRequest( + root=str(first_root), + respect_pyproject=False, + cache_policy="off", + ) + ) + second = service.analyze_repository( + MCPAnalysisRequest( + root=str(second_root), + respect_pyproject=False, + cache_policy="off", + ) + ) + + assert service.get_run_summary()["run_id"] == second["run_id"] + with pytest.raises(MCPRunNotFoundError): + service.get_run_summary(str(first["run_id"])) + + +def test_mcp_service_reports_contract_errors_for_resources_and_findings( + tmp_path: Path, +) -> None: + _write_clone_fixture(tmp_path) + service = CodeCloneMCPService(history_limit=4) + summary = service.analyze_repository( + MCPAnalysisRequest( + root=str(tmp_path), + respect_pyproject=False, + cache_policy="off", + ) + ) + run_id = str(summary["run_id"]) + + overview = json.loads(service.read_resource("codeclone://latest/overview")) + assert overview["run_id"] == run_id + + with pytest.raises(MCPServiceContractError): + service.get_report_section(section=cast("object", "unknown")) # type: ignore[arg-type] + with 
pytest.raises(MCPFindingNotFoundError): + service.get_finding(run_id=run_id, finding_id="missing") + with pytest.raises(MCPServiceContractError): + service.read_resource("bad://resource") + with pytest.raises(MCPServiceContractError): + service.read_resource(f"codeclone://runs/{run_id}") + with pytest.raises(MCPServiceContractError): + service.read_resource(f"codeclone://runs/{run_id}/unsupported") + + +def test_mcp_service_build_args_handles_pyproject_and_invalid_settings( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + service = CodeCloneMCPService(history_limit=4) + + monkeypatch.setattr( + mcp_service_mod, + "load_pyproject_config", + lambda _root: { + "min_loc": 12, + "baseline": "conf-baseline.json", + "cache_path": "conf-cache.json", + }, + ) + args = service._build_args( + root_path=tmp_path, + request=MCPAnalysisRequest( + respect_pyproject=True, + analysis_mode="clones_only", + metrics_baseline_path="metrics.json", + ), + ) + assert args.min_loc == 12 + assert args.skip_metrics is True + assert args.skip_dead_code is True + assert args.skip_dependencies is True + assert str(args.baseline).endswith("conf-baseline.json") + assert str(args.cache_path).endswith("conf-cache.json") + assert str(args.metrics_baseline).endswith("metrics.json") + + monkeypatch.setattr( + mcp_service_mod, + "load_pyproject_config", + lambda _root: (_ for _ in ()).throw(ConfigValidationError("bad config")), + ) + with pytest.raises(MCPServiceContractError): + service._build_args( + root_path=tmp_path, + request=MCPAnalysisRequest(respect_pyproject=True), + ) + + with pytest.raises(MCPServiceContractError): + service._build_args( + root_path=tmp_path, + request=MCPAnalysisRequest( + respect_pyproject=False, + max_cache_size_mb=-1, + ), + ) + + +def test_mcp_service_root_and_helper_contract_errors( + tmp_path: Path, +) -> None: + service = CodeCloneMCPService(history_limit=4) + missing_root = tmp_path / "missing" + file_root = tmp_path / "root.py" + 
file_root.write_text("print('x')\n", "utf-8") + + with pytest.raises(MCPServiceContractError): + service.analyze_repository( + MCPAnalysisRequest( + root=str(missing_root), + respect_pyproject=False, + ) + ) + with pytest.raises(MCPServiceContractError): + service.analyze_repository( + MCPAnalysisRequest( + root=str(file_root), + respect_pyproject=False, + ) + ) + + with pytest.raises(MCPServiceError): + service._load_report_document("{") + with pytest.raises(MCPServiceError): + service._load_report_document("[]") + with pytest.raises(MCPServiceError): + service._report_digest({}) + + +def test_mcp_service_helper_filters_and_metrics_payload() -> None: + service = CodeCloneMCPService(history_limit=4) + + payload = service._metrics_diff_payload( + MetricsDiff( + new_high_risk_functions=("pkg.a:f",), + new_high_coupling_classes=("pkg.a:C",), + new_cycles=(("pkg.a", "pkg.b"),), + new_dead_code=("pkg.a:unused",), + health_delta=-3, + ) + ) + assert payload == { + "new_high_risk_functions": 1, + "new_high_coupling_classes": 1, + "new_cycles": 1, + "new_dead_code": 1, + "health_delta": -3, + } + assert service._metrics_diff_payload(None) is None + + finding = { + "family": "clone", + "severity": "high", + "novelty": "new", + "source_scope": {"dominant_kind": "production"}, + } + assert ( + service._matches_finding_filters( + finding=finding, + family="all", + severity="medium", + source_kind=None, + novelty="all", + ) + is False + ) + assert ( + service._matches_finding_filters( + finding=finding, + family="all", + severity=None, + source_kind="tests", + novelty="all", + ) + is False + ) + assert ( + service._matches_finding_filters( + finding=finding, + family="all", + severity=None, + source_kind=None, + novelty="known", + ) + is False + ) + assert service._as_sequence("not-a-sequence") == () + + +def test_mcp_service_refresh_cache_reports_save_warning( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + _write_clone_fixture(tmp_path) + service = 
CodeCloneMCPService(history_limit=4) + refresh_calls: list[str] = [] + + def _fake_refresh(*, cache: object, analysis: object) -> None: + refresh_calls.append("called") + + def _fake_save(self: Cache) -> None: + raise CacheError("boom") + + monkeypatch.setattr(service, "_refresh_cache_projection", _fake_refresh) + monkeypatch.setattr(Cache, "save", _fake_save) + + summary = service.analyze_repository( + MCPAnalysisRequest( + root=str(tmp_path), + respect_pyproject=False, + cache_policy="refresh", + ) + ) + + assert refresh_calls == ["called"] + assert "Cache save failed: boom" in cast("list[str]", summary["warnings"]) + + +def test_mcp_service_all_section_and_optional_path_overrides(tmp_path: Path) -> None: + _write_clone_fixture(tmp_path) + service = CodeCloneMCPService(history_limit=4) + service.analyze_repository( + MCPAnalysisRequest( + root=str(tmp_path), + respect_pyproject=False, + cache_policy="off", + ) + ) + + report_document = service.get_report_section(section="all") + assert report_document["report_schema_version"] == "2.1" + + args = service._build_args( + root_path=tmp_path, + request=MCPAnalysisRequest( + respect_pyproject=False, + baseline_path="custom-baseline.json", + metrics_baseline_path="metrics-only.json", + cache_path="custom-cache.json", + ), + ) + assert str(args.baseline).endswith("custom-baseline.json") + assert str(args.metrics_baseline).endswith("metrics-only.json") + assert str(args.cache_path).endswith("custom-cache.json") + + _, _, metrics_baseline_path, metrics_baseline_exists, shared_payload = ( + service._resolve_baseline_inputs(root_path=tmp_path, args=args) + ) + assert str(metrics_baseline_path).endswith("metrics-only.json") + assert metrics_baseline_exists is False + assert shared_payload is None + + +def test_mcp_service_root_cache_and_projection_helpers( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + service = CodeCloneMCPService(history_limit=4) + args = service._build_args( + root_path=tmp_path, + 
request=MCPAnalysisRequest(respect_pyproject=False), + ) + load_calls: list[str] = [] + + def _fake_load(self: Cache) -> None: + load_calls.append("loaded") + + monkeypatch.setattr(Cache, "load", _fake_load) + service._build_cache( + root_path=tmp_path, + args=args, + cache_path=tmp_path / "cache.json", + policy="reuse", + ) + assert load_calls == ["loaded"] + + cache_without_projection = SimpleNamespace() + service._refresh_cache_projection( + cache=cast(Any, cache_without_projection), + analysis=cast( + Any, + SimpleNamespace( + suppressed_segment_groups=0, + segment_groups_raw_digest=None, + segment_groups={}, + ), + ), + ) + + cache_with_projection = SimpleNamespace(segment_report_projection=()) + service._refresh_cache_projection( + cache=cast(Any, cache_with_projection), + analysis=cast( + Any, + SimpleNamespace( + suppressed_segment_groups=0, + segment_groups_raw_digest="digest", + segment_groups={}, + ), + ), + ) + assert cache_with_projection.segment_report_projection is not None + + +def test_mcp_service_invalid_path_resolution_contract_errors( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + service = CodeCloneMCPService(history_limit=4) + + def _boom(self: Path, *args: object, **kwargs: object) -> Path: + raise OSError("bad path") + + monkeypatch.setattr(Path, "resolve", _boom) + + with pytest.raises(MCPServiceContractError): + service._resolve_root(str(tmp_path)) + with pytest.raises(MCPServiceContractError): + service._resolve_optional_path("cache.json", tmp_path) + + +def test_mcp_service_reports_missing_json_artifact(tmp_path: Path) -> None: + _write_clone_fixture(tmp_path) + service = CodeCloneMCPService(history_limit=4) + service_module = cast(Any, importlib.import_module("codeclone.mcp_service")) + original_report = service_module.report + + def _fake_report(**kwargs: Any) -> object: + artifacts = cast(Any, original_report)(**kwargs) + return SimpleNamespace( + json=None, + html=artifacts.html, + md=artifacts.md, + 
sarif=artifacts.sarif, + text=artifacts.text, + ) + + monkeypatch = pytest.MonkeyPatch() + monkeypatch.setattr("codeclone.mcp_service.report", _fake_report) + try: + with pytest.raises(MCPServiceError): + service.analyze_repository( + MCPAnalysisRequest( + root=str(tmp_path), + respect_pyproject=False, + cache_policy="off", + ) + ) + finally: + monkeypatch.undo() diff --git a/uv.lock b/uv.lock index 2b9ac29..949c5b8 100644 --- a/uv.lock +++ b/uv.lock @@ -2,6 +2,38 @@ version = 1 revision = 3 requires-python = ">=3.10" +[[package]] +name = "annotated-types" +version = "0.7.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ee/67/531ea369ba64dcff5ec9c3402f9f51bf748cec26dde048a2f973a4eea7f5/annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89", size = 16081, upload-time = "2024-05-20T21:33:25.928Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53", size = 13643, upload-time = "2024-05-20T21:33:24.1Z" }, +] + +[[package]] +name = "anyio" +version = "4.13.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "exceptiongroup", marker = "python_full_version < '3.11'" }, + { name = "idna" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/19/14/2c5dd9f512b66549ae92767a9c7b330ae88e1932ca57876909410251fe13/anyio-4.13.0.tar.gz", hash = "sha256:334b70e641fd2221c1505b3890c69882fe4a2df910cba14d97019b90b24439dc", size = 231622, upload-time = "2026-03-24T12:59:09.671Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/da/42/e921fccf5015463e32a3cf6ee7f980a6ed0f395ceeaa45060b61d86486c2/anyio-4.13.0-py3-none-any.whl", hash = 
"sha256:08b310f9e24a9594186fd75b4f73f4a4152069e3853f1ed8bfbf58369f4ad708", size = 114353, upload-time = "2026-03-24T12:59:08.246Z" }, +] + +[[package]] +name = "attrs" +version = "26.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/9a/8e/82a0fe20a541c03148528be8cac2408564a6c9a0cc7e9171802bc1d26985/attrs-26.1.0.tar.gz", hash = "sha256:d03ceb89cb322a8fd706d4fb91940737b6642aa36998fe130a9bc96c985eff32", size = 952055, upload-time = "2026-03-19T14:22:25.026Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/64/b4/17d4b0b2a2dc85a6df63d1157e028ed19f90d4cd97c36717afef2bc2f395/attrs-26.1.0-py3-none-any.whl", hash = "sha256:c647aa4a12dfbad9333ca4e71fe62ddc36f4e63b2d260a37a8b83d2f043ac309", size = 67548, upload-time = "2026-03-19T14:22:23.645Z" }, +] + [[package]] name = "backports-tarfile" version = "1.2.0" @@ -45,36 +77,77 @@ dependencies = [ ] sdist = { url = "https://files.pythonhosted.org/packages/eb/56/b1ba7935a17738ae8453301356628e8147c79dbb825bcbc73dc7401f9846/cffi-2.0.0.tar.gz", hash = "sha256:44d1b5909021139fe36001ae048dbdde8214afa20200eda0f64c068cac5d5529", size = 523588, upload-time = "2025-09-08T23:24:04.541Z" } wheels = [ + { url = "https://files.pythonhosted.org/packages/93/d7/516d984057745a6cd96575eea814fe1edd6646ee6efd552fb7b0921dec83/cffi-2.0.0-cp310-cp310-macosx_10_13_x86_64.whl", hash = "sha256:0cf2d91ecc3fcc0625c2c530fe004f82c110405f101548512cce44322fa8ac44", size = 184283, upload-time = "2025-09-08T23:22:08.01Z" }, + { url = "https://files.pythonhosted.org/packages/9e/84/ad6a0b408daa859246f57c03efd28e5dd1b33c21737c2db84cae8c237aa5/cffi-2.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f73b96c41e3b2adedc34a7356e64c8eb96e03a3782b535e043a986276ce12a49", size = 180504, upload-time = "2025-09-08T23:22:10.637Z" }, { url = 
"https://files.pythonhosted.org/packages/50/bd/b1a6362b80628111e6653c961f987faa55262b4002fcec42308cad1db680/cffi-2.0.0-cp310-cp310-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:53f77cbe57044e88bbd5ed26ac1d0514d2acf0591dd6bb02a3ae37f76811b80c", size = 208811, upload-time = "2025-09-08T23:22:12.267Z" }, { url = "https://files.pythonhosted.org/packages/4f/27/6933a8b2562d7bd1fb595074cf99cc81fc3789f6a6c05cdabb46284a3188/cffi-2.0.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:3e837e369566884707ddaf85fc1744b47575005c0a229de3327f8f9a20f4efeb", size = 216402, upload-time = "2025-09-08T23:22:13.455Z" }, + { url = "https://files.pythonhosted.org/packages/05/eb/b86f2a2645b62adcfff53b0dd97e8dfafb5c8aa864bd0d9a2c2049a0d551/cffi-2.0.0-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:5eda85d6d1879e692d546a078b44251cdd08dd1cfb98dfb77b670c97cee49ea0", size = 203217, upload-time = "2025-09-08T23:22:14.596Z" }, + { url = "https://files.pythonhosted.org/packages/9f/e0/6cbe77a53acf5acc7c08cc186c9928864bd7c005f9efd0d126884858a5fe/cffi-2.0.0-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:9332088d75dc3241c702d852d4671613136d90fa6881da7d770a483fd05248b4", size = 203079, upload-time = "2025-09-08T23:22:15.769Z" }, { url = "https://files.pythonhosted.org/packages/98/29/9b366e70e243eb3d14a5cb488dfd3a0b6b2f1fb001a203f653b93ccfac88/cffi-2.0.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:fc7de24befaeae77ba923797c7c87834c73648a05a4bde34b3b7e5588973a453", size = 216475, upload-time = "2025-09-08T23:22:17.427Z" }, { url = "https://files.pythonhosted.org/packages/21/7a/13b24e70d2f90a322f2900c5d8e1f14fa7e2a6b3332b7309ba7b2ba51a5a/cffi-2.0.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:cf364028c016c03078a23b503f02058f1814320a56ad535686f90565636a9495", size = 218829, upload-time = "2025-09-08T23:22:19.069Z" }, { url = 
"https://files.pythonhosted.org/packages/60/99/c9dc110974c59cc981b1f5b66e1d8af8af764e00f0293266824d9c4254bc/cffi-2.0.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:e11e82b744887154b182fd3e7e8512418446501191994dbf9c9fc1f32cc8efd5", size = 211211, upload-time = "2025-09-08T23:22:20.588Z" }, { url = "https://files.pythonhosted.org/packages/49/72/ff2d12dbf21aca1b32a40ed792ee6b40f6dc3a9cf1644bd7ef6e95e0ac5e/cffi-2.0.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:8ea985900c5c95ce9db1745f7933eeef5d314f0565b27625d9a10ec9881e1bfb", size = 218036, upload-time = "2025-09-08T23:22:22.143Z" }, + { url = "https://files.pythonhosted.org/packages/e2/cc/027d7fb82e58c48ea717149b03bcadcbdc293553edb283af792bd4bcbb3f/cffi-2.0.0-cp310-cp310-win32.whl", hash = "sha256:1f72fb8906754ac8a2cc3f9f5aaa298070652a0ffae577e0ea9bd480dc3c931a", size = 172184, upload-time = "2025-09-08T23:22:23.328Z" }, + { url = "https://files.pythonhosted.org/packages/33/fa/072dd15ae27fbb4e06b437eb6e944e75b068deb09e2a2826039e49ee2045/cffi-2.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:b18a3ed7d5b3bd8d9ef7a8cb226502c6bf8308df1525e1cc676c3680e7176739", size = 182790, upload-time = "2025-09-08T23:22:24.752Z" }, + { url = "https://files.pythonhosted.org/packages/12/4a/3dfd5f7850cbf0d06dc84ba9aa00db766b52ca38d8b86e3a38314d52498c/cffi-2.0.0-cp311-cp311-macosx_10_13_x86_64.whl", hash = "sha256:b4c854ef3adc177950a8dfc81a86f5115d2abd545751a304c5bcf2c2c7283cfe", size = 184344, upload-time = "2025-09-08T23:22:26.456Z" }, + { url = "https://files.pythonhosted.org/packages/4f/8b/f0e4c441227ba756aafbe78f117485b25bb26b1c059d01f137fa6d14896b/cffi-2.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2de9a304e27f7596cd03d16f1b7c72219bd944e99cc52b84d0145aefb07cbd3c", size = 180560, upload-time = "2025-09-08T23:22:28.197Z" }, { url = 
"https://files.pythonhosted.org/packages/b1/b7/1200d354378ef52ec227395d95c2576330fd22a869f7a70e88e1447eb234/cffi-2.0.0-cp311-cp311-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:baf5215e0ab74c16e2dd324e8ec067ef59e41125d3eade2b863d294fd5035c92", size = 209613, upload-time = "2025-09-08T23:22:29.475Z" }, { url = "https://files.pythonhosted.org/packages/b8/56/6033f5e86e8cc9bb629f0077ba71679508bdf54a9a5e112a3c0b91870332/cffi-2.0.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:730cacb21e1bdff3ce90babf007d0a0917cc3e6492f336c2f0134101e0944f93", size = 216476, upload-time = "2025-09-08T23:22:31.063Z" }, + { url = "https://files.pythonhosted.org/packages/dc/7f/55fecd70f7ece178db2f26128ec41430d8720f2d12ca97bf8f0a628207d5/cffi-2.0.0-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:6824f87845e3396029f3820c206e459ccc91760e8fa24422f8b0c3d1731cbec5", size = 203374, upload-time = "2025-09-08T23:22:32.507Z" }, + { url = "https://files.pythonhosted.org/packages/84/ef/a7b77c8bdc0f77adc3b46888f1ad54be8f3b7821697a7b89126e829e676a/cffi-2.0.0-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:9de40a7b0323d889cf8d23d1ef214f565ab154443c42737dfe52ff82cf857664", size = 202597, upload-time = "2025-09-08T23:22:34.132Z" }, { url = "https://files.pythonhosted.org/packages/d7/91/500d892b2bf36529a75b77958edfcd5ad8e2ce4064ce2ecfeab2125d72d1/cffi-2.0.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:8941aaadaf67246224cee8c3803777eed332a19d909b47e29c9842ef1e79ac26", size = 215574, upload-time = "2025-09-08T23:22:35.443Z" }, { url = "https://files.pythonhosted.org/packages/44/64/58f6255b62b101093d5df22dcb752596066c7e89dd725e0afaed242a61be/cffi-2.0.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:a05d0c237b3349096d3981b727493e22147f934b20f6f125a3eba8f994bec4a9", size = 218971, upload-time = "2025-09-08T23:22:36.805Z" }, { url = 
"https://files.pythonhosted.org/packages/ab/49/fa72cebe2fd8a55fbe14956f9970fe8eb1ac59e5df042f603ef7c8ba0adc/cffi-2.0.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:94698a9c5f91f9d138526b48fe26a199609544591f859c870d477351dc7b2414", size = 211972, upload-time = "2025-09-08T23:22:38.436Z" }, { url = "https://files.pythonhosted.org/packages/0b/28/dd0967a76aab36731b6ebfe64dec4e981aff7e0608f60c2d46b46982607d/cffi-2.0.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:5fed36fccc0612a53f1d4d9a816b50a36702c28a2aa880cb8a122b3466638743", size = 217078, upload-time = "2025-09-08T23:22:39.776Z" }, + { url = "https://files.pythonhosted.org/packages/2b/c0/015b25184413d7ab0a410775fdb4a50fca20f5589b5dab1dbbfa3baad8ce/cffi-2.0.0-cp311-cp311-win32.whl", hash = "sha256:c649e3a33450ec82378822b3dad03cc228b8f5963c0c12fc3b1e0ab940f768a5", size = 172076, upload-time = "2025-09-08T23:22:40.95Z" }, + { url = "https://files.pythonhosted.org/packages/ae/8f/dc5531155e7070361eb1b7e4c1a9d896d0cb21c49f807a6c03fd63fc877e/cffi-2.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:66f011380d0e49ed280c789fbd08ff0d40968ee7b665575489afa95c98196ab5", size = 182820, upload-time = "2025-09-08T23:22:42.463Z" }, + { url = "https://files.pythonhosted.org/packages/95/5c/1b493356429f9aecfd56bc171285a4c4ac8697f76e9bbbbb105e537853a1/cffi-2.0.0-cp311-cp311-win_arm64.whl", hash = "sha256:c6638687455baf640e37344fe26d37c404db8b80d037c3d29f58fe8d1c3b194d", size = 177635, upload-time = "2025-09-08T23:22:43.623Z" }, + { url = "https://files.pythonhosted.org/packages/ea/47/4f61023ea636104d4f16ab488e268b93008c3d0bb76893b1b31db1f96802/cffi-2.0.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:6d02d6655b0e54f54c4ef0b94eb6be0607b70853c45ce98bd278dc7de718be5d", size = 185271, upload-time = "2025-09-08T23:22:44.795Z" }, + { url = "https://files.pythonhosted.org/packages/df/a2/781b623f57358e360d62cdd7a8c681f074a71d445418a776eef0aadb4ab4/cffi-2.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = 
"sha256:8eca2a813c1cb7ad4fb74d368c2ffbbb4789d377ee5bb8df98373c2cc0dee76c", size = 181048, upload-time = "2025-09-08T23:22:45.938Z" }, { url = "https://files.pythonhosted.org/packages/ff/df/a4f0fbd47331ceeba3d37c2e51e9dfc9722498becbeec2bd8bc856c9538a/cffi-2.0.0-cp312-cp312-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:21d1152871b019407d8ac3985f6775c079416c282e431a4da6afe7aefd2bccbe", size = 212529, upload-time = "2025-09-08T23:22:47.349Z" }, { url = "https://files.pythonhosted.org/packages/d5/72/12b5f8d3865bf0f87cf1404d8c374e7487dcf097a1c91c436e72e6badd83/cffi-2.0.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:b21e08af67b8a103c71a250401c78d5e0893beff75e28c53c98f4de42f774062", size = 220097, upload-time = "2025-09-08T23:22:48.677Z" }, + { url = "https://files.pythonhosted.org/packages/c2/95/7a135d52a50dfa7c882ab0ac17e8dc11cec9d55d2c18dda414c051c5e69e/cffi-2.0.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:1e3a615586f05fc4065a8b22b8152f0c1b00cdbc60596d187c2a74f9e3036e4e", size = 207983, upload-time = "2025-09-08T23:22:50.06Z" }, + { url = "https://files.pythonhosted.org/packages/3a/c8/15cb9ada8895957ea171c62dc78ff3e99159ee7adb13c0123c001a2546c1/cffi-2.0.0-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:81afed14892743bbe14dacb9e36d9e0e504cd204e0b165062c488942b9718037", size = 206519, upload-time = "2025-09-08T23:22:51.364Z" }, { url = "https://files.pythonhosted.org/packages/78/2d/7fa73dfa841b5ac06c7b8855cfc18622132e365f5b81d02230333ff26e9e/cffi-2.0.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:3e17ed538242334bf70832644a32a7aae3d83b57567f9fd60a26257e992b79ba", size = 219572, upload-time = "2025-09-08T23:22:52.902Z" }, { url = "https://files.pythonhosted.org/packages/07/e0/267e57e387b4ca276b90f0434ff88b2c2241ad72b16d31836adddfd6031b/cffi-2.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = 
"sha256:3925dd22fa2b7699ed2617149842d2e6adde22b262fcbfada50e3d195e4b3a94", size = 222963, upload-time = "2025-09-08T23:22:54.518Z" }, { url = "https://files.pythonhosted.org/packages/b6/75/1f2747525e06f53efbd878f4d03bac5b859cbc11c633d0fb81432d98a795/cffi-2.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:2c8f814d84194c9ea681642fd164267891702542f028a15fc97d4674b6206187", size = 221361, upload-time = "2025-09-08T23:22:55.867Z" }, + { url = "https://files.pythonhosted.org/packages/7b/2b/2b6435f76bfeb6bbf055596976da087377ede68df465419d192acf00c437/cffi-2.0.0-cp312-cp312-win32.whl", hash = "sha256:da902562c3e9c550df360bfa53c035b2f241fed6d9aef119048073680ace4a18", size = 172932, upload-time = "2025-09-08T23:22:57.188Z" }, + { url = "https://files.pythonhosted.org/packages/f8/ed/13bd4418627013bec4ed6e54283b1959cf6db888048c7cf4b4c3b5b36002/cffi-2.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:da68248800ad6320861f129cd9c1bf96ca849a2771a59e0344e88681905916f5", size = 183557, upload-time = "2025-09-08T23:22:58.351Z" }, + { url = "https://files.pythonhosted.org/packages/95/31/9f7f93ad2f8eff1dbc1c3656d7ca5bfd8fb52c9d786b4dcf19b2d02217fa/cffi-2.0.0-cp312-cp312-win_arm64.whl", hash = "sha256:4671d9dd5ec934cb9a73e7ee9676f9362aba54f7f34910956b84d727b0d73fb6", size = 177762, upload-time = "2025-09-08T23:22:59.668Z" }, + { url = "https://files.pythonhosted.org/packages/4b/8d/a0a47a0c9e413a658623d014e91e74a50cdd2c423f7ccfd44086ef767f90/cffi-2.0.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:00bdf7acc5f795150faa6957054fbbca2439db2f775ce831222b66f192f03beb", size = 185230, upload-time = "2025-09-08T23:23:00.879Z" }, + { url = "https://files.pythonhosted.org/packages/4a/d2/a6c0296814556c68ee32009d9c2ad4f85f2707cdecfd7727951ec228005d/cffi-2.0.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:45d5e886156860dc35862657e1494b9bae8dfa63bf56796f2fb56e1679fc0bca", size = 181043, upload-time = "2025-09-08T23:23:02.231Z" }, { url = 
"https://files.pythonhosted.org/packages/b0/1e/d22cc63332bd59b06481ceaac49d6c507598642e2230f201649058a7e704/cffi-2.0.0-cp313-cp313-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:07b271772c100085dd28b74fa0cd81c8fb1a3ba18b21e03d7c27f3436a10606b", size = 212446, upload-time = "2025-09-08T23:23:03.472Z" }, { url = "https://files.pythonhosted.org/packages/a9/f5/a2c23eb03b61a0b8747f211eb716446c826ad66818ddc7810cc2cc19b3f2/cffi-2.0.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d48a880098c96020b02d5a1f7d9251308510ce8858940e6fa99ece33f610838b", size = 220101, upload-time = "2025-09-08T23:23:04.792Z" }, + { url = "https://files.pythonhosted.org/packages/f2/7f/e6647792fc5850d634695bc0e6ab4111ae88e89981d35ac269956605feba/cffi-2.0.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:f93fd8e5c8c0a4aa1f424d6173f14a892044054871c771f8566e4008eaa359d2", size = 207948, upload-time = "2025-09-08T23:23:06.127Z" }, + { url = "https://files.pythonhosted.org/packages/cb/1e/a5a1bd6f1fb30f22573f76533de12a00bf274abcdc55c8edab639078abb6/cffi-2.0.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:dd4f05f54a52fb558f1ba9f528228066954fee3ebe629fc1660d874d040ae5a3", size = 206422, upload-time = "2025-09-08T23:23:07.753Z" }, { url = "https://files.pythonhosted.org/packages/98/df/0a1755e750013a2081e863e7cd37e0cdd02664372c754e5560099eb7aa44/cffi-2.0.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c8d3b5532fc71b7a77c09192b4a5a200ea992702734a2e9279a37f2478236f26", size = 219499, upload-time = "2025-09-08T23:23:09.648Z" }, { url = "https://files.pythonhosted.org/packages/50/e1/a969e687fcf9ea58e6e2a928ad5e2dd88cc12f6f0ab477e9971f2309b57c/cffi-2.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:d9b29c1f0ae438d5ee9acb31cadee00a58c46cc9c0b2f9038c6b0b3470877a8c", size = 222928, upload-time = "2025-09-08T23:23:10.928Z" }, { url = 
"https://files.pythonhosted.org/packages/36/54/0362578dd2c9e557a28ac77698ed67323ed5b9775ca9d3fe73fe191bb5d8/cffi-2.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6d50360be4546678fc1b79ffe7a66265e28667840010348dd69a314145807a1b", size = 221302, upload-time = "2025-09-08T23:23:12.42Z" }, + { url = "https://files.pythonhosted.org/packages/eb/6d/bf9bda840d5f1dfdbf0feca87fbdb64a918a69bca42cfa0ba7b137c48cb8/cffi-2.0.0-cp313-cp313-win32.whl", hash = "sha256:74a03b9698e198d47562765773b4a8309919089150a0bb17d829ad7b44b60d27", size = 172909, upload-time = "2025-09-08T23:23:14.32Z" }, + { url = "https://files.pythonhosted.org/packages/37/18/6519e1ee6f5a1e579e04b9ddb6f1676c17368a7aba48299c3759bbc3c8b3/cffi-2.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:19f705ada2530c1167abacb171925dd886168931e0a7b78f5bffcae5c6b5be75", size = 183402, upload-time = "2025-09-08T23:23:15.535Z" }, + { url = "https://files.pythonhosted.org/packages/cb/0e/02ceeec9a7d6ee63bb596121c2c8e9b3a9e150936f4fbef6ca1943e6137c/cffi-2.0.0-cp313-cp313-win_arm64.whl", hash = "sha256:256f80b80ca3853f90c21b23ee78cd008713787b1b1e93eae9f3d6a7134abd91", size = 177780, upload-time = "2025-09-08T23:23:16.761Z" }, + { url = "https://files.pythonhosted.org/packages/92/c4/3ce07396253a83250ee98564f8d7e9789fab8e58858f35d07a9a2c78de9f/cffi-2.0.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:fc33c5141b55ed366cfaad382df24fe7dcbc686de5be719b207bb248e3053dc5", size = 185320, upload-time = "2025-09-08T23:23:18.087Z" }, + { url = "https://files.pythonhosted.org/packages/59/dd/27e9fa567a23931c838c6b02d0764611c62290062a6d4e8ff7863daf9730/cffi-2.0.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:c654de545946e0db659b3400168c9ad31b5d29593291482c43e3564effbcee13", size = 181487, upload-time = "2025-09-08T23:23:19.622Z" }, { url = "https://files.pythonhosted.org/packages/d6/43/0e822876f87ea8a4ef95442c3d766a06a51fc5298823f884ef87aaad168c/cffi-2.0.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = 
"sha256:24b6f81f1983e6df8db3adc38562c83f7d4a0c36162885ec7f7b77c7dcbec97b", size = 220049, upload-time = "2025-09-08T23:23:20.853Z" }, + { url = "https://files.pythonhosted.org/packages/b4/89/76799151d9c2d2d1ead63c2429da9ea9d7aac304603de0c6e8764e6e8e70/cffi-2.0.0-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:12873ca6cb9b0f0d3a0da705d6086fe911591737a59f28b7936bdfed27c0d47c", size = 207793, upload-time = "2025-09-08T23:23:22.08Z" }, + { url = "https://files.pythonhosted.org/packages/bb/dd/3465b14bb9e24ee24cb88c9e3730f6de63111fffe513492bf8c808a3547e/cffi-2.0.0-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:d9b97165e8aed9272a6bb17c01e3cc5871a594a446ebedc996e2397a1c1ea8ef", size = 206300, upload-time = "2025-09-08T23:23:23.314Z" }, { url = "https://files.pythonhosted.org/packages/47/d9/d83e293854571c877a92da46fdec39158f8d7e68da75bf73581225d28e90/cffi-2.0.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:afb8db5439b81cf9c9d0c80404b60c3cc9c3add93e114dcae767f1477cb53775", size = 219244, upload-time = "2025-09-08T23:23:24.541Z" }, { url = "https://files.pythonhosted.org/packages/2b/0f/1f177e3683aead2bb00f7679a16451d302c436b5cbf2505f0ea8146ef59e/cffi-2.0.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:737fe7d37e1a1bffe70bd5754ea763a62a066dc5913ca57e957824b72a85e205", size = 222828, upload-time = "2025-09-08T23:23:26.143Z" }, { url = "https://files.pythonhosted.org/packages/c6/0f/cafacebd4b040e3119dcb32fed8bdef8dfe94da653155f9d0b9dc660166e/cffi-2.0.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:38100abb9d1b1435bc4cc340bb4489635dc2f0da7456590877030c9b3d40b0c1", size = 220926, upload-time = "2025-09-08T23:23:27.873Z" }, + { url = "https://files.pythonhosted.org/packages/3e/aa/df335faa45b395396fcbc03de2dfcab242cd61a9900e914fe682a59170b1/cffi-2.0.0-cp314-cp314-win32.whl", hash = "sha256:087067fa8953339c723661eda6b54bc98c5625757ea62e95eb4898ad5e776e9f", size = 175328, 
upload-time = "2025-09-08T23:23:44.61Z" }, + { url = "https://files.pythonhosted.org/packages/bb/92/882c2d30831744296ce713f0feb4c1cd30f346ef747b530b5318715cc367/cffi-2.0.0-cp314-cp314-win_amd64.whl", hash = "sha256:203a48d1fb583fc7d78a4c6655692963b860a417c0528492a6bc21f1aaefab25", size = 185650, upload-time = "2025-09-08T23:23:45.848Z" }, + { url = "https://files.pythonhosted.org/packages/9f/2c/98ece204b9d35a7366b5b2c6539c350313ca13932143e79dc133ba757104/cffi-2.0.0-cp314-cp314-win_arm64.whl", hash = "sha256:dbd5c7a25a7cb98f5ca55d258b103a2054f859a46ae11aaf23134f9cc0d356ad", size = 180687, upload-time = "2025-09-08T23:23:47.105Z" }, + { url = "https://files.pythonhosted.org/packages/3e/61/c768e4d548bfa607abcda77423448df8c471f25dbe64fb2ef6d555eae006/cffi-2.0.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:9a67fc9e8eb39039280526379fb3a70023d77caec1852002b4da7e8b270c4dd9", size = 188773, upload-time = "2025-09-08T23:23:29.347Z" }, + { url = "https://files.pythonhosted.org/packages/2c/ea/5f76bce7cf6fcd0ab1a1058b5af899bfbef198bea4d5686da88471ea0336/cffi-2.0.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:7a66c7204d8869299919db4d5069a82f1561581af12b11b3c9f48c584eb8743d", size = 185013, upload-time = "2025-09-08T23:23:30.63Z" }, { url = "https://files.pythonhosted.org/packages/be/b4/c56878d0d1755cf9caa54ba71e5d049479c52f9e4afc230f06822162ab2f/cffi-2.0.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:7cc09976e8b56f8cebd752f7113ad07752461f48a58cbba644139015ac24954c", size = 221593, upload-time = "2025-09-08T23:23:31.91Z" }, + { url = "https://files.pythonhosted.org/packages/e0/0d/eb704606dfe8033e7128df5e90fee946bbcb64a04fcdaa97321309004000/cffi-2.0.0-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:92b68146a71df78564e4ef48af17551a5ddd142e5190cdf2c5624d0c3ff5b2e8", size = 209354, upload-time = "2025-09-08T23:23:33.214Z" }, + { url = 
"https://files.pythonhosted.org/packages/d8/19/3c435d727b368ca475fb8742ab97c9cb13a0de600ce86f62eab7fa3eea60/cffi-2.0.0-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:b1e74d11748e7e98e2f426ab176d4ed720a64412b6a15054378afdb71e0f37dc", size = 208480, upload-time = "2025-09-08T23:23:34.495Z" }, { url = "https://files.pythonhosted.org/packages/d0/44/681604464ed9541673e486521497406fadcc15b5217c3e326b061696899a/cffi-2.0.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:28a3a209b96630bca57cce802da70c266eb08c6e97e5afd61a75611ee6c64592", size = 221584, upload-time = "2025-09-08T23:23:36.096Z" }, { url = "https://files.pythonhosted.org/packages/25/8e/342a504ff018a2825d395d44d63a767dd8ebc927ebda557fecdaca3ac33a/cffi-2.0.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:7553fb2090d71822f02c629afe6042c299edf91ba1bf94951165613553984512", size = 224443, upload-time = "2025-09-08T23:23:37.328Z" }, { url = "https://files.pythonhosted.org/packages/e1/5e/b666bacbbc60fbf415ba9988324a132c9a7a0448a9a8f125074671c0f2c3/cffi-2.0.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:6c6c373cfc5c83a975506110d17457138c8c63016b563cc9ed6e056a82f13ce4", size = 223437, upload-time = "2025-09-08T23:23:38.945Z" }, + { url = "https://files.pythonhosted.org/packages/a0/1d/ec1a60bd1a10daa292d3cd6bb0b359a81607154fb8165f3ec95fe003b85c/cffi-2.0.0-cp314-cp314t-win32.whl", hash = "sha256:1fc9ea04857caf665289b7a75923f2c6ed559b8298a1b8c49e59f7dd95c8481e", size = 180487, upload-time = "2025-09-08T23:23:40.423Z" }, + { url = "https://files.pythonhosted.org/packages/bf/41/4c1168c74fac325c0c8156f04b6749c8b6a8f405bbf91413ba088359f60d/cffi-2.0.0-cp314-cp314t-win_amd64.whl", hash = "sha256:d68b6cef7827e8641e8ef16f4494edda8b36104d79773a334beaa1e3521430f6", size = 191726, upload-time = "2025-09-08T23:23:41.742Z" }, + { url = 
"https://files.pythonhosted.org/packages/ae/3a/dbeec9d1ee0844c679f6bb5d6ad4e9f198b1224f4e7a32825f47f6192b0c/cffi-2.0.0-cp314-cp314t-win_arm64.whl", hash = "sha256:0a1527a803f0a659de1af2e1fd700213caba79377e27e4693648c2923da066f9", size = 184195, upload-time = "2025-09-08T23:23:43.004Z" }, ] [[package]] @@ -191,9 +264,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/2a/68/687187c7e26cb24ccbd88e5069f5ef00eba804d36dde11d99aad0838ab45/charset_normalizer-3.4.6-py3-none-any.whl", hash = "sha256:947cf925bc916d90adba35a64c82aace04fa39b46b52d4630ece166655905a69", size = 61455, upload-time = "2026-03-15T18:53:23.833Z" }, ] +[[package]] +name = "click" +version = "8.3.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/3d/fa/656b739db8587d7b5dfa22e22ed02566950fbfbcdc20311993483657a5c0/click-8.3.1.tar.gz", hash = "sha256:12ff4785d337a1bb490bb7e9c2b1ee5da3112e94a8622f26a6c77f5d2fc6842a", size = 295065, upload-time = "2025-11-15T20:45:42.706Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/98/78/01c019cdb5d6498122777c1a43056ebb3ebfeef2076d9d026bfe15583b2b/click-8.3.1-py3-none-any.whl", hash = "sha256:981153a64e25f12d547d3426c367a4857371575ee7ad18df2a6183ab0545b2a6", size = 108274, upload-time = "2025-11-15T20:45:41.139Z" }, +] + [[package]] name = "codeclone" -version = "2.0.0b2" +version = "2.0.0b3" source = { editable = "." 
} dependencies = [ { name = "pygments" }, @@ -211,10 +296,14 @@ dev = [ { name = "ruff" }, { name = "twine" }, ] +mcp = [ + { name = "mcp" }, +] [package.metadata] requires-dist = [ { name = "build", marker = "extra == 'dev'", specifier = ">=1.4.1" }, + { name = "mcp", marker = "extra == 'mcp'", specifier = ">=1.26.0,<2" }, { name = "mypy", marker = "extra == 'dev'", specifier = ">=1.19.1" }, { name = "pre-commit", marker = "extra == 'dev'", specifier = ">=4.5.1" }, { name = "pygments", specifier = ">=2.19.2" }, @@ -225,7 +314,7 @@ requires-dist = [ { name = "tomli", marker = "python_full_version < '3.11'", specifier = ">=2.0.1" }, { name = "twine", marker = "extra == 'dev'", specifier = ">=5.0.0" }, ] -provides-extras = ["dev"] +provides-extras = ["mcp", "dev"] [[package]] name = "colorama" @@ -364,37 +453,54 @@ dependencies = [ ] sdist = { url = "https://files.pythonhosted.org/packages/a4/ba/04b1bd4218cbc58dc90ce967106d51582371b898690f3ae0402876cc4f34/cryptography-46.0.6.tar.gz", hash = "sha256:27550628a518c5c6c903d84f637fbecf287f6cb9ced3804838a1295dc1fd0759", size = 750542, upload-time = "2026-03-25T23:34:53.396Z" } wheels = [ + { url = "https://files.pythonhosted.org/packages/47/23/9285e15e3bc57325b0a72e592921983a701efc1ee8f91c06c5f0235d86d9/cryptography-46.0.6-cp311-abi3-macosx_10_9_universal2.whl", hash = "sha256:64235194bad039a10bb6d2d930ab3323baaec67e2ce36215fd0952fad0930ca8", size = 7176401, upload-time = "2026-03-25T23:33:22.096Z" }, { url = "https://files.pythonhosted.org/packages/60/f8/e61f8f13950ab6195b31913b42d39f0f9afc7d93f76710f299b5ec286ae6/cryptography-46.0.6-cp311-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:26031f1e5ca62fcb9d1fcb34b2b60b390d1aacaa15dc8b895a9ed00968b97b30", size = 4275275, upload-time = "2026-03-25T23:33:23.844Z" }, { url = 
"https://files.pythonhosted.org/packages/19/69/732a736d12c2631e140be2348b4ad3d226302df63ef64d30dfdb8db7ad1c/cryptography-46.0.6-cp311-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:9a693028b9cbe51b5a1136232ee8f2bc242e4e19d456ded3fa7c86e43c713b4a", size = 4425320, upload-time = "2026-03-25T23:33:25.703Z" }, { url = "https://files.pythonhosted.org/packages/d4/12/123be7292674abf76b21ac1fc0e1af50661f0e5b8f0ec8285faac18eb99e/cryptography-46.0.6-cp311-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:67177e8a9f421aa2d3a170c3e56eca4e0128883cf52a071a7cbf53297f18b175", size = 4278082, upload-time = "2026-03-25T23:33:27.423Z" }, + { url = "https://files.pythonhosted.org/packages/5b/ba/d5e27f8d68c24951b0a484924a84c7cdaed7502bac9f18601cd357f8b1d2/cryptography-46.0.6-cp311-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:d9528b535a6c4f8ff37847144b8986a9a143585f0540fbcb1a98115b543aa463", size = 4926514, upload-time = "2026-03-25T23:33:29.206Z" }, { url = "https://files.pythonhosted.org/packages/34/71/1ea5a7352ae516d5512d17babe7e1b87d9db5150b21f794b1377eac1edc0/cryptography-46.0.6-cp311-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:22259338084d6ae497a19bae5d4c66b7ca1387d3264d1c2c0e72d9e9b6a77b97", size = 4457766, upload-time = "2026-03-25T23:33:30.834Z" }, { url = "https://files.pythonhosted.org/packages/01/59/562be1e653accee4fdad92c7a2e88fced26b3fdfce144047519bbebc299e/cryptography-46.0.6-cp311-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:760997a4b950ff00d418398ad73fbc91aa2894b5c1db7ccb45b4f68b42a63b3c", size = 3986535, upload-time = "2026-03-25T23:33:33.02Z" }, { url = "https://files.pythonhosted.org/packages/d6/8b/b1ebfeb788bf4624d36e45ed2662b8bd43a05ff62157093c1539c1288a18/cryptography-46.0.6-cp311-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:3dfa6567f2e9e4c5dceb8ccb5a708158a2a871052fa75c8b78cb0977063f1507", size = 4277618, upload-time = "2026-03-25T23:33:34.567Z" }, + { url = 
"https://files.pythonhosted.org/packages/dd/52/a005f8eabdb28df57c20f84c44d397a755782d6ff6d455f05baa2785bd91/cryptography-46.0.6-cp311-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:cdcd3edcbc5d55757e5f5f3d330dd00007ae463a7e7aa5bf132d1f22a4b62b19", size = 4890802, upload-time = "2026-03-25T23:33:37.034Z" }, { url = "https://files.pythonhosted.org/packages/ec/4d/8e7d7245c79c617d08724e2efa397737715ca0ec830ecb3c91e547302555/cryptography-46.0.6-cp311-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:d4e4aadb7fc1f88687f47ca20bb7227981b03afaae69287029da08096853b738", size = 4457425, upload-time = "2026-03-25T23:33:38.904Z" }, { url = "https://files.pythonhosted.org/packages/1d/5c/f6c3596a1430cec6f949085f0e1a970638d76f81c3ea56d93d564d04c340/cryptography-46.0.6-cp311-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:2b417edbe8877cda9022dde3a008e2deb50be9c407eef034aeeb3a8b11d9db3c", size = 4405530, upload-time = "2026-03-25T23:33:40.842Z" }, { url = "https://files.pythonhosted.org/packages/7e/c9/9f9cea13ee2dbde070424e0c4f621c091a91ffcc504ffea5e74f0e1daeff/cryptography-46.0.6-cp311-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:380343e0653b1c9d7e1f55b52aaa2dbb2fdf2730088d48c43ca1c7c0abb7cc2f", size = 4667896, upload-time = "2026-03-25T23:33:42.781Z" }, + { url = "https://files.pythonhosted.org/packages/ad/b5/1895bc0821226f129bc74d00eccfc6a5969e2028f8617c09790bf89c185e/cryptography-46.0.6-cp311-abi3-win32.whl", hash = "sha256:bcb87663e1f7b075e48c3be3ecb5f0b46c8fc50b50a97cf264e7f60242dca3f2", size = 3026348, upload-time = "2026-03-25T23:33:45.021Z" }, + { url = "https://files.pythonhosted.org/packages/c3/f8/c9bcbf0d3e6ad288b9d9aa0b1dee04b063d19e8c4f871855a03ab3a297ab/cryptography-46.0.6-cp311-abi3-win_amd64.whl", hash = "sha256:6739d56300662c468fddb0e5e291f9b4d084bead381667b9e654c7dd81705124", size = 3483896, upload-time = "2026-03-25T23:33:46.649Z" }, + { url = 
"https://files.pythonhosted.org/packages/01/41/3a578f7fd5c70611c0aacba52cd13cb364a5dee895a5c1d467208a9380b0/cryptography-46.0.6-cp314-cp314t-macosx_10_9_universal2.whl", hash = "sha256:2ef9e69886cbb137c2aef9772c2e7138dc581fad4fcbcf13cc181eb5a3ab6275", size = 7117147, upload-time = "2026-03-25T23:33:48.249Z" }, { url = "https://files.pythonhosted.org/packages/fa/87/887f35a6fca9dde90cad08e0de0c89263a8e59b2d2ff904fd9fcd8025b6f/cryptography-46.0.6-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:7f417f034f91dcec1cb6c5c35b07cdbb2ef262557f701b4ecd803ee8cefed4f4", size = 4266221, upload-time = "2026-03-25T23:33:49.874Z" }, { url = "https://files.pythonhosted.org/packages/aa/a8/0a90c4f0b0871e0e3d1ed126aed101328a8a57fd9fd17f00fb67e82a51ca/cryptography-46.0.6-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d24c13369e856b94892a89ddf70b332e0b70ad4a5c43cf3e9cb71d6d7ffa1f7b", size = 4408952, upload-time = "2026-03-25T23:33:52.128Z" }, { url = "https://files.pythonhosted.org/packages/16/0b/b239701eb946523e4e9f329336e4ff32b1247e109cbab32d1a7b61da8ed7/cryptography-46.0.6-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:aad75154a7ac9039936d50cf431719a2f8d4ed3d3c277ac03f3339ded1a5e707", size = 4270141, upload-time = "2026-03-25T23:33:54.11Z" }, + { url = "https://files.pythonhosted.org/packages/0f/a8/976acdd4f0f30df7b25605f4b9d3d89295351665c2091d18224f7ad5cdbf/cryptography-46.0.6-cp314-cp314t-manylinux_2_28_ppc64le.whl", hash = "sha256:3c21d92ed15e9cfc6eb64c1f5a0326db22ca9c2566ca46d845119b45b4400361", size = 4904178, upload-time = "2026-03-25T23:33:55.725Z" }, { url = "https://files.pythonhosted.org/packages/b1/1b/bf0e01a88efd0e59679b69f42d4afd5bced8700bb5e80617b2d63a3741af/cryptography-46.0.6-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:4668298aef7cddeaf5c6ecc244c2302a2b8e40f384255505c22875eebb47888b", size = 4441812, upload-time = "2026-03-25T23:33:57.364Z" }, { url = 
"https://files.pythonhosted.org/packages/bb/8b/11df86de2ea389c65aa1806f331cae145f2ed18011f30234cc10ca253de8/cryptography-46.0.6-cp314-cp314t-manylinux_2_31_armv7l.whl", hash = "sha256:8ce35b77aaf02f3b59c90b2c8a05c73bac12cea5b4e8f3fbece1f5fddea5f0ca", size = 3963923, upload-time = "2026-03-25T23:33:59.361Z" }, { url = "https://files.pythonhosted.org/packages/91/e0/207fb177c3a9ef6a8108f234208c3e9e76a6aa8cf20d51932916bd43bda0/cryptography-46.0.6-cp314-cp314t-manylinux_2_34_aarch64.whl", hash = "sha256:c89eb37fae9216985d8734c1afd172ba4927f5a05cfd9bf0e4863c6d5465b013", size = 4269695, upload-time = "2026-03-25T23:34:00.909Z" }, + { url = "https://files.pythonhosted.org/packages/21/5e/19f3260ed1e95bced52ace7501fabcd266df67077eeb382b79c81729d2d3/cryptography-46.0.6-cp314-cp314t-manylinux_2_34_ppc64le.whl", hash = "sha256:ed418c37d095aeddf5336898a132fba01091f0ac5844e3e8018506f014b6d2c4", size = 4869785, upload-time = "2026-03-25T23:34:02.796Z" }, { url = "https://files.pythonhosted.org/packages/10/38/cd7864d79aa1d92ef6f1a584281433419b955ad5a5ba8d1eb6c872165bcb/cryptography-46.0.6-cp314-cp314t-manylinux_2_34_x86_64.whl", hash = "sha256:69cf0056d6947edc6e6760e5f17afe4bea06b56a9ac8a06de9d2bd6b532d4f3a", size = 4441404, upload-time = "2026-03-25T23:34:04.35Z" }, { url = "https://files.pythonhosted.org/packages/09/0a/4fe7a8d25fed74419f91835cf5829ade6408fd1963c9eae9c4bce390ecbb/cryptography-46.0.6-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:8e7304c4f4e9490e11efe56af6713983460ee0780f16c63f219984dab3af9d2d", size = 4397549, upload-time = "2026-03-25T23:34:06.342Z" }, { url = "https://files.pythonhosted.org/packages/5f/a0/7d738944eac6513cd60a8da98b65951f4a3b279b93479a7e8926d9cd730b/cryptography-46.0.6-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:b928a3ca837c77a10e81a814a693f2295200adb3352395fad024559b7be7a736", size = 4651874, upload-time = "2026-03-25T23:34:07.916Z" }, + { url = 
"https://files.pythonhosted.org/packages/cb/f1/c2326781ca05208845efca38bf714f76939ae446cd492d7613808badedf1/cryptography-46.0.6-cp314-cp314t-win32.whl", hash = "sha256:97c8115b27e19e592a05c45d0dd89c57f81f841cc9880e353e0d3bf25b2139ed", size = 3001511, upload-time = "2026-03-25T23:34:09.892Z" }, + { url = "https://files.pythonhosted.org/packages/c9/57/fe4a23eb549ac9d903bd4698ffda13383808ef0876cc912bcb2838799ece/cryptography-46.0.6-cp314-cp314t-win_amd64.whl", hash = "sha256:c797e2517cb7880f8297e2c0f43bb910e91381339336f75d2c1c2cbf811b70b4", size = 3471692, upload-time = "2026-03-25T23:34:11.613Z" }, + { url = "https://files.pythonhosted.org/packages/c4/cc/f330e982852403da79008552de9906804568ae9230da8432f7496ce02b71/cryptography-46.0.6-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:12cae594e9473bca1a7aceb90536060643128bb274fcea0fc459ab90f7d1ae7a", size = 7162776, upload-time = "2026-03-25T23:34:13.308Z" }, { url = "https://files.pythonhosted.org/packages/49/b3/dc27efd8dcc4bff583b3f01d4a3943cd8b5821777a58b3a6a5f054d61b79/cryptography-46.0.6-cp38-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:639301950939d844a9e1c4464d7e07f902fe9a7f6b215bb0d4f28584729935d8", size = 4270529, upload-time = "2026-03-25T23:34:15.019Z" }, { url = "https://files.pythonhosted.org/packages/e6/05/e8d0e6eb4f0d83365b3cb0e00eb3c484f7348db0266652ccd84632a3d58d/cryptography-46.0.6-cp38-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ed3775295fb91f70b4027aeba878d79b3e55c0b3e97eaa4de71f8f23a9f2eb77", size = 4414827, upload-time = "2026-03-25T23:34:16.604Z" }, { url = "https://files.pythonhosted.org/packages/2f/97/daba0f5d2dc6d855e2dcb70733c812558a7977a55dd4a6722756628c44d1/cryptography-46.0.6-cp38-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:8927ccfbe967c7df312ade694f987e7e9e22b2425976ddbf28271d7e58845290", size = 4271265, upload-time = "2026-03-25T23:34:18.586Z" }, + { url = 
"https://files.pythonhosted.org/packages/89/06/fe1fce39a37ac452e58d04b43b0855261dac320a2ebf8f5260dd55b201a9/cryptography-46.0.6-cp38-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:b12c6b1e1651e42ab5de8b1e00dc3b6354fdfd778e7fa60541ddacc27cd21410", size = 4916800, upload-time = "2026-03-25T23:34:20.561Z" }, { url = "https://files.pythonhosted.org/packages/ff/8a/b14f3101fe9c3592603339eb5d94046c3ce5f7fc76d6512a2d40efd9724e/cryptography-46.0.6-cp38-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:063b67749f338ca9c5a0b7fe438a52c25f9526b851e24e6c9310e7195aad3b4d", size = 4448771, upload-time = "2026-03-25T23:34:22.406Z" }, { url = "https://files.pythonhosted.org/packages/01/b3/0796998056a66d1973fd52ee89dc1bb3b6581960a91ad4ac705f182d398f/cryptography-46.0.6-cp38-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:02fad249cb0e090b574e30b276a3da6a149e04ee2f049725b1f69e7b8351ec70", size = 3978333, upload-time = "2026-03-25T23:34:24.281Z" }, { url = "https://files.pythonhosted.org/packages/c5/3d/db200af5a4ffd08918cd55c08399dc6c9c50b0bc72c00a3246e099d3a849/cryptography-46.0.6-cp38-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:7e6142674f2a9291463e5e150090b95a8519b2fb6e6aaec8917dd8d094ce750d", size = 4271069, upload-time = "2026-03-25T23:34:25.895Z" }, + { url = "https://files.pythonhosted.org/packages/d7/18/61acfd5b414309d74ee838be321c636fe71815436f53c9f0334bf19064fa/cryptography-46.0.6-cp38-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:456b3215172aeefb9284550b162801d62f5f264a081049a3e94307fe20792cfa", size = 4878358, upload-time = "2026-03-25T23:34:27.67Z" }, { url = "https://files.pythonhosted.org/packages/8b/65/5bf43286d566f8171917cae23ac6add941654ccf085d739195a4eacf1674/cryptography-46.0.6-cp38-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:341359d6c9e68834e204ceaf25936dffeafea3829ab80e9503860dcc4f4dac58", size = 4448061, upload-time = "2026-03-25T23:34:29.375Z" }, { url = 
"https://files.pythonhosted.org/packages/e0/25/7e49c0fa7205cf3597e525d156a6bce5b5c9de1fd7e8cb01120e459f205a/cryptography-46.0.6-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:9a9c42a2723999a710445bc0d974e345c32adfd8d2fac6d8a251fa829ad31cfb", size = 4399103, upload-time = "2026-03-25T23:34:32.036Z" }, { url = "https://files.pythonhosted.org/packages/44/46/466269e833f1c4718d6cd496ffe20c56c9c8d013486ff66b4f69c302a68d/cryptography-46.0.6-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:6617f67b1606dfd9fe4dbfa354a9508d4a6d37afe30306fe6c101b7ce3274b72", size = 4659255, upload-time = "2026-03-25T23:34:33.679Z" }, + { url = "https://files.pythonhosted.org/packages/0a/09/ddc5f630cc32287d2c953fc5d32705e63ec73e37308e5120955316f53827/cryptography-46.0.6-cp38-abi3-win32.whl", hash = "sha256:7f6690b6c55e9c5332c0b59b9c8a3fb232ebf059094c17f9019a51e9827df91c", size = 3010660, upload-time = "2026-03-25T23:34:35.418Z" }, + { url = "https://files.pythonhosted.org/packages/1b/82/ca4893968aeb2709aacfb57a30dec6fa2ab25b10fa9f064b8882ce33f599/cryptography-46.0.6-cp38-abi3-win_amd64.whl", hash = "sha256:79e865c642cfc5c0b3eb12af83c35c5aeff4fa5c672dc28c43721c2c9fdd2f0f", size = 3471160, upload-time = "2026-03-25T23:34:37.191Z" }, + { url = "https://files.pythonhosted.org/packages/2e/84/7ccff00ced5bac74b775ce0beb7d1be4e8637536b522b5df9b73ada42da2/cryptography-46.0.6-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:2ea0f37e9a9cf0df2952893ad145fd9627d326a59daec9b0802480fa3bcd2ead", size = 3475444, upload-time = "2026-03-25T23:34:38.944Z" }, { url = "https://files.pythonhosted.org/packages/bc/1f/4c926f50df7749f000f20eede0c896769509895e2648db5da0ed55db711d/cryptography-46.0.6-pp311-pypy311_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:a3e84d5ec9ba01f8fd03802b2147ba77f0c8f2617b2aff254cedd551844209c8", size = 4218227, upload-time = "2026-03-25T23:34:40.871Z" }, { url = 
"https://files.pythonhosted.org/packages/c6/65/707be3ffbd5f786028665c3223e86e11c4cda86023adbc56bd72b1b6bab5/cryptography-46.0.6-pp311-pypy311_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:12f0fa16cc247b13c43d56d7b35287ff1569b5b1f4c5e87e92cc4fcc00cd10c0", size = 4381399, upload-time = "2026-03-25T23:34:42.609Z" }, { url = "https://files.pythonhosted.org/packages/f3/6d/73557ed0ef7d73d04d9aba745d2c8e95218213687ee5e76b7d236a5030fc/cryptography-46.0.6-pp311-pypy311_pp73-manylinux_2_34_aarch64.whl", hash = "sha256:50575a76e2951fe7dbd1f56d181f8c5ceeeb075e9ff88e7ad997d2f42af06e7b", size = 4217595, upload-time = "2026-03-25T23:34:44.205Z" }, { url = "https://files.pythonhosted.org/packages/9e/c5/e1594c4eec66a567c3ac4400008108a415808be2ce13dcb9a9045c92f1a0/cryptography-46.0.6-pp311-pypy311_pp73-manylinux_2_34_x86_64.whl", hash = "sha256:90e5f0a7b3be5f40c3a0a0eafb32c681d8d2c181fc2a1bdabe9b3f611d9f6b1a", size = 4380912, upload-time = "2026-03-25T23:34:46.328Z" }, + { url = "https://files.pythonhosted.org/packages/1a/89/843b53614b47f97fe1abc13f9a86efa5ec9e275292c457af1d4a60dc80e0/cryptography-46.0.6-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:6728c49e3b2c180ef26f8e9f0a883a2c585638db64cf265b49c9ba10652d430e", size = 3409955, upload-time = "2026-03-25T23:34:48.465Z" }, ] [[package]] @@ -436,6 +542,52 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a4/a5/842ae8f0c08b61d6484b52f99a03510a3a72d23141942d216ebe81fefbce/filelock-3.25.2-py3-none-any.whl", hash = "sha256:ca8afb0da15f229774c9ad1b455ed96e85a81373065fb10446672f64444ddf70", size = 26759, upload-time = "2026-03-11T20:45:37.437Z" }, ] +[[package]] +name = "h11" +version = "0.16.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/01/ee/02a2c011bdab74c6fb3c75474d40b3052059d95df7e73351460c8588d963/h11-0.16.0.tar.gz", hash = "sha256:4e35b956cf45792e4caa5885e69fba00bdbc6ffafbfa020300e549b208ee5ff1", size = 101250, upload-time = 
"2025-04-24T03:35:25.427Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515, upload-time = "2025-04-24T03:35:24.344Z" }, +] + +[[package]] +name = "httpcore" +version = "1.0.9" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "h11" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/06/94/82699a10bca87a5556c9c59b5963f2d039dbd239f25bc2a63907a05a14cb/httpcore-1.0.9.tar.gz", hash = "sha256:6e34463af53fd2ab5d807f399a9b45ea31c3dfa2276f15a2c3f00afff6e176e8", size = 85484, upload-time = "2025-04-24T22:06:22.219Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7e/f5/f66802a942d491edb555dd61e3a9961140fd64c90bce1eafd741609d334d/httpcore-1.0.9-py3-none-any.whl", hash = "sha256:2d400746a40668fc9dec9810239072b40b4484b640a8c38fd654a024c7a1bf55", size = 78784, upload-time = "2025-04-24T22:06:20.566Z" }, +] + +[[package]] +name = "httpx" +version = "0.28.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "certifi" }, + { name = "httpcore" }, + { name = "idna" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b1/df/48c586a5fe32a0f01324ee087459e112ebb7224f646c0b5023f5e79e9956/httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc", size = 141406, upload-time = "2024-12-06T15:37:23.222Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517, upload-time = "2024-12-06T15:37:21.509Z" }, +] + +[[package]] +name = "httpx-sse" +version = "0.4.3" +source = { registry = 
"https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/0f/4c/751061ffa58615a32c31b2d82e8482be8dd4a89154f003147acee90f2be9/httpx_sse-0.4.3.tar.gz", hash = "sha256:9b1ed0127459a66014aec3c56bebd93da3c1bc8bb6618c8082039a44889a755d", size = 15943, upload-time = "2025-10-10T21:48:22.271Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d2/fd/6668e5aec43ab844de6fc74927e155a3b37bf40d7c3790e49fc0406b6578/httpx_sse-0.4.3-py3-none-any.whl", hash = "sha256:0ac1c9fe3c0afad2e0ebb25a934a59f4c7823b60792691f779fad2c5568830fc", size = 8960, upload-time = "2025-10-10T21:48:21.158Z" }, +] + [[package]] name = "id" version = "1.6.1" @@ -532,6 +684,33 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b2/a3/e137168c9c44d18eff0376253da9f1e9234d0239e0ee230d2fee6cea8e55/jeepney-0.9.0-py3-none-any.whl", hash = "sha256:97e5714520c16fc0a45695e5365a2e11b81ea79bba796e26f9f1d178cb182683", size = 49010, upload-time = "2025-02-27T18:51:00.104Z" }, ] +[[package]] +name = "jsonschema" +version = "4.26.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "attrs" }, + { name = "jsonschema-specifications" }, + { name = "referencing" }, + { name = "rpds-py" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b3/fc/e067678238fa451312d4c62bf6e6cf5ec56375422aee02f9cb5f909b3047/jsonschema-4.26.0.tar.gz", hash = "sha256:0c26707e2efad8aa1bfc5b7ce170f3fccc2e4918ff85989ba9ffa9facb2be326", size = 366583, upload-time = "2026-01-07T13:41:07.246Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/69/90/f63fb5873511e014207a475e2bb4e8b2e570d655b00ac19a9a0ca0a385ee/jsonschema-4.26.0-py3-none-any.whl", hash = "sha256:d489f15263b8d200f8387e64b4c3a75f06629559fb73deb8fdfb525f2dab50ce", size = 90630, upload-time = "2026-01-07T13:41:05.306Z" }, +] + +[[package]] +name = "jsonschema-specifications" +version = "2025.9.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = 
"referencing" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/19/74/a633ee74eb36c44aa6d1095e7cc5569bebf04342ee146178e2d36600708b/jsonschema_specifications-2025.9.1.tar.gz", hash = "sha256:b540987f239e745613c7a9176f3edb72b832a4ac465cf02712288397832b5e8d", size = 32855, upload-time = "2025-09-08T01:34:59.186Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/41/45/1a4ed80516f02155c51f51e8cedb3c1902296743db0bbc66608a0db2814f/jsonschema_specifications-2025.9.1-py3-none-any.whl", hash = "sha256:98802fee3a11ee76ecaca44429fda8a41bff98b00a0f2838151b113f210cc6fe", size = 18437, upload-time = "2025-09-08T01:34:57.871Z" }, +] + [[package]] name = "keyring" version = "25.7.0" @@ -647,6 +826,31 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/94/54/e7d793b573f298e1c9013b8c4dade17d481164aa517d1d7148619c2cedbf/markdown_it_py-4.0.0-py3-none-any.whl", hash = "sha256:87327c59b172c5011896038353a81343b6754500a08cd7a4973bb48c6d578147", size = 87321, upload-time = "2025-08-11T12:57:51.923Z" }, ] +[[package]] +name = "mcp" +version = "1.26.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "httpx" }, + { name = "httpx-sse" }, + { name = "jsonschema" }, + { name = "pydantic" }, + { name = "pydantic-settings" }, + { name = "pyjwt", extra = ["crypto"] }, + { name = "python-multipart" }, + { name = "pywin32", marker = "sys_platform == 'win32'" }, + { name = "sse-starlette" }, + { name = "starlette" }, + { name = "typing-extensions" }, + { name = "typing-inspection" }, + { name = "uvicorn", marker = "sys_platform != 'emscripten'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/fc/6d/62e76bbb8144d6ed86e202b5edd8a4cb631e7c8130f3f4893c3f90262b10/mcp-1.26.0.tar.gz", hash = "sha256:db6e2ef491eecc1a0d93711a76f28dec2e05999f93afd48795da1c1137142c66", size = 608005, upload-time = "2026-01-24T19:40:32.468Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/fd/d9/eaa1f80170d2b7c5ba23f3b59f766f3a0bb41155fbc32a69adfa1adaaef9/mcp-1.26.0-py3-none-any.whl", hash = "sha256:904a21c33c25aa98ddbeb47273033c435e595bbacfdb177f4bd87f6dceebe1ca", size = 233615, upload-time = "2026-01-24T19:40:30.652Z" }, +] + [[package]] name = "mdurl" version = "0.1.2" @@ -824,6 +1028,153 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0c/c3/44f3fbbfa403ea2a7c779186dc20772604442dde72947e7d01069cbe98e3/pycparser-3.0-py3-none-any.whl", hash = "sha256:b727414169a36b7d524c1c3e31839a521725078d7b2ff038656844266160a992", size = 48172, upload-time = "2026-01-21T14:26:50.693Z" }, ] +[[package]] +name = "pydantic" +version = "2.12.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "annotated-types" }, + { name = "pydantic-core" }, + { name = "typing-extensions" }, + { name = "typing-inspection" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/69/44/36f1a6e523abc58ae5f928898e4aca2e0ea509b5aa6f6f392a5d882be928/pydantic-2.12.5.tar.gz", hash = "sha256:4d351024c75c0f085a9febbb665ce8c0c6ec5d30e903bdb6394b7ede26aebb49", size = 821591, upload-time = "2025-11-26T15:11:46.471Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5a/87/b70ad306ebb6f9b585f114d0ac2137d792b48be34d732d60e597c2f8465a/pydantic-2.12.5-py3-none-any.whl", hash = "sha256:e561593fccf61e8a20fc46dfc2dfe075b8be7d0188df33f221ad1f0139180f9d", size = 463580, upload-time = "2025-11-26T15:11:44.605Z" }, +] + +[[package]] +name = "pydantic-core" +version = "2.41.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/71/70/23b021c950c2addd24ec408e9ab05d59b035b39d97cdc1130e1bce647bb6/pydantic_core-2.41.5.tar.gz", hash = "sha256:08daa51ea16ad373ffd5e7606252cc32f07bc72b28284b6bc9c6df804816476e", size = 460952, upload-time = "2025-11-04T13:43:49.098Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/c6/90/32c9941e728d564b411d574d8ee0cf09b12ec978cb22b294995bae5549a5/pydantic_core-2.41.5-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:77b63866ca88d804225eaa4af3e664c5faf3568cea95360d21f4725ab6e07146", size = 2107298, upload-time = "2025-11-04T13:39:04.116Z" }, + { url = "https://files.pythonhosted.org/packages/fb/a8/61c96a77fe28993d9a6fb0f4127e05430a267b235a124545d79fea46dd65/pydantic_core-2.41.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:dfa8a0c812ac681395907e71e1274819dec685fec28273a28905df579ef137e2", size = 1901475, upload-time = "2025-11-04T13:39:06.055Z" }, + { url = "https://files.pythonhosted.org/packages/5d/b6/338abf60225acc18cdc08b4faef592d0310923d19a87fba1faf05af5346e/pydantic_core-2.41.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5921a4d3ca3aee735d9fd163808f5e8dd6c6972101e4adbda9a4667908849b97", size = 1918815, upload-time = "2025-11-04T13:39:10.41Z" }, + { url = "https://files.pythonhosted.org/packages/d1/1c/2ed0433e682983d8e8cba9c8d8ef274d4791ec6a6f24c58935b90e780e0a/pydantic_core-2.41.5-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e25c479382d26a2a41b7ebea1043564a937db462816ea07afa8a44c0866d52f9", size = 2065567, upload-time = "2025-11-04T13:39:12.244Z" }, + { url = "https://files.pythonhosted.org/packages/b3/24/cf84974ee7d6eae06b9e63289b7b8f6549d416b5c199ca2d7ce13bbcf619/pydantic_core-2.41.5-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f547144f2966e1e16ae626d8ce72b4cfa0caedc7fa28052001c94fb2fcaa1c52", size = 2230442, upload-time = "2025-11-04T13:39:13.962Z" }, + { url = "https://files.pythonhosted.org/packages/fd/21/4e287865504b3edc0136c89c9c09431be326168b1eb7841911cbc877a995/pydantic_core-2.41.5-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6f52298fbd394f9ed112d56f3d11aabd0d5bd27beb3084cc3d8ad069483b8941", size = 2350956, upload-time = "2025-11-04T13:39:15.889Z" }, + { 
url = "https://files.pythonhosted.org/packages/a8/76/7727ef2ffa4b62fcab916686a68a0426b9b790139720e1934e8ba797e238/pydantic_core-2.41.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:100baa204bb412b74fe285fb0f3a385256dad1d1879f0a5cb1499ed2e83d132a", size = 2068253, upload-time = "2025-11-04T13:39:17.403Z" }, + { url = "https://files.pythonhosted.org/packages/d5/8c/a4abfc79604bcb4c748e18975c44f94f756f08fb04218d5cb87eb0d3a63e/pydantic_core-2.41.5-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:05a2c8852530ad2812cb7914dc61a1125dc4e06252ee98e5638a12da6cc6fb6c", size = 2177050, upload-time = "2025-11-04T13:39:19.351Z" }, + { url = "https://files.pythonhosted.org/packages/67/b1/de2e9a9a79b480f9cb0b6e8b6ba4c50b18d4e89852426364c66aa82bb7b3/pydantic_core-2.41.5-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:29452c56df2ed968d18d7e21f4ab0ac55e71dc59524872f6fc57dcf4a3249ed2", size = 2147178, upload-time = "2025-11-04T13:39:21Z" }, + { url = "https://files.pythonhosted.org/packages/16/c1/dfb33f837a47b20417500efaa0378adc6635b3c79e8369ff7a03c494b4ac/pydantic_core-2.41.5-cp310-cp310-musllinux_1_1_armv7l.whl", hash = "sha256:d5160812ea7a8a2ffbe233d8da666880cad0cbaf5d4de74ae15c313213d62556", size = 2341833, upload-time = "2025-11-04T13:39:22.606Z" }, + { url = "https://files.pythonhosted.org/packages/47/36/00f398642a0f4b815a9a558c4f1dca1b4020a7d49562807d7bc9ff279a6c/pydantic_core-2.41.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:df3959765b553b9440adfd3c795617c352154e497a4eaf3752555cfb5da8fc49", size = 2321156, upload-time = "2025-11-04T13:39:25.843Z" }, + { url = "https://files.pythonhosted.org/packages/7e/70/cad3acd89fde2010807354d978725ae111ddf6d0ea46d1ea1775b5c1bd0c/pydantic_core-2.41.5-cp310-cp310-win32.whl", hash = "sha256:1f8d33a7f4d5a7889e60dc39856d76d09333d8a6ed0f5f1190635cbec70ec4ba", size = 1989378, upload-time = "2025-11-04T13:39:27.92Z" }, + { url = 
"https://files.pythonhosted.org/packages/76/92/d338652464c6c367e5608e4488201702cd1cbb0f33f7b6a85a60fe5f3720/pydantic_core-2.41.5-cp310-cp310-win_amd64.whl", hash = "sha256:62de39db01b8d593e45871af2af9e497295db8d73b085f6bfd0b18c83c70a8f9", size = 2013622, upload-time = "2025-11-04T13:39:29.848Z" }, + { url = "https://files.pythonhosted.org/packages/e8/72/74a989dd9f2084b3d9530b0915fdda64ac48831c30dbf7c72a41a5232db8/pydantic_core-2.41.5-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:a3a52f6156e73e7ccb0f8cced536adccb7042be67cb45f9562e12b319c119da6", size = 2105873, upload-time = "2025-11-04T13:39:31.373Z" }, + { url = "https://files.pythonhosted.org/packages/12/44/37e403fd9455708b3b942949e1d7febc02167662bf1a7da5b78ee1ea2842/pydantic_core-2.41.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7f3bf998340c6d4b0c9a2f02d6a400e51f123b59565d74dc60d252ce888c260b", size = 1899826, upload-time = "2025-11-04T13:39:32.897Z" }, + { url = "https://files.pythonhosted.org/packages/33/7f/1d5cab3ccf44c1935a359d51a8a2a9e1a654b744b5e7f80d41b88d501eec/pydantic_core-2.41.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:378bec5c66998815d224c9ca994f1e14c0c21cb95d2f52b6021cc0b2a58f2a5a", size = 1917869, upload-time = "2025-11-04T13:39:34.469Z" }, + { url = "https://files.pythonhosted.org/packages/6e/6a/30d94a9674a7fe4f4744052ed6c5e083424510be1e93da5bc47569d11810/pydantic_core-2.41.5-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e7b576130c69225432866fe2f4a469a85a54ade141d96fd396dffcf607b558f8", size = 2063890, upload-time = "2025-11-04T13:39:36.053Z" }, + { url = "https://files.pythonhosted.org/packages/50/be/76e5d46203fcb2750e542f32e6c371ffa9b8ad17364cf94bb0818dbfb50c/pydantic_core-2.41.5-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6cb58b9c66f7e4179a2d5e0f849c48eff5c1fca560994d6eb6543abf955a149e", size = 2229740, upload-time = "2025-11-04T13:39:37.753Z" }, + { url = 
"https://files.pythonhosted.org/packages/d3/ee/fed784df0144793489f87db310a6bbf8118d7b630ed07aa180d6067e653a/pydantic_core-2.41.5-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:88942d3a3dff3afc8288c21e565e476fc278902ae4d6d134f1eeda118cc830b1", size = 2350021, upload-time = "2025-11-04T13:39:40.94Z" }, + { url = "https://files.pythonhosted.org/packages/c8/be/8fed28dd0a180dca19e72c233cbf58efa36df055e5b9d90d64fd1740b828/pydantic_core-2.41.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f31d95a179f8d64d90f6831d71fa93290893a33148d890ba15de25642c5d075b", size = 2066378, upload-time = "2025-11-04T13:39:42.523Z" }, + { url = "https://files.pythonhosted.org/packages/b0/3b/698cf8ae1d536a010e05121b4958b1257f0b5522085e335360e53a6b1c8b/pydantic_core-2.41.5-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c1df3d34aced70add6f867a8cf413e299177e0c22660cc767218373d0779487b", size = 2175761, upload-time = "2025-11-04T13:39:44.553Z" }, + { url = "https://files.pythonhosted.org/packages/b8/ba/15d537423939553116dea94ce02f9c31be0fa9d0b806d427e0308ec17145/pydantic_core-2.41.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:4009935984bd36bd2c774e13f9a09563ce8de4abaa7226f5108262fa3e637284", size = 2146303, upload-time = "2025-11-04T13:39:46.238Z" }, + { url = "https://files.pythonhosted.org/packages/58/7f/0de669bf37d206723795f9c90c82966726a2ab06c336deba4735b55af431/pydantic_core-2.41.5-cp311-cp311-musllinux_1_1_armv7l.whl", hash = "sha256:34a64bc3441dc1213096a20fe27e8e128bd3ff89921706e83c0b1ac971276594", size = 2340355, upload-time = "2025-11-04T13:39:48.002Z" }, + { url = "https://files.pythonhosted.org/packages/e5/de/e7482c435b83d7e3c3ee5ee4451f6e8973cff0eb6007d2872ce6383f6398/pydantic_core-2.41.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:c9e19dd6e28fdcaa5a1de679aec4141f691023916427ef9bae8584f9c2fb3b0e", size = 2319875, upload-time = "2025-11-04T13:39:49.705Z" }, + { url = 
"https://files.pythonhosted.org/packages/fe/e6/8c9e81bb6dd7560e33b9053351c29f30c8194b72f2d6932888581f503482/pydantic_core-2.41.5-cp311-cp311-win32.whl", hash = "sha256:2c010c6ded393148374c0f6f0bf89d206bf3217f201faa0635dcd56bd1520f6b", size = 1987549, upload-time = "2025-11-04T13:39:51.842Z" }, + { url = "https://files.pythonhosted.org/packages/11/66/f14d1d978ea94d1bc21fc98fcf570f9542fe55bfcc40269d4e1a21c19bf7/pydantic_core-2.41.5-cp311-cp311-win_amd64.whl", hash = "sha256:76ee27c6e9c7f16f47db7a94157112a2f3a00e958bc626e2f4ee8bec5c328fbe", size = 2011305, upload-time = "2025-11-04T13:39:53.485Z" }, + { url = "https://files.pythonhosted.org/packages/56/d8/0e271434e8efd03186c5386671328154ee349ff0354d83c74f5caaf096ed/pydantic_core-2.41.5-cp311-cp311-win_arm64.whl", hash = "sha256:4bc36bbc0b7584de96561184ad7f012478987882ebf9f9c389b23f432ea3d90f", size = 1972902, upload-time = "2025-11-04T13:39:56.488Z" }, + { url = "https://files.pythonhosted.org/packages/5f/5d/5f6c63eebb5afee93bcaae4ce9a898f3373ca23df3ccaef086d0233a35a7/pydantic_core-2.41.5-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:f41a7489d32336dbf2199c8c0a215390a751c5b014c2c1c5366e817202e9cdf7", size = 2110990, upload-time = "2025-11-04T13:39:58.079Z" }, + { url = "https://files.pythonhosted.org/packages/aa/32/9c2e8ccb57c01111e0fd091f236c7b371c1bccea0fa85247ac55b1e2b6b6/pydantic_core-2.41.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:070259a8818988b9a84a449a2a7337c7f430a22acc0859c6b110aa7212a6d9c0", size = 1896003, upload-time = "2025-11-04T13:39:59.956Z" }, + { url = "https://files.pythonhosted.org/packages/68/b8/a01b53cb0e59139fbc9e4fda3e9724ede8de279097179be4ff31f1abb65a/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e96cea19e34778f8d59fe40775a7a574d95816eb150850a85a7a4c8f4b94ac69", size = 1919200, upload-time = "2025-11-04T13:40:02.241Z" }, + { url = 
"https://files.pythonhosted.org/packages/38/de/8c36b5198a29bdaade07b5985e80a233a5ac27137846f3bc2d3b40a47360/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ed2e99c456e3fadd05c991f8f437ef902e00eedf34320ba2b0842bd1c3ca3a75", size = 2052578, upload-time = "2025-11-04T13:40:04.401Z" }, + { url = "https://files.pythonhosted.org/packages/00/b5/0e8e4b5b081eac6cb3dbb7e60a65907549a1ce035a724368c330112adfdd/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:65840751b72fbfd82c3c640cff9284545342a4f1eb1586ad0636955b261b0b05", size = 2208504, upload-time = "2025-11-04T13:40:06.072Z" }, + { url = "https://files.pythonhosted.org/packages/77/56/87a61aad59c7c5b9dc8caad5a41a5545cba3810c3e828708b3d7404f6cef/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e536c98a7626a98feb2d3eaf75944ef6f3dbee447e1f841eae16f2f0a72d8ddc", size = 2335816, upload-time = "2025-11-04T13:40:07.835Z" }, + { url = "https://files.pythonhosted.org/packages/0d/76/941cc9f73529988688a665a5c0ecff1112b3d95ab48f81db5f7606f522d3/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eceb81a8d74f9267ef4081e246ffd6d129da5d87e37a77c9bde550cb04870c1c", size = 2075366, upload-time = "2025-11-04T13:40:09.804Z" }, + { url = "https://files.pythonhosted.org/packages/d3/43/ebef01f69baa07a482844faaa0a591bad1ef129253ffd0cdaa9d8a7f72d3/pydantic_core-2.41.5-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d38548150c39b74aeeb0ce8ee1d8e82696f4a4e16ddc6de7b1d8823f7de4b9b5", size = 2171698, upload-time = "2025-11-04T13:40:12.004Z" }, + { url = "https://files.pythonhosted.org/packages/b1/87/41f3202e4193e3bacfc2c065fab7706ebe81af46a83d3e27605029c1f5a6/pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:c23e27686783f60290e36827f9c626e63154b82b116d7fe9adba1fda36da706c", size = 2132603, upload-time = 
"2025-11-04T13:40:13.868Z" }, + { url = "https://files.pythonhosted.org/packages/49/7d/4c00df99cb12070b6bccdef4a195255e6020a550d572768d92cc54dba91a/pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_armv7l.whl", hash = "sha256:482c982f814460eabe1d3bb0adfdc583387bd4691ef00b90575ca0d2b6fe2294", size = 2329591, upload-time = "2025-11-04T13:40:15.672Z" }, + { url = "https://files.pythonhosted.org/packages/cc/6a/ebf4b1d65d458f3cda6a7335d141305dfa19bdc61140a884d165a8a1bbc7/pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:bfea2a5f0b4d8d43adf9d7b8bf019fb46fdd10a2e5cde477fbcb9d1fa08c68e1", size = 2319068, upload-time = "2025-11-04T13:40:17.532Z" }, + { url = "https://files.pythonhosted.org/packages/49/3b/774f2b5cd4192d5ab75870ce4381fd89cf218af999515baf07e7206753f0/pydantic_core-2.41.5-cp312-cp312-win32.whl", hash = "sha256:b74557b16e390ec12dca509bce9264c3bbd128f8a2c376eaa68003d7f327276d", size = 1985908, upload-time = "2025-11-04T13:40:19.309Z" }, + { url = "https://files.pythonhosted.org/packages/86/45/00173a033c801cacf67c190fef088789394feaf88a98a7035b0e40d53dc9/pydantic_core-2.41.5-cp312-cp312-win_amd64.whl", hash = "sha256:1962293292865bca8e54702b08a4f26da73adc83dd1fcf26fbc875b35d81c815", size = 2020145, upload-time = "2025-11-04T13:40:21.548Z" }, + { url = "https://files.pythonhosted.org/packages/f9/22/91fbc821fa6d261b376a3f73809f907cec5ca6025642c463d3488aad22fb/pydantic_core-2.41.5-cp312-cp312-win_arm64.whl", hash = "sha256:1746d4a3d9a794cacae06a5eaaccb4b8643a131d45fbc9af23e353dc0a5ba5c3", size = 1976179, upload-time = "2025-11-04T13:40:23.393Z" }, + { url = "https://files.pythonhosted.org/packages/87/06/8806241ff1f70d9939f9af039c6c35f2360cf16e93c2ca76f184e76b1564/pydantic_core-2.41.5-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:941103c9be18ac8daf7b7adca8228f8ed6bb7a1849020f643b3a14d15b1924d9", size = 2120403, upload-time = "2025-11-04T13:40:25.248Z" }, + { url = 
"https://files.pythonhosted.org/packages/94/02/abfa0e0bda67faa65fef1c84971c7e45928e108fe24333c81f3bfe35d5f5/pydantic_core-2.41.5-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:112e305c3314f40c93998e567879e887a3160bb8689ef3d2c04b6cc62c33ac34", size = 1896206, upload-time = "2025-11-04T13:40:27.099Z" }, + { url = "https://files.pythonhosted.org/packages/15/df/a4c740c0943e93e6500f9eb23f4ca7ec9bf71b19e608ae5b579678c8d02f/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0cbaad15cb0c90aa221d43c00e77bb33c93e8d36e0bf74760cd00e732d10a6a0", size = 1919307, upload-time = "2025-11-04T13:40:29.806Z" }, + { url = "https://files.pythonhosted.org/packages/9a/e3/6324802931ae1d123528988e0e86587c2072ac2e5394b4bc2bc34b61ff6e/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:03ca43e12fab6023fc79d28ca6b39b05f794ad08ec2feccc59a339b02f2b3d33", size = 2063258, upload-time = "2025-11-04T13:40:33.544Z" }, + { url = "https://files.pythonhosted.org/packages/c9/d4/2230d7151d4957dd79c3044ea26346c148c98fbf0ee6ebd41056f2d62ab5/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:dc799088c08fa04e43144b164feb0c13f9a0bc40503f8df3e9fde58a3c0c101e", size = 2214917, upload-time = "2025-11-04T13:40:35.479Z" }, + { url = "https://files.pythonhosted.org/packages/e6/9f/eaac5df17a3672fef0081b6c1bb0b82b33ee89aa5cec0d7b05f52fd4a1fa/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:97aeba56665b4c3235a0e52b2c2f5ae9cd071b8a8310ad27bddb3f7fb30e9aa2", size = 2332186, upload-time = "2025-11-04T13:40:37.436Z" }, + { url = "https://files.pythonhosted.org/packages/cf/4e/35a80cae583a37cf15604b44240e45c05e04e86f9cfd766623149297e971/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:406bf18d345822d6c21366031003612b9c77b3e29ffdb0f612367352aab7d586", size = 2073164, upload-time = 
"2025-11-04T13:40:40.289Z" }, + { url = "https://files.pythonhosted.org/packages/bf/e3/f6e262673c6140dd3305d144d032f7bd5f7497d3871c1428521f19f9efa2/pydantic_core-2.41.5-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b93590ae81f7010dbe380cdeab6f515902ebcbefe0b9327cc4804d74e93ae69d", size = 2179146, upload-time = "2025-11-04T13:40:42.809Z" }, + { url = "https://files.pythonhosted.org/packages/75/c7/20bd7fc05f0c6ea2056a4565c6f36f8968c0924f19b7d97bbfea55780e73/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:01a3d0ab748ee531f4ea6c3e48ad9dac84ddba4b0d82291f87248f2f9de8d740", size = 2137788, upload-time = "2025-11-04T13:40:44.752Z" }, + { url = "https://files.pythonhosted.org/packages/3a/8d/34318ef985c45196e004bc46c6eab2eda437e744c124ef0dbe1ff2c9d06b/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_armv7l.whl", hash = "sha256:6561e94ba9dacc9c61bce40e2d6bdc3bfaa0259d3ff36ace3b1e6901936d2e3e", size = 2340133, upload-time = "2025-11-04T13:40:46.66Z" }, + { url = "https://files.pythonhosted.org/packages/9c/59/013626bf8c78a5a5d9350d12e7697d3d4de951a75565496abd40ccd46bee/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:915c3d10f81bec3a74fbd4faebe8391013ba61e5a1a8d48c4455b923bdda7858", size = 2324852, upload-time = "2025-11-04T13:40:48.575Z" }, + { url = "https://files.pythonhosted.org/packages/1a/d9/c248c103856f807ef70c18a4f986693a46a8ffe1602e5d361485da502d20/pydantic_core-2.41.5-cp313-cp313-win32.whl", hash = "sha256:650ae77860b45cfa6e2cdafc42618ceafab3a2d9a3811fcfbd3bbf8ac3c40d36", size = 1994679, upload-time = "2025-11-04T13:40:50.619Z" }, + { url = "https://files.pythonhosted.org/packages/9e/8b/341991b158ddab181cff136acd2552c9f35bd30380422a639c0671e99a91/pydantic_core-2.41.5-cp313-cp313-win_amd64.whl", hash = "sha256:79ec52ec461e99e13791ec6508c722742ad745571f234ea6255bed38c6480f11", size = 2019766, upload-time = "2025-11-04T13:40:52.631Z" }, + { url = 
"https://files.pythonhosted.org/packages/73/7d/f2f9db34af103bea3e09735bb40b021788a5e834c81eedb541991badf8f5/pydantic_core-2.41.5-cp313-cp313-win_arm64.whl", hash = "sha256:3f84d5c1b4ab906093bdc1ff10484838aca54ef08de4afa9de0f5f14d69639cd", size = 1981005, upload-time = "2025-11-04T13:40:54.734Z" }, + { url = "https://files.pythonhosted.org/packages/ea/28/46b7c5c9635ae96ea0fbb779e271a38129df2550f763937659ee6c5dbc65/pydantic_core-2.41.5-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:3f37a19d7ebcdd20b96485056ba9e8b304e27d9904d233d7b1015db320e51f0a", size = 2119622, upload-time = "2025-11-04T13:40:56.68Z" }, + { url = "https://files.pythonhosted.org/packages/74/1a/145646e5687e8d9a1e8d09acb278c8535ebe9e972e1f162ed338a622f193/pydantic_core-2.41.5-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:1d1d9764366c73f996edd17abb6d9d7649a7eb690006ab6adbda117717099b14", size = 1891725, upload-time = "2025-11-04T13:40:58.807Z" }, + { url = "https://files.pythonhosted.org/packages/23/04/e89c29e267b8060b40dca97bfc64a19b2a3cf99018167ea1677d96368273/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:25e1c2af0fce638d5f1988b686f3b3ea8cd7de5f244ca147c777769e798a9cd1", size = 1915040, upload-time = "2025-11-04T13:41:00.853Z" }, + { url = "https://files.pythonhosted.org/packages/84/a3/15a82ac7bd97992a82257f777b3583d3e84bdb06ba6858f745daa2ec8a85/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:506d766a8727beef16b7adaeb8ee6217c64fc813646b424d0804d67c16eddb66", size = 2063691, upload-time = "2025-11-04T13:41:03.504Z" }, + { url = "https://files.pythonhosted.org/packages/74/9b/0046701313c6ef08c0c1cf0e028c67c770a4e1275ca73131563c5f2a310a/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4819fa52133c9aa3c387b3328f25c1facc356491e6135b459f1de698ff64d869", size = 2213897, upload-time = "2025-11-04T13:41:05.804Z" }, + { url = 
"https://files.pythonhosted.org/packages/8a/cd/6bac76ecd1b27e75a95ca3a9a559c643b3afcd2dd62086d4b7a32a18b169/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2b761d210c9ea91feda40d25b4efe82a1707da2ef62901466a42492c028553a2", size = 2333302, upload-time = "2025-11-04T13:41:07.809Z" }, + { url = "https://files.pythonhosted.org/packages/4c/d2/ef2074dc020dd6e109611a8be4449b98cd25e1b9b8a303c2f0fca2f2bcf7/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:22f0fb8c1c583a3b6f24df2470833b40207e907b90c928cc8d3594b76f874375", size = 2064877, upload-time = "2025-11-04T13:41:09.827Z" }, + { url = "https://files.pythonhosted.org/packages/18/66/e9db17a9a763d72f03de903883c057b2592c09509ccfe468187f2a2eef29/pydantic_core-2.41.5-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2782c870e99878c634505236d81e5443092fba820f0373997ff75f90f68cd553", size = 2180680, upload-time = "2025-11-04T13:41:12.379Z" }, + { url = "https://files.pythonhosted.org/packages/d3/9e/3ce66cebb929f3ced22be85d4c2399b8e85b622db77dad36b73c5387f8f8/pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:0177272f88ab8312479336e1d777f6b124537d47f2123f89cb37e0accea97f90", size = 2138960, upload-time = "2025-11-04T13:41:14.627Z" }, + { url = "https://files.pythonhosted.org/packages/a6/62/205a998f4327d2079326b01abee48e502ea739d174f0a89295c481a2272e/pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_armv7l.whl", hash = "sha256:63510af5e38f8955b8ee5687740d6ebf7c2a0886d15a6d65c32814613681bc07", size = 2339102, upload-time = "2025-11-04T13:41:16.868Z" }, + { url = "https://files.pythonhosted.org/packages/3c/0d/f05e79471e889d74d3d88f5bd20d0ed189ad94c2423d81ff8d0000aab4ff/pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:e56ba91f47764cc14f1daacd723e3e82d1a89d783f0f5afe9c364b8bb491ccdb", size = 2326039, upload-time = "2025-11-04T13:41:18.934Z" }, + { url = 
"https://files.pythonhosted.org/packages/ec/e1/e08a6208bb100da7e0c4b288eed624a703f4d129bde2da475721a80cab32/pydantic_core-2.41.5-cp314-cp314-win32.whl", hash = "sha256:aec5cf2fd867b4ff45b9959f8b20ea3993fc93e63c7363fe6851424c8a7e7c23", size = 1995126, upload-time = "2025-11-04T13:41:21.418Z" }, + { url = "https://files.pythonhosted.org/packages/48/5d/56ba7b24e9557f99c9237e29f5c09913c81eeb2f3217e40e922353668092/pydantic_core-2.41.5-cp314-cp314-win_amd64.whl", hash = "sha256:8e7c86f27c585ef37c35e56a96363ab8de4e549a95512445b85c96d3e2f7c1bf", size = 2015489, upload-time = "2025-11-04T13:41:24.076Z" }, + { url = "https://files.pythonhosted.org/packages/4e/bb/f7a190991ec9e3e0ba22e4993d8755bbc4a32925c0b5b42775c03e8148f9/pydantic_core-2.41.5-cp314-cp314-win_arm64.whl", hash = "sha256:e672ba74fbc2dc8eea59fb6d4aed6845e6905fc2a8afe93175d94a83ba2a01a0", size = 1977288, upload-time = "2025-11-04T13:41:26.33Z" }, + { url = "https://files.pythonhosted.org/packages/92/ed/77542d0c51538e32e15afe7899d79efce4b81eee631d99850edc2f5e9349/pydantic_core-2.41.5-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:8566def80554c3faa0e65ac30ab0932b9e3a5cd7f8323764303d468e5c37595a", size = 2120255, upload-time = "2025-11-04T13:41:28.569Z" }, + { url = "https://files.pythonhosted.org/packages/bb/3d/6913dde84d5be21e284439676168b28d8bbba5600d838b9dca99de0fad71/pydantic_core-2.41.5-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:b80aa5095cd3109962a298ce14110ae16b8c1aece8b72f9dafe81cf597ad80b3", size = 1863760, upload-time = "2025-11-04T13:41:31.055Z" }, + { url = "https://files.pythonhosted.org/packages/5a/f0/e5e6b99d4191da102f2b0eb9687aaa7f5bea5d9964071a84effc3e40f997/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3006c3dd9ba34b0c094c544c6006cc79e87d8612999f1a5d43b769b89181f23c", size = 1878092, upload-time = "2025-11-04T13:41:33.21Z" }, + { url = 
"https://files.pythonhosted.org/packages/71/48/36fb760642d568925953bcc8116455513d6e34c4beaa37544118c36aba6d/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:72f6c8b11857a856bcfa48c86f5368439f74453563f951e473514579d44aa612", size = 2053385, upload-time = "2025-11-04T13:41:35.508Z" }, + { url = "https://files.pythonhosted.org/packages/20/25/92dc684dd8eb75a234bc1c764b4210cf2646479d54b47bf46061657292a8/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5cb1b2f9742240e4bb26b652a5aeb840aa4b417c7748b6f8387927bc6e45e40d", size = 2218832, upload-time = "2025-11-04T13:41:37.732Z" }, + { url = "https://files.pythonhosted.org/packages/e2/09/f53e0b05023d3e30357d82eb35835d0f6340ca344720a4599cd663dca599/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:bd3d54f38609ff308209bd43acea66061494157703364ae40c951f83ba99a1a9", size = 2327585, upload-time = "2025-11-04T13:41:40Z" }, + { url = "https://files.pythonhosted.org/packages/aa/4e/2ae1aa85d6af35a39b236b1b1641de73f5a6ac4d5a7509f77b814885760c/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2ff4321e56e879ee8d2a879501c8e469414d948f4aba74a2d4593184eb326660", size = 2041078, upload-time = "2025-11-04T13:41:42.323Z" }, + { url = "https://files.pythonhosted.org/packages/cd/13/2e215f17f0ef326fc72afe94776edb77525142c693767fc347ed6288728d/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d0d2568a8c11bf8225044aa94409e21da0cb09dcdafe9ecd10250b2baad531a9", size = 2173914, upload-time = "2025-11-04T13:41:45.221Z" }, + { url = "https://files.pythonhosted.org/packages/02/7a/f999a6dcbcd0e5660bc348a3991c8915ce6599f4f2c6ac22f01d7a10816c/pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:a39455728aabd58ceabb03c90e12f71fd30fa69615760a075b9fec596456ccc3", size = 2129560, upload-time = 
"2025-11-04T13:41:47.474Z" }, + { url = "https://files.pythonhosted.org/packages/3a/b1/6c990ac65e3b4c079a4fb9f5b05f5b013afa0f4ed6780a3dd236d2cbdc64/pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_armv7l.whl", hash = "sha256:239edca560d05757817c13dc17c50766136d21f7cd0fac50295499ae24f90fdf", size = 2329244, upload-time = "2025-11-04T13:41:49.992Z" }, + { url = "https://files.pythonhosted.org/packages/d9/02/3c562f3a51afd4d88fff8dffb1771b30cfdfd79befd9883ee094f5b6c0d8/pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:2a5e06546e19f24c6a96a129142a75cee553cc018ffee48a460059b1185f4470", size = 2331955, upload-time = "2025-11-04T13:41:54.079Z" }, + { url = "https://files.pythonhosted.org/packages/5c/96/5fb7d8c3c17bc8c62fdb031c47d77a1af698f1d7a406b0f79aaa1338f9ad/pydantic_core-2.41.5-cp314-cp314t-win32.whl", hash = "sha256:b4ececa40ac28afa90871c2cc2b9ffd2ff0bf749380fbdf57d165fd23da353aa", size = 1988906, upload-time = "2025-11-04T13:41:56.606Z" }, + { url = "https://files.pythonhosted.org/packages/22/ed/182129d83032702912c2e2d8bbe33c036f342cc735737064668585dac28f/pydantic_core-2.41.5-cp314-cp314t-win_amd64.whl", hash = "sha256:80aa89cad80b32a912a65332f64a4450ed00966111b6615ca6816153d3585a8c", size = 1981607, upload-time = "2025-11-04T13:41:58.889Z" }, + { url = "https://files.pythonhosted.org/packages/9f/ed/068e41660b832bb0b1aa5b58011dea2a3fe0ba7861ff38c4d4904c1c1a99/pydantic_core-2.41.5-cp314-cp314t-win_arm64.whl", hash = "sha256:35b44f37a3199f771c3eaa53051bc8a70cd7b54f333531c59e29fd4db5d15008", size = 1974769, upload-time = "2025-11-04T13:42:01.186Z" }, + { url = "https://files.pythonhosted.org/packages/11/72/90fda5ee3b97e51c494938a4a44c3a35a9c96c19bba12372fb9c634d6f57/pydantic_core-2.41.5-graalpy311-graalpy242_311_native-macosx_10_12_x86_64.whl", hash = "sha256:b96d5f26b05d03cc60f11a7761a5ded1741da411e7fe0909e27a5e6a0cb7b034", size = 2115441, upload-time = "2025-11-04T13:42:39.557Z" }, + { url = 
"https://files.pythonhosted.org/packages/1f/53/8942f884fa33f50794f119012dc6a1a02ac43a56407adaac20463df8e98f/pydantic_core-2.41.5-graalpy311-graalpy242_311_native-macosx_11_0_arm64.whl", hash = "sha256:634e8609e89ceecea15e2d61bc9ac3718caaaa71963717bf3c8f38bfde64242c", size = 1930291, upload-time = "2025-11-04T13:42:42.169Z" }, + { url = "https://files.pythonhosted.org/packages/79/c8/ecb9ed9cd942bce09fc888ee960b52654fbdbede4ba6c2d6e0d3b1d8b49c/pydantic_core-2.41.5-graalpy311-graalpy242_311_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:93e8740d7503eb008aa2df04d3b9735f845d43ae845e6dcd2be0b55a2da43cd2", size = 1948632, upload-time = "2025-11-04T13:42:44.564Z" }, + { url = "https://files.pythonhosted.org/packages/2e/1b/687711069de7efa6af934e74f601e2a4307365e8fdc404703afc453eab26/pydantic_core-2.41.5-graalpy311-graalpy242_311_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f15489ba13d61f670dcc96772e733aad1a6f9c429cc27574c6cdaed82d0146ad", size = 2138905, upload-time = "2025-11-04T13:42:47.156Z" }, + { url = "https://files.pythonhosted.org/packages/09/32/59b0c7e63e277fa7911c2fc70ccfb45ce4b98991e7ef37110663437005af/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-macosx_10_12_x86_64.whl", hash = "sha256:7da7087d756b19037bc2c06edc6c170eeef3c3bafcb8f532ff17d64dc427adfd", size = 2110495, upload-time = "2025-11-04T13:42:49.689Z" }, + { url = "https://files.pythonhosted.org/packages/aa/81/05e400037eaf55ad400bcd318c05bb345b57e708887f07ddb2d20e3f0e98/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-macosx_11_0_arm64.whl", hash = "sha256:aabf5777b5c8ca26f7824cb4a120a740c9588ed58df9b2d196ce92fba42ff8dc", size = 1915388, upload-time = "2025-11-04T13:42:52.215Z" }, + { url = "https://files.pythonhosted.org/packages/6e/0d/e3549b2399f71d56476b77dbf3cf8937cec5cd70536bdc0e374a421d0599/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:c007fe8a43d43b3969e8469004e9845944f1a80e6acd47c150856bb87f230c56", size = 1942879, upload-time = "2025-11-04T13:42:56.483Z" }, + { url = "https://files.pythonhosted.org/packages/f7/07/34573da085946b6a313d7c42f82f16e8920bfd730665de2d11c0c37a74b5/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:76d0819de158cd855d1cbb8fcafdf6f5cf1eb8e470abe056d5d161106e38062b", size = 2139017, upload-time = "2025-11-04T13:42:59.471Z" }, + { url = "https://files.pythonhosted.org/packages/e6/b0/1a2aa41e3b5a4ba11420aba2d091b2d17959c8d1519ece3627c371951e73/pydantic_core-2.41.5-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:b5819cd790dbf0c5eb9f82c73c16b39a65dd6dd4d1439dcdea7816ec9adddab8", size = 2103351, upload-time = "2025-11-04T13:43:02.058Z" }, + { url = "https://files.pythonhosted.org/packages/a4/ee/31b1f0020baaf6d091c87900ae05c6aeae101fa4e188e1613c80e4f1ea31/pydantic_core-2.41.5-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:5a4e67afbc95fa5c34cf27d9089bca7fcab4e51e57278d710320a70b956d1b9a", size = 1925363, upload-time = "2025-11-04T13:43:05.159Z" }, + { url = "https://files.pythonhosted.org/packages/e1/89/ab8e86208467e467a80deaca4e434adac37b10a9d134cd2f99b28a01e483/pydantic_core-2.41.5-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ece5c59f0ce7d001e017643d8d24da587ea1f74f6993467d85ae8a5ef9d4f42b", size = 2135615, upload-time = "2025-11-04T13:43:08.116Z" }, + { url = "https://files.pythonhosted.org/packages/99/0a/99a53d06dd0348b2008f2f30884b34719c323f16c3be4e6cc1203b74a91d/pydantic_core-2.41.5-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:16f80f7abe3351f8ea6858914ddc8c77e02578544a0ebc15b4c2e1a0e813b0b2", size = 2175369, upload-time = "2025-11-04T13:43:12.49Z" }, + { url = 
"https://files.pythonhosted.org/packages/6d/94/30ca3b73c6d485b9bb0bc66e611cff4a7138ff9736b7e66bcf0852151636/pydantic_core-2.41.5-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:33cb885e759a705b426baada1fe68cbb0a2e68e34c5d0d0289a364cf01709093", size = 2144218, upload-time = "2025-11-04T13:43:15.431Z" }, + { url = "https://files.pythonhosted.org/packages/87/57/31b4f8e12680b739a91f472b5671294236b82586889ef764b5fbc6669238/pydantic_core-2.41.5-pp310-pypy310_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:c8d8b4eb992936023be7dee581270af5c6e0697a8559895f527f5b7105ecd36a", size = 2329951, upload-time = "2025-11-04T13:43:18.062Z" }, + { url = "https://files.pythonhosted.org/packages/7d/73/3c2c8edef77b8f7310e6fb012dbc4b8551386ed575b9eb6fb2506e28a7eb/pydantic_core-2.41.5-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:242a206cd0318f95cd21bdacff3fcc3aab23e79bba5cac3db5a841c9ef9c6963", size = 2318428, upload-time = "2025-11-04T13:43:20.679Z" }, + { url = "https://files.pythonhosted.org/packages/2f/02/8559b1f26ee0d502c74f9cca5c0d2fd97e967e083e006bbbb4e97f3a043a/pydantic_core-2.41.5-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:d3a978c4f57a597908b7e697229d996d77a6d3c94901e9edee593adada95ce1a", size = 2147009, upload-time = "2025-11-04T13:43:23.286Z" }, + { url = "https://files.pythonhosted.org/packages/5f/9b/1b3f0e9f9305839d7e84912f9e8bfbd191ed1b1ef48083609f0dabde978c/pydantic_core-2.41.5-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:b2379fa7ed44ddecb5bfe4e48577d752db9fc10be00a6b7446e9663ba143de26", size = 2101980, upload-time = "2025-11-04T13:43:25.97Z" }, + { url = "https://files.pythonhosted.org/packages/a4/ed/d71fefcb4263df0da6a85b5d8a7508360f2f2e9b3bf5814be9c8bccdccc1/pydantic_core-2.41.5-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:266fb4cbf5e3cbd0b53669a6d1b039c45e3ce651fd5442eff4d07c2cc8d66808", size = 1923865, upload-time = "2025-11-04T13:43:28.763Z" }, + { url = 
"https://files.pythonhosted.org/packages/ce/3a/626b38db460d675f873e4444b4bb030453bbe7b4ba55df821d026a0493c4/pydantic_core-2.41.5-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:58133647260ea01e4d0500089a8c4f07bd7aa6ce109682b1426394988d8aaacc", size = 2134256, upload-time = "2025-11-04T13:43:31.71Z" }, + { url = "https://files.pythonhosted.org/packages/83/d9/8412d7f06f616bbc053d30cb4e5f76786af3221462ad5eee1f202021eb4e/pydantic_core-2.41.5-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:287dad91cfb551c363dc62899a80e9e14da1f0e2b6ebde82c806612ca2a13ef1", size = 2174762, upload-time = "2025-11-04T13:43:34.744Z" }, + { url = "https://files.pythonhosted.org/packages/55/4c/162d906b8e3ba3a99354e20faa1b49a85206c47de97a639510a0e673f5da/pydantic_core-2.41.5-pp311-pypy311_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:03b77d184b9eb40240ae9fd676ca364ce1085f203e1b1256f8ab9984dca80a84", size = 2143141, upload-time = "2025-11-04T13:43:37.701Z" }, + { url = "https://files.pythonhosted.org/packages/1f/f2/f11dd73284122713f5f89fc940f370d035fa8e1e078d446b3313955157fe/pydantic_core-2.41.5-pp311-pypy311_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:a668ce24de96165bb239160b3d854943128f4334822900534f2fe947930e5770", size = 2330317, upload-time = "2025-11-04T13:43:40.406Z" }, + { url = "https://files.pythonhosted.org/packages/88/9d/b06ca6acfe4abb296110fb1273a4d848a0bfb2ff65f3ee92127b3244e16b/pydantic_core-2.41.5-pp311-pypy311_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:f14f8f046c14563f8eb3f45f499cc658ab8d10072961e07225e507adb700e93f", size = 2316992, upload-time = "2025-11-04T13:43:43.602Z" }, + { url = "https://files.pythonhosted.org/packages/36/c7/cfc8e811f061c841d7990b0201912c3556bfeb99cdcb7ed24adc8d6f8704/pydantic_core-2.41.5-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:56121965f7a4dc965bff783d70b907ddf3d57f6eba29b6d2e5dabfaf07799c51", size = 2145302, upload-time = "2025-11-04T13:43:46.64Z" }, +] + +[[package]] +name 
= "pydantic-settings" +version = "2.13.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pydantic" }, + { name = "python-dotenv" }, + { name = "typing-inspection" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/52/6d/fffca34caecc4a3f97bda81b2098da5e8ab7efc9a66e819074a11955d87e/pydantic_settings-2.13.1.tar.gz", hash = "sha256:b4c11847b15237fb0171e1462bf540e294affb9b86db4d9aa5c01730bdbe4025", size = 223826, upload-time = "2026-02-19T13:45:08.055Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/00/4b/ccc026168948fec4f7555b9164c724cf4125eac006e176541483d2c959be/pydantic_settings-2.13.1-py3-none-any.whl", hash = "sha256:d56fd801823dbeae7f0975e1f8c8e25c258eb75d278ea7abb5d9cebb01b56237", size = 58929, upload-time = "2026-02-19T13:45:06.034Z" }, +] + [[package]] name = "pygments" version = "2.19.2" @@ -833,6 +1184,23 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217, upload-time = "2025-06-21T13:39:07.939Z" }, ] +[[package]] +name = "pyjwt" +version = "2.12.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions", marker = "python_full_version < '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c2/27/a3b6e5bf6ff856d2509292e95c8f57f0df7017cf5394921fc4e4ef40308a/pyjwt-2.12.1.tar.gz", hash = "sha256:c74a7a2adf861c04d002db713dd85f84beb242228e671280bf709d765b03672b", size = 102564, upload-time = "2026-03-13T19:27:37.25Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e5/7a/8dd906bd22e79e47397a61742927f6747fe93242ef86645ee9092e610244/pyjwt-2.12.1-py3-none-any.whl", hash = "sha256:28ca37c070cad8ba8cd9790cd940535d40274d22f80ab87f3ac6a713e6e8454c", size = 29726, upload-time = "2026-03-13T19:27:35.677Z" }, +] + 
+[package.optional-dependencies] +crypto = [ + { name = "cryptography" }, +] + [[package]] name = "pyproject-hooks" version = "1.2.0" @@ -887,6 +1255,46 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/67/0f/019d3949a40280f6193b62bc010177d4ce702d0fce424322286488569cd3/python_discovery-1.2.1-py3-none-any.whl", hash = "sha256:b6a957b24c1cd79252484d3566d1b49527581d46e789aaf43181005e56201502", size = 31674, upload-time = "2026-03-26T22:30:43.396Z" }, ] +[[package]] +name = "python-dotenv" +version = "1.2.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/82/ed/0301aeeac3e5353ef3d94b6ec08bbcabd04a72018415dcb29e588514bba8/python_dotenv-1.2.2.tar.gz", hash = "sha256:2c371a91fbd7ba082c2c1dc1f8bf89ca22564a087c2c287cd9b662adde799cf3", size = 50135, upload-time = "2026-03-01T16:00:26.196Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0b/d7/1959b9648791274998a9c3526f6d0ec8fd2233e4d4acce81bbae76b44b2a/python_dotenv-1.2.2-py3-none-any.whl", hash = "sha256:1d8214789a24de455a8b8bd8ae6fe3c6b69a5e3d64aa8a8e5d68e694bbcb285a", size = 22101, upload-time = "2026-03-01T16:00:25.09Z" }, +] + +[[package]] +name = "python-multipart" +version = "0.0.22" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/94/01/979e98d542a70714b0cb2b6728ed0b7c46792b695e3eaec3e20711271ca3/python_multipart-0.0.22.tar.gz", hash = "sha256:7340bef99a7e0032613f56dc36027b959fd3b30a787ed62d310e951f7c3a3a58", size = 37612, upload-time = "2026-01-25T10:15:56.219Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1b/d0/397f9626e711ff749a95d96b7af99b9c566a9bb5129b8e4c10fc4d100304/python_multipart-0.0.22-py3-none-any.whl", hash = "sha256:2b2cd894c83d21bf49d702499531c7bafd057d730c201782048f7945d82de155", size = 24579, upload-time = "2026-01-25T10:15:54.811Z" }, +] + +[[package]] +name = "pywin32" +version = "311" +source = { registry = 
"https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7b/40/44efbb0dfbd33aca6a6483191dae0716070ed99e2ecb0c53683f400a0b4f/pywin32-311-cp310-cp310-win32.whl", hash = "sha256:d03ff496d2a0cd4a5893504789d4a15399133fe82517455e78bad62efbb7f0a3", size = 8760432, upload-time = "2025-07-14T20:13:05.9Z" }, + { url = "https://files.pythonhosted.org/packages/5e/bf/360243b1e953bd254a82f12653974be395ba880e7ec23e3731d9f73921cc/pywin32-311-cp310-cp310-win_amd64.whl", hash = "sha256:797c2772017851984b97180b0bebe4b620bb86328e8a884bb626156295a63b3b", size = 9590103, upload-time = "2025-07-14T20:13:07.698Z" }, + { url = "https://files.pythonhosted.org/packages/57/38/d290720e6f138086fb3d5ffe0b6caa019a791dd57866940c82e4eeaf2012/pywin32-311-cp310-cp310-win_arm64.whl", hash = "sha256:0502d1facf1fed4839a9a51ccbcc63d952cf318f78ffc00a7e78528ac27d7a2b", size = 8778557, upload-time = "2025-07-14T20:13:11.11Z" }, + { url = "https://files.pythonhosted.org/packages/7c/af/449a6a91e5d6db51420875c54f6aff7c97a86a3b13a0b4f1a5c13b988de3/pywin32-311-cp311-cp311-win32.whl", hash = "sha256:184eb5e436dea364dcd3d2316d577d625c0351bf237c4e9a5fabbcfa5a58b151", size = 8697031, upload-time = "2025-07-14T20:13:13.266Z" }, + { url = "https://files.pythonhosted.org/packages/51/8f/9bb81dd5bb77d22243d33c8397f09377056d5c687aa6d4042bea7fbf8364/pywin32-311-cp311-cp311-win_amd64.whl", hash = "sha256:3ce80b34b22b17ccbd937a6e78e7225d80c52f5ab9940fe0506a1a16f3dab503", size = 9508308, upload-time = "2025-07-14T20:13:15.147Z" }, + { url = "https://files.pythonhosted.org/packages/44/7b/9c2ab54f74a138c491aba1b1cd0795ba61f144c711daea84a88b63dc0f6c/pywin32-311-cp311-cp311-win_arm64.whl", hash = "sha256:a733f1388e1a842abb67ffa8e7aad0e70ac519e09b0f6a784e65a136ec7cefd2", size = 8703930, upload-time = "2025-07-14T20:13:16.945Z" }, + { url = "https://files.pythonhosted.org/packages/e7/ab/01ea1943d4eba0f850c3c61e78e8dd59757ff815ff3ccd0a84de5f541f42/pywin32-311-cp312-cp312-win32.whl", hash = 
"sha256:750ec6e621af2b948540032557b10a2d43b0cee2ae9758c54154d711cc852d31", size = 8706543, upload-time = "2025-07-14T20:13:20.765Z" }, + { url = "https://files.pythonhosted.org/packages/d1/a8/a0e8d07d4d051ec7502cd58b291ec98dcc0c3fff027caad0470b72cfcc2f/pywin32-311-cp312-cp312-win_amd64.whl", hash = "sha256:b8c095edad5c211ff31c05223658e71bf7116daa0ecf3ad85f3201ea3190d067", size = 9495040, upload-time = "2025-07-14T20:13:22.543Z" }, + { url = "https://files.pythonhosted.org/packages/ba/3a/2ae996277b4b50f17d61f0603efd8253cb2d79cc7ae159468007b586396d/pywin32-311-cp312-cp312-win_arm64.whl", hash = "sha256:e286f46a9a39c4a18b319c28f59b61de793654af2f395c102b4f819e584b5852", size = 8710102, upload-time = "2025-07-14T20:13:24.682Z" }, + { url = "https://files.pythonhosted.org/packages/a5/be/3fd5de0979fcb3994bfee0d65ed8ca9506a8a1260651b86174f6a86f52b3/pywin32-311-cp313-cp313-win32.whl", hash = "sha256:f95ba5a847cba10dd8c4d8fefa9f2a6cf283b8b88ed6178fa8a6c1ab16054d0d", size = 8705700, upload-time = "2025-07-14T20:13:26.471Z" }, + { url = "https://files.pythonhosted.org/packages/e3/28/e0a1909523c6890208295a29e05c2adb2126364e289826c0a8bc7297bd5c/pywin32-311-cp313-cp313-win_amd64.whl", hash = "sha256:718a38f7e5b058e76aee1c56ddd06908116d35147e133427e59a3983f703a20d", size = 9494700, upload-time = "2025-07-14T20:13:28.243Z" }, + { url = "https://files.pythonhosted.org/packages/04/bf/90339ac0f55726dce7d794e6d79a18a91265bdf3aa70b6b9ca52f35e022a/pywin32-311-cp313-cp313-win_arm64.whl", hash = "sha256:7b4075d959648406202d92a2310cb990fea19b535c7f4a78d3f5e10b926eeb8a", size = 8709318, upload-time = "2025-07-14T20:13:30.348Z" }, + { url = "https://files.pythonhosted.org/packages/c9/31/097f2e132c4f16d99a22bfb777e0fd88bd8e1c634304e102f313af69ace5/pywin32-311-cp314-cp314-win32.whl", hash = "sha256:b7a2c10b93f8986666d0c803ee19b5990885872a7de910fc460f9b0c2fbf92ee", size = 8840714, upload-time = "2025-07-14T20:13:32.449Z" }, + { url = 
"https://files.pythonhosted.org/packages/90/4b/07c77d8ba0e01349358082713400435347df8426208171ce297da32c313d/pywin32-311-cp314-cp314-win_amd64.whl", hash = "sha256:3aca44c046bd2ed8c90de9cb8427f581c479e594e99b5c0bb19b29c10fd6cb87", size = 9656800, upload-time = "2025-07-14T20:13:34.312Z" }, + { url = "https://files.pythonhosted.org/packages/c0/d2/21af5c535501a7233e734b8af901574572da66fcc254cb35d0609c9080dd/pywin32-311-cp314-cp314-win_arm64.whl", hash = "sha256:a508e2d9025764a8270f93111a970e1d0fbfc33f4153b388bb649b7eec4f9b42", size = 8932540, upload-time = "2025-07-14T20:13:36.379Z" }, +] + [[package]] name = "pywin32-ctypes" version = "0.2.3" @@ -974,6 +1382,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e1/67/921ec3024056483db83953ae8e48079ad62b92db7880013ca77632921dd0/readme_renderer-44.0-py3-none-any.whl", hash = "sha256:2fbca89b81a08526aadf1357a8c2ae889ec05fb03f5da67f9769c9a592166151", size = 13310, upload-time = "2024-07-08T15:00:56.577Z" }, ] +[[package]] +name = "referencing" +version = "0.37.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "attrs" }, + { name = "rpds-py" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/22/f5/df4e9027acead3ecc63e50fe1e36aca1523e1719559c499951bb4b53188f/referencing-0.37.0.tar.gz", hash = "sha256:44aefc3142c5b842538163acb373e24cce6632bd54bdb01b21ad5863489f50d8", size = 78036, upload-time = "2025-10-13T15:30:48.871Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2c/58/ca301544e1fa93ed4f80d724bf5b194f6e4b945841c5bfd555878eea9fcb/referencing-0.37.0-py3-none-any.whl", hash = "sha256:381329a9f99628c9069361716891d34ad94af76e461dcb0335825aecc7692231", size = 26766, upload-time = "2025-10-13T15:30:47.625Z" }, +] + [[package]] name = "requests" version = "2.33.0" @@ -1023,6 +1445,128 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/14/25/b208c5683343959b670dc001595f2f3737e051da617f66c31f7c4fa93abc/rich-14.3.3-py3-none-any.whl", hash = "sha256:793431c1f8619afa7d3b52b2cdec859562b950ea0d4b6b505397612db8d5362d", size = 310458, upload-time = "2026-02-19T17:23:13.732Z" }, ] +[[package]] +name = "rpds-py" +version = "0.30.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/20/af/3f2f423103f1113b36230496629986e0ef7e199d2aa8392452b484b38ced/rpds_py-0.30.0.tar.gz", hash = "sha256:dd8ff7cf90014af0c0f787eea34794ebf6415242ee1d6fa91eaba725cc441e84", size = 69469, upload-time = "2025-11-30T20:24:38.837Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/06/0c/0c411a0ec64ccb6d104dcabe0e713e05e153a9a2c3c2bd2b32ce412166fe/rpds_py-0.30.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:679ae98e00c0e8d68a7fda324e16b90fd5260945b45d3b824c892cec9eea3288", size = 370490, upload-time = "2025-11-30T20:21:33.256Z" }, + { url = "https://files.pythonhosted.org/packages/19/6a/4ba3d0fb7297ebae71171822554abe48d7cab29c28b8f9f2c04b79988c05/rpds_py-0.30.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:4cc2206b76b4f576934f0ed374b10d7ca5f457858b157ca52064bdfc26b9fc00", size = 359751, upload-time = "2025-11-30T20:21:34.591Z" }, + { url = "https://files.pythonhosted.org/packages/cd/7c/e4933565ef7f7a0818985d87c15d9d273f1a649afa6a52ea35ad011195ea/rpds_py-0.30.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:389a2d49eded1896c3d48b0136ead37c48e221b391c052fba3f4055c367f60a6", size = 389696, upload-time = "2025-11-30T20:21:36.122Z" }, + { url = "https://files.pythonhosted.org/packages/5e/01/6271a2511ad0815f00f7ed4390cf2567bec1d4b1da39e2c27a41e6e3b4de/rpds_py-0.30.0-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:32c8528634e1bf7121f3de08fa85b138f4e0dc47657866630611b03967f041d7", size = 403136, upload-time = "2025-11-30T20:21:37.728Z" }, + { url = 
"https://files.pythonhosted.org/packages/55/64/c857eb7cd7541e9b4eee9d49c196e833128a55b89a9850a9c9ac33ccf897/rpds_py-0.30.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f207f69853edd6f6700b86efb84999651baf3789e78a466431df1331608e5324", size = 524699, upload-time = "2025-11-30T20:21:38.92Z" }, + { url = "https://files.pythonhosted.org/packages/9c/ed/94816543404078af9ab26159c44f9e98e20fe47e2126d5d32c9d9948d10a/rpds_py-0.30.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:67b02ec25ba7a9e8fa74c63b6ca44cf5707f2fbfadae3ee8e7494297d56aa9df", size = 412022, upload-time = "2025-11-30T20:21:40.407Z" }, + { url = "https://files.pythonhosted.org/packages/61/b5/707f6cf0066a6412aacc11d17920ea2e19e5b2f04081c64526eb35b5c6e7/rpds_py-0.30.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0c0e95f6819a19965ff420f65578bacb0b00f251fefe2c8b23347c37174271f3", size = 390522, upload-time = "2025-11-30T20:21:42.17Z" }, + { url = "https://files.pythonhosted.org/packages/13/4e/57a85fda37a229ff4226f8cbcf09f2a455d1ed20e802ce5b2b4a7f5ed053/rpds_py-0.30.0-cp310-cp310-manylinux_2_31_riscv64.whl", hash = "sha256:a452763cc5198f2f98898eb98f7569649fe5da666c2dc6b5ddb10fde5a574221", size = 404579, upload-time = "2025-11-30T20:21:43.769Z" }, + { url = "https://files.pythonhosted.org/packages/f9/da/c9339293513ec680a721e0e16bf2bac3db6e5d7e922488de471308349bba/rpds_py-0.30.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e0b65193a413ccc930671c55153a03ee57cecb49e6227204b04fae512eb657a7", size = 421305, upload-time = "2025-11-30T20:21:44.994Z" }, + { url = "https://files.pythonhosted.org/packages/f9/be/522cb84751114f4ad9d822ff5a1aa3c98006341895d5f084779b99596e5c/rpds_py-0.30.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:858738e9c32147f78b3ac24dc0edb6610000e56dc0f700fd5f651d0a0f0eb9ff", size = 572503, upload-time = "2025-11-30T20:21:46.91Z" }, + { url = 
"https://files.pythonhosted.org/packages/a2/9b/de879f7e7ceddc973ea6e4629e9b380213a6938a249e94b0cdbcc325bb66/rpds_py-0.30.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:da279aa314f00acbb803da1e76fa18666778e8a8f83484fba94526da5de2cba7", size = 598322, upload-time = "2025-11-30T20:21:48.709Z" }, + { url = "https://files.pythonhosted.org/packages/48/ac/f01fc22efec3f37d8a914fc1b2fb9bcafd56a299edbe96406f3053edea5a/rpds_py-0.30.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:7c64d38fb49b6cdeda16ab49e35fe0da2e1e9b34bc38bd78386530f218b37139", size = 560792, upload-time = "2025-11-30T20:21:50.024Z" }, + { url = "https://files.pythonhosted.org/packages/e2/da/4e2b19d0f131f35b6146425f846563d0ce036763e38913d917187307a671/rpds_py-0.30.0-cp310-cp310-win32.whl", hash = "sha256:6de2a32a1665b93233cde140ff8b3467bdb9e2af2b91079f0333a0974d12d464", size = 221901, upload-time = "2025-11-30T20:21:51.32Z" }, + { url = "https://files.pythonhosted.org/packages/96/cb/156d7a5cf4f78a7cc571465d8aec7a3c447c94f6749c5123f08438bcf7bc/rpds_py-0.30.0-cp310-cp310-win_amd64.whl", hash = "sha256:1726859cd0de969f88dc8673bdd954185b9104e05806be64bcd87badbe313169", size = 235823, upload-time = "2025-11-30T20:21:52.505Z" }, + { url = "https://files.pythonhosted.org/packages/4d/6e/f964e88b3d2abee2a82c1ac8366da848fce1c6d834dc2132c3fda3970290/rpds_py-0.30.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:a2bffea6a4ca9f01b3f8e548302470306689684e61602aa3d141e34da06cf425", size = 370157, upload-time = "2025-11-30T20:21:53.789Z" }, + { url = "https://files.pythonhosted.org/packages/94/ba/24e5ebb7c1c82e74c4e4f33b2112a5573ddc703915b13a073737b59b86e0/rpds_py-0.30.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:dc4f992dfe1e2bc3ebc7444f6c7051b4bc13cd8e33e43511e8ffd13bf407010d", size = 359676, upload-time = "2025-11-30T20:21:55.475Z" }, + { url = 
"https://files.pythonhosted.org/packages/84/86/04dbba1b087227747d64d80c3b74df946b986c57af0a9f0c98726d4d7a3b/rpds_py-0.30.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:422c3cb9856d80b09d30d2eb255d0754b23e090034e1deb4083f8004bd0761e4", size = 389938, upload-time = "2025-11-30T20:21:57.079Z" }, + { url = "https://files.pythonhosted.org/packages/42/bb/1463f0b1722b7f45431bdd468301991d1328b16cffe0b1c2918eba2c4eee/rpds_py-0.30.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:07ae8a593e1c3c6b82ca3292efbe73c30b61332fd612e05abee07c79359f292f", size = 402932, upload-time = "2025-11-30T20:21:58.47Z" }, + { url = "https://files.pythonhosted.org/packages/99/ee/2520700a5c1f2d76631f948b0736cdf9b0acb25abd0ca8e889b5c62ac2e3/rpds_py-0.30.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:12f90dd7557b6bd57f40abe7747e81e0c0b119bef015ea7726e69fe550e394a4", size = 525830, upload-time = "2025-11-30T20:21:59.699Z" }, + { url = "https://files.pythonhosted.org/packages/e0/ad/bd0331f740f5705cc555a5e17fdf334671262160270962e69a2bdef3bf76/rpds_py-0.30.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:99b47d6ad9a6da00bec6aabe5a6279ecd3c06a329d4aa4771034a21e335c3a97", size = 412033, upload-time = "2025-11-30T20:22:00.991Z" }, + { url = "https://files.pythonhosted.org/packages/f8/1e/372195d326549bb51f0ba0f2ecb9874579906b97e08880e7a65c3bef1a99/rpds_py-0.30.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:33f559f3104504506a44bb666b93a33f5d33133765b0c216a5bf2f1e1503af89", size = 390828, upload-time = "2025-11-30T20:22:02.723Z" }, + { url = "https://files.pythonhosted.org/packages/ab/2b/d88bb33294e3e0c76bc8f351a3721212713629ffca1700fa94979cb3eae8/rpds_py-0.30.0-cp311-cp311-manylinux_2_31_riscv64.whl", hash = "sha256:946fe926af6e44f3697abbc305ea168c2c31d3e3ef1058cf68f379bf0335a78d", size = 404683, upload-time = "2025-11-30T20:22:04.367Z" }, + { url = 
"https://files.pythonhosted.org/packages/50/32/c759a8d42bcb5289c1fac697cd92f6fe01a018dd937e62ae77e0e7f15702/rpds_py-0.30.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:495aeca4b93d465efde585977365187149e75383ad2684f81519f504f5c13038", size = 421583, upload-time = "2025-11-30T20:22:05.814Z" }, + { url = "https://files.pythonhosted.org/packages/2b/81/e729761dbd55ddf5d84ec4ff1f47857f4374b0f19bdabfcf929164da3e24/rpds_py-0.30.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d9a0ca5da0386dee0655b4ccdf46119df60e0f10da268d04fe7cc87886872ba7", size = 572496, upload-time = "2025-11-30T20:22:07.713Z" }, + { url = "https://files.pythonhosted.org/packages/14/f6/69066a924c3557c9c30baa6ec3a0aa07526305684c6f86c696b08860726c/rpds_py-0.30.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:8d6d1cc13664ec13c1b84241204ff3b12f9bb82464b8ad6e7a5d3486975c2eed", size = 598669, upload-time = "2025-11-30T20:22:09.312Z" }, + { url = "https://files.pythonhosted.org/packages/5f/48/905896b1eb8a05630d20333d1d8ffd162394127b74ce0b0784ae04498d32/rpds_py-0.30.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:3896fa1be39912cf0757753826bc8bdc8ca331a28a7c4ae46b7a21280b06bb85", size = 561011, upload-time = "2025-11-30T20:22:11.309Z" }, + { url = "https://files.pythonhosted.org/packages/22/16/cd3027c7e279d22e5eb431dd3c0fbc677bed58797fe7581e148f3f68818b/rpds_py-0.30.0-cp311-cp311-win32.whl", hash = "sha256:55f66022632205940f1827effeff17c4fa7ae1953d2b74a8581baaefb7d16f8c", size = 221406, upload-time = "2025-11-30T20:22:13.101Z" }, + { url = "https://files.pythonhosted.org/packages/fa/5b/e7b7aa136f28462b344e652ee010d4de26ee9fd16f1bfd5811f5153ccf89/rpds_py-0.30.0-cp311-cp311-win_amd64.whl", hash = "sha256:a51033ff701fca756439d641c0ad09a41d9242fa69121c7d8769604a0a629825", size = 236024, upload-time = "2025-11-30T20:22:14.853Z" }, + { url = 
"https://files.pythonhosted.org/packages/14/a6/364bba985e4c13658edb156640608f2c9e1d3ea3c81b27aa9d889fff0e31/rpds_py-0.30.0-cp311-cp311-win_arm64.whl", hash = "sha256:47b0ef6231c58f506ef0b74d44e330405caa8428e770fec25329ed2cb971a229", size = 229069, upload-time = "2025-11-30T20:22:16.577Z" }, + { url = "https://files.pythonhosted.org/packages/03/e7/98a2f4ac921d82f33e03f3835f5bf3a4a40aa1bfdc57975e74a97b2b4bdd/rpds_py-0.30.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:a161f20d9a43006833cd7068375a94d035714d73a172b681d8881820600abfad", size = 375086, upload-time = "2025-11-30T20:22:17.93Z" }, + { url = "https://files.pythonhosted.org/packages/4d/a1/bca7fd3d452b272e13335db8d6b0b3ecde0f90ad6f16f3328c6fb150c889/rpds_py-0.30.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6abc8880d9d036ecaafe709079969f56e876fcf107f7a8e9920ba6d5a3878d05", size = 359053, upload-time = "2025-11-30T20:22:19.297Z" }, + { url = "https://files.pythonhosted.org/packages/65/1c/ae157e83a6357eceff62ba7e52113e3ec4834a84cfe07fa4b0757a7d105f/rpds_py-0.30.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ca28829ae5f5d569bb62a79512c842a03a12576375d5ece7d2cadf8abe96ec28", size = 390763, upload-time = "2025-11-30T20:22:21.661Z" }, + { url = "https://files.pythonhosted.org/packages/d4/36/eb2eb8515e2ad24c0bd43c3ee9cd74c33f7ca6430755ccdb240fd3144c44/rpds_py-0.30.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a1010ed9524c73b94d15919ca4d41d8780980e1765babf85f9a2f90d247153dd", size = 408951, upload-time = "2025-11-30T20:22:23.408Z" }, + { url = "https://files.pythonhosted.org/packages/d6/65/ad8dc1784a331fabbd740ef6f71ce2198c7ed0890dab595adb9ea2d775a1/rpds_py-0.30.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f8d1736cfb49381ba528cd5baa46f82fdc65c06e843dab24dd70b63d09121b3f", size = 514622, upload-time = "2025-11-30T20:22:25.16Z" }, + { url = 
"https://files.pythonhosted.org/packages/63/8e/0cfa7ae158e15e143fe03993b5bcd743a59f541f5952e1546b1ac1b5fd45/rpds_py-0.30.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d948b135c4693daff7bc2dcfc4ec57237a29bd37e60c2fabf5aff2bbacf3e2f1", size = 414492, upload-time = "2025-11-30T20:22:26.505Z" }, + { url = "https://files.pythonhosted.org/packages/60/1b/6f8f29f3f995c7ffdde46a626ddccd7c63aefc0efae881dc13b6e5d5bb16/rpds_py-0.30.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47f236970bccb2233267d89173d3ad2703cd36a0e2a6e92d0560d333871a3d23", size = 394080, upload-time = "2025-11-30T20:22:27.934Z" }, + { url = "https://files.pythonhosted.org/packages/6d/d5/a266341051a7a3ca2f4b750a3aa4abc986378431fc2da508c5034d081b70/rpds_py-0.30.0-cp312-cp312-manylinux_2_31_riscv64.whl", hash = "sha256:2e6ecb5a5bcacf59c3f912155044479af1d0b6681280048b338b28e364aca1f6", size = 408680, upload-time = "2025-11-30T20:22:29.341Z" }, + { url = "https://files.pythonhosted.org/packages/10/3b/71b725851df9ab7a7a4e33cf36d241933da66040d195a84781f49c50490c/rpds_py-0.30.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a8fa71a2e078c527c3e9dc9fc5a98c9db40bcc8a92b4e8858e36d329f8684b51", size = 423589, upload-time = "2025-11-30T20:22:31.469Z" }, + { url = "https://files.pythonhosted.org/packages/00/2b/e59e58c544dc9bd8bd8384ecdb8ea91f6727f0e37a7131baeff8d6f51661/rpds_py-0.30.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:73c67f2db7bc334e518d097c6d1e6fed021bbc9b7d678d6cc433478365d1d5f5", size = 573289, upload-time = "2025-11-30T20:22:32.997Z" }, + { url = "https://files.pythonhosted.org/packages/da/3e/a18e6f5b460893172a7d6a680e86d3b6bc87a54c1f0b03446a3c8c7b588f/rpds_py-0.30.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:5ba103fb455be00f3b1c2076c9d4264bfcb037c976167a6047ed82f23153f02e", size = 599737, upload-time = "2025-11-30T20:22:34.419Z" }, + { url = 
"https://files.pythonhosted.org/packages/5c/e2/714694e4b87b85a18e2c243614974413c60aa107fd815b8cbc42b873d1d7/rpds_py-0.30.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:7cee9c752c0364588353e627da8a7e808a66873672bcb5f52890c33fd965b394", size = 563120, upload-time = "2025-11-30T20:22:35.903Z" }, + { url = "https://files.pythonhosted.org/packages/6f/ab/d5d5e3bcedb0a77f4f613706b750e50a5a3ba1c15ccd3665ecc636c968fd/rpds_py-0.30.0-cp312-cp312-win32.whl", hash = "sha256:1ab5b83dbcf55acc8b08fc62b796ef672c457b17dbd7820a11d6c52c06839bdf", size = 223782, upload-time = "2025-11-30T20:22:37.271Z" }, + { url = "https://files.pythonhosted.org/packages/39/3b/f786af9957306fdc38a74cef405b7b93180f481fb48453a114bb6465744a/rpds_py-0.30.0-cp312-cp312-win_amd64.whl", hash = "sha256:a090322ca841abd453d43456ac34db46e8b05fd9b3b4ac0c78bcde8b089f959b", size = 240463, upload-time = "2025-11-30T20:22:39.021Z" }, + { url = "https://files.pythonhosted.org/packages/f3/d2/b91dc748126c1559042cfe41990deb92c4ee3e2b415f6b5234969ffaf0cc/rpds_py-0.30.0-cp312-cp312-win_arm64.whl", hash = "sha256:669b1805bd639dd2989b281be2cfd951c6121b65e729d9b843e9639ef1fd555e", size = 230868, upload-time = "2025-11-30T20:22:40.493Z" }, + { url = "https://files.pythonhosted.org/packages/ed/dc/d61221eb88ff410de3c49143407f6f3147acf2538c86f2ab7ce65ae7d5f9/rpds_py-0.30.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:f83424d738204d9770830d35290ff3273fbb02b41f919870479fab14b9d303b2", size = 374887, upload-time = "2025-11-30T20:22:41.812Z" }, + { url = "https://files.pythonhosted.org/packages/fd/32/55fb50ae104061dbc564ef15cc43c013dc4a9f4527a1f4d99baddf56fe5f/rpds_py-0.30.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:e7536cd91353c5273434b4e003cbda89034d67e7710eab8761fd918ec6c69cf8", size = 358904, upload-time = "2025-11-30T20:22:43.479Z" }, + { url = 
"https://files.pythonhosted.org/packages/58/70/faed8186300e3b9bdd138d0273109784eea2396c68458ed580f885dfe7ad/rpds_py-0.30.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2771c6c15973347f50fece41fc447c054b7ac2ae0502388ce3b6738cd366e3d4", size = 389945, upload-time = "2025-11-30T20:22:44.819Z" }, + { url = "https://files.pythonhosted.org/packages/bd/a8/073cac3ed2c6387df38f71296d002ab43496a96b92c823e76f46b8af0543/rpds_py-0.30.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:0a59119fc6e3f460315fe9d08149f8102aa322299deaa5cab5b40092345c2136", size = 407783, upload-time = "2025-11-30T20:22:46.103Z" }, + { url = "https://files.pythonhosted.org/packages/77/57/5999eb8c58671f1c11eba084115e77a8899d6e694d2a18f69f0ba471ec8b/rpds_py-0.30.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:76fec018282b4ead0364022e3c54b60bf368b9d926877957a8624b58419169b7", size = 515021, upload-time = "2025-11-30T20:22:47.458Z" }, + { url = "https://files.pythonhosted.org/packages/e0/af/5ab4833eadc36c0a8ed2bc5c0de0493c04f6c06de223170bd0798ff98ced/rpds_py-0.30.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:692bef75a5525db97318e8cd061542b5a79812d711ea03dbc1f6f8dbb0c5f0d2", size = 414589, upload-time = "2025-11-30T20:22:48.872Z" }, + { url = "https://files.pythonhosted.org/packages/b7/de/f7192e12b21b9e9a68a6d0f249b4af3fdcdff8418be0767a627564afa1f1/rpds_py-0.30.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9027da1ce107104c50c81383cae773ef5c24d296dd11c99e2629dbd7967a20c6", size = 394025, upload-time = "2025-11-30T20:22:50.196Z" }, + { url = "https://files.pythonhosted.org/packages/91/c4/fc70cd0249496493500e7cc2de87504f5aa6509de1e88623431fec76d4b6/rpds_py-0.30.0-cp313-cp313-manylinux_2_31_riscv64.whl", hash = "sha256:9cf69cdda1f5968a30a359aba2f7f9aa648a9ce4b580d6826437f2b291cfc86e", size = 408895, upload-time = "2025-11-30T20:22:51.87Z" }, + { url = 
"https://files.pythonhosted.org/packages/58/95/d9275b05ab96556fefff73a385813eb66032e4c99f411d0795372d9abcea/rpds_py-0.30.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a4796a717bf12b9da9d3ad002519a86063dcac8988b030e405704ef7d74d2d9d", size = 422799, upload-time = "2025-11-30T20:22:53.341Z" }, + { url = "https://files.pythonhosted.org/packages/06/c1/3088fc04b6624eb12a57eb814f0d4997a44b0d208d6cace713033ff1a6ba/rpds_py-0.30.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:5d4c2aa7c50ad4728a094ebd5eb46c452e9cb7edbfdb18f9e1221f597a73e1e7", size = 572731, upload-time = "2025-11-30T20:22:54.778Z" }, + { url = "https://files.pythonhosted.org/packages/d8/42/c612a833183b39774e8ac8fecae81263a68b9583ee343db33ab571a7ce55/rpds_py-0.30.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:ba81a9203d07805435eb06f536d95a266c21e5b2dfbf6517748ca40c98d19e31", size = 599027, upload-time = "2025-11-30T20:22:56.212Z" }, + { url = "https://files.pythonhosted.org/packages/5f/60/525a50f45b01d70005403ae0e25f43c0384369ad24ffe46e8d9068b50086/rpds_py-0.30.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:945dccface01af02675628334f7cf49c2af4c1c904748efc5cf7bbdf0b579f95", size = 563020, upload-time = "2025-11-30T20:22:58.2Z" }, + { url = "https://files.pythonhosted.org/packages/0b/5d/47c4655e9bcd5ca907148535c10e7d489044243cc9941c16ed7cd53be91d/rpds_py-0.30.0-cp313-cp313-win32.whl", hash = "sha256:b40fb160a2db369a194cb27943582b38f79fc4887291417685f3ad693c5a1d5d", size = 223139, upload-time = "2025-11-30T20:23:00.209Z" }, + { url = "https://files.pythonhosted.org/packages/f2/e1/485132437d20aa4d3e1d8b3fb5a5e65aa8139f1e097080c2a8443201742c/rpds_py-0.30.0-cp313-cp313-win_amd64.whl", hash = "sha256:806f36b1b605e2d6a72716f321f20036b9489d29c51c91f4dd29a3e3afb73b15", size = 240224, upload-time = "2025-11-30T20:23:02.008Z" }, + { url = 
"https://files.pythonhosted.org/packages/24/95/ffd128ed1146a153d928617b0ef673960130be0009c77d8fbf0abe306713/rpds_py-0.30.0-cp313-cp313-win_arm64.whl", hash = "sha256:d96c2086587c7c30d44f31f42eae4eac89b60dabbac18c7669be3700f13c3ce1", size = 230645, upload-time = "2025-11-30T20:23:03.43Z" }, + { url = "https://files.pythonhosted.org/packages/ff/1b/b10de890a0def2a319a2626334a7f0ae388215eb60914dbac8a3bae54435/rpds_py-0.30.0-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:eb0b93f2e5c2189ee831ee43f156ed34e2a89a78a66b98cadad955972548be5a", size = 364443, upload-time = "2025-11-30T20:23:04.878Z" }, + { url = "https://files.pythonhosted.org/packages/0d/bf/27e39f5971dc4f305a4fb9c672ca06f290f7c4e261c568f3dea16a410d47/rpds_py-0.30.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:922e10f31f303c7c920da8981051ff6d8c1a56207dbdf330d9047f6d30b70e5e", size = 353375, upload-time = "2025-11-30T20:23:06.342Z" }, + { url = "https://files.pythonhosted.org/packages/40/58/442ada3bba6e8e6615fc00483135c14a7538d2ffac30e2d933ccf6852232/rpds_py-0.30.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cdc62c8286ba9bf7f47befdcea13ea0e26bf294bda99758fd90535cbaf408000", size = 383850, upload-time = "2025-11-30T20:23:07.825Z" }, + { url = "https://files.pythonhosted.org/packages/14/14/f59b0127409a33c6ef6f5c1ebd5ad8e32d7861c9c7adfa9a624fc3889f6c/rpds_py-0.30.0-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:47f9a91efc418b54fb8190a6b4aa7813a23fb79c51f4bb84e418f5476c38b8db", size = 392812, upload-time = "2025-11-30T20:23:09.228Z" }, + { url = "https://files.pythonhosted.org/packages/b3/66/e0be3e162ac299b3a22527e8913767d869e6cc75c46bd844aa43fb81ab62/rpds_py-0.30.0-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1f3587eb9b17f3789ad50824084fa6f81921bbf9a795826570bda82cb3ed91f2", size = 517841, upload-time = "2025-11-30T20:23:11.186Z" }, + { url = 
"https://files.pythonhosted.org/packages/3d/55/fa3b9cf31d0c963ecf1ba777f7cf4b2a2c976795ac430d24a1f43d25a6ba/rpds_py-0.30.0-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:39c02563fc592411c2c61d26b6c5fe1e51eaa44a75aa2c8735ca88b0d9599daa", size = 408149, upload-time = "2025-11-30T20:23:12.864Z" }, + { url = "https://files.pythonhosted.org/packages/60/ca/780cf3b1a32b18c0f05c441958d3758f02544f1d613abf9488cd78876378/rpds_py-0.30.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:51a1234d8febafdfd33a42d97da7a43f5dcb120c1060e352a3fbc0c6d36e2083", size = 383843, upload-time = "2025-11-30T20:23:14.638Z" }, + { url = "https://files.pythonhosted.org/packages/82/86/d5f2e04f2aa6247c613da0c1dd87fcd08fa17107e858193566048a1e2f0a/rpds_py-0.30.0-cp313-cp313t-manylinux_2_31_riscv64.whl", hash = "sha256:eb2c4071ab598733724c08221091e8d80e89064cd472819285a9ab0f24bcedb9", size = 396507, upload-time = "2025-11-30T20:23:16.105Z" }, + { url = "https://files.pythonhosted.org/packages/4b/9a/453255d2f769fe44e07ea9785c8347edaf867f7026872e76c1ad9f7bed92/rpds_py-0.30.0-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6bdfdb946967d816e6adf9a3d8201bfad269c67efe6cefd7093ef959683c8de0", size = 414949, upload-time = "2025-11-30T20:23:17.539Z" }, + { url = "https://files.pythonhosted.org/packages/a3/31/622a86cdc0c45d6df0e9ccb6becdba5074735e7033c20e401a6d9d0e2ca0/rpds_py-0.30.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:c77afbd5f5250bf27bf516c7c4a016813eb2d3e116139aed0096940c5982da94", size = 565790, upload-time = "2025-11-30T20:23:19.029Z" }, + { url = "https://files.pythonhosted.org/packages/1c/5d/15bbf0fb4a3f58a3b1c67855ec1efcc4ceaef4e86644665fff03e1b66d8d/rpds_py-0.30.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:61046904275472a76c8c90c9ccee9013d70a6d0f73eecefd38c1ae7c39045a08", size = 590217, upload-time = "2025-11-30T20:23:20.885Z" }, + { url = 
"https://files.pythonhosted.org/packages/6d/61/21b8c41f68e60c8cc3b2e25644f0e3681926020f11d06ab0b78e3c6bbff1/rpds_py-0.30.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:4c5f36a861bc4b7da6516dbdf302c55313afa09b81931e8280361a4f6c9a2d27", size = 555806, upload-time = "2025-11-30T20:23:22.488Z" }, + { url = "https://files.pythonhosted.org/packages/f9/39/7e067bb06c31de48de3eb200f9fc7c58982a4d3db44b07e73963e10d3be9/rpds_py-0.30.0-cp313-cp313t-win32.whl", hash = "sha256:3d4a69de7a3e50ffc214ae16d79d8fbb0922972da0356dcf4d0fdca2878559c6", size = 211341, upload-time = "2025-11-30T20:23:24.449Z" }, + { url = "https://files.pythonhosted.org/packages/0a/4d/222ef0b46443cf4cf46764d9c630f3fe4abaa7245be9417e56e9f52b8f65/rpds_py-0.30.0-cp313-cp313t-win_amd64.whl", hash = "sha256:f14fc5df50a716f7ece6a80b6c78bb35ea2ca47c499e422aa4463455dd96d56d", size = 225768, upload-time = "2025-11-30T20:23:25.908Z" }, + { url = "https://files.pythonhosted.org/packages/86/81/dad16382ebbd3d0e0328776d8fd7ca94220e4fa0798d1dc5e7da48cb3201/rpds_py-0.30.0-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:68f19c879420aa08f61203801423f6cd5ac5f0ac4ac82a2368a9fcd6a9a075e0", size = 362099, upload-time = "2025-11-30T20:23:27.316Z" }, + { url = "https://files.pythonhosted.org/packages/2b/60/19f7884db5d5603edf3c6bce35408f45ad3e97e10007df0e17dd57af18f8/rpds_py-0.30.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:ec7c4490c672c1a0389d319b3a9cfcd098dcdc4783991553c332a15acf7249be", size = 353192, upload-time = "2025-11-30T20:23:29.151Z" }, + { url = "https://files.pythonhosted.org/packages/bf/c4/76eb0e1e72d1a9c4703c69607cec123c29028bff28ce41588792417098ac/rpds_py-0.30.0-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f251c812357a3fed308d684a5079ddfb9d933860fc6de89f2b7ab00da481e65f", size = 384080, upload-time = "2025-11-30T20:23:30.785Z" }, + { url = 
"https://files.pythonhosted.org/packages/72/87/87ea665e92f3298d1b26d78814721dc39ed8d2c74b86e83348d6b48a6f31/rpds_py-0.30.0-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ac98b175585ecf4c0348fd7b29c3864bda53b805c773cbf7bfdaffc8070c976f", size = 394841, upload-time = "2025-11-30T20:23:32.209Z" }, + { url = "https://files.pythonhosted.org/packages/77/ad/7783a89ca0587c15dcbf139b4a8364a872a25f861bdb88ed99f9b0dec985/rpds_py-0.30.0-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3e62880792319dbeb7eb866547f2e35973289e7d5696c6e295476448f5b63c87", size = 516670, upload-time = "2025-11-30T20:23:33.742Z" }, + { url = "https://files.pythonhosted.org/packages/5b/3c/2882bdac942bd2172f3da574eab16f309ae10a3925644e969536553cb4ee/rpds_py-0.30.0-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4e7fc54e0900ab35d041b0601431b0a0eb495f0851a0639b6ef90f7741b39a18", size = 408005, upload-time = "2025-11-30T20:23:35.253Z" }, + { url = "https://files.pythonhosted.org/packages/ce/81/9a91c0111ce1758c92516a3e44776920b579d9a7c09b2b06b642d4de3f0f/rpds_py-0.30.0-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47e77dc9822d3ad616c3d5759ea5631a75e5809d5a28707744ef79d7a1bcfcad", size = 382112, upload-time = "2025-11-30T20:23:36.842Z" }, + { url = "https://files.pythonhosted.org/packages/cf/8e/1da49d4a107027e5fbc64daeab96a0706361a2918da10cb41769244b805d/rpds_py-0.30.0-cp314-cp314-manylinux_2_31_riscv64.whl", hash = "sha256:b4dc1a6ff022ff85ecafef7979a2c6eb423430e05f1165d6688234e62ba99a07", size = 399049, upload-time = "2025-11-30T20:23:38.343Z" }, + { url = "https://files.pythonhosted.org/packages/df/5a/7ee239b1aa48a127570ec03becbb29c9d5a9eb092febbd1699d567cae859/rpds_py-0.30.0-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:4559c972db3a360808309e06a74628b95eaccbf961c335c8fe0d590cf587456f", size = 415661, upload-time = "2025-11-30T20:23:40.263Z" }, + { url = 
"https://files.pythonhosted.org/packages/70/ea/caa143cf6b772f823bc7929a45da1fa83569ee49b11d18d0ada7f5ee6fd6/rpds_py-0.30.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:0ed177ed9bded28f8deb6ab40c183cd1192aa0de40c12f38be4d59cd33cb5c65", size = 565606, upload-time = "2025-11-30T20:23:42.186Z" }, + { url = "https://files.pythonhosted.org/packages/64/91/ac20ba2d69303f961ad8cf55bf7dbdb4763f627291ba3d0d7d67333cced9/rpds_py-0.30.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:ad1fa8db769b76ea911cb4e10f049d80bf518c104f15b3edb2371cc65375c46f", size = 591126, upload-time = "2025-11-30T20:23:44.086Z" }, + { url = "https://files.pythonhosted.org/packages/21/20/7ff5f3c8b00c8a95f75985128c26ba44503fb35b8e0259d812766ea966c7/rpds_py-0.30.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:46e83c697b1f1c72b50e5ee5adb4353eef7406fb3f2043d64c33f20ad1c2fc53", size = 553371, upload-time = "2025-11-30T20:23:46.004Z" }, + { url = "https://files.pythonhosted.org/packages/72/c7/81dadd7b27c8ee391c132a6b192111ca58d866577ce2d9b0ca157552cce0/rpds_py-0.30.0-cp314-cp314-win32.whl", hash = "sha256:ee454b2a007d57363c2dfd5b6ca4a5d7e2c518938f8ed3b706e37e5d470801ed", size = 215298, upload-time = "2025-11-30T20:23:47.696Z" }, + { url = "https://files.pythonhosted.org/packages/3e/d2/1aaac33287e8cfb07aab2e6b8ac1deca62f6f65411344f1433c55e6f3eb8/rpds_py-0.30.0-cp314-cp314-win_amd64.whl", hash = "sha256:95f0802447ac2d10bcc69f6dc28fe95fdf17940367b21d34e34c737870758950", size = 228604, upload-time = "2025-11-30T20:23:49.501Z" }, + { url = "https://files.pythonhosted.org/packages/e8/95/ab005315818cc519ad074cb7784dae60d939163108bd2b394e60dc7b5461/rpds_py-0.30.0-cp314-cp314-win_arm64.whl", hash = "sha256:613aa4771c99f03346e54c3f038e4cc574ac09a3ddfb0e8878487335e96dead6", size = 222391, upload-time = "2025-11-30T20:23:50.96Z" }, + { url = 
"https://files.pythonhosted.org/packages/9e/68/154fe0194d83b973cdedcdcc88947a2752411165930182ae41d983dcefa6/rpds_py-0.30.0-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:7e6ecfcb62edfd632e56983964e6884851786443739dbfe3582947e87274f7cb", size = 364868, upload-time = "2025-11-30T20:23:52.494Z" }, + { url = "https://files.pythonhosted.org/packages/83/69/8bbc8b07ec854d92a8b75668c24d2abcb1719ebf890f5604c61c9369a16f/rpds_py-0.30.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:a1d0bc22a7cdc173fedebb73ef81e07faef93692b8c1ad3733b67e31e1b6e1b8", size = 353747, upload-time = "2025-11-30T20:23:54.036Z" }, + { url = "https://files.pythonhosted.org/packages/ab/00/ba2e50183dbd9abcce9497fa5149c62b4ff3e22d338a30d690f9af970561/rpds_py-0.30.0-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0d08f00679177226c4cb8c5265012eea897c8ca3b93f429e546600c971bcbae7", size = 383795, upload-time = "2025-11-30T20:23:55.556Z" }, + { url = "https://files.pythonhosted.org/packages/05/6f/86f0272b84926bcb0e4c972262f54223e8ecc556b3224d281e6598fc9268/rpds_py-0.30.0-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5965af57d5848192c13534f90f9dd16464f3c37aaf166cc1da1cae1fd5a34898", size = 393330, upload-time = "2025-11-30T20:23:57.033Z" }, + { url = "https://files.pythonhosted.org/packages/cb/e9/0e02bb2e6dc63d212641da45df2b0bf29699d01715913e0d0f017ee29438/rpds_py-0.30.0-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9a4e86e34e9ab6b667c27f3211ca48f73dba7cd3d90f8d5b11be56e5dbc3fb4e", size = 518194, upload-time = "2025-11-30T20:23:58.637Z" }, + { url = "https://files.pythonhosted.org/packages/ee/ca/be7bca14cf21513bdf9c0606aba17d1f389ea2b6987035eb4f62bd923f25/rpds_py-0.30.0-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e5d3e6b26f2c785d65cc25ef1e5267ccbe1b069c5c21b8cc724efee290554419", size = 408340, upload-time = "2025-11-30T20:24:00.2Z" }, + { url = 
"https://files.pythonhosted.org/packages/c2/c7/736e00ebf39ed81d75544c0da6ef7b0998f8201b369acf842f9a90dc8fce/rpds_py-0.30.0-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:626a7433c34566535b6e56a1b39a7b17ba961e97ce3b80ec62e6f1312c025551", size = 383765, upload-time = "2025-11-30T20:24:01.759Z" }, + { url = "https://files.pythonhosted.org/packages/4a/3f/da50dfde9956aaf365c4adc9533b100008ed31aea635f2b8d7b627e25b49/rpds_py-0.30.0-cp314-cp314t-manylinux_2_31_riscv64.whl", hash = "sha256:acd7eb3f4471577b9b5a41baf02a978e8bdeb08b4b355273994f8b87032000a8", size = 396834, upload-time = "2025-11-30T20:24:03.687Z" }, + { url = "https://files.pythonhosted.org/packages/4e/00/34bcc2565b6020eab2623349efbdec810676ad571995911f1abdae62a3a0/rpds_py-0.30.0-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:fe5fa731a1fa8a0a56b0977413f8cacac1768dad38d16b3a296712709476fbd5", size = 415470, upload-time = "2025-11-30T20:24:05.232Z" }, + { url = "https://files.pythonhosted.org/packages/8c/28/882e72b5b3e6f718d5453bd4d0d9cf8df36fddeb4ddbbab17869d5868616/rpds_py-0.30.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:74a3243a411126362712ee1524dfc90c650a503502f135d54d1b352bd01f2404", size = 565630, upload-time = "2025-11-30T20:24:06.878Z" }, + { url = "https://files.pythonhosted.org/packages/3b/97/04a65539c17692de5b85c6e293520fd01317fd878ea1995f0367d4532fb1/rpds_py-0.30.0-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:3e8eeb0544f2eb0d2581774be4c3410356eba189529a6b3e36bbbf9696175856", size = 591148, upload-time = "2025-11-30T20:24:08.445Z" }, + { url = "https://files.pythonhosted.org/packages/85/70/92482ccffb96f5441aab93e26c4d66489eb599efdcf96fad90c14bbfb976/rpds_py-0.30.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:dbd936cde57abfee19ab3213cf9c26be06d60750e60a8e4dd85d1ab12c8b1f40", size = 556030, upload-time = "2025-11-30T20:24:10.956Z" }, + { url = 
"https://files.pythonhosted.org/packages/20/53/7c7e784abfa500a2b6b583b147ee4bb5a2b3747a9166bab52fec4b5b5e7d/rpds_py-0.30.0-cp314-cp314t-win32.whl", hash = "sha256:dc824125c72246d924f7f796b4f63c1e9dc810c7d9e2355864b3c3a73d59ade0", size = 211570, upload-time = "2025-11-30T20:24:12.735Z" }, + { url = "https://files.pythonhosted.org/packages/d0/02/fa464cdfbe6b26e0600b62c528b72d8608f5cc49f96b8d6e38c95d60c676/rpds_py-0.30.0-cp314-cp314t-win_amd64.whl", hash = "sha256:27f4b0e92de5bfbc6f86e43959e6edd1425c33b5e69aab0984a72047f2bcf1e3", size = 226532, upload-time = "2025-11-30T20:24:14.634Z" }, + { url = "https://files.pythonhosted.org/packages/69/71/3f34339ee70521864411f8b6992e7ab13ac30d8e4e3309e07c7361767d91/rpds_py-0.30.0-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:c2262bdba0ad4fc6fb5545660673925c2d2a5d9e2e0fb603aad545427be0fc58", size = 372292, upload-time = "2025-11-30T20:24:16.537Z" }, + { url = "https://files.pythonhosted.org/packages/57/09/f183df9b8f2d66720d2ef71075c59f7e1b336bec7ee4c48f0a2b06857653/rpds_py-0.30.0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:ee6af14263f25eedc3bb918a3c04245106a42dfd4f5c2285ea6f997b1fc3f89a", size = 362128, upload-time = "2025-11-30T20:24:18.086Z" }, + { url = "https://files.pythonhosted.org/packages/7a/68/5c2594e937253457342e078f0cc1ded3dd7b2ad59afdbf2d354869110a02/rpds_py-0.30.0-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3adbb8179ce342d235c31ab8ec511e66c73faa27a47e076ccc92421add53e2bb", size = 391542, upload-time = "2025-11-30T20:24:20.092Z" }, + { url = "https://files.pythonhosted.org/packages/49/5c/31ef1afd70b4b4fbdb2800249f34c57c64beb687495b10aec0365f53dfc4/rpds_py-0.30.0-pp311-pypy311_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:250fa00e9543ac9b97ac258bd37367ff5256666122c2d0f2bc97577c60a1818c", size = 404004, upload-time = "2025-11-30T20:24:22.231Z" }, + { url = 
"https://files.pythonhosted.org/packages/e3/63/0cfbea38d05756f3440ce6534d51a491d26176ac045e2707adc99bb6e60a/rpds_py-0.30.0-pp311-pypy311_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9854cf4f488b3d57b9aaeb105f06d78e5529d3145b1e4a41750167e8c213c6d3", size = 527063, upload-time = "2025-11-30T20:24:24.302Z" }, + { url = "https://files.pythonhosted.org/packages/42/e6/01e1f72a2456678b0f618fc9a1a13f882061690893c192fcad9f2926553a/rpds_py-0.30.0-pp311-pypy311_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:993914b8e560023bc0a8bf742c5f303551992dcb85e247b1e5c7f4a7d145bda5", size = 413099, upload-time = "2025-11-30T20:24:25.916Z" }, + { url = "https://files.pythonhosted.org/packages/b8/25/8df56677f209003dcbb180765520c544525e3ef21ea72279c98b9aa7c7fb/rpds_py-0.30.0-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:58edca431fb9b29950807e301826586e5bbf24163677732429770a697ffe6738", size = 392177, upload-time = "2025-11-30T20:24:27.834Z" }, + { url = "https://files.pythonhosted.org/packages/4a/b4/0a771378c5f16f8115f796d1f437950158679bcd2a7c68cf251cfb00ed5b/rpds_py-0.30.0-pp311-pypy311_pp73-manylinux_2_31_riscv64.whl", hash = "sha256:dea5b552272a944763b34394d04577cf0f9bd013207bc32323b5a89a53cf9c2f", size = 406015, upload-time = "2025-11-30T20:24:29.457Z" }, + { url = "https://files.pythonhosted.org/packages/36/d8/456dbba0af75049dc6f63ff295a2f92766b9d521fa00de67a2bd6427d57a/rpds_py-0.30.0-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ba3af48635eb83d03f6c9735dfb21785303e73d22ad03d489e88adae6eab8877", size = 423736, upload-time = "2025-11-30T20:24:31.22Z" }, + { url = "https://files.pythonhosted.org/packages/13/64/b4d76f227d5c45a7e0b796c674fd81b0a6c4fbd48dc29271857d8219571c/rpds_py-0.30.0-pp311-pypy311_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:dff13836529b921e22f15cb099751209a60009731a68519630a24d61f0b1b30a", size = 573981, upload-time = "2025-11-30T20:24:32.934Z" }, + { url 
= "https://files.pythonhosted.org/packages/20/91/092bacadeda3edf92bf743cc96a7be133e13a39cdbfd7b5082e7ab638406/rpds_py-0.30.0-pp311-pypy311_pp73-musllinux_1_2_i686.whl", hash = "sha256:1b151685b23929ab7beec71080a8889d4d6d9fa9a983d213f07121205d48e2c4", size = 599782, upload-time = "2025-11-30T20:24:35.169Z" }, + { url = "https://files.pythonhosted.org/packages/d1/b7/b95708304cd49b7b6f82fdd039f1748b66ec2b21d6a45180910802f1abf1/rpds_py-0.30.0-pp311-pypy311_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:ac37f9f516c51e5753f27dfdef11a88330f04de2d564be3991384b2f3535d02e", size = 562191, upload-time = "2025-11-30T20:24:36.853Z" }, +] + [[package]] name = "ruff" version = "0.15.8" @@ -1061,6 +1605,32 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b7/46/f5af3402b579fd5e11573ce652019a67074317e18c1935cc0b4ba9b35552/secretstorage-3.5.0-py3-none-any.whl", hash = "sha256:0ce65888c0725fcb2c5bc0fdb8e5438eece02c523557ea40ce0703c266248137", size = 15554, upload-time = "2025-11-23T19:02:51.545Z" }, ] +[[package]] +name = "sse-starlette" +version = "3.3.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "starlette" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/14/2f/9223c24f568bb7a0c03d751e609844dce0968f13b39a3f73fbb3a96cd27a/sse_starlette-3.3.3.tar.gz", hash = "sha256:72a95d7575fd5129bd0ae15275ac6432bb35ac542fdebb82889c24bb9f3f4049", size = 32420, upload-time = "2026-03-17T20:05:55.529Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/78/e2/b8cff57a67dddf9a464d7e943218e031617fb3ddc133aeeb0602ff5f6c85/sse_starlette-3.3.3-py3-none-any.whl", hash = "sha256:c5abb5082a1cc1c6294d89c5290c46b5f67808cfdb612b7ec27e8ba061c22e8d", size = 14329, upload-time = "2026-03-17T20:05:54.35Z" }, +] + +[[package]] +name = "starlette" +version = "1.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "typing-extensions", marker = "python_full_version < 
'3.13'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/81/69/17425771797c36cded50b7fe44e850315d039f28b15901ab44839e70b593/starlette-1.0.0.tar.gz", hash = "sha256:6a4beaf1f81bb472fd19ea9b918b50dc3a77a6f2e190a12954b25e6ed5eea149", size = 2655289, upload-time = "2026-03-22T18:29:46.779Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0b/c9/584bc9651441b4ba60cc4d557d8a547b5aff901af35bda3a4ee30c819b82/starlette-1.0.0-py3-none-any.whl", hash = "sha256:d3ec55e0bb321692d275455ddfd3df75fff145d009685eb40dc91fc66b03d38b", size = 72651, upload-time = "2026-03-22T18:29:45.111Z" }, +] + [[package]] name = "tomli" version = "2.4.1" @@ -1144,6 +1714,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/18/67/36e9267722cc04a6b9f15c7f3441c2363321a3ea07da7ae0c0707beb2a9c/typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548", size = 44614, upload-time = "2025-08-25T13:49:24.86Z" }, ] +[[package]] +name = "typing-inspection" +version = "0.4.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/55/e3/70399cb7dd41c10ac53367ae42139cf4b1ca5f36bb3dc6c9d33acdb43655/typing_inspection-0.4.2.tar.gz", hash = "sha256:ba561c48a67c5958007083d386c3295464928b01faa735ab8547c5692e87f464", size = 75949, upload-time = "2025-10-01T02:14:41.687Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/dc/9b/47798a6c91d8bdb567fe2698fe81e0c6b7cb7ef4d13da4114b41d239f65d/typing_inspection-0.4.2-py3-none-any.whl", hash = "sha256:4ed1cacbdc298c220f1bd249ed5287caa16f34d44ef4e9c3d0cbad5b521545e7", size = 14611, upload-time = "2025-10-01T02:14:40.154Z" }, +] + [[package]] name = "urllib3" version = "2.6.3" @@ -1153,6 +1735,20 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/39/08/aaaad47bc4e9dc8c725e68f9d04865dbcb2052843ff09c97b08904852d84/urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4", size = 131584, upload-time = "2026-01-07T16:24:42.685Z" }, ] +[[package]] +name = "uvicorn" +version = "0.42.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "click" }, + { name = "h11" }, + { name = "typing-extensions", marker = "python_full_version < '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/e3/ad/4a96c425be6fb67e0621e62d86c402b4a17ab2be7f7c055d9bd2f638b9e2/uvicorn-0.42.0.tar.gz", hash = "sha256:9b1f190ce15a2dd22e7758651d9b6d12df09a13d51ba5bf4fc33c383a48e1775", size = 85393, upload-time = "2026-03-16T06:19:50.077Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0a/89/f8827ccff89c1586027a105e5630ff6139a64da2515e24dafe860bd9ae4d/uvicorn-0.42.0-py3-none-any.whl", hash = "sha256:96c30f5c7abe6f74ae8900a70e92b85ad6613b745d4879eb9b16ccad15645359", size = 68830, upload-time = "2026-03-16T06:19:48.325Z" }, +] + [[package]] name = "virtualenv" version = "21.2.0" From 2c8b3adb9290206823f4d19bdf974ca74b11745b Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Sat, 28 Mar 2026 21:05:39 +0500 Subject: [PATCH 02/15] feat(mcp): deliver b3 diff-aware agent workflows, SARIF hardening, and changed-scope CLI UX - add optional read-only MCP server with diff-aware analysis, compare-runs, remediation payloads, reviewed state, and stable resources - complete b3 MCP and SARIF review spec work without changing baseline semantics - add changed-scope CLI flags and render changed-scope as a first-class summary block - add HTML IDE deep links and stable finding anchors for agent/IDE navigation - sync docs, AGENTS, and changelog with the new MCP/CLI/report surface --- AGENTS.md | 18 +- CHANGELOG.md | 24 +- README.md | 59 +- codeclone/_cli_args.py | 17 + codeclone/_cli_meta.py | 3 + 
codeclone/_cli_summary.py | 39 + codeclone/_html_css.py | 38 +- codeclone/_html_js.py | 100 + codeclone/_html_report/_assemble.py | 24 +- codeclone/_html_report/_icons.py | 5 + codeclone/_html_report/_sections/_clones.py | 12 +- .../_html_report/_sections/_suggestions.py | 5 +- codeclone/_html_report/_tables.py | 4 +- codeclone/cli.py | 289 ++- codeclone/domain/__init__.py | 2 + codeclone/domain/findings.py | 2 + codeclone/mcp_server.py | 400 ++- codeclone/mcp_service.py | 2259 ++++++++++++++++- codeclone/metrics/dead_code.py | 4 +- codeclone/pipeline.py | 21 +- codeclone/report/findings.py | 10 +- codeclone/report/json_contract.py | 3 + codeclone/report/sarif.py | 110 +- codeclone/templates.py | 2 +- codeclone/ui_messages.py | 45 + docs/README.md | 7 + docs/architecture.md | 5 + docs/book/08-report.md | 4 +- docs/book/09-cli.md | 18 + docs/book/20-mcp-interface.md | 78 +- docs/book/appendix/b-schema-layouts.md | 14 +- docs/mcp.md | 122 +- docs/sarif.md | 10 +- tests/test_cli_unit.py | 298 ++- tests/test_html_report.py | 78 +- tests/test_mcp_server.py | 154 +- tests/test_mcp_service.py | 1160 ++++++++- tests/test_report.py | 15 +- tests/test_report_contract_coverage.py | 30 +- 39 files changed, 5210 insertions(+), 278 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index b871f7f..591190d 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -61,6 +61,8 @@ Key artifacts: - `.cache/codeclone/cache.json` — analysis cache (integrity-checked) - `.cache/codeclone/report.html|report.json|report.md|report.sarif|report.txt` — reports - `codeclone-mcp` — optional read-only MCP server (install via `codeclone[mcp]`) +- MCP runs are in-memory only; review markers are session-local and must never + leak into baseline/cache/report artifacts - `docs/`, `mkdocs.yml`, `.github/workflows/docs.yml` — published documentation site and docs build pipeline --- @@ -170,6 +172,7 @@ Reports come in: MCP is a separate optional interface, not a report format. 
It must remain a read-only agent layer over the same canonical report/baseline/cache contracts. +Session review markers are allowed only as ephemeral MCP process state. ### Report invariants @@ -179,6 +182,10 @@ read-only agent layer over the same canonical report/baseline/cache contracts. - baseline fingerprint + schema versions - baseline generator version - cache path / cache used +- SARIF `partialFingerprints.primaryLocationLineHash` must remain stable across + line-only shifts for the same finding identity. +- SARIF `automationDetails.id` must be unique per run; result `kind` should be + explicit when emitted. ### Explainability contract (core owns facts) @@ -256,6 +263,13 @@ Agents must preserve these semantics: - **3** — analysis gating failure (e.g., `--fail-threshold` exceeded or new clones in `--ci` as designed) - **5** — internal error (unexpected exception escaped top-level CLI handling) +Changed-scope flags are contract-sensitive: + +- `--changed-only` keeps the canonical analysis/report full, but applies clone + summary/threshold evaluation to the changed-files projection. +- `--diff-against` requires `--changed-only`. +- `--paths-from-git-diff` implies `--changed-only`. + If you introduce a new exit reason, document it and add tests. --- @@ -349,7 +363,8 @@ Use this map to route changes to the right owner module. - `codeclone/report/*.py` (other modules) — deterministic projections/format transforms ( text/markdown/sarif/derived/findings/suggestions); avoid injecting new analysis heuristics here. - `codeclone/mcp_service.py` — typed, in-process MCP service adapter over the current pipeline/report contracts; keep - it read-only and deterministic; do not move shell UX or `sys.exit` behavior here. + it deterministic; allow only session-local in-memory state such as reviewed markers, and never move shell UX or + `sys.exit` behavior here. 
- `codeclone/mcp_server.py` — optional MCP launcher/server wiring, transport config, and MCP tool/resource registration; keep dependency loading lazy so base installs/CI do not require MCP runtime packages. - `codeclone/html_report.py` — public HTML facade/re-export surface; preserve backward-compatible imports here; do not @@ -453,6 +468,7 @@ Policy: - Canonical report JSON schema/payload semantics (`REPORT_SCHEMA_VERSION` contract family). - Documented report projections and their machine/user-facing semantics (HTML/Markdown/SARIF/Text). - Documented MCP launcher/install behavior, tool names, resource URIs, and read-only semantics. +- Session-local MCP review state semantics (`mark_finding_reviewed`, `exclude_reviewed`) as documented public behavior. - Documented finding families/kinds/ids and suppression-facing report fields. - Metrics baseline schema/compatibility where used by CI/gating. - Benchmark schema/outputs if consumed as a reproducible contract surface. diff --git a/CHANGELOG.md b/CHANGELOG.md index 00bfb5b..1cd0bfb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,7 +6,29 @@ - Add optional `codeclone[mcp]` extra and `codeclone-mcp` launcher. - Add a deterministic, read-only MCP server over the canonical pipeline and report contracts. -- Expose MCP tools/resources for repository analysis, run summaries, report sections, findings, hotlists, and gate previews. +- Expose diff-aware MCP tools/resources for changed-files analysis, run comparison, report sections, findings, + remediation payloads, hotlists, granular checks, and gate previews. +- Add stable MCP resources for latest-run summary/report/health/gates/changed projections and schema discovery. +- Add session-local reviewed-finding state for long AI-agent workflows without mutating baseline or repo state. +- Add stable HTML deep-link anchors (`finding-{finding_id}`) for clone and structural finding cards. 
+ +### CLI + +- Add `--changed-only`, `--diff-against`, and `--paths-from-git-diff` for changed-scope clone review and gating over a + full canonical analysis. +- Render changed-scope results as a first-class summary block in normal CLI output while keeping quiet mode compact. + +### SARIF + +- Stabilize `primaryLocationLineHash` across line-only shifts by hashing finding identity without line numbers. +- Emit run-unique `automationDetails.id`, optional `startTimeUtc`, and explicit result `kind: "fail"`. +- Move ancillary finding identity fields to SARIF `properties` and keep `partialFingerprints` minimal. + +### HTML + +- Add IDE picker with persistent selection (localStorage) supporting PyCharm, IntelliJ IDEA, VS Code, Cursor, Fleet, and + Zed. +- Make file paths across Clones, Quality, Suggestions, Dead Code, and Findings tabs clickable IDE deep links. ## [2.0.0b2] diff --git a/README.md b/README.md index 2c52577..af4fc80 100644 --- a/README.md +++ b/README.md @@ -26,6 +26,13 @@ Docs: [orenlab.github.io/codeclone](https://orenlab.github.io/codeclone/) · Live sample report: [orenlab.github.io/codeclone/examples/report/](https://orenlab.github.io/codeclone/examples/report/) +> [!NOTE] +> This README and docs site track the in-development `v2.0.x` line from `main`. +> For the latest stable CodeClone documentation (`v1.4.4`), see the +> [`v1.4.4` README](https://github.com/orenlab/codeclone/blob/v1.4.4/README.md) +> and the +> [`v1.4.4` docs tree](https://github.com/orenlab/codeclone/tree/v1.4.4/docs). + ## Features - **Clone detection** — function (CFG fingerprint), block (statement windows), and segment (report-only) clones @@ -48,6 +55,8 @@ codeclone . --html # generate HTML report codeclone . --html --open-html-report # generate and open HTML report codeclone . --json --md --sarif --text # generate machine-readable reports codeclone . --html --json --timestamped-report-paths # keep timestamped report snapshots +codeclone . 
--changed-only --diff-against main # changed-scope clone gating against git diff +codeclone . --paths-from-git-diff HEAD~1 # shorthand diff source for changed-scope review codeclone . --ci # CI mode (--fail-on-new --no-color --quiet) ``` @@ -80,8 +89,29 @@ For local command-based clients, prefer `stdio`. Use `streamable-http` only when the client expects a remote MCP endpoint. CodeClone MCP is read-only and baseline-aware. It exposes deterministic tools -for analysis, summaries, findings, hotspots, report sections, and gate previews -without mutating source files or baselines. +for: + +- full repository analysis and changed-files analysis +- run summaries and run-to-run comparison +- findings, hotspots, remediation payloads, and PR summaries +- granular clone / complexity / coupling / cohesion / dead-code checks +- session-local review markers for long agent workflows + +It never mutates source files, baselines, or repo state. +Diff-aware MCP calls use repo-relative `changed_paths` lists (or `git_diff_ref`) +and may reuse the same `run_id` when the canonical report digest stays +unchanged. +Focused `check_*` MCP tools may trigger a full analysis first when no stored run +exists yet. + +Latest-run resources are also available for MCP-capable clients: + +- `codeclone://latest/summary` +- `codeclone://latest/report.json` +- `codeclone://latest/health` +- `codeclone://latest/gates` +- `codeclone://latest/changed` +- `codeclone://schema` Docs: [MCP interface contract](https://orenlab.github.io/codeclone/book/20-mcp-interface/) @@ -240,6 +270,7 @@ Dynamic/runtime false positives are resolved via explicit inline suppressions, n "...": "..." }, "runtime": { + "analysis_started_at_utc": "...", "report_generated_at_utc": "..." 
} }, @@ -329,20 +360,20 @@ CFG semantics: [CFG semantics](https://orenlab.github.io/codeclone/cfg/) ## Documentation -| Topic | Link | -|----------------------------|----------------------------------------------------------------------------------------------------| -| Contract book (start here) | [Contracts and guarantees](https://orenlab.github.io/codeclone/book/00-intro/) | -| Exit codes | [Exit codes and failure policy](https://orenlab.github.io/codeclone/book/03-contracts-exit-codes/) | -| Configuration | [Config and defaults](https://orenlab.github.io/codeclone/book/04-config-and-defaults/) | -| Baseline contract | [Baseline contract](https://orenlab.github.io/codeclone/book/06-baseline/) | -| Cache contract | [Cache contract](https://orenlab.github.io/codeclone/book/07-cache/) | -| Report contract | [Report contract](https://orenlab.github.io/codeclone/book/08-report/) | +| Topic | Link | +|----------------------------|-----------------------------------------------------------------------------------------------------| +| Contract book (start here) | [Contracts and guarantees](https://orenlab.github.io/codeclone/book/00-intro/) | +| Exit codes | [Exit codes and failure policy](https://orenlab.github.io/codeclone/book/03-contracts-exit-codes/) | +| Configuration | [Config and defaults](https://orenlab.github.io/codeclone/book/04-config-and-defaults/) | +| Baseline contract | [Baseline contract](https://orenlab.github.io/codeclone/book/06-baseline/) | +| Cache contract | [Cache contract](https://orenlab.github.io/codeclone/book/07-cache/) | +| Report contract | [Report contract](https://orenlab.github.io/codeclone/book/08-report/) | | Metrics & quality gates | [Metrics and quality gates](https://orenlab.github.io/codeclone/book/15-metrics-and-quality-gates/) | -| Dead code | [Dead-code contract](https://orenlab.github.io/codeclone/book/16-dead-code-contract/) | -| Docker benchmark contract | [Benchmarking 
contract](https://orenlab.github.io/codeclone/book/18-benchmarking/) | -| Determinism | [Determinism policy](https://orenlab.github.io/codeclone/book/12-determinism/) | +| Dead code | [Dead-code contract](https://orenlab.github.io/codeclone/book/16-dead-code-contract/) | +| Docker benchmark contract | [Benchmarking contract](https://orenlab.github.io/codeclone/book/18-benchmarking/) | +| Determinism | [Determinism policy](https://orenlab.github.io/codeclone/book/12-determinism/) | -## * Benchmarking +## * Benchmarking
Reproducible Docker Benchmark diff --git a/codeclone/_cli_args.py b/codeclone/_cli_args.py index d2796b9..f995660 100644 --- a/codeclone/_cli_args.py +++ b/codeclone/_cli_args.py @@ -130,6 +130,23 @@ def build_parser(version: str) -> _ArgumentParser: default=DEFAULT_PROCESSES, help=ui.HELP_PROCESSES, ) + _add_bool_optional_argument( + analysis_group, + flag="--changed-only", + help_text=ui.HELP_CHANGED_ONLY, + ) + analysis_group.add_argument( + "--diff-against", + default=None, + metavar="GIT_REF", + help=ui.HELP_DIFF_AGAINST, + ) + analysis_group.add_argument( + "--paths-from-git-diff", + default=None, + metavar="GIT_REF", + help=ui.HELP_PATHS_FROM_GIT_DIFF, + ) _add_optional_path_argument( analysis_group, flag="--cache-path", diff --git a/codeclone/_cli_meta.py b/codeclone/_cli_meta.py index 6d893ec..b9b8c20 100644 --- a/codeclone/_cli_meta.py +++ b/codeclone/_cli_meta.py @@ -67,6 +67,7 @@ class ReportMeta(TypedDict): health_grade: str | None analysis_mode: str metrics_computed: list[str] + analysis_started_at_utc: str | None report_generated_at_utc: str @@ -91,6 +92,7 @@ def _build_report_meta( health_grade: str | None, analysis_mode: str, metrics_computed: tuple[str, ...], + analysis_started_at_utc: str | None, report_generated_at_utc: str, ) -> ReportMeta: project_name = scan_root.name or str(scan_root) @@ -133,5 +135,6 @@ def _build_report_meta( "health_grade": health_grade, "analysis_mode": analysis_mode, "metrics_computed": list(metrics_computed), + "analysis_started_at_utc": analysis_started_at_utc, "report_generated_at_utc": report_generated_at_utc, } diff --git a/codeclone/_cli_summary.py b/codeclone/_cli_summary.py index d1d2369..a0496f0 100644 --- a/codeclone/_cli_summary.py +++ b/codeclone/_cli_summary.py @@ -25,6 +25,14 @@ class MetricsSnapshot: suppressed_dead_code_count: int = 0 +@dataclass(frozen=True, slots=True) +class ChangedScopeSnapshot: + paths_count: int + findings_total: int + findings_new: int + findings_known: int + + class 
_Printer(Protocol): def print(self, *objects: object, **kwargs: object) -> None: ... @@ -149,3 +157,34 @@ def _print_metrics( suppressed=metrics.suppressed_dead_code_count, ) ) + + +def _print_changed_scope( + *, + console: _Printer, + quiet: bool, + changed_scope: ChangedScopeSnapshot, +) -> None: + if quiet: + console.print( + ui.fmt_changed_scope_compact( + paths=changed_scope.paths_count, + findings=changed_scope.findings_total, + new=changed_scope.findings_new, + known=changed_scope.findings_known, + ) + ) + return + + from rich.rule import Rule + + console.print() + console.print(Rule(title=ui.CHANGED_SCOPE_TITLE, style="dim", characters="\u2500")) + console.print(ui.fmt_changed_scope_paths(count=changed_scope.paths_count)) + console.print( + ui.fmt_changed_scope_findings( + total=changed_scope.findings_total, + new=changed_scope.findings_new, + known=changed_scope.findings_known, + ) + ) diff --git a/codeclone/_html_css.py b/codeclone/_html_css.py index 3accd98..fd18350 100644 --- a/codeclone/_html_css.py +++ b/codeclone/_html_css.py @@ -1075,6 +1075,11 @@ .theme-toggle{font-size:0;gap:0;width:32px;height:32px; padding:0;align-items:center;justify-content:center} .theme-toggle svg{width:16px;height:16px} + .ide-picker-btn{font-size:0;gap:0;width:32px;height:32px; + padding:0;align-items:center;justify-content:center} + .ide-picker-btn svg{width:16px;height:16px} + .ide-picker-label{display:none} + .ide-menu{right:0;min-width:140px} .main-tabs-wrap{position:sticky;top:0;z-index:90;padding:var(--sp-2) 0 0} .main-tabs{padding:var(--sp-1);gap:2px; background: @@ -1091,10 +1096,41 @@ .brand-logo{width:28px;height:28px} } +/* IDE link */ +.ide-link{color:inherit;text-decoration:none;cursor:default} +[data-ide]:not([data-ide=""]) .ide-link{cursor:pointer;color:var(--accent-primary); + text-decoration-line:underline;text-decoration-style:dotted;text-underline-offset:2px} +[data-ide]:not([data-ide=""]) .ide-link:hover{text-decoration-style:solid} + +/* IDE picker 
dropdown */ +.ide-picker{position:relative;display:inline-flex} +.ide-picker-btn{display:inline-flex;align-items:center;gap:var(--sp-1); + padding:var(--sp-1) var(--sp-3);background:none;border:1px solid var(--border); + border-radius:var(--radius-md);cursor:pointer;color:var(--text-muted);font-size:.85rem; + font-weight:500;font-family:inherit;transition:all var(--dur-fast) var(--ease); + white-space:nowrap} +.ide-picker-btn:hover{color:var(--text-primary);background:var(--bg-raised);border-color:var(--border-strong)} +.ide-picker-btn svg{width:16px;height:16px;flex-shrink:0} +.ide-picker-btn[aria-expanded="true"]{color:var(--accent-primary);border-color:var(--accent-primary)} +.ide-menu{display:none;position:absolute;top:100%;right:0;margin-top:var(--sp-1); + min-width:160px;background:var(--bg-surface);border:1px solid var(--border); + border-radius:var(--radius);box-shadow:0 4px 12px rgba(0,0,0,.15); + z-index:100;padding:var(--sp-1) 0;list-style:none} +.ide-menu[data-open]{display:block} +.ide-menu li{padding:0} +.ide-menu button{display:flex;align-items:center;gap:var(--sp-2);width:100%; + padding:var(--sp-1) var(--sp-3);background:none;border:none;color:var(--text-primary); + font-size:.8rem;font-family:var(--font-sans);cursor:pointer;text-align:left} +.ide-menu button:hover{background:var(--bg-alt)} +.ide-menu button[aria-checked="true"]{color:var(--accent-primary);font-weight:600} +.ide-menu button[aria-checked="true"]::before{content:'\\2713';font-size:.7rem; + width:14px;text-align:center;flex-shrink:0} +.ide-menu button[aria-checked="false"]::before{content:'';width:14px;flex-shrink:0} + /* Print */ @media print{ .topbar,.toolbar,.pagination,.theme-toggle,.toast-container, - .novelty-tabs,.clear-btn,.btn{display:none!important} + .novelty-tabs,.clear-btn,.btn,.ide-picker{display:none!important} .tab-panel{display:block!important;break-inside:avoid} .group-body{display:block!important} body{background:#fff;color:#000} diff --git a/codeclone/_html_js.py 
b/codeclone/_html_js.py index 12ad40e..5f95074 100644 --- a/codeclone/_html_js.py +++ b/codeclone/_html_js.py @@ -567,6 +567,105 @@ _LAZY_HIGHLIGHT = "" +# --------------------------------------------------------------------------- +# IDE links +# --------------------------------------------------------------------------- + +_IDE_LINKS = r""" +(function initIdeLinks(){ + const KEY='codeclone-ide'; + const root=document.documentElement; + var scanRoot=root.getAttribute('data-scan-root')||''; + var projectName=scanRoot.replace(/\/$/,'').split('/').pop()||''; + + function relPath(abs){ + var r=scanRoot.replace(/\/$/,'')+'/'; + if(abs.indexOf(r)===0)return abs.substring(r.length); + return abs; + } + + const SCHEMES={ + pycharm:{label:'PyCharm', + url:function(f,l){return 'jetbrains://pycharm/navigate/reference?project='+encodeURIComponent(projectName)+'&path='+encodeURIComponent(relPath(f))+':'+l}}, + idea:{label:'IntelliJ IDEA', + url:function(f,l){return 'jetbrains://idea/navigate/reference?project='+encodeURIComponent(projectName)+'&path='+encodeURIComponent(relPath(f))+':'+l}}, + vscode:{label:'VS Code', + url:function(f,l){return 'vscode://file'+f+':'+l}}, + cursor:{label:'Cursor', + url:function(f,l){return 'cursor://file'+f+':'+l}}, + fleet:{label:'Fleet', + url:function(f,l){return 'fleet://open?file='+encodeURIComponent(f)+'&line='+l}}, + zed:{label:'Zed', + url:function(f,l){return 'zed://file'+f+':'+l}}, + '': {label:'None',url:null} + }; + + var current=localStorage.getItem(KEY)||''; + root.setAttribute('data-ide',current); + + const btn=$('.ide-picker-btn'); + const menu=$('.ide-menu'); + const label=$('.ide-picker-label'); + if(!btn||!menu)return; + + function updateLabel(){ + if(!label)return; + var s=SCHEMES[current]; + label.textContent=s&&current?s.label:'IDE'; + } + + function setChecked(){ + menu.querySelectorAll('button').forEach(function(b){ + b.setAttribute('aria-checked',b.dataset.ide===current?'true':'false'); + }); + } + + function applyHrefs(){ + 
var s=SCHEMES[current]; + $$('.ide-link[data-file]').forEach(function(a){ + if(!current||!s||!s.url){a.removeAttribute('href');return} + var f=a.getAttribute('data-file'),l=a.getAttribute('data-line')||'1'; + if(!f)return; + a.setAttribute('href',s.url(f,l)); + }); + } + + setChecked(); + updateLabel(); + applyHrefs(); + + // Reapply hrefs when new content becomes visible (tab switch) + var mo=new MutationObserver(function(){applyHrefs()}); + document.querySelectorAll('.tab-panel').forEach(function(p){ + mo.observe(p,{attributes:true,attributeFilter:['class']}); + }); + + btn.addEventListener('click',function(e){ + e.stopPropagation(); + var open=menu.hasAttribute('data-open'); + if(open){menu.removeAttribute('data-open');btn.setAttribute('aria-expanded','false')} + else{menu.setAttribute('data-open','');btn.setAttribute('aria-expanded','true')} + }); + + document.addEventListener('click',function(){ + menu.removeAttribute('data-open');btn.setAttribute('aria-expanded','false'); + }); + + menu.addEventListener('click',function(e){ + e.stopPropagation(); + var b=e.target.closest('button[data-ide]'); + if(!b)return; + current=b.dataset.ide; + localStorage.setItem(KEY,current); + root.setAttribute('data-ide',current); + setChecked(); + updateLabel(); + applyHrefs(); + menu.removeAttribute('data-open');btn.setAttribute('aria-expanded','false'); + }); + +})(); +""" # --------------------------------------------------------------------------- # Public API @@ -589,6 +688,7 @@ _TABLE_SORT, _SCOPE_COUNTERS, _LAZY_HIGHLIGHT, + _IDE_LINKS, ) diff --git a/codeclone/_html_report/_assemble.py b/codeclone/_html_report/_assemble.py index 91172af..94cd045 100644 --- a/codeclone/_html_report/_assemble.py +++ b/codeclone/_html_report/_assemble.py @@ -10,7 +10,7 @@ from .. 
import __version__, _coerce from .._html_css import build_css -from .._html_escape import _escape_html +from .._html_escape import _escape_attr, _escape_html from .._html_js import build_js from .._html_snippets import _FileCache, _pygments_css from ..contracts import DOCS_URL, ISSUES_URL, REPOSITORY_URL @@ -185,6 +185,22 @@ def _tab_badge(count: int) -> str: else: prov_dot_cls = "dot-neutral" + # -- IDE picker menu -- + ide_options = [ + ("pycharm", "PyCharm"), + ("idea", "IntelliJ IDEA"), + ("vscode", "VS Code"), + ("cursor", "Cursor"), + ("fleet", "Fleet"), + ("zed", "Zed"), + ("", "None"), + ] + ide_menu_items = "".join( + f'
  • ' + for ide_id, label in ide_options + ) + # -- Topbar -- topbar_html = ( '
    ' @@ -195,6 +211,11 @@ def _tab_badge(count: int) -> str: f'
    {ctx.brand_meta}
    ' "
    " '
    ' + '
    ' + '' + f'
    ' f'' f'
    " f"{compare_html}" f"{snippet.code_html}" "" @@ -411,6 +414,7 @@ def _render_group_html( section_novelty: Mapping[str, str], ) -> str: group_id = f"{section_id}-{group_index}" + finding_id = clone_group_id(_clone_kind_for_section(section_id), group_key) search_parts: list[str] = [str(group_key)] for item in items: search_parts.append(str(item.get("qualname", ""))) @@ -463,8 +467,10 @@ def _render_group_html( explanation_html = _render_group_explanation(block_meta) if block_meta else "" return ( - f'
    ' diff --git a/codeclone/_html_report/_sections/_suggestions.py b/codeclone/_html_report/_sections/_suggestions.py index a643229..5cc3212 100644 --- a/codeclone/_html_report/_sections/_suggestions.py +++ b/codeclone/_html_report/_sections/_suggestions.py @@ -11,7 +11,7 @@ from ... import _coerce from ..._html_badges import _tab_empty from ..._html_data_attrs import _build_data_attrs -from ..._html_escape import _escape_html +from ..._html_escape import _escape_attr, _escape_html from ..._html_filters import SPREAD_OPTIONS, _render_select from ...domain.findings import ( CATEGORY_CLONE, @@ -115,9 +115,10 @@ def _render_card(s: Suggestion, ctx: ReportContext) -> str: if s.representative_locations: locs_items = "".join( '
  • ' + f'' f"{_escape_html(loc.relative_path)}" f':{loc.start_line}\u2013{loc.end_line}' - "" + "" f'{_escape_html(ctx.bare_qualname(loc.qualname, loc.filepath))}' "
  • " for loc in s.representative_locations diff --git a/codeclone/_html_report/_tables.py b/codeclone/_html_report/_tables.py index 8d8a1fd..5153c0f 100644 --- a/codeclone/_html_report/_tables.py +++ b/codeclone/_html_report/_tables.py @@ -103,7 +103,9 @@ def _td(col_idx: int, cell: str) -> str: if h in _PATH_HEADERS and ctx is not None: short = ctx.relative_path(cell) return ( - f'{_escape_html(short)}' + f'' + f'' + f"{_escape_html(short)}" ) return f"{_escape_html(cell)}" diff --git a/codeclone/cli.py b/codeclone/cli.py index 4de107c..2a97ef9 100644 --- a/codeclone/cli.py +++ b/codeclone/cli.py @@ -4,13 +4,15 @@ from __future__ import annotations import os +import subprocess import sys import time +from collections.abc import Mapping, Sequence from dataclasses import dataclass from pathlib import Path from typing import TYPE_CHECKING, Literal, Protocol, cast -from . import __version__ +from . import __version__, _coerce from . import ui_messages as ui from ._cli_args import build_parser from ._cli_baselines import ( @@ -80,7 +82,13 @@ from ._cli_runtime import ( validate_numeric_args as _validate_numeric_args_impl, ) -from ._cli_summary import MetricsSnapshot, _print_metrics, _print_summary +from ._cli_summary import ( + ChangedScopeSnapshot, + MetricsSnapshot, + _print_changed_scope, + _print_metrics, + _print_summary, +) from .baseline import Baseline from .cache import Cache, CacheStatus, build_segment_report_projection from .contracts import ISSUES_URL, ExitCode @@ -171,6 +179,207 @@ class ProcessingResult: structural_findings: list[object] | None = None +@dataclass(frozen=True, slots=True) +class ChangedCloneGate: + changed_paths: tuple[str, ...] 
+ new_func: frozenset[str] + new_block: frozenset[str] + total_clone_groups: int + findings_total: int + findings_new: int + findings_known: int + + +_as_mapping = _coerce.as_mapping +_as_sequence = _coerce.as_sequence + + +def _validate_changed_scope_args(*, args: Namespace) -> str | None: + if args.diff_against and args.paths_from_git_diff: + console.print( + ui.fmt_contract_error( + "Use --diff-against or --paths-from-git-diff, not both." + ) + ) + sys.exit(ExitCode.CONTRACT_ERROR) + if args.paths_from_git_diff: + args.changed_only = True + return str(args.paths_from_git_diff) + if args.diff_against and not args.changed_only: + console.print(ui.fmt_contract_error("--diff-against requires --changed-only.")) + sys.exit(ExitCode.CONTRACT_ERROR) + if args.changed_only and not args.diff_against: + console.print( + ui.fmt_contract_error( + "--changed-only requires --diff-against or --paths-from-git-diff." + ) + ) + sys.exit(ExitCode.CONTRACT_ERROR) + return str(args.diff_against) if args.diff_against else None + + +def _normalize_changed_paths( + *, + root_path: Path, + paths: Sequence[str], +) -> tuple[str, ...]: + normalized: set[str] = set() + for raw_path in paths: + candidate = raw_path.strip() + if not candidate: + continue + candidate_path = Path(candidate) + try: + absolute_path = ( + candidate_path.resolve() + if candidate_path.is_absolute() + else (root_path / candidate_path).resolve() + ) + except OSError as exc: + console.print( + ui.fmt_contract_error( + f"Unable to resolve changed path '{candidate}': {exc}" + ) + ) + sys.exit(ExitCode.CONTRACT_ERROR) + try: + relative_path = absolute_path.relative_to(root_path) + except ValueError: + console.print( + ui.fmt_contract_error( + f"Changed path '{candidate}' is outside the scan root." 
+ ) + ) + sys.exit(ExitCode.CONTRACT_ERROR) + cleaned = str(relative_path).replace("\\", "/").strip("/") + if cleaned: + normalized.add(cleaned) + return tuple(sorted(normalized)) + + +def _git_diff_changed_paths(*, root_path: Path, git_diff_ref: str) -> tuple[str, ...]: + try: + completed = subprocess.run( + ["git", "diff", "--name-only", git_diff_ref, "--"], + cwd=str(root_path), + check=True, + capture_output=True, + text=True, + timeout=30, + ) + except ( + FileNotFoundError, + subprocess.CalledProcessError, + subprocess.TimeoutExpired, + ) as exc: + console.print( + ui.fmt_contract_error( + "Unable to resolve changed files from git diff ref " + f"'{git_diff_ref}': {exc}" + ) + ) + sys.exit(ExitCode.CONTRACT_ERROR) + lines = [line.strip() for line in completed.stdout.splitlines() if line.strip()] + return _normalize_changed_paths(root_path=root_path, paths=lines) + + +def _path_matches(relative_path: str, changed_paths: Sequence[str]) -> bool: + return any( + relative_path == candidate or relative_path.startswith(candidate + "/") + for candidate in changed_paths + ) + + +def _flatten_report_findings( + report_document: Mapping[str, object], +) -> list[dict[str, object]]: + findings = _as_mapping(report_document.get("findings")) + groups = _as_mapping(findings.get("groups")) + clone_groups = _as_mapping(groups.get("clones")) + return [ + *[ + dict(_as_mapping(item)) + for item in _as_sequence(clone_groups.get("functions")) + ], + *[dict(_as_mapping(item)) for item in _as_sequence(clone_groups.get("blocks"))], + *[ + dict(_as_mapping(item)) + for item in _as_sequence(clone_groups.get("segments")) + ], + *[ + dict(_as_mapping(item)) + for item in _as_sequence( + _as_mapping(groups.get("structural")).get("groups") + ) + ], + *[ + dict(_as_mapping(item)) + for item in _as_sequence(_as_mapping(groups.get("dead_code")).get("groups")) + ], + *[ + dict(_as_mapping(item)) + for item in _as_sequence(_as_mapping(groups.get("design")).get("groups")) + ], + ] + + +def 
_finding_touches_changed_paths( + finding: Mapping[str, object], + *, + changed_paths: Sequence[str], +) -> bool: + for item in _as_sequence(finding.get("items")): + relative_path = str(_as_mapping(item).get("relative_path", "")).strip() + if relative_path and _path_matches(relative_path, changed_paths): + return True + return False + + +def _changed_clone_gate_from_report( + report_document: Mapping[str, object], + *, + changed_paths: Sequence[str], +) -> ChangedCloneGate: + findings = [ + finding + for finding in _flatten_report_findings(report_document) + if _finding_touches_changed_paths(finding, changed_paths=changed_paths) + ] + clone_findings = [ + finding + for finding in findings + if str(finding.get("family", "")).strip() == "clone" + and str(finding.get("category", "")).strip() in {"function", "block"} + ] + new_func = frozenset( + str(finding.get("id", "")) + for finding in clone_findings + if str(finding.get("category", "")).strip() == "function" + and str(finding.get("novelty", "")).strip() == "new" + ) + new_block = frozenset( + str(finding.get("id", "")) + for finding in clone_findings + if str(finding.get("category", "")).strip() == "block" + and str(finding.get("novelty", "")).strip() == "new" + ) + findings_new = sum( + 1 for finding in findings if str(finding.get("novelty", "")).strip() == "new" + ) + findings_known = sum( + 1 for finding in findings if str(finding.get("novelty", "")).strip() == "known" + ) + return ChangedCloneGate( + changed_paths=tuple(changed_paths), + new_func=new_func, + new_block=new_block, + total_clone_groups=len(clone_findings), + findings_total=len(findings), + findings_new=findings_new, + findings_known=findings_known, + ) + + def process_file( filepath: str, root: str, @@ -262,6 +471,7 @@ def report( new_block: set[str], html_builder: Callable[..., str] | None = None, metrics_diff: MetricsDiff | None = None, + include_report_document: bool = False, ) -> ReportArtifacts: return cast( "ReportArtifacts", @@ -275,6 
+485,7 @@ def report( new_block=new_block, html_builder=html_builder, metrics_diff=metrics_diff, + include_report_document=include_report_document, ), ) @@ -757,6 +968,7 @@ def _enforce_gating( new_block: set[str], metrics_diff: MetricsDiff | None, html_report_path: str | None, + clone_threshold_total: int | None = None, ) -> None: if source_read_contract_failure: console.print( @@ -791,6 +1003,25 @@ def _enforce_gating( new_block=new_block, metrics_diff=metrics_diff, ) + if clone_threshold_total is not None: + reasons = [ + reason + for reason in gate_result.reasons + if not reason.startswith("clone:threshold:") + ] + if 0 <= args.fail_threshold < clone_threshold_total: + reasons.append( + f"clone:threshold:{clone_threshold_total}:{args.fail_threshold}" + ) + gate_result = cast( + "GatingResult", + _pipeline_module().GatingResult( + exit_code=( + int(ExitCode.GATING_FAILURE) if reasons else int(ExitCode.SUCCESS) + ), + reasons=tuple(reasons), + ), + ) metric_reasons = [ reason[len("metric:") :] @@ -867,6 +1098,7 @@ def _main_impl() -> None: run_started_at = time.monotonic() from ._cli_meta import _build_report_meta, _current_report_timestamp_utc + analysis_started_at_utc = _current_report_timestamp_utc() ap = build_parser(__version__) def _prepare_run_inputs() -> tuple[ @@ -879,6 +1111,8 @@ def _prepare_run_inputs() -> tuple[ OutputPaths, Path, dict[str, object] | None, + tuple[str, ...], + str, str, ]: global console @@ -920,6 +1154,12 @@ def _prepare_run_inputs() -> tuple[ config_values=pyproject_config, explicit_cli_dests=explicit_cli_dests, ) + git_diff_ref = _validate_changed_scope_args(args=args) + changed_paths = ( + _git_diff_changed_paths(root_path=root_path, git_diff_ref=git_diff_ref) + if git_diff_ref is not None + else () + ) if args.debug: os.environ["CODECLONE_DEBUG"] = "1" @@ -1028,6 +1268,8 @@ def _prepare_run_inputs() -> tuple[ output_paths, cache_path, shared_baseline_payload, + changed_paths, + analysis_started_at_utc, report_generated_at_utc, ) 
@@ -1041,6 +1283,8 @@ def _prepare_run_inputs() -> tuple[ output_paths, cache_path, shared_baseline_payload, + changed_paths, + analysis_started_at_utc, report_generated_at_utc, ) = _prepare_run_inputs() @@ -1142,6 +1386,7 @@ def _prepare_run_inputs() -> tuple[ ), analysis_mode=("clones_only" if args.skip_metrics else "full"), metrics_computed=_metrics_computed(args), + analysis_started_at_utc=analysis_started_at_utc, report_generated_at_utc=report_generated_at_utc, ) @@ -1214,7 +1459,27 @@ def _prepare_run_inputs() -> tuple[ new_block=new_block, html_builder=build_html_report, metrics_diff=metrics_diff, + include_report_document=bool(changed_paths), + ) + changed_clone_gate = ( + _changed_clone_gate_from_report( + report_artifacts.report_document or {}, + changed_paths=changed_paths, + ) + if args.changed_only and report_artifacts.report_document is not None + else None ) + if changed_clone_gate is not None: + _print_changed_scope( + console=cast("_PrinterLike", console), + quiet=args.quiet, + changed_scope=ChangedScopeSnapshot( + paths_count=len(changed_clone_gate.changed_paths), + findings_total=changed_clone_gate.findings_total, + findings_new=changed_clone_gate.findings_new, + findings_known=changed_clone_gate.findings_known, + ), + ) html_report_path = _write_report_outputs( args=args, output_paths=output_paths, @@ -1230,13 +1495,27 @@ def _prepare_run_inputs() -> tuple[ source_read_contract_failure=source_read_contract_failure, baseline_failure_code=baseline_state.failure_code, metrics_baseline_failure_code=metrics_baseline_state.failure_code, - new_func=new_func, - new_block=new_block, + new_func=set(changed_clone_gate.new_func) if changed_clone_gate else new_func, + new_block=( + set(changed_clone_gate.new_block) if changed_clone_gate else new_block + ), metrics_diff=metrics_diff, html_report_path=html_report_path, + clone_threshold_total=( + changed_clone_gate.total_clone_groups if changed_clone_gate else None + ), ) - if not args.update_baseline and not 
args.fail_on_new and new_clones_count > 0: + notice_new_clones_count = ( + len(changed_clone_gate.new_func) + len(changed_clone_gate.new_block) + if changed_clone_gate is not None + else new_clones_count + ) + if ( + not args.update_baseline + and not args.fail_on_new + and notice_new_clones_count > 0 + ): console.print(ui.WARN_NEW_CLONES_WITHOUT_FAIL) if not args.quiet: diff --git a/codeclone/domain/__init__.py b/codeclone/domain/__init__.py index 59fc066..86ffb32 100644 --- a/codeclone/domain/__init__.py +++ b/codeclone/domain/__init__.py @@ -29,6 +29,7 @@ STRUCTURAL_KIND_CLONE_GUARD_EXIT_DIVERGENCE, STRUCTURAL_KIND_DUPLICATED_BRANCHES, SYMBOL_KIND_CLASS, + SYMBOL_KIND_FUNCTION, SYMBOL_KIND_IMPORT, SYMBOL_KIND_METHOD, ) @@ -127,6 +128,7 @@ "STRUCTURAL_KIND_CLONE_GUARD_EXIT_DIVERGENCE", "STRUCTURAL_KIND_DUPLICATED_BRANCHES", "SYMBOL_KIND_CLASS", + "SYMBOL_KIND_FUNCTION", "SYMBOL_KIND_IMPORT", "SYMBOL_KIND_METHOD", ] diff --git a/codeclone/domain/findings.py b/codeclone/domain/findings.py index 37928b2..07f0c49 100644 --- a/codeclone/domain/findings.py +++ b/codeclone/domain/findings.py @@ -9,6 +9,7 @@ CLONE_KIND_BLOCK: Final = "block" CLONE_KIND_SEGMENT: Final = "segment" +SYMBOL_KIND_FUNCTION: Final = "function" SYMBOL_KIND_CLASS: Final = "class" SYMBOL_KIND_METHOD: Final = "method" SYMBOL_KIND_IMPORT: Final = "import" @@ -69,6 +70,7 @@ "STRUCTURAL_KIND_CLONE_GUARD_EXIT_DIVERGENCE", "STRUCTURAL_KIND_DUPLICATED_BRANCHES", "SYMBOL_KIND_CLASS", + "SYMBOL_KIND_FUNCTION", "SYMBOL_KIND_IMPORT", "SYMBOL_KIND_METHOD", ] diff --git a/codeclone/mcp_server.py b/codeclone/mcp_server.py index e507787..902ce4d 100644 --- a/codeclone/mcp_server.py +++ b/codeclone/mcp_server.py @@ -5,7 +5,8 @@ import argparse import sys -from typing import TYPE_CHECKING, Literal, cast +from collections.abc import Callable +from typing import TYPE_CHECKING, Any, Literal, TypeVar, cast from . 
import __version__ from .contracts import DOCS_URL @@ -35,7 +36,10 @@ class MCPDependencyError(RuntimeError): """Raised when the optional MCP runtime dependency is unavailable.""" -def _load_mcp_runtime() -> tuple[type[FastMCP], ToolAnnotations]: +MCPCallable = TypeVar("MCPCallable", bound=Callable[..., object]) + + +def _load_mcp_runtime() -> tuple[type[FastMCP], ToolAnnotations, ToolAnnotations]: try: from mcp.server.fastmcp import FastMCP as runtime_fastmcp from mcp.types import ToolAnnotations as runtime_tool_annotations @@ -49,6 +53,12 @@ def _load_mcp_runtime() -> tuple[type[FastMCP], ToolAnnotations]: idempotentHint=True, openWorldHint=False, ), + runtime_tool_annotations( + readOnlyHint=False, + destructiveHint=False, + idempotentHint=True, + openWorldHint=False, + ), ) @@ -62,7 +72,7 @@ def build_mcp_server( debug: bool = False, log_level: Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] = "INFO", ) -> FastMCP: - runtime_fastmcp, read_only_tool = _load_mcp_runtime() + runtime_fastmcp, read_only_tool, session_tool = _load_mcp_runtime() service = CodeCloneMCPService(history_limit=history_limit) mcp = runtime_fastmcp( name="CodeClone", @@ -77,19 +87,36 @@ def build_mcp_server( dependencies=(f"codeclone=={__version__}",), ) - @mcp.tool( + def tool(*args: Any, **kwargs: Any) -> Callable[[MCPCallable], MCPCallable]: + return cast( + "Callable[[MCPCallable], MCPCallable]", + mcp.tool(*args, **kwargs), + ) + + def resource( + *args: Any, + **kwargs: Any, + ) -> Callable[[MCPCallable], MCPCallable]: + return cast( + "Callable[[MCPCallable], MCPCallable]", + mcp.resource(*args, **kwargs), + ) + + @tool( title="Analyze Repository", description=( "Run a deterministic CodeClone analysis for a repository and register " "the result as the latest MCP run." 
), - annotations=read_only_tool, + annotations=session_tool, structured_output=True, ) def analyze_repository( root: str = ".", analysis_mode: str = "full", respect_pyproject: bool = True, + changed_paths: list[str] | None = None, + git_diff_ref: str | None = None, processes: int | None = None, min_loc: int | None = None, min_stmt: int | None = None, @@ -97,6 +124,9 @@ def analyze_repository( block_min_stmt: int | None = None, segment_min_loc: int | None = None, segment_min_stmt: int | None = None, + complexity_threshold: int | None = None, + coupling_threshold: int | None = None, + cohesion_threshold: int | None = None, baseline_path: str | None = None, metrics_baseline_path: str | None = None, max_baseline_size_mb: int | None = None, @@ -109,6 +139,66 @@ def analyze_repository( root=root, analysis_mode=analysis_mode, # type: ignore[arg-type] respect_pyproject=respect_pyproject, + changed_paths=tuple(changed_paths or ()), + git_diff_ref=git_diff_ref, + processes=processes, + min_loc=min_loc, + min_stmt=min_stmt, + block_min_loc=block_min_loc, + block_min_stmt=block_min_stmt, + segment_min_loc=segment_min_loc, + segment_min_stmt=segment_min_stmt, + complexity_threshold=complexity_threshold, + coupling_threshold=coupling_threshold, + cohesion_threshold=cohesion_threshold, + baseline_path=baseline_path, + metrics_baseline_path=metrics_baseline_path, + max_baseline_size_mb=max_baseline_size_mb, + cache_policy=cache_policy, # type: ignore[arg-type] + cache_path=cache_path, + max_cache_size_mb=max_cache_size_mb, + ) + ) + + @tool( + title="Analyze Changed Paths", + description=( + "Run a deterministic CodeClone analysis and return a changed-files " + "projection using explicit paths or a git diff ref." 
+ ), + annotations=session_tool, + structured_output=True, + ) + def analyze_changed_paths( + root: str = ".", + changed_paths: list[str] | None = None, + git_diff_ref: str | None = None, + analysis_mode: str = "full", + respect_pyproject: bool = True, + processes: int | None = None, + min_loc: int | None = None, + min_stmt: int | None = None, + block_min_loc: int | None = None, + block_min_stmt: int | None = None, + segment_min_loc: int | None = None, + segment_min_stmt: int | None = None, + complexity_threshold: int | None = None, + coupling_threshold: int | None = None, + cohesion_threshold: int | None = None, + baseline_path: str | None = None, + metrics_baseline_path: str | None = None, + max_baseline_size_mb: int | None = None, + cache_policy: str = "reuse", + cache_path: str | None = None, + max_cache_size_mb: int | None = None, + ) -> dict[str, object]: + return service.analyze_changed_paths( + MCPAnalysisRequest( + root=root, + changed_paths=tuple(changed_paths or ()), + git_diff_ref=git_diff_ref, + analysis_mode=analysis_mode, # type: ignore[arg-type] + respect_pyproject=respect_pyproject, processes=processes, min_loc=min_loc, min_stmt=min_stmt, @@ -116,6 +206,9 @@ def analyze_repository( block_min_stmt=block_min_stmt, segment_min_loc=segment_min_loc, segment_min_stmt=segment_min_stmt, + complexity_threshold=complexity_threshold, + coupling_threshold=coupling_threshold, + cohesion_threshold=cohesion_threshold, baseline_path=baseline_path, metrics_baseline_path=metrics_baseline_path, max_baseline_size_mb=max_baseline_size_mb, @@ -125,7 +218,7 @@ def analyze_repository( ) ) - @mcp.tool( + @tool( title="Get Run Summary", description="Return the stored summary for the latest or specified MCP run.", annotations=read_only_tool, @@ -134,13 +227,13 @@ def analyze_repository( def get_run_summary(run_id: str | None = None) -> dict[str, object]: return service.get_run_summary(run_id) - @mcp.tool( + @tool( title="Evaluate Gates", description=( "Evaluate CodeClone 
gate conditions against an existing MCP run without " "modifying baselines or exiting the process." ), - annotations=read_only_tool, + annotations=session_tool, structured_output=True, ) def evaluate_gates( @@ -170,7 +263,7 @@ def evaluate_gates( ) ) - @mcp.tool( + @tool( title="Get Report Section", description=( "Return a canonical CodeClone report section for the latest or " @@ -188,7 +281,7 @@ def get_report_section( section=section, # type: ignore[arg-type] ) - @mcp.tool( + @tool( title="List Findings", description=( "List canonical finding groups with deterministic ordering, optional " @@ -200,23 +293,37 @@ def get_report_section( def list_findings( run_id: str | None = None, family: str = "all", + category: str | None = None, severity: str | None = None, source_kind: str | None = None, novelty: str = "all", + sort_by: str = "default", + detail_level: str = "normal", + changed_paths: list[str] | None = None, + git_diff_ref: str | None = None, + exclude_reviewed: bool = False, offset: int = 0, limit: int = 50, + max_results: int | None = None, ) -> dict[str, object]: return service.list_findings( run_id=run_id, family=family, # type: ignore[arg-type] + category=category, severity=severity, source_kind=source_kind, novelty=novelty, # type: ignore[arg-type] + sort_by=sort_by, # type: ignore[arg-type] + detail_level=detail_level, # type: ignore[arg-type] + changed_paths=tuple(changed_paths or ()), + git_diff_ref=git_diff_ref, + exclude_reviewed=exclude_reviewed, offset=offset, limit=limit, + max_results=max_results, ) - @mcp.tool( + @tool( title="Get Finding", description="Return a single canonical finding group by id.", annotations=read_only_tool, @@ -228,7 +335,24 @@ def get_finding( ) -> dict[str, object]: return service.get_finding(finding_id=finding_id, run_id=run_id) - @mcp.tool( + @tool( + title="Get Remediation", + description="Return actionable remediation guidance for a single finding.", + annotations=read_only_tool, + structured_output=True, + ) + def 
get_remediation( + finding_id: str, + run_id: str | None = None, + detail_level: str = "full", + ) -> dict[str, object]: + return service.get_remediation( + finding_id=finding_id, + run_id=run_id, + detail_level=detail_level, # type: ignore[arg-type] + ) + + @tool( title="List Hotspots", description=( "Return one of the derived CodeClone hotlists for the latest or " @@ -240,15 +364,219 @@ def get_finding( def list_hotspots( kind: str, run_id: str | None = None, + detail_level: str = "normal", + changed_paths: list[str] | None = None, + git_diff_ref: str | None = None, + exclude_reviewed: bool = False, limit: int = 10, + max_results: int | None = None, ) -> dict[str, object]: return service.list_hotspots( kind=kind, # type: ignore[arg-type] run_id=run_id, + detail_level=detail_level, # type: ignore[arg-type] + changed_paths=tuple(changed_paths or ()), + git_diff_ref=git_diff_ref, + exclude_reviewed=exclude_reviewed, limit=limit, + max_results=max_results, + ) + + @tool( + title="Compare Runs", + description=( + "Compare two registered CodeClone MCP runs by finding ids and health." + ), + annotations=read_only_tool, + structured_output=True, + ) + def compare_runs( + run_id_before: str, + run_id_after: str | None = None, + focus: str = "all", + ) -> dict[str, object]: + return service.compare_runs( + run_id_before=run_id_before, + run_id_after=run_id_after, + focus=focus, # type: ignore[arg-type] ) - @mcp.resource( + @tool( + title="Check Complexity", + description=( + "Return complexity hotspots for a path or repository. If no run " + "exists yet, this triggers a full analysis first." 
+ ), + annotations=session_tool, + structured_output=True, + ) + def check_complexity( + run_id: str | None = None, + root: str = ".", + path: str | None = None, + min_complexity: int | None = None, + max_results: int = 10, + detail_level: str = "normal", + ) -> dict[str, object]: + return service.check_complexity( + run_id=run_id, + root=root, + path=path, + min_complexity=min_complexity, + max_results=max_results, + detail_level=detail_level, # type: ignore[arg-type] + ) + + @tool( + title="Check Clones", + description=( + "Return clone findings for a path or repository. If no run exists " + "yet, this triggers a full analysis first." + ), + annotations=session_tool, + structured_output=True, + ) + def check_clones( + run_id: str | None = None, + root: str = ".", + path: str | None = None, + clone_type: str | None = None, + source_kind: str | None = None, + max_results: int = 10, + detail_level: str = "normal", + ) -> dict[str, object]: + return service.check_clones( + run_id=run_id, + root=root, + path=path, + clone_type=clone_type, + source_kind=source_kind, + max_results=max_results, + detail_level=detail_level, # type: ignore[arg-type] + ) + + @tool( + title="Check Coupling", + description=( + "Return coupling hotspots for a path or repository. If no run " + "exists yet, this triggers a full analysis first." + ), + annotations=session_tool, + structured_output=True, + ) + def check_coupling( + run_id: str | None = None, + root: str = ".", + path: str | None = None, + max_results: int = 10, + detail_level: str = "normal", + ) -> dict[str, object]: + return service.check_coupling( + run_id=run_id, + root=root, + path=path, + max_results=max_results, + detail_level=detail_level, # type: ignore[arg-type] + ) + + @tool( + title="Check Cohesion", + description=( + "Return cohesion hotspots for a path or repository. If no run " + "exists yet, this triggers a full analysis first." 
+ ), + annotations=session_tool, + structured_output=True, + ) + def check_cohesion( + run_id: str | None = None, + root: str = ".", + path: str | None = None, + max_results: int = 10, + detail_level: str = "normal", + ) -> dict[str, object]: + return service.check_cohesion( + run_id=run_id, + root=root, + path=path, + max_results=max_results, + detail_level=detail_level, # type: ignore[arg-type] + ) + + @tool( + title="Check Dead Code", + description=( + "Return dead-code findings for a path or repository. If no run " + "exists yet, this triggers a full analysis first." + ), + annotations=session_tool, + structured_output=True, + ) + def check_dead_code( + run_id: str | None = None, + root: str = ".", + path: str | None = None, + min_severity: str | None = None, + max_results: int = 10, + detail_level: str = "normal", + ) -> dict[str, object]: + return service.check_dead_code( + run_id=run_id, + root=root, + path=path, + min_severity=min_severity, + max_results=max_results, + detail_level=detail_level, # type: ignore[arg-type] + ) + + @tool( + title="Generate PR Summary", + description="Generate a PR-friendly CodeClone summary for changed files.", + annotations=read_only_tool, + structured_output=True, + ) + def generate_pr_summary( + run_id: str | None = None, + changed_paths: list[str] | None = None, + git_diff_ref: str | None = None, + format: str = "markdown", + ) -> dict[str, object]: + return service.generate_pr_summary( + run_id=run_id, + changed_paths=tuple(changed_paths or ()), + git_diff_ref=git_diff_ref, + format=format, # type: ignore[arg-type] + ) + + @tool( + title="Mark Finding Reviewed", + description="Mark a finding as reviewed in the current in-memory MCP session.", + annotations=session_tool, + structured_output=True, + ) + def mark_finding_reviewed( + finding_id: str, + run_id: str | None = None, + note: str | None = None, + ) -> dict[str, object]: + return service.mark_finding_reviewed( + finding_id=finding_id, + run_id=run_id, + note=note, + 
) + + @tool( + title="List Reviewed Findings", + description=( + "List in-memory reviewed findings for the current or specified run." + ), + annotations=read_only_tool, + structured_output=True, + ) + def list_reviewed_findings(run_id: str | None = None) -> dict[str, object]: + return service.list_reviewed_findings(run_id=run_id) + + @resource( "codeclone://latest/summary", title="Latest Run Summary", description="Canonical JSON summary for the latest CodeClone MCP run.", @@ -257,7 +585,7 @@ def list_hotspots( def latest_summary_resource() -> str: return service.read_resource("codeclone://latest/summary") - @mcp.resource( + @resource( "codeclone://latest/report.json", title="Latest Canonical Report", description="Canonical JSON report for the latest CodeClone MCP run.", @@ -266,7 +594,45 @@ def latest_summary_resource() -> str: def latest_report_resource() -> str: return service.read_resource("codeclone://latest/report.json") - @mcp.resource( + @resource( + "codeclone://latest/health", + title="Latest Health Snapshot", + description="Health score and dimensions for the latest CodeClone MCP run.", + mime_type="application/json", + ) + def latest_health_resource() -> str: + return service.read_resource("codeclone://latest/health") + + @resource( + "codeclone://latest/gates", + title="Latest Gate Evaluation", + description="Last gate evaluation result produced by this MCP session.", + mime_type="application/json", + ) + def latest_gates_resource() -> str: + return service.read_resource("codeclone://latest/gates") + + @resource( + "codeclone://latest/changed", + title="Latest Changed Findings", + description=( + "Changed-files projection for the latest diff-aware CodeClone MCP run." 
+ ), + mime_type="application/json", + ) + def latest_changed_resource() -> str: + return service.read_resource("codeclone://latest/changed") + + @resource( + "codeclone://schema", + title="CodeClone Report Schema", + description="JSON schema-style descriptor for the canonical CodeClone report.", + mime_type="application/json", + ) + def schema_resource() -> str: + return service.read_resource("codeclone://schema") + + @resource( "codeclone://runs/{run_id}/summary", title="Run Summary", description="Canonical JSON summary for a specific CodeClone MCP run.", @@ -275,7 +641,7 @@ def latest_report_resource() -> str: def run_summary_resource(run_id: str) -> str: return service.read_resource(f"codeclone://runs/{run_id}/summary") - @mcp.resource( + @resource( "codeclone://runs/{run_id}/report.json", title="Run Canonical Report", description="Canonical JSON report for a specific CodeClone MCP run.", @@ -284,7 +650,7 @@ def run_summary_resource(run_id: str) -> str: def run_report_resource(run_id: str) -> str: return service.read_resource(f"codeclone://runs/{run_id}/report.json") - @mcp.resource( + @resource( "codeclone://runs/{run_id}/findings/{finding_id}", title="Run Finding", description="Canonical JSON finding group for a specific CodeClone MCP run.", diff --git a/codeclone/mcp_service.py b/codeclone/mcp_service.py index c642498..8f765be 100644 --- a/codeclone/mcp_service.py +++ b/codeclone/mcp_service.py @@ -4,6 +4,7 @@ from __future__ import annotations import json +import subprocess from argparse import Namespace from collections import OrderedDict from collections.abc import Mapping, Sequence @@ -12,7 +13,7 @@ from threading import RLock from typing import Literal, cast -from . import __version__ +from . 
import __version__, _coerce from ._cli_args import ( DEFAULT_BASELINE_PATH, DEFAULT_BLOCK_MIN_LOC, @@ -42,7 +43,38 @@ ) from .baseline import Baseline from .cache import Cache, CacheStatus, build_segment_report_projection -from .contracts import REPORT_SCHEMA_VERSION +from .contracts import ( + DEFAULT_COHESION_THRESHOLD, + DEFAULT_COMPLEXITY_THRESHOLD, + DEFAULT_COUPLING_THRESHOLD, + REPORT_SCHEMA_VERSION, +) +from .domain.findings import ( + CATEGORY_CLONE, + CATEGORY_COHESION, + CATEGORY_COMPLEXITY, + CATEGORY_COUPLING, + CATEGORY_DEAD_CODE, + CATEGORY_DEPENDENCY, + CATEGORY_STRUCTURAL, + CLONE_KIND_SEGMENT, + FAMILY_CLONE, + FAMILY_CLONES, + FAMILY_DEAD_CODE, + FAMILY_DESIGN, + FAMILY_STRUCTURAL, +) +from .domain.quality import ( + CONFIDENCE_HIGH, + CONFIDENCE_LOW, + CONFIDENCE_MEDIUM, + EFFORT_EASY, + EFFORT_HARD, + EFFORT_MODERATE, + SEVERITY_CRITICAL, + SEVERITY_INFO, + SEVERITY_WARNING, +) from .errors import CacheError from .models import MetricsDiff from .normalize import NormalizationConfig @@ -57,18 +89,30 @@ process, report, ) -from .report.overview import materialize_report_overview +from .report.json_contract import ( + _source_scope_from_filepaths, + clone_group_id, + dead_code_group_id, + design_group_id, + structural_group_id, +) +from .report.overview import serialize_finding_group_card AnalysisMode = Literal["full", "clones_only"] CachePolicy = Literal["reuse", "refresh", "off"] HotlistKind = Literal[ "most_actionable", "highest_spread", + "highest_priority", "production_hotspots", "test_fixture_hotspots", ] FindingFamilyFilter = Literal["all", "clone", "structural", "dead_code", "design"] FindingNoveltyFilter = Literal["all", "new", "known"] +FindingSort = Literal["default", "priority", "severity", "spread"] +DetailLevel = Literal["summary", "normal", "full"] +ComparisonFocus = Literal["all", "clones", "structural", "metrics"] +PRSummaryFormat = Literal["markdown", "json"] ReportSection = Literal[ "all", "meta", @@ -76,6 +120,7 @@ "findings", 
"metrics", "derived", + "changed", "integrity", ] @@ -100,8 +145,263 @@ _RESOURCE_SECTION_MAP: dict[str, ReportSection] = { "report.json": "all", "summary": "meta", + "health": "metrics", + "changed": "changed", "overview": "derived", } +_SEVERITY_WEIGHT = { + SEVERITY_CRITICAL: 1.0, + SEVERITY_WARNING: 0.6, + SEVERITY_INFO: 0.2, +} +_EFFORT_WEIGHT = { + EFFORT_EASY: 1.0, + EFFORT_MODERATE: 0.6, + EFFORT_HARD: 0.3, +} +_NOVELTY_WEIGHT = {"new": 1.0, "known": 0.5} +_RUNTIME_WEIGHT = { + "production": 1.0, + "mixed": 0.8, + "tests": 0.4, + "fixtures": 0.2, + "other": 0.5, +} +_CONFIDENCE_WEIGHT = { + CONFIDENCE_HIGH: 1.0, + CONFIDENCE_MEDIUM: 0.7, + CONFIDENCE_LOW: 0.3, +} +# Canonical report groups use FAMILY_CLONES ("clones"), while individual finding +# payloads use FAMILY_CLONE ("clone"). +_VALID_ANALYSIS_MODES = frozenset({"full", "clones_only"}) +_VALID_CACHE_POLICIES = frozenset({"reuse", "refresh", "off"}) +_VALID_FINDING_FAMILIES = frozenset( + {"all", "clone", "structural", "dead_code", "design"} +) +_VALID_FINDING_NOVELTY = frozenset({"all", "new", "known"}) +_VALID_FINDING_SORT = frozenset({"default", "priority", "severity", "spread"}) +_VALID_DETAIL_LEVELS = frozenset({"summary", "normal", "full"}) +_VALID_COMPARISON_FOCUS = frozenset({"all", "clones", "structural", "metrics"}) +_VALID_PR_SUMMARY_FORMATS = frozenset({"markdown", "json"}) +_VALID_REPORT_SECTIONS = frozenset( + { + "all", + "meta", + "inventory", + "findings", + "metrics", + "derived", + "changed", + "integrity", + } +) +_VALID_HOTLIST_KINDS = frozenset( + { + "most_actionable", + "highest_spread", + "highest_priority", + "production_hotspots", + "test_fixture_hotspots", + } +) +_VALID_SEVERITIES = frozenset({SEVERITY_CRITICAL, SEVERITY_WARNING, SEVERITY_INFO}) +_as_int = _coerce.as_int +_as_float = _coerce.as_float +_as_str = _coerce.as_str + + +def _design_singleton_group_payload( + *, + category: str, + kind: str, + severity: str, + qualname: str, + filepath: str, + start_line: int, + 
end_line: int, + item_data: Mapping[str, object], + facts: Mapping[str, object], + scan_root: str, +) -> dict[str, object]: + relative_path = filepath + return { + "id": design_group_id(category, qualname), + "family": FAMILY_DESIGN, + "category": category, + "kind": kind, + "severity": severity, + "confidence": CONFIDENCE_HIGH, + "priority": 2.0 if severity == SEVERITY_WARNING else 3.0, + "count": 1, + "source_scope": _source_scope_from_filepaths( + (relative_path,), + scan_root=scan_root, + ), + "spread": {"files": 1, "functions": 1}, + "items": [ + { + "relative_path": relative_path, + "qualname": qualname, + "start_line": start_line, + "end_line": end_line, + **item_data, + } + ], + "facts": dict(facts), + } + + +def _complexity_group_for_threshold_payload( + item_map: Mapping[str, object], + *, + threshold: int, + scan_root: str, +) -> dict[str, object] | None: + cc = _as_int(item_map.get("cyclomatic_complexity", 1), 1) + if cc <= threshold: + return None + severity = SEVERITY_CRITICAL if cc > max(40, threshold * 2) else SEVERITY_WARNING + return _design_singleton_group_payload( + category=CATEGORY_COMPLEXITY, + kind="function_hotspot", + severity=severity, + qualname=str(item_map.get("qualname", "")), + filepath=str(item_map.get("relative_path", "")), + start_line=_as_int(item_map.get("start_line", 0), 0), + end_line=_as_int(item_map.get("end_line", 0), 0), + scan_root=scan_root, + item_data={ + "cyclomatic_complexity": cc, + "nesting_depth": _as_int(item_map.get("nesting_depth", 0), 0), + "risk": str(item_map.get("risk", "")), + }, + facts={ + "cyclomatic_complexity": cc, + "nesting_depth": _as_int(item_map.get("nesting_depth", 0), 0), + }, + ) + + +def _coupling_group_for_threshold_payload( + item_map: Mapping[str, object], + *, + threshold: int, + scan_root: str, +) -> dict[str, object] | None: + cbo = _as_int(item_map.get("cbo", 0), 0) + if cbo <= threshold: + return None + coupled_classes = list(_coerce.as_sequence(item_map.get("coupled_classes"))) + 
return _design_singleton_group_payload( + category=CATEGORY_COUPLING, + kind="class_hotspot", + severity=SEVERITY_WARNING, + qualname=str(item_map.get("qualname", "")), + filepath=str(item_map.get("relative_path", "")), + start_line=_as_int(item_map.get("start_line", 0), 0), + end_line=_as_int(item_map.get("end_line", 0), 0), + scan_root=scan_root, + item_data={ + "cbo": cbo, + "risk": str(item_map.get("risk", "")), + "coupled_classes": coupled_classes, + }, + facts={ + "cbo": cbo, + "coupled_classes": coupled_classes, + }, + ) + + +def _cohesion_group_for_threshold_payload( + item_map: Mapping[str, object], + *, + threshold: int, + scan_root: str, +) -> dict[str, object] | None: + lcom4 = _as_int(item_map.get("lcom4", 0), 0) + if lcom4 <= threshold: + return None + return _design_singleton_group_payload( + category=CATEGORY_COHESION, + kind="class_hotspot", + severity=SEVERITY_WARNING, + qualname=str(item_map.get("qualname", "")), + filepath=str(item_map.get("relative_path", "")), + start_line=_as_int(item_map.get("start_line", 0), 0), + end_line=_as_int(item_map.get("end_line", 0), 0), + scan_root=scan_root, + item_data={ + "lcom4": lcom4, + "risk": str(item_map.get("risk", "")), + "method_count": _as_int(item_map.get("method_count", 0), 0), + "instance_var_count": _as_int(item_map.get("instance_var_count", 0), 0), + }, + facts={ + "lcom4": lcom4, + "method_count": _as_int(item_map.get("method_count", 0), 0), + "instance_var_count": _as_int(item_map.get("instance_var_count", 0), 0), + }, + ) + + +def _suggestion_finding_id_payload(suggestion: object) -> str: + if not hasattr(suggestion, "finding_family"): + return "" + family = str(getattr(suggestion, "finding_family", "")).strip() + if family == FAMILY_CLONES: + kind = str(getattr(suggestion, "finding_kind", "")).strip() + subject_key = str(getattr(suggestion, "subject_key", "")).strip() + return clone_group_id(kind or CLONE_KIND_SEGMENT, subject_key) + if family == FAMILY_STRUCTURAL: + return 
structural_group_id( + str(getattr(suggestion, "finding_kind", "")).strip() or CATEGORY_STRUCTURAL, + str(getattr(suggestion, "subject_key", "")).strip(), + ) + category = str(getattr(suggestion, "category", "")).strip() + subject_key = str(getattr(suggestion, "subject_key", "")).strip() + if category == CATEGORY_DEAD_CODE: + return dead_code_group_id(subject_key) + return design_group_id( + category, + subject_key or str(getattr(suggestion, "title", "")), + ) + + +def _git_diff_lines_payload( + *, + root_path: Path, + git_diff_ref: str, +) -> tuple[str, ...]: + try: + completed = subprocess.run( + ["git", "diff", "--name-only", git_diff_ref, "--"], + cwd=root_path, + check=True, + capture_output=True, + text=True, + timeout=30, + ) + except (OSError, subprocess.CalledProcessError, subprocess.TimeoutExpired) as exc: + raise MCPGitDiffError( + f"Unable to resolve changed paths from git diff ref '{git_diff_ref}'." + ) from exc + return tuple( + sorted({line.strip() for line in completed.stdout.splitlines() if line.strip()}) + ) + + +def _load_report_document_payload(report_json: str) -> dict[str, object]: + try: + payload = json.loads(report_json) + except json.JSONDecodeError as exc: + raise MCPServiceError( + f"Generated canonical report is not valid JSON: {exc}" + ) from exc + if not isinstance(payload, dict): + raise MCPServiceError("Generated canonical report must be a JSON object.") + return dict(payload) class MCPServiceError(RuntimeError): @@ -120,6 +420,10 @@ class MCPFindingNotFoundError(MCPServiceError): """Raised when a requested finding id is not present in the selected run.""" +class MCPGitDiffError(MCPServiceError): + """Raised when changed paths cannot be resolved from a git ref.""" + + class _BufferConsole: def __init__(self) -> None: self.messages: list[str] = [] @@ -135,6 +439,8 @@ class MCPAnalysisRequest: root: str = DEFAULT_ROOT analysis_mode: AnalysisMode = "full" respect_pyproject: bool = True + changed_paths: tuple[str, ...] 
= () + git_diff_ref: str | None = None processes: int | None = None min_loc: int | None = None min_stmt: int | None = None @@ -142,6 +448,9 @@ class MCPAnalysisRequest: block_min_stmt: int | None = None segment_min_loc: int | None = None segment_min_stmt: int | None = None + complexity_threshold: int | None = None + coupling_threshold: int | None = None + cohesion_threshold: int | None = None baseline_path: str | None = None metrics_baseline_path: str | None = None max_baseline_size_mb: int | None = None @@ -172,6 +481,8 @@ class MCPRunRecord: report_document: dict[str, object] report_json: str summary: dict[str, object] + changed_paths: tuple[str, ...] + changed_projection: dict[str, object] | None warnings: tuple[str, ...] failures: tuple[str, ...] analysis: AnalysisResult @@ -204,13 +515,28 @@ def get(self, run_id: str | None = None) -> MCPRunRecord: raise MCPRunNotFoundError("No matching MCP analysis run is available.") return self._records[resolved_run_id] + def records(self) -> tuple[MCPRunRecord, ...]: + with self._lock: + return tuple(self._records.values()) + class CodeCloneMCPService: def __init__(self, *, history_limit: int = 16) -> None: self._runs = CodeCloneMCPRunStore(history_limit=history_limit) + self._state_lock = RLock() + self._review_state: dict[str, OrderedDict[str, str | None]] = {} + self._last_gate_results: dict[str, dict[str, object]] = {} + self._spread_max_cache: dict[str, int] = {} def analyze_repository(self, request: MCPAnalysisRequest) -> dict[str, object]: + self._validate_analysis_request(request) root_path = self._resolve_root(request.root) + analysis_started_at_utc = _current_report_timestamp_utc() + changed_paths = self._resolve_request_changed_paths( + root_path=root_path, + changed_paths=request.changed_paths, + git_diff_ref=request.git_diff_ref, + ) args = self._build_args(root_path=root_path, request=request) ( baseline_path, @@ -306,6 +632,7 @@ def analyze_repository(self, request: MCPAnalysisRequest) -> dict[str, object]: 
), analysis_mode=request.analysis_mode, metrics_computed=self._metrics_computed(request.analysis_mode), + analysis_started_at_utc=analysis_started_at_utc, report_generated_at_utc=_current_report_timestamp_utc(), ) @@ -357,7 +684,7 @@ def analyze_repository(self, request: MCPAnalysisRequest) -> dict[str, object]: ) ) - summary = self._build_run_summary_payload( + base_summary = self._build_run_summary_payload( run_id=run_id, root_path=root_path, request=request, @@ -371,6 +698,28 @@ def analyze_repository(self, request: MCPAnalysisRequest) -> dict[str, object]: warnings=warnings, failures=failures, ) + provisional_record = MCPRunRecord( + run_id=run_id, + root=root_path, + request=request, + report_document=report_document, + report_json=report_json, + summary=base_summary, + changed_paths=changed_paths, + changed_projection=None, + warnings=warnings, + failures=failures, + analysis=analysis_result, + new_func=frozenset(new_func), + new_block=frozenset(new_block), + metrics_diff=metrics_diff, + ) + changed_projection = self._build_changed_projection(provisional_record) + summary = self._augment_summary_with_changed( + summary=base_summary, + changed_paths=changed_paths, + changed_projection=changed_projection, + ) record = MCPRunRecord( run_id=run_id, root=root_path, @@ -378,6 +727,8 @@ def analyze_repository(self, request: MCPAnalysisRequest) -> dict[str, object]: report_document=report_document, report_json=report_json, summary=summary, + changed_paths=changed_paths, + changed_projection=changed_projection, warnings=warnings, failures=failures, analysis=analysis_result, @@ -386,11 +737,74 @@ def analyze_repository(self, request: MCPAnalysisRequest) -> dict[str, object]: metrics_diff=metrics_diff, ) self._runs.register(record) + self._prune_session_state() return summary + def analyze_changed_paths(self, request: MCPAnalysisRequest) -> dict[str, object]: + if not request.changed_paths and request.git_diff_ref is None: + raise MCPServiceContractError( + 
"analyze_changed_paths requires changed_paths or git_diff_ref." + ) + return self.analyze_repository(request) + def get_run_summary(self, run_id: str | None = None) -> dict[str, object]: return dict(self._runs.get(run_id).summary) + def compare_runs( + self, + *, + run_id_before: str, + run_id_after: str | None = None, + focus: ComparisonFocus = "all", + ) -> dict[str, object]: + validated_focus = cast( + "ComparisonFocus", + self._validate_choice("focus", focus, _VALID_COMPARISON_FOCUS), + ) + before = self._runs.get(run_id_before) + after = self._runs.get(run_id_after) + before_findings = self._comparison_index(before, focus=validated_focus) + after_findings = self._comparison_index(after, focus=validated_focus) + before_ids = set(before_findings) + after_ids = set(after_findings) + regressions = sorted(after_ids - before_ids) + improvements = sorted(before_ids - after_ids) + common = before_ids & after_ids + health_before = self._summary_health_score(before.summary) + health_after = self._summary_health_score(after.summary) + health_delta = health_after - health_before + verdict = self._comparison_verdict( + regressions=len(regressions), + improvements=len(improvements), + health_delta=health_delta, + ) + return { + "before": { + "run_id": before.run_id, + "health": health_before, + }, + "after": { + "run_id": after.run_id, + "health": health_after, + }, + "health_delta": health_delta, + "verdict": verdict, + "regressions": [ + self._finding_summary_card(after, after_findings[finding_id]) + for finding_id in regressions + ], + "improvements": [ + self._finding_summary_card(before, before_findings[finding_id]) + for finding_id in improvements + ], + "unchanged_count": len(common), + "summary": self._comparison_summary_text( + regressions=len(regressions), + improvements=len(improvements), + health_delta=health_delta, + ), + } + def evaluate_gates(self, request: MCPGateRequest) -> dict[str, object]: record = self._runs.get(request.run_id) gate_args = Namespace( @@ 
-418,7 +832,7 @@ def evaluate_gates(self, request: MCPGateRequest) -> dict[str, object]: new_block=record.new_block, metrics_diff=record.metrics_diff, ) - return { + result = { "run_id": record.run_id, "would_fail": gate_result.exit_code != 0, "exit_code": gate_result.exit_code, @@ -435,6 +849,9 @@ def evaluate_gates(self, request: MCPGateRequest) -> dict[str, object]: "fail_on_new_metrics": request.fail_on_new_metrics, }, } + with self._state_lock: + self._last_gate_results[record.run_id] = dict(result) + return result def get_report_section( self, @@ -442,13 +859,24 @@ def get_report_section( run_id: str | None = None, section: ReportSection = "all", ) -> dict[str, object]: - report_document = self._runs.get(run_id).report_document - if section == "all": + validated_section = cast( + "ReportSection", + self._validate_choice("section", section, _VALID_REPORT_SECTIONS), + ) + record = self._runs.get(run_id) + report_document = record.report_document + if validated_section == "all": return dict(report_document) - payload = report_document.get(section) + if validated_section == "changed": + if record.changed_projection is None: + raise MCPServiceContractError( + "Report section 'changed' is not available in this run." + ) + return dict(record.changed_projection) + payload = report_document.get(validated_section) if not isinstance(payload, Mapping): raise MCPServiceContractError( - f"Report section '{section}' is not available in this run." + f"Report section '{validated_section}' is not available in this run." 
) return dict(payload) @@ -457,32 +885,71 @@ def list_findings( *, run_id: str | None = None, family: FindingFamilyFilter = "all", + category: str | None = None, severity: str | None = None, source_kind: str | None = None, novelty: FindingNoveltyFilter = "all", + sort_by: FindingSort = "default", + detail_level: DetailLevel = "normal", + changed_paths: Sequence[str] = (), + git_diff_ref: str | None = None, + exclude_reviewed: bool = False, offset: int = 0, limit: int = 50, + max_results: int | None = None, ) -> dict[str, object]: + validated_family = cast( + "FindingFamilyFilter", + self._validate_choice("family", family, _VALID_FINDING_FAMILIES), + ) + validated_novelty = cast( + "FindingNoveltyFilter", + self._validate_choice("novelty", novelty, _VALID_FINDING_NOVELTY), + ) + validated_sort = cast( + "FindingSort", + self._validate_choice("sort_by", sort_by, _VALID_FINDING_SORT), + ) + validated_detail = cast( + "DetailLevel", + self._validate_choice("detail_level", detail_level, _VALID_DETAIL_LEVELS), + ) + validated_severity = self._validate_optional_choice( + "severity", + severity, + _VALID_SEVERITIES, + ) record = self._runs.get(run_id) - findings = self._flatten_findings(record.report_document) - filtered = [ - finding - for finding in findings - if self._matches_finding_filters( - finding=finding, - family=family, - severity=severity, - source_kind=source_kind, - novelty=novelty, - ) - ] + paths_filter = self._resolve_query_changed_paths( + record=record, + changed_paths=changed_paths, + git_diff_ref=git_diff_ref, + ) + normalized_limit = max( + 1, + min(max_results if max_results is not None else limit, 200), + ) + filtered = self._query_findings( + record=record, + family=validated_family, + category=category, + severity=validated_severity, + source_kind=source_kind, + novelty=validated_novelty, + sort_by=validated_sort, + detail_level=validated_detail, + changed_paths=paths_filter, + exclude_reviewed=exclude_reviewed, + ) total = len(filtered) 
normalized_offset = max(0, offset) - normalized_limit = max(1, min(limit, 200)) items = filtered[normalized_offset : normalized_offset + normalized_limit] next_offset = normalized_offset + len(items) return { "run_id": record.run_id, + "detail_level": validated_detail, + "sort_by": validated_sort, + "changed_paths": list(paths_filter), "offset": normalized_offset, "limit": normalized_limit, "returned": len(items), @@ -498,38 +965,430 @@ def get_finding( run_id: str | None = None, ) -> dict[str, object]: record = self._runs.get(run_id) - for finding in self._flatten_findings(record.report_document): + for finding in self._base_findings(record): if str(finding.get("id")) == finding_id: - return finding + return self._decorate_finding( + record, + finding, + detail_level="full", + ) raise MCPFindingNotFoundError( f"Finding id '{finding_id}' was not found in run '{record.run_id}'." ) + def get_remediation( + self, + *, + finding_id: str, + run_id: str | None = None, + detail_level: DetailLevel = "full", + ) -> dict[str, object]: + validated_detail = cast( + "DetailLevel", + self._validate_choice("detail_level", detail_level, _VALID_DETAIL_LEVELS), + ) + record = self._runs.get(run_id) + finding = self.get_finding(finding_id=finding_id, run_id=record.run_id) + remediation = self._as_mapping(finding.get("remediation")) + if not remediation: + raise MCPFindingNotFoundError( + f"Finding id '{finding_id}' does not expose remediation guidance." 
+ ) + return { + "run_id": record.run_id, + "finding_id": finding_id, + "detail_level": validated_detail, + "remediation": self._project_remediation( + remediation, + detail_level=validated_detail, + ), + } + def list_hotspots( self, *, kind: HotlistKind, run_id: str | None = None, + detail_level: DetailLevel = "normal", + changed_paths: Sequence[str] = (), + git_diff_ref: str | None = None, + exclude_reviewed: bool = False, limit: int = 10, + max_results: int | None = None, ) -> dict[str, object]: + validated_kind = cast( + "HotlistKind", + self._validate_choice("kind", kind, _VALID_HOTLIST_KINDS), + ) + validated_detail = cast( + "DetailLevel", + self._validate_choice("detail_level", detail_level, _VALID_DETAIL_LEVELS), + ) record = self._runs.get(run_id) - derived = self._as_mapping(record.report_document.get("derived")) - materialized = materialize_report_overview( - overview=self._as_mapping(derived.get("overview")), - hotlists=self._as_mapping(derived.get("hotlists")), - findings=self._as_mapping(record.report_document.get("findings")), + paths_filter = self._resolve_query_changed_paths( + record=record, + changed_paths=changed_paths, + git_diff_ref=git_diff_ref, + ) + rows = self._hotspot_rows( + record=record, + kind=validated_kind, + detail_level=validated_detail, + changed_paths=paths_filter, + exclude_reviewed=exclude_reviewed, + ) + normalized_limit = max( + 1, + min(max_results if max_results is not None else limit, 50), ) - rows = self._as_sequence(materialized.get(kind)) - normalized_limit = max(1, min(limit, 50)) return { "run_id": record.run_id, - "kind": kind, + "kind": validated_kind, + "detail_level": validated_detail, + "changed_paths": list(paths_filter), "returned": min(len(rows), normalized_limit), "total": len(rows), "items": [dict(self._as_mapping(item)) for item in rows[:normalized_limit]], } + def generate_pr_summary( + self, + *, + run_id: str | None = None, + changed_paths: Sequence[str] = (), + git_diff_ref: str | None = None, + 
format: PRSummaryFormat = "markdown", + ) -> dict[str, object]: + output_format = cast( + "PRSummaryFormat", + self._validate_choice("format", format, _VALID_PR_SUMMARY_FORMATS), + ) + record = self._runs.get(run_id) + paths_filter = self._resolve_query_changed_paths( + record=record, + changed_paths=changed_paths, + git_diff_ref=git_diff_ref, + prefer_record_paths=True, + ) + changed_items = self._query_findings( + record=record, + detail_level="summary", + changed_paths=paths_filter, + ) + previous = self._previous_run_for_root(record) + resolved: list[dict[str, object]] = [] + if previous is not None: + compare_payload = self.compare_runs( + run_id_before=previous.run_id, + run_id_after=record.run_id, + focus="all", + ) + resolved = cast("list[dict[str, object]]", compare_payload["improvements"]) + with self._state_lock: + gate_result = dict( + self._last_gate_results.get( + record.run_id, + {"would_fail": False, "reasons": []}, + ) + ) + verdict = self._changed_verdict( + changed_projection={ + "total": len(changed_items), + "new": sum( + 1 for item in changed_items if str(item.get("novelty", "")) == "new" + ), + }, + health_delta=self._summary_health_delta(record.summary), + ) + payload = { + "run_id": record.run_id, + "changed_paths": list(paths_filter), + "health": self._as_mapping(record.summary.get("health")), + "health_delta": self._summary_health_delta(record.summary), + "verdict": verdict, + "new_findings_in_changed_files": changed_items, + "resolved": resolved, + "blocking_gates": list(cast(Sequence[str], gate_result.get("reasons", []))), + } + if output_format == "json": + return payload + return { + "run_id": record.run_id, + "format": output_format, + "content": self._render_pr_summary_markdown(payload), + } + + def mark_finding_reviewed( + self, + *, + finding_id: str, + run_id: str | None = None, + note: str | None = None, + ) -> dict[str, object]: + record = self._runs.get(run_id) + self.get_finding(finding_id=finding_id, run_id=record.run_id) + 
with self._state_lock: + review_map = self._review_state.setdefault(record.run_id, OrderedDict()) + review_map[finding_id] = ( + note.strip() if isinstance(note, str) and note.strip() else None + ) + review_map.move_to_end(finding_id) + return { + "run_id": record.run_id, + "finding_id": finding_id, + "reviewed": True, + "note": review_map[finding_id], + "reviewed_count": len(review_map), + } + + def list_reviewed_findings( + self, + *, + run_id: str | None = None, + ) -> dict[str, object]: + record = self._runs.get(run_id) + with self._state_lock: + review_items = tuple( + self._review_state.get(record.run_id, OrderedDict()).items() + ) + items = [] + for finding_id, note in review_items: + try: + finding = self.get_finding(finding_id=finding_id, run_id=record.run_id) + except MCPFindingNotFoundError: + continue + items.append( + { + "finding_id": finding_id, + "note": note, + "finding": self._project_finding_detail( + finding, + detail_level="summary", + ), + } + ) + return { + "run_id": record.run_id, + "reviewed_count": len(items), + "items": items, + } + + def check_complexity( + self, + *, + run_id: str | None = None, + root: str = ".", + path: str | None = None, + min_complexity: int | None = None, + max_results: int = 10, + detail_level: DetailLevel = "normal", + ) -> dict[str, object]: + validated_detail = cast( + "DetailLevel", + self._validate_choice("detail_level", detail_level, _VALID_DETAIL_LEVELS), + ) + record = self._resolve_granular_record( + run_id=run_id, + root=root, + analysis_mode="full", + ) + findings = self._query_findings( + record=record, + family="design", + category=CATEGORY_COMPLEXITY, + detail_level=validated_detail, + changed_paths=self._path_filter_tuple(path), + sort_by="priority", + ) + if min_complexity is not None: + findings = [ + finding + for finding in findings + if _as_int( + self._as_mapping(finding.get("facts")).get( + "cyclomatic_complexity", + 0, + ) + ) + >= min_complexity + ] + return self._granular_payload( + 
record=record, + check="complexity", + items=findings, + detail_level=validated_detail, + max_results=max_results, + path=path, + ) + + def check_clones( + self, + *, + run_id: str | None = None, + root: str = ".", + path: str | None = None, + clone_type: str | None = None, + source_kind: str | None = None, + max_results: int = 10, + detail_level: DetailLevel = "normal", + ) -> dict[str, object]: + validated_detail = cast( + "DetailLevel", + self._validate_choice("detail_level", detail_level, _VALID_DETAIL_LEVELS), + ) + record = self._resolve_granular_record( + run_id=run_id, + root=root, + analysis_mode="clones_only", + ) + findings = self._query_findings( + record=record, + family="clone", + source_kind=source_kind, + detail_level=validated_detail, + changed_paths=self._path_filter_tuple(path), + sort_by="priority", + ) + if clone_type is not None: + findings = [ + finding + for finding in findings + if str(finding.get("clone_type", "")).strip() == clone_type + ] + return self._granular_payload( + record=record, + check="clones", + items=findings, + detail_level=validated_detail, + max_results=max_results, + path=path, + ) + + def check_coupling( + self, + *, + run_id: str | None = None, + root: str = ".", + path: str | None = None, + max_results: int = 10, + detail_level: DetailLevel = "normal", + ) -> dict[str, object]: + validated_detail = cast( + "DetailLevel", + self._validate_choice("detail_level", detail_level, _VALID_DETAIL_LEVELS), + ) + record = self._resolve_granular_record( + run_id=run_id, + root=root, + analysis_mode="full", + ) + findings = self._query_findings( + record=record, + family="design", + category=CATEGORY_COUPLING, + detail_level=validated_detail, + changed_paths=self._path_filter_tuple(path), + sort_by="priority", + ) + return self._granular_payload( + record=record, + check="coupling", + items=findings, + detail_level=validated_detail, + max_results=max_results, + path=path, + ) + + def check_cohesion( + self, + *, + run_id: str | 
None = None, + root: str = ".", + path: str | None = None, + max_results: int = 10, + detail_level: DetailLevel = "normal", + ) -> dict[str, object]: + validated_detail = cast( + "DetailLevel", + self._validate_choice("detail_level", detail_level, _VALID_DETAIL_LEVELS), + ) + record = self._resolve_granular_record( + run_id=run_id, + root=root, + analysis_mode="full", + ) + findings = self._query_findings( + record=record, + family="design", + category=CATEGORY_COHESION, + detail_level=validated_detail, + changed_paths=self._path_filter_tuple(path), + sort_by="priority", + ) + return self._granular_payload( + record=record, + check="cohesion", + items=findings, + detail_level=validated_detail, + max_results=max_results, + path=path, + ) + + def check_dead_code( + self, + *, + run_id: str | None = None, + root: str = ".", + path: str | None = None, + min_severity: str | None = None, + max_results: int = 10, + detail_level: DetailLevel = "normal", + ) -> dict[str, object]: + validated_detail = cast( + "DetailLevel", + self._validate_choice("detail_level", detail_level, _VALID_DETAIL_LEVELS), + ) + validated_min_severity = self._validate_optional_choice( + "min_severity", + min_severity, + _VALID_SEVERITIES, + ) + record = self._resolve_granular_record( + run_id=run_id, + root=root, + analysis_mode="full", + ) + findings = self._query_findings( + record=record, + family="dead_code", + detail_level=validated_detail, + changed_paths=self._path_filter_tuple(path), + sort_by="priority", + ) + if validated_min_severity is not None: + findings = [ + finding + for finding in findings + if self._severity_rank(str(finding.get("severity", ""))) + >= self._severity_rank(validated_min_severity) + ] + return self._granular_payload( + record=record, + check="dead_code", + items=findings, + detail_level=validated_detail, + max_results=max_results, + path=path, + ) + def read_resource(self, uri: str) -> str: + if uri == "codeclone://schema": + return json.dumps( + 
self._schema_resource_payload(), + ensure_ascii=False, + indent=2, + sort_keys=True, + ) latest_prefix = "codeclone://latest/" run_prefix = "codeclone://runs/" if uri.startswith(latest_prefix): @@ -553,6 +1412,44 @@ def _render_resource(self, record: MCPRunRecord, suffix: str) -> str: indent=2, sort_keys=True, ) + if suffix == "health": + return json.dumps( + self._as_mapping(record.summary.get("health")), + ensure_ascii=False, + indent=2, + sort_keys=True, + ) + if suffix == "gates": + with self._state_lock: + gate_result = self._last_gate_results.get(record.run_id) + if gate_result is None: + raise MCPServiceContractError( + "No gate evaluation result is available in this MCP session." + ) + return json.dumps( + gate_result, + ensure_ascii=False, + indent=2, + sort_keys=True, + ) + if suffix == "changed": + if record.changed_projection is None: + raise MCPServiceContractError( + "Changed-findings projection is not available in this run." + ) + return json.dumps( + record.changed_projection, + ensure_ascii=False, + indent=2, + sort_keys=True, + ) + if suffix == "schema": + return json.dumps( + self._schema_resource_payload(), + ensure_ascii=False, + indent=2, + sort_keys=True, + ) if suffix == "report.json": return record.report_json if suffix == "overview": @@ -575,42 +1472,1201 @@ def _render_resource(self, record: MCPRunRecord, suffix: str) -> str: f"Unsupported CodeClone resource suffix '{suffix}'." 
) - def _resolve_root(self, root: str) -> Path: - try: - root_path = Path(root).expanduser().resolve() - except OSError as exc: - raise MCPServiceContractError(f"Invalid root path '{root}': {exc}") from exc - if not root_path.exists(): - raise MCPServiceContractError(f"Root path does not exist: {root_path}") - if not root_path.is_dir(): - raise MCPServiceContractError(f"Root path is not a directory: {root_path}") - return root_path + def _resolve_request_changed_paths( + self, + *, + root_path: Path, + changed_paths: Sequence[str], + git_diff_ref: str | None, + ) -> tuple[str, ...]: + if changed_paths and git_diff_ref is not None: + raise MCPServiceContractError( + "Provide changed_paths or git_diff_ref, not both." + ) + if git_diff_ref is not None: + return self._git_diff_paths(root_path=root_path, git_diff_ref=git_diff_ref) + if not changed_paths: + return () + return self._normalize_changed_paths(root_path=root_path, paths=changed_paths) - def _build_args(self, *, root_path: Path, request: MCPAnalysisRequest) -> Namespace: - args = Namespace( - root=str(root_path), - min_loc=DEFAULT_MIN_LOC, - min_stmt=DEFAULT_MIN_STMT, - block_min_loc=DEFAULT_BLOCK_MIN_LOC, - block_min_stmt=DEFAULT_BLOCK_MIN_STMT, - segment_min_loc=DEFAULT_SEGMENT_MIN_LOC, - segment_min_stmt=DEFAULT_SEGMENT_MIN_STMT, - processes=DEFAULT_PROCESSES, - cache_path=None, - max_cache_size_mb=DEFAULT_MAX_CACHE_SIZE_MB, - baseline=DEFAULT_BASELINE_PATH, - max_baseline_size_mb=DEFAULT_MAX_BASELINE_SIZE_MB, - update_baseline=False, - fail_on_new=False, - fail_threshold=-1, - ci=False, - fail_complexity=-1, - fail_coupling=-1, - fail_cohesion=-1, - fail_cycles=False, - fail_dead_code=False, - fail_health=-1, - fail_on_new_metrics=False, + def _resolve_query_changed_paths( + self, + *, + record: MCPRunRecord, + changed_paths: Sequence[str], + git_diff_ref: str | None, + prefer_record_paths: bool = False, + ) -> tuple[str, ...]: + if changed_paths or git_diff_ref is not None: + return 
self._resolve_request_changed_paths( + root_path=record.root, + changed_paths=changed_paths, + git_diff_ref=git_diff_ref, + ) + if prefer_record_paths: + return record.changed_paths + return () + + def _normalize_changed_paths( + self, + *, + root_path: Path, + paths: Sequence[str], + ) -> tuple[str, ...]: + normalized: set[str] = set() + for raw_path in paths: + candidate = Path(str(raw_path)).expanduser() + if candidate.is_absolute(): + try: + relative = candidate.resolve().relative_to(root_path) + except (OSError, ValueError) as exc: + raise MCPServiceContractError( + f"Changed path '{raw_path}' is outside root '{root_path}'." + ) from exc + normalized.add(relative.as_posix()) + continue + cleaned = candidate.as_posix().strip("./") + if cleaned: + normalized.add(cleaned) + return tuple(sorted(normalized)) + + def _git_diff_paths( + self, + *, + root_path: Path, + git_diff_ref: str, + ) -> tuple[str, ...]: + lines = _git_diff_lines_payload( + root_path=root_path, + git_diff_ref=git_diff_ref, + ) + return self._normalize_changed_paths(root_path=root_path, paths=lines) + + def _prune_session_state(self) -> None: + active_run_ids = {record.run_id for record in self._runs.records()} + with self._state_lock: + for state_map in ( + self._review_state, + self._last_gate_results, + self._spread_max_cache, + ): + stale_run_ids = [ + run_id for run_id in state_map if run_id not in active_run_ids + ] + for run_id in stale_run_ids: + state_map.pop(run_id, None) + + def _summary_health_score(self, summary: Mapping[str, object]) -> int: + health = self._as_mapping(summary.get("health")) + score = health.get("score", 0) + return _as_int(score, 0) + + def _summary_health_delta(self, summary: Mapping[str, object]) -> int: + metrics_diff = self._as_mapping(summary.get("metrics_diff")) + value = metrics_diff.get("health_delta", 0) + return _as_int(value, 0) + + def _severity_rank(self, severity: str) -> int: + return { + SEVERITY_CRITICAL: 3, + SEVERITY_WARNING: 2, + SEVERITY_INFO: 
1, + }.get(severity, 0) + + def _path_filter_tuple(self, path: str | None) -> tuple[str, ...]: + if not path: + return () + cleaned = Path(path).as_posix().strip("./") + return (cleaned,) if cleaned else () + + def _previous_run_for_root(self, record: MCPRunRecord) -> MCPRunRecord | None: + previous: MCPRunRecord | None = None + for item in self._runs.records(): + if item.run_id == record.run_id: + return previous + if item.root == record.root: + previous = item + return None + + def _resolve_granular_record( + self, + *, + run_id: str | None, + root: str, + analysis_mode: AnalysisMode, + ) -> MCPRunRecord: + if run_id is not None: + return self._runs.get(run_id) + summary = self.analyze_repository( + MCPAnalysisRequest( + root=root, + analysis_mode=analysis_mode, + ) + ) + return self._runs.get(str(summary["run_id"])) + + def _base_findings(self, record: MCPRunRecord) -> list[dict[str, object]]: + report_document = record.report_document + findings = self._as_mapping(report_document.get("findings")) + groups = self._as_mapping(findings.get("groups")) + clone_groups = self._as_mapping(groups.get(FAMILY_CLONES)) + design_groups = self._design_groups_for_record(record, groups=groups) + return [ + *self._dict_list(clone_groups.get("functions")), + *self._dict_list(clone_groups.get("blocks")), + *self._dict_list(clone_groups.get("segments")), + *self._dict_list( + self._as_mapping(groups.get(FAMILY_STRUCTURAL)).get("groups") + ), + *self._dict_list( + self._as_mapping(groups.get(FAMILY_DEAD_CODE)).get("groups") + ), + *design_groups, + ] + + def _design_groups_for_record( + self, + record: MCPRunRecord, + *, + groups: Mapping[str, object], + ) -> list[dict[str, object]]: + canonical_design_groups = self._dict_list( + self._as_mapping(groups.get(FAMILY_DESIGN)).get("groups") + ) + if ( + record.request.complexity_threshold is None + and record.request.coupling_threshold is None + and record.request.cohesion_threshold is None + ): + return canonical_design_groups + + 
metrics = self._as_mapping(record.report_document.get("metrics")) + families = self._as_mapping(metrics.get("families")) + complexity_threshold = ( + record.request.complexity_threshold + if record.request.complexity_threshold is not None + else DEFAULT_COMPLEXITY_THRESHOLD + ) + coupling_threshold = ( + record.request.coupling_threshold + if record.request.coupling_threshold is not None + else DEFAULT_COUPLING_THRESHOLD + ) + cohesion_threshold = ( + record.request.cohesion_threshold + if record.request.cohesion_threshold is not None + else DEFAULT_COHESION_THRESHOLD + ) + groups_out: list[dict[str, object]] = [] + for item in self._as_sequence( + self._as_mapping(families.get(CATEGORY_COMPLEXITY)).get("items") + ): + group = self._complexity_group_for_threshold( + self._as_mapping(item), + threshold=complexity_threshold, + scan_root=str(record.root), + ) + if group is not None: + groups_out.append(group) + for item in self._as_sequence( + self._as_mapping(families.get(CATEGORY_COUPLING)).get("items") + ): + group = self._coupling_group_for_threshold( + self._as_mapping(item), + threshold=coupling_threshold, + scan_root=str(record.root), + ) + if group is not None: + groups_out.append(group) + for item in self._as_sequence( + self._as_mapping(families.get(CATEGORY_COHESION)).get("items") + ): + group = self._cohesion_group_for_threshold( + self._as_mapping(item), + threshold=cohesion_threshold, + scan_root=str(record.root), + ) + if group is not None: + groups_out.append(group) + groups_out.extend( + group + for group in canonical_design_groups + if str(group.get("category", "")) == CATEGORY_DEPENDENCY + ) + groups_out.sort( + key=lambda group: ( + -_as_float(group.get("priority", 0.0), 0.0), + str(group.get("id", "")), + ) + ) + return groups_out + + def _design_singleton_group( + self, + *, + category: str, + kind: str, + severity: str, + qualname: str, + filepath: str, + start_line: int, + end_line: int, + item_data: Mapping[str, object], + facts: Mapping[str, 
object], + scan_root: str, + ) -> dict[str, object]: + return _design_singleton_group_payload( + category=category, + kind=kind, + severity=severity, + qualname=qualname, + filepath=filepath, + start_line=start_line, + end_line=end_line, + item_data=item_data, + facts=facts, + scan_root=scan_root, + ) + + def _complexity_group_for_threshold( + self, + item_map: Mapping[str, object], + *, + threshold: int, + scan_root: str, + ) -> dict[str, object] | None: + return _complexity_group_for_threshold_payload( + item_map, + threshold=threshold, + scan_root=scan_root, + ) + + def _coupling_group_for_threshold( + self, + item_map: Mapping[str, object], + *, + threshold: int, + scan_root: str, + ) -> dict[str, object] | None: + return _coupling_group_for_threshold_payload( + item_map, + threshold=threshold, + scan_root=scan_root, + ) + + def _cohesion_group_for_threshold( + self, + item_map: Mapping[str, object], + *, + threshold: int, + scan_root: str, + ) -> dict[str, object] | None: + return _cohesion_group_for_threshold_payload( + item_map, + threshold=threshold, + scan_root=scan_root, + ) + + def _query_findings( + self, + *, + record: MCPRunRecord, + family: FindingFamilyFilter = "all", + category: str | None = None, + severity: str | None = None, + source_kind: str | None = None, + novelty: FindingNoveltyFilter = "all", + sort_by: FindingSort = "default", + detail_level: DetailLevel = "normal", + changed_paths: Sequence[str] = (), + exclude_reviewed: bool = False, + ) -> list[dict[str, object]]: + findings = self._base_findings(record) + max_spread_value = max( + (self._spread_value(finding) for finding in findings), + default=0, + ) + with self._state_lock: + self._spread_max_cache[record.run_id] = max_spread_value + filtered = [ + finding + for finding in findings + if self._matches_finding_filters( + finding=finding, + family=family, + category=category, + severity=severity, + source_kind=source_kind, + novelty=novelty, + ) + and ( + not changed_paths + or 
self._finding_touches_paths( + finding=finding, + changed_paths=changed_paths, + ) + ) + and (not exclude_reviewed or not self._finding_is_reviewed(record, finding)) + ] + remediation_map = { + str(finding.get("id", "")): self._remediation_for_finding(record, finding) + for finding in filtered + } + priority_map = { + str(finding.get("id", "")): self._priority_score( + record, + finding, + remediation=remediation_map[str(finding.get("id", ""))], + max_spread_value=max_spread_value, + ) + for finding in filtered + } + ordered = self._sort_findings( + record=record, + findings=filtered, + sort_by=sort_by, + priority_map=priority_map, + ) + return [ + self._decorate_finding( + record, + finding, + detail_level=detail_level, + remediation=remediation_map[str(finding.get("id", ""))], + priority_payload=priority_map[str(finding.get("id", ""))], + max_spread_value=max_spread_value, + ) + for finding in ordered + ] + + def _sort_findings( + self, + *, + record: MCPRunRecord, + findings: Sequence[Mapping[str, object]], + sort_by: FindingSort, + priority_map: Mapping[str, Mapping[str, object]] | None = None, + ) -> list[dict[str, object]]: + finding_rows = [dict(finding) for finding in findings] + if sort_by == "default": + return finding_rows + if sort_by == "severity": + finding_rows.sort( + key=lambda finding: ( + -self._severity_rank(str(finding.get("severity", ""))), + str(finding.get("id", "")), + ) + ) + return finding_rows + if sort_by == "spread": + finding_rows.sort( + key=lambda finding: ( + -self._spread_value(finding), + -_as_float(finding.get("priority", 0.0), 0.0), + str(finding.get("id", "")), + ) + ) + return finding_rows + finding_rows.sort( + key=lambda finding: ( + -_as_float( + self._as_mapping( + (priority_map or {}).get(str(finding.get("id", ""))) + ).get("score", 0.0), + 0.0, + ) + if priority_map is not None + else -_as_float(self._priority_score(record, finding)["score"], 0.0), + -self._severity_rank(str(finding.get("severity", ""))), + 
str(finding.get("id", "")), + ) + ) + return finding_rows + + def _decorate_finding( + self, + record: MCPRunRecord, + finding: Mapping[str, object], + *, + detail_level: DetailLevel, + remediation: Mapping[str, object] | None = None, + priority_payload: Mapping[str, object] | None = None, + max_spread_value: int | None = None, + ) -> dict[str, object]: + resolved_remediation = ( + remediation + if remediation is not None + else self._remediation_for_finding(record, finding) + ) + resolved_priority_payload = ( + dict(priority_payload) + if priority_payload is not None + else self._priority_score( + record, + finding, + remediation=resolved_remediation, + max_spread_value=max_spread_value, + ) + ) + payload = dict(finding) + payload["priority_score"] = resolved_priority_payload["score"] + payload["priority_factors"] = resolved_priority_payload["factors"] + payload["locations"] = self._locations_for_finding(record, finding) + payload["html_anchor"] = f"finding-{finding.get('id', '')}" + if resolved_remediation is not None: + payload["remediation"] = resolved_remediation + return self._project_finding_detail(payload, detail_level=detail_level) + + def _project_finding_detail( + self, + finding: Mapping[str, object], + *, + detail_level: DetailLevel, + ) -> dict[str, object]: + if detail_level == "full": + return dict(finding) + if detail_level == "summary": + return self._finding_summary_card_payload(finding) + payload = dict(finding) + if "remediation" in payload: + payload["remediation"] = self._project_remediation( + self._as_mapping(payload["remediation"]), + detail_level="summary", + ) + return payload + + def _finding_summary_card( + self, + record: MCPRunRecord, + finding: Mapping[str, object], + ) -> dict[str, object]: + return self._finding_summary_card_payload( + self._decorate_finding(record, finding, detail_level="normal") + ) + + def _finding_summary_card_payload( + self, + finding: Mapping[str, object], + ) -> dict[str, object]: + card = 
serialize_finding_group_card(finding) + return { + "id": str(finding.get("id", "")), + **card, + "novelty": str(finding.get("novelty", "")), + "priority_score": _as_float(finding.get("priority_score", 0.0), 0.0), + "priority_factors": dict(self._as_mapping(finding.get("priority_factors"))), + "locations": [ + dict(self._as_mapping(item)) + for item in self._as_sequence(finding.get("locations"))[:3] + ], + } + + def _matches_finding_filters( + self, + *, + finding: Mapping[str, object], + family: FindingFamilyFilter, + category: str | None = None, + severity: str | None, + source_kind: str | None, + novelty: FindingNoveltyFilter, + ) -> bool: + finding_family = str(finding.get("family", "")).strip() + if family != "all" and finding_family != family: + return False + if ( + category is not None + and str(finding.get("category", "")).strip() != category + ): + return False + if ( + severity is not None + and str(finding.get("severity", "")).strip() != severity + ): + return False + dominant_kind = str( + self._as_mapping(finding.get("source_scope")).get("dominant_kind", "") + ).strip() + if source_kind is not None and dominant_kind != source_kind: + return False + return novelty == "all" or str(finding.get("novelty", "")).strip() == novelty + + def _finding_touches_paths( + self, + *, + finding: Mapping[str, object], + changed_paths: Sequence[str], + ) -> bool: + normalized_paths = tuple(changed_paths) + for item in self._as_sequence(finding.get("items")): + relative_path = str(self._as_mapping(item).get("relative_path", "")).strip() + if relative_path and self._path_matches(relative_path, normalized_paths): + return True + return False + + def _path_matches(self, relative_path: str, changed_paths: Sequence[str]) -> bool: + for candidate in changed_paths: + if relative_path == candidate or relative_path.startswith(candidate + "/"): + return True + return False + + def _finding_is_reviewed( + self, + record: MCPRunRecord, + finding: Mapping[str, object], + ) -> bool: + 
with self._state_lock: + review_map = self._review_state.get(record.run_id, OrderedDict()) + return str(finding.get("id", "")) in review_map + + def _priority_score( + self, + record: MCPRunRecord, + finding: Mapping[str, object], + *, + remediation: Mapping[str, object] | None = None, + max_spread_value: int | None = None, + ) -> dict[str, object]: + spread_weight = self._spread_weight( + record, + finding, + max_spread_value=max_spread_value, + ) + factors = { + "severity_weight": _SEVERITY_WEIGHT.get( + str(finding.get("severity", "")), + 0.2, + ), + "effort_weight": _EFFORT_WEIGHT.get( + ( + str(remediation.get("effort", EFFORT_MODERATE)) + if remediation is not None + else EFFORT_MODERATE + ), + 0.6, + ), + "novelty_weight": _NOVELTY_WEIGHT.get( + str(finding.get("novelty", "")), + 0.7, + ), + "runtime_weight": _RUNTIME_WEIGHT.get( + str( + self._as_mapping(finding.get("source_scope")).get( + "dominant_kind", + "other", + ) + ), + 0.5, + ), + "spread_weight": spread_weight, + "confidence_weight": _CONFIDENCE_WEIGHT.get( + str(finding.get("confidence", CONFIDENCE_MEDIUM)), + 0.7, + ), + } + product = 1.0 + for value in factors.values(): + product *= max(_as_float(value, 0.01), 0.01) + score = product ** (1.0 / max(len(factors), 1)) + return { + "score": round(score, 4), + "factors": { + key: round(_as_float(value, 0.0), 4) for key, value in factors.items() + }, + } + + def _spread_weight( + self, + record: MCPRunRecord, + finding: Mapping[str, object], + *, + max_spread_value: int | None = None, + ) -> float: + spread_value = self._spread_value(finding) + if max_spread_value is None: + with self._state_lock: + max_spread_value = self._spread_max_cache.get(record.run_id) + if max_spread_value is None: + max_spread_value = max( + (self._spread_value(item) for item in self._base_findings(record)), + default=0, + ) + with self._state_lock: + self._spread_max_cache[record.run_id] = max_spread_value + max_value = max_spread_value + if max_value <= 0: + return 0.3 + 
return max(0.2, min(1.0, spread_value / max_value)) + + def _spread_value(self, finding: Mapping[str, object]) -> int: + spread = self._as_mapping(finding.get("spread")) + files = _as_int(spread.get("files", 0), 0) + functions = _as_int(spread.get("functions", 0), 0) + count = _as_int(finding.get("count", 0), 0) + return max(files, functions, count, 1) + + def _locations_for_finding( + self, + record: MCPRunRecord, + finding: Mapping[str, object], + ) -> list[dict[str, object]]: + locations: list[dict[str, object]] = [] + for item in self._as_sequence(finding.get("items")): + item_map = self._as_mapping(item) + relative_path = str(item_map.get("relative_path", "")).strip() + if not relative_path: + continue + absolute_path = (record.root / relative_path).resolve() + line = _as_int(item_map.get("start_line", 0) or 0, 0) + symbol = str(item_map.get("qualname", item_map.get("module", ""))).strip() + uri = absolute_path.as_uri() + if line > 0: + uri = f"{uri}#L{line}" + locations.append( + { + "file": relative_path, + "line": line, + "symbol": symbol, + "uri": uri, + } + ) + deduped: list[dict[str, object]] = [] + seen: set[tuple[str, int, str]] = set() + for location in locations: + key = ( + str(location.get("file", "")), + _as_int(location.get("line", 0), 0), + str(location.get("symbol", "")), + ) + if key in seen: + continue + seen.add(key) + deduped.append(location) + return deduped + + def _suggestion_finding_id(self, suggestion: object) -> str: + return _suggestion_finding_id_payload(suggestion) + + def _remediation_for_finding( + self, + record: MCPRunRecord, + finding: Mapping[str, object], + ) -> dict[str, object] | None: + suggestion = self._suggestion_for_finding(record, str(finding.get("id", ""))) + if suggestion is None: + return None + source_kind = str(getattr(suggestion, "source_kind", "other")) + spread_files = _as_int(getattr(suggestion, "spread_files", 0), 0) + spread_functions = _as_int(getattr(suggestion, "spread_functions", 0), 0) + title = 
str(getattr(suggestion, "title", "")).strip() + severity = str(finding.get("severity", "")).strip() + novelty = str(finding.get("novelty", "known")).strip() + count = _as_int( + getattr(suggestion, "fact_count", 0) or finding.get("count", 0) or 0, + 0, + ) + safe_refactor_shape = self._safe_refactor_shape(suggestion) + effort = str(getattr(suggestion, "effort", EFFORT_MODERATE)) + confidence = str(getattr(suggestion, "confidence", CONFIDENCE_MEDIUM)) + risk_level = self._risk_level_for_effort(effort) + return { + "effort": effort, + "priority": _as_float(getattr(suggestion, "priority", 0.0), 0.0), + "confidence": confidence, + "safe_refactor_shape": safe_refactor_shape, + "steps": list(getattr(suggestion, "steps", ())), + "risk_level": risk_level, + "why_now": self._why_now_text( + title=title, + severity=severity, + novelty=novelty, + count=count, + source_kind=source_kind, + spread_files=spread_files, + spread_functions=spread_functions, + effort=effort, + ), + "blast_radius": { + "files": spread_files, + "functions": spread_functions, + "is_production": source_kind == "production", + }, + } + + def _suggestion_for_finding( + self, + record: MCPRunRecord, + finding_id: str, + ) -> object | None: + for suggestion in record.analysis.suggestions: + if self._suggestion_finding_id(suggestion) == finding_id: + return suggestion + return None + + def _safe_refactor_shape(self, suggestion: object) -> str: + category = str(getattr(suggestion, "category", "")).strip() + clone_type = str(getattr(suggestion, "clone_type", "")).strip() + title = str(getattr(suggestion, "title", "")).strip() + if category == CATEGORY_CLONE and clone_type == "Type-1": + return "Keep one canonical implementation and route callers through it." + if category == CATEGORY_CLONE and clone_type == "Type-2": + return "Extract shared implementation with explicit parameters." + if category == CATEGORY_CLONE and "Block" in title: + return "Extract the repeated statement sequence into a helper." 
+ if category == CATEGORY_STRUCTURAL: + return "Extract the repeated branch family into a named helper." + if category == CATEGORY_COMPLEXITY: + return "Split the function into smaller named steps." + if category == CATEGORY_COUPLING: + return "Isolate responsibilities and invert unnecessary dependencies." + if category == CATEGORY_COHESION: + return "Split the class by responsibility boundary." + if category == CATEGORY_DEAD_CODE: + return "Delete the unused symbol or document intentional reachability." + if category == CATEGORY_DEPENDENCY: + return "Break the cycle by moving shared abstractions to a lower layer." + return "Extract the repeated logic into a shared, named abstraction." + + def _risk_level_for_effort(self, effort: str) -> str: + return { + EFFORT_EASY: "low", + EFFORT_MODERATE: "medium", + EFFORT_HARD: "high", + }.get(effort, "medium") + + def _why_now_text( + self, + *, + title: str, + severity: str, + novelty: str, + count: int, + source_kind: str, + spread_files: int, + spread_functions: int, + effort: str, + ) -> str: + novelty_text = "new regression" if novelty == "new" else "known debt" + context = ( + "production code" + if source_kind == "production" + else source_kind or "mixed scope" + ) + spread_text = f"{spread_files} files / {spread_functions} functions" + count_text = f"{count} instances" if count > 0 else "localized issue" + return ( + f"{severity.upper()} {title} in {context} — {count_text}, " + f"{spread_text}, {effort} fix, {novelty_text}." 
+ ) + + def _project_remediation( + self, + remediation: Mapping[str, object], + *, + detail_level: DetailLevel, + ) -> dict[str, object]: + if detail_level == "full": + return dict(remediation) + projected = { + "effort": remediation.get("effort"), + "priority": remediation.get("priority"), + "confidence": remediation.get("confidence"), + "safe_refactor_shape": remediation.get("safe_refactor_shape"), + "risk_level": remediation.get("risk_level"), + "why_now": remediation.get("why_now"), + } + if detail_level == "summary": + return projected + projected["blast_radius"] = dict( + self._as_mapping(remediation.get("blast_radius")) + ) + projected["steps"] = list(self._as_sequence(remediation.get("steps"))) + return projected + + def _hotspot_rows( + self, + *, + record: MCPRunRecord, + kind: HotlistKind, + detail_level: DetailLevel, + changed_paths: Sequence[str], + exclude_reviewed: bool, + ) -> list[dict[str, object]]: + findings = self._base_findings(record) + finding_index = {str(finding.get("id", "")): finding for finding in findings} + max_spread_value = max( + (self._spread_value(finding) for finding in findings), + default=0, + ) + with self._state_lock: + self._spread_max_cache[record.run_id] = max_spread_value + remediation_map = { + str(finding.get("id", "")): self._remediation_for_finding(record, finding) + for finding in findings + } + priority_map = { + str(finding.get("id", "")): self._priority_score( + record, + finding, + remediation=remediation_map[str(finding.get("id", ""))], + max_spread_value=max_spread_value, + ) + for finding in findings + } + derived = self._as_mapping(record.report_document.get("derived")) + hotlists = self._as_mapping(derived.get("hotlists")) + if kind == "highest_priority": + ordered_ids = [ + str(finding.get("id", "")) + for finding in self._sort_findings( + record=record, + findings=findings, + sort_by="priority", + priority_map=priority_map, + ) + ] + else: + hotlist_key = f"{kind}_ids" + ordered_ids = [ + str(item) + for 
item in self._as_sequence(hotlists.get(hotlist_key)) + if str(item) + ] + rows: list[dict[str, object]] = [] + for finding_id in ordered_ids: + finding = finding_index.get(finding_id) + if finding is None: + continue + if changed_paths and not self._finding_touches_paths( + finding=finding, + changed_paths=changed_paths, + ): + continue + if exclude_reviewed and self._finding_is_reviewed(record, finding): + continue + finding_id_key = str(finding.get("id", "")) + decorated = self._decorate_finding( + record, + finding, + detail_level=detail_level, + remediation=remediation_map[finding_id_key], + priority_payload=priority_map[finding_id_key], + max_spread_value=max_spread_value, + ) + if detail_level == "summary": + rows.append(self._finding_summary_card_payload(decorated)) + elif detail_level == "full": + rows.append(decorated) + else: + rows.append( + { + **serialize_finding_group_card(decorated), + "id": finding_id, + "novelty": decorated.get("novelty"), + "priority_score": decorated.get("priority_score"), + "priority_factors": decorated.get("priority_factors"), + "locations": decorated.get("locations"), + } + ) + return rows + + def _build_changed_projection( + self, + record: MCPRunRecord, + ) -> dict[str, object] | None: + if not record.changed_paths: + return None + items = self._query_findings( + record=record, + detail_level="summary", + changed_paths=record.changed_paths, + ) + new_count = sum(1 for item in items if str(item.get("novelty", "")) == "new") + known_count = sum( + 1 for item in items if str(item.get("novelty", "")) == "known" + ) + health_delta = self._summary_health_delta(record.summary) + return { + "run_id": record.run_id, + "changed_paths": list(record.changed_paths), + "total": len(items), + "new": new_count, + "known": known_count, + "items": items, + "health": dict(self._as_mapping(record.summary.get("health"))), + "health_delta": health_delta, + "verdict": self._changed_verdict( + changed_projection={"new": new_count, "total": 
len(items)}, + health_delta=health_delta, + ), + } + + def _augment_summary_with_changed( + self, + *, + summary: Mapping[str, object], + changed_paths: Sequence[str], + changed_projection: Mapping[str, object] | None, + ) -> dict[str, object]: + payload = dict(summary) + if changed_paths: + payload["changed_paths"] = list(changed_paths) + if changed_projection is not None: + payload["changed_findings"] = { + "total": _as_int(changed_projection.get("total", 0), 0), + "new": _as_int(changed_projection.get("new", 0), 0), + "known": _as_int(changed_projection.get("known", 0), 0), + "items": [ + dict(self._as_mapping(item)) + for item in self._as_sequence(changed_projection.get("items"))[:10] + ], + } + payload["health_delta"] = _as_int( + changed_projection.get("health_delta", 0), + 0, + ) + payload["verdict"] = str(changed_projection.get("verdict", "stable")) + return payload + + def _changed_verdict( + self, + *, + changed_projection: Mapping[str, object], + health_delta: int, + ) -> str: + if _as_int(changed_projection.get("new", 0), 0) > 0 or health_delta < 0: + return "regressed" + if _as_int(changed_projection.get("total", 0), 0) == 0 and health_delta > 0: + return "improved" + return "stable" + + def _comparison_index( + self, + record: MCPRunRecord, + *, + focus: ComparisonFocus, + ) -> dict[str, dict[str, object]]: + findings = self._base_findings(record) + if focus == "clones": + findings = [f for f in findings if str(f.get("family", "")) == FAMILY_CLONE] + elif focus == "structural": + findings = [ + f for f in findings if str(f.get("family", "")) == FAMILY_STRUCTURAL + ] + elif focus == "metrics": + findings = [ + f + for f in findings + if str(f.get("family", "")) in {FAMILY_DESIGN, FAMILY_DEAD_CODE} + ] + return {str(finding.get("id", "")): dict(finding) for finding in findings} + + def _comparison_verdict( + self, + *, + regressions: int, + improvements: int, + health_delta: int, + ) -> str: + if regressions > 0 or health_delta < 0: + return "regressed" 
+ if improvements > 0 or health_delta > 0: + return "improved" + return "stable" + + def _comparison_summary_text( + self, + *, + regressions: int, + improvements: int, + health_delta: int, + ) -> str: + return ( + f"{improvements} findings resolved, {regressions} new regressions, " + f"health delta {health_delta:+d}" + ) + + def _render_pr_summary_markdown(self, payload: Mapping[str, object]) -> str: + health = self._as_mapping(payload.get("health")) + score = health.get("score", "n/a") + grade = health.get("grade", "n/a") + delta = _as_int(payload.get("health_delta", 0), 0) + changed_items = [ + self._as_mapping(item) + for item in self._as_sequence(payload.get("new_findings_in_changed_files")) + ] + resolved = [ + self._as_mapping(item) + for item in self._as_sequence(payload.get("resolved")) + ] + blocking_gates = [ + str(item) + for item in self._as_sequence(payload.get("blocking_gates")) + if str(item) + ] + lines = [ + "## CodeClone Summary", + "", + ( + f"Health: {score}/100 ({grade}) | Delta: {delta:+d} | " + f"Verdict: {payload.get('verdict', 'stable')}" + ), + "", + f"### New findings in changed files ({len(changed_items)})", + ] + if not changed_items: + lines.append("- None") + else: + lines.extend( + [ + ( + f"- **{str(item.get('severity', 'info')).upper()}** " + f"{item.get('title', 'Finding')} in " + f"`{item.get('location', '(unknown)')}`" + ) + for item in changed_items[:10] + ] + ) + lines.extend(["", f"### Resolved ({len(resolved)})"]) + if not resolved: + lines.append("- None") + else: + lines.extend( + [ + ( + f"- {item.get('title', 'Finding')} in " + f"`{item.get('location', '(unknown)')}`" + ) + for item in resolved[:10] + ] + ) + lines.extend(["", "### Blocking gates"]) + if not blocking_gates: + lines.append("- none") + else: + lines.extend([f"- `{reason}`" for reason in blocking_gates]) + return "\n".join(lines) + + def _granular_payload( + self, + *, + record: MCPRunRecord, + check: str, + items: Sequence[Mapping[str, object]], + 
detail_level: DetailLevel, + max_results: int, + path: str | None, + ) -> dict[str, object]: + bounded_items = [dict(item) for item in items[: max(1, max_results)]] + return { + "run_id": record.run_id, + "check": check, + "detail_level": detail_level, + "path": path, + "returned": len(bounded_items), + "total": len(items), + "health": dict(self._as_mapping(record.summary.get("health"))), + "items": bounded_items, + } + + def _schema_resource_payload(self) -> dict[str, object]: + return { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "CodeCloneCanonicalReport", + "type": "object", + "required": [ + "report_schema_version", + "meta", + "inventory", + "findings", + "derived", + "integrity", + ], + "properties": { + "report_schema_version": { + "type": "string", + "const": REPORT_SCHEMA_VERSION, + }, + "meta": {"type": "object"}, + "inventory": {"type": "object"}, + "findings": {"type": "object"}, + "metrics": {"type": "object"}, + "derived": {"type": "object"}, + "integrity": {"type": "object"}, + }, + } + + def _validate_analysis_request(self, request: MCPAnalysisRequest) -> None: + self._validate_choice( + "analysis_mode", + request.analysis_mode, + _VALID_ANALYSIS_MODES, + ) + self._validate_choice( + "cache_policy", + request.cache_policy, + _VALID_CACHE_POLICIES, + ) + + def _validate_choice( + self, + name: str, + value: str, + allowed: Sequence[str] | frozenset[str], + ) -> str: + if value not in allowed: + allowed_list = ", ".join(sorted(allowed)) + raise MCPServiceContractError( + f"Invalid value for {name}: {value!r}. Expected one of: {allowed_list}." 
+ ) + return value + + def _validate_optional_choice( + self, + name: str, + value: str | None, + allowed: Sequence[str] | frozenset[str], + ) -> str | None: + if value is None: + return None + return self._validate_choice(name, value, allowed) + + def _resolve_root(self, root: str) -> Path: + try: + root_path = Path(root).expanduser().resolve() + except OSError as exc: + raise MCPServiceContractError(f"Invalid root path '{root}': {exc}") from exc + if not root_path.exists(): + raise MCPServiceContractError(f"Root path does not exist: {root_path}") + if not root_path.is_dir(): + raise MCPServiceContractError(f"Root path is not a directory: {root_path}") + return root_path + + def _build_args(self, *, root_path: Path, request: MCPAnalysisRequest) -> Namespace: + args = Namespace( + root=str(root_path), + min_loc=DEFAULT_MIN_LOC, + min_stmt=DEFAULT_MIN_STMT, + block_min_loc=DEFAULT_BLOCK_MIN_LOC, + block_min_stmt=DEFAULT_BLOCK_MIN_STMT, + segment_min_loc=DEFAULT_SEGMENT_MIN_LOC, + segment_min_stmt=DEFAULT_SEGMENT_MIN_STMT, + processes=DEFAULT_PROCESSES, + cache_path=None, + max_cache_size_mb=DEFAULT_MAX_CACHE_SIZE_MB, + baseline=DEFAULT_BASELINE_PATH, + max_baseline_size_mb=DEFAULT_MAX_BASELINE_SIZE_MB, + update_baseline=False, + fail_on_new=False, + fail_threshold=-1, + ci=False, + fail_complexity=-1, + fail_coupling=-1, + fail_cohesion=-1, + fail_cycles=False, + fail_dead_code=False, + fail_health=-1, + fail_on_new_metrics=False, update_metrics_baseline=False, metrics_baseline=DEFAULT_BASELINE_PATH, skip_metrics=False, @@ -750,13 +2806,16 @@ def _build_cache( cache = Cache( cache_path, root=root_path, - max_size_bytes=int(args.max_cache_size_mb) * 1024 * 1024, - min_loc=int(args.min_loc), - min_stmt=int(args.min_stmt), - block_min_loc=int(args.block_min_loc), - block_min_stmt=int(args.block_min_stmt), - segment_min_loc=int(args.segment_min_loc), - segment_min_stmt=int(args.segment_min_stmt), + max_size_bytes=_as_int(args.max_cache_size_mb, 0) * 1024 * 1024, + 
min_loc=_as_int(args.min_loc, DEFAULT_MIN_LOC), + min_stmt=_as_int(args.min_stmt, DEFAULT_MIN_STMT), + block_min_loc=_as_int(args.block_min_loc, DEFAULT_BLOCK_MIN_LOC), + block_min_stmt=_as_int(args.block_min_stmt, DEFAULT_BLOCK_MIN_STMT), + segment_min_loc=_as_int(args.segment_min_loc, DEFAULT_SEGMENT_MIN_LOC), + segment_min_stmt=_as_int( + args.segment_min_stmt, + DEFAULT_SEGMENT_MIN_STMT, + ), ) if policy != "off": cache.load() @@ -793,15 +2852,7 @@ def _metrics_computed(self, analysis_mode: AnalysisMode) -> tuple[str, ...]: ) def _load_report_document(self, report_json: str) -> dict[str, object]: - try: - payload = json.loads(report_json) - except json.JSONDecodeError as exc: - raise MCPServiceError( - f"Generated canonical report is not valid JSON: {exc}" - ) from exc - if not isinstance(payload, dict): - raise MCPServiceError("Generated canonical report must be a JSON object.") - return dict(payload) + return _load_report_document_payload(report_json) def _report_digest(self, report_document: Mapping[str, object]) -> str: integrity = self._as_mapping(report_document.get("integrity")) @@ -916,49 +2967,9 @@ def _metrics_diff_payload( "new_high_coupling_classes": len(new_high_coupling_classes), "new_cycles": len(new_cycles), "new_dead_code": len(new_dead_code), - "health_delta": int(health_delta), + "health_delta": _as_int(health_delta, 0), } - def _flatten_findings( - self, - report_document: Mapping[str, object], - ) -> list[dict[str, object]]: - findings = self._as_mapping(report_document.get("findings")) - groups = self._as_mapping(findings.get("groups")) - clone_groups = self._as_mapping(groups.get("clones")) - return [ - *self._dict_list(clone_groups.get("functions")), - *self._dict_list(clone_groups.get("blocks")), - *self._dict_list(clone_groups.get("segments")), - *self._dict_list(self._as_mapping(groups.get("structural")).get("groups")), - *self._dict_list(self._as_mapping(groups.get("dead_code")).get("groups")), - 
*self._dict_list(self._as_mapping(groups.get("design")).get("groups")), - ] - - def _matches_finding_filters( - self, - *, - finding: Mapping[str, object], - family: FindingFamilyFilter, - severity: str | None, - source_kind: str | None, - novelty: FindingNoveltyFilter, - ) -> bool: - finding_family = str(finding.get("family", "")).strip() - if family != "all" and finding_family != family: - return False - if ( - severity is not None - and str(finding.get("severity", "")).strip() != severity - ): - return False - dominant_kind = str( - self._as_mapping(finding.get("source_scope")).get("dominant_kind", "") - ).strip() - if source_kind is not None and dominant_kind != source_kind: - return False - return novelty == "all" or str(finding.get("novelty", "")).strip() == novelty - def _dict_list(self, value: object) -> list[dict[str, object]]: return [dict(self._as_mapping(item)) for item in self._as_sequence(value)] diff --git a/codeclone/metrics/dead_code.py b/codeclone/metrics/dead_code.py index eeccc81..4762548 100644 --- a/codeclone/metrics/dead_code.py +++ b/codeclone/metrics/dead_code.py @@ -6,7 +6,7 @@ from dataclasses import replace from typing import Literal -from ..domain.findings import CLONE_KIND_FUNCTION, SYMBOL_KIND_METHOD +from ..domain.findings import SYMBOL_KIND_FUNCTION, SYMBOL_KIND_METHOD from ..domain.quality import CONFIDENCE_HIGH, CONFIDENCE_MEDIUM from ..models import DeadCandidate, DeadItem from ..paths import is_test_filepath @@ -104,7 +104,7 @@ def _is_non_actionable_candidate(symbol: DeadCandidate) -> bool: return True # Module-level dynamic hooks (PEP 562) are invoked by import/runtime lookup. - if symbol.kind == CLONE_KIND_FUNCTION: + if symbol.kind == SYMBOL_KIND_FUNCTION: return symbol.local_name in _MODULE_RUNTIME_HOOK_NAMES # Magic methods and visitor callbacks are invoked by runtime dispatch. 
if symbol.kind == SYMBOL_KIND_METHOD: diff --git a/codeclone/pipeline.py b/codeclone/pipeline.py index a3701bb..f99a946 100644 --- a/codeclone/pipeline.py +++ b/codeclone/pipeline.py @@ -190,6 +190,7 @@ class ReportArtifacts: text: str | None = None md: str | None = None sarif: str | None = None + report_document: dict[str, object] | None = None @dataclass(frozen=True, slots=True) @@ -1439,6 +1440,7 @@ def report( new_block: Collection[str], html_builder: Callable[..., str] | None = None, metrics_diff: object | None = None, + include_report_document: bool = False, ) -> ReportArtifacts: contents: dict[str, str | None] = { "html": None, @@ -1466,13 +1468,17 @@ def report( "file_list": list(discovery.all_file_paths), } report_document: dict[str, object] | None = None - needs_report_document = boot.output_paths.html is not None or any( - path is not None - for path in ( - boot.output_paths.json, - boot.output_paths.md, - boot.output_paths.sarif, - boot.output_paths.text, + needs_report_document = ( + include_report_document + or boot.output_paths.html is not None + or any( + path is not None + for path in ( + boot.output_paths.json, + boot.output_paths.md, + boot.output_paths.sarif, + boot.output_paths.text, + ) ) ) @@ -1572,6 +1578,7 @@ def report( md=contents["md"], sarif=contents["sarif"], text=contents["text"], + report_document=report_document, ) diff --git a/codeclone/report/findings.py b/codeclone/report/findings.py index b8745eb..19de07b 100644 --- a/codeclone/report/findings.py +++ b/codeclone/report/findings.py @@ -27,6 +27,7 @@ relative_report_path, report_location_from_structural_occurrence, ) +from .json_contract import structural_group_id if TYPE_CHECKING: from collections.abc import Sequence @@ -117,8 +118,10 @@ def _rows_for(entries: Sequence[StructuralFindingOccurrence]) -> str: short_path = relative_report_path(item.file_path, scan_root=scan_root) rows.append( "" - f'' - f"{_escape_html(short_path)}" + f'' + f'' + f"{_escape_html(short_path)}" 
f'{_source_kind_badge_html(location.source_kind)} ' f"{_escape_html(item.qualname)}" f'{item.start}-{item.end}' @@ -449,9 +452,12 @@ def _render_finding_card( # Scope text — concise spread summary scope_text = _finding_scope_text(deduped_items) + finding_id = structural_group_id(g.finding_kind, g.finding_key) return ( f'
    ' diff --git a/codeclone/report/json_contract.py b/codeclone/report/json_contract.py index 330f92f..e53746c 100644 --- a/codeclone/report/json_contract.py +++ b/codeclone/report/json_contract.py @@ -889,6 +889,9 @@ def _build_meta_payload( ), }, "runtime": { + "analysis_started_at_utc": _optional_str( + meta.get("analysis_started_at_utc") + ), "report_generated_at_utc": _optional_str( meta.get("report_generated_at_utc") ), diff --git a/codeclone/report/sarif.py b/codeclone/report/sarif.py index c6bd6ff..01f5dce 100644 --- a/codeclone/report/sarif.py +++ b/codeclone/report/sarif.py @@ -16,6 +16,7 @@ CATEGORY_COHESION, CATEGORY_COMPLEXITY, CATEGORY_COUPLING, + CATEGORY_DEPENDENCY, CLONE_KIND_BLOCK, CLONE_KIND_FUNCTION, FAMILY_CLONE, @@ -32,6 +33,7 @@ STRUCTURAL_KIND_CLONE_GUARD_EXIT_DIVERGENCE, STRUCTURAL_KIND_DUPLICATED_BRANCHES, SYMBOL_KIND_CLASS, + SYMBOL_KIND_FUNCTION, SYMBOL_KIND_METHOD, ) from ..domain.quality import ( @@ -79,26 +81,12 @@ def _severity_to_level(severity: str) -> str: if severity == SEVERITY_CRITICAL: return "error" if severity == SEVERITY_WARNING: - return SEVERITY_WARNING + return "warning" return "note" -def _slug(text: str) -> str: - slug_chars: list[str] = [] - prev_dash = False - for char in text.lower(): - if char.isalnum(): - slug_chars.append(char) - prev_dash = False - continue - if not prev_dash: - slug_chars.append("-") - prev_dash = True - return "".join(slug_chars).strip("-") or "finding" - - def _rule_name(spec: _RuleSpec) -> str: - return f"codeclone.{_slug(spec.short_description)}" + return f"codeclone.{spec.rule_id}" def _rule_remediation(spec: _RuleSpec) -> str: @@ -285,7 +273,7 @@ def _structural_rule_spec(kind: str) -> _RuleSpec: def _dead_code_rule_spec(category: str) -> _RuleSpec: - if category == CLONE_KIND_FUNCTION: + if category == SYMBOL_KIND_FUNCTION: return _RuleSpec( "CDEAD001", "Unused function", @@ -416,7 +404,7 @@ def _structural_result_message( ) if signature_family == STRUCTURAL_KIND_CLONE_COHORT_DRIFT: 
drift_fields = _as_sequence(signature.get("drift_fields")) - drift_label = ",".join(_text(item) for item in drift_fields) or "profile" + drift_label = ", ".join(_text(item) for item in drift_fields) or "profile" cohort_id = _text(signature.get("cohort_id")) return ( f"Clone cohort drift ({drift_label}), " @@ -439,7 +427,7 @@ def _dead_code_result_message( ) -> str: confidence = _text(group.get("confidence")) or "reported" target = qualname or relative_path - return f"Unused {category} with {confidence} confidence: {target}" + return f"Unused {category} with {confidence} confidence: {target}." def _design_result_message( @@ -451,15 +439,15 @@ def _design_result_message( ) -> str: if category == CATEGORY_COHESION: lcom4 = _as_int(facts.get("lcom4")) - return f"Low cohesion class (LCOM4={lcom4}): {qualname}" + return f"Low cohesion class (LCOM4={lcom4}): {qualname}." if category == CATEGORY_COMPLEXITY: cc = _as_int(facts.get("cyclomatic_complexity")) - return f"High complexity function (CC={cc}): {qualname}" + return f"High complexity function (CC={cc}): {qualname}." if category == CATEGORY_COUPLING: cbo = _as_int(facts.get("cbo")) - return f"High coupling class (CBO={cbo}): {qualname}" + return f"High coupling class (CBO={cbo}): {qualname}." modules = [_text(item.get("module")) for item in items if _text(item.get("module"))] - return f"Dependency cycle ({len(modules)} modules): {' -> '.join(modules)}" + return f"Dependency cycle ({len(modules)} modules): {' -> '.join(modules)}." 
def _result_message(group: Mapping[str, object]) -> str: @@ -515,13 +503,7 @@ def _location_message( ) -> str: family = _text(group.get("family")) category = _text(group.get("category")) - if family == FAMILY_CLONE: - return ( - "Representative occurrence" - if related_id is None - else f"Related occurrence #{related_id}" - ) - if family == FAMILY_STRUCTURAL: + if family in {FAMILY_CLONE, FAMILY_STRUCTURAL}: return ( "Representative occurrence" if related_id is None @@ -533,7 +515,7 @@ def _location_message( if related_id is None else f"Related declaration #{related_id}" ) - if category == "dependency": + if category == CATEGORY_DEPENDENCY: return ( "Cycle member" if related_id is None @@ -688,8 +670,6 @@ def _result_properties(group: Mapping[str, object]) -> dict[str, object]: props, facts=_as_mapping(group.get("facts")), ) - if family == FAMILY_DEAD_CODE: - props["confidence"] = _text(group.get("confidence")) return props @@ -703,15 +683,6 @@ def _partial_fingerprints( path = _text(primary_item.get("relative_path")) qualname = _text(primary_item.get("qualname")) start_line = _as_int(primary_item.get("start_line")) - end_line = _as_int(primary_item.get("end_line")) - fingerprints = { - "rule": rule_id, - "path": path, - } - if qualname: - fingerprints["qualname"] = qualname - if start_line > 0: - fingerprints["region"] = f"{start_line}-{end_line or start_line}" if path and start_line > 0: fingerprint_material = "\0".join( ( @@ -719,16 +690,32 @@ def _partial_fingerprints( finding_id, path, qualname, - str(start_line), - str(end_line or start_line), ) ) - fingerprints["primaryLocationLineHash"] = ( - f"{hashlib.sha256(fingerprint_material.encode('utf-8')).hexdigest()[:16]}" - f":{start_line}" - ) - fingerprints["finding"] = finding_id - return fingerprints + return { + "primaryLocationLineHash": ( + f"{hashlib.sha256(fingerprint_material.encode('utf-8')).hexdigest()[:16]}" + f":{start_line}" + ) + } + return {} + + +def _primary_location_properties( + primary_item: 
Mapping[str, object], +) -> dict[str, object]: + path = _text(primary_item.get("relative_path")) + qualname = _text(primary_item.get("qualname")) + start_line = _as_int(primary_item.get("start_line")) + end_line = _as_int(primary_item.get("end_line")) + props: dict[str, object] = {} + if path: + props["primaryPath"] = path + if qualname: + props["primaryQualname"] = qualname + if start_line > 0: + props["primaryRegion"] = f"{start_line}-{end_line or start_line}" + return props def _baseline_state(group: Mapping[str, object]) -> str: @@ -763,6 +750,7 @@ def _result_entry( result: dict[str, object] = { "ruleId": rule_id, "ruleIndex": rule_index, + "kind": "fail", "level": _severity_to_level(_text(group.get("severity"))), "message": { "text": _result_message(group), @@ -778,6 +766,9 @@ def _result_entry( ), "properties": _result_properties(group), } + if primary_item: + properties = cast(dict[str, object], result["properties"]) + properties.update(_primary_location_properties(primary_item)) baseline_state = _baseline_state(group) if baseline_state: result["baselineState"] = baseline_state @@ -802,6 +793,7 @@ def _result_entry( def render_sarif_report_document(payload: Mapping[str, object]) -> str: meta = _as_mapping(payload.get("meta")) runtime = _as_mapping(meta.get("runtime")) + analysis_started_at = _text(runtime.get("analysis_started_at_utc")) generated_at = _text(runtime.get("report_generated_at_utc")) analysis_mode = _text(meta.get("analysis_mode")) or "full" findings = sorted( @@ -837,6 +829,7 @@ def render_sarif_report_document(payload: Mapping[str, object]) -> str: ] invocation: dict[str, object] = { "executionSuccessful": True, + **({"startTimeUtc": analysis_started_at} if analysis_started_at else {}), **({"endTimeUtc": generated_at} if generated_at else {}), } if scan_root_uri: @@ -846,7 +839,6 @@ def render_sarif_report_document(payload: Mapping[str, object]) -> str: "driver": { "name": "codeclone", "version": _text(meta.get("codeclone_version")), - 
"semanticVersion": _text(meta.get("codeclone_version")), "informationUri": REPOSITORY_URL, "rules": [ { @@ -869,7 +861,20 @@ def render_sarif_report_document(payload: Mapping[str, object]) -> str: } }, "automationDetails": { - "id": f"codeclone/{analysis_mode}", + "id": "/".join( + part + for part in ( + "codeclone", + analysis_mode, + generated_at + or _text( + _as_mapping( + _as_mapping(payload.get("integrity")).get("digest") + ).get("value") + )[:12], + ) + if part + ), }, **( { @@ -898,7 +903,6 @@ def render_sarif_report_document(payload: Mapping[str, object]) -> str: ), **({"reportGeneratedAtUtc": generated_at} if generated_at else {}), }, - "columnKind": "utf16CodeUnits", } return json.dumps( { diff --git a/codeclone/templates.py b/codeclone/templates.py index eed9082..67b2891 100644 --- a/codeclone/templates.py +++ b/codeclone/templates.py @@ -20,7 +20,7 @@ REPORT_TEMPLATE = Template( r""" - + diff --git a/codeclone/ui_messages.py b/codeclone/ui_messages.py index c95a9f3..1b9f35d 100644 --- a/codeclone/ui_messages.py +++ b/codeclone/ui_messages.py @@ -31,6 +31,18 @@ HELP_MIN_LOC = "Minimum Lines of Code (LOC) required for clone analysis.\nDefault: 10." HELP_MIN_STMT = "Minimum AST statement count required for clone analysis.\nDefault: 6." HELP_PROCESSES = "Number of parallel worker processes.\nDefault: 4." +HELP_CHANGED_ONLY = ( + "Limit clone gating and changed-scope summaries to findings that touch\n" + "files from a git diff selection." +) +HELP_DIFF_AGAINST = ( + "Resolve changed files from `git diff --name-only `.\n" + "Use together with --changed-only." +) +HELP_PATHS_FROM_GIT_DIFF = ( + "Shorthand for --changed-only using `git diff --name-only `.\n" + "Useful for PR and CI review flows." +) HELP_CACHE_PATH = ( "Path to the cache file.\n" "If FILE is omitted, uses /.cache/codeclone/cache.json." 
@@ -134,6 +146,7 @@ SUMMARY_TITLE = "Summary" METRICS_TITLE = "Metrics" +CHANGED_SCOPE_TITLE = "Changed Scope" CLI_LAYOUT_MAX_WIDTH = 80 @@ -164,6 +177,9 @@ " lcom4={lcom_avg}/{lcom_max} cycles={cycles} dead_code={dead}" " health={health}({grade})" ) +SUMMARY_COMPACT_CHANGED_SCOPE = ( + "Changed paths={paths} findings={findings} new={new} known={known}" +) WARN_SUMMARY_ACCOUNTING_MISMATCH = ( "Summary accounting mismatch: " @@ -516,6 +532,35 @@ def fmt_metrics_dead_code(count: int, *, suppressed: int = 0) -> str: ) +def fmt_changed_scope_paths(*, count: int) -> str: + return f" {'Paths':<{_L}}{_v(count, 'bold cyan')} from git diff" + + +def fmt_changed_scope_findings(*, total: int, new: int, known: int) -> str: + parts = [ + f"{_v(total, 'bold')} total", + f"{_v(new, 'bold cyan')} new", + f"{_v(known)} known", + ] + separator = " \u00b7 " + return f" {'Findings':<{_L}}{separator.join(parts)}" + + +def fmt_changed_scope_compact( + *, + paths: int, + findings: int, + new: int, + known: int, +) -> str: + return SUMMARY_COMPACT_CHANGED_SCOPE.format( + paths=paths, + findings=findings, + new=new, + known=known, + ) + + def fmt_pipeline_done(elapsed: float) -> str: return f" [dim]Pipeline done in {elapsed:.2f}s[/dim]" diff --git a/docs/README.md b/docs/README.md index fe96524..d6b6ad8 100644 --- a/docs/README.md +++ b/docs/README.md @@ -3,6 +3,13 @@ This site is built with MkDocs and published to [orenlab.github.io/codeclone](https://orenlab.github.io/codeclone/). +!!! note "Version Notice" + This site currently documents the in-development `v2.0.x` line from `main`. + For the latest stable CodeClone documentation (`v1.4.4`), see the + [`v1.4.4` README](https://github.com/orenlab/codeclone/blob/v1.4.4/README.md) + and the + [`v1.4.4` docs tree](https://github.com/orenlab/codeclone/tree/v1.4.4/docs). + It has two documentation layers: - [Contracts Book](book/README.md): **contract-first** documentation. 
This is the canonical diff --git a/docs/architecture.md b/docs/architecture.md index 08b5ec7..f416ddc 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -201,11 +201,16 @@ The MCP layer is intentionally thin. It does not add a separate analysis engine; it adapts the existing pipeline into tools/resources such as: - analyze repository +- analyze changed paths - get run summary +- compare runs - list findings - inspect one finding +- project remediation payloads - list hotspots +- generate PR summary - preview gate outcomes +- keep session-local reviewed markers This keeps agent integrations deterministic and aligned with the same canonical report document used by JSON/HTML/SARIF. diff --git a/docs/book/08-report.md b/docs/book/08-report.md index 24b7a14..8ffe3d1 100644 --- a/docs/book/08-report.md +++ b/docs/book/08-report.md @@ -87,7 +87,9 @@ Per-group common axes (family-specific fields may extend): - Overview hotspot/source-breakdown sections must resolve from canonical report data or deterministic derived IDs; HTML must not silently substitute stale placeholders such as `n/a` or empty-state cards when canonical data exists. -- `report_generated_at_utc` is carried in `meta.runtime` and reused by UI/renderers. +- `analysis_started_at_utc` and `report_generated_at_utc` are carried in + `meta.runtime`; renderers/projections may use them for provenance but must not + reinterpret them as semantic analysis data. - Canonical `meta.scan_root` is normalized to `"."`; absolute runtime paths are exposed under `meta.runtime.*_absolute`. - `clone_type` and `novelty` are group-level properties inside clone groups. diff --git a/docs/book/09-cli.md b/docs/book/09-cli.md index 10b5e3e..0d059cb 100644 --- a/docs/book/09-cli.md +++ b/docs/book/09-cli.md @@ -42,6 +42,13 @@ Refs: - `--open-html-report` is a local UX action layered on top of `--html`; it does not implicitly enable HTML output. 
- `--timestamped-report-paths` only rewrites default report paths requested via bare report flags; explicit FILE values stay unchanged. +- Changed-scope clone review uses: + - `--changed-only` + - `--diff-against GIT_REF` + - `--paths-from-git-diff GIT_REF` + Typical usage: + - `codeclone . --changed-only --diff-against main` + - `codeclone . --paths-from-git-diff HEAD~1` - Contract errors are prefixed by `CONTRACT ERROR:`. - Gating failures are prefixed by `GATING FAILURE:`. - Internal errors use `fmt_internal_error` with optional debug details. @@ -65,9 +72,15 @@ Refs: `.cache/codeclone/`. - `--open-html-report` requires `--html`; invalid combination is a contract error. - `--timestamped-report-paths` requires at least one requested report output; invalid combination is a contract error. +- `--changed-only` requires either `--diff-against` or `--paths-from-git-diff`. +- `--diff-against` requires `--changed-only`. +- `--diff-against` and `--paths-from-git-diff` are mutually exclusive. - Browser-open failure after a successful HTML write is warning-only and does not change the process exit code. - Baseline update write failure is contract error. - In gating mode, unreadable source files are contract errors with higher priority than clone gating failure. +- Changed-scope flags do not create a second canonical report: they project clone + summary/threshold decisions over the changed-files subset after the normal full + analysis completes. 
Refs: @@ -82,6 +95,9 @@ Refs: | Invalid output extension/path | contract | 2 | | `--open-html-report` without `--html` | contract | 2 | | `--timestamped-report-paths` without reports | contract | 2 | +| `--changed-only` without diff source | contract | 2 | +| `--diff-against` without `--changed-only` | contract | 2 | +| `--diff-against` + `--paths-from-git-diff` | contract | 2 | | Baseline untrusted in CI/gating | contract | 2 | | Unreadable source in CI/gating | contract | 2 | | New clones with `--fail-on-new` | gating | 3 | @@ -93,6 +109,8 @@ Refs: - Summary metric ordering is fixed. - Compact summary mode (`--quiet`) is fixed-format text. - Help epilog is generated from static constants. +- `git diff --name-only` input is normalized to sorted repo-relative paths before + changed-scope projection is applied. Refs: diff --git a/docs/book/20-mcp-interface.md b/docs/book/20-mcp-interface.md index 3c2dcc1..bc19dca 100644 --- a/docs/book/20-mcp-interface.md +++ b/docs/book/20-mcp-interface.md @@ -42,6 +42,8 @@ Current server characteristics: - `baseline`, `metrics_baseline`, `cache` - `inventory`, `findings_summary`, `health` - `baseline_diff`, `metrics_diff` + - optional `changed_paths` (`list[str]`, repo-relative), + `changed_findings`, `health_delta`, `verdict` - `warnings`, `failures` The MCP layer does not introduce a separate analysis engine. It calls the @@ -52,27 +54,49 @@ produced by the report contract. 
Current tool set: -| Tool | Purpose | -|----------------------|------------------------------------------------------------------------------------------------------------------| -| `analyze_repository` | Run deterministic CodeClone analysis and register the result as the latest MCP run | -| `get_run_summary` | Return the stored summary for the latest or specified run | -| `evaluate_gates` | Evaluate CI/gating conditions against an existing run without exiting the process | -| `get_report_section` | Return a canonical report section (`meta`, `inventory`, `findings`, `metrics`, `derived`, `integrity`, or `all`) | -| `list_findings` | Return deterministically ordered finding groups with filters and pagination | -| `get_finding` | Return one canonical finding group by id | -| `list_hotspots` | Return one derived hotlist (`most_actionable`, `highest_spread`, `production_hotspots`, `test_fixture_hotspots`) | - -All current tools are registered as read-only MCP tools. +| Tool | Key parameters | Purpose / notes | +|--------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------| +| `analyze_repository` | `root`, `analysis_mode`, `changed_paths`, `git_diff_ref`, inline thresholds, cache/baseline paths | Run deterministic CodeClone analysis and register the result as the latest MCP run | +| `analyze_changed_paths` | `root`, `changed_paths` or `git_diff_ref`, `analysis_mode`, inline thresholds | Diff-aware fast path: analyze a repo and attach a changed-files projection to the run | +| `get_run_summary` | `run_id` | Return the stored summary for the latest or specified run | +| `compare_runs` | `run_id_before`, `run_id_after`, `focus` | Compare two registered runs by finding ids and health delta | +| `evaluate_gates` | `run_id`, 
gate thresholds/booleans | Evaluate CI/gating conditions against an existing run without exiting the process | +| `get_report_section` | `run_id`, `section` | Return a canonical report section (`meta`, `inventory`, `findings`, `metrics`, `derived`, `integrity`, `changed`, or `all`) | +| `list_findings` | `family`, `category`, `severity`, `source_kind`, `novelty`, `sort_by`, `detail_level`, `changed_paths`, `git_diff_ref`, `exclude_reviewed`, pagination | Return deterministically ordered finding groups with filtering and pagination | +| `get_finding` | `finding_id`, `run_id` | Return one canonical finding group by id with locations, priority, and remediation payload when available | +| `get_remediation` | `finding_id`, `run_id`, `detail_level` | Return just the remediation/explainability packet for one finding | +| `list_hotspots` | `kind`, `run_id`, `detail_level`, `changed_paths`, `git_diff_ref`, `exclude_reviewed`, `limit`, `max_results` | Return one derived hotlist (`most_actionable`, `highest_spread`, `highest_priority`, `production_hotspots`, `test_fixture_hotspots`) | +| `check_clones` | `run_id`, `root`, `path`, `clone_type`, `source_kind`, `max_results`, `detail_level` | Return clone findings for a repository or path | +| `check_complexity` | `run_id`, `root`, `path`, `min_complexity`, `max_results`, `detail_level` | Return complexity hotspots for a repository or path | +| `check_coupling` | `run_id`, `root`, `path`, `max_results`, `detail_level` | Return coupling hotspots for a repository or path | +| `check_cohesion` | `run_id`, `root`, `path`, `max_results`, `detail_level` | Return cohesion hotspots for a repository or path | +| `check_dead_code` | `run_id`, `root`, `path`, `min_severity`, `max_results`, `detail_level` | Return dead-code findings for a repository or path | +| `generate_pr_summary` | `run_id`, `changed_paths`, `git_diff_ref`, `format` | Build a PR-friendly changed-files summary in markdown or JSON | +| `mark_finding_reviewed` | 
`finding_id`, `run_id`, `note` | Mark a finding as reviewed in the in-memory MCP session | +| `list_reviewed_findings` | `run_id` | Return the current reviewed findings for the selected run | + +All analysis/report tools are read-only with respect to repo state. The only +mutable MCP tool is `mark_finding_reviewed`, and its state is in-memory only. +`analyze_repository`, `analyze_changed_paths`, `evaluate_gates`, and the +granular `check_*` tools are sessionful: they may populate or reuse in-memory +run state, and the `check_*` tools may trigger a full analysis when no +compatible run exists yet. ## Resources Current resources: -- `codeclone://latest/summary` -- `codeclone://latest/report.json` -- `codeclone://runs/{run_id}/summary` -- `codeclone://runs/{run_id}/report.json` -- `codeclone://runs/{run_id}/findings/{finding_id}` +| Resource | Payload | Availability | +|---------------------------------------------------|-------------------------------------------------------|-------------------------------------------------------| +| `codeclone://latest/summary` | latest run summary projection | always after at least one run | +| `codeclone://latest/report.json` | latest canonical report document | always after at least one run | +| `codeclone://latest/health` | latest health score + dimensions | always after at least one run | +| `codeclone://latest/gates` | latest gate evaluation result | only after `evaluate_gates` in current server process | +| `codeclone://latest/changed` | latest changed-files projection | only for a diff-aware latest run | +| `codeclone://schema` | schema-style descriptor for canonical report sections | always available | +| `codeclone://runs/{run_id}/summary` | run-specific summary projection | for any stored run | +| `codeclone://runs/{run_id}/report.json` | run-specific canonical report | for any stored run | +| `codeclone://runs/{run_id}/findings/{finding_id}` | run-specific canonical finding group | for an existing finding in a stored run 
| Resources are convenience views over already registered runs. They do not trigger fresh analysis by themselves. @@ -83,6 +107,9 @@ trigger fresh analysis by themselves. - no source-file mutation - no baseline update - no metrics-baseline update +- Session review markers are **ephemeral only**: + - stored in memory per server process + - never written to baseline, cache, or report artifacts - MCP must reuse current: - pipeline stages - baseline trust semantics @@ -90,10 +117,12 @@ trigger fresh analysis by themselves. - canonical report contract - `get_run_summary` is a deterministic convenience projection derived from the canonical report (`meta`, `inventory`, `findings.summary`, - `metrics.summary.health`) plus baseline-diff/gate context. + `metrics.summary.health`) plus baseline-diff/gate/changed-files context. - Canonical JSON remains the source of truth for report semantics. - `list_findings` and `list_hotspots` are deterministic projections over the canonical report, not a separate analysis branch. +- `get_remediation` is a deterministic MCP projection over existing + suggestions/explainability data, not a second remediation engine. - `analysis_mode="clones_only"` must mirror the same metric/dependency skip-semantics as the regular pipeline. - Missing optional MCP dependency is handled explicitly by the launcher with a @@ -103,11 +132,22 @@ trigger fresh analysis by themselves. - Tool names are stable public surface. - Resource URI shapes are stable public surface. -- Read-only tool annotations remain accurate. +- Read-only vs session-local tool annotations remain accurate. - `analyze_repository` always registers exactly one latest run. +- `analyze_changed_paths` requires `changed_paths` or `git_diff_ref`. +- `changed_paths` is a structured `list[str]` of repo-relative paths, not a + comma-separated string payload. 
+- `analyze_changed_paths` may return the same `run_id` as a previous run when + the canonical report digest is unchanged; changed-files state is an overlay, + not a second canonical report. - `get_run_summary` with no `run_id` resolves to the latest stored run. - `get_report_section(section="all")` returns the full canonical report document. +- `get_report_section(section="changed")` is available only for diff-aware runs. - `run_id` must equal the canonical report digest for that run. +- Finding `locations` and `html_anchor` values are stable projections over the + current run and do not invent non-canonical ids. +- `compare_runs` is only semantically meaningful when both runs use comparable + repository scope/root and analysis settings. ## Failure modes @@ -130,6 +170,8 @@ trigger fresh analysis by themselves. - `tests/test_mcp_service.py::test_mcp_service_analyze_repository_registers_latest_run` - `tests/test_mcp_service.py::test_mcp_service_lists_findings_and_hotspots` +- `tests/test_mcp_service.py::test_mcp_service_changed_runs_remediation_and_review_flow` +- `tests/test_mcp_service.py::test_mcp_service_granular_checks_pr_summary_and_resources` - `tests/test_mcp_service.py::test_mcp_service_evaluate_gates_on_existing_run` - `tests/test_mcp_service.py::test_mcp_service_resources_expose_latest_summary_and_report` - `tests/test_mcp_server.py::test_mcp_server_exposes_expected_read_only_tools` diff --git a/docs/book/appendix/b-schema-layouts.md b/docs/book/appendix/b-schema-layouts.md index e40c73f..1d3b0a3 100644 --- a/docs/book/appendix/b-schema-layouts.md +++ b/docs/book/appendix/b-schema-layouts.md @@ -98,6 +98,7 @@ Notes: "...": "..." 
}, "runtime": { + "analysis_started_at_utc": "2026-03-11T08:36:29Z", "report_generated_at_utc": "2026-03-11T08:36:32Z" } }, @@ -268,7 +269,7 @@ Notes: "rules": [ { "id": "CCLONE001", - "name": "codeclone.function-clone-group", + "name": "codeclone.CCLONE001", "shortDescription": { "text": "Function clone group" }, @@ -297,6 +298,9 @@ Notes: ] } }, + "automationDetails": { + "id": "codeclone/full/2026-03-11T08:36:32Z" + }, "artifacts": [ { "location": { @@ -308,18 +312,19 @@ Notes: "invocations": [ { "executionSuccessful": true, + "startTimeUtc": "2026-03-11T08:36:29Z", "workingDirectory": { "uri": "file:///repo/project/" } } ], - "columnKind": "utf16CodeUnits", "properties": { "profileVersion": "1.0", "reportSchemaVersion": "2.1" }, "results": [ { + "kind": "fail", "ruleId": "CCLONE001", "ruleIndex": 0, "baselineState": "new", @@ -349,6 +354,11 @@ Notes: } } ], + "properties": { + "primaryPath": "codeclone/report/sarif.py", + "primaryQualname": "codeclone.report.sarif:render_sarif_report_document", + "primaryRegion": "1:10" + }, "relatedLocations": [], "partialFingerprints": { "primaryLocationLineHash": "0123456789abcdef:1" diff --git a/docs/mcp.md b/docs/mcp.md index baea3e8..ef98f8a 100644 --- a/docs/mcp.md +++ b/docs/mcp.md @@ -74,17 +74,61 @@ It does **not**: - mutate source files - add suppressions automatically +Practical contract notes: + +- `changed_paths` is a structured `list[str]` of repo-relative paths, not a + comma-separated string. +- `analyze_changed_paths` adds a changed-files projection to the current run. If + the canonical report digest does not change, the call may return the same + `run_id` as a prior full analysis. +- `compare_runs` is most useful when both runs were produced for the same + repository scope/root and comparable analysis settings. +- The focused `check_*` tools may trigger a full analysis first when no stored + run exists yet. 
+- `mark_finding_reviewed` is the only mutable MCP tool, and its state lives only + in memory for the current server process. + Current tool surface: | Tool | Typical use | |------|-------------| | `analyze_repository` | Run a fresh analysis and register it as the latest in-memory run | +| `analyze_changed_paths` | Run the diff-aware fast path using explicit `changed_paths` or `git_diff_ref` | | `get_run_summary` | Get the compact baseline/cache/health/findings snapshot for the latest or selected run | +| `compare_runs` | Compare two stored runs and see regressions, improvements, and health delta | | `list_findings` | Browse findings with filters and pagination | | `get_finding` | Inspect one finding group deeply by id | -| `list_hotspots` | Jump to high-signal derived views such as `highest_spread` or `production_hotspots` | +| `get_remediation` | Pull the structured remediation/explainability payload for one finding | +| `list_hotspots` | Jump to high-signal derived views such as `highest_priority` or `production_hotspots` | | `get_report_section` | Read a canonical section (`meta`, `findings`, `metrics`, `derived`, etc.) 
| | `evaluate_gates` | Preview CI/gating outcomes without exiting the process | +| `check_clones` | Run a focused clone-only check for a repo or path | +| `check_complexity` | Run a focused complexity hotspot check | +| `check_coupling` | Run a focused coupling hotspot check | +| `check_cohesion` | Run a focused cohesion hotspot check | +| `check_dead_code` | Run a focused dead-code check | +| `generate_pr_summary` | Build a PR-friendly markdown or JSON summary | +| `mark_finding_reviewed` | Mark one finding as reviewed in the current MCP session | +| `list_reviewed_findings` | List the reviewed findings currently stored in memory for the run | + +Current resource surface: + +| Resource | Typical use | +|----------|-------------| +| `codeclone://latest/summary` | Quick latest-run status for clients that prefer resource reads | +| `codeclone://latest/report.json` | Full canonical report for the latest stored run | +| `codeclone://latest/health` | Lightweight health snapshot only | +| `codeclone://latest/gates` | Read back the most recent gate preview in the current MCP session | +| `codeclone://latest/changed` | Read the latest changed-files projection after a diff-aware run | +| `codeclone://schema` | Discover the canonical report shape and major section layout | +| `codeclone://runs/{run_id}/summary` | Stable summary lookup for a specific stored run | +| `codeclone://runs/{run_id}/report.json` | Stable canonical report lookup for a specific stored run | +| `codeclone://runs/{run_id}/findings/{finding_id}` | Direct lookup for one finding in one stored run | + +If a client needs pure machine-to-machine navigation, the clean split is: + +- use tools to create or refine analysis state +- use resources to re-read stored summaries, reports, health, gate, and finding payloads ## Recommended agent workflow @@ -96,6 +140,31 @@ For agentic coding and review loops, the clean sequence is: 4. `get_finding` for the specific item the agent should inspect 5. 
`evaluate_gates` before finalizing the change +For change-focused workflows, prefer: + +1. `analyze_changed_paths` +2. `get_report_section(section="changed")` +3. `list_findings(changed_paths=..., sort_by="priority")` +4. `get_remediation` +5. `generate_pr_summary` + +In practice, the changed-files projection is also exposed through: + +- `get_report_section(section="changed")` +- `codeclone://latest/changed` + +If you want a resource-first flow after one initial analysis, a practical loop is: + +1. `analyze_repository` or `analyze_changed_paths` +2. `codeclone://latest/summary` +3. `codeclone://latest/report.json` or `codeclone://runs/{run_id}/findings/{finding_id}` + +For review/refactor loops, add: + +1. `mark_finding_reviewed` +2. `list_reviewed_findings` +3. `exclude_reviewed=true` on later `list_findings` / `list_hotspots` calls + That pattern works especially well for AI-generated code because CodeClone is baseline-aware: it helps separate accepted legacy debt from new structural regressions introduced by the latest change set. @@ -201,14 +270,45 @@ Tell me whether the structural picture got better, worse, or stayed flat relativ and summarize only the findings that are worth acting on. ``` +### 11. Changed-files only review + +```text +Use codeclone MCP in changed-files mode for my latest edits. +Focus only on findings that touch changed files and rank them by priority. +``` + +### 12. Run-to-run comparison + +```text +Compare the latest CodeClone MCP run against the previous run for this repository. +Show me new regressions, resolved findings, and the health delta. +``` + +### 13. Remediation-first workflow + +```text +Use codeclone MCP to find one high-priority production finding and fetch its remediation payload. +Explain the safest refactor shape and why this is a good first target. +``` + +### 14. Session-based review loop + +```text +Use codeclone MCP to review findings one by one. 
+Mark each finding as reviewed after we discuss it, and exclude reviewed findings from the next list. +``` + ## Prompting tips - Prefer "production-only" when you care about runtime code. +- Prefer `analyze_changed_paths` or explicit `changed_paths` when the agent is + reviewing one patch or PR, not the whole repository. - Prefer "clones-only mode" when you want the cheapest focused pass on duplication. - Ask for "safe first candidate" when you want the agent to move from triage to refactor planning. -- If your broader agent also has shell or file-editing tools, you can still say - "do not update baseline" as a workflow constraint. CodeClone MCP itself is - read-only and never updates baseline. +- Use "compare the latest run against the previous run" when you want the agent + to reason about improvements/regressions instead of absolute repo state. +- Use "mark as reviewed" / "exclude reviewed" for long sessions so the agent + does not keep circling around the same finding. - For AI-generated code, explicitly ask the agent to separate: - accepted baseline debt - from new structural regressions @@ -257,6 +357,14 @@ Then register the remote MCP endpoint in the client or API flow that expects an HTTP MCP server. Prefer allowing only the CodeClone tools you need for the current workflow. +### Gemini CLI / Gemini MCP-capable clients + +Recommended mode: `stdio` + +Use the same command-based local server registration pattern when the Gemini +client can spawn MCP commands locally. If the client only accepts remote MCP +URLs, use `streamable-http` and point it to the `/mcp` endpoint. + ### Claude Code / Anthropic MCP-capable clients Recommended mode: `stdio` @@ -318,6 +426,7 @@ The CodeClone server surface itself stays the same. - CodeClone MCP is read-only by design. - It stores run history in memory only. +- Review markers are also in-memory only and disappear when the server process stops. - Repository access is limited to what the server process can read locally. 
- Baseline/cache/report semantics remain owned by the normal CodeClone contracts. @@ -352,6 +461,11 @@ endpoint instead of using `stdio`. Run `analyze_repository` again. Runs are stored in memory per server process and `latest` always points at the most recently analyzed run in that process. +### Changed-files tools are rejecting `changed_paths` + +Pass `changed_paths` as a real list of repo-relative paths. Do not pass a +single comma-separated string. + ## See also - [book/20-mcp-interface.md](book/20-mcp-interface.md) diff --git a/docs/sarif.md b/docs/sarif.md index e62d4b8..3f3b7b1 100644 --- a/docs/sarif.md +++ b/docs/sarif.md @@ -38,7 +38,10 @@ Current behavior: - `artifactLocation.uri` uses repository-relative paths - `artifactLocation.index` aligns locations with artifacts for stable linking - `run.invocations[*].workingDirectory` mirrors the scan root URI when available -- `run.columnKind` is fixed to `utf16CodeUnits` +- `run.invocations[*].startTimeUtc` is emitted when analysis start time is + available in canonical runtime meta +- `run.automationDetails.id` is unique per run so code-scanning systems can + correlate uploads reliably This helps consumers resolve results back to workspace files consistently. @@ -53,6 +56,10 @@ Current SARIF output includes: human-readable role labels such as `Representative occurrence` - `relatedLocations[*]` when the result has multiple relevant locations - `partialFingerprints.primaryLocationLineHash` for stable per-location identity + without encoding line numbers into the hash digest +- result `properties` with stable identity/context fields such as primary path, + qualname, and region +- explicit `kind: "fail"` on results For clone results, CodeClone also carries novelty-aware metadata when known: @@ -68,6 +75,7 @@ Rule records are intentionally richer than a minimal SARIF export. 
They include: - stable rule IDs +- stable rule names derived from `ruleId` - display name - help text / markdown - tags diff --git a/tests/test_cli_unit.py b/tests/test_cli_unit.py index 89c3c26..12682c0 100644 --- a/tests/test_cli_unit.py +++ b/tests/test_cli_unit.py @@ -1,11 +1,12 @@ import json import os +import subprocess import sys import webbrowser from argparse import Namespace from collections.abc import Callable from pathlib import Path -from typing import cast +from typing import Any, cast import pytest @@ -151,6 +152,9 @@ def test_cli_help_text_consistency( "Structural code quality analysis for Python.", "Target:", "Analysis:", + "--changed-only", + "--diff-against GIT_REF", + "--paths-from-git-diff GIT_REF", "Baselines and CI:", "Quality gates:", "Analysis stages:", @@ -206,6 +210,16 @@ def test_report_path_origins_distinguish_bare_and_explicit_flags() -> None: } +def test_report_path_origins_stops_at_double_dash() -> None: + assert cli._report_path_origins(("--json=out.json", "--", "--html")) == { + "html": None, + "json": "explicit", + "md": None, + "sarif": None, + "text": None, + } + + def test_timestamped_report_path_appends_utc_slug() -> None: path = Path("/tmp/report.html") assert cli._timestamped_report_path( @@ -304,6 +318,229 @@ def test_argument_parser_contract_error_marker_for_invalid_args( assert "CONTRACT ERROR:" in err +def test_validate_changed_scope_args_requires_diff_source() -> None: + cli.console = cli._make_console(no_color=True) + args = Namespace( + changed_only=True, + diff_against=None, + paths_from_git_diff=None, + ) + with pytest.raises(SystemExit) as exc: + cli._validate_changed_scope_args(args=args) + assert exc.value.code == 2 + + +def test_validate_changed_scope_args_requires_changed_only_for_diff_against() -> None: + cli.console = cli._make_console(no_color=True) + args = Namespace( + changed_only=False, + diff_against="main", + paths_from_git_diff=None, + ) + with pytest.raises(SystemExit) as exc: + 
cli._validate_changed_scope_args(args=args) + assert exc.value.code == 2 + + +def test_validate_changed_scope_args_promotes_paths_from_git_diff() -> None: + args = Namespace( + changed_only=False, + diff_against=None, + paths_from_git_diff="HEAD~1", + ) + assert cli._validate_changed_scope_args(args=args) == "HEAD~1" + assert args.changed_only is True + + +def test_normalize_changed_paths_relativizes_dedupes_and_sorts(tmp_path: Path) -> None: + root_path = tmp_path.resolve() + pkg_dir = root_path / "pkg" + pkg_dir.mkdir() + first = pkg_dir / "b.py" + second = pkg_dir / "a.py" + first.write_text("pass\n", "utf-8") + second.write_text("pass\n", "utf-8") + + assert cli._normalize_changed_paths( + root_path=root_path, + paths=("pkg/b.py", str(second), " pkg/b.py ", ""), + ) == ("pkg/a.py", "pkg/b.py") + + +def test_normalize_changed_paths_reports_unresolvable_path( + monkeypatch: pytest.MonkeyPatch, tmp_path: Path +) -> None: + cli.console = cli._make_console(no_color=True) + root_path = tmp_path.resolve() + original_resolve = Path.resolve + + def _broken_resolve(self: Path, strict: bool = False) -> Path: + if self.name == "broken.py": + raise OSError("boom") + return original_resolve(self, strict=strict) + + monkeypatch.setattr(Path, "resolve", _broken_resolve) + with pytest.raises(SystemExit) as exc: + cli._normalize_changed_paths(root_path=root_path, paths=("broken.py",)) + assert exc.value.code == 2 + + +def test_normalize_changed_paths_rejects_outside_root(tmp_path: Path) -> None: + cli.console = cli._make_console(no_color=True) + root_path = tmp_path.resolve() + outside_dir = tmp_path.parent / f"{tmp_path.name}-outside" + outside_dir.mkdir() + outside_path = outside_dir / "external.py" + outside_path.write_text("pass\n", "utf-8") + + with pytest.raises(SystemExit) as exc: + cli._normalize_changed_paths(root_path=root_path, paths=(str(outside_path),)) + assert exc.value.code == 2 + + +def test_git_diff_changed_paths_normalizes_subprocess_output( + monkeypatch: 
pytest.MonkeyPatch, tmp_path: Path +) -> None: + root_path = tmp_path.resolve() + pkg_dir = root_path / "pkg" + pkg_dir.mkdir() + (pkg_dir / "a.py").write_text("pass\n", "utf-8") + (pkg_dir / "b.py").write_text("pass\n", "utf-8") + + def _run(*args: object, **kwargs: object) -> subprocess.CompletedProcess[str]: + return subprocess.CompletedProcess( + args=["git", "diff", "--name-only", "HEAD~1", "--"], + returncode=0, + stdout="pkg/b.py\npkg/a.py\n\n", + stderr="", + ) + + monkeypatch.setattr(subprocess, "run", _run) + assert cli._git_diff_changed_paths(root_path=root_path, git_diff_ref="HEAD~1") == ( + "pkg/a.py", + "pkg/b.py", + ) + + +def test_git_diff_changed_paths_reports_subprocess_errors( + monkeypatch: pytest.MonkeyPatch, tmp_path: Path +) -> None: + cli.console = cli._make_console(no_color=True) + + def _run(*args: object, **kwargs: object) -> subprocess.CompletedProcess[str]: + raise subprocess.TimeoutExpired(cmd="git diff", timeout=30) + + monkeypatch.setattr(subprocess, "run", _run) + with pytest.raises(SystemExit) as exc: + cli._git_diff_changed_paths(root_path=tmp_path.resolve(), git_diff_ref="HEAD~1") + assert exc.value.code == 2 + + +def test_changed_clone_gate_from_report_filters_changed_scope() -> None: + gate = cli._changed_clone_gate_from_report( + { + "findings": { + "groups": { + "clones": { + "functions": [ + { + "id": "clone:function:new", + "family": "clone", + "category": "function", + "novelty": "new", + "items": [{"relative_path": "pkg/dup.py"}], + }, + { + "id": "clone:function:known", + "family": "clone", + "category": "function", + "novelty": "known", + "items": [{"relative_path": "pkg/other.py"}], + }, + ], + "blocks": [ + { + "id": "clone:block:known", + "family": "clone", + "category": "block", + "novelty": "known", + "items": [{"relative_path": "pkg/dup.py"}], + } + ], + "segments": [], + }, + "structural": { + "groups": [ + { + "id": "structural:changed", + "family": "structural", + "novelty": "new", + "items": [{"relative_path": 
"pkg/dup.py"}], + } + ] + }, + "dead_code": {"groups": []}, + "design": {"groups": []}, + } + } + }, + changed_paths=("pkg/dup.py",), + ) + assert gate.changed_paths == ("pkg/dup.py",) + assert gate.total_clone_groups == 2 + assert gate.new_func == frozenset({"clone:function:new"}) + assert gate.new_block == frozenset() + assert gate.findings_total == 3 + assert gate.findings_new == 2 + assert gate.findings_known == 1 + + +def test_enforce_gating_rewrites_clone_threshold_for_changed_scope( + monkeypatch: pytest.MonkeyPatch, +) -> None: + cli.console = cli._make_console(no_color=True) + observed: dict[str, object] = {} + + monkeypatch.setattr( + cli, + "gate", + lambda **_kwargs: pipeline.GatingResult( + exit_code=3, + reasons=("clone:threshold:8:1",), + ), + ) + monkeypatch.setattr( + cli, + "_print_gating_failure_block", + lambda *, code, entries, args: observed.update( + {"code": code, "entries": tuple(entries), "threshold": args.fail_threshold} + ), + ) + + with pytest.raises(SystemExit) as exc: + cli._enforce_gating( + args=Namespace(fail_threshold=1, verbose=False), + boot=cast("pipeline.BootstrapResult", object()), + analysis=cast("pipeline.AnalysisResult", object()), + processing=cast(Any, Namespace(source_read_failures=[])), + source_read_contract_failure=False, + baseline_failure_code=None, + metrics_baseline_failure_code=None, + new_func=set(), + new_block=set(), + metrics_diff=None, + html_report_path=None, + clone_threshold_total=2, + ) + + assert exc.value.code == 3 + assert observed["code"] == "threshold" + assert observed["entries"] == ( + ("clone_groups_total", 2), + ("clone_groups_limit", 1), + ) + + def test_make_console_caps_width_to_layout_limit( monkeypatch: pytest.MonkeyPatch, ) -> None: @@ -415,6 +652,65 @@ def test_ui_summary_formatters_cover_optional_branches() -> None: clean_with_suppressed = ui.fmt_metrics_dead_code(0, suppressed=9) assert "✔ clean" in clean_with_suppressed assert "(9 suppressed)" in clean_with_suppressed + changed_paths 
= ui.fmt_changed_scope_paths(count=45) + assert "45" in changed_paths + assert "from git diff" in changed_paths + changed_findings = ui.fmt_changed_scope_findings(total=7, new=2, known=5) + assert "total" in changed_findings + assert "new" in changed_findings + assert "5 known" in changed_findings + changed_compact = ui.fmt_changed_scope_compact( + paths=45, + findings=7, + new=2, + known=5, + ) + assert "Changed" in changed_compact + assert "paths=45" in changed_compact + assert "findings=7" in changed_compact + + +def test_print_changed_scope_uses_dedicated_block( + monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str] +) -> None: + monkeypatch.setattr(cli, "console", cli._make_console(no_color=True)) + cli_summary._print_changed_scope( + console=cast("cli_summary._Printer", cli.console), + quiet=False, + changed_scope=cli_summary.ChangedScopeSnapshot( + paths_count=45, + findings_total=7, + findings_new=2, + findings_known=5, + ), + ) + out = capsys.readouterr().out + assert "Changed Scope" in out + assert "Paths" in out + assert "Findings" in out + assert "from git diff" in out + + +def test_print_changed_scope_uses_compact_line_in_quiet_mode( + monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str] +) -> None: + monkeypatch.setattr(cli, "console", cli._make_console(no_color=True)) + cli_summary._print_changed_scope( + console=cast("cli_summary._Printer", cli.console), + quiet=True, + changed_scope=cli_summary.ChangedScopeSnapshot( + paths_count=45, + findings_total=7, + findings_new=2, + findings_known=5, + ), + ) + out = capsys.readouterr().out + assert "Changed" in out + assert "paths=45" in out + assert "findings=7" in out + assert "new=2" in out + assert "known=5" in out def test_configure_metrics_mode_rejects_skip_metrics_with_metrics_flags( diff --git a/tests/test_html_report.py b/tests/test_html_report.py index eae09b9..96c1338 100644 --- a/tests/test_html_report.py +++ b/tests/test_html_report.py @@ -23,7 +23,11 @@ 
Suggestion, ) from codeclone.report import build_block_group_facts -from codeclone.report.json_contract import build_report_document +from codeclone.report.json_contract import ( + build_report_document, + clone_group_id, + structural_group_id, +) from codeclone.report.serialize import render_json_report_document from tests._report_fixtures import ( REPEATED_ASSERT_SOURCE, @@ -552,6 +556,78 @@ def test_html_report_structural_findings_why_modal_renders_examples( assert needle in html +def test_html_report_finding_cards_expose_stable_anchor_ids(tmp_path: Path) -> None: + f1 = tmp_path / "a.py" + f2 = tmp_path / "b.py" + f1.write_text("def alpha():\n return 1\n", "utf-8") + f2.write_text("def beta():\n return 1\n", "utf-8") + clone_key = "pkg.mod:dup" + finding_key = "anchor-key" + html = build_html_report( + func_groups={ + clone_key: [ + { + "qualname": "pkg.mod:alpha", + "filepath": str(f1), + "start_line": 1, + "end_line": 2, + }, + { + "qualname": "pkg.mod:beta", + "filepath": str(f2), + "start_line": 1, + "end_line": 2, + }, + ] + }, + block_groups={}, + segment_groups={}, + structural_findings=[ + StructuralFindingGroup( + finding_kind="duplicated_branches", + finding_key=finding_key, + signature={ + "calls": "1", + "has_loop": "0", + "has_try": "0", + "nested_if": "0", + "raises": "0", + "stmt_seq": "Expr,Return", + "terminal": "return_const", + }, + items=( + StructuralFindingOccurrence( + finding_kind="duplicated_branches", + finding_key=finding_key, + file_path=str(f1), + qualname="pkg.mod:alpha", + start=1, + end=2, + signature={"stmt_seq": "Expr,Return"}, + ), + StructuralFindingOccurrence( + finding_kind="duplicated_branches", + finding_key=finding_key, + file_path=str(f2), + qualname="pkg.mod:beta", + start=1, + end=2, + signature={"stmt_seq": "Expr,Return"}, + ), + ), + ) + ], + ) + clone_id = clone_group_id("function", clone_key) + finding_id = structural_group_id("duplicated_branches", finding_key) + _assert_html_contains( + html, + 
f'id="finding-{clone_id}"', + f'id="finding-{finding_id}"', + f'data-finding-id="{finding_id}"', + ) + + def test_html_report_block_group_includes_match_basis_and_compact_key() -> None: group_key = _REPEATED_BLOCK_GROUP_KEY html = build_html_report( diff --git a/tests/test_mcp_server.py b/tests/test_mcp_server.py index 8046788..0a50124 100644 --- a/tests/test_mcp_server.py +++ b/tests/test_mcp_server.py @@ -60,6 +60,31 @@ def _write_clone_fixture(root: Path) -> None: ) +def _write_quality_fixture(root: Path) -> None: + pkg = root.joinpath("pkg") + pkg.mkdir(exist_ok=True) + pkg.joinpath("__init__.py").write_text("", "utf-8") + pkg.joinpath("quality.py").write_text( + ( + "def complex_branch(flag: int) -> int:\n" + " total = 0\n" + " for item in range(flag):\n" + " if item % 2 == 0:\n" + " total += item\n" + " elif item % 3 == 0:\n" + " total -= item\n" + " elif item % 5 == 0:\n" + " total += item * 2\n" + " else:\n" + " total += 1\n" + " return total\n\n" + "def unused_helper() -> int:\n" + " return 42\n" + ), + "utf-8", + ) + + def test_mcp_server_exposes_expected_read_only_tools() -> None: _require_mcp_runtime() server = build_mcp_server(history_limit=4) @@ -67,23 +92,52 @@ def test_mcp_server_exposes_expected_read_only_tools() -> None: tools = {tool.name: tool for tool in asyncio.run(server.list_tools())} assert set(tools) == { "analyze_repository", + "analyze_changed_paths", "get_run_summary", "evaluate_gates", "get_report_section", "list_findings", "get_finding", + "get_remediation", "list_hotspots", + "compare_runs", + "check_complexity", + "check_clones", + "check_coupling", + "check_cohesion", + "check_dead_code", + "generate_pr_summary", + "mark_finding_reviewed", + "list_reviewed_findings", } - for tool in tools.values(): + for name, tool in tools.items(): assert tool.annotations is not None - assert tool.annotations.readOnlyHint is True + assert tool.annotations.readOnlyHint is ( + name + in { + "get_run_summary", + "get_report_section", + 
"list_findings", + "get_finding", + "get_remediation", + "list_hotspots", + "compare_runs", + "generate_pr_summary", + "list_reviewed_findings", + } + ) assert tool.annotations.destructiveHint is False assert tool.annotations.idempotentHint is True + assert "triggers a full analysis first" in str( + tools["check_complexity"].description + ) + assert "triggers a full analysis first" in str(tools["check_clones"].description) def test_mcp_server_tool_roundtrip_and_resources(tmp_path: Path) -> None: _require_mcp_runtime() _write_clone_fixture(tmp_path) + _write_quality_fixture(tmp_path) server = build_mcp_server(history_limit=4) summary = _structured_tool_result( @@ -94,6 +148,7 @@ def test_mcp_server_tool_roundtrip_and_resources(tmp_path: Path) -> None: "root": str(tmp_path), "respect_pyproject": False, "cache_policy": "off", + "changed_paths": ["pkg/dup.py", "pkg/quality.py"], }, ) ) @@ -106,7 +161,16 @@ def test_mcp_server_tool_roundtrip_and_resources(tmp_path: Path) -> None: assert latest["run_id"] == run_id findings_result = _structured_tool_result( - asyncio.run(server.call_tool("list_findings", {"family": "clone"})) + asyncio.run( + server.call_tool( + "list_findings", + { + "family": "clone", + "detail_level": "summary", + "changed_paths": ["pkg/dup.py"], + }, + ) + ) ) assert cast(int, findings_result["total"]) >= 1 @@ -124,6 +188,14 @@ def test_mcp_server_tool_roundtrip_and_resources(tmp_path: Path) -> None: assert ( json.loads(latest_report_resource[0].content)["report_schema_version"] == "2.1" ) + latest_health_resource = list( + asyncio.run(server.read_resource("codeclone://latest/health")) + ) + assert json.loads(latest_health_resource[0].content)["score"] + latest_changed_resource = list( + asyncio.run(server.read_resource("codeclone://latest/changed")) + ) + assert json.loads(latest_changed_resource[0].content)["run_id"] == run_id report_resource = list( asyncio.run(server.read_resource(f"codeclone://runs/{run_id}/report.json")) @@ -139,22 +211,89 @@ 
def test_mcp_server_tool_roundtrip_and_resources(tmp_path: Path) -> None: asyncio.run(server.call_tool("evaluate_gates", {"fail_threshold": 0})) ) assert gate_result["would_fail"] is True + latest_gates_resource = list( + asyncio.run(server.read_resource("codeclone://latest/gates")) + ) + assert json.loads(latest_gates_resource[0].content)["run_id"] == run_id report_section = _structured_tool_result( asyncio.run(server.call_tool("get_report_section", {"section": "meta"})) ) assert report_section["codeclone_version"] + changed_section = _structured_tool_result( + asyncio.run(server.call_tool("get_report_section", {"section": "changed"})) + ) + assert changed_section["changed_paths"] == ["pkg/dup.py", "pkg/quality.py"] finding = _structured_tool_result( asyncio.run(server.call_tool("get_finding", {"finding_id": first_finding_id})) ) assert finding["id"] == first_finding_id + remediation = _structured_tool_result( + asyncio.run( + server.call_tool("get_remediation", {"finding_id": first_finding_id}) + ) + ) + assert remediation["finding_id"] == first_finding_id hotspots = _structured_tool_result( - asyncio.run(server.call_tool("list_hotspots", {"kind": "highest_spread"})) + asyncio.run(server.call_tool("list_hotspots", {"kind": "highest_priority"})) ) assert cast(int, hotspots["total"]) >= 1 + complexity = _structured_tool_result( + asyncio.run( + server.call_tool( + "check_complexity", + { + "run_id": run_id, + "path": "pkg/quality.py", + "min_complexity": 1, + }, + ) + ) + ) + clones = _structured_tool_result( + asyncio.run( + server.call_tool( + "check_clones", + {"run_id": run_id, "path": "pkg/dup.py"}, + ) + ) + ) + reviewed = _structured_tool_result( + asyncio.run( + server.call_tool( + "mark_finding_reviewed", + { + "run_id": run_id, + "finding_id": first_finding_id, + "note": "triaged", + }, + ) + ) + ) + reviewed_items = _structured_tool_result( + asyncio.run(server.call_tool("list_reviewed_findings", {"run_id": run_id})) + ) + pr_summary = 
_structured_tool_result( + asyncio.run( + server.call_tool( + "generate_pr_summary", + { + "run_id": run_id, + "changed_paths": ["pkg/dup.py"], + "format": "markdown", + }, + ) + ) + ) + assert complexity["check"] == "complexity" + assert cast(int, clones["total"]) >= 1 + assert reviewed["reviewed"] is True + assert reviewed_items["reviewed_count"] == 1 + assert "## CodeClone Summary" in str(pr_summary["content"]) + run_summary_resource = list( asyncio.run(server.read_resource(f"codeclone://runs/{run_id}/summary")) ) @@ -169,6 +308,11 @@ def test_mcp_server_tool_roundtrip_and_resources(tmp_path: Path) -> None: ) assert json.loads(finding_resource[0].content)["id"] == first_finding_id + schema_resource = list(asyncio.run(server.read_resource("codeclone://schema"))) + schema_payload = json.loads(schema_resource[0].content) + assert schema_payload["title"] == "CodeCloneCanonicalReport" + assert "report_schema_version" in schema_payload["properties"] + def test_mcp_server_parser_defaults_and_main_success( monkeypatch: pytest.MonkeyPatch, @@ -218,7 +362,7 @@ def test_mcp_server_main_reports_missing_optional_dependency( monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str], ) -> None: - def _boom() -> tuple[object, object]: + def _boom() -> tuple[object, object, object]: raise MCPDependencyError("install codeclone[mcp]") monkeypatch.setattr(mcp_server, "_load_mcp_runtime", _boom) diff --git a/tests/test_mcp_service.py b/tests/test_mcp_service.py index 7662416..cd80912 100644 --- a/tests/test_mcp_service.py +++ b/tests/test_mcp_service.py @@ -4,6 +4,8 @@ import importlib import json +import subprocess +from collections import OrderedDict from pathlib import Path from types import SimpleNamespace from typing import Any, cast @@ -19,7 +21,9 @@ MCPAnalysisRequest, MCPFindingNotFoundError, MCPGateRequest, + MCPGitDiffError, MCPRunNotFoundError, + MCPRunRecord, MCPServiceContractError, MCPServiceError, ) @@ -27,7 +31,7 @@ def _write_clone_fixture(root: Path) -> 
None: - root.joinpath("pkg").mkdir() + root.joinpath("pkg").mkdir(exist_ok=True) root.joinpath("pkg", "__init__.py").write_text("", "utf-8") root.joinpath("pkg", "dup.py").write_text( ( @@ -56,6 +60,77 @@ def _write_clone_fixture(root: Path) -> None: ) +def _write_quality_fixture(root: Path) -> None: + pkg = root.joinpath("pkg") + pkg.mkdir(exist_ok=True) + pkg.joinpath("__init__.py").write_text("", "utf-8") + pkg.joinpath("quality.py").write_text( + ( + "class SplitByConcern:\n" + " def __init__(self) -> None:\n" + " self.alpha = 1\n" + " self.beta = 2\n" + " self.gamma = 3\n\n" + " def sync(self, flag: int) -> int:\n" + " total = 0\n" + " for item in range(flag):\n" + " if item % 2 == 0:\n" + " total += item\n" + " elif item % 3 == 0:\n" + " total -= item\n" + " elif item % 5 == 0:\n" + " total += item * 2\n" + " elif item % 7 == 0:\n" + " total -= item * 2\n" + " else:\n" + " total += 1\n" + " if total > 20:\n" + " return total\n" + " if total < -20:\n" + " return -total\n" + " return total + self.alpha\n\n" + " def render(self) -> str:\n" + " return f'{self.beta}:{self.gamma}'\n\n" + "def unused_helper() -> int:\n" + " return 42\n" + ), + "utf-8", + ) + + +def _dummy_run_record(root: Path, run_id: str) -> MCPRunRecord: + return MCPRunRecord( + run_id=run_id, + root=root, + request=MCPAnalysisRequest(root=str(root), respect_pyproject=False), + report_document={}, + report_json="{}", + summary={"run_id": run_id, "health": {"score": 0, "grade": "N/A"}}, + changed_paths=(), + changed_projection=None, + warnings=(), + failures=(), + analysis=cast(Any, SimpleNamespace(suggestions=[])), + new_func=frozenset(), + new_block=frozenset(), + metrics_diff=None, + ) + + +def _build_quality_service(root: Path) -> CodeCloneMCPService: + _write_clone_fixture(root) + _write_quality_fixture(root) + service = CodeCloneMCPService(history_limit=4) + service.analyze_repository( + MCPAnalysisRequest( + root=str(root), + respect_pyproject=False, + cache_policy="off", + ) + ) + return 
service + + def test_mcp_service_analyze_repository_registers_latest_run(tmp_path: Path) -> None: _write_clone_fixture(tmp_path) service = CodeCloneMCPService(history_limit=4) @@ -109,6 +184,165 @@ def test_mcp_service_lists_findings_and_hotspots(tmp_path: Path) -> None: assert cast(int, hotspots["total"]) >= 1 +def test_mcp_service_changed_runs_remediation_and_review_flow(tmp_path: Path) -> None: + pkg = tmp_path / "pkg" + pkg.mkdir() + pkg.joinpath("__init__.py").write_text("", "utf-8") + pkg.joinpath("base.py").write_text( + "def baseline_only(value: int) -> int:\n return value + 1\n", + "utf-8", + ) + service = CodeCloneMCPService(history_limit=4) + + before = service.analyze_repository( + MCPAnalysisRequest( + root=str(tmp_path), + respect_pyproject=False, + cache_policy="off", + ) + ) + _write_clone_fixture(tmp_path) + after = service.analyze_changed_paths( + MCPAnalysisRequest( + root=str(tmp_path), + respect_pyproject=False, + cache_policy="off", + changed_paths=("pkg/dup.py",), + ) + ) + + changed = service.get_report_section( + run_id=str(after["run_id"]), + section="changed", + ) + assert changed["run_id"] == after["run_id"] + assert changed["changed_paths"] == ["pkg/dup.py"] + assert cast(int, changed["total"]) >= 1 + + comparison = service.compare_runs( + run_id_before=str(before["run_id"]), + run_id_after=str(after["run_id"]), + focus="clones", + ) + assert comparison["verdict"] == "regressed" + assert cast("list[dict[str, object]]", comparison["regressions"]) + + findings = service.list_findings( + family="clone", + detail_level="summary", + changed_paths=("pkg/dup.py",), + sort_by="priority", + ) + assert findings["changed_paths"] == ["pkg/dup.py"] + clone_items = cast("list[dict[str, object]]", findings["items"]) + first_id = str(clone_items[0]["id"]) + + remediation = service.get_remediation( + finding_id=first_id, + detail_level="summary", + ) + remediation_payload = cast("dict[str, object]", remediation["remediation"]) + assert 
remediation["finding_id"] == first_id + assert remediation_payload["safe_refactor_shape"] + assert remediation_payload["why_now"] + + reviewed = service.mark_finding_reviewed( + finding_id=first_id, + note="handled in current session", + ) + assert reviewed["reviewed"] is True + + reviewed_items = service.list_reviewed_findings(run_id=str(after["run_id"])) + assert reviewed_items["reviewed_count"] == 1 + + unreviewed = service.list_findings( + run_id=str(after["run_id"]), + family="clone", + exclude_reviewed=True, + detail_level="summary", + ) + assert cast(int, unreviewed["total"]) < cast(int, findings["total"]) + + +def test_mcp_service_granular_checks_pr_summary_and_resources( + tmp_path: Path, +) -> None: + _write_clone_fixture(tmp_path) + _write_quality_fixture(tmp_path) + service = CodeCloneMCPService(history_limit=4) + + summary = service.analyze_changed_paths( + MCPAnalysisRequest( + root=str(tmp_path), + respect_pyproject=False, + cache_policy="off", + changed_paths=("pkg/dup.py", "pkg/quality.py"), + complexity_threshold=1, + ) + ) + run_id = str(summary["run_id"]) + + clones = service.check_clones( + run_id=run_id, + path="pkg/dup.py", + detail_level="summary", + ) + assert clones["check"] == "clones" + assert cast(int, clones["total"]) >= 1 + + complexity = service.check_complexity( + run_id=run_id, + path="pkg/quality.py", + min_complexity=1, + detail_level="summary", + ) + assert complexity["check"] == "complexity" + assert "items" in complexity + + dead_code = service.check_dead_code( + run_id=run_id, + path="pkg/quality.py", + detail_level="summary", + ) + assert dead_code["check"] == "dead_code" + + coupling = service.check_coupling(run_id=run_id, detail_level="summary") + cohesion = service.check_cohesion(run_id=run_id, detail_level="summary") + assert coupling["check"] == "coupling" + assert cohesion["check"] == "cohesion" + + gate_result = service.evaluate_gates( + MCPGateRequest(run_id=run_id, fail_threshold=0) + ) + latest_gates = 
json.loads(service.read_resource("codeclone://latest/gates")) + latest_health = json.loads(service.read_resource("codeclone://latest/health")) + latest_changed = json.loads(service.read_resource("codeclone://latest/changed")) + schema = json.loads(service.read_resource("codeclone://schema")) + + assert latest_gates["run_id"] == gate_result["run_id"] + summary_health = cast("dict[str, object]", summary["health"]) + assert latest_health["score"] == summary_health["score"] + assert latest_changed["run_id"] == run_id + assert schema["title"] == "CodeCloneCanonicalReport" + schema_properties = cast("dict[str, object]", schema["properties"]) + assert "report_schema_version" in schema_properties + + markdown_summary = service.generate_pr_summary( + run_id=run_id, + changed_paths=("pkg/dup.py",), + format="markdown", + ) + json_summary = service.generate_pr_summary( + run_id=run_id, + changed_paths=("pkg/dup.py",), + format="json", + ) + assert markdown_summary["format"] == "markdown" + assert "## CodeClone Summary" in str(markdown_summary["content"]) + assert json_summary["run_id"] == run_id + assert json_summary["changed_paths"] == ["pkg/dup.py"] + + def test_mcp_service_summary_reuses_canonical_meta_for_cache_and_health( tmp_path: Path, ) -> None: @@ -545,3 +779,927 @@ def _fake_report(**kwargs: Any) -> object: ) finally: monkeypatch.undo() + + +def test_mcp_service_low_level_runtime_helpers_and_run_store( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + console = mcp_service_mod._BufferConsole() + console.print("alpha", 2) + console.print(" ") + assert console.messages == ["alpha 2"] + + monkeypatch.setattr( + cast(Any, mcp_service_mod).subprocess, + "run", + lambda *args, **kwargs: SimpleNamespace( + stdout="pkg/a.py\npkg/b.py\npkg/a.py\n" + ), + ) + assert mcp_service_mod._git_diff_lines_payload( + root_path=tmp_path, + git_diff_ref="HEAD", + ) == ("pkg/a.py", "pkg/b.py") + + def _raise_subprocess(*args: object, **kwargs: object) -> object: + raise 
subprocess.CalledProcessError(1, ["git", "diff"]) + + monkeypatch.setattr(cast(Any, mcp_service_mod).subprocess, "run", _raise_subprocess) + with pytest.raises(MCPGitDiffError): + mcp_service_mod._git_diff_lines_payload(root_path=tmp_path, git_diff_ref="HEAD") + + assert mcp_service_mod._load_report_document_payload('{"ok": true}') == {"ok": True} + with pytest.raises(MCPServiceError): + mcp_service_mod._load_report_document_payload("{") + with pytest.raises(MCPServiceError): + mcp_service_mod._load_report_document_payload("[]") + + store = mcp_service_mod.CodeCloneMCPRunStore(history_limit=1) + first = _dummy_run_record(tmp_path, "first") + second = _dummy_run_record(tmp_path, "second") + assert store.register(first) is first + assert store.get().run_id == "first" + store.register(second) + assert tuple(record.run_id for record in store.records()) == ("second",) + with pytest.raises(MCPRunNotFoundError): + store.get("first") + + +def test_mcp_service_branch_helpers_on_real_runs( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + service = _build_quality_service(tmp_path) + changed = service.analyze_changed_paths( + MCPAnalysisRequest( + root=str(tmp_path), + respect_pyproject=False, + cache_policy="off", + changed_paths=("pkg/dup.py", "pkg/quality.py"), + complexity_threshold=1, + coupling_threshold=1, + cohesion_threshold=1, + ) + ) + run_id = str(changed["run_id"]) + record = service._runs.get(run_id) + + assert service.get_report_section(run_id=run_id, section="inventory") + assert service.get_report_section(run_id=run_id, section="derived") + + severity_rows = service.list_findings( + run_id=run_id, + sort_by="severity", + detail_level="full", + limit=5, + ) + spread_rows = service.list_findings( + run_id=run_id, + sort_by="spread", + detail_level="normal", + limit=5, + ) + assert cast("list[dict[str, object]]", severity_rows["items"]) + assert cast("list[dict[str, object]]", spread_rows["items"]) + + highest_priority_summary = 
service.list_hotspots( + kind="highest_priority", + run_id=run_id, + detail_level="summary", + limit=2, + ) + highest_priority_normal = service.list_hotspots( + kind="highest_priority", + run_id=run_id, + detail_level="normal", + limit=1, + ) + highest_priority_full = service.list_hotspots( + kind="highest_priority", + run_id=run_id, + detail_level="full", + limit=1, + ) + assert cast("list[dict[str, object]]", highest_priority_summary["items"]) + assert cast("list[dict[str, object]]", highest_priority_normal["items"]) + assert cast("list[dict[str, object]]", highest_priority_full["items"]) + + reviewed_id = str( + cast("list[dict[str, object]]", highest_priority_summary["items"])[0]["id"] + ) + service.mark_finding_reviewed(run_id=run_id, finding_id=reviewed_id) + filtered_hotspots = service.list_hotspots( + kind="highest_priority", + run_id=run_id, + detail_level="summary", + exclude_reviewed=True, + ) + assert all( + str(item.get("id", "")) != reviewed_id + for item in cast("list[dict[str, object]]", filtered_hotspots["items"]) + ) + + assert ( + service.check_clones( + run_id=run_id, + clone_type="Type-999", + detail_level="summary", + )["total"] + == 0 + ) + assert ( + service.check_complexity( + run_id=run_id, + min_complexity=999, + detail_level="summary", + )["total"] + == 0 + ) + + clone_check = service.check_clones( + root=str(tmp_path), + path="pkg/dup.py", + detail_level="summary", + ) + assert cast(int, clone_check["total"]) >= 1 + + no_changed_service = CodeCloneMCPService(history_limit=2) + no_changed_service.analyze_repository( + MCPAnalysisRequest( + root=str(tmp_path), + respect_pyproject=False, + cache_policy="off", + ) + ) + with pytest.raises(MCPServiceContractError): + no_changed_service.read_resource("codeclone://latest/gates") + with pytest.raises(MCPServiceContractError): + no_changed_service.read_resource("codeclone://latest/changed") + with pytest.raises(MCPServiceContractError): + no_changed_service.get_report_section(section="changed") 
+ + abs_dup = tmp_path / "pkg" / "dup.py" + normalized = service._normalize_changed_paths( + root_path=tmp_path, + paths=(str(abs_dup), "./pkg/dup.py", "pkg"), + ) + assert normalized == ("pkg", "pkg/dup.py") + with pytest.raises(MCPServiceContractError): + service._normalize_changed_paths( + root_path=tmp_path, + paths=(str(tmp_path.parent / "outside.py"),), + ) + + monkeypatch.setattr( + mcp_service_mod, + "_git_diff_lines_payload", + lambda **kwargs: ("pkg/dup.py", "pkg/dup.py"), + ) + assert service._resolve_request_changed_paths( + root_path=tmp_path, + changed_paths=(), + git_diff_ref="HEAD", + ) == ("pkg/dup.py",) + with pytest.raises(MCPServiceContractError): + service._resolve_request_changed_paths( + root_path=tmp_path, + changed_paths=("pkg/dup.py",), + git_diff_ref="HEAD", + ) + assert ( + service._resolve_query_changed_paths( + record=record, + changed_paths=(), + git_diff_ref=None, + prefer_record_paths=True, + ) + == record.changed_paths + ) + + duplicate_locations = service._locations_for_finding( + record, + { + "items": [ + { + "relative_path": "pkg/dup.py", + "start_line": 1, + "qualname": "pkg.dup:alpha", + }, + { + "relative_path": "pkg/dup.py", + "start_line": 1, + "qualname": "pkg.dup:alpha", + }, + {"relative_path": "", "start_line": 0, "qualname": ""}, + ] + }, + ) + assert len(duplicate_locations) == 1 + assert service._path_matches("pkg/dup.py", ("pkg",)) + assert service._finding_touches_paths( + finding={"items": [{"relative_path": "pkg/dup.py"}]}, + changed_paths=("pkg",), + ) + service._review_state["stale"] = OrderedDict([("missing", None)]) + service._prune_session_state() + assert "stale" not in service._review_state + + +def test_mcp_service_remediation_and_comparison_helper_branches( + tmp_path: Path, +) -> None: + _write_clone_fixture(tmp_path) + _write_quality_fixture(tmp_path) + service = CodeCloneMCPService(history_limit=4) + + before = service.analyze_repository( + MCPAnalysisRequest( + root=str(tmp_path), + 
respect_pyproject=False, + cache_policy="off", + ) + ) + tmp_path.joinpath("pkg", "dup.py").write_text( + "def alpha(value: int) -> int:\n return value + 1\n", + "utf-8", + ) + after = service.analyze_repository( + MCPAnalysisRequest( + root=str(tmp_path), + respect_pyproject=False, + cache_policy="off", + ) + ) + before_record = service._runs.get(str(before["run_id"])) + after_record = service._runs.get(str(after["run_id"])) + + comparison = service.compare_runs( + run_id_before=str(before["run_id"]), + run_id_after=str(after["run_id"]), + focus="clones", + ) + assert comparison["verdict"] == "improved" + assert ( + service._comparison_verdict( + regressions=1, + improvements=0, + health_delta=0, + ) + == "regressed" + ) + assert ( + service._comparison_verdict( + regressions=0, + improvements=1, + health_delta=0, + ) + == "improved" + ) + assert ( + service._comparison_verdict( + regressions=0, + improvements=0, + health_delta=0, + ) + == "stable" + ) + assert ( + service._changed_verdict( + changed_projection={"new": 1, "total": 1}, + health_delta=0, + ) + == "regressed" + ) + assert ( + service._changed_verdict( + changed_projection={"new": 0, "total": 0}, + health_delta=1, + ) + == "improved" + ) + assert ( + service._changed_verdict( + changed_projection={"new": 0, "total": 1}, + health_delta=0, + ) + == "stable" + ) + + assert service._comparison_index(before_record, focus="clones") + structural_index = service._comparison_index( + before_record, + focus="structural", + ) + assert isinstance(structural_index, dict) + assert service._comparison_index(before_record, focus="metrics") + + remediation = { + "effort": "moderate", + "priority": 1.2, + "confidence": "high", + "safe_refactor_shape": "Extract helper", + "risk_level": "medium", + "why_now": "Because", + "blast_radius": {"files": 1}, + "steps": ["one", "two"], + } + assert service._project_remediation(remediation, detail_level="full") == remediation + assert "blast_radius" not in 
service._project_remediation( + remediation, + detail_level="summary", + ) + normal_remediation = service._project_remediation( + remediation, + detail_level="normal", + ) + assert normal_remediation["steps"] == ["one", "two"] + assert service._risk_level_for_effort("easy") == "low" + assert service._risk_level_for_effort("hard") == "high" + assert "new regression" in service._why_now_text( + title="Clone group", + severity="warning", + novelty="new", + count=2, + source_kind="production", + spread_files=1, + spread_functions=2, + effort="moderate", + ) + assert "known debt" in service._why_now_text( + title="Clone group", + severity="warning", + novelty="known", + count=0, + source_kind="tests", + spread_files=1, + spread_functions=1, + effort="easy", + ) + + assert service._safe_refactor_shape( + SimpleNamespace(category="clone", clone_type="Type-1", title="Function clone"), + ).startswith("Keep one canonical") + assert service._safe_refactor_shape( + SimpleNamespace(category="clone", clone_type="Type-2", title="Function clone"), + ).startswith("Extract shared") + assert service._safe_refactor_shape( + SimpleNamespace(category="clone", clone_type="Type-4", title="Block clone"), + ).startswith("Extract the repeated statement") + assert service._safe_refactor_shape( + SimpleNamespace(category="structural", clone_type="", title="Branches"), + ).startswith("Extract the repeated branch") + assert service._safe_refactor_shape( + SimpleNamespace(category="complexity", clone_type="", title="Complex"), + ).startswith("Split the function") + assert service._safe_refactor_shape( + SimpleNamespace(category="coupling", clone_type="", title="Coupling"), + ).startswith("Isolate responsibilities") + assert service._safe_refactor_shape( + SimpleNamespace(category="cohesion", clone_type="", title="Cohesion"), + ).startswith("Split the class") + assert service._safe_refactor_shape( + SimpleNamespace(category="dead_code", clone_type="", title="Dead code"), + ).startswith("Delete the 
unused symbol") + assert service._safe_refactor_shape( + SimpleNamespace(category="dependency", clone_type="", title="Cycle"), + ).startswith("Break the cycle") + assert service._safe_refactor_shape( + SimpleNamespace(category="other", clone_type="", title="Other"), + ).startswith("Extract the repeated logic") + + empty_markdown = service._render_pr_summary_markdown( + { + "health": {"score": 81, "grade": "B"}, + "health_delta": 0, + "verdict": "stable", + "new_findings_in_changed_files": [], + "resolved": [], + "blocking_gates": [], + } + ) + assert "- None" in empty_markdown + assert "- none" in empty_markdown + assert service._build_changed_projection(after_record) is None + augmented = service._augment_summary_with_changed( + summary={"run_id": after["run_id"]}, + changed_paths=("pkg/dup.py",), + changed_projection={ + "total": 1, + "new": 0, + "known": 1, + "items": [{"id": "x"}], + "health_delta": -1, + "verdict": "regressed", + }, + ) + assert augmented["changed_paths"] == ["pkg/dup.py"] + assert cast("dict[str, object]", augmented["changed_findings"])["total"] == 1 + + +def test_mcp_service_additional_projection_and_error_branches( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + service = _build_quality_service(tmp_path) + + with pytest.raises(MCPServiceContractError): + service.analyze_changed_paths( + MCPAnalysisRequest( + root=str(tmp_path), + respect_pyproject=False, + cache_policy="off", + ) + ) + + summary = service.analyze_changed_paths( + MCPAnalysisRequest( + root=str(tmp_path), + respect_pyproject=False, + cache_policy="off", + changed_paths=("pkg/dup.py",), + complexity_threshold=1, + coupling_threshold=1, + cohesion_threshold=1, + ) + ) + run_id = str(summary["run_id"]) + record = service._runs.get(run_id) + + complexity_group = mcp_service_mod._complexity_group_for_threshold_payload( + { + "qualname": "pkg.quality:hot", + "relative_path": "pkg/quality.py", + "start_line": 1, + "end_line": 5, + "cyclomatic_complexity": 99, + 
"nesting_depth": 4, + "risk": "high", + }, + threshold=20, + scan_root=str(tmp_path), + ) + assert complexity_group is not None + assert complexity_group["severity"] == "critical" + assert mcp_service_mod._coupling_group_for_threshold_payload( + { + "qualname": "pkg.quality:coupled", + "relative_path": "pkg/quality.py", + "start_line": 1, + "end_line": 5, + "cbo": 3, + "risk": "high", + "coupled_classes": ["A", "B"], + }, + threshold=1, + scan_root=str(tmp_path), + ) + assert mcp_service_mod._cohesion_group_for_threshold_payload( + { + "qualname": "pkg.quality:cohesive", + "relative_path": "pkg/quality.py", + "start_line": 1, + "end_line": 5, + "lcom4": 2, + "risk": "medium", + "method_count": 3, + "instance_var_count": 2, + }, + threshold=1, + scan_root=str(tmp_path), + ) + assert mcp_service_mod._suggestion_finding_id_payload(object()) == "" + assert mcp_service_mod._suggestion_finding_id_payload( + SimpleNamespace( + finding_family="structural", + finding_kind="duplicated_branches", + subject_key="key", + category="structural", + title="Structural", + ) + ).startswith("structural:") + assert mcp_service_mod._suggestion_finding_id_payload( + SimpleNamespace( + finding_family="design", + finding_kind="", + subject_key="dead-key", + category="dead_code", + title="Dead code", + ) + ).startswith("dead_code:") + assert mcp_service_mod._suggestion_finding_id_payload( + SimpleNamespace( + finding_family="design", + finding_kind="", + subject_key="", + category="coupling", + title="Coupling title", + ) + ).startswith("design:coupling:") + + original_service_get = service.get_finding + original_runs_get = service._runs.get + monkeypatch.setattr( + service, + "get_finding", + lambda **kwargs: {"id": "no-remediation"}, + ) + monkeypatch.setattr(service._runs, "get", lambda run_id=None: record) + with pytest.raises(MCPFindingNotFoundError): + service.get_remediation(finding_id="no-remediation", run_id=run_id) + monkeypatch.setattr(service, "get_finding", 
original_service_get) + monkeypatch.setattr(service._runs, "get", original_runs_get) + + original_get_finding = service.get_finding + + def _patched_get_finding( + *, + finding_id: str, + run_id: str | None = None, + ) -> dict[str, object]: + if finding_id == "missing": + raise MCPFindingNotFoundError("missing") + return original_get_finding(finding_id=finding_id, run_id=run_id) + + monkeypatch.setattr(service, "get_finding", _patched_get_finding) + service._review_state[run_id] = OrderedDict([("missing", None)]) + reviewed_items = service.list_reviewed_findings(run_id=run_id) + assert reviewed_items["reviewed_count"] == 0 + + assert ( + service.check_dead_code( + run_id=run_id, + min_severity="warning", + detail_level="summary", + )["check"] + == "dead_code" + ) + assert ( + json.loads(service.read_resource(f"codeclone://runs/{run_id}/schema"))["title"] + == "CodeCloneCanonicalReport" + ) + findings_payload = service.list_findings(run_id=run_id) + first_finding_id = str( + cast("list[dict[str, object]]", findings_payload["items"])[0]["id"] + ) + assert ( + json.loads( + service.read_resource( + f"codeclone://runs/{run_id}/findings/{first_finding_id}" + ) + )["id"] + == first_finding_id + ) + + pr_summary = service.generate_pr_summary( + run_id=run_id, + changed_paths=("pkg/dup.py",), + format="json", + ) + assert pr_summary["resolved"] == [] + assert service.generate_pr_summary(run_id=run_id, format="json")["resolved"] == [] + + other_root = tmp_path / "other" + other_root.mkdir() + service_other = CodeCloneMCPService(history_limit=4) + _write_clone_fixture(other_root) + first = service_other.analyze_repository( + MCPAnalysisRequest( + root=str(tmp_path), + respect_pyproject=False, + cache_policy="off", + ) + ) + second = service_other.analyze_repository( + MCPAnalysisRequest( + root=str(other_root), + respect_pyproject=False, + cache_policy="off", + ) + ) + assert ( + service_other._previous_run_for_root( + service_other._runs.get(str(second["run_id"])) + ) + is 
None + ) + assert ( + service_other._previous_run_for_root( + service_other._runs.get(str(first["run_id"])) + ) + is None + ) + + same_root_service = CodeCloneMCPService(history_limit=4) + _write_clone_fixture(other_root) + first_same_root = same_root_service.analyze_repository( + MCPAnalysisRequest( + root=str(other_root), + respect_pyproject=False, + cache_policy="off", + ) + ) + other_root.joinpath("pkg", "dup.py").write_text( + "def alpha(value: int) -> int:\n return value + 1\n", + "utf-8", + ) + second_same_root = same_root_service.analyze_repository( + MCPAnalysisRequest( + root=str(other_root), + respect_pyproject=False, + cache_policy="off", + ) + ) + previous_same_root = same_root_service._previous_run_for_root( + same_root_service._runs.get(str(second_same_root["run_id"])) + ) + assert previous_same_root is not None + assert previous_same_root.run_id == first_same_root["run_id"] + assert same_root_service.generate_pr_summary( + run_id=str(second_same_root["run_id"]), + format="json", + )["resolved"] + + fake_design_record = MCPRunRecord( + run_id="design", + root=tmp_path, + request=MCPAnalysisRequest( + root=str(tmp_path), + respect_pyproject=False, + complexity_threshold=1, + coupling_threshold=1, + cohesion_threshold=1, + ), + report_document={ + "metrics": { + "families": { + "complexity": { + "items": [ + { + "qualname": "pkg.quality:hot", + "relative_path": "pkg/quality.py", + "start_line": 1, + "end_line": 5, + "cyclomatic_complexity": 3, + "nesting_depth": 1, + "risk": "medium", + } + ] + }, + "coupling": { + "items": [ + { + "qualname": "pkg.quality:coupled", + "relative_path": "pkg/quality.py", + "start_line": 1, + "end_line": 5, + "cbo": 2, + "risk": "medium", + "coupled_classes": ["A"], + } + ] + }, + "cohesion": { + "items": [ + { + "qualname": "pkg.quality:cohesive", + "relative_path": "pkg/quality.py", + "start_line": 1, + "end_line": 5, + "lcom4": 2, + "risk": "medium", + "method_count": 2, + "instance_var_count": 2, + } + ] + }, + } + }, 
+ "findings": { + "groups": { + "design": {"groups": []}, + "clones": {"functions": [], "blocks": [], "segments": []}, + "structural": {"groups": []}, + "dead_code": {"groups": []}, + } + }, + }, + report_json="{}", + summary={"run_id": "design", "health": {"score": 80, "grade": "B"}}, + changed_paths=(), + changed_projection=None, + warnings=(), + failures=(), + analysis=cast(Any, SimpleNamespace(suggestions=[])), + new_func=frozenset(), + new_block=frozenset(), + metrics_diff=None, + ) + findings_section = cast( + "dict[str, object]", + fake_design_record.report_document["findings"], + ) + fake_design_groups = cast("dict[str, object]", findings_section["groups"]) + assert ( + len( + service._design_groups_for_record( + fake_design_record, + groups=fake_design_groups, + ) + ) + == 3 + ) + wrapped_group = service._design_singleton_group( + category="cohesion", + kind="class_hotspot", + severity="warning", + qualname="pkg.quality:cohesive", + filepath="pkg/quality.py", + start_line=1, + end_line=5, + item_data={"lcom4": 2}, + facts={"lcom4": 2}, + scan_root=str(tmp_path), + ) + assert wrapped_group["category"] == "cohesion" + detail_payload = service._project_finding_detail( + { + "id": "finding", + "title": "Finding", + "remediation": {"steps": ["a"], "blast_radius": {"files": 1}}, + }, + detail_level="normal", + ) + assert "remediation" in detail_payload + assert ( + service._project_finding_detail( + {"id": "finding", "title": "Finding"}, + detail_level="normal", + )["id"] + == "finding" + ) + assert ( + service._matches_finding_filters( + finding={"family": "clone", "category": "clone"}, + family="all", + category="structural", + severity=None, + source_kind=None, + novelty="all", + ) + is False + ) + assert ( + service._spread_weight(_dummy_run_record(tmp_path, "empty"), {"spread": {}}) + == 0.3 + ) + location_uri = service._locations_for_finding( + record, + { + "items": [ + { + "relative_path": "pkg/dup.py", + "start_line": 1, + "qualname": "pkg.dup:alpha", + 
} + ] + }, + )[0]["uri"] + assert str(location_uri).endswith("#L1") + location_without_line = service._locations_for_finding( + record, + { + "items": [ + { + "relative_path": "pkg/dup.py", + "start_line": 0, + "qualname": "pkg.dup:alpha", + } + ] + }, + )[0]["uri"] + assert "#L" not in str(location_without_line) + assert ( + service.list_hotspots( + kind="highest_spread", + run_id=run_id, + changed_paths=("does/not/match.py",), + detail_level="summary", + )["total"] + == 0 + ) + fake_hotspot_record = MCPRunRecord( + run_id="hotspot", + root=record.root, + request=record.request, + report_document={ + **record.report_document, + "derived": {"hotlists": {"highest_spread_ids": ["missing-id"]}}, + }, + report_json=record.report_json, + summary=record.summary, + changed_paths=record.changed_paths, + changed_projection=record.changed_projection, + warnings=record.warnings, + failures=record.failures, + analysis=record.analysis, + new_func=record.new_func, + new_block=record.new_block, + metrics_diff=record.metrics_diff, + ) + assert ( + service._hotspot_rows( + record=fake_hotspot_record, + kind="highest_spread", + detail_level="summary", + changed_paths=(), + exclude_reviewed=False, + ) + == [] + ) + metrics_focus = service._comparison_index(record, focus="metrics") + assert isinstance(metrics_focus, dict) + resolved_markdown = service._render_pr_summary_markdown( + { + "health": {"score": 81, "grade": "B"}, + "health_delta": 1, + "verdict": "improved", + "new_findings_in_changed_files": [], + "resolved": [{"title": "Fixed", "location": "pkg/dup.py"}], + "blocking_gates": [], + } + ) + assert "### Resolved (1)" in resolved_markdown + assert ( + service._normalize_changed_paths( + root_path=tmp_path, + paths=(".", "./"), + ) + == () + ) + complexity_check = service.check_complexity( + run_id=run_id, + min_complexity=1, + detail_level="summary", + ) + assert complexity_check["check"] == "complexity" + + +def test_mcp_service_metrics_diff_warning_and_projection_branches( 
+ tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + _write_clone_fixture(tmp_path) + service = CodeCloneMCPService(history_limit=2) + + fake_status = SimpleNamespace(value="ok") + fake_metrics_baseline = SimpleNamespace( + schema_version="2.0", + payload_sha256="digest", + diff=lambda metrics: MetricsDiff( + new_high_risk_functions=("pkg.dup:alpha",), + new_high_coupling_classes=(), + new_cycles=(), + new_dead_code=(), + health_delta=-1, + ), + ) + monkeypatch.setattr( + mcp_service_mod, + "resolve_metrics_baseline_state", + lambda **kwargs: SimpleNamespace( + baseline=fake_metrics_baseline, + loaded=True, + status=fake_status, + trusted_for_diff=True, + updated_path=None, + ), + ) + cache_with_warning = Cache( + tmp_path / "cache.json", + root=tmp_path, + max_size_bytes=1024 * 1024, + ) + cache_with_warning.load_warning = "cache warning" + monkeypatch.setattr(service, "_build_cache", lambda **kwargs: cache_with_warning) + + summary = service.analyze_repository( + MCPAnalysisRequest( + root=str(tmp_path), + respect_pyproject=False, + cache_policy="off", + ) + ) + metrics_diff = cast("dict[str, object]", summary["metrics_diff"]) + assert metrics_diff["new_high_risk_functions"] == 1 + assert "cache warning" in cast("list[str]", summary["warnings"]) + analysis = cast( + Any, + SimpleNamespace( + suppressed_segment_groups=0, + segment_groups_raw_digest="digest", + segment_groups={}, + ), + ) + service._refresh_cache_projection(cache=cache_with_warning, analysis=analysis) + service._refresh_cache_projection(cache=cache_with_warning, analysis=analysis) diff --git a/tests/test_report.py b/tests/test_report.py index 26d70b8..cacda08 100644 --- a/tests/test_report.py +++ b/tests/test_report.py @@ -437,16 +437,17 @@ def test_report_output_formats( assert sarif_payload["$schema"].endswith("sarif-2.1.0.json") assert sarif_payload["version"] == "2.1.0" assert run["tool"]["driver"]["name"] == "codeclone" - assert run["automationDetails"]["id"] == "codeclone/full" 
+ assert run["automationDetails"]["id"] == "codeclone/full/2026-03-10T12:00:00Z" assert run["properties"]["reportSchemaVersion"] == REPORT_SCHEMA_VERSION assert run["properties"]["reportGeneratedAtUtc"] == "2026-03-10T12:00:00Z" - assert run["columnKind"] == "utf16CodeUnits" + assert "columnKind" not in run assert run["originalUriBaseIds"]["%SRCROOT%"]["uri"] == "file:///repo/" assert run["artifacts"] assert run["invocations"][0]["workingDirectory"]["uri"] == "file:///repo/" + assert "semanticVersion" not in run["tool"]["driver"] assert any(rule["id"] == "CCLONE001" for rule in run["tool"]["driver"]["rules"]) first_rule = run["tool"]["driver"]["rules"][0] - assert first_rule["name"].startswith("codeclone.") + assert first_rule["name"] == "codeclone.CCLONE001" assert "help" in first_rule assert "markdown" in first_rule["help"] assert first_rule["properties"]["tags"] @@ -522,7 +523,13 @@ def test_report_sarif_uses_representative_and_related_locations() -> None: assert result["relatedLocations"][0]["message"]["text"] == "Related occurrence #1" assert result["properties"]["cloneType"] == "Type-2" assert result["properties"]["groupArity"] == 2 - assert "primaryLocationLineHash" in result["partialFingerprints"] + assert result["kind"] == "fail" + assert set(result["partialFingerprints"]) == {"primaryLocationLineHash"} + assert ( + result["properties"]["primaryPath"] == "tests/fixtures/golden_project/alpha.py" + ) + assert result["properties"]["primaryQualname"] == "pkg.alpha:transform_alpha" + assert result["properties"]["primaryRegion"] == "1-10" def test_report_json_deterministic_group_order() -> None: diff --git a/tests/test_report_contract_coverage.py b/tests/test_report_contract_coverage.py index 0d97393..38a03b0 100644 --- a/tests/test_report_contract_coverage.py +++ b/tests/test_report_contract_coverage.py @@ -64,6 +64,9 @@ from codeclone.report.sarif import ( _result_properties as _sarif_result_properties, ) +from codeclone.report.sarif import ( + _rule_name as 
_sarif_rule_name, +) from codeclone.report.sarif import ( _rule_spec as _sarif_rule_spec, ) @@ -75,9 +78,6 @@ render_sarif_report_document, to_sarif_report, ) -from codeclone.report.sarif import ( - _slug as _sarif_slug, -) from codeclone.report.sarif import ( _text as _sarif_text, ) @@ -1082,7 +1082,7 @@ def test_sarif_private_helper_family_dispatches() -> None: "items": [{"relative_path": "pkg/mod.py"}], } ) - == "Unused function with medium confidence: pkg/mod.py" + == "Unused function with medium confidence: pkg/mod.py." ) assert "LCOM4=4" in _sarif_result_message( { @@ -1210,14 +1210,30 @@ def test_sarif_private_helper_family_dispatches() -> None: group={"id": "design:cohesion:pkg.mod:Thing"}, primary_item={"relative_path": "", "qualname": "", "start_line": 0}, ) + shifted_line_hash = _sarif_partial_fingerprints( + rule_id="CDESIGN002", + group={"id": "design:complexity:pkg.mod:run"}, + primary_item={ + "relative_path": "pkg/mod.py", + "qualname": "pkg.mod:run", + "start_line": 30, + "end_line": 34, + }, + ) assert "primaryLocationLineHash" in line_hash assert "primaryLocationLineHash" not in no_line_hash + assert set(line_hash) == {"primaryLocationLineHash"} + assert ( + line_hash["primaryLocationLineHash"].split(":", 1)[0] + == shifted_line_hash["primaryLocationLineHash"].split(":", 1)[0] + ) def test_sarif_private_helper_edge_branches( monkeypatch: pytest.MonkeyPatch, ) -> None: - assert _sarif_slug("Function /// clone group") == "function-clone-group" + spec = _sarif_rule_spec({"family": "clone", "category": "function"}) + assert _sarif_rule_name(spec) == "codeclone.CCLONE001" assert ( _sarif_scan_root_uri({"meta": {"runtime": {"scan_root_absolute": "repo"}}}) == "" @@ -1294,11 +1310,15 @@ def test_render_sarif_report_document_without_srcroot_keeps_relative_payload() - assert "originalUriBaseIds" not in run invocation = cast(dict[str, object], cast(list[object], run["invocations"])[0]) assert "workingDirectory" not in invocation + assert "startTimeUtc" not 
in invocation + assert "columnKind" not in run result = cast(dict[str, object], cast(list[object], run["results"])[0]) assert "baselineState" not in result + assert result["kind"] == "fail" primary_location = cast(list[object], result["locations"])[0] location_map = cast(dict[str, object], primary_location) assert cast(dict[str, object], location_map["message"])["text"] == "Cycle member" + assert cast(str, cast(dict[str, object], result["message"])["text"]).endswith(".") def test_collect_paths_from_metrics_covers_all_metric_families_and_skips_missing() -> ( From c8086c7e18e682e3c0dbf2f2e7e861cbf6114242 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Sun, 29 Mar 2026 19:32:20 +0500 Subject: [PATCH 03/15] feat(ci): add marketplace-ready CodeClone Action v2 and harden MCP/cache workflows for clean self-checks --- .github/actions/codeclone/README.md | 167 +++++- .github/actions/codeclone/_action_impl.py | 231 ++++++++ .github/actions/codeclone/action.yml | 242 ++++++++- .../actions/codeclone/render_pr_comment.py | 54 ++ .github/actions/codeclone/run_codeclone.py | 28 + .github/workflows/codeclone.yml | 34 ++ CHANGELOG.md | 40 +- README.md | 90 ++-- codeclone/_html_report/_assemble.py | 10 +- codeclone/_html_report/_sections/_clones.py | 4 +- codeclone/baseline.py | 31 +- codeclone/blocks.py | 9 +- codeclone/cache.py | 332 +++--------- codeclone/cache_io.py | 68 +++ codeclone/cache_paths.py | 46 ++ codeclone/cache_segments.py | 181 +++++++ codeclone/cli.py | 9 + codeclone/extractor.py | 68 ++- codeclone/mcp_server.py | 107 +++- codeclone/mcp_service.py | 305 +++++++---- codeclone/metrics/cohesion.py | 18 +- codeclone/metrics/coupling.py | 38 +- codeclone/metrics_baseline.py | 21 +- codeclone/pipeline.py | 56 +- codeclone/report/findings.py | 69 +-- codeclone/structural_findings.py | 42 +- docs/book/10-html-render.md | 24 +- docs/book/20-mcp-interface.md | 36 +- docs/mcp.md | 496 +++++------------- tests/test_cache.py | 39 +- tests/test_core_branch_coverage.py | 27 
+- tests/test_github_action_helpers.py | 126 +++++ tests/test_mcp_server.py | 150 +++++- tests/test_mcp_service.py | 390 ++++++++++++-- tests/test_pipeline_process.py | 87 +++ tests/test_structural_findings.py | 20 + 36 files changed, 2602 insertions(+), 1093 deletions(-) create mode 100644 .github/actions/codeclone/_action_impl.py create mode 100644 .github/actions/codeclone/render_pr_comment.py create mode 100644 .github/actions/codeclone/run_codeclone.py create mode 100644 .github/workflows/codeclone.yml create mode 100644 codeclone/cache_io.py create mode 100644 codeclone/cache_paths.py create mode 100644 codeclone/cache_segments.py create mode 100644 tests/test_github_action_helpers.py diff --git a/.github/actions/codeclone/README.md b/.github/actions/codeclone/README.md index 1889dcd..f721a89 100644 --- a/.github/actions/codeclone/README.md +++ b/.github/actions/codeclone/README.md @@ -1,11 +1,168 @@ # CodeClone GitHub Action -Runs CodeClone to detect architectural code duplication in Python projects. +Baseline-aware structural code quality analysis for Python with: -## Usage +- configurable CI gating +- SARIF upload for GitHub Code Scanning +- PR summary comments +- deterministic JSON report generation + +This action is designed for PR and CI workflows where you want CodeClone to act +as a non-LLM review bot: run analysis, upload SARIF, post a concise summary, +and propagate the real gate result. + +## What it does + +The v2 action flow is: + +1. set up Python +2. install `codeclone` from PyPI +3. optionally require a committed baseline +4. run CodeClone with JSON + optional SARIF output +5. optionally upload SARIF to GitHub Code Scanning +6. optionally post or update a PR summary comment +7. return the real CodeClone exit code as the job result + +## Basic usage ```yaml -- uses: orenlab/codeclone/.github/actions/codeclone@v1 +- uses: orenlab/codeclone/.github/actions/codeclone@main with: - path: . 
- fail-on-new: true + fail-on-new: "true" +``` + +For released references, prefer pinning to a major version tag such as `@v2` +or to an immutable commit SHA. + +## PR workflow example + +```yaml +name: CodeClone + +on: + pull_request: + types: [opened, synchronize, reopened] + paths: ["**/*.py"] + +permissions: + contents: read + security-events: write + pull-requests: write + +jobs: + codeclone: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - uses: orenlab/codeclone/.github/actions/codeclone@main + with: + fail-on-new: "true" + fail-health: "60" + sarif: "true" + pr-comment: "true" +``` + +## Inputs + +| Input | Default | Purpose | +|-------|---------|---------| +| `python-version` | `3.13` | Python version used to run the action | +| `package-version` | `""` | CodeClone version from PyPI; empty means latest stable | +| `path` | `.` | Project root to analyze | +| `json-path` | `.cache/codeclone/report.json` | JSON report output path | +| `sarif` | `true` | Generate SARIF and try to upload it | +| `sarif-path` | `.cache/codeclone/report.sarif` | SARIF output path | +| `pr-comment` | `true` | Post or update a PR summary comment | +| `fail-on-new` | `true` | Fail if new clone groups are detected | +| `fail-on-new-metrics` | `false` | Fail if metrics regress vs baseline | +| `fail-threshold` | `-1` | Max allowed function+block clone groups | +| `fail-complexity` | `-1` | Max cyclomatic complexity | +| `fail-coupling` | `-1` | Max coupling CBO | +| `fail-cohesion` | `-1` | Max cohesion LCOM4 | +| `fail-cycles` | `false` | Fail on dependency cycles | +| `fail-dead-code` | `false` | Fail on high-confidence dead code | +| `fail-health` | `-1` | Minimum health score | +| `require-baseline` | `true` | Fail early if the baseline file is missing | +| `baseline-path` | `codeclone.baseline.json` | Baseline path passed to CodeClone | +| `metrics-baseline-path` | `codeclone.baseline.json` | Metrics baseline path passed to CodeClone 
| +| `extra-args` | `""` | Additional CodeClone CLI arguments | +| `no-progress` | `true` | Disable progress output | + +For numeric gate inputs, `-1` means "disabled". + +## Outputs + +| Output | Meaning | +|--------|---------| +| `exit-code` | CodeClone process exit code | +| `json-path` | Resolved JSON report path | +| `sarif-path` | Resolved SARIF report path | +| `pr-comment-id` | PR comment id when the action updated or created a comment | + +## Exit behavior + +The action propagates the real CodeClone exit code at the end: + +- `0` — success +- `2` — contract error +- `3` — gating failure +- `5` — internal error + +SARIF upload and PR comment posting are treated as additive integrations. The +final job result is still driven by the CodeClone analysis exit code. + +## Permissions + +Recommended permissions: + +```yaml +permissions: + contents: read + security-events: write + pull-requests: write +``` + +Notes: + +- `security-events: write` is required for SARIF upload +- `pull-requests: write` is required for PR comments +- if you only want gating and JSON output, you can disable `sarif` and + `pr-comment` + +## Stable vs prerelease installs + +Stable: + +```yaml +with: + package-version: "" +``` + +Explicit prerelease: + +```yaml +with: + package-version: "2.0.0b3" +``` + +## Notes and limitations + +- For private repositories without GitHub Advanced Security, SARIF upload may + not be available. In that case, set `sarif: "false"` and rely on the PR + comment + exit code. +- The baseline file must exist in the repository when `require-baseline: true`. +- The action always generates a canonical JSON report, even if SARIF is + disabled. +- PR comments are updated in place using a hidden marker, so repeated runs do + not keep adding duplicate comments. +- Analysis has a 10-minute timeout. For very large repositories, consider + using `extra-args: "--skip-metrics"` or narrowing the scan scope. 
+ +## See also + +- [CodeClone repository](https://github.com/orenlab/codeclone) +- [Documentation](https://orenlab.github.io/codeclone/) +- [SARIF integration](https://orenlab.github.io/codeclone/sarif/) diff --git a/.github/actions/codeclone/_action_impl.py b/.github/actions/codeclone/_action_impl.py new file mode 100644 index 0000000..b8418cf --- /dev/null +++ b/.github/actions/codeclone/_action_impl.py @@ -0,0 +1,231 @@ +# SPDX-License-Identifier: MIT + +from __future__ import annotations + +import json +import shlex +import subprocess +from dataclasses import dataclass +from pathlib import Path + +COMMENT_MARKER = "" + + +@dataclass(frozen=True, slots=True) +class ActionInputs: + path: str + json_path: str + sarif: bool + sarif_path: str + fail_on_new: bool + fail_on_new_metrics: bool + fail_threshold: int | None + fail_complexity: int | None + fail_coupling: int | None + fail_cohesion: int | None + fail_cycles: bool + fail_dead_code: bool + fail_health: int | None + baseline_path: str + metrics_baseline_path: str + extra_args: str + no_progress: bool + + +@dataclass(frozen=True, slots=True) +class RunResult: + exit_code: int + json_path: str + json_exists: bool + sarif_path: str + sarif_exists: bool + + +def parse_bool(value: str) -> bool: + return value.strip().lower() == "true" + + +def parse_optional_int(value: str) -> int | None: + normalized = value.strip() + if normalized in {"", "-1"}: + return None + return int(normalized) + + +def build_codeclone_args(inputs: ActionInputs) -> list[str]: + args = [inputs.path, "--json", inputs.json_path] + if inputs.sarif: + args.extend(["--sarif", inputs.sarif_path]) + if inputs.no_progress: + args.append("--no-progress") + if inputs.fail_on_new: + args.append("--fail-on-new") + if inputs.fail_on_new_metrics: + args.append("--fail-on-new-metrics") + if inputs.fail_threshold is not None: + args.extend(["--fail-threshold", str(inputs.fail_threshold)]) + if inputs.fail_complexity is not None: + 
args.extend(["--fail-complexity", str(inputs.fail_complexity)]) + if inputs.fail_coupling is not None: + args.extend(["--fail-coupling", str(inputs.fail_coupling)]) + if inputs.fail_cohesion is not None: + args.extend(["--fail-cohesion", str(inputs.fail_cohesion)]) + if inputs.fail_cycles: + args.append("--fail-cycles") + if inputs.fail_dead_code: + args.append("--fail-dead-code") + if inputs.fail_health is not None: + args.extend(["--fail-health", str(inputs.fail_health)]) + if inputs.baseline_path.strip(): + args.extend(["--baseline", inputs.baseline_path]) + if inputs.metrics_baseline_path.strip(): + args.extend(["--metrics-baseline", inputs.metrics_baseline_path]) + if inputs.extra_args.strip(): + args.extend(shlex.split(inputs.extra_args)) + return args + + +def ensure_parent_dir(path_text: str) -> None: + Path(path_text).parent.mkdir(parents=True, exist_ok=True) + + +def write_outputs(path: str, values: dict[str, str]) -> None: + with open(path, "a", encoding="utf-8") as handle: + for key, value in values.items(): + handle.write(f"{key}={value}\n") + + +def run_codeclone(inputs: ActionInputs) -> RunResult: + ensure_parent_dir(inputs.json_path) + if inputs.sarif: + ensure_parent_dir(inputs.sarif_path) + argv = ["codeclone", *build_codeclone_args(inputs)] + try: + completed = subprocess.run(argv, check=False, timeout=600) + except subprocess.TimeoutExpired: + print("::error::CodeClone analysis timed out after 10 minutes") + return RunResult( + exit_code=5, + json_path=inputs.json_path, + json_exists=Path(inputs.json_path).exists(), + sarif_path=inputs.sarif_path, + sarif_exists=inputs.sarif and Path(inputs.sarif_path).exists(), + ) + return RunResult( + exit_code=completed.returncode, + json_path=inputs.json_path, + json_exists=Path(inputs.json_path).exists(), + sarif_path=inputs.sarif_path, + sarif_exists=inputs.sarif and Path(inputs.sarif_path).exists(), + ) + + +def _mapping(value: object) -> dict[str, object]: + return value if isinstance(value, dict) else 
{} + + +def _int(value: object, default: int = 0) -> int: + return value if isinstance(value, int) else default + + +def _str(value: object, default: str = "") -> str: + return value if isinstance(value, str) else default + + +def render_pr_comment(report: dict[str, object], *, exit_code: int) -> str: + meta = _mapping(report.get("meta")) + findings = _mapping(report.get("findings")) + findings_summary = _mapping(findings.get("summary")) + clone_summary = _mapping(findings_summary.get("clones")) + families = _mapping(findings_summary.get("families")) + metrics = _mapping(report.get("metrics")) + metrics_summary = _mapping(metrics.get("summary")) + health = _mapping(metrics_summary.get("health")) + baseline = _mapping(meta.get("baseline")) + cache = _mapping(meta.get("cache")) + + health_score = _int(health.get("score"), default=-1) + health_grade = _str(health.get("grade"), default="?") + baseline_status = _str(baseline.get("status"), default="unknown") + cache_used = bool(cache.get("used")) + codeclone_version = _str(meta.get("codeclone_version"), default="?") + + status_icon = "white_check_mark" + status_label = "Passed" + if exit_code == 3: + status_icon = "x" + status_label = "Failed (gating)" + elif exit_code != 0: + status_icon = "warning" + status_label = "Error" + + lines = [ + COMMENT_MARKER, + "## :microscope: CodeClone Report", + "", + "| Metric | Value |", + "|--------|-------|", + f"| Health | **{health_score}/100 ({health_grade})** |", + f"| Status | :{status_icon}: {status_label} |", + f"| Baseline | `{baseline_status}` |", + f"| Cache | `{'used' if cache_used else 'not used'}` |", + f"| Version | `{codeclone_version}` |", + "", + "### Findings", + "```text", + _clone_summary_line(clone_summary=clone_summary, families=families), + f"Structural: {_int(families.get('structural'))}", + f"Dead code: {_int(families.get('dead_code'))}", + f"Design: {_int(families.get('design'))}", + "```", + "", + ":robot: Generated by " + 'CodeClone', + ] + return 
"\n".join(lines) + + +def write_step_summary(path: str, body: str) -> None: + with open(path, "a", encoding="utf-8") as handle: + handle.write(body) + handle.write("\n") + + +def load_report(path: str) -> dict[str, object]: + with open(path, encoding="utf-8") as handle: + loaded = json.load(handle) + return loaded if isinstance(loaded, dict) else {} + + +def build_inputs_from_env(env: dict[str, str]) -> ActionInputs: + return ActionInputs( + path=env["INPUT_PATH"], + json_path=env["INPUT_JSON_PATH"], + sarif=parse_bool(env["INPUT_SARIF"]), + sarif_path=env["INPUT_SARIF_PATH"], + fail_on_new=parse_bool(env["INPUT_FAIL_ON_NEW"]), + fail_on_new_metrics=parse_bool(env["INPUT_FAIL_ON_NEW_METRICS"]), + fail_threshold=parse_optional_int(env["INPUT_FAIL_THRESHOLD"]), + fail_complexity=parse_optional_int(env["INPUT_FAIL_COMPLEXITY"]), + fail_coupling=parse_optional_int(env["INPUT_FAIL_COUPLING"]), + fail_cohesion=parse_optional_int(env["INPUT_FAIL_COHESION"]), + fail_cycles=parse_bool(env["INPUT_FAIL_CYCLES"]), + fail_dead_code=parse_bool(env["INPUT_FAIL_DEAD_CODE"]), + fail_health=parse_optional_int(env["INPUT_FAIL_HEALTH"]), + baseline_path=env["INPUT_BASELINE_PATH"], + metrics_baseline_path=env["INPUT_METRICS_BASELINE_PATH"], + extra_args=env["INPUT_EXTRA_ARGS"], + no_progress=parse_bool(env["INPUT_NO_PROGRESS"]), + ) + + +def _clone_summary_line( + *, + clone_summary: dict[str, object], + families: dict[str, object], +) -> str: + return ( + f"Clones: {_int(families.get('clones'))} " + f"({_int(clone_summary.get('new'))} new, " + f"{_int(clone_summary.get('known'))} known)" + ) diff --git a/.github/actions/codeclone/action.yml b/.github/actions/codeclone/action.yml index efb63f2..9532db4 100644 --- a/.github/actions/codeclone/action.yml +++ b/.github/actions/codeclone/action.yml @@ -1,7 +1,7 @@ name: CodeClone description: > - Structural code quality analysis for Python with - CI-friendly baseline enforcement. 
+ Structural code health analysis for Python with baseline-aware CI gating, + SARIF upload, and PR-friendly summaries. author: OrenLab @@ -11,35 +11,124 @@ branding: inputs: python-version: - description: "Python version to use" + description: "Python version" required: false default: "3.13" package-version: - description: "CodeClone version from PyPI (empty = latest)" + description: "CodeClone version from PyPI (empty = latest stable)" required: false default: "" path: - description: "Path to the project root" + description: "Project root" required: false default: "." - fail-on-new: - description: "Fail if new code clones are detected" + json-path: + description: "Canonical JSON report output path" + required: false + default: ".cache/codeclone/report.json" + + sarif: + description: "Generate SARIF and upload to Code Scanning" required: false default: "true" - no-progress: - description: "Disable progress output" + sarif-path: + description: "SARIF output path" + required: false + default: ".cache/codeclone/report.sarif" + + pr-comment: + description: "Post or update a PR summary comment" required: false default: "true" + fail-on-new: + description: "Fail if new clone groups are detected" + required: false + default: "true" + + fail-on-new-metrics: + description: "Fail if metrics regress vs baseline" + required: false + default: "false" + + fail-threshold: + description: "Max allowed function+block clone groups (-1 = disabled)" + required: false + default: "-1" + + fail-complexity: + description: "Max cyclomatic complexity (-1 = disabled)" + required: false + default: "-1" + + fail-coupling: + description: "Max coupling CBO (-1 = disabled)" + required: false + default: "-1" + + fail-cohesion: + description: "Max cohesion LCOM4 (-1 = disabled)" + required: false + default: "-1" + + fail-cycles: + description: "Fail if dependency cycles are detected" + required: false + default: "false" + + fail-dead-code: + description: "Fail if high-confidence dead code is 
detected" + required: false + default: "false" + + fail-health: + description: "Minimum health score (-1 = disabled)" + required: false + default: "-1" + require-baseline: - description: "Fail if codeclone.baseline.json is missing" + description: "Fail if the baseline file is missing" + required: false + default: "true" + + baseline-path: + description: "Baseline path passed to CodeClone" + required: false + default: "codeclone.baseline.json" + + metrics-baseline-path: + description: "Metrics baseline path passed to CodeClone" + required: false + default: "codeclone.baseline.json" + + extra-args: + description: "Additional CodeClone CLI arguments" + required: false + default: "" + + no-progress: + description: "Disable progress output" required: false default: "true" +outputs: + exit-code: + description: "CodeClone process exit code" + value: ${{ steps.analysis.outputs.exit-code }} + json-path: + description: "Resolved JSON report path" + value: ${{ steps.analysis.outputs.json-path }} + sarif-path: + description: "Resolved SARIF report path" + value: ${{ steps.analysis.outputs.sarif-path }} + pr-comment-id: + description: "Updated PR comment id when a PR comment was posted" + value: ${{ steps.post-pr-comment.outputs.comment-id }} + runs: using: composite steps: @@ -51,29 +140,134 @@ runs: - name: Install CodeClone shell: bash + env: + CODECLONE_VERSION: ${{ inputs.package-version }} run: | python -m pip install --upgrade pip - if [ -n "${{ inputs.package-version }}" ]; then - pip install "codeclone==${{ inputs.package-version }}" + if [ -n "${CODECLONE_VERSION}" ]; then + python -m pip install "codeclone==${CODECLONE_VERSION}" else - pip install codeclone + python -m pip install codeclone fi - - name: Verify baseline exists + - name: Verify baseline if: ${{ inputs.require-baseline == 'true' }} shell: bash + env: + INPUT_PROJECT_PATH: ${{ inputs.path }} + INPUT_BASELINE_PATH: ${{ inputs.baseline-path }} run: | - test -f "${{ inputs.path }}/codeclone.baseline.json" + 
python - <<'PY' + import os + import sys + from pathlib import Path + + project_root = Path(os.environ["INPUT_PROJECT_PATH"]) + baseline_path = Path(os.environ["INPUT_BASELINE_PATH"]) + target = baseline_path if baseline_path.is_absolute() else project_root / baseline_path + if not target.exists(): + print(f"Missing required CodeClone baseline: {target}", file=sys.stderr) + raise SystemExit(1) + PY - name: Run CodeClone + id: analysis shell: bash + env: + INPUT_PATH: ${{ inputs.path }} + INPUT_JSON_PATH: ${{ inputs.json-path }} + INPUT_SARIF: ${{ inputs.sarif }} + INPUT_SARIF_PATH: ${{ inputs.sarif-path }} + INPUT_FAIL_ON_NEW: ${{ inputs.fail-on-new }} + INPUT_FAIL_ON_NEW_METRICS: ${{ inputs.fail-on-new-metrics }} + INPUT_FAIL_THRESHOLD: ${{ inputs.fail-threshold }} + INPUT_FAIL_COMPLEXITY: ${{ inputs.fail-complexity }} + INPUT_FAIL_COUPLING: ${{ inputs.fail-coupling }} + INPUT_FAIL_COHESION: ${{ inputs.fail-cohesion }} + INPUT_FAIL_CYCLES: ${{ inputs.fail-cycles }} + INPUT_FAIL_DEAD_CODE: ${{ inputs.fail-dead-code }} + INPUT_FAIL_HEALTH: ${{ inputs.fail-health }} + INPUT_BASELINE_PATH: ${{ inputs.baseline-path }} + INPUT_METRICS_BASELINE_PATH: ${{ inputs.metrics-baseline-path }} + INPUT_EXTRA_ARGS: ${{ inputs.extra-args }} + INPUT_NO_PROGRESS: ${{ inputs.no-progress }} run: | - extra="" - if [ "${{ inputs.no-progress }}" = "true" ]; then - extra="--no-progress" - fi - if [ "${{ inputs.fail-on-new }}" = "true" ]; then - codeclone "${{ inputs.path }}" --fail-on-new $extra - else - codeclone "${{ inputs.path }}" $extra - fi + python "${{ github.action_path }}/run_codeclone.py" + + - name: Render PR summary + id: render-pr-comment + if: ${{ inputs.pr-comment == 'true' && github.event_name == 'pull_request' && steps.analysis.outputs.json-exists == 'true' }} + shell: bash + env: + REPORT_PATH: ${{ steps.analysis.outputs.json-path }} + ANALYSIS_EXIT_CODE: ${{ steps.analysis.outputs.exit-code }} + COMMENT_OUTPUT_PATH: ${{ runner.temp }}/codeclone-pr-comment.md + run: | + 
python "${{ github.action_path }}/render_pr_comment.py" + + - name: Upload SARIF + if: ${{ always() && inputs.sarif == 'true' && steps.analysis.outputs.sarif-exists == 'true' }} + continue-on-error: true + uses: github/codeql-action/upload-sarif@v3 + with: + sarif_file: ${{ steps.analysis.outputs.sarif-path }} + category: codeclone + + - name: Post or update PR comment + id: post-pr-comment + if: ${{ always() && inputs.pr-comment == 'true' && github.event_name == 'pull_request' && steps.render-pr-comment.outputs.comment-exists == 'true' }} + continue-on-error: true + uses: actions/github-script@v7 + env: + COMMENT_BODY_PATH: ${{ steps.render-pr-comment.outputs.comment-body-path }} + COMMENT_MARKER: "" + with: + script: | + const fs = require("fs"); + const body = fs.readFileSync(process.env.COMMENT_BODY_PATH, "utf8"); + const marker = process.env.COMMENT_MARKER; + const issue_number = context.issue.number; + const { owner, repo } = context.repo; + + const comments = await github.paginate( + github.rest.issues.listComments, + { + owner, + repo, + issue_number, + per_page: 100, + }, + ); + + const existing = comments.find( + (comment) => + comment.user && + comment.user.type === "Bot" && + comment.body && + comment.body.includes(marker), + ); + + let result; + if (existing) { + result = await github.rest.issues.updateComment({ + owner, + repo, + comment_id: existing.id, + body, + }); + } else { + result = await github.rest.issues.createComment({ + owner, + repo, + issue_number, + body, + }); + } + + core.setOutput("comment-id", String(result.data.id)); + + - name: Gate result + if: ${{ always() }} + shell: bash + run: | + exit "${{ steps.analysis.outputs.exit-code }}" diff --git a/.github/actions/codeclone/render_pr_comment.py b/.github/actions/codeclone/render_pr_comment.py new file mode 100644 index 0000000..1edf7b4 --- /dev/null +++ b/.github/actions/codeclone/render_pr_comment.py @@ -0,0 +1,54 @@ +# SPDX-License-Identifier: MIT + +from __future__ import 
annotations + +import os + +from _action_impl import ( + load_report, + render_pr_comment, + write_outputs, + write_step_summary, +) + + +def main() -> int: + report_path = os.environ["REPORT_PATH"] + output_path = os.environ["COMMENT_OUTPUT_PATH"] + exit_code = int(os.environ["ANALYSIS_EXIT_CODE"]) + + if not os.path.exists(report_path): + github_output = os.environ.get("GITHUB_OUTPUT") + if github_output: + write_outputs( + github_output, + { + "comment-exists": "false", + "comment-body-path": output_path, + }, + ) + return 0 + + body = render_pr_comment(load_report(report_path), exit_code=exit_code) + with open(output_path, "w", encoding="utf-8") as handle: + handle.write(body) + handle.write("\n") + + step_summary = os.environ.get("GITHUB_STEP_SUMMARY") + if step_summary: + write_step_summary(step_summary, body) + + github_output = os.environ.get("GITHUB_OUTPUT") + if github_output: + write_outputs( + github_output, + { + "comment-exists": "true", + "comment-body-path": output_path, + }, + ) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/.github/actions/codeclone/run_codeclone.py b/.github/actions/codeclone/run_codeclone.py new file mode 100644 index 0000000..1c729e9 --- /dev/null +++ b/.github/actions/codeclone/run_codeclone.py @@ -0,0 +1,28 @@ +# SPDX-License-Identifier: MIT + +from __future__ import annotations + +import os + +from _action_impl import build_inputs_from_env, run_codeclone, write_outputs + + +def main() -> int: + result = run_codeclone(build_inputs_from_env(dict(os.environ))) + github_output = os.environ.get("GITHUB_OUTPUT") + if github_output: + write_outputs( + github_output, + { + "exit-code": str(result.exit_code), + "json-path": result.json_path, + "json-exists": str(result.json_exists).lower(), + "sarif-path": result.sarif_path, + "sarif-exists": str(result.sarif_exists).lower(), + }, + ) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/.github/workflows/codeclone.yml 
b/.github/workflows/codeclone.yml new file mode 100644 index 0000000..0cebd24 --- /dev/null +++ b/.github/workflows/codeclone.yml @@ -0,0 +1,34 @@ +name: CodeClone + +on: + pull_request: + types: [opened, synchronize, reopened] + paths: ["**/*.py"] + +permissions: + contents: read + security-events: write + pull-requests: write + +concurrency: + group: codeclone-${{ github.ref }} + cancel-in-progress: true + +jobs: + codeclone: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Run CodeClone + uses: ./.github/actions/codeclone + with: + python-version: "3.13" + package-version: "2.0.0b3" + fail-on-new: "true" + fail-health: "60" + sarif: "true" + pr-comment: "true" diff --git a/CHANGELOG.md b/CHANGELOG.md index 1cd0bfb..7387527 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,33 +2,37 @@ ## [2.0.0b3] -### MCP +### MCP server -- Add optional `codeclone[mcp]` extra and `codeclone-mcp` launcher. -- Add a deterministic, read-only MCP server over the canonical pipeline and report contracts. -- Expose diff-aware MCP tools/resources for changed-files analysis, run comparison, report sections, findings, - remediation payloads, hotlists, granular checks, and gate previews. -- Add stable MCP resources for latest-run summary/report/health/gates/changed projections and schema discovery. -- Add session-local reviewed-finding state for long AI-agent workflows without mutating baseline or repo state. -- Add stable HTML deep-link anchors (`finding-{finding_id}`) for clone and structural finding cards. +- Add optional `codeclone[mcp]` extra with `codeclone-mcp` launcher (`stdio` and `streamable-http` transports). +- Expose 19 read-only tools and 9 resources over the canonical pipeline: analysis, diff-aware changed-files, run + comparison, findings/hotspots/remediation, granular checks, gate preview, PR summary, and session review markers. 
+- Bound in-memory run retention (`--history-limit`, default `4`, max `10`) and prune stale session state automatically. +- Require explicit `--allow-remote` for non-loopback `streamable-http` binds; reject `cache_policy=refresh` to preserve + read-only semantics. +- Defer MCP process-count policy to the core runtime when `processes` is not explicitly overridden. ### CLI -- Add `--changed-only`, `--diff-against`, and `--paths-from-git-diff` for changed-scope clone review and gating over a - full canonical analysis. -- Render changed-scope results as a first-class summary block in normal CLI output while keeping quiet mode compact. +- Add `--changed-only`, `--diff-against`, and `--paths-from-git-diff` for changed-scope clone review and gating. +- Render changed-scope results as a first-class summary block in normal CLI output. ### SARIF -- Stabilize `primaryLocationLineHash` across line-only shifts by hashing finding identity without line numbers. -- Emit run-unique `automationDetails.id`, optional `startTimeUtc`, and explicit result `kind: "fail"`. -- Move ancillary finding identity fields to SARIF `properties` and keep `partialFingerprints` minimal. +- Stabilize `primaryLocationLineHash` by excluding line numbers from the hash material. +- Add run-unique `automationDetails.id`, `startTimeUtc`, and explicit result `kind: "fail"`. +- Move ancillary identity fields to SARIF `properties`; keep only `primaryLocationLineHash` in `partialFingerprints`. -### HTML +### HTML report + +- Add IDE picker (PyCharm, IntelliJ IDEA, VS Code, Cursor, Fleet, Zed) with localStorage persistence. +- Make file paths across all tabs clickable IDE deep links via `jetbrains://`, `vscode://`, and other protocol schemes. +- Add stable deep-link anchors (`finding-{finding_id}`) for clone and structural finding cards. + +### GitHub Action -- Add IDE picker with persistent selection (localStorage) supporting PyCharm, IntelliJ IDEA, VS Code, Cursor, Fleet, and - Zed. 
-- Make file paths across Clones, Quality, Suggestions, Dead Code, and Findings tabs clickable IDE deep links. +- Ship composite GitHub Action v2 with configurable quality gates, SARIF upload to Code Scanning, and PR summary + comments. ## [2.0.0b2] diff --git a/README.md b/README.md index af4fc80..bed0c3a 100644 --- a/README.md +++ b/README.md @@ -69,55 +69,6 @@ uvx codeclone@latest .
    -## MCP Server - -Install MCP support only when you need the agent interface: - -```bash -pip install "codeclone[mcp]" -``` - -Then run the optional MCP launcher: - -```bash -codeclone-mcp --transport stdio -# or -codeclone-mcp --transport streamable-http --port 8000 -``` - -For local command-based clients, prefer `stdio`. Use `streamable-http` only -when the client expects a remote MCP endpoint. - -CodeClone MCP is read-only and baseline-aware. It exposes deterministic tools -for: - -- full repository analysis and changed-files analysis -- run summaries and run-to-run comparison -- findings, hotspots, remediation payloads, and PR summaries -- granular clone / complexity / coupling / cohesion / dead-code checks -- session-local review markers for long agent workflows - -It never mutates source files, baselines, or repo state. -Diff-aware MCP calls use repo-relative `changed_paths` lists (or `git_diff_ref`) -and may reuse the same `run_id` when the canonical report digest stays -unchanged. -Focused `check_*` MCP tools may trigger a full analysis first when no stored run -exists yet. - -Latest-run resources are also available for MCP-capable clients: - -- `codeclone://latest/summary` -- `codeclone://latest/report.json` -- `codeclone://latest/health` -- `codeclone://latest/gates` -- `codeclone://latest/changed` -- `codeclone://schema` - -Docs: -[MCP interface contract](https://orenlab.github.io/codeclone/book/20-mcp-interface/) -· -[MCP usage guide](https://orenlab.github.io/codeclone/mcp/) - ## CI Integration ```bash @@ -132,6 +83,28 @@ The `--ci` preset equals `--fail-on-new --no-color --quiet`. When a trusted metrics baseline is loaded, CI mode also enables `--fail-on-new-metrics`. 
+### GitHub Action + +CodeClone also ships a composite GitHub Action for PR and CI workflows: + +```yaml +- uses: orenlab/codeclone/.github/actions/codeclone@main + with: + fail-on-new: "true" + sarif: "true" + pr-comment: "true" +``` + +It can: + +- run baseline-aware gating +- generate JSON and SARIF reports +- upload SARIF to GitHub Code Scanning +- post or update a PR summary comment + +Action docs: +[.github/actions/codeclone/README.md](https://github.com/orenlab/codeclone/blob/main/.github/actions/codeclone/README.md) + ### Quality Gates ```bash @@ -160,6 +133,25 @@ repos: types: [ python ] ``` +## MCP Server + +CodeClone ships an optional read-only MCP server for AI agents and IDE clients. + +```bash +pip install "codeclone[mcp]" # install the extra +codeclone-mcp --transport stdio # local agents (Claude Code, Codex, Copilot, Gemini CLI) +codeclone-mcp --transport streamable-http --port 8000 # remote/HTTP-only clients +``` + +The server exposes 19 tools (analysis, diff-aware checks, findings, remediation, gates, PR summaries) +and 9 resources — all deterministic, baseline-aware, and read-only. +It never mutates source files, baselines, or repo state. 
+ +Docs: +[MCP usage guide](https://orenlab.github.io/codeclone/mcp/) +· +[MCP interface contract](https://orenlab.github.io/codeclone/book/20-mcp-interface/) + ## Configuration CodeClone can load project-level configuration from `pyproject.toml`: diff --git a/codeclone/_html_report/_assemble.py b/codeclone/_html_report/_assemble.py index 94cd045..b5f6308 100644 --- a/codeclone/_html_report/_assemble.py +++ b/codeclone/_html_report/_assemble.py @@ -228,7 +228,7 @@ def _tab_badge(count: int) -> str: footer_html = ( '" @@ -305,9 +305,11 @@ def _codebox_rules(css: str) -> str: out: list[str] = [] for line in css.splitlines(): stripped = line.strip() - if not stripped or stripped.startswith("/*"): - continue - if not stripped.startswith(".codebox"): + if ( + not stripped + or stripped.startswith("/*") + or not stripped.startswith(".codebox") + ): continue out.append(stripped) return "\n".join(out) diff --git a/codeclone/_html_report/_sections/_clones.py b/codeclone/_html_report/_sections/_clones.py index 11d8168..7e4a419 100644 --- a/codeclone/_html_report/_sections/_clones.py +++ b/codeclone/_html_report/_sections/_clones.py @@ -382,7 +382,9 @@ def _render_group_items_html( group_arity=group_arity, peer_count=peer_count, ) - compare_html = f'
    {compare_text}
    ' + compare_html = ( + f'
    {_escape_html(compare_text)}
    ' + ) rendered.append( f'
    None: - tmp_path = path.with_name(f"{path.name}.tmp") data = json.dumps(payload, indent=2, ensure_ascii=False) + "\n" - with tmp_path.open("wb") as tmp_file: - tmp_file.write(data.encode("utf-8")) - tmp_file.flush() - os.fsync(tmp_file.fileno()) - os.replace(tmp_path, path) + fd_num, tmp_name = tempfile.mkstemp( + dir=path.parent, + suffix=".tmp", + ) + tmp_path = Path(tmp_name) + try: + with os.fdopen(fd_num, "wb") as fd: + fd.write(data.encode("utf-8")) + fd.flush() + os.fsync(fd.fileno()) + os.replace(tmp_path, path) + except BaseException: + tmp_path.unlink(missing_ok=True) + raise def _safe_stat_size(path: Path) -> int: @@ -574,8 +583,8 @@ def _baseline_payload( sorted_functions = sorted(functions) sorted_blocks = sorted(blocks) payload_sha256 = _compute_payload_sha256( - functions=set(sorted_functions), - blocks=set(sorted_blocks), + functions=sorted_functions, + blocks=sorted_blocks, fingerprint_version=resolved_fingerprint, python_tag=resolved_python_tag, ) @@ -601,8 +610,8 @@ def _baseline_payload( def _compute_payload_sha256( *, - functions: set[str], - blocks: set[str], + functions: Collection[str], + blocks: Collection[str], fingerprint_version: str, python_tag: str, ) -> str: diff --git a/codeclone/blocks.py b/codeclone/blocks.py index 2ccad47..8aca801 100644 --- a/codeclone/blocks.py +++ b/codeclone/blocks.py @@ -49,10 +49,11 @@ def extract_blocks( for i in range(len(stmt_hash_rows) - block_size + 1): start = getattr(body[i], "lineno", None) end = getattr(body[i + block_size - 1], "end_lineno", None) - if not start or not end: - continue - - if last_start is not None and start - last_start < min_line_distance: + if ( + not start + or not end + or (last_start is not None and start - last_start < min_line_distance) + ): continue bh = "|".join(stmt_hash_rows[i : i + block_size]) diff --git a/codeclone/cache.py b/codeclone/cache.py index 18b9b44..cecc73e 100644 --- a/codeclone/cache.py +++ b/codeclone/cache.py @@ -3,16 +3,43 @@ from __future__ import 
annotations -import hashlib -import hmac -import json import os from collections.abc import Collection from enum import Enum +from json import JSONDecodeError from pathlib import Path from typing import TYPE_CHECKING, Literal, TypedDict, TypeGuard, TypeVar, cast from .baseline import current_python_tag +from .cache_io import ( + as_int_or_none as _cache_as_int, +) +from .cache_io import ( + as_object_list as _cache_as_list, +) +from .cache_io import ( + as_str_dict as _cache_as_str_dict, +) +from .cache_io import ( + as_str_or_none as _cache_as_str, +) +from .cache_io import ( + read_json_document, + sign_cache_payload, + verify_cache_payload_signature, + write_json_document_atomically, +) +from .cache_paths import runtime_filepath_from_wire, wire_filepath_from_runtime +from .cache_segments import ( + SegmentReportProjection as _SegmentReportProjection, +) +from .cache_segments import ( + build_segment_report_projection as _build_segment_report_projection, +) +from .cache_segments import ( + decode_segment_report_projection, + encode_segment_report_projection, +) from .contracts import BASELINE_FINGERPRINT_VERSION, CACHE_VERSION from .errors import CacheError from .models import ( @@ -34,6 +61,13 @@ if TYPE_CHECKING: from collections.abc import Callable, Mapping, Sequence +SegmentReportProjection = _SegmentReportProjection +build_segment_report_projection = _build_segment_report_projection +_as_str = _cache_as_str +_as_int = _cache_as_int +_as_list = _cache_as_list +_as_str_dict = _cache_as_str_dict + MAX_CACHE_SIZE_BYTES = 50 * 1024 * 1024 LEGACY_CACHE_SECRET_FILENAME = ".cache_secret" _DEFAULT_WIRE_UNIT_FLOW_PROFILES = ( @@ -163,67 +197,6 @@ class CacheData(TypedDict): files: dict[str, CacheEntry] -class SegmentReportProjection(TypedDict): - digest: str - suppressed: int - groups: dict[str, list[SegmentDict]] - - -def build_segment_report_projection( - *, - digest: str, - suppressed: int, - groups: Mapping[str, Sequence[Mapping[str, object]]], -) -> 
SegmentReportProjection: - normalized_groups: dict[str, list[SegmentDict]] = {} - for group_key in sorted(groups): - normalized_items: list[SegmentDict] = [] - for raw_item in sorted( - groups[group_key], - key=lambda item: ( - str(item.get("filepath", "")), - str(item.get("qualname", "")), - _as_int(item.get("start_line")) or 0, - _as_int(item.get("end_line")) or 0, - ), - ): - segment_hash = _as_str(raw_item.get("segment_hash")) - segment_sig = _as_str(raw_item.get("segment_sig")) - filepath = _as_str(raw_item.get("filepath")) - qualname = _as_str(raw_item.get("qualname")) - start_line = _as_int(raw_item.get("start_line")) - end_line = _as_int(raw_item.get("end_line")) - size = _as_int(raw_item.get("size")) - if ( - segment_hash is None - or segment_sig is None - or filepath is None - or qualname is None - or start_line is None - or end_line is None - or size is None - ): - continue - normalized_items.append( - SegmentGroupItem( - segment_hash=segment_hash, - segment_sig=segment_sig, - filepath=filepath, - qualname=qualname, - start_line=start_line, - end_line=end_line, - size=size, - ) - ) - if normalized_items: - normalized_groups[group_key] = normalized_items - return { - "digest": digest, - "suppressed": max(0, int(suppressed)), - "groups": normalized_groups, - } - - def _normalize_cached_structural_group( group: StructuralFindingGroupDict, *, @@ -421,12 +394,6 @@ def _reject_version_mismatch(self, version: str) -> CacheData | None: schema_version=version, ) - @staticmethod - def _sign_data(data: Mapping[str, object]) -> str: - """Create deterministic SHA-256 signature for canonical payload data.""" - canonical = _canonical_json(data) - return hashlib.sha256(canonical.encode("utf-8")).hexdigest() - def load(self) -> None: try: exists = self.path.exists() @@ -455,7 +422,7 @@ def load(self) -> None: ) return - raw_obj: object = json.loads(self.path.read_text("utf-8")) + raw_obj = read_json_document(self.path) parsed = self._load_and_validate(raw_obj) if parsed 
is None: return @@ -470,7 +437,7 @@ def load(self) -> None: f"Cache unreadable; ignoring cache: {e}", status=CacheStatus.UNREADABLE, ) - except json.JSONDecodeError: + except JSONDecodeError: self._ignore_cache( "Cache corrupted; ignoring cache.", status=CacheStatus.INVALID_JSON, @@ -499,8 +466,7 @@ def _load_and_validate(self, raw_obj: object) -> CacheData | None: if sig is None or payload is None: return self._reject_invalid_cache_format(schema_version=version) - expected_sig = self._sign_data(payload) - if not hmac.compare_digest(sig, expected_sig): + if not verify_cache_payload_signature(payload, sig): return self._reject_cache_load( "Cache signature mismatch; ignoring cache.", status=CacheStatus.INTEGRITY_FAILED, @@ -556,13 +522,14 @@ def _load_and_validate(self, raw_obj: object) -> CacheData | None: parsed_files: dict[str, CacheEntry] = {} for wire_path, file_entry_obj in files_dict.items(): - runtime_path = self._runtime_filepath_from_wire(wire_path) + runtime_path = runtime_filepath_from_wire(wire_path, root=self.root) parsed_entry = self._decode_entry(file_entry_obj, runtime_path) if parsed_entry is None: return self._reject_invalid_cache_format(schema_version=version) parsed_files[runtime_path] = _canonicalize_cache_entry(parsed_entry) - self.segment_report_projection = self._decode_segment_report_projection( - payload.get("sr") + self.segment_report_projection = decode_segment_report_projection( + payload.get("sr"), + root=self.root, ) self.cache_schema_version = version @@ -578,10 +545,10 @@ def save(self) -> None: if not self._dirty: return try: - self.path.parent.mkdir(parents=True, exist_ok=True) wire_files: dict[str, object] = {} wire_map = { - rp: self._wire_filepath_from_runtime(rp) for rp in self.data["files"] + rp: wire_filepath_from_runtime(rp, root=self.root) + for rp in self.data["files"] } for runtime_path in sorted(self.data["files"], key=wire_map.__getitem__): entry = self.get_file_entry(runtime_path) @@ -595,22 +562,18 @@ def save(self) 
-> None: "ap": self.analysis_profile, "files": wire_files, } - segment_projection = self._encode_segment_report_projection() + segment_projection = encode_segment_report_projection( + self.segment_report_projection, + root=self.root, + ) if segment_projection is not None: payload["sr"] = segment_projection signed_doc = { "v": self._CACHE_VERSION, "payload": payload, - "sig": self._sign_data(payload), + "sig": sign_cache_payload(payload), } - - tmp_path = self.path.with_name(f"{self.path.name}.tmp") - data = _canonical_json(signed_doc).encode("utf-8") - with tmp_path.open("wb") as tmp_file: - tmp_file.write(data) - tmp_file.flush() - os.fsync(tmp_file.fileno()) - os.replace(tmp_path, self.path) + write_json_document_atomically(self.path, signed_doc) self._dirty = False self.data["version"] = self._CACHE_VERSION @@ -629,131 +592,6 @@ def _decode_entry(value: object, filepath: str) -> CacheEntry | None: def _encode_entry(entry: CacheEntry) -> dict[str, object]: return _encode_wire_file_entry(entry) - def _wire_filepath_from_runtime(self, runtime_filepath: str) -> str: - runtime_path = Path(runtime_filepath) - if self.root is None: - return runtime_path.as_posix() - - try: - relative = runtime_path.relative_to(self.root) - return relative.as_posix() - except ValueError: - pass - - try: - relative = runtime_path.resolve().relative_to(self.root.resolve()) - return relative.as_posix() - except OSError: - return runtime_path.as_posix() - except ValueError: - return runtime_path.as_posix() - - def _runtime_filepath_from_wire(self, wire_filepath: str) -> str: - wire_path = Path(wire_filepath) - if self.root is None or wire_path.is_absolute(): - return str(wire_path) - - combined = self.root / wire_path - try: - return str(combined.resolve(strict=False)) - except OSError: - return str(combined) - - def _decode_segment_report_projection( - self, - value: object, - ) -> SegmentReportProjection | None: - obj = _as_str_dict(value) - if obj is None: - return None - digest = 
_as_str(obj.get("d")) - suppressed = _as_int(obj.get("s")) - groups_raw = _as_list(obj.get("g")) - if digest is None or suppressed is None or groups_raw is None: - return None - groups: dict[str, list[SegmentDict]] = {} - for group_row in groups_raw: - group_list = _as_list(group_row) - if group_list is None or len(group_list) != 2: - return None - group_key = _as_str(group_list[0]) - items_raw = _as_list(group_list[1]) - if group_key is None or items_raw is None: - return None - items: list[SegmentDict] = [] - for item_raw in items_raw: - item_list = _as_list(item_raw) - if item_list is None or len(item_list) != 7: - return None - wire_filepath = _as_str(item_list[0]) - qualname = _as_str(item_list[1]) - start_line = _as_int(item_list[2]) - end_line = _as_int(item_list[3]) - size = _as_int(item_list[4]) - segment_hash = _as_str(item_list[5]) - segment_sig = _as_str(item_list[6]) - if ( - wire_filepath is None - or qualname is None - or start_line is None - or end_line is None - or size is None - or segment_hash is None - or segment_sig is None - ): - return None - items.append( - SegmentGroupItem( - segment_hash=segment_hash, - segment_sig=segment_sig, - filepath=self._runtime_filepath_from_wire(wire_filepath), - qualname=qualname, - start_line=start_line, - end_line=end_line, - size=size, - ) - ) - groups[group_key] = items - return { - "digest": digest, - "suppressed": max(0, suppressed), - "groups": groups, - } - - def _encode_segment_report_projection(self) -> dict[str, object] | None: - projection = self.segment_report_projection - if projection is None: - return None - groups_rows: list[list[object]] = [] - for group_key in sorted(projection["groups"]): - items = sorted( - projection["groups"][group_key], - key=lambda item: ( - item["filepath"], - item["qualname"], - item["start_line"], - item["end_line"], - ), - ) - encoded_items = [ - [ - self._wire_filepath_from_runtime(item["filepath"]), - item["qualname"], - item["start_line"], - item["end_line"], - 
item["size"], - item["segment_hash"], - item["segment_sig"], - ] - for item in items - ] - groups_rows.append([group_key, encoded_items]) - return { - "d": projection["digest"], - "s": max(0, int(projection["suppressed"])), - "g": groups_rows, - } - def _store_canonical_file_entry( self, *, @@ -772,8 +610,8 @@ def get_file_entry(self, filepath: str) -> CacheEntry | None: runtime_lookup_key = filepath entry_obj = self.data["files"].get(runtime_lookup_key) if entry_obj is None: - wire_key = self._wire_filepath_from_runtime(filepath) - runtime_lookup_key = self._runtime_filepath_from_wire(wire_key) + wire_key = wire_filepath_from_runtime(filepath, root=self.root) + runtime_lookup_key = runtime_filepath_from_wire(wire_key, root=self.root) entry_obj = self.data["files"].get(runtime_lookup_key) if entry_obj is None: @@ -858,8 +696,9 @@ def put_file_entry( file_metrics: FileMetrics | None = None, structural_findings: list[StructuralFindingGroup] | None = None, ) -> None: - runtime_path = self._runtime_filepath_from_wire( - self._wire_filepath_from_runtime(filepath) + runtime_path = runtime_filepath_from_wire( + wire_filepath_from_runtime(filepath, root=self.root), + root=self.root, ) unit_rows = [_unit_dict_from_model(unit, runtime_path) for unit in units] @@ -953,22 +792,6 @@ def _empty_cache_data( ) -def _canonical_json(data: object) -> str: - return json.dumps(data, sort_keys=True, separators=(",", ":"), ensure_ascii=False) - - -def _as_str(value: object) -> str | None: - return value if isinstance(value, str) else None - - -def _as_int(value: object) -> int | None: - return value if isinstance(value, int) else None - - -def _as_list(value: object) -> list[object] | None: - return value if isinstance(value, list) else None - - def _as_risk_literal(value: object) -> Literal["low", "medium", "high"] | None: match value: case "low": @@ -1181,6 +1004,13 @@ def _as_typed_string_list(value: object) -> list[str] | None: return _as_typed_list(value, predicate=lambda item: 
isinstance(item, str)) +def _normalized_optional_string_list(value: object) -> list[str] | None: + items = _as_typed_string_list(value) + if not items: + return None + return sorted(set(items)) + + def _is_canonical_cache_entry(value: object) -> TypeGuard[CacheEntry]: return isinstance(value, dict) and _has_cache_entry_container_shape(value) @@ -1311,15 +1141,6 @@ def _decode_wire_qualname_span_size( return qualname, start_line, end_line, size -def _as_str_dict(value: object) -> dict[str, object] | None: - if not isinstance(value, dict): - return None - for key in value: - if not isinstance(key, str): - return None - return value - - def _as_analysis_profile(value: object) -> AnalysisProfile | None: obj = _as_str_dict(value) if obj is None: @@ -2135,6 +1956,15 @@ def _encode_wire_file_entry(entry: CacheEntry) -> dict[str, object]: ), ) if class_metrics: + coupled_classes_rows: list[list[object]] = [] + + def _append_coupled_classes_row(metric: ClassMetricsDict) -> None: + coupled_classes = _normalized_optional_string_list( + metric.get("coupled_classes", []) + ) + if coupled_classes: + coupled_classes_rows.append([metric["qualname"], coupled_classes]) + wire["cm"] = [ [ metric["qualname"], @@ -2149,15 +1979,8 @@ def _encode_wire_file_entry(entry: CacheEntry) -> dict[str, object]: ] for metric in class_metrics ] - coupled_classes_rows = [] for metric in class_metrics: - coupled_classes_raw = metric.get("coupled_classes", []) - if not _is_string_list(coupled_classes_raw): - continue - coupled_classes = sorted(set(coupled_classes_raw)) - if not coupled_classes: - continue - coupled_classes_rows.append([metric["qualname"], coupled_classes]) + _append_coupled_classes_row(metric) if coupled_classes_rows: wire["cc"] = coupled_classes_rows @@ -2199,10 +2022,9 @@ def _encode_wire_file_entry(entry: CacheEntry) -> dict[str, object]: candidate["kind"], ] suppressed_rules = candidate.get("suppressed_rules", []) - if _is_string_list(suppressed_rules): - normalized_rules = 
sorted(set(suppressed_rules)) - if normalized_rules: - encoded.append(normalized_rules) + normalized_rules = _normalized_optional_string_list(suppressed_rules) + if normalized_rules: + encoded.append(normalized_rules) encoded_dead_candidates.append(encoded) wire["dc"] = encoded_dead_candidates diff --git a/codeclone/cache_io.py b/codeclone/cache_io.py new file mode 100644 index 0000000..e63e408 --- /dev/null +++ b/codeclone/cache_io.py @@ -0,0 +1,68 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import hashlib +import hmac +import json +import os +import tempfile +from collections.abc import Mapping +from pathlib import Path + + +def as_str_or_none(value: object) -> str | None: + return value if isinstance(value, str) else None + + +def as_int_or_none(value: object) -> int | None: + return value if isinstance(value, int) else None + + +def as_object_list(value: object) -> list[object] | None: + return value if isinstance(value, list) else None + + +def as_str_dict(value: object) -> dict[str, object] | None: + if not isinstance(value, dict): + return None + if not all(isinstance(key, str) for key in value): + return None + return value + + +def canonical_json(data: object) -> str: + return json.dumps(data, sort_keys=True, separators=(",", ":"), ensure_ascii=False) + + +def sign_cache_payload(data: Mapping[str, object]) -> str: + canonical = canonical_json(data) + return hashlib.sha256(canonical.encode("utf-8")).hexdigest() + + +def verify_cache_payload_signature( + payload: Mapping[str, object], + signature: str, +) -> bool: + return hmac.compare_digest(signature, sign_cache_payload(payload)) + + +def read_json_document(path: Path) -> object: + return json.loads(path.read_text("utf-8")) + + +def write_json_document_atomically(path: Path, document: object) -> None: + path.parent.mkdir(parents=True, exist_ok=True) + data = canonical_json(document).encode("utf-8") + fd_num, tmp_name = 
tempfile.mkstemp(dir=path.parent, suffix=".tmp") + tmp_path = Path(tmp_name) + try: + with os.fdopen(fd_num, "wb") as fd: + fd.write(data) + fd.flush() + os.fsync(fd.fileno()) + os.replace(tmp_path, path) + except BaseException: + tmp_path.unlink(missing_ok=True) + raise diff --git a/codeclone/cache_paths.py b/codeclone/cache_paths.py new file mode 100644 index 0000000..62d0d82 --- /dev/null +++ b/codeclone/cache_paths.py @@ -0,0 +1,46 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from pathlib import Path + + +def wire_filepath_from_runtime( + runtime_filepath: str, + *, + root: Path | None, +) -> str: + runtime_path = Path(runtime_filepath) + if root is None: + return runtime_path.as_posix() + + try: + relative = runtime_path.relative_to(root) + return relative.as_posix() + except ValueError: + pass + + try: + relative = runtime_path.resolve().relative_to(root.resolve()) + return relative.as_posix() + except OSError: + return runtime_path.as_posix() + except ValueError: + return runtime_path.as_posix() + + +def runtime_filepath_from_wire( + wire_filepath: str, + *, + root: Path | None, +) -> str: + wire_path = Path(wire_filepath) + if root is None or wire_path.is_absolute(): + return str(wire_path) + + combined = root / wire_path + try: + return str(combined.resolve(strict=False)) + except OSError: + return str(combined) diff --git a/codeclone/cache_segments.py b/codeclone/cache_segments.py new file mode 100644 index 0000000..df4bca7 --- /dev/null +++ b/codeclone/cache_segments.py @@ -0,0 +1,181 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from collections.abc import Mapping, Sequence +from pathlib import Path +from typing import TypedDict + +from .cache_io import ( + as_int_or_none, + as_object_list, + as_str_dict, + as_str_or_none, +) +from .cache_paths import runtime_filepath_from_wire, wire_filepath_from_runtime +from .models 
import SegmentGroupItem + +SegmentDict = SegmentGroupItem + + +class SegmentReportProjection(TypedDict): + digest: str + suppressed: int + groups: dict[str, list[SegmentDict]] + + +def build_segment_report_projection( + *, + digest: str, + suppressed: int, + groups: Mapping[str, Sequence[Mapping[str, object]]], +) -> SegmentReportProjection: + normalized_groups: dict[str, list[SegmentDict]] = {} + for group_key in sorted(groups): + normalized_items: list[SegmentDict] = [] + for raw_item in sorted( + groups[group_key], + key=lambda item: ( + str(item.get("filepath", "")), + str(item.get("qualname", "")), + as_int_or_none(item.get("start_line")) or 0, + as_int_or_none(item.get("end_line")) or 0, + ), + ): + segment_hash = as_str_or_none(raw_item.get("segment_hash")) + segment_sig = as_str_or_none(raw_item.get("segment_sig")) + filepath = as_str_or_none(raw_item.get("filepath")) + qualname = as_str_or_none(raw_item.get("qualname")) + start_line = as_int_or_none(raw_item.get("start_line")) + end_line = as_int_or_none(raw_item.get("end_line")) + size = as_int_or_none(raw_item.get("size")) + if ( + segment_hash is None + or segment_sig is None + or filepath is None + or qualname is None + or start_line is None + or end_line is None + or size is None + ): + continue + normalized_items.append( + SegmentGroupItem( + segment_hash=segment_hash, + segment_sig=segment_sig, + filepath=filepath, + qualname=qualname, + start_line=start_line, + end_line=end_line, + size=size, + ) + ) + if normalized_items: + normalized_groups[group_key] = normalized_items + return { + "digest": digest, + "suppressed": max(0, int(suppressed)), + "groups": normalized_groups, + } + + +def decode_segment_report_projection( + value: object, + *, + root: Path | None, +) -> SegmentReportProjection | None: + obj = as_str_dict(value) + if obj is None: + return None + digest = as_str_or_none(obj.get("d")) + suppressed = as_int_or_none(obj.get("s")) + groups_raw = as_object_list(obj.get("g")) + if digest is 
None or suppressed is None or groups_raw is None: + return None + groups: dict[str, list[SegmentDict]] = {} + for group_row in groups_raw: + group_list = as_object_list(group_row) + if group_list is None or len(group_list) != 2: + return None + group_key = as_str_or_none(group_list[0]) + items_raw = as_object_list(group_list[1]) + if group_key is None or items_raw is None: + return None + items: list[SegmentDict] = [] + for item_raw in items_raw: + item_list = as_object_list(item_raw) + if item_list is None or len(item_list) != 7: + return None + wire_filepath = as_str_or_none(item_list[0]) + qualname = as_str_or_none(item_list[1]) + start_line = as_int_or_none(item_list[2]) + end_line = as_int_or_none(item_list[3]) + size = as_int_or_none(item_list[4]) + segment_hash = as_str_or_none(item_list[5]) + segment_sig = as_str_or_none(item_list[6]) + if ( + wire_filepath is None + or qualname is None + or start_line is None + or end_line is None + or size is None + or segment_hash is None + or segment_sig is None + ): + return None + items.append( + SegmentGroupItem( + segment_hash=segment_hash, + segment_sig=segment_sig, + filepath=runtime_filepath_from_wire(wire_filepath, root=root), + qualname=qualname, + start_line=start_line, + end_line=end_line, + size=size, + ) + ) + groups[group_key] = items + return { + "digest": digest, + "suppressed": max(0, suppressed), + "groups": groups, + } + + +def encode_segment_report_projection( + projection: SegmentReportProjection | None, + *, + root: Path | None, +) -> dict[str, object] | None: + if projection is None: + return None + groups_rows: list[list[object]] = [] + for group_key in sorted(projection["groups"]): + items = sorted( + projection["groups"][group_key], + key=lambda item: ( + item["filepath"], + item["qualname"], + item["start_line"], + item["end_line"], + ), + ) + encoded_items = [ + [ + wire_filepath_from_runtime(item["filepath"], root=root), + item["qualname"], + item["start_line"], + item["end_line"], + 
item["size"], + item["segment_hash"], + item["segment_sig"], + ] + for item in items + ] + groups_rows.append([group_key, encoded_items]) + return { + "d": projection["digest"], + "s": max(0, int(projection["suppressed"])), + "g": groups_rows, + } diff --git a/codeclone/cli.py b/codeclone/cli.py index 2a97ef9..d996bf6 100644 --- a/codeclone/cli.py +++ b/codeclone/cli.py @@ -140,6 +140,8 @@ "report", ] +# Lazy singleton for pipeline module — deferred import to keep CLI startup fast. +# Tests monkeypatch this via _pipeline_module() to inject mocks. _PIPELINE_MODULE: ModuleType | None = None @@ -258,6 +260,13 @@ def _normalize_changed_paths( def _git_diff_changed_paths(*, root_path: Path, git_diff_ref: str) -> tuple[str, ...]: + if git_diff_ref.startswith("-"): + console.print( + ui.fmt_contract_error( + f"Invalid git diff ref '{git_diff_ref}': must not start with '-'." + ) + ) + sys.exit(ExitCode.CONTRACT_ERROR) try: completed = subprocess.run( ["git", "diff", "--name-only", git_diff_ref, "--"], diff --git a/codeclone/extractor.py b/codeclone/extractor.py index 116731a..6eebd40 100644 --- a/codeclone/extractor.py +++ b/codeclone/extractor.py @@ -75,8 +75,11 @@ class _ParseTimeoutError(Exception): pass +# Sync or async function definition node. FunctionNode = ast.FunctionDef | ast.AsyncFunctionDef +# Any named declaration: function, async function, or class. _NamedDeclarationNode = FunctionNode | ast.ClassDef +# Unique key for a declaration's token index: (start_line, end_line, qualname). 
_DeclarationTokenIndexKey = tuple[int, int, str] @@ -649,18 +652,18 @@ def _resolve_referenced_qualnames( for attr_node in state.attr_nodes: base = attr_node.value - if not isinstance(base, ast.Name): - continue - imported_module = state.imported_module_aliases.get(base.id) - if imported_module is not None: - resolved.add(f"{imported_module}:{attr_node.attr}") - continue - class_qualname = top_level_class_by_name.get(base.id) - if class_qualname is None: - continue - local_method_qualname = f"{module_name}:{class_qualname}.{attr_node.attr}" - if local_method_qualname in local_method_qualnames: - resolved.add(local_method_qualname) + if isinstance(base, ast.Name): + imported_module = state.imported_module_aliases.get(base.id) + if imported_module is not None: + resolved.add(f"{imported_module}:{attr_node.attr}") + else: + class_qualname = top_level_class_by_name.get(base.id) + if class_qualname is not None: + local_method_qualname = ( + f"{module_name}:{class_qualname}.{attr_node.attr}" + ) + if local_method_qualname in local_method_qualnames: + resolved.add(local_method_qualname) return frozenset(resolved) @@ -694,16 +697,14 @@ def _collect_module_walk_data( state=state, collect_referenced_names=collect_referenced_names, ) - continue - if isinstance(node, ast.ImportFrom): + elif isinstance(node, ast.ImportFrom): _collect_import_from_node( node=node, module_name=module_name, state=state, collect_referenced_names=collect_referenced_names, ) - continue - if collect_referenced_names: + elif collect_referenced_names: _collect_load_reference_node(node=node, state=state) deps_sorted = tuple( @@ -767,27 +768,25 @@ def _collect_dead_candidates( suppression_index=suppression_index, protocol_class_qualnames=protocol_class_qualnames, ) - if candidate is None: - continue - candidates.append(candidate) + if candidate is not None: + candidates.append(candidate) for class_qualname, class_node in collector.class_nodes: span = _node_line_span(class_node) - if span is None: - 
continue - start, end = span - candidates.append( - _build_dead_candidate( - module_name=module_name, - local_name=class_qualname, - node=class_node, - filepath=filepath, - kind="class", - suppression_index=suppression_index, - start_line=start, - end_line=end, + if span is not None: + start, end = span + candidates.append( + _build_dead_candidate( + module_name=module_name, + local_name=class_qualname, + node=class_node, + filepath=filepath, + kind="class", + suppression_index=suppression_index, + start_line=start, + end_line=end, + ) ) - ) return tuple( sorted( @@ -1009,7 +1008,6 @@ def extract_units_and_stats_from_source( risk = risk_level(complexity) raw_hash = _raw_source_hash_for_range(source_lines, start, end) - # Function-level unit (including __init__) units.append( Unit( qualname=qualname, @@ -1037,7 +1035,6 @@ def extract_units_and_stats_from_source( ) ) - # Block-level and segment-level units share statement hashes needs_blocks = ( not local_name.endswith("__init__") and loc >= block_min_loc @@ -1077,7 +1074,6 @@ def extract_units_and_stats_from_source( ) ) - # Structural findings extraction (report-only, no re-parse) if collect_structural_findings: structural_findings.extend(structure_facts.structural_findings) diff --git a/codeclone/mcp_server.py b/codeclone/mcp_server.py index 902ce4d..7523775 100644 --- a/codeclone/mcp_server.py +++ b/codeclone/mcp_server.py @@ -4,6 +4,7 @@ from __future__ import annotations import argparse +import ipaddress import sys from collections.abc import Callable from typing import TYPE_CHECKING, Any, Literal, TypeVar, cast @@ -11,9 +12,12 @@ from . 
import __version__ from .contracts import DOCS_URL from .mcp_service import ( + DEFAULT_MCP_HISTORY_LIMIT, + MAX_MCP_HISTORY_LIMIT, CodeCloneMCPService, MCPAnalysisRequest, MCPGateRequest, + _validated_history_limit, ) if TYPE_CHECKING: @@ -64,7 +68,7 @@ def _load_mcp_runtime() -> tuple[type[FastMCP], ToolAnnotations, ToolAnnotations def build_mcp_server( *, - history_limit: int = 16, + history_limit: int = DEFAULT_MCP_HISTORY_LIMIT, host: str = "127.0.0.1", port: int = 8000, json_response: bool = False, @@ -73,7 +77,7 @@ def build_mcp_server( log_level: Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] = "INFO", ) -> FastMCP: runtime_fastmcp, read_only_tool, session_tool = _load_mcp_runtime() - service = CodeCloneMCPService(history_limit=history_limit) + service = CodeCloneMCPService(history_limit=_validated_history_limit(history_limit)) mcp = runtime_fastmcp( name="CodeClone", instructions=_SERVER_INSTRUCTIONS, @@ -404,10 +408,10 @@ def compare_runs( @tool( title="Check Complexity", description=( - "Return complexity hotspots for a path or repository. If no run " - "exists yet, this triggers a full analysis first." + "Return complexity hotspots from a compatible stored run. " + "Use analyze_repository first if no full run is available." ), - annotations=session_tool, + annotations=read_only_tool, structured_output=True, ) def check_complexity( @@ -430,10 +434,10 @@ def check_complexity( @tool( title="Check Clones", description=( - "Return clone findings for a path or repository. If no run exists " - "yet, this triggers a full analysis first." + "Return clone findings from a compatible stored run. " + "Use analyze_repository first if no compatible run is available." ), - annotations=session_tool, + annotations=read_only_tool, structured_output=True, ) def check_clones( @@ -458,10 +462,10 @@ def check_clones( @tool( title="Check Coupling", description=( - "Return coupling hotspots for a path or repository. 
If no run " - "exists yet, this triggers a full analysis first." + "Return coupling hotspots from a compatible stored run. " + "Use analyze_repository first if no full run is available." ), - annotations=session_tool, + annotations=read_only_tool, structured_output=True, ) def check_coupling( @@ -482,10 +486,10 @@ def check_coupling( @tool( title="Check Cohesion", description=( - "Return cohesion hotspots for a path or repository. If no run " - "exists yet, this triggers a full analysis first." + "Return cohesion hotspots from a compatible stored run. " + "Use analyze_repository first if no full run is available." ), - annotations=session_tool, + annotations=read_only_tool, structured_output=True, ) def check_cohesion( @@ -506,10 +510,10 @@ def check_cohesion( @tool( title="Check Dead Code", description=( - "Return dead-code findings for a path or repository. If no run " - "exists yet, this triggers a full analysis first." + "Return dead-code findings from a compatible stored run. " + "Use analyze_repository first if no full run is available." ), - annotations=session_tool, + annotations=read_only_tool, structured_output=True, ) def check_dead_code( @@ -576,6 +580,18 @@ def mark_finding_reviewed( def list_reviewed_findings(run_id: str | None = None) -> dict[str, object]: return service.list_reviewed_findings(run_id=run_id) + @tool( + title="Clear Session Runs", + description=( + "Clear all in-memory MCP analysis runs and ephemeral session state " + "for this server process." 
+ ), + annotations=session_tool, + structured_output=True, + ) + def clear_session_runs() -> dict[str, object]: + return service.clear_session_runs() + @resource( "codeclone://latest/summary", title="Latest Run Summary", @@ -662,6 +678,19 @@ def run_finding_resource(run_id: str, finding_id: str) -> str: return mcp +def _history_limit_arg(value: str) -> int: + try: + parsed = int(value) + except ValueError as exc: + raise argparse.ArgumentTypeError( + f"history limit must be an integer between 1 and {MAX_MCP_HISTORY_LIMIT}." + ) from exc + try: + return _validated_history_limit(parsed) + except ValueError as exc: + raise argparse.ArgumentTypeError(str(exc)) from exc + + def build_parser() -> argparse.ArgumentParser: parser = argparse.ArgumentParser( prog="codeclone-mcp", @@ -681,6 +710,15 @@ def build_parser() -> argparse.ArgumentParser: default="127.0.0.1", help="Host to bind when using streamable-http.", ) + parser.add_argument( + "--allow-remote", + action=argparse.BooleanOptionalAction, + default=False, + help=( + "Allow binding streamable-http to a non-loopback host. " + "Disabled by default because CodeClone MCP has no built-in authentication." + ), + ) parser.add_argument( "--port", type=int, @@ -689,9 +727,12 @@ def build_parser() -> argparse.ArgumentParser: ) parser.add_argument( "--history-limit", - type=int, - default=16, - help="Maximum number of in-memory analysis runs retained by the server.", + type=_history_limit_arg, + default=DEFAULT_MCP_HISTORY_LIMIT, + help=( + "Maximum number of in-memory analysis runs retained by the server " + f"(1-{MAX_MCP_HISTORY_LIMIT}, default: {DEFAULT_MCP_HISTORY_LIMIT})." 
+ ), ) parser.add_argument( "--json-response", @@ -720,8 +761,34 @@ def build_parser() -> argparse.ArgumentParser: return parser +def _host_is_loopback(host: str) -> bool: + cleaned = host.strip().strip("[]") + if not cleaned: + return False + if cleaned.lower() == "localhost": + return True + try: + return ipaddress.ip_address(cleaned).is_loopback + except ValueError: + return False + + def main() -> None: args = build_parser().parse_args() + if ( + args.transport == "streamable-http" + and not args.allow_remote + and not _host_is_loopback(args.host) + ): + print( + ( + "Refusing to bind CodeClone MCP streamable-http to non-loopback " + f"host '{args.host}' without --allow-remote. " + "The server has no built-in authentication." + ), + file=sys.stderr, + ) + raise SystemExit(2) try: server = build_mcp_server( history_limit=args.history_limit, diff --git a/codeclone/mcp_service.py b/codeclone/mcp_service.py index 8f765be..169a6d4 100644 --- a/codeclone/mcp_service.py +++ b/codeclone/mcp_service.py @@ -11,7 +11,7 @@ from dataclasses import dataclass from pathlib import Path from threading import RLock -from typing import Literal, cast +from typing import Final, Literal, cast from . 
import __version__, _coerce from ._cli_args import ( @@ -22,7 +22,6 @@ DEFAULT_MAX_CACHE_SIZE_MB, DEFAULT_MIN_LOC, DEFAULT_MIN_STMT, - DEFAULT_PROCESSES, DEFAULT_ROOT, DEFAULT_SEGMENT_MIN_LOC, DEFAULT_SEGMENT_MIN_STMT, @@ -42,12 +41,13 @@ validate_numeric_args, ) from .baseline import Baseline -from .cache import Cache, CacheStatus, build_segment_report_projection +from .cache import Cache, CacheStatus from .contracts import ( DEFAULT_COHESION_THRESHOLD, DEFAULT_COMPLEXITY_THRESHOLD, DEFAULT_COUPLING_THRESHOLD, REPORT_SCHEMA_VERSION, + ExitCode, ) from .domain.findings import ( CATEGORY_CLONE, @@ -75,17 +75,15 @@ SEVERITY_INFO, SEVERITY_WARNING, ) -from .errors import CacheError -from .models import MetricsDiff -from .normalize import NormalizationConfig +from .models import MetricsDiff, ProjectMetrics, Suggestion from .pipeline import ( - AnalysisResult, - BootstrapResult, + GatingResult, + MetricGateConfig, OutputPaths, analyze, bootstrap, discover, - gate, + metric_gate_reasons, process, report, ) @@ -142,32 +140,32 @@ "metrics_baseline", } ) -_RESOURCE_SECTION_MAP: dict[str, ReportSection] = { +_RESOURCE_SECTION_MAP: Final[dict[str, ReportSection]] = { "report.json": "all", "summary": "meta", "health": "metrics", "changed": "changed", "overview": "derived", } -_SEVERITY_WEIGHT = { +_SEVERITY_WEIGHT: Final[dict[str, float]] = { SEVERITY_CRITICAL: 1.0, SEVERITY_WARNING: 0.6, SEVERITY_INFO: 0.2, } -_EFFORT_WEIGHT = { +_EFFORT_WEIGHT: Final[dict[str, float]] = { EFFORT_EASY: 1.0, EFFORT_MODERATE: 0.6, EFFORT_HARD: 0.3, } -_NOVELTY_WEIGHT = {"new": 1.0, "known": 0.5} -_RUNTIME_WEIGHT = { +_NOVELTY_WEIGHT: Final[dict[str, float]] = {"new": 1.0, "known": 0.5} +_RUNTIME_WEIGHT: Final[dict[str, float]] = { "production": 1.0, "mixed": 0.8, "tests": 0.4, "fixtures": 0.2, "other": 0.5, } -_CONFIDENCE_WEIGHT = { +_CONFIDENCE_WEIGHT: Final[dict[str, float]] = { CONFIDENCE_HIGH: 1.0, CONFIDENCE_MEDIUM: 0.7, CONFIDENCE_LOW: 0.3, @@ -184,6 +182,8 @@ _VALID_DETAIL_LEVELS = 
frozenset({"summary", "normal", "full"}) _VALID_COMPARISON_FOCUS = frozenset({"all", "clones", "structural", "metrics"}) _VALID_PR_SUMMARY_FORMATS = frozenset({"markdown", "json"}) +DEFAULT_MCP_HISTORY_LIMIT = 4 +MAX_MCP_HISTORY_LIMIT = 10 _VALID_REPORT_SECTIONS = frozenset( { "all", @@ -374,6 +374,10 @@ def _git_diff_lines_payload( root_path: Path, git_diff_ref: str, ) -> tuple[str, ...]: + if git_diff_ref.startswith("-"): + raise MCPGitDiffError( + f"Invalid git diff ref '{git_diff_ref}': must not start with '-'." + ) try: completed = subprocess.run( ["git", "diff", "--name-only", git_diff_ref, "--"], @@ -404,6 +408,14 @@ def _load_report_document_payload(report_json: str) -> dict[str, object]: return dict(payload) +def _validated_history_limit(history_limit: int) -> int: + if not 1 <= history_limit <= MAX_MCP_HISTORY_LIMIT: + raise ValueError( + f"history_limit must be between 1 and {MAX_MCP_HISTORY_LIMIT}." + ) + return history_limit + + class MCPServiceError(RuntimeError): """Base class for CodeClone MCP service errors.""" @@ -479,21 +491,23 @@ class MCPRunRecord: root: Path request: MCPAnalysisRequest report_document: dict[str, object] - report_json: str summary: dict[str, object] changed_paths: tuple[str, ...] changed_projection: dict[str, object] | None warnings: tuple[str, ...] failures: tuple[str, ...] - analysis: AnalysisResult + func_clones_count: int + block_clones_count: int + project_metrics: ProjectMetrics | None + suggestions: tuple[Suggestion, ...] 
new_func: frozenset[str] new_block: frozenset[str] metrics_diff: MetricsDiff | None class CodeCloneMCPRunStore: - def __init__(self, *, history_limit: int = 16) -> None: - self._history_limit = max(1, history_limit) + def __init__(self, *, history_limit: int = DEFAULT_MCP_HISTORY_LIMIT) -> None: + self._history_limit = _validated_history_limit(history_limit) self._lock = RLock() self._records: OrderedDict[str, MCPRunRecord] = OrderedDict() self._latest_run_id: str | None = None @@ -519,9 +533,16 @@ def records(self) -> tuple[MCPRunRecord, ...]: with self._lock: return tuple(self._records.values()) + def clear(self) -> tuple[str, ...]: + with self._lock: + removed_run_ids = tuple(self._records.keys()) + self._records.clear() + self._latest_run_id = None + return removed_run_ids + class CodeCloneMCPService: - def __init__(self, *, history_limit: int = 16) -> None: + def __init__(self, *, history_limit: int = DEFAULT_MCP_HISTORY_LIMIT) -> None: self._runs = CodeCloneMCPRunStore(history_limit=history_limit) self._state_lock = RLock() self._review_state: dict[str, OrderedDict[str, str | None]] = {} @@ -568,13 +589,6 @@ def analyze_repository(self, request: MCPAnalysisRequest) -> dict[str, object]: processing=processing_result, ) - if request.cache_policy == "refresh": - self._refresh_cache_projection(cache=cache, analysis=analysis_result) - try: - cache.save() - except CacheError as exc: - console.print(f"Cache save failed: {exc}") - clone_baseline_state = resolve_clone_baseline_state( args=args, baseline_path=baseline_path, @@ -703,13 +717,15 @@ def analyze_repository(self, request: MCPAnalysisRequest) -> dict[str, object]: root=root_path, request=request, report_document=report_document, - report_json=report_json, summary=base_summary, changed_paths=changed_paths, changed_projection=None, warnings=warnings, failures=failures, - analysis=analysis_result, + func_clones_count=analysis_result.func_clones_count, + block_clones_count=analysis_result.block_clones_count, + 
project_metrics=analysis_result.project_metrics, + suggestions=analysis_result.suggestions, new_func=frozenset(new_func), new_block=frozenset(new_block), metrics_diff=metrics_diff, @@ -725,13 +741,15 @@ def analyze_repository(self, request: MCPAnalysisRequest) -> dict[str, object]: root=root_path, request=request, report_document=report_document, - report_json=report_json, summary=summary, changed_paths=changed_paths, changed_projection=changed_projection, warnings=warnings, failures=failures, - analysis=analysis_result, + func_clones_count=analysis_result.func_clones_count, + block_clones_count=analysis_result.block_clones_count, + project_metrics=analysis_result.project_metrics, + suggestions=analysis_result.suggestions, new_func=frozenset(new_func), new_block=frozenset(new_block), metrics_diff=metrics_diff, @@ -807,31 +825,7 @@ def compare_runs( def evaluate_gates(self, request: MCPGateRequest) -> dict[str, object]: record = self._runs.get(request.run_id) - gate_args = Namespace( - fail_on_new=request.fail_on_new, - fail_threshold=request.fail_threshold, - fail_complexity=request.fail_complexity, - fail_coupling=request.fail_coupling, - fail_cohesion=request.fail_cohesion, - fail_cycles=request.fail_cycles, - fail_dead_code=request.fail_dead_code, - fail_health=request.fail_health, - fail_on_new_metrics=request.fail_on_new_metrics, - ) - boot = BootstrapResult( - root=record.root, - config=NormalizationConfig(), - args=gate_args, - output_paths=OutputPaths(), - cache_path=_REPORT_DUMMY_PATH, - ) - gate_result = gate( - boot=boot, - analysis=record.analysis, - new_func=record.new_func, - new_block=record.new_block, - metrics_diff=record.metrics_diff, - ) + gate_result = self._evaluate_gate_snapshot(record=record, request=request) result = { "run_id": record.run_id, "would_fail": gate_result.exit_code != 0, @@ -853,6 +847,45 @@ def evaluate_gates(self, request: MCPGateRequest) -> dict[str, object]: self._last_gate_results[record.run_id] = dict(result) return 
result + def _evaluate_gate_snapshot( + self, + *, + record: MCPRunRecord, + request: MCPGateRequest, + ) -> GatingResult: + reasons: list[str] = [] + if record.project_metrics is not None: + metric_reasons = metric_gate_reasons( + project_metrics=record.project_metrics, + metrics_diff=record.metrics_diff, + config=MetricGateConfig( + fail_complexity=request.fail_complexity, + fail_coupling=request.fail_coupling, + fail_cohesion=request.fail_cohesion, + fail_cycles=request.fail_cycles, + fail_dead_code=request.fail_dead_code, + fail_health=request.fail_health, + fail_on_new_metrics=request.fail_on_new_metrics, + ), + ) + reasons.extend(f"metric:{reason}" for reason in metric_reasons) + + if request.fail_on_new and (record.new_func or record.new_block): + reasons.append("clone:new") + + total_clone_groups = record.func_clones_count + record.block_clones_count + if 0 <= request.fail_threshold < total_clone_groups: + reasons.append( + f"clone:threshold:{total_clone_groups}:{request.fail_threshold}" + ) + + if reasons: + return GatingResult( + exit_code=int(ExitCode.GATING_FAILURE), + reasons=tuple(reasons), + ) + return GatingResult(exit_code=int(ExitCode.SUCCESS), reasons=()) + def get_report_section( self, *, @@ -1173,6 +1206,25 @@ def list_reviewed_findings( "items": items, } + def clear_session_runs(self) -> dict[str, object]: + removed_run_ids = self._runs.clear() + with self._state_lock: + cleared_review_entries = sum( + len(entries) for entries in self._review_state.values() + ) + cleared_gate_results = len(self._last_gate_results) + cleared_spread_cache_entries = len(self._spread_max_cache) + self._review_state.clear() + self._last_gate_results.clear() + self._spread_max_cache.clear() + return { + "cleared_runs": len(removed_run_ids), + "cleared_run_ids": list(removed_run_ids), + "cleared_review_entries": cleared_review_entries, + "cleared_gate_results": cleared_gate_results, + "cleared_spread_cache_entries": cleared_spread_cache_entries, + } + def 
check_complexity( self, *, @@ -1451,7 +1503,11 @@ def _render_resource(self, record: MCPRunRecord, suffix: str) -> str: sort_keys=True, ) if suffix == "report.json": - return record.report_json + return json.dumps( + record.report_document, + ensure_ascii=False, + indent=2, + ) if suffix == "overview": return json.dumps( self.list_hotspots(kind="highest_spread", run_id=record.run_id), @@ -1525,7 +1581,7 @@ def _normalize_changed_paths( ) from exc normalized.add(relative.as_posix()) continue - cleaned = candidate.as_posix().strip("./") + cleaned = self._normalize_relative_path(candidate.as_posix()) if cleaned: normalized.add(cleaned) return tuple(sorted(normalized)) @@ -1576,9 +1632,17 @@ def _severity_rank(self, severity: str) -> int: def _path_filter_tuple(self, path: str | None) -> tuple[str, ...]: if not path: return () - cleaned = Path(path).as_posix().strip("./") + cleaned = self._normalize_relative_path(Path(path).as_posix()) return (cleaned,) if cleaned else () + def _normalize_relative_path(self, path: str) -> str: + cleaned = path.strip() + if cleaned == ".": + return "" + if cleaned.startswith("./"): + cleaned = cleaned[2:] + return cleaned.rstrip("/") + def _previous_run_for_root(self, record: MCPRunRecord) -> MCPRunRecord | None: previous: MCPRunRecord | None = None for item in self._runs.records(): @@ -1588,6 +1652,33 @@ def _previous_run_for_root(self, record: MCPRunRecord) -> MCPRunRecord | None: previous = item return None + def _record_supports_analysis_mode( + self, + record: MCPRunRecord, + *, + analysis_mode: AnalysisMode, + ) -> bool: + record_mode = record.request.analysis_mode + if analysis_mode == "clones_only": + return record_mode in {"clones_only", "full"} + return record_mode == "full" + + def _latest_compatible_record( + self, + *, + analysis_mode: AnalysisMode, + root_path: Path | None = None, + ) -> MCPRunRecord | None: + for item in reversed(self._runs.records()): + if root_path is not None and item.root != root_path: + continue + if 
self._record_supports_analysis_mode( + item, + analysis_mode=analysis_mode, + ): + return item + return None + def _resolve_granular_record( self, *, @@ -1596,14 +1687,34 @@ def _resolve_granular_record( analysis_mode: AnalysisMode, ) -> MCPRunRecord: if run_id is not None: - return self._runs.get(run_id) - summary = self.analyze_repository( - MCPAnalysisRequest( - root=root, - analysis_mode=analysis_mode, + record = self._runs.get(run_id) + if self._record_supports_analysis_mode(record, analysis_mode=analysis_mode): + return record + raise MCPServiceContractError( + "Selected MCP run is not compatible with this check. " + f"Call analyze_repository(root='{record.root}', " + "analysis_mode='full') first." + ) + root_path: Path | None = None + if root != DEFAULT_ROOT: + root_path = self._resolve_root(root) + latest_record = self._latest_compatible_record( + analysis_mode=analysis_mode, + root_path=root_path, + ) + if latest_record is not None: + return latest_record + if root_path is not None: + raise MCPRunNotFoundError( + f"No compatible MCP analysis run is available for root: {root_path}. " + f"Call analyze_repository(root='{root_path}') or " + f"analyze_changed_paths(root='{root_path}', changed_paths=[...]) first." ) + raise MCPRunNotFoundError( + "No compatible MCP analysis run is available. " + "Call analyze_repository(root='/path/to/repo') or " + "analyze_changed_paths(root='/path/to/repo', changed_paths=[...]) first." 
) - return self._runs.get(str(summary["run_id"])) def _base_findings(self, record: MCPRunRecord) -> list[dict[str, object]]: report_document = record.report_document @@ -1857,8 +1968,7 @@ def _sort_findings( str(finding.get("id", "")), ) ) - return finding_rows - if sort_by == "spread": + elif sort_by == "spread": finding_rows.sort( key=lambda finding: ( -self._spread_value(finding), @@ -1866,21 +1976,24 @@ def _sort_findings( str(finding.get("id", "")), ) ) - return finding_rows - finding_rows.sort( - key=lambda finding: ( - -_as_float( - self._as_mapping( - (priority_map or {}).get(str(finding.get("id", ""))) - ).get("score", 0.0), - 0.0, + else: + finding_rows.sort( + key=lambda finding: ( + -_as_float( + self._as_mapping( + (priority_map or {}).get(str(finding.get("id", ""))) + ).get("score", 0.0), + 0.0, + ) + if priority_map is not None + else -_as_float( + self._priority_score(record, finding)["score"], + 0.0, + ), + -self._severity_rank(str(finding.get("severity", ""))), + str(finding.get("id", "")), ) - if priority_map is not None - else -_as_float(self._priority_score(record, finding)["score"], 0.0), - -self._severity_rank(str(finding.get("severity", ""))), - str(finding.get("id", "")), ) - ) return finding_rows def _decorate_finding( @@ -2138,10 +2251,9 @@ def _locations_for_finding( _as_int(location.get("line", 0), 0), str(location.get("symbol", "")), ) - if key in seen: - continue - seen.add(key) - deduped.append(location) + if key not in seen: + seen.add(key) + deduped.append(location) return deduped def _suggestion_finding_id(self, suggestion: object) -> str: @@ -2198,7 +2310,7 @@ def _suggestion_for_finding( record: MCPRunRecord, finding_id: str, ) -> object | None: - for suggestion in record.analysis.suggestions: + for suggestion in record.suggestions: if self._suggestion_finding_id(suggestion) == finding_id: return suggestion return None @@ -2607,6 +2719,11 @@ def _validate_analysis_request(self, request: MCPAnalysisRequest) -> None: 
request.cache_policy, _VALID_CACHE_POLICIES, ) + if request.cache_policy == "refresh": + raise MCPServiceContractError( + "cache_policy='refresh' is not supported by the read-only " + "CodeClone MCP server. Use 'reuse' or 'off'." + ) def _validate_choice( self, @@ -2651,7 +2768,7 @@ def _build_args(self, *, root_path: Path, request: MCPAnalysisRequest) -> Namesp block_min_stmt=DEFAULT_BLOCK_MIN_STMT, segment_min_loc=DEFAULT_SEGMENT_MIN_LOC, segment_min_stmt=DEFAULT_SEGMENT_MIN_STMT, - processes=DEFAULT_PROCESSES, + processes=None, cache_path=None, max_cache_size_mb=DEFAULT_MAX_CACHE_SIZE_MB, baseline=DEFAULT_BASELINE_PATH, @@ -2821,22 +2938,6 @@ def _build_cache( cache.load() return cache - def _refresh_cache_projection( - self, - *, - cache: Cache, - analysis: AnalysisResult, - ) -> None: - if not hasattr(cache, "segment_report_projection"): - return - new_projection = build_segment_report_projection( - suppressed=analysis.suppressed_segment_groups, - digest=analysis.segment_groups_raw_digest, - groups=analysis.segment_groups, - ) - if new_projection != cache.segment_report_projection: - cache.segment_report_projection = new_projection - def _metrics_computed(self, analysis_mode: AnalysisMode) -> tuple[str, ...]: return ( () diff --git a/codeclone/metrics/cohesion.py b/codeclone/metrics/cohesion.py index f0b2cc1..5e02dfb 100644 --- a/codeclone/metrics/cohesion.py +++ b/codeclone/metrics/cohesion.py @@ -65,16 +65,14 @@ def compute_lcom4(class_node: ast.ClassDef) -> tuple[int, int, int]: components = 0 for method_name in method_names: - if method_name in visited: - continue - components += 1 - stack = [method_name] - while stack: - current = stack.pop() - if current in visited: - continue - visited.add(current) - stack.extend(sorted(adjacency[current] - visited)) + if method_name not in visited: + components += 1 + stack = [method_name] + while stack: + current = stack.pop() + if current not in visited: + visited.add(current) + stack.extend(sorted(adjacency[current] 
- visited)) instance_vars = set().union(*method_to_attrs.values()) if method_to_attrs else set() return components, len(method_names), len(instance_vars) diff --git a/codeclone/metrics/coupling.py b/codeclone/metrics/coupling.py index 07752b7..043e552 100644 --- a/codeclone/metrics/coupling.py +++ b/codeclone/metrics/coupling.py @@ -41,34 +41,28 @@ def compute_cbo( """ couplings: set[str] = set() - for base in class_node.bases: - candidate = _annotation_name(base) + def _add_annotation_coupling(node: ast.AST | None) -> None: + if node is None: + return + candidate = _annotation_name(node) if candidate: couplings.add(candidate) + for base in class_node.bases: + _add_annotation_coupling(base) + for node in ast.walk(class_node): if isinstance(node, ast.Name): couplings.add(node.id) - continue - if isinstance(node, ast.Attribute): - if isinstance(node.value, ast.Name) and node.value.id in {"self", "cls"}: - continue - couplings.add(node.attr) - continue - if isinstance(node, ast.Call): - candidate = _annotation_name(node.func) - if candidate: - couplings.add(candidate) - continue - if isinstance(node, ast.AnnAssign) and node.annotation is not None: - candidate = _annotation_name(node.annotation) - if candidate: - couplings.add(candidate) - continue - if isinstance(node, ast.arg) and node.annotation is not None: - candidate = _annotation_name(node.annotation) - if candidate: - couplings.add(candidate) + elif isinstance(node, ast.Attribute): + if not ( + isinstance(node.value, ast.Name) and node.value.id in {"self", "cls"} + ): + couplings.add(node.attr) + elif isinstance(node, ast.Call): + _add_annotation_coupling(node.func) + elif isinstance(node, (ast.AnnAssign, ast.arg)): + _add_annotation_coupling(node.annotation) filtered = { name diff --git a/codeclone/metrics_baseline.py b/codeclone/metrics_baseline.py index d7aa592..d47d24a 100644 --- a/codeclone/metrics_baseline.py +++ b/codeclone/metrics_baseline.py @@ -7,6 +7,7 @@ import hmac import json import os +import 
tempfile from datetime import datetime, timezone from enum import Enum from pathlib import Path @@ -452,13 +453,21 @@ def diff(self, current: ProjectMetrics) -> MetricsDiff: def _atomic_write_json(path: Path, payload: dict[str, object]) -> None: - tmp_path = path.with_name(f"{path.name}.tmp") data = json.dumps(payload, indent=2, ensure_ascii=False) + "\n" - with tmp_path.open("wb") as tmp_file: - tmp_file.write(data.encode("utf-8")) - tmp_file.flush() - os.fsync(tmp_file.fileno()) - os.replace(tmp_path, path) + fd_num, tmp_name = tempfile.mkstemp( + dir=path.parent, + suffix=".tmp", + ) + tmp_path = Path(tmp_name) + try: + with os.fdopen(fd_num, "wb") as fd: + fd.write(data.encode("utf-8")) + fd.flush() + os.fsync(fd.fileno()) + os.replace(tmp_path, path) + except BaseException: + tmp_path.unlink(missing_ok=True) + raise def _load_json_object(path: Path) -> dict[str, Any]: diff --git a/codeclone/pipeline.py b/codeclone/pipeline.py index f99a946..3e3eb26 100644 --- a/codeclone/pipeline.py +++ b/codeclone/pipeline.py @@ -74,6 +74,7 @@ DEFAULT_BATCH_SIZE = 100 PARALLEL_MIN_FILES_PER_WORKER = 8 PARALLEL_MIN_FILES_FLOOR = 16 +DEFAULT_RUNTIME_PROCESSES = 4 @dataclass(frozen=True, slots=True) @@ -339,6 +340,12 @@ def _parallel_min_files(processes: int) -> int: return max(PARALLEL_MIN_FILES_FLOOR, processes * PARALLEL_MIN_FILES_PER_WORKER) +def _resolve_process_count(processes: object) -> int: + if processes is None: + return DEFAULT_RUNTIME_PROCESSES + return max(1, _coerce.as_int(processes, DEFAULT_RUNTIME_PROCESSES)) + + def _should_collect_structural_findings(output_paths: OutputPaths) -> bool: return any( path is not None @@ -817,7 +824,9 @@ def process( failed_files: list[str] = [] source_read_failures: list[str] = [] root_str = str(boot.root) - processes = max(1, int(boot.args.processes)) + # Keep process-count fallback in the core runtime so non-CLI callers such as + # the MCP service do not need to guess or mirror parallelism policy. 
+ processes = _resolve_process_count(boot.args.processes) min_loc = int(boot.args.min_loc) min_stmt = int(boot.args.min_stmt) block_min_loc = int(boot.args.block_min_loc) @@ -1429,6 +1438,18 @@ def analyze( ) +def _load_markdown_report_renderer() -> Callable[..., str]: + from .report.markdown import to_markdown_report + + return to_markdown_report + + +def _load_sarif_report_renderer() -> Callable[..., str]: + from .report.sarif import to_sarif_report + + return to_sarif_report + + def report( *, boot: BootstrapResult, @@ -1531,10 +1552,11 @@ def report( if boot.output_paths.json and report_document is not None: contents["json"] = render_json_report_document(report_document) - if boot.output_paths.md and report_document is not None: - from .report.markdown import to_markdown_report - - contents["md"] = to_markdown_report( + def _render_projection_artifact( + renderer: Callable[..., str], + ) -> str: + assert report_document is not None + return renderer( report_document=report_document, meta=report_meta, inventory=report_inventory, @@ -1550,24 +1572,12 @@ def report( structural_findings=sf, ) - if boot.output_paths.sarif and report_document is not None: - from .report.sarif import to_sarif_report - - contents["sarif"] = to_sarif_report( - report_document=report_document, - meta=report_meta, - inventory=report_inventory, - func_groups=analysis.func_groups, - block_groups=analysis.block_groups_report, - segment_groups=analysis.segment_groups, - block_facts=analysis.block_group_facts, - new_function_group_keys=new_func, - new_block_group_keys=new_block, - new_segment_group_keys=set(analysis.segment_groups.keys()), - metrics=analysis.metrics_payload, - suggestions=analysis.suggestions, - structural_findings=sf, - ) + for key, output_path, loader in ( + ("md", boot.output_paths.md, _load_markdown_report_renderer), + ("sarif", boot.output_paths.sarif, _load_sarif_report_renderer), + ): + if output_path and report_document is not None: + contents[key] = 
_render_projection_artifact(loader()) if boot.output_paths.text and report_document is not None: contents["text"] = render_text_report_document(report_document) diff --git a/codeclone/report/findings.py b/codeclone/report/findings.py index 19de07b..a056603 100644 --- a/codeclone/report/findings.py +++ b/codeclone/report/findings.py @@ -185,8 +185,8 @@ def _finding_reason_list_html( items: Sequence[StructuralFindingOccurrence], ) -> str: spread = _spread(items) - if group.finding_kind == STRUCTURAL_KIND_CLONE_GUARD_EXIT_DIVERGENCE: - reasons = [ + clone_cohort_reasons = { + STRUCTURAL_KIND_CLONE_GUARD_EXIT_DIVERGENCE: [ ( f"{len(items)} divergent clone members were detected after " "stable sorting and deduplication." @@ -206,20 +206,20 @@ def _finding_reason_list_html( f"{spread['files']} {'file' if spread['files'] == 1 else 'files'}." ), "This is a report-only finding and does not affect clone gating.", - ] - return _render_reason_list_html(reasons) - if group.finding_kind == STRUCTURAL_KIND_CLONE_COHORT_DRIFT: - reasons = [ + ], + STRUCTURAL_KIND_CLONE_COHORT_DRIFT: [ f"{len(items)} clone members diverge from the cohort majority profile.", f"Drift fields: {group.signature.get('drift_fields', 'n/a')}.", ( f"Cohort id: {group.signature.get('cohort_id', 'unknown')} with " f"arity {group.signature.get('cohort_arity', 'n/a')}." 
), - ("Majority profile is compared deterministically with lexical tie-breaks."), + "Majority profile is compared deterministically with lexical tie-breaks.", "This is a report-only finding and does not affect clone gating.", - ] - return _render_reason_list_html(reasons) + ], + } + if group.finding_kind in clone_cohort_reasons: + return _render_reason_list_html(clone_cohort_reasons[group.finding_kind]) stmt_seq = group.signature.get("stmt_seq", "n/a") terminal = group.signature.get("terminal", "n/a") @@ -259,20 +259,20 @@ def _finding_matters_html( ) -> str: spread = _spread(items) count = len(items) - if group.finding_kind == STRUCTURAL_KIND_CLONE_GUARD_EXIT_DIVERGENCE: - message = ( + special_messages = { + STRUCTURAL_KIND_CLONE_GUARD_EXIT_DIVERGENCE: ( "Members of one function-clone cohort diverged in guard/exit behavior. " "This often points to a partial fix where one path was updated and " "other siblings were left unchanged." - ) - return _finding_matters_paragraph(message) - if group.finding_kind == STRUCTURAL_KIND_CLONE_COHORT_DRIFT: - message = ( + ), + STRUCTURAL_KIND_CLONE_COHORT_DRIFT: ( "Members of one function-clone cohort drifted from a stable majority " "profile (terminal, guard, try/finally, side-effect order). Review " "whether divergence is intentional." - ) - return _finding_matters_paragraph(message) + ), + } + if group.finding_kind in special_messages: + return _finding_matters_paragraph(special_messages[group.finding_kind]) terminal = str(group.signature.get("terminal", "")).strip() stmt_seq = str(group.signature.get("stmt_seq", "")).strip() @@ -282,23 +282,26 @@ def _finding_matters_html( f"{spread['files']} files, so the same branch policy may be copied " "between multiple code paths." ) - elif terminal == "raise": - message = ( - "This group points to repeated guard or validation exits inside one " - "function. Consolidating the shared exit policy usually reduces " - "branch noise." 
- ) - elif terminal == "return": - message = ( - "This group points to repeated return-path logic inside one function. " - "A helper can often keep the branch predicate local while sharing " - "the emitted behavior." - ) else: - message = ( - f"This group reports {count} branches with the same local shape " - f"({stmt_seq or 'unknown signature'}). Review whether the shared " - "branch body should stay duplicated or become a helper." + terminal_messages = { + "raise": ( + "This group points to repeated guard or validation exits inside one " + "function. Consolidating the shared exit policy usually reduces " + "branch noise." + ), + "return": ( + "This group points to repeated return-path logic inside one function. " + "A helper can often keep the branch predicate local while sharing " + "the emitted behavior." + ), + } + message = terminal_messages.get( + terminal, + ( + f"This group reports {count} branches with the same local shape " + f"({stmt_seq or 'unknown signature'}). Review whether the shared " + "branch body should stay duplicated or become a helper." 
+ ), ) return _finding_matters_paragraph(message) diff --git a/codeclone/structural_findings.py b/codeclone/structural_findings.py index a6aaaf1..aac3ee9 100644 --- a/codeclone/structural_findings.py +++ b/codeclone/structural_findings.py @@ -228,35 +228,55 @@ def normalize_structural_findings( return tuple(normalized) -def _summarize_branch(body: list[ast.stmt]) -> dict[str, str] | None: - """Build deterministic structural signature for a meaningful branch body.""" - if not body or all(isinstance(stmt, ast.Pass) for stmt in body): - return None +_TRY_STAR_TYPE = getattr(ast, "TryStar", None) +_NESTED_SCOPE_TYPES = (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef) +_LOOP_TYPES = (ast.For, ast.While, ast.AsyncFor) + + +def _walk_branch_stats(body: Sequence[ast.stmt]) -> _BranchWalkStats: + """Collect branch body facts without descending into nested scopes.""" + call_count = 0 + raise_count = 0 + has_nested_if = False + has_loop = False + has_try = False + stack: list[ast.AST] = [ast.Module(body=list(body), type_ignores=[])] + + while stack: + node = stack.pop() + if isinstance(node, _NESTED_SCOPE_TYPES): + continue - call_count = raise_count = 0 - has_nested_if, has_loop, has_try = False, False, False - try_star = getattr(ast, "TryStar", None) - for node in ast.walk(ast.Module(body=body, type_ignores=[])): if isinstance(node, ast.Call): call_count += 1 elif isinstance(node, ast.Raise): raise_count += 1 elif isinstance(node, ast.If): has_nested_if = True - elif isinstance(node, (ast.For, ast.While, ast.AsyncFor)): + elif isinstance(node, _LOOP_TYPES): has_loop = True elif isinstance(node, ast.Try) or ( - try_star is not None and isinstance(node, try_star) + _TRY_STAR_TYPE is not None and isinstance(node, _TRY_STAR_TYPE) ): has_try = True - stats = _BranchWalkStats( + stack.extend(reversed(list(ast.iter_child_nodes(node)))) + + return _BranchWalkStats( call_count=call_count, raise_count=raise_count, has_nested_if=has_nested_if, has_loop=has_loop, has_try=has_try, 
 ) + + +def _summarize_branch(body: list[ast.stmt]) -> dict[str, str] | None: + """Build deterministic structural signature for a meaningful branch body.""" + if not body or all(isinstance(stmt, ast.Pass) for stmt in body): + return None + + stats = _walk_branch_stats(body) signature = { "stmt_seq": _stmt_type_sequence(body), "terminal": _terminal_kind(body), diff --git a/docs/book/10-html-render.md b/docs/book/10-html-render.md index e93161f..cc298cf 100644 --- a/docs/book/10-html-render.md +++ b/docs/book/10-html-render.md @@ -48,6 +48,18 @@ Refs: - Get Badge modal: grade-only / score+grade variants with shields.io embed - Dead-code UI is a single top-level `Dead Code` tab with deterministic split sub-tabs: `Active` and `Suppressed`. +- IDE deep links: + - An IDE picker in the topbar lets users choose their IDE. The selection is + persisted in `localStorage` (key `codeclone-ide`). + - Supported IDEs: PyCharm, IntelliJ IDEA, VS Code, Cursor, Fleet, Zed. + - File paths across Clones, Quality, Suggestions, Dead Code, and Findings + tabs are rendered as `<a>` elements with `data-file` + (absolute path) and `data-line` attributes. + - JetBrains IDEs use `jetbrains://` protocol (requires Toolbox); others use + native URL schemes (`vscode://`, `cursor://`, `fleet://`, `zed://`). + - The scan root is embedded as a `data-scan-root` attribute in the report markup so that + JetBrains links can derive the project name and relative path. + - When no IDE is selected, links are inert (no `href`, default cursor). Refs: @@ -55,6 +67,8 @@ Refs: - `codeclone/report/overview.py:materialize_report_overview` - `codeclone/_html_report/_sections/_clones.py:_render_group_explanation` - `codeclone/_html_report/_sections/_meta.py:render_meta_panel` +- `codeclone/_html_js.py:_IDE_LINKS` +- `codeclone/_html_report/_assemble.py` (IDE picker topbar widget) ## Invariants (MUST) @@ -63,12 +77,16 @@ Refs: - Novelty controls reflect baseline trust split note and per-group novelty flags. 
- Suppressed dead-code rows are rendered only from report dead-code suppression payloads and do not become active dead-code findings in UI tables. +- IDE link `data-file` and `data-line` attributes are escaped via + `_escape_attr` before insertion into HTML. Refs: - `codeclone/_html_escape.py:_escape_attr` - `codeclone/_html_snippets.py:_render_code_block` - `codeclone/_html_report/_sections/_clones.py:render_clones_panel` +- `codeclone/_html_report/_tables.py` (path cell IDE links) +- `codeclone/report/findings.py` (structural findings IDE links) ## Failure modes @@ -104,7 +122,9 @@ Refs: ## Non-guarantees - CSS/visual system and interaction details may evolve without schema bump. -- HTML-only interaction affordances (theme toggle, provenance modal, badge - modal, radar chart) are not baseline/cache/report contracts. +- HTML-only interaction affordances (theme toggle, IDE picker, provenance modal, + badge modal, radar chart) are not baseline/cache/report contracts. +- IDE deep link behavior depends on the user's local IDE installation and + protocol handler registration (e.g. JetBrains Toolbox for `jetbrains://`). - Overview layout (KPI grid, executive summary, analytics) is a pure view concern; only the underlying data identity and ordering are contract-sensitive. 
diff --git a/docs/book/20-mcp-interface.md b/docs/book/20-mcp-interface.md index bc19dca..15e491f 100644 --- a/docs/book/20-mcp-interface.md +++ b/docs/book/20-mcp-interface.md @@ -26,17 +26,20 @@ Current server characteristics: - `streamable-http` - run storage: - in-memory only - - bounded history (`--history-limit`, default `16`) + - bounded history (`--history-limit`, default `4`, maximum `10`) - latest-run pointer for `codeclone://latest/...` resources - run identity: - `run_id` is derived from the canonical report integrity digest - analysis modes: - `full` - `clones_only` +- process-count policy: + - `processes` is an optional override + - when omitted, MCP defers to the core CodeClone runtime - cache policies: - `reuse` - - `refresh` - `off` + `refresh` is rejected in MCP because the server is read-only. - summary payload: - `run_id`, `root`, `analysis_mode` - `baseline`, `metrics_baseline`, `cache` @@ -66,21 +69,24 @@ Current tool set: | `get_finding` | `finding_id`, `run_id` | Return one canonical finding group by id with locations, priority, and remediation payload when available | | `get_remediation` | `finding_id`, `run_id`, `detail_level` | Return just the remediation/explainability packet for one finding | | `list_hotspots` | `kind`, `run_id`, `detail_level`, `changed_paths`, `git_diff_ref`, `exclude_reviewed`, `limit`, `max_results` | Return one derived hotlist (`most_actionable`, `highest_spread`, `highest_priority`, `production_hotspots`, `test_fixture_hotspots`) | -| `check_clones` | `run_id`, `root`, `path`, `clone_type`, `source_kind`, `max_results`, `detail_level` | Return clone findings for a repository or path | -| `check_complexity` | `run_id`, `root`, `path`, `min_complexity`, `max_results`, `detail_level` | Return complexity hotspots for a repository or path | -| `check_coupling` | `run_id`, `root`, `path`, `max_results`, `detail_level` | Return coupling hotspots for a repository or path | -| `check_cohesion` | `run_id`, `root`, `path`, 
`max_results`, `detail_level` | Return cohesion hotspots for a repository or path | -| `check_dead_code` | `run_id`, `root`, `path`, `min_severity`, `max_results`, `detail_level` | Return dead-code findings for a repository or path | +| `check_clones` | `run_id`, `root`, `path`, `clone_type`, `source_kind`, `max_results`, `detail_level` | Return clone findings from a compatible stored run | +| `check_complexity` | `run_id`, `root`, `path`, `min_complexity`, `max_results`, `detail_level` | Return complexity hotspots from a compatible stored run | +| `check_coupling` | `run_id`, `root`, `path`, `max_results`, `detail_level` | Return coupling hotspots from a compatible stored run | +| `check_cohesion` | `run_id`, `root`, `path`, `max_results`, `detail_level` | Return cohesion hotspots from a compatible stored run | +| `check_dead_code` | `run_id`, `root`, `path`, `min_severity`, `max_results`, `detail_level` | Return dead-code findings from a compatible stored run | | `generate_pr_summary` | `run_id`, `changed_paths`, `git_diff_ref`, `format` | Build a PR-friendly changed-files summary in markdown or JSON | | `mark_finding_reviewed` | `finding_id`, `run_id`, `note` | Mark a finding as reviewed in the in-memory MCP session | | `list_reviewed_findings` | `run_id` | Return the current reviewed findings for the selected run | +| `clear_session_runs` | none | Clear all stored in-memory runs plus ephemeral review/gate/session caches for the current server process | All analysis/report tools are read-only with respect to repo state. The only -mutable MCP tool is `mark_finding_reviewed`, and its state is in-memory only. -`analyze_repository`, `analyze_changed_paths`, `evaluate_gates`, and the -granular `check_*` tools are sessionful: they may populate or reuse in-memory -run state, and the `check_*` tools may trigger a full analysis when no -compatible run exists yet. 
+mutable MCP tools are `mark_finding_reviewed` and `clear_session_runs`, and +their effects are session-local and in-memory only. `analyze_repository`, +`analyze_changed_paths`, and `evaluate_gates` are +sessionful and may populate or reuse in-memory run state. The granular +`check_*` tools are read-only over stored runs: use `analyze_repository` or +`analyze_changed_paths` first, then query the latest run or pass a specific +`run_id`. ## Resources @@ -107,9 +113,13 @@ trigger fresh analysis by themselves. - no source-file mutation - no baseline update - no metrics-baseline update + - no cache refresh writes - Session review markers are **ephemeral only**: - stored in memory per server process - never written to baseline, cache, or report artifacts +- `streamable-http` defaults to loopback binding. + Non-loopback hosts require explicit `--allow-remote` because the server has + no built-in authentication. - MCP must reuse current: - pipeline stages - baseline trust semantics @@ -182,6 +192,8 @@ trigger fresh analysis by themselves. - There is currently no standalone `mcp_api_version` constant. - In-memory run history does not survive process restart. +- `clear_session_runs` resets the in-memory run registry and related session + caches, but does not mutate baseline/cache/report artifacts on disk. - Client-specific UI/approval behavior is not part of the CodeClone contract. ## See also diff --git a/docs/mcp.md b/docs/mcp.md index ef98f8a..291d6dc 100644 --- a/docs/mcp.md +++ b/docs/mcp.md @@ -1,473 +1,251 @@ -# MCP for AI Agents and IDE Clients +# MCP Usage Guide -## Purpose +CodeClone MCP is a **read-only, baseline-aware** analysis server for AI agents +and MCP-capable clients. It exposes the existing deterministic pipeline as tools +and resources — no separate analysis engine, no source mutation, no baseline +writes. -Explain how to use CodeClone as an MCP server in real agent workflows. 
- -Important framing: MCP is primarily a **client integration surface**, not a -model-specific trick. CodeClone does not care whether the backend model is -GPT-5.x, Claude, Gemini, or something else. What matters is whether the -client/application you use can talk to MCP and which transport it expects. +MCP is a **client integration surface**, not a model-specific feature. It works +with any MCP-capable client regardless of the backend model. ## Install -Base install stays lean: - -```bash -pip install codeclone -``` - -Install MCP support only when you need it: - ```bash -pip install "codeclone[mcp]" -``` - -Tool install example: - -```bash -uv tool install "codeclone[mcp]" +pip install "codeclone[mcp]" # add MCP extra +# or +uv tool install "codeclone[mcp]" # install as a standalone tool ``` ## Start the server -### Local agent workflows: prefer `stdio` +**Local agents** (Claude Code, Codex, Copilot Chat, Gemini CLI): ```bash codeclone-mcp --transport stdio ``` -This is the best default when the MCP-capable client runs on the same machine -and needs access to the local repository. - -### Remote or HTTP-only clients: use `streamable-http` +**Remote / HTTP-only clients:** ```bash codeclone-mcp --transport streamable-http --host 127.0.0.1 --port 8000 ``` -With current FastMCP defaults, clients usually connect to the streamable HTTP -endpoint at: +Non-loopback hosts require `--allow-remote` (no built-in auth). +Run retention is bounded: default `4`, max `10` (`--history-limit`). +If a tool request omits `processes`, MCP defers process-count policy to the +core CodeClone runtime. -```text -http://127.0.0.1:8000/mcp -``` +## Tool surface -Use this mode when the client only supports remote MCP endpoints or when you -want to expose CodeClone from a controlled local/remote service boundary. 
- -## What agents get - -CodeClone MCP is designed as a **read-only structural governance layer**: - -- run CodeClone analysis against a repository -- get a compact run summary -- list clone / structural / dead-code / design findings -- inspect one finding by id -- retrieve derived hotlists -- preview gate decisions without exiting the process -- read the canonical JSON report for a stored run - -It does **not**: - -- update baselines -- mutate source files -- add suppressions automatically - -Practical contract notes: - -- `changed_paths` is a structured `list[str]` of repo-relative paths, not a - comma-separated string. -- `analyze_changed_paths` adds a changed-files projection to the current run. If - the canonical report digest does not change, the call may return the same - `run_id` as a prior full analysis. -- `compare_runs` is most useful when both runs were produced for the same - repository scope/root and comparable analysis settings. -- The focused `check_*` tools may trigger a full analysis first when no stored - run exists yet. -- `mark_finding_reviewed` is the only mutable MCP tool, and its state lives only - in memory for the current server process. 
- -Current tool surface: - -| Tool | Typical use | -|------|-------------| -| `analyze_repository` | Run a fresh analysis and register it as the latest in-memory run | -| `analyze_changed_paths` | Run the diff-aware fast path using explicit `changed_paths` or `git_diff_ref` | -| `get_run_summary` | Get the compact baseline/cache/health/findings snapshot for the latest or selected run | -| `compare_runs` | Compare two stored runs and see regressions, improvements, and health delta | -| `list_findings` | Browse findings with filters and pagination | -| `get_finding` | Inspect one finding group deeply by id | -| `get_remediation` | Pull the structured remediation/explainability payload for one finding | -| `list_hotspots` | Jump to high-signal derived views such as `highest_priority` or `production_hotspots` | -| `get_report_section` | Read a canonical section (`meta`, `findings`, `metrics`, `derived`, etc.) | -| `evaluate_gates` | Preview CI/gating outcomes without exiting the process | -| `check_clones` | Run a focused clone-only check for a repo or path | -| `check_complexity` | Run a focused complexity hotspot check | -| `check_coupling` | Run a focused coupling hotspot check | -| `check_cohesion` | Run a focused cohesion hotspot check | -| `check_dead_code` | Run a focused dead-code check | -| `generate_pr_summary` | Build a PR-friendly markdown or JSON summary | -| `mark_finding_reviewed` | Mark one finding as reviewed in the current MCP session | -| `list_reviewed_findings` | List the reviewed findings currently stored in memory for the run | - -Current resource surface: - -| Resource | Typical use | -|----------|-------------| -| `codeclone://latest/summary` | Quick latest-run status for clients that prefer resource reads | -| `codeclone://latest/report.json` | Full canonical report for the latest stored run | -| `codeclone://latest/health` | Lightweight health snapshot only | -| `codeclone://latest/gates` | Read back the most recent gate preview in the 
current MCP session | -| `codeclone://latest/changed` | Read the latest changed-files projection after a diff-aware run | -| `codeclone://schema` | Discover the canonical report shape and major section layout | -| `codeclone://runs/{run_id}/summary` | Stable summary lookup for a specific stored run | -| `codeclone://runs/{run_id}/report.json` | Stable canonical report lookup for a specific stored run | -| `codeclone://runs/{run_id}/findings/{finding_id}` | Direct lookup for one finding in one stored run | - -If a client needs pure machine-to-machine navigation, the clean split is: - -- use tools to create or refine analysis state -- use resources to re-read stored summaries, reports, health, gate, and finding payloads - -## Recommended agent workflow - -For agentic coding and review loops, the clean sequence is: - -1. `analyze_repository` -2. `get_run_summary` -3. `list_hotspots` or `list_findings` -4. `get_finding` for the specific item the agent should inspect -5. `evaluate_gates` before finalizing the change - -For change-focused workflows, prefer: - -1. `analyze_changed_paths` -2. `get_report_section(section="changed")` -3. `list_findings(changed_paths=..., sort_by="priority")` -4. `get_remediation` -5. `generate_pr_summary` - -In practice, the changed-files projection is also exposed through: - -- `get_report_section(section="changed")` -- `codeclone://latest/changed` - -If you want a resource-first flow after one initial analysis, a practical loop is: - -1. `analyze_repository` or `analyze_changed_paths` -2. `codeclone://latest/summary` -3. `codeclone://latest/report.json` or `codeclone://runs/{run_id}/findings/{finding_id}` - -For review/refactor loops, add: - -1. `mark_finding_reviewed` -2. `list_reviewed_findings` -3. 
`exclude_reviewed=true` on later `list_findings` / `list_hotspots` calls - -That pattern works especially well for AI-generated code because CodeClone is -baseline-aware: it helps separate accepted legacy debt from new structural -regressions introduced by the latest change set. - -## Prompt patterns for real agent workflows - -The most effective way to use CodeClone MCP is to ask the agent for a -**specific analysis task**, not just "run CodeClone". - -Good prompts usually include: - -- the scope: - - full repository - - clones only - - production findings only -- the goal: - - review - - triage - - safe cleanup plan - - gate preview -- the constraint: - - do not mutate code yet - - do not add suppressions automatically - - prioritize runtime-facing findings - -Use prompts like these. - -### 1. Full repository health check +| Tool | Purpose | +|--------------------------|----------------------------------------------------------------------| +| `analyze_repository` | Full analysis → register as latest run | +| `analyze_changed_paths` | Diff-aware analysis with `changed_paths` or `git_diff_ref` | +| `get_run_summary` | Compact health/findings/baseline snapshot | +| `compare_runs` | Regressions, improvements, health delta between two runs | +| `list_findings` | Filtered, paginated finding groups | +| `get_finding` | Deep inspection of one finding by id | +| `get_remediation` | Structured remediation payload for one finding | +| `list_hotspots` | Derived views: highest priority, production hotspots, spread, etc. 
| +| `get_report_section` | Read canonical report sections (meta, findings, metrics, derived, …) | +| `evaluate_gates` | Preview CI/gating decisions without exiting | +| `check_clones` | Clone findings from a stored run | +| `check_complexity` | Complexity hotspots from a stored run | +| `check_coupling` | Coupling hotspots from a stored run | +| `check_cohesion` | Cohesion hotspots from a stored run | +| `check_dead_code` | Dead-code findings from a stored run | +| `generate_pr_summary` | PR-friendly markdown or JSON summary | +| `mark_finding_reviewed` | Session-local review marker (in-memory only) | +| `list_reviewed_findings` | List reviewed findings for a run | +| `clear_session_runs` | Reset all in-memory runs and session caches | -```text -Use codeclone MCP to analyze this repository and give me a concise structural health summary. -Prioritize the highest-signal findings and explain what is worth looking at first. -``` +> `check_*` tools query stored runs only. Call `analyze_repository` or +> `analyze_changed_paths` first. -### 2. Clone-focused review only +## Resource surface -```text -Use codeclone MCP in clones-only mode and show me the most important clone findings. -Separate production findings from test/fixture noise and suggest which clone group is the safest first cleanup target. 
-``` +| Resource | Content | +|---------------------------------------------------|--------------------------------------------| +| `codeclone://latest/summary` | Latest run summary | +| `codeclone://latest/report.json` | Full canonical report | +| `codeclone://latest/health` | Health score and dimensions | +| `codeclone://latest/gates` | Last gate evaluation result | +| `codeclone://latest/changed` | Changed-files projection (diff-aware runs) | +| `codeclone://schema` | Canonical report shape descriptor | +| `codeclone://runs/{run_id}/summary` | Summary for a specific run | +| `codeclone://runs/{run_id}/report.json` | Report for a specific run | +| `codeclone://runs/{run_id}/findings/{finding_id}` | One finding from a specific run | -### 3. Production-only clone triage +Resources are read-only views over stored runs — they do not trigger analysis. -```text -Analyze this repository through codeclone MCP, filter to clone findings in production code only, -and show me the top 3 clone groups worth fixing first. -If there are no production clones, say that explicitly. -``` +## Recommended workflows -### 4. Structural hotspot review +### Full repository review -```text -Use codeclone MCP to find the most important production structural findings. -Focus on duplicated branches, cohesion, coupling, and complexity hotspots. -Give me a safe cleanup plan ordered by ROI. ``` - -### 5. Dead-code triage - -```text -Use codeclone MCP to review dead-code findings in this repository. -Separate actionable items from likely framework/runtime false positives and explain what should actually be cleaned up. -Do not add suppressions automatically. +analyze_repository → get_run_summary → list_hotspots → get_finding → evaluate_gates ``` -### 6. Gate preview before CI +### Changed-files review (PR / patch) -```text -Run codeclone through MCP and tell me whether this repository would fail stricter gating. 
-Preview the result for fail_on_new plus a zero clone threshold, and explain the exact reasons. -Do not change any files. +``` +analyze_changed_paths → get_report_section(section="changed") +→ list_findings(changed_paths=..., sort_by="priority") → get_remediation → generate_pr_summary ``` -### 7. AI-generated code review +### Session-based review loop -```text -I added a lot of code with an AI agent. Use codeclone MCP to check whether we introduced structural drift: -new clone groups, dead code, duplicated branches, or design hotspots. -Prioritize what is genuinely new or risky, not accepted baseline debt. +``` +list_findings → get_finding → mark_finding_reviewed +→ list_findings(exclude_reviewed=true) → … → clear_session_runs ``` -### 8. Safe refactor planning +## Prompt patterns -```text -Use codeclone MCP as the source of truth for structural findings. -Pick one production issue that looks safe to refactor, explain why it is a good candidate, -and outline a minimal plan that should not change behavior. -``` +Good prompts include **scope**, **goal**, and **constraint**: -### 9. Explain one finding deeply +### Health check ```text -Use codeclone MCP to find the highest-priority production finding, then inspect it in detail. -Explain what triggered it, where it lives, how risky it is, and what refactoring shape would address it. -Do not make code changes yet. +Use codeclone MCP to analyze this repository. Give me a concise structural health summary +and explain which findings are worth looking at first. ``` -### 10. Review after a change +### Clone triage (production only) ```text -Use codeclone MCP to analyze the repository after my latest changes. -Tell me whether the structural picture got better, worse, or stayed flat relative to baseline, -and summarize only the findings that are worth acting on. +Analyze through codeclone MCP, filter to clone findings in production code only, +and show me the top 3 clone groups worth fixing first. ``` -### 11. 
Changed-files only review +### Changed-files review ```text Use codeclone MCP in changed-files mode for my latest edits. Focus only on findings that touch changed files and rank them by priority. ``` -### 12. Run-to-run comparison +### Dead-code review ```text -Compare the latest CodeClone MCP run against the previous run for this repository. -Show me new regressions, resolved findings, and the health delta. +Use codeclone MCP to review dead-code findings. Separate actionable items from +likely framework false positives. Do not add suppressions automatically. ``` -### 13. Remediation-first workflow +### Gate preview ```text -Use codeclone MCP to find one high-priority production finding and fetch its remediation payload. -Explain the safest refactor shape and why this is a good first target. +Run codeclone through MCP and preview gating with fail_on_new plus a zero clone threshold. +Explain the exact reasons. Do not change any files. ``` -### 14. Session-based review loop +### AI-generated code check ```text -Use codeclone MCP to review findings one by one. -Mark each finding as reviewed after we discuss it, and exclude reviewed findings from the next list. -``` - -## Prompting tips - -- Prefer "production-only" when you care about runtime code. -- Prefer `analyze_changed_paths` or explicit `changed_paths` when the agent is - reviewing one patch or PR, not the whole repository. -- Prefer "clones-only mode" when you want the cheapest focused pass on duplication. -- Ask for "safe first candidate" when you want the agent to move from triage to refactor planning. -- Use "compare the latest run against the previous run" when you want the agent - to reason about improvements/regressions instead of absolute repo state. -- Use "mark as reviewed" / "exclude reviewed" for long sessions so the agent - does not keep circling around the same finding. 
-- For AI-generated code, explicitly ask the agent to separate: - - accepted baseline debt - - from new structural regressions - -## Client recipes - -Client UX changes fast, so prefer official client documentation for the exact -setup screens. The integration shape below is the stable part on the CodeClone -side. - -### Codex / local command-based OpenAI clients - -Recommended mode: `stdio` - -```bash -codeclone-mcp --transport stdio +I added code with an AI agent. Use codeclone MCP to check for new structural drift: +clone groups, dead code, duplicated branches, design hotspots. +Separate accepted baseline debt from new regressions. ``` -A typical command-based registration looks like: +### Safe refactor planning -```toml -[mcp_servers.codeclone] -enabled = true -command = "codeclone-mcp" -args = ["--transport", "stdio"] +```text +Use codeclone MCP to pick one production finding that looks safe to refactor. +Explain why it is a good candidate and outline a minimal plan. ``` -Use command-based MCP registration when the client can spawn a local server -process. If `codeclone-mcp` is not on `PATH`, use an absolute path to the -launcher. +### Run comparison -Official docs: - -- [OpenAI: Connectors and MCP servers](https://platform.openai.com/docs/guides/tools-connectors-mcp?lang=javascript) -- [OpenAI Responses API reference (`mcp` tool)](https://platform.openai.com/docs/api-reference/responses/compact?api-mode=responses) - -### OpenAI Responses API / remote MCP-capable OpenAI clients - -Recommended mode: `streamable-http` - -```bash -codeclone-mcp --transport streamable-http --host 127.0.0.1 --port 8000 +```text +Compare the latest CodeClone MCP run against the previous one. +Show regressions, resolved findings, and health delta. ``` -Then register the remote MCP endpoint in the client or API flow that expects an -HTTP MCP server. Prefer allowing only the CodeClone tools you need for the -current workflow. 
- -### Gemini CLI / Gemini MCP-capable clients +**Tips:** -Recommended mode: `stdio` +- Use `analyze_changed_paths` for PRs, not full analysis. +- Use `"production-only"` / `source_kind` filters to cut test/fixture noise. +- Use `mark_finding_reviewed` + `exclude_reviewed=true` in long sessions. +- Ask the agent to separate baseline debt from new regressions. -Use the same command-based local server registration pattern when the Gemini -client can spawn MCP commands locally. If the client only accepts remote MCP -URLs, use `streamable-http` and point it to the `/mcp` endpoint. +## Client configuration -### Claude Code / Anthropic MCP-capable clients +All clients use the same CodeClone server — only the registration differs. -Recommended mode: `stdio` - -Generic command-based configuration: +### Claude Code / Anthropic ```json { "mcpServers": { "codeclone": { "command": "codeclone-mcp", - "args": ["--transport", "stdio"] + "args": [ + "--transport", + "stdio" + ] } } } ``` -This is the best fit when Claude runs on the same machine and should analyze -the local checkout directly. - -Official docs: +### Codex / OpenAI (command-based) -- [Anthropic: Model Context Protocol (MCP)](https://docs.anthropic.com/en/docs/build-with-claude/mcp) -- [Anthropic: MCP with Claude Code](https://docs.anthropic.com/en/docs/claude-code/mcp) - -### GitHub Copilot Chat / IDE MCP clients +```toml +[mcp_servers.codeclone] +enabled = true +command = "codeclone-mcp" +args = ["--transport", "stdio"] +``` -Recommended mode: `stdio` +For the Responses API or remote-only OpenAI clients, use `streamable-http`. -Use the same local command registration pattern: +### GitHub Copilot Chat ```json { "mcpServers": { "codeclone": { "command": "codeclone-mcp", - "args": ["--transport", "stdio"] + "args": [ + "--transport", + "stdio" + ] } } } ``` -Then configure the MCP server in the IDE/client that hosts Copilot Chat. 
- -Official docs: +### Gemini CLI -- [GitHub Docs: Extending GitHub Copilot Chat with MCP](https://docs.github.com/en/copilot/how-tos/provide-context/use-mcp/extend-copilot-chat-with-mcp?tool=visualstudio) +Same `stdio` registration. If the client only accepts remote URLs, use +`streamable-http` and point to the `/mcp` endpoint. -### Other MCP-capable clients +### Other clients -Use the same transport rule: +- `stdio` for local analysis +- `streamable-http` for remote/HTTP-only clients -- `stdio` for local repository analysis -- `streamable-http` for remote-only or hosted MCP clients +If `codeclone-mcp` is not on `PATH`, use an absolute path to the launcher. -The CodeClone server surface itself stays the same. +## Security -## Security and operations - -- CodeClone MCP is read-only by design. -- It stores run history in memory only. -- Review markers are also in-memory only and disappear when the server process stops. +- Read-only by design: no source mutation, no baseline/cache writes. +- Run history and review markers are in-memory only — lost on process stop. - Repository access is limited to what the server process can read locally. -- Baseline/cache/report semantics remain owned by the normal CodeClone contracts. +- `streamable-http` binds to loopback by default; `--allow-remote` is explicit opt-in. ## Troubleshooting -### `CodeClone MCP support requires the optional 'mcp' extra` - -Install the extra: - -```bash -pip install "codeclone[mcp]" -``` - -### The client cannot find `codeclone-mcp` - -Either install it as a tool: - -```bash -uv tool install "codeclone[mcp]" -``` - -or point the client at the absolute path to the launcher from the environment -where CodeClone was installed. - -### The client only accepts remote MCP servers - -Run CodeClone in `streamable-http` mode and point the client at the MCP -endpoint instead of using `stdio`. - -### The agent is reading stale results - -Run `analyze_repository` again. 
Runs are stored in memory per server process and -`latest` always points at the most recently analyzed run in that process. - -### Changed-files tools are rejecting `changed_paths` - -Pass `changed_paths` as a real list of repo-relative paths. Do not pass a -single comma-separated string. +| Problem | Fix | +|-----------------------------------------------------------|--------------------------------------------------------------------------------| +| `CodeClone MCP support requires the optional 'mcp' extra` | `pip install "codeclone[mcp]"` | +| Client cannot find `codeclone-mcp` | `uv tool install "codeclone[mcp]"` or use absolute path | +| Client only accepts remote MCP | Use `streamable-http` transport | +| Agent reads stale results | Call `analyze_repository` again; `latest` always points to the most recent run | +| `changed_paths` rejected | Pass a `list[str]` of repo-relative paths, not a comma-separated string | ## See also -- [book/20-mcp-interface.md](book/20-mcp-interface.md) -- [book/08-report.md](book/08-report.md) -- [book/09-cli.md](book/09-cli.md) +- [book/20-mcp-interface.md](book/20-mcp-interface.md) — formal interface contract +- [book/08-report.md](book/08-report.md) — canonical report contract +- [book/09-cli.md](book/09-cli.md) — CLI reference diff --git a/tests/test_cache.py b/tests/test_cache.py index e0c2cf3..793dd60 100644 --- a/tests/test_cache.py +++ b/tests/test_cache.py @@ -11,6 +11,8 @@ import codeclone.cache as cache_mod from codeclone.blocks import BlockUnit, SegmentUnit from codeclone.cache import Cache, CacheStatus +from codeclone.cache_io import sign_cache_payload +from codeclone.cache_paths import runtime_filepath_from_wire, wire_filepath_from_runtime from codeclone.errors import CacheError from codeclone.extractor import Unit @@ -161,7 +163,7 @@ def test_cache_load_normalizes_stale_structural_findings(tmp_path: Path) -> None cache, files={"x.py": cache_mod._encode_wire_file_entry(entry)}, ) - signature = 
cache._sign_data(payload) + signature = sign_cache_payload(payload) cache_path.write_text( json.dumps({"v": cache._CACHE_VERSION, "payload": payload, "sig": signature}), "utf-8", @@ -289,7 +291,7 @@ def test_cache_v13_missing_optional_sections_default_empty(tmp_path: Path) -> No cache_path = tmp_path / "cache.json" cache = Cache(cache_path) payload = _analysis_payload(cache, files={"x.py": {"st": [1, 2]}}) - signature = cache._sign_data(payload) + signature = sign_cache_payload(payload) cache_path.write_text( json.dumps({"v": cache._CACHE_VERSION, "payload": payload, "sig": signature}), "utf-8", @@ -393,9 +395,8 @@ def test_cache_signature_mismatch_warns(tmp_path: Path) -> None: def test_cache_version_mismatch_warns(tmp_path: Path) -> None: cache_path = tmp_path / "cache.json" - cache = Cache(cache_path) data = {"version": "0.0", "files": {}} - signature = cache._sign_data(data) + signature = sign_cache_payload(data) cache_path.write_text( json.dumps({**data, "_signature": signature}, ensure_ascii=False, indent=2), "utf-8", @@ -415,7 +416,7 @@ def test_cache_v_field_version_mismatch_warns(tmp_path: Path) -> None: cache_path = tmp_path / "cache.json" cache = Cache(cache_path) payload = _analysis_payload(cache, files={}) - signature = cache._sign_data(payload) + signature = sign_cache_payload(payload) cache_path.write_text( json.dumps({"v": "0.0", "payload": payload, "sig": signature}), "utf-8" ) @@ -737,7 +738,7 @@ def test_cache_load_invalid_files_type(tmp_path: Path) -> None: cache_path = tmp_path / "cache.json" cache = Cache(cache_path) payload = _analysis_payload(cache, files=[]) - signature = cache._sign_data(payload) + signature = sign_cache_payload(payload) cache_path.write_text( json.dumps({"v": cache._CACHE_VERSION, "payload": payload, "sig": signature}), "utf-8", @@ -838,7 +839,7 @@ def test_cache_load_missing_v_field(tmp_path: Path) -> None: cache_path = tmp_path / "cache.json" cache = Cache(cache_path) payload = _analysis_payload(cache, files={}) - sig = 
cache._sign_data(payload) + sig = sign_cache_payload(payload) cache_path.write_text(json.dumps({"payload": payload, "sig": sig}), "utf-8") cache.load() assert cache.load_warning is not None @@ -871,7 +872,7 @@ def test_cache_load_rejects_missing_required_payload_fields( cache_path = tmp_path / "cache.json" cache = Cache(cache_path) payload = payload_factory(cache) - sig = cache._sign_data(payload) + sig = sign_cache_payload(payload) cache_path.write_text( json.dumps({"v": cache._CACHE_VERSION, "payload": payload, "sig": sig}), "utf-8" ) @@ -889,7 +890,7 @@ def test_cache_load_python_tag_mismatch(tmp_path: Path) -> None: "ap": cache.data["analysis_profile"], "files": {}, } - sig = cache._sign_data(payload) + sig = sign_cache_payload(payload) cache_path.write_text( json.dumps({"v": cache._CACHE_VERSION, "payload": payload, "sig": sig}), "utf-8" ) @@ -907,7 +908,7 @@ def test_cache_load_fingerprint_version_mismatch(tmp_path: Path) -> None: "ap": cache.data["analysis_profile"], "files": {}, } - sig = cache._sign_data(payload) + sig = sign_cache_payload(payload) cache_path.write_text( json.dumps({"v": cache._CACHE_VERSION, "payload": payload, "sig": sig}), "utf-8" ) @@ -940,7 +941,7 @@ def test_cache_load_missing_analysis_profile_in_payload(tmp_path: Path) -> None: "fp": cache.data["fingerprint_version"], "files": {}, } - sig = cache._sign_data(payload) + sig = sign_cache_payload(payload) cache_path.write_text( json.dumps({"v": cache._CACHE_VERSION, "payload": payload, "sig": sig}), "utf-8" ) @@ -971,7 +972,7 @@ def test_cache_load_invalid_analysis_profile_payload( "ap": bad_analysis_profile, "files": {}, } - sig = cache._sign_data(payload) + sig = sign_cache_payload(payload) cache_path.write_text( json.dumps({"v": cache._CACHE_VERSION, "payload": payload, "sig": sig}), "utf-8" ) @@ -988,7 +989,7 @@ def test_cache_load_invalid_wire_file_entry(tmp_path: Path) -> None: cache_path = tmp_path / "cache.json" cache = Cache(cache_path) payload = _analysis_payload(cache, 
files={"x.py": {"st": "bad"}}) - sig = cache._sign_data(payload) + sig = sign_cache_payload(payload) cache_path.write_text( json.dumps({"v": cache._CACHE_VERSION, "payload": payload, "sig": sig}), "utf-8" ) @@ -1028,7 +1029,9 @@ def test_wire_filepath_outside_root_falls_back_to_runtime_path(tmp_path: Path) - root.mkdir() cache = Cache(tmp_path / "cache.json", root=root) outside = tmp_path / "outside.py" - assert cache._wire_filepath_from_runtime(str(outside)) == outside.as_posix() + assert ( + wire_filepath_from_runtime(str(outside), root=cache.root) == outside.as_posix() + ) def test_wire_filepath_resolve_oserror_falls_back_to_runtime_path( @@ -1046,7 +1049,9 @@ def _resolve_with_error(self: Path, *, strict: bool = False) -> Path: return original_resolve(self, strict=strict) monkeypatch.setattr(Path, "resolve", _resolve_with_error) - assert cache._wire_filepath_from_runtime(str(runtime)) == runtime.as_posix() + assert ( + wire_filepath_from_runtime(str(runtime), root=cache.root) == runtime.as_posix() + ) def test_wire_filepath_resolve_relative_success_path( @@ -1067,7 +1072,7 @@ def _resolve_with_mapping(self: Path, *, strict: bool = False) -> Path: return original_resolve(self, strict=strict) monkeypatch.setattr(Path, "resolve", _resolve_with_mapping) - assert cache._wire_filepath_from_runtime(str(runtime)) == "pkg/module.py" + assert wire_filepath_from_runtime(str(runtime), root=cache.root) == "pkg/module.py" def test_runtime_filepath_from_wire_resolve_oserror( @@ -1085,7 +1090,7 @@ def _resolve_with_error(self: Path, *, strict: bool = False) -> Path: return original_resolve(self, strict=strict) monkeypatch.setattr(Path, "resolve", _resolve_with_error) - assert cache._runtime_filepath_from_wire("pkg/module.py") == str(combined) + assert runtime_filepath_from_wire("pkg/module.py", root=cache.root) == str(combined) def test_as_str_dict_rejects_non_string_keys() -> None: diff --git a/tests/test_core_branch_coverage.py b/tests/test_core_branch_coverage.py index 
43407e0..b1afa81 100644 --- a/tests/test_core_branch_coverage.py +++ b/tests/test_core_branch_coverage.py @@ -26,6 +26,7 @@ _is_dead_candidate_dict, build_segment_report_projection, ) +from codeclone.cache_segments import decode_segment_report_projection from codeclone.errors import CacheError from codeclone.grouping import build_segment_groups from codeclone.models import ( @@ -413,29 +414,41 @@ def test_cache_segment_report_projection_filters_invalid_items(tmp_path: Path) - def test_cache_decode_segment_projection_invalid_shapes(tmp_path: Path) -> None: cache = Cache(tmp_path / "cache.json", root=tmp_path.resolve()) assert ( - cache._decode_segment_report_projection({"d": "x", "s": 0, "g": "bad"}) is None + decode_segment_report_projection( + {"d": "x", "s": 0, "g": "bad"}, + root=cache.root, + ) + is None ) assert ( - cache._decode_segment_report_projection({"d": "x", "s": 0, "g": [["k"]]}) + decode_segment_report_projection( + {"d": "x", "s": 0, "g": [["k"]]}, + root=cache.root, + ) is None ) assert ( - cache._decode_segment_report_projection({"d": "x", "s": 0, "g": [[1, []]]}) + decode_segment_report_projection( + {"d": "x", "s": 0, "g": [[1, []]]}, + root=cache.root, + ) is None ) assert ( - cache._decode_segment_report_projection( - {"d": "x", "s": 0, "g": [["k", ["bad-item"]]]} + decode_segment_report_projection( + {"d": "x", "s": 0, "g": [["k", ["bad-item"]]]}, + root=cache.root, ) is None ) assert ( - cache._decode_segment_report_projection( + decode_segment_report_projection( { "d": "x", "s": 0, "g": [["k", [["a.py", "q", 1, 2, 3, "h", None]]]], - } + }, + root=cache.root, ) is None ) diff --git a/tests/test_github_action_helpers.py b/tests/test_github_action_helpers.py new file mode 100644 index 0000000..d8ce885 --- /dev/null +++ b/tests/test_github_action_helpers.py @@ -0,0 +1,126 @@ +from __future__ import annotations + +import importlib.util +import sys +from pathlib import Path +from types import ModuleType +from typing import cast + + +def 
_load_action_impl() -> ModuleType: + path = ( + Path(__file__).resolve().parents[1] + / ".github" + / "actions" + / "codeclone" + / "_action_impl.py" + ) + spec = importlib.util.spec_from_file_location("codeclone_action_impl", path) + assert spec is not None + assert spec.loader is not None + module = importlib.util.module_from_spec(spec) + sys.modules[spec.name] = module + spec.loader.exec_module(module) + return module + + +def _assert_contains_all(text: str, expected_parts: tuple[str, ...]) -> None: + for expected in expected_parts: + assert expected in text + + +def test_build_codeclone_args_includes_enabled_gates_and_paths() -> None: + action_impl = _load_action_impl() + inputs = action_impl.ActionInputs( + path=".", + json_path=".cache/codeclone/report.json", + sarif=True, + sarif_path=".cache/codeclone/report.sarif", + fail_on_new=True, + fail_on_new_metrics=True, + fail_threshold=5, + fail_complexity=20, + fail_coupling=10, + fail_cohesion=4, + fail_cycles=True, + fail_dead_code=True, + fail_health=60, + baseline_path="codeclone.baseline.json", + metrics_baseline_path="codeclone.baseline.json", + extra_args="--no-color --quiet", + no_progress=True, + ) + + args = cast(list[str], action_impl.build_codeclone_args(inputs)) + + assert args[:5] == [ + ".", + "--json", + ".cache/codeclone/report.json", + "--sarif", + ".cache/codeclone/report.sarif", + ] + _assert_contains_all( + " ".join(args), + ( + "--fail-on-new", + "--fail-on-new-metrics", + "--fail-cycles", + "--fail-dead-code", + "--no-progress", + "--baseline", + "--metrics-baseline", + "--no-color", + "--quiet", + ), + ) + + +def test_render_pr_comment_uses_canonical_report_summary() -> None: + action_impl = _load_action_impl() + report = { + "meta": { + "codeclone_version": "2.0.0b3", + "baseline": {"status": "ok"}, + "cache": {"used": True}, + }, + "findings": { + "summary": { + "families": { + "clones": 8, + "structural": 15, + "dead_code": 0, + "design": 3, + }, + "clones": { + "new": 1, + "known": 7, 
+ }, + } + }, + "metrics": { + "summary": { + "health": { + "score": 81, + "grade": "B", + } + } + }, + } + + body = cast(str, action_impl.render_pr_comment(report, exit_code=3)) + + _assert_contains_all( + body, + ( + "", + "CodeClone Report", + "**81/100 (B)**", + ":x: Failed (gating)", + "Clones: 8 (1 new, 7 known)", + "Structural: 15", + "Dead code: 0", + "Design: 3", + "`2.0.0b3`", + ), + ) diff --git a/tests/test_mcp_server.py b/tests/test_mcp_server.py index 0a50124..b59ad8d 100644 --- a/tests/test_mcp_server.py +++ b/tests/test_mcp_server.py @@ -2,6 +2,7 @@ from __future__ import annotations +import argparse import asyncio import builtins import json @@ -93,6 +94,7 @@ def test_mcp_server_exposes_expected_read_only_tools() -> None: assert set(tools) == { "analyze_repository", "analyze_changed_paths", + "clear_session_runs", "get_run_summary", "evaluate_gates", "get_report_section", @@ -115,6 +117,11 @@ def test_mcp_server_exposes_expected_read_only_tools() -> None: assert tool.annotations.readOnlyHint is ( name in { + "check_complexity", + "check_clones", + "check_coupling", + "check_cohesion", + "check_dead_code", "get_run_summary", "get_report_section", "list_findings", @@ -128,10 +135,8 @@ def test_mcp_server_exposes_expected_read_only_tools() -> None: ) assert tool.annotations.destructiveHint is False assert tool.annotations.idempotentHint is True - assert "triggers a full analysis first" in str( - tools["check_complexity"].description - ) - assert "triggers a full analysis first" in str(tools["check_clones"].description) + assert "Use analyze_repository first" in str(tools["check_complexity"].description) + assert "Use analyze_repository first" in str(tools["check_clones"].description) def test_mcp_server_tool_roundtrip_and_resources(tmp_path: Path) -> None: @@ -153,7 +158,21 @@ def test_mcp_server_tool_roundtrip_and_resources(tmp_path: Path) -> None: ) ) ) + changed_summary = _structured_tool_result( + asyncio.run( + server.call_tool( + 
"analyze_changed_paths", + { + "root": str(tmp_path), + "respect_pyproject": False, + "cache_policy": "off", + "changed_paths": ["pkg/dup.py"], + }, + ) + ) + ) run_id = str(summary["run_id"]) + changed_run_id = str(changed_summary["run_id"]) latest = _structured_tool_result( asyncio.run(server.call_tool("get_run_summary", {})) @@ -195,7 +214,9 @@ def test_mcp_server_tool_roundtrip_and_resources(tmp_path: Path) -> None: latest_changed_resource = list( asyncio.run(server.read_resource("codeclone://latest/changed")) ) - assert json.loads(latest_changed_resource[0].content)["run_id"] == run_id + latest_changed_payload = json.loads(latest_changed_resource[0].content) + assert latest_changed_payload["run_id"] == changed_run_id + assert latest_changed_payload["changed_paths"] == changed_summary["changed_paths"] report_resource = list( asyncio.run(server.read_resource(f"codeclone://runs/{run_id}/report.json")) @@ -223,7 +244,7 @@ def test_mcp_server_tool_roundtrip_and_resources(tmp_path: Path) -> None: changed_section = _structured_tool_result( asyncio.run(server.call_tool("get_report_section", {"section": "changed"})) ) - assert changed_section["changed_paths"] == ["pkg/dup.py", "pkg/quality.py"] + assert changed_section["changed_paths"] == changed_summary["changed_paths"] finding = _structured_tool_result( asyncio.run(server.call_tool("get_finding", {"finding_id": first_finding_id})) @@ -239,7 +260,20 @@ def test_mcp_server_tool_roundtrip_and_resources(tmp_path: Path) -> None: hotspots = _structured_tool_result( asyncio.run(server.call_tool("list_hotspots", {"kind": "highest_priority"})) ) + comparison = _structured_tool_result( + asyncio.run( + server.call_tool( + "compare_runs", + { + "run_id_before": run_id, + "run_id_after": changed_run_id, + "focus": "all", + }, + ) + ) + ) assert cast(int, hotspots["total"]) >= 1 + assert comparison["summary"] complexity = _structured_tool_result( asyncio.run( @@ -261,6 +295,20 @@ def 
test_mcp_server_tool_roundtrip_and_resources(tmp_path: Path) -> None: ) ) ) + coupling = _structured_tool_result( + asyncio.run(server.call_tool("check_coupling", {"run_id": run_id})) + ) + cohesion = _structured_tool_result( + asyncio.run(server.call_tool("check_cohesion", {"run_id": run_id})) + ) + dead_code = _structured_tool_result( + asyncio.run( + server.call_tool( + "check_dead_code", + {"run_id": run_id, "path": "pkg/quality.py"}, + ) + ) + ) reviewed = _structured_tool_result( asyncio.run( server.call_tool( @@ -290,6 +338,9 @@ def test_mcp_server_tool_roundtrip_and_resources(tmp_path: Path) -> None: ) assert complexity["check"] == "complexity" assert cast(int, clones["total"]) >= 1 + assert coupling["check"] == "coupling" + assert cohesion["check"] == "cohesion" + assert dead_code["check"] == "dead_code" assert reviewed["reviewed"] is True assert reviewed_items["reviewed_count"] == 1 assert "## CodeClone Summary" in str(pr_summary["content"]) @@ -313,6 +364,16 @@ def test_mcp_server_tool_roundtrip_and_resources(tmp_path: Path) -> None: assert schema_payload["title"] == "CodeCloneCanonicalReport" assert "report_schema_version" in schema_payload["properties"] + cleared = _structured_tool_result( + asyncio.run(server.call_tool("clear_session_runs", {})) + ) + assert cast(int, cleared["cleared_runs"]) >= 1 + assert run_id in cast("list[str]", cleared["cleared_run_ids"]) + from mcp.server.fastmcp.exceptions import ResourceError + + with pytest.raises(ResourceError): + list(asyncio.run(server.read_resource("codeclone://latest/summary"))) + def test_mcp_server_parser_defaults_and_main_success( monkeypatch: pytest.MonkeyPatch, @@ -320,10 +381,11 @@ def test_mcp_server_parser_defaults_and_main_success( parser = mcp_server.build_parser() args = parser.parse_args([]) assert args.transport == "stdio" - assert args.history_limit == 16 + assert args.history_limit == 4 assert args.json_response is True assert args.stateless_http is True assert args.log_level == "INFO" + 
assert args.allow_remote is False captured: dict[str, object] = {} @@ -358,6 +420,63 @@ def _fake_build_mcp_server(**kwargs: object) -> _FakeServer: assert kwargs["history_limit"] == 8 +def test_mcp_server_parser_rejects_excessive_history_limit() -> None: + parser = mcp_server.build_parser() + with pytest.raises(SystemExit): + parser.parse_args(["--history-limit", "11"]) + + +def test_mcp_server_main_rejects_non_loopback_host_without_opt_in( + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], +) -> None: + monkeypatch.setattr( + sys, + "argv", + [ + "codeclone-mcp", + "--transport", + "streamable-http", + "--host", + "0.0.0.0", + ], + ) + + with pytest.raises(SystemExit) as exc_info: + mcp_server.main() + + assert exc_info.value.code == 2 + assert "without --allow-remote" in capsys.readouterr().err + + +def test_mcp_server_main_allows_non_loopback_host_with_opt_in( + monkeypatch: pytest.MonkeyPatch, +) -> None: + captured: dict[str, object] = {} + + class _FakeServer: + def run(self, *, transport: str) -> None: + captured["transport"] = transport + + monkeypatch.setattr(mcp_server, "build_mcp_server", lambda **kwargs: _FakeServer()) + monkeypatch.setattr( + sys, + "argv", + [ + "codeclone-mcp", + "--transport", + "streamable-http", + "--host", + "0.0.0.0", + "--allow-remote", + ], + ) + + mcp_server.main() + + assert captured["transport"] == "streamable-http" + + def test_mcp_server_main_reports_missing_optional_dependency( monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str], @@ -376,6 +495,13 @@ def _boom() -> tuple[object, object, object]: assert "codeclone[mcp]" in err +def test_mcp_server_history_limit_arg_rejects_non_integer() -> None: + with pytest.raises( + argparse.ArgumentTypeError, match="history limit must be an integer" + ): + mcp_server._history_limit_arg("oops") + + def test_mcp_server_load_runtime_wraps_import_error( monkeypatch: pytest.MonkeyPatch, ) -> None: @@ -415,3 +541,13 @@ def run(self, *, transport: str) 
-> None: ) mcp_server.main() + + +def test_mcp_server_host_loopback_detection() -> None: + assert mcp_server._host_is_loopback("") is False + assert mcp_server._host_is_loopback("127.0.0.1") is True + assert mcp_server._host_is_loopback("localhost") is True + assert mcp_server._host_is_loopback("::1") is True + assert mcp_server._host_is_loopback("[::1]") is True + assert mcp_server._host_is_loopback("0.0.0.0") is False + assert mcp_server._host_is_loopback("example.com") is False diff --git a/tests/test_mcp_service.py b/tests/test_mcp_service.py index cd80912..1280513 100644 --- a/tests/test_mcp_service.py +++ b/tests/test_mcp_service.py @@ -15,7 +15,6 @@ from codeclone import mcp_service as mcp_service_mod from codeclone._cli_config import ConfigValidationError from codeclone.cache import Cache -from codeclone.errors import CacheError from codeclone.mcp_service import ( CodeCloneMCPService, MCPAnalysisRequest, @@ -104,13 +103,15 @@ def _dummy_run_record(root: Path, run_id: str) -> MCPRunRecord: root=root, request=MCPAnalysisRequest(root=str(root), respect_pyproject=False), report_document={}, - report_json="{}", summary={"run_id": run_id, "health": {"score": 0, "grade": "N/A"}}, changed_paths=(), changed_projection=None, warnings=(), failures=(), - analysis=cast(Any, SimpleNamespace(suggestions=[])), + func_clones_count=0, + block_clones_count=0, + project_metrics=None, + suggestions=(), new_func=frozenset(), new_block=frozenset(), metrics_diff=None, @@ -343,6 +344,45 @@ def test_mcp_service_granular_checks_pr_summary_and_resources( assert json_summary["changed_paths"] == ["pkg/dup.py"] +def test_mcp_service_granular_checks_require_existing_run_by_default( + tmp_path: Path, +) -> None: + _write_clone_fixture(tmp_path) + service = CodeCloneMCPService(history_limit=4) + + with pytest.raises( + MCPRunNotFoundError, match="analyze_repository\\(root='/path/to/repo'\\)" + ): + service.check_clones(detail_level="summary") + + with pytest.raises( + MCPRunNotFoundError, + 
match=f"analyze_repository\\(root='{tmp_path}'\\)", + ): + service.check_dead_code(root=str(tmp_path), detail_level="summary") + + +def test_mcp_service_granular_checks_reject_incompatible_run_modes( + tmp_path: Path, +) -> None: + _write_clone_fixture(tmp_path) + service = CodeCloneMCPService(history_limit=4) + summary = service.analyze_repository( + MCPAnalysisRequest( + root=str(tmp_path), + respect_pyproject=False, + cache_policy="off", + analysis_mode="clones_only", + ) + ) + + with pytest.raises(MCPServiceContractError, match="not compatible"): + service.check_dead_code( + run_id=str(summary["run_id"]), + detail_level="summary", + ) + + def test_mcp_service_summary_reuses_canonical_meta_for_cache_and_health( tmp_path: Path, ) -> None: @@ -499,6 +539,7 @@ def test_mcp_service_build_args_handles_pyproject_and_invalid_settings( ), ) assert args.min_loc == 12 + assert args.processes is None assert args.skip_metrics is True assert args.skip_dead_code is True assert args.skip_dependencies is True @@ -618,33 +659,115 @@ def test_mcp_service_helper_filters_and_metrics_payload() -> None: assert service._as_sequence("not-a-sequence") == () -def test_mcp_service_refresh_cache_reports_save_warning( +def test_mcp_service_git_diff_and_helper_branch_edges( tmp_path: Path, - monkeypatch: pytest.MonkeyPatch, ) -> None: - _write_clone_fixture(tmp_path) service = CodeCloneMCPService(history_limit=4) - refresh_calls: list[str] = [] - def _fake_refresh(*, cache: object, analysis: object) -> None: - refresh_calls.append("called") + with pytest.raises(MCPGitDiffError, match="must not start with '-'"): + mcp_service_mod._git_diff_lines_payload( + root_path=tmp_path, + git_diff_ref="--cached", + ) - def _fake_save(self: Cache) -> None: - raise CacheError("boom") + assert service._normalize_relative_path("./.github/workflows/docs.yml") == ( + ".github/workflows/docs.yml" + ) - monkeypatch.setattr(service, "_refresh_cache_projection", _fake_refresh) - monkeypatch.setattr(Cache, "save", 
_fake_save) + full_record = _dummy_run_record(tmp_path, "full") + object.__setattr__( + full_record, + "request", + MCPAnalysisRequest( + root=str(tmp_path), + respect_pyproject=False, + analysis_mode="full", + ), + ) + clones_only_record = _dummy_run_record(tmp_path, "clones") + object.__setattr__( + clones_only_record, + "request", + MCPAnalysisRequest( + root=str(tmp_path), + respect_pyproject=False, + analysis_mode="clones_only", + ), + ) + other_root_record = _dummy_run_record(tmp_path / "other", "other") + object.__setattr__( + other_root_record, + "request", + MCPAnalysisRequest( + root=str(tmp_path / "other"), + respect_pyproject=False, + analysis_mode="full", + ), + ) + service._runs.register(clones_only_record) + service._runs.register(other_root_record) + service._runs.register(full_record) - summary = service.analyze_repository( + assert ( + service._latest_compatible_record( + analysis_mode="clones_only", + root_path=tmp_path, + ) + is full_record + ) + assert ( + service._latest_compatible_record( + analysis_mode="full", + root_path=tmp_path, + ) + is full_record + ) + assert ( + service._latest_compatible_record( + analysis_mode="full", + root_path=tmp_path / "other", + ) + is other_root_record + ) + + service_full_fallback = CodeCloneMCPService(history_limit=4) + service_full_fallback._runs.register(clones_only_record) + service_full_fallback._runs.register(full_record) + service_full_fallback._runs.register( + _dummy_run_record(tmp_path, "latest-clones-only") + ) + object.__setattr__( + service_full_fallback._runs.get("latest-clones-only"), + "request", MCPAnalysisRequest( root=str(tmp_path), respect_pyproject=False, - cache_policy="refresh", + analysis_mode="clones_only", + ), + ) + assert ( + service_full_fallback._latest_compatible_record( + analysis_mode="full", + root_path=tmp_path, ) + is full_record ) - assert refresh_calls == ["called"] - assert "Cache save failed: boom" in cast("list[str]", summary["warnings"]) + +def 
test_mcp_service_rejects_refresh_cache_policy_in_read_only_mode( + tmp_path: Path, +) -> None: + _write_clone_fixture(tmp_path) + service = CodeCloneMCPService(history_limit=4) + + with pytest.raises(MCPServiceContractError, match="read-only"): + service.analyze_repository( + MCPAnalysisRequest( + root=str(tmp_path), + respect_pyproject=False, + cache_policy="refresh", + ) + ) def test_mcp_service_all_section_and_optional_path_overrides(tmp_path: Path) -> None: @@ -705,32 +828,35 @@ def _fake_load(self: Cache) -> None: ) assert load_calls == ["loaded"] - cache_without_projection = SimpleNamespace() - service._refresh_cache_projection( - cache=cast(Any, cache_without_projection), - analysis=cast( - Any, - SimpleNamespace( - suppressed_segment_groups=0, - segment_groups_raw_digest=None, - segment_groups={}, - ), - ), + +def test_mcp_service_build_args_defers_process_count_to_runtime( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + service = CodeCloneMCPService(history_limit=4) + + monkeypatch.setattr( + mcp_service_mod, + "load_pyproject_config", + lambda _root: {"processes": 3}, + ) + args = service._build_args( + root_path=tmp_path, + request=MCPAnalysisRequest(respect_pyproject=False), ) + assert args.processes is None - cache_with_projection = SimpleNamespace(segment_report_projection=()) - service._refresh_cache_projection( - cache=cast(Any, cache_with_projection), - analysis=cast( - Any, - SimpleNamespace( - suppressed_segment_groups=0, - segment_groups_raw_digest="digest", - segment_groups={}, - ), - ), + args_from_config = service._build_args( + root_path=tmp_path, + request=MCPAnalysisRequest(respect_pyproject=True), + ) + assert args_from_config.processes == 3 + + args_from_request = service._build_args( + root_path=tmp_path, + request=MCPAnalysisRequest(respect_pyproject=False, processes=2), ) - assert cache_with_projection.segment_report_projection is not None + assert args_from_request.processes == 2 def 
test_mcp_service_invalid_path_resolution_contract_errors( @@ -824,6 +950,8 @@ def _raise_subprocess(*args: object, **kwargs: object) -> object: assert tuple(record.run_id for record in store.records()) == ("second",) with pytest.raises(MCPRunNotFoundError): store.get("first") + with pytest.raises(ValueError): + mcp_service_mod.CodeCloneMCPRunStore(history_limit=11) def test_mcp_service_branch_helpers_on_real_runs( @@ -942,9 +1070,9 @@ def test_mcp_service_branch_helpers_on_real_runs( abs_dup = tmp_path / "pkg" / "dup.py" normalized = service._normalize_changed_paths( root_path=tmp_path, - paths=(str(abs_dup), "./pkg/dup.py", "pkg"), + paths=(str(abs_dup), "./pkg/dup.py", "pkg", "./.github/workflows/docs.yml"), ) - assert normalized == ("pkg", "pkg/dup.py") + assert normalized == (".github/workflows/docs.yml", "pkg", "pkg/dup.py") with pytest.raises(MCPServiceContractError): service._normalize_changed_paths( root_path=tmp_path, @@ -1484,13 +1612,15 @@ def _patched_get_finding( } }, }, - report_json="{}", summary={"run_id": "design", "health": {"score": 80, "grade": "B"}}, changed_paths=(), changed_projection=None, warnings=(), failures=(), - analysis=cast(Any, SimpleNamespace(suggestions=[])), + func_clones_count=0, + block_clones_count=0, + project_metrics=None, + suggestions=(), new_func=frozenset(), new_block=frozenset(), metrics_diff=None, @@ -1596,13 +1726,15 @@ def _patched_get_finding( **record.report_document, "derived": {"hotlists": {"highest_spread_ids": ["missing-id"]}}, }, - report_json=record.report_json, summary=record.summary, changed_paths=record.changed_paths, changed_projection=record.changed_projection, warnings=record.warnings, failures=record.failures, - analysis=record.analysis, + func_clones_count=record.func_clones_count, + block_clones_count=record.block_clones_count, + project_metrics=record.project_metrics, + suggestions=record.suggestions, new_func=record.new_func, new_block=record.new_block, metrics_diff=record.metrics_diff, @@ -1643,6 
+1775,34 @@ def _patched_get_finding( detail_level="summary", ) assert complexity_check["check"] == "complexity" + unfiltered_complexity = service.check_complexity( + run_id=run_id, + detail_level="summary", + ) + assert unfiltered_complexity["check"] == "complexity" + + +def test_mcp_service_clear_session_runs_clears_in_memory_state(tmp_path: Path) -> None: + service = _build_quality_service(tmp_path) + run_id = str(service.get_run_summary()["run_id"]) + first_finding = cast( + "list[dict[str, object]]", + service.list_findings(family="clone", detail_level="summary")["items"], + )[0] + service.mark_finding_reviewed( + run_id=run_id, + finding_id=str(first_finding["id"]), + note="triaged", + ) + service.evaluate_gates(MCPGateRequest(run_id=run_id, fail_threshold=0)) + + cleared = service.clear_session_runs() + + assert cleared["cleared_runs"] == 1 + assert cleared["cleared_review_entries"] == 1 + assert cleared["cleared_gate_results"] == 1 + with pytest.raises(MCPRunNotFoundError): + service.get_run_summary() def test_mcp_service_metrics_diff_warning_and_projection_branches( @@ -1693,13 +1853,133 @@ def test_mcp_service_metrics_diff_warning_and_projection_branches( metrics_diff = cast("dict[str, object]", summary["metrics_diff"]) assert metrics_diff["new_high_risk_functions"] == 1 assert "cache warning" in cast("list[str]", summary["warnings"]) - analysis = cast( - Any, - SimpleNamespace( - suppressed_segment_groups=0, - segment_groups_raw_digest="digest", - segment_groups={}, - ), + + +def test_mcp_service_helper_branches_for_empty_gate_and_missing_remediation( + tmp_path: Path, +) -> None: + service = CodeCloneMCPService(history_limit=2) + request = MCPAnalysisRequest(root=str(tmp_path), respect_pyproject=False) + record = MCPRunRecord( + run_id="helpers", + root=tmp_path, + request=request, + report_document={"metrics": 1}, + summary={}, + changed_paths=(), + changed_projection=None, + warnings=(), + failures=(), + func_clones_count=0, + block_clones_count=0, + 
project_metrics=None, + suggestions=(), + new_func=frozenset(), + new_block=frozenset(), + metrics_diff=None, + ) + service._runs.register(record) + + success_gate = service._evaluate_gate_snapshot( + record=record, + request=MCPGateRequest(fail_on_new=True, fail_threshold=10), + ) + assert success_gate.exit_code == 0 + assert success_gate.reasons == () + + clone_gate_record = MCPRunRecord( + run_id="helpers-new", + root=tmp_path, + request=request, + report_document={"meta": {}}, + summary={}, + changed_paths=(), + changed_projection=None, + warnings=(), + failures=(), + func_clones_count=0, + block_clones_count=0, + project_metrics=None, + suggestions=(), + new_func=frozenset({"clone:new"}), + new_block=frozenset(), + metrics_diff=None, + ) + clone_gate = service._evaluate_gate_snapshot( + record=clone_gate_record, + request=MCPGateRequest(fail_on_new=True, fail_threshold=10), + ) + assert clone_gate.exit_code == 3 + assert clone_gate.reasons == ("clone:new",) + + with pytest.raises(MCPServiceContractError): + service.get_report_section(run_id="helpers", section="metrics") + + assert service._suggestion_for_finding(record, "missing") is None + assert ( + service._remediation_for_finding( + record, + {"id": "missing", "severity": "info"}, + ) + is None + ) + detail = service._decorate_finding( + record, + {"id": "missing", "title": "Missing remediation", "severity": "info"}, + detail_level="summary", + remediation=None, + priority_payload={"score": 0.1, "factors": {}}, + ) + assert detail["id"] == "missing" + assert "remediation" not in detail + + +def test_mcp_service_record_lookup_helper_branches(tmp_path: Path) -> None: + service = CodeCloneMCPService(history_limit=2) + request = MCPAnalysisRequest(root=str(tmp_path), respect_pyproject=False) + record = MCPRunRecord( + run_id="lookup", + root=tmp_path, + request=request, + report_document={"meta": {}}, + summary={}, + changed_paths=(), + changed_projection=None, + warnings=(), + failures=(), + 
func_clones_count=0, + block_clones_count=0, + project_metrics=None, + suggestions=(), + new_func=frozenset(), + new_block=frozenset(), + metrics_diff=None, + ) + service._runs.register(record) + + foreign_record = MCPRunRecord( + run_id="foreign", + root=tmp_path, + request=request, + report_document={"meta": {}}, + summary={}, + changed_paths=(), + changed_projection=None, + warnings=(), + failures=(), + func_clones_count=0, + block_clones_count=0, + project_metrics=None, + suggestions=(), + new_func=frozenset(), + new_block=frozenset(), + metrics_diff=None, + ) + assert service._previous_run_for_root(foreign_record) is None + assert ( + service._latest_compatible_record( + analysis_mode="full", + root_path=tmp_path / "other", + ) + is None ) - service._refresh_cache_projection(cache=cache_with_warning, analysis=analysis) - service._refresh_cache_projection(cache=cache_with_warning, analysis=analysis) diff --git a/tests/test_pipeline_process.py b/tests/test_pipeline_process.py index f3474bc..f8bb8ad 100644 --- a/tests/test_pipeline_process.py +++ b/tests/test_pipeline_process.py @@ -10,6 +10,7 @@ import codeclone.pipeline as pipeline from codeclone.cache import Cache, CacheEntry, SourceStatsDict, file_stat_signature +from codeclone.models import HealthScore, ProjectMetrics from codeclone.normalize import NormalizationConfig @@ -53,6 +54,12 @@ def _build_boot(tmp_path: Path, *, processes: int) -> pipeline.BootstrapResult: ) +def test_resolve_process_count_defaults_in_runtime() -> None: + assert pipeline._resolve_process_count(None) == pipeline.DEFAULT_RUNTIME_PROCESSES + assert pipeline._resolve_process_count(0) == 1 + assert pipeline._resolve_process_count(3) == 3 + + def _build_discovery(filepaths: tuple[str, ...]) -> pipeline.DiscoveryResult: return pipeline.DiscoveryResult( files_found=len(filepaths), @@ -454,3 +461,83 @@ def _guard_import( assert artifacts.json is not None assert artifacts.md is None assert artifacts.sarif is None + + +def 
test_analyze_skips_suppressed_dead_code_scan_when_dead_code_is_disabled( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + boot = pipeline.BootstrapResult( + root=tmp_path, + config=NormalizationConfig(), + args=Namespace( + processes=None, + skip_metrics=False, + skip_dead_code=True, + skip_dependencies=True, + ), + output_paths=pipeline.OutputPaths(), + cache_path=tmp_path / "cache.json", + ) + discovery = _build_discovery(()) + processing = pipeline.ProcessingResult( + units=(), + blocks=(), + segments=(), + class_metrics=(), + module_deps=(), + dead_candidates=(), + referenced_names=frozenset(), + referenced_qualnames=frozenset(), + structural_findings=(), + files_analyzed=0, + files_skipped=0, + analyzed_lines=0, + analyzed_functions=0, + analyzed_methods=0, + analyzed_classes=0, + failed_files=(), + source_read_failures=(), + ) + project_metrics = ProjectMetrics( + complexity_avg=0.0, + complexity_max=0, + high_risk_functions=(), + coupling_avg=0.0, + coupling_max=0, + high_risk_classes=(), + cohesion_avg=0.0, + cohesion_max=0, + low_cohesion_classes=(), + dependency_modules=0, + dependency_edges=0, + dependency_edge_list=(), + dependency_cycles=(), + dependency_max_depth=0, + dependency_longest_chains=(), + dead_code=(), + health=HealthScore(total=100, grade="A", dimensions={"overall": 100}), + ) + + monkeypatch.setattr( + pipeline, + "compute_project_metrics", + lambda **kwargs: (project_metrics, None, ()), + ) + monkeypatch.setattr( + pipeline, + "find_suppressed_unused", + lambda **kwargs: (_ for _ in ()).throw( + AssertionError("should not compute suppressed dead-code items") + ), + ) + monkeypatch.setattr(pipeline, "compute_suggestions", lambda **kwargs: ()) + monkeypatch.setattr( + pipeline, + "build_metrics_report_payload", + lambda **kwargs: {"health": {"score": 100, "grade": "A", "dimensions": {}}}, + ) + + analysis = pipeline.analyze(boot=boot, discovery=discovery, processing=processing) + assert analysis.project_metrics == 
project_metrics + assert analysis.suppressed_dead_code_items == 0 diff --git a/tests/test_structural_findings.py b/tests/test_structural_findings.py index 6f3752e..9ea9ad9 100644 --- a/tests/test_structural_findings.py +++ b/tests/test_structural_findings.py @@ -676,6 +676,26 @@ def test_private_member_decoding_and_majority_defaults() -> None: assert sf._member_profile_value(member, "unknown-field") == "" +def test_summarize_branch_does_not_descend_into_nested_scopes() -> None: + body = ast.parse( + """ +if cond: + def inner(): + while True: + helper() + class Inner: + def method(self): + raise RuntimeError("boom") + value = 1 +""", + ).body + signature = sf._summarize_branch(body) + assert signature is not None + assert signature["calls"] == "0" + assert signature["raises"] == "0" + assert signature["has_loop"] == "0" + + def test_clone_cohort_builders_cover_early_exit_paths() -> None: base_member = sf._CloneCohortMember( file_path="pkg/a.py", From fc3210809a6ad1206ebf69d4208be263c7ba3eef Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Sun, 29 Mar 2026 22:06:52 +0500 Subject: [PATCH 04/15] feat(core): slim MCP payloads, fix stale analysis drift, and relicense code to MPL-2.0 - slim MCP summary and finding payloads: summary inventory now returns counts, `metrics` is summary-only, `metrics_detail` exposes the full dump, list/check envelopes expose `base_uri`, and summary/normal finding views drop repeated location `uri` and `priority_factors` - fix stale analysis drift by bumping cache schema to `2.3`, invalidating stale per-file cache entries after semantic analysis changes, and documenting the new cache compatibility rule - fix AST normalization side effects that corrupted downstream cohesion metrics and remove duplicated branch logic without changing canonical report schema - refresh the repository baseline and health snapshot after the analysis fix (`81 -> 85`) and update MCP/docs/tests to lock the new behavior - relicense repository code to `MPL-2.0`, keep 
documentation under `MIT`, update package metadata and user-facing license notes, add Mozilla file notices to Python sources, and add directory-level MPL notices for golden fixtures without changing fixture contents --- .github/actions/codeclone/_action_impl.py | 6 +- .../actions/codeclone/render_pr_comment.py | 6 +- .github/actions/codeclone/run_codeclone.py | 6 +- CHANGELOG.md | 22 + CONTRIBUTING.md | 86 +++- LICENSE | 386 +++++++++++++++++- LICENSE-docs | 25 ++ README.md | 66 ++- SECURITY.md | 60 ++- benchmarks/run_benchmark.py | 5 +- codeclone.baseline.json | 14 +- codeclone/__init__.py | 5 +- codeclone/_cli_args.py | 5 +- codeclone/_cli_baselines.py | 5 +- codeclone/_cli_config.py | 5 +- codeclone/_cli_gating.py | 5 +- codeclone/_cli_meta.py | 5 +- codeclone/_cli_paths.py | 5 +- codeclone/_cli_reports.py | 5 +- codeclone/_cli_rich.py | 5 +- codeclone/_cli_runtime.py | 5 +- codeclone/_cli_summary.py | 5 +- codeclone/_coerce.py | 5 +- codeclone/_html_badges.py | 5 +- codeclone/_html_css.py | 5 +- codeclone/_html_data_attrs.py | 5 +- codeclone/_html_escape.py | 5 +- codeclone/_html_filters.py | 5 +- codeclone/_html_js.py | 5 +- codeclone/_html_report/__init__.py | 5 +- codeclone/_html_report/_assemble.py | 5 +- codeclone/_html_report/_components.py | 5 +- codeclone/_html_report/_context.py | 5 +- codeclone/_html_report/_glossary.py | 5 +- codeclone/_html_report/_icons.py | 5 +- codeclone/_html_report/_sections/__init__.py | 5 +- codeclone/_html_report/_sections/_clones.py | 5 +- codeclone/_html_report/_sections/_coupling.py | 5 +- .../_html_report/_sections/_dead_code.py | 5 +- .../_html_report/_sections/_dependencies.py | 5 +- codeclone/_html_report/_sections/_meta.py | 5 +- codeclone/_html_report/_sections/_overview.py | 5 +- .../_html_report/_sections/_structural.py | 5 +- .../_html_report/_sections/_suggestions.py | 5 +- codeclone/_html_report/_tables.py | 5 +- codeclone/_html_report/_tabs.py | 5 +- codeclone/_html_snippets.py | 5 +- 
codeclone/_schema_validation.py | 5 +- codeclone/baseline.py | 5 +- codeclone/blockhash.py | 5 +- codeclone/blocks.py | 5 +- codeclone/cache.py | 5 +- codeclone/cache_io.py | 5 +- codeclone/cache_paths.py | 5 +- codeclone/cache_segments.py | 5 +- codeclone/cfg.py | 5 +- codeclone/cfg_model.py | 5 +- codeclone/cli.py | 5 +- codeclone/contracts.py | 7 +- codeclone/domain/__init__.py | 5 +- codeclone/domain/findings.py | 5 +- codeclone/domain/quality.py | 5 +- codeclone/domain/source_scope.py | 5 +- codeclone/errors.py | 5 +- codeclone/extractor.py | 104 +++-- codeclone/fingerprint.py | 5 +- codeclone/grouping.py | 5 +- codeclone/html_report.py | 5 +- codeclone/mcp_server.py | 8 +- codeclone/mcp_service.py | 139 +++++-- codeclone/meta_markers.py | 5 +- codeclone/metrics/__init__.py | 5 +- codeclone/metrics/cohesion.py | 5 +- codeclone/metrics/complexity.py | 5 +- codeclone/metrics/coupling.py | 5 +- codeclone/metrics/dead_code.py | 5 +- codeclone/metrics/dependencies.py | 5 +- codeclone/metrics/health.py | 5 +- codeclone/metrics_baseline.py | 5 +- codeclone/models.py | 5 +- codeclone/normalize.py | 11 +- codeclone/paths.py | 5 +- codeclone/pipeline.py | 5 +- codeclone/report/__init__.py | 5 +- codeclone/report/_formatting.py | 5 +- codeclone/report/_source_kinds.py | 5 +- codeclone/report/blocks.py | 5 +- codeclone/report/derived.py | 5 +- codeclone/report/explain.py | 5 +- codeclone/report/explain_contract.py | 5 +- codeclone/report/findings.py | 5 +- codeclone/report/json_contract.py | 5 +- codeclone/report/markdown.py | 5 +- codeclone/report/merge.py | 5 +- codeclone/report/overview.py | 5 +- codeclone/report/sarif.py | 5 +- codeclone/report/segments.py | 5 +- codeclone/report/serialize.py | 5 +- codeclone/report/suggestions.py | 5 +- codeclone/report/types.py | 5 +- codeclone/scanner.py | 5 +- codeclone/structural_findings.py | 5 +- codeclone/suppressions.py | 5 +- codeclone/templates.py | 5 +- codeclone/ui_messages.py | 5 +- docs/README.md | 2 +- 
docs/book/01-architecture-map.md | 6 + docs/book/07-cache.md | 7 +- docs/book/13-testing-as-spec.md | 16 +- docs/book/14-compatibility-and-versioning.md | 12 +- docs/book/20-mcp-interface.md | 66 ++- docs/book/appendix/b-schema-layouts.md | 4 +- docs/mcp.md | 16 +- pyproject.toml | 4 +- scripts/build_docs_example_report.py | 5 +- tests/__init__.py | 5 + tests/_assertions.py | 6 + tests/_ast_helpers.py | 6 + tests/_report_access.py | 6 + tests/_report_fixtures.py | 6 + tests/conftest.py | 6 + tests/fixtures/golden_project/LICENSE | 9 + .../golden_v2/clone_metrics_cycle/pkg/LICENSE | 9 + .../golden_v2/pyproject_defaults/pkg/LICENSE | 9 + .../golden_v2/test_only_usage/pkg/LICENSE | 9 + .../test_only_usage/pkg/tests/LICENSE | 9 + tests/test_architecture.py | 6 + tests/test_baseline.py | 6 + tests/test_blockhash.py | 6 + tests/test_blocks.py | 6 + tests/test_cache.py | 13 +- tests/test_cfg.py | 6 + tests/test_cfg_model.py | 6 + tests/test_cli_config.py | 6 + tests/test_cli_inprocess.py | 35 +- tests/test_cli_main_guard.py | 6 + tests/test_cli_main_guard_runpy.py | 6 + tests/test_cli_smoke.py | 6 + tests/test_cli_unit.py | 6 + tests/test_coerce.py | 6 + tests/test_core_branch_coverage.py | 6 + tests/test_detector_golden.py | 6 + tests/test_extractor.py | 57 ++- tests/test_fingerprint.py | 6 + tests/test_github_action_helpers.py | 6 + tests/test_golden_v2.py | 6 + tests/test_html_report.py | 6 + tests/test_html_report_helpers.py | 6 + tests/test_init.py | 6 + tests/test_mcp_server.py | 48 ++- tests/test_mcp_service.py | 238 ++++++++++- tests/test_metrics_baseline.py | 6 + tests/test_metrics_modules.py | 6 + tests/test_normalize.py | 23 ++ tests/test_pipeline_metrics.py | 6 + tests/test_pipeline_process.py | 6 + tests/test_python_syntax_compat.py | 6 + tests/test_report.py | 6 + tests/test_report_branch_invariants.py | 6 + tests/test_report_contract_coverage.py | 6 + tests/test_report_explain.py | 6 + tests/test_report_source_kinds.py | 6 + tests/test_report_suggestions.py 
| 6 + tests/test_scanner_extra.py | 6 + tests/test_security.py | 6 + tests/test_segments.py | 6 + tests/test_structural_findings.py | 5 +- tests/test_suppressions.py | 6 + 168 files changed, 1940 insertions(+), 303 deletions(-) create mode 100644 LICENSE-docs create mode 100644 tests/fixtures/golden_project/LICENSE create mode 100644 tests/fixtures/golden_v2/clone_metrics_cycle/pkg/LICENSE create mode 100644 tests/fixtures/golden_v2/pyproject_defaults/pkg/LICENSE create mode 100644 tests/fixtures/golden_v2/test_only_usage/pkg/LICENSE create mode 100644 tests/fixtures/golden_v2/test_only_usage/pkg/tests/LICENSE diff --git a/.github/actions/codeclone/_action_impl.py b/.github/actions/codeclone/_action_impl.py index b8418cf..66c4fd3 100644 --- a/.github/actions/codeclone/_action_impl.py +++ b/.github/actions/codeclone/_action_impl.py @@ -1,4 +1,8 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations diff --git a/.github/actions/codeclone/render_pr_comment.py b/.github/actions/codeclone/render_pr_comment.py index 1edf7b4..f08668e 100644 --- a/.github/actions/codeclone/render_pr_comment.py +++ b/.github/actions/codeclone/render_pr_comment.py @@ -1,4 +1,8 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations diff --git a/.github/actions/codeclone/run_codeclone.py b/.github/actions/codeclone/run_codeclone.py index 1c729e9..b253289 100644 --- a/.github/actions/codeclone/run_codeclone.py +++ b/.github/actions/codeclone/run_codeclone.py @@ -1,4 +1,8 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations diff --git a/CHANGELOG.md b/CHANGELOG.md index 7387527..4537fe4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,15 @@ # Changelog +## [Unreleased] + +### Licensing + +- Re-license repository code to MPL-2.0 and keep documentation under MIT. + +### Packaging + +- Ship both `LICENSE` and `LICENSE-docs`, update package metadata, and sync file-level SPDX headers. + ## [2.0.0b3] ### MCP server @@ -11,6 +21,18 @@ - Require explicit `--allow-remote` for non-loopback `streamable-http` binds; reject `cache_policy=refresh` to preserve read-only semantics. - Defer MCP process-count policy to the core runtime when `processes` is not explicitly overridden. +- Slim MCP summary payloads for agent usage: `get_run_summary`, summary resources, and `analyze_changed_paths` now + replace `inventory.file_registry.items` with `{encoding, count}` while `analyze_repository` keeps the full registry. +- Split `get_report_section(section="metrics")` into a summary-only projection and add `metrics_detail` for the full + metrics payload, without changing canonical report schema `2.1`. +- Slim `health.dimensions` in granular `check_*` responses to the single dimension relevant to each tool. +- Keep hotspot `source_kind` aligned with canonical finding payloads, including fixture-scoped findings. 
+- Add envelope-level `base_uri` to `list_findings`, `list_hotspots`, and `check_*`, while removing repeated per-location + `uri` values from summary/normal finding payloads. +- Slim finding list payloads further: summary responses drop `priority_factors` and keep only `file` + `line` in + locations; normal responses keep `symbol` but still omit `uri` and `priority_factors`; `get_finding` remains full. +- Bump cache schema to `2.3` so stale per-file analysis entries from older metric semantics are ignored and rebuilt + instead of being treated as reusable cache hits. ### CLI diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index ff63f06..6a92890 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -3,7 +3,7 @@ Thank you for your interest in contributing to **CodeClone**. CodeClone provides **structural code quality analysis** for Python, including clone detection, -quality metrics, and baseline-aware CI governance. +quality metrics, baseline-aware CI governance, and an optional MCP agent interface. Contributions are welcome — especially those that improve **signal quality**, **CFG semantics**, and **real-world CI usability**. @@ -31,8 +31,11 @@ We especially welcome contributions in the following areas: - Control Flow Graph (CFG) construction and semantics - AST normalization improvements - Segment-level clone detection and reporting +- Quality metrics (complexity, coupling, cohesion, dead-code, dependencies) - False-positive reduction - HTML report UX improvements +- MCP server tools and agent workflows +- GitHub Action improvements - Performance optimizations - Documentation and real-world examples @@ -51,6 +54,8 @@ When reporting issues related to clone detection, include: - AST-related, - CFG-related, - normalization-related, + - metrics-related, + - MCP-related, - reporting / UI-related. Screenshots alone are usually insufficient for analysis. @@ -73,8 +78,6 @@ Well-argued false-positive reports are valuable and appreciated. 
## CFG Semantics Discussions -CFG behavior in CodeClone is intentionally conservative in the 1.x series. - If proposing changes to CFG semantics, include: - a description of the current behavior; @@ -98,15 +101,13 @@ Such changes often require design-level discussion and may be staged across vers ## Baseline & CI -### Baseline contract (v1) +### Baseline contract (v2) -- The baseline schema is versioned (`meta.schema_version`). +- The baseline schema is versioned (`meta.schema_version`, currently `2.0`). - Compatibility/trust gates include `schema_version`, `fingerprint_version`, `python_tag`, and `meta.generator.name`. -- Integrity is tamper-evident via `meta.payload_sha256` over canonical payload: - `clones.functions`, `clones.blocks`, `meta.fingerprint_version`, `meta.python_tag`. - `meta.schema_version`, `meta.generator.name`, `meta.generator.version`, and `created_at` - are excluded from payload hashing. +- Integrity is tamper-evident via `meta.payload_sha256` over canonical payload. +- The baseline may embed a `metrics` section for metrics-baseline-aware CI gating. ### When baseline regeneration is required @@ -131,12 +132,55 @@ Such changes often require design-level discussion and may be staged across vers --- +## Versioned schemas + +CodeClone maintains several versioned schema contracts: + +| Schema | Current version | Owner | +|------------------|-----------------|-------------------------------------| +| Baseline | `2.0` | `codeclone/baseline.py` | +| Report | `2.1` | `codeclone/report/json_contract.py` | +| Cache | `2.3` | `codeclone/cache.py` | +| Metrics baseline | `1.0` | `codeclone/metrics_baseline.py` | + +Any change to schema shape or semantics requires version review, documentation, and tests. + +--- + +## MCP Interface + +CodeClone includes an optional **read-only MCP server** (`codeclone[mcp]`) for AI agents. + +When contributing to MCP: + +- MCP must remain **read-only** — it must never mutate baselines, source files, or repo state.
+- Session-local review markers are the only allowed mutable state (in-memory, ephemeral). +- MCP reuses pipeline/report contracts — do not create a second analysis truth path. +- Tool names, resource URIs, and response shapes are public surfaces — changes require tests and docs. + +See `docs/mcp.md` and `docs/book/20-mcp-interface.md` for details. + +--- + +## GitHub Action + +CodeClone ships a composite GitHub Action (`.github/actions/codeclone/`). + +When contributing to the Action: + +- Never inline `${{ inputs.* }}` in shell scripts — pass through `env:` variables. +- Prefer major-tag pinning for actions (e.g., `actions/setup-python@v5`). +- Add timeouts to all `subprocess.run` calls. + +--- + ## Development Setup ```bash git clone https://github.com/orenlab/codeclone.git cd codeclone uv sync --all-extras --dev +uv run pre-commit install ``` Run tests: @@ -148,16 +192,26 @@ uv run pytest Static checks: ```bash -uv run mypy . -uv run ruff check . -uv run ruff format . +uv run pre-commit run --all-files +``` + +Build documentation (if you touched `docs/` or `mkdocs.yml`): + +```bash +uv run --with mkdocs --with mkdocs-material mkdocs build --strict +``` + +Run MCP tests (if you touched `mcp_service.py` or `mcp_server.py`): + +```bash +uv run pytest -q tests/test_mcp_service.py tests/test_mcp_server.py ``` --- ## Code Style -- Python **3.10–3.14** +- Python **3.10 – 3.14** - Type annotations are required - `Any` should be minimized; prefer precise types and small typed helpers - `mypy` must pass @@ -182,5 +236,7 @@ and may require a `fingerprint_version` bump (and thus baseline regeneration). ## License -By contributing to CodeClone, you agree that your contributions will be licensed -under the **MIT License**. +By contributing code to CodeClone, you agree that your contributions will be +licensed under **MPL-2.0**. + +Documentation contributions are licensed under **MIT**. 
diff --git a/LICENSE b/LICENSE index fdcac7c..df9d84d 100644 --- a/LICENSE +++ b/LICENSE @@ -1,23 +1,373 @@ -MIT License +Mozilla Public License Version 2.0 +================================== -Copyright (c) 2024 Denis Rozhnovskiy +1. Definitions +-------------- -The name “CodeClone” refers to the official project distribution. +1.1. "Contributor" + means each individual or legal entity that creates, contributes to + the creation of, or owns Covered Software. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: +1.2. "Contributor Version" + means the combination of the Contributions of others (if any) used + by a Contributor and that particular Contributor's Contribution. -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. +1.3. "Contribution" + means Covered Software of a particular Contributor. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. +1.4. "Covered Software" + means Source Code Form to which the initial Contributor has attached + the notice in Exhibit A, the Executable Form of such Source Code + Form, and Modifications of such Source Code Form, in each case + including portions thereof. 
+ +1.5. "Incompatible With Secondary Licenses" + means + + (a) that the initial Contributor has attached the notice described + in Exhibit B to the Covered Software; or + + (b) that the Covered Software was made available under the terms of + version 1.1 or earlier of the License, but not also under the + terms of a Secondary License. + +1.6. "Executable Form" + means any form of the work other than Source Code Form. + +1.7. "Larger Work" + means a work that combines Covered Software with other material, in + a separate file or files, that is not Covered Software. + +1.8. "License" + means this document. + +1.9. "Licensable" + means having the right to grant, to the maximum extent possible, + whether at the time of the initial grant or subsequently, any and + all of the rights conveyed by this License. + +1.10. "Modifications" + means any of the following: + + (a) any file in Source Code Form that results from an addition to, + deletion from, or modification of the contents of Covered + Software; or + + (b) any new file in Source Code Form that contains any Covered + Software. + +1.11. "Patent Claims" of a Contributor + means any patent claim(s), including without limitation, method, + process, and apparatus claims, in any patent Licensable by such + Contributor that would be infringed, but for the grant of the + License, by the making, using, selling, offering for sale, having + made, import, or transfer of either its Contributions or its + Contributor Version. + +1.12. "Secondary License" + means either the GNU General Public License, Version 2.0, the GNU + Lesser General Public License, Version 2.1, the GNU Affero General + Public License, Version 3.0, or any later versions of those + licenses. + +1.13. "Source Code Form" + means the form of the work preferred for making modifications. + +1.14. "You" (or "Your") + means an individual or a legal entity exercising rights under this + License. 
For legal entities, "You" includes any entity that + controls, is controlled by, or is under common control with You. For + purposes of this definition, "control" means (a) the power, direct + or indirect, to cause the direction or management of such entity, + whether by contract or otherwise, or (b) ownership of more than + fifty percent (50%) of the outstanding shares or beneficial + ownership of such entity. + +2. License Grants and Conditions +-------------------------------- + +2.1. Grants + +Each Contributor hereby grants You a world-wide, royalty-free, +non-exclusive license: + +(a) under intellectual property rights (other than patent or trademark) + Licensable by such Contributor to use, reproduce, make available, + modify, display, perform, distribute, and otherwise exploit its + Contributions, either on an unmodified basis, with Modifications, or + as part of a Larger Work; and + +(b) under Patent Claims of such Contributor to make, use, sell, offer + for sale, have made, import, and otherwise transfer either its + Contributions or its Contributor Version. + +2.2. Effective Date + +The licenses granted in Section 2.1 with respect to any Contribution +become effective for each Contribution on the date the Contributor first +distributes such Contribution. + +2.3. Limitations on Grant Scope + +The licenses granted in this Section 2 are the only rights granted under +this License. No additional rights or licenses will be implied from the +distribution or licensing of Covered Software under this License. 
+Notwithstanding Section 2.1(b) above, no patent license is granted by a +Contributor: + +(a) for any code that a Contributor has removed from Covered Software; + or + +(b) for infringements caused by: (i) Your and any other third party's + modifications of Covered Software, or (ii) the combination of its + Contributions with other software (except as part of its Contributor + Version); or + +(c) under Patent Claims infringed by Covered Software in the absence of + its Contributions. + +This License does not grant any rights in the trademarks, service marks, +or logos of any Contributor (except as may be necessary to comply with +the notice requirements in Section 3.4). + +2.4. Subsequent Licenses + +No Contributor makes additional grants as a result of Your choice to +distribute the Covered Software under a subsequent version of this +License (see Section 10.2) or under the terms of a Secondary License (if +permitted under the terms of Section 3.3). + +2.5. Representation + +Each Contributor represents that the Contributor believes its +Contributions are its original creation(s) or it has sufficient rights +to grant the rights to its Contributions conveyed by this License. + +2.6. Fair Use + +This License is not intended to limit any rights You have under +applicable copyright doctrines of fair use, fair dealing, or other +equivalents. + +2.7. Conditions + +Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted +in Section 2.1. + +3. Responsibilities +------------------- + +3.1. Distribution of Source Form + +All distribution of Covered Software in Source Code Form, including any +Modifications that You create or to which You contribute, must be under +the terms of this License. You must inform recipients that the Source +Code Form of the Covered Software is governed by the terms of this +License, and how they can obtain a copy of this License. You may not +attempt to alter or restrict the recipients' rights in the Source Code +Form. + +3.2. 
Distribution of Executable Form + +If You distribute Covered Software in Executable Form then: + +(a) such Covered Software must also be made available in Source Code + Form, as described in Section 3.1, and You must inform recipients of + the Executable Form how they can obtain a copy of such Source Code + Form by reasonable means in a timely manner, at a charge no more + than the cost of distribution to the recipient; and + +(b) You may distribute such Executable Form under the terms of this + License, or sublicense it under different terms, provided that the + license for the Executable Form does not attempt to limit or alter + the recipients' rights in the Source Code Form under this License. + +3.3. Distribution of a Larger Work + +You may create and distribute a Larger Work under terms of Your choice, +provided that You also comply with the requirements of this License for +the Covered Software. If the Larger Work is a combination of Covered +Software with a work governed by one or more Secondary Licenses, and the +Covered Software is not Incompatible With Secondary Licenses, this +License permits You to additionally distribute such Covered Software +under the terms of such Secondary License(s), so that the recipient of +the Larger Work may, at their option, further distribute the Covered +Software under the terms of either this License or such Secondary +License(s). + +3.4. Notices + +You may not remove or alter the substance of any license notices +(including copyright notices, patent notices, disclaimers of warranty, +or limitations of liability) contained within the Source Code Form of +the Covered Software, except that You may alter any license notices to +the extent required to remedy known factual inaccuracies. + +3.5. Application of Additional Terms + +You may choose to offer, and to charge a fee for, warranty, support, +indemnity or liability obligations to one or more recipients of Covered +Software. 
However, You may do so only on Your own behalf, and not on +behalf of any Contributor. You must make it absolutely clear that any +such warranty, support, indemnity, or liability obligation is offered by +You alone, and You hereby agree to indemnify every Contributor for any +liability incurred by such Contributor as a result of warranty, support, +indemnity or liability terms You offer. You may include additional +disclaimers of warranty and limitations of liability specific to any +jurisdiction. + +4. Inability to Comply Due to Statute or Regulation +--------------------------------------------------- + +If it is impossible for You to comply with any of the terms of this +License with respect to some or all of the Covered Software due to +statute, judicial order, or regulation then You must: (a) comply with +the terms of this License to the maximum extent possible; and (b) +describe the limitations and the code they affect. Such description must +be placed in a text file included with all distributions of the Covered +Software under this License. Except to the extent prohibited by statute +or regulation, such description must be sufficiently detailed for a +recipient of ordinary skill to be able to understand it. + +5. Termination +-------------- + +5.1. The rights granted under this License will terminate automatically +if You fail to comply with any of its terms. However, if You become +compliant, then the rights granted from a particular Contributor are +reinstated (a) provisionally, unless and until such Contributor +explicitly and finally terminates Your grants, and (b) on an ongoing +basis, if such Contributor fails to notify You of the non-compliance by +some reasonable means prior to 60 days after You have come back into +compliance. 
Moreover, Your grants from a particular Contributor are +reinstated on an ongoing basis if such Contributor notifies You of the +non-compliance by some reasonable means, this is the first time You have +received notice of non-compliance with this License from such +Contributor, and You become compliant prior to 30 days after Your +receipt of the notice. + +5.2. If You initiate litigation against any entity by asserting a patent +infringement claim (excluding declaratory judgment actions, +counter-claims, and cross-claims) alleging that a Contributor Version +directly or indirectly infringes any patent, then the rights granted to +You by any and all Contributors for the Covered Software under Section +2.1 of this License shall terminate. + +5.3. In the event of termination under Sections 5.1 or 5.2 above, all +end user license agreements (excluding distributors and resellers) which +have been validly granted by You or Your distributors under this License +prior to termination shall survive termination. + +************************************************************************ +* * +* 6. Disclaimer of Warranty * +* ------------------------- * +* * +* Covered Software is provided under this License on an "as is" * +* basis, without warranty of any kind, either expressed, implied, or * +* statutory, including, without limitation, warranties that the * +* Covered Software is free of defects, merchantable, fit for a * +* particular purpose or non-infringing. The entire risk as to the * +* quality and performance of the Covered Software is with You. * +* Should any Covered Software prove defective in any respect, You * +* (not any Contributor) assume the cost of any necessary servicing, * +* repair, or correction. This disclaimer of warranty constitutes an * +* essential part of this License. No use of any Covered Software is * +* authorized under this License except under this disclaimer. 
* +* * +************************************************************************ + +************************************************************************ +* * +* 7. Limitation of Liability * +* -------------------------- * +* * +* Under no circumstances and under no legal theory, whether tort * +* (including negligence), contract, or otherwise, shall any * +* Contributor, or anyone who distributes Covered Software as * +* permitted above, be liable to You for any direct, indirect, * +* special, incidental, or consequential damages of any character * +* including, without limitation, damages for lost profits, loss of * +* goodwill, work stoppage, computer failure or malfunction, or any * +* and all other commercial damages or losses, even if such party * +* shall have been informed of the possibility of such damages. This * +* limitation of liability shall not apply to liability for death or * +* personal injury resulting from such party's negligence to the * +* extent applicable law prohibits such limitation. Some * +* jurisdictions do not allow the exclusion or limitation of * +* incidental or consequential damages, so this exclusion and * +* limitation may not apply to You. * +* * +************************************************************************ + +8. Litigation +------------- + +Any litigation relating to this License may be brought only in the +courts of a jurisdiction where the defendant maintains its principal +place of business and such litigation shall be governed by laws of that +jurisdiction, without reference to its conflict-of-law provisions. +Nothing in this Section shall prevent a party's ability to bring +cross-claims or counter-claims. + +9. Miscellaneous +---------------- + +This License represents the complete agreement concerning the subject +matter hereof. If any provision of this License is held to be +unenforceable, such provision shall be reformed only to the extent +necessary to make it enforceable. 
Any law or regulation which provides +that the language of a contract shall be construed against the drafter +shall not be used to construe this License against a Contributor. + +10. Versions of the License +--------------------------- + +10.1. New Versions + +Mozilla Foundation is the license steward. Except as provided in Section +10.3, no one other than the license steward has the right to modify or +publish new versions of this License. Each version will be given a +distinguishing version number. + +10.2. Effect of New Versions + +You may distribute the Covered Software under the terms of the version +of the License under which You originally received the Covered Software, +or under the terms of any subsequent version published by the license +steward. + +10.3. Modified Versions + +If you create software not governed by this License, and you want to +create a new license for such software, you may create and use a +modified version of this License if you rename the license and remove +any references to the name of the license steward (except to note that +such modified license differs from this License). + +10.4. Distributing Source Code Form that is Incompatible With Secondary +Licenses + +If You choose to distribute Source Code Form that is Incompatible With +Secondary Licenses under the terms of this version of the License, the +notice described in Exhibit B of this License must be attached. + +Exhibit A - Source Code Form License Notice +------------------------------------------- + + This Source Code Form is subject to the terms of the Mozilla Public + License, v. 2.0. If a copy of the MPL was not distributed with this + file, You can obtain one at http://mozilla.org/MPL/2.0/. + +If it is not possible or desirable to put the notice in a particular +file, then You may include the notice in a location (such as a LICENSE +file in a relevant directory) where a recipient would be likely to look +for such a notice. 
+ +You may add additional accurate notices of copyright ownership. + +Exhibit B - "Incompatible With Secondary Licenses" Notice +--------------------------------------------------------- + + This Source Code Form is "Incompatible With Secondary Licenses", as + defined by the Mozilla Public License, v. 2.0. diff --git a/LICENSE-docs b/LICENSE-docs new file mode 100644 index 0000000..66b3e88 --- /dev/null +++ b/LICENSE-docs @@ -0,0 +1,25 @@ +MIT License + +Copyright (c) 2024 Denis Rozhnovskiy + +This license applies to documentation in this repository, including the +`docs/` tree and Markdown documentation files, unless a file states +otherwise. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this documentation and associated files (the "Documentation"), to deal +in the Documentation without restriction, including without limitation the +rights to use, copy, modify, merge, publish, distribute, sublicense, +and/or sell copies of the Documentation, and to permit persons to whom the +Documentation is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Documentation. + +THE DOCUMENTATION IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE DOCUMENTATION OR THE USE OR OTHER DEALINGS IN +THE DOCUMENTATION. diff --git a/README.md b/README.md index bed0c3a..00db248 100644 --- a/README.md +++ b/README.md @@ -12,8 +12,8 @@ Tests Benchmark Python - codeclone 81 (B) - License + codeclone 85 (B) + License

    --- @@ -48,18 +48,31 @@ Live sample report: ## Quick Start ```bash -pip install codeclone # or: uv tool install codeclone - -codeclone . # analyze current directory -codeclone . --html # generate HTML report -codeclone . --html --open-html-report # generate and open HTML report -codeclone . --json --md --sarif --text # generate machine-readable reports -codeclone . --html --json --timestamped-report-paths # keep timestamped report snapshots -codeclone . --changed-only --diff-against main # changed-scope clone gating against git diff -codeclone . --paths-from-git-diff HEAD~1 # shorthand diff source for changed-scope review -codeclone . --ci # CI mode (--fail-on-new --no-color --quiet) +pip install codeclone # or: uv tool install codeclone + +codeclone . # analyze +codeclone . --html # HTML report +codeclone . --html --open-html-report # open in browser +codeclone . --json --md --sarif --text # all formats +codeclone . --ci # CI mode ``` +
    +More examples + +```bash +# timestamped report snapshots +codeclone . --html --json --timestamped-report-paths + +# changed-scope gating against git diff +codeclone . --changed-only --diff-against main + +# shorthand: diff source for changed-scope review +codeclone . --paths-from-git-diff HEAD~1 +``` + +
    +
    Run without install @@ -138,14 +151,22 @@ repos: CodeClone ships an optional read-only MCP server for AI agents and IDE clients. ```bash -pip install "codeclone[mcp]" # install the extra -codeclone-mcp --transport stdio # local agents (Claude Code, Codex, Copilot, Gemini CLI) -codeclone-mcp --transport streamable-http --port 8000 # remote/HTTP-only clients +# install the MCP extra +pip install "codeclone[mcp]" + +# local agents (Claude Code, Codex, Copilot, Gemini CLI) +codeclone-mcp --transport stdio + +# remote / HTTP-only clients +codeclone-mcp --transport streamable-http --port 8000 ``` -The server exposes 19 tools (analysis, diff-aware checks, findings, remediation, gates, PR summaries) -and 9 resources — all deterministic, baseline-aware, and read-only. -It never mutates source files, baselines, or repo state. +19 tools + 9 resources — deterministic, baseline-aware, read-only. +Never mutates source files, baselines, or repo state. +List-style finding responses expose a single `base_uri` per envelope and keep +summary locations compact; `get_finding` remains the full-detail endpoint. +`get_run_summary` and `analyze_changed_paths` return slim inventory counts; +`get_report_section(metrics)` returns summary-only, `metrics_detail` gives the full dump. Docs: [MCP usage guide](https://orenlab.github.io/codeclone/mcp/) @@ -389,8 +410,15 @@ in [Benchmarking contract](https://orenlab.github.io/codeclone/book/18-benchmark
    +## License + +- **Code:** MPL-2.0 +- **Documentation:** MIT + +Versions released before this change remain under their original license terms. + ## Links - **Issues:** - **PyPI:** -- **License:** MIT +- **Licenses:** [MPL-2.0](LICENSE) · [MIT docs](LICENSE-docs) diff --git a/SECURITY.md b/SECURITY.md index aca157b..333de2d 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -27,36 +27,74 @@ CodeClone operates purely on static input and follows a conservative execution m - Performs analysis in-process with explicit resource limits - Generates static HTML reports without external dependencies -Potential risk areas include: +### Core analysis -- malformed or adversarial source files -- extremely large inputs leading to resource exhaustion -- HTML report generation and embedding - -These areas are explicitly tested and hardened, but are still the primary focus of -ongoing security review. +- Scanner traversal is root-confined and prevents symlink-based path escape. +- Temporary files use unpredictable names (`tempfile.NamedTemporaryFile` with `delete=False`) + and atomic replacement (`os.replace`) to prevent predictable-path attacks. -Additional safeguards: +### HTML reports - HTML report content is escaped in both text and attribute contexts to prevent script injection. - Reports are static and do not execute analyzed code. -- Report explainability fields are generated in Python core; UI is rendering-only and does not infer semantics. -- Scanner traversal is root-confined and prevents symlink-based path escape. +- Report explainability fields are generated in Python core; UI is rendering-only and does not + infer semantics. + +### Baseline and cache integrity + - Baseline files are schema/type validated with size limits and tamper-evident integrity fields (`meta.generator` as trust gate, `meta.payload_sha256` as integrity hash in baseline schema `2.0`). - Baseline integrity is tamper-evident (audit signal), not tamper-proof cryptographic signing. 
An actor who can rewrite baseline content and recompute `payload_sha256` can still alter it. -- Baseline hash covers canonical payload only (`clones.functions`, `clones.blocks`, +- Baseline hash covers canonical clone payload (`clones.functions`, `clones.blocks`, `meta.fingerprint_version`, `meta.python_tag`). - Baseline hash excludes non-semantic metadata (`created_at`, `meta.generator.version`). - `meta.schema_version` and `meta.generator.name` are validated as compatibility/trust gates and are intentionally excluded from `payload_sha256`. +- Metrics baseline (`MetricsBaseline`) maintains a separate integrity hash over its own payload, + independent of the clone baseline hash. - In `--ci` (or explicit `--fail-on-new`), untrusted baseline states fail fast; otherwise baseline is ignored with explicit warning and comparison proceeds against an empty baseline. - Cache files are integrity-signed with canonical payload hashing (constant-time comparison), size-limited, and ignored on mismatch. - Legacy cache secret files (`.cache/codeclone/.cache_secret`) are obsolete and should be removed. +### MCP server + +CodeClone includes an optional read-only MCP server (`codeclone[mcp]`) that exposes +analysis results over JSON-RPC (stdio or streamable-HTTP transport). + +- The MCP server is **read-only**: it never mutates baselines, source files, cache, or repo state. +- Session-local review markers are in-memory only and discarded on process exit. +- Tool arguments that accept git refs (`git_diff_ref`) are validated against a strict regex + to prevent command injection via `subprocess` calls. +- The MCP run store is bounded (`history_limit`) with FIFO eviction to prevent unbounded + memory growth from repeated analysis calls. +- MCP is an optional extra (`codeclone[mcp]`); its runtime dependencies are never loaded + by the base install or CLI. + +### GitHub Action + +CodeClone ships a composite GitHub Action (`.github/actions/codeclone/`). 
+ +- All `${{ inputs.* }}` values are passed through `env:` variables, never inlined in shell + scripts, to prevent script injection from untrusted PR authors. +- External subprocess calls use explicit timeouts (`timeout=600` for analysis, + `timeout=30` for git commands) to prevent hanging CI runners. + +### Potential risk areas + +Potential risk areas include: + +- malformed or adversarial source files +- extremely large inputs leading to resource exhaustion +- HTML report generation and embedding +- MCP tool arguments from untrusted agent contexts +- GitHub Action inputs from untrusted PR authors + +These areas are explicitly tested and hardened, but remain the primary focus of +ongoing security review. + --- ## Reporting a Vulnerability diff --git a/benchmarks/run_benchmark.py b/benchmarks/run_benchmark.py index c9b7135..ba12356 100755 --- a/benchmarks/run_benchmark.py +++ b/benchmarks/run_benchmark.py @@ -1,5 +1,8 @@ #!/usr/bin/env python3 -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations diff --git a/codeclone.baseline.json b/codeclone.baseline.json index 4492b81..8b28ea2 100644 --- a/codeclone.baseline.json +++ b/codeclone.baseline.json @@ -2,14 +2,14 @@ "meta": { "generator": { "name": "codeclone", - "version": "2.0.0b2" + "version": "2.0.0b3" }, "schema_version": "2.0", "fingerprint_version": "1", "python_tag": "cp313", - "created_at": "2026-03-26T16:36:17Z", + "created_at": "2026-03-29T16:19:22Z", "payload_sha256": "691c6cedd10e2a51d6038780f3ae9dffe763356dd2aba742b3980f131b79f217", - "metrics_payload_sha256": "f18db9aa4573517b0babb31e4e995208209895ea6b8a1957087c0f3b6f1f5434" + "metrics_payload_sha256": "878d5169c9ffd6d73eb0ce3ce55166df3d080b85ed835091f33ff53d2779b9ac" }, "clones": { "functions": [ @@ -30,16 +30,14 @@ "high_risk_functions": [], "max_coupling": 10, "high_coupling_classes": [], - "max_cohesion": 5, + "max_cohesion": 4, "low_cohesion_classes": [ - "codeclone.baseline:Baseline", - "codeclone.metrics_baseline:MetricsBaseline", "tests.test_golden_v2:_DummyExecutor" ], "dependency_cycles": [], - "dependency_max_depth": 10, + "dependency_max_depth": 11, "dead_code_items": [], - "health_score": 81, + "health_score": 85, "health_grade": "B" } } diff --git a/codeclone/__init__.py b/codeclone/__init__.py index b52ea47..cb9ce1d 100644 --- a/codeclone/__init__.py +++ b/codeclone/__init__.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy from importlib.metadata import PackageNotFoundError, version diff --git a/codeclone/_cli_args.py b/codeclone/_cli_args.py index f995660..17a2a2f 100644 --- a/codeclone/_cli_args.py +++ b/codeclone/_cli_args.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations diff --git a/codeclone/_cli_baselines.py b/codeclone/_cli_baselines.py index 64a187c..ed415f7 100644 --- a/codeclone/_cli_baselines.py +++ b/codeclone/_cli_baselines.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations diff --git a/codeclone/_cli_config.py b/codeclone/_cli_config.py index b31d9b1..22efec1 100644 --- a/codeclone/_cli_config.py +++ b/codeclone/_cli_config.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations diff --git a/codeclone/_cli_gating.py b/codeclone/_cli_gating.py index d6d100f..5a5ae7d 100644 --- a/codeclone/_cli_gating.py +++ b/codeclone/_cli_gating.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. 
If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations diff --git a/codeclone/_cli_meta.py b/codeclone/_cli_meta.py index b9b8c20..07cea7d 100644 --- a/codeclone/_cli_meta.py +++ b/codeclone/_cli_meta.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations diff --git a/codeclone/_cli_paths.py b/codeclone/_cli_paths.py index 2fb6d11..3577dc0 100644 --- a/codeclone/_cli_paths.py +++ b/codeclone/_cli_paths.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations diff --git a/codeclone/_cli_reports.py b/codeclone/_cli_reports.py index f1ffea6..126879c 100644 --- a/codeclone/_cli_reports.py +++ b/codeclone/_cli_reports.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations diff --git a/codeclone/_cli_rich.py b/codeclone/_cli_rich.py index 506a6ce..88f9d00 100644 --- a/codeclone/_cli_rich.py +++ b/codeclone/_cli_rich.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations diff --git a/codeclone/_cli_runtime.py b/codeclone/_cli_runtime.py index b7e315e..616057b 100644 --- a/codeclone/_cli_runtime.py +++ b/codeclone/_cli_runtime.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations diff --git a/codeclone/_cli_summary.py b/codeclone/_cli_summary.py index a0496f0..69b30da 100644 --- a/codeclone/_cli_summary.py +++ b/codeclone/_cli_summary.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations diff --git a/codeclone/_coerce.py b/codeclone/_coerce.py index e4c07bd..9017c6a 100644 --- a/codeclone/_coerce.py +++ b/codeclone/_coerce.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. 
If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations diff --git a/codeclone/_html_badges.py b/codeclone/_html_badges.py index f35dc17..dc06b15 100644 --- a/codeclone/_html_badges.py +++ b/codeclone/_html_badges.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy """Shared HTML badge, label, and visual helpers for the report UI layer. diff --git a/codeclone/_html_css.py b/codeclone/_html_css.py index fd18350..621989a 100644 --- a/codeclone/_html_css.py +++ b/codeclone/_html_css.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy """CSS design system for the HTML report — tokens, components, layout.""" diff --git a/codeclone/_html_data_attrs.py b/codeclone/_html_data_attrs.py index cf10e4b..74b2f8b 100644 --- a/codeclone/_html_data_attrs.py +++ b/codeclone/_html_data_attrs.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy """Unified data-attribute builder for HTML elements.""" diff --git a/codeclone/_html_escape.py b/codeclone/_html_escape.py index b12a3b8..63b1a7e 100644 --- a/codeclone/_html_escape.py +++ b/codeclone/_html_escape.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations diff --git a/codeclone/_html_filters.py b/codeclone/_html_filters.py index 980cf91..dd9bbf3 100644 --- a/codeclone/_html_filters.py +++ b/codeclone/_html_filters.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy """Data-driven filter dropdown renderer for report toolbars.""" diff --git a/codeclone/_html_js.py b/codeclone/_html_js.py index 5f95074..0d07299 100644 --- a/codeclone/_html_js.py +++ b/codeclone/_html_js.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy """JavaScript for the HTML report — modular IIFE with feature blocks.""" diff --git a/codeclone/_html_report/__init__.py b/codeclone/_html_report/__init__.py index fbbfff7..69b89c1 100644 --- a/codeclone/_html_report/__init__.py +++ b/codeclone/_html_report/__init__.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy """New HTML report package — component-based architecture.""" diff --git a/codeclone/_html_report/_assemble.py b/codeclone/_html_report/_assemble.py index b5f6308..ca8edb3 100644 --- a/codeclone/_html_report/_assemble.py +++ b/codeclone/_html_report/_assemble.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy """Orchestrator: build_context → render all sections → template.substitute.""" diff --git a/codeclone/_html_report/_components.py b/codeclone/_html_report/_components.py index 76193e6..3a0d5a8 100644 --- a/codeclone/_html_report/_components.py +++ b/codeclone/_html_report/_components.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy """Shared UI components: insight banners, summary helpers, chip rows.""" diff --git a/codeclone/_html_report/_context.py b/codeclone/_html_report/_context.py index 8d05650..40718fb 100644 --- a/codeclone/_html_report/_context.py +++ b/codeclone/_html_report/_context.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy """ReportContext — immutable shared state for all section renderers.""" diff --git a/codeclone/_html_report/_glossary.py b/codeclone/_html_report/_glossary.py index 7712253..ba05a00 100644 --- a/codeclone/_html_report/_glossary.py +++ b/codeclone/_html_report/_glossary.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy """Tooltip glossary for report table headers and stat cards.""" diff --git a/codeclone/_html_report/_icons.py b/codeclone/_html_report/_icons.py index c364917..2109c13 100644 --- a/codeclone/_html_report/_icons.py +++ b/codeclone/_html_report/_icons.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy """SVG icon constants for the HTML report (Lucide-style).""" diff --git a/codeclone/_html_report/_sections/__init__.py b/codeclone/_html_report/_sections/__init__.py index a8917fd..9135843 100644 --- a/codeclone/_html_report/_sections/__init__.py +++ b/codeclone/_html_report/_sections/__init__.py @@ -1,2 +1,5 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy diff --git a/codeclone/_html_report/_sections/_clones.py b/codeclone/_html_report/_sections/_clones.py index 7e4a419..f071e3c 100644 --- a/codeclone/_html_report/_sections/_clones.py +++ b/codeclone/_html_report/_sections/_clones.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy """Clones panel renderer — function/block/segment sections.""" diff --git a/codeclone/_html_report/_sections/_coupling.py b/codeclone/_html_report/_sections/_coupling.py index 224e8cc..cfc7bac 100644 --- a/codeclone/_html_report/_sections/_coupling.py +++ b/codeclone/_html_report/_sections/_coupling.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy """Coupling + Cohesion panel renderer (unified Quality tab).""" diff --git a/codeclone/_html_report/_sections/_dead_code.py b/codeclone/_html_report/_sections/_dead_code.py index ca87f42..1823128 100644 --- a/codeclone/_html_report/_sections/_dead_code.py +++ b/codeclone/_html_report/_sections/_dead_code.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy """Dead Code panel renderer.""" diff --git a/codeclone/_html_report/_sections/_dependencies.py b/codeclone/_html_report/_sections/_dependencies.py index 67d5917..3258f9d 100644 --- a/codeclone/_html_report/_sections/_dependencies.py +++ b/codeclone/_html_report/_sections/_dependencies.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy """Dependencies panel renderer (SVG graph + tables).""" diff --git a/codeclone/_html_report/_sections/_meta.py b/codeclone/_html_report/_sections/_meta.py index 6c0fcde..a29e494 100644 --- a/codeclone/_html_report/_sections/_meta.py +++ b/codeclone/_html_report/_sections/_meta.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy """Report Provenance / metadata panel renderer.""" diff --git a/codeclone/_html_report/_sections/_overview.py b/codeclone/_html_report/_sections/_overview.py index be3b811..7c91b73 100644 --- a/codeclone/_html_report/_sections/_overview.py +++ b/codeclone/_html_report/_sections/_overview.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy """Overview panel renderer.""" diff --git a/codeclone/_html_report/_sections/_structural.py b/codeclone/_html_report/_sections/_structural.py index 4f09a52..9a9f5c8 100644 --- a/codeclone/_html_report/_sections/_structural.py +++ b/codeclone/_html_report/_sections/_structural.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy """Structural Findings panel — thin wrapper delegating to report/findings.py.""" diff --git a/codeclone/_html_report/_sections/_suggestions.py b/codeclone/_html_report/_sections/_suggestions.py index 5cc3212..d9aa766 100644 --- a/codeclone/_html_report/_sections/_suggestions.py +++ b/codeclone/_html_report/_sections/_suggestions.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy """Suggestions panel renderer.""" diff --git a/codeclone/_html_report/_tables.py b/codeclone/_html_report/_tables.py index 5153c0f..14bf7aa 100644 --- a/codeclone/_html_report/_tables.py +++ b/codeclone/_html_report/_tables.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy """Generic table renderer for metric/finding tables.""" diff --git a/codeclone/_html_report/_tabs.py b/codeclone/_html_report/_tabs.py index 54870ca..d9241a8 100644 --- a/codeclone/_html_report/_tabs.py +++ b/codeclone/_html_report/_tabs.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy """Tab/subtab rendering helpers.""" diff --git a/codeclone/_html_snippets.py b/codeclone/_html_snippets.py index 9ae7e40..dac7eec 100644 --- a/codeclone/_html_snippets.py +++ b/codeclone/_html_snippets.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations diff --git a/codeclone/_schema_validation.py b/codeclone/_schema_validation.py index 43280c0..e90404f 100644 --- a/codeclone/_schema_validation.py +++ b/codeclone/_schema_validation.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations diff --git a/codeclone/baseline.py b/codeclone/baseline.py index 357da62..53d4a37 100644 --- a/codeclone/baseline.py +++ b/codeclone/baseline.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations diff --git a/codeclone/blockhash.py b/codeclone/blockhash.py index 5eb8bcc..0ba3199 100644 --- a/codeclone/blockhash.py +++ b/codeclone/blockhash.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations diff --git a/codeclone/blocks.py b/codeclone/blocks.py index 8aca801..89697c8 100644 --- a/codeclone/blocks.py +++ b/codeclone/blocks.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. 
If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations diff --git a/codeclone/cache.py b/codeclone/cache.py index cecc73e..3bee46f 100644 --- a/codeclone/cache.py +++ b/codeclone/cache.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations diff --git a/codeclone/cache_io.py b/codeclone/cache_io.py index e63e408..ecffc83 100644 --- a/codeclone/cache_io.py +++ b/codeclone/cache_io.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations diff --git a/codeclone/cache_paths.py b/codeclone/cache_paths.py index 62d0d82..8de7c63 100644 --- a/codeclone/cache_paths.py +++ b/codeclone/cache_paths.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations diff --git a/codeclone/cache_segments.py b/codeclone/cache_segments.py index df4bca7..a771e51 100644 --- a/codeclone/cache_segments.py +++ b/codeclone/cache_segments.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations diff --git a/codeclone/cfg.py b/codeclone/cfg.py index 097a216..e16be39 100644 --- a/codeclone/cfg.py +++ b/codeclone/cfg.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations diff --git a/codeclone/cfg_model.py b/codeclone/cfg_model.py index bb5fba2..4361e04 100644 --- a/codeclone/cfg_model.py +++ b/codeclone/cfg_model.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations diff --git a/codeclone/cli.py b/codeclone/cli.py index d996bf6..1aa9d66 100644 --- a/codeclone/cli.py +++ b/codeclone/cli.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. 
If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations diff --git a/codeclone/contracts.py b/codeclone/contracts.py index fdb09de..91e5109 100644 --- a/codeclone/contracts.py +++ b/codeclone/contracts.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations @@ -9,7 +12,7 @@ BASELINE_SCHEMA_VERSION: Final = "2.0" BASELINE_FINGERPRINT_VERSION: Final = "1" -CACHE_VERSION: Final = "2.2" +CACHE_VERSION: Final = "2.3" REPORT_SCHEMA_VERSION: Final = "2.1" METRICS_BASELINE_SCHEMA_VERSION: Final = "1.0" diff --git a/codeclone/domain/__init__.py b/codeclone/domain/__init__.py index 86ffb32..61cd04f 100644 --- a/codeclone/domain/__init__.py +++ b/codeclone/domain/__init__.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy from .findings import ( diff --git a/codeclone/domain/findings.py b/codeclone/domain/findings.py index 07f0c49..66f4851 100644 --- a/codeclone/domain/findings.py +++ b/codeclone/domain/findings.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations diff --git a/codeclone/domain/quality.py b/codeclone/domain/quality.py index 6d03baa..cca64c2 100644 --- a/codeclone/domain/quality.py +++ b/codeclone/domain/quality.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations diff --git a/codeclone/domain/source_scope.py b/codeclone/domain/source_scope.py index ddfd3ea..578b3e3 100644 --- a/codeclone/domain/source_scope.py +++ b/codeclone/domain/source_scope.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations diff --git a/codeclone/errors.py b/codeclone/errors.py index 77371c4..7b9331f 100644 --- a/codeclone/errors.py +++ b/codeclone/errors.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy diff --git a/codeclone/extractor.py b/codeclone/extractor.py index 6eebd40..a23b559 100644 --- a/codeclone/extractor.py +++ b/codeclone/extractor.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. 
If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations @@ -533,6 +536,59 @@ def _node_line_span(node: ast.AST) -> tuple[int, int] | None: return start, end +def _eligible_unit_shape( + node: FunctionNode, + *, + min_loc: int, + min_stmt: int, +) -> tuple[int, int, int, int] | None: + span = _node_line_span(node) + if span is None: + return None + start, end = span + if end < start: + return None + loc = end - start + 1 + stmt_count = _stmt_count(node) + if loc < min_loc or stmt_count < min_stmt: + return None + return start, end, loc, stmt_count + + +def _class_metrics_for_node( + *, + module_name: str, + class_qualname: str, + class_node: ast.ClassDef, + filepath: str, + module_import_names: set[str], + module_class_names: set[str], +) -> ClassMetrics | None: + span = _node_line_span(class_node) + if span is None: + return None + start, end = span + cbo, coupled_classes = compute_cbo( + class_node, + module_import_names=module_import_names, + module_class_names=module_class_names, + ) + lcom4, method_count, instance_var_count = compute_lcom4(class_node) + return ClassMetrics( + qualname=f"{module_name}:{class_qualname}", + filepath=filepath, + start_line=start, + end_line=end, + cbo=cbo, + lcom4=lcom4, + method_count=method_count, + instance_var_count=instance_var_count, + risk_coupling=coupling_risk(cbo), + risk_cohesion=cohesion_risk(lcom4), + coupled_classes=coupled_classes, + ) + + def _dead_candidate_kind(local_name: str) -> Literal["function", "method"]: return "method" if "." 
in local_name else "function" @@ -984,17 +1040,14 @@ def extract_units_and_stats_from_source( structural_findings: list[StructuralFindingGroup] = [] for local_name, node in collector.units: - start = getattr(node, "lineno", None) - end = getattr(node, "end_lineno", None) - - if not start or not end or end < start: - continue - - loc = end - start + 1 - stmt_count = _stmt_count(node) - - if loc < min_loc or stmt_count < min_stmt: + unit_shape = _eligible_unit_shape( + node, + min_loc=min_loc, + min_stmt=min_stmt, + ) + if unit_shape is None: continue + start, end, loc, stmt_count = unit_shape qualname = f"{module_name}:{local_name}" fingerprint, complexity = _cfg_fingerprint_and_complexity(node, cfg, qualname) @@ -1078,31 +1131,16 @@ def extract_units_and_stats_from_source( structural_findings.extend(structure_facts.structural_findings) for class_qualname, class_node in collector.class_nodes: - start = int(getattr(class_node, "lineno", 0)) - end = int(getattr(class_node, "end_lineno", 0)) - if start <= 0 or end <= 0: - continue - cbo, coupled_classes = compute_cbo( - class_node, + class_metric = _class_metrics_for_node( + module_name=module_name, + class_qualname=class_qualname, + class_node=class_node, + filepath=filepath, module_import_names=module_import_names, module_class_names=module_class_names, ) - lcom4, method_count, instance_var_count = compute_lcom4(class_node) - class_metrics.append( - ClassMetrics( - qualname=f"{module_name}:{class_qualname}", - filepath=filepath, - start_line=start, - end_line=end, - cbo=cbo, - lcom4=lcom4, - method_count=method_count, - instance_var_count=instance_var_count, - risk_coupling=coupling_risk(cbo), - risk_cohesion=cohesion_risk(lcom4), - coupled_classes=coupled_classes, - ) - ) + if class_metric is not None: + class_metrics.append(class_metric) dead_candidates = _collect_dead_candidates( filepath=filepath, diff --git a/codeclone/fingerprint.py b/codeclone/fingerprint.py index d47b8fc..72adaee 100644 --- 
a/codeclone/fingerprint.py +++ b/codeclone/fingerprint.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations diff --git a/codeclone/grouping.py b/codeclone/grouping.py index 583e62a..a5ac7db 100644 --- a/codeclone/grouping.py +++ b/codeclone/grouping.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations diff --git a/codeclone/html_report.py b/codeclone/html_report.py index 3783b23..16ceab5 100644 --- a/codeclone/html_report.py +++ b/codeclone/html_report.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy """Public facade for HTML report generation. diff --git a/codeclone/mcp_server.py b/codeclone/mcp_server.py index 7523775..ed00137 100644 --- a/codeclone/mcp_server.py +++ b/codeclone/mcp_server.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations @@ -271,7 +274,8 @@ def evaluate_gates( title="Get Report Section", description=( "Return a canonical CodeClone report section for the latest or " - "specified MCP run." + "specified MCP run. The 'metrics' section returns only the " + "summary, while 'metrics_detail' returns the full metrics dump." ), annotations=read_only_tool, structured_output=True, diff --git a/codeclone/mcp_service.py b/codeclone/mcp_service.py index 169a6d4..aaeafc3 100644 --- a/codeclone/mcp_service.py +++ b/codeclone/mcp_service.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations @@ -117,6 +120,7 @@ "inventory", "findings", "metrics", + "metrics_detail", "derived", "changed", "integrity", @@ -191,6 +195,7 @@ "inventory", "findings", "metrics", + "metrics_detail", "derived", "changed", "integrity", @@ -206,6 +211,13 @@ } ) _VALID_SEVERITIES = frozenset({SEVERITY_CRITICAL, SEVERITY_WARNING, SEVERITY_INFO}) +_CHECK_TO_DIMENSION: Final[dict[str, str]] = { + "cohesion": "cohesion", + "coupling": "coupling", + "dead_code": "dead_code", + "complexity": "complexity", + "clones": "clones", +} _as_int = _coerce.as_int _as_float = _coerce.as_float _as_str = _coerce.as_str @@ -763,10 +775,12 @@ def analyze_changed_paths(self, request: MCPAnalysisRequest) -> dict[str, object raise MCPServiceContractError( "analyze_changed_paths requires changed_paths or git_diff_ref." 
) - return self.analyze_repository(request) + summary = dict(self.analyze_repository(request)) + return self._summary_payload(summary) def get_run_summary(self, run_id: str | None = None) -> dict[str, object]: - return dict(self._runs.get(run_id).summary) + summary = dict(self._runs.get(run_id).summary) + return self._summary_payload(summary) def compare_runs( self, @@ -906,6 +920,16 @@ def get_report_section( "Report section 'changed' is not available in this run." ) return dict(record.changed_projection) + if validated_section == "metrics": + metrics = self._as_mapping(report_document.get("metrics")) + return {"summary": dict(self._as_mapping(metrics.get("summary")))} + if validated_section == "metrics_detail": + payload = report_document.get("metrics") + if not isinstance(payload, Mapping): + raise MCPServiceContractError( + "Report section 'metrics_detail' is not available in this run." + ) + return dict(payload) payload = report_document.get(validated_section) if not isinstance(payload, Mapping): raise MCPServiceContractError( @@ -980,6 +1004,7 @@ def list_findings( next_offset = normalized_offset + len(items) return { "run_id": record.run_id, + "base_uri": record.root.as_uri(), "detail_level": validated_detail, "sort_by": validated_sort, "changed_paths": list(paths_filter), @@ -1076,6 +1101,7 @@ def list_hotspots( ) return { "run_id": record.run_id, + "base_uri": record.root.as_uri(), "kind": validated_kind, "detail_level": validated_detail, "changed_paths": list(paths_filter), @@ -1459,7 +1485,7 @@ def read_resource(self, uri: str) -> str: def _render_resource(self, record: MCPRunRecord, suffix: str) -> str: if suffix == "summary": return json.dumps( - record.summary, + self._summary_payload(dict(record.summary)), ensure_ascii=False, indent=2, sort_keys=True, @@ -2024,7 +2050,11 @@ def _decorate_finding( payload = dict(finding) payload["priority_score"] = resolved_priority_payload["score"] payload["priority_factors"] = resolved_priority_payload["factors"] - 
payload["locations"] = self._locations_for_finding(record, finding) + payload["locations"] = self._locations_for_finding( + record, + finding, + include_uri=detail_level == "full", + ) payload["html_anchor"] = f"finding-{finding.get('id', '')}" if resolved_remediation is not None: payload["remediation"] = resolved_remediation @@ -2041,6 +2071,7 @@ def _project_finding_detail( if detail_level == "summary": return self._finding_summary_card_payload(finding) payload = dict(finding) + payload.pop("priority_factors", None) if "remediation" in payload: payload["remediation"] = self._project_remediation( self._as_mapping(payload["remediation"]), @@ -2067,9 +2098,11 @@ def _finding_summary_card_payload( **card, "novelty": str(finding.get("novelty", "")), "priority_score": _as_float(finding.get("priority_score", 0.0), 0.0), - "priority_factors": dict(self._as_mapping(finding.get("priority_factors"))), "locations": [ - dict(self._as_mapping(item)) + { + "file": str(self._as_mapping(item).get("file", "")), + "line": _as_int(self._as_mapping(item).get("line", 0), 0), + } for item in self._as_sequence(finding.get("locations"))[:3] ], } @@ -2132,6 +2165,21 @@ def _finding_is_reviewed( review_map = self._review_state.get(record.run_id, OrderedDict()) return str(finding.get("id", "")) in review_map + def _include_hotspot_finding( + self, + *, + record: MCPRunRecord, + finding: Mapping[str, object], + changed_paths: Sequence[str], + exclude_reviewed: bool, + ) -> bool: + if changed_paths and not self._finding_touches_paths( + finding=finding, + changed_paths=changed_paths, + ): + return False + return not exclude_reviewed or not self._finding_is_reviewed(record, finding) + def _priority_score( self, record: MCPRunRecord, @@ -2222,6 +2270,8 @@ def _locations_for_finding( self, record: MCPRunRecord, finding: Mapping[str, object], + *, + include_uri: bool = True, ) -> list[dict[str, object]]: locations: list[dict[str, object]] = [] for item in self._as_sequence(finding.get("items")): 
@@ -2229,20 +2279,20 @@ def _locations_for_finding( relative_path = str(item_map.get("relative_path", "")).strip() if not relative_path: continue - absolute_path = (record.root / relative_path).resolve() line = _as_int(item_map.get("start_line", 0) or 0, 0) symbol = str(item_map.get("qualname", item_map.get("module", ""))).strip() - uri = absolute_path.as_uri() - if line > 0: - uri = f"{uri}#L{line}" - locations.append( - { - "file": relative_path, - "line": line, - "symbol": symbol, - "uri": uri, - } - ) + location: dict[str, object] = { + "file": relative_path, + "line": line, + "symbol": symbol, + } + if include_uri: + absolute_path = (record.root / relative_path).resolve() + uri = absolute_path.as_uri() + if line > 0: + uri = f"{uri}#L{line}" + location["uri"] = uri + locations.append(location) deduped: list[dict[str, object]] = [] seen: set[tuple[str, int, str]] = set() for location in locations: @@ -2447,20 +2497,21 @@ def _hotspot_rows( rows: list[dict[str, object]] = [] for finding_id in ordered_ids: finding = finding_index.get(finding_id) - if finding is None: - continue - if changed_paths and not self._finding_touches_paths( + if finding is None or not self._include_hotspot_finding( + record=record, finding=finding, changed_paths=changed_paths, + exclude_reviewed=exclude_reviewed, ): continue - if exclude_reviewed and self._finding_is_reviewed(record, finding): - continue finding_id_key = str(finding.get("id", "")) + projection_detail: DetailLevel = ( + "normal" if detail_level == "summary" else detail_level + ) decorated = self._decorate_finding( record, finding, - detail_level=detail_level, + detail_level=projection_detail, remediation=remediation_map[finding_id_key], priority_payload=priority_map[finding_id_key], max_spread_value=max_spread_value, @@ -2476,7 +2527,6 @@ def _hotspot_rows( "id": finding_id, "novelty": decorated.get("novelty"), "priority_score": decorated.get("priority_score"), - "priority_factors": decorated.get("priority_factors"), 
"locations": decorated.get("locations"), } ) @@ -2670,14 +2720,27 @@ def _granular_payload( path: str | None, ) -> dict[str, object]: bounded_items = [dict(item) for item in items[: max(1, max_results)]] + full_health = dict(self._as_mapping(record.summary.get("health"))) + dimensions = self._as_mapping(full_health.get("dimensions")) + relevant_dimension = _CHECK_TO_DIMENSION.get(check) + slim_dimensions = ( + {relevant_dimension: dimensions.get(relevant_dimension)} + if relevant_dimension and relevant_dimension in dimensions + else dict(dimensions) + ) return { "run_id": record.run_id, + "base_uri": record.root.as_uri(), "check": check, "detail_level": detail_level, "path": path, "returned": len(bounded_items), "total": len(items), - "health": dict(self._as_mapping(record.summary.get("health"))), + "health": { + "score": full_health.get("score"), + "grade": full_health.get("grade"), + "dimensions": slim_dimensions, + }, "items": bounded_items, } @@ -3044,6 +3107,28 @@ def _build_run_summary_payload( "failures": list(failures), } + def _summary_payload( + self, + summary: Mapping[str, object], + ) -> dict[str, object]: + payload = dict(summary) + inventory = self._as_mapping(payload.get("inventory")) + if inventory: + payload["inventory"] = self._slim_inventory(inventory) + return payload + + def _slim_inventory( + self, + inventory: Mapping[str, object], + ) -> dict[str, object]: + slim_inventory = dict(inventory) + registry = self._as_mapping(slim_inventory.get("file_registry")) + slim_inventory["file_registry"] = { + "encoding": registry.get("encoding", "relative_path"), + "count": len(self._as_sequence(registry.get("items"))), + } + return slim_inventory + def _metrics_diff_payload( self, metrics_diff: MetricsDiff | None, diff --git a/codeclone/meta_markers.py b/codeclone/meta_markers.py index ec0d390..3f527d6 100644 --- a/codeclone/meta_markers.py +++ b/codeclone/meta_markers.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is 
subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations diff --git a/codeclone/metrics/__init__.py b/codeclone/metrics/__init__.py index e9c1afe..bf64509 100644 --- a/codeclone/metrics/__init__.py +++ b/codeclone/metrics/__init__.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations diff --git a/codeclone/metrics/cohesion.py b/codeclone/metrics/cohesion.py index 5e02dfb..c8a389b 100644 --- a/codeclone/metrics/cohesion.py +++ b/codeclone/metrics/cohesion.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations diff --git a/codeclone/metrics/complexity.py b/codeclone/metrics/complexity.py index 2e6919e..4573da5 100644 --- a/codeclone/metrics/complexity.py +++ b/codeclone/metrics/complexity.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations diff --git a/codeclone/metrics/coupling.py b/codeclone/metrics/coupling.py index 043e552..8a34037 100644 --- a/codeclone/metrics/coupling.py +++ b/codeclone/metrics/coupling.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations diff --git a/codeclone/metrics/dead_code.py b/codeclone/metrics/dead_code.py index 4762548..3b64c97 100644 --- a/codeclone/metrics/dead_code.py +++ b/codeclone/metrics/dead_code.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations diff --git a/codeclone/metrics/dependencies.py b/codeclone/metrics/dependencies.py index caa32d9..48ba032 100644 --- a/codeclone/metrics/dependencies.py +++ b/codeclone/metrics/dependencies.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations diff --git a/codeclone/metrics/health.py b/codeclone/metrics/health.py index 9886ae9..9f0ab67 100644 --- a/codeclone/metrics/health.py +++ b/codeclone/metrics/health.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations diff --git a/codeclone/metrics_baseline.py b/codeclone/metrics_baseline.py index d47d24a..3522a05 100644 --- a/codeclone/metrics_baseline.py +++ b/codeclone/metrics_baseline.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations diff --git a/codeclone/models.py b/codeclone/models.py index f882d37..eaae21a 100644 --- a/codeclone/models.py +++ b/codeclone/models.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations diff --git a/codeclone/normalize.py b/codeclone/normalize.py index b3e0243..98bd78e 100644 --- a/codeclone/normalize.py +++ b/codeclone/normalize.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. 
If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations @@ -214,14 +217,16 @@ def normalized_ast_dump_from_list( ) -> str: """ Dump a list of AST nodes after normalization. - WARNING: This modifies the AST nodes in-place for performance. + + The normalizer works on deep-copied nodes so callers can safely reuse + the original AST for downstream metrics and reporting passes. """ active_normalizer = normalizer or AstNormalizer(cfg) dumps: list[str] = [] for node in nodes: # Fingerprints ignore location attributes, so we skip location repair. - new_node = active_normalizer.visit(node) + new_node = active_normalizer.visit(copy.deepcopy(node)) assert isinstance(new_node, ast.AST) dumps.append(ast.dump(new_node, annotate_fields=True, include_attributes=False)) diff --git a/codeclone/paths.py b/codeclone/paths.py index 551d2be..c9a33a6 100644 --- a/codeclone/paths.py +++ b/codeclone/paths.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations diff --git a/codeclone/pipeline.py b/codeclone/pipeline.py index 3e3eb26..69e3f68 100644 --- a/codeclone/pipeline.py +++ b/codeclone/pipeline.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations diff --git a/codeclone/report/__init__.py b/codeclone/report/__init__.py index 08f4da3..31e295a 100644 --- a/codeclone/report/__init__.py +++ b/codeclone/report/__init__.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations diff --git a/codeclone/report/_formatting.py b/codeclone/report/_formatting.py index 9b3cffb..4e9362b 100644 --- a/codeclone/report/_formatting.py +++ b/codeclone/report/_formatting.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations diff --git a/codeclone/report/_source_kinds.py b/codeclone/report/_source_kinds.py index 4e9dee8..15aab14 100644 --- a/codeclone/report/_source_kinds.py +++ b/codeclone/report/_source_kinds.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations diff --git a/codeclone/report/blocks.py b/codeclone/report/blocks.py index 7e1b592..8ecaf5a 100644 --- a/codeclone/report/blocks.py +++ b/codeclone/report/blocks.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations diff --git a/codeclone/report/derived.py b/codeclone/report/derived.py index cfa8fd1..3aac986 100644 --- a/codeclone/report/derived.py +++ b/codeclone/report/derived.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations diff --git a/codeclone/report/explain.py b/codeclone/report/explain.py index 5673e84..2c6e4e2 100644 --- a/codeclone/report/explain.py +++ b/codeclone/report/explain.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations diff --git a/codeclone/report/explain_contract.py b/codeclone/report/explain_contract.py index 2169ff2..fccbd5f 100644 --- a/codeclone/report/explain_contract.py +++ b/codeclone/report/explain_contract.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations diff --git a/codeclone/report/findings.py b/codeclone/report/findings.py index a056603..1c7d93d 100644 --- a/codeclone/report/findings.py +++ b/codeclone/report/findings.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy """CodeClone — structural code quality analysis for Python. diff --git a/codeclone/report/json_contract.py b/codeclone/report/json_contract.py index e53746c..765999c 100644 --- a/codeclone/report/json_contract.py +++ b/codeclone/report/json_contract.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations diff --git a/codeclone/report/markdown.py b/codeclone/report/markdown.py index 71e1eef..237817e 100644 --- a/codeclone/report/markdown.py +++ b/codeclone/report/markdown.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations diff --git a/codeclone/report/merge.py b/codeclone/report/merge.py index fc59e9e..2ba2331 100644 --- a/codeclone/report/merge.py +++ b/codeclone/report/merge.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations diff --git a/codeclone/report/overview.py b/codeclone/report/overview.py index 14fac90..73a535c 100644 --- a/codeclone/report/overview.py +++ b/codeclone/report/overview.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations diff --git a/codeclone/report/sarif.py b/codeclone/report/sarif.py index 01f5dce..0d40890 100644 --- a/codeclone/report/sarif.py +++ b/codeclone/report/sarif.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. 
If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations diff --git a/codeclone/report/segments.py b/codeclone/report/segments.py index ba5ec9a..d335234 100644 --- a/codeclone/report/segments.py +++ b/codeclone/report/segments.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations diff --git a/codeclone/report/serialize.py b/codeclone/report/serialize.py index f074cd3..83489f4 100644 --- a/codeclone/report/serialize.py +++ b/codeclone/report/serialize.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations diff --git a/codeclone/report/suggestions.py b/codeclone/report/suggestions.py index f1277e2..21414d0 100644 --- a/codeclone/report/suggestions.py +++ b/codeclone/report/suggestions.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations diff --git a/codeclone/report/types.py b/codeclone/report/types.py index 42bd16d..6824e6b 100644 --- a/codeclone/report/types.py +++ b/codeclone/report/types.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations diff --git a/codeclone/scanner.py b/codeclone/scanner.py index 42ed7f7..a9c65a9 100644 --- a/codeclone/scanner.py +++ b/codeclone/scanner.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations diff --git a/codeclone/structural_findings.py b/codeclone/structural_findings.py index aac3ee9..d0e3d78 100644 --- a/codeclone/structural_findings.py +++ b/codeclone/structural_findings.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy """CodeClone — structural code quality analysis for Python. diff --git a/codeclone/suppressions.py b/codeclone/suppressions.py index 2a9984d..9b2b149 100644 --- a/codeclone/suppressions.py +++ b/codeclone/suppressions.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 
2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations diff --git a/codeclone/templates.py b/codeclone/templates.py index 67b2891..bc3d493 100644 --- a/codeclone/templates.py +++ b/codeclone/templates.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy """Minimal HTML skeleton template for the report. diff --git a/codeclone/ui_messages.py b/codeclone/ui_messages.py index 1b9f35d..7aca82e 100644 --- a/codeclone/ui_messages.py +++ b/codeclone/ui_messages.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations diff --git a/docs/README.md b/docs/README.md index d6b6ad8..ec3d332 100644 --- a/docs/README.md +++ b/docs/README.md @@ -38,7 +38,7 @@ repository build: - [Config and defaults](book/04-config-and-defaults.md) - [Core pipeline and invariants](book/05-core-pipeline.md) - [Baseline contract (schema v2.0)](book/06-baseline.md) -- [Cache contract (schema v2.2)](book/07-cache.md) +- [Cache contract (schema v2.3)](book/07-cache.md) - [Report contract (schema v2.1)](book/08-report.md) ## Interfaces diff --git a/docs/book/01-architecture-map.md b/docs/book/01-architecture-map.md index ecc5cff..f88861b 100644 --- a/docs/book/01-architecture-map.md +++ b/docs/book/01-architecture-map.md @@ -43,6 +43,12 @@ Refs: recompute detection semantics. - MCP layer reuses current pipeline/report semantics and must not introduce a separate analysis truth path. +- MCP may ship task-specific slim projections (for example, summary-only metrics + or inventory counts) as long as canonical report data remains the source of + truth and richer detail stays reachable through dedicated tools/sections. +- MCP finding lists may also move repeated absolute location context to + envelope-level metadata such as `base_uri`, while keeping `get_finding` as + the richer per-finding inspection path. - Baseline, metrics baseline, and cache are validated before being trusted. Refs: diff --git a/docs/book/07-cache.md b/docs/book/07-cache.md index 3690c7c..1e2fe51 100644 --- a/docs/book/07-cache.md +++ b/docs/book/07-cache.md @@ -2,7 +2,7 @@ ## Purpose -Define cache schema v2.2, integrity verification, and fail-open behavior. +Define cache schema v2.3, integrity verification, and fail-open behavior. ## Public surface @@ -13,7 +13,7 @@ Define cache schema v2.2, integrity verification, and fail-open behavior. 
## Data model -On-disk schema (`v == "2.2"`): +On-disk schema (`v == "2.3"`): - Top-level: `v`, `payload`, `sig` - `payload` keys: `py`, `fp`, `ap`, `files`, optional `sr` @@ -58,6 +58,9 @@ Refs: (`min_loc`, `min_stmt`, `block_min_loc`, `block_min_stmt`, `segment_min_loc`, `segment_min_stmt`) - `sig` equals deterministic hash of canonical payload +- Cache schema must also be bumped when cached analysis semantics change in a + way that could leave syntactically valid but semantically stale per-file + entries accepted by runtime compatibility checks. Refs: diff --git a/docs/book/13-testing-as-spec.md b/docs/book/13-testing-as-spec.md index ac46762..fbdff31 100644 --- a/docs/book/13-testing-as-spec.md +++ b/docs/book/13-testing-as-spec.md @@ -29,14 +29,14 @@ Test classes by role: The following matrix is treated as executable contract: -| Contract | Tests | -|--------------------------------------------|---------------------------------------------------------------------------------------------------------------| -| Baseline schema/integrity/compat gates | `tests/test_baseline.py` | -| Cache v2.2 fail-open + status mapping | `tests/test_cache.py`, `tests/test_cli_inprocess.py::test_cli_reports_cache_too_large_respects_max_size_flag` | -| Exit code categories and markers | `tests/test_cli_unit.py`, `tests/test_cli_inprocess.py` | -| Report schema v2.1 canonical/derived/integrity + JSON/TXT/MD/SARIF projections | `tests/test_report.py`, `tests/test_report_contract_coverage.py`, `tests/test_report_branch_invariants.py` | -| HTML render-only explainability + escaping | `tests/test_html_report.py` | -| Scanner traversal safety | `tests/test_scanner_extra.py`, `tests/test_security.py` | +| Contract | Tests | +|--------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------| +| Baseline schema/integrity/compat gates | `tests/test_baseline.py` | +| 
Cache v2.3 fail-open + status mapping | `tests/test_cache.py`, `tests/test_cli_inprocess.py::test_cli_reports_cache_too_large_respects_max_size_flag` | +| Exit code categories and markers | `tests/test_cli_unit.py`, `tests/test_cli_inprocess.py` | +| Report schema v2.1 canonical/derived/integrity + JSON/TXT/MD/SARIF projections | `tests/test_report.py`, `tests/test_report_contract_coverage.py`, `tests/test_report_branch_invariants.py` | +| HTML render-only explainability + escaping | `tests/test_html_report.py` | +| Scanner traversal safety | `tests/test_scanner_extra.py`, `tests/test_security.py` | ## Invariants (MUST) diff --git a/docs/book/14-compatibility-and-versioning.md b/docs/book/14-compatibility-and-versioning.md index 31f7740..d7c5962 100644 --- a/docs/book/14-compatibility-and-versioning.md +++ b/docs/book/14-compatibility-and-versioning.md @@ -20,7 +20,7 @@ Current contract versions: - `BASELINE_SCHEMA_VERSION = "2.0"` - `BASELINE_FINGERPRINT_VERSION = "1"` -- `CACHE_VERSION = "2.2"` +- `CACHE_VERSION = "2.3"` - `REPORT_SCHEMA_VERSION = "2.1"` - `METRICS_BASELINE_SCHEMA_VERSION = "1.0"` (standalone metrics-baseline file) @@ -34,13 +34,21 @@ Version bump rules: - Bump **baseline schema** only for baseline JSON layout/type changes. - Bump **fingerprint version** when clone key semantics change. -- Bump **cache schema** for cache wire-format/validation changes. +- Bump **cache schema** for cache wire-format/validation changes and for + cached-analysis semantic changes that would otherwise leave stale cache + entries looking compatible to runtime validation. - Bump **report schema** for canonical report document contract changes (`report_schema_version`, consumed by JSON/TXT/Markdown/SARIF and HTML provenance/view). - Bump **metrics-baseline schema** only for standalone metrics-baseline payload changes. 
- MCP does not currently define a separate schema/version constant; tool names, resource shapes, and documented request/response semantics are therefore package-versioned public surface and must be documented/tested when changed. +- Slimming or splitting MCP-only projections (for example, summary payloads or + `metrics` vs `metrics_detail`) does not change `report_schema_version` as long + as the canonical report document and finding identities remain unchanged. +- The same rule applies to finding-level MCP projection changes such as + envelope-level `base_uri`, slim summary locations, or omitting + `priority_factors` outside `detail_level="full"`. Baseline compatibility rules: diff --git a/docs/book/20-mcp-interface.md b/docs/book/20-mcp-interface.md index 15e491f..cc4ca23 100644 --- a/docs/book/20-mcp-interface.md +++ b/docs/book/20-mcp-interface.md @@ -39,15 +39,28 @@ Current server characteristics: - cache policies: - `reuse` - `off` - `refresh` is rejected in MCP because the server is read-only. + `refresh` is rejected in MCP because the server is read-only. 
- summary payload: - `run_id`, `root`, `analysis_mode` - `baseline`, `metrics_baseline`, `cache` - `inventory`, `findings_summary`, `health` + - `get_run_summary` and summary resources expose slim inventory + `file_registry` as `{ encoding, count }` + - `analyze_repository` keeps the full `inventory.file_registry.items` + - `analyze_changed_paths` also returns slim inventory `file_registry` - `baseline_diff`, `metrics_diff` - optional `changed_paths` (`list[str]`, repo-relative), `changed_findings`, `health_delta`, `verdict` - `warnings`, `failures` +- finding-list payloads: + - `list_findings`, `list_hotspots`, and `check_*` include envelope-level + `base_uri` once per response + - `detail_level="summary"` keeps only compact location tuples + (`file` + `line`) and omits `priority_factors` + - `detail_level="normal"` keeps `symbol` in locations but omits `uri` and + `priority_factors` + - `detail_level="full"` keeps `priority_factors`, location `symbol`, and + per-location `uri` for compatibility-oriented consumers The MCP layer does not introduce a separate analysis engine. It calls the current CodeClone pipeline and reuses the canonical report document already @@ -57,27 +70,27 @@ produced by the report contract. 
Current tool set: -| Tool | Key parameters | Purpose / notes | -|--------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------| -| `analyze_repository` | `root`, `analysis_mode`, `changed_paths`, `git_diff_ref`, inline thresholds, cache/baseline paths | Run deterministic CodeClone analysis and register the result as the latest MCP run | -| `analyze_changed_paths` | `root`, `changed_paths` or `git_diff_ref`, `analysis_mode`, inline thresholds | Diff-aware fast path: analyze a repo and attach a changed-files projection to the run | -| `get_run_summary` | `run_id` | Return the stored summary for the latest or specified run | -| `compare_runs` | `run_id_before`, `run_id_after`, `focus` | Compare two registered runs by finding ids and health delta | -| `evaluate_gates` | `run_id`, gate thresholds/booleans | Evaluate CI/gating conditions against an existing run without exiting the process | -| `get_report_section` | `run_id`, `section` | Return a canonical report section (`meta`, `inventory`, `findings`, `metrics`, `derived`, `integrity`, `changed`, or `all`) | -| `list_findings` | `family`, `category`, `severity`, `source_kind`, `novelty`, `sort_by`, `detail_level`, `changed_paths`, `git_diff_ref`, `exclude_reviewed`, pagination | Return deterministically ordered finding groups with filtering and pagination | -| `get_finding` | `finding_id`, `run_id` | Return one canonical finding group by id with locations, priority, and remediation payload when available | -| `get_remediation` | `finding_id`, `run_id`, `detail_level` | Return just the remediation/explainability packet for one finding | -| `list_hotspots` | `kind`, `run_id`, `detail_level`, `changed_paths`, `git_diff_ref`, `exclude_reviewed`, `limit`, `max_results` | 
Return one derived hotlist (`most_actionable`, `highest_spread`, `highest_priority`, `production_hotspots`, `test_fixture_hotspots`) | -| `check_clones` | `run_id`, `root`, `path`, `clone_type`, `source_kind`, `max_results`, `detail_level` | Return clone findings from a compatible stored run | -| `check_complexity` | `run_id`, `root`, `path`, `min_complexity`, `max_results`, `detail_level` | Return complexity hotspots from a compatible stored run | -| `check_coupling` | `run_id`, `root`, `path`, `max_results`, `detail_level` | Return coupling hotspots from a compatible stored run | -| `check_cohesion` | `run_id`, `root`, `path`, `max_results`, `detail_level` | Return cohesion hotspots from a compatible stored run | -| `check_dead_code` | `run_id`, `root`, `path`, `min_severity`, `max_results`, `detail_level` | Return dead-code findings from a compatible stored run | -| `generate_pr_summary` | `run_id`, `changed_paths`, `git_diff_ref`, `format` | Build a PR-friendly changed-files summary in markdown or JSON | -| `mark_finding_reviewed` | `finding_id`, `run_id`, `note` | Mark a finding as reviewed in the in-memory MCP session | -| `list_reviewed_findings` | `run_id` | Return the current reviewed findings for the selected run | -| `clear_session_runs` | none | Clear all stored in-memory runs plus ephemeral review/gate/session caches for the current server process | +| Tool | Key parameters | Purpose / notes | +|--------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `analyze_repository` | `root`, `analysis_mode`, `changed_paths`, `git_diff_ref`, inline thresholds, cache/baseline paths | Run deterministic CodeClone analysis and register the result as the latest MCP run 
| +| `analyze_changed_paths` | `root`, `changed_paths` or `git_diff_ref`, `analysis_mode`, inline thresholds | Diff-aware fast path: analyze a repo and attach a changed-files projection to the run; summary inventory `file_registry` is slimmed to `{ encoding, count }` | +| `get_run_summary` | `run_id` | Return the stored summary for the latest or specified run, with slim inventory counts instead of the full file registry | +| `compare_runs` | `run_id_before`, `run_id_after`, `focus` | Compare two registered runs by finding ids and health delta | +| `evaluate_gates` | `run_id`, gate thresholds/booleans | Evaluate CI/gating conditions against an existing run without exiting the process | +| `get_report_section` | `run_id`, `section` | Return a canonical report section. `metrics` is summary-only; `metrics_detail` exposes the full metrics payload; other sections stay canonical | +| `list_findings` | `family`, `category`, `severity`, `source_kind`, `novelty`, `sort_by`, `detail_level`, `changed_paths`, `git_diff_ref`, `exclude_reviewed`, pagination | Return deterministically ordered finding groups with filtering and pagination; list responses include `base_uri` and compact summary/normal projections | +| `get_finding` | `finding_id`, `run_id` | Return one canonical finding group by id with locations, priority, and remediation payload when available; this endpoint remains full-detail | +| `get_remediation` | `finding_id`, `run_id`, `detail_level` | Return just the remediation/explainability packet for one finding | +| `list_hotspots` | `kind`, `run_id`, `detail_level`, `changed_paths`, `git_diff_ref`, `exclude_reviewed`, `limit`, `max_results` | Return one derived hotlist (`most_actionable`, `highest_spread`, `highest_priority`, `production_hotspots`, `test_fixture_hotspots`) with compact summary cards and `base_uri` | +| `check_clones` | `run_id`, `root`, `path`, `clone_type`, `source_kind`, `max_results`, `detail_level` | Return clone findings from a compatible stored run; `health.dimensions`
includes only `clones` | +| `check_complexity` | `run_id`, `root`, `path`, `min_complexity`, `max_results`, `detail_level` | Return complexity hotspots from a compatible stored run; `health.dimensions` includes only `complexity` | +| `check_coupling` | `run_id`, `root`, `path`, `max_results`, `detail_level` | Return coupling hotspots from a compatible stored run; `health.dimensions` includes only `coupling` | +| `check_cohesion` | `run_id`, `root`, `path`, `max_results`, `detail_level` | Return cohesion hotspots from a compatible stored run; `health.dimensions` includes only `cohesion` | +| `check_dead_code` | `run_id`, `root`, `path`, `min_severity`, `max_results`, `detail_level` | Return dead-code findings from a compatible stored run; `health.dimensions` includes only `dead_code` | +| `generate_pr_summary` | `run_id`, `changed_paths`, `git_diff_ref`, `format` | Build a PR-friendly changed-files summary in markdown or JSON | +| `mark_finding_reviewed` | `finding_id`, `run_id`, `note` | Mark a finding as reviewed in the in-memory MCP session | +| `list_reviewed_findings` | `run_id` | Return the current reviewed findings for the selected run | +| `clear_session_runs` | none | Clear all stored in-memory runs plus ephemeral review/gate/session caches for the current server process | All analysis/report tools are read-only with respect to repo state. The only mutable MCP tools are `mark_finding_reviewed` and `clear_session_runs`, and @@ -152,10 +165,19 @@ trigger fresh analysis by themselves. not a second canonical report. - `get_run_summary` with no `run_id` resolves to the latest stored run. - `get_report_section(section="all")` returns the full canonical report document. +- `get_report_section(section="metrics")` returns only `metrics.summary`. +- `get_report_section(section="metrics_detail")` returns the full canonical + metrics payload (`summary` + `families`). - `get_report_section(section="changed")` is available only for diff-aware runs. 
- `run_id` must equal the canonical report digest for that run. +- List-style MCP finding responses expose `base_uri` once per envelope instead + of repeating absolute `file://` URIs inside summary/normal locations. - Finding `locations` and `html_anchor` values are stable projections over the current run and do not invent non-canonical ids. +- For the same finding id, `source_kind` remains consistent across + `list_findings`, `list_hotspots`, and `get_finding`. +- `get_finding` remains the compatibility-preserving full-detail endpoint: + `priority_factors` and location `uri` are still available there. - `compare_runs` is only semantically meaningful when both runs use comparable repository scope/root and analysis settings. diff --git a/docs/book/appendix/b-schema-layouts.md b/docs/book/appendix/b-schema-layouts.md index 1d3b0a3..5213b39 100644 --- a/docs/book/appendix/b-schema-layouts.md +++ b/docs/book/appendix/b-schema-layouts.md @@ -25,11 +25,11 @@ Compact structural layouts for baseline/cache/report contracts in `2.0.0b3`. } ``` -## Cache schema (`2.2`) +## Cache schema (`2.3`) ```json { - "v": "2.2", + "v": "2.3", "payload": { "py": "cp313", "fp": "1", diff --git a/docs/mcp.md b/docs/mcp.md index 291d6dc..f2d0cf0 100644 --- a/docs/mcp.md +++ b/docs/mcp.md @@ -40,14 +40,14 @@ core CodeClone runtime. 
| Tool | Purpose | |--------------------------|----------------------------------------------------------------------| | `analyze_repository` | Full analysis → register as latest run | -| `analyze_changed_paths` | Diff-aware analysis with `changed_paths` or `git_diff_ref` | -| `get_run_summary` | Compact health/findings/baseline snapshot | +| `analyze_changed_paths` | Diff-aware analysis with `changed_paths` or `git_diff_ref`; summary inventory is slimmed to counts | +| `get_run_summary` | Compact health/findings/baseline snapshot with slim inventory counts | | `compare_runs` | Regressions, improvements, health delta between two runs | -| `list_findings` | Filtered, paginated finding groups | +| `list_findings` | Filtered, paginated finding groups with envelope-level `base_uri` | | `get_finding` | Deep inspection of one finding by id | | `get_remediation` | Structured remediation payload for one finding | -| `list_hotspots` | Derived views: highest priority, production hotspots, spread, etc. | -| `get_report_section` | Read canonical report sections (meta, findings, metrics, derived, …) | +| `list_hotspots` | Derived views: highest priority, production hotspots, spread, etc., with compact summary cards | +| `get_report_section` | Read canonical report sections; `metrics` is summary-only, `metrics_detail` is the full metrics dump | | `evaluate_gates` | Preview CI/gating decisions without exiting | | `check_clones` | Clone findings from a stored run | | `check_complexity` | Complexity hotspots from a stored run | @@ -62,6 +62,12 @@ core CodeClone runtime. > `check_*` tools query stored runs only. Call `analyze_repository` or > `analyze_changed_paths` first. +`check_*` responses keep `health.score` and `health.grade`, but slim +`health.dimensions` down to the one dimension relevant to that tool. 
+List-style finding responses also expose `base_uri` once per envelope and keep +summary locations as `file` + `line`; richer `symbol` / `uri` data stays in +`normal` / `full` responses and `get_finding`. + ## Resource surface | Resource | Content | diff --git a/pyproject.toml b/pyproject.toml index 1218afc..3ecbfab 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,8 +7,8 @@ name = "codeclone" version = "2.0.0b3" description = "Structural code quality analysis for Python" readme = { file = "README.md", content-type = "text/markdown" } -license = "MIT" -license-files = ["LICENSE"] +license = "MPL-2.0 AND MIT" +license-files = ["LICENSE", "LICENSE-docs"] authors = [ { name = "Den Rozhnovskiy", email = "pytelemonbot@mail.ru" } diff --git a/scripts/build_docs_example_report.py b/scripts/build_docs_example_report.py index 5254c59..b003fd2 100644 --- a/scripts/build_docs_example_report.py +++ b/scripts/build_docs_example_report.py @@ -1,5 +1,8 @@ #!/usr/bin/env python3 -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations diff --git a/tests/__init__.py b/tests/__init__.py index e69de29..9135843 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -0,0 +1,5 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy diff --git a/tests/_assertions.py b/tests/_assertions.py index 619e882..1f4dd4a 100644 --- a/tests/_assertions.py +++ b/tests/_assertions.py @@ -1,3 +1,9 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. 
If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + from __future__ import annotations from collections.abc import Mapping diff --git a/tests/_ast_helpers.py b/tests/_ast_helpers.py index ce123be..574d026 100644 --- a/tests/_ast_helpers.py +++ b/tests/_ast_helpers.py @@ -1,3 +1,9 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + from __future__ import annotations import ast diff --git a/tests/_report_access.py b/tests/_report_access.py index 9eeb760..91fb068 100644 --- a/tests/_report_access.py +++ b/tests/_report_access.py @@ -1,3 +1,9 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + from __future__ import annotations from collections.abc import Mapping diff --git a/tests/_report_fixtures.py b/tests/_report_fixtures.py index 73e68e4..6bbe126 100644 --- a/tests/_report_fixtures.py +++ b/tests/_report_fixtures.py @@ -1,3 +1,9 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + from __future__ import annotations from pathlib import Path diff --git a/tests/conftest.py b/tests/conftest.py index 7647800..a497dcf 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,3 +1,9 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + from __future__ import annotations from collections.abc import Callable diff --git a/tests/fixtures/golden_project/LICENSE b/tests/fixtures/golden_project/LICENSE new file mode 100644 index 0000000..d2b21b1 --- /dev/null +++ b/tests/fixtures/golden_project/LICENSE @@ -0,0 +1,9 @@ +Fixture source files in this directory are covered by the Mozilla Public +License, v. 2.0. + +Per Mozilla MPL header guidance, the notice is provided in this directory +instead of modifying the fixture files themselves. + +This Source Code Form is subject to the terms of the Mozilla Public +License, v. 2.0. If a copy of the MPL was not distributed with this +directory, You can obtain one at https://mozilla.org/MPL/2.0/. diff --git a/tests/fixtures/golden_v2/clone_metrics_cycle/pkg/LICENSE b/tests/fixtures/golden_v2/clone_metrics_cycle/pkg/LICENSE new file mode 100644 index 0000000..d2b21b1 --- /dev/null +++ b/tests/fixtures/golden_v2/clone_metrics_cycle/pkg/LICENSE @@ -0,0 +1,9 @@ +Fixture source files in this directory are covered by the Mozilla Public +License, v. 2.0. + +Per Mozilla MPL header guidance, the notice is provided in this directory +instead of modifying the fixture files themselves. + +This Source Code Form is subject to the terms of the Mozilla Public +License, v. 2.0. If a copy of the MPL was not distributed with this +directory, You can obtain one at https://mozilla.org/MPL/2.0/. 
diff --git a/tests/fixtures/golden_v2/pyproject_defaults/pkg/LICENSE b/tests/fixtures/golden_v2/pyproject_defaults/pkg/LICENSE new file mode 100644 index 0000000..d2b21b1 --- /dev/null +++ b/tests/fixtures/golden_v2/pyproject_defaults/pkg/LICENSE @@ -0,0 +1,9 @@ +Fixture source files in this directory are covered by the Mozilla Public +License, v. 2.0. + +Per Mozilla MPL header guidance, the notice is provided in this directory +instead of modifying the fixture files themselves. + +This Source Code Form is subject to the terms of the Mozilla Public +License, v. 2.0. If a copy of the MPL was not distributed with this +directory, You can obtain one at https://mozilla.org/MPL/2.0/. diff --git a/tests/fixtures/golden_v2/test_only_usage/pkg/LICENSE b/tests/fixtures/golden_v2/test_only_usage/pkg/LICENSE new file mode 100644 index 0000000..d2b21b1 --- /dev/null +++ b/tests/fixtures/golden_v2/test_only_usage/pkg/LICENSE @@ -0,0 +1,9 @@ +Fixture source files in this directory are covered by the Mozilla Public +License, v. 2.0. + +Per Mozilla MPL header guidance, the notice is provided in this directory +instead of modifying the fixture files themselves. + +This Source Code Form is subject to the terms of the Mozilla Public +License, v. 2.0. If a copy of the MPL was not distributed with this +directory, You can obtain one at https://mozilla.org/MPL/2.0/. diff --git a/tests/fixtures/golden_v2/test_only_usage/pkg/tests/LICENSE b/tests/fixtures/golden_v2/test_only_usage/pkg/tests/LICENSE new file mode 100644 index 0000000..d2b21b1 --- /dev/null +++ b/tests/fixtures/golden_v2/test_only_usage/pkg/tests/LICENSE @@ -0,0 +1,9 @@ +Fixture source files in this directory are covered by the Mozilla Public +License, v. 2.0. + +Per Mozilla MPL header guidance, the notice is provided in this directory +instead of modifying the fixture files themselves. + +This Source Code Form is subject to the terms of the Mozilla Public +License, v. 2.0. 
If a copy of the MPL was not distributed with this +directory, You can obtain one at https://mozilla.org/MPL/2.0/. diff --git a/tests/test_architecture.py b/tests/test_architecture.py index 34101e9..6454b52 100644 --- a/tests/test_architecture.py +++ b/tests/test_architecture.py @@ -1,3 +1,9 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + from __future__ import annotations import ast diff --git a/tests/test_baseline.py b/tests/test_baseline.py index 127af92..4ee3e77 100644 --- a/tests/test_baseline.py +++ b/tests/test_baseline.py @@ -1,3 +1,9 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + import json from collections.abc import Callable from pathlib import Path diff --git a/tests/test_blockhash.py b/tests/test_blockhash.py index 003f120..b6025a9 100644 --- a/tests/test_blockhash.py +++ b/tests/test_blockhash.py @@ -1,3 +1,9 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + import ast from codeclone.blockhash import stmt_hashes diff --git a/tests/test_blocks.py b/tests/test_blocks.py index d551666..a875635 100644 --- a/tests/test_blocks.py +++ b/tests/test_blocks.py @@ -1,3 +1,9 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. 
If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + import ast from codeclone.blocks import extract_blocks diff --git a/tests/test_cache.py b/tests/test_cache.py index 793dd60..e4bc012 100644 --- a/tests/test_cache.py +++ b/tests/test_cache.py @@ -1,3 +1,9 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + from __future__ import annotations import json @@ -412,13 +418,14 @@ def test_cache_version_mismatch_warns(tmp_path: Path) -> None: assert loaded.cache_schema_version == "0.0" -def test_cache_v_field_version_mismatch_warns(tmp_path: Path) -> None: +@pytest.mark.parametrize("version", ["0.0", "2.2"]) +def test_cache_v_field_version_mismatch_warns(tmp_path: Path, version: str) -> None: cache_path = tmp_path / "cache.json" cache = Cache(cache_path) payload = _analysis_payload(cache, files={}) signature = sign_cache_payload(payload) cache_path.write_text( - json.dumps({"v": "0.0", "payload": payload, "sig": signature}), "utf-8" + json.dumps({"v": version, "payload": payload, "sig": signature}), "utf-8" ) loaded = Cache(cache_path) @@ -427,7 +434,7 @@ def test_cache_v_field_version_mismatch_warns(tmp_path: Path) -> None: assert "version mismatch" in loaded.load_warning assert loaded.data["files"] == {} assert loaded.load_status == CacheStatus.VERSION_MISMATCH - assert loaded.cache_schema_version == "0.0" + assert loaded.cache_schema_version == version def test_cache_too_large_warns(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: diff --git a/tests/test_cfg.py b/tests/test_cfg.py index b0c4955..a111819 100644 --- a/tests/test_cfg.py +++ b/tests/test_cfg.py @@ -1,3 +1,9 @@ +# This Source Code Form is 
subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + import ast from textwrap import dedent diff --git a/tests/test_cfg_model.py b/tests/test_cfg_model.py index 216c9a8..36c4eee 100644 --- a/tests/test_cfg_model.py +++ b/tests/test_cfg_model.py @@ -1,3 +1,9 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + from codeclone.cfg_model import CFG, Block diff --git a/tests/test_cli_config.py b/tests/test_cli_config.py index 4fdfcd4..ecc60a0 100644 --- a/tests/test_cli_config.py +++ b/tests/test_cli_config.py @@ -1,3 +1,9 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + from __future__ import annotations import argparse diff --git a/tests/test_cli_inprocess.py b/tests/test_cli_inprocess.py index 1317065..8651b3f 100644 --- a/tests/test_cli_inprocess.py +++ b/tests/test_cli_inprocess.py @@ -1,3 +1,9 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + from __future__ import annotations import json @@ -1869,16 +1875,37 @@ def test_cli_too_large_baseline_fails_in_ci( _assert_report_baseline_meta(payload, status="too_large", loaded=False) +@pytest.mark.parametrize( + ("mutator", "expected_message", "expected_status", "expected_schema_version"), + [ + ( + lambda data: data.__setitem__("sig", "bad"), + "signature", + "integrity_failed", + CACHE_VERSION, + ), + ( + lambda data: data.__setitem__("v", "2.2"), + "Cache version mismatch", + "version_mismatch", + "2.2", + ), + ], +) def test_cli_reports_cache_used_false_on_warning( tmp_path: Path, monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str], + mutator: Callable[[dict[str, object]], None], + expected_message: str, + expected_status: str, + expected_schema_version: object, ) -> None: src, cache_path, cache = _prepare_single_source_cache(tmp_path) cache.put_file_entry(str(src), {"mtime_ns": 1, "size": 10}, [], [], []) cache.save() data = json.loads(cache_path.read_text("utf-8")) - data["sig"] = "bad" + mutator(data) cache_path.write_text(json.dumps(data), "utf-8") baseline_path = _write_current_python_baseline(tmp_path / "baseline.json") @@ -1893,12 +1920,12 @@ def test_cli_reports_cache_used_false_on_warning( ], ) out = capsys.readouterr().out - assert "signature" in out + assert expected_message in out _assert_report_cache_meta( payload, used=False, - status="integrity_failed", - schema_version=CACHE_VERSION, + status=expected_status, + schema_version=expected_schema_version, ) diff --git a/tests/test_cli_main_guard.py b/tests/test_cli_main_guard.py index 3570d68..904c2b3 100644 --- a/tests/test_cli_main_guard.py +++ b/tests/test_cli_main_guard.py @@ -1,3 +1,9 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + import os import subprocess import sys diff --git a/tests/test_cli_main_guard_runpy.py b/tests/test_cli_main_guard_runpy.py index f130c4e..9685a8a 100644 --- a/tests/test_cli_main_guard_runpy.py +++ b/tests/test_cli_main_guard_runpy.py @@ -1,3 +1,9 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + import runpy import sys diff --git a/tests/test_cli_smoke.py b/tests/test_cli_smoke.py index 97093f2..ea3399f 100644 --- a/tests/test_cli_smoke.py +++ b/tests/test_cli_smoke.py @@ -1,3 +1,9 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + import os import subprocess import sys diff --git a/tests/test_cli_unit.py b/tests/test_cli_unit.py index 12682c0..ce6f2a2 100644 --- a/tests/test_cli_unit.py +++ b/tests/test_cli_unit.py @@ -1,3 +1,9 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + import json import os import subprocess diff --git a/tests/test_coerce.py b/tests/test_coerce.py index 0112504..9b7b0c0 100644 --- a/tests/test_coerce.py +++ b/tests/test_coerce.py @@ -1,3 +1,9 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + from __future__ import annotations from collections.abc import Mapping, Sequence diff --git a/tests/test_core_branch_coverage.py b/tests/test_core_branch_coverage.py index b1afa81..888f8dc 100644 --- a/tests/test_core_branch_coverage.py +++ b/tests/test_core_branch_coverage.py @@ -1,3 +1,9 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + from __future__ import annotations from argparse import Namespace diff --git a/tests/test_detector_golden.py b/tests/test_detector_golden.py index a270bb8..d03e103 100644 --- a/tests/test_detector_golden.py +++ b/tests/test_detector_golden.py @@ -1,3 +1,9 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + from __future__ import annotations import json diff --git a/tests/test_extractor.py b/tests/test_extractor.py index aeb8161..566754a 100644 --- a/tests/test_extractor.py +++ b/tests/test_extractor.py @@ -1,3 +1,9 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + import ast import os import signal @@ -12,7 +18,7 @@ from codeclone import extractor from codeclone.errors import ParseError from codeclone.metrics import find_unused -from codeclone.models import BlockUnit, ModuleDep, SegmentUnit +from codeclone.models import BlockUnit, ClassMetrics, ModuleDep, SegmentUnit from codeclone.normalize import NormalizationConfig @@ -855,6 +861,55 @@ def test_extract_stats_drops_referenced_names_for_test_filepaths() -> None: assert "live" in regular_metrics.referenced_names +def test_extract_stats_keeps_class_cohesion_metrics_after_unit_fingerprinting() -> None: + src = """ +class Service: + def __init__(self): + self.path = "x" + self.data = {} + + def load(self): + if self.path: + return self.data + return {} + + def save(self): + if self.path: + self.data["saved"] = True + return self.data + + def verify(self): + return bool(self.path) and bool(self.data) + + @staticmethod + def make(): + return Service() +""" + _, _, _, _, file_metrics, _ = extractor.extract_units_and_stats_from_source( + source=src, + filepath="pkg/service.py", + module_name="pkg.service", + cfg=NormalizationConfig(), + min_loc=1, + min_stmt=1, + ) + + assert file_metrics.class_metrics == ( + ClassMetrics( + qualname="pkg.service:Service", + filepath="pkg/service.py", + start_line=2, + end_line=22, + cbo=0, + lcom4=2, + method_count=5, + instance_var_count=2, + risk_coupling="low", + risk_cohesion="medium", + ), + ) + + def test_dead_code_marks_symbol_dead_when_referenced_only_by_tests() -> None: src_prod = """ def orphan(): diff --git a/tests/test_fingerprint.py b/tests/test_fingerprint.py index dffb176..a785d15 100644 --- a/tests/test_fingerprint.py +++ b/tests/test_fingerprint.py @@ -1,3 +1,9 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. 
If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + from codeclone.fingerprint import bucket_loc, sha1 diff --git a/tests/test_github_action_helpers.py b/tests/test_github_action_helpers.py index d8ce885..cb801c7 100644 --- a/tests/test_github_action_helpers.py +++ b/tests/test_github_action_helpers.py @@ -1,3 +1,9 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + from __future__ import annotations import importlib.util diff --git a/tests/test_golden_v2.py b/tests/test_golden_v2.py index 3a95188..fcce405 100644 --- a/tests/test_golden_v2.py +++ b/tests/test_golden_v2.py @@ -1,3 +1,9 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + from __future__ import annotations import json diff --git a/tests/test_html_report.py b/tests/test_html_report.py index 96c1338..ba9a4d2 100644 --- a/tests/test_html_report.py +++ b/tests/test_html_report.py @@ -1,3 +1,9 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + import importlib import json from collections.abc import Callable diff --git a/tests/test_html_report_helpers.py b/tests/test_html_report_helpers.py index 8a10ab5..b7e5bc5 100644 --- a/tests/test_html_report_helpers.py +++ b/tests/test_html_report_helpers.py @@ -1,3 +1,9 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + from types import SimpleNamespace from typing import Any, cast diff --git a/tests/test_init.py b/tests/test_init.py index 69b6098..b02a9cb 100644 --- a/tests/test_init.py +++ b/tests/test_init.py @@ -1,3 +1,9 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + from types import ModuleType import pytest diff --git a/tests/test_mcp_server.py b/tests/test_mcp_server.py index b59ad8d..26704f5 100644 --- a/tests/test_mcp_server.py +++ b/tests/test_mcp_server.py @@ -1,4 +1,8 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations @@ -173,11 +177,19 @@ def test_mcp_server_tool_roundtrip_and_resources(tmp_path: Path) -> None: ) run_id = str(summary["run_id"]) changed_run_id = str(changed_summary["run_id"]) + changed_inventory = cast("dict[str, object]", changed_summary["inventory"]) + changed_registry = cast("dict[str, object]", changed_inventory["file_registry"]) + assert cast(int, changed_registry["count"]) >= 1 + assert "items" not in changed_registry latest = _structured_tool_result( asyncio.run(server.call_tool("get_run_summary", {})) ) assert latest["run_id"] == run_id + latest_inventory = cast("dict[str, object]", latest["inventory"]) + latest_registry = cast("dict[str, object]", latest_inventory["file_registry"]) + assert cast(int, latest_registry["count"]) >= 1 + assert "items" not in latest_registry findings_result = _structured_tool_result( asyncio.run( @@ -191,7 +203,14 @@ def test_mcp_server_tool_roundtrip_and_resources(tmp_path: Path) -> None: ) ) ) + assert findings_result["base_uri"] == tmp_path.as_uri() assert cast(int, findings_result["total"]) >= 1 + summary_finding = cast("list[dict[str, object]]", findings_result["items"])[0] + assert "priority_factors" not in summary_finding + assert all( + set(cast("dict[str, object]", location)) <= {"file", "line"} + for location in cast("list[object]", summary_finding["locations"]) + ) latest_summary_resource = list( asyncio.run(server.read_resource("codeclone://latest/summary")) @@ -200,6 +219,16 @@ def test_mcp_server_tool_roundtrip_and_resources(tmp_path: Path) -> None: latest_summary_text = latest_summary_resource[0].content latest_summary = json.loads(latest_summary_text) assert latest_summary["run_id"] == run_id + latest_summary_inventory = cast( + "dict[str, object]", + latest_summary["inventory"], + ) + latest_summary_registry = cast( + "dict[str, object]", + latest_summary_inventory["file_registry"], + ) + assert 
cast(int, latest_summary_registry["count"]) >= 1 + assert "items" not in latest_summary_registry latest_report_resource = list( asyncio.run(server.read_resource("codeclone://latest/report.json")) @@ -241,6 +270,17 @@ def test_mcp_server_tool_roundtrip_and_resources(tmp_path: Path) -> None: asyncio.run(server.call_tool("get_report_section", {"section": "meta"})) ) assert report_section["codeclone_version"] + metrics_section = _structured_tool_result( + asyncio.run(server.call_tool("get_report_section", {"section": "metrics"})) + ) + assert "summary" in metrics_section + assert "families" not in metrics_section + metrics_detail_section = _structured_tool_result( + asyncio.run( + server.call_tool("get_report_section", {"section": "metrics_detail"}) + ) + ) + assert "families" in metrics_detail_section changed_section = _structured_tool_result( asyncio.run(server.call_tool("get_report_section", {"section": "changed"})) ) @@ -272,6 +312,7 @@ def test_mcp_server_tool_roundtrip_and_resources(tmp_path: Path) -> None: ) ) ) + assert hotspots["base_uri"] == tmp_path.as_uri() assert cast(int, hotspots["total"]) >= 1 assert comparison["summary"] @@ -337,10 +378,15 @@ def test_mcp_server_tool_roundtrip_and_resources(tmp_path: Path) -> None: ) ) assert complexity["check"] == "complexity" + assert complexity["base_uri"] == tmp_path.as_uri() assert cast(int, clones["total"]) >= 1 + assert clones["base_uri"] == tmp_path.as_uri() assert coupling["check"] == "coupling" + assert coupling["base_uri"] == tmp_path.as_uri() assert cohesion["check"] == "cohesion" + assert cohesion["base_uri"] == tmp_path.as_uri() assert dead_code["check"] == "dead_code" + assert dead_code["base_uri"] == tmp_path.as_uri() assert reviewed["reviewed"] is True assert reviewed_items["reviewed_count"] == 1 assert "## CodeClone Summary" in str(pr_summary["content"]) diff --git a/tests/test_mcp_service.py b/tests/test_mcp_service.py index 1280513..fdea3cf 100644 --- a/tests/test_mcp_service.py +++ 
b/tests/test_mcp_service.py @@ -1,4 +1,8 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy from __future__ import annotations @@ -29,10 +33,11 @@ from codeclone.models import MetricsDiff -def _write_clone_fixture(root: Path) -> None: - root.joinpath("pkg").mkdir(exist_ok=True) - root.joinpath("pkg", "__init__.py").write_text("", "utf-8") - root.joinpath("pkg", "dup.py").write_text( +def _write_clone_fixture(root: Path, relative_dir: str = "pkg") -> None: + fixture_dir = root.joinpath(relative_dir) + fixture_dir.mkdir(parents=True, exist_ok=True) + fixture_dir.joinpath("__init__.py").write_text("", "utf-8") + fixture_dir.joinpath("dup.py").write_text( ( "def alpha(value: int) -> int:\n" " total = value + 1\n" @@ -132,6 +137,27 @@ def _build_quality_service(root: Path) -> CodeCloneMCPService: return service +def _analyze_quality_repository( + root: Path, +) -> tuple[CodeCloneMCPService, dict[str, object]]: + _write_clone_fixture(root) + _write_quality_fixture(root) + service = CodeCloneMCPService(history_limit=4) + summary = service.analyze_repository( + MCPAnalysisRequest( + root=str(root), + respect_pyproject=False, + cache_policy="off", + ) + ) + return service, summary + + +def _file_registry(payload: dict[str, object]) -> dict[str, object]: + inventory = cast("dict[str, object]", payload["inventory"]) + return cast("dict[str, object]", inventory["file_registry"]) + + def test_mcp_service_analyze_repository_registers_latest_run(tmp_path: Path) -> None: _write_clone_fixture(tmp_path) service = CodeCloneMCPService(history_limit=4) @@ -159,6 +185,34 @@ def test_mcp_service_analyze_repository_registers_latest_run(tmp_path: Path) -> assert latest_health["grade"] +def 
test_mcp_service_summary_inventory_is_slim_but_analysis_summary_is_full( + tmp_path: Path, +) -> None: + service, repository_summary = _analyze_quality_repository(tmp_path) + changed_summary = service.analyze_changed_paths( + MCPAnalysisRequest( + root=str(tmp_path), + respect_pyproject=False, + cache_policy="off", + changed_paths=("pkg/dup.py",), + ) + ) + stored_summary = service.get_run_summary(run_id=str(repository_summary["run_id"])) + + repo_registry = _file_registry(repository_summary) + changed_registry = _file_registry(changed_summary) + stored_registry = _file_registry(stored_summary) + + assert isinstance(repo_registry["items"], list) + assert "count" not in repo_registry + assert changed_registry["count"] == len( + cast("list[object]", repo_registry["items"]) + ) + assert "items" not in changed_registry + assert stored_registry["count"] == len(cast("list[object]", repo_registry["items"])) + assert "items" not in stored_registry + + def test_mcp_service_lists_findings_and_hotspots(tmp_path: Path) -> None: _write_clone_fixture(tmp_path) service = CodeCloneMCPService(history_limit=4) @@ -288,6 +342,8 @@ def test_mcp_service_granular_checks_pr_summary_and_resources( path="pkg/dup.py", detail_level="summary", ) + summary_health = cast("dict[str, object]", summary["health"]) + summary_dimensions = cast("dict[str, object]", summary_health["dimensions"]) assert clones["check"] == "clones" assert cast(int, clones["total"]) >= 1 @@ -311,6 +367,20 @@ def test_mcp_service_granular_checks_pr_summary_and_resources( cohesion = service.check_cohesion(run_id=run_id, detail_level="summary") assert coupling["check"] == "coupling" assert cohesion["check"] == "cohesion" + for dimension, payload in ( + ("clones", clones), + ("complexity", complexity), + ("dead_code", dead_code), + ("coupling", coupling), + ("cohesion", cohesion), + ): + assert payload["base_uri"] == tmp_path.as_uri() + check_health = cast("dict[str, object]", payload["health"]) + assert check_health["score"] 
== summary_health["score"] + assert check_health["grade"] == summary_health["grade"] + assert cast("dict[str, object]", check_health["dimensions"]) == { + dimension: summary_dimensions[dimension] + } gate_result = service.evaluate_gates( MCPGateRequest(run_id=run_id, fail_threshold=0) @@ -416,6 +486,33 @@ def test_mcp_service_summary_reuses_canonical_meta_for_cache_and_health( assert cache_summary["used"] == cache_meta["used"] assert cache_summary["schema_version"] == cache_meta["schema_version"] assert health_summary == metrics_health + assert "families" not in report_metrics + + +def test_mcp_service_metrics_sections_split_summary_and_detail( + tmp_path: Path, +) -> None: + service, summary = _analyze_quality_repository(tmp_path) + run_id = str(summary["run_id"]) + + metrics_summary = service.get_report_section(run_id=run_id, section="metrics") + metrics_detail = service.get_report_section( + run_id=run_id, + section="metrics_detail", + ) + + assert set(cast("dict[str, object]", metrics_summary["summary"])) >= { + "complexity", + "coupling", + "cohesion", + "dependencies", + "dead_code", + "health", + } + assert "families" not in metrics_summary + assert len(json.dumps(metrics_summary, ensure_ascii=False, sort_keys=True)) < 5000 + assert set(metrics_detail) == {"summary", "families"} + assert cast("dict[str, object]", metrics_detail["families"]) def test_mcp_service_evaluate_gates_on_existing_run(tmp_path: Path) -> None: @@ -454,9 +551,108 @@ def test_mcp_service_resources_expose_latest_summary_and_report(tmp_path: Path) latest_report = json.loads(service.read_resource("codeclone://latest/report.json")) assert latest_summary["run_id"] == summary["run_id"] + assert latest_summary["inventory"]["file_registry"]["count"] >= 1 + assert "items" not in latest_summary["inventory"]["file_registry"] assert latest_report["report_schema_version"] == "2.1" +def test_mcp_service_hotspot_summary_preserves_fixtures_source_kind( + tmp_path: Path, +) -> None: + 
_write_clone_fixture(tmp_path, relative_dir="tests/fixtures") + service = CodeCloneMCPService(history_limit=4) + service.analyze_repository( + MCPAnalysisRequest( + root=str(tmp_path), + respect_pyproject=False, + cache_policy="off", + ) + ) + + findings = service.list_findings( + family="clone", + detail_level="summary", + limit=1, + ) + hotspots = service.list_hotspots( + kind="highest_spread", + detail_level="summary", + limit=1, + ) + + finding = cast("list[dict[str, object]]", findings["items"])[0] + hotspot = cast("list[dict[str, object]]", hotspots["items"])[0] + assert hotspots["base_uri"] == tmp_path.as_uri() + assert finding["id"] == hotspot["id"] + assert finding["source_kind"] == "fixtures" + assert hotspot["source_kind"] == finding["source_kind"] + assert "priority_factors" not in hotspot + assert all( + set(cast("dict[str, object]", location)) <= {"file", "line"} + for location in cast("list[object]", hotspot["locations"]) + ) + + +def test_mcp_service_list_findings_detail_levels_slim_and_full_payloads( + tmp_path: Path, +) -> None: + service, summary = _analyze_quality_repository(tmp_path) + run_id = str(summary["run_id"]) + + summary_payload = service.list_findings( + run_id=run_id, + family="clone", + detail_level="summary", + limit=1, + ) + normal_payload = service.list_findings( + run_id=run_id, + family="clone", + detail_level="normal", + limit=1, + ) + full_payload = service.list_findings( + run_id=run_id, + family="clone", + detail_level="full", + limit=1, + ) + + assert summary_payload["base_uri"] == tmp_path.as_uri() + summary_item = cast("list[dict[str, object]]", summary_payload["items"])[0] + normal_item = cast("list[dict[str, object]]", normal_payload["items"])[0] + full_item = cast("list[dict[str, object]]", full_payload["items"])[0] + + assert "priority_factors" not in summary_item + assert "priority_factors" not in normal_item + assert cast("dict[str, object]", full_item["priority_factors"]) + assert all( + set(cast("dict[str, 
object]", location)) <= {"file", "line"} + for location in cast("list[object]", summary_item["locations"]) + ) + assert all( + "symbol" in cast("dict[str, object]", location) + and "uri" not in cast("dict[str, object]", location) + for location in cast("list[object]", normal_item["locations"]) + ) + assert all( + "symbol" in cast("dict[str, object]", location) + and "uri" in cast("dict[str, object]", location) + for location in cast("list[object]", full_item["locations"]) + ) + + finding = service.get_finding( + run_id=run_id, + finding_id=str(summary_item["id"]), + ) + assert cast("dict[str, object]", finding["priority_factors"]) + assert all( + "symbol" in cast("dict[str, object]", location) + and "uri" in cast("dict[str, object]", location) + for location in cast("list[object]", finding["locations"]) + ) + + def test_mcp_service_run_store_evicts_old_runs(tmp_path: Path) -> None: first_root = tmp_path / "first" second_root = tmp_path / "second" @@ -1137,17 +1333,7 @@ def test_mcp_service_branch_helpers_on_real_runs( def test_mcp_service_remediation_and_comparison_helper_branches( tmp_path: Path, ) -> None: - _write_clone_fixture(tmp_path) - _write_quality_fixture(tmp_path) - service = CodeCloneMCPService(history_limit=4) - - before = service.analyze_repository( - MCPAnalysisRequest( - root=str(tmp_path), - respect_pyproject=False, - cache_policy="off", - ) - ) + service, before = _analyze_quality_repository(tmp_path) tmp_path.joinpath("pkg", "dup.py").write_text( "def alpha(value: int) -> int:\n return value + 1\n", "utf-8", @@ -1709,6 +1895,21 @@ def _patched_get_finding( }, )[0]["uri"] assert "#L" not in str(location_without_line) + location_without_uri = service._locations_for_finding( + record, + { + "items": [ + { + "relative_path": "pkg/dup.py", + "start_line": 1, + "qualname": "pkg.dup:alpha", + } + ] + }, + include_uri=False, + )[0] + assert "uri" not in location_without_uri + assert location_without_uri["symbol"] == "pkg.dup:alpha" assert ( 
service.list_hotspots( kind="highest_spread", @@ -1912,8 +2113,11 @@ def test_mcp_service_helper_branches_for_empty_gate_and_missing_remediation( assert clone_gate.exit_code == 3 assert clone_gate.reasons == ("clone:new",) + assert service.get_report_section(run_id="helpers", section="metrics") == { + "summary": {} + } with pytest.raises(MCPServiceContractError): - service.get_report_section(run_id="helpers", section="metrics") + service.get_report_section(run_id="helpers", section="metrics_detail") assert service._suggestion_for_finding(record, "missing") is None assert ( diff --git a/tests/test_metrics_baseline.py b/tests/test_metrics_baseline.py index ba2a035..53196d6 100644 --- a/tests/test_metrics_baseline.py +++ b/tests/test_metrics_baseline.py @@ -1,3 +1,9 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + from __future__ import annotations import json diff --git a/tests/test_metrics_modules.py b/tests/test_metrics_modules.py index 614f50f..4ee88b6 100644 --- a/tests/test_metrics_modules.py +++ b/tests/test_metrics_modules.py @@ -1,3 +1,9 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + from __future__ import annotations import ast diff --git a/tests/test_normalize.py b/tests/test_normalize.py index 613a320..ec5cc3f 100644 --- a/tests/test_normalize.py +++ b/tests/test_normalize.py @@ -1,3 +1,9 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + import ast from typing import Any, cast @@ -72,6 +78,23 @@ def test_normalization_equivalent_sources(src1: str, src2: str) -> None: assert normalized_ast_dump(a1, cfg) == normalized_ast_dump(a2, cfg) +def test_normalized_ast_dump_does_not_mutate_input_ast() -> None: + cfg = NormalizationConfig() + node = ast.parse( + """ +def f(x: int) -> int: + value = x + 1 + return value +""" + ).body[0] + before = ast.dump(node, annotate_fields=True, include_attributes=False) + + normalized_ast_dump(node, cfg) + + after = ast.dump(node, annotate_fields=True, include_attributes=False) + assert after == before + + @pytest.mark.parametrize( ("src1", "src2"), [ diff --git a/tests/test_pipeline_metrics.py b/tests/test_pipeline_metrics.py index 59935bd..1c17c20 100644 --- a/tests/test_pipeline_metrics.py +++ b/tests/test_pipeline_metrics.py @@ -1,3 +1,9 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + from __future__ import annotations from codeclone.cache import CacheEntry diff --git a/tests/test_pipeline_process.py b/tests/test_pipeline_process.py index f8bb8ad..76eaabb 100644 --- a/tests/test_pipeline_process.py +++ b/tests/test_pipeline_process.py @@ -1,3 +1,9 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + from __future__ import annotations import builtins diff --git a/tests/test_python_syntax_compat.py b/tests/test_python_syntax_compat.py index 6d52266..a4a50b7 100644 --- a/tests/test_python_syntax_compat.py +++ b/tests/test_python_syntax_compat.py @@ -1,3 +1,9 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + import ast from pathlib import Path diff --git a/tests/test_report.py b/tests/test_report.py index cacda08..4506fe2 100644 --- a/tests/test_report.py +++ b/tests/test_report.py @@ -1,3 +1,9 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + import ast import json from collections.abc import Callable, Collection, Mapping, Sequence diff --git a/tests/test_report_branch_invariants.py b/tests/test_report_branch_invariants.py index 098abf2..ad7bbaf 100644 --- a/tests/test_report_branch_invariants.py +++ b/tests/test_report_branch_invariants.py @@ -1,3 +1,9 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + from __future__ import annotations from codeclone._html_snippets import _FileCache diff --git a/tests/test_report_contract_coverage.py b/tests/test_report_contract_coverage.py index 38a03b0..e17f64c 100644 --- a/tests/test_report_contract_coverage.py +++ b/tests/test_report_contract_coverage.py @@ -1,3 +1,9 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + from __future__ import annotations import json diff --git a/tests/test_report_explain.py b/tests/test_report_explain.py index 57689bf..8fe4e8c 100644 --- a/tests/test_report_explain.py +++ b/tests/test_report_explain.py @@ -1,3 +1,9 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + import ast from pathlib import Path diff --git a/tests/test_report_source_kinds.py b/tests/test_report_source_kinds.py index 2f64c8c..5541035 100644 --- a/tests/test_report_source_kinds.py +++ b/tests/test_report_source_kinds.py @@ -1,3 +1,9 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + from __future__ import annotations from codeclone.report._source_kinds import ( diff --git a/tests/test_report_suggestions.py b/tests/test_report_suggestions.py index 44063ea..bf5f759 100644 --- a/tests/test_report_suggestions.py +++ b/tests/test_report_suggestions.py @@ -1,3 +1,9 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + from __future__ import annotations from codeclone.models import ( diff --git a/tests/test_scanner_extra.py b/tests/test_scanner_extra.py index c2fa01e..a7a77ca 100644 --- a/tests/test_scanner_extra.py +++ b/tests/test_scanner_extra.py @@ -1,3 +1,9 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + from __future__ import annotations import os diff --git a/tests/test_security.py b/tests/test_security.py index 3e5c474..599c092 100644 --- a/tests/test_security.py +++ b/tests/test_security.py @@ -1,3 +1,9 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + import os import tempfile from pathlib import Path diff --git a/tests/test_segments.py b/tests/test_segments.py index 6e2ab63..6253233 100644 --- a/tests/test_segments.py +++ b/tests/test_segments.py @@ -1,3 +1,9 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. 
If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + import ast from codeclone.blocks import extract_segments diff --git a/tests/test_structural_findings.py b/tests/test_structural_findings.py index 9ea9ad9..fb7bacb 100644 --- a/tests/test_structural_findings.py +++ b/tests/test_structural_findings.py @@ -1,4 +1,7 @@ -# SPDX-License-Identifier: MIT +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy """Unit tests for codeclone.structural_findings (Phase 1: duplicated_branches).""" diff --git a/tests/test_suppressions.py b/tests/test_suppressions.py index 2848704..f36ef4c 100644 --- a/tests/test_suppressions.py +++ b/tests/test_suppressions.py @@ -1,3 +1,9 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + from __future__ import annotations import pytest From bff187faaa1b8a82a0af75688ebb2fffdf5d56e4 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Mon, 30 Mar 2026 12:53:11 +0500 Subject: [PATCH 05/15] chore(tests): extending test coverage --- CHANGELOG.md | 4 +- tests/test_baseline.py | 46 ++++ tests/test_cache.py | 79 +++++++ tests/test_cli_unit.py | 286 +++++++++++++++++++++++ tests/test_extractor.py | 189 +++++++++++++++ tests/test_html_report_helpers.py | 305 +++++++++++++++++++++++++ tests/test_mcp_service.py | 4 + tests/test_metrics_baseline.py | 33 +++ tests/test_metrics_modules.py | 14 ++ tests/test_report.py | 77 +++++++ tests/test_report_contract_coverage.py | 164 +++++++++++++ tests/test_report_suggestions.py | 37 +++ tests/test_structural_findings.py | 104 +++++++++ 13 files changed, 1339 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4537fe4..2157f6a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,6 @@ # Changelog -## [Unreleased] +## [2.0.0b3] ### Licensing @@ -10,8 +10,6 @@ - Ship both `LICENSE` and `LICENSE-docs`, update package metadata, and sync file-level SPDX headers. -## [2.0.0b3] - ### MCP server - Add optional `codeclone[mcp]` extra with `codeclone-mcp` launcher (`stdio` and `streamable-http` transports). 
diff --git a/tests/test_baseline.py b/tests/test_baseline.py index 4ee3e77..302040e 100644 --- a/tests/test_baseline.py +++ b/tests/test_baseline.py @@ -7,6 +7,7 @@ import json from collections.abc import Callable from pathlib import Path +from typing import Any, cast import pytest @@ -225,6 +226,16 @@ def test_baseline_load_legacy_payload(tmp_path: Path) -> None: assert exc.value.status == "missing_fields" +def test_baseline_load_rejects_non_object_preloaded_payload(tmp_path: Path) -> None: + baseline_path = tmp_path / "baseline.json" + _write_payload(baseline_path, _trusted_payload()) + baseline = Baseline(baseline_path) + + with pytest.raises(BaselineValidationError, match="must be an object") as exc: + baseline.load(preloaded_payload=cast(Any, [])) + assert exc.value.status == "invalid_type" + + def test_baseline_load_missing_top_level_key(tmp_path: Path) -> None: baseline_path = tmp_path / "baseline.json" _write_payload(baseline_path, {"meta": {}}) @@ -784,6 +795,25 @@ def _boom_stat(self: Path) -> object: assert exc.value.status == "invalid_type" +def test_baseline_atomic_write_json_cleans_up_temp_file_on_replace_failure( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + path = tmp_path / "baseline.json" + temp_holder: dict[str, Path] = {} + + def _boom_replace(src: str | Path, dst: str | Path) -> None: + temp_holder["path"] = Path(src) + raise OSError("replace failed") + + monkeypatch.setattr("codeclone.baseline.os.replace", _boom_replace) + + with pytest.raises(OSError, match="replace failed"): + baseline_mod._atomic_write_json(path, _trusted_payload()) + + assert temp_holder["path"].exists() is False + + def test_baseline_load_json_read_error( tmp_path: Path, monkeypatch: pytest.MonkeyPatch ) -> None: @@ -818,6 +848,22 @@ def test_baseline_optional_str_paths(tmp_path: Path) -> None: assert exc.value.status == "invalid_type" +def test_baseline_require_utc_iso8601_z_rejects_invalid_calendar_date( + tmp_path: Path, +) -> None: + path = 
tmp_path / "baseline.json" + with pytest.raises( + BaselineValidationError, + match="'created_at' must be UTC ISO-8601 with Z", + ) as exc: + baseline_mod._require_utc_iso8601_z( + {"created_at": "2026-02-31T00:00:00Z"}, + "created_at", + path=path, + ) + assert exc.value.status == "invalid_type" + + def test_baseline_load_legacy_codeclone_version_alias(tmp_path: Path) -> None: baseline_path = tmp_path / "baseline.json" payload = _trusted_payload(generator_version="1.4.0") diff --git a/tests/test_cache.py b/tests/test_cache.py index e4bc012..788709e 100644 --- a/tests/test_cache.py +++ b/tests/test_cache.py @@ -1277,6 +1277,73 @@ def test_decode_wire_file_entry_optional_source_stats() -> None: ) +def test_cache_helpers_cover_invalid_analysis_profile_and_source_stats_shapes() -> None: + assert ( + cache_mod._decode_wire_qualname_span_size(["pkg.mod:fn", 1, 2, "bad"]) is None + ) + assert cache_mod._decode_wire_qualname_span_size([None, 1, 2, 4]) is None + assert ( + cache_mod._as_analysis_profile( + { + "min_loc": 1, + "min_stmt": 1, + "block_min_loc": 2, + "block_min_stmt": "bad", + "segment_min_loc": 3, + "segment_min_stmt": 4, + } + ) + is None + ) + assert ( + cache_mod._decode_optional_wire_source_stats(obj={"ss": [1, 2, "bad", 0]}) + is None + ) + + +def test_canonicalize_cache_entry_skips_invalid_dead_candidate_suppression_shape() -> ( + None +): + normalized = cache_mod._canonicalize_cache_entry( + cast( + Any, + { + "stat": {"mtime_ns": 1, "size": 2}, + "units": [], + "blocks": [], + "segments": [], + "class_metrics": [], + "module_deps": [], + "dead_candidates": [ + { + "qualname": "pkg.mod:unused", + "local_name": "unused", + "filepath": "pkg/mod.py", + "start_line": 1, + "end_line": 2, + "kind": "function", + "suppressed_rules": "dead-code", + } + ], + "referenced_names": [], + "referenced_qualnames": [], + "import_names": [], + "class_names": [], + }, + ) + ) + assert normalized["dead_candidates"] == [ + { + "qualname": "pkg.mod:unused", + "local_name": 
"unused", + "filepath": "pkg/mod.py", + "start_line": 1, + "end_line": 2, + "kind": "function", + } + ] + + def test_decode_optional_wire_coupled_classes_rejects_non_string_qualname() -> None: assert ( cache_mod._decode_optional_wire_coupled_classes( @@ -1562,6 +1629,18 @@ def test_cache_type_predicates_reject_non_dict_variants() -> None: assert cache_mod._is_class_metrics_dict([]) is False assert cache_mod._is_module_dep_dict([]) is False assert cache_mod._is_dead_candidate_dict([]) is False + assert ( + cache_mod._is_dead_candidate_dict( + { + "qualname": "pkg.mod:broken", + "local_name": "broken", + "filepath": "pkg/mod.py", + "start_line": 1, + "end_line": 2, + } + ) + is False + ) assert ( cache_mod._is_dead_candidate_dict( { diff --git a/tests/test_cli_unit.py b/tests/test_cli_unit.py index ce6f2a2..37338c9 100644 --- a/tests/test_cli_unit.py +++ b/tests/test_cli_unit.py @@ -12,10 +12,12 @@ from argparse import Namespace from collections.abc import Callable from pathlib import Path +from types import SimpleNamespace from typing import Any, cast import pytest +import codeclone._cli_meta as cli_meta_mod import codeclone._cli_reports as cli_reports import codeclone._cli_summary as cli_summary import codeclone.baseline as baseline_mod @@ -26,6 +28,7 @@ from codeclone import ui_messages as ui from codeclone._cli_args import build_parser from codeclone._cli_config import ConfigValidationError +from codeclone.cache import Cache from codeclone.cli import process_file from codeclone.contracts import DOCS_URL, ISSUES_URL, REPOSITORY_URL from codeclone.errors import BaselineValidationError @@ -251,6 +254,16 @@ def test_open_html_report_in_browser_raises_without_handler( cli_reports._open_html_report_in_browser(path=report_path) +def test_open_html_report_in_browser_succeeds_when_handler_exists( + monkeypatch: pytest.MonkeyPatch, + tmp_path: Path, +) -> None: + report_path = tmp_path / "report.html" + report_path.write_text("", encoding="utf-8") + 
monkeypatch.setattr(webbrowser, "open_new_tab", lambda _uri: True) + cli_reports._open_html_report_in_browser(path=report_path) + + def test_cli_plain_console_status_context() -> None: plain = cli._make_plain_console() with plain.status("noop"): @@ -358,6 +371,18 @@ def test_validate_changed_scope_args_promotes_paths_from_git_diff() -> None: assert args.changed_only is True +def test_validate_changed_scope_args_rejects_conflicting_diff_sources() -> None: + cli.console = cli._make_console(no_color=True) + args = Namespace( + changed_only=True, + diff_against="HEAD~1", + paths_from_git_diff="HEAD~2", + ) + with pytest.raises(SystemExit) as exc: + cli._validate_changed_scope_args(args=args) + assert exc.value.code == 2 + + def test_normalize_changed_paths_relativizes_dedupes_and_sorts(tmp_path: Path) -> None: root_path = tmp_path.resolve() pkg_dir = root_path / "pkg" @@ -373,6 +398,26 @@ def test_normalize_changed_paths_relativizes_dedupes_and_sorts(tmp_path: Path) - ) == ("pkg/a.py", "pkg/b.py") +def test_normalize_changed_paths_skips_empty_relative_results( + monkeypatch: pytest.MonkeyPatch, + tmp_path: Path, +) -> None: + root_path = tmp_path.resolve() + candidate = root_path / "marker.py" + candidate.write_text("pass\n", encoding="utf-8") + original_relative_to = Path.relative_to + + def _fake_relative_to(self: Path, *other: str | Path) -> Path: + if self == candidate: + return Path("/") + return original_relative_to(self, *other) + + monkeypatch.setattr(Path, "relative_to", _fake_relative_to) + assert ( + cli._normalize_changed_paths(root_path=root_path, paths=(str(candidate),)) == () + ) + + def test_normalize_changed_paths_reports_unresolvable_path( monkeypatch: pytest.MonkeyPatch, tmp_path: Path ) -> None: @@ -442,6 +487,25 @@ def _run(*args: object, **kwargs: object) -> subprocess.CompletedProcess[str]: assert exc.value.code == 2 +def test_git_diff_changed_paths_rejects_option_like_ref(tmp_path: Path) -> None: + cli.console = cli._make_console(no_color=True) 
+ with pytest.raises(SystemExit) as exc: + cli._git_diff_changed_paths( + root_path=tmp_path.resolve(), git_diff_ref="--cached" + ) + assert exc.value.code == 2 + + +def test_report_path_origins_ignores_unrelated_equals_tokens() -> None: + assert cli._report_path_origins(("--unknown=value", "--json=out.json")) == { + "html": None, + "json": "explicit", + "md": None, + "sarif": None, + "text": None, + } + + def test_changed_clone_gate_from_report_filters_changed_scope() -> None: gate = cli._changed_clone_gate_from_report( { @@ -501,6 +565,68 @@ def test_changed_clone_gate_from_report_filters_changed_scope() -> None: assert gate.findings_known == 1 +def test_run_analysis_stages_requires_rich_console_when_progress_ui_is_enabled( + monkeypatch: pytest.MonkeyPatch, + tmp_path: Path, +) -> None: + cli.console = cli._make_plain_console() + monkeypatch.setattr( + cli, + "discover", + lambda **_kwargs: SimpleNamespace( + skipped_warnings=(), files_to_process=("x.py",) + ), + ) + + with pytest.raises(RuntimeError, match="Rich console is required"): + cli._run_analysis_stages( + args=Namespace(quiet=False, no_progress=False), + boot=cast(Any, object()), + cache=Cache(tmp_path / "cache.json"), + ) + + +def test_run_analysis_stages_prints_source_read_failures_when_failed_files_are_empty( + monkeypatch: pytest.MonkeyPatch, + tmp_path: Path, +) -> None: + cli.console = cli._make_plain_console() + printed: list[tuple[object, ...]] = [] + monkeypatch.setattr( + cli, + "_print_failed_files", + lambda failures: printed.append(tuple(failures)), + ) + monkeypatch.setattr( + cli, + "discover", + lambda **_kwargs: SimpleNamespace(skipped_warnings=(), files_to_process=()), + ) + monkeypatch.setattr( + cli, + "process", + lambda **_kwargs: SimpleNamespace( + failed_files=(), + source_read_failures=("pkg/mod.py: unreadable",), + ), + ) + monkeypatch.setattr(cli, "analyze", lambda **_kwargs: SimpleNamespace()) + monkeypatch.setattr( + cli, + "_cache_update_segment_projection", + lambda 
*_args, **_kwargs: None, + ) + monkeypatch.setattr(Cache, "save", lambda self: None) + + cli._run_analysis_stages( + args=Namespace(quiet=False, no_progress=True), + boot=cast(Any, object()), + cache=Cache(tmp_path / "cache.json"), + ) + + assert printed == [(), ("pkg/mod.py: unreadable",)] + + def test_enforce_gating_rewrites_clone_threshold_for_changed_scope( monkeypatch: pytest.MonkeyPatch, ) -> None: @@ -547,6 +673,166 @@ def test_enforce_gating_rewrites_clone_threshold_for_changed_scope( ) +def test_enforce_gating_drops_rewritten_threshold_when_changed_scope_is_within_limit( + monkeypatch: pytest.MonkeyPatch, +) -> None: + cli.console = cli._make_console(no_color=True) + observed: dict[str, object] = {} + + monkeypatch.setattr( + cli, + "gate", + lambda **_kwargs: pipeline.GatingResult( + exit_code=3, + reasons=("clone:threshold:8:1",), + ), + ) + monkeypatch.setattr( + cli, + "_print_gating_failure_block", + lambda **kwargs: observed.update(kwargs), + ) + + cli._enforce_gating( + args=Namespace(fail_threshold=5, verbose=False), + boot=cast("pipeline.BootstrapResult", object()), + analysis=cast("pipeline.AnalysisResult", object()), + processing=cast(Any, Namespace(source_read_failures=[])), + source_read_contract_failure=False, + baseline_failure_code=None, + metrics_baseline_failure_code=None, + new_func=set(), + new_block=set(), + metrics_diff=None, + html_report_path=None, + clone_threshold_total=2, + ) + + assert observed == {} + + +def test_main_impl_prints_changed_scope_when_changed_projection_is_available( + monkeypatch: pytest.MonkeyPatch, + tmp_path: Path, +) -> None: + baseline_path = tmp_path / "baseline.json" + metrics_path = tmp_path / "metrics.json" + cache_path = tmp_path / "cache.json" + monkeypatch.setattr( + sys, + "argv", + [ + "codeclone", + str(tmp_path), + "--quiet", + "--changed-only", + "--diff-against", + "HEAD~1", + "--baseline", + str(baseline_path), + "--metrics-baseline", + str(metrics_path), + "--cache-path", + str(cache_path), + 
], + ) + observed: dict[str, object] = {} + + monkeypatch.setattr(cli, "load_pyproject_config", lambda _root: {}) + monkeypatch.setattr( + cli, + "apply_pyproject_config_overrides", + lambda **_kwargs: None, + ) + monkeypatch.setattr( + cli, + "_git_diff_changed_paths", + lambda **_kwargs: ("pkg/dup.py",), + ) + monkeypatch.setattr(cli, "_validate_report_ui_flags", lambda **_kwargs: None) + monkeypatch.setattr(cli, "bootstrap", lambda **_kwargs: cast(Any, object())) + monkeypatch.setattr( + cli, + "_run_analysis_stages", + lambda **_kwargs: ( + SimpleNamespace(files_found=1, cache_hits=0), + SimpleNamespace( + files_analyzed=1, + files_skipped=0, + analyzed_lines=10, + analyzed_functions=1, + analyzed_methods=0, + analyzed_classes=0, + source_read_failures=(), + ), + SimpleNamespace( + func_groups={}, + block_groups={}, + func_clones_count=0, + block_clones_count=0, + segment_clones_count=0, + suppressed_segment_groups=0, + project_metrics=None, + ), + ), + ) + monkeypatch.setattr( + cli, + "_resolve_clone_baseline_state", + lambda **_kwargs: SimpleNamespace( + baseline=baseline_mod.Baseline(baseline_path), + loaded=False, + status=baseline_mod.BaselineStatus.MISSING, + trusted_for_diff=False, + updated_path=None, + failure_code=None, + ), + ) + monkeypatch.setattr( + cli, + "_resolve_metrics_baseline_state", + lambda **_kwargs: SimpleNamespace( + baseline=metrics_baseline_mod.MetricsBaseline(metrics_path), + loaded=False, + status=metrics_baseline_mod.MetricsBaselineStatus.MISSING, + trusted_for_diff=False, + failure_code=None, + ), + ) + monkeypatch.setattr(cli_meta_mod, "_build_report_meta", lambda **_kwargs: {}) + monkeypatch.setattr(cli, "_print_summary", lambda **_kwargs: None) + monkeypatch.setattr( + cli, "report", lambda **_kwargs: SimpleNamespace(report_document={}) + ) + monkeypatch.setattr( + cli, + "_changed_clone_gate_from_report", + lambda _report, changed_paths: cli.ChangedCloneGate( + changed_paths=tuple(changed_paths), + new_func=frozenset(), + 
new_block=frozenset(), + total_clone_groups=0, + findings_total=3, + findings_new=1, + findings_known=2, + ), + ) + monkeypatch.setattr( + cli, + "_print_changed_scope", + lambda **kwargs: observed.update(kwargs), + ) + monkeypatch.setattr(cli, "_write_report_outputs", lambda **_kwargs: None) + monkeypatch.setattr(cli, "_enforce_gating", lambda **_kwargs: None) + + cli._main_impl() + + changed_scope = cast(Any, observed["changed_scope"]) + assert observed["quiet"] is True + assert changed_scope.paths_count == 1 + assert changed_scope.findings_total == 3 + + def test_make_console_caps_width_to_layout_limit( monkeypatch: pytest.MonkeyPatch, ) -> None: diff --git a/tests/test_extractor.py b/tests/test_extractor.py index 566754a..6491e31 100644 --- a/tests/test_extractor.py +++ b/tests/test_extractor.py @@ -140,6 +140,75 @@ def test_declaration_token_index_uses_prebuilt_index() -> None: ) +def test_declaration_helpers_cover_async_found_tokens_and_eof_scan() -> None: + async_node = ast.parse( + """ +async def demo(): + return 1 +""" + ).body[0] + assert isinstance(async_node, ast.AsyncFunctionDef) + assert extractor._declaration_token_name(async_node) == "async" + + tokens = extractor._source_tokens("def demo():\n return 1\n") + assert ( + extractor._declaration_token_index( + source_tokens=tokens, + start_line=1, + start_col=0, + declaration_token="def", + ) + == 0 + ) + + nested_tokens = extractor._source_tokens( + "def demo(arg: tuple[int, int]) -> tuple[int, int]:\n return arg\n" + ) + assert ( + extractor._scan_declaration_colon_line( + source_tokens=nested_tokens, + start_index=0, + ) + == 1 + ) + + default_tokens = extractor._source_tokens( + "def demo(arg=(1, [2])):\n return arg\n" + ) + assert ( + extractor._scan_declaration_colon_line( + source_tokens=default_tokens, + start_index=0, + ) + == 1 + ) + + eof_tokens = ( + tokenize.TokenInfo(tokenize.NAME, "def", (1, 0), (1, 3), "def demo("), + tokenize.TokenInfo(tokenize.NAME, "demo", (1, 4), (1, 8), "def 
demo("), + tokenize.TokenInfo(tokenize.OP, "(", (1, 8), (1, 9), "def demo("), + ) + assert ( + extractor._scan_declaration_colon_line( + source_tokens=eof_tokens, + start_index=0, + ) + is None + ) + + unmatched_close_tokens = ( + tokenize.TokenInfo(tokenize.NAME, "def", (1, 0), (1, 3), "def demo)"), + tokenize.TokenInfo(tokenize.OP, ")", (1, 8), (1, 9), "def demo)"), + ) + assert ( + extractor._scan_declaration_colon_line( + source_tokens=unmatched_close_tokens, + start_index=0, + ) + is None + ) + + def test_scan_declaration_colon_line_returns_none_when_header_is_incomplete() -> None: tokens = extractor._source_tokens("def broken\n") assert ( @@ -174,6 +243,40 @@ def broken(): assert extractor._declaration_end_line(node, source_tokens=()) == 0 +def test_declaration_fallback_helpers_cover_empty_and_same_line_bodies() -> None: + empty_body_node = ast.parse( + """ +def demo(): + return 1 +""" + ).body[0] + assert isinstance(empty_body_node, ast.FunctionDef) + empty_body_node.body = [] + assert extractor._fallback_declaration_end_line(empty_body_node, start_line=2) == 2 + + inline_body_node = ast.parse( + """ +def demo(): + return 1 +""" + ).body[0] + assert isinstance(inline_body_node, ast.FunctionDef) + inline_body_node.body[0].lineno = 2 + assert extractor._fallback_declaration_end_line(inline_body_node, start_line=2) == 2 + + no_colon_tokens = ( + tokenize.TokenInfo(tokenize.NAME, "def", (2, 0), (2, 3), "def demo"), + tokenize.TokenInfo(tokenize.NAME, "demo", (2, 4), (2, 8), "def demo"), + ) + assert ( + extractor._declaration_end_line( + inline_body_node, + source_tokens=no_colon_tokens, + ) + == 2 + ) + + def test_init_function_is_ignored_for_blocks() -> None: src = """ class A: @@ -834,6 +937,92 @@ def hook(self) -> int: assert "pkg.helpers:decorate" not in walk.referenced_qualnames +def test_extractor_private_helper_branches_cover_invalid_protocol_and_declarations( + monkeypatch: pytest.MonkeyPatch, +) -> None: + expr = ast.Attribute( + value=ast.Call( + 
func=ast.Name(id="factory", ctx=ast.Load()), + args=[], + keywords=[], + ), + attr="method", + ctx=ast.Load(), + ) + assert extractor._dotted_expr_name(expr) is None + + protocol_class = ast.parse( + """ +class Demo(Unknown, alias.Protocol): + pass +""" + ).body[0] + assert isinstance(protocol_class, ast.ClassDef) + assert ( + extractor._is_protocol_class( + protocol_class, + protocol_symbol_aliases=frozenset({"Protocol"}), + protocol_module_aliases=frozenset({"typing"}), + ) + is False + ) + + bad_span_node = ast.parse( + """ +def demo(): + return 1 +""" + ).body[0] + assert isinstance(bad_span_node, ast.FunctionDef) + bad_span_node.lineno = 3 + bad_span_node.end_lineno = 2 + assert extractor._eligible_unit_shape(bad_span_node, min_loc=1, min_stmt=1) is None + + _, missing_method_collector, missing_method_walk = _collect_module_walk( + """ +class Service: + def real(self) -> int: + return 1 + +handler = Service.missing +""" + ) + assert "pkg.mod:Service.missing" not in missing_method_walk.referenced_qualnames + assert missing_method_collector.class_nodes[0][0] == "Service" + + _, declaration_collector = _parse_tree_and_collector( + """ +class Demo: + def work(self) -> int: + return 1 +""" + ) + declaration_collector.units[0][1].end_lineno = 0 + declaration_collector.class_nodes[0][1].end_lineno = 0 + assert ( + extractor._collect_declaration_targets( + filepath="pkg/mod.py", + module_name="pkg.mod", + collector=declaration_collector, + ) + == () + ) + + suppression_source = """ +def demo(): # codeclone: ignore[dead-code] + return 1 +""" + _, suppression_collector = _parse_tree_and_collector(suppression_source) + monkeypatch.setattr(extractor, "_source_tokens", lambda _source: ()) + suppression_index = extractor._build_suppression_index_for_source( + source=suppression_source, + filepath="pkg/mod.py", + module_name="pkg.mod", + collector=suppression_collector, + ) + assert tuple(suppression_index.values()) == (("dead-code",),) + + def 
test_extract_stats_drops_referenced_names_for_test_filepaths() -> None: src = """ from pkg.mod import live diff --git a/tests/test_html_report_helpers.py b/tests/test_html_report_helpers.py index b7e5bc5..086c896 100644 --- a/tests/test_html_report_helpers.py +++ b/tests/test_html_report_helpers.py @@ -4,9 +4,15 @@ # SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy +from pathlib import Path from types import SimpleNamespace from typing import Any, cast +import pytest + +import codeclone._html_report._assemble as assemble_mod +import codeclone._html_report._sections._suggestions as suggestions_section +from codeclone._html_badges import _quality_badge_html, _stat_card from codeclone._html_report._components import ( overview_source_breakdown_html, overview_summary_item_html, @@ -15,12 +21,26 @@ _derive_group_display_name, _render_group_explanation, ) +from codeclone._html_report._sections._dead_code import render_dead_code_panel from codeclone._html_report._sections._dependencies import ( _hub_threshold, _render_dep_nodes_and_labels, _select_dep_nodes, ) +from codeclone._html_report._sections._meta import _path_basename, render_meta_panel +from codeclone._html_report._sections._overview import ( + _health_gauge_html, + _issue_breakdown_html, + render_overview_panel, +) +from codeclone._html_report._sections._suggestions import ( + _format_source_breakdown, + _render_card, + _render_fact_summary, +) from codeclone._html_report._tabs import render_split_tabs +from codeclone._html_snippets import _FileCache +from codeclone.models import MetricsDiff, ReportLocation, Suggestion def test_summary_helpers_cover_empty_and_non_clone_context_branches() -> None: @@ -75,6 +95,23 @@ def test_clone_display_name_and_group_explanation_edge_branches() -> None: assert _render_group_explanation({}) == "" +def test_clone_display_name_falls_back_to_short_key_when_items_have_no_labels() -> None: + ctx = SimpleNamespace( + bare_qualname=lambda _qualname, _filepath: "", 
+ relative_path=lambda _filepath: "", + ) + assert ( + _derive_group_display_name( + "short-key", + ({"qualname": "", "filepath": ""},), + "blocks", + {}, + cast(Any, ctx), + ) + == "short-key" + ) + + def test_dependency_helpers_cover_dense_and_empty_branches() -> None: edges = [(f"n{i}", f"n{i + 1}") for i in range(21)] nodes, filtered = _select_dep_nodes(edges) @@ -99,3 +136,271 @@ def test_dependency_helpers_cover_dense_and_empty_branches() -> None: def test_render_split_tabs_returns_empty_for_no_tabs() -> None: assert render_split_tabs(group_id="dead-code", tabs=()) == "" + + +def _section_ctx(**overrides: object) -> SimpleNamespace: + base: dict[str, object] = { + "clone_groups_total": 4, + "complexity_map": {"summary": {"high_risk": 5, "average": 2.5, "max": 9}}, + "coupling_map": {"summary": {"high_risk": 3, "average": 1.5, "max": 7}}, + "cohesion_map": {"summary": {"low_cohesion": 2, "average": 1.2, "max": 5}}, + "dead_code_map": { + "summary": {"total": 4, "high_confidence": 0, "suppressed": 0}, + "items": [ + { + "qualname": "pkg.mod:maybe", + "filepath": "pkg/mod.py", + "start_line": 5, + "kind": "function", + "confidence": "medium", + } + ], + "suppressed_items": [ + { + "qualname": "pkg.mod:kept", + "filepath": "pkg/mod.py", + "start_line": 9, + "kind": "function", + "confidence": "medium", + "suppressed_by": [{"rule": "dead-code", "source": "inline"}], + } + ], + }, + "dependencies_map": {"cycles": [("pkg.a", "pkg.b")], "max_depth": 4}, + "health_map": {"score": 82, "grade": "B", "dimensions": {}}, + "metrics_available": True, + "structural_findings": (SimpleNamespace(finding_kind="duplicated_branches"),), + "suggestions": (), + "metrics_diff": None, + "func_sorted": (("clone:new", ({}, {})),), + "block_sorted": (("clone:block", ({},)),), + "segment_sorted": (), + "new_func_keys": frozenset({"clone:new"}), + "new_block_keys": frozenset(), + "overview_data": {"source_breakdown": {"production": 3, "tests": 1}}, + "bare_qualname": ( + lambda qualname, 
_filepath: qualname.rsplit(":", maxsplit=1)[-1] + ), + "relative_path": lambda filepath: filepath, + "meta": {}, + "baseline_meta": {}, + "cache_meta": {}, + "metrics_baseline_meta": {}, + "runtime_meta": {}, + "integrity_map": {}, + "report_schema_version": "2.1", + "report_generated_at": "2026-03-22T21:30:45Z", + } + base.update(overrides) + return SimpleNamespace(**base) + + +def _make_suggestion(**overrides: object) -> Suggestion: + payload: dict[str, object] = { + "severity": "warning", + "category": "complexity", + "title": "Reduce function complexity", + "location": "pkg/mod.py:10-20", + "steps": ("Extract a helper.",), + "effort": "moderate", + "priority": 0.9, + "finding_family": "metrics", + "finding_kind": "function_hotspot", + "subject_key": "pkg.mod:run", + "fact_kind": "Complexity hotspot", + "fact_summary": "cyclomatic_complexity=15, guard_count=2, hot path", + "fact_count": 2, + "spread_files": 2, + "spread_functions": 3, + "clone_type": "", + "confidence": "high", + "source_kind": "production", + "source_breakdown": (("production", 2), ("tests", 1)), + "representative_locations": ( + ReportLocation( + filepath="/repo/pkg/mod.py", + relative_path="pkg/mod.py", + start_line=10, + end_line=20, + qualname="pkg.mod:run", + source_kind="production", + ), + ), + "location_label": "pkg/mod.py:10-20", + } + payload.update(overrides) + return Suggestion(**cast(Any, payload)) + + +def test_html_badges_and_cards_cover_effort_and_tip_paths() -> None: + assert 'risk-badge risk-moderate">moderate<' in _quality_badge_html("moderate") + + card_html = _stat_card( + "High Complexity", + 7, + tip="Cyclomatic hotspots", + value_tone="good", + delta_new=2, + ) + assert "meta-value--good" in card_html + assert 'data-tip="Cyclomatic hotspots"' in card_html + assert "+2<" in card_html + + plain_card_html = _stat_card("Clone Groups", 2) + assert "kpi-help" not in plain_card_html + + +def test_overview_helpers_cover_negative_delta_split_and_baselined_rows() -> None: + 
gauge_html = _health_gauge_html(65, "B", health_delta=-5) + assert "health-ring-delta--down" in gauge_html + assert 'stroke="var(--error)" opacity="0.4"' in gauge_html + assert "Get Badge" in gauge_html + + breakdown_html = _issue_breakdown_html( + cast(Any, _section_ctx()), + deltas={ + "clones": 1, + "structural": None, + "complexity": 0, + "cohesion": None, + "coupling": None, + "dead_code": 2, + "dep_cycles": 0, + }, + ) + assert "breakdown-bar-fill--baselined" in breakdown_html + assert 'families-delta families-delta--new">+1<' in breakdown_html + assert 'families-delta families-delta--ok">✓<' in breakdown_html + + +def test_render_overview_panel_surfaces_baselined_and_partially_baselined_kpis() -> ( + None +): + ctx = _section_ctx( + metrics_diff=MetricsDiff( + new_high_risk_functions=(), + new_high_coupling_classes=("pkg.mod:Service",), + new_cycles=(), + new_dead_code=("pkg.mod:unused",), + health_delta=3, + ), + func_sorted=(("clone:known", ({}, {})),), + block_sorted=(("clone:block", ({},)),), + new_func_keys=frozenset(), + new_block_keys=frozenset(), + ) + + panel_html = render_overview_panel(cast(Any, ctx)) + assert "kpi-micro--baselined" in panel_html + assert 'baselined' in panel_html + assert "health-ring-delta--up" in panel_html + + +def test_render_dead_code_panel_warns_when_only_medium_confidence_items_exist() -> None: + panel_html = render_dead_code_panel(cast(Any, _section_ctx())) + assert "2 candidates total" not in panel_html + assert "4 candidates total" in panel_html + assert "No dead code detected." not in panel_html + + +def test_suggestion_helpers_cover_empty_summary_breakdown_and_optional_sections( + monkeypatch: pytest.MonkeyPatch, +) -> None: + assert _render_fact_summary("") == "" + assert _render_fact_summary( + "cyclomatic_complexity=15, guard_count=2, hot path" + ) == ( + '
    ' + "cyclomatic complexity: 15, guard count: 2, hot path" + "
    " + ) + assert _format_source_breakdown({"tests": 2, "production": 1, "fixtures": 0}) == ( + "Production 1 · Tests 2" + ) + assert ( + _format_source_breakdown( + [("tests", 2), ("production", 1), ("fixtures", 0), ("other", "x")] + ) + == "Production 1 · Tests 2" + ) + + monkeypatch.setattr(suggestions_section, "source_kind_label", lambda _kind: "") + card_html = _render_card( + _make_suggestion( + category=cast(Any, ""), + source_kind=cast(Any, ""), + source_breakdown=(), + representative_locations=(), + steps=(), + fact_summary="", + location_label="", + ), + cast(Any, _section_ctx()), + ) + assert "suggestion-chip" not in card_html + assert "suggestion-summary" not in card_html + assert "Locations (" not in card_html + assert "Refactoring steps" not in card_html + + +def test_meta_snippet_and_assembly_helpers_cover_empty_optional_paths( + monkeypatch: pytest.MonkeyPatch, + tmp_path: Path, +) -> None: + assert _path_basename(" /tmp/demo/report.json ") == "report.json" + assert _path_basename("/") == "" + assert _path_basename(" ") is None + + meta_html = render_meta_panel( + cast( + Any, + SimpleNamespace( + meta={}, + baseline_meta={}, + cache_meta={}, + metrics_baseline_meta={}, + runtime_meta={}, + integrity_map={}, + report_schema_version="", + report_generated_at="", + ), + ) + ) + assert "Report schema" not in meta_html + assert "Schema" not in meta_html + + snippet_path = tmp_path / "demo.py" + snippet_path.write_text("print('x')\n", encoding="utf-8") + assert _FileCache().get_lines_range(str(snippet_path), 5, 6) == () + + monkeypatch.setattr(assemble_mod, "_pygments_css", lambda _style: "") + html_without_pygments = assemble_mod.build_html_report( + func_groups={}, + block_groups={}, + segment_groups={}, + block_group_facts={}, + report_meta={"project_name": "demo"}, + metrics={}, + report_document={}, + ) + assert '[data-theme="light"] .codebox span' not in html_without_pygments + + monkeypatch.setattr( + assemble_mod, + "_pygments_css", + lambda 
style: ( + ".codebox { color: #fff; }" + if style == "monokai" + else ".tok { color: #000; }" + ), + ) + html_without_light_rules = assemble_mod.build_html_report( + func_groups={}, + block_groups={}, + segment_groups={}, + block_group_facts={}, + report_meta={"project_name": "demo"}, + metrics={}, + report_document={}, + ) + assert '[data-theme="light"] .codebox span' not in html_without_light_rules diff --git a/tests/test_mcp_service.py b/tests/test_mcp_service.py index fdea3cf..166b572 100644 --- a/tests/test_mcp_service.py +++ b/tests/test_mcp_service.py @@ -2118,6 +2118,10 @@ def test_mcp_service_helper_branches_for_empty_gate_and_missing_remediation( } with pytest.raises(MCPServiceContractError): service.get_report_section(run_id="helpers", section="metrics_detail") + with pytest.raises(MCPServiceContractError): + service.get_report_section(run_id="helpers", section="findings") + + assert service._summary_payload({"inventory": {}}) == {"inventory": {}} assert service._suggestion_for_finding(record, "missing") is None assert ( diff --git a/tests/test_metrics_baseline.py b/tests/test_metrics_baseline.py index 53196d6..0f66333 100644 --- a/tests/test_metrics_baseline.py +++ b/tests/test_metrics_baseline.py @@ -9,6 +9,7 @@ import json import os from pathlib import Path +from typing import Any, cast import pytest @@ -178,6 +179,18 @@ def test_metrics_baseline_load_size_and_shape_validation(tmp_path: Path) -> None baseline.load() +def test_metrics_baseline_load_rejects_non_object_preloaded_payload( + tmp_path: Path, +) -> None: + path = tmp_path / "metrics-baseline.json" + _write_json(path, _valid_payload()) + baseline = MetricsBaseline(path) + + with pytest.raises(BaselineValidationError, match="must be an object") as exc: + baseline.load(preloaded_payload=cast(Any, [])) + assert exc.value.status == MetricsBaselineStatus.INVALID_TYPE + + def test_metrics_baseline_load_stat_error_after_exists_true( tmp_path: Path, monkeypatch: pytest.MonkeyPatch ) -> None: @@ 
-246,6 +259,26 @@ def test_metrics_baseline_save_with_existing_plain_payload_rewrites_plain( assert baseline.is_embedded_in_clone_baseline is False +def test_metrics_baseline_atomic_write_json_cleans_up_temp_file_on_replace_failure( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + path = tmp_path / "metrics-baseline.json" + payload = _valid_payload() + temp_holder: dict[str, Path] = {} + + def _boom_replace(src: str | Path, dst: str | Path) -> None: + temp_holder["path"] = Path(src) + raise OSError("replace failed") + + monkeypatch.setattr("codeclone.metrics_baseline.os.replace", _boom_replace) + + with pytest.raises(OSError, match="replace failed"): + mb_mod._atomic_write_json(path, payload) + + assert temp_holder["path"].exists() is False + + def test_metrics_baseline_save_rejects_corrupted_existing_payload( tmp_path: Path, ) -> None: diff --git a/tests/test_metrics_modules.py b/tests/test_metrics_modules.py index 4ee88b6..5418445 100644 --- a/tests/test_metrics_modules.py +++ b/tests/test_metrics_modules.py @@ -103,6 +103,20 @@ async def worker(items, value): assert nesting_depth(func) == 4 +def test_nesting_depth_counts_if_else_branches() -> None: + func = _parse_function( + """ +def choose(flag): + if flag: + return 1 + else: + return 2 +""".strip(), + "choose", + ) + assert nesting_depth(func) == 1 + + def test_iter_nested_statement_lists_try_and_empty_match() -> None: module = ast.parse( """ diff --git a/tests/test_report.py b/tests/test_report.py index 4506fe2..0b77054 100644 --- a/tests/test_report.py +++ b/tests/test_report.py @@ -14,9 +14,11 @@ import pytest import codeclone.report as report_mod +import codeclone.report.findings as report_findings_mod import codeclone.report.merge as merge_mod import codeclone.report.overview as overview_mod import codeclone.report.serialize as serialize_mod +from codeclone._html_snippets import _FileCache from codeclone.contracts import CACHE_VERSION, REPORT_SCHEMA_VERSION from codeclone.models import 
( StructuralFindingGroup, @@ -1646,6 +1648,81 @@ def test_report_overview_serialize_finding_group_card_covers_families() -> None: assert dependency_card["summary"] == "3 modules participate in this cycle" assert dependency_card["location"] == "pkg.a -> pkg.b -> pkg.c" + fallback_dependency_card = overview_mod.serialize_finding_group_card( + { + "family": "design", + "category": "dependency", + "severity": "warning", + "confidence": "medium", + "count": 2, + "source_scope": {"dominant_kind": "production"}, + "spread": {"files": 2, "functions": 0}, + "items": [{"module": ""}], + "facts": {}, + } + ) + assert ( + fallback_dependency_card["location"] + == "2 occurrences across 2 files / 0 functions" + ) + + unknown_design_card = overview_mod.serialize_finding_group_card( + { + "family": "design", + "category": "unknown", + "severity": "info", + "confidence": "low", + "count": 1, + "source_scope": {"dominant_kind": "other"}, + "spread": {"files": 1, "functions": 1}, + "items": [{"relative_path": "pkg/mod.py", "start_line": 1, "end_line": 1}], + "facts": {}, + } + ) + assert unknown_design_card["title"] == "Finding" + assert unknown_design_card["summary"] == "" + + +def test_report_findings_template_html_covers_custom_kind_fallback( + tmp_path: Path, +) -> None: + snippet_path = tmp_path / "custom.py" + snippet_path.write_text("value = 1\nvalue = 2\n", encoding="utf-8") + items = ( + StructuralFindingOccurrence( + finding_kind="custom_kind", + finding_key="custom:1", + file_path=str(snippet_path), + qualname="pkg.mod:fn", + start=1, + end=1, + signature={"stmt_seq": "Assign", "terminal": "fallthrough"}, + ), + StructuralFindingOccurrence( + finding_kind="custom_kind", + finding_key="custom:1", + file_path=str(snippet_path), + qualname="pkg.mod:fn", + start=2, + end=2, + signature={"stmt_seq": "Assign", "terminal": "fallthrough"}, + ), + ) + html = report_findings_mod._finding_why_template_html( + StructuralFindingGroup( + finding_kind="custom_kind", + 
finding_key="custom:1", + signature={"stmt_seq": "Assign", "terminal": "fallthrough"}, + items=items, + ), + items, + file_cache=_FileCache(), + context_lines=0, + max_snippet_lines=10, + ) + assert "structurally matching branch bodies" in html + assert "Showing the first 2 matching branches" in html + def test_report_overview_materialize_preserves_existing_cards_and_breakdown() -> None: materialized = materialize_report_overview( diff --git a/tests/test_report_contract_coverage.py b/tests/test_report_contract_coverage.py index e17f64c..da183f3 100644 --- a/tests/test_report_contract_coverage.py +++ b/tests/test_report_contract_coverage.py @@ -64,6 +64,12 @@ from codeclone.report.sarif import ( _partial_fingerprints as _sarif_partial_fingerprints, ) +from codeclone.report.sarif import ( + _primary_location_properties as _sarif_primary_location_properties, +) +from codeclone.report.sarif import ( + _result_entry as _sarif_result_entry, +) from codeclone.report.sarif import ( _result_message as _sarif_result_message, ) @@ -88,6 +94,7 @@ _text as _sarif_text, ) from codeclone.report.serialize import ( + _append_clone_section, _append_single_item_findings, _append_structural_findings, _append_suggestions, @@ -1597,6 +1604,33 @@ def test_serialize_private_helpers_cover_structural_and_suppression_paths() -> N assert any("... 
and 1 more occurrences" in line for line in structural_lines) assert structural_lines[-1] != "" + clone_lines: list[str] = [] + _append_clone_section( + clone_lines, + title="FUNCTION CLONES", + groups=[ + { + "id": "clone:function:1", + "novelty": "new", + "clone_type": "Type-2", + "severity": "warning", + "count": 1, + "spread": {"files": 1, "functions": 1}, + "source_scope": { + "dominant_kind": "production", + "impact_scope": "runtime", + }, + "items": [], + } + ], + novelty="new", + metric_name="cyclomatic_complexity", + ) + assert clone_lines[0] == "FUNCTION CLONES (NEW) (groups=1)" + assert not any(line.startswith("facts: ") for line in clone_lines) + assert not any(line.startswith("display_facts: ") for line in clone_lines) + assert clone_lines[-1] != "" + finding_lines: list[str] = [] _append_single_item_findings( finding_lines, @@ -1665,6 +1699,136 @@ def test_serialize_private_helpers_cover_structural_and_suppression_paths() -> N ) assert any("suppressed_by=(none)" in line for line in suppressed_none_lines) + +def test_sarif_and_serialize_helpers_cover_missing_primary_path_and_no_empty_tail() -> ( + None +): + assert _sarif_primary_location_properties( + {"qualname": "pkg.mod:fn", "start_line": 3, "end_line": 4} + ) == { + "primaryQualname": "pkg.mod:fn", + "primaryRegion": "3-4", + } + result = _sarif_result_entry( + group={"id": "clone:function:1", "severity": "warning", "items": []}, + rule_id="codeclone.clone.function", + rule_index=0, + artifact_index_map={}, + use_uri_base_id=False, + ) + assert result["locations"] == [] + assert "primaryPath" not in cast(dict[str, object], result["properties"]) + + class _NoEmptyList(list[str]): + def append(self, item: str) -> None: + if item != "": + super().append(item) + + clone_lines: list[str] = _NoEmptyList() + _append_clone_section( + clone_lines, + title="BLOCK CLONES", + groups=[ + { + "id": "clone:block:1", + "novelty": "known", + "clone_type": "Type-4", + "severity": "warning", + "count": 1, + 
"spread": {"files": 1, "functions": 1}, + "source_scope": { + "dominant_kind": "production", + "impact_scope": "runtime", + }, + "items": [ + { + "qualname": "pkg.mod:fn", + "relative_path": "pkg/mod.py", + "start_line": 1, + "end_line": 2, + } + ], + } + ], + novelty="known", + metric_name="cyclomatic_complexity", + ) + assert clone_lines[-1].startswith("- pkg.mod:fn") + + structural_lines: list[str] = _NoEmptyList() + _append_structural_findings( + structural_lines, + [ + { + "id": "structural:custom:2", + "kind": "custom_kind", + "severity": "info", + "confidence": "low", + "count": 1, + "spread": {"files": 1, "functions": 1}, + "source_scope": { + "dominant_kind": "production", + "impact_scope": "runtime", + }, + "signature": {"stable": {"family": "custom", "control_flow": {}}}, + "items": [ + { + "qualname": "pkg.mod:fn", + "relative_path": "pkg/mod.py", + "start_line": 1, + "end_line": 1, + } + ], + } + ], + ) + assert structural_lines[-1].startswith("- pkg.mod:fn") + + finding_lines: list[str] = _NoEmptyList() + _append_single_item_findings( + finding_lines, + title="DESIGN FINDINGS", + groups=[ + { + "id": "design:coupling:pkg.mod:fn", + "category": "coupling", + "kind": "class_hotspot", + "severity": "warning", + "confidence": "medium", + "source_scope": { + "dominant_kind": "production", + "impact_scope": "runtime", + }, + "items": [ + { + "qualname": "pkg.mod:fn", + "relative_path": "pkg/mod.py", + "start_line": 10, + "end_line": 11, + } + ], + } + ], + fact_keys=("cbo",), + ) + assert finding_lines[-1].startswith("- pkg.mod:fn") + + suppressed_lines: list[str] = _NoEmptyList() + _append_suppressed_dead_code_items( + suppressed_lines, + items=[ + { + "kind": "function", + "confidence": "high", + "relative_path": "pkg/mod.py", + "qualname": "pkg.mod:keep", + "start_line": 12, + "end_line": 13, + } + ], + ) + assert suppressed_lines[-1].startswith("- pkg.mod:keep") + suggestion_lines: list[str] = [] _append_suggestions( suggestion_lines, diff --git 
a/tests/test_report_suggestions.py b/tests/test_report_suggestions.py index bf5f759..402a535 100644 --- a/tests/test_report_suggestions.py +++ b/tests/test_report_suggestions.py @@ -11,6 +11,8 @@ DeadItem, HealthScore, ProjectMetrics, + StructuralFindingGroup, + StructuralFindingOccurrence, ) from codeclone.report import suggestions as suggestions_mod from codeclone.report.suggestions import classify_clone_type, generate_suggestions @@ -372,3 +374,38 @@ def test_generate_suggestions_uses_full_spread_for_group_location_label() -> Non assert ( clone_suggestion.location_label == "4 occurrences across 4 files / 4 functions" ) + + +def test_structural_suggestions_raise_clone_cohort_drift_to_warning() -> None: + suggestions = suggestions_mod._structural_suggestions( + ( + StructuralFindingGroup( + finding_kind="clone_cohort_drift", + finding_key="structural:clone_cohort_drift:1", + signature={"cohort_id": "c1", "drift_fields": "terminal_kind"}, + items=( + StructuralFindingOccurrence( + finding_kind="clone_cohort_drift", + finding_key="structural:clone_cohort_drift:1", + file_path="/repo/pkg/a.py", + qualname="pkg.a:alpha", + start=10, + end=12, + signature={"cohort_id": "c1"}, + ), + StructuralFindingOccurrence( + finding_kind="clone_cohort_drift", + finding_key="structural:clone_cohort_drift:1", + file_path="/repo/pkg/b.py", + qualname="pkg.b:beta", + start=20, + end=22, + signature={"cohort_id": "c1"}, + ), + ), + ), + ), + scan_root="/repo", + ) + assert len(suggestions) == 1 + assert suggestions[0].severity == "warning" diff --git a/tests/test_structural_findings.py b/tests/test_structural_findings.py index fb7bacb..29c02cd 100644 --- a/tests/test_structural_findings.py +++ b/tests/test_structural_findings.py @@ -10,6 +10,7 @@ import ast import sys +from typing import Any, cast import pytest @@ -845,3 +846,106 @@ def fn(x): collect_findings=False, ) assert match_facts.structural_findings == () + + +def 
test_structural_helper_branches_cover_empty_if_chain_and_bool_defaults() -> None: + if_chain = ast.parse("if flag:\n pass\n").body[0] + assert isinstance(if_chain, ast.If) + assert sf._collect_if_branch_bodies(if_chain) == [] + assert sf._as_item_bool("maybe", default=True) is True + assert sf._as_item_bool(object(), default=True) is True + assert sf._group_item_sort_key( + { + "filepath": "pkg/mod.py", + "qualname": "pkg.mod:fn", + "start_line": 3, + "end_line": 4, + } + ) == ("pkg/mod.py", "pkg.mod:fn", 3, 4) + + +def test_clone_cohort_findings_skip_invalid_filtered_members() -> None: + member = { + "filepath": "pkg/mod.py", + "qualname": "pkg.mod:fn", + "start_line": 10, + "end_line": 12, + "entry_guard_count": 1, + "entry_guard_terminal_profile": "return_const", + "entry_guard_has_side_effect_before": False, + "terminal_kind": "return_const", + "try_finally_profile": "none", + "side_effect_order_profile": "guard_then_effect", + } + findings = sf.build_clone_cohort_structural_findings( + func_groups={ + "cohort-a": ( + member, + {**member, "qualname": "pkg.mod:fn2", "start_line": 0}, + {**member, "qualname": "pkg.mod:fn3", "end_line": 0}, + ) + } + ) + assert findings == () + + +def test_clone_cohort_guard_and_drift_defensive_none_branches() -> None: + class _FlakyGuardMember: + def __init__(self, first_count: int, *, qualname: str) -> None: + self.file_path = "pkg/mod.py" + self.qualname = qualname + self.start = 1 + self.end = 2 + self.entry_guard_terminal_profile = "return_const" + self.entry_guard_has_side_effect_before = False + self.terminal_kind = "return_const" + self.try_finally_profile = "none" + self.side_effect_order_profile = "guard_then_effect" + self._first_count = first_count + self._reads = 0 + + @property + def entry_guard_count(self) -> int: + self._reads += 1 + return self._first_count if self._reads == 1 else 2 + + guard_members = ( + cast(Any, _FlakyGuardMember(1, qualname="pkg.mod:a")), + cast(Any, _FlakyGuardMember(2, 
qualname="pkg.mod:b")), + cast(Any, _FlakyGuardMember(2, qualname="pkg.mod:c")), + ) + assert sf._clone_guard_exit_divergence("cohort-guard", guard_members) is None + + class _FlakyDriftMember: + def __init__(self, first_terminal: str, *, qualname: str) -> None: + self.file_path = "pkg/mod.py" + self.qualname = qualname + self.start = 1 + self.end = 2 + self.entry_guard_count = 1 + self.entry_guard_terminal_profile = "return_const" + self.entry_guard_has_side_effect_before = False + self.try_finally_profile = "none" + self.side_effect_order_profile = "guard_then_effect" + self._first_terminal = first_terminal + self._reads = 0 + + @property + def terminal_kind(self) -> str: + self._reads += 1 + return self._first_terminal if self._reads == 1 else "return_const" + + @property + def guard_exit_profile(self) -> str: + return "1x:return_const" + + drift_members = ( + cast(Any, _FlakyDriftMember("raise", qualname="pkg.mod:a")), + cast(Any, _FlakyDriftMember("return_const", qualname="pkg.mod:b")), + cast(Any, _FlakyDriftMember("return_const", qualname="pkg.mod:c")), + ) + assert sf._clone_cohort_drift("cohort-drift", drift_members) is None + + +def test_collect_if_branch_bodies_returns_empty_for_none_like_input() -> None: + assert sf._collect_if_branch_bodies(cast(Any, None)) == [] From 9f60de5b660a6d2fc88c259da201995c4ede4524 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Mon, 30 Mar 2026 18:02:31 +0500 Subject: [PATCH 06/15] feat(core): canonicalize design thresholds and add directory hotspots to report projections --- CHANGELOG.md | 65 ++- README.md | 41 +- codeclone/_cli_meta.py | 14 + codeclone/_html_report/_sections/_overview.py | 130 ++++- codeclone/cli.py | 11 +- codeclone/contracts.py | 5 +- codeclone/mcp_server.py | 36 +- codeclone/mcp_service.py | 497 ++++++++---------- codeclone/report/derived.py | 66 ++- codeclone/report/json_contract.py | 149 ++++-- codeclone/report/overview.py | 224 +++++++- docs/README.md | 2 +- docs/architecture.md | 3 +- 
docs/book/01-architecture-map.md | 3 + docs/book/02-terminology.md | 2 +- docs/book/08-report.md | 17 +- docs/book/10-html-render.md | 4 + docs/book/13-testing-as-spec.md | 2 +- docs/book/14-compatibility-and-versioning.md | 14 +- docs/book/17-suggestions-and-clone-typing.md | 2 +- docs/book/20-mcp-interface.md | 60 ++- docs/book/appendix/b-schema-layouts.md | 18 +- docs/mcp.md | 89 ++-- tests/test_html_report.py | 74 ++- tests/test_html_report_helpers.py | 3 +- tests/test_mcp_server.py | 46 +- tests/test_mcp_service.py | 289 +++++++--- tests/test_report.py | 6 + tests/test_report_contract_coverage.py | 218 +++++++- 29 files changed, 1538 insertions(+), 552 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2157f6a..bfd0857 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,57 +2,54 @@ ## [2.0.0b3] -### Licensing +2.0.0b3 is the release where CodeClone stops looking like "a strong analyzer with extras" and starts looking like a +coherent platform: canonical-report-first, agent-facing, CI-native, and product-grade. -- Re-license repository code to MPL-2.0 and keep documentation under MIT. +### Licensing & packaging -### Packaging - -- Ship both `LICENSE` and `LICENSE-docs`, update package metadata, and sync file-level SPDX headers. +Re-license source code to MPL-2.0 while keeping documentation under MIT. Ship dual `LICENSE` / `LICENSE-docs` files and +sync SPDX headers. -### MCP server +### MCP server (new) -- Add optional `codeclone[mcp]` extra with `codeclone-mcp` launcher (`stdio` and `streamable-http` transports). -- Expose 19 read-only tools and 9 resources over the canonical pipeline: analysis, diff-aware changed-files, run - comparison, findings/hotspots/remediation, granular checks, gate preview, PR summary, and session review markers. -- Bound in-memory run retention (`--history-limit`, default `4`, max `10`) and prune stale session state automatically. 
-- Require explicit `--allow-remote` for non-loopback `streamable-http` binds; reject `cache_policy=refresh` to preserve +- Optional `codeclone[mcp]` extra with `codeclone-mcp` launcher (`stdio` and `streamable-http` transports). +- 20 read-only tools + 10 resources: analysis, diff-aware changed-files, run comparison, findings / hotspots / + remediation, granular checks, gate preview, PR summary, and session review markers. +- Bounded run retention (`--history-limit`), `--allow-remote` guard, `cache_policy=refresh` rejected to preserve read-only semantics. -- Defer MCP process-count policy to the core runtime when `processes` is not explicitly overridden. -- Slim MCP summary payloads for agent usage: `get_run_summary`, summary resources, and `analyze_changed_paths` now - replace `inventory.file_registry.items` with `{encoding, count}` while `analyze_repository` keeps the full registry. -- Split `get_report_section(section="metrics")` into a summary-only projection and add `metrics_detail` for the full - metrics payload, without changing canonical report schema `2.1`. -- Slim `health.dimensions` in granular `check_*` responses to the single dimension relevant to each tool. -- Keep hotspot `source_kind` aligned with canonical finding payloads, including fixture-scoped findings. -- Add envelope-level `base_uri` to `list_findings`, `list_hotspots`, and `check_*`, while removing repeated per-location - `uri` values from summary/normal finding payloads. -- Slim finding list payloads further: summary responses drop `priority_factors` and keep only `file` + `line` in - locations; normal responses keep `symbol` but still omit `uri` and `priority_factors`; `get_finding` remains full. -- Bump cache schema to `2.3` so stale per-file analysis entries from older metric semantics are ignored and rebuilt - instead of being treated as reusable cache hits. 
+- Agent-optimised payloads: slim inventory counts in summaries, `base_uri` envelope with relative locations, + single-dimension `health` in `check_*`, three-tier `detail_level` on finding cards, and `metrics` / `metrics_detail` + split — all without changing canonical report schema until the later `2.2` report-threshold update below. +- `cache.effective_freshness` marker and `get_production_triage` / `codeclone://latest/triage` for compact + production-first overview. +- Fix hotlist key resolution for `production_hotspots` and `test_fixture_hotspots`. +- Bump cache schema to `2.3` (stale metric entries rebuilt, not reused). + +### Report contract + +- Bump canonical report schema to `2.2`. +- Add canonical `meta.analysis_thresholds.design_findings` provenance and move threshold-aware design findings fully + into the canonical report, so MCP/HTML read the same design-finding universe instead of re-synthesizing it. +- Add `derived.overview.directory_hotspots` and render it in the HTML Overview tab as `Hotspots by Directory`. ### CLI -- Add `--changed-only`, `--diff-against`, and `--paths-from-git-diff` for changed-scope clone review and gating. -- Render changed-scope results as a first-class summary block in normal CLI output. +- `--changed-only`, `--diff-against`, `--paths-from-git-diff` for changed-scope review and gating with first-class + summary output. ### SARIF -- Stabilize `primaryLocationLineHash` by excluding line numbers from the hash material. -- Add run-unique `automationDetails.id`, `startTimeUtc`, and explicit result `kind: "fail"`. -- Move ancillary identity fields to SARIF `properties`; keep only `primaryLocationLineHash` in `partialFingerprints`. +- Stable `primaryLocationLineHash` (line numbers excluded), run-unique `automationDetails.id` / `startTimeUtc`, explicit + `kind: "fail"`, ancillary fields moved to `properties`. ### HTML report -- Add IDE picker (PyCharm, IntelliJ IDEA, VS Code, Cursor, Fleet, Zed) with localStorage persistence. 
-- Make file paths across all tabs clickable IDE deep links via `jetbrains://`, `vscode://`, and other protocol schemes. -- Add stable deep-link anchors (`finding-{finding_id}`) for clone and structural finding cards. +- IDE picker (PyCharm, IDEA, VS Code, Cursor, Fleet, Zed) with persistent selection; clickable file-path deep links + across all tabs; stable `finding-{id}` anchors. ### GitHub Action -- Ship composite GitHub Action v2 with configurable quality gates, SARIF upload to Code Scanning, and PR summary - comments. +- Composite Action v2: configurable quality gates, SARIF upload to Code Scanning, PR summary comments. ## [2.0.0b2] diff --git a/README.md b/README.md index 00db248..d358dac 100644 --- a/README.md +++ b/README.md @@ -18,9 +18,12 @@ --- -CodeClone provides comprehensive structural code quality analysis for Python. It detects architectural -duplication via normalized AST and Control Flow Graphs, computes quality metrics, and enforces CI gates — -all with baseline-aware governance that separates **known** technical debt from **new** regressions. +CodeClone provides deterministic structural code quality analysis for Python. +It detects architectural duplication via normalized AST and Control Flow Graphs, +computes quality metrics, and enforces CI gates — all with **baseline-aware +governance** that separates **known** technical debt from **new** regressions. +An optional read-only MCP interface exposes the same analysis pipeline to AI agents, IDEs, and other MCP-capable +clients. 
Docs: [orenlab.github.io/codeclone](https://orenlab.github.io/codeclone/) · Live sample report: @@ -43,7 +46,7 @@ Live sample report: - **Reports** — interactive HTML, deterministic JSON/TXT plus Markdown and SARIF projections from one canonical report - **MCP server** — optional read-only MCP surface for AI agents, IDEs, and MCP-capable clients - **CI-first** — deterministic output, stable ordering, exit code contract, pre-commit support -- **Fast*** — incremental caching, parallel processing, warm-run optimization, and reproducible benchmark coverage +- **Fast** — incremental caching, parallel processing, warm-run optimization, and reproducible benchmark coverage ## Quick Start @@ -92,9 +95,12 @@ codeclone . --update-baseline codeclone . --ci ``` -The `--ci` preset equals `--fail-on-new --no-color --quiet`. +
    +What --ci enables +The --ci preset equals --fail-on-new --no-color --quiet. When a trusted metrics baseline is loaded, CI mode also enables -`--fail-on-new-metrics`. +--fail-on-new-metrics. +
    ### GitHub Action @@ -161,12 +167,9 @@ codeclone-mcp --transport stdio codeclone-mcp --transport streamable-http --port 8000 ``` -19 tools + 9 resources — deterministic, baseline-aware, read-only. -Never mutates source files, baselines, or repo state. -List-style finding responses expose a single `base_uri` per envelope and keep -summary locations compact; `get_finding` remains the full-detail endpoint. -`get_run_summary` and `analyze_changed_paths` return slim inventory counts; -`get_report_section(metrics)` returns summary-only, `metrics_detail` gives the full dump. +20 tools + 10 resources — deterministic, baseline-aware, and read-only. Never mutates source files, baselines, or repo +state. +Payloads are optimised for LLM context: compact summaries by default, full detail on demand. Docs: [MCP usage guide](https://orenlab.github.io/codeclone/mcp/) @@ -263,16 +266,21 @@ class Middleware: # codeclone: ignore[dead-code] Dynamic/runtime false positives are resolved via explicit inline suppressions, not via broad heuristics.
    -JSON report shape (v2.1) +JSON report shape (v2.2) ```json { - "report_schema_version": "2.1", + "report_schema_version": "2.2", "meta": { "codeclone_version": "2.0.0b3", "project_name": "...", "scan_root": ".", "report_mode": "full", + "analysis_thresholds": { + "design_findings": { + "...": "..." + } + }, "baseline": { "...": "..." }, @@ -330,7 +338,8 @@ Dynamic/runtime false positives are resolved via explicit inline suppressions, n "families": {}, "top_risks": [], "source_scope_breakdown": {}, - "health_snapshot": {} + "health_snapshot": {}, + "directory_hotspots": {} }, "hotlists": { "most_actionable_ids": [], @@ -386,7 +395,7 @@ CFG semantics: [CFG semantics](https://orenlab.github.io/codeclone/cfg/) | Docker benchmark contract | [Benchmarking contract](https://orenlab.github.io/codeclone/book/18-benchmarking/) | | Determinism | [Determinism policy](https://orenlab.github.io/codeclone/book/12-determinism/) | -## * Benchmarking +## Benchmarking Notes
    Reproducible Docker Benchmark diff --git a/codeclone/_cli_meta.py b/codeclone/_cli_meta.py index 07cea7d..f112d8d 100644 --- a/codeclone/_cli_meta.py +++ b/codeclone/_cli_meta.py @@ -11,6 +11,11 @@ from typing import TYPE_CHECKING, TypedDict from .baseline import Baseline, current_python_tag +from .contracts import ( + DEFAULT_REPORT_DESIGN_COHESION_THRESHOLD, + DEFAULT_REPORT_DESIGN_COMPLEXITY_THRESHOLD, + DEFAULT_REPORT_DESIGN_COUPLING_THRESHOLD, +) if TYPE_CHECKING: from pathlib import Path @@ -70,6 +75,9 @@ class ReportMeta(TypedDict): health_grade: str | None analysis_mode: str metrics_computed: list[str] + design_complexity_threshold: int + design_coupling_threshold: int + design_cohesion_threshold: int analysis_started_at_utc: str | None report_generated_at_utc: str @@ -95,6 +103,9 @@ def _build_report_meta( health_grade: str | None, analysis_mode: str, metrics_computed: tuple[str, ...], + design_complexity_threshold: int = DEFAULT_REPORT_DESIGN_COMPLEXITY_THRESHOLD, + design_coupling_threshold: int = DEFAULT_REPORT_DESIGN_COUPLING_THRESHOLD, + design_cohesion_threshold: int = DEFAULT_REPORT_DESIGN_COHESION_THRESHOLD, analysis_started_at_utc: str | None, report_generated_at_utc: str, ) -> ReportMeta: @@ -138,6 +149,9 @@ def _build_report_meta( "health_grade": health_grade, "analysis_mode": analysis_mode, "metrics_computed": list(metrics_computed), + "design_complexity_threshold": design_complexity_threshold, + "design_coupling_threshold": design_coupling_threshold, + "design_cohesion_threshold": design_cohesion_threshold, "analysis_started_at_utc": analysis_started_at_utc, "report_generated_at_utc": report_generated_at_utc, } diff --git a/codeclone/_html_report/_sections/_overview.py b/codeclone/_html_report/_sections/_overview.py index 7c91b73..cbb09eb 100644 --- a/codeclone/_html_report/_sections/_overview.py +++ b/codeclone/_html_report/_sections/_overview.py @@ -9,10 +9,11 @@ from __future__ import annotations import math +from collections.abc import 
Mapping from typing import TYPE_CHECKING from ... import _coerce -from ..._html_badges import _stat_card +from ..._html_badges import _source_kind_badge_html, _stat_card from ..._html_escape import _escape_html from .._components import ( Tone, @@ -31,6 +32,36 @@ _as_mapping = _coerce.as_mapping _as_sequence = _coerce.as_sequence +_DIRECTORY_BUCKET_LABELS: dict[str, str] = { + "all": "All Findings", + "clones": "Clone Groups", + "structural": "Structural Findings", + "complexity": "High Complexity", + "cohesion": "Low Cohesion", + "coupling": "High Coupling", + "dead_code": "Dead Code", + "dependency": "Dependency Cycles", +} +_DIRECTORY_BUCKET_ORDER: tuple[str, ...] = ( + "all", + "clones", + "structural", + "complexity", + "cohesion", + "coupling", + "dead_code", + "dependency", +) +_DIRECTORY_KIND_LABELS: dict[str, str] = { + "clones": "clones", + "structural": "structural", + "dead_code": "dead code", + "complexity": "complexity", + "cohesion": "cohesion", + "coupling": "coupling", + "dependency": "dependency", +} + def _health_gauge_html( score: float, grade: str, *, health_delta: int | None = None @@ -347,6 +378,102 @@ def _issue_breakdown_html( return '
    ' + "".join(parts) + "
    " +def _directory_kind_summary(kind_breakdown: Mapping[str, object]) -> str: + rows = [ + (str(kind), _as_int(count)) + for kind, count in kind_breakdown.items() + if _as_int(count) > 0 + ] + rows.sort(key=lambda item: (-item[1], item[0])) + top_rows = rows[:2] + if not top_rows: + return "" + return "; ".join( + f"{count} {_DIRECTORY_KIND_LABELS.get(kind, kind)}" for kind, count in top_rows + ) + + +def _directory_hotspot_bucket_body(bucket: str, payload: Mapping[str, object]) -> str: + items = list(map(_as_mapping, _as_sequence(payload.get("items")))) + if not items: + return "" + returned = _as_int(payload.get("returned")) + total_directories = _as_int(payload.get("total_directories")) + has_more = bool(payload.get("has_more")) + subtitle_html = "" + if has_more and returned > 0 and total_directories > returned: + subtitle_html = ( + '
    ' + f"top {returned} of {total_directories} directories" + "
    " + ) + rows: list[str] = [] + for item in items: + path = str(item.get("path", ".")).strip() or "." + source_scope = _as_mapping(item.get("source_scope")) + dominant_kind = ( + str(source_scope.get("dominant_kind", "other")).strip() or "other" + ) + detail = ( + f"{_as_int(item.get('finding_groups'))} groups; " + f"{_as_int(item.get('affected_items'))} items; " + f"{_as_int(item.get('files'))} files; " + f"{_as_float(item.get('share_pct')):.1f}%" + ) + kind_summary = "" + if bucket == "all": + kind_summary = _directory_kind_summary( + _as_mapping(item.get("kind_breakdown")) + ) + kind_html = ( + f'
    {_escape_html(kind_summary)}
    ' + if kind_summary + else "" + ) + rows.append( + "
  • " + '
    ' + f"{_escape_html(path)} {_source_kind_badge_html(dominant_kind)}" + "
    " + f'
    {_escape_html(detail)}
    ' + f"{kind_html}" + "
  • " + ) + return ( + subtitle_html + '
      ' + "".join(rows) + "
    " + ) + + +def _directory_hotspots_section(ctx: ReportContext) -> str: + directory_hotspots = _as_mapping(ctx.overview_data.get("directory_hotspots")) + if not directory_hotspots: + return "" + cards: list[str] = [] + for bucket in _DIRECTORY_BUCKET_ORDER: + payload = _as_mapping(directory_hotspots.get(bucket)) + body_html = _directory_hotspot_bucket_body(bucket, payload) + if not body_html: + continue + cards.append( + overview_summary_item_html( + label=_DIRECTORY_BUCKET_LABELS.get(bucket, bucket), + body_html=body_html, + ) + ) + if not cards: + return "" + return ( + '
    ' + + overview_cluster_header( + "Hotspots by Directory", + "Directories with the highest concentration of findings by category.", + ) + + '
    ' + + "".join(cards) + + "
    " + ) + + def render_overview_panel(ctx: ReportContext) -> str: """Build the Overview tab panel HTML.""" complexity_summary = _as_mapping(ctx.complexity_map.get("summary")) @@ -622,6 +749,7 @@ def _baselined_detail( + "
    " + "" + executive + + _directory_hotspots_section(ctx) + _analytics_section(ctx) ) diff --git a/codeclone/cli.py b/codeclone/cli.py index 1aa9d66..d06a3e4 100644 --- a/codeclone/cli.py +++ b/codeclone/cli.py @@ -94,7 +94,13 @@ ) from .baseline import Baseline from .cache import Cache, CacheStatus, build_segment_report_projection -from .contracts import ISSUES_URL, ExitCode +from .contracts import ( + DEFAULT_REPORT_DESIGN_COHESION_THRESHOLD, + DEFAULT_REPORT_DESIGN_COMPLEXITY_THRESHOLD, + DEFAULT_REPORT_DESIGN_COUPLING_THRESHOLD, + ISSUES_URL, + ExitCode, +) from .errors import CacheError if TYPE_CHECKING: @@ -1398,6 +1404,9 @@ def _prepare_run_inputs() -> tuple[ ), analysis_mode=("clones_only" if args.skip_metrics else "full"), metrics_computed=_metrics_computed(args), + design_complexity_threshold=DEFAULT_REPORT_DESIGN_COMPLEXITY_THRESHOLD, + design_coupling_threshold=DEFAULT_REPORT_DESIGN_COUPLING_THRESHOLD, + design_cohesion_threshold=DEFAULT_REPORT_DESIGN_COHESION_THRESHOLD, analysis_started_at_utc=analysis_started_at_utc, report_generated_at_utc=report_generated_at_utc, ) diff --git a/codeclone/contracts.py b/codeclone/contracts.py index 91e5109..a75c22d 100644 --- a/codeclone/contracts.py +++ b/codeclone/contracts.py @@ -13,12 +13,15 @@ BASELINE_FINGERPRINT_VERSION: Final = "1" CACHE_VERSION: Final = "2.3" -REPORT_SCHEMA_VERSION: Final = "2.1" +REPORT_SCHEMA_VERSION: Final = "2.2" METRICS_BASELINE_SCHEMA_VERSION: Final = "1.0" DEFAULT_COMPLEXITY_THRESHOLD: Final = 20 DEFAULT_COUPLING_THRESHOLD: Final = 10 DEFAULT_COHESION_THRESHOLD: Final = 4 +DEFAULT_REPORT_DESIGN_COMPLEXITY_THRESHOLD: Final = 20 +DEFAULT_REPORT_DESIGN_COUPLING_THRESHOLD: Final = 10 +DEFAULT_REPORT_DESIGN_COHESION_THRESHOLD: Final = 4 DEFAULT_HEALTH_THRESHOLD: Final = 60 COMPLEXITY_RISK_LOW_MAX: Final = 10 diff --git a/codeclone/mcp_server.py b/codeclone/mcp_server.py index ed00137..711c6af 100644 --- a/codeclone/mcp_server.py +++ b/codeclone/mcp_server.py @@ -113,7 +113,8 @@ def 
resource( title="Analyze Repository", description=( "Run a deterministic CodeClone analysis for a repository and register " - "the result as the latest MCP run." + "the result as the latest MCP run. Tip: set cache_policy='off' to " + "bypass cache and get fully fresh results." ), annotations=session_tool, structured_output=True, @@ -171,7 +172,8 @@ def analyze_repository( title="Analyze Changed Paths", description=( "Run a deterministic CodeClone analysis and return a changed-files " - "projection using explicit paths or a git diff ref." + "projection using explicit paths or a git diff ref. Tip: set " + "cache_policy='off' to bypass cache and get fully fresh results." ), annotations=session_tool, structured_output=True, @@ -234,6 +236,27 @@ def analyze_changed_paths( def get_run_summary(run_id: str | None = None) -> dict[str, object]: return service.get_run_summary(run_id) + @tool( + title="Get Production Triage", + description=( + "Return a production-first triage view over a stored run: health, " + "effective cache freshness, production hotspots, and production " + "suggestions, while keeping global source-kind counters visible." 
+ ), + annotations=read_only_tool, + structured_output=True, + ) + def get_production_triage( + run_id: str | None = None, + max_hotspots: int = 3, + max_suggestions: int = 3, + ) -> dict[str, object]: + return service.get_production_triage( + run_id=run_id, + max_hotspots=max_hotspots, + max_suggestions=max_suggestions, + ) + @tool( title="Evaluate Gates", description=( @@ -643,6 +666,15 @@ def latest_gates_resource() -> str: def latest_changed_resource() -> str: return service.read_resource("codeclone://latest/changed") + @resource( + "codeclone://latest/triage", + title="Latest Production Triage", + description=("Production-first triage view for the latest CodeClone MCP run."), + mime_type="application/json", + ) + def latest_triage_resource() -> str: + return service.read_resource("codeclone://latest/triage") + @resource( "codeclone://schema", title="CodeClone Report Schema", diff --git a/codeclone/mcp_service.py b/codeclone/mcp_service.py index aaeafc3..90edbb0 100644 --- a/codeclone/mcp_service.py +++ b/codeclone/mcp_service.py @@ -10,7 +10,7 @@ import subprocess from argparse import Namespace from collections import OrderedDict -from collections.abc import Mapping, Sequence +from collections.abc import Iterable, Mapping, Sequence from dataclasses import dataclass from pathlib import Path from threading import RLock @@ -46,9 +46,9 @@ from .baseline import Baseline from .cache import Cache, CacheStatus from .contracts import ( - DEFAULT_COHESION_THRESHOLD, - DEFAULT_COMPLEXITY_THRESHOLD, - DEFAULT_COUPLING_THRESHOLD, + DEFAULT_REPORT_DESIGN_COHESION_THRESHOLD, + DEFAULT_REPORT_DESIGN_COMPLEXITY_THRESHOLD, + DEFAULT_REPORT_DESIGN_COUPLING_THRESHOLD, REPORT_SCHEMA_VERSION, ExitCode, ) @@ -78,6 +78,14 @@ SEVERITY_INFO, SEVERITY_WARNING, ) +from .domain.source_scope import ( + SOURCE_KIND_FIXTURES, + SOURCE_KIND_MIXED, + SOURCE_KIND_ORDER, + SOURCE_KIND_OTHER, + SOURCE_KIND_PRODUCTION, + SOURCE_KIND_TESTS, +) from .models import MetricsDiff, ProjectMetrics, 
Suggestion from .pipeline import ( GatingResult, @@ -91,7 +99,6 @@ report, ) from .report.json_contract import ( - _source_scope_from_filepaths, clone_group_id, dead_code_group_id, design_group_id, @@ -101,6 +108,7 @@ AnalysisMode = Literal["full", "clones_only"] CachePolicy = Literal["reuse", "refresh", "off"] +FreshnessKind = Literal["fresh", "mixed", "reused"] HotlistKind = Literal[ "most_actionable", "highest_spread", @@ -211,6 +219,19 @@ } ) _VALID_SEVERITIES = frozenset({SEVERITY_CRITICAL, SEVERITY_WARNING, SEVERITY_INFO}) +_SOURCE_KIND_BREAKDOWN_ORDER: Final[tuple[str, ...]] = ( + SOURCE_KIND_PRODUCTION, + SOURCE_KIND_TESTS, + SOURCE_KIND_FIXTURES, + SOURCE_KIND_MIXED, + SOURCE_KIND_OTHER, +) +_HOTLIST_REPORT_KEYS: Final[dict[str, str]] = { + "most_actionable": "most_actionable_ids", + "highest_spread": "highest_spread_ids", + "production_hotspots": "production_hotspot_ids", + "test_fixture_hotspots": "test_fixture_hotspot_ids", +} _CHECK_TO_DIMENSION: Final[dict[str, str]] = { "cohesion": "cohesion", "coupling": "coupling", @@ -223,141 +244,6 @@ _as_str = _coerce.as_str -def _design_singleton_group_payload( - *, - category: str, - kind: str, - severity: str, - qualname: str, - filepath: str, - start_line: int, - end_line: int, - item_data: Mapping[str, object], - facts: Mapping[str, object], - scan_root: str, -) -> dict[str, object]: - relative_path = filepath - return { - "id": design_group_id(category, qualname), - "family": FAMILY_DESIGN, - "category": category, - "kind": kind, - "severity": severity, - "confidence": CONFIDENCE_HIGH, - "priority": 2.0 if severity == SEVERITY_WARNING else 3.0, - "count": 1, - "source_scope": _source_scope_from_filepaths( - (relative_path,), - scan_root=scan_root, - ), - "spread": {"files": 1, "functions": 1}, - "items": [ - { - "relative_path": relative_path, - "qualname": qualname, - "start_line": start_line, - "end_line": end_line, - **item_data, - } - ], - "facts": dict(facts), - } - - -def 
_complexity_group_for_threshold_payload( - item_map: Mapping[str, object], - *, - threshold: int, - scan_root: str, -) -> dict[str, object] | None: - cc = _as_int(item_map.get("cyclomatic_complexity", 1), 1) - if cc <= threshold: - return None - severity = SEVERITY_CRITICAL if cc > max(40, threshold * 2) else SEVERITY_WARNING - return _design_singleton_group_payload( - category=CATEGORY_COMPLEXITY, - kind="function_hotspot", - severity=severity, - qualname=str(item_map.get("qualname", "")), - filepath=str(item_map.get("relative_path", "")), - start_line=_as_int(item_map.get("start_line", 0), 0), - end_line=_as_int(item_map.get("end_line", 0), 0), - scan_root=scan_root, - item_data={ - "cyclomatic_complexity": cc, - "nesting_depth": _as_int(item_map.get("nesting_depth", 0), 0), - "risk": str(item_map.get("risk", "")), - }, - facts={ - "cyclomatic_complexity": cc, - "nesting_depth": _as_int(item_map.get("nesting_depth", 0), 0), - }, - ) - - -def _coupling_group_for_threshold_payload( - item_map: Mapping[str, object], - *, - threshold: int, - scan_root: str, -) -> dict[str, object] | None: - cbo = _as_int(item_map.get("cbo", 0), 0) - if cbo <= threshold: - return None - coupled_classes = list(_coerce.as_sequence(item_map.get("coupled_classes"))) - return _design_singleton_group_payload( - category=CATEGORY_COUPLING, - kind="class_hotspot", - severity=SEVERITY_WARNING, - qualname=str(item_map.get("qualname", "")), - filepath=str(item_map.get("relative_path", "")), - start_line=_as_int(item_map.get("start_line", 0), 0), - end_line=_as_int(item_map.get("end_line", 0), 0), - scan_root=scan_root, - item_data={ - "cbo": cbo, - "risk": str(item_map.get("risk", "")), - "coupled_classes": coupled_classes, - }, - facts={ - "cbo": cbo, - "coupled_classes": coupled_classes, - }, - ) - - -def _cohesion_group_for_threshold_payload( - item_map: Mapping[str, object], - *, - threshold: int, - scan_root: str, -) -> dict[str, object] | None: - lcom4 = _as_int(item_map.get("lcom4", 0), 
0) - if lcom4 <= threshold: - return None - return _design_singleton_group_payload( - category=CATEGORY_COHESION, - kind="class_hotspot", - severity=SEVERITY_WARNING, - qualname=str(item_map.get("qualname", "")), - filepath=str(item_map.get("relative_path", "")), - start_line=_as_int(item_map.get("start_line", 0), 0), - end_line=_as_int(item_map.get("end_line", 0), 0), - scan_root=scan_root, - item_data={ - "lcom4": lcom4, - "risk": str(item_map.get("risk", "")), - "method_count": _as_int(item_map.get("method_count", 0), 0), - "instance_var_count": _as_int(item_map.get("instance_var_count", 0), 0), - }, - facts={ - "lcom4": lcom4, - "method_count": _as_int(item_map.get("method_count", 0), 0), - "instance_var_count": _as_int(item_map.get("instance_var_count", 0), 0), - }, - ) - - def _suggestion_finding_id_payload(suggestion: object) -> str: if not hasattr(suggestion, "finding_family"): return "" @@ -658,6 +544,30 @@ def analyze_repository(self, request: MCPAnalysisRequest) -> dict[str, object]: ), analysis_mode=request.analysis_mode, metrics_computed=self._metrics_computed(request.analysis_mode), + design_complexity_threshold=_as_int( + getattr( + args, + "design_complexity_threshold", + DEFAULT_REPORT_DESIGN_COMPLEXITY_THRESHOLD, + ), + DEFAULT_REPORT_DESIGN_COMPLEXITY_THRESHOLD, + ), + design_coupling_threshold=_as_int( + getattr( + args, + "design_coupling_threshold", + DEFAULT_REPORT_DESIGN_COUPLING_THRESHOLD, + ), + DEFAULT_REPORT_DESIGN_COUPLING_THRESHOLD, + ), + design_cohesion_threshold=_as_int( + getattr( + args, + "design_cohesion_threshold", + DEFAULT_REPORT_DESIGN_COHESION_THRESHOLD, + ), + DEFAULT_REPORT_DESIGN_COHESION_THRESHOLD, + ), analysis_started_at_utc=analysis_started_at_utc, report_generated_at_utc=_current_report_timestamp_utc(), ) @@ -1110,6 +1020,70 @@ def list_hotspots( "items": [dict(self._as_mapping(item)) for item in rows[:normalized_limit]], } + def get_production_triage( + self, + *, + run_id: str | None = None, + max_hotspots: int = 
3, + max_suggestions: int = 3, + ) -> dict[str, object]: + record = self._runs.get(run_id) + summary = self._summary_payload(dict(record.summary)) + findings = self._base_findings(record) + findings_breakdown = self._source_kind_breakdown( + self._finding_source_kind(finding) for finding in findings + ) + suggestion_rows = self._triage_suggestion_rows(record) + suggestion_breakdown = self._source_kind_breakdown( + row.get("source_kind") for row in suggestion_rows + ) + hotspot_limit = max(1, min(max_hotspots, 10)) + suggestion_limit = max(1, min(max_suggestions, 10)) + production_hotspots = self._hotspot_rows( + record=record, + kind="production_hotspots", + detail_level="summary", + changed_paths=(), + exclude_reviewed=False, + ) + production_suggestions = [ + dict(row) + for row in suggestion_rows + if str(row.get("source_kind", "")) == SOURCE_KIND_PRODUCTION + ] + return { + "run_id": record.run_id, + "base_uri": record.root.as_uri(), + "health": dict(self._as_mapping(summary.get("health"))), + "cache": dict(self._as_mapping(summary.get("cache"))), + "findings": { + "total": len(findings), + "by_source_kind": findings_breakdown, + "outside_focus": len(findings) + - findings_breakdown[SOURCE_KIND_PRODUCTION], + }, + "top_hotspots": { + "kind": "production_hotspots", + "available": len(production_hotspots), + "returned": min(len(production_hotspots), hotspot_limit), + "items": [ + dict(self._as_mapping(item)) + for item in production_hotspots[:hotspot_limit] + ], + }, + "suggestions": { + "total": len(suggestion_rows), + "by_source_kind": suggestion_breakdown, + "outside_focus": len(suggestion_rows) + - suggestion_breakdown[SOURCE_KIND_PRODUCTION], + }, + "top_suggestions": { + "available": len(production_suggestions), + "returned": min(len(production_suggestions), suggestion_limit), + "items": production_suggestions[:suggestion_limit], + }, + } + def generate_pr_summary( self, *, @@ -1467,6 +1441,14 @@ def read_resource(self, uri: str) -> str: indent=2, 
sort_keys=True, ) + if uri == "codeclone://latest/triage": + latest = self._runs.get() + return json.dumps( + self.get_production_triage(run_id=latest.run_id), + ensure_ascii=False, + indent=2, + sort_keys=True, + ) latest_prefix = "codeclone://latest/" run_prefix = "codeclone://runs/" if uri.startswith(latest_prefix): @@ -1490,6 +1472,10 @@ def _render_resource(self, record: MCPRunRecord, suffix: str) -> str: indent=2, sort_keys=True, ) + if suffix == "triage": + raise MCPServiceContractError( + "Production triage is exposed only as codeclone://latest/triage." + ) if suffix == "health": return json.dumps( self._as_mapping(record.summary.get("health")), @@ -1747,7 +1733,6 @@ def _base_findings(self, record: MCPRunRecord) -> list[dict[str, object]]: findings = self._as_mapping(report_document.get("findings")) groups = self._as_mapping(findings.get("groups")) clone_groups = self._as_mapping(groups.get(FAMILY_CLONES)) - design_groups = self._design_groups_for_record(record, groups=groups) return [ *self._dict_list(clone_groups.get("functions")), *self._dict_list(clone_groups.get("blocks")), @@ -1758,152 +1743,9 @@ def _base_findings(self, record: MCPRunRecord) -> list[dict[str, object]]: *self._dict_list( self._as_mapping(groups.get(FAMILY_DEAD_CODE)).get("groups") ), - *design_groups, + *self._dict_list(self._as_mapping(groups.get(FAMILY_DESIGN)).get("groups")), ] - def _design_groups_for_record( - self, - record: MCPRunRecord, - *, - groups: Mapping[str, object], - ) -> list[dict[str, object]]: - canonical_design_groups = self._dict_list( - self._as_mapping(groups.get(FAMILY_DESIGN)).get("groups") - ) - if ( - record.request.complexity_threshold is None - and record.request.coupling_threshold is None - and record.request.cohesion_threshold is None - ): - return canonical_design_groups - - metrics = self._as_mapping(record.report_document.get("metrics")) - families = self._as_mapping(metrics.get("families")) - complexity_threshold = ( - 
record.request.complexity_threshold - if record.request.complexity_threshold is not None - else DEFAULT_COMPLEXITY_THRESHOLD - ) - coupling_threshold = ( - record.request.coupling_threshold - if record.request.coupling_threshold is not None - else DEFAULT_COUPLING_THRESHOLD - ) - cohesion_threshold = ( - record.request.cohesion_threshold - if record.request.cohesion_threshold is not None - else DEFAULT_COHESION_THRESHOLD - ) - groups_out: list[dict[str, object]] = [] - for item in self._as_sequence( - self._as_mapping(families.get(CATEGORY_COMPLEXITY)).get("items") - ): - group = self._complexity_group_for_threshold( - self._as_mapping(item), - threshold=complexity_threshold, - scan_root=str(record.root), - ) - if group is not None: - groups_out.append(group) - for item in self._as_sequence( - self._as_mapping(families.get(CATEGORY_COUPLING)).get("items") - ): - group = self._coupling_group_for_threshold( - self._as_mapping(item), - threshold=coupling_threshold, - scan_root=str(record.root), - ) - if group is not None: - groups_out.append(group) - for item in self._as_sequence( - self._as_mapping(families.get(CATEGORY_COHESION)).get("items") - ): - group = self._cohesion_group_for_threshold( - self._as_mapping(item), - threshold=cohesion_threshold, - scan_root=str(record.root), - ) - if group is not None: - groups_out.append(group) - groups_out.extend( - group - for group in canonical_design_groups - if str(group.get("category", "")) == CATEGORY_DEPENDENCY - ) - groups_out.sort( - key=lambda group: ( - -_as_float(group.get("priority", 0.0), 0.0), - str(group.get("id", "")), - ) - ) - return groups_out - - def _design_singleton_group( - self, - *, - category: str, - kind: str, - severity: str, - qualname: str, - filepath: str, - start_line: int, - end_line: int, - item_data: Mapping[str, object], - facts: Mapping[str, object], - scan_root: str, - ) -> dict[str, object]: - return _design_singleton_group_payload( - category=category, - kind=kind, - severity=severity, 
- qualname=qualname, - filepath=filepath, - start_line=start_line, - end_line=end_line, - item_data=item_data, - facts=facts, - scan_root=scan_root, - ) - - def _complexity_group_for_threshold( - self, - item_map: Mapping[str, object], - *, - threshold: int, - scan_root: str, - ) -> dict[str, object] | None: - return _complexity_group_for_threshold_payload( - item_map, - threshold=threshold, - scan_root=scan_root, - ) - - def _coupling_group_for_threshold( - self, - item_map: Mapping[str, object], - *, - threshold: int, - scan_root: str, - ) -> dict[str, object] | None: - return _coupling_group_for_threshold_payload( - item_map, - threshold=threshold, - scan_root=scan_root, - ) - - def _cohesion_group_for_threshold( - self, - item_map: Mapping[str, object], - *, - threshold: int, - scan_root: str, - ) -> dict[str, object] | None: - return _cohesion_group_for_threshold_payload( - item_map, - threshold=threshold, - scan_root=scan_root, - ) - def _query_findings( self, *, @@ -2488,7 +2330,9 @@ def _hotspot_rows( ) ] else: - hotlist_key = f"{kind}_ids" + hotlist_key = _HOTLIST_REPORT_KEYS.get(kind) + if hotlist_key is None: + return [] ordered_ids = [ str(item) for item in self._as_sequence(hotlists.get(hotlist_key)) @@ -2744,6 +2588,48 @@ def _granular_payload( "items": bounded_items, } + def _normalized_source_kind(self, value: object) -> str: + normalized = str(value).strip().lower() + if normalized in SOURCE_KIND_ORDER: + return normalized + return SOURCE_KIND_OTHER + + def _finding_source_kind(self, finding: Mapping[str, object]) -> str: + source_scope = self._as_mapping(finding.get("source_scope")) + return self._normalized_source_kind(source_scope.get("dominant_kind")) + + def _source_kind_breakdown( + self, + source_kinds: Iterable[object], + ) -> dict[str, int]: + breakdown = dict.fromkeys(_SOURCE_KIND_BREAKDOWN_ORDER, 0) + for value in source_kinds: + breakdown[self._normalized_source_kind(value)] += 1 + return breakdown + + def _triage_suggestion_rows(self, 
record: MCPRunRecord) -> list[dict[str, object]]: + derived = self._as_mapping(record.report_document.get("derived")) + canonical_rows = self._dict_list(derived.get("suggestions")) + suggestion_source_kinds = { + self._suggestion_finding_id(suggestion): self._normalized_source_kind( + getattr(suggestion, "source_kind", SOURCE_KIND_OTHER) + ) + for suggestion in record.suggestions + } + rows: list[dict[str, object]] = [] + for row in canonical_rows: + finding_id = str(row.get("finding_id", "")) + rows.append( + { + **row, + "source_kind": suggestion_source_kinds.get( + finding_id, + SOURCE_KIND_OTHER, + ), + } + ) + return rows + def _schema_resource_payload(self) -> dict[str, object]: return { "$schema": "https://json-schema.org/draft/2020-12/schema", @@ -2847,6 +2733,9 @@ def _build_args(self, *, root_path: Path, request: MCPAnalysisRequest) -> Namesp fail_dead_code=False, fail_health=-1, fail_on_new_metrics=False, + design_complexity_threshold=DEFAULT_REPORT_DESIGN_COMPLEXITY_THRESHOLD, + design_coupling_threshold=DEFAULT_REPORT_DESIGN_COUPLING_THRESHOLD, + design_cohesion_threshold=DEFAULT_REPORT_DESIGN_COHESION_THRESHOLD, update_metrics_baseline=False, metrics_baseline=DEFAULT_BASELINE_PATH, skip_metrics=False, @@ -2909,6 +2798,9 @@ def _apply_request_overrides( "segment_min_stmt": request.segment_min_stmt, "max_baseline_size_mb": request.max_baseline_size_mb, "max_cache_size_mb": request.max_cache_size_mb, + "design_complexity_threshold": request.complexity_threshold, + "design_coupling_threshold": request.coupling_threshold, + "design_cohesion_threshold": request.cohesion_threshold, } for key, value in override_map.items(): if value is not None: @@ -3051,7 +2943,7 @@ def _build_run_summary_payload( metrics = self._as_mapping(report_document.get("metrics")) metrics_summary = self._as_mapping(metrics.get("summary")) summary = self._as_mapping(findings.get("summary")) - return { + payload = { "run_id": run_id, "root": str(root_path), "analysis_mode": 
request.analysis_mode, @@ -3106,17 +2998,48 @@ def _build_run_summary_payload( "warnings": list(warnings), "failures": list(failures), } + payload["cache"] = self._summary_cache_payload(payload) + return payload def _summary_payload( self, summary: Mapping[str, object], ) -> dict[str, object]: payload = dict(summary) + cache = self._as_mapping(payload.get("cache")) + if cache: + payload["cache"] = self._summary_cache_payload(summary) inventory = self._as_mapping(payload.get("inventory")) if inventory: payload["inventory"] = self._slim_inventory(inventory) return payload + def _summary_cache_payload( + self, + summary: Mapping[str, object], + ) -> dict[str, object]: + cache = dict(self._as_mapping(summary.get("cache"))) + if not cache: + return {} + cache["effective_freshness"] = self._effective_freshness(summary) + return cache + + def _effective_freshness( + self, + summary: Mapping[str, object], + ) -> FreshnessKind: + inventory = self._as_mapping(summary.get("inventory")) + files = self._as_mapping(inventory.get("files")) + analyzed = max(0, _as_int(files.get("analyzed", 0), 0)) + cached = max(0, _as_int(files.get("cached", 0), 0)) + cache = self._as_mapping(summary.get("cache")) + cache_used = bool(cache.get("used")) + if cache_used and cached > 0 and analyzed == 0: + return "reused" + if cache_used and cached > 0 and analyzed > 0: + return "mixed" + return "fresh" + def _slim_inventory( self, inventory: Mapping[str, object], diff --git a/codeclone/report/derived.py b/codeclone/report/derived.py index 3aac986..24c994d 100644 --- a/codeclone/report/derived.py +++ b/codeclone/report/derived.py @@ -7,10 +7,14 @@ from __future__ import annotations from collections import Counter -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, cast from .. 
import _coerce from ..domain.source_scope import ( + IMPACT_SCOPE_MIXED, + IMPACT_SCOPE_NON_RUNTIME, + IMPACT_SCOPE_RUNTIME, + SOURCE_KIND_BREAKDOWN_KEYS, SOURCE_KIND_FIXTURES, SOURCE_KIND_MIXED, SOURCE_KIND_OTHER, @@ -38,6 +42,8 @@ "report_location_from_structural_occurrence", "representative_locations", "source_kind_breakdown", + "source_scope_from_counts", + "source_scope_from_locations", ] SOURCE_KIND_ORDER: dict[SourceKind, int] = { @@ -118,6 +124,64 @@ def combine_source_kinds( return SOURCE_KIND_MIXED +def normalized_source_kind(value: object) -> SourceKind: + source_kind_text = str(value).strip().lower() or SOURCE_KIND_OTHER + if source_kind_text == SOURCE_KIND_PRODUCTION: + return SOURCE_KIND_PRODUCTION + if source_kind_text == SOURCE_KIND_TESTS: + return SOURCE_KIND_TESTS + if source_kind_text == SOURCE_KIND_FIXTURES: + return SOURCE_KIND_FIXTURES + return SOURCE_KIND_OTHER + + +def source_scope_from_counts( + counts: Mapping[SourceKind, int] | Mapping[str, int], +) -> dict[str, object]: + normalized_counts = cast("Mapping[str, int]", counts) + + def _count(kind: str) -> int: + value = normalized_counts.get(kind, 0) + return int(value) + + breakdown = {kind: _count(kind) for kind in SOURCE_KIND_BREAKDOWN_KEYS} + present = tuple(kind for kind in SOURCE_KIND_BREAKDOWN_KEYS if breakdown[kind] > 0) + dominant_kind = ( + present[0] + if len(present) == 1 + else combine_source_kinds(present) + if present + else SOURCE_KIND_OTHER + ) + production_count = breakdown[SOURCE_KIND_PRODUCTION] + non_runtime_count = ( + breakdown[SOURCE_KIND_TESTS] + + breakdown[SOURCE_KIND_FIXTURES] + + breakdown[SOURCE_KIND_OTHER] + ) + match (production_count > 0, non_runtime_count == 0, production_count == 0): + case (True, True, _): + impact_scope = IMPACT_SCOPE_RUNTIME + case (_, _, True): + impact_scope = IMPACT_SCOPE_NON_RUNTIME + case _: + impact_scope = IMPACT_SCOPE_MIXED + return { + "dominant_kind": dominant_kind, + "breakdown": breakdown, + "impact_scope": impact_scope, + 
} + + +def source_scope_from_locations( + locations: Sequence[Mapping[str, object]], +) -> dict[str, object]: + counts: Counter[SourceKind] = Counter() + for location in locations: + counts[normalized_source_kind(location.get("source_kind"))] += 1 + return source_scope_from_counts(counts) + + def report_location_from_group_item( item: Mapping[str, object], *, diff --git a/codeclone/report/json_contract.py b/codeclone/report/json_contract.py index 765999c..8e5bc75 100644 --- a/codeclone/report/json_contract.py +++ b/codeclone/report/json_contract.py @@ -13,7 +13,12 @@ from typing import TYPE_CHECKING, Literal from .. import _coerce -from ..contracts import REPORT_SCHEMA_VERSION +from ..contracts import ( + DEFAULT_REPORT_DESIGN_COHESION_THRESHOLD, + DEFAULT_REPORT_DESIGN_COMPLEXITY_THRESHOLD, + DEFAULT_REPORT_DESIGN_COUPLING_THRESHOLD, + REPORT_SCHEMA_VERSION, +) from ..domain.findings import ( CATEGORY_COHESION, CATEGORY_COMPLEXITY, @@ -58,12 +63,18 @@ from ..structural_findings import normalize_structural_findings from ..suppressions import INLINE_CODECLONE_SUPPRESSION_SOURCE from .derived import ( - combine_source_kinds, group_spread, relative_report_path, report_location_from_group_item, report_location_from_structural_occurrence, ) +from .derived import ( + source_scope_from_counts as _report_source_scope_from_counts, +) +from .derived import ( + source_scope_from_locations as _report_source_scope_from_locations, +) +from .overview import build_directory_hotspots from .suggestions import classify_clone_type if TYPE_CHECKING: @@ -88,13 +99,6 @@ _as_mapping = _coerce.as_mapping _as_sequence = _coerce.as_sequence -_SOURCE_BREAKDOWN_KEYS_TYPED: tuple[SourceKind, ...] 
= ( - SOURCE_KIND_PRODUCTION, - SOURCE_KIND_TESTS, - SOURCE_KIND_FIXTURES, - SOURCE_KIND_OTHER, -) - def _optional_str(value: object) -> str | None: if value is None: @@ -103,6 +107,45 @@ def _optional_str(value: object) -> str | None: return text or None +def _coerced_nonnegative_threshold(value: object, *, default: int) -> int: + threshold = _as_int(value, default) + return threshold if threshold >= 0 else default + + +def _design_findings_thresholds_payload( + raw_meta: Mapping[str, object] | None, +) -> dict[str, object]: + meta = dict(raw_meta or {}) + return { + "design_findings": { + CATEGORY_COMPLEXITY: { + "metric": "cyclomatic_complexity", + "operator": ">", + "value": _coerced_nonnegative_threshold( + meta.get("design_complexity_threshold"), + default=DEFAULT_REPORT_DESIGN_COMPLEXITY_THRESHOLD, + ), + }, + CATEGORY_COUPLING: { + "metric": "cbo", + "operator": ">", + "value": _coerced_nonnegative_threshold( + meta.get("design_coupling_threshold"), + default=DEFAULT_REPORT_DESIGN_COUPLING_THRESHOLD, + ), + }, + CATEGORY_COHESION: { + "metric": "lcom4", + "operator": ">=", + "value": _coerced_nonnegative_threshold( + meta.get("design_cohesion_threshold"), + default=DEFAULT_REPORT_DESIGN_COHESION_THRESHOLD, + ), + }, + } + } + + def _normalize_path(value: str) -> str: return value.replace("\\", "/").strip() @@ -272,44 +315,17 @@ def _normalized_source_kind(value: object) -> SourceKind: def _source_scope_from_counts( counts: Mapping[SourceKind, int], ) -> dict[str, object]: - breakdown = {kind: counts[kind] for kind in _SOURCE_BREAKDOWN_KEYS_TYPED} - present = tuple( - kind for kind in _SOURCE_BREAKDOWN_KEYS_TYPED if breakdown[kind] > 0 - ) - dominant_kind = ( - present[0] - if len(present) == 1 - else combine_source_kinds(present) - if present - else SOURCE_KIND_OTHER - ) - production_count = breakdown[SOURCE_KIND_PRODUCTION] - non_runtime_count = ( - breakdown[SOURCE_KIND_TESTS] - + breakdown[SOURCE_KIND_FIXTURES] - + breakdown[SOURCE_KIND_OTHER] - ) - 
match (production_count > 0, non_runtime_count == 0, production_count == 0): - case (True, True, _): - impact_scope = IMPACT_SCOPE_RUNTIME - case (_, _, True): - impact_scope = IMPACT_SCOPE_NON_RUNTIME - case _: - impact_scope = IMPACT_SCOPE_MIXED - return { - "dominant_kind": dominant_kind, - "breakdown": breakdown, - "impact_scope": impact_scope, - } + return _report_source_scope_from_counts(counts) def _source_scope_from_locations( locations: Sequence[Mapping[str, object]], ) -> dict[str, object]: - counts: Counter[SourceKind] = Counter() - for location in locations: - counts[_normalized_source_kind(location.get("source_kind"))] += 1 - return _source_scope_from_counts(counts) + normalized_locations = [ + {"source_kind": _normalized_source_kind(location.get("source_kind"))} + for location in locations + ] + return _report_source_scope_from_locations(normalized_locations) def _collect_paths_from_metrics(metrics: Mapping[str, object]) -> set[str]: @@ -852,6 +868,7 @@ def _build_meta_payload( "analysis_mode": str(meta.get("analysis_mode", "full") or "full"), "report_mode": str(meta.get("report_mode", "full") or "full"), "computed_metric_families": metrics_computed, + "analysis_thresholds": _design_findings_thresholds_payload(meta), "baseline": { "path": baseline_path, "path_scope": baseline_path_scope, @@ -1424,10 +1441,11 @@ def _design_singleton_group( def _complexity_design_group( item_map: Mapping[str, object], *, + threshold: int, scan_root: str, ) -> dict[str, object] | None: cc = _as_int(item_map.get("cyclomatic_complexity"), 1) - if cc <= 20: + if cc <= threshold: return None qualname = str(item_map.get("qualname", "")) filepath = str(item_map.get("relative_path", "")) @@ -1457,10 +1475,11 @@ def _complexity_design_group( def _coupling_design_group( item_map: Mapping[str, object], *, + threshold: int, scan_root: str, ) -> dict[str, object] | None: cbo = _as_int(item_map.get("cbo")) - if cbo <= 10: + if cbo <= threshold: return None qualname = 
str(item_map.get("qualname", "")) filepath = str(item_map.get("relative_path", "")) @@ -1489,10 +1508,11 @@ def _coupling_design_group( def _cohesion_design_group( item_map: Mapping[str, object], *, + threshold: int, scan_root: str, ) -> dict[str, object] | None: lcom4 = _as_int(item_map.get("lcom4")) - if lcom4 <= 3: + if lcom4 < threshold: return None qualname = str(item_map.get("qualname", "")) filepath = str(item_map.get("relative_path", "")) @@ -1568,26 +1588,52 @@ def _dependency_design_group( def _build_design_groups( metrics_payload: Mapping[str, object], *, + design_thresholds: Mapping[str, object] | None = None, scan_root: str, ) -> list[dict[str, object]]: families = _as_mapping(metrics_payload.get("families")) + thresholds = _as_mapping(design_thresholds) + complexity_threshold = _coerced_nonnegative_threshold( + _as_mapping(thresholds.get(CATEGORY_COMPLEXITY)).get("value"), + default=DEFAULT_REPORT_DESIGN_COMPLEXITY_THRESHOLD, + ) + coupling_threshold = _coerced_nonnegative_threshold( + _as_mapping(thresholds.get(CATEGORY_COUPLING)).get("value"), + default=DEFAULT_REPORT_DESIGN_COUPLING_THRESHOLD, + ) + cohesion_threshold = _coerced_nonnegative_threshold( + _as_mapping(thresholds.get(CATEGORY_COHESION)).get("value"), + default=DEFAULT_REPORT_DESIGN_COHESION_THRESHOLD, + ) groups: list[dict[str, object]] = [] complexity = _as_mapping(families.get(CATEGORY_COMPLEXITY)) for item in _as_sequence(complexity.get("items")): - group = _complexity_design_group(_as_mapping(item), scan_root=scan_root) + group = _complexity_design_group( + _as_mapping(item), + threshold=complexity_threshold, + scan_root=scan_root, + ) if group is not None: groups.append(group) coupling = _as_mapping(families.get(CATEGORY_COUPLING)) for item in _as_sequence(coupling.get("items")): - group = _coupling_design_group(_as_mapping(item), scan_root=scan_root) + group = _coupling_design_group( + _as_mapping(item), + threshold=coupling_threshold, + scan_root=scan_root, + ) if group is not 
None: groups.append(group) cohesion = _as_mapping(families.get(CATEGORY_COHESION)) for item in _as_sequence(cohesion.get("items")): - group = _cohesion_design_group(_as_mapping(item), scan_root=scan_root) + group = _cohesion_design_group( + _as_mapping(item), + threshold=cohesion_threshold, + scan_root=scan_root, + ) if group is not None: groups.append(group) @@ -1913,6 +1959,7 @@ def _build_derived_overview( if dominant_kind_counts[key] > 0 }, "health_snapshot": _health_snapshot(metrics_payload), + "directory_hotspots": build_directory_hotspots(findings=findings), } hotlists: dict[str, object] = { "most_actionable_ids": _sort_flat_finding_ids( @@ -2049,6 +2096,7 @@ def _build_findings_payload( new_function_group_keys: Collection[str] | None, new_block_group_keys: Collection[str] | None, new_segment_group_keys: Collection[str] | None, + design_thresholds: Mapping[str, object] | None, scan_root: str, ) -> dict[str, object]: clone_functions = _build_clone_groups( @@ -2095,6 +2143,7 @@ def _build_findings_payload( ) design_groups = _build_design_groups( metrics_payload, + design_thresholds=design_thresholds, scan_root=scan_root, ) return { @@ -2221,6 +2270,9 @@ def build_report_document( report_schema_version = REPORT_SCHEMA_VERSION scan_root = str(_as_mapping(meta).get("scan_root", "")) meta_payload = _build_meta_payload(meta, scan_root=scan_root) + design_thresholds = _as_mapping( + _as_mapping(meta_payload.get("analysis_thresholds")).get("design_findings") + ) metrics_payload = _build_metrics_payload(metrics, scan_root=scan_root) file_list = _collect_report_file_list( inventory=inventory, @@ -2247,6 +2299,7 @@ def build_report_document( new_function_group_keys=new_function_group_keys, new_block_group_keys=new_block_group_keys, new_segment_group_keys=new_segment_group_keys, + design_thresholds=design_thresholds, scan_root=scan_root, ) overview_payload, hotlists_payload = _build_derived_overview( diff --git a/codeclone/report/overview.py 
b/codeclone/report/overview.py index 73a535c..bbd5945 100644 --- a/codeclone/report/overview.py +++ b/codeclone/report/overview.py @@ -8,7 +8,8 @@ from collections import Counter from collections.abc import Mapping, Sequence -from typing import TYPE_CHECKING +from pathlib import PurePosixPath +from typing import TYPE_CHECKING, cast from .. import _coerce from ..domain.findings import ( @@ -20,6 +21,7 @@ CLONE_KIND_BLOCK, CLONE_KIND_FUNCTION, CLONE_KIND_SEGMENT, + FAMILY_CLONE, FAMILY_CLONES, FAMILY_DEAD_CODE, FAMILY_DESIGN, @@ -39,12 +41,17 @@ BLOCK_HINT_ASSERT_ONLY, BLOCK_PATTERN_REPEATED_STMT_HASH, ) -from .derived import format_spread_location_label +from .derived import ( + classify_source_kind, + format_spread_location_label, + source_scope_from_locations, +) if TYPE_CHECKING: from ..models import Suggestion __all__ = [ + "build_directory_hotspots", "build_report_overview", "materialize_report_overview", "serialize_suggestion_card", @@ -97,6 +104,219 @@ def _flatten_findings(findings: Mapping[str, object]) -> list[Mapping[str, objec ] +_DIRECTORY_HOTSPOT_BUCKETS: tuple[str, ...] = ( + "all", + "clones", + "structural", + CATEGORY_COMPLEXITY, + CATEGORY_COHESION, + CATEGORY_COUPLING, + CATEGORY_DEAD_CODE, + CATEGORY_DEPENDENCY, +) +_DIRECTORY_KIND_BREAKDOWN_KEYS: tuple[str, ...] 
= ( + "clones", + "structural", + CATEGORY_DEAD_CODE, + CATEGORY_COMPLEXITY, + CATEGORY_COUPLING, + CATEGORY_COHESION, + CATEGORY_DEPENDENCY, +) + + +def _directory_bucket_keys(group: Mapping[str, object]) -> tuple[str, ...]: + family = str(group.get("family", "")).strip() + category = str(group.get("category", "")).strip() + if family == FAMILY_CLONE: + return ("all", "clones") + if family == FAMILY_STRUCTURAL: + return ("all", "structural") + if family == FAMILY_DEAD_CODE: + return ("all", CATEGORY_DEAD_CODE) + if family == FAMILY_DESIGN and category in { + CATEGORY_COMPLEXITY, + CATEGORY_COUPLING, + CATEGORY_COHESION, + CATEGORY_DEPENDENCY, + }: + return ("all", category) + return ("all",) + + +def _directory_kind_breakdown_key(group: Mapping[str, object]) -> str | None: + family = str(group.get("family", "")).strip() + category = str(group.get("category", "")).strip() + if family == FAMILY_CLONE: + return "clones" + if family == FAMILY_STRUCTURAL: + return "structural" + if family == FAMILY_DEAD_CODE: + return CATEGORY_DEAD_CODE + if family == FAMILY_DESIGN and category in { + CATEGORY_COMPLEXITY, + CATEGORY_COUPLING, + CATEGORY_COHESION, + CATEGORY_DEPENDENCY, + }: + return category + return None + + +def _directory_relative_path(item: Mapping[str, object]) -> str | None: + relative_path = str(item.get("relative_path", "")).replace("\\", "/").strip() + if not relative_path: + module = str(item.get("module", "")).strip() + if module: + relative_path = module.replace(".", "/") + ".py" + return relative_path or None + + +def _directory_path_label(relative_path: str) -> str: + parent = PurePosixPath(relative_path).parent.as_posix() + return parent if parent not in {"", "/"} else "." 
+ + +def _directory_contributions( + group: Mapping[str, object], +) -> dict[str, dict[str, object]]: + contributions: dict[str, dict[str, object]] = {} + for item in map(_as_mapping, _as_sequence(group.get("items"))): + relative_path = _directory_relative_path(item) + if relative_path is None: + continue + directory = _directory_path_label(relative_path) + source_kind = str(item.get("source_kind", "")).strip() or classify_source_kind( + relative_path + ) + entry = contributions.setdefault( + directory, + { + "affected_items": 0, + "files": set(), + "locations": [], + }, + ) + entry["affected_items"] = _as_int(entry.get("affected_items")) + 1 + cast(set[str], entry["files"]).add(relative_path) + cast(list[dict[str, object]], entry["locations"]).append( + {"source_kind": source_kind} + ) + return contributions + + +def _directory_group_data( + group: Mapping[str, object], +) -> tuple[str, dict[str, dict[str, object]]] | None: + group_id = str(group.get("id", "")).strip() + if not group_id: + return None + contributions = _directory_contributions(group) + if not contributions: + return None + return group_id, contributions + + +def build_directory_hotspots( + *, + findings: Mapping[str, object], + limit: int = 5, +) -> dict[str, object]: + normalized_limit = max(1, _as_int(limit, 5)) + bucket_rows: dict[str, dict[str, dict[str, object]]] = { + bucket: {} for bucket in _DIRECTORY_HOTSPOT_BUCKETS + } + bucket_totals: Counter[str] = Counter() + + for group in _flatten_findings(findings): + group_data = _directory_group_data(group) + if group_data is None: + continue + group_id, contributions = group_data + bucket_keys = _directory_bucket_keys(group) + kind_key = _directory_kind_breakdown_key(group) + for bucket in bucket_keys: + rows = bucket_rows[bucket] + for directory, contribution in contributions.items(): + row = rows.setdefault( + directory, + { + "path": directory, + "finding_ids": set(), + "affected_items": 0, + "files": set(), + "locations": [], + 
"kind_breakdown_ids": { + key: set() for key in _DIRECTORY_KIND_BREAKDOWN_KEYS + }, + }, + ) + cast(set[str], row["finding_ids"]).add(group_id) + row["affected_items"] = _as_int(row.get("affected_items")) + _as_int( + contribution.get("affected_items") + ) + cast(set[str], row["files"]).update( + cast(set[str], contribution["files"]) + ) + cast(list[dict[str, object]], row["locations"]).extend( + cast(list[dict[str, object]], contribution["locations"]) + ) + if bucket == "all" and kind_key is not None: + kind_rows = cast( + dict[str, set[str]], + row["kind_breakdown_ids"], + ) + kind_rows[kind_key].add(group_id) + bucket_totals[bucket] += _as_int(contribution.get("affected_items")) + + def _row_sort_key(row: Mapping[str, object]) -> tuple[int, int, int, str]: + return ( + -len(cast(set[str], row["finding_ids"])), + -_as_int(row.get("affected_items")), + -len(cast(set[str], row["files"])), + str(row.get("path", "")), + ) + + hotspots: dict[str, object] = {} + for bucket in _DIRECTORY_HOTSPOT_BUCKETS: + bucket_items = sorted(bucket_rows[bucket].values(), key=_row_sort_key) + total_directories = len(bucket_items) + total_affected_items = bucket_totals[bucket] + items: list[dict[str, object]] = [] + for row in bucket_items[:normalized_limit]: + finding_groups = len(cast(set[str], row["finding_ids"])) + affected_items = _as_int(row.get("affected_items")) + files = len(cast(set[str], row["files"])) + item = { + "path": str(row.get("path", ".")), + "finding_groups": finding_groups, + "affected_items": affected_items, + "files": files, + "share_pct": round( + (affected_items / total_affected_items) * 100.0, + 1, + ) + if total_affected_items > 0 + else 0.0, + "source_scope": source_scope_from_locations( + cast(list[dict[str, object]], row["locations"]) + ), + } + if bucket == "all": + item["kind_breakdown"] = { + key: len(cast(dict[str, set[str]], row["kind_breakdown_ids"])[key]) + for key in _DIRECTORY_KIND_BREAKDOWN_KEYS + } + items.append(item) + hotspots[bucket] = { + 
"total_directories": total_directories, + "returned": len(items), + "has_more": total_directories > len(items), + "items": items, + } + return hotspots + + def _clone_fact_kind(kind: str) -> str: return { CLONE_KIND_FUNCTION: "Function clone group", diff --git a/docs/README.md b/docs/README.md index ec3d332..b08f1d1 100644 --- a/docs/README.md +++ b/docs/README.md @@ -39,7 +39,7 @@ repository build: - [Core pipeline and invariants](book/05-core-pipeline.md) - [Baseline contract (schema v2.0)](book/06-baseline.md) - [Cache contract (schema v2.3)](book/07-cache.md) -- [Report contract (schema v2.1)](book/08-report.md) +- [Report contract (schema v2.2)](book/08-report.md) ## Interfaces diff --git a/docs/architecture.md b/docs/architecture.md index f416ddc..09b5e4d 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -144,7 +144,7 @@ gating decisions. Detected findings can be rendered as: - interactive HTML (`--html`), -- canonical JSON (`--json`, schema `2.1`), +- canonical JSON (`--json`, schema `2.2`), - deterministic text projection (`--text`), - deterministic Markdown projection (`--md`), - deterministic SARIF projection (`--sarif`). 
@@ -158,6 +158,7 @@ Reporting uses a layered model: Provenance is carried through `meta` and includes: - runtime/context (`codeclone_version`, `python_version`, `python_tag`, `analysis_mode`, `report_mode`) +- analysis thresholds (`meta.analysis_thresholds.design_findings`) - baseline status block (`meta.baseline.*`) - cache status block (`meta.cache.*`) - metrics-baseline status block (`meta.metrics_baseline.*`) diff --git a/docs/book/01-architecture-map.md b/docs/book/01-architecture-map.md index f88861b..caf9532 100644 --- a/docs/book/01-architecture-map.md +++ b/docs/book/01-architecture-map.md @@ -46,6 +46,9 @@ Refs: - MCP may ship task-specific slim projections (for example, summary-only metrics or inventory counts) as long as canonical report data remains the source of truth and richer detail stays reachable through dedicated tools/sections. +- The same rule applies to summary cache convenience fields such as + `effective_freshness` and to production-first triage projections built from + canonical hotlists/suggestions. - MCP finding lists may also move repeated absolute location context to envelope-level metadata such as `base_uri`, while keeping `get_finding` as the richer per-finding inspection path. 
diff --git a/docs/book/02-terminology.md b/docs/book/02-terminology.md index 73a29e1..4a6ffbc 100644 --- a/docs/book/02-terminology.md +++ b/docs/book/02-terminology.md @@ -87,7 +87,7 @@ Refs: ## Locked by tests - `tests/test_baseline.py::test_baseline_id_lists_must_be_sorted_and_unique` -- `tests/test_report.py::test_report_json_group_order_is_lexicographic` +- `tests/test_report.py::test_report_json_group_order_is_deterministic_by_count_then_id` - `tests/test_cache.py::test_cache_version_mismatch_warns` ## Non-guarantees diff --git a/docs/book/08-report.md b/docs/book/08-report.md index 8ffe3d1..aeed42c 100644 --- a/docs/book/08-report.md +++ b/docs/book/08-report.md @@ -2,7 +2,7 @@ ## Purpose -Define report contracts in `2.0.0b3`: canonical JSON (`report_schema_version=2.1`) +Define report contracts in `2.0.0b3`: canonical JSON (`report_schema_version=2.2`) plus deterministic TXT/Markdown/SARIF projections. ## Public surface @@ -16,7 +16,7 @@ plus deterministic TXT/Markdown/SARIF projections. ## Data model -JSON report top-level (v2.1): +JSON report top-level (v2.2): - `report_schema_version` - `meta` @@ -26,6 +26,12 @@ JSON report top-level (v2.1): - `derived` - `integrity` +Canonical provenance additions: + +- `meta.analysis_thresholds.design_findings` records the effective report-level + thresholds used to materialize canonical design findings for that run + (`complexity > N`, `coupling > N`, `cohesion >= N`). 
+ Canonical vs non-canonical split: - Canonical: `report_schema_version`, `meta`, `inventory`, `findings`, `metrics` @@ -41,6 +47,7 @@ Derived projection layer: - `top_risks` - `source_scope_breakdown` - `health_snapshot` + - `directory_hotspots` - `derived.hotlists` — deterministic lists of canonical finding IDs: - `most_actionable_ids` - `highest_spread_ids` @@ -81,9 +88,15 @@ Per-group common axes (family-specific fields may extend): - clone results carry `baselineState` when clone novelty is known - Derived layer (`suggestions`, `overview`, `hotlists`) does not replace canonical findings/metrics. +- Design findings are built once in the canonical report using the effective + threshold policy recorded in `meta.analysis_thresholds.design_findings`; MCP + and HTML must not re-synthesize them post-hoc from raw metric rows. - HTML overview cards are materialized from canonical findings plus `derived.overview` + `derived.hotlists`; pre-expanded overview card payloads are not part of the report contract. +- `derived.overview.directory_hotspots` is a deterministic report-layer + aggregation over canonical findings; HTML must render it as-is or omit it on + compatibility paths without a canonical report document. - Overview hotspot/source-breakdown sections must resolve from canonical report data or deterministic derived IDs; HTML must not silently substitute stale placeholders such as `n/a` or empty-state cards when canonical data exists. 
diff --git a/docs/book/10-html-render.md b/docs/book/10-html-render.md index cc298cf..33151b9 100644 --- a/docs/book/10-html-render.md +++ b/docs/book/10-html-render.md @@ -44,6 +44,7 @@ Refs: - KPI cards with baseline-aware tone (`✓ baselined` / `+N` regression) - Health gauge with baseline delta arc (improvement/degradation) - Executive Summary: issue breakdown (sorted bars) + source breakdown + - Hotspots by Directory: render-only view over `derived.overview.directory_hotspots` - Health Profile: full-width radar chart of dimension scores - Get Badge modal: grade-only / score+grade variants with shields.io embed - Dead-code UI is a single top-level `Dead Code` tab with deterministic split @@ -128,3 +129,6 @@ Refs: protocol handler registration (e.g. JetBrains Toolbox for `jetbrains://`). - Overview layout (KPI grid, executive summary, analytics) is a pure view concern; only the underlying data identity and ordering are contract-sensitive. +- Direct `build_html_report(...)` compatibility paths without a canonical + `report_document` may omit `directory_hotspots`; HTML must not approximate + directory aggregates from suggestion cards. 
diff --git a/docs/book/13-testing-as-spec.md b/docs/book/13-testing-as-spec.md index fbdff31..fe83446 100644 --- a/docs/book/13-testing-as-spec.md +++ b/docs/book/13-testing-as-spec.md @@ -34,7 +34,7 @@ The following matrix is treated as executable contract: | Baseline schema/integrity/compat gates | `tests/test_baseline.py` | | Cache v2.3 fail-open + status mapping | `tests/test_cache.py`, `tests/test_cli_inprocess.py::test_cli_reports_cache_too_large_respects_max_size_flag` | | Exit code categories and markers | `tests/test_cli_unit.py`, `tests/test_cli_inprocess.py` | -| Report schema v2.1 canonical/derived/integrity + JSON/TXT/MD/SARIF projections | `tests/test_report.py`, `tests/test_report_contract_coverage.py`, `tests/test_report_branch_invariants.py` | +| Report schema v2.2 canonical/derived/integrity + JSON/TXT/MD/SARIF projections | `tests/test_report.py`, `tests/test_report_contract_coverage.py`, `tests/test_report_branch_invariants.py` | | HTML render-only explainability + escaping | `tests/test_html_report.py` | | Scanner traversal safety | `tests/test_scanner_extra.py`, `tests/test_security.py` | diff --git a/docs/book/14-compatibility-and-versioning.md b/docs/book/14-compatibility-and-versioning.md index d7c5962..13e8b9e 100644 --- a/docs/book/14-compatibility-and-versioning.md +++ b/docs/book/14-compatibility-and-versioning.md @@ -21,7 +21,7 @@ Current contract versions: - `BASELINE_SCHEMA_VERSION = "2.0"` - `BASELINE_FINGERPRINT_VERSION = "1"` - `CACHE_VERSION = "2.3"` -- `REPORT_SCHEMA_VERSION = "2.1"` +- `REPORT_SCHEMA_VERSION = "2.2"` - `METRICS_BASELINE_SCHEMA_VERSION = "1.0"` (standalone metrics-baseline file) Refs: @@ -49,6 +49,14 @@ Version bump rules: - The same rule applies to finding-level MCP projection changes such as envelope-level `base_uri`, slim summary locations, or omitting `priority_factors` outside `detail_level="full"`. 
+- Additive MCP-only convenience fields/projections such as + `cache.effective_freshness` or production-first triage also do not change + `report_schema_version` when they are derived from unchanged canonical report + and summary data. +- Canonical report changes such as `meta.analysis_thresholds.design_findings` + or threshold-aware design finding materialization do change + `report_schema_version` because they alter canonical report semantics and + integrity payload. Baseline compatibility rules: @@ -96,8 +104,8 @@ Refs: ## Locked by tests -- `tests/test_baseline.py::test_baseline_verify_schema_too_new` -- `tests/test_baseline.py::test_baseline_verify_schema_major_mismatch` +- `tests/test_baseline.py::test_baseline_verify_schema_incompatibilities[schema_too_new]` +- `tests/test_baseline.py::test_baseline_verify_schema_incompatibilities[schema_major_mismatch]` - `tests/test_baseline.py::test_baseline_verify_fingerprint_mismatch` - `tests/test_cache.py::test_cache_v_field_version_mismatch_warns` - `tests/test_report.py::test_report_json_compact_v21_contract` diff --git a/docs/book/17-suggestions-and-clone-typing.md b/docs/book/17-suggestions-and-clone-typing.md index 5befb4f..1ee0a95 100644 --- a/docs/book/17-suggestions-and-clone-typing.md +++ b/docs/book/17-suggestions-and-clone-typing.md @@ -93,7 +93,7 @@ Refs: - `tests/test_report_suggestions.py::test_classify_clone_type_all_modes` - `tests/test_report_suggestions.py::test_generate_suggestions_covers_clone_metrics_and_dependency_categories` - `tests/test_report_suggestions.py::test_generate_suggestions_covers_skip_branches_for_optional_rules` -- `tests/test_html_report.py::test_html_report_suggestions_headers_include_help_tips` +- `tests/test_html_report.py::test_html_report_suggestions_cards_split_facts_assessment_and_action` ## Non-guarantees diff --git a/docs/book/20-mcp-interface.md b/docs/book/20-mcp-interface.md index cc4ca23..cc5bd9a 100644 --- a/docs/book/20-mcp-interface.md +++ 
b/docs/book/20-mcp-interface.md @@ -2,7 +2,7 @@ ## Purpose -Define the current public MCP surface in `2.0.0b3`. +Define the current public MCP surface in the `2.0` beta line. This interface is **optional** and is installed via the `mcp` extra. It does not replace the CLI or the canonical JSON report contract. Instead, it exposes @@ -28,6 +28,7 @@ Current server characteristics: - in-memory only - bounded history (`--history-limit`, default `4`, maximum `10`) - latest-run pointer for `codeclone://latest/...` resources + - the `latest` pointer moves whenever a newer `analyze_*` call registers a run - run identity: - `run_id` is derived from the canonical report integrity digest - analysis modes: @@ -43,6 +44,8 @@ Current server characteristics: - summary payload: - `run_id`, `root`, `analysis_mode` - `baseline`, `metrics_baseline`, `cache` + - `cache.effective_freshness` classifies summary cache reuse as + `fresh`, `mixed`, or `reused` - `inventory`, `findings_summary`, `health` - `get_run_summary` and summary resources expose slim inventory `file_registry` as `{ encoding, count }` @@ -75,6 +78,7 @@ Current tool set: | `analyze_repository` | `root`, `analysis_mode`, `changed_paths`, `git_diff_ref`, inline thresholds, cache/baseline paths | Run deterministic CodeClone analysis and register the result as the latest MCP run | | `analyze_changed_paths` | `root`, `changed_paths` or `git_diff_ref`, `analysis_mode`, inline thresholds | Diff-aware fast path: analyze a repo and attach a changed-files projection to the run; summary inventory is slimmed to `{count}` | | `get_run_summary` | `run_id` | Return the stored summary for the latest or specified run, with slim inventory counts instead of the full file registry | +| `get_production_triage` | `run_id`, `max_hotspots`, `max_suggestions` | Return a compact production-first MCP projection: health, cache `effective_freshness`, production hotspots, production suggestions, and global source-kind counters | | `compare_runs` | 
`run_id_before`, `run_id_after`, `focus` | Compare two registered runs by finding ids and health delta | | `evaluate_gates` | `run_id`, gate thresholds/booleans | Evaluate CI/gating conditions against an existing run without exiting the process | | `get_report_section` | `run_id`, `section` | Return a canonical report section. `metrics` is summary-only; `metrics_detail` exposes the full metrics payload; other sections stay canonical | @@ -103,22 +107,31 @@ sessionful and may populate or reuse in-memory run state. The granular ## Resources -Current resources: +Current fixed resources: -| Resource | Payload | Availability | -|---------------------------------------------------|-------------------------------------------------------|-------------------------------------------------------| -| `codeclone://latest/summary` | latest run summary projection | always after at least one run | -| `codeclone://latest/report.json` | latest canonical report document | always after at least one run | -| `codeclone://latest/health` | latest health score + dimensions | always after at least one run | -| `codeclone://latest/gates` | latest gate evaluation result | only after `evaluate_gates` in current server process | -| `codeclone://latest/changed` | latest changed-files projection | only for a diff-aware latest run | -| `codeclone://schema` | schema-style descriptor for canonical report sections | always available | -| `codeclone://runs/{run_id}/summary` | run-specific summary projection | for any stored run | -| `codeclone://runs/{run_id}/report.json` | run-specific canonical report | for any stored run | -| `codeclone://runs/{run_id}/findings/{finding_id}` | run-specific canonical finding group | for an existing finding in a stored run | +| Resource | Payload | Availability | +|----------------------------------|-------------------------------------------------------|-------------------------------------------------------| +| `codeclone://latest/summary` | latest run summary 
projection | always after at least one run | +| `codeclone://latest/triage` | latest production-first triage projection | always after at least one run | +| `codeclone://latest/report.json` | latest canonical report document | always after at least one run | +| `codeclone://latest/health` | latest health score + dimensions | always after at least one run | +| `codeclone://latest/gates` | latest gate evaluation result | only after `evaluate_gates` in current server process | +| `codeclone://latest/changed` | latest changed-files projection | only for a diff-aware latest run | +| `codeclone://schema` | schema-style descriptor for canonical report sections | always available | -Resources are convenience views over already registered runs. They do not -trigger fresh analysis by themselves. +Current run-scoped URI templates: + +| URI template | Payload | Availability | +|---------------------------------------------------|--------------------------------------|-----------------------------------------| +| `codeclone://runs/{run_id}/summary` | run-specific summary projection | for any stored run | +| `codeclone://runs/{run_id}/report.json` | run-specific canonical report | for any stored run | +| `codeclone://runs/{run_id}/findings/{finding_id}` | run-specific canonical finding group | for an existing finding in a stored run | + +Fixed resources and URI templates are convenience views over already +registered runs. They do not trigger fresh analysis by themselves. +If a client needs the freshest truth, it must start a fresh analysis run first +(typically with `cache_policy="off"`), rather than relying on older session +state behind `codeclone://latest/...`. ## Contracts @@ -138,9 +151,17 @@ trigger fresh analysis by themselves. 
- baseline trust semantics - cache semantics - canonical report contract +- Inline MCP design-threshold parameters (`complexity_threshold`, + `coupling_threshold`, `cohesion_threshold`) define the canonical design + finding universe of that run and are recorded in + `meta.analysis_thresholds.design_findings`. - `get_run_summary` is a deterministic convenience projection derived from the canonical report (`meta`, `inventory`, `findings.summary`, `metrics.summary.health`) plus baseline-diff/gate/changed-files context. +- `get_production_triage` is also a deterministic MCP projection over the same + canonical run state (`summary`, `derived.hotlists`, `derived.suggestions`, + and canonical finding source scope). It must not create a second analysis or + remediation truth path. - Canonical JSON remains the source of truth for report semantics. - `list_findings` and `list_hotspots` are deterministic projections over the canonical report, not a separate analysis branch. @@ -164,6 +185,11 @@ trigger fresh analysis by themselves. the canonical report digest is unchanged; changed-files state is an overlay, not a second canonical report. - `get_run_summary` with no `run_id` resolves to the latest stored run. +- `codeclone://latest/...` resources always resolve to the latest stored run in + the current MCP server process, not to a globally fresh analysis state. +- Summary-style MCP payloads expose `cache.effective_freshness` as a derived + convenience marker; canonical cache fields (`status`, `used`, `schema_version`) + remain unchanged. - `get_report_section(section="all")` returns the full canonical report document. - `get_report_section(section="metrics")` returns only `metrics.summary`. - `get_report_section(section="metrics_detail")` returns the full canonical @@ -180,6 +206,8 @@ trigger fresh analysis by themselves. `priority_factors` and location `uri` are still available there. 
- `compare_runs` is only semantically meaningful when both runs use comparable repository scope/root and analysis settings. +- `codeclone://latest/triage` is a latest-only resource; run-specific triage is + available via the tool, not via a `codeclone://runs/{run_id}/...` resource URI. ## Failure modes @@ -197,6 +225,8 @@ trigger fresh analysis by themselves. - Finding order is inherited from canonical report ordering. - Hotlists are derived from canonical report data and deterministic derived ids. - No MCP-only heuristics may change analysis or gating semantics. +- MCP must not re-synthesize design findings from raw metrics after the run; + threshold-aware design findings belong to the canonical report document. ## Locked by tests diff --git a/docs/book/appendix/b-schema-layouts.md b/docs/book/appendix/b-schema-layouts.md index 5213b39..bf2734d 100644 --- a/docs/book/appendix/b-schema-layouts.md +++ b/docs/book/appendix/b-schema-layouts.md @@ -77,17 +77,24 @@ Notes: - `u` row decoder accepts both legacy 11-column rows and canonical 17-column rows (legacy rows map new structural fields to neutral defaults). -## Report schema (`2.1`) +## Report schema (`2.2`) ```json { - "report_schema_version": "2.1", + "report_schema_version": "2.2", "meta": { "codeclone_version": "2.0.0b3", "project_name": "codeclone", "scan_root": ".", "analysis_mode": "full", "report_mode": "full", + "analysis_thresholds": { + "design_findings": { + "complexity": { "metric": "cyclomatic_complexity", "operator": ">", "value": 20 }, + "coupling": { "metric": "cbo", "operator": ">", "value": 10 }, + "cohesion": { "metric": "lcom4", "operator": ">=", "value": 4 } + } + }, "baseline": { "...": "..." }, @@ -199,6 +206,9 @@ Notes: "health_snapshot": { "score": 100, "grade": "A" + }, + "directory_hotspots": { + "...": "..." } }, "hotlists": { @@ -234,7 +244,7 @@ Notes: ```text # CodeClone Report - Markdown schema: 1.0 -- Source report schema: 2.1 +- Source report schema: 2.2 ... 
## Overview ## Inventory @@ -320,7 +330,7 @@ Notes: ], "properties": { "profileVersion": "1.0", - "reportSchemaVersion": "2.1" + "reportSchemaVersion": "2.2" }, "results": [ { diff --git a/docs/mcp.md b/docs/mcp.md index f2d0cf0..2e4f88b 100644 --- a/docs/mcp.md +++ b/docs/mcp.md @@ -37,27 +37,28 @@ core CodeClone runtime. ## Tool surface -| Tool | Purpose | -|--------------------------|----------------------------------------------------------------------| -| `analyze_repository` | Full analysis → register as latest run | -| `analyze_changed_paths` | Diff-aware analysis with `changed_paths` or `git_diff_ref`; summary inventory is slimmed to counts | -| `get_run_summary` | Compact health/findings/baseline snapshot with slim inventory counts | -| `compare_runs` | Regressions, improvements, health delta between two runs | -| `list_findings` | Filtered, paginated finding groups with envelope-level `base_uri` | -| `get_finding` | Deep inspection of one finding by id | -| `get_remediation` | Structured remediation payload for one finding | -| `list_hotspots` | Derived views: highest priority, production hotspots, spread, etc., with compact summary cards | +| Tool | Purpose | +|--------------------------|------------------------------------------------------------------------------------------------------| +| `analyze_repository` | Full analysis → register as latest run | +| `analyze_changed_paths` | Diff-aware analysis with `changed_paths` or `git_diff_ref`; summary inventory is slimmed to counts | +| `get_run_summary` | Compact health/findings/baseline snapshot with slim inventory counts | +| `get_production_triage` | Compact production-first view: health, cache freshness, production hotspots, production suggestions | +| `compare_runs` | Regressions, improvements, health delta between two runs | +| `list_findings` | Filtered, paginated finding groups with envelope-level `base_uri` | +| `get_finding` | Deep inspection of one finding by id | +| `get_remediation` | 
Structured remediation payload for one finding | +| `list_hotspots` | Derived views: highest priority, production hotspots, spread, etc., with compact summary cards | | `get_report_section` | Read canonical report sections; `metrics` is summary-only, `metrics_detail` is the full metrics dump | -| `evaluate_gates` | Preview CI/gating decisions without exiting | -| `check_clones` | Clone findings from a stored run | -| `check_complexity` | Complexity hotspots from a stored run | -| `check_coupling` | Coupling hotspots from a stored run | -| `check_cohesion` | Cohesion hotspots from a stored run | -| `check_dead_code` | Dead-code findings from a stored run | -| `generate_pr_summary` | PR-friendly markdown or JSON summary | -| `mark_finding_reviewed` | Session-local review marker (in-memory only) | -| `list_reviewed_findings` | List reviewed findings for a run | -| `clear_session_runs` | Reset all in-memory runs and session caches | +| `evaluate_gates` | Preview CI/gating decisions without exiting | +| `check_clones` | Clone findings from a stored run | +| `check_complexity` | Complexity hotspots from a stored run | +| `check_coupling` | Coupling hotspots from a stored run | +| `check_cohesion` | Cohesion hotspots from a stored run | +| `check_dead_code` | Dead-code findings from a stored run | +| `generate_pr_summary` | PR-friendly markdown or JSON summary | +| `mark_finding_reviewed` | Session-local review marker (in-memory only) | +| `list_reviewed_findings` | List reviewed findings for a run | +| `clear_session_runs` | Reset all in-memory runs and session caches | > `check_*` tools query stored runs only. Call `analyze_repository` or > `analyze_changed_paths` first. @@ -66,30 +67,48 @@ core CodeClone runtime. `health.dimensions` down to the one dimension relevant to that tool. 
List-style finding responses also expose `base_uri` once per envelope and keep summary locations as `file` + `line`; richer `symbol` / `uri` data stays in -`normal` / `full` responses and `get_finding`. +`normal` / `full` responses and `get_finding`. Summary-style MCP cache payloads +also expose `effective_freshness` (`fresh`, `mixed`, `reused`). +Inline design-threshold parameters on `analyze_repository` / +`analyze_changed_paths` become part of the canonical run: they are recorded in +`meta.analysis_thresholds.design_findings` and define that run's canonical +design findings. ## Resource surface -| Resource | Content | -|---------------------------------------------------|--------------------------------------------| -| `codeclone://latest/summary` | Latest run summary | -| `codeclone://latest/report.json` | Full canonical report | -| `codeclone://latest/health` | Health score and dimensions | -| `codeclone://latest/gates` | Last gate evaluation result | -| `codeclone://latest/changed` | Changed-files projection (diff-aware runs) | -| `codeclone://schema` | Canonical report shape descriptor | -| `codeclone://runs/{run_id}/summary` | Summary for a specific run | -| `codeclone://runs/{run_id}/report.json` | Report for a specific run | -| `codeclone://runs/{run_id}/findings/{finding_id}` | One finding from a specific run | +Fixed resources: -Resources are read-only views over stored runs — they do not trigger analysis. 
+| Resource | Content | +|----------------------------------|--------------------------------------------| +| `codeclone://latest/summary` | Latest run summary | +| `codeclone://latest/triage` | Latest production-first triage | +| `codeclone://latest/report.json` | Full canonical report | +| `codeclone://latest/health` | Health score and dimensions | +| `codeclone://latest/gates` | Last gate evaluation result | +| `codeclone://latest/changed` | Changed-files projection (diff-aware runs) | +| `codeclone://schema` | Canonical report shape descriptor | + +Run-scoped resource templates: + +| URI template | Content | +|---------------------------------------------------|---------------------------------| +| `codeclone://runs/{run_id}/summary` | Summary for a specific run | +| `codeclone://runs/{run_id}/report.json` | Report for a specific run | +| `codeclone://runs/{run_id}/findings/{finding_id}` | One finding from a specific run | + +Resources and URI templates are read-only views over stored runs; they do not +trigger analysis. + +`codeclone://latest/*` always resolves to the most recent run registered in the +current MCP server session. A later `analyze_repository` or +`analyze_changed_paths` call moves that pointer. ## Recommended workflows ### Full repository review ``` -analyze_repository → get_run_summary → list_hotspots → get_finding → evaluate_gates +analyze_repository → get_production_triage → get_finding → evaluate_gates ``` ### Changed-files review (PR / patch) @@ -170,6 +189,8 @@ Show regressions, resolved findings, and health delta. **Tips:** - Use `analyze_changed_paths` for PRs, not full analysis. +- Set `cache_policy="off"` when you need the freshest truth from a new analysis + run, not whatever older session state currently sits behind `latest/*`. - Use `"production-only"` / `source_kind` filters to cut test/fixture noise. - Use `mark_finding_reviewed` + `exclude_reviewed=true` in long sessions. 
- Ask the agent to separate baseline debt from new regressions. diff --git a/tests/test_html_report.py b/tests/test_html_report.py index ba9a4d2..f7b886a 100644 --- a/tests/test_html_report.py +++ b/tests/test_html_report.py @@ -8,11 +8,17 @@ import json from collections.abc import Callable from pathlib import Path -from typing import Any +from typing import Any, cast import pytest -from codeclone.contracts import CACHE_VERSION, DOCS_URL, ISSUES_URL, REPOSITORY_URL +from codeclone.contracts import ( + CACHE_VERSION, + DOCS_URL, + ISSUES_URL, + REPORT_SCHEMA_VERSION, + REPOSITORY_URL, +) from codeclone.errors import FileProcessingError from codeclone.html_report import ( _FileCache, @@ -957,7 +963,7 @@ def test_html_report_provenance_summary_uses_card_like_badges( 'class="prov-badge prov-badge--neutral"', 'verified', 'Baseline', - '2.1', + f'{REPORT_SCHEMA_VERSION}', 'Schema', '1', 'Fingerprint', @@ -1643,6 +1649,68 @@ def test_html_report_metrics_without_health_score_uses_info_overview() -> None: assert 'avg' in html +def test_html_report_renders_directory_hotspots_from_canonical_report() -> None: + report_document = build_report_document( + func_groups={}, + block_groups={}, + segment_groups={}, + meta={"scan_root": "/repo/project", "project_name": "project"}, + metrics={ + "dead_code": { + "summary": {"count": 6, "critical": 6}, + "items": [ + { + "qualname": f"pkg.dir{index}:unused", + "filepath": f"/repo/project/dir{index}/mod.py", + "start_line": 1, + "end_line": 2, + "kind": "function", + "confidence": "high", + } + for index in range(1, 7) + ], + } + }, + ) + html = build_html_report( + func_groups={}, + block_groups={}, + segment_groups={}, + report_meta=cast("dict[str, Any]", report_document["meta"]), + metrics=cast("dict[str, Any]", report_document["metrics"]), + report_document=report_document, + ) + _assert_html_contains( + html, + "Hotspots by Directory", + "top 5 of 6 directories", + "dir1", + "dir5", + ) + + +def 
test_html_report_direct_path_skips_directory_hotspots_cluster() -> None: + html = build_html_report( + func_groups={}, + block_groups={}, + segment_groups={}, + report_meta={"scan_root": "/outside/project"}, + metrics=_metrics_payload( + health_score=70, + health_grade="B", + complexity_max=1, + complexity_high_risk=0, + coupling_high_risk=0, + cohesion_low=0, + dep_cycles=[], + dep_max_depth=0, + dead_total=0, + dead_critical=0, + ), + ) + assert "Hotspots by Directory" not in html + + def test_html_report_metrics_bad_health_score_and_dead_code_ok_tone() -> None: html = build_html_report( func_groups={}, diff --git a/tests/test_html_report_helpers.py b/tests/test_html_report_helpers.py index 086c896..8dc0012 100644 --- a/tests/test_html_report_helpers.py +++ b/tests/test_html_report_helpers.py @@ -40,6 +40,7 @@ ) from codeclone._html_report._tabs import render_split_tabs from codeclone._html_snippets import _FileCache +from codeclone.contracts import REPORT_SCHEMA_VERSION from codeclone.models import MetricsDiff, ReportLocation, Suggestion @@ -188,7 +189,7 @@ def _section_ctx(**overrides: object) -> SimpleNamespace: "metrics_baseline_meta": {}, "runtime_meta": {}, "integrity_map": {}, - "report_schema_version": "2.1", + "report_schema_version": REPORT_SCHEMA_VERSION, "report_generated_at": "2026-03-22T21:30:45Z", } base.update(overrides) diff --git a/tests/test_mcp_server.py b/tests/test_mcp_server.py index 26704f5..5a4739d 100644 --- a/tests/test_mcp_server.py +++ b/tests/test_mcp_server.py @@ -18,6 +18,7 @@ import pytest from codeclone import mcp_server +from codeclone.contracts import REPORT_SCHEMA_VERSION from codeclone.mcp_server import MCPDependencyError, build_mcp_server @@ -31,6 +32,14 @@ def _structured_tool_result(result: object) -> dict[str, object]: return cast("dict[str, object]", payload) +def _mapping_child(payload: Mapping[str, object], key: str) -> dict[str, object]: + return cast("dict[str, object]", payload[key]) + + +def 
_summary_registry(payload: Mapping[str, object]) -> dict[str, object]: + return _mapping_child(_mapping_child(payload, "inventory"), "file_registry") + + def _require_mcp_runtime() -> None: pytest.importorskip("mcp.server.fastmcp") @@ -100,6 +109,7 @@ def test_mcp_server_exposes_expected_read_only_tools() -> None: "analyze_changed_paths", "clear_session_runs", "get_run_summary", + "get_production_triage", "evaluate_gates", "get_report_section", "list_findings", @@ -127,6 +137,7 @@ def test_mcp_server_exposes_expected_read_only_tools() -> None: "check_cohesion", "check_dead_code", "get_run_summary", + "get_production_triage", "get_report_section", "list_findings", "get_finding", @@ -139,6 +150,8 @@ def test_mcp_server_exposes_expected_read_only_tools() -> None: ) assert tool.annotations.destructiveHint is False assert tool.annotations.idempotentHint is True + assert "cache_policy='off'" in str(tools["analyze_repository"].description) + assert "cache_policy='off'" in str(tools["analyze_changed_paths"].description) assert "Use analyze_repository first" in str(tools["check_complexity"].description) assert "Use analyze_repository first" in str(tools["check_clones"].description) @@ -177,8 +190,7 @@ def test_mcp_server_tool_roundtrip_and_resources(tmp_path: Path) -> None: ) run_id = str(summary["run_id"]) changed_run_id = str(changed_summary["run_id"]) - changed_inventory = cast("dict[str, object]", changed_summary["inventory"]) - changed_registry = cast("dict[str, object]", changed_inventory["file_registry"]) + changed_registry = _summary_registry(changed_summary) assert cast(int, changed_registry["count"]) >= 1 assert "items" not in changed_registry @@ -186,8 +198,7 @@ def test_mcp_server_tool_roundtrip_and_resources(tmp_path: Path) -> None: asyncio.run(server.call_tool("get_run_summary", {})) ) assert latest["run_id"] == run_id - latest_inventory = cast("dict[str, object]", latest["inventory"]) - latest_registry = cast("dict[str, object]", 
latest_inventory["file_registry"]) + latest_registry = _summary_registry(latest) assert cast(int, latest_registry["count"]) >= 1 assert "items" not in latest_registry @@ -219,22 +230,22 @@ def test_mcp_server_tool_roundtrip_and_resources(tmp_path: Path) -> None: latest_summary_text = latest_summary_resource[0].content latest_summary = json.loads(latest_summary_text) assert latest_summary["run_id"] == run_id - latest_summary_inventory = cast( - "dict[str, object]", - latest_summary["inventory"], - ) - latest_summary_registry = cast( - "dict[str, object]", - latest_summary_inventory["file_registry"], - ) + latest_summary_registry = _summary_registry(latest_summary) assert cast(int, latest_summary_registry["count"]) >= 1 assert "items" not in latest_summary_registry + production_triage = _structured_tool_result( + asyncio.run(server.call_tool("get_production_triage", {})) + ) + assert production_triage["run_id"] == run_id + assert _mapping_child(production_triage, "cache")["effective_freshness"] + latest_report_resource = list( asyncio.run(server.read_resource("codeclone://latest/report.json")) ) assert ( - json.loads(latest_report_resource[0].content)["report_schema_version"] == "2.1" + json.loads(latest_report_resource[0].content)["report_schema_version"] + == REPORT_SCHEMA_VERSION ) latest_health_resource = list( asyncio.run(server.read_resource("codeclone://latest/health")) @@ -246,13 +257,17 @@ def test_mcp_server_tool_roundtrip_and_resources(tmp_path: Path) -> None: latest_changed_payload = json.loads(latest_changed_resource[0].content) assert latest_changed_payload["run_id"] == changed_run_id assert latest_changed_payload["changed_paths"] == changed_summary["changed_paths"] + latest_triage_resource = list( + asyncio.run(server.read_resource("codeclone://latest/triage")) + ) + assert json.loads(latest_triage_resource[0].content)["run_id"] == run_id report_resource = list( asyncio.run(server.read_resource(f"codeclone://runs/{run_id}/report.json")) ) assert 
report_resource report_payload = json.loads(report_resource[0].content) - assert report_payload["report_schema_version"] == "2.1" + assert report_payload["report_schema_version"] == REPORT_SCHEMA_VERSION finding_items = cast("list[dict[str, object]]", findings_result["items"]) first_finding_id = str(finding_items[0]["id"]) @@ -270,6 +285,9 @@ def test_mcp_server_tool_roundtrip_and_resources(tmp_path: Path) -> None: asyncio.run(server.call_tool("get_report_section", {"section": "meta"})) ) assert report_section["codeclone_version"] + assert cast("dict[str, object]", report_section["analysis_thresholds"])[ + "design_findings" + ] metrics_section = _structured_tool_result( asyncio.run(server.call_tool("get_report_section", {"section": "metrics"})) ) diff --git a/tests/test_mcp_service.py b/tests/test_mcp_service.py index 166b572..68842d9 100644 --- a/tests/test_mcp_service.py +++ b/tests/test_mcp_service.py @@ -10,6 +10,7 @@ import json import subprocess from collections import OrderedDict +from collections.abc import Mapping from pathlib import Path from types import SimpleNamespace from typing import Any, cast @@ -19,6 +20,7 @@ from codeclone import mcp_service as mcp_service_mod from codeclone._cli_config import ConfigValidationError from codeclone.cache import Cache +from codeclone.contracts import REPORT_SCHEMA_VERSION from codeclone.mcp_service import ( CodeCloneMCPService, MCPAnalysisRequest, @@ -102,6 +104,43 @@ def _write_quality_fixture(root: Path) -> None: ) +def _write_clone_variant_fixture( + root: Path, + *, + relative_dir: str, + module_name: str, + seed: int, +) -> None: + fixture_dir = root.joinpath(relative_dir) + fixture_dir.mkdir(parents=True, exist_ok=True) + fixture_dir.joinpath("__init__.py").write_text("", "utf-8") + fixture_dir.joinpath(module_name).write_text( + ( + "def gamma(value: int) -> int:\n" + f" total = value * {seed}\n" + f" total -= {seed + 1}\n" + f" total *= {seed + 2}\n" + f" total -= {seed + 3}\n" + f" total *= {seed + 4}\n" + 
f" total -= {seed + 5}\n" + f" total *= {seed + 6}\n" + f" total -= {seed + 7}\n" + " return total\n\n" + "def delta(value: int) -> int:\n" + f" total = value * {seed}\n" + f" total -= {seed + 1}\n" + f" total *= {seed + 2}\n" + f" total -= {seed + 3}\n" + f" total *= {seed + 4}\n" + f" total -= {seed + 5}\n" + f" total *= {seed + 6}\n" + f" total -= {seed + 7}\n" + " return total\n" + ), + "utf-8", + ) + + def _dummy_run_record(root: Path, run_id: str) -> MCPRunRecord: return MCPRunRecord( run_id=run_id, @@ -158,6 +197,13 @@ def _file_registry(payload: dict[str, object]) -> dict[str, object]: return cast("dict[str, object]", inventory["file_registry"]) +def _mapping_child( + payload: dict[str, object] | Mapping[str, object], + key: str, +) -> dict[str, object]: + return cast("dict[str, object]", payload[key]) + + def test_mcp_service_analyze_repository_registers_latest_run(tmp_path: Path) -> None: _write_clone_fixture(tmp_path) service = CodeCloneMCPService(history_limit=4) @@ -174,7 +220,7 @@ def test_mcp_service_analyze_repository_registers_latest_run(tmp_path: Path) -> assert summary["run_id"] == latest["run_id"] assert summary["root"] == str(tmp_path) assert summary["analysis_mode"] == "full" - assert summary["report_schema_version"] == "2.1" + assert summary["report_schema_version"] == REPORT_SCHEMA_VERSION latest_baseline = cast("dict[str, object]", latest["baseline"]) latest_cache = cast("dict[str, object]", latest["cache"]) assert latest_baseline["status"] == "missing" @@ -239,6 +285,79 @@ def test_mcp_service_lists_findings_and_hotspots(tmp_path: Path) -> None: assert cast(int, hotspots["total"]) >= 1 +def test_mcp_service_hotspot_resources_and_triage_are_production_first( + tmp_path: Path, +) -> None: + _write_clone_fixture(tmp_path) + _write_clone_variant_fixture( + tmp_path, + relative_dir="tests", + module_name="test_dup.py", + seed=20, + ) + service = CodeCloneMCPService(history_limit=4) + summary = service.analyze_repository( + MCPAnalysisRequest( + 
root=str(tmp_path), + respect_pyproject=False, + cache_policy="off", + ) + ) + + production_hotspots = service.list_hotspots( + kind="production_hotspots", + detail_level="summary", + ) + test_fixture_hotspots = service.list_hotspots( + kind="test_fixture_hotspots", + detail_level="summary", + ) + triage = service.get_production_triage(max_hotspots=2, max_suggestions=2) + latest_triage = json.loads(service.read_resource("codeclone://latest/triage")) + + assert production_hotspots["run_id"] == summary["run_id"] + assert cast(int, production_hotspots["total"]) >= 1 + assert cast(int, test_fixture_hotspots["total"]) >= 1 + + triage_findings = _mapping_child(triage, "findings") + triage_suggestions = _mapping_child(triage, "suggestions") + findings_breakdown = cast("dict[str, int]", triage_findings["by_source_kind"]) + suggestions_breakdown = cast( + "dict[str, int]", + triage_suggestions["by_source_kind"], + ) + top_hotspots = _mapping_child(triage, "top_hotspots") + top_suggestions = _mapping_child(triage, "top_suggestions") + production_items = cast("list[dict[str, object]]", production_hotspots["items"]) + + assert triage["run_id"] == summary["run_id"] + assert _mapping_child(triage, "cache")["effective_freshness"] == "fresh" + assert findings_breakdown["production"] >= 1 + assert findings_breakdown["tests"] >= 1 + assert cast(int, triage_findings["outside_focus"]) >= 1 + assert suggestions_breakdown["production"] >= 1 + assert suggestions_breakdown["tests"] >= 1 + assert cast(int, triage_suggestions["outside_focus"]) >= 1 + assert top_hotspots["kind"] == "production_hotspots" + assert top_hotspots["available"] == production_hotspots["total"] + assert cast(int, top_hotspots["returned"]) >= 1 + assert all( + str(item["id"]) in {str(row["id"]) for row in production_items} + for item in cast("list[dict[str, object]]", top_hotspots["items"]) + ) + assert cast(int, top_suggestions["available"]) >= 1 + assert all( + str(item["source_kind"]) == "production" + for item in 
cast("list[dict[str, object]]", top_suggestions["items"]) + ) + assert latest_triage["run_id"] == summary["run_id"] + with pytest.raises( + MCPServiceContractError, + match="only as codeclone://latest/triage", + ): + service.read_resource(f"codeclone://runs/{summary['run_id']}/triage") + + def test_mcp_service_changed_runs_remediation_and_review_flow(tmp_path: Path) -> None: pkg = tmp_path / "pkg" pkg.mkdir() @@ -336,6 +455,52 @@ def test_mcp_service_granular_checks_pr_summary_and_resources( ) ) run_id = str(summary["run_id"]) + report_document = service.get_report_section(run_id=run_id, section="all") + design_thresholds = cast( + "dict[str, dict[str, object]]", + cast( + "dict[str, object]", + cast("dict[str, object]", report_document["meta"])["analysis_thresholds"], + )["design_findings"], + ) + assert design_thresholds == { + "complexity": { + "metric": "cyclomatic_complexity", + "operator": ">", + "value": 1, + }, + "coupling": { + "metric": "cbo", + "operator": ">", + "value": 10, + }, + "cohesion": { + "metric": "lcom4", + "operator": ">=", + "value": 4, + }, + } + finding_groups = cast( + "dict[str, object]", + cast("dict[str, object]", report_document["findings"])["groups"], + ) + design_groups = cast( + "list[dict[str, object]]", + cast("dict[str, object]", finding_groups["design"])["groups"], + ) + canonical_design_ids = {str(group["id"]) for group in design_groups} + listed_design_ids = { + str(item["id"]) + for item in cast( + "list[dict[str, object]]", + service.list_findings( + run_id=run_id, + family="design", + detail_level="summary", + )["items"], + ) + } + assert listed_design_ids == canonical_design_ids clones = service.check_clones( run_id=run_id, @@ -475,11 +640,11 @@ def test_mcp_service_summary_reuses_canonical_meta_for_cache_and_health( run_id=str(summary["run_id"]), section="metrics", ) - cache_summary = cast("dict[str, object]", summary["cache"]) - cache_meta = cast("dict[str, object]", report_meta["cache"]) - health_summary = 
cast("dict[str, object]", summary["health"]) - metrics_summary = cast("dict[str, object]", report_metrics["summary"]) - metrics_health = cast("dict[str, object]", metrics_summary["health"]) + cache_summary = _mapping_child(summary, "cache") + cache_meta = _mapping_child(report_meta, "cache") + health_summary = _mapping_child(summary, "health") + metrics_summary = _mapping_child(report_metrics, "summary") + metrics_health = _mapping_child(metrics_summary, "health") assert cache_summary["path"] == cache_meta["path"] assert cache_summary["status"] == cache_meta["status"] @@ -489,6 +654,38 @@ def test_mcp_service_summary_reuses_canonical_meta_for_cache_and_health( assert "families" not in report_metrics +def test_mcp_service_effective_freshness_classifies_summary_cache_usage() -> None: + service = CodeCloneMCPService(history_limit=4) + + assert ( + service._effective_freshness( + { + "cache": {"used": False}, + "inventory": {"files": {"analyzed": 2, "cached": 0}}, + } + ) + == "fresh" + ) + assert ( + service._effective_freshness( + { + "cache": {"used": True}, + "inventory": {"files": {"analyzed": 0, "cached": 2}}, + } + ) + == "reused" + ) + assert ( + service._effective_freshness( + { + "cache": {"used": True}, + "inventory": {"files": {"analyzed": 1, "cached": 2}}, + } + ) + == "mixed" + ) + + def test_mcp_service_metrics_sections_split_summary_and_detail( tmp_path: Path, ) -> None: @@ -551,9 +748,10 @@ def test_mcp_service_resources_expose_latest_summary_and_report(tmp_path: Path) latest_report = json.loads(service.read_resource("codeclone://latest/report.json")) assert latest_summary["run_id"] == summary["run_id"] + assert latest_summary["cache"]["effective_freshness"] == "fresh" assert latest_summary["inventory"]["file_registry"]["count"] >= 1 assert "items" not in latest_summary["inventory"]["file_registry"] - assert latest_report["report_schema_version"] == "2.1" + assert latest_report["report_schema_version"] == REPORT_SCHEMA_VERSION def 
test_mcp_service_hotspot_summary_preserves_fixtures_source_kind( @@ -978,7 +1176,7 @@ def test_mcp_service_all_section_and_optional_path_overrides(tmp_path: Path) -> ) report_document = service.get_report_section(section="all") - assert report_document["report_schema_version"] == "2.1" + assert report_document["report_schema_version"] == REPORT_SCHEMA_VERSION args = service._build_args( root_path=tmp_path, @@ -1540,48 +1738,6 @@ def test_mcp_service_additional_projection_and_error_branches( run_id = str(summary["run_id"]) record = service._runs.get(run_id) - complexity_group = mcp_service_mod._complexity_group_for_threshold_payload( - { - "qualname": "pkg.quality:hot", - "relative_path": "pkg/quality.py", - "start_line": 1, - "end_line": 5, - "cyclomatic_complexity": 99, - "nesting_depth": 4, - "risk": "high", - }, - threshold=20, - scan_root=str(tmp_path), - ) - assert complexity_group is not None - assert complexity_group["severity"] == "critical" - assert mcp_service_mod._coupling_group_for_threshold_payload( - { - "qualname": "pkg.quality:coupled", - "relative_path": "pkg/quality.py", - "start_line": 1, - "end_line": 5, - "cbo": 3, - "risk": "high", - "coupled_classes": ["A", "B"], - }, - threshold=1, - scan_root=str(tmp_path), - ) - assert mcp_service_mod._cohesion_group_for_threshold_payload( - { - "qualname": "pkg.quality:cohesive", - "relative_path": "pkg/quality.py", - "start_line": 1, - "end_line": 5, - "lcom4": 2, - "risk": "medium", - "method_count": 3, - "instance_var_count": 2, - }, - threshold=1, - scan_root=str(tmp_path), - ) assert mcp_service_mod._suggestion_finding_id_payload(object()) == "" assert mcp_service_mod._suggestion_finding_id_payload( SimpleNamespace( @@ -1811,33 +1967,12 @@ def _patched_get_finding( new_block=frozenset(), metrics_diff=None, ) - findings_section = cast( - "dict[str, object]", - fake_design_record.report_document["findings"], - ) - fake_design_groups = cast("dict[str, object]", findings_section["groups"]) - assert ( - 
len( - service._design_groups_for_record( - fake_design_record, - groups=fake_design_groups, - ) - ) - == 3 - ) - wrapped_group = service._design_singleton_group( - category="cohesion", - kind="class_hotspot", - severity="warning", - qualname="pkg.quality:cohesive", - filepath="pkg/quality.py", - start_line=1, - end_line=5, - item_data={"lcom4": 2}, - facts={"lcom4": 2}, - scan_root=str(tmp_path), - ) - assert wrapped_group["category"] == "cohesion" + design_findings = [ + finding + for finding in service._base_findings(fake_design_record) + if str(finding.get("family", "")) == "design" + ] + assert design_findings == [] detail_payload = service._project_finding_detail( { "id": "finding", diff --git a/tests/test_report.py b/tests/test_report.py index 0b77054..292ede8 100644 --- a/tests/test_report.py +++ b/tests/test_report.py @@ -825,6 +825,12 @@ def test_report_json_serializes_rich_suggestions_and_overview() -> None: overview = payload["derived"]["overview"] assert overview["families"]["clones"] == 0 assert overview["source_scope_breakdown"] == {} + assert overview["directory_hotspots"]["all"] == { + "total_directories": 0, + "returned": 0, + "has_more": False, + "items": [], + } assert payload["derived"]["hotlists"]["most_actionable_ids"] == [] diff --git a/tests/test_report_contract_coverage.py b/tests/test_report_contract_coverage.py index da183f3..d086b57 100644 --- a/tests/test_report_contract_coverage.py +++ b/tests/test_report_contract_coverage.py @@ -15,6 +15,7 @@ import codeclone.report.json_contract as json_contract_mod from codeclone import _coerce +from codeclone.contracts import REPORT_SCHEMA_VERSION from codeclone.models import ( ReportLocation, StructuralFindingGroup, @@ -489,6 +490,69 @@ def test_report_document_rich_invariants_and_renderers() -> None: design_groups = cast(list[dict[str, object]], design) categories = {str(item["category"]) for item in design_groups} assert {"complexity", "coupling", "cohesion", "dependency"}.issubset(categories) 
+ design_thresholds = cast( + "dict[str, dict[str, object]]", + cast( + "dict[str, object]", + cast("dict[str, object]", payload["meta"])["analysis_thresholds"], + )["design_findings"], + ) + assert design_thresholds["complexity"] == { + "metric": "cyclomatic_complexity", + "operator": ">", + "value": 20, + } + assert design_thresholds["coupling"] == { + "metric": "cbo", + "operator": ">", + "value": 10, + } + assert design_thresholds["cohesion"] == { + "metric": "lcom4", + "operator": ">=", + "value": 4, + } + directory_hotspots = cast( + "dict[str, object]", + cast("dict[str, object]", payload["derived"])["overview"], + )["directory_hotspots"] + hotspot_buckets = cast("dict[str, object]", directory_hotspots) + assert set(hotspot_buckets) == { + "all", + "clones", + "structural", + "complexity", + "cohesion", + "coupling", + "dead_code", + "dependency", + } + all_rows = cast( + "list[dict[str, object]]", + cast("dict[str, object]", hotspot_buckets["all"])["items"], + ) + assert { + "path": all_rows[0]["path"], + "finding_groups": all_rows[0]["finding_groups"], + "affected_items": all_rows[0]["affected_items"], + "files": all_rows[0]["files"], + "share_pct": all_rows[0]["share_pct"], + } == { + "path": "codeclone", + "finding_groups": 9, + "affected_items": 11, + "files": 3, + "share_pct": 68.8, + } + assert cast("dict[str, int]", all_rows[0]["kind_breakdown"]) == { + "clones": 3, + "structural": 1, + "dead_code": 1, + "complexity": 2, + "coupling": 1, + "cohesion": 1, + "dependency": 0, + } clones = cast(dict[str, object], groups["clones"]) block_groups = cast(list[dict[str, object]], clones["blocks"]) @@ -514,6 +578,158 @@ def test_report_document_rich_invariants_and_renderers() -> None: assert all("help" in rule for rule in run["tool"]["driver"]["rules"]) +def test_report_document_design_thresholds_can_change_canonical_findings() -> None: + payload = build_report_document( + func_groups={}, + block_groups={}, + segment_groups={}, + meta={ + "scan_root": 
"/repo/project", + "design_complexity_threshold": 30, + "design_coupling_threshold": 12, + "design_cohesion_threshold": 5, + }, + metrics={ + "complexity": { + "functions": [ + { + "qualname": "pkg.mod:hot", + "filepath": "/repo/project/pkg/mod.py", + "start_line": 10, + "end_line": 20, + "cyclomatic_complexity": 25, + "nesting_depth": 3, + "risk": "medium", + } + ] + }, + "coupling": { + "classes": [ + { + "qualname": "pkg.mod:Service", + "filepath": "/repo/project/pkg/mod.py", + "start_line": 30, + "end_line": 60, + "cbo": 11, + "risk": "high", + "coupled_classes": ["A", "B"], + } + ] + }, + "cohesion": { + "classes": [ + { + "qualname": "pkg.mod:Service", + "filepath": "/repo/project/pkg/mod.py", + "start_line": 30, + "end_line": 60, + "lcom4": 4, + "risk": "high", + "method_count": 4, + "instance_var_count": 1, + } + ] + }, + "dependencies": { + "cycles": [["pkg.alpha", "pkg.beta"]], + }, + }, + ) + finding_groups = cast( + "dict[str, object]", + cast("dict[str, object]", payload["findings"])["groups"], + ) + design_groups = cast( + "list[dict[str, object]]", + cast("dict[str, object]", finding_groups["design"])["groups"], + ) + assert [str(group["category"]) for group in design_groups] == ["dependency"] + thresholds = cast( + "dict[str, dict[str, object]]", + cast( + "dict[str, object]", + cast("dict[str, object]", payload["meta"])["analysis_thresholds"], + )["design_findings"], + ) + assert thresholds["complexity"]["value"] == 30 + assert thresholds["coupling"]["value"] == 12 + assert thresholds["cohesion"]["value"] == 5 + + +def test_directory_hotspots_has_more_root_paths_and_stable_sort() -> None: + findings = { + "groups": { + "clones": { + "functions": [ + { + "id": "clone:function:g1", + "family": "clone", + "category": "function", + "items": [ + {"relative_path": "a.py"}, + {"relative_path": "a.py"}, + ], + } + ], + "blocks": [], + "segments": [], + }, + "structural": {"groups": []}, + "dead_code": { + "groups": [ + { + "id": f"dead:{index}", + 
"family": "dead_code", + "category": "function", + "items": [ + {"relative_path": f"dir{index}/mod.py"}, + ], + } + for index in (5, 3, 1, 4, 2, 6) + ] + }, + "design": {"groups": []}, + } + } + + hotspots = overview_mod.build_directory_hotspots(findings=findings) + clone_rows = cast( + "list[dict[str, object]]", + cast("dict[str, object]", hotspots["clones"])["items"], + ) + assert clone_rows == [ + { + "path": ".", + "finding_groups": 1, + "affected_items": 2, + "files": 1, + "share_pct": 100.0, + "source_scope": { + "dominant_kind": "production", + "breakdown": { + "production": 2, + "tests": 0, + "fixtures": 0, + "other": 0, + }, + "impact_scope": "runtime", + }, + } + ] + dead_code_bucket = cast("dict[str, object]", hotspots["dead_code"]) + dead_code_rows = cast("list[dict[str, object]]", dead_code_bucket["items"]) + assert dead_code_bucket["total_directories"] == 6 + assert dead_code_bucket["returned"] == 5 + assert dead_code_bucket["has_more"] is True + assert [str(row["path"]) for row in dead_code_rows] == [ + "dir1", + "dir2", + "dir3", + "dir4", + "dir5", + ] + + def test_markdown_and_sarif_reuse_prebuilt_report_document() -> None: payload = _rich_report_document() md = to_markdown_report( @@ -1278,7 +1494,7 @@ def _broken_as_uri(self: Path) -> str: def test_render_sarif_report_document_without_srcroot_keeps_relative_payload() -> None: payload = { - "report_schema_version": "2.1", + "report_schema_version": REPORT_SCHEMA_VERSION, "meta": { "codeclone_version": "2.0.0b2", "analysis_mode": "ci", From 0cae34ad36da6cce3254717ba8c84b33fc35225d Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Mon, 30 Mar 2026 18:05:07 +0500 Subject: [PATCH 07/15] feat(core): canonicalize design thresholds and add directory hotspots to report projections --- .../pyproject_defaults/golden_expected_cli_snapshot.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/fixtures/golden_v2/pyproject_defaults/golden_expected_cli_snapshot.json 
b/tests/fixtures/golden_v2/pyproject_defaults/golden_expected_cli_snapshot.json index f202dbe..dc98485 100644 --- a/tests/fixtures/golden_v2/pyproject_defaults/golden_expected_cli_snapshot.json +++ b/tests/fixtures/golden_v2/pyproject_defaults/golden_expected_cli_snapshot.json @@ -2,7 +2,7 @@ "meta": { "python_tag": "cp313" }, - "report_schema_version": "2.1", + "report_schema_version": "2.2", "project_name": "pyproject_defaults", "scan_root": ".", "baseline_status": "missing", From fdec2af928fe5a6c6110f0de79ce10f770e95997 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Mon, 30 Mar 2026 22:58:58 +0500 Subject: [PATCH 08/15] feat(core): tighten report semantics and polish MCP and HTML projections --- AGENTS.md | 3 + CHANGELOG.md | 3 + README.md | 3 + codeclone/_html_css.py | 36 ++- codeclone/_html_report/_assemble.py | 18 +- codeclone/_html_report/_components.py | 57 ++--- codeclone/_html_report/_icons.py | 105 +++++++++ codeclone/_html_report/_sections/_overview.py | 90 +++++--- .../_html_report/_sections/_suggestions.py | 69 ++++-- codeclone/extractor.py | 72 ++---- codeclone/mcp_service.py | 209 ++++++++++++++---- codeclone/qualnames.py | 57 +++++ codeclone/report/__init__.py | 2 - codeclone/report/overview.py | 43 +++- codeclone/report/segments.py | 14 +- docs/architecture.md | 9 + docs/book/02-terminology.md | 27 +++ docs/book/05-core-pipeline.md | 29 +++ docs/book/08-report.md | 5 + docs/book/11-security-model.md | 17 ++ docs/book/15-metrics-and-quality-gates.md | 4 +- docs/book/17-suggestions-and-clone-typing.md | 2 + docs/book/20-mcp-interface.md | 14 +- docs/mcp.md | 4 +- tests/test_extractor.py | 25 ++- tests/test_html_report.py | 80 ++++++- tests/test_html_report_helpers.py | 122 ++++++++++ tests/test_mcp_service.py | 197 ++++++++++++++++- tests/test_report_contract_coverage.py | 108 +++++++++ 29 files changed, 1186 insertions(+), 238 deletions(-) create mode 100644 codeclone/qualnames.py diff --git a/AGENTS.md b/AGENTS.md index 591190d..c52de58 
100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -299,6 +299,7 @@ Before cutting a release: - Don’t introduce nondeterministic ordering (dict iteration, set ordering, filesystem traversal without sort). - Don’t make the base `codeclone` install depend on optional MCP runtime packages. - Don’t let MCP mutate baselines, source files, or repo state. +- Don’t let MCP re-synthesize design findings from raw metrics; read canonical `findings.groups.design` only. --- @@ -367,6 +368,8 @@ Use this map to route changes to the right owner module. `sys.exit` behavior here. - `codeclone/mcp_server.py` — optional MCP launcher/server wiring, transport config, and MCP tool/resource registration; keep dependency loading lazy so base installs/CI do not require MCP runtime packages. +- `tests/test_mcp_service.py`, `tests/test_mcp_server.py` — MCP contract and integration tests; run these when + touching any MCP surface. - `codeclone/html_report.py` — public HTML facade/re-export surface; preserve backward-compatible imports here; do not grow section/layout logic in this module. - `codeclone/_html_report/*` — actual HTML assembly, context shaping, tabs, sections, and overview/navigation behavior; diff --git a/CHANGELOG.md b/CHANGELOG.md index bfd0857..c013e74 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,9 @@ sync SPDX headers. split — all without changing canonical report schema until the later `2.2` report-threshold update below. - `cache.effective_freshness` marker and `get_production_triage` / `codeclone://latest/triage` for compact production-first overview. +- `compare_runs` now reports `mixed` when new regressions and `health_delta` point in opposite directions. +- `compare_runs` now reports `incomparable` and omits `health_delta` when run roots or effective analysis settings do not match. +- MCP summary/triage/health surfaces now mark `health` as unavailable in `clones_only` runs instead of emitting zeroed placeholders. 
- Fix hotlist key resolution for `production_hotspots` and `test_fixture_hotspots`. - Bump cache schema to `2.3` (stale metric entries rebuilt, not reused). diff --git a/README.md b/README.md index d358dac..56a3575 100644 --- a/README.md +++ b/README.md @@ -170,6 +170,9 @@ codeclone-mcp --transport streamable-http --port 8000 20 tools + 10 resources — deterministic, baseline-aware, and read-only. Never mutates source files, baselines, or repo state. Payloads are optimised for LLM context: compact summaries by default, full detail on demand. +Run comparison stays compact too: `compare_runs` reports `mixed` when finding deltas and run-to-run health move in +opposite directions, and `incomparable` when roots or effective analysis settings differ. +When metrics are skipped (`clones_only`), MCP marks `health` as unavailable instead of returning fake zeros. Docs: [MCP usage guide](https://orenlab.github.io/codeclone/mcp/) diff --git a/codeclone/_html_css.py b/codeclone/_html_css.py index 621989a..8923410 100644 --- a/codeclone/_html_css.py +++ b/codeclone/_html_css.py @@ -170,12 +170,14 @@ background:var(--bg-surface);border:1px solid var(--border);border-radius:var(--radius-lg); overflow-x:auto;scrollbar-width:none;-webkit-overflow-scrolling:touch} .main-tabs::-webkit-scrollbar{display:none} -.main-tab{position:relative;flex:1;text-align:center;padding:var(--sp-2) var(--sp-3); - background:none;border:none;cursor:pointer;font-size:.85rem;font-weight:500; - color:var(--text-muted);white-space:nowrap;border-radius:var(--radius-md); - transition:all var(--dur-fast) var(--ease)} +.main-tab{position:relative;flex:1;display:inline-flex;align-items:center;justify-content:center; + gap:var(--sp-1);text-align:center;padding:var(--sp-2) var(--sp-3);background:none; + border:none;cursor:pointer;font-size:.85rem;font-weight:500;color:var(--text-muted); + white-space:nowrap;border-radius:var(--radius-md);transition:all var(--dur-fast) var(--ease)} 
.main-tab:hover{color:var(--text-primary);background:var(--bg-raised)} .main-tab[aria-selected="true"]{color:var(--accent-primary);background:var(--accent-muted)} +.main-tab-icon{flex-shrink:0;opacity:.72} +.main-tab-label{display:inline-flex;align-items:center} .tab-count{display:inline-flex;align-items:center;justify-content:center;min-width:18px; height:18px;padding:0 5px;font-size:.7rem;font-weight:700;border-radius:9px; background:var(--bg-overlay);color:var(--text-muted);margin-left:var(--sp-1)} @@ -656,6 +658,23 @@ .breakdown-bar-track{height:6px;border-radius:3px;background:var(--bg-raised);overflow:hidden} .breakdown-bar-fill{display:block;height:100%;border-radius:3px; background:var(--accent-primary);transition:width .6s var(--ease)} +/* Directory hotspot entries */ +.dir-hotspot-list{display:flex;flex-direction:column;gap:0} +.dir-hotspot-entry{padding:var(--sp-2) 0;border-bottom:1px solid color-mix(in srgb,var(--border) 50%,transparent)} +.dir-hotspot-entry:last-child{border-bottom:none;padding-bottom:0} +.dir-hotspot-entry:first-child{padding-top:0} +.dir-hotspot-path{display:flex;align-items:center;gap:var(--sp-2);margin-bottom:4px;min-width:0} +.dir-hotspot-path code{font-size:.78rem;font-weight:600;color:var(--text-primary);line-height:1.3} +.dir-hotspot-bar-row{display:flex;align-items:center;gap:var(--sp-2);margin-bottom:3px} +.dir-hotspot-bar-track{flex:1;height:4px;border-radius:2px;background:var(--bg-raised); + overflow:hidden;display:flex} +.dir-hotspot-bar-prev{height:100%;background:var(--text-muted);opacity:.18} +.dir-hotspot-bar-cur{height:100%;background:var(--accent-primary);opacity:.7} +.dir-hotspot-pct{font-size:.7rem;font-weight:600;font-variant-numeric:tabular-nums; + color:var(--text-muted);min-width:3.2em;text-align:right} +.dir-hotspot-meta{display:flex;flex-wrap:wrap;gap:6px;font-size:.68rem;color:var(--text-muted)} +.dir-hotspot-meta span{font-variant-numeric:tabular-nums} +.dir-hotspot-meta-sep{opacity:.3} /* Health radar 
chart */ .health-radar{display:flex;justify-content:center;padding:var(--sp-3) 0} .health-radar svg{width:100%;max-width:520px;height:auto;overflow:visible} @@ -781,10 +800,10 @@ .suggestion-sev-inline{font-size:.72rem;font-weight:600;padding:1px var(--sp-1); border-radius:var(--radius-sm)} .suggestion-title{font-weight:600;font-size:.85rem;color:var(--text-primary);flex:1;min-width:0} -.suggestion-meta{display:flex;align-items:center;gap:var(--sp-1);flex-shrink:0;flex-wrap:wrap} -.suggestion-meta-badge{font-size:.68rem;font-family:var(--font-mono);font-weight:500; - padding:1px var(--sp-2);border-radius:var(--radius-sm);background:var(--bg-overlay); - color:var(--text-muted);white-space:nowrap} +.suggestion-meta{display:flex;align-items:center;gap:var(--sp-2);flex-shrink:0;flex-wrap:wrap} +.suggestion-meta-badge{font-size:.68rem;font-weight:600;padding:2px var(--sp-2); + border-radius:999px;background:var(--bg-overlay);color:var(--text-muted); + white-space:nowrap;line-height:1.2;font-variant-numeric:tabular-nums} .suggestion-effort--easy{color:var(--success);background:var(--success-muted, rgba(34,197,94,.1))} .suggestion-effort--moderate{color:var(--warning);background:var(--warning-muted)} .suggestion-effort--hard{color:var(--error);background:var(--error-muted)} @@ -1092,6 +1111,7 @@ linear-gradient(to left,rgba(0,0,0,.12),transparent) right center / 10px 100% no-repeat scroll, var(--bg-surface)} .main-tab{flex:none;padding:var(--sp-1) var(--sp-2);font-size:.78rem} + .main-tab-icon{width:13px;height:13px} } @media(max-width:480px){ .overview-kpi-grid{grid-template-columns:1fr} diff --git a/codeclone/_html_report/_assemble.py b/codeclone/_html_report/_assemble.py index ca8edb3..95654c8 100644 --- a/codeclone/_html_report/_assemble.py +++ b/codeclone/_html_report/_assemble.py @@ -21,7 +21,7 @@ from ..structural_findings import normalize_structural_findings from ..templates import FONT_CSS_URL, REPORT_TEMPLATE from ._context import _meta_pick, build_context -from 
._icons import BRAND_LOGO, ICONS +from ._icons import BRAND_LOGO, ICONS, section_icon_html from ._sections._clones import render_clones_panel from ._sections._coupling import render_quality_panel from ._sections._dead_code import render_dead_code_panel @@ -119,6 +119,15 @@ def _tab_badge(count: int) -> str: return f'{count}' # -- Main tab navigation -- + tab_icon_keys: dict[str, str] = { + "overview": "overview", + "clones": "clones", + "quality": "quality", + "dependencies": "dependencies", + "dead-code": "dead-code", + "suggestions": "suggestions", + "structural-findings": "structural-findings", + } tab_defs = [ ("overview", "Overview", overview_html, ""), ("clones", "Clones", clones_html, _tab_badge(ctx.clone_groups_total)), @@ -151,10 +160,15 @@ def _tab_badge(count: int) -> str: extra = tab_extra_attrs.get(tab_id, "") if extra: extra = " " + extra + tab_icon = section_icon_html( + tab_icon_keys.get(tab_id, ""), + class_name="main-tab-icon", + size=14, + ) tab_buttons.append( f'" + f'{tab_icon}{tab_label}{badge}' ) active = " active" if idx == 0 else "" tab_panels.append( diff --git a/codeclone/_html_report/_components.py b/codeclone/_html_report/_components.py index 3a0d5a8..cdd2dde 100644 --- a/codeclone/_html_report/_components.py +++ b/codeclone/_html_report/_components.py @@ -14,6 +14,7 @@ from .. 
import _coerce from .._html_badges import _source_kind_badge_html from .._html_escape import _escape_attr, _escape_html +from ._icons import section_icon_html _as_int = _coerce.as_int _as_mapping = _coerce.as_mapping @@ -52,52 +53,24 @@ def overview_cluster_header(title: str, subtitle: str | None = None) -> str: ) -_ICON_ALERT = ( - '' - '' - '' -) - -_ICON_PIE = ( - '' - '' - '' -) - -_ICON_RADAR = ( - '' - '' - '' - '' - '' -) - -_ICON_BAR = ( - '' - '' - '' - '' -) - -_SUMMARY_ICONS: dict[str, str] = { - "top risks": _ICON_ALERT, - "source breakdown": _ICON_PIE, - "health profile": _ICON_RADAR, - "issue breakdown": _ICON_BAR, +_SUMMARY_ICON_KEYS: dict[str, tuple[str, str]] = { + "top risks": ("top-risks", "summary-icon summary-icon--risk"), + "issue breakdown": ("issue-breakdown", "summary-icon summary-icon--info"), + "source breakdown": ("source-breakdown", "summary-icon summary-icon--info"), + "all findings": ("all-findings", "summary-icon summary-icon--info"), + "clone groups": ("clone-groups", "summary-icon summary-icon--info"), + "low cohesion": ("low-cohesion", "summary-icon summary-icon--info"), + "health profile": ("health-profile", "summary-icon summary-icon--info"), } def overview_summary_item_html(*, label: str, body_html: str) -> str: - icon = _SUMMARY_ICONS.get(label.lower(), "") + icon_key, icon_class = _SUMMARY_ICON_KEYS.get(label.lower(), ("", "")) + icon = ( + section_icon_html(icon_key, class_name=icon_class) + if icon_key and icon_class + else "" + ) return ( '
    ' '
    ' diff --git a/codeclone/_html_report/_icons.py b/codeclone/_html_report/_icons.py index 2109c13..c043cb3 100644 --- a/codeclone/_html_report/_icons.py +++ b/codeclone/_html_report/_icons.py @@ -17,6 +17,15 @@ def _svg(size: int, sw: str, body: str) -> str: ) +def _svg_with_class(size: int, sw: str, body: str, *, class_name: str = "") -> str: + class_attr = f' class="{class_name}"' if class_name else "" + return ( + f'{body}' + ) + + BRAND_LOGO = ( '
    ' + "".join(parts) + "
    " -def _directory_kind_summary(kind_breakdown: Mapping[str, object]) -> str: - rows = [ +def _dir_meta_span(val: int, label: str) -> str: + return f"{val} {_escape_html(label)}" + + +_DIR_META_SEP = '\u00b7' + + +def _directory_kind_meta_parts( + kind_breakdown: Mapping[str, object], + *, + total_groups: int, +) -> list[str]: + kind_rows = [ (str(kind), _as_int(count)) for kind, count in kind_breakdown.items() if _as_int(count) > 0 ] - rows.sort(key=lambda item: (-item[1], item[0])) - top_rows = rows[:2] - if not top_rows: - return "" - return "; ".join( - f"{count} {_DIRECTORY_KIND_LABELS.get(kind, kind)}" for kind, count in top_rows - ) + kind_rows.sort(key=lambda item: (-item[1], item[0])) + if len(kind_rows) <= 1: + return [] + parts: list[str] = [] + for kind, count in kind_rows[:2]: + parts.append(_dir_meta_span(count, _DIRECTORY_KIND_LABELS.get(kind, kind))) + return parts def _directory_hotspot_bucket_body(bucket: str, payload: Mapping[str, object]) -> str: @@ -408,40 +419,57 @@ def _directory_hotspot_bucket_body(bucket: str, payload: Mapping[str, object]) - "
    " ) rows: list[str] = [] + cumulative = 0.0 for item in items: path = str(item.get("path", ".")).strip() or "." source_scope = _as_mapping(item.get("source_scope")) dominant_kind = ( str(source_scope.get("dominant_kind", "other")).strip() or "other" ) - detail = ( - f"{_as_int(item.get('finding_groups'))} groups; " - f"{_as_int(item.get('affected_items'))} items; " - f"{_as_int(item.get('files'))} files; " - f"{_as_float(item.get('share_pct')):.1f}%" - ) - kind_summary = "" + share_pct = _as_float(item.get("share_pct")) + groups = _as_int(item.get("finding_groups")) + affected = _as_int(item.get("affected_items")) + files = _as_int(item.get("files")) + + meta_parts = [ + _dir_meta_span(groups, "groups"), + _dir_meta_span(affected, "items"), + _dir_meta_span(files, "files"), + ] if bucket == "all": - kind_summary = _directory_kind_summary( - _as_mapping(item.get("kind_breakdown")) + meta_parts.extend( + _directory_kind_meta_parts( + _as_mapping(item.get("kind_breakdown")), + total_groups=groups, + ) ) - kind_html = ( - f'
    {_escape_html(kind_summary)}
    ' - if kind_summary - else "" + + path_html = _escape_html(path).replace("/", "/") + + prev_pct = min(cumulative, 100.0) + cur_pct = min(share_pct, 100.0 - prev_pct) + cumulative += share_pct + + bar_html = ( + '' + f'' + f'' + "" ) + rows.append( - "
  • " - '
    ' - f"{_escape_html(path)} {_source_kind_badge_html(dominant_kind)}" + '
    ' + '
    ' + f"{path_html}" + f" {_source_kind_badge_html(dominant_kind)}" + "
    " + f'
    {bar_html}' + f'{share_pct:.1f}%' + "
    " + f'
    {_DIR_META_SEP.join(meta_parts)}
    ' "
    " - f'
    {_escape_html(detail)}
    ' - f"{kind_html}" - "
  • " ) - return ( - subtitle_html + '
      ' + "".join(rows) + "
    " - ) + return subtitle_html + '
    ' + "".join(rows) + "
    " def _directory_hotspots_section(ctx: ReportContext) -> str: diff --git a/codeclone/_html_report/_sections/_suggestions.py b/codeclone/_html_report/_sections/_suggestions.py index d9aa766..be1e33b 100644 --- a/codeclone/_html_report/_sections/_suggestions.py +++ b/codeclone/_html_report/_sections/_suggestions.py @@ -37,6 +37,11 @@ from .._context import ReportContext _as_int = _coerce.as_int +_CLONE_KIND_CHIP_LABELS: dict[str, str] = { + "function": "Function", + "block": "Block", + "segment": "Segment", +} def _render_fact_summary(raw: str) -> str: @@ -74,6 +79,34 @@ def _format_source_breakdown( return " \u00b7 ".join(f"{source_kind_label(k)} {c}" for k, c in rows if c > 0) +def _suggestion_context_labels(s: Suggestion) -> tuple[str, ...]: + labels: list[str] = [] + source_label = source_kind_label(s.source_kind) + if source_label: + labels.append(source_label) + if s.category == CATEGORY_CLONE: + kind_label = _CLONE_KIND_CHIP_LABELS.get(s.finding_kind.strip().lower()) + if kind_label: + labels.append(kind_label) + if s.clone_type: + labels.append(s.clone_type) + return tuple(labels) + category_label = s.category.replace("_", " ").title() + if category_label: + labels.append(category_label) + return tuple(labels) + + +def _priority_badge_label(priority: float) -> str: + return f"Priority {priority:g}" + + +def _spread_label(*, spread_functions: int, spread_files: int) -> str: + function_word = "function" if spread_functions == 1 else "functions" + file_word = "file" if spread_files == 1 else "files" + return f"{spread_functions} {function_word} \u00b7 {spread_files} {file_word}" + + def _render_card(s: Suggestion, ctx: ReportContext) -> str: actionable = "true" if s.severity != "info" else "false" spread_bucket = "high" if s.spread_files > 1 or s.spread_functions > 1 else "low" @@ -81,18 +114,11 @@ def _render_card(s: Suggestion, ctx: ReportContext) -> str: facts_source = _escape_html(breakdown_text or source_kind_label(s.source_kind)) facts_location = 
_escape_html(s.location_label or s.location) - # Context chips — more visible than a single muted line - ctx_chips: list[str] = [] - sk = source_kind_label(s.source_kind) - if sk: - ctx_chips.append(f'{_escape_html(sk)}') - cat = s.category.replace("_", " ") - if cat: - ctx_chips.append(f'{_escape_html(cat)}') - if s.clone_type: - ctx_chips.append( - f'{_escape_html(s.clone_type)}' - ) + # Context chips stay compact and specific: source scope first, then kind. + ctx_chips = [ + f'{_escape_html(label)}' + for label in _suggestion_context_labels(s) + ] ctx_html = f'
    {"".join(ctx_chips)}
    ' # Next step — primary actionable CTA @@ -109,9 +135,12 @@ def _render_card(s: Suggestion, ctx: ReportContext) -> str: # Effort badge — color-coded effort_cls = f" suggestion-effort--{_escape_html(s.effort)}" - - # Priority — clean display (drop trailing zeros) - priority_str = f"{s.priority:g}" + effort_label = s.effort.title() + priority_label = _priority_badge_label(s.priority) + spread_label = _spread_label( + spread_functions=s.spread_functions, + spread_files=s.spread_files, + ) # Locations inside details locs_html = "" @@ -155,9 +184,9 @@ def _render_card(s: Suggestion, ctx: ReportContext) -> str: f'{_escape_html(s.severity)}' f'{_escape_html(s.title)}' '' - f'{_escape_html(s.effort)}' - f'P{priority_str}' - f'{s.spread_functions} fn / {s.spread_files} files' + f'{_escape_html(effort_label)}' + f'{_escape_html(priority_label)}' + f'{_escape_html(spread_label)}' "" # -- body -- '
    ' @@ -174,7 +203,7 @@ def _render_card(s: Suggestion, ctx: ReportContext) -> str: '
    Facts
    ' '
    ' f"
    Finding
    {_escape_html(s.fact_kind or s.category)}
    " - f"
    Spread
    {s.spread_functions} fn / {s.spread_files} files
    " + f"
    Spread
    {_escape_html(spread_label)}
    " f"
    Source
    {facts_source}
    " f"
    Scope
    {facts_location}
    " "
    " @@ -183,7 +212,7 @@ def _render_card(s: Suggestion, ctx: ReportContext) -> str: '
    ' f"
    Severity
    {sev_dd}
    " f"
    Confidence
    {_escape_html(s.confidence)}
    " - f"
    Priority
    {priority_str}
    " + f"
    Priority
    {_escape_html(priority_label)}
    " f"
    Family
    {_escape_html(s.finding_family)}
    " "
    " "" diff --git a/codeclone/extractor.py b/codeclone/extractor.py index a23b559..b1aef73 100644 --- a/codeclone/extractor.py +++ b/codeclone/extractor.py @@ -17,6 +17,7 @@ from hashlib import sha1 as _sha1 from typing import TYPE_CHECKING, Literal, NamedTuple +from . import qualnames as _qualnames from .blockhash import stmt_hashes from .blocks import extract_blocks, extract_segments from .cfg import CFGBuilder @@ -63,7 +64,6 @@ __all__ = [ "Unit", - "_QualnameCollector", "extract_units_and_stats_from_source", ] @@ -78,10 +78,8 @@ class _ParseTimeoutError(Exception): pass -# Sync or async function definition node. -FunctionNode = ast.FunctionDef | ast.AsyncFunctionDef # Any named declaration: function, async function, or class. -_NamedDeclarationNode = FunctionNode | ast.ClassDef +_NamedDeclarationNode = _qualnames.FunctionNode | ast.ClassDef # Unique key for a declaration's token index: (start_line, end_line, qualname). _DeclarationTokenIndexKey = tuple[int, int, str] @@ -273,57 +271,13 @@ def _declaration_end_line( return _fallback_declaration_end_line(node, start_line=start_line) -class _QualnameCollector(ast.NodeVisitor): - __slots__ = ( - "class_count", - "class_nodes", - "funcs", - "function_count", - "method_count", - "stack", - "units", - ) - - def __init__(self) -> None: - self.stack: list[str] = [] - self.units: list[tuple[str, FunctionNode]] = [] - self.class_nodes: list[tuple[str, ast.ClassDef]] = [] - self.funcs: dict[str, FunctionNode] = {} - self.class_count = 0 - self.function_count = 0 - self.method_count = 0 - - def visit_ClassDef(self, node: ast.ClassDef) -> None: - self.class_count += 1 - class_qualname = ".".join([*self.stack, node.name]) if self.stack else node.name - self.class_nodes.append((class_qualname, node)) - self.stack.append(node.name) - self.generic_visit(node) - self.stack.pop() - - def _register_function(self, node: FunctionNode) -> None: - name = ".".join([*self.stack, node.name]) if self.stack else node.name - if self.stack: 
- self.method_count += 1 - else: - self.function_count += 1 - self.units.append((name, node)) - self.funcs[name] = node - - def visit_FunctionDef(self, node: ast.FunctionDef) -> None: - self._register_function(node) - - def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef) -> None: - self._register_function(node) - - # ========================= # CFG fingerprinting # ========================= def _cfg_fingerprint_and_complexity( - node: FunctionNode, + node: _qualnames.FunctionNode, cfg: NormalizationConfig, qualname: str, ) -> tuple[str, int]: @@ -517,7 +471,7 @@ def _is_protocol_class( return False -def _is_non_runtime_candidate(node: FunctionNode) -> bool: +def _is_non_runtime_candidate(node: _qualnames.FunctionNode) -> bool: for decorator in node.decorator_list: name = _dotted_expr_name(decorator) if name is None: @@ -537,7 +491,7 @@ def _node_line_span(node: ast.AST) -> tuple[int, int] | None: def _eligible_unit_shape( - node: FunctionNode, + node: _qualnames.FunctionNode, *, min_loc: int, min_stmt: int, @@ -595,7 +549,7 @@ def _dead_candidate_kind(local_name: str) -> Literal["function", "method"]: def _should_skip_dead_candidate( local_name: str, - node: FunctionNode, + node: _qualnames.FunctionNode, *, protocol_class_qualnames: set[str], ) -> bool: @@ -643,7 +597,7 @@ def _dead_candidate_for_unit( *, module_name: str, local_name: str, - node: FunctionNode, + node: _qualnames.FunctionNode, filepath: str, suppression_index: Mapping[SuppressionTargetKey, tuple[str, ...]], protocol_class_qualnames: set[str], @@ -687,7 +641,7 @@ def _collect_load_reference_node( def _resolve_referenced_qualnames( *, module_name: str, - collector: _QualnameCollector, + collector: _qualnames.QualnameCollector, state: _ModuleWalkState, ) -> frozenset[str]: top_level_class_by_name = { @@ -737,7 +691,7 @@ def _collect_module_walk_data( *, tree: ast.AST, module_name: str, - collector: _QualnameCollector, + collector: _qualnames.QualnameCollector, collect_referenced_names: bool, 
) -> _ModuleWalkResult: """Single ast.walk that collects imports, deps, names, qualnames & protocol aliases. @@ -793,7 +747,7 @@ def _collect_dead_candidates( *, filepath: str, module_name: str, - collector: _QualnameCollector, + collector: _qualnames.QualnameCollector, protocol_symbol_aliases: frozenset[str] = frozenset({"Protocol"}), protocol_module_aliases: frozenset[str] = frozenset( {"typing", "typing_extensions"} @@ -861,7 +815,7 @@ def _collect_declaration_targets( *, filepath: str, module_name: str, - collector: _QualnameCollector, + collector: _qualnames.QualnameCollector, source_tokens: tuple[tokenize.TokenInfo, ...] = (), source_token_index: Mapping[_DeclarationTokenIndexKey, int] | None = None, include_inline_lines: bool = False, @@ -940,7 +894,7 @@ def _build_suppression_index_for_source( source: str, filepath: str, module_name: str, - collector: _QualnameCollector, + collector: _qualnames.QualnameCollector, ) -> Mapping[SuppressionTargetKey, tuple[str, ...]]: suppression_directives = extract_suppression_directives(source) if not suppression_directives: @@ -1002,7 +956,7 @@ def extract_units_and_stats_from_source( except SyntaxError as e: raise ParseError(f"Failed to parse {filepath}: {e}") from e - collector = _QualnameCollector() + collector = _qualnames.QualnameCollector() collector.visit(tree) source_lines = source.splitlines() source_line_count = len(source_lines) diff --git a/codeclone/mcp_service.py b/codeclone/mcp_service.py index 90edbb0..3778552 100644 --- a/codeclone/mcp_service.py +++ b/codeclone/mcp_service.py @@ -388,6 +388,7 @@ class MCPRunRecord: run_id: str root: Path request: MCPAnalysisRequest + comparison_settings: tuple[object, ...] report_document: dict[str, object] summary: dict[str, object] changed_paths: tuple[str, ...] 
@@ -638,6 +639,7 @@ def analyze_repository(self, request: MCPAnalysisRequest) -> dict[str, object]: run_id=run_id, root=root_path, request=request, + comparison_settings=self._comparison_settings(args=args, request=request), report_document=report_document, summary=base_summary, changed_paths=changed_paths, @@ -662,6 +664,7 @@ def analyze_repository(self, request: MCPAnalysisRequest) -> dict[str, object]: run_id=run_id, root=root_path, request=request, + comparison_settings=self._comparison_settings(args=args, request=request), report_document=report_document, summary=summary, changed_paths=changed_paths, @@ -714,33 +717,60 @@ def compare_runs( common = before_ids & after_ids health_before = self._summary_health_score(before.summary) health_after = self._summary_health_score(after.summary) - health_delta = health_after - health_before - verdict = self._comparison_verdict( - regressions=len(regressions), - improvements=len(improvements), - health_delta=health_delta, + comparability = self._comparison_scope(before=before, after=after) + comparable = bool(comparability["comparable"]) + health_delta = ( + health_after - health_before + if comparable and health_before is not None and health_after is not None + else None + ) + verdict = ( + self._comparison_verdict( + regressions=len(regressions), + improvements=len(improvements), + health_delta=health_delta, + ) + if comparable + else "incomparable" + ) + regressions_payload = ( + [ + self._finding_summary_card(after, after_findings[finding_id]) + for finding_id in regressions + ] + if comparable + else [] + ) + improvements_payload = ( + [ + self._finding_summary_card(before, before_findings[finding_id]) + for finding_id in improvements + ] + if comparable + else [] ) return { "before": { "run_id": before.run_id, + "root": str(before.root), + "analysis_mode": before.request.analysis_mode, "health": health_before, }, "after": { "run_id": after.run_id, + "root": str(after.root), + "analysis_mode": 
after.request.analysis_mode, "health": health_after, }, + "comparability": comparability, "health_delta": health_delta, "verdict": verdict, - "regressions": [ - self._finding_summary_card(after, after_findings[finding_id]) - for finding_id in regressions - ], - "improvements": [ - self._finding_summary_card(before, before_findings[finding_id]) - for finding_id in improvements - ], - "unchanged_count": len(common), + "regressions": regressions_payload, + "improvements": improvements_payload, + "unchanged_count": len(common) if comparable else None, "summary": self._comparison_summary_text( + comparable=comparable, + comparability_reason=str(comparability["reason"]), regressions=len(regressions), improvements=len(improvements), health_delta=health_delta, @@ -1054,7 +1084,7 @@ def get_production_triage( return { "run_id": record.run_id, "base_uri": record.root.as_uri(), - "health": dict(self._as_mapping(summary.get("health"))), + "health": dict(self._summary_health_payload(summary)), "cache": dict(self._as_mapping(summary.get("cache"))), "findings": { "total": len(findings), @@ -1133,10 +1163,10 @@ def generate_pr_summary( }, health_delta=self._summary_health_delta(record.summary), ) - payload = { + payload: dict[str, object] = { "run_id": record.run_id, "changed_paths": list(paths_filter), - "health": self._as_mapping(record.summary.get("health")), + "health": self._summary_health_payload(record.summary), "health_delta": self._summary_health_delta(record.summary), "verdict": verdict, "new_findings_in_changed_files": changed_items, @@ -1478,7 +1508,7 @@ def _render_resource(self, record: MCPRunRecord, suffix: str) -> str: ) if suffix == "health": return json.dumps( - self._as_mapping(record.summary.get("health")), + self._summary_health_payload(record.summary), ensure_ascii=False, indent=2, sort_keys=True, @@ -1624,16 +1654,82 @@ def _prune_session_state(self) -> None: for run_id in stale_run_ids: state_map.pop(run_id, None) - def _summary_health_score(self, summary: 
Mapping[str, object]) -> int: - health = self._as_mapping(summary.get("health")) + def _summary_health_score(self, summary: Mapping[str, object]) -> int | None: + health = self._summary_health_payload(summary) + if health.get("available") is False: + return None score = health.get("score", 0) return _as_int(score, 0) - def _summary_health_delta(self, summary: Mapping[str, object]) -> int: + def _summary_health_delta(self, summary: Mapping[str, object]) -> int | None: + if self._summary_health_payload(summary).get("available") is False: + return None metrics_diff = self._as_mapping(summary.get("metrics_diff")) value = metrics_diff.get("health_delta", 0) return _as_int(value, 0) + def _summary_health_payload( + self, + summary: Mapping[str, object], + ) -> dict[str, object]: + if str(summary.get("analysis_mode", "")) == "clones_only": + return {"available": False, "reason": "metrics_skipped"} + health = dict(self._as_mapping(summary.get("health"))) + if health: + return health + return {"available": False, "reason": "unavailable"} + + def _comparison_settings( + self, + *, + args: Namespace, + request: MCPAnalysisRequest, + ) -> tuple[object, ...]: + return ( + request.analysis_mode, + _as_int(args.min_loc, DEFAULT_MIN_LOC), + _as_int(args.min_stmt, DEFAULT_MIN_STMT), + _as_int(args.block_min_loc, DEFAULT_BLOCK_MIN_LOC), + _as_int(args.block_min_stmt, DEFAULT_BLOCK_MIN_STMT), + _as_int(args.segment_min_loc, DEFAULT_SEGMENT_MIN_LOC), + _as_int(args.segment_min_stmt, DEFAULT_SEGMENT_MIN_STMT), + _as_int( + args.design_complexity_threshold, + DEFAULT_REPORT_DESIGN_COMPLEXITY_THRESHOLD, + ), + _as_int( + args.design_coupling_threshold, + DEFAULT_REPORT_DESIGN_COUPLING_THRESHOLD, + ), + _as_int( + args.design_cohesion_threshold, + DEFAULT_REPORT_DESIGN_COHESION_THRESHOLD, + ), + ) + + def _comparison_scope( + self, + *, + before: MCPRunRecord, + after: MCPRunRecord, + ) -> dict[str, object]: + same_root = before.root == after.root + same_analysis_settings = 
before.comparison_settings == after.comparison_settings + if same_root and same_analysis_settings: + reason = "comparable" + elif not same_root and not same_analysis_settings: + reason = "different_root_and_analysis_settings" + elif not same_root: + reason = "different_root" + else: + reason = "different_analysis_settings" + return { + "comparable": same_root and same_analysis_settings, + "same_root": same_root, + "same_analysis_settings": same_analysis_settings, + "reason": reason, + } + def _severity_rank(self, severity: str) -> int: return { SEVERITY_CRITICAL: 3, @@ -2399,7 +2495,7 @@ def _build_changed_projection( "new": new_count, "known": known_count, "items": items, - "health": dict(self._as_mapping(record.summary.get("health"))), + "health": dict(self._summary_health_payload(record.summary)), "health_delta": health_delta, "verdict": self._changed_verdict( changed_projection={"new": new_count, "total": len(items)}, @@ -2427,9 +2523,10 @@ def _augment_summary_with_changed( for item in self._as_sequence(changed_projection.get("items"))[:10] ], } - payload["health_delta"] = _as_int( - changed_projection.get("health_delta", 0), - 0, + payload["health_delta"] = ( + _as_int(changed_projection.get("health_delta", 0), 0) + if changed_projection.get("health_delta") is not None + else None ) payload["verdict"] = str(changed_projection.get("verdict", "stable")) return payload @@ -2438,11 +2535,17 @@ def _changed_verdict( self, *, changed_projection: Mapping[str, object], - health_delta: int, + health_delta: int | None, ) -> str: - if _as_int(changed_projection.get("new", 0), 0) > 0 or health_delta < 0: + if _as_int(changed_projection.get("new", 0), 0) > 0 or ( + health_delta is not None and health_delta < 0 + ): return "regressed" - if _as_int(changed_projection.get("total", 0), 0) == 0 and health_delta > 0: + if ( + _as_int(changed_projection.get("total", 0), 0) == 0 + and health_delta is not None + and health_delta > 0 + ): return "improved" return "stable" @@ 
-2472,24 +2575,48 @@ def _comparison_verdict( *, regressions: int, improvements: int, - health_delta: int, + health_delta: int | None, ) -> str: - if regressions > 0 or health_delta < 0: + has_negative_signal = regressions > 0 or ( + health_delta is not None and health_delta < 0 + ) + has_positive_signal = improvements > 0 or ( + health_delta is not None and health_delta > 0 + ) + if has_negative_signal and has_positive_signal: + return "mixed" + if has_negative_signal: return "regressed" - if improvements > 0 or health_delta > 0: + if has_positive_signal: return "improved" return "stable" def _comparison_summary_text( self, *, + comparable: bool, + comparability_reason: str, regressions: int, improvements: int, - health_delta: int, + health_delta: int | None, ) -> str: + if not comparable: + reason_text = { + "different_root": "different roots", + "different_analysis_settings": "different analysis settings", + "different_root_and_analysis_settings": ( + "different roots and analysis settings" + ), + }.get(comparability_reason, "incomparable runs") + return f"Finding and run health deltas omitted ({reason_text})" + if health_delta is None: + return ( + f"{improvements} findings resolved, {regressions} new regressions; " + "run health delta omitted (metrics unavailable)" + ) return ( f"{improvements} findings resolved, {regressions} new regressions, " - f"health delta {health_delta:+d}" + f"run health delta {health_delta:+d}" ) def _render_pr_summary_markdown(self, payload: Mapping[str, object]) -> str: @@ -2510,13 +2637,19 @@ def _render_pr_summary_markdown(self, payload: Mapping[str, object]) -> str: for item in self._as_sequence(payload.get("blocking_gates")) if str(item) ] + health_line = ( + f"Health: {score}/100 ({grade}) | Delta: {delta:+d} | " + f"Verdict: {payload.get('verdict', 'stable')}" + if payload.get("health_delta") is not None + else ( + f"Health: {score}/100 ({grade}) | Delta: n/a | " + f"Verdict: {payload.get('verdict', 'stable')}" + ) + ) lines = 
[ "## CodeClone Summary", "", - ( - f"Health: {score}/100 ({grade}) | Delta: {delta:+d} | " - f"Verdict: {payload.get('verdict', 'stable')}" - ), + health_line, "", f"### New findings in changed files ({len(changed_items)})", ] @@ -2999,6 +3132,7 @@ def _build_run_summary_payload( "failures": list(failures), } payload["cache"] = self._summary_cache_payload(payload) + payload["health"] = self._summary_health_payload(payload) return payload def _summary_payload( @@ -3009,6 +3143,7 @@ def _summary_payload( cache = self._as_mapping(payload.get("cache")) if cache: payload["cache"] = self._summary_cache_payload(summary) + payload["health"] = self._summary_health_payload(payload) inventory = self._as_mapping(payload.get("inventory")) if inventory: payload["inventory"] = self._slim_inventory(inventory) diff --git a/codeclone/qualnames.py b/codeclone/qualnames.py new file mode 100644 index 0000000..a63229b --- /dev/null +++ b/codeclone/qualnames.py @@ -0,0 +1,57 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import ast + +__all__ = ["FunctionNode", "QualnameCollector"] + +FunctionNode = ast.FunctionDef | ast.AsyncFunctionDef + + +class QualnameCollector(ast.NodeVisitor): + __slots__ = ( + "class_count", + "class_nodes", + "funcs", + "function_count", + "method_count", + "stack", + "units", + ) + + def __init__(self) -> None: + self.stack: list[str] = [] + self.units: list[tuple[str, FunctionNode]] = [] + self.class_nodes: list[tuple[str, ast.ClassDef]] = [] + self.funcs: dict[str, FunctionNode] = {} + self.class_count = 0 + self.function_count = 0 + self.method_count = 0 + + def visit_ClassDef(self, node: ast.ClassDef) -> None: + self.class_count += 1 + class_qualname = ".".join([*self.stack, node.name]) if self.stack else node.name + self.class_nodes.append((class_qualname, node)) + self.stack.append(node.name) + self.generic_visit(node) + self.stack.pop() + + def _register_function(self, node: FunctionNode) -> None: + name = ".".join([*self.stack, node.name]) if self.stack else node.name + if self.stack: + self.method_count += 1 + else: + self.function_count += 1 + self.units.append((name, node)) + self.funcs[name] = node + + def visit_FunctionDef(self, node: ast.FunctionDef) -> None: + self._register_function(node) + + def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef) -> None: + self._register_function(node) diff --git a/codeclone/report/__init__.py b/codeclone/report/__init__.py index 31e295a..79da57b 100644 --- a/codeclone/report/__init__.py +++ b/codeclone/report/__init__.py @@ -6,7 +6,6 @@ from __future__ import annotations -from ..extractor import _QualnameCollector from ..grouping import build_block_groups, build_groups, build_segment_groups from .blocks import merge_block_items as _merge_block_items from .blocks import prepare_block_report_groups @@ -51,7 +50,6 @@ "_FORBIDDEN_STMTS", "GroupItem", "GroupMap", - "_QualnameCollector", 
"_SegmentAnalysis", "_analyze_segment_statements", "_assign_targets_attribute_only", diff --git a/codeclone/report/overview.py b/codeclone/report/overview.py index bbd5945..c620a78 100644 --- a/codeclone/report/overview.py +++ b/codeclone/report/overview.py @@ -177,6 +177,47 @@ def _directory_path_label(relative_path: str) -> str: return parent if parent not in {"", "/"} else "." +def _directory_scope_root_label( + relative_path: str, + *, + source_kind: str, +) -> str | None: + parts = tuple( + part for part in PurePosixPath(relative_path).parts if part not in {"", "."} + ) + if not parts: + return None + tests_idx = next( + (index for index, part in enumerate(parts) if part == SOURCE_KIND_TESTS), + None, + ) + if tests_idx is None: + return None + if ( + source_kind == SOURCE_KIND_FIXTURES + and tests_idx + 1 < len(parts) + and parts[tests_idx + 1] == SOURCE_KIND_FIXTURES + ): + return "/".join(parts[: tests_idx + 2]) + if source_kind == SOURCE_KIND_TESTS: + return "/".join(parts[: tests_idx + 1]) + return None + + +def _overview_directory_label( + relative_path: str, + *, + source_kind: str, +) -> str: + scope_root = _directory_scope_root_label( + relative_path, + source_kind=source_kind, + ) + if scope_root: + return scope_root + return _directory_path_label(relative_path) + + def _directory_contributions( group: Mapping[str, object], ) -> dict[str, dict[str, object]]: @@ -185,10 +226,10 @@ def _directory_contributions( relative_path = _directory_relative_path(item) if relative_path is None: continue - directory = _directory_path_label(relative_path) source_kind = str(item.get("source_kind", "")).strip() or classify_source_kind( relative_path ) + directory = _overview_directory_label(relative_path, source_kind=source_kind) entry = contributions.setdefault( directory, { diff --git a/codeclone/report/segments.py b/codeclone/report/segments.py index d335234..7f46502 100644 --- a/codeclone/report/segments.py +++ b/codeclone/report/segments.py @@ -12,7 +12,7 @@ from 
pathlib import Path from typing import TYPE_CHECKING -from ..extractor import _QualnameCollector +from ..qualnames import FunctionNode, QualnameCollector from .merge import coerce_positive_int, merge_overlapping_items if TYPE_CHECKING: @@ -55,7 +55,7 @@ def merge_segment_items(items: GroupItemsLike) -> list[GroupItem]: def collect_file_functions( filepath: str, -) -> dict[str, ast.FunctionDef | ast.AsyncFunctionDef] | None: +) -> dict[str, FunctionNode] | None: try: source = Path(filepath).read_text("utf-8") except OSError: @@ -65,13 +65,13 @@ def collect_file_functions( except SyntaxError: return None - collector = _QualnameCollector() + collector = QualnameCollector() collector.visit(tree) return collector.funcs def segment_statements( - func_node: ast.FunctionDef | ast.AsyncFunctionDef, start_line: int, end_line: int + func_node: FunctionNode, start_line: int, end_line: int ) -> list[ast.stmt]: body = getattr(func_node, "body", None) if not isinstance(body, list): @@ -140,7 +140,7 @@ def analyze_segment_statements(statements: list[ast.stmt]) -> _SegmentAnalysis | def _analyze_segment_item( item: GroupItemLike, *, - file_cache: dict[str, dict[str, ast.FunctionDef | ast.AsyncFunctionDef] | None], + file_cache: dict[str, dict[str, FunctionNode] | None], ) -> _SegmentAnalysis | None: filepath = str(item.get("filepath", "")) qualname = str(item.get("qualname", "")) @@ -167,7 +167,7 @@ def _analyze_segment_item( def _analyze_segment_group( items: Sequence[GroupItemLike], *, - file_cache: dict[str, dict[str, ast.FunctionDef | ast.AsyncFunctionDef] | None], + file_cache: dict[str, dict[str, FunctionNode] | None], ) -> list[_SegmentAnalysis] | None: analyses: list[_SegmentAnalysis] = [] for item in items: @@ -185,7 +185,7 @@ def prepare_segment_report_groups(segment_groups: GroupMapLike) -> tuple[GroupMa """ suppressed = 0 filtered: GroupMap = {} - file_cache: dict[str, dict[str, ast.FunctionDef | ast.AsyncFunctionDef] | None] = {} + file_cache: dict[str, dict[str, 
FunctionNode] | None] = {} for key, items in segment_groups.items(): merged_items = merge_segment_items(items) diff --git a/docs/architecture.md b/docs/architecture.md index 09b5e4d..4cea0a2 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -216,6 +216,15 @@ it adapts the existing pipeline into tools/resources such as: This keeps agent integrations deterministic and aligned with the same canonical report document used by JSON/HTML/SARIF. +Security boundaries: + +- Read-only by design — no tool mutates source files, baselines, or repo state. +- `--allow-remote` guard required for non-local transports; default is `stdio`. +- `cache_policy=refresh` rejected to preserve read-only semantics. +- Review markers are session-local in-memory state, never persisted. +- Run history bounded by `--history-limit` to prevent unbounded memory growth. +- `git_diff_ref` validated against strict regex to prevent injection. + --- ## CI Integration diff --git a/docs/book/02-terminology.md b/docs/book/02-terminology.md index 4a6ffbc..3d47862 100644 --- a/docs/book/02-terminology.md +++ b/docs/book/02-terminology.md @@ -29,6 +29,25 @@ Define terms exactly as used by code and tests. - report schema (`report_schema_version`) for report format compatibility. - **payload_sha256**: canonical baseline semantic hash. - **trusted baseline**: baseline loaded + status `ok`. +- **source_kind**: file classification — `production`, `tests`, `fixtures`, `other` — + determined by scanner path rules. Drives source-scope breakdown and + hotspot attribution. +- **health score**: weighted blend of seven dimension scores (0–100). + Dimensions: clones 25%, complexity 20%, cohesion 15%, coupling 10%, + dead code 10%, dependencies 10%, coverage 10%. + Grade bands: A ≥90, B ≥75, C ≥60, D ≥40, F <40. +- **design finding**: metric-driven finding (complexity/coupling/cohesion) + emitted by the canonical report builder when a class or function exceeds + the report-level design threshold. 
Thresholds are stored in + `meta.analysis_thresholds.design_findings`. +- **suggestion**: advisory recommendation card derived from clones, structural + findings, or metric violations. Advisory only — never gates CI. +- **production_hotspot**: finding group whose items are concentrated in + production source scope (`source_kind=production`). +- **effective_freshness**: cache-level indicator (`fresh` / `mixed` / `reused`) + reflecting how much of the analysis was recomputed vs cache-served. +- **directory_hotspot**: derived aggregation in `derived.overview` showing + which directories concentrate the most findings by category. Refs: @@ -37,12 +56,20 @@ Refs: - `codeclone/blocks.py:extract_segments` - `codeclone/baseline.py:current_python_tag` - `codeclone/baseline.py:Baseline.verify_compatibility` +- `codeclone/scanner.py:classify_source_kind` +- `codeclone/metrics/health.py:compute_health` +- `codeclone/report/json_contract.py:_design_findings_thresholds_payload` +- `codeclone/report/suggestions.py:generate_suggestions` +- `codeclone/report/overview.py:build_directory_hotspots` ## Contracts - New/known classification is key-based, not item-heuristic-based. - Baseline trust is status-driven. - Cache trust is status-driven and independent from baseline trust. +- Design finding universe is determined solely by the canonical report builder; + MCP and HTML read, never resynthesize. +- Suggestions are advisory and never affect exit code. Refs: diff --git a/docs/book/05-core-pipeline.md b/docs/book/05-core-pipeline.md index 81ed0ee..1640dd1 100644 --- a/docs/book/05-core-pipeline.md +++ b/docs/book/05-core-pipeline.md @@ -33,6 +33,30 @@ Stages: 6. Structural report findings: - duplicated branch families from per-function AST structure facts - clone cohort drift families built from existing function groups (no rescan) +7. 
Metrics computation (full mode only): + - per-function cyclomatic complexity + - per-class coupling (CBO) and cohesion (LCOM4) + - dead-code analysis: declaration-only, qualname-based liveness + - dependency graph and cycle detection +8. Health scoring: + - seven dimension scores: clones, complexity, coupling, cohesion, + dead code, dependencies, coverage + - weighted blend → composite score (0–100) and grade (A–F) +9. Design finding extraction: + - threshold-aware findings for complexity, coupling, cohesion + - thresholds recorded in `meta.analysis_thresholds.design_findings` +10. Suggestion generation: + - advisory cards from clone groups, structural findings, metric violations + - deterministic priority sort, never gates CI +11. Derived overview and hotlists: + - overview families, top risks, source breakdown, health snapshot + - directory hotspots by category (`derived.overview.directory_hotspots`) + - hotlists: most actionable, highest spread, production/test-fixture hotspots +12. Gate evaluation: + - clone-baseline diff (NEW vs KNOWN) + - metric threshold gates (`--fail-complexity`, `--fail-coupling`, etc.) 
+ - metric regression gates (`--fail-on-new-metrics`) + - gate reasons emitted in deterministic order Refs: @@ -40,6 +64,11 @@ Refs: - `codeclone/extractor.py:extract_units_and_stats_from_source` - `codeclone/report/blocks.py:prepare_block_report_groups` - `codeclone/report/segments.py:prepare_segment_report_groups` +- `codeclone/metrics/health.py:compute_health` +- `codeclone/report/json_contract.py:_build_design_groups` +- `codeclone/report/suggestions.py:generate_suggestions` +- `codeclone/report/overview.py:build_directory_hotspots` +- `codeclone/pipeline.py:metric_gate_reasons` ## Contracts diff --git a/docs/book/08-report.md b/docs/book/08-report.md index aeed42c..f893b51 100644 --- a/docs/book/08-report.md +++ b/docs/book/08-report.md @@ -97,6 +97,11 @@ Per-group common axes (family-specific fields may extend): - `derived.overview.directory_hotspots` is a deterministic report-layer aggregation over canonical findings; HTML must render it as-is or omit it on compatibility paths without a canonical report document. +- `derived.overview.directory_hotspots[*].path` is an overview-oriented + directory key: runtime findings keep their parent directory, while test-only + and fixture-only findings collapse to the corresponding source-scope roots + (`.../tests` or `.../tests/fixtures`) to avoid duplicating the same hotspot + across leaf fixture paths. - Overview hotspot/source-breakdown sections must resolve from canonical report data or deterministic derived IDs; HTML must not silently substitute stale placeholders such as `n/a` or empty-state cards when canonical data exists. diff --git a/docs/book/11-security-model.md b/docs/book/11-security-model.md index d6a271a..a9c917f 100644 --- a/docs/book/11-security-model.md +++ b/docs/book/11-security-model.md @@ -10,6 +10,7 @@ Describe implemented protections and explicit security boundaries. 
- File read limits and parser limits: `codeclone/cli.py:process_file`, `codeclone/extractor.py:_parse_limits` - Baseline/cache validation: `codeclone/baseline.py`, `codeclone/cache.py` - HTML escaping: `codeclone/_html_escape.py`, `codeclone/html_report.py` +- MCP read-only enforcement: `codeclone/mcp_service.py`, `codeclone/mcp_server.py` ## Data model @@ -25,6 +26,17 @@ Security-relevant input classes: - Sensitive root directories are blocked by scanner policy. - Symlink traversal outside root is skipped. - HTML report escapes text and attribute contexts before embedding. +- MCP server is read-only by design: no tool mutates source files, baselines, + cache, or report artifacts. +- `--allow-remote` guard must be passed explicitly for non-local transports; + default is local-only (`stdio`). +- `cache_policy=refresh` is rejected — MCP cannot trigger cache invalidation. +- Review markers (`mark_finding_reviewed`) are session-local in-memory state; + they are never persisted to disk or leaked into baselines/reports. +- `git_diff_ref` parameter is validated against a strict regex to prevent + command injection via shell-interpreted git arguments. +- Run history is bounded by `--history-limit` (default 10) to prevent + unbounded memory growth. 
Refs: @@ -54,6 +66,9 @@ Refs: | Oversized baseline | Baseline rejected | | Oversized cache | Cache ignored | | HTML-injected payload in metadata/source | Escaped output | +| `--allow-remote` not passed for HTTP | Transport rejected | +| `cache_policy=refresh` requested | Policy rejected | +| `git_diff_ref` fails regex | Parameter rejected | ## Determinism / canonicalization @@ -74,6 +89,8 @@ Refs: - `tests/test_security.py::test_html_report_escapes_user_content` - `tests/test_html_report.py::test_html_report_escapes_script_breakout_payload` - `tests/test_cache.py::test_cache_too_large_warns` +- `tests/test_mcp_service.py::test_cache_policy_refresh_rejected` +- `tests/test_mcp_server.py::test_allow_remote_guard` ## Non-guarantees diff --git a/docs/book/15-metrics-and-quality-gates.md b/docs/book/15-metrics-and-quality-gates.md index ed9d483..7f9f760 100644 --- a/docs/book/15-metrics-and-quality-gates.md +++ b/docs/book/15-metrics-and-quality-gates.md @@ -52,8 +52,8 @@ Refs: runtime auto-enables clone-only mode (`skip_metrics=true`). - In clone-only mode: `skip_dead_code=true`, `skip_dependencies=true`. -- `--fail-dead-code` forces dead-code analysis on. -- `--fail-cycles` forces dependency analysis on. +- `--fail-dead-code` forces dead-code analysis on (even if metrics are skipped). +- `--fail-cycles` forces dependency analysis on (even if metrics are skipped). - `--update-baseline` in full mode implies metrics-baseline update in the same run. 
- If metrics baseline path equals clone baseline path and clone baseline file is diff --git a/docs/book/17-suggestions-and-clone-typing.md b/docs/book/17-suggestions-and-clone-typing.md index 1ee0a95..eac9246 100644 --- a/docs/book/17-suggestions-and-clone-typing.md +++ b/docs/book/17-suggestions-and-clone-typing.md @@ -21,6 +21,8 @@ Suggestion shape: - `severity`: `critical|warning|info` - `category`: `clone|structural|complexity|coupling|cohesion|dead_code|dependency` +- `source_kind`: source classification of the primary location + (`production` / `tests` / `fixtures` / `other`) - `title`, `location`, `steps`, `effort`, `priority` Clone typing: diff --git a/docs/book/20-mcp-interface.md b/docs/book/20-mcp-interface.md index cc5bd9a..d59b978 100644 --- a/docs/book/20-mcp-interface.md +++ b/docs/book/20-mcp-interface.md @@ -77,9 +77,9 @@ Current tool set: |--------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| | `analyze_repository` | `root`, `analysis_mode`, `changed_paths`, `git_diff_ref`, inline thresholds, cache/baseline paths | Run deterministic CodeClone analysis and register the result as the latest MCP run | | `analyze_changed_paths` | `root`, `changed_paths` or `git_diff_ref`, `analysis_mode`, inline thresholds | Diff-aware fast path: analyze a repo and attach a changed-files projection to the run; summary inventory is slimmed to `{count}` | -| `get_run_summary` | `run_id` | Return the stored summary for the latest or specified run, with slim inventory counts instead of the full file registry | +| `get_run_summary` | `run_id` | Return the stored summary for the latest or specified run, with slim inventory counts instead of the full file registry; `health` 
becomes explicit `available=false` when metrics were skipped | | `get_production_triage` | `run_id`, `max_hotspots`, `max_suggestions` | Return a compact production-first MCP projection: health, cache `effective_freshness`, production hotspots, production suggestions, and global source-kind counters | -| `compare_runs` | `run_id_before`, `run_id_after`, `focus` | Compare two registered runs by finding ids and health delta | +| `compare_runs` | `run_id_before`, `run_id_after`, `focus` | Compare two registered runs by finding ids and run-to-run health delta; `verdict` becomes `mixed` when findings and health move in opposite directions, and `incomparable` when roots/settings differ. In incomparable cases, finding and health deltas are omitted | | `evaluate_gates` | `run_id`, gate thresholds/booleans | Evaluate CI/gating conditions against an existing run without exiting the process | | `get_report_section` | `run_id`, `section` | Return a canonical report section. `metrics` is summary-only; `metrics_detail` exposes the full metrics payload; other sections stay canonical | | `list_findings` | `family`, `category`, `severity`, `source_kind`, `novelty`, `sort_by`, `detail_level`, `changed_paths`, `git_diff_ref`, `exclude_reviewed`, pagination | Return deterministically ordered finding groups with filtering and pagination; list responses include `base_uri` and compact summary/normal projections | @@ -206,6 +206,16 @@ state behind `codeclone://latest/...`. `priority_factors` and location `uri` are still available there. - `compare_runs` is only semantically meaningful when both runs use comparable repository scope/root and analysis settings. +- `compare_runs` exposes a `comparability` block. When roots or effective + analysis settings differ, finding deltas and `health_delta` are omitted and + `verdict` becomes `incomparable`. +- `compare_runs.health_delta` is `after.health - before.health` between the two + selected comparable runs. 
It is independent of baseline or metrics-baseline + drift. +- `compare_runs.verdict` is intentionally conservative but not one-dimensional: + it returns `mixed` when run-to-run finding deltas and `health_delta` disagree. +- `analysis_mode="clones_only"` keeps clone findings fully usable, but MCP + surfaces mark `health` as unavailable instead of fabricating zeroed metrics. - `codeclone://latest/triage` is a latest-only resource; run-specific triage is available via the tool, not via a `codeclone://runs/{run_id}/...` resource URI. diff --git a/docs/mcp.md b/docs/mcp.md index 2e4f88b..abca501 100644 --- a/docs/mcp.md +++ b/docs/mcp.md @@ -41,9 +41,9 @@ core CodeClone runtime. |--------------------------|------------------------------------------------------------------------------------------------------| | `analyze_repository` | Full analysis → register as latest run | | `analyze_changed_paths` | Diff-aware analysis with `changed_paths` or `git_diff_ref`; summary inventory is slimmed to counts | -| `get_run_summary` | Compact health/findings/baseline snapshot with slim inventory counts | +| `get_run_summary` | Compact health/findings/baseline snapshot with slim inventory counts; `health` is explicit `available=false` when metrics were skipped | | `get_production_triage` | Compact production-first view: health, cache freshness, production hotspots, production suggestions | -| `compare_runs` | Regressions, improvements, health delta between two runs | +| `compare_runs` | Regressions, improvements, and run-to-run health delta between comparable runs; returns `mixed` for conflicting signals and `incomparable` when roots/settings differ, omitting deltas in that case | | `list_findings` | Filtered, paginated finding groups with envelope-level `base_uri` | | `get_finding` | Deep inspection of one finding by id | | `get_remediation` | Structured remediation payload for one finding | diff --git a/tests/test_extractor.py b/tests/test_extractor.py index 6491e31..133b0df 100644 
--- a/tests/test_extractor.py +++ b/tests/test_extractor.py @@ -15,11 +15,12 @@ import pytest -from codeclone import extractor +from codeclone import extractor, qualnames from codeclone.errors import ParseError from codeclone.metrics import find_unused from codeclone.models import BlockUnit, ClassMetrics, ModuleDep, SegmentUnit from codeclone.normalize import NormalizationConfig +from codeclone.qualnames import FunctionNode, QualnameCollector def extract_units_from_source( @@ -58,9 +59,9 @@ def extract_units_from_source( def _parse_tree_and_collector( source: str, -) -> tuple[ast.Module, extractor._QualnameCollector]: +) -> tuple[ast.Module, QualnameCollector]: tree = ast.parse(source) - collector = extractor._QualnameCollector() + collector = QualnameCollector() collector.visit(tree) return tree, collector @@ -70,7 +71,7 @@ def _collect_module_walk( *, module_name: str = "pkg.mod", collect_referenced_names: bool = True, -) -> tuple[ast.Module, extractor._QualnameCollector, extractor._ModuleWalkResult]: +) -> tuple[ast.Module, QualnameCollector, extractor._ModuleWalkResult]: tree, collector = _parse_tree_and_collector(source) walk = extractor._collect_module_walk_data( tree=tree, @@ -666,7 +667,7 @@ def test_collect_module_walk_data_imports_and_references() -> None: obj.method() """.strip() ) - collector = extractor._QualnameCollector() + collector = QualnameCollector() collector.visit(tree) walk = extractor._collect_module_walk_data( tree=tree, @@ -706,7 +707,7 @@ def test_collect_module_walk_data_imports_and_references() -> None: def test_collect_module_walk_data_edge_branches() -> None: tree = ast.parse("from .... 
import parent") - collector = extractor._QualnameCollector() + collector = QualnameCollector() collector.visit(tree) walk = extractor._collect_module_walk_data( tree=tree, @@ -719,7 +720,7 @@ def test_collect_module_walk_data_edge_branches() -> None: assert walk.referenced_names == frozenset() lambda_call_tree = ast.parse("(lambda x: x)(1)") - lambda_collector = extractor._QualnameCollector() + lambda_collector = QualnameCollector() lambda_collector.visit(lambda_call_tree) lambda_walk = extractor._collect_module_walk_data( tree=lambda_call_tree, @@ -738,7 +739,7 @@ def test_collect_module_walk_data_without_referenced_name_collection() -> None: from .... import parent """.strip() ) - collector = extractor._QualnameCollector() + collector = QualnameCollector() collector.visit(tree) walk = extractor._collect_module_walk_data( tree=tree, @@ -834,7 +835,7 @@ class B(te.Protocol[int]): pass """.strip() ) - collector = extractor._QualnameCollector() + collector = QualnameCollector() collector.visit(tree) walk = extractor._collect_module_walk_data( tree=tree, @@ -1286,7 +1287,7 @@ def orphan(self) -> int: def test_collect_dead_candidates_and_extract_skip_classes_without_lineno( monkeypatch: pytest.MonkeyPatch, ) -> None: - collector = extractor._QualnameCollector() + collector = QualnameCollector() collector.visit( ast.parse( """ @@ -1314,7 +1315,7 @@ def used(): class _CollectorNoClassMetrics: def __init__(self) -> None: - self.units: list[tuple[str, extractor.FunctionNode]] = [] + self.units: list[tuple[str, FunctionNode]] = [] self.class_nodes = [("Broken", broken_class)] self.function_count = 0 self.method_count = 0 @@ -1323,7 +1324,7 @@ def __init__(self) -> None: def visit(self, _tree: ast.AST) -> None: return None - monkeypatch.setattr(extractor, "_QualnameCollector", _CollectorNoClassMetrics) + monkeypatch.setattr(qualnames, "QualnameCollector", _CollectorNoClassMetrics) _, _, _, _, file_metrics, _ = extractor.extract_units_and_stats_from_source( source="class 
Broken:\n pass\n", filepath="pkg/mod.py", diff --git a/tests/test_html_report.py b/tests/test_html_report.py index f7b886a..e6f52fe 100644 --- a/tests/test_html_report.py +++ b/tests/test_html_report.py @@ -1618,7 +1618,12 @@ def test_html_report_metrics_risk_branches() -> None: assert 'stroke="var(--error)"' in html assert "Cycles: 1; max dependency depth: 4." in html assert "5 candidates total; 2 high-confidence items; 0 suppressed." in html - assert 'Dead Code2' in html + assert '