Files
claw-code/rust/scripts/run_mock_parity_diff.py
Yeachan-Heo 85c5b0e01d Expand parity harness coverage before behavioral drift lands
The landed mock Anthropic harness now covers multi-tool turns, bash flows,
permission prompt approve/deny paths, and an external plugin tool path.
A machine-readable scenario manifest plus a diff/checklist runner keep the
new scenarios tied back to PARITY.md so future additions stay honest.

Constraint: Must build on the deterministic mock service and clean-environment CLI harness
Rejected: Add an MCP tool scenario now | current MCP tool surface is still stubbed, so plugin coverage is the real executable path
Confidence: high
Scope-risk: moderate
Reversibility: clean
Directive: Keep rust/mock_parity_scenarios.json, mock_parity_harness.rs, and PARITY.md refs in lockstep
Tested: cargo fmt --all
Tested: cargo clippy --workspace --all-targets -- -D warnings
Tested: cargo test --workspace
Tested: python3 rust/scripts/run_mock_parity_diff.py
Not-tested: Real MCP lifecycle handshakes; remote plugin marketplace install flows
2026-04-03 04:00:33 +00:00

131 lines
4.3 KiB
Python
Executable File

#!/usr/bin/env python3
from __future__ import annotations
import json
import os
import subprocess
import sys
import tempfile
from collections import defaultdict
from pathlib import Path
def load_manifest(path: Path) -> list[dict]:
    """Read the scenario manifest at *path* and return the parsed JSON.

    The file is decoded as UTF-8 explicitly instead of with the platform's
    locale encoding, so a manifest containing non-ASCII text loads the same
    way on every host.

    Args:
        path: Location of the JSON manifest file.

    Returns:
        The decoded JSON value. It is annotated as a list of scenario dicts,
        but the shape is not validated here.

    Raises:
        OSError: If the file cannot be read.
        UnicodeDecodeError: If the file is not valid UTF-8.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    return json.loads(path.read_text(encoding="utf-8"))
def load_parity_text(path: Path) -> str:
    """Return the full text of the parity document at *path*.

    The file is decoded as UTF-8 explicitly instead of with the platform's
    locale encoding, so non-ASCII characters in the document are read
    consistently on every host.

    Args:
        path: Location of the parity document.

    Returns:
        The file content as a single string.

    Raises:
        OSError: If the file cannot be read.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    return path.read_text(encoding="utf-8")
def ensure_refs_exist(manifest: list[dict], parity_text: str) -> list[tuple[str, str]]:
    """Collect the manifest parity refs that do not occur in *parity_text*.

    Args:
        manifest: Scenario entries. Each may carry a ``parity_refs`` list;
            an entry without that key contributes nothing.
        parity_text: Text searched for each ref with a plain substring test.

    Returns:
        ``(scenario name, ref)`` pairs, in manifest order, for every ref
        that is absent from *parity_text*. Empty when all refs are found.

    Raises:
        KeyError: If an entry with an absent ref has no ``name`` key.
    """
    return [
        (scenario["name"], parity_ref)
        for scenario in manifest
        for parity_ref in scenario.get("parity_refs", [])
        if parity_ref not in parity_text
    ]
def run_harness(rust_root: Path) -> dict:
    """Run the ``mock_parity_harness`` cargo test and return its JSON report.

    A report path inside a fresh temporary directory is passed to the child
    process through the ``MOCK_PARITY_REPORT_PATH`` environment variable
    (presumably the harness writes its report there), and that file is
    parsed once cargo has finished. The temporary directory is removed when
    the function returns.

    Args:
        rust_root: Working directory in which ``cargo`` is invoked.

    Returns:
        The decoded JSON report.

    Raises:
        subprocess.CalledProcessError: If ``cargo test`` exits non-zero.
        OSError: If ``cargo`` cannot be started or the report file cannot be
            read (``FileNotFoundError`` when no report was written).
        json.JSONDecodeError: If the report is not valid JSON.
    """
    command = [
        "cargo",
        "test",
        "-p",
        "rusty-claude-cli",
        "--test",
        "mock_parity_harness",
        "--",
        "--nocapture",
    ]
    with tempfile.TemporaryDirectory(prefix="mock-parity-report-") as temp_dir:
        report_path = Path(temp_dir) / "report.json"
        env = os.environ.copy()
        env["MOCK_PARITY_REPORT_PATH"] = str(report_path)
        subprocess.run(command, cwd=rust_root, check=True, env=env)
        # Decode explicitly as UTF-8 rather than with the locale encoding so
        # non-ASCII text in the report parses the same on every host.
        return json.loads(report_path.read_text(encoding="utf-8"))
def main() -> int:
    """Print the mock parity checklist and return a process exit status.

    Steps:
      1. Load ``mock_parity_scenarios.json`` from the directory two levels
         above this script, and ``PARITY.md`` from one level above that.
      2. Stop with status 1 if any manifest parity ref is absent from
         ``PARITY.md``.
      3. Unless ``--no-run`` is among the command-line arguments, run the
         harness and index its reported scenarios by name.
      4. Print one checklist entry per manifest scenario, then the map from
         each parity ref to the scenarios covering it, then (when the report
         contains scenarios) the first scenario's result and a summary.

    Returns:
        0 on success. 1 when parity refs are missing from ``PARITY.md``, or
        when the harness was run but did not report one or more manifest
        scenarios.
    """
    script_path = Path(__file__).resolve()
    rust_root = script_path.parent.parent
    repo_root = rust_root.parent
    manifest_path = rust_root / "mock_parity_scenarios.json"
    parity_path = repo_root / "PARITY.md"

    manifest = load_manifest(manifest_path)
    parity_text = load_parity_text(parity_path)

    missing_refs = ensure_refs_exist(manifest, parity_text)
    if missing_refs:
        print("Missing PARITY.md references:", file=sys.stderr)
        for scenario_name, ref in missing_refs:
            print(f" - {scenario_name}: {ref}", file=sys.stderr)
        return 1

    should_run = "--no-run" not in sys.argv[1:]
    report = run_harness(rust_root) if should_run else None
    if report:
        report_by_name = {
            entry["name"]: entry for entry in report.get("scenarios", [])
        }
    else:
        report_by_name = {}

    print("Mock parity diff checklist")
    print(f"Repo root: {repo_root}")
    print(f"Scenario manifest: {manifest_path}")
    print(f"PARITY source: {parity_path}")
    print()

    # Manifest scenarios the harness ran without reporting; these make the
    # run fail instead of only being labelled in the checklist.
    unreported: list[str] = []
    for entry in manifest:
        scenario_name = entry["name"]
        # Same tolerance as ensure_refs_exist: an entry without parity refs
        # is treated as having none rather than raising KeyError.
        parity_refs = entry.get("parity_refs", [])
        scenario_report = report_by_name.get(scenario_name)
        if scenario_report:
            status = "PASS"
        elif should_run:
            status = "MISSING"
            unreported.append(scenario_name)
        else:
            status = "MAPPED"
        print(f"[{status}] {scenario_name} ({entry['category']})")
        print(f" description: {entry['description']}")
        print(f" parity refs: {' | '.join(parity_refs)}")
        if scenario_report:
            print(
                " result: iterations={iterations} requests={requests} tool_uses={tool_uses} tool_errors={tool_errors}".format(
                    iterations=scenario_report["iterations"],
                    requests=scenario_report["request_count"],
                    tool_uses=", ".join(scenario_report["tool_uses"]) or "none",
                    tool_errors=scenario_report["tool_error_count"],
                )
            )
            print(f" final: {scenario_report['final_message']}")
        print()

    coverage = defaultdict(list)
    for entry in manifest:
        for ref in entry.get("parity_refs", []):
            coverage[ref].append(entry["name"])

    print("PARITY coverage map")
    for ref, scenarios in coverage.items():
        print(f"- {ref}")
        print(f" scenarios: {', '.join(scenarios)}")

    # The summary reads keys from the report, so it is only printed when a
    # report with at least one scenario exists (never under --no-run).
    if report and report.get("scenarios"):
        first = report["scenarios"][0]
        print()
        print("First scenario result")
        print(f"- name: {first['name']}")
        print(f"- iterations: {first['iterations']}")
        print(f"- requests: {first['request_count']}")
        print(f"- tool_uses: {', '.join(first['tool_uses']) or 'none'}")
        print(f"- tool_errors: {first['tool_error_count']}")
        print(f"- final_message: {first['final_message']}")
        print()
        print(
            "Harness summary: {scenario_count} scenarios, {request_count} requests".format(
                scenario_count=report["scenario_count"],
                request_count=report["request_count"],
            )
        )

    if unreported:
        print("Scenarios missing from harness report:", file=sys.stderr)
        for scenario_name in unreported:
            print(f" - {scenario_name}", file=sys.stderr)
        return 1
    return 0
if __name__ == "__main__":
    # Script entry point: main()'s return value becomes the exit status.
    sys.exit(main())