Expand parity harness coverage before behavioral drift lands

The landed mock Anthropic harness now covers multi-tool turns, bash flows,
permission prompt approve/deny paths, and an external plugin tool path.
A machine-readable scenario manifest plus a diff/checklist runner keep the
new scenarios tied back to PARITY.md so future additions stay honest.

Constraint: Must build on the deterministic mock service and clean-environment CLI harness
Rejected: Add an MCP tool scenario now | current MCP tool surface is still stubbed, so plugin coverage is the real executable path
Confidence: high
Scope-risk: moderate
Reversibility: clean
Directive: Keep rust/mock_parity_scenarios.json, mock_parity_harness.rs, and PARITY.md refs in lockstep
Tested: cargo fmt --all
Tested: cargo clippy --workspace --all-targets -- -D warnings
Tested: cargo test --workspace
Tested: python3 rust/scripts/run_mock_parity_diff.py
Not-tested: Real MCP lifecycle handshakes; remote plugin marketplace install flows
This commit is contained in:
Yeachan-Heo
2026-04-03 04:00:33 +00:00
parent c2f1304a01
commit 85c5b0e01d
7 changed files with 1154 additions and 100 deletions

View File

@@ -93,6 +93,11 @@ enum Scenario {
GrepChunkAssembly,
WriteFileAllowed,
WriteFileDenied,
MultiToolTurnRoundtrip,
BashStdoutRoundtrip,
BashPermissionPromptApproved,
BashPermissionPromptDenied,
PluginToolRoundtrip,
}
impl Scenario {
@@ -103,6 +108,11 @@ impl Scenario {
"grep_chunk_assembly" => Some(Self::GrepChunkAssembly),
"write_file_allowed" => Some(Self::WriteFileAllowed),
"write_file_denied" => Some(Self::WriteFileDenied),
"multi_tool_turn_roundtrip" => Some(Self::MultiToolTurnRoundtrip),
"bash_stdout_roundtrip" => Some(Self::BashStdoutRoundtrip),
"bash_permission_prompt_approved" => Some(Self::BashPermissionPromptApproved),
"bash_permission_prompt_denied" => Some(Self::BashPermissionPromptDenied),
"plugin_tool_roundtrip" => Some(Self::PluginToolRoundtrip),
_ => None,
}
}
@@ -114,6 +124,11 @@ impl Scenario {
Self::GrepChunkAssembly => "grep_chunk_assembly",
Self::WriteFileAllowed => "write_file_allowed",
Self::WriteFileDenied => "write_file_denied",
Self::MultiToolTurnRoundtrip => "multi_tool_turn_roundtrip",
Self::BashStdoutRoundtrip => "bash_stdout_roundtrip",
Self::BashPermissionPromptApproved => "bash_permission_prompt_approved",
Self::BashPermissionPromptDenied => "bash_permission_prompt_denied",
Self::PluginToolRoundtrip => "plugin_tool_roundtrip",
}
}
}
@@ -243,6 +258,38 @@ fn latest_tool_result(request: &MessageRequest) -> Option<(String, bool)> {
})
}
/// Collects the most recent tool result per tool *name* across the whole
/// conversation.
///
/// Tool results only carry a `tool_use_id`, so a first pass maps every
/// `tool_use` id to its tool name; unknown ids fall back to the raw id as
/// the key. Messages and blocks are walked in reverse so the latest result
/// for each name wins (`or_insert_with` keeps the first one seen).
fn tool_results_by_name(request: &MessageRequest) -> HashMap<String, (String, bool)> {
    // Pass 1: tool_use id -> tool name (later inserts overwrite earlier, as
    // with the original insert loop).
    let tool_names_by_id: HashMap<String, String> = request
        .messages
        .iter()
        .flat_map(|message| message.content.iter())
        .filter_map(|block| match block {
            InputContentBlock::ToolUse { id, name, .. } => Some((id.clone(), name.clone())),
            _ => None,
        })
        .collect();

    // Pass 2: newest-first scan; keep only the first (i.e. latest) result
    // per resolved tool name.
    let mut results = HashMap::new();
    for message in request.messages.iter().rev() {
        for block in message.content.iter().rev() {
            let InputContentBlock::ToolResult {
                tool_use_id,
                content,
                is_error,
            } = block
            else {
                continue;
            };
            let key = tool_names_by_id
                .get(tool_use_id)
                .cloned()
                .unwrap_or_else(|| tool_use_id.clone());
            results
                .entry(key)
                .or_insert_with(|| (flatten_tool_result_content(content), *is_error));
        }
    }
    results
}
fn flatten_tool_result_content(content: &[api::ToolResultContentBlock]) -> String {
content
.iter()
@@ -276,6 +323,7 @@ fn build_http_response(request: &MessageRequest, scenario: Scenario) -> String {
)
}
#[allow(clippy::too_many_lines)]
fn build_stream_body(request: &MessageRequest, scenario: Scenario) -> String {
match scenario {
Scenario::StreamingText => streaming_text_sse(),
@@ -326,9 +374,88 @@ fn build_stream_body(request: &MessageRequest, scenario: Scenario) -> String {
&[r#"{"path":"generated/denied.txt","content":"should not exist\n"}"#],
),
},
Scenario::MultiToolTurnRoundtrip => {
let tool_results = tool_results_by_name(request);
match (
tool_results.get("read_file"),
tool_results.get("grep_search"),
) {
(Some((read_output, _)), Some((grep_output, _))) => final_text_sse(&format!(
"multi-tool roundtrip complete: {} / {} occurrences",
extract_read_content(read_output),
extract_num_matches(grep_output)
)),
_ => tool_uses_sse(&[
ToolUseSse {
tool_id: "toolu_multi_read",
tool_name: "read_file",
partial_json_chunks: &[r#"{"path":"fixture.txt"}"#],
},
ToolUseSse {
tool_id: "toolu_multi_grep",
tool_name: "grep_search",
partial_json_chunks: &[
"{\"pattern\":\"par",
"ity\",\"path\":\"fixture.txt\"",
",\"output_mode\":\"count\"}",
],
},
]),
}
}
Scenario::BashStdoutRoundtrip => match latest_tool_result(request) {
Some((tool_output, _)) => final_text_sse(&format!(
"bash completed: {}",
extract_bash_stdout(&tool_output)
)),
None => tool_use_sse(
"toolu_bash_stdout",
"bash",
&[r#"{"command":"printf 'alpha from bash'","timeout":1000}"#],
),
},
Scenario::BashPermissionPromptApproved => match latest_tool_result(request) {
Some((tool_output, is_error)) => {
if is_error {
final_text_sse(&format!("bash approval unexpectedly failed: {tool_output}"))
} else {
final_text_sse(&format!(
"bash approved and executed: {}",
extract_bash_stdout(&tool_output)
))
}
}
None => tool_use_sse(
"toolu_bash_prompt_allow",
"bash",
&[r#"{"command":"printf 'approved via prompt'","timeout":1000}"#],
),
},
Scenario::BashPermissionPromptDenied => match latest_tool_result(request) {
Some((tool_output, _)) => {
final_text_sse(&format!("bash denied as expected: {tool_output}"))
}
None => tool_use_sse(
"toolu_bash_prompt_deny",
"bash",
&[r#"{"command":"printf 'should not run'","timeout":1000}"#],
),
},
Scenario::PluginToolRoundtrip => match latest_tool_result(request) {
Some((tool_output, _)) => final_text_sse(&format!(
"plugin tool completed: {}",
extract_plugin_message(&tool_output)
)),
None => tool_use_sse(
"toolu_plugin_echo",
"plugin_echo",
&[r#"{"message":"hello from plugin parity"}"#],
),
},
}
}
#[allow(clippy::too_many_lines)]
fn build_message_response(request: &MessageRequest, scenario: Scenario) -> MessageResponse {
match scenario {
Scenario::StreamingText => text_message_response(
@@ -389,6 +516,100 @@ fn build_message_response(request: &MessageRequest, scenario: Scenario) -> Messa
json!({"path": "generated/denied.txt", "content": "should not exist\n"}),
),
},
Scenario::MultiToolTurnRoundtrip => {
let tool_results = tool_results_by_name(request);
match (
tool_results.get("read_file"),
tool_results.get("grep_search"),
) {
(Some((read_output, _)), Some((grep_output, _))) => text_message_response(
"msg_multi_tool_final",
&format!(
"multi-tool roundtrip complete: {} / {} occurrences",
extract_read_content(read_output),
extract_num_matches(grep_output)
),
),
_ => tool_message_response_many(
"msg_multi_tool_start",
&[
ToolUseMessage {
tool_id: "toolu_multi_read",
tool_name: "read_file",
input: json!({"path": "fixture.txt"}),
},
ToolUseMessage {
tool_id: "toolu_multi_grep",
tool_name: "grep_search",
input: json!({"pattern": "parity", "path": "fixture.txt", "output_mode": "count"}),
},
],
),
}
}
Scenario::BashStdoutRoundtrip => match latest_tool_result(request) {
Some((tool_output, _)) => text_message_response(
"msg_bash_stdout_final",
&format!("bash completed: {}", extract_bash_stdout(&tool_output)),
),
None => tool_message_response(
"msg_bash_stdout_tool",
"toolu_bash_stdout",
"bash",
json!({"command": "printf 'alpha from bash'", "timeout": 1000}),
),
},
Scenario::BashPermissionPromptApproved => match latest_tool_result(request) {
Some((tool_output, is_error)) => {
if is_error {
text_message_response(
"msg_bash_prompt_allow_error",
&format!("bash approval unexpectedly failed: {tool_output}"),
)
} else {
text_message_response(
"msg_bash_prompt_allow_final",
&format!(
"bash approved and executed: {}",
extract_bash_stdout(&tool_output)
),
)
}
}
None => tool_message_response(
"msg_bash_prompt_allow_tool",
"toolu_bash_prompt_allow",
"bash",
json!({"command": "printf 'approved via prompt'", "timeout": 1000}),
),
},
Scenario::BashPermissionPromptDenied => match latest_tool_result(request) {
Some((tool_output, _)) => text_message_response(
"msg_bash_prompt_deny_final",
&format!("bash denied as expected: {tool_output}"),
),
None => tool_message_response(
"msg_bash_prompt_deny_tool",
"toolu_bash_prompt_deny",
"bash",
json!({"command": "printf 'should not run'", "timeout": 1000}),
),
},
Scenario::PluginToolRoundtrip => match latest_tool_result(request) {
Some((tool_output, _)) => text_message_response(
"msg_plugin_tool_final",
&format!(
"plugin tool completed: {}",
extract_plugin_message(&tool_output)
),
),
None => tool_message_response(
"msg_plugin_tool_start",
"toolu_plugin_echo",
"plugin_echo",
json!({"message": "hello from plugin parity"}),
),
},
}
}
@@ -399,6 +620,11 @@ fn request_id_for(scenario: Scenario) -> &'static str {
Scenario::GrepChunkAssembly => "req_grep_chunk_assembly",
Scenario::WriteFileAllowed => "req_write_file_allowed",
Scenario::WriteFileDenied => "req_write_file_denied",
Scenario::MultiToolTurnRoundtrip => "req_multi_tool_turn_roundtrip",
Scenario::BashStdoutRoundtrip => "req_bash_stdout_roundtrip",
Scenario::BashPermissionPromptApproved => "req_bash_permission_prompt_approved",
Scenario::BashPermissionPromptDenied => "req_bash_permission_prompt_denied",
Scenario::PluginToolRoundtrip => "req_plugin_tool_roundtrip",
}
}
@@ -441,15 +667,35 @@ fn tool_message_response(
tool_name: &str,
input: Value,
) -> MessageResponse {
tool_message_response_many(
id,
&[ToolUseMessage {
tool_id,
tool_name,
input,
}],
)
}
/// One scripted `tool_use` content block for a non-streaming mock response.
struct ToolUseMessage<'a> {
    /// Anthropic-style tool use id (e.g. `toolu_multi_read`).
    tool_id: &'a str,
    /// Name of the tool the client should dispatch (e.g. `read_file`).
    tool_name: &'a str,
    /// Fully-formed JSON input for the tool call.
    input: Value,
}
fn tool_message_response_many(id: &str, tool_uses: &[ToolUseMessage<'_>]) -> MessageResponse {
MessageResponse {
id: id.to_string(),
kind: "message".to_string(),
role: "assistant".to_string(),
content: vec![OutputContentBlock::ToolUse {
id: tool_id.to_string(),
name: tool_name.to_string(),
input,
}],
content: tool_uses
.iter()
.map(|tool_use| OutputContentBlock::ToolUse {
id: tool_use.tool_id.to_string(),
name: tool_use.tool_name.to_string(),
input: tool_use.input.clone(),
})
.collect(),
model: DEFAULT_MODEL.to_string(),
stop_reason: Some("tool_use".to_string()),
stop_sequence: None,
@@ -531,14 +777,32 @@ fn streaming_text_sse() -> String {
}
/// Single-tool convenience wrapper over [`tool_uses_sse`]: builds an SSE body
/// containing exactly one scripted `tool_use` block.
fn tool_use_sse(tool_id: &str, tool_name: &str, partial_json_chunks: &[&str]) -> String {
    let only = [ToolUseSse {
        tool_id,
        tool_name,
        partial_json_chunks,
    }];
    tool_uses_sse(&only)
}
/// One scripted `tool_use` content block for a streaming (SSE) mock response.
struct ToolUseSse<'a> {
    /// Tool use id echoed into the stream events.
    tool_id: &'a str,
    /// Name of the tool being invoked.
    tool_name: &'a str,
    /// The tool's JSON input, pre-split into `input_json_delta` chunks so the
    /// client must reassemble the partial JSON.
    partial_json_chunks: &'a [&'a str],
}
fn tool_uses_sse(tool_uses: &[ToolUseSse<'_>]) -> String {
let mut body = String::new();
let message_id = tool_uses.first().map_or_else(
|| "msg_tool_use".to_string(),
|tool_use| format!("msg_{}", tool_use.tool_id),
);
append_sse(
&mut body,
"message_start",
json!({
"type": "message_start",
"message": {
"id": format!("msg_{tool_id}"),
"id": message_id,
"type": "message",
"role": "assistant",
"content": [],
@@ -549,39 +813,41 @@ fn tool_use_sse(tool_id: &str, tool_name: &str, partial_json_chunks: &[&str]) ->
}
}),
);
append_sse(
&mut body,
"content_block_start",
json!({
"type": "content_block_start",
"index": 0,
"content_block": {
"type": "tool_use",
"id": tool_id,
"name": tool_name,
"input": {}
}
}),
);
for chunk in partial_json_chunks {
for (index, tool_use) in tool_uses.iter().enumerate() {
append_sse(
&mut body,
"content_block_delta",
"content_block_start",
json!({
"type": "content_block_delta",
"index": 0,
"delta": {"type": "input_json_delta", "partial_json": chunk}
"type": "content_block_start",
"index": index,
"content_block": {
"type": "tool_use",
"id": tool_use.tool_id,
"name": tool_use.tool_name,
"input": {}
}
}),
);
for chunk in tool_use.partial_json_chunks {
append_sse(
&mut body,
"content_block_delta",
json!({
"type": "content_block_delta",
"index": index,
"delta": {"type": "input_json_delta", "partial_json": chunk}
}),
);
}
append_sse(
&mut body,
"content_block_stop",
json!({
"type": "content_block_stop",
"index": index
}),
);
}
append_sse(
&mut body,
"content_block_stop",
json!({
"type": "content_block_stop",
"index": 0
}),
);
append_sse(
&mut body,
"message_delta",
@@ -710,3 +976,28 @@ fn extract_file_path(tool_output: &str) -> String {
})
.unwrap_or_else(|| tool_output.trim().to_string())
}
/// Pulls the `stdout` string field out of a JSON-encoded bash tool result.
///
/// Falls back to the trimmed raw output when the payload is not valid JSON
/// or has no string `stdout` field.
fn extract_bash_stdout(tool_output: &str) -> String {
    match serde_json::from_str::<Value>(tool_output) {
        Ok(value) => match value.get("stdout").and_then(Value::as_str) {
            Some(stdout) => stdout.to_owned(),
            None => tool_output.trim().to_string(),
        },
        Err(_) => tool_output.trim().to_string(),
    }
}
/// Pulls `input.message` out of the plugin echo tool's JSON result.
///
/// Falls back to the trimmed raw output when the payload is not valid JSON
/// or the nested field is missing / not a string.
fn extract_plugin_message(tool_output: &str) -> String {
    let fallback = || tool_output.trim().to_string();
    let Ok(value) = serde_json::from_str::<Value>(tool_output) else {
        return fallback();
    };
    value
        .get("input")
        .and_then(|input| input.get("message"))
        .and_then(Value::as_str)
        .map_or_else(fallback, ToOwned::to_owned)
}

View File

@@ -1,16 +1,26 @@
use std::collections::BTreeMap;
use std::fs;
use std::io::Write;
use std::os::unix::fs::PermissionsExt;
use std::path::{Path, PathBuf};
use std::process::{Command, Output};
use std::process::{Command, Output, Stdio};
use std::sync::atomic::{AtomicU64, Ordering};
use std::time::{SystemTime, UNIX_EPOCH};
use mock_anthropic_service::{MockAnthropicService, SCENARIO_PREFIX};
use serde_json::Value;
use serde_json::{json, Value};
static TEMP_COUNTER: AtomicU64 = AtomicU64::new(0);
#[test]
#[allow(clippy::too_many_lines)]
fn clean_env_cli_reaches_mock_anthropic_service_across_scripted_parity_scenarios() {
let manifest_entries = load_scenario_manifest();
let manifest = manifest_entries
.iter()
.cloned()
.map(|entry| (entry.name.clone(), entry))
.collect::<BTreeMap<_, _>>();
let runtime = tokio::runtime::Runtime::new().expect("tokio runtime should build");
let server = runtime
.block_on(MockAnthropicService::spawn())
@@ -22,53 +32,121 @@ fn clean_env_cli_reaches_mock_anthropic_service_across_scripted_parity_scenarios
name: "streaming_text",
permission_mode: "read-only",
allowed_tools: None,
seed: seed_noop,
stdin: None,
prepare: prepare_noop,
assert: assert_streaming_text,
},
ScenarioCase {
name: "read_file_roundtrip",
permission_mode: "read-only",
allowed_tools: Some("read_file"),
seed: seed_read_fixture,
stdin: None,
prepare: prepare_read_fixture,
assert: assert_read_file_roundtrip,
},
ScenarioCase {
name: "grep_chunk_assembly",
permission_mode: "read-only",
allowed_tools: Some("grep_search"),
seed: seed_grep_fixture,
stdin: None,
prepare: prepare_grep_fixture,
assert: assert_grep_chunk_assembly,
},
ScenarioCase {
name: "write_file_allowed",
permission_mode: "workspace-write",
allowed_tools: Some("write_file"),
seed: seed_noop,
stdin: None,
prepare: prepare_noop,
assert: assert_write_file_allowed,
},
ScenarioCase {
name: "write_file_denied",
permission_mode: "read-only",
allowed_tools: Some("write_file"),
seed: seed_noop,
stdin: None,
prepare: prepare_noop,
assert: assert_write_file_denied,
},
ScenarioCase {
name: "multi_tool_turn_roundtrip",
permission_mode: "read-only",
allowed_tools: Some("read_file,grep_search"),
stdin: None,
prepare: prepare_multi_tool_fixture,
assert: assert_multi_tool_turn_roundtrip,
},
ScenarioCase {
name: "bash_stdout_roundtrip",
permission_mode: "danger-full-access",
allowed_tools: Some("bash"),
stdin: None,
prepare: prepare_noop,
assert: assert_bash_stdout_roundtrip,
},
ScenarioCase {
name: "bash_permission_prompt_approved",
permission_mode: "workspace-write",
allowed_tools: Some("bash"),
stdin: Some("y\n"),
prepare: prepare_noop,
assert: assert_bash_permission_prompt_approved,
},
ScenarioCase {
name: "bash_permission_prompt_denied",
permission_mode: "workspace-write",
allowed_tools: Some("bash"),
stdin: Some("n\n"),
prepare: prepare_noop,
assert: assert_bash_permission_prompt_denied,
},
ScenarioCase {
name: "plugin_tool_roundtrip",
permission_mode: "workspace-write",
allowed_tools: None,
stdin: None,
prepare: prepare_plugin_fixture,
assert: assert_plugin_tool_roundtrip,
},
];
let case_names = cases.iter().map(|case| case.name).collect::<Vec<_>>();
let manifest_names = manifest_entries
.iter()
.map(|entry| entry.name.as_str())
.collect::<Vec<_>>();
assert_eq!(
case_names, manifest_names,
"manifest and harness cases must stay aligned"
);
let mut scenario_reports = Vec::new();
for case in cases {
let workspace = unique_temp_dir(case.name);
fs::create_dir_all(&workspace).expect("workspace should exist");
(case.seed)(&workspace);
let response = run_case(case, &workspace, &base_url);
(case.assert)(&workspace, &response);
fs::remove_dir_all(&workspace).expect("workspace cleanup should succeed");
let workspace = HarnessWorkspace::new(unique_temp_dir(case.name));
workspace.create().expect("workspace should exist");
(case.prepare)(&workspace);
let run = run_case(case, &workspace, &base_url);
(case.assert)(&workspace, &run);
let manifest_entry = manifest
.get(case.name)
.unwrap_or_else(|| panic!("missing manifest entry for {}", case.name));
scenario_reports.push(build_scenario_report(
case.name,
manifest_entry,
&run.response,
));
fs::remove_dir_all(&workspace.root).expect("workspace cleanup should succeed");
}
let captured = runtime.block_on(server.captured_requests());
assert_eq!(
captured.len(),
9,
"five scenarios should produce nine requests"
19,
"ten scenarios should produce nineteen requests"
);
assert!(captured
.iter()
@@ -91,8 +169,32 @@ fn clean_env_cli_reaches_mock_anthropic_service_across_scripted_parity_scenarios
"write_file_allowed",
"write_file_denied",
"write_file_denied",
"multi_tool_turn_roundtrip",
"multi_tool_turn_roundtrip",
"bash_stdout_roundtrip",
"bash_stdout_roundtrip",
"bash_permission_prompt_approved",
"bash_permission_prompt_approved",
"bash_permission_prompt_denied",
"bash_permission_prompt_denied",
"plugin_tool_roundtrip",
"plugin_tool_roundtrip",
]
);
let mut request_counts = BTreeMap::new();
for request in &captured {
*request_counts
.entry(request.scenario.as_str())
.or_insert(0_usize) += 1;
}
for report in &mut scenario_reports {
report.request_count = *request_counts
.get(report.name.as_str())
.unwrap_or_else(|| panic!("missing request count for {}", report.name));
}
maybe_write_report(&scenario_reports);
}
#[derive(Clone, Copy)]
@@ -100,25 +202,71 @@ struct ScenarioCase {
name: &'static str,
permission_mode: &'static str,
allowed_tools: Option<&'static str>,
seed: fn(&Path),
assert: fn(&Path, &Value),
stdin: Option<&'static str>,
prepare: fn(&HarnessWorkspace),
assert: fn(&HarnessWorkspace, &ScenarioRun),
}
fn run_case(case: ScenarioCase, workspace: &Path, base_url: &str) -> Value {
let config_home = workspace.join("config-home");
let home = workspace.join("home");
fs::create_dir_all(config_home.join(".claw")).expect("config home should exist");
fs::create_dir_all(&home).expect("home should exist");
/// Per-scenario scratch directory layout: a root plus isolated
/// `config-home` (CLAW_CONFIG_HOME) and `home` (HOME) subdirectories.
struct HarnessWorkspace {
    root: PathBuf,
    config_home: PathBuf,
    home: PathBuf,
}

impl HarnessWorkspace {
    /// Derives the fixed sub-paths from `root` without touching the disk.
    fn new(root: PathBuf) -> Self {
        let config_home = root.join("config-home");
        let home = root.join("home");
        Self {
            root,
            config_home,
            home,
        }
    }

    /// Creates the root and both sub-directories (idempotent via
    /// `create_dir_all`).
    fn create(&self) -> std::io::Result<()> {
        for dir in [&self.root, &self.config_home, &self.home] {
            fs::create_dir_all(dir)?;
        }
        Ok(())
    }
}
/// Captured output of one CLI invocation against the mock service.
struct ScenarioRun {
    /// Final JSON response object parsed out of stdout.
    response: Value,
    /// Full raw stdout, kept so assertions can check prompt text
    /// (e.g. the permission-approval prompt lines).
    stdout: String,
}
/// One entry parsed from `mock_parity_scenarios.json`
/// (see `load_scenario_manifest`).
#[derive(Debug, Clone)]
struct ScenarioManifestEntry {
    /// Scenario name; must match the harness case name exactly.
    name: String,
    /// Scenario grouping label.
    category: String,
    /// Human-readable description of what the scenario exercises.
    description: String,
    /// References tying the scenario back to PARITY.md sections.
    parity_refs: Vec<String>,
}
/// Aggregated results for one scenario run, serialized via
/// `scenario_report_json` when MOCK_PARITY_REPORT_PATH is set.
#[derive(Debug)]
struct ScenarioReport {
    name: String,
    category: String,
    description: String,
    parity_refs: Vec<String>,
    /// Agent-loop iteration count reported in the CLI's JSON response.
    iterations: u64,
    /// HTTP requests the mock service captured for this scenario;
    /// initialized to 0 and back-filled after the run.
    request_count: usize,
    /// Names of the tools invoked, in response order.
    tool_uses: Vec<String>,
    /// Count of tool results flagged `is_error == true`.
    tool_error_count: usize,
    /// Final assistant message text.
    final_message: String,
}
fn run_case(case: ScenarioCase, workspace: &HarnessWorkspace, base_url: &str) -> ScenarioRun {
let mut command = Command::new(env!("CARGO_BIN_EXE_claw"));
command
.current_dir(workspace)
.current_dir(&workspace.root)
.env_clear()
.env("ANTHROPIC_API_KEY", "test-parity-key")
.env("ANTHROPIC_BASE_URL", base_url)
.env("CLAW_CONFIG_HOME", &config_home)
.env("HOME", &home)
.env("CLAW_CONFIG_HOME", &workspace.config_home)
.env("HOME", &workspace.home)
.env("NO_COLOR", "1")
.env("PATH", "/usr/bin:/bin")
.args([
"--model",
"sonnet",
@@ -132,107 +280,453 @@ fn run_case(case: ScenarioCase, workspace: &Path, base_url: &str) -> Value {
}
let prompt = format!("{SCENARIO_PREFIX}{}", case.name);
let output = command.arg(prompt).output().expect("claw should launch");
command.arg(prompt);
let output = if let Some(stdin) = case.stdin {
let mut child = command
.stdin(Stdio::piped())
.stdout(Stdio::piped())
.stderr(Stdio::piped())
.spawn()
.expect("claw should launch");
child
.stdin
.as_mut()
.expect("stdin should be piped")
.write_all(stdin.as_bytes())
.expect("stdin should write");
child.wait_with_output().expect("claw should finish")
} else {
command.output().expect("claw should launch")
};
assert_success(&output);
serde_json::from_slice(&output.stdout).expect("prompt output should be valid json")
let stdout = String::from_utf8_lossy(&output.stdout).into_owned();
ScenarioRun {
response: parse_json_output(&stdout),
stdout,
}
}
fn seed_noop(_: &Path) {}
fn prepare_noop(_: &HarnessWorkspace) {}
fn seed_read_fixture(workspace: &Path) {
fs::write(workspace.join("fixture.txt"), "alpha parity line\n").expect("fixture should write");
fn prepare_read_fixture(workspace: &HarnessWorkspace) {
fs::write(workspace.root.join("fixture.txt"), "alpha parity line\n")
.expect("fixture should write");
}
fn seed_grep_fixture(workspace: &Path) {
fn prepare_grep_fixture(workspace: &HarnessWorkspace) {
fs::write(
workspace.join("fixture.txt"),
workspace.root.join("fixture.txt"),
"alpha parity line\nbeta line\ngamma parity line\n",
)
.expect("grep fixture should write");
}
fn assert_streaming_text(_: &Path, response: &Value) {
assert_eq!(
response["message"],
Value::String("Mock streaming says hello from the parity harness.".to_string())
);
assert_eq!(response["iterations"], Value::from(1));
assert_eq!(response["tool_uses"], Value::Array(Vec::new()));
assert_eq!(response["tool_results"], Value::Array(Vec::new()));
/// Writes the fixture file read and grepped by the multi-tool scenario
/// (two lines contain "parity", matching the expected count of 2).
fn prepare_multi_tool_fixture(workspace: &HarnessWorkspace) {
    let fixture_path = workspace.root.join("fixture.txt");
    let fixture_body = "alpha parity line\nbeta line\ngamma parity line\n";
    fs::write(fixture_path, fixture_body).expect("multi tool fixture should write");
}
fn assert_read_file_roundtrip(workspace: &Path, response: &Value) {
assert_eq!(response["iterations"], Value::from(2));
/// Lays out an on-disk external plugin ("parity-plugin") exposing a single
/// `plugin_echo` tool, plus the settings.json that enables it, so the CLI can
/// discover and execute the plugin tool during the plugin roundtrip scenario.
fn prepare_plugin_fixture(workspace: &HarnessWorkspace) {
    let plugin_root = workspace
        .root
        .join("external-plugins")
        .join("parity-plugin");
    let tool_dir = plugin_root.join("tools");
    let manifest_dir = plugin_root.join(".claude-plugin");
    fs::create_dir_all(&tool_dir).expect("plugin tools dir");
    fs::create_dir_all(&manifest_dir).expect("plugin manifest dir");
    // Tool executable: reads the JSON input from stdin and echoes it back as
    // {"plugin":...,"tool":...,"input":...} using the CLAWD_PLUGIN_ID /
    // CLAWD_TOOL_NAME environment variables (presumably set by the plugin
    // host at invocation time — confirmed by the assertions in
    // assert_plugin_tool_roundtrip).
    let script_path = tool_dir.join("echo-json.sh");
    fs::write(
        &script_path,
        "#!/bin/sh\nINPUT=$(cat)\nprintf '{\"plugin\":\"%s\",\"tool\":\"%s\",\"input\":%s}\\n' \"$CLAWD_PLUGIN_ID\" \"$CLAWD_TOOL_NAME\" \"$INPUT\"\n",
    )
    .expect("plugin script should write");
    // The script must be executable for the host to run it (unix-only: uses
    // PermissionsExt).
    let mut permissions = fs::metadata(&script_path)
        .expect("plugin script metadata")
        .permissions();
    permissions.set_mode(0o755);
    fs::set_permissions(&script_path, permissions).expect("plugin script should be executable");
    // Plugin manifest declaring the plugin_echo tool, its input schema, the
    // command to run, and its required permission level.
    fs::write(
        manifest_dir.join("plugin.json"),
        r#"{
    "name": "parity-plugin",
    "version": "1.0.0",
    "description": "mock parity plugin",
    "tools": [
        {
            "name": "plugin_echo",
            "description": "Echo JSON input",
            "inputSchema": {
                "type": "object",
                "properties": {
                    "message": { "type": "string" }
                },
                "required": ["message"],
                "additionalProperties": false
            },
            "command": "./tools/echo-json.sh",
            "requiredPermission": "workspace-write"
        }
    ]
}"#,
    )
    .expect("plugin manifest should write");
    // Settings: enable the plugin and point the external plugin directory at
    // the parent of plugin_root (i.e. the "external-plugins" directory).
    fs::write(
        workspace.config_home.join("settings.json"),
        json!({
            "enabledPlugins": {
                "parity-plugin@external": true
            },
            "plugins": {
                "externalDirectories": [plugin_root.parent().expect("plugin parent").display().to_string()]
            }
        })
        .to_string(),
    )
    .expect("plugin settings should write");
}
fn assert_streaming_text(_: &HarnessWorkspace, run: &ScenarioRun) {
assert_eq!(
response["tool_uses"][0]["name"],
run.response["message"],
Value::String("Mock streaming says hello from the parity harness.".to_string())
);
assert_eq!(run.response["iterations"], Value::from(1));
assert_eq!(run.response["tool_uses"], Value::Array(Vec::new()));
assert_eq!(run.response["tool_results"], Value::Array(Vec::new()));
}
fn assert_read_file_roundtrip(workspace: &HarnessWorkspace, run: &ScenarioRun) {
assert_eq!(run.response["iterations"], Value::from(2));
assert_eq!(
run.response["tool_uses"][0]["name"],
Value::String("read_file".to_string())
);
assert_eq!(
response["tool_uses"][0]["input"],
run.response["tool_uses"][0]["input"],
Value::String(r#"{"path":"fixture.txt"}"#.to_string())
);
assert!(response["message"]
assert!(run.response["message"]
.as_str()
.expect("message text")
.contains("alpha parity line"));
let output = response["tool_results"][0]["output"]
let output = run.response["tool_results"][0]["output"]
.as_str()
.expect("tool output");
assert!(output.contains(&workspace.join("fixture.txt").display().to_string()));
assert!(output.contains(&workspace.root.join("fixture.txt").display().to_string()));
assert!(output.contains("alpha parity line"));
}
fn assert_grep_chunk_assembly(_: &Path, response: &Value) {
assert_eq!(response["iterations"], Value::from(2));
fn assert_grep_chunk_assembly(_: &HarnessWorkspace, run: &ScenarioRun) {
assert_eq!(run.response["iterations"], Value::from(2));
assert_eq!(
response["tool_uses"][0]["name"],
run.response["tool_uses"][0]["name"],
Value::String("grep_search".to_string())
);
assert_eq!(
response["tool_uses"][0]["input"],
run.response["tool_uses"][0]["input"],
Value::String(
r#"{"pattern":"parity","path":"fixture.txt","output_mode":"count"}"#.to_string()
)
);
assert!(response["message"]
assert!(run.response["message"]
.as_str()
.expect("message text")
.contains("2 occurrences"));
assert_eq!(response["tool_results"][0]["is_error"], Value::Bool(false));
assert_eq!(
run.response["tool_results"][0]["is_error"],
Value::Bool(false)
);
}
fn assert_write_file_allowed(workspace: &Path, response: &Value) {
assert_eq!(response["iterations"], Value::from(2));
fn assert_write_file_allowed(workspace: &HarnessWorkspace, run: &ScenarioRun) {
assert_eq!(run.response["iterations"], Value::from(2));
assert_eq!(
response["tool_uses"][0]["name"],
run.response["tool_uses"][0]["name"],
Value::String("write_file".to_string())
);
assert!(response["message"]
assert!(run.response["message"]
.as_str()
.expect("message text")
.contains("generated/output.txt"));
let generated = workspace.join("generated").join("output.txt");
let generated = workspace.root.join("generated").join("output.txt");
let contents = fs::read_to_string(&generated).expect("generated file should exist");
assert_eq!(contents, "created by mock service\n");
assert_eq!(response["tool_results"][0]["is_error"], Value::Bool(false));
assert_eq!(
run.response["tool_results"][0]["is_error"],
Value::Bool(false)
);
}
fn assert_write_file_denied(workspace: &Path, response: &Value) {
assert_eq!(response["iterations"], Value::from(2));
fn assert_write_file_denied(workspace: &HarnessWorkspace, run: &ScenarioRun) {
assert_eq!(run.response["iterations"], Value::from(2));
assert_eq!(
response["tool_uses"][0]["name"],
run.response["tool_uses"][0]["name"],
Value::String("write_file".to_string())
);
let tool_output = response["tool_results"][0]["output"]
let tool_output = run.response["tool_results"][0]["output"]
.as_str()
.expect("tool output");
assert!(tool_output.contains("requires workspace-write permission"));
assert_eq!(response["tool_results"][0]["is_error"], Value::Bool(true));
assert!(response["message"]
assert_eq!(
run.response["tool_results"][0]["is_error"],
Value::Bool(true)
);
assert!(run.response["message"]
.as_str()
.expect("message text")
.contains("denied as expected"));
assert!(!workspace.join("generated").join("denied.txt").exists());
assert!(!workspace.root.join("generated").join("denied.txt").exists());
}
fn assert_multi_tool_turn_roundtrip(_: &HarnessWorkspace, run: &ScenarioRun) {
assert_eq!(run.response["iterations"], Value::from(2));
let tool_uses = run.response["tool_uses"]
.as_array()
.expect("tool uses array");
assert_eq!(
tool_uses.len(),
2,
"expected two tool uses in a single turn"
);
assert_eq!(tool_uses[0]["name"], Value::String("read_file".to_string()));
assert_eq!(
tool_uses[1]["name"],
Value::String("grep_search".to_string())
);
let tool_results = run.response["tool_results"]
.as_array()
.expect("tool results array");
assert_eq!(
tool_results.len(),
2,
"expected two tool results in a single turn"
);
assert!(run.response["message"]
.as_str()
.expect("message text")
.contains("alpha parity line"));
assert!(run.response["message"]
.as_str()
.expect("message text")
.contains("2 occurrences"));
}
fn assert_bash_stdout_roundtrip(_: &HarnessWorkspace, run: &ScenarioRun) {
assert_eq!(run.response["iterations"], Value::from(2));
assert_eq!(
run.response["tool_uses"][0]["name"],
Value::String("bash".to_string())
);
let tool_output = run.response["tool_results"][0]["output"]
.as_str()
.expect("tool output");
let parsed: Value = serde_json::from_str(tool_output).expect("bash output json");
assert_eq!(
parsed["stdout"],
Value::String("alpha from bash".to_string())
);
assert_eq!(
run.response["tool_results"][0]["is_error"],
Value::Bool(false)
);
assert!(run.response["message"]
.as_str()
.expect("message text")
.contains("alpha from bash"));
}
fn assert_bash_permission_prompt_approved(_: &HarnessWorkspace, run: &ScenarioRun) {
assert!(run.stdout.contains("Permission approval required"));
assert!(run.stdout.contains("Approve this tool call? [y/N]:"));
assert_eq!(run.response["iterations"], Value::from(2));
assert_eq!(
run.response["tool_results"][0]["is_error"],
Value::Bool(false)
);
let tool_output = run.response["tool_results"][0]["output"]
.as_str()
.expect("tool output");
let parsed: Value = serde_json::from_str(tool_output).expect("bash output json");
assert_eq!(
parsed["stdout"],
Value::String("approved via prompt".to_string())
);
assert!(run.response["message"]
.as_str()
.expect("message text")
.contains("approved and executed"));
}
fn assert_bash_permission_prompt_denied(_: &HarnessWorkspace, run: &ScenarioRun) {
assert!(run.stdout.contains("Permission approval required"));
assert!(run.stdout.contains("Approve this tool call? [y/N]:"));
assert_eq!(run.response["iterations"], Value::from(2));
let tool_output = run.response["tool_results"][0]["output"]
.as_str()
.expect("tool output");
assert!(tool_output.contains("denied by user approval prompt"));
assert_eq!(
run.response["tool_results"][0]["is_error"],
Value::Bool(true)
);
assert!(run.response["message"]
.as_str()
.expect("message text")
.contains("denied as expected"));
}
fn assert_plugin_tool_roundtrip(_: &HarnessWorkspace, run: &ScenarioRun) {
assert_eq!(run.response["iterations"], Value::from(2));
assert_eq!(
run.response["tool_uses"][0]["name"],
Value::String("plugin_echo".to_string())
);
let tool_output = run.response["tool_results"][0]["output"]
.as_str()
.expect("tool output");
let parsed: Value = serde_json::from_str(tool_output).expect("plugin output json");
assert_eq!(
parsed["plugin"],
Value::String("parity-plugin@external".to_string())
);
assert_eq!(parsed["tool"], Value::String("plugin_echo".to_string()));
assert_eq!(
parsed["input"]["message"],
Value::String("hello from plugin parity".to_string())
);
assert!(run.response["message"]
.as_str()
.expect("message text")
.contains("hello from plugin parity"));
}
fn parse_json_output(stdout: &str) -> Value {
if let Some(index) = stdout.rfind("{\"auto_compaction\"") {
return serde_json::from_str(&stdout[index..]).unwrap_or_else(|error| {
panic!("failed to parse JSON response from stdout: {error}\n{stdout}")
});
}
stdout
.lines()
.rev()
.find_map(|line| {
let trimmed = line.trim();
if trimmed.starts_with('{') && trimmed.ends_with('}') {
serde_json::from_str(trimmed).ok()
} else {
None
}
})
.unwrap_or_else(|| panic!("no JSON response line found in stdout:\n{stdout}"))
}
/// Combines a scenario's manifest metadata with its observed CLI response
/// into a `ScenarioReport`. `request_count` starts at 0 and is back-filled by
/// the caller once captured requests are tallied.
fn build_scenario_report(
    name: &str,
    manifest_entry: &ScenarioManifestEntry,
    response: &Value,
) -> ScenarioReport {
    let iterations = response["iterations"]
        .as_u64()
        .expect("iterations should exist");
    let tool_uses = response["tool_uses"]
        .as_array()
        .expect("tool uses array")
        .iter()
        .filter_map(|value| value["name"].as_str().map(ToOwned::to_owned))
        .collect();
    let tool_error_count = response["tool_results"]
        .as_array()
        .expect("tool results array")
        .iter()
        .filter(|value| value["is_error"].as_bool().unwrap_or(false))
        .count();
    let final_message = response["message"]
        .as_str()
        .expect("message text")
        .to_string();

    ScenarioReport {
        name: name.to_string(),
        category: manifest_entry.category.clone(),
        description: manifest_entry.description.clone(),
        parity_refs: manifest_entry.parity_refs.clone(),
        iterations,
        request_count: 0,
        tool_uses,
        tool_error_count,
        final_message,
    }
}
fn maybe_write_report(reports: &[ScenarioReport]) {
let Some(path) = std::env::var_os("MOCK_PARITY_REPORT_PATH") else {
return;
};
let payload = json!({
"scenario_count": reports.len(),
"request_count": reports.iter().map(|report| report.request_count).sum::<usize>(),
"scenarios": reports.iter().map(scenario_report_json).collect::<Vec<_>>(),
});
fs::write(
path,
serde_json::to_vec_pretty(&payload).expect("report json should serialize"),
)
.expect("report should write");
}
/// Loads and validates `mock_parity_scenarios.json` (two directories above
/// the crate manifest), panicking with a specific message on any missing or
/// mistyped field so manifest drift fails the test loudly.
fn load_scenario_manifest() -> Vec<ScenarioManifestEntry> {
    let manifest_path =
        Path::new(env!("CARGO_MANIFEST_DIR")).join("../../mock_parity_scenarios.json");
    let raw = fs::read_to_string(&manifest_path).expect("scenario manifest should exist");
    let entries: Vec<Value> =
        serde_json::from_str(&raw).expect("scenario manifest should parse");
    entries
        .iter()
        .map(|entry| {
            let name = entry["name"]
                .as_str()
                .expect("scenario name should be a string");
            let category = entry["category"]
                .as_str()
                .expect("scenario category should be a string");
            let description = entry["description"]
                .as_str()
                .expect("scenario description should be a string");
            let parity_refs = entry["parity_refs"]
                .as_array()
                .expect("parity refs should be an array")
                .iter()
                .map(|value| {
                    value
                        .as_str()
                        .expect("parity ref should be a string")
                        .to_string()
                })
                .collect();
            ScenarioManifestEntry {
                name: name.to_string(),
                category: category.to_string(),
                description: description.to_string(),
                parity_refs,
            }
        })
        .collect()
}
/// Serializes one `ScenarioReport` into the JSON object shape written by
/// `maybe_write_report` (field names mirror the struct one-to-one).
fn scenario_report_json(report: &ScenarioReport) -> Value {
    json!({
        "name": report.name,
        "category": report.category,
        "description": report.description,
        "parity_refs": report.parity_refs,
        "iterations": report.iterations,
        "request_count": report.request_count,
        "tool_uses": report.tool_uses,
        "tool_error_count": report.tool_error_count,
        "final_message": report.final_message,
    })
}
fn assert_success(output: &Output) {