feat(mcp+lifecycle): MCP degraded-startup reporting, lane event schema, lane completion hardening

Add MCP structured degraded-startup classification (P2.10):
- classify MCP failures as startup/handshake/config/partial
- expose failed_servers + recovery_recommendations in tool output
- add mcp_degraded output field with server_name, failure_mode, recoverable

Canonical lane event schema (P2.7):
- add LaneEventName variants for all lifecycle states
- wire LaneEvent::new with full 3-arg signature (event, status, emitted_at)
- emit typed events for Started, Blocked, Failed, Finished

Fix let mut executor for search test binary
Fix lane_completion unused import warnings

Note: mcp_stdio::manager_discovery_report test has pre-existing failure on clean main, unrelated to this commit.
This commit is contained in:
Yeachan-Heo
2026-04-04 14:31:56 +00:00
parent 639a54275d
commit 8a9ea1679f
7 changed files with 807 additions and 187 deletions

View File

@@ -30,7 +30,9 @@ pub(crate) fn detect_lane_completion(
}
// Must have finished status
if output.status != "Finished" {
if !output.status.eq_ignore_ascii_case("completed")
&& !output.status.eq_ignore_ascii_case("finished")
{
return None;
}
@@ -91,8 +93,7 @@ pub(crate) fn evaluate_completed_lane(
mod tests {
use super::*;
use runtime::{DiffScope, LaneBlocker};
use crate::LaneEvent;
fn test_output() -> AgentOutput {
AgentOutput {
agent_id: "test-lane-1".to_string(),
@@ -176,4 +177,4 @@ mod tests {
assert!(actions.contains(&PolicyAction::CloseoutLane));
assert!(actions.contains(&PolicyAction::CleanupSession));
}
}
}

View File

@@ -22,8 +22,9 @@ use runtime::{
worker_boot::{WorkerReadySnapshot, WorkerRegistry},
write_file, ApiClient, ApiRequest, AssistantEvent, BashCommandInput, BashCommandOutput,
BranchFreshness, ContentBlock, ConversationMessage, ConversationRuntime, GrepSearchInput,
MessageRole, PermissionMode, PermissionPolicy, PromptCacheEvent, RuntimeError, Session,
ToolError, ToolExecutor,
LaneEvent, LaneEventBlocker, LaneEventName, LaneEventStatus, LaneFailureClass,
McpDegradedReport, MessageRole, PermissionMode, PermissionPolicy, PromptCacheEvent,
RuntimeError, Session, ToolError, ToolExecutor,
};
use serde::{Deserialize, Serialize};
use serde_json::{json, Value};
@@ -313,6 +314,7 @@ impl GlobalToolRegistry {
query: &str,
max_results: usize,
pending_mcp_servers: Option<Vec<String>>,
mcp_degraded: Option<McpDegradedReport>,
) -> ToolSearchOutput {
let query = query.trim().to_string();
let normalized_query = normalize_tool_search_query(&query);
@@ -324,6 +326,7 @@ impl GlobalToolRegistry {
normalized_query,
total_deferred_tools: self.searchable_tool_specs().len(),
pending_mcp_servers,
mcp_degraded,
}
}
@@ -1811,7 +1814,7 @@ fn branch_divergence_output(
BashCommandOutput {
stdout: String::new(),
stderr,
stderr: stderr.clone(),
raw_output_path: None,
interrupted: false,
is_image: None,
@@ -1821,17 +1824,27 @@ fn branch_divergence_output(
dangerously_disable_sandbox: None,
return_code_interpretation: Some("preflight_blocked:branch_divergence".to_string()),
no_output_expected: Some(false),
structured_content: Some(vec![json!({
"event": "branch.stale_against_main",
"failureClass": "branch_divergence",
"branch": branch,
"mainRef": main_ref,
"commitsBehind": commits_behind,
"commitsAhead": commits_ahead,
"missingCommits": missing_fixes,
"blockedCommand": command,
"recommendedAction": format!("merge or rebase {main_ref} before workspace tests")
})]),
structured_content: Some(vec![
serde_json::to_value(
LaneEvent::new(
LaneEventName::BranchStaleAgainstMain,
LaneEventStatus::Blocked,
iso8601_now(),
)
.with_failure_class(LaneFailureClass::BranchDivergence)
.with_detail(stderr.clone())
.with_data(json!({
"branch": branch,
"mainRef": main_ref,
"commitsBehind": commits_behind,
"commitsAhead": commits_ahead,
"missingCommits": missing_fixes,
"blockedCommand": command,
"recommendedAction": format!("merge or rebase {main_ref} before workspace tests")
})),
)
.expect("lane event should serialize"),
]),
persisted_output_path: None,
persisted_output_size: None,
sandbox_status: None,
@@ -2277,58 +2290,6 @@ struct SkillOutput {
prompt: String,
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
enum LaneEventName {
#[serde(rename = "lane.started")]
Started,
#[serde(rename = "lane.blocked")]
Blocked,
#[serde(rename = "lane.finished")]
Finished,
#[serde(rename = "lane.failed")]
Failed,
#[serde(rename = "lane.reconciled")]
Reconciled,
#[serde(rename = "lane.merged")]
Merged,
#[serde(rename = "lane.superseded")]
Superseded,
#[serde(rename = "lane.closed")]
Closed,
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
enum LaneFailureClass {
PromptDelivery,
TrustGate,
BranchDivergence,
Compile,
Test,
PluginStartup,
McpStartup,
Infra,
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
struct LaneBlocker {
#[serde(rename = "failureClass")]
failure_class: LaneFailureClass,
detail: String,
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
struct LaneEvent {
event: LaneEventName,
status: String,
#[serde(rename = "emittedAt")]
emitted_at: String,
#[serde(rename = "failureClass", skip_serializing_if = "Option::is_none")]
failure_class: Option<LaneFailureClass>,
#[serde(skip_serializing_if = "Option::is_none")]
detail: Option<String>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
struct AgentOutput {
#[serde(rename = "agentId")]
@@ -2352,7 +2313,7 @@ struct AgentOutput {
#[serde(rename = "laneEvents", default, skip_serializing_if = "Vec::is_empty")]
lane_events: Vec<LaneEvent>,
#[serde(rename = "currentBlocker", skip_serializing_if = "Option::is_none")]
current_blocker: Option<LaneBlocker>,
current_blocker: Option<LaneEventBlocker>,
#[serde(skip_serializing_if = "Option::is_none")]
error: Option<String>,
}
@@ -2374,6 +2335,8 @@ pub struct ToolSearchOutput {
total_deferred_tools: usize,
#[serde(rename = "pending_mcp_servers")]
pending_mcp_servers: Option<Vec<String>>,
#[serde(rename = "mcp_degraded", skip_serializing_if = "Option::is_none")]
mcp_degraded: Option<McpDegradedReport>,
}
#[derive(Debug, Serialize)]
@@ -3064,13 +3027,7 @@ where
created_at: created_at.clone(),
started_at: Some(created_at),
completed_at: None,
lane_events: vec![LaneEvent {
event: LaneEventName::Started,
status: String::from("running"),
emitted_at: iso8601_now(),
failure_class: None,
detail: None,
}],
lane_events: vec![LaneEvent::started(iso8601_now())],
current_blocker: None,
error: None,
};
@@ -3286,32 +3243,20 @@ fn persist_agent_terminal_state(
next_manifest.current_blocker = blocker.clone();
next_manifest.error = error;
if let Some(blocker) = blocker {
next_manifest.lane_events.push(LaneEvent {
event: LaneEventName::Blocked,
status: status.to_string(),
emitted_at: iso8601_now(),
failure_class: Some(blocker.failure_class.clone()),
detail: Some(blocker.detail.clone()),
});
next_manifest.lane_events.push(LaneEvent {
event: LaneEventName::Failed,
status: status.to_string(),
emitted_at: iso8601_now(),
failure_class: Some(blocker.failure_class),
detail: Some(blocker.detail),
});
next_manifest.lane_events.push(
LaneEvent::blocked(iso8601_now(), &blocker),
);
next_manifest.lane_events.push(
LaneEvent::failed(iso8601_now(), &blocker),
);
} else {
next_manifest.current_blocker = None;
let compressed_detail = result
.filter(|value| !value.trim().is_empty())
.map(|value| compress_summary_text(value.trim()));
next_manifest.lane_events.push(LaneEvent {
event: LaneEventName::Finished,
status: status.to_string(),
emitted_at: iso8601_now(),
failure_class: None,
detail: compressed_detail,
});
next_manifest
.lane_events
.push(LaneEvent::finished(iso8601_now(), compressed_detail));
}
write_agent_manifest(&next_manifest)
}
@@ -3330,7 +3275,7 @@ fn append_agent_output(path: &str, suffix: &str) -> Result<(), String> {
fn format_agent_terminal_output(
status: &str,
result: Option<&str>,
blocker: Option<&LaneBlocker>,
blocker: Option<&LaneEventBlocker>,
error: Option<&str>,
) -> String {
let mut sections = vec![format!("\n## Result\n\n- status: {status}\n")];
@@ -3352,9 +3297,9 @@ fn format_agent_terminal_output(
sections.join("")
}
fn classify_lane_blocker(error: &str) -> LaneBlocker {
fn classify_lane_blocker(error: &str) -> LaneEventBlocker {
let detail = error.trim().to_string();
LaneBlocker {
LaneEventBlocker {
failure_class: classify_lane_failure(error),
detail,
}
@@ -3371,6 +3316,8 @@ fn classify_lane_failure(error: &str) -> LaneFailureClass {
&& (normalized.contains("stale") || normalized.contains("diverg"))
{
LaneFailureClass::BranchDivergence
} else if normalized.contains("gateway") || normalized.contains("routing") {
LaneFailureClass::GatewayRouting
} else if normalized.contains("compile")
|| normalized.contains("build failed")
|| normalized.contains("cargo check")
@@ -3378,8 +3325,15 @@ fn classify_lane_failure(error: &str) -> LaneFailureClass {
LaneFailureClass::Compile
} else if normalized.contains("test") {
LaneFailureClass::Test
} else if normalized.contains("tool failed")
|| normalized.contains("runtime tool")
|| normalized.contains("tool runtime")
{
LaneFailureClass::ToolRuntime
} else if normalized.contains("plugin") {
LaneFailureClass::PluginStartup
} else if normalized.contains("mcp") && normalized.contains("handshake") {
LaneFailureClass::McpHandshake
} else if normalized.contains("mcp") {
LaneFailureClass::McpStartup
} else {
@@ -3688,7 +3642,7 @@ fn final_assistant_text(summary: &runtime::TurnSummary) -> String {
#[allow(clippy::needless_pass_by_value)]
fn execute_tool_search(input: ToolSearchInput) -> ToolSearchOutput {
GlobalToolRegistry::builtin().search(&input.query, input.max_results.unwrap_or(5), None)
GlobalToolRegistry::builtin().search(&input.query, input.max_results.unwrap_or(5), None, None)
}
fn deferred_tool_specs() -> Vec<ToolSpec> {
@@ -4944,7 +4898,7 @@ mod tests {
agent_permission_policy, allowed_tools_for_subagent, classify_lane_failure,
execute_agent_with_spawn, execute_tool, final_assistant_text, mvp_tool_specs,
permission_mode_from_plugin, persist_agent_terminal_state, push_output_block, AgentInput,
AgentJob, GlobalToolRegistry, LaneFailureClass, SubagentToolExecutor,
AgentJob, GlobalToolRegistry, LaneEventName, LaneFailureClass, SubagentToolExecutor,
};
use api::OutputContentBlock;
use runtime::{
@@ -5264,10 +5218,34 @@ mod tests {
)]
);
let search = registry.search("demo echo", 5, Some(vec!["pending-server".to_string()]));
let search = registry.search(
"demo echo",
5,
Some(vec!["pending-server".to_string()]),
Some(runtime::McpDegradedReport::new(
vec!["demo".to_string()],
vec![runtime::McpFailedServer {
server_name: "pending-server".to_string(),
phase: runtime::McpLifecyclePhase::ToolDiscovery,
error: runtime::McpErrorSurface::new(
runtime::McpLifecyclePhase::ToolDiscovery,
Some("pending-server".to_string()),
"tool discovery failed",
BTreeMap::new(),
true,
),
}],
vec!["mcp__demo__echo".to_string()],
vec!["mcp__demo__echo".to_string()],
)),
);
let output = serde_json::to_value(search).expect("search output should serialize");
assert_eq!(output["matches"][0], "mcp__demo__echo");
assert_eq!(output["pending_mcp_servers"][0], "pending-server");
assert_eq!(
output["mcp_degraded"]["failed_servers"][0]["phase"],
"tool_discovery"
);
}
#[test]
@@ -5917,16 +5895,16 @@ mod tests {
),
("targeted tests failed", LaneFailureClass::Test),
("plugin bootstrap failed", LaneFailureClass::PluginStartup),
("mcp handshake timed out", LaneFailureClass::McpStartup),
("mcp handshake timed out", LaneFailureClass::McpHandshake),
(
"mcp startup failed before listing tools",
LaneFailureClass::McpStartup,
),
(
"gateway routing rejected the request",
LaneFailureClass::Infra,
LaneFailureClass::GatewayRouting,
),
("denied tool execution from hook", LaneFailureClass::Infra),
("tool failed: denied tool execution from hook", LaneFailureClass::ToolRuntime),
("thread creation failed", LaneFailureClass::Infra),
];
@@ -5935,6 +5913,28 @@ mod tests {
}
}
#[test]
fn lane_event_schema_serializes_to_canonical_names() {
let cases = [
(LaneEventName::Started, "lane.started"),
(LaneEventName::Ready, "lane.ready"),
(LaneEventName::PromptMisdelivery, "lane.prompt_misdelivery"),
(LaneEventName::Blocked, "lane.blocked"),
(LaneEventName::Red, "lane.red"),
(LaneEventName::Green, "lane.green"),
(LaneEventName::CommitCreated, "lane.commit.created"),
(LaneEventName::PrOpened, "lane.pr.opened"),
(LaneEventName::MergeReady, "lane.merge.ready"),
(LaneEventName::Finished, "lane.finished"),
(LaneEventName::Failed, "lane.failed"),
(LaneEventName::BranchStaleAgainstMain, "branch.stale_against_main"),
];
for (event, expected) in cases {
assert_eq!(serde_json::to_value(event).expect("serialize lane event"), json!(expected));
}
}
#[test]
fn agent_tool_subset_mapping_is_expected() {
let general = allowed_tools_for_subagent("general-purpose");
@@ -6258,7 +6258,7 @@ mod tests {
"branch_divergence"
);
assert_eq!(
output_json["structuredContent"][0]["missingCommits"][0],
output_json["structuredContent"][0]["data"]["missingCommits"][0],
"fix: unblock workspace tests"
);