mirror of
https://github.com/instructkr/claw-code.git
synced 2026-04-07 08:34:49 +08:00
feat(mcp+lifecycle): MCP degraded-startup reporting, lane event schema, lane completion hardening
Add MCP structured degraded-startup classification (P2.10): - classify MCP failures as startup/handshake/config/partial - expose failed_servers + recovery_recommendations in tool output - add mcp_degraded output field with server_name, failure_mode, recoverable Canonical lane event schema (P2.7): - add LaneEventName variants for all lifecycle states - wire LaneEvent::new with full 3-arg signature (event, status, emitted_at) - emit typed events for Started, Blocked, Failed, Finished Fix let mut executor for search test binary Fix lane_completion unused import warnings Note: mcp_stdio::manager_discovery_report test has pre-existing failure on clean main, unrelated to this commit.
This commit is contained in:
@@ -124,11 +124,11 @@ pub enum McpPhaseResult {
|
||||
Failure {
|
||||
phase: McpLifecyclePhase,
|
||||
error: McpErrorSurface,
|
||||
recoverable: bool,
|
||||
},
|
||||
Timeout {
|
||||
phase: McpLifecyclePhase,
|
||||
waited: Duration,
|
||||
error: McpErrorSurface,
|
||||
},
|
||||
}
|
||||
|
||||
@@ -200,6 +200,15 @@ impl McpLifecycleState {
|
||||
fn record_result(&mut self, result: McpPhaseResult) {
|
||||
self.phase_results.push(result);
|
||||
}
|
||||
|
||||
fn can_resume_after_error(&self) -> bool {
|
||||
match self.phase_results.last() {
|
||||
Some(McpPhaseResult::Failure { error, .. } | McpPhaseResult::Timeout { error, .. }) => {
|
||||
error.recoverable
|
||||
}
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
@@ -286,34 +295,42 @@ impl McpLifecycleValidator {
|
||||
let started = Instant::now();
|
||||
|
||||
if let Some(current_phase) = self.state.current_phase() {
|
||||
if !Self::validate_phase_transition(current_phase, phase) {
|
||||
return self.record_failure(
|
||||
phase,
|
||||
McpErrorSurface::new(
|
||||
phase,
|
||||
None,
|
||||
format!("invalid MCP lifecycle transition from {current_phase} to {phase}"),
|
||||
BTreeMap::from([
|
||||
("from".to_string(), current_phase.to_string()),
|
||||
("to".to_string(), phase.to_string()),
|
||||
]),
|
||||
false,
|
||||
),
|
||||
false,
|
||||
);
|
||||
}
|
||||
} else if phase != McpLifecyclePhase::ConfigLoad {
|
||||
return self.record_failure(
|
||||
phase,
|
||||
McpErrorSurface::new(
|
||||
if current_phase == McpLifecyclePhase::ErrorSurfacing
|
||||
&& phase == McpLifecyclePhase::Ready
|
||||
&& !self.state.can_resume_after_error()
|
||||
{
|
||||
return self.record_failure(McpErrorSurface::new(
|
||||
phase,
|
||||
None,
|
||||
format!("invalid initial MCP lifecycle phase {phase}"),
|
||||
BTreeMap::from([("phase".to_string(), phase.to_string())]),
|
||||
"cannot return to ready after a non-recoverable MCP lifecycle failure",
|
||||
BTreeMap::from([
|
||||
("from".to_string(), current_phase.to_string()),
|
||||
("to".to_string(), phase.to_string()),
|
||||
]),
|
||||
false,
|
||||
),
|
||||
));
|
||||
}
|
||||
|
||||
if !Self::validate_phase_transition(current_phase, phase) {
|
||||
return self.record_failure(McpErrorSurface::new(
|
||||
phase,
|
||||
None,
|
||||
format!("invalid MCP lifecycle transition from {current_phase} to {phase}"),
|
||||
BTreeMap::from([
|
||||
("from".to_string(), current_phase.to_string()),
|
||||
("to".to_string(), phase.to_string()),
|
||||
]),
|
||||
false,
|
||||
));
|
||||
}
|
||||
} else if phase != McpLifecyclePhase::ConfigLoad {
|
||||
return self.record_failure(McpErrorSurface::new(
|
||||
phase,
|
||||
None,
|
||||
format!("invalid initial MCP lifecycle phase {phase}"),
|
||||
BTreeMap::from([("phase".to_string(), phase.to_string())]),
|
||||
false,
|
||||
);
|
||||
));
|
||||
}
|
||||
|
||||
self.state.record_phase(phase);
|
||||
@@ -325,19 +342,11 @@ impl McpLifecycleValidator {
|
||||
result
|
||||
}
|
||||
|
||||
pub fn record_failure(
|
||||
&mut self,
|
||||
phase: McpLifecyclePhase,
|
||||
error: McpErrorSurface,
|
||||
recoverable: bool,
|
||||
) -> McpPhaseResult {
|
||||
pub fn record_failure(&mut self, error: McpErrorSurface) -> McpPhaseResult {
|
||||
let phase = error.phase;
|
||||
self.state.record_error(error.clone());
|
||||
self.state.record_phase(McpLifecyclePhase::ErrorSurfacing);
|
||||
let result = McpPhaseResult::Failure {
|
||||
phase,
|
||||
error,
|
||||
recoverable,
|
||||
};
|
||||
let result = McpPhaseResult::Failure { phase, error };
|
||||
self.state.record_result(result.clone());
|
||||
result
|
||||
}
|
||||
@@ -360,9 +369,13 @@ impl McpLifecycleValidator {
|
||||
context,
|
||||
true,
|
||||
);
|
||||
self.state.record_error(error);
|
||||
self.state.record_error(error.clone());
|
||||
self.state.record_phase(McpLifecyclePhase::ErrorSurfacing);
|
||||
let result = McpPhaseResult::Timeout { phase, waited };
|
||||
let result = McpPhaseResult::Timeout {
|
||||
phase,
|
||||
waited,
|
||||
error,
|
||||
};
|
||||
self.state.record_result(result.clone());
|
||||
result
|
||||
}
|
||||
@@ -545,13 +558,9 @@ mod tests {
|
||||
|
||||
// then
|
||||
match result {
|
||||
McpPhaseResult::Failure {
|
||||
phase,
|
||||
error,
|
||||
recoverable,
|
||||
} => {
|
||||
McpPhaseResult::Failure { phase, error } => {
|
||||
assert_eq!(phase, McpLifecyclePhase::Ready);
|
||||
assert!(!recoverable);
|
||||
assert!(!error.recoverable);
|
||||
assert_eq!(error.phase, McpLifecyclePhase::Ready);
|
||||
assert_eq!(
|
||||
error.context.get("from").map(String::as_str),
|
||||
@@ -581,27 +590,22 @@ mod tests {
|
||||
|
||||
// when / then
|
||||
for phase in McpLifecyclePhase::all() {
|
||||
let result = validator.record_failure(
|
||||
let result = validator.record_failure(McpErrorSurface::new(
|
||||
phase,
|
||||
McpErrorSurface::new(
|
||||
phase,
|
||||
Some("alpha".to_string()),
|
||||
format!("failure at {phase}"),
|
||||
BTreeMap::from([("server".to_string(), "alpha".to_string())]),
|
||||
phase == McpLifecyclePhase::ResourceDiscovery,
|
||||
),
|
||||
Some("alpha".to_string()),
|
||||
format!("failure at {phase}"),
|
||||
BTreeMap::from([("server".to_string(), "alpha".to_string())]),
|
||||
phase == McpLifecyclePhase::ResourceDiscovery,
|
||||
);
|
||||
));
|
||||
|
||||
match result {
|
||||
McpPhaseResult::Failure {
|
||||
phase: failed_phase,
|
||||
error,
|
||||
recoverable,
|
||||
} => {
|
||||
McpPhaseResult::Failure { phase: failed_phase, error } => {
|
||||
assert_eq!(failed_phase, phase);
|
||||
assert_eq!(error.phase, phase);
|
||||
assert_eq!(recoverable, phase == McpLifecyclePhase::ResourceDiscovery);
|
||||
assert_eq!(
|
||||
error.recoverable,
|
||||
phase == McpLifecyclePhase::ResourceDiscovery
|
||||
);
|
||||
}
|
||||
other => panic!("expected failure result, got {other:?}"),
|
||||
}
|
||||
@@ -628,9 +632,12 @@ mod tests {
|
||||
McpPhaseResult::Timeout {
|
||||
phase,
|
||||
waited: actual,
|
||||
error,
|
||||
} => {
|
||||
assert_eq!(phase, McpLifecyclePhase::SpawnConnect);
|
||||
assert_eq!(actual, waited);
|
||||
assert!(error.recoverable);
|
||||
assert_eq!(error.server_name.as_deref(), Some("alpha"));
|
||||
}
|
||||
other => panic!("expected timeout result, got {other:?}"),
|
||||
}
|
||||
@@ -707,17 +714,13 @@ mod tests {
|
||||
let result = validator.run_phase(phase);
|
||||
assert!(matches!(result, McpPhaseResult::Success { .. }));
|
||||
}
|
||||
let _ = validator.record_failure(
|
||||
let _ = validator.record_failure(McpErrorSurface::new(
|
||||
McpLifecyclePhase::ResourceDiscovery,
|
||||
McpErrorSurface::new(
|
||||
McpLifecyclePhase::ResourceDiscovery,
|
||||
Some("alpha".to_string()),
|
||||
"resource listing failed",
|
||||
BTreeMap::from([("reason".to_string(), "timeout".to_string())]),
|
||||
true,
|
||||
),
|
||||
Some("alpha".to_string()),
|
||||
"resource listing failed",
|
||||
BTreeMap::from([("reason".to_string(), "timeout".to_string())]),
|
||||
true,
|
||||
);
|
||||
));
|
||||
|
||||
// when
|
||||
let shutdown = validator.run_phase(McpLifecyclePhase::Shutdown);
|
||||
@@ -758,4 +761,79 @@ mod tests {
|
||||
let trait_object: &dyn std::error::Error = &error;
|
||||
assert_eq!(trait_object.to_string(), rendered);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn given_nonrecoverable_failure_when_returning_to_ready_then_validator_rejects_resume() {
|
||||
// given
|
||||
let mut validator = McpLifecycleValidator::new();
|
||||
for phase in [
|
||||
McpLifecyclePhase::ConfigLoad,
|
||||
McpLifecyclePhase::ServerRegistration,
|
||||
McpLifecyclePhase::SpawnConnect,
|
||||
McpLifecyclePhase::InitializeHandshake,
|
||||
McpLifecyclePhase::ToolDiscovery,
|
||||
McpLifecyclePhase::Ready,
|
||||
] {
|
||||
let result = validator.run_phase(phase);
|
||||
assert!(matches!(result, McpPhaseResult::Success { .. }));
|
||||
}
|
||||
let _ = validator.record_failure(McpErrorSurface::new(
|
||||
McpLifecyclePhase::Invocation,
|
||||
Some("alpha".to_string()),
|
||||
"tool call corrupted the session",
|
||||
BTreeMap::from([("reason".to_string(), "invalid frame".to_string())]),
|
||||
false,
|
||||
));
|
||||
|
||||
// when
|
||||
let result = validator.run_phase(McpLifecyclePhase::Ready);
|
||||
|
||||
// then
|
||||
match result {
|
||||
McpPhaseResult::Failure { phase, error } => {
|
||||
assert_eq!(phase, McpLifecyclePhase::Ready);
|
||||
assert!(!error.recoverable);
|
||||
assert!(error.message.contains("non-recoverable"));
|
||||
}
|
||||
other => panic!("expected failure result, got {other:?}"),
|
||||
}
|
||||
assert_eq!(
|
||||
validator.state().current_phase(),
|
||||
Some(McpLifecyclePhase::ErrorSurfacing)
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn given_recoverable_failure_when_returning_to_ready_then_validator_allows_resume() {
|
||||
// given
|
||||
let mut validator = McpLifecycleValidator::new();
|
||||
for phase in [
|
||||
McpLifecyclePhase::ConfigLoad,
|
||||
McpLifecyclePhase::ServerRegistration,
|
||||
McpLifecyclePhase::SpawnConnect,
|
||||
McpLifecyclePhase::InitializeHandshake,
|
||||
McpLifecyclePhase::ToolDiscovery,
|
||||
McpLifecyclePhase::Ready,
|
||||
] {
|
||||
let result = validator.run_phase(phase);
|
||||
assert!(matches!(result, McpPhaseResult::Success { .. }));
|
||||
}
|
||||
let _ = validator.record_failure(McpErrorSurface::new(
|
||||
McpLifecyclePhase::Invocation,
|
||||
Some("alpha".to_string()),
|
||||
"tool call failed but can be retried",
|
||||
BTreeMap::from([("reason".to_string(), "upstream timeout".to_string())]),
|
||||
true,
|
||||
));
|
||||
|
||||
// when
|
||||
let result = validator.run_phase(McpLifecyclePhase::Ready);
|
||||
|
||||
// then
|
||||
assert!(matches!(result, McpPhaseResult::Success { .. }));
|
||||
assert_eq!(
|
||||
validator.state().current_phase(),
|
||||
Some(McpLifecyclePhase::Ready)
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user