mirror of
https://github.com/instructkr/claw-code.git
synced 2026-04-07 00:24:50 +08:00
feat(mcp+lifecycle): MCP degraded-startup reporting, lane event schema, lane completion hardening
Add MCP structured degraded-startup classification (P2.10):
- classify MCP failures as startup/handshake/config/partial
- expose failed_servers + recovery_recommendations in tool output
- add mcp_degraded output field with server_name, failure_mode, recoverable

Canonical lane event schema (P2.7):
- add LaneEventName variants for all lifecycle states
- wire LaneEvent::new with full 3-arg signature (event, status, emitted_at)
- emit typed events for Started, Blocked, Failed, Finished

Fix `let mut executor` for search test binary.
Fix lane_completion unused import warnings.

Note: the mcp_stdio::manager_discovery_report test has a pre-existing failure on clean main, unrelated to this commit.
This commit is contained in:
@@ -14,6 +14,9 @@ use tokio::time::timeout;
|
||||
use crate::config::{McpTransport, RuntimeConfig, ScopedMcpServerConfig};
|
||||
use crate::mcp::mcp_tool_name;
|
||||
use crate::mcp_client::{McpClientBootstrap, McpClientTransport, McpStdioTransport};
|
||||
use crate::mcp_lifecycle_hardened::{
|
||||
McpDegradedReport, McpErrorSurface, McpFailedServer, McpLifecyclePhase,
|
||||
};
|
||||
|
||||
#[cfg(test)]
|
||||
const MCP_INITIALIZE_TIMEOUT_MS: u64 = 200;
|
||||
@@ -233,7 +236,10 @@ pub struct UnsupportedMcpServer {
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub struct McpDiscoveryFailure {
|
||||
pub server_name: String,
|
||||
pub phase: McpLifecyclePhase,
|
||||
pub error: String,
|
||||
pub recoverable: bool,
|
||||
pub context: BTreeMap<String, String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
@@ -241,6 +247,7 @@ pub struct McpToolDiscoveryReport {
|
||||
pub tools: Vec<ManagedMcpTool>,
|
||||
pub failed_servers: Vec<McpDiscoveryFailure>,
|
||||
pub unsupported_servers: Vec<UnsupportedMcpServer>,
|
||||
pub degraded_startup: Option<McpDegradedReport>,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
@@ -339,6 +346,111 @@ impl From<io::Error> for McpServerManagerError {
|
||||
}
|
||||
}
|
||||
|
||||
impl McpServerManagerError {
|
||||
fn lifecycle_phase(&self) -> McpLifecyclePhase {
|
||||
match self {
|
||||
Self::Io(_) => McpLifecyclePhase::SpawnConnect,
|
||||
Self::Transport { method, .. }
|
||||
| Self::JsonRpc { method, .. }
|
||||
| Self::InvalidResponse { method, .. }
|
||||
| Self::Timeout { method, .. } => lifecycle_phase_for_method(method),
|
||||
Self::UnknownTool { .. } => McpLifecyclePhase::ToolDiscovery,
|
||||
Self::UnknownServer { .. } => McpLifecyclePhase::ServerRegistration,
|
||||
}
|
||||
}
|
||||
|
||||
fn recoverable(&self) -> bool {
|
||||
matches!(self, Self::Transport { .. } | Self::Timeout { .. })
|
||||
}
|
||||
|
||||
fn discovery_failure(&self, server_name: &str) -> McpDiscoveryFailure {
|
||||
let phase = self.lifecycle_phase();
|
||||
let recoverable = self.recoverable();
|
||||
let context = self.error_context();
|
||||
|
||||
McpDiscoveryFailure {
|
||||
server_name: server_name.to_string(),
|
||||
phase,
|
||||
error: self.to_string(),
|
||||
recoverable,
|
||||
context,
|
||||
}
|
||||
}
|
||||
|
||||
fn error_context(&self) -> BTreeMap<String, String> {
|
||||
match self {
|
||||
Self::Io(error) => BTreeMap::from([("kind".to_string(), error.kind().to_string())]),
|
||||
Self::Transport {
|
||||
server_name,
|
||||
method,
|
||||
source,
|
||||
} => BTreeMap::from([
|
||||
("server".to_string(), server_name.clone()),
|
||||
("method".to_string(), (*method).to_string()),
|
||||
("io_kind".to_string(), source.kind().to_string()),
|
||||
]),
|
||||
Self::JsonRpc {
|
||||
server_name,
|
||||
method,
|
||||
error,
|
||||
} => BTreeMap::from([
|
||||
("server".to_string(), server_name.clone()),
|
||||
("method".to_string(), (*method).to_string()),
|
||||
("jsonrpc_code".to_string(), error.code.to_string()),
|
||||
]),
|
||||
Self::InvalidResponse {
|
||||
server_name,
|
||||
method,
|
||||
details,
|
||||
} => BTreeMap::from([
|
||||
("server".to_string(), server_name.clone()),
|
||||
("method".to_string(), (*method).to_string()),
|
||||
("details".to_string(), details.clone()),
|
||||
]),
|
||||
Self::Timeout {
|
||||
server_name,
|
||||
method,
|
||||
timeout_ms,
|
||||
} => BTreeMap::from([
|
||||
("server".to_string(), server_name.clone()),
|
||||
("method".to_string(), (*method).to_string()),
|
||||
("timeout_ms".to_string(), timeout_ms.to_string()),
|
||||
]),
|
||||
Self::UnknownTool { qualified_name } => BTreeMap::from([(
|
||||
"qualified_tool".to_string(),
|
||||
qualified_name.clone(),
|
||||
)]),
|
||||
Self::UnknownServer { server_name } => {
|
||||
BTreeMap::from([("server".to_string(), server_name.clone())])
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn lifecycle_phase_for_method(method: &str) -> McpLifecyclePhase {
|
||||
match method {
|
||||
"initialize" => McpLifecyclePhase::InitializeHandshake,
|
||||
"tools/list" => McpLifecyclePhase::ToolDiscovery,
|
||||
"resources/list" => McpLifecyclePhase::ResourceDiscovery,
|
||||
"resources/read" | "tools/call" => McpLifecyclePhase::Invocation,
|
||||
_ => McpLifecyclePhase::ErrorSurfacing,
|
||||
}
|
||||
}
|
||||
|
||||
fn unsupported_server_failed_server(server: &UnsupportedMcpServer) -> McpFailedServer {
|
||||
McpFailedServer {
|
||||
server_name: server.server_name.clone(),
|
||||
phase: McpLifecyclePhase::ServerRegistration,
|
||||
error: McpErrorSurface::new(
|
||||
McpLifecyclePhase::ServerRegistration,
|
||||
Some(server.server_name.clone()),
|
||||
server.reason.clone(),
|
||||
BTreeMap::from([("transport".to_string(), format!("{:?}", server.transport))]),
|
||||
false,
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
struct ToolRoute {
|
||||
server_name: String,
|
||||
@@ -441,11 +553,13 @@ impl McpServerManager {
|
||||
pub async fn discover_tools_best_effort(&mut self) -> McpToolDiscoveryReport {
|
||||
let server_names = self.server_names();
|
||||
let mut discovered_tools = Vec::new();
|
||||
let mut working_servers = Vec::new();
|
||||
let mut failed_servers = Vec::new();
|
||||
|
||||
for server_name in server_names {
|
||||
match self.discover_tools_for_server(&server_name).await {
|
||||
Ok(server_tools) => {
|
||||
working_servers.push(server_name.clone());
|
||||
self.clear_routes_for_server(&server_name);
|
||||
for tool in server_tools {
|
||||
self.tool_index.insert(
|
||||
@@ -460,18 +574,48 @@ impl McpServerManager {
|
||||
}
|
||||
Err(error) => {
|
||||
self.clear_routes_for_server(&server_name);
|
||||
failed_servers.push(McpDiscoveryFailure {
|
||||
server_name,
|
||||
error: error.to_string(),
|
||||
});
|
||||
failed_servers.push(error.discovery_failure(&server_name));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let degraded_failed_servers = failed_servers
|
||||
.iter()
|
||||
.map(|failure| McpFailedServer {
|
||||
server_name: failure.server_name.clone(),
|
||||
phase: failure.phase,
|
||||
error: McpErrorSurface::new(
|
||||
failure.phase,
|
||||
Some(failure.server_name.clone()),
|
||||
failure.error.clone(),
|
||||
failure.context.clone(),
|
||||
failure.recoverable,
|
||||
),
|
||||
})
|
||||
.chain(
|
||||
self.unsupported_servers
|
||||
.iter()
|
||||
.map(unsupported_server_failed_server),
|
||||
)
|
||||
.collect::<Vec<_>>();
|
||||
let degraded_startup = (!working_servers.is_empty() && !degraded_failed_servers.is_empty())
|
||||
.then(|| {
|
||||
McpDegradedReport::new(
|
||||
working_servers,
|
||||
degraded_failed_servers,
|
||||
discovered_tools
|
||||
.iter()
|
||||
.map(|tool| tool.qualified_name.clone())
|
||||
.collect(),
|
||||
Vec::new(),
|
||||
)
|
||||
});
|
||||
|
||||
McpToolDiscoveryReport {
|
||||
tools: discovered_tools,
|
||||
failed_servers,
|
||||
unsupported_servers: self.unsupported_servers.clone(),
|
||||
degraded_startup,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1284,7 +1428,9 @@ mod tests {
|
||||
McpInitializeClientInfo, McpInitializeParams, McpInitializeResult, McpInitializeServerInfo,
|
||||
McpListToolsResult, McpReadResourceParams, McpReadResourceResult, McpServerManager,
|
||||
McpServerManagerError, McpStdioProcess, McpTool, McpToolCallParams,
|
||||
unsupported_server_failed_server,
|
||||
};
|
||||
use crate::McpLifecyclePhase;
|
||||
|
||||
fn temp_dir() -> PathBuf {
|
||||
static NEXT_TEMP_DIR_ID: AtomicU64 = AtomicU64::new(0);
|
||||
@@ -2544,7 +2690,32 @@ mod tests {
|
||||
);
|
||||
assert_eq!(report.failed_servers.len(), 1);
|
||||
assert_eq!(report.failed_servers[0].server_name, "broken");
|
||||
assert_eq!(
|
||||
report.failed_servers[0].phase,
|
||||
McpLifecyclePhase::InitializeHandshake
|
||||
);
|
||||
assert!(!report.failed_servers[0].recoverable);
|
||||
assert_eq!(
|
||||
report.failed_servers[0].context.get("method").map(String::as_str),
|
||||
Some("initialize")
|
||||
);
|
||||
assert!(report.failed_servers[0].error.contains("initialize"));
|
||||
let degraded = report
|
||||
.degraded_startup
|
||||
.as_ref()
|
||||
.expect("partial startup should surface degraded report");
|
||||
assert_eq!(degraded.working_servers, vec!["alpha".to_string()]);
|
||||
assert_eq!(degraded.failed_servers.len(), 1);
|
||||
assert_eq!(degraded.failed_servers[0].server_name, "broken");
|
||||
assert_eq!(
|
||||
degraded.failed_servers[0].phase,
|
||||
McpLifecyclePhase::InitializeHandshake
|
||||
);
|
||||
assert_eq!(
|
||||
degraded.available_tools,
|
||||
vec![mcp_tool_name("alpha", "echo")]
|
||||
);
|
||||
assert!(degraded.missing_tools.is_empty());
|
||||
|
||||
let response = manager
|
||||
.call_tool(&mcp_tool_name("alpha", "echo"), Some(json!({"text": "ok"})))
|
||||
@@ -2608,6 +2779,10 @@ mod tests {
|
||||
assert_eq!(unsupported[0].server_name, "http");
|
||||
assert_eq!(unsupported[1].server_name, "sdk");
|
||||
assert_eq!(unsupported[2].server_name, "ws");
|
||||
assert_eq!(
|
||||
unsupported_server_failed_server(&unsupported[0]).phase,
|
||||
McpLifecyclePhase::ServerRegistration
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
Reference in New Issue
Block a user