claw-code/prd.json

{
  "version": "1.0",
  "description": "Clawable Coding Harness - Clear roadmap stories and commit each",
  "stories": [
    {
      "id": "US-001",
      "title": "Phase 1.6 - startup-no-evidence evidence bundle + classifier",
      "description": "When startup times out, emit typed worker.startup_no_evidence event with evidence bundle including last known worker lifecycle state, pane command, prompt-send timestamp, prompt-acceptance state, trust-prompt detection result, and transport/MCP health summary. Classifier should down-rank into specific failure classes.",
      "acceptanceCriteria": [
        "worker.startup_no_evidence event emitted on startup timeout with evidence bundle",
        "Evidence bundle includes: last lifecycle state, pane command, prompt-send timestamp, prompt-acceptance state, trust-prompt detection, transport/MCP health",
        "Classifier attempts to categorize into: trust_required, prompt_misdelivery, prompt_acceptance_timeout, transport_dead, worker_crashed, or unknown",
        "Tests verify evidence bundle structure and classifier behavior"
      ],
      "passes": true,
      "priority": "P0"
    },
    {
      "id": "US-002",
      "title": "Phase 2 - Canonical lane event schema (4.x series)",
      "description": "Define typed events for lane lifecycle: lane.started, lane.ready, lane.prompt_misdelivery, lane.blocked, lane.red, lane.green, lane.commit.created, lane.pr.opened, lane.merge.ready, lane.finished, lane.failed, branch.stale_against_main. Also implement event ordering, reconciliation, provenance, deduplication, and projection contracts.",
      "acceptanceCriteria": [
        "LaneEvent enum with all required variants defined",
        "Event ordering with monotonic sequence metadata attached",
        "Event provenance labels (live_lane, test, healthcheck, replay, transport)",
        "Session identity completeness at creation (title, workspace, purpose)",
        "Duplicate terminal-event suppression with fingerprinting",
        "Lane ownership/scope binding in events",
        "Nudge acknowledgment with dedupe contract",
        "clawhip consumes typed lane events instead of pane scraping"
      ],
      "passes": true,
      "priority": "P0"
    },
    {
      "id": "US-003",
      "title": "Phase 3 - Stale-branch detection before broad verification",
      "description": "Before broad test runs, compare current branch to main and detect if known fixes are missing. Emit branch.stale_against_main event and suggest/auto-run rebase/merge-forward.",
      "acceptanceCriteria": [
        "Branch freshness comparison against main implemented",
        "branch.stale_against_main event emitted when behind",
        "Auto-rebase/merge-forward policy integration",
        "Avoid misclassifying stale-branch failures as new regressions"
      ],
      "passes": true,
      "priority": "P1"
    },
    {
      "id": "US-004",
      "title": "Phase 3 - Recovery recipes with ledger",
      "description": "Encode automatic recoveries for common failures (trust prompt, prompt misdelivery, stale branch, compile red, MCP startup). Expose recovery attempt ledger with recipe id, attempt count, state, timestamps, failure summary.",
      "acceptanceCriteria": [
        "Recovery recipes defined for: trust_prompt_unresolved, prompt_delivered_to_shell, stale_branch, compile_red_after_refactor, MCP_handshake_failure, partial_plugin_startup",
        "Recovery attempt ledger with: recipe id, attempt count, state, timestamps, failure summary, escalation reason",
        "One automatic recovery attempt before escalation",
        "Ledger emitted as structured event data"
      ],
      "passes": true,
      "priority": "P1"
    },
    {
      "id": "US-005",
      "title": "Phase 4 - Typed task packet format",
      "description": "Define structured task packet with fields: objective, scope, repo/worktree, branch policy, acceptance tests, commit policy, reporting contract, escalation policy.",
      "acceptanceCriteria": [
        "TaskPacket struct with all required fields",
        "TaskScope resolution (workspace/module/single-file/custom)",
        "Validation and serialization support",
        "Integration into tools/src/lib.rs"
      ],
      "passes": true,
      "priority": "P1"
    },
    {
      "id": "US-006",
      "title": "Phase 4 - Policy engine for autonomous coding",
      "description": "Encode automation rules: if green + scoped diff + review passed -> merge to dev; if stale branch -> merge-forward before broad tests; if startup blocked -> recover once, then escalate; if lane completed -> emit closeout and cleanup session.",
      "acceptanceCriteria": [
        "Policy rules engine implemented",
        "Rules: green + scoped diff + review -> merge",
        "Rules: stale branch -> merge-forward before tests",
        "Rules: startup blocked -> recover once, then escalate",
        "Rules: lane completed -> closeout and cleanup"
      ],
      "passes": true,
      "priority": "P2"
    },
    {
      "id": "US-007",
      "title": "Phase 5 - Plugin/MCP lifecycle maturity",
      "description": "First-class plugin/MCP lifecycle contract: config validation, startup healthcheck, discovery result, degraded-mode behavior, shutdown/cleanup. Close gaps in end-to-end lifecycle.",
      "acceptanceCriteria": [
        "Plugin/MCP config validation contract",
        "Startup healthcheck with structured results",
        "Discovery result reporting",
        "Degraded-mode behavior documented and implemented",
        "Shutdown/cleanup contract",
        "Partial startup and per-server failures reported structurally"
      ],
      "passes": true,
      "priority": "P2"
    },
    {
      "id": "US-008",
      "title": "Fix kimi-k2.5 model API compatibility",
      "description": "The kimi-k2.5 model (and other kimi models) reject API requests containing the is_error field in tool result messages. The OpenAI-compatible provider currently always includes is_error for all models. Need to make this field conditional based on model support.",
      "acceptanceCriteria": [
        "translate_message function accepts model parameter",
        "is_error field excluded for kimi models (kimi-k2.5, kimi-k1.5, etc.)",
        "is_error field included for models that support it (openai, grok, xai, etc.)",
        "build_chat_completion_request passes model to translate_message",
        "Tests verify is_error presence/absence based on model",
        "cargo test passes",
        "cargo clippy passes",
        "cargo fmt passes"
      ],
      "passes": true,
      "priority": "P0"
    },
    {
      "id": "US-009",
      "title": "Add unit tests for kimi model compatibility fix",
      "description": "During dogfooding we discovered the existing test coverage for model-specific is_error handling is insufficient. Need to add dedicated tests for model_rejects_is_error_field function and translate_message behavior with different models.",
      "acceptanceCriteria": [
        "Test model_rejects_is_error_field identifies kimi-k2.5, kimi-k1.5, dashscope/kimi-k2.5",
        "Test translate_message includes is_error for gpt-4, grok-3, claude models",
        "Test translate_message excludes is_error for kimi models",
        "Test build_chat_completion_request produces correct payload for kimi vs non-kimi",
        "All new tests pass",
        "cargo test --package api passes"
      ],
      "passes": true,
      "priority": "P1"
    },
    {
      "id": "US-010",
      "title": "Add model compatibility documentation",
      "description": "Document which models require special handling (is_error exclusion, reasoning model tuning param stripping, etc.) in a MODEL_COMPATIBILITY.md file for operators and contributors.",
      "acceptanceCriteria": [
        "MODEL_COMPATIBILITY.md created in docs/ or repo root",
        "Document kimi models is_error exclusion",
        "Document reasoning models (o1, o3, grok-3-mini) tuning param stripping",
        "Document gpt-5 max_completion_tokens requirement",
        "Document qwen model routing through dashscope",
        "Cross-reference with existing code comments"
      ],
      "passes": true,
      "priority": "P2"
    },
    {
      "id": "US-011",
      "title": "Performance optimization: reduce API request serialization overhead",
      "description": "The translate_message function creates intermediate JSON Value objects that could be optimized. Profile and optimize the hot path for API request building, especially for conversations with many tool results.",
      "acceptanceCriteria": [
        "Profile current request building with criterion or similar",
        "Identify bottlenecks in translate_message and build_chat_completion_request",
        "Implement optimizations (Vec pre-allocation, reduced cloning, etc.)",
        "Benchmark before/after showing improvement",
        "No functional changes or API breakage"
      ],
      "passes": true,
      "priority": "P2"
    }
  ]
}