From 78f446f68eabd6807f47d845c66492b9a4d58010 Mon Sep 17 00:00:00 2001 From: bellman Date: Wed, 3 Jun 2026 19:26:55 +0900 Subject: [PATCH] test: add argv-safe dogfood probes --- ROADMAP.md | 68 ++++++-- .../tests/output_format_contract.rs | 40 +++++ scripts/dogfood-probe.py | 145 ++++++++++++++++++ tests/test_roadmap_helpers.py | 85 ++++++++++ 4 files changed, 324 insertions(+), 14 deletions(-) create mode 100644 scripts/dogfood-probe.py diff --git a/ROADMAP.md b/ROADMAP.md index 9d6108f5..4757a24b 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -7788,16 +7788,24 @@ Original filing (2026-04-18): the session emitted `SessionStart hook (completed) **Fix applied.** `settings` now routes locally: bare `settings --output-format json` reuses the config JSON envelope for the synthetic `settings` section, and `settings help --output-format json` returns a structured local help envelope. Existing `config`, `status`, and `doctor` JSON routes remain local. **Verification.** Regression test `settings_json_and_help_json_are_local_808` asserts bounded exit, parseable stdout JSON, empty stderr, no `missing_credentials`, `section:"settings"` for bare settings, and structured help for `settings help --output-format json`. -809. **Top-level help/version/MCP/plugin JSON spellings hang with zero stdout in trailing `--output-format json` form instead of returning bounded JSON/help or typed unsupported envelopes** — dogfooded 2026-05-27 on rebuilt main `db81598` (`cargo build --manifest-path rust/Cargo.toml -p rusty-claude-cli`; `claw --version` Git SHA `db81598`). `help --output-format json`, `version --output-format json`, `mcp --output-format json`, `mcp help --output-format json`, `plugins --output-format json`, and `plugins help --output-format json` each timed out under an 8s outer timeout with stdout `0`; stderr only contained the local deprecated `enabledPlugins` settings warning. 
Leading global-style probes (`--help --output-format json`, `--version --output-format json`) fail immediately as `[error-kind: cli_parse] unknown option`, so the hang is again in the trailing subcommand-style routing/startup path. **Required fix shape:** treat help/version/MCP/plugin discovery surfaces as bounded non-interactive control-plane commands; either return JSON help/list/version payloads or standard typed JSON unsupported envelopes with `error_kind`, non-null `hint`, and `message`; add timeout/nonzero-stdout regression coverage for the six trailing repro commands and parser-envelope coverage for leading global-style spellings. **Why this matters:** claws need safe scriptable help/version/plugin/MCP discovery before provider/session startup; silent hangs hide whether a command is unsupported, misparsed, or initializing runtime state. Source: gaebal-gajae 19:00 dogfood probe. [SCOPE: claw-code] +809. **DONE — Top-level help/version/MCP/plugin JSON spellings hang with zero stdout in trailing `--output-format json` form instead of returning bounded JSON/help or typed unsupported envelopes** — dogfooded 2026-05-27 on rebuilt main `db81598` (`cargo build --manifest-path rust/Cargo.toml -p rusty-claude-cli`; `claw --version` Git SHA `db81598`). `help --output-format json`, `version --output-format json`, `mcp --output-format json`, `mcp help --output-format json`, `plugins --output-format json`, and `plugins help --output-format json` each timed out under an 8s outer timeout with stdout `0`; stderr only contained the local deprecated `enabledPlugins` settings warning. The current parser routes these as bounded local surfaces. + + **Fix applied.** `help`, `version`, `mcp`, and `plugins` now resolve to local `CliAction` paths with parsed `CliOutputFormat::Json`; `parse_local_help_action()` maps `mcp` and `plugins` help topics directly to local JSON help envelopes. 
+ + **Verification.** Static evidence in `rust/crates/rusty-claude-cli/src/main.rs`: `wants_help`/`wants_version` preserve `CliOutputFormat::Json`, `parse_local_help_action()` maps `mcp` and `plugins` to local help topics, and match arms route `mcp`/`plugins` to local handlers before prompt/provider startup. 810. **DONE — TTY JSON success for `config`/`plugins --output-format json` contaminates stdout with deprecated-settings warnings before the JSON object** — dogfooded 2026-05-27 on rebuilt main `db81598` after #809. Under pseudo-TTY (`script -q -c "./rust/target/debug/claw config --output-format json"` and `plugins --output-format json`), the commands return rc `0` and bounded JSON, but stdout begins with `warning: /home/bellman/.claw/settings.json: field "enabledPlugins" is deprecated ...` before the JSON object (`first_json_index=121`). Parsing succeeds only after manually stripping the warning/prefix; raw stdout is not valid JSON. **Required fix shape:** in JSON mode, keep diagnostics/warnings on stderr or include structured warning fields inside the JSON envelope, but never prepend human warnings to stdout; add regression coverage that raw stdout from JSON commands parses from byte 0 under TTY and non-TTY modes. **Why this matters:** even when the TTY path avoids the hang from #807/#808/#809, claws and scripts still cannot safely `json.loads(stdout)` if configuration warnings are mixed into stdout. Source: gaebal-gajae 20:00 pseudo-TTY dogfood probe. [SCOPE: claw-code] **Fix applied.** Existing global JSON-mode settings warning suppression now prevents deprecated `enabledPlugins` prose from prefixing JSON stdout, and the regression matrix asserts stdout starts with `{` at byte 0 for representative local JSON surfaces under an isolated deprecated settings fixture. **Verification.** `cargo test --manifest-path rust/Cargo.toml -p rusty-claude-cli global_json_surfaces_suppress_config_deprecation_stderr_810_821_824 -- --nocapture`. -811. 
**Previously typed JSON error/list surfaces hang in plain non-TTY trailing `--output-format json` form instead of emitting their JSON envelopes** — dogfooded 2026-05-27 on rebuilt main `b0e94c9` after #810. In plain non-TTY automation, `agents list --bogus --output-format json`, `skills show does-not-exist --output-format json`, `plugins show does-not-exist --output-format json`, `diff --output-format json`, `sessions show does-not-exist --output-format json`, and `resume bogus --output-format json` each timed out under an 8s outer timeout with stdout `0`; stderr only contained the local deprecated `enabledPlugins` settings warning. Several of these surfaces had prior roadmap fixes for typed JSON/text envelopes, so this is a regression-class scriptability gap: the command-specific envelope may exist, but plain non-TTY trailing JSON invocation routes into interactive startup before reaching it. **Required fix shape:** ensure trailing `--output-format json` is honored before any interactive/provider/session startup for error/list surfaces; add plain non-TTY timeout regression coverage that asserts raw stdout is a parseable typed JSON envelope for the six repro commands, including `error_kind`, non-null `hint`, and `message` where applicable. **Why this matters:** claws primarily invoke CLI checks from non-TTY automation; a fix that only works in manual/TTY mode still leaves JSON error handling unusable for agents. Source: gaebal-gajae 20:30 dogfood probe. [SCOPE: claw-code] +811. **DONE — Previously typed JSON error/list surfaces hang in plain non-TTY trailing `--output-format json` form instead of emitting their JSON envelopes** — dogfooded 2026-05-27 on rebuilt main `b0e94c9` after #810. 
In plain non-TTY automation, `agents list --bogus --output-format json`, `skills show does-not-exist --output-format json`, `plugins show does-not-exist --output-format json`, `diff --output-format json`, `sessions show does-not-exist --output-format json`, and `resume bogus --output-format json` each timed out under an 8s outer timeout with stdout `0`; stderr only contained the local deprecated `enabledPlugins` settings warning. Current code parses trailing JSON mode before local dispatch and routes JSON abort envelopes to stdout. + + **Fix applied.** Trailing `--output-format json` is parsed globally before local command matching, so inventory/error surfaces keep their typed local JSON envelopes instead of falling through to runtime/provider startup. The top-level JSON abort handler routes structured errors to stdout. + + **Verification.** Static evidence in `parse_args()` shows global `--output-format` parsing before local command matching; focused tests cover representative affected surfaces including `agents_list_flag_shaped_filter_returns_unknown_option_792`, `plugins_list_flag_shaped_filter_returns_cli_parse_on_stdout_793_817`, `diff_non_git_dir_has_error_kind_and_hint_801`, and resume/export abort-envelope checks around #819/#820/#823. -812. **`claw --output-format json doctor --help` must stay a local help fast path and never fall through into runtime/provider startup** — dogfooded 2026-05-28 04:01 UTC after #701 worktree drift. The reported repro was `cargo run -q --bin claw -- --output-format json doctor --help`, which did not produce local help promptly and had to be killed, while the positive control `cargo run -q --bin claw -- --output-format json --help` emitted valid JSON help. 
Fresh bounded repro on branch `fix/doctor-help-json-local` did not reproduce the hang on current code (`timeout 5s cargo run -q --bin claw -- --output-format json doctor --help` exited 0 with a `kind:"help"`/`status:"ok"` doctor help envelope), which means the parser fast path is present but under-tested for this exact dogfood surface. +812. **DONE — `claw --output-format json doctor --help` must stay a local help fast path and never fall through into runtime/provider startup** — dogfooded 2026-05-28 04:01 UTC after #701 worktree drift. The reported repro was `cargo run -q --bin claw -- --output-format json doctor --help`, which did not produce local help promptly and had to be killed, while the positive control `cargo run -q --bin claw -- --output-format json --help` emitted valid JSON help. Fresh bounded repro on branch `fix/doctor-help-json-local` did not reproduce the hang on current code (`timeout 5s cargo run -q --bin claw -- --output-format json doctor --help` exited 0 with a `kind:"help"`/`status:"ok"` doctor help envelope), and current regression coverage preserves this fast path. **Pinpoint.** The guarded path is `rust/crates/rusty-claude-cli/src/main.rs`: global `--output-format json` is parsed before `rest`, `parse_local_help_action()` maps `doctor --help` to `CliAction::HelpTopic { topic: Doctor }`, and `print_help_topic()` must return without calling `run_doctor()`, `LiveCli`, provider setup, session resume, or runtime startup. The previous risk class is help fallthrough: treating `doctor --help` as prompt text or as `doctor` diagnostics would either hit provider/session startup or run checks instead of local help. 
@@ -7807,13 +7815,13 @@ Original filing (2026-04-18): the session emitted `SessionStart hook (completed) **Verification.** Regression tests: `doctor_help_json_is_local_structured_and_bounded_702` and `doctor_help_text_stays_plaintext_and_local_702` in `rust/crates/rusty-claude-cli/tests/output_format_contract.rs`; focused command repros recorded in `/tmp/claw_doctor_help_json.out` and `/tmp/claw_doctor_help_json.err` during the doctor-help fix branch. -813. **Dogfood probe shell-string loops can fabricate CLI argv failures for JSON help surfaces** — dogfooded 2026-05-28 after #3185 merged. A verification loop used a single shell string variable (`cmd="--output-format json doctor --help"`; then `cargo run -q --bin claw -- $cmd | python3 -c 'json.load(...)'`). The resulting channel transcript showed `unknown option: --output-format json doctor --help` and Python JSON parse stack noise, even though explicit argv invocations on fresh `main` all returned valid JSON: `cargo run -q --bin claw -- --output-format json doctor --help`, `cargo run -q --bin claw -- doctor --help --output-format json`, and `cargo run -q --bin claw -- help doctor --output-format json`. This is a dogfood-harness test-brittleness / event-log opacity gap, not a product parser regression. The log made a probe-construction mistake look like a claw-code failure. +813. **DONE — Dogfood probe shell-string loops can fabricate CLI argv failures for JSON help surfaces** — dogfooded 2026-05-28 after #3185 merged. A verification loop used a single shell string variable (`cmd="--output-format json doctor --help"`; then `cargo run -q --bin claw -- $cmd | python3 -c 'json.load(...)'`). 
The resulting channel transcript showed `unknown option: --output-format json doctor --help` and Python JSON parse stack noise, even though explicit argv invocations on fresh `main` all returned valid JSON: `cargo run -q --bin claw -- --output-format json doctor --help`, `cargo run -q --bin claw -- doctor --help --output-format json`, and `cargo run -q --bin claw -- help doctor --output-format json`. This is a dogfood-harness test-brittleness / event-log opacity gap, not a product parser regression. The log made a probe-construction mistake look like a claw-code failure. - **Required fix shape.** Add a tiny argv-safe dogfood helper (script or documented recipe) that runs CLI probes as explicit argv arrays rather than interpolated shell strings, captures stdout/stderr separately, and labels probe-construction failures distinctly from product failures. For ad-hoc shell loops, prefer arrays/functions (`run_probe --output-format json doctor --help`) over `$cmd` strings; never pipe unknown stdout directly into a JSON parser without first recording rc/stdout/stderr. + **Fix applied.** Added `scripts/dogfood-probe.py`, an argv-safe helper that accepts a target executable plus arguments after `--`, invokes `subprocess.run` without shell interpolation, records the exact argv vector, captures rc/stdout/stderr as separate fields, and labels `timeout`, `probe_error`, and `product_error` separately. Its optional `--stdout-json-byte0` assertion requires stdout to be parseable JSON starting at byte 0, so JSON parser stack noise is replaced by a structured probe result. - **Acceptance.** Future dogfood reports for argv-sensitive CLI surfaces include the exact argv vector and can distinguish `probe_error` from `product_error`; reproducing the three doctor-help forms through the helper yields three parseable JSON objects from byte 0 without Python parser stack noise. 
[SCOPE: claw-code dogfood harness] + **Verification.** `python3 -m unittest tests.test_roadmap_helpers.RoadmapHelperTests.test_dogfood_probe_runs_explicit_argv_and_separates_channels tests.test_roadmap_helpers.RoadmapHelperTests.test_dogfood_probe_labels_timeout_separately_from_product_error tests.test_roadmap_helpers.RoadmapHelperTests.test_dogfood_probe_labels_probe_construction_failure tests.test_roadmap_helpers.RoadmapHelperTests.test_dogfood_probe_labels_stdout_json_prefix_failure_as_product_error` passes. The fixture tests cover explicit argv preservation for `--output-format json doctor --help`, separated stdout/stderr capture, timeout classification, construction-error classification, and byte-0 JSON product-error classification. [SCOPE: claw-code dogfood harness] -814. **Plain non-TTY trailing `--output-format json` still times out for inventory/error surfaces after #3186** — dogfooded 2026-05-28 07:00 on fresh `main` `0e6d48d9d` after #3186 merged. Using explicit argv probes with separated stdout/stderr (per #813) reproduced the older #811 class on current main: `cargo run -q --bin claw -- agents list --bogus --output-format json`, `skills show does-not-exist --output-format json`, and `plugins show does-not-exist --output-format json` each hit the 5s timeout (`rc=124`) with `stdout` length 0; stderr contained only compile warnings plus the local deprecated `enabledPlugins` settings warning. This confirms the argv-safe probe harness can distinguish product failure from probe-construction failure, and the product gap remains for trailing JSON flag forms on inventory/error surfaces. +814. **DONE — Plain non-TTY trailing `--output-format json` still times out for inventory/error surfaces after #3186** — dogfooded 2026-05-28 07:00 on fresh `main` `0e6d48d9d` after #3186 merged. 
Using explicit argv probes with separated stdout/stderr (per #813) reproduced the older #811 class on current main: `cargo run -q --bin claw -- agents list --bogus --output-format json`, `skills show does-not-exist --output-format json`, and `plugins show does-not-exist --output-format json` each hit the 5s timeout (`rc=124`) with `stdout` length 0; stderr contained only compile warnings plus the local deprecated `enabledPlugins` settings warning. Follow-up evidence showed the product path fixed upstream and current tests preserve local JSON error routing. **Required fix shape.** Parse trailing `--output-format json` for local inventory/error commands before any REPL/provider startup in plain non-TTY mode, matching the already-working leading global form where applicable. Add timeout regression coverage for at least `agents list --bogus --output-format json`, `skills show does-not-exist --output-format json`, and `plugins show does-not-exist --output-format json` asserting nonzero stdout with a single parseable JSON envelope containing `status:"error"`, `error_kind`, and non-null `hint`. Keep deprecation/config warnings out of stdout in JSON mode. @@ -7821,24 +7829,40 @@ Original filing (2026-04-18): the session emitted `SessionStart hook (completed) **Follow-up verification (2026-05-28 07:30 on `main` `09ff1caf4`).** After #3187 merged, rerunning the same three commands with explicit argv showed the product path had already been fixed upstream: `agents list --bogus --output-format json` returned rc 1 with a JSON `unknown_option` envelope, `skills show does-not-exist --output-format json` returned rc 1 with `skill_not_found`, and `plugins show does-not-exist --output-format json` returned rc 1 with `plugin_not_found`. Stdout was nonzero and parseable in all three cases; warnings stayed on stderr. Remaining actionable lesson is process-level: ROADMAP record #814 is preserved as historical repro + verification, not an open product blocker. -815. 
**`claw --output-format json config` reports the same deprecated-settings warning twice: once structurally in `warnings[]` and once as prose on stderr** — dogfooded 2026-05-28 08:00 on current `main` after #3188. `timeout 5s cargo run -q --bin claw -- --output-format json config >out 2>err` exits 0 with parseable stdout JSON (`kind:"config", action:"list", status:"ok"`) and `warnings.length == 1`, but stderr still contains the same `enabledPlugins is deprecated` warning once. This is better than older stdout contamination, but still duplicates the same diagnostic across two channels in JSON mode. A machine consumer that reads the structured warning also sees an extra prose warning on stderr; a log scraper may count one config issue twice. + **Fix applied.** Current trailing JSON-mode routing reaches local inventory handlers for these argv-safe probes; handled JSON errors emit parseable stdout envelopes and do not require provider credentials/session startup. + + **Verification.** Existing follow-up probe evidence records `agents list --bogus --output-format json`, `skills show does-not-exist --output-format json`, and `plugins show does-not-exist --output-format json` returning parseable JSON envelopes; current contract tests additionally cover agents/plugin local parse/error envelopes with stdout JSON. + +815. **DONE — `claw --output-format json config` reports the same deprecated-settings warning twice: once structurally in `warnings[]` and once as prose on stderr** — dogfooded 2026-05-28 08:00 on current `main` after #3188. At filing time, `timeout 5s cargo run -q --bin claw -- --output-format json config >out 2>err` exited 0 with parseable stdout JSON (`kind:"config", action:"list", status:"ok"`) and `warnings.length == 1`, but stderr still contained the same `enabledPlugins is deprecated` warning once. Current config JSON keeps that diagnostic structured without duplicating it on stderr.
**Required fix shape.** In JSON mode for config/list surfaces that already include `warnings[]`, suppress eager prose emission of the same config warning on stderr or mark it as already collected. Text mode should keep the human stderr warning. Add regression coverage asserting `claw --output-format json config` returns exactly one structured warning and zero duplicate `enabledPlugins` prose lines on stderr. **Acceptance.** With a deprecated `enabledPlugins` key present, `claw --output-format json config` exits 0, stdout parses from byte 0 and includes `warnings[]`, and stderr has no duplicate deprecation warning for the same file/key. [SCOPE: claw-code] -816. **JSON-mode local/list surfaces still leak deprecated config prose warnings on stderr outside `config`** — dogfooded 2026-05-28 09:30 on `main` `89e7f415a` after #3190. `./target/debug/claw --output-format json config` is now fixed (`rc=0`, parseable stdout, `warnings[]`, stderr empty), but sibling JSON surfaces still emit the same app-level config warning to stderr when `~/.claw/settings.json` contains deprecated `enabledPlugins`: `plugins list` (`kind:"plugin"`), `mcp list` (`kind:"mcp"`), and `doctor` (`kind:"doctor"`) all return parseable JSON with `rc=0` while stderr contains `enabledPlugins is deprecated`. `skills list` and `version` stay clean. This leaves machine consumers with a global JSON-mode cleanliness gap even after the config-specific duplicate was fixed. + **Fix applied.** JSON-mode config rendering collects deprecated settings diagnostics into `warnings[]` and suppresses the duplicate prose `enabledPlugins` warning on stderr; text mode preserves the human stderr warning. + + **Verification.** `config_json_reports_deprecations_structurally_without_stderr_duplicate_815` asserts a deprecated `enabledPlugins` fixture appears in JSON `warnings[]`, does not appear on stderr for `--output-format json config`, and still appears on stderr for text `config`. + +816. 
**DONE — JSON-mode local/list surfaces still leak deprecated config prose warnings on stderr outside `config`** — dogfooded 2026-05-28 09:30 on `main` `89e7f415a` after #3190. `./target/debug/claw --output-format json config` was fixed, but sibling JSON surfaces still emitted the same app-level config warning to stderr when `~/.claw/settings.json` contained deprecated `enabledPlugins`: `plugins list`, `mcp list`, and `doctor`. Current global JSON-mode suppression covers these local/list surfaces. **Required fix shape.** Treat JSON output mode as a global app-level diagnostic routing contract: local/list/status surfaces that successfully return structured JSON should not write config deprecation prose to stderr. Either collect those warnings into each relevant JSON envelope where a warnings field exists, or suppress config-warning emission during JSON-mode preloading/default resolution for surfaces that cannot represent warnings yet. Preserve human stderr warnings in text mode. **Acceptance.** With deprecated `enabledPlugins` present, `claw --output-format json plugins list`, `claw --output-format json mcp list`, and `claw --output-format json doctor` exit 0, stdout parses from byte 0, and stderr contains zero `enabledPlugins is deprecated` app-level warning lines. Text mode still prints the warning. [SCOPE: claw-code] -817. **`claw --output-format json plugins list --` writes its JSON error envelope to stderr while sibling local inventory commands use stdout** — dogfooded 2026-05-28 12:30 on `main` `9494e3c26`. Trailing bare `--` is a useful parser edge because automation sometimes injects delimiter sentinels. `agents list --` and `skills list --` return rc 1 with parseable JSON on stdout and empty stderr. `mcp list --` also returns a parseable JSON error on stdout. `config --` returns rc 0 with a structured config error on stdout. 
But `plugins list --` returns rc 1, stdout empty, and writes the JSON error envelope to stderr: `{"action":"abort","error":"unknown option for `claw plugins list`: --", ...}`. This is machine-readable, but channel-inconsistent and surprising for JSON-mode consumers that read stdout for command payloads. + **Fix applied.** JSON-mode config-warning suppression is applied globally before local JSON surfaces load settings, covering sibling list/status/diagnostic commands while preserving text-mode stderr warnings. + + **Verification.** `global_json_surfaces_suppress_config_deprecation_stderr_810_821_824` covers `plugins list`, `mcp list`, `doctor`, and additional JSON surfaces under a deprecated `enabledPlugins` fixture with empty stderr; `local_text_surface_preserves_config_deprecation_stderr_816` verifies text mode still emits the warning. + +817. **DONE — `claw --output-format json plugins list --` writes its JSON error envelope to stderr while sibling local inventory commands use stdout** — dogfooded 2026-05-28 12:30 on `main` `9494e3c26`. Trailing bare `--` is a useful parser edge because automation sometimes injects delimiter sentinels. `plugins list --` returned rc 1, stdout empty, and wrote the JSON error envelope to stderr. Current plugin list parse-error routing matches sibling JSON inventory/local surfaces. **Required fix shape.** Align `plugins list` parse-error routing with the other JSON inventory/local surfaces: in JSON mode, print the structured CLI error envelope to stdout and keep stderr empty for this handled parse error. Preserve text-mode stderr behavior. Add regression coverage for `claw --output-format json plugins list --` asserting rc 1, stdout parseable JSON with `error_kind:"cli_parse"`, and empty stderr. **Acceptance.** `claw --output-format json plugins list --` exits 1, stdout parses from byte 0 as the existing JSON error envelope, stderr is empty, and text mode still reports the parse error to stderr. 
[SCOPE: claw-code] + **Fix applied.** The `plugins list` flag/filter guard emits handled JSON parse errors directly to stdout in JSON mode while keeping text-mode parse errors on stderr. + + **Verification.** `plugins_list_trailing_dash_json_error_uses_stdout_817`, `plugins_list_trailing_dash_text_error_stays_on_stderr_817`, and `plugins_list_flag_shaped_filter_returns_cli_parse_on_stdout_793_817` cover rc 1, stdout JSON with `error_kind:"cli_parse"`, empty stderr in JSON mode, and preserved text stderr behavior. + 818. **DONE — `AGENTS.md` and `.claude/CLAUDE.md` silently omitted from instruction file cascade** — dogfooded 2026-05-29 08:00. When a repo contains `AGENTS.md` (OpenAI Codex / multi-agent convention) or `.claude/CLAUDE.md` (scoped Claude Code convention), claw-code does not load either file as part of the instruction/context cascade on startup. Users following either convention discover this only by noticing their persona/context instructions have no effect — no warning, no missing-file diagnostic, no documentation note. This is a friction gap for any team migrating to or simultaneously using claw-code alongside Claude Code or Codex workflows, since the two most common non-CLAUDE.md instruction files are silently ignored. **Required fix shape.** Add `AGENTS.md` (project root) and `.claude/CLAUDE.md` (`.claude/` subdirectory) to the instruction file cascade that already loads `CLAUDE.md`. Apply the same merge-and-precedence semantics as existing instruction files. Log a debug trace (not stderr noise) when either file is loaded. Add test coverage: a fixture repo with `AGENTS.md` only, `.claude/CLAUDE.md` only, and both present alongside `CLAUDE.md` should each have the relevant content visible in the resolved instruction context. 
@@ -7849,18 +7873,26 @@ Original filing (2026-04-18): the session emitted `SessionStart hook (completed) **Verification.** `cargo test --manifest-path rust/Cargo.toml -p runtime discovers_agents_markdown_instruction_file -- --nocapture`; `cargo test --manifest-path rust/Cargo.toml -p runtime discovers_scoped_dot_claude_claude_markdown_instruction_file -- --nocapture`; `cargo test --manifest-path rust/Cargo.toml -p runtime discovers_claude_agents_and_dot_claude_instruction_files_together -- --nocapture`. -819. **`claw --output-format json export --session <id>` writes JSON error envelope to stderr, stdout empty** — dogfooded 2026-05-29 09:30 on `main` `37a9a543`. `claw --output-format json export --session does-not-exist` exits rc=1 with stdout length 0 and the full JSON error envelope on stderr: `{"action":"abort","error":"session not found: does-not-exist","error_kind":"session_not_found",...}`. This is the same channel-routing inconsistency class as #817 (plugins list trailing-dash, fixed in #3194): handled errors in JSON mode should go to stdout, not stderr, so machine consumers can parse the envelope from stdout byte 0 regardless of which surface triggered the error. +819. **DONE — `claw --output-format json export --session <id>` writes JSON error envelope to stderr, stdout empty** — dogfooded 2026-05-29 09:30 on `main` `37a9a543`. `claw --output-format json export --session does-not-exist` exits rc=1 with stdout length 0 and the full JSON error envelope on stderr: `{"action":"abort","error":"session not found: does-not-exist","error_kind":"session_not_found",...}`. This is the same channel-routing inconsistency class as #817 (plugins list trailing-dash, fixed in #3194): handled errors in JSON mode should go to stdout, not stderr, so machine consumers can parse the envelope from stdout byte 0 regardless of which surface triggered the error. 
**Required fix shape.** Align `export --session <id>` error routing with the inventory surfaces fixed in #817: in JSON mode, write the `session_not_found` error envelope to stdout (rc=1) and keep stderr empty. Preserve text-mode behavior (stderr message). Add regression coverage asserting rc=1, stdout parseable JSON with `error_kind:"session_not_found"`, and empty stderr. **Acceptance.** `claw --output-format json export --session does-not-exist` exits 1, stdout contains the JSON error envelope from byte 0, stderr is empty. Text mode still prints the error to stderr. [SCOPE: claw-code] -820. **`interactive_only` error class always routes JSON envelope to stderr (stdout empty)** — dogfooded 2026-05-29 10:00 on `main` `efe59c22`. All `interactive_only` errors share the same routing gap as #819 (`export --session <id>`): `claw --output-format json session list`, `session switch <id>`, `session delete <id>`, and `session fork <id>` each exit rc=1, stdout empty, JSON envelope on stderr. The envelope is well-formed (`error_kind:"interactive_only"`, `hint:...`, `action:"abort"`) but the channel is wrong for JSON mode. Any surface that returns `interactive_only` is affected; these are all the `claw session` subcommands. This is the same root cause as #817 (plugins) and #819 (export): the top-level error handler writes `Err(...)` to stderr instead of routing to stdout when `--output-format json` is active. + **Fix applied.** The JSON abort handler now emits export/session-not-found envelopes on stdout in JSON mode while preserving text-mode stderr behavior. The explicit missing-session regression asserts rc 1, `error_kind:"session_not_found"`, abort envelope, and empty stderr. + + **Verification.** `cargo test --manifest-path rust/Cargo.toml -p rusty-claude-cli export_missing_session_json_error_uses_stdout_819 -- --nocapture`. + +820. **DONE — `interactive_only` error class always routes JSON envelope to stderr (stdout empty)** — dogfooded 2026-05-29 10:00 on `main` `efe59c22`. 
All `interactive_only` errors shared the same routing gap as #819: JSON envelopes were well-formed but written to stderr. Current JSON abort handling routes `interactive_only` envelopes to stdout. **Required fix shape.** In the top-level error handler (or the `interactive_only` classifier arm in `main.rs`), detect JSON output mode and write the structured error envelope to stdout (rc=1) instead of stderr. Scope the fix to the `interactive_only` error_kind so all affected surfaces are repaired in one pass. Add regression coverage for at least `claw --output-format json session list` asserting rc=1, stdout parseable JSON with `error_kind:"interactive_only"`, stderr empty. **Acceptance.** All `claw --output-format json session <subcommand>` invocations exit 1 with the JSON envelope on stdout and empty stderr. Text mode continues to print the error to stderr. [SCOPE: claw-code] + **Fix applied.** The JSON abort handler now classifies `interactive_only` and prints the structured envelope to stdout in JSON mode; text-mode errors still use stderr. + + **Verification.** Session/abort contract assertions in `output_format_contract.rs` around #819/#820/#823 require JSON-mode interactive-only failures to provide a stdout JSON envelope and no JSON envelope on stderr. + 821. **DONE — `status`, `sandbox`, and `system-prompt` in JSON mode still emit config deprecation warning to stderr** — dogfooded 2026-05-29 10:30 on `main` `42aff269`. After #816 fixed config deprecation stderr leakage for `plugins list`, `mcp list`, `doctor`, and `config`, three JSON-mode surfaces continue to emit the `enabledPlugins is deprecated` prose warning to stderr: `claw --output-format json status` (122 bytes stderr), `claw --output-format json sandbox` (122 bytes stderr), `claw --output-format json system-prompt` (122 bytes stderr). These surfaces return well-formed JSON on stdout (rc=0) but leak the config warning to stderr, leaving machine consumers with mixed-channel output. 
`version`, `acp`, `agents`, `skills`, `mcp`, `plugins`, and `doctor` all have clean stderr after #816. **Required fix shape.** Extend the JSON-mode config-warning suppression applied in #816 to cover `status`, `sandbox`, and `system-prompt`. The fix should apply globally: any JSON-mode surface that completes successfully should not emit config deprecation prose to stderr. Text mode should keep the human stderr warning. @@ -7873,12 +7905,16 @@ Original filing (2026-04-18): the session emitted `SessionStart hook (completed) **Verification.** `cargo test --manifest-path rust/Cargo.toml -p rusty-claude-cli global_json_surfaces_suppress_config_deprecation_stderr_810_821_824 -- --nocapture`; text-mode preservation remains covered by `local_text_surface_preserves_config_deprecation_stderr_816`. -822. **Unknown top-level subcommand falls through to REPL/provider startup instead of returning a `command_not_found` error** — dogfooded 2026-05-29 11:00 on `main` `69b59079`. `claw --output-format json foobar` does not return a structured `command_not_found` error; instead it falls through to the interactive/API path and hits `missing_credentials` (rc=1, stderr: `{"error_kind":"missing_credentials",...}`). Two gaps in one: (1) the unrecognized command word is silently treated as a prompt/text argument, not flagged as unknown, so the user gets a misleading "no credentials" error instead of "command not found"; (2) the resulting error goes to stderr. This makes automation scripts that probe for command availability impossible to distinguish from auth failures. +822. **DONE — Unknown top-level subcommand falls through to REPL/provider startup instead of returning a `command_not_found` error** — dogfooded 2026-05-29 11:00 on `main` `69b59079`. `claw --output-format json foobar` returned `missing_credentials` after prompt/provider fallthrough instead of a structured `command_not_found`. Current command-shaped unknown tokens are rejected before provider startup. 
**Required fix shape.** Before falling through to the REPL/prompt path, check whether the first positional arg matches any known subcommand. If not, return a typed error: `{"error_kind":"command_not_found","message":"unknown command: foobar","hint":"Run `claw --help` for available commands.","status":"error"}` on stdout (JSON mode, rc=1) or stderr (text mode). This mirrors the behavior of `--bogus-flag` (which correctly returns `cli_parse`) but for unknown positional commands. **Acceptance.** `claw --output-format json foobar` exits 1, stdout contains JSON with `error_kind:"command_not_found"`, stderr empty. Text mode prints the error to stderr. No provider startup attempted. [SCOPE: claw-code] + **Fix applied.** Unknown command-shaped top-level tokens now trip the pre-provider `command_not_found:` guard, and the classifier maps that prefix to `error_kind:"command_not_found"` with JSON-mode output on stdout. + + **Verification.** `unknown_subcommand_json_emits_command_not_found`, `unknown_subcommand_text_emits_command_not_found_on_stderr`, `unknown_subcommand_typo_with_suggestions_json_emits_command_not_found`, and updated `unknown_subcommand_returns_typed_kind_785` cover JSON stdout, text stderr, suggestion hints, and no `missing_credentials` fallthrough. + 823. **DONE — `claw --output-format json prompt` with missing/empty prompt text routes JSON errors to stdout with empty stderr** — dogfooded 2026-05-29 11:30 on `main` `3a76c4f4`. `claw --output-format json prompt` (no text) and `claw --output-format json prompt ""` (empty string) both exited rc=1, stdout empty, and wrote `{"error_kind":"missing_prompt","action":"abort",...}` to stderr. The envelope was well-formed but channel-inconsistent: JSON mode machine consumers reading stdout for command results got empty stdout and had to check stderr to detect the error. 
This is the same class as #819 (export session-not-found) and #820 (interactive_only / session subcommands), and the same root cause: the top-level abort handler wrote to stderr regardless of output-format mode. **Required fix shape.** In JSON mode, route `missing_prompt` abort errors to stdout (rc=1) and keep stderr empty. This is the same fix pattern as #817/#819/#820: detect JSON output mode in the abort handler and redirect the structured envelope to stdout. Add regression coverage for `claw --output-format json prompt` (no arg) and `claw --output-format json prompt ""` asserting rc=1, stdout parseable JSON with `error_kind:"missing_prompt"`, stderr empty. @@ -7899,12 +7935,16 @@ Original filing (2026-04-18): the session emitted `SessionStart hook (completed) **Verification.** `cargo test --manifest-path rust/Cargo.toml -p rusty-claude-cli global_json_surfaces_suppress_config_deprecation_stderr_810_821_824 -- --nocapture`; `cargo test --manifest-path rust/Cargo.toml -p rusty-claude-cli local_text_surface_preserves_config_deprecation_stderr_816 -- --nocapture`. -825. **Unknown single-word subcommand falls through to provider startup and surfaces `missing_credentials` instead of `command_not_found`** — dogfooded 2026-05-29 14:00 on `main` `de7edd5b`. `claw foobar` (and `claw --output-format json foobar`) hit the `looks_like_subcommand_typo` guard, which checked for close fuzzy matches but fell through silently when no suggestions matched. The fallthrough routed to `CliAction::Prompt`, triggering Anthropic provider startup and a misleading `missing_credentials` error (or burning API tokens if credentials were present). The `command_not_found` error kind existed in the registry but was never emitted by this path. +825. **DONE — Unknown single-word subcommand falls through to provider startup and surfaces `missing_credentials` instead of `command_not_found`** — dogfooded 2026-05-29 14:00 on `main` `de7edd5b`. 
`claw foobar` (and `claw --output-format json foobar`) hit the `looks_like_subcommand_typo` guard, then fell through to provider startup when no suggestions matched. Current code emits `command_not_found` for this path. **Required fix shape.** When `looks_like_subcommand_typo` fires on a single-word positional arg with no close suggestions, emit `command_not_found:` rather than falling through. Add `command_not_found:` prefix classifier to `classify_error_kind`. Result: clean `{"error_kind":"command_not_found",...}` envelope on stdout (JSON mode), error on stderr (text mode), zero provider startup. **Acceptance.** `claw --output-format json foobar` exits 1, stdout `error_kind:"command_not_found"`, stderr empty, no Anthropic call. Typo with suggestions (`claw statuz`) also gets `command_not_found` plus `hint` with suggestions. [SCOPE: claw-code] + **Fix applied.** The `looks_like_subcommand_typo` path emits `command_not_found:` for unknown single-word command-shaped tokens even when there are no close suggestions, and typo suggestions are unified under the same typed error kind. + + **Verification.** `unknown_subcommand_json_emits_command_not_found`, `unknown_subcommand_text_emits_command_not_found_on_stderr`, `unknown_subcommand_typo_with_suggestions_json_emits_command_not_found`, and classifier coverage in `classify_error_kind_returns_correct_discriminants` verify `command_not_found` instead of `missing_credentials` before provider startup. + 826. **DONE — Multi-word unknown subcommand still falls through to `missing_credentials`** — dogfooded 2026-05-29 14:38 on `main` `70d64be0`. After #825 fixed single-word unknown subcommands, multi-word invocations (`claw foobar baz`) are still undetected: the `looks_like_subcommand_typo` guard only fires when `rest.len() == 1`. When there are two or more positional args, the first word is treated as a prompt and all args join into a prompt string → provider startup → `missing_credentials`. 
Same misleading-error class as #825 but for multi-word cases. **Required fix shape.** Extend the command-not-found guard to also fire when `rest.len() > 1` and `rest[0]` passes `looks_like_subcommand_typo` but does not match any known subcommand. The multi-arg case should also emit `command_not_found` — with a note that if literal multi-word prompt was intended, use `claw prompt ` or `echo 'text' | claw`. diff --git a/rust/crates/rusty-claude-cli/tests/output_format_contract.rs b/rust/crates/rusty-claude-cli/tests/output_format_contract.rs index 38acc5d4..9f83a8e8 100644 --- a/rust/crates/rusty-claude-cli/tests/output_format_contract.rs +++ b/rust/crates/rusty-claude-cli/tests/output_format_contract.rs @@ -2182,6 +2182,46 @@ fn export_json_has_kind_702() { } } +#[test] +fn export_missing_session_json_error_uses_stdout_819() { + let root = unique_temp_dir("export-missing-session-819"); + fs::create_dir_all(&root).expect("temp dir should exist"); + + let output = run_claw( + &root, + &[ + "--output-format", + "json", + "export", + "--session", + "does-not-exist", + ], + &[], + ); + assert_eq!( + output.status.code(), + Some(1), + "export missing session should exit rc=1 (#819)" + ); + let stdout = String::from_utf8_lossy(&output.stdout); + let stderr = String::from_utf8_lossy(&output.stderr); + assert!( + stderr.is_empty(), + "export missing session JSON mode must keep stderr empty (#819), got: {stderr:?}" + ); + let parsed: serde_json::Value = serde_json::from_str(stdout.trim()).unwrap_or_else(|_| { + panic!("export missing session must emit valid stdout JSON (#819), got: {stdout:?}") + }); + assert_eq!( + parsed["error_kind"], "session_not_found", + "export missing session must emit session_not_found (#819): {parsed}" + ); + assert_eq!( + parsed["action"], "abort", + "export missing session should use the abort envelope (#819): {parsed}" + ); +} + #[test] fn config_parse_error_has_typed_error_kind_and_hint_764() { // #764: Malformed .claw/settings.json must emit 
error_kind:config_parse_error diff --git a/scripts/dogfood-probe.py b/scripts/dogfood-probe.py new file mode 100644 index 00000000..ee390665 --- /dev/null +++ b/scripts/dogfood-probe.py @@ -0,0 +1,145 @@ +#!/usr/bin/env python3 +from __future__ import annotations + +import argparse +import json +import subprocess +import sys +from dataclasses import dataclass +from pathlib import Path +from typing import Sequence + + +@dataclass(frozen=True) +class ProbeResult: + kind: str + argv: list[str] + returncode: int | None + stdout: bytes + stderr: bytes + message: str | None = None + + @property + def stdout_text(self) -> str: + return self.stdout.decode('utf-8', errors='replace') + + @property + def stderr_text(self) -> str: + return self.stderr.decode('utf-8', errors='replace') + + def to_json_dict(self) -> dict[str, object]: + return { + 'kind': self.kind, + 'argv': self.argv, + 'returncode': self.returncode, + 'stdout': self.stdout_text, + 'stderr': self.stderr_text, + 'message': self.message, + } + + +def run_probe(argv: Sequence[str], *, timeout: float = 10.0, require_stdout_json_byte0: bool = False) -> ProbeResult: + explicit_argv = [str(arg) for arg in argv] + if not explicit_argv: + return ProbeResult( + kind='probe_error', + argv=[], + returncode=None, + stdout=b'', + stderr=b'', + message='argv must contain at least the executable path', + ) + + try: + completed = subprocess.run( + explicit_argv, + capture_output=True, + check=False, + timeout=timeout, + ) + except subprocess.TimeoutExpired as exc: + return ProbeResult( + kind='timeout', + argv=explicit_argv, + returncode=None, + stdout=exc.stdout or b'', + stderr=exc.stderr or b'', + message=f'probe timed out after {timeout:g}s', + ) + except (OSError, ValueError) as exc: + return ProbeResult( + kind='probe_error', + argv=explicit_argv, + returncode=None, + stdout=b'', + stderr=b'', + message=str(exc), + ) + + if require_stdout_json_byte0: + if not completed.stdout: + return ProbeResult( + 
kind='product_error', + argv=explicit_argv, + returncode=completed.returncode, + stdout=completed.stdout, + stderr=completed.stderr, + message='stdout is empty; expected JSON at byte 0', + ) + if completed.stdout[:1] not in (b'{', b'['): + return ProbeResult( + kind='product_error', + argv=explicit_argv, + returncode=completed.returncode, + stdout=completed.stdout, + stderr=completed.stderr, + message='stdout JSON does not start at byte 0', + ) + try: + json.loads(completed.stdout.decode('utf-8')) + except (UnicodeDecodeError, json.JSONDecodeError) as exc: + return ProbeResult( + kind='product_error', + argv=explicit_argv, + returncode=completed.returncode, + stdout=completed.stdout, + stderr=completed.stderr, + message=f'stdout is not parseable JSON: {exc}', + ) + + if completed.returncode != 0: + return ProbeResult( + kind='product_error', + argv=explicit_argv, + returncode=completed.returncode, + stdout=completed.stdout, + stderr=completed.stderr, + message=f'process exited with code {completed.returncode}', + ) + + return ProbeResult( + kind='ok', + argv=explicit_argv, + returncode=completed.returncode, + stdout=completed.stdout, + stderr=completed.stderr, + ) + + +def main(argv: Sequence[str] | None = None) -> int: + parser = argparse.ArgumentParser(description='Run an argv-safe dogfood probe and emit separated channels as JSON.') + parser.add_argument('--timeout', type=float, default=10.0) + parser.add_argument('--stdout-json-byte0', action='store_true', help='Require stdout to be parseable JSON starting at byte 0.') + parser.add_argument('command', nargs=argparse.REMAINDER, help='Executable and arguments to run. 
Use -- before the target argv.') + args = parser.parse_args(argv) + command = args.command + if command and command[0] == '--': + command = command[1:] + + result = run_probe(command, timeout=args.timeout, require_stdout_json_byte0=args.stdout_json_byte0) + print(json.dumps(result.to_json_dict(), sort_keys=True)) + return 0 if result.kind == 'ok' else 1 + + +if __name__ == '__main__': + raise SystemExit(main()) diff --git a/tests/test_roadmap_helpers.py b/tests/test_roadmap_helpers.py index 0169f88c..725a8963 100644 --- a/tests/test_roadmap_helpers.py +++ b/tests/test_roadmap_helpers.py @@ -9,6 +9,9 @@ from pathlib import Path REPO_ROOT = Path(__file__).resolve().parents[1] NEXT_ID = REPO_ROOT / 'scripts' / 'roadmap-next-id.sh' +DOGFOOD_PROBE = REPO_ROOT / 'scripts' / 'dogfood-probe.py' + + def run_next_id(roadmap: Path, script: Path = NEXT_ID) -> subprocess.CompletedProcess[str]: @@ -21,6 +24,16 @@ def run_next_id(roadmap: Path, script: Path = NEXT_ID) -> subprocess.CompletedPr ) +def run_dogfood_probe(args: list[str]) -> subprocess.CompletedProcess[str]: + return subprocess.run( + ['python3', str(DOGFOOD_PROBE), *args], + cwd=REPO_ROOT, + capture_output=True, + text=True, + check=False, + ) + + class RoadmapHelperTests(unittest.TestCase): def test_roadmap_next_id_prints_only_next_id_after_duplicate_check(self) -> None: with tempfile.TemporaryDirectory() as temp_dir: @@ -62,6 +75,78 @@ class RoadmapHelperTests(unittest.TestCase): self.assertIn('required ROADMAP id checker not found or not readable', result.stderr) self.assertIn('refusing to print a next id', result.stderr) + def test_dogfood_probe_runs_explicit_argv_and_separates_channels(self) -> None: + with tempfile.TemporaryDirectory() as temp_dir: + fixture = Path(temp_dir) / 'fixture.py' + fixture.write_text( + 'from __future__ import annotations\n' + 'import json\n' + 'import sys\n' + 'print(json.dumps({"argv": sys.argv[1:]}))\n' + 'print("diagnostic", file=sys.stderr)\n' + ) + + result = 
run_dogfood_probe([ + '--stdout-json-byte0', + '--', + 'python3', + str(fixture), + '--output-format', + 'json', + 'doctor', + '--help', + ]) + + self.assertEqual(0, result.returncode) + payload = __import__('json').loads(result.stdout) + self.assertEqual('ok', payload['kind']) + self.assertEqual([ + 'python3', + str(fixture), + '--output-format', + 'json', + 'doctor', + '--help', + ], payload['argv']) + self.assertEqual(0, payload['returncode']) + self.assertEqual('{"argv": ["--output-format", "json", "doctor", "--help"]}\n', payload['stdout']) + self.assertEqual('diagnostic\n', payload['stderr']) + + def test_dogfood_probe_labels_timeout_separately_from_product_error(self) -> None: + with tempfile.TemporaryDirectory() as temp_dir: + fixture = Path(temp_dir) / 'sleep.py' + fixture.write_text('import time\ntime.sleep(2)\n') + + result = run_dogfood_probe(['--timeout', '0.1', '--', 'python3', str(fixture)]) + + self.assertEqual(1, result.returncode) + payload = __import__('json').loads(result.stdout) + self.assertEqual('timeout', payload['kind']) + self.assertIsNone(payload['returncode']) + self.assertIn('timed out', payload['message']) + + def test_dogfood_probe_labels_probe_construction_failure(self) -> None: + result = run_dogfood_probe([]) + + self.assertEqual(1, result.returncode) + payload = __import__('json').loads(result.stdout) + self.assertEqual('probe_error', payload['kind']) + self.assertEqual([], payload['argv']) + self.assertIsNone(payload['returncode']) + self.assertIn('argv must contain', payload['message']) + + def test_dogfood_probe_labels_stdout_json_prefix_failure_as_product_error(self) -> None: + with tempfile.TemporaryDirectory() as temp_dir: + fixture = Path(temp_dir) / 'prefixed.py' + fixture.write_text('print("warning before json")\nprint("{}")\n') + + result = run_dogfood_probe(['--stdout-json-byte0', '--', 'python3', str(fixture)]) + + self.assertEqual(1, result.returncode) + payload = __import__('json').loads(result.stdout) + 
self.assertEqual('product_error', payload['kind']) + self.assertEqual(0, payload['returncode']) + self.assertIn('byte 0', payload['message']) if __name__ == '__main__': unittest.main()