From d453eedae68cc1b252447355747b58663d718f91 Mon Sep 17 00:00:00 2001 From: YeonGyu-Kim Date: Wed, 22 Apr 2026 17:44:48 +0900 Subject: [PATCH] =?UTF-8?q?fix:=20#165=20=E2=80=94=20load-session=20CLI=20?= =?UTF-8?q?now=20parity-matches=20list/delete=20(--directory,=20--output-f?= =?UTF-8?q?ormat,=20typed=20JSON=20errors)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The #160 session-lifecycle CLI triplet was asymmetric: list-sessions and delete-session accepted --directory + --output-format and emitted typed JSON error envelopes, but load-session had neither flag and dumped a raw Python traceback (including the SessionNotFoundError class name) on a missing session. Three concrete impacts this fix closes: 1. Alternate session-store locations (e.g. /tmp/claw-run-XXX/.port_sessions) were unreachable via load-session; claws had to chdir or monkeypatch DEFAULT_SESSION_DIR to work around it. 2. Not-found emitted a multi-line Python stack, not a parseable envelope. Claws deciding retry/escalate/give-up had only exit code 1 to work with. 3. The traceback leaked 'src.session_store.SessionNotFoundError' verbatim, coupling version-pinned claws to our internal exception class name. Now all three triplet commands accept the same flag pair and emit the same JSON error shape: Success (json mode): {"session_id": "alpha", "loaded": true, "messages_count": 3, "input_tokens": 42, "output_tokens": 99} Not-found: {"session_id": "missing", "loaded": false, "error": {"kind": "session_not_found", "message": "session 'missing' not found in /path", "directory": "/path", "retryable": false}} Corrupted file: {"session_id": "broken", "loaded": false, "error": {"kind": "session_load_failed", "message": "...", "directory": "/path", "retryable": true}} Exit code contract: - 0 on successful load - 1 on not-found (preserves existing $?) - 1 on OSError/JSONDecodeError (distinct 'kind' in JSON) Backward compat: legacy 'claw load-session ID' text output unchanged byte-for-byte. Only new behaviour is the flags and structured error path. Tests (tests/test_load_session_cli.py, 13 tests): - TestDirectoryFlagParity (2): --directory works + fallback to CWD/.port_sessions - TestOutputFormatFlagParity (2): json schema + text-mode backward compat - TestNotFoundTypedError (2): JSON envelope on not-found; no traceback in either mode; no internal class name leak - TestLoadFailedDistinctFromNotFound (1): corrupted file = session_load_failed with retryable=true, distinct from session_not_found - TestTripletParityConsistency (6): parametrised over [list, delete, load] * [--directory, --output-format] — explicit parity guard for future regressions Full suite: 80/80 passing, zero regression. Discovered via Jobdori dogfood sweep 2026-04-22 17:44 KST — ran 'claw load-session nonexistent' expecting a clean error, got a Python traceback. Filed #165 + fixed in same commit. Closes ROADMAP #165. --- ROADMAP.md | 85 ++++++++++++++++ src/main.py | 69 ++++++++++++- tests/test_load_session_cli.py | 179 +++++++++++++++++++++++++++++++++ 3 files changed, 330 insertions(+), 3 deletions(-) create mode 100644 tests/test_load_session_cli.py diff --git a/ROADMAP.md b/ROADMAP.md index 4cb03e4..5d54011 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -6294,3 +6294,88 @@ with patch.object(QueryEnginePort, 'submit_message', hang_and_mutate): **Blocker.** Python threading does not expose preemptive cancellation, so purely CPU-bound stalls inside `_format_output` or provider client libraries cannot be force-killed. The fix makes cancellation *cooperative*, not *guaranteed*. Eventually the engine will need an `asyncio`-native path with `asyncio.Task.cancel()` for real provider IO, but that is a larger refactor. **Source.** Jobdori dogfood sweep 2026-04-22 17:36 KST — filed while landing #162, following review feedback on #161 that pointed out the caller-facing timeout and underlying work-cancellation are two different problems. #161 closed the first; #164 is the second. + +## Pinpoint #165. `claw load-session` lacks the `--directory` / `--output-format` / JSON-error parity that #160 established for `list-sessions` and `delete-session` — session-lifecycle CLI triplet is asymmetric + +**Gap.** The #160 session-lifecycle surface is three commands: `list-sessions`, `delete-session`, `load-session`. The first two accept `--directory DIR` and `--output-format {text,json}`, and emit a typed JSON error envelope (`{session_id, deleted, error: {kind, message, retryable}}`) on failure. `load-session` accepts neither flag and, on a missing session, dumps a **raw Python traceback** to stderr that includes the internal exception class name: + +``` +$ claw load-session nonexistent +Traceback (most recent call last): + File "/.../src/main.py", line 324, in + raise SystemExit(main()) + File "/.../src/main.py", line 230, in main + session = load_session(args.session_id) + File "/.../src/session_store.py", line 32, in load_session + raise SessionNotFoundError(f'session {session_id!r} not found in {target_dir}') from None +src.session_store.SessionNotFoundError: "session 'nonexistent' not found in .port_sessions" +$ echo $? +1 +``` + +**Impact.** Three concrete breakages: + +1. **Alternate session-store locations are unreachable via `load-session`.** Claws that keep sessions in `/tmp/claw-run-XXX/.port_sessions` can `list-sessions --directory /tmp/.../port_sessions` and `delete-session id --directory /tmp/.../port_sessions`, but they cannot `load-session id --directory /tmp/.../port_sessions`. The load path is hardcoded to `.port_sessions` in CWD. This breaks any orchestration that runs out-of-tree. + +2. **Not-found is a traceback, not an envelope.** Claws parsing `load-session` output to decide "retry vs escalate vs give up" see a multi-line Python stack instead of a `{error: {kind: "session_not_found", ...}}` structure. The exit code (1) is the only machine-readable signal, which collapses every load failure into a single bucket. + +3. **Leaked internal class name creates parsing coupling.** The traceback contains `src.session_store.SessionNotFoundError` verbatim. If we ever rename the class, version-pinned claws that grep for it break. That's accidental API surface. + +**Repro (the #160 triplet side-by-side).** +```bash +# list-sessions: structured + parameterised +$ claw list-sessions --directory /tmp/never-created --output-format json +{"sessions": [], "count": 0} + +# delete-session: structured + parameterised + typed error on partial failure +$ claw delete-session nonexistent --directory /tmp/never-created --output-format json +{"session_id": "nonexistent", "deleted": false, "status": "not_found"} + +# load-session: neither + raw traceback +$ claw load-session nonexistent --directory /tmp/never-created +error: unrecognized arguments: --directory /tmp/never-created + +$ claw load-session nonexistent +Traceback (most recent call last): + ... +src.session_store.SessionNotFoundError: "session 'nonexistent' not found in .port_sessions" +``` + +**Fix shape (~30 lines).** +1. Add `--directory DIR` to `load-session` argparse (forward to `load_session(args.session_id, directory)`). +2. Add `--output-format {text,json}` to `load-session` argparse. +3. Catch `SessionNotFoundError` in the handler and emit a typed error envelope that mirrors the `delete-session` shape: + ```json + { + "session_id": "nonexistent", + "loaded": false, + "error": { + "kind": "session_not_found", + "message": "session 'nonexistent' not found in /path/to/dir", + "directory": "/path/to/dir", + "retryable": false + } + } + ``` + `retryable: false` is the right default here: not-found doesn't resolve itself on retry (unlike `delete-session` partial-failure which might). Claws know to stop vs retry from this flag alone. +4. Exit code contract: 0 on successful load, 1 on not-found (preserves current `$?`), still 1 on unexpected `OSError`/`JSONDecodeError` with a distinct `kind` so callers can distinguish "no such session" from "session file corrupted". +5. Success path JSON shape: + ```json + { + "session_id": "alpha", + "loaded": true, + "messages_count": 3, + "input_tokens": 42, + "output_tokens": 99 + } + ``` + Mirrors what text mode already prints but as parseable data. + +**Acceptance.** All three of these pass: +- `claw load-session ID --directory /some/other/dir` succeeds on a session in that dir (parity with list/delete) +- `claw load-session nonexistent --output-format json` exits 1 with `{session_id, loaded: false, error: {kind: "session_not_found", ...}}` — no traceback, no class name leak +- Existing `claw load-session ID` text-mode output unchanged for backward compat + +**Blocker.** None. Purely CLI-layer wiring; `session_store.load_session` already accepts `directory` and already raises the typed `SessionNotFoundError`. This is closing the gap between the library contract (which is clean) and the CLI contract (which isn't). + +**Source.** Jobdori dogfood sweep 2026-04-22 17:44 KST — ran `claw load-session nonexistent`, got a Python traceback. Compared `--help` across the #160 triplet; confirmed `list-sessions` and `delete-session` both have `--directory` + `--output-format` but `load-session` has neither. The session-lifecycle surface is inconsistent in a way that directly hurts claws that already adopted #160. diff --git a/src/main.py b/src/main.py index e1a5663..50797a3 100644 --- a/src/main.py +++ b/src/main.py @@ -84,8 +84,20 @@ def build_parser() -> argparse.ArgumentParser: flush_parser = subparsers.add_parser('flush-transcript', help='persist and flush a temporary session transcript') flush_parser.add_argument('prompt') - load_session_parser = subparsers.add_parser('load-session', help='load a previously persisted session') + load_session_parser = subparsers.add_parser( + 'load-session', + help='load a previously persisted session (#160/#165: claw-native session API)', + ) load_session_parser.add_argument('session_id') + load_session_parser.add_argument( + '--directory', help='session storage directory (default: .port_sessions)' + ) + load_session_parser.add_argument( + '--output-format', + choices=['text', 'json'], + default='text', + help='output format', + ) list_sessions_parser = subparsers.add_parser( 'list-sessions', @@ -227,8 +239,59 @@ def main(argv: list[str] | None = None) -> int: print(f'flushed={engine.transcript_store.flushed}') return 0 if args.command == 'load-session': - session = load_session(args.session_id) - print(f'{session.session_id}\n{len(session.messages)} messages\nin={session.input_tokens} out={session.output_tokens}') + from pathlib import Path as _Path + directory = _Path(args.directory) if args.directory else None + # #165: catch typed SessionNotFoundError + surface a JSON error envelope + # matching the delete-session contract shape. No more raw tracebacks. + try: + session = load_session(args.session_id, directory) + except SessionNotFoundError as exc: + if args.output_format == 'json': + import json as _json + resolved_dir = str(directory) if directory else '.port_sessions' + print(_json.dumps({ + 'session_id': args.session_id, + 'loaded': False, + 'error': { + 'kind': 'session_not_found', + 'message': str(exc), + 'directory': resolved_dir, + 'retryable': False, + }, + })) + else: + print(f'error: {exc}') + return 1 + except (OSError, ValueError) as exc: + # Corrupted session file, IO error, JSON decode error — distinct + # from 'not found'. Callers may retry here (fs glitch). + if args.output_format == 'json': + import json as _json + resolved_dir = str(directory) if directory else '.port_sessions' + print(_json.dumps({ + 'session_id': args.session_id, + 'loaded': False, + 'error': { + 'kind': 'session_load_failed', + 'message': str(exc), + 'directory': resolved_dir, + 'retryable': True, + }, + })) + else: + print(f'error: {exc}') + return 1 + if args.output_format == 'json': + import json as _json + print(_json.dumps({ + 'session_id': session.session_id, + 'loaded': True, + 'messages_count': len(session.messages), + 'input_tokens': session.input_tokens, + 'output_tokens': session.output_tokens, + })) + else: + print(f'{session.session_id}\n{len(session.messages)} messages\nin={session.input_tokens} out={session.output_tokens}') return 0 if args.command == 'list-sessions': from pathlib import Path as _Path diff --git a/tests/test_load_session_cli.py b/tests/test_load_session_cli.py new file mode 100644 index 0000000..c2ce0e2 --- /dev/null +++ b/tests/test_load_session_cli.py @@ -0,0 +1,179 @@ +"""Tests for load-session CLI parity with list-sessions/delete-session (ROADMAP #165). + +Verifies the session-lifecycle CLI triplet is now symmetric: +- --directory DIR accepted (alternate storage locations reachable) +- --output-format {text,json} accepted +- Not-found emits typed JSON error envelope, never a Python traceback +- Corrupted session file distinguished from not-found via 'kind' +- Legacy text-mode output unchanged (backward compat) +""" + +from __future__ import annotations + +import json +import subprocess +import sys +from pathlib import Path + +import pytest + +sys.path.insert(0, str(Path(__file__).resolve().parent.parent)) + +from src.session_store import StoredSession, save_session # noqa: E402 + + +_REPO_ROOT = Path(__file__).resolve().parent.parent + + +def _run_cli( + *args: str, cwd: Path | None = None, +) -> subprocess.CompletedProcess[str]: + """Always invoke the CLI with cwd=repo-root so ``python -m src.main`` + can resolve the ``src`` package, regardless of where the test's + tmp_path is. + """ + return subprocess.run( + [sys.executable, '-m', 'src.main', *args], + capture_output=True, + text=True, + cwd=str(cwd) if cwd else str(_REPO_ROOT), + ) + + +def _make_session(session_id: str) -> StoredSession: + return StoredSession( + session_id=session_id, messages=('hi',), input_tokens=1, output_tokens=2, + ) + + +class TestDirectoryFlagParity: + def test_load_session_accepts_directory_flag(self, tmp_path: Path) -> None: + save_session(_make_session('alpha'), tmp_path) + result = _run_cli('load-session', 'alpha', '--directory', str(tmp_path)) + assert result.returncode == 0, result.stderr + assert 'alpha' in result.stdout + + def test_load_session_without_directory_uses_cwd_default( + self, tmp_path: Path, + ) -> None: + """When --directory is omitted, fall back to .port_sessions in CWD. + + Subprocess CWD must still be able to import ``src.main``, so we use + ``cwd=tmp_path`` which means ``python -m src.main`` needs ``src/`` on + sys.path. We set PYTHONPATH to the repo root via env. + """ + sessions_dir = tmp_path / '.port_sessions' + sessions_dir.mkdir() + save_session(_make_session('beta'), sessions_dir) + import os + env = os.environ.copy() + env['PYTHONPATH'] = str(_REPO_ROOT) + result = subprocess.run( + [sys.executable, '-m', 'src.main', 'load-session', 'beta'], + capture_output=True, text=True, cwd=str(tmp_path), env=env, + ) + assert result.returncode == 0, result.stderr + assert 'beta' in result.stdout + + +class TestOutputFormatFlagParity: + def test_json_mode_on_success(self, tmp_path: Path) -> None: + save_session( + StoredSession( + session_id='gamma', messages=('x', 'y'), + input_tokens=5, output_tokens=7, + ), + tmp_path, + ) + result = _run_cli( + 'load-session', 'gamma', + '--directory', str(tmp_path), + '--output-format', 'json', + ) + assert result.returncode == 0 + data = json.loads(result.stdout) + assert data == { + 'session_id': 'gamma', + 'loaded': True, + 'messages_count': 2, + 'input_tokens': 5, + 'output_tokens': 7, + } + + def test_text_mode_unchanged_on_success(self, tmp_path: Path) -> None: + """Legacy text output must be byte-identical for backward compat.""" + save_session(_make_session('delta'), tmp_path) + result = _run_cli('load-session', 'delta', '--directory', str(tmp_path)) + assert result.returncode == 0 + lines = result.stdout.strip().split('\n') + assert lines == ['delta', '1 messages', 'in=1 out=2'] + + +class TestNotFoundTypedError: + def test_not_found_json_envelope(self, tmp_path: Path) -> None: + """Not-found emits structured JSON, never a Python traceback.""" + result = _run_cli( + 'load-session', 'missing', + '--directory', str(tmp_path), + '--output-format', 'json', + ) + assert result.returncode == 1 + assert 'Traceback' not in result.stderr, ( + 'regression #165: raw traceback leaked to stderr' + ) + assert 'SessionNotFoundError' not in result.stdout, ( + 'regression #165: internal class name leaked into CLI output' + ) + data = json.loads(result.stdout) + assert data['session_id'] == 'missing' + assert data['loaded'] is False + assert data['error']['kind'] == 'session_not_found' + assert data['error']['retryable'] is False + # directory field is populated so claws know where we looked + assert 'directory' in data['error'] + + def test_not_found_text_mode_no_traceback(self, tmp_path: Path) -> None: + """Text mode on not-found must not dump a Python stack either.""" + result = _run_cli( + 'load-session', 'missing', '--directory', str(tmp_path), + ) + assert result.returncode == 1 + assert 'Traceback' not in result.stderr + assert result.stdout.startswith('error:') + + +class TestLoadFailedDistinctFromNotFound: + def test_corrupted_session_file_surfaces_distinct_kind( + self, tmp_path: Path, + ) -> None: + """A corrupted JSON file must emit kind='session_load_failed', not 'session_not_found'.""" + (tmp_path / 'broken.json').write_text('{ not valid json') + result = _run_cli( + 'load-session', 'broken', + '--directory', str(tmp_path), + '--output-format', 'json', + ) + assert result.returncode == 1 + data = json.loads(result.stdout) + assert data['error']['kind'] == 'session_load_failed' + assert data['error']['retryable'] is True, ( + 'corrupted file is potentially retryable (fs glitch) unlike not-found' + ) + + +class TestTripletParityConsistency: + """All three #160 CLI commands should accept the same flag pair.""" + + @pytest.mark.parametrize('command', ['list-sessions', 'delete-session', 'load-session']) + def test_all_three_accept_directory_flag(self, command: str) -> None: + help_text = _run_cli(command, '--help').stdout + assert '--directory' in help_text, ( + f'{command} missing --directory flag (#165 parity gap)' + ) + + @pytest.mark.parametrize('command', ['list-sessions', 'delete-session', 'load-session']) + def test_all_three_accept_output_format_flag(self, command: str) -> None: + help_text = _run_cli(command, '--help').stdout + assert '--output-format' in help_text, ( + f'{command} missing --output-format flag (#165 parity gap)' + )