mirror of
https://github.com/instructkr/claw-code.git
synced 2026-04-27 07:45:08 +08:00
fix: #161 — wall-clock timeout for run_turn_loop; stalled turns now abort with stop_reason='timeout'
Previously, run_turn_loop was bounded only by max_turns (turn count). If engine.submit_message stalled — slow provider, hung network, infinite stream — the loop blocked indefinitely with no cancellation path. Claws calling run_turn_loop in CI or orchestration had no reliable way to enforce a deadline; the loop would hang until OS kill or human intervention. Fix: - Add timeout_seconds parameter to run_turn_loop (default None = legacy unbounded). - When set, each submit_message call runs inside a ThreadPoolExecutor and is bounded by the remaining wall-clock budget (total across all turns, not per-turn). - On timeout, synthesize a TurnResult with stop_reason='timeout' carrying the turn's prompt and routed matches so transcripts preserve orchestration context. - Exhausted/negative budget short-circuits before calling submit_message. - Legacy path (timeout_seconds=None) bypasses the executor entirely — zero overhead for callers that don't opt in. CLI: - Added --timeout-seconds flag to 'turn-loop' command. - Exit code 2 when the loop terminated on timeout (vs 0 for completed), so shell scripts can distinguish 'done' from 'budget exhausted'. Tests (tests/test_run_turn_loop_timeout.py, 6 tests): - Legacy unbounded path unchanged (timeout_seconds=None never emits 'timeout') - Hung submit_message aborted within budget (0.3s budget, 5s mock hang → exit <1.5s) - Budget is cumulative across turns (0.6s budget, 0.4s per turn, not per-turn) - timeout_seconds=0 short-circuits first turn without calling submit_message - Negative timeout treated as exhausted (guard against caller bugs) - Timeout TurnResult carries correct prompt, matches, UsageSummary shape Full suite: 49/49 passing, zero regression. Blocker: none. Closes ROADMAP #161.
This commit is contained in:
18
src/main.py
18
src/main.py
@@ -65,6 +65,12 @@ def build_parser() -> argparse.ArgumentParser:
|
||||
loop_parser.add_argument('--limit', type=int, default=5)
|
||||
loop_parser.add_argument('--max-turns', type=int, default=3)
|
||||
loop_parser.add_argument('--structured-output', action='store_true')
|
||||
loop_parser.add_argument(
|
||||
'--timeout-seconds',
|
||||
type=float,
|
||||
default=None,
|
||||
help='total wall-clock budget across all turns (#161). Default: unbounded.',
|
||||
)
|
||||
|
||||
flush_parser = subparsers.add_parser('flush-transcript', help='persist and flush a temporary session transcript')
|
||||
flush_parser.add_argument('prompt')
|
||||
@@ -187,11 +193,21 @@ def main(argv: list[str] | None = None) -> int:
|
||||
print(PortRuntime().bootstrap_session(args.prompt, limit=args.limit).as_markdown())
|
||||
return 0
|
||||
if args.command == 'turn-loop':
|
||||
results = PortRuntime().run_turn_loop(args.prompt, limit=args.limit, max_turns=args.max_turns, structured_output=args.structured_output)
|
||||
results = PortRuntime().run_turn_loop(
|
||||
args.prompt,
|
||||
limit=args.limit,
|
||||
max_turns=args.max_turns,
|
||||
structured_output=args.structured_output,
|
||||
timeout_seconds=args.timeout_seconds,
|
||||
)
|
||||
for idx, result in enumerate(results, start=1):
|
||||
print(f'## Turn {idx}')
|
||||
print(result.output)
|
||||
print(f'stop_reason={result.stop_reason}')
|
||||
# Exit 2 when a timeout terminated the loop so claws can distinguish
|
||||
# 'ran to completion' from 'hit wall-clock budget'.
|
||||
if results and results[-1].stop_reason == 'timeout':
|
||||
return 2
|
||||
return 0
|
||||
if args.command == 'flush-transcript':
|
||||
engine = QueryEnginePort.from_workspace()
|
||||
|
||||
Reference in New Issue
Block a user