diff --git a/AGENTS.md b/AGENTS.md index 65842f8..279b3f9 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -6,6 +6,8 @@ Harness-independent core for an agent ecosystem (messaging, live-agent lifecycle For maintainer/debug update work, also read `docs/DEBUG-ROLLOUT.md` before touching the update-set or rollout path. +For work needing Linux tests and proof, this machine is authorized to ssh into a Linux box: `reavus@kitsubito` + ## Requirement traceability (binding) This project uses `traceable-reqs` (`traceable-reqs.toml` = the authoritative `REQ-*` registry). The full contract is `docs/TRACEABILITY.md`. The rules you must follow: @@ -25,5 +27,7 @@ This project uses `traceable-reqs` (`traceable-reqs.toml` = the authoritative `R - Docs are dual-audience (human + AI dev-agent) per `docs/DOCS-STRATEGY.md`; doc generation is CI-gated against drift. - Commit messages end with the project's Co-Authored-By trailer. -If you finish a significant body of work without need for user intervention, or if your context gets too high, prepare for the next session: +If your context gets too high, prepare for the next session: - Create a JIT plan for the next immediate body of work, if it isn't already planned +- /commune with immediate next steps and broad summary of the project's status + end goal +- prompt the operator to /clear into a new session. 
You will resume there. \ No newline at end of file diff --git a/crates/spt-daemon/tests/inject_control_wedge.rs b/crates/spt-daemon/tests/inject_control_wedge.rs index cea8cec..cfd6d2c 100644 --- a/crates/spt-daemon/tests/inject_control_wedge.rs +++ b/crates/spt-daemon/tests/inject_control_wedge.rs @@ -499,14 +499,12 @@ fn a_journaled_input_wedge_does_not_starve_a_concurrent_rc_attach() { .spawn_session(flood_spawn_req("wedge-jrnl-ep")) .expect("spawn flood child"); - // ── The WEDGE driver: on its OWN connection, pump journaled input - // (op_id-carrying send_effect → broker dispatch_input → journal.apply_once - // runs write_input INSIDE the global journal lock). On Unix the full input - // buffer parks write_input, holding the lock; on Windows ConPTY absorbs it. - // The driver fires a BOUNDED burst of MODEST-sized ops (so a backed-up - // broker IPC never blocks the driver's own socket write — the burst always - // completes and the thread exits cleanly, never hanging the test process) - // and then idles, checking the stop flag. ── + // ── The WEDGE driver: on its OWN connection, pump journaled input via the REAL + // operator seam (op_id-carrying send_effect_no_ack → broker dispatch_input → + // journal.apply_once enqueues the PtyWrite exactly-once). A sustained flood of + // MODEST-sized ops keeps a journaled write in-flight; the pump carrier drains + // the driver's own socket so a backed-up broker never blocks its writes, and + // the thread exits cleanly on the stop flag. ── let stop = Arc::new(AtomicBool::new(false)); let pumped = Arc::new(AtomicU64::new(0)); let driver_name = name.clone(); @@ -529,30 +527,30 @@ fn a_journaled_input_wedge_does_not_starve_a_concurrent_rc_attach() { } let chunk = vec![b'W'; 16 * 1024]; let mut op = 1u64; - // Sustain journaled input: keep ops in flight (and, on Unix, parked under - // the journal lock) until teardown. 
Each send_effect writes a frame; the - // pump reader drains replies concurrently so the write never deadlocks. + // Sustain journaled input in-flight until teardown, via the REAL operator/rc + // seam: send_effect_NO_ACK. The operator/rc drive floods input fire-and- + // forward (attach.rs:205 `brain.send_effect_no_ack`) precisely so the broker + // never writes an ACK back onto the very conn it is flooding — one ACK per op + // self-contends with the controller flood-OUTPUT on that conn's Mutex + // and wedges the per-conn dispatch handler, starving a concurrent attach's + // subscribe (root-caused via gdb: the dispatch thread parks in the ACK + // send_frame on the SendHalf mutex, NOT the OutputLog mutex). The earlier + // acked `send_effect` here was a TEST MIS-MODEL: no production path sustain- + // floods the acked seam (its acked callers are bounded one-shot / one-at-a- + // time), and acked-flood self-wedging is the documented hazard. no_ack stays + // JOURNALED + exactly-once (op_id => the broker's journal.apply_once runs + // regardless of ack), so facet B is non-vacuous: still a sustained journaled + // flood, just via the seam the real operator actually uses. + // [int->REQ-HAZARD-INPUT-ACK-BACKPRESSURE] while !driver_stop.load(std::sync::atomic::Ordering::Relaxed) { - if w.send_effect(op, &chunk).is_err() { + if w.send_effect_no_ack(op, &chunk).is_err() { break; } driver_pumped.fetch_add(1, std::sync::atomic::Ordering::Relaxed); op += 1; - // Drain queued replies (acks/output) so the carrier stays healthy, then - // PACE with an explicit floor sleep. The doc above says "sustain a write - // in-flight", but the bare `while !stop` pumped ~6094 ops and pegged the - // core, out-competing the SIBLING attacher thread for scheduling under - // contention so its subscribe was never serviced in 30s (the kitsubito - // reliability victim, subscribed=false). 
A small sleep keeps a journaled - // write continuously in-flight (facet-B's load is intact) while yielding - // the CPU the attacher needs — sustained, not storming. + // Keep the carrier healthy by draining session output; with no_ack there + // is no ACK frame to read and none to self-contend on the SendHalf. let _ = w.read_event_until(Some(std::time::Instant::now() + Duration::from_millis(20))); - // Unix-only: the CPU storm is a forkpty problem. On Windows ConPTY - // ABSORBS the journaled write, so there is no storm to pace and the sleep - // only adds a timing variable (p0 flaked on the SHARED Windows runner) — - // skip it there; Windows p0 was green pre-pacing. - #[cfg(unix)] - std::thread::sleep(Duration::from_millis(5)); } }); @@ -1785,19 +1783,24 @@ fn p0_paste_wedge_parked_write_does_not_starve_attach_or_wedge_broker() { // FIFO holds, so saturation (and DROP) is guaranteed once the writer parks. let chunk = vec![b'P'; 4 * 1024]; let mut op = 1u64; + // Flood paste-shaped input via the REAL operator seam (send_effect_no_ack): + // the rc/operator paste drive is fire-and-forward (attach.rs:205) so the + // broker never ACKs back onto the flooded conn. The earlier acked send_effect + // was a test mis-model — one ACK per op self-contends with the controller + // flood-output on that conn's SendHalf Mutex and starves the concurrent + // attach's subscribe (REQ-HAZARD-INPUT-ACK-BACKPRESSURE). no_ack stays + // journaled+exactly-once (op_id => journal.apply_once), so the paste-wedge + // gate is unchanged: still a sustained journaled paste flood that must NOT + // wedge the concurrent attach. 
+ // [int->REQ-HAZARD-INPUT-ACK-BACKPRESSURE] while !driver_stop.load(Ordering::Relaxed) { - if w.send_effect(op, &chunk).is_err() { + if w.send_effect_no_ack(op, &chunk).is_err() { break; } driver_pumped.fetch_add(1, Ordering::Relaxed); op += 1; - // Pace the pump (see the journaled-wedge driver): keep a paste write - // in-flight without a CPU storm that starves the concurrent attacher. + // Drain session output to keep the carrier healthy; no ACK frame with no_ack. let _ = w.read_event_until(Some(Instant::now() + Duration::from_millis(15))); - // Unix-only (see the journaled-wedge driver): Windows ConPTY absorbs the - // paste, so there is no CPU storm to pace here. - #[cfg(unix)] - std::thread::sleep(Duration::from_millis(5)); } });