// spatial_light — M2 gate 6: first spatial light (the M2 exit).
//
// Everything live in one process: Tundra IMU -> Mahony AHRS pose ->
// per-eye scene render (world-fixed test quad, projection-fold frusta,
// ±ipd/2) -> per-channel POLY3 distortion warp -> NVAPI direct mode.
//
// Exit criteria (eyes-in): screen stays put under yaw/pitch/roll; no
// inversion or mirroring; stereo fuses at screen distance; no swimming at
// rest; recenter snaps it back ahead.
//
//   spatial_light [seconds] [--config <path>] [--ipd-mm 64]
//                 [--neck-mm fwd,up] [--no-neck] [--grid] [--ss 1.5]
//                 [--no-capture] [--monitor N] [--imu-serial LHR-...]
//                 [--hz 75|90] [--brightness 0..1023] [--idle-min N]
//                 [--no-console] [--predict-ms N] [--predict-auto]
//                 [--no-latelatch]
//
// Desktop capture and a visible console are the DEFAULT (team-share UX:
// double-click = floating monitors + status terminal). --no-capture =
// M2 test card; --no-console hides the console when launched outside a
// shell (tray-only).
//
// App shell (M3 step 3): tray icon (recenter / neck cal / save settings /
// open settings file / quit). Console shows by default (status line +
// keys); --no-console hides it when launched outside a terminal.
//
// Calibration: native Watchman flash read by DEFAULT (M3 step 6.3, locked
// decision 3) — no JSON needed. --config is the dev override.
//
// Settings (M3 step 5): %APPDATA%\sauna\config.json is loaded at startup
// and provides the defaults; CLI flags override for the run. The file is
// watched — edits live-apply in-headset (distance, scale, predict-ms,
// neck, ipd, hotkey; supersample and monitor_exclude need a restart).
// Console 's' saves the current live values back to the file.
//
// --capture (M3 steps 1+2): the desktop monitors replace the test card —
// one world quad per monitor, laid out to match the OS arrangement
// (positions and gaps preserved; primary monitor width = the M2 quad's
// 2.133 m at 2 m).
--monitor N restricts capture to one DXGI output index // (centered). Test-card fallback on capture failure or before first frame. // // Console keys: r = recenter (yaw zero), b = AHRS bias recapture (hold // still), arrows = neck tune, -/+ = screen distance, [/] = screen scale, // w = warp direct/two-pass A/B, p = prediction auto/manual A/B, // l = late-latch on/off A/B, f = freeze pose, n = neck calibration, // s = save settings, q = quit. M1 resilience invariants hold: hot-unplug exits the // present loop cleanly (exit 4), IMU reconnect re-levels the AHRS, doff // keeps tracking and the presenter unattended. // // Exit codes: 0 ok, 2 IMU missing, 3 display init failed, 4 display lost. #include "app/gpu_hogs.h" #include "app/settings.h" #include "calib/hmd_config.h" #include "track/neck_cal.h" #include "capture/duplication_source.h" #include "capture/monitor_layout.h" #include "device/mcu_prox.h" #include "device/tundra_config.h" #include "device/tundra_imu.h" #include "present/nvapi_d3d12.h" #include "render/scene_renderer.h" #include "render/warp_direct.h" #include "render/warp_pass.h" #include "track/ahrs.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include using Microsoft::WRL::ComPtr; using namespace sauna; namespace { std::atomic g_stop{false}; BOOL WINAPI ctrlHandler(DWORD type) { if (type == CTRL_C_EVENT || type == CTRL_BREAK_EVENT || type == CTRL_CLOSE_EVENT) { g_stop.store(true); return TRUE; } return FALSE; } // View matrix (row-major, column-vector): v_eye = R^T (v_world - headPos) - t_e. // headPos = world position of the head origin (eye midpoint) — the neck // model output; t_e = eye offset in head frame. 
void viewFromPose(const Quat& q, const float headPos[3], const float tEye[3], float out[16]) { const double w = q.w, x = q.x, y = q.y, z = q.z; const double R[9] = {1 - 2 * (y * y + z * z), 2 * (x * y - w * z), 2 * (x * z + w * y), 2 * (x * y + w * z), 1 - 2 * (x * x + z * z), 2 * (y * z - w * x), 2 * (x * z - w * y), 2 * (y * z + w * x), 1 - 2 * (x * x + y * y)}; memset(out, 0, 16 * sizeof(float)); for (int r = 0; r < 3; r++) for (int c = 0; c < 3; c++) out[r * 4 + c] = (float)R[c * 3 + r]; for (int r = 0; r < 3; r++) { double rtH = 0; // (R^T headPos)[r] for (int c = 0; c < 3; c++) rtH += R[c * 3 + r] * headPos[c]; out[r * 4 + 3] = (float)(-rtH) - tEye[r]; } out[15] = 1.0f; } // Apply the eye_to_head rotation as an extra eye<-head stage: // V' = [R_e2h^T] * V (rotation only; the +-ipd/2 translation is already in // V's head-frame stage). For --canted mode. void applyCant(const double e2h[3][3], float view[16]) { float E[16]; memset(E, 0, sizeof(E)); for (int r = 0; r < 3; r++) for (int c = 0; c < 3; c++) E[r * 4 + c] = (float)e2h[c][r]; // transpose E[15] = 1.0f; float out[16]; for (int i = 0; i < 4; i++) for (int j = 0; j < 4; j++) { out[i * 4 + j] = 0; for (int k = 0; k < 4; k++) out[i * 4 + j] += E[i * 4 + k] * view[k * 4 + j]; } memcpy(view, out, sizeof(out)); } // Rotate v by q. 
void quatRotate(const Quat& q, const float v[3], float out[3]) { const double qv[3] = {q.x, q.y, q.z}; const double t[3] = {2 * (qv[1] * v[2] - qv[2] * v[1]), 2 * (qv[2] * v[0] - qv[0] * v[2]), 2 * (qv[0] * v[1] - qv[1] * v[0])}; out[0] = (float)(v[0] + q.w * t[0] + qv[1] * t[2] - qv[2] * t[1]); out[1] = (float)(v[1] + q.w * t[1] + qv[2] * t[0] - qv[0] * t[2]); out[2] = (float)(v[2] + q.w * t[2] + qv[0] * t[1] - qv[1] * t[0]); } void projFromTangents(double L, double R, double T, double B, float zn, float zf, float out[16]) { const double u = -T, d = -B; memset(out, 0, 16 * sizeof(float)); out[0] = (float)(2.0 / (R - L)); out[2] = (float)((R + L) / (R - L)); out[5] = (float)(2.0 / (u - d)); out[6] = (float)((u + d) / (u - d)); out[10] = zf / (zn - zf); out[11] = zn * zf / (zn - zf); out[14] = -1.0f; } void matMul(const float a[16], const float b[16], float out[16]) { float r[16]; for (int i = 0; i < 4; i++) for (int j = 0; j < 4; j++) { r[i * 4 + j] = 0; for (int k = 0; k < 4; k++) r[i * 4 + j] += a[i * 4 + k] * b[k * 4 + j]; } memcpy(out, r, sizeof(r)); } // --grid isolation mode: straight grid in SOURCE space (C++ port of the S3 // make_warped_grid.py pattern). Through the warp + lens this must look // rectilinear — the exact test that passed eyes-in on LHR-599F3B91 in S3. // Bowed lines here = warp/lens/config mismatch on THIS unit; rectilinear // here = warp chain fine, problem is upstream (scene/projection). 
void makeSourceGrid(std::vector* px, uint32_t size) { px->resize((size_t)size * size); const double kStep = 0.05, kLineHw = 0.0018, kCrossHw = 0.005; for (uint32_t y = 0; y < size; y++) { const double sv = (y + 0.5) / size; for (uint32_t x = 0; x < size; x++) { const double su = (x + 0.5) / size; double du = fabs(fmod(su / kStep + 0.5, 1.0) - 0.5) * kStep; double dv = fabs(fmod(sv / kStep + 0.5, 1.0) - 0.5) * kStep; bool lines = du < kLineHw || dv < kLineHw; bool cross = fabs(su - 0.5) < kCrossHw || fabs(sv - 0.5) < kCrossHw; double tv = (sv - 0.30) / 0.08; bool tri = tv >= 0 && tv <= 1 && fabs(su - 0.5) < 0.04 * tv; double inten = (lines || cross || tri) ? 1.0 : 0.06; uint8_t v = (uint8_t)(pow(inten, 1.0 / 2.2) * 255.0 + 0.5); (*px)[(size_t)y * size + x] = 0xFF000000u | (v << 16) | (v << 8) | v; } } } // App shell (M3 step 3): hidden window backing the tray icon. The shell // thread owns it (tray callbacks and RegisterHotKey are thread-affine). constexpr UINT kTrayMsg = WM_APP + 1; constexpr UINT kCmdRecenter = 1, kCmdSave = 2, kCmdOpen = 3, kCmdQuit = 4, kCmdNeckCal = 5, kCmdSleep = 6; struct ShellCtx { std::function recenter, save, openSettings, quit, neckCal; std::function sleepNow, wakeNow; // 0 = awake (offer sleep), 1 = SteamVR owns the headset (grayed), // 2 = asleep (offer wake — bypasses grace and prox gating). 
std::function sleepState; }; LRESULT CALLBACK shellWndProc(HWND w, UINT m, WPARAM wp, LPARAM lp) { if (m == WM_CREATE) { SetWindowLongPtrW( w, GWLP_USERDATA, (LONG_PTR)((CREATESTRUCTW*)lp)->lpCreateParams); return 0; } auto* ctx = (ShellCtx*)GetWindowLongPtrW(w, GWLP_USERDATA); if (m == kTrayMsg && ctx) { const UINT ev = LOWORD(lp); if (ev == WM_RBUTTONUP || ev == WM_LBUTTONUP || ev == WM_CONTEXTMENU) { HMENU menu = CreatePopupMenu(); AppendMenuW(menu, MF_STRING, kCmdRecenter, L"Recenter"); AppendMenuW(menu, MF_STRING, kCmdNeckCal, L"Start neck calibration"); AppendMenuW(menu, MF_STRING, kCmdSave, L"Save settings"); AppendMenuW(menu, MF_STRING, kCmdOpen, L"Open settings file"); AppendMenuW(menu, MF_SEPARATOR, 0, nullptr); // Sleep/wake entry: asleep offers "Wake up" (works regardless of // the sleep grace or prox state); only SteamVR grays it. const int ss = ctx->sleepState ? ctx->sleepState() : 0; AppendMenuW(menu, ss == 1 ? MF_STRING | MF_GRAYED : MF_STRING, kCmdSleep, ss == 1 ? L"VR is active" : ss == 2 ? L"Wake up" : L"Sleep now"); AppendMenuW(menu, MF_STRING, kCmdQuit, L"Quit"); POINT pt; GetCursorPos(&pt); SetForegroundWindow(w); // menu dismisses correctly (MS-documented) const UINT cmd = (UINT)TrackPopupMenu( menu, TPM_RETURNCMD | TPM_NONOTIFY, pt.x, pt.y, 0, w, nullptr); DestroyMenu(menu); if (cmd == kCmdRecenter) ctx->recenter(); if (cmd == kCmdNeckCal) ctx->neckCal(); if (cmd == kCmdSave) ctx->save(); if (cmd == kCmdOpen) ctx->openSettings(); if (cmd == kCmdSleep) { if (ss == 2 && ctx->wakeNow) ctx->wakeNow(); else if (ss == 0 && ctx->sleepNow) ctx->sleepNow(); } if (cmd == kCmdQuit) ctx->quit(); } return 0; } return DefWindowProcW(w, m, wp, lp); } } // namespace // Fatal-exit visibility (field, Win10/RTX 4090 rig: presenter init failed // and "it just quits" — the double-click-owned console died with the // process, taking the actual error text with it). 
When this process is the // console's sole owner there is no shell left to hold the message: pause // for a key so the error is readable. From a terminal (shared console) // exits stay prompt-clean. static int FailExit(int code) { DWORD pids[2]; if (GetConsoleProcessList(pids, 2) == 1) { fprintf(stderr, "\n[startup/exit error above — press any key to close]\n"); _getch(); } return code; } // Process-local GPU memory in MB (IDXGIAdapter3::QueryVideoMemoryInfo, // CurrentUsage on the local segment) — the same per-process number Task // Manager shows, distinct from TOTAL VRAM (which includes other apps like // SteamVR). Diagnostic instrument for the M5 doze VRAM question; cached adapter // 0 (the discrete GPU on the bench). Returns 0 if unavailable. static uint64_t vramUsedMB() { static Microsoft::WRL::ComPtr adapter; if (!adapter) { Microsoft::WRL::ComPtr f; if (FAILED(CreateDXGIFactory1(IID_PPV_ARGS(&f)))) return 0; // Pick the adapter with the most dedicated VRAM (the dGPU). EnumAdapters1(0) // returns Windows' default order; on a hybrid box with a display on the iGPU // that's the iGPU (≈0 dedicated VRAM), so the counter read 0 and every [VRAM] // log went blind once a monitor was plugged into the motherboard. 
Microsoft::WRL::ComPtr best, a1; SIZE_T bestVram = 0; for (UINT i = 0; f->EnumAdapters1(i, &a1) != DXGI_ERROR_NOT_FOUND; i++) { DXGI_ADAPTER_DESC1 d{}; if (SUCCEEDED(a1->GetDesc1(&d)) && !(d.Flags & DXGI_ADAPTER_FLAG_SOFTWARE) && d.DedicatedVideoMemory > bestVram) { bestVram = d.DedicatedVideoMemory; best = a1; } a1.Reset(); } if (!best) return 0; best.As(&adapter); } DXGI_QUERY_VIDEO_MEMORY_INFO info{}; if (adapter && SUCCEEDED(adapter->QueryVideoMemoryInfo( 0, DXGI_MEMORY_SEGMENT_GROUP_LOCAL, &info))) return info.CurrentUsage / (1024 * 1024); return 0; } int main(int argc, char** argv) { setbuf(stdout, nullptr); setbuf(stderr, nullptr); // Disable console QuickEdit: a click-drag to select text in the console // SUSPENDS the whole process until Enter/Esc — which froze the don/doff + // SteamVR state watchers (and the present loop) whenever the window was // highlighted. Clearing ENABLE_QUICK_EDIT_MODE (with ENABLE_EXTENDED_FLAGS // so the change takes) keeps the app running while text is selected. { HANDLE hIn = GetStdHandle(STD_INPUT_HANDLE); DWORD cmode = 0; if (hIn != INVALID_HANDLE_VALUE && GetConsoleMode(hIn, &cmode)) { cmode &= ~ENABLE_QUICK_EDIT_MODE; cmode |= ENABLE_EXTENDED_FLAGS; SetConsoleMode(hIn, cmode); } } // Per-monitor DPI awareness, before any window/cursor API: the capture // cursor authority (GetCursorInfo) and DXGI DesktopCoordinates must // speak the same physical-pixel space — an unaware process gets // VIRTUALIZED cursor coords on scaled monitors (offset overlay). 
if (!SetProcessDpiAwarenessContext( DPI_AWARENESS_CONTEXT_PER_MONITOR_AWARE_V2)) SetProcessDpiAwarenessContext(DPI_AWARENESS_CONTEXT_PER_MONITOR_AWARE); // Real-time pacing QoS (SCELTOUIN hitch triage, 2026-06-11): Win11 // laptops power-throttle (EcoQoS) unfocused console apps and ignore // their timer resolution — suspected cause of periodic 27-71 ms present // loop preemptions (hitch/rubber-band every 1-3 s while the headset is // worn and the console is, by definition, unfocused). Opt the process // out of EcoQoS, make Windows honor our timer resolution, and pin it at // 1 ms (the latch fallback Sleep and all pacing sleeps quantize on it). // The present thread additionally raises its own priority. All // harmless on machines that never throttled. { PROCESS_POWER_THROTTLING_STATE pps{}; pps.Version = PROCESS_POWER_THROTTLING_CURRENT_VERSION; pps.ControlMask = PROCESS_POWER_THROTTLING_EXECUTION_SPEED | PROCESS_POWER_THROTTLING_IGNORE_TIMER_RESOLUTION; pps.StateMask = 0; // EcoQoS off; timer resolution honored SetProcessInformation(GetCurrentProcess(), ProcessPowerThrottling, &pps, sizeof(pps)); timeBeginPeriod(1); } double seconds = 0.0; std::string configPath; const char* imuSerial = ""; bool gridMode = false; // Team-share defaults (M4 step 7 pre-work): desktop capture AND a // visible console are the DEFAULT — a bare double-click gets floating // monitors plus the status/keys terminal. --no-capture = test card, // --no-console = hide when launched outside a shell. bool captureMode = true; // M3: desktop duplication virtual screens bool consoleMode = true; // keep the console when launched outside a shell int monitorIndex = -1; // -1 = all monitors (OS layout) // Canted cameras are the DEFAULT: eyes-in A/B (2026-06-10, LHR-1F8E25F1) // showed the parallel+fold contract breaks stereo convergence off-center, // while full eye_to_head rotations + unfolded windows fuse correctly — // the calibration polynomials live in the lens-axis frame. --fold reverts. 
bool canted = true; bool r2Clamp = false; // clamp warp r^2 at cutoff — eyes-in verdict: BAD // (jarring distortion boundary near the FOV edge) // Settings (M3 step 5): %APPDATA%\sauna\config.json provides defaults, // CLI flags override for the run, the watcher live-applies file edits, // and 's' persists the live values. Live-tuned knobs are atomics: the // keys/watcher threads write, the render loop reads. Settings settings; std::string serr; if (!LoadSettings(&settings, &serr)) fprintf(stderr, "settings: %s\n", serr.c_str()); std::atomic predictMs{settings.predict_ms}; // manual/fallback span // M4 step 3: predict to the MEASURED photon time (per-frame vsync-derived // sample->photon age) instead of the fixed span. predictAgeMs mirrors the // span actually used each frame for the status line (age=). std::atomic predictAuto{settings.predict_auto}; std::atomic predictAgeMs{0.0}; std::atomic ipdMm{settings.ipd_mm}; // 0 = flash config default // Screen placement: distance moves the plane, scale multiplies physical // size (scale=1 => primary monitor width = kSceneQuadW, the M2 anchor). std::atomic screenDistM{settings.screen_distance_m}; std::atomic screenScale{settings.screen_scale}; // Curved arrangement (M4 ergonomics): screens on a cylinder of radius // screenDistM, normals at the head, edges touching in OS x order. Live // ('c' key / settings watcher); the flat M3 plane is the fallback. std::atomic screenCurved{settings.screen_curved}; // Display policy (M3 step 4): brightness doubles as the persistence/ // burn-in knob (emission duty); idle timeout parks the panels via the // presenter power gate; refresh picks the DirectMode mode at startup. std::atomic brightness{settings.brightness}; std::atomic idleTimeoutMin{settings.idle_timeout_min}; // Motion wake feature flag (default OFF — the don is the wake signal; // motion wake fires from desk bumps). Forced when no prox hardware. 
std::atomic wakeOnMotion{settings.wake_on_motion}; double refreshHz = settings.refresh_hz; // startup only // Eye render-target supersampling — TWO-PASS path only. Warp-direct // (default) made it unnecessary: the M3 fuzziness it compensated was the // double resample, now gone (step-2 gate verdict). Kept for the 'w' // fallback path. Startup-only (eye targets are allocated once). double superSample = settings.supersample; // Neck model: eye midpoint relative to the neck pivot, head frame // (+y up, +z back). GearVR/Quest 3DOF lineage values. std::atomic neckUpM{settings.neck_up_mm * 0.001}, neckFwdM{settings.neck_forward_mm * 0.001}; std::atomic neckModel{settings.neck_enabled}; // M4 step 2: single-resample warp-direct compositing — DEFAULT since the // gate passed (eyes-in 2026-06-10: much finer patterns resolve, gpu= // unchanged). Falls back to two-pass automatically when unavailable // (test card, --fold, grid, init failure); --warp2pass starts on the // old path, 'w' key live-toggles either way. std::atomic warpDirectOn{true}; // M4 step 3: late-latch pacing default ON (presenter delays each draw to // just-before-flip; 'l' key A/B, --no-latelatch starts it off). 
bool lateLatchStart = true; int vramCycle = 0; // --vram-cycle N: diagnostic release/reacquire loop for (int i = 1; i < argc; i++) { if (!strcmp(argv[i], "--vram-cycle") && i + 1 < argc) vramCycle = atoi(argv[++i]); else if (!strcmp(argv[i], "--config") && i + 1 < argc) configPath = argv[++i]; else if (!strcmp(argv[i], "--imu-serial") && i + 1 < argc) imuSerial = argv[++i]; else if (!strcmp(argv[i], "--ipd-mm") && i + 1 < argc) ipdMm.store(atof(argv[++i])); else if (!strcmp(argv[i], "--grid")) gridMode = true; else if (!strcmp(argv[i], "--capture")) captureMode = true; else if (!strcmp(argv[i], "--no-capture")) captureMode = false; else if (!strcmp(argv[i], "--console")) consoleMode = true; else if (!strcmp(argv[i], "--no-console")) consoleMode = false; else if (!strcmp(argv[i], "--monitor") && i + 1 < argc) monitorIndex = atoi(argv[++i]); else if (!strcmp(argv[i], "--no-neck")) neckModel.store(false); else if (!strcmp(argv[i], "--canted")) canted = true; else if (!strcmp(argv[i], "--fold")) canted = false; else if (!strcmp(argv[i], "--r2-clamp")) r2Clamp = true; else if (!strcmp(argv[i], "--warpdirect")) warpDirectOn.store(true); else if (!strcmp(argv[i], "--warp2pass")) warpDirectOn.store(false); else if (!strcmp(argv[i], "--predict-ms") && i + 1 < argc) { predictMs.store(atof(argv[++i])); predictAuto.store(false); // explicit span = manual intent } else if (!strcmp(argv[i], "--predict-auto")) predictAuto.store(true); else if (!strcmp(argv[i], "--no-latelatch")) lateLatchStart = false; else if (!strcmp(argv[i], "--ss") && i + 1 < argc) superSample = atof(argv[++i]); else if (!strcmp(argv[i], "--hz") && i + 1 < argc) refreshHz = atof(argv[++i]); else if (!strcmp(argv[i], "--brightness") && i + 1 < argc) brightness.store(atoi(argv[++i])); else if (!strcmp(argv[i], "--idle-min") && i + 1 < argc) idleTimeoutMin.store(atof(argv[++i])); else if (!strcmp(argv[i], "--neck-mm") && i + 1 < argc) { // "forward,up" in millimeters double f = 80, u = 75; sscanf(argv[++i], 
"%lf,%lf", &f, &u); neckFwdM.store(f * 0.001); neckUpM.store(u * 0.001); } else seconds = atof(argv[i]); } SetConsoleCtrlHandler(ctrlHandler, TRUE); // Console is dev-only (M3 step 3): launched outside a terminal (Explorer // double-click, startup shortcut) Windows allocates a fresh console that // would float over the captured desktop — hide it, the tray owns the UX. // From a terminal the console is shared (process list > 1) and stays. // --console forces it visible either way. if (!consoleMode) { DWORD pids[2]; if (GetConsoleProcessList(pids, 2) == 1) ShowWindow(GetConsoleWindow(), SW_HIDE); } // Recenter hotkey binding: settings string, fall back to the default on // a parse failure. Generation counter tells the hotkey thread to // re-register on a live change. std::mutex hkMu; std::string hkSpec = settings.recenter_hotkey; std::atomic hkMods{MOD_CONTROL | MOD_ALT}, hkVk{VK_HOME}; std::atomic hkGen{1}; { uint32_t m, v; if (ParseHotkey(hkSpec, &m, &v)) { hkMods.store(m); hkVk.store(v); } else { fprintf(stderr, "settings: bad recenter_hotkey \"%s\" — using " "ctrl+alt+home\n", hkSpec.c_str()); hkSpec = "ctrl+alt+home"; } } HmdConfig cfg; std::string err; if (configPath.empty()) { // Default: read the calibration straight from Watchman flash (M3 step // 6.3). The flash is the authoritative source — the lighthouse-cache // JSON is lossy (ipd block). --config keeps the dev-JSON override. 
std::string text, flashSerial; if (!ReadWatchmanConfig(imuSerial, &text, &err, &flashSerial)) { fprintf(stderr, "native calib read: %s\n(use --config as a " "fallback)\n", err.c_str()); return FailExit(2); } if (!LoadHmdConfigFromString(text, &cfg, &err)) { fprintf(stderr, "%s\n", err.c_str()); return FailExit(2); } configPath = "flash:" + flashSerial; } else if (!LoadHmdConfig(configPath, &cfg, &err)) { fprintf(stderr, "%s\n", err.c_str()); return FailExit(2); } double frus[2][4]; for (int e = 0; e < 2; e++) { if (canted) { // Genuinely canted cameras (the S3 alternative branch): view carries // the full eye_to_head rotation, so the window is the UNFOLDED // grow-expanded intrinsics frustum in the lens-axis (eye) frame. // The warp is unchanged — it maps panel -> the same q-space the // window spans (u_src = (q+G)/2G either way). const EyeCalib& eye = cfg.eye[e]; const double G = 1.0 + eye.grow_for_undistort; const double cx = -eye.intrinsics[0][2], cy = eye.intrinsics[1][2]; const double fx = eye.intrinsics[0][0], fy = eye.intrinsics[1][1]; frus[e][0] = (-G - cx) / fx; frus[e][1] = (G - cx) / fx; frus[e][2] = -(G - cy) / fy; frus[e][3] = -(-G - cy) / fy; } else { ProjectionRawTangents(cfg.eye[e], &frus[e][0], &frus[e][1], &frus[e][2], &frus[e][3]); } } if (predictAuto.load()) printf("%s cameras, prediction AUTO (measured sample->photon age; " "%.0f ms fallback), supersample %.2fx\n", canted ? "CANTED (default)" : "PARALLEL+FOLD (--fold)", predictMs.load(), superSample); else printf("%s cameras, prediction %.0f ms (manual), supersample %.2fx\n", canted ? "CANTED (default)" : "PARALLEL+FOLD (--fold)", predictMs.load(), superSample); if (r2Clamp) printf("R2-CLAMP mode: warp clamps r^2 at undistort_r2_cutoff " "(eyes-in verdict was BAD — A/B only)\n"); printf("unit %s ipd %.1f mm%s neck model %s (fwd %.0f mm, up %.0f mm) " "(config: %s)\n", cfg.serial.c_str(), ipdMm.load() > 0 ? ipdMm.load() : cfg.ipd_default_mm, ipdMm.load() > 0 ? 
" (user)" : " (config default)", neckModel.load() ? "ON" : "OFF", neckFwdM.load() * 1000, neckUpM.load() * 1000, configPath.c_str()); printf("settings: %s screen %.2f m / scale %.2f (live: edit the file, " "keys -/+ dist, [/] scale, s = save)\n", SettingsPath().empty() ? "(APPDATA unset — not persisted)" : SettingsPath().c_str(), screenDistM.load(), screenScale.load()); if (neckModel.load()) printf("arrow keys tune the neck live: left/right = forward -/+5 mm, " "down/up = up -/+5 mm\n"); Ahrs ahrs; Ahrs::Params ap; ahrs.configure(cfg, ap); TundraImu imu; imu.setSampleSink([&ahrs](const ImuSample& s) { ahrs.update(s); }); if (!imu.start(imuSerial)) return FailExit(2); std::this_thread::sleep_for(std::chrono::milliseconds(1500)); if (imu.stats().samples == 0) { fprintf(stderr, "no IMU samples in 1.5 s — aborting (tracking is the " "whole point of this gate)\n"); return FailExit(2); } if (!cfg.serial.empty() && imu.connectedSerial() != cfg.serial) printf("WARNING: config %s but attached unit %s — wrong calibration!\n", cfg.serial.c_str(), imu.connectedSerial().c_str()); // Instant-start bias (M4 step 1): seed the persisted per-serial gyro // bias so tracking runs from the first sample — users don the headset // before launching, the still-window wait was the pain. Keyed to the // ATTACHED unit (config may be the wrong one — warned above). const std::string biasSerial = !imu.connectedSerial().empty() ? imu.connectedSerial() : cfg.serial; { double b[3]; if (LoadGyroBias(biasSerial, b)) { ahrs.seedBias(b); printf("bias: seeded from %s — tracking immediately ('b' recaptures; " "background still-windows keep it fresh)\n", GyroBiasPath(biasSerial).c_str()); } else { printf("bias: none stored for %s — hold still ~1 s to capture " "(one-time; persisted for instant start next run)\n", biasSerial.empty() ? 
"(unknown serial)" : biasSerial.c_str()); } } McuProx prox; bool haveProx = prox.start(); // Brightness (M3 step 4): hands-off by default — the firmware re-applies // its own stored startup brightness (Beyond Utility's value) on every // display-on. Only push a value the user explicitly set, and then keep // re-pushing it after each panel power cycle (idle thread owns that). if (haveProx && brightness.load() >= 0) { if (prox.setBrightness((uint16_t)brightness.load())) printf("brightness %d/1023 applied (override; -1 in settings returns " "control to the headset)\n", brightness.load()); else fprintf(stderr, "brightness set failed — firmware value stands\n"); } NvapiPresenterConfig pcfg; if (refreshHz >= 89.0) { // 90 Hz opt-in (M3 step 4): the 90 Hz DirectMode mode is 3840x1920, // not the native 5088x2544 — lower res for higher rate, user's call. pcfg.width = 3840; pcfg.height = 1920; pcfg.refreshHz = 90.0; printf("90 Hz mode: 3840x1920 (native 5088x2544 is 75 Hz only)\n"); } NvapiD3d12Presenter presenter(pcfg); // Born-dozing at launch (M5/ADR-0005): arm the firmware doze-latch and the // host present-idle BEFORE init() brings video up, so the bringup settles // straight into doze-dark — no emission, fan, or LED flash as the headset // comes alive. The idle thread settles the prox and decides worn -> wake / // not worn -> stay dozed. No-op when the firmware can't doze. if (haveProx && prox.dozeCapable()) { prox.setDoze(true); presenter.requestDoze(true); printf("born dozing: doze-latch armed before video (no bringup flash)\n"); } if (!presenter.init()) return FailExit(3); // init() may have adopted a different rate than requested (headset // firmware rate toggle restricted the mode list) — everything derived // from the rate (capture rate cap below) must use the real one. 
refreshHz = presenter.modeHz(); presenter.requestLateLatch(lateLatchStart); if (!lateLatchStart) printf("late-latch OFF (--no-latelatch) — draw right after vsync, 'l' " "re-enables\n"); if (captureMode && gridMode) { printf("--grid overrides --capture (grid is a warp isolation mode)\n"); captureMode = false; } // Virtual screens — built lazily in renderInit (monitor enumeration and // sources need the presenter's device). Falls back to the test card if // no source comes up. screensReady gates the status thread; the vector // is stable (no realloc) once published. struct ScreenQuad { std::unique_ptr src; ComPtr heap; // Non-shader-visible mirror of heap — CopyDescriptors source for the // warp-direct shared table (copies must come from a CPU-only heap). ComPtr cpuHeap; uint32_t gen = 0xFFFFFFFFu; // Layout in units of primary-monitor width (OS-relative, gaps kept). // World meters = n * kSceneQuadW * screenScale, plane at -screenDistM // — both live knobs, so placement is computed per frame. float w = 0, h = 0, cx = 0, cy = 0; int idx = -1; // this frame's published buffer }; std::vector screens; std::atomic screensReady{false}; // Curved-layout walk order (indices into screens, sorted by OS x) and // the anchor position within it (screen nearest the OS origin — the // primary — which stays dead ahead). Built once with screens. std::vector screenOrder; int anchorPos = 0; if (captureMode) printf("CAPTURE mode: desktop duplication, %s (M3)\n", monitorIndex < 0 ? "all monitors, OS layout" : "single monitor (--monitor)"); // Capture pause is OR-ed from two independent reasons — SteamVR owns // the headset, or the idle policy parked the panels (nobody is looking; // copy+mips of a redrawing desktop is pure GPU burn). Both writers call // apply after flipping their flag; pause drops the OS-side duplication // entirely (smallest possible footprint while parked). 
std::atomic capPauseVr{false}, capPauseIdle{false}; auto applyCapturePause = [&screens, &screensReady, &capPauseVr, &capPauseIdle] { if (!screensReady.load()) return; const bool p = capPauseVr.load() || capPauseIdle.load(); for (auto& s : screens) if (s.src) s.src->setPaused(p); }; // Tray "Sleep now" (M4 UX): jump-starts the idle park. The idle thread // consumes the request; the steamvr watcher publishes VR-active for the // menu's gray-out. std::atomic sleepNowReq{false}; std::atomic steamvrActive{false}; // Tray "Wake up": two consumers because asleep has two shapes — the // idle thread unparks panels; the steamvr watcher treats it as a don // when the HMD sits released awaiting one. Both fire from one click. std::atomic wakeNowReq{false}; std::atomic trayDonReq{false}; // SteamVR-exit reclaim-into-doze handoff (M5): when SteamVR exits while the // headset is NOT worn, the steamvr watcher re-acquires the display straight // into doze (warm) instead of waiting for a don to cold-reacquire — then sets // this so the idle thread adopts the doze (parked/dozing bookkeeping) and a // later don is an instant ~95 ms doze-wake. std::atomic vrDozedReq{false}; // SteamVR coexistence (M4/M5): the release and reclaim triggers are // ASYMMETRIC because they ride different processes. // - RELEASE on vrserver/vrmonitor (the EARLY signal): SteamVR's compositor // (vrcompositor.exe) cannot acquire the headset — cannot even start — // while sauna holds the DirectMode display, so sauna must let go as soon // as SteamVR begins coming up to clear the path for it. // - RECLAIM on vrcompositor.exe going away: that process owns the video, so // its exit is the precise "SteamVR is done with the headset" signal // (vrserver lingers, so watching it would delay reclaim by seconds). We // only arm this after vrcompositor has actually been seen up (kVrActive), // never in the release->compositor-start gap where it is briefly absent. 
// After SteamVR lets go, reclaim per the prox: worn => immediately, not worn
// => on the next don (or straight into doze when the firmware supports it).
// IMU HID reads stay live throughout; the prox MCU interface does NOT — it is
// suspended for the SteamVR session (releaseWithFade) and resumed before the
// reclaim decision (resumeProx).
std::thread steamvrWatch([&] {
  // True if any running process's executable name equals `name`
  // (case-insensitive). Takes a fresh Toolhelp process snapshot per call; a
  // failed snapshot reads as "not running".
  auto procRunning = [](const wchar_t* name) {
    HANDLE snap = CreateToolhelp32Snapshot(TH32CS_SNAPPROCESS, 0);
    if (snap == INVALID_HANDLE_VALUE) return false;
    PROCESSENTRY32W pe{};
    pe.dwSize = sizeof(pe);  // required by Process32FirstW
    bool found = false;
    if (Process32FirstW(snap, &pe)) {
      do {
        if (!_wcsicmp(pe.szExeFile, name)) found = true;
      } while (!found && Process32NextW(snap, &pe));
    }
    CloseHandle(snap);
    return found;
  };
  // vrserver/vrmonitor up = SteamVR starting (release trigger). vrcompositor
  // up = SteamVR owns the video (its presence then absence gates reclaim).
  auto vrServerUp = [&] {
    return procRunning(L"vrserver.exe") || procRunning(L"vrmonitor.exe");
  };
  auto vrCompositorUp = [&] { return procRunning(L"vrcompositor.exe"); };
  // Set/clear the VR pause reason and push the combined state to the sources.
  auto pauseCapture = [&](bool p) {
    capPauseVr.store(p);
    applyCapturePause();
  };
  const bool dozeCapable = haveProx && prox.dozeCapable();
  // Release with a fade (M5/ADR-0005): instead of hard-cutting the display
  // when SteamVR takes over, kick off the firmware doze sweep on the LIVE
  // image and wait ~300 ms for it to reach true black, THEN relinquish. The
  // present loop keeps presenting real content through the sweep, so the
  // firmware brightness ramp dims the actual image to black. Releasing drops
  // video -> the firmware clears its own doze-latch (panels already dark).
  auto releaseWithFade = [&] {
    pauseCapture(true);
    if (dozeCapable) {
      prox.setDoze(true);
      std::this_thread::sleep_for(std::chrono::milliseconds(300));
    }
    presenter.requestHmdOwnership(false);
    // Free the capture GPU ring for the whole SteamVR session: sauna has let
    // the headset go entirely (no present, no desktop sampling), so the
    // hundreds-of-MB ring is dead weight the SteamVR game wants. Wait until the
    // present loop has actually released (draw no longer samples the textures)
    // before stop(); reclaim resume()s them. Skip on timeout (never stop while
    // a frame might still reference them). Mirrors enterSleep's doze stop.
    // Bounded wait: 80 x 25 ms = 2 s.
    for (int i = 0; i < 80 && !presenter.hmdReleasedNow() && !g_stop.load(); i++)
      std::this_thread::sleep_for(std::chrono::milliseconds(25));
    if (presenter.hmdReleasedNow() && screensReady.load())
      for (auto& s : screens)
        if (s.src) s.src->stop();
    // Release the 0101 MCU interface too: SteamVR's BeyondProximity driver
    // opens the same interface when the compositor comes up, and two openers
    // wedge the MCU app (telemetry dies, panels stick mid-doze → power-cycle).
    // Suspend before the compositor loads it. reclaim resume()s the MCU.
    if (haveProx) prox.suspend();
  };
  // Bring the MCU back after the SteamVR session released it. BeyondProximity
  // may still hold 0101 for a beat past compositor exit, so resume() lets the
  // read thread reopen and we wait for telemetry to actually flow before
  // commanding doze — else the wake 'h' is dropped on a still-closed handle
  // and the panels stay dark (the stuck-dim symptom).
  // "Telemetry flows" = prox.reports() moved off its pre-resume value; the
  // wait is bounded at 120 x 50 ms = 6 s.
  auto resumeProx = [&] {
    if (!haveProx) return;
    const uint64_t r0 = prox.reports();
    prox.resume();
    for (int i = 0; i < 120 && prox.reports() == r0 && !g_stop.load(); i++)
      std::this_thread::sleep_for(std::chrono::milliseconds(50));
  };
  // Reclaim with a fade-in (M5/ADR-0005): come back born-dozed — arm the
  // latch and the host present-idle BEFORE re-acquiring, so the bringup
  // settles straight into doze-dark with no emission flash — then wake from
  // doze so the image sweeps up from black instead of snapping on.
  auto reclaimWithFade = [&] {
    if (dozeCapable) {
      prox.setDoze(true);           // latch before video (born-dozed reclaim)
      presenter.requestDoze(true);  // host present-idle through re-acquire
    }
    presenter.requestHmdOwnership(true);
    // Wait out the present loop's re-acquire (it retries while SteamVR is
    // still letting the display go); bounded so a stuck reclaim still moves
    // (200 x 50 ms = 10 s).
    for (int i = 0; i < 200 && presenter.hmdReleasedNow() && !g_stop.load(); i++)
      std::this_thread::sleep_for(std::chrono::milliseconds(50));
    // 'h' may land before the re-acquired link has settled into DOZE_ASLEEP;
    // the firmware (>= 0.4.3) remembers it (doze_wake_pending) and settles
    // into the wake fade — so no DSC/link wait is needed here.
    // Content live again, ready for the fade-in. releaseWithFade stopped the
    // capture sources to free VRAM for SteamVR — rebuild them now (resume() is
    // a no-op if they were never stopped). Clear the VR pause flag too.
    capPauseVr.store(false);
    if (screensReady.load())
      for (auto& s : screens)
        if (s.src) s.src->resume();
    applyCapturePause();
    if (dozeCapable) {
      presenter.requestDoze(false);  // resume active present (still floor-dim)
      prox.setDoze(false);           // 'h' sweeps brightness up -> fade-in
    }
  };
  // Reclaim straight into doze (M5): SteamVR exited while NOT worn — re-
  // acquire the display born-dozed and LEAVE it dozed (warm) instead of
  // waiting for a don to cold-reacquire (3-5 s). The idle thread adopts the
  // doze via vrDozedReq, so a later don is an instant ~95 ms doze-wake.
  // Only called from branches already guarded by dozeCapable.
  auto reclaimIntoDoze = [&] {
    prox.setDoze(true);           // arm latch before re-acquire (born-dozed)
    presenter.requestDoze(true);  // present comes up idled
    presenter.requestHmdOwnership(true);
    for (int i = 0; i < 200 && presenter.hmdReleasedNow() && !g_stop.load(); i++)
      std::this_thread::sleep_for(std::chrono::milliseconds(50));
    // Transfer the capture pause from VR to idle ownership: stays paused
    // (dozed); the idle thread's wake() unpauses it cleanly on the don.
    capPauseIdle.store(true);
    capPauseVr.store(false);
    applyCapturePause();
    vrDozedReq.store(true);  // idle thread adopts the doze bookkeeping
  };
  // Ownership state machine.
  // kOwned    = sauna holds the display (normal operation).
  // kReleased = let go, waiting for SteamVR's compositor to take video.
  // kVrActive = compositor up, SteamVR owns the headset. kAwaitDon = SteamVR
  // done but not worn, waiting for a don to reclaim.
  enum class Own { kOwned, kReleased, kVrActive, kAwaitDon };
  Own own = Own::kOwned;
  while (!g_stop.load()) {
    const bool srvUp = vrServerUp();
    const bool compUp = vrCompositorUp();
    steamvrActive.store(srvUp || compUp);  // tray menu gray-out / sleepState
    const bool worn = haveProx && prox.worn();
    // Reclaim per the prox once SteamVR has let the video go: worn (or no
    // prox) -> reclaim now; else wait for a don. The reclaim branches re-read
    // worn AFTER resumeProx() — prox is suspended while SteamVR owns 0101, so
    // the loop-top worn above reads stale not-worn during a session.
    switch (own) {
      case Own::kOwned:
        trayDonReq.store(false);  // drop stale wake clicks (idle owns those)
        if (srvUp) {  // SteamVR starting — let go so its compositor can start
          printf("[steamvr starting — fading out, releasing HMD displays]\n");
          releaseWithFade();
          own = Own::kReleased;
        }
        break;
      case Own::kReleased:  // released; waiting for the compositor to take over
        if (compUp) {
          printf("[steamvr compositor up — it owns the headset now]\n");
          own = Own::kVrActive;
        } else if (!srvUp) {  // SteamVR aborted before taking video — take back
          resumeProx();  // reopen 0101 + settle before reading worn
          const bool wornR = haveProx && prox.worn();
          if (wornR || !haveProx) {
            printf("[steamvr gone before taking video — reclaiming%s]\n",
                   haveProx ? " (worn)" : "");
            reclaimWithFade();
            own = Own::kOwned;
          } else if (dozeCapable) {
            printf("[steamvr gone before taking video — reclaiming into "
                   "doze (warm)]\n");
            reclaimIntoDoze();
            own = Own::kOwned;
          } else {
            printf("[steamvr gone before taking video — don to reclaim]\n");
            own = Own::kAwaitDon;
          }
        }
        break;
      case Own::kVrActive:  // compositor owns video; reclaim when it exits
        if (!compUp) {
          // vrcompositor exit = SteamVR done with the headset. Reopen our 0101
          // MCU (suspended for the session) and settle BEFORE reading worn —
          // suspended prox reads not-worn, which would mis-route the reclaim.
          resumeProx();
          const bool wornR = haveProx && prox.worn();
          if (wornR || !haveProx) {
            printf("[steamvr released video — fading in, reclaiming%s]\n",
                   haveProx ? " (worn)" : "");
            reclaimWithFade();
            own = Own::kOwned;
          } else if (dozeCapable) {
            printf("[steamvr released video — not worn; reclaiming into "
                   "doze (warm), don to wake]\n");
            reclaimIntoDoze();
            own = Own::kOwned;
          } else {
            printf("[steamvr released video — not worn; don to reclaim]\n");
            own = Own::kAwaitDon;
          }
        }
        break;
      case Own::kAwaitDon:
        // Reached only with a prox that is not doze-capable (see the two
        // branches above). The prox was resumed on the way in, so the
        // loop-top `worn` is live here.
        if (srvUp || compUp) {
          trayDonReq.store(false);
          if (haveProx) prox.suspend();  // SteamVR back — release 0101 again
          own = Own::kReleased;          // SteamVR back before a don
        } else if (worn || trayDonReq.exchange(false)) {
          printf("[%s — fading in, reclaiming HMD displays]\n",
                 worn ? "headset donned" : "tray wake");
          reclaimWithFade();
          own = Own::kOwned;
        }
        break;
    }
    // Slow poll while we own (process snapshot is cheap but not free);
    // fast poll otherwise so release/reclaim feel instant.
    std::this_thread::sleep_for(
        std::chrono::milliseconds(own == Own::kOwned ? 2000 : 500));
  }
});
// Startup snapshot of monitor excludes — renderInit (lazy, render
// thread) reads this, the watcher only diffs against the file (live
// changes to the set need a restart).
const std::vector monExclude = settings.monitor_exclude;
// 's' key: persist the current LIVE values (flag overrides and key
// tunes included — saving them is the explicit user intent).
// Builds a Settings snapshot from the live knobs and writes it with
// SaveSettings(). Called from the console 's' key, the tray "save" item, and
// the tray "open settings" item when the file does not exist yet.
auto saveSettingsNow = [&] {
  Settings s;
  s.screen_distance_m = screenDistM.load();
  s.screen_scale = screenScale.load();
  s.screen_curved = screenCurved.load();
  s.supersample = superSample;
  s.predict_ms = predictMs.load();
  s.predict_auto = predictAuto.load();
  s.neck_enabled = neckModel.load();
  s.neck_forward_mm = neckFwdM.load() * 1000.0;  // live value is meters
  s.neck_up_mm = neckUpM.load() * 1000.0;
  s.ipd_mm = ipdMm.load();
  {
    std::lock_guard l(hkMu);  // hkSpec is shared with the settings watcher
    s.recenter_hotkey = hkSpec;
  }
  s.monitor_exclude = monExclude;
  // Not an atomic knob: read straight from `settings`, which the settings
  // watcher thread overwrites when the file's cursor_overlay changes.
  s.cursor_overlay = settings.cursor_overlay;
  s.wake_on_motion = wakeOnMotion.load();
  s.brightness = brightness.load();
  s.idle_timeout_min = idleTimeoutMin.load();
  s.refresh_hz = refreshHz;
  std::string e;
  if (SaveSettings(s, &e)) {
    printf("[settings saved -> %s]\n", SettingsPath().c_str());
    // Explain the two "follow the headset" sentinel values so the saved file
    // does not look broken.
    if (s.ipd_mm <= 0)
      printf(" (ipd_mm 0 = follow the headset value, currently %.1f mm)\n",
             cfg.ipd_default_mm);
    if (s.brightness < 0)
      printf(" (brightness -1 = headset value rules)\n");
  } else {
    fprintf(stderr, "settings save: %s\n", e.c_str());
  }
};
// Neck calibration request flag — 'n' key and the tray item queue it,
// the guided-capture thread below consumes it.
std::atomic neckCalReq{false};
// Console keys. Polls _kbhit() and sleeps 50 ms when nothing is pending;
// each key is handled by an independent `if`, upper and lower case alike.
std::atomic freezePose{false};
std::thread keys([&] {
  while (!g_stop.load()) {
    if (_kbhit()) {
      int c = _getch();
      if (c == 'q' || c == 'Q') {  // quit: raise the stop flag, stop presenting
        g_stop.store(true);
        presenter.stop();
      }
      if (c == 'r' || c == 'R') {
        ahrs.recenter();
        printf("[recentered]\n");
      }
      if (c == 'b' || c == 'B') {
        ahrs.requestBiasCapture();
        printf("[bias recapture — hold still ~1 s; explicit capture "
               "trusts your stillness over the sane gate]\n");
      }
      if (c == 's' || c == 'S') saveSettingsNow();
      // -/_ and +/= : screen distance in 0.1 m steps, clamped to [0.5, 10] m.
      if (c == '-' || c == '_' || c == '+' || c == '=') {
        double d = screenDistM.load() + ((c == '-' || c == '_') ? -0.1 : 0.1);
        d = d < 0.5 ? 0.5 : d > 10.0 ? 10.0 : d;
        screenDistM.store(d);
        printf("[screen distance %.2f m]\n", d);
      }
      // [ and ] : screen scale in 5 % multiplicative steps, clamped to
      // [0.2, 5.0].
      if (c == '[' || c == ']') {
        double sc = screenScale.load() * (c == '[' ? 1.0 / 1.05 : 1.05);
        sc = sc < 0.2 ? 0.2 : sc > 5.0 ? 5.0 : sc;
        screenScale.store(sc);
        printf("[screen scale %.2f]\n", sc);
      }
      if (c == 'c' || c == 'C') {  // toggle curved / flat layout
        const bool cv = !screenCurved.load();
        screenCurved.store(cv);
        printf("[layout %s] — curved: screens on a cylinder at the set "
               "distance, every screen faces you, edges touch; flat: M3 "
               "plane, OS-faithful gaps\n",
               cv ? "CURVED" : "FLAT");
      }
      if (c == 'w' || c == 'W') {  // toggle warp path (A/B)
        warpDirectOn.store(!warpDirectOn.load());
        printf("[warp %s] — A/B: direct = desktop sampled once per panel "
               "pixel (sharpness); two-pass = M2/M3 baseline\n",
               warpDirectOn.load() ? "DIRECT (single-resample)" : "TWO-PASS");
      }
      if (c == 'n' || c == 'N') {
        neckCalReq.store(true);
        printf("[neck calibration queued — follow the console prompts]\n");
      }
      if (c == 'p' || c == 'P') {  // toggle prediction auto / manual (A/B)
        predictAuto.store(!predictAuto.load());
        printf("[prediction %s] — A/B: auto = measured sample->photon age "
               "per frame; manual = fixed %.0f ms (status age= shows the "
               "span in use)\n",
               predictAuto.load() ? "AUTO" : "MANUAL", predictMs.load());
      }
      if (c == 'l' || c == 'L') {  // toggle late-latch (A/B)
        const bool on = !presenter.lateLatchEnabled();
        presenter.requestLateLatch(on);
        printf("[late-latch %s] — A/B: on = draw just before flip (pose "
               "~10 ms fresher); off = draw right after vsync (M3 "
               "baseline)\n",
               on ? "ON" : "OFF");
      }
      if (c == 'f' || c == 'F') {  // toggle pose freeze
        freezePose.store(!freezePose.load());
        printf("[pose %s] — frozen scene = static image; bowing that "
               "vanishes when frozen is motion-coupled (rolling scan / "
               "latency), not distortion\n",
               freezePose.load() ? "FROZEN" : "live");
      }
      // Extended keys arrive as a 0 / 0xE0 prefix followed by a second
      // _getch() code. NOTE(review): any extended key — not only the four
      // arrows — reaches the store + printf below (with unchanged values),
      // and the neck values are not clamped here.
      if (c == 0 || c == 0xE0) {  // arrow keys: live neck-model tuning
        int k = _getch();
        double f = neckFwdM.load(), u = neckUpM.load();
        if (k == 72) u += 0.005;  // up arrow
        if (k == 80) u -= 0.005;  // down arrow
        if (k == 77) f += 0.005;  // right arrow
        if (k == 75) f -= 0.005;  // left arrow
        neckFwdM.store(f);
        neckUpM.store(u);
        printf("[neck: --neck-mm %.0f,%.0f]\n", f * 1000, u * 1000);
      }
    } else {
      std::this_thread::sleep_for(std::chrono::milliseconds(50));
    }
  }
});
// App shell thread (M3 step 3): tray icon (recenter / save / open
// settings / quit) + the global recenter hotkey. One thread for both —
// tray callbacks need a window owned by the pumping thread, and
// RegisterHotKey is per-thread too. Binding comes from settings
// (recenter_hotkey); a live change bumps hkGen and this re-registers.
std::thread shell([&] {
  // Callback table handed to the window as its create parameter. How
  // shellWndProc consumes it is outside this view.
  ShellCtx ctx;
  ctx.recenter = [&] {
    ahrs.recenter();
    printf("[recentered (tray)]\n");
  };
  ctx.save = saveSettingsNow;
  ctx.neckCal = [&] {
    neckCalReq.store(true);
    printf("[neck calibration queued (tray) — follow the console "
           "prompts]\n");
  };
  ctx.openSettings = [&] {
    const std::string p = SettingsPath();
    if (p.empty()) return;
    // Materialize the file first so the editor has something to open;
    // edits live-apply through the watcher.
    if (GetFileAttributesA(p.c_str()) == INVALID_FILE_ATTRIBUTES)
      saveSettingsNow();
    ShellExecuteA(nullptr, "open", "notepad.exe", p.c_str(), nullptr,
                  SW_SHOWNORMAL);
  };
  ctx.quit = [&] {
    g_stop.store(true);
    presenter.stop();
  };
  ctx.sleepNow = [&] { sleepNowReq.store(true); };
  // One click feeds both wake consumers: the idle thread (wakeNowReq) and the
  // SteamVR watcher's await-don state (trayDonReq).
  ctx.wakeNow = [&] {
    wakeNowReq.store(true);
    trayDonReq.store(true);
  };
  // Tray menu state: 1 = SteamVR active, 2 = asleep, 0 = awake.
  ctx.sleepState = [&]() -> int {
    if (steamvrActive.load()) return 1;  // "VR is active"
    // Dozing (warm, panels driven dark) OR idle-parked panels OR a released
    // display awaiting a don all read as asleep — depth isn't a tray concern,
    // "Wake up" leaves whichever one we're in.
    if (presenter.dozingNow() || presenter.panelsOffNow() ||
        presenter.hmdReleasedNow())
      return 2;
    return 0;
  };
  // Zero-size window that exists only to receive tray callbacks.
  WNDCLASSW wc{};
  wc.lpfnWndProc = shellWndProc;
  wc.hInstance = GetModuleHandleW(nullptr);
  wc.lpszClassName = L"sauna_shell";
  RegisterClassW(&wc);
  HWND hwnd = CreateWindowExW(0, L"sauna_shell", L"sauna", 0, 0, 0, 0, 0,
                              nullptr, nullptr, wc.hInstance, &ctx);
  NOTIFYICONDATAW nid{};
  nid.cbSize = sizeof(nid);
  nid.hWnd = hwnd;
  nid.uID = 1;
  nid.uFlags = NIF_MESSAGE | NIF_ICON | NIF_TIP;
  nid.uCallbackMessage = kTrayMsg;
  nid.hIcon = LoadIconW(nullptr, MAKEINTRESOURCEW(32512));  // IDI_APPLICATION
  wcscpy_s(nid.szTip, L"sauna — spatial display");
  // Tray is best-effort: without a window or icon the loop below still
  // services the hotkey.
  const bool tray = hwnd && Shell_NotifyIconW(NIM_ADD, &nid);
  if (tray)
    printf("tray icon up (right-click: recenter / settings / quit)\n");
  // Last hkGen value acted on; a differing hkGen triggers (re-)registration.
  uint32_t seen = 0;
  bool registered = false;
  while (!g_stop.load()) {
    const uint32_t gen = hkGen.load();
    if (gen != seen) {
      if (registered) {
        UnregisterHotKey(nullptr, 1);
        registered = false;
      }
      std::string name;
      {
        std::lock_guard l(hkMu);
        name = hkSpec;
      }
      // hWnd = nullptr: the hotkey posts WM_HOTKEY to this thread's queue,
      // which the PeekMessage loop below drains.
      if (RegisterHotKey(nullptr, 1, hkMods.load() | MOD_NOREPEAT,
                         hkVk.load())) {
        registered = true;
        printf("global recenter hotkey: %s\n", name.c_str());
      } else {
        fprintf(stderr,
                "global hotkey %s unavailable (in use?) — console "
                "'r' still works\n",
                name.c_str());
      }
      seen = gen;  // also on failure: retry only on the next binding change
    }
    // Non-blocking pump so the loop can keep checking g_stop and hkGen.
    MSG msg;
    while (PeekMessage(&msg, nullptr, 0, 0, PM_REMOVE)) {
      if (msg.message == WM_HOTKEY && msg.wParam == 1) {
        ahrs.recenter();
        printf("[recentered (global hotkey)]\n");
      }
      TranslateMessage(&msg);
      DispatchMessage(&msg);
    }
    std::this_thread::sleep_for(std::chrono::milliseconds(50));
  }
  // Tear down in reverse order, on the same thread that created everything.
  if (registered) UnregisterHotKey(nullptr, 1);
  if (tray) Shell_NotifyIconW(NIM_DELETE, &nid);
  if (hwnd) DestroyWindow(hwnd);
});
// Settings live-apply (M3 step 5 gate: edit -> see it in-headset): poll
// the file's write time; on change, reload and apply the live-able
// knobs.
// Diff-based against the last file state, so a self-save (the
// 's' key) and flag-overridden values apply nothing spuriously.
std::thread watcher([&] {
  // `cur` is the last successfully loaded FILE state (not the live state);
  // only fields that differ between two file versions are applied.
  Settings cur = settings;
  uint64_t lastT = SettingsFileTime();
  while (!g_stop.load()) {
    std::this_thread::sleep_for(std::chrono::seconds(1));
    const uint64_t t = SettingsFileTime();
    // 0 is treated as "no usable timestamp"; an unchanged time means no edit.
    if (t == 0 || t == lastT) continue;
    lastT = t;  // recorded before the load: a bad file is reported once
    Settings ns;
    std::string e;
    if (!LoadSettings(&ns, &e)) {
      fprintf(stderr, "settings reload: %s\n", e.c_str());
      continue;
    }
    if (ns.screen_distance_m != cur.screen_distance_m) {
      screenDistM.store(ns.screen_distance_m);
      printf("[settings: screen distance %.2f m]\n", ns.screen_distance_m);
    }
    if (ns.screen_scale != cur.screen_scale) {
      screenScale.store(ns.screen_scale);
      printf("[settings: screen scale %.2f]\n", ns.screen_scale);
    }
    if (ns.screen_curved != cur.screen_curved) {
      screenCurved.store(ns.screen_curved);
      printf("[settings: layout %s]\n", ns.screen_curved ? "curved" : "flat");
    }
    if (ns.wake_on_motion != cur.wake_on_motion) {
      wakeOnMotion.store(ns.wake_on_motion);
      printf("[settings: wake on motion %s]\n",
             ns.wake_on_motion ? "on" : "off (prox don wakes)");
    }
    if (ns.cursor_overlay != cur.cursor_overlay) {
      // Keep the startup `settings` copy current too — saveSettingsNow reads
      // cursor_overlay from it. NOTE(review): this is a plain (non-atomic)
      // write that another thread may read concurrently.
      settings.cursor_overlay = ns.cursor_overlay;
      if (screensReady.load())
        for (auto& s : screens)
          if (s.src) s.src->setCursorOverlay(ns.cursor_overlay);
      printf("[settings: cursor overlay %d (-1 auto / 0 off / 1 on)]\n",
             ns.cursor_overlay);
    }
    if (ns.predict_ms != cur.predict_ms) {
      predictMs.store(ns.predict_ms);
      printf("[settings: predict %.1f ms]\n", ns.predict_ms);
    }
    if (ns.predict_auto != cur.predict_auto) {
      predictAuto.store(ns.predict_auto);
      printf("[settings: prediction %s]\n",
             ns.predict_auto ? "AUTO (measured age)" : "MANUAL (predict_ms)");
    }
    if (ns.ipd_mm != cur.ipd_mm) {
      ipdMm.store(ns.ipd_mm);
      printf("[settings: ipd %.1f mm]\n", ns.ipd_mm);
    }
    if (ns.neck_enabled != cur.neck_enabled) {
      neckModel.store(ns.neck_enabled);
      printf("[settings: neck model %s]\n", ns.neck_enabled ? "ON" : "OFF");
    }
    // Forward and up are applied as a pair; the file is in mm, the live
    // knobs in meters.
    if (ns.neck_forward_mm != cur.neck_forward_mm ||
        ns.neck_up_mm != cur.neck_up_mm) {
      neckFwdM.store(ns.neck_forward_mm * 0.001);
      neckUpM.store(ns.neck_up_mm * 0.001);
      printf("[settings: neck fwd %.0f mm, up %.0f mm]\n", ns.neck_forward_mm,
             ns.neck_up_mm);
    }
    if (ns.recenter_hotkey != cur.recenter_hotkey) {
      uint32_t m, v;  // filled by ParseHotkey on success
      if (ParseHotkey(ns.recenter_hotkey, &m, &v)) {
        {
          std::lock_guard l(hkMu);
          hkSpec = ns.recenter_hotkey;
        }
        hkMods.store(m);
        hkVk.store(v);
        hkGen.fetch_add(1);  // hotkey thread re-registers
      } else {
        fprintf(stderr,
                "settings: bad recenter_hotkey \"%s\" — keeping "
                "current binding\n",
                ns.recenter_hotkey.c_str());
      }
    }
    if (ns.brightness != cur.brightness) {
      brightness.store(ns.brightness);
      // Negative = leave the headset's own value alone; otherwise push the
      // new value to the device right away.
      if (ns.brightness < 0)
        printf("[settings: brightness hands-off — headset value rules]\n");
      else if (prox.setBrightness((uint16_t)ns.brightness))
        printf("[settings: brightness %d/1023]\n", ns.brightness);
      else
        fprintf(stderr, "settings: brightness set failed\n");
    }
    if (ns.idle_timeout_min != cur.idle_timeout_min) {
      idleTimeoutMin.store(ns.idle_timeout_min);
      printf("[settings: idle timeout %.1f min%s]\n", ns.idle_timeout_min,
             ns.idle_timeout_min <= 0 ? " (disabled)" : "");
    }
    // Restart-only knobs: report the change, apply nothing.
    if (ns.supersample != cur.supersample)
      printf("[settings: supersample %.2f applies on restart (running "
             "%.2f)]\n",
             ns.supersample, superSample);
    if (ns.refresh_hz != cur.refresh_hz)
      printf("[settings: refresh %.0f Hz applies on restart]\n",
             ns.refresh_hz);
    if (ns.monitor_exclude != cur.monitor_exclude)
      printf("[settings: monitor_exclude applies on restart]\n");
    cur = ns;  // the new file state becomes the diff baseline
  }
});
// Idle/burn-in policy (M3 step 4): no head motion above the stillness
// threshold for idle_timeout_min -> panels off via the presenter power
// gate; head motion above the (higher) wake threshold or putting the
// headset on brings them back. AHRS keeps running throughout — pose is
// continuous across the gap (the step 4 gate).
A triggered proximity // sensor (worn) vetoes the timer outright — a worn headset never idles // off, however still the head is; the policy targets the headset left // lying with the prox NOT triggered (firmware's own doff path handles // the triggered-but-doffed case with its dim + 5-min cutoff). std::thread idle([&] { const double kStillRadS = 0.05; // ~3°/s — below = still const double kWakeRadS = 0.15; // ~9°/s — above = deliberate motion const double kTickSec = 0.25; double stillSec = 0.0; bool parked = false; // Tray "Sleep now" wake gating: slept-while-worn must not flap back // on from the prox that is still pressed or from doffing motion — // wake stays disarmed until the prox RELEASES, then a further 30 s // grace before don/motion may wake again. Slept-while-doffed arms // immediately (normal parked behavior). bool waitProxRelease = false; bool manualPark = false; // tray sleep — "idle policy off" must not wake auto wakeArmedAt = std::chrono::steady_clock::now(); // Sleep depth (M5/ADR-0005): doze (warm pipeline, ~95 ms wake) when the // firmware advertises the doze contract (SW_VER >= 0.4.0), else parked // (POWER_OFF, 3-5 s). `parked` stays the generic "asleep" flag the whole // wake/grace machinery below keys off; `dozing` only records WHICH depth // so wake leaves the right one. Resolved once — capability is static. const bool dozeCapable = haveProx && prox.dozeCapable(); bool dozing = false; // Enter the chosen depth. Both depths pause capture — the OS duplication // copy + mips of a desktop nobody sees is pure GPU burn. (Pause gates the // copy WORK; it does not yet free the ring-texture VRAM — that needs a // separate capture teardown.) Doze keeps the video pipeline warm and idles // the host present to a black heartbeat while the firmware sweeps the // panels dark (~95 ms wake); parked powers the panels off (3-5 s wake). 
auto enterSleep = [&] { parked = true; dozing = dozeCapable; capPauseIdle.store(true); applyCapturePause(); // freeze content (last frame stays on the link) if (dozing) { // Order matters for the visible fade (same as the SteamVR release // fade): fire the firmware 'H' sweep on the LIVE (frozen) image and // let it run to true black (~300 ms) BEFORE idling the host present. // Idling first would cut to a black present instantly and the firmware // brightness sweep would have nothing to fade — the missing-sweep // regression. So: 'H' -> wait the sweep -> then present-idle. prox.setDoze(true); std::this_thread::sleep_for(std::chrono::milliseconds(300)); presenter.requestDoze(true); } else { presenter.requestPanelPower(false); } // Free the capture GPU resources (private D3D11 device + ring, ~hundreds // of MB) once the present loop is confirmed IDLE — draw is no longer // sampling, so stop() is safe. The source objects persist (other threads' // accessors stay valid); wake's resume() rebuilds them (~tens of ms, // hidden by the wake sweep). This is the doze/park VRAM drop. for (int i = 0; i < 60 && !g_stop.load(); i++) { if (dozing ? presenter.dozingNow() : presenter.panelsOffNow()) break; std::this_thread::sleep_for(std::chrono::milliseconds(25)); } if (screensReady.load()) for (auto& s : screens) if (s.src) s.src->stop(); }; // Leave sleep by the depth we entered: doze wakes over HID ('h', ~95 ms, // no DSC/link retrain); parked re-modesets + powers on (3-5 s). Both resume // capture and land awake. auto wake = [&] { // Rebuild the capture GPU side FIRST so content is coming back as the // panels relight. Clear the idle pause before resume() so the freshly // spawned workers come up capturing (not paused). The consumer skips each // source (published = -1) until its first frame republishes — black for // ~tens of ms, hidden by the wake brightness sweep. 
capPauseIdle.store(false); applyCapturePause(); if (screensReady.load()) for (auto& s : screens) if (s.src) s.src->resume(); if (dozing) { // Resume mode-rate presenting first so live content is on the link // when the firmware re-lights, then 'h' runs the wake choreography // (display-on, sweep brightness back up, fan/LED to video, clear the // latch). ~95 ms to DISPLAYS_ON, no DSC/link retrain. presenter.requestDoze(false); prox.setDoze(false); } else { presenter.requestPanelPower(true); } parked = false; dozing = false; }; // Launch asleep (M4 UX): a headset on the desk at startup should not // light its panels or burn capture GPU until donned. Let the prox // telemetry settle first (50 ms cadence, worn_ defaults false — an // instant check would false-park a worn headset and cycle its // panels). manualPark: a disabled idle policy must not auto-unpark. if (dozeCapable) { // Came up born-dozed: main armed the firmware doze-latch + host present- // idle BEFORE video, so the bringup settled into doze-dark (no flash). // enterSleep() syncs this thread's state (idempotent — re-sending 'H' // while already dozed is a harmless firmware no-op). Then let the settled // prox decide: worn -> wake, not worn -> stay dozed. enterSleep(); manualPark = true; std::this_thread::sleep_for(std::chrono::milliseconds(600)); // prox settle if (prox.worn() && !g_stop.load()) { // Wake into the fade-in. 'h' may land before the cold bringup has // settled into DOZE_ASLEEP — the firmware (>= 0.4.3) remembers it // (doze_wake_pending) and the settle comes up straight into the wake // fade, so no host-side link/settle timing is needed here. 
wake(); manualPark = false; printf("[launch: worn — waking from born-doze (fade in)]\n"); } else { printf("[launch: not worn — born dozing, don to wake]\n"); } } else if (haveProx) { std::this_thread::sleep_for(std::chrono::milliseconds(600)); if (!prox.worn() && !g_stop.load()) { enterSleep(); manualPark = true; printf("[launch: headset not worn — starting asleep (parked), don to " "wake]\n"); } } bool prevWorn = prox.worn(); // Prox liveness (field: manual sleep then NO wake by don — panels-off // can take the MCU's telemetry down with the video signal, and a dead // prox means don edges never arrive). While parked with the prox // silent > 5 s, motion wake arms regardless of the flag: the Watchman // IMU streams independently of panel power — wake of last resort. uint64_t lastRep = 0; auto lastRepAt = std::chrono::steady_clock::now(); bool proxDeadWarned = false; while (!g_stop.load()) { std::this_thread::sleep_for(std::chrono::milliseconds(250)); const double timeoutMin = idleTimeoutMin.load(); const auto as = ahrs.status(); const bool worn = haveProx && prox.worn(); const bool wornEdge = worn && !prevWorn; prevWorn = worn; const auto now = std::chrono::steady_clock::now(); const uint64_t rep = haveProx ? prox.reports() : 0; if (rep != lastRep) { lastRep = rep; lastRepAt = now; } const bool proxAlive = haveProx && (now - lastRepAt) < std::chrono::seconds(5); if (parked && haveProx && !proxAlive) { if (!proxDeadWarned) { printf("[parked: prox telemetry silent — motion wake armed as " "failsafe]\n"); proxDeadWarned = true; } } else if (proxAlive) { proxDeadWarned = false; } // SteamVR-exit reclaim-into-doze handoff: the watcher re-acquired the // display straight into doze (firmware latched, present idled, capture // paused) and handed it to us. Adopt the doze bookkeeping so the normal // don/grace/motion logic below wakes it — an instant doze-wake, no cold // reacquire on the don. 
if (vrDozedReq.exchange(false)) { parked = true; dozing = true; manualPark = false; waitProxRelease = false; wakeArmedAt = now; // reclaimed not-worn: a don wakes immediately prevWorn = worn; // resync the edge so the next don is a fresh wornEdge stillSec = 0; if (worn) { // Donned DURING the reclaim (worn now, but the watcher reclaimed into // doze on the not-worn read) — wake straight back out so a worn // headset is never left dozing dark. wake(); printf("[steamvr exit: worn during reclaim — waking from doze]\n"); } else if (screensReady.load()) { // Staying dozed: free the capture GPU resources the SteamVR pause only // froze (the present is already dozed/idle from the reclaim, so draw // is not sampling). wake()'s resume() rebuilds on the don. for (auto& s : screens) if (s.src) s.src->stop(); } } // Tray "Wake up": unconditional un-park — bypasses the sleep grace, // waitProxRelease, everything. The user clicked; obey. if (wakeNowReq.exchange(false) && parked) { const bool wasDozing = dozing; wake(); manualPark = false; waitProxRelease = false; wakeArmedAt = now; stillSec = 0; printf("[wake (tray) — %s]\n", wasDozing ? "doze wake (~95 ms)" : "panels on"); if (brightness.load() >= 0) { // chase the firmware display-on write std::this_thread::sleep_for(std::chrono::milliseconds(1500)); prox.setBrightness((uint16_t)brightness.load()); } continue; } if (sleepNowReq.exchange(false) && !parked && !steamvrActive.load()) { // menu grays these, but races exist enterSleep(); manualPark = true; waitProxRelease = worn; wakeArmedAt = now; // immediate arm when not worn printf("[sleep (tray): %s%s]\n", dozing ? "dozing" : "panels off", worn ? " — doff, then 30 s grace before don wakes it" : ""); continue; } // Brightness override survival: the firmware re-applies ITS stored // value a moment after every display-on (doff/don cycles displays // even without our power gate), so a user-set override must chase // each don. Small delay lets the firmware's own write land first. 
if (wornEdge && brightness.load() >= 0) { std::this_thread::sleep_for(std::chrono::milliseconds(1500)); prox.setBrightness((uint16_t)brightness.load()); } if (!parked) { if (timeoutMin <= 0 || !as.initialized || worn) { stillSec = 0; // worn = hard veto, not just a timer pause continue; } stillSec = as.omegaRadS < kStillRadS ? stillSec + kTickSec : 0.0; if (stillSec >= timeoutMin * 60.0) { enterSleep(); printf("[idle: no head motion for %.1f min — %s, %s to wake]\n", timeoutMin, dozing ? "dozing" : "panels off", (wakeOnMotion.load() || !haveProx) ? "move" : "don"); } } else if (waitProxRelease) { // Slept while worn: stay parked through anything until the prox // releases; the release starts the 30 s grace. if (!worn) { waitProxRelease = false; wakeArmedAt = now + std::chrono::seconds(30); printf("[sleep: headset doffed — wake re-arms in 30 s]\n"); } } else if (now < wakeArmedAt) { // Grace window: don/motion during handling must not wake it. A // swallowed don says so — silence here read as "wake is broken" // in the field (the grace anchors at the DOFF, not at the sleep // click, which surprises). if (wornEdge) printf("[don ignored — sleep grace, doff + don after %.0f s " "wakes]\n", std::chrono::duration(wakeArmedAt - now).count()); } else if (((wakeOnMotion.load() || !haveProx || !proxAlive) && as.omegaRadS > kWakeRadS) || wornEdge || (timeoutMin <= 0 && !manualPark)) { // Don-wake re-arms only through a prox UN-trigger: wornEdge by // definition. Donned-during-grace stays asleep until a doff + // re-don (explicit field decision — no still-worn shortcut). const bool wasDozing = dozing; wake(); manualPark = false; stillSec = 0; printf("[wake: %s — %s]\n", wornEdge ? "headset donned" : timeoutMin <= 0 ? "idle policy disabled" : "head motion", wasDozing ? 
"doze wake (~95 ms)" : "panels on"); if (brightness.load() >= 0) { // chase the firmware's display-on write std::this_thread::sleep_for(std::chrono::milliseconds(1500)); prox.setBrightness((uint16_t)brightness.load()); } } } presenter.requestDoze(false); // never leave the present loop idled presenter.requestPanelPower(true); // never leave the gate parked }); // Neck calibration runner (M4 step 4): guided lever-arm capture. Torso // still; 10 s of yaw shakes (observes forward+lateral), 10 s of pitch // nods (observes up+forward); LSQ solve with gates (excitation, lateral // ~0, residual) — reject and ask to redo rather than accept garbage. // Tracking runs untouched throughout (the AHRS tap is append-only). std::thread neckCalThread([&] { auto pause = [&](double sec) { // g_stop-aware; false = aborting for (int i = 0; i < (int)(sec * 10.0); i++) { if (g_stop.load()) return false; std::this_thread::sleep_for(std::chrono::milliseconds(100)); } return true; }; const double imuPos[3] = {cfg.imu.frame.position[0], cfg.imu.frame.position[1], cfg.imu.frame.position[2]}; while (!g_stop.load()) { std::this_thread::sleep_for(std::chrono::milliseconds(200)); if (!neckCalReq.exchange(false)) continue; printf("\n=== NECK CALIBRATION ===\n" "Sit upright, shoulders against the backrest. Keep the TORSO\n" "completely still — only the head moves. 
Starting in 5 s...\n"); NeckCalibrator cal; std::atomic phase{NeckCalibrator::kIdle}; ahrs.setSampleTap([&cal, &phase](const double w[3], const double a[3], const Quat& q, double dt) { cal.feed(w, a, q, dt, phase.load()); }); bool done = pause(5.0); if (done) { printf(">>> SHAKE head side to side (\"no-no\"), brisk but natural " "— 10 s\n"); phase.store(NeckCalibrator::kYaw); done = pause(10.0); } if (done) { printf(">>> now NOD head up and down (\"yes-yes\") — 10 s\n"); phase.store(NeckCalibrator::kPitch); done = pause(10.0); } phase.store(NeckCalibrator::kIdle); ahrs.setSampleTap(nullptr); if (!done) break; const auto r = cal.solve(imuPos); if (!r.ok) { printf("=== REJECTED: %s\n=== neck values unchanged — 'n' or the " "tray item retries\n", r.message.c_str()); continue; } printf("=== SOLVED: %s\n", r.message.c_str()); if (imuPos[0] == 0 && imuPos[1] == 0 && imuPos[2] == 0) printf(" (config imu position is zero — IMU assumed at the eye " "midpoint, pivot->IMU used as pivot->eye)\n"); // Plausibility: forward ~70-120 mm is anatomy; up is POSTURE — small // nods pivot at the skull base (roughly eye height -> up near zero), // nodding from lower in the neck reads 100+ (field: a natural // capture solved up 19 mm and eyes-in preferred it). if (r.neckForwardMm < 20 || r.neckForwardMm > 200 || r.neckUpMm < -30 || r.neckUpMm > 250) printf(" WARNING: implausible lever arm — applying anyway, " "judge by eye (ballpark: fwd 70-120, up 0-180 mm)\n"); neckFwdM.store(r.neckForwardMm * 0.001); neckUpM.store(r.neckUpMm * 0.001); printf("=== applied live — nod and check the world holds still; " "arrows fine-tune, 's' persists\n"); } ahrs.setSampleTap(nullptr); }); // Status line. 
std::thread status([&] { PresentStats prevP{}; ImuStats prevI{}; uint64_t prevCap = 0; bool biasSavedOnce = false; int ticksSinceBiasSave = 0; int hogCooldown = 0; // GPU-hog forensic: at most one sample / 15 s int cpuCooldown = 0; // CPU/stall forensic: same throttle auto lastTick = std::chrono::steady_clock::now(); while (!g_stop.load()) { std::this_thread::sleep_for(std::chrono::seconds(1)); // Tick-stretch detector (M4 step 3 forensics): this 1 s sleep // overshooting means the whole machine stalled (scheduler, DPC // storm, DWM seizure) — and the per-second fps=/imu= counts for // that line are inflated by the longer window (field: fps=132, // imu=1799 during a dwm.exe episode). Print the truth. const auto tickNow = std::chrono::steady_clock::now(); const double tickMs = std::chrono::duration( tickNow - lastTick).count(); lastTick = tickNow; if (cpuCooldown > 0) cpuCooldown--; if (tickMs > 1400.0) { printf(" [stall] status tick stretched to %.0f ms — system-wide " "scheduling stall (counts on this line read high)\n", tickMs); // CPU-side forensics while the episode is live: high dpc=/int= = a // driver storming (LatencyMon will name it); all-low while the // machine still stalls = SMI/firmware, invisible to Windows. if (cpuCooldown == 0) { cpuCooldown = 15; printf(" [cpu] %s\n", CpuStallForensics().c_str()); } } // Bias persistence (M4 step 1): the first refresh saves immediately // (first-run capture / 'b' recapture before a quick quit); later // background refreshes hit the file at most once a minute. The AHRS // keeps dirty state until polled, so skipped ticks lose nothing. 
ticksSinceBiasSave++; if (!biasSavedOnce || ticksSinceBiasSave >= 60) { double b[3]; if (ahrs.takeBiasRefresh(b)) { std::string e; if (SaveGyroBias(biasSerial, b, &e)) { if (!biasSavedOnce) printf("[bias persisted -> %s]\n", GyroBiasPath(biasSerial).c_str()); biasSavedOnce = true; } else { fprintf(stderr, "bias save: %s\n", e.c_str()); } ticksSinceBiasSave = 0; } } auto ps = presenter.stats(); auto is = imu.stats(); auto as = ahrs.status(); // Bias-capture telemetry (field: LHR-599F3B91 startup capture // rejected every window on the sane gate — frozen view, silent // console). Windows evaluate ~1/s and this tick runs 1/s, so the // single-slot event keeps up. { Ahrs::BiasCaptureEvent be; if (ahrs.takeBiasCaptureEvent(&be)) { using K = Ahrs::BiasCaptureEvent::Kind; if (be.kind == K::kRejected) { const char* why = be.sdFail && be.saneFail ? "not still AND mean beyond sane gate" : be.sdFail ? "not still (gyro sd over gate — motion/vibration?)" : "mean beyond sane gate (large rest bias on this " "unit?)"; printf(" bias: window rejected — %s; sd=(%.1f %.1f %.1f) " "mean=(%.1f %.1f %.1f) LSB%s\n", why, be.sd[0], be.sd[1], be.sd[2], be.mean[0], be.mean[1], be.mean[2], (!be.sdFail && be.saneFail) ? 
" — keep holding still (auto-accepts after 5 " "consistent windows), or press 'b'" : ""); } else if (be.kind == K::kAcceptedEscalated) { printf(" bias: accepted after %d consistent still windows — " "rest bias (%.1f %.1f %.1f) LSB beyond the sane gate is " "genuine for this unit\n", be.streak, be.mean[0], be.mean[1], be.mean[2]); } else { printf(" bias: accepted via explicit 'b' (stillness attested)" " — (%.1f %.1f %.1f) LSB\n", be.mean[0], be.mean[1], be.mean[2]); } } } char cap[48] = ""; if (screensReady.load() && !screens.empty()) { uint64_t tot = 0; int live = 0; for (auto& s : screens) { auto cs = s.src->stats(); tot += cs.frames; if (cs.capturing) live++; } snprintf(cap, sizeof(cap), " cap=%lluHz %d/%zu%s", (unsigned long long)(tot - prevCap), live, screens.size(), live < (int)screens.size() ? " [CAP LOST]" : ""); prevCap = tot; } // gap = max IMU delivery gap this second (HID burstiness; clean ~1-3 // ms). frame = max present interval this second (GPU/present stall; // clean ~13 ms at 75 Hz). soft = AHRS micro-gap recoveries. The // rubber-band triage line: gap spike = USB/HID delivery, frame // spike = render/present, soft/resets = device stream holes. // frame split: gpu = render-fence wait (GPU contention/throttle — // correlates with GPU fan spikes), vsw = present-waitable wait // (display link stall: DSC retrain, dim cycle, panel transition), // drw = draw-callback CPU (recording + capture consumer), lat = // latch-timer oversleep (OS timer coalescing / power-save), prs = // DirectMode present call CPU (driver block). frame= spiking while // ALL of gpu/vsw/drw/lat/prs stay small = the loop thread was // preempted between sections (process starvation / EcoQoS). // age = prediction span in use (EMA ms): auto = measured sample-> // photon age (latch margin + half scanout + IMU age — the step-3 // gate truth replacing the magic 20), manual = fixed predict_ms. 
printf(" fps=%5.1f imu=%4lluHz gap=%3ums frame=%3.0fms gpu=%3.0fms " "vsw=%3.0fms drw=%3.0f lat=%3.0f prs=%3.0f clk=%4d age=%4.1f " "vram=%4lluMB corr=%.3f " "ahrs=%s |a|=%.3fg resets=%llu soft=%llu%s%s%s%s%s%s%s\n", (double)(ps.frames - prevP.frames), (unsigned long long)(is.samples - prevI.samples), imu.maxDeliveryGapMs(), ps.maxFrameIntervalMs, ps.maxFenceWaitMs, ps.maxVsyncWaitMs, ps.maxDrawCpuMs, ps.maxLatchOverMs, ps.maxPresentCpuMs, ps.gpuCoreClockMHz, predictAgeMs.load(), (unsigned long long)vramUsedMB(), ahrs.takeMaxCorrectionRadS(), as.initialized ? "ok" : "bias", as.accelMagG, (unsigned long long)as.resets, (unsigned long long)as.softRecoveries, haveProx ? (prox.worn() ? " worn" : " away") : "", cap, warpDirectOn.load() ? " [DIRECT]" : "", presenter.lateLatchEnabled() ? "" : " [LL-OFF]", ps.freeRunning ? " [FREE-RUN]" : "", ps.panelsOff ? " [PANELS OFF (idle)]" : "", ps.hmdReleased ? " [HMD RELEASED (steamvr)]" : ""); prevP = ps; prevI = is; // GPU-hog forensics (M4 step 3): a fence-wait balloon at full clock // means another GPU client — name it while the episode is live. // Blocking ~300 ms inside this thread just delays the next tick. if (hogCooldown > 0) hogCooldown--; if (ps.maxFenceWaitMs > 15.0 && hogCooldown == 0) { hogCooldown = 15; printf(" [gpu hogs] %s\n", TopGpuConsumers().c_str()); } } }); // Render state, lazily created on the presenter's device at first frame. SceneRenderer scene; WarpPass warp; // M4 step 2: warp-direct resources — shared SRV table (3-frame ring of // kWarpDirectMaxScreens slots) + per-eye CB upload ring. The presenter // never runs more than the fence depth (<3) ahead. 
// Warp-direct state. The heap and constant buffer below are created by the
// first present callback; wdReady stays false (two-pass warp only) when any
// step of that setup fails.
WarpDirect wdirect;
bool wdReady = false;
ComPtr wdHeap;   // shader-visible SRV table: 3 ring slices of kWarpDirectMaxScreens
ComPtr wdCbBuf;  // upload buffer: 3 ring slices x 2 eyes x kWdCbStride bytes
uint8_t* wdCbPtr = nullptr;  // CPU mapping of wdCbBuf, written per eye per frame
constexpr uint32_t kWdCbStride = kWarpDirectCbStride;

// Two-pass path: one intermediate texture per eye (eyeW x eyeH, kEyeFmt) that
// the scene pass renders into and the warp pass samples from.
ComPtr eyeTex[2];
ComPtr gridStaging;  // alive until first frame executed
ComPtr eyeRtvHeap, eyeSrvHeap;  // 2 descriptors each (one per eye)
D3D12_CPU_DESCRIPTOR_HANDLE eyeRtv[2]{};  // filled only when not in grid mode
UINT srvStep = 0;  // CBV/SRV/UAV descriptor increment, queried at init
bool renderInit = false;        // first-frame init completed
bool renderInitFailed = false;  // init failed: the callback returns early
uint32_t builtRtvFmt = 0;  // format the warp PSOs were built against

// Clamp the supersample factor to [0.5, 2.5] before sizing the eye textures.
if (superSample < 0.5) superSample = 0.5;
if (superSample > 2.5) superSample = 2.5;
// +0.5 rounds to nearest before the truncating cast.
const uint32_t eyeW = (uint32_t)(cfg.eye_width_px * superSample + 0.5);
const uint32_t eyeH = (uint32_t)(cfg.eye_height_px * superSample + 0.5);
const DXGI_FORMAT kEyeFmt = DXGI_FORMAT_R8G8B8A8_UNORM;

// IPD: user setting beats the config default (S3: the real driver takes
// it from a user setting; config default_mm was 6 mm off on one unit).
// Read per frame — live-applies from a settings edit.

// VRAM leak diagnostic (--vram-cycle N): drive N release/reacquire cycles
// (the SteamVR coexistence path) and log per-process VRAM each step, then
// quit. A deterministic repro/verify loop for the scanout-surface leak — no
// SteamVR needed; the present loop below services the ownership toggles.
std::thread vramCycleThread; if (vramCycle > 0) { vramCycleThread = std::thread([&] { std::this_thread::sleep_for(std::chrono::seconds(4)); // init settle printf("[VRAM] cycle start: %llu MB\n", (unsigned long long)vramUsedMB()); for (int c = 0; c < vramCycle && !g_stop.load(); c++) { presenter.requestHmdOwnership(false); std::this_thread::sleep_for(std::chrono::milliseconds(1500)); printf("[VRAM] cycle %2d released: %llu MB\n", c, (unsigned long long)vramUsedMB()); presenter.requestHmdOwnership(true); std::this_thread::sleep_for(std::chrono::milliseconds(2500)); printf("[VRAM] cycle %2d reacquired: %llu MB\n", c, (unsigned long long)vramUsedMB()); } printf("[VRAM] cycle done: %llu MB\n", (unsigned long long)vramUsedMB()); g_stop.store(true); presenter.stop(); }); } presenter.run(seconds, [&](const FrameContext& fc) { auto* list = static_cast(fc.cmdList); auto* rtv = static_cast(fc.rtv); if (!renderInit) { if (renderInitFailed) return; ComPtr dev; list->GetDevice(IID_PPV_ARGS(&dev)); bool ok = (gridMode || scene.init(dev.Get(), kEyeFmt)) && warp.init(dev.Get(), (DXGI_FORMAT)fc.rtvFormat, cfg); if (ok) { D3D12_RESOURCE_DESC td{}; td.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D; td.Width = eyeW; td.Height = eyeH; td.DepthOrArraySize = 1; td.MipLevels = 1; td.Format = kEyeFmt; td.SampleDesc.Count = 1; if (!gridMode) td.Flags = D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET; D3D12_HEAP_PROPERTIES def{D3D12_HEAP_TYPE_DEFAULT}; D3D12_CLEAR_VALUE cv{kEyeFmt, {0.05f, 0.05f, 0.08f, 1.0f}}; D3D12_DESCRIPTOR_HEAP_DESC rh{D3D12_DESCRIPTOR_HEAP_TYPE_RTV, 2}; dev->CreateDescriptorHeap(&rh, IID_PPV_ARGS(&eyeRtvHeap)); D3D12_DESCRIPTOR_HEAP_DESC sh{}; sh.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; sh.NumDescriptors = 2; sh.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; dev->CreateDescriptorHeap(&sh, IID_PPV_ARGS(&eyeSrvHeap)); srvStep = dev->GetDescriptorHandleIncrementSize( D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); const UINT rtvStep = dev->GetDescriptorHandleIncrementSize( 
D3D12_DESCRIPTOR_HEAP_TYPE_RTV); for (int e = 0; e < 2 && ok; e++) { if (FAILED(dev->CreateCommittedResource( &def, D3D12_HEAP_FLAG_NONE, &td, gridMode ? D3D12_RESOURCE_STATE_COPY_DEST : D3D12_RESOURCE_STATE_RENDER_TARGET, gridMode ? nullptr : &cv, IID_PPV_ARGS(&eyeTex[e])))) { ok = false; break; } if (!gridMode) { eyeRtv[e] = eyeRtvHeap->GetCPUDescriptorHandleForHeapStart(); eyeRtv[e].ptr += e * rtvStep; dev->CreateRenderTargetView(eyeTex[e].Get(), nullptr, eyeRtv[e]); } D3D12_SHADER_RESOURCE_VIEW_DESC sv{}; sv.Format = kEyeFmt; sv.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; sv.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; sv.Texture2D.MipLevels = 1; D3D12_CPU_DESCRIPTOR_HANDLE h = eyeSrvHeap->GetCPUDescriptorHandleForHeapStart(); h.ptr += e * srvStep; dev->CreateShaderResourceView(eyeTex[e].Get(), &sv, h); } if (ok && gridMode) { // Upload the source-space grid into both eye textures via this // frame's command list; transition to PSR for good. std::vector grid; makeSourceGrid(&grid, eyeW); D3D12_PLACED_SUBRESOURCE_FOOTPRINT fp{}; UINT64 upSize = 0; dev->GetCopyableFootprints(&td, 0, 1, 0, &fp, nullptr, nullptr, &upSize); D3D12_HEAP_PROPERTIES up{D3D12_HEAP_TYPE_UPLOAD}; D3D12_RESOURCE_DESC bd{}; bd.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; bd.Width = upSize; bd.Height = 1; bd.DepthOrArraySize = 1; bd.MipLevels = 1; bd.SampleDesc.Count = 1; bd.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; dev->CreateCommittedResource(&up, D3D12_HEAP_FLAG_NONE, &bd, D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, IID_PPV_ARGS(&gridStaging)); uint8_t* p = nullptr; gridStaging->Map(0, nullptr, (void**)&p); for (uint32_t y = 0; y < eyeH; y++) memcpy(p + fp.Offset + y * fp.Footprint.RowPitch, &grid[(size_t)y * eyeW], (size_t)eyeW * 4); gridStaging->Unmap(0, nullptr); for (int e = 0; e < 2; e++) { D3D12_TEXTURE_COPY_LOCATION src{ gridStaging.Get(), D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT}; src.PlacedFootprint = fp; D3D12_TEXTURE_COPY_LOCATION dst{ eyeTex[e].Get(), 
D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX}; list->CopyTextureRegion(&dst, 0, 0, 0, &src, nullptr); D3D12_RESOURCE_BARRIER bar{}; bar.Transition.pResource = eyeTex[e].Get(); bar.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST; bar.Transition.StateAfter = D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE; bar.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; list->ResourceBarrier(1, &bar); } printf("GRID MODE: static source-space grid through the warp — " "rectilinear through the lens = warp/config good on this " "unit; bowed = unit/config mismatch\n"); } } if (!ok) { renderInitFailed = true; fprintf(stderr, "render init failed — presenting black\n"); return; } if (captureMode && canted && !gridMode) { // Warp-direct (M4 step 2): best effort — failure just pins the // two-pass path. Canted only (the ray reconstruction inverts the // canted projection; the --fold A/B branch has no direct mapping). wdReady = wdirect.init(dev.Get(), (DXGI_FORMAT)fc.rtvFormat, cfg); if (wdReady) { D3D12_DESCRIPTOR_HEAP_DESC wh{}; wh.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; wh.NumDescriptors = 3 * kWarpDirectMaxScreens; wh.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; wdReady = SUCCEEDED( dev->CreateDescriptorHeap(&wh, IID_PPV_ARGS(&wdHeap))); if (wdReady) { // Null-SRV every slot: the shader's unrolled loop never samples // past screenCount, but a bound table range must not hold // garbage descriptors. 
D3D12_SHADER_RESOURCE_VIEW_DESC nv{}; nv.Format = DXGI_FORMAT_B8G8R8A8_UNORM; nv.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; nv.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; nv.Texture2D.MipLevels = 1; for (uint32_t i = 0; i < wh.NumDescriptors; i++) { D3D12_CPU_DESCRIPTOR_HANDLE h = wdHeap->GetCPUDescriptorHandleForHeapStart(); h.ptr += i * srvStep; dev->CreateShaderResourceView(nullptr, &nv, h); } } } if (wdReady) { D3D12_HEAP_PROPERTIES up{D3D12_HEAP_TYPE_UPLOAD}; D3D12_RESOURCE_DESC bd{}; bd.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; bd.Width = 3 * 2 * kWdCbStride; bd.Height = 1; bd.DepthOrArraySize = 1; bd.MipLevels = 1; bd.SampleDesc.Count = 1; bd.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; wdReady = SUCCEEDED(dev->CreateCommittedResource( &up, D3D12_HEAP_FLAG_NONE, &bd, D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, IID_PPV_ARGS(&wdCbBuf))) && SUCCEEDED(wdCbBuf->Map(0, nullptr, (void**)&wdCbPtr)); } if (!wdReady) fprintf(stderr, "warp-direct init failed — two-pass path only ('w' " "inactive)\n"); } if (captureMode) { auto mons = EnumerateMonitors(dev.Get()); if (monitorIndex >= 0) { std::vector one; for (const auto& m : mons) if (m.outputIndex == monitorIndex) one.push_back(m); mons.swap(one); } if (mons.empty()) { fprintf(stderr, "capture: no monitors found — test-card fallback\n"); } else { // Anchor: primary monitor (else first) width -> 1.0 normalized // (= kSceneQuadW * screenScale world meters, the M2 angular size // at scale 1); one scale for all so OS-relative placement (gaps // included) is preserved. Desktop y is down, world y up. 
const MonitorDesc* prim = &mons[0]; for (const auto& m : mons) if (m.primary) prim = &m; const float norm = 1.0f / prim->width(); const float pcx = (prim->left + prim->right) * 0.5f; const float pcy = (prim->top + prim->bottom) * 0.5f; for (const auto& m : mons) { bool excluded = false; for (int x : monExclude) if (x == m.outputIndex) excluded = true; if (excluded) { printf("screen: output %d excluded (settings)\n", m.outputIndex); continue; } ScreenQuad sq; sq.src.reset(new DuplicationSource(m.outputIndex)); // Capture rate cap (M4 step 3): content redrawing faster than // the panel costs pure GPU contention (copy+mips per frame). sq.src->setMinFramePeriodMs(1000.0 / refreshHz - 1.5); sq.src->setCursorOverlay(settings.cursor_overlay); if (!sq.src->start(dev.Get())) { fprintf(stderr, "capture: output %d failed to start — skipped\n", m.outputIndex); continue; } sq.w = m.width() * norm; sq.h = m.height() * norm; sq.cx = ((m.left + m.right) * 0.5f - pcx) * norm; sq.cy = -((m.top + m.bottom) * 0.5f - pcy) * norm; // Same-row monitors land center-aligned: OS layouts usually // align TOP edges, so unequal panels leave a small center // offset that reads as a step in VR (field: "misaligned by // ~1/4 of the taskbar"). A genuinely stacked monitor (offset // >= half the primary height) keeps its OS placement. 
if (sq.cy != 0 && std::fabs(sq.cy) < 0.5f * prim->height() * norm) { printf("screen: output %d vertical offset %+.3f snapped to " "the primary row\n", m.outputIndex, sq.cy); sq.cy = 0; } printf("screen: output %d \"%ls\" %dx%d px -> %.2fx%.2f m at " "(%.2f, %.2f) (scale %.2f)\n", m.outputIndex, m.name, m.width(), m.height(), sq.w * kSceneQuadW * (float)screenScale.load(), sq.h * kSceneQuadW * (float)screenScale.load(), sq.cx * kSceneQuadW * (float)screenScale.load(), sq.cy * kSceneQuadW * (float)screenScale.load(), screenScale.load()); screens.push_back(std::move(sq)); } } if (screens.empty()) fprintf(stderr, "capture: no sources up — test-card fallback\n"); // Curved layout walks screens edge-to-edge in OS x order from the // screen nearest the OS origin (the primary). screenOrder.clear(); for (int i = 0; i < (int)screens.size(); i++) screenOrder.push_back(i); std::sort(screenOrder.begin(), screenOrder.end(), [&](int a, int b) { return screens[a].cx < screens[b].cx; }); anchorPos = 0; for (int p = 1; p < (int)screenOrder.size(); p++) if (std::fabs(screens[screenOrder[p]].cx) < std::fabs(screens[screenOrder[anchorPos]].cx)) anchorPos = p; screensReady.store(true); // Pause state may predate the sources (launch-asleep parks before // render init builds them) — apply it now that they exist. applyCapturePause(); } renderInit = true; builtRtvFmt = fc.rtvFormat; printf("render pipeline up: eye %ux%u -> warp -> %ux%u (fmt %u)\n", eyeW, eyeH, fc.width, fc.height, fc.rtvFormat); } // DM surfaces re-acquired after a SteamVR session can come back in a // different typeless family (S2: the format VARIES across // acquisitions) — the warp PSOs are format-bound, rebuild on change. 
if (renderInit && fc.rtvFormat != builtRtvFmt) { ComPtr dev; list->GetDevice(IID_PPV_ARGS(&dev)); printf("rtv format changed %u -> %u (re-acquire) — rebuilding warp " "PSOs\n", builtRtvFmt, fc.rtvFormat); if (!warp.init(dev.Get(), (DXGI_FORMAT)fc.rtvFormat, cfg)) { renderInit = false; // black rather than scan out garbage renderInitFailed = true; fprintf(stderr, "warp rebuild failed — presenting black\n"); return; } if (wdReady) wdReady = wdirect.init(dev.Get(), (DXGI_FORMAT)fc.rtvFormat, cfg); builtRtvFmt = fc.rtvFormat; } static Quat frozenQ{}; static bool wasFrozen = false; // Pose pick (M4 step 3): auto = extrapolate to this frame's measured // photon time (vsync-derived; falls back to the manual span when no // vsync reference — free-run, startup). Manual = fixed predict_ms. Quat q; double ageMsNow; if (predictAuto.load() && fc.photonTimeNs != 0) { double ageSec = 0.0; q = ahrs.poseAtHostNs(fc.photonTimeNs, &ageSec); ageMsNow = ageSec * 1000.0; } else { const double ms = predictMs.load(); q = ahrs.pose(ms * 0.001); ageMsNow = ms; } { // EMA for the status line (render thread only writes). static double ageEma = 0.0; ageEma = ageEma == 0.0 ? ageMsNow : ageEma + 0.05 * (ageMsNow - ageEma); predictAgeMs.store(ageEma); } if (freezePose.load()) { if (!wasFrozen) { frozenQ = q; wasFrozen = true; } q = frozenQ; } else { wasFrozen = false; } // Neck model: head rotates about the neck pivot, so the eye midpoint // translates as headPos = R*v - v (v = neck->eyes in head frame; the // "-v" pins the identity pose at the world origin). float headPos[3] = {0, 0, 0}; if (neckModel) { const float v[3] = {0, (float)neckUpM.load(), (float)-neckFwdM.load()}; quatRotate(q, v, headPos); headPos[0] -= v[0]; headPos[1] -= v[1]; headPos[2] -= v[2]; } D3D12_RESOURCE_BARRIER bars[2]{}; bool directNow = false; // warp-direct this frame (decided below) if (!gridMode) { // Sample each screen's latest frame ONCE per present (both eyes share // the buffer). 
SRV rings are rebuilt when a source regenerates its // textures (display-mode change). Sources publish only CPU-confirmed // complete frames, so the only sync is COMMON <-> PSR around use. bool anyContent = false; for (auto& s : screens) { uint32_t gen = 0; s.idx = s.src->latest(&gen); if (s.idx >= 0 && (gen != s.gen || !s.heap)) { ComPtr dev; list->GetDevice(IID_PPV_ARGS(&dev)); D3D12_DESCRIPTOR_HEAP_DESC hd{}; hd.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; hd.NumDescriptors = s.src->textureCount(); hd.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; s.heap.Reset(); dev->CreateDescriptorHeap(&hd, IID_PPV_ARGS(&s.heap)); s.cpuHeap.Reset(); D3D12_DESCRIPTOR_HEAP_DESC ch = hd; // CPU mirror (copy source) ch.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE; dev->CreateDescriptorHeap(&ch, IID_PPV_ARGS(&s.cpuHeap)); for (uint32_t i = 0; i < s.src->textureCount(); i++) { D3D12_SHADER_RESOURCE_VIEW_DESC sv{}; sv.Format = DXGI_FORMAT_B8G8R8A8_UNORM; sv.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; sv.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; sv.Texture2D.MipLevels = s.src->mipLevels(); D3D12_CPU_DESCRIPTOR_HANDLE h = s.heap->GetCPUDescriptorHandleForHeapStart(); h.ptr += i * srvStep; dev->CreateShaderResourceView(s.src->texture(i), &sv, h); D3D12_CPU_DESCRIPTOR_HANDLE hc = s.cpuHeap->GetCPUDescriptorHandleForHeapStart(); hc.ptr += i * srvStep; dev->CreateShaderResourceView(s.src->texture(i), &sv, hc); } s.gen = gen; } if (s.idx >= 0) anyContent = true; } std::vector cbs; for (auto& s : screens) { if (s.idx < 0) continue; D3D12_RESOURCE_BARRIER cb{}; cb.Transition.pResource = s.src->texture((uint32_t)s.idx); cb.Transition.StateBefore = D3D12_RESOURCE_STATE_COMMON; cb.Transition.StateAfter = D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE; cb.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; cbs.push_back(cb); } if (!cbs.empty()) list->ResourceBarrier((UINT)cbs.size(), cbs.data()); // Per-eye scene: one pass, all screen quads (or the test 
card). // Placement knobs are live (settings watcher / -+ [] keys): one // normalized->meters factor and the plane depth, read per frame. const float quadScale = (float)(kSceneQuadW * screenScale.load()); const float quadZ = (float)-screenDistM.load(); // Curved arrangement (M4 ergonomics): azimuth per screen on a // cylinder of radius screenDistM. A flat panel of width w centered // at distance R subtends half-angle atan(w/2R); adjacent edges // touch when neighboring center azimuths differ by the sum of the // two half-angles. Walk outward from the anchor (primary, az 0) in // OS x order. Distance/scale are live knobs, so this is per-frame // (a handful of atans). placeScreen() yields the world center + // azimuth either mode; flat keeps the OS-faithful M3 plane. const bool curvedNow = screenCurved.load(); const float cylR = (float)screenDistM.load(); float azs[kWarpDirectMaxScreens] = {}; if (curvedNow && !screenOrder.empty()) { auto halfAng = [&](int si) { return std::atan(screens[si].w * quadScale * 0.5f / cylR); }; for (int p = anchorPos + 1; p < (int)screenOrder.size(); p++) { if (screenOrder[p] >= kWarpDirectMaxScreens) continue; azs[screenOrder[p]] = azs[screenOrder[p - 1]] + halfAng(screenOrder[p - 1]) + halfAng(screenOrder[p]); } for (int p = anchorPos - 1; p >= 0; p--) { if (screenOrder[p] >= kWarpDirectMaxScreens) continue; azs[screenOrder[p]] = azs[screenOrder[p + 1]] - halfAng(screenOrder[p + 1]) - halfAng(screenOrder[p]); } } auto placeScreen = [&](int si, float* az, float* ccx, float* ccz) { if (curvedNow && si < kWarpDirectMaxScreens) { *az = azs[si]; *ccx = cylR * std::sin(*az); *ccz = -cylR * std::cos(*az); } else { *az = 0; *ccx = screens[si].cx * quadScale; *ccz = quadZ; } }; const double ipdNow = ipdMm.load(); const float ipdM = (float)((ipdNow > 0 ? 
ipdNow : cfg.ipd_default_mm) * 0.001); const float tEye[2][3] = {{-ipdM / 2, 0, 0}, {+ipdM / 2, 0, 0}}; directNow = warpDirectOn.load() && wdReady && anyContent; if (directNow) { // Warp-direct (M4 step 2): no eye pass at all — each panel pixel's // warp ray samples the desktop textures once. Build this frame's // SRV table slice + per-eye CBs. const uint32_t ring = (uint32_t)(fc.frameIndex % 3); ComPtr dev; list->GetDevice(IID_PPV_ARGS(&dev)); WarpDirectCB wcb{}; D3D12_CPU_DESCRIPTOR_HANDLE tbl = wdHeap->GetCPUDescriptorHandleForHeapStart(); tbl.ptr += ring * kWarpDirectMaxScreens * srvStep; int n = 0; for (int si = 0; si < (int)screens.size(); si++) { auto& s = screens[si]; if (s.idx < 0 || n >= kWarpDirectMaxScreens) continue; D3D12_CPU_DESCRIPTOR_HANDLE src = s.cpuHeap->GetCPUDescriptorHandleForHeapStart(); src.ptr += (uint32_t)s.idx * srvStep; D3D12_CPU_DESCRIPTOR_HANDLE d = tbl; d.ptr += n * srvStep; dev->CopyDescriptorsSimple(1, d, src, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); // Oriented plane basis (curved: yawed about +Y to face the // user; flat: az = 0 reproduces the old z = quadZ rect). const float w = s.w * quadScale, h = s.h * quadScale; float az, ccx, ccz; placeScreen(si, &az, &ccx, &ccz); const float cy = s.cy * quadScale; const float rx = std::cos(az), rz = std::sin(az); // unit right wcb.p0n[n][0] = ccx - rx * w * 0.5f; // top-left corner wcb.p0n[n][1] = cy + h * 0.5f; wcb.p0n[n][2] = ccz - rz * w * 0.5f; wcb.p0n[n][3] = -rz; // normal = (-sin, 0, cos) wcb.un[n][0] = rx / w; wcb.un[n][1] = 0; wcb.un[n][2] = rz / w; wcb.un[n][3] = 0; wcb.vn[n][0] = 0; wcb.vn[n][1] = -1.0f / h; wcb.vn[n][2] = 0; wcb.vn[n][3] = rx; n++; } // Eye->world per eye: M = R(q) * e2h, o = R(q)*tEye + headPos — // the exact rigid inverse of viewFromPose + applyCant. 
const double qw = q.w, qx = q.x, qy = q.y, qz = q.z; const double Rw[9] = { 1 - 2 * (qy * qy + qz * qz), 2 * (qx * qy - qw * qz), 2 * (qx * qz + qw * qy), 2 * (qx * qy + qw * qz), 1 - 2 * (qx * qx + qz * qz), 2 * (qy * qz - qw * qx), 2 * (qx * qz - qw * qy), 2 * (qy * qz + qw * qx), 1 - 2 * (qx * qx + qy * qy)}; list->OMSetRenderTargets(1, rtv, FALSE, nullptr); ID3D12DescriptorHeap* wheaps[] = {wdHeap.Get()}; list->SetDescriptorHeaps(1, wheaps); D3D12_GPU_DESCRIPTOR_HANDLE tblGpu = wdHeap->GetGPUDescriptorHandleForHeapStart(); tblGpu.ptr += ring * kWarpDirectMaxScreens * srvStep; for (int e = 0; e < 2; e++) { wdirect.fillEyeConsts(e, &wcb); const auto& e2h = cfg.eye[e].eye_to_head; for (int r = 0; r < 3; r++) { for (int cc = 0; cc < 3; cc++) { double m = 0; for (int k = 0; k < 3; k++) m += Rw[r * 3 + k] * e2h[k][cc]; wcb.rwe[r][cc] = (float)m; } wcb.rwe[r][3] = 0; double o = headPos[r]; for (int k = 0; k < 3; k++) o += Rw[r * 3 + k] * tEye[e][k]; wcb.origin[r] = (float)o; } wcb.origin[3] = 0; // unused (plane bases carry the geometry) wcb.misc[0] = (float)n; wcb.misc[1] = 1.0f; // color mult on wcb.misc[2] = r2Clamp ? 
1.0f : 0.0f; wcb.misc[3] = 0.0f; const uint32_t slot = ring * 2 + (uint32_t)e; memcpy(wdCbPtr + slot * kWdCbStride, &wcb, sizeof(wcb)); wdirect.record( list, e, wdCbBuf->GetGPUVirtualAddress() + slot * kWdCbStride, tblGpu, fc.width, fc.height); } } else { for (int e = 0; e < 2; e++) { float V[16], P[16], VP[16]; viewFromPose(q, headPos, tEye[e], V); if (canted) applyCant(cfg.eye[e].eye_to_head, V); projFromTangents(frus[e][0], frus[e][1], frus[e][2], frus[e][3], 0.05f, 100.0f, P); matMul(P, V, VP); scene.beginPass(list, eyeRtv[e], eyeW, eyeH); if (anyContent) { for (int si = 0; si < (int)screens.size(); si++) { auto& s = screens[si]; if (s.idx < 0) continue; float M[16], MVP[16]; float az, ccx, ccz; placeScreen(si, &az, &ccx, &ccz); MakeQuadModelYaw(s.w * quadScale, s.h * quadScale, ccx, s.cy * quadScale, ccz, az, M); matMul(VP, M, MVP); D3D12_GPU_DESCRIPTOR_HANDLE h = s.heap->GetGPUDescriptorHandleForHeapStart(); h.ptr += (uint32_t)s.idx * srvStep; scene.drawQuad(list, MVP, s.heap.Get(), h); } } else { float M[16], MVP[16]; MakeQuadModel(quadScale, kSceneQuadH / kSceneQuadW * quadScale, 0, 0, quadZ, M); matMul(VP, M, MVP); scene.drawQuad(list, MVP); } } } // Screens back to COMMON — after the direct draws or the scene pass, // whichever consumed them this frame. if (!cbs.empty()) { for (auto& cb : cbs) { cb.Transition.StateBefore = D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE; cb.Transition.StateAfter = D3D12_RESOURCE_STATE_COMMON; } list->ResourceBarrier((UINT)cbs.size(), cbs.data()); } if (!directNow) { // Eyes -> SRV for the two-pass warp. 
for (int e = 0; e < 2; e++) { bars[e].Transition.pResource = eyeTex[e].Get(); bars[e].Transition.StateBefore = D3D12_RESOURCE_STATE_RENDER_TARGET; bars[e].Transition.StateAfter = D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE; bars[e].Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; } list->ResourceBarrier(2, bars); } } if (!directNow) { list->OMSetRenderTargets(1, rtv, FALSE, nullptr); ID3D12DescriptorHeap* heaps[] = {eyeSrvHeap.Get()}; list->SetDescriptorHeaps(1, heaps); for (int e = 0; e < 2; e++) { D3D12_GPU_DESCRIPTOR_HANDLE h = eyeSrvHeap->GetGPUDescriptorHandleForHeapStart(); h.ptr += e * srvStep; warp.record(list, e, h, fc.width, fc.height, /*pointSample=*/false, /*colorMult=*/true, r2Clamp); } } if (!gridMode && !directNow) { for (int e = 0; e < 2; e++) { bars[e].Transition.StateBefore = D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE; bars[e].Transition.StateAfter = D3D12_RESOURCE_STATE_RENDER_TARGET; } list->ResourceBarrier(2, bars); } if (g_stop.load()) presenter.stop(); }); g_stop.store(true); if (vramCycleThread.joinable()) vramCycleThread.join(); keys.join(); shell.join(); watcher.join(); steamvrWatch.join(); idle.join(); neckCalThread.join(); status.join(); prox.stop(); imu.stop(); auto ps = presenter.stats(); if (ps.displayLost) { printf("VERDICT: DISPLAY LOST — exited cleanly; restart after reconnect\n"); return FailExit(4); } printf("VERDICT: %s (eyes-in criteria are the M2 exit: world-fixed, " "fusable, no swim, recenter)\n", ps.frames > 0 && ps.presentErrors == 0 ? "RAN CLEAN" : "ERRORS"); return 0; }