#include "render/warp_direct.h"

// NOTE(review): the three system header names were missing from the copy
// under review; they are restored here from what this file actually calls
// (D3DCompile, fprintf, memcpy). Confirm against version control.
#include <d3dcompiler.h>

#include <cstdio>
#include <cstring>

using Microsoft::WRL::ComPtr;

namespace sauna {
namespace {

// Per panel pixel: POLY3 warp (S3-verified EvalDistortion, same as
// warp_pass.cpp) -> source UV -> tangent ray -> world -> per-screen
// ORIENTED plane hit (curved arrangement: every screen carries its own
// plane basis; the flat layout is the special case normal = +Z) -> one
// SampleGrad. Per channel independently (lateral CA: the three rays
// differ, and may even land on different screens at a border).
// Gradients: ddx/ddy of the green-channel WORLD RAY are taken in the
// uniform-flow prologue, then chain-ruled onto each screen's uv inside
// the divergent loop (pure ALU on uniform-derived values — SampleGrad
// stays legal).
//
// The shader hard-codes 8 screens (p0n/un/vn/scr array sizes); init() sizes
// the SRV range with kWarpDirectMaxScreens, which is presumably the same
// value — TODO confirm in warp_direct.h.
const char kShader[] = R"(
cbuffer C : register(b0) {
  float4 centerScale;  // cx, cy, scale, r2 cutoff
  float4 kr;
  float4 kg;
  float4 kb;
  float4 colorMult;
  float4 tanLRTB;      // tangent at u=0, u=1, v=0 (top), v=1 (bottom)
  float4 origin;       // eye pos world xyz, w unused
  float4 rwe0;         // eye->world rotation rows
  float4 rwe1;
  float4 rwe2;
  float4 misc;         // screenCount, colorMultOn, r2Clamp, unused
  float4 p0n[8];       // screen top-left corner xyz; w = normal.x
  float4 un[8];        // u axis (unit right / width m) xyz; w = normal.y
  float4 vn[8];        // v axis (unit down / height m) xyz; w = normal.z
};
Texture2D scr[8] : register(t0);
SamplerState linBorder : register(s0);

struct VSOut {
  float4 pos : SV_Position;
  float2 uv : TEXCOORD0;
};

VSOut vsmain(uint id : SV_VertexID) {
  VSOut o;
  float2 uv = float2((id << 1) & 2, id & 2);
  o.pos = float4(uv * float2(2, -2) + float2(-1, 1), 0, 1);
  o.uv = uv;
  return o;
}

float2 srcUv(float2 t, float r2, float3 k, float2 c, float s) {
  float d = 1.0 + r2 * (k.x + r2 * (k.y + r2 * k.z));
  return 0.5 + (t * d + c) * s;
}

// uv in q-space window -> world ray direction.
float3 rayW(float2 uv) {
  float3 dEye = float3(lerp(tanLRTB.x, tanLRTB.y, uv.x),
                       lerp(tanLRTB.z, tanLRTB.w, uv.y), -1.0);
  return float3(dot(rwe0.xyz, dEye), dot(rwe1.xyz, dEye),
                dot(rwe2.xyz, dEye));
}

// dW = channel ray; dGx/dGy = ddx/ddy of the green ray (uniform flow).
// Single-if loop body (no continue): the unroller chokes on multi-exit
// bodies and a failed unroll makes scr[i] a dynamic index — illegal.
// Speculative math is safe: denom ~ 0 yields inf/nan uv and every NaN
// comparison is false, so the gate rejects.
float sampleCh(float3 dW, float3 dGx, float3 dGy, int ch) {
  int n = (int)misc.x;
  [unroll] for (int i = 0; i < 8; i++) {
    if (i >= n) break;
    float3 nrm = float3(p0n[i].w, un[i].w, vn[i].w);
    float denom = dot(nrm, dW);
    float tt = dot(nrm, p0n[i].xyz - origin.xyz) / denom;
    float3 rel = origin.xyz + tt * dW - p0n[i].xyz;
    float2 uv = float2(dot(rel, un[i].xyz), dot(rel, vn[i].xyz));
    if (denom < -1e-6 && tt > 0.0 && uv.x >= 0.0 && uv.x <= 1.0 &&
        uv.y >= 0.0 && uv.y <= 1.0) {
      // Chain rule: t = a/dot(n,d) with a const per screen, so
      // dt = -t*dot(n,dd)/dot(n,d); dHit = dt*d + t*dd.
      float3 hx = (-tt * dot(nrm, dGx) / denom) * dW + tt * dGx;
      float3 hy = (-tt * dot(nrm, dGy) / denom) * dW + tt * dGy;
      float2 gx = float2(dot(hx, un[i].xyz), dot(hx, vn[i].xyz));
      float2 gy = float2(dot(hy, un[i].xyz), dot(hy, vn[i].xyz));
      float4 v = scr[i].SampleGrad(linBorder, uv, gx, gy);
      return ch == 0 ? v.r : (ch == 1 ? v.g : v.b);
    }
  }
  return 0.0;
}

float4 psmain(VSOut i) : SV_Target {
  float2 c = centerScale.xy;
  float s = centerScale.z;
  float2 t = 2.0 * i.uv - 1.0 - c;
  float r2 = dot(t, t);
  // no cutoff clamp by default (S3)
  if (misc.z > 0.5) r2 = min(r2, centerScale.w);
  float3 dR = rayW(srcUv(t, r2, kr.xyz, c, s));
  float3 dG = rayW(srcUv(t, r2, kg.xyz, c, s));
  float3 dB = rayW(srcUv(t, r2, kb.xyz, c, s));
  // Uniform-flow ray gradients (green), chain-ruled per screen inside.
  float3 dGx = ddx(dG), dGy = ddy(dG);
  float3 outc;
  outc.r = sampleCh(dR, dGx, dGy, 0);
  outc.g = sampleCh(dG, dGx, dGy, 1);
  outc.b = sampleCh(dB, dGx, dGy, 2);
  if (misc.y > 0.5) outc *= colorMult.rgb;
  return float4(outc, 1.0);
}
)";

// Compiles one entry point of kShader for the given target profile.
//
// entry  - HLSL entry-point name ("vsmain" / "psmain").
// target - shader profile string (e.g. "vs_5_0").
// out    - receives the compiled bytecode blob on success.
//
// Returns true on success. On failure prints the compiler's message (or
// "failed" when no error blob was produced) to stderr and returns false.
bool compile(const char* entry, const char* target, ComPtr<ID3DBlob>* out) {
  ComPtr<ID3DBlob> err;
  // sizeof - 1: do not hand the trailing NUL of the literal to the compiler.
  if (FAILED(D3DCompile(kShader, sizeof(kShader) - 1, nullptr, nullptr,
                        nullptr, entry, target, 0, 0, &*out, &err))) {
    fprintf(stderr, "warp_direct %s: %s\n", entry,
            err ? (const char*)err->GetBufferPointer() : "failed");
    return false;
  }
  return true;
}

}  // namespace

// Caches the per-eye warp constants from cfg and builds the root signature
// and pipeline state used by record().
//
// dev      - device used to create the root signature and PSO.
// rtFormat - format of the single render target the PSO writes.
// cfg      - HMD calibration; cfg.eye[0..1] are read.
//
// Returns true when every D3D object was created. Every failure path logs to
// stderr and returns false.
bool WarpDirect::init(ID3D12Device* dev, DXGI_FORMAT rtFormat,
                      const HmdConfig& cfg) {
  // Layout of eyeConsts_[e] (24 floats) mirrors the first six float4s of the
  // shader cbuffer: centerScale, kr, kg, kb, colorMult, tanLRTB.
  for (int e = 0; e < 2; e++) {
    const EyeCalib& eye = cfg.eye[e];
    float* c = eyeConsts_[e];
    c[0] = (float)eye.rgb[1].center_x;  // shared per eye on Beyond
    c[1] = (float)eye.rgb[1].center_y;
    c[2] = (float)(0.5 / (1.0 + eye.grow_for_undistort));
    c[3] = (float)eye.undistort_r2_cutoff;
    for (int ch = 0; ch < 3; ch++) {
      c[4 + ch * 4 + 0] = (float)eye.rgb[ch].k[0];
      c[4 + ch * 4 + 1] = (float)eye.rgb[ch].k[1];
      c[4 + ch * 4 + 2] = (float)eye.rgb[ch].k[2];
      c[4 + ch * 4 + 3] = 0;
    }
    for (int i = 0; i < 3; i++) c[16 + i] = (float)eye.color_mult[i];
    c[19] = 0;
    // q-space window tangents: the UNFOLDED grow-expanded intrinsics
    // window in the lens-axis frame — identical to the canted-camera
    // frustum in spatial_light (CANTED is the locked render path), so the
    // ray reconstructed here matches what the two-pass projection spans.
    // v=0 is the texture top = +y tangent.
    const double G = 1.0 + eye.grow_for_undistort;
    const double cx = -eye.intrinsics[0][2], cy = eye.intrinsics[1][2];
    const double fx = eye.intrinsics[0][0], fy = eye.intrinsics[1][1];
    c[20] = (float)((-G - cx) / fx);  // tan at u=0 (left)
    c[21] = (float)((G - cx) / fx);   // tan at u=1 (right)
    c[22] = (float)((G - cy) / fy);   // tan at v=0 (top)
    c[23] = (float)((-G - cy) / fy);  // tan at v=1 (bottom)
  }

  // Root signature: param 0 = root CBV at b0, param 1 = SRV table starting
  // at t0 (zero-initialised base register / offset), plus one static sampler
  // at s0. All pixel-shader visible only.
  D3D12_DESCRIPTOR_RANGE range{};
  range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
  range.NumDescriptors = kWarpDirectMaxScreens;

  D3D12_ROOT_PARAMETER params[2]{};
  params[0].ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV;
  params[0].Descriptor.ShaderRegister = 0;
  params[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL;
  params[1].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
  params[1].DescriptorTable.NumDescriptorRanges = 1;
  params[1].DescriptorTable.pDescriptorRanges = &range;
  params[1].ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL;

  D3D12_STATIC_SAMPLER_DESC samp{};
  samp.Filter = D3D12_FILTER_MIN_MAG_MIP_LINEAR;  // trilinear — mips matter
  samp.AddressU = samp.AddressV = samp.AddressW =
      D3D12_TEXTURE_ADDRESS_MODE_BORDER;
  samp.BorderColor = D3D12_STATIC_BORDER_COLOR_OPAQUE_BLACK;
  samp.MaxLOD = D3D12_FLOAT32_MAX;
  samp.ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL;

  D3D12_ROOT_SIGNATURE_DESC rs{};
  rs.NumParameters = 2;
  rs.pParameters = params;
  rs.NumStaticSamplers = 1;
  rs.pStaticSamplers = &samp;

  ComPtr<ID3DBlob> sig, err;
  if (FAILED(D3D12SerializeRootSignature(&rs, D3D_ROOT_SIGNATURE_VERSION_1,
                                         &sig, &err))) {
    fprintf(stderr, "warp_direct root sig: %s\n",
            err ? (const char*)err->GetBufferPointer() : "failed");
    return false;
  }
  const HRESULT sigHr =
      dev->CreateRootSignature(0, sig->GetBufferPointer(),
                               sig->GetBufferSize(), IID_PPV_ARGS(&rootSig_));
  if (FAILED(sigHr)) {
    // Previously a silent failure; log like the other error paths.
    fprintf(stderr, "warp_direct root sig create: 0x%08lx\n",
            (unsigned long)sigHr);
    return false;
  }

  ComPtr<ID3DBlob> vs, ps;
  if (!compile("vsmain", "vs_5_0", &vs) || !compile("psmain", "ps_5_0", &ps))
    return false;

  // No input layout, blending or depth: everything not set below stays
  // zero-initialised.
  D3D12_GRAPHICS_PIPELINE_STATE_DESC pd{};
  pd.pRootSignature = rootSig_.Get();
  pd.VS = {vs->GetBufferPointer(), vs->GetBufferSize()};
  pd.PS = {ps->GetBufferPointer(), ps->GetBufferSize()};
  pd.BlendState.RenderTarget[0].RenderTargetWriteMask =
      D3D12_COLOR_WRITE_ENABLE_ALL;
  pd.SampleMask = UINT_MAX;
  pd.RasterizerState.FillMode = D3D12_FILL_MODE_SOLID;
  pd.RasterizerState.CullMode = D3D12_CULL_MODE_NONE;
  pd.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE;
  pd.NumRenderTargets = 1;
  pd.RTVFormats[0] = rtFormat;
  pd.SampleDesc.Count = 1;
  const HRESULT psoHr =
      dev->CreateGraphicsPipelineState(&pd, IID_PPV_ARGS(&pso_));
  if (FAILED(psoHr)) {
    // Previously a silent failure; log like the other error paths.
    fprintf(stderr, "warp_direct pso: 0x%08lx\n", (unsigned long)psoHr);
    return false;
  }
  return true;
}

// Copies the constants cached by init() for `eye` into the first six float4
// fields of *cb (centerScale, kr, kg, kb, colorMult, tanLRTB). No other field
// of *cb is written here. `eye` is used unchecked as an index into
// eyeConsts_; init() fills entries 0 and 1.
void WarpDirect::fillEyeConsts(int eye, WarpDirectCB* cb) const {
  const float* c = eyeConsts_[eye];
  memcpy(cb->centerScale, c, 4 * sizeof(float));
  memcpy(cb->kr, c + 4, 4 * sizeof(float));
  memcpy(cb->kg, c + 8, 4 * sizeof(float));
  memcpy(cb->kb, c + 12, 4 * sizeof(float));
  memcpy(cb->colorMult, c + 16, 4 * sizeof(float));
  memcpy(cb->tanLRTB, c + 20, 4 * sizeof(float));
}

// Records the warp draw for one eye into `list`.
//
// list    - command list to record into.
// eye     - selects the panel half: viewport/scissor start at eye * panelW/2.
// cb      - GPU address bound as the root CBV (root parameter 0).
// screens - descriptor-table handle bound as root parameter 1.
// panelW, panelH - full panel size in pixels; each eye gets panelW/2 x panelH.
//
// Issues a 3-vertex draw with no vertex buffer; vsmain derives the
// positions from SV_VertexID.
void WarpDirect::record(ID3D12GraphicsCommandList* list, int eye,
                        D3D12_GPU_VIRTUAL_ADDRESS cb,
                        D3D12_GPU_DESCRIPTOR_HANDLE screens, uint32_t panelW,
                        uint32_t panelH) {
  const float half = panelW / 2.0f;
  D3D12_VIEWPORT vp{eye * half, 0, half, (float)panelH, 0, 1};
  D3D12_RECT sc{(LONG)(eye * half), 0, (LONG)((eye + 1) * half),
                (LONG)panelH};
  list->RSSetViewports(1, &vp);
  list->RSSetScissorRects(1, &sc);
  list->SetGraphicsRootSignature(rootSig_.Get());
  list->SetPipelineState(pso_.Get());
  list->SetGraphicsRootConstantBufferView(0, cb);
  list->SetGraphicsRootDescriptorTable(1, screens);
  list->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
  list->DrawInstanced(3, 1, 0, 0);
}

}  // namespace sauna