|
| 1 | +// compute_test_matrices.cs - Matrix operations compute shader test |
| 2 | +// Tests various matrix operations: multiplication, transpose, transformations |
| 3 | +// |
| 4 | +#define SM_5_0 |
| 5 | +#include "common.h" |
| 6 | + |
// Per-element record read from g_input and written to g_output.
// Field sizes add up to 64 + 16 + 16 + 4 + 12 = 112 bytes.
struct MatrixTestData
{
    // 4x4 matrix: 16 floats, 64 bytes.
    float4x4 transform;

    // Vector the shader starts from: 16 bytes.
    float4 position;

    // Vector the shader writes back: 16 bytes.
    float4 result;

    // Integer tag; the shader adds the iteration count to it: 4 bytes.
    uint flags;

    // Three spare uints (12 bytes) that round the record up to 112 bytes.
    uint padding[3];
};
| 16 | + |
// Source records, read-only, bound at t0.
StructuredBuffer<MatrixTestData> g_input : register(t0);

// Destination records, read/write, bound at u0.
RWStructuredBuffer<MatrixTestData> g_output : register(u0);

// Read/write uint buffer bound at u1; main atomically increments element 0
// once for every thread that writes an output record.
RWStructuredBuffer<uint> g_counter : register(u1);
| 25 | + |
// Dispatch parameters bound at b0: four 4-byte scalars.
cbuffer TestParams : register(b0)
{
    // Number of records to process; threads with an index at or beyond it exit early.
    uint g_element_count;

    // Trip count of the per-thread loop in main.
    uint g_iteration_count;

    // Base angle that main scales per iteration before building the rotation.
    float g_rotation_angle;

    // Scale factor; not referenced by the shader code visible in this file.
    float g_scale_factor;
};
| 34 | + |
// Builds a 4x4 Y-axis rotation matrix from `angle` (fed straight to sin/cos).
// Row 0 is (cos, 0, sin, 0); row 2 is (-sin, 0, cos, 0); rows 1 and 3 are identity rows.
// NOTE(review): main multiplies row vectors (mul(v, M)). With that convention this
// layout looks like the column-vector form of Ry, i.e. it would turn the opposite way
// from what a row-vector Ry(angle) does - confirm this is intended before changing it.
float4x4 CreateRotationY(float angle)
{
    float sin_a = sin(angle);
    float cos_a = cos(angle);

    float4 row_x = float4(cos_a, 0.0, sin_a, 0.0);
    float4 row_y = float4(0.0, 1.0, 0.0, 0.0);
    float4 row_z = float4(-sin_a, 0.0, cos_a, 0.0);
    float4 row_w = float4(0.0, 0.0, 0.0, 1.0);

    return float4x4(row_x, row_y, row_z, row_w);
}
| 50 | + |
// Builds a 4x4 scale matrix: scale.x / scale.y / scale.z on the first three
// diagonal entries, 1 in the last diagonal entry, 0 everywhere else.
float4x4 CreateScale(float3 scale)
{
    // Start from an all-zero matrix and fill in the diagonal only.
    float4x4 scaling = (float4x4)0.0;

    scaling[0][0] = scale.x;
    scaling[1][1] = scale.y;
    scaling[2][2] = scale.z;
    scaling[3][3] = 1.0;

    return scaling;
}
| 61 | + |
// Builds a 4x4 translation matrix: identity in the first three rows, with the
// offset stored in the last row as (translation.x, translation.y, translation.z, 1).
float4x4 CreateTranslation(float3 translation)
{
    float4 offset_row = float4(translation, 1.0);

    return float4x4(
        float4(1.0, 0.0, 0.0, 0.0),
        float4(0.0, 1.0, 0.0, 0.0),
        float4(0.0, 0.0, 1.0, 0.0),
        offset_row
    );
}
| 72 | + |
// Returns the transpose of m, i.e. a matrix whose element [r][c] equals m[c][r].
// Delegates to the built-in transpose() intrinsic instead of re-listing all 16
// elements by hand; the wrapper is kept so existing callers of Transpose() still work.
float4x4 Transpose(float4x4 m)
{
    return transpose(m);
}
| 83 | + |
// Compute entry point: 256 threads per group, one MatrixTestData record per thread.
//
// Each thread reads g_input[idx], runs g_iteration_count rounds of matrix/vector
// work on it, writes the record to g_output[idx], and bumps g_counter[0] by one.
// Threads whose index is at or beyond g_element_count exit without touching any buffer.
[numthreads(256, 1, 1)]
void main(uint3 dispatch_thread_id : SV_DispatchThreadID)
{
    uint idx = dispatch_thread_id.x;

    // Guard against the partially filled last thread group.
    if (idx >= g_element_count)
        return;

    // Start the output as a copy of the input so untouched fields
    // (position, padding) pass straight through.
    MatrixTestData input_data = g_input[idx];
    MatrixTestData output_data = input_data;

    // Running state that the loop below keeps updating.
    float4x4 result_matrix = input_data.transform;
    float4 result_vec = input_data.position;

    // Small per-thread bias so that different threads do not produce identical values.
    float thread_offset = float(idx) * 0.0001;

    // Values that depend only on the thread, not on the iteration; computed once.
    float scale_phase = thread_offset * 10.0;
    float det_gain = 1.0 + thread_offset;
    uint transpose_period = 17 + idx % 7;   // 17..23, so never zero

    for (uint i = 0; i < g_iteration_count; ++i)
    {
        // Rotation: angle grows with the iteration index and is biased per thread.
        float angle = g_rotation_angle * float(i + 1) * 0.001 + thread_offset;
        float4x4 rotation = CreateRotationY(angle);

        // Uniform scale oscillating between 0.95 and 1.05.
        float scale = 1.0 + sin(float(i) * 0.05 + scale_phase) * 0.05;
        float4x4 scale_mat = CreateScale(float3(scale, scale, scale));

        // Small per-thread translation.
        float trig_arg = angle + thread_offset;
        float3 translation = float3(
            sin(trig_arg) * 0.001,
            cos(trig_arg) * 0.001,
            thread_offset * 0.0001
        );
        float4x4 translation_mat = CreateTranslation(translation);

        // Rebuild the full transform every iteration. The product is S * R * T;
        // with the row-vector multiply below (mul(v, M)) that applies the scale
        // first, then the rotation, then the translation.
        float4x4 transform = mul(scale_mat, rotation);
        transform = mul(transform, translation_mat);

        // Transform the running vector.
        result_vec = mul(result_vec, transform);

        // Keep the vector bounded and force w back to 1 after every step.
        result_vec.xyz = clamp(result_vec.xyz, -100000.0, 100000.0);
        result_vec.w = 1.0;

        // Every 100th iteration (including i == 0) add a thread-specific nudge.
        if ((i % 100) == 0)
        {
            result_vec.xyz += float3(
                thread_offset * sin(float(i) * 0.01),
                thread_offset * cos(float(i) * 0.01),
                thread_offset * sin(float(i) * 0.02)
            );
        }

        // Every 10th iteration fold the current transform into the running matrix.
        if ((i % 10) == 0)
        {
            result_matrix = mul(result_matrix, transform);

            // Clamp the first three entries of row 3. That row holds the translation
            // only while the matrix has not been transposed by the step below.
            result_matrix[3][0] = clamp(result_matrix[3][0], -1000.0, 1000.0);
            result_matrix[3][1] = clamp(result_matrix[3][1], -1000.0, 1000.0);
            result_matrix[3][2] = clamp(result_matrix[3][2], -1000.0, 1000.0);
        }

        // Transpose the running matrix on a thread-specific period.
        if ((i % transpose_period) == 0)
        {
            result_matrix = Transpose(result_matrix);
        }

        // Determinant of the upper-left 2x2 block only (not the full 4x4 determinant).
        float minor_2x2 = result_matrix[0][0] * result_matrix[1][1] -
                          result_matrix[0][1] * result_matrix[1][0];

        // Let the clamped magnitude of that minor scale the vector very slightly.
        result_vec.xyz *= (1.0 + clamp(abs(minor_2x2), 0.0, 0.1) * 0.0001 * det_gain);
    }

    // Publish the results; flags gets a deterministic, iteration-dependent update.
    output_data.transform = result_matrix;
    output_data.result = result_vec;
    output_data.flags = input_data.flags + g_iteration_count;

    g_output[idx] = output_data;

    // Count this thread as finished.
    InterlockedAdd(g_counter[0], 1);
}
0 commit comments