Skip to content

Commit e1312d6

Browse files
committed
Add simd and matrix tests
1 parent 067610c commit e1312d6

13 files changed

+2082
-24
lines changed
Lines changed: 177 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,177 @@
1+
// compute_test_matrices.cs - Matrix operations compute shader test
2+
// Tests various matrix operations: multiplication, transpose, transformations
3+
//
4+
#define SM_5_0
5+
#include "common.h"
6+
7+
// Per-element record shared by the input and output structured buffers.
// main() reads `transform` and `position`, writes `transform`, `result` and
// `flags`, and copies `position` and `padding` through unchanged.
// Field sizes: 64 + 16 + 16 + 4 + 12 = 112 bytes per element.
struct MatrixTestData
{
    float4x4 transform;   // 16 floats, 64 bytes: matrix accumulated across iterations
    float4   position;    // 16 bytes: starting vector for the transform chain
    float4   result;      // 16 bytes: final transformed vector
    uint     flags;       //  4 bytes: written back as input flags + iteration count
    uint     padding[3];  // 12 bytes: brings the element size to 112 bytes
};
16+
17+
// Source records (slot t0); each active thread loads the element at its own index.
StructuredBuffer<MatrixTestData> g_input : register(t0);

// Destination records (slot u0); each active thread stores exactly one element.
RWStructuredBuffer<MatrixTestData> g_output : register(u0);

// Completion counter (slot u1); element 0 is atomically incremented once per
// thread that passes the bounds check in main().
RWStructuredBuffer<uint> g_counter : register(u1);
25+
26+
// Dispatch-wide test parameters (slot b0), four 4-byte scalars.
cbuffer TestParams : register(b0)
{
    uint  g_element_count;    // number of valid elements; threads at or beyond this index exit early
    uint  g_iteration_count;  // loop trip count per thread; also added to each element's flags
    float g_rotation_angle;   // base angle; main() scales it by (iteration + 1) * 0.001 before sin/cos
    float g_scale_factor;     // scale factor for transformations (not referenced by the visible shader code)
};
34+
35+
// Builds a 4x4 rotation about the Y axis from `angle` (passed straight to sin/cos).
// Layout, row by row:
//   (  c, 0, s, 0 )
//   (  0, 1, 0, 0 )
//   ( -s, 0, c, 0 )
//   (  0, 0, 0, 1 )
float4x4 CreateRotationY(float angle)
{
    const float sin_a = sin(angle);
    const float cos_a = cos(angle);

    const float4 row0 = float4(cos_a, 0.0, sin_a, 0.0);
    const float4 row1 = float4(0.0, 1.0, 0.0, 0.0);
    const float4 row2 = float4(-sin_a, 0.0, cos_a, 0.0);
    const float4 row3 = float4(0.0, 0.0, 0.0, 1.0);

    return float4x4(row0, row1, row2, row3);
}
50+
51+
// Builds a 4x4 scale matrix: `scale.x`, `scale.y`, `scale.z` on the diagonal,
// 1 in the bottom-right corner, zero everywhere else.
float4x4 CreateScale(float3 scale)
{
    const float4 row0 = float4(scale.x, 0.0, 0.0, 0.0);
    const float4 row1 = float4(0.0, scale.y, 0.0, 0.0);
    const float4 row2 = float4(0.0, 0.0, scale.z, 0.0);
    const float4 row3 = float4(0.0, 0.0, 0.0, 1.0);

    return float4x4(row0, row1, row2, row3);
}
61+
62+
// Builds a 4x4 translation matrix: identity in the upper three rows, with the
// offset stored in the fourth row (the layout used with mul(vector, matrix)).
float4x4 CreateTranslation(float3 translation)
{
    const float4 row0 = float4(1.0, 0.0, 0.0, 0.0);
    const float4 row1 = float4(0.0, 1.0, 0.0, 0.0);
    const float4 row2 = float4(0.0, 0.0, 1.0, 0.0);
    const float4 row3 = float4(translation.x, translation.y, translation.z, 1.0);

    return float4x4(row0, row1, row2, row3);
}
72+
73+
// Returns the transpose of `m` using explicit element reads:
// row r of the result is column r of the input.
float4x4 Transpose(float4x4 m)
{
    const float4 from_col0 = float4(m[0][0], m[1][0], m[2][0], m[3][0]);
    const float4 from_col1 = float4(m[0][1], m[1][1], m[2][1], m[3][1]);
    const float4 from_col2 = float4(m[0][2], m[1][2], m[2][2], m[3][2]);
    const float4 from_col3 = float4(m[0][3], m[1][3], m[2][3], m[3][3]);

    return float4x4(from_col0, from_col1, from_col2, from_col3);
}
83+
84+
// Entry point: one thread per MatrixTestData element, 256 threads per group.
// Each active thread runs g_iteration_count rounds of matrix and vector work
// on its element, stores the updated record, and bumps g_counter[0] by one.
[numthreads(256, 1, 1)]
void main(uint3 dispatch_thread_id : SV_DispatchThreadID)
{
    const uint element = dispatch_thread_id.x;

    // Threads past the end of the data do nothing, and do not touch the counter.
    if (element >= g_element_count)
    {
        return;
    }

    // Work on a single copy of the input record; `position` and `padding`
    // are never modified, so they reach the output unchanged.
    MatrixTestData record = g_input[element];

    float4x4 accum_matrix = record.transform;
    float4 accum_vec = record.position;

    // Per-thread values that do not change across iterations. The small
    // index-derived bias keeps different threads from producing identical data.
    const float lane_bias = float(element) * 0.0001;
    const float scale_phase = lane_bias * 10.0;
    const float shift_z = lane_bias * 0.0001;
    const float det_gain = 1.0 + lane_bias;
    const uint transpose_period = 17 + element % 7;  // 17..23, varies by thread

    for (uint iter = 0; iter < g_iteration_count; ++iter)
    {
        // Rotation angle grows with the iteration number, plus the thread bias.
        const float angle = g_rotation_angle * float(iter + 1) * 0.001 + lane_bias;
        const float4x4 rotate_mat = CreateRotationY(angle);

        // Uniform scale oscillating between 0.95 and 1.05.
        const float iso_scale = 1.0 + sin(float(iter) * 0.05 + scale_phase) * 0.05;
        const float4x4 scale_mat = CreateScale(float3(iso_scale, iso_scale, iso_scale));

        // Small translation tracing a circle in XY with a constant Z offset.
        const float shift_phase = angle + lane_bias;
        const float4x4 shift_mat = CreateTranslation(float3(
            sin(shift_phase) * 0.001,
            cos(shift_phase) * 0.001,
            shift_z));

        // Rebuilt from scratch each iteration as scale * rotation * translation.
        // The vector is the left operand of mul() below (row-vector form), so it
        // is scaled first, then rotated, then translated.
        const float4x4 iter_transform = mul(mul(scale_mat, rotate_mat), shift_mat);

        // Transform the vector, then bound xyz and reset w every iteration so
        // the values cannot run away.
        const float4 moved = mul(accum_vec, iter_transform);
        accum_vec = float4(clamp(moved.xyz, -100000.0, 100000.0), 1.0);

        // Every 100th iteration, add a thread-dependent nudge to the vector.
        if ((iter % 100) == 0)
        {
            const float slow_phase = float(iter) * 0.01;
            accum_vec.xyz += float3(
                lane_bias * sin(slow_phase),
                lane_bias * cos(slow_phase),
                lane_bias * sin(float(iter) * 0.02));
        }

        // Every 10th iteration, fold this iteration's transform into the
        // running matrix and bound the first three entries of its fourth row.
        if ((iter % 10) == 0)
        {
            accum_matrix = mul(accum_matrix, iter_transform);

            accum_matrix[3][0] = clamp(accum_matrix[3][0], -1000.0, 1000.0);
            accum_matrix[3][1] = clamp(accum_matrix[3][1], -1000.0, 1000.0);
            accum_matrix[3][2] = clamp(accum_matrix[3][2], -1000.0, 1000.0);
        }

        // Transpose the running matrix on a thread-specific schedule.
        if ((iter % transpose_period) == 0)
        {
            accum_matrix = Transpose(accum_matrix);
        }

        // Determinant of the upper-left 2x2 block, fed back into the vector
        // as a tiny bounded gain (at most 0.1 * 0.0001 * det_gain).
        const float det2x2 = accum_matrix[0][0] * accum_matrix[1][1] -
                             accum_matrix[0][1] * accum_matrix[1][0];
        const float nudge = clamp(abs(det2x2), 0.0, 0.1) * 0.0001 * det_gain;
        accum_vec.xyz *= (1.0 + nudge);
    }

    // Publish the results; flags advance by the iteration count.
    record.transform = accum_matrix;
    record.result = accum_vec;
    record.flags = record.flags + g_iteration_count;

    g_output[element] = record;

    // Record that this thread finished (atomic across all threads).
    InterlockedAdd(g_counter[0], 1);
}

0 commit comments

Comments
 (0)