Skip to content

Commit c047d9e

Browse files
committed
Implement GPU instancing for skinned meshes
1 parent 9d6f678 commit c047d9e

File tree

7 files changed

+529
-104
lines changed

7 files changed

+529
-104
lines changed

src/xrAnimation/tools/imgui.ini

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,8 @@ Size=400,400
3333
Collapsed=0
3434

3535
[Window][IK Controls]
36-
Pos=275,642
37-
Size=1600,502
36+
Pos=269,684
37+
Size=849,216
3838
Collapsed=0
3939

4040
[Table][0x87209752,5]

src/xrAnimation/tools/ozz_animation_viewer.cpp

Lines changed: 84 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -560,6 +560,8 @@ struct ViewerState {
560560
int current_instance_count = 0;
561561
bool use_ecs_rendering = true; // Toggle between ECS and regular rendering
562562
bool randomize_instance_animations = false; // Randomize which animation each instance plays
563+
bool enable_gpu_mesh_instancing = true; // GPU skinning by uploading skeleton-order matrices
564+
bool enable_gpu_skeleton_instancing = true; // GPU instanced debug bones/spheres
563565

564566
// Reusable rendering buffers to avoid per-frame allocations
565567
std::vector<MeshInstanceData> render_instance_buffer;
@@ -1294,6 +1296,43 @@ void DrawRenderingPanel(ViewerState& state, VulkanRenderer& renderer) {
12941296
ImGui::TextColored(ImVec4(1.f, 1.f, 0.f, 1.f), "Mode: Regular Animation System");
12951297
}
12961298

1299+
ImGui::Separator();
1300+
ImGui::SeparatorText("GPU Instancing");
1301+
1302+
bool gpu_mesh_instancing = state.enable_gpu_mesh_instancing;
1303+
if (ImGui::Checkbox("Mesh skinning on GPU", &gpu_mesh_instancing)) {
1304+
state.enable_gpu_mesh_instancing = gpu_mesh_instancing;
1305+
renderer.SetUseGpuMeshInstancing(gpu_mesh_instancing);
1306+
}
1307+
ImGui::SameLine();
1308+
ImGui::TextDisabled("(?)");
1309+
if (ImGui::IsItemHovered()) {
1310+
ImGui::BeginTooltip();
1311+
ImGui::TextUnformatted("Toggle between GPU skeleton-order uploads and CPU palette skinning");
1312+
ImGui::TextUnformatted("Disable if you need legacy palette-aligned bone matrices");
1313+
ImGui::EndTooltip();
1314+
}
1315+
1316+
bool gpu_skeleton_instancing = state.enable_gpu_skeleton_instancing;
1317+
if (ImGui::Checkbox("Skeleton debug GPU instancing", &gpu_skeleton_instancing)) {
1318+
state.enable_gpu_skeleton_instancing = gpu_skeleton_instancing;
1319+
renderer.SetUseGpuSkeletonInstancing(gpu_skeleton_instancing);
1320+
}
1321+
ImGui::SameLine();
1322+
ImGui::TextDisabled("(?)");
1323+
if (ImGui::IsItemHovered()) {
1324+
ImGui::BeginTooltip();
1325+
ImGui::TextUnformatted("Control GPU batching for skeleton debug bones and joint spheres");
1326+
ImGui::TextUnformatted("Disable to fall back to per-primitive CPU drawing");
1327+
ImGui::EndTooltip();
1328+
}
1329+
const size_t palette_bones = renderer.GetMeshRenderer().BonesPerInstance();
1330+
const size_t skeleton_joints = static_cast<size_t>(state.skeleton.num_joints());
1331+
ImGui::Text("Palette bones: %zu", palette_bones);
1332+
ImGui::Text("Skeleton joints: %zu", skeleton_joints);
1333+
ImGui::Text("Matrices/instance (current): %zu",
1334+
state.enable_gpu_mesh_instancing ? skeleton_joints : palette_bones);
1335+
12971336
// Parallel implementation selection (only when ECS is active)
12981337
if (state.use_ecs_rendering && state.instance_count >= 1) {
12991338
ImGui::Separator();
@@ -2436,23 +2475,33 @@ void RenderECSInstances(ViewerState& state, VulkanRenderer& renderer) {
24362475
const int grid_size = static_cast<int>(std::ceil(std::sqrt(static_cast<float>(state.instance_count))));
24372476
const float spacing = state.instance_grid_spacing;
24382477

2439-
// Get the mesh to know how many bones per instance
24402478
const size_t bones_per_instance = renderer.GetMeshRenderer().BonesPerInstance();
2479+
const size_t skeleton_joint_count = static_cast<size_t>(state.skeleton.num_joints());
2480+
if (bones_per_instance == 0 || skeleton_joint_count == 0) {
2481+
return;
2482+
}
2483+
2484+
const bool use_gpu_skinning = renderer.GetUseGpuMeshInstancing();
2485+
const auto& joint_remaps = renderer.GetMeshJointRemaps();
2486+
const auto& inverse_bind_poses = renderer.GetMeshInverseBindPoses();
2487+
2488+
const size_t matrices_per_instance = use_gpu_skinning
2489+
? skeleton_joint_count
2490+
: bones_per_instance;
2491+
if (matrices_per_instance == 0) {
2492+
return;
2493+
}
24412494

24422495
// OPTIMIZED: Pre-allocate buffers to exact sizes (avoid reallocation)
24432496
state.render_instance_buffer.resize(state.instance_count);
24442497
state.render_skeleton_transforms_buffer.resize(state.instance_count);
2445-
state.render_bone_matrices_buffer.resize(state.instance_count * bones_per_instance);
2498+
state.render_bone_matrices_buffer.resize(state.instance_count * matrices_per_instance);
24462499

24472500
// For ECS multi-instance rendering, we don't use mesh_world_transform for positioning
24482501
// Grid positioning is absolute, not relative to a base transform
24492502
// Only use identity (or rotation if needed in the future)
24502503
const ozz::math::Float4x4 mesh_world_transform = ozz::math::Float4x4::identity();
24512504

2452-
// Get mesh data for skinning (read-only, thread-safe)
2453-
const auto& joint_remaps = renderer.GetMeshJointRemaps();
2454-
const auto& inverse_bind_poses = renderer.GetMeshInverseBindPoses();
2455-
24562505
// PRE-FETCH: Get all component pointers BEFORE parallel loop (EnTT is not thread-safe)
24572506
auto& registry = state.ecs_animation_registry->GetRegistry();
24582507
std::vector<AnimationECS::AnimationBuffers*> buffers_ptrs(state.instance_count);
@@ -2516,28 +2565,35 @@ void RenderECSInstances(ViewerState& state, VulkanRenderer& renderer) {
25162565
inst_transform->world_transform = instance_transform;
25172566
}
25182567

2519-
if (buffers && buffers->IsInitialized()) {
2520-
// Add instance data at indexed position
2521-
state.render_instance_buffer[i].transform = instance_transform;
2522-
state.render_instance_buffer[i].bone_matrix_offset = static_cast<uint32_t>(i * bones_per_instance);
2523-
2524-
// Compute skinning matrices = model_space_transform * inverse_bind_pose
2525-
const size_t bone_base_offset = i * bones_per_instance;
2568+
const size_t bone_base_offset = static_cast<size_t>(i) * matrices_per_instance;
2569+
auto dst_begin = state.render_bone_matrices_buffer.begin() + bone_base_offset;
2570+
auto dst_end = dst_begin + matrices_per_instance;
2571+
const ozz::math::Float4x4 identity = ozz::math::Float4x4::identity();
2572+
std::fill(dst_begin, dst_end, identity);
25262573

2527-
for (size_t bone_idx = 0; bone_idx < bones_per_instance && bone_idx < joint_remaps.size(); ++bone_idx) {
2528-
const uint16_t joint = joint_remaps[bone_idx];
2529-
const size_t write_idx = bone_base_offset + bone_idx;
2574+
// Add instance data at indexed position
2575+
state.render_instance_buffer[i].transform = instance_transform;
2576+
state.render_instance_buffer[i].bone_matrix_offset = static_cast<uint32_t>(bone_base_offset);
25302577

2531-
// Get the model-space transform for this joint
2532-
if (joint < buffers->models.size()) {
2533-
const ozz::math::Float4x4& model_space = buffers->models[joint];
2534-
const ozz::math::Float4x4& inv_bind_pose = inverse_bind_poses[bone_idx];
2535-
2536-
// Skinning matrix = model_space * inverse_bind_pose
2537-
state.render_bone_matrices_buffer[write_idx] = model_space * inv_bind_pose;
2538-
} else {
2539-
// Fallback to identity if joint index is out of range
2540-
state.render_bone_matrices_buffer[write_idx] = ozz::math::Float4x4::identity();
2578+
if (buffers && buffers->IsInitialized()) {
2579+
if (use_gpu_skinning) {
2580+
const size_t copy_count = std::min(buffers->models.size(), skeleton_joint_count);
2581+
if (copy_count > 0) {
2582+
std::copy_n(buffers->models.begin(), copy_count, dst_begin);
2583+
}
2584+
} else {
2585+
const size_t palette_count = std::min(
2586+
{bones_per_instance, joint_remaps.empty() ? bones_per_instance : joint_remaps.size(), inverse_bind_poses.size()});
2587+
2588+
for (size_t bone_idx = 0; bone_idx < palette_count; ++bone_idx) {
2589+
const uint16_t joint = joint_remaps.empty()
2590+
? static_cast<uint16_t>(bone_idx)
2591+
: joint_remaps[bone_idx];
2592+
if (joint < buffers->models.size()) {
2593+
const ozz::math::Float4x4& model_space = buffers->models[joint];
2594+
const ozz::math::Float4x4& inv_bind_pose = inverse_bind_poses[bone_idx];
2595+
state.render_bone_matrices_buffer[bone_base_offset + bone_idx] = model_space * inv_bind_pose;
2596+
}
25412597
}
25422598
}
25432599
}
@@ -2625,6 +2681,8 @@ int main(int argc, const char** argv) {
26252681

26262682
ViewerState state;
26272683
state.window = window;
2684+
renderer.SetUseGpuMeshInstancing(state.enable_gpu_mesh_instancing);
2685+
renderer.SetUseGpuSkeletonInstancing(state.enable_gpu_skeleton_instancing);
26282686

26292687
const std::string bundle_argument = ParseBundleArgument(argc, argv);
26302688
const std::string animation_argument = ParseAnimationArgument(argc, argv);

0 commit comments

Comments
 (0)