@@ -560,6 +560,8 @@ struct ViewerState {
560560 int current_instance_count = 0 ;
561561 bool use_ecs_rendering = true ; // Toggle between ECS and regular rendering
562562 bool randomize_instance_animations = false ; // Randomize which animation each instance plays
563+ bool enable_gpu_mesh_instancing = true ; // GPU skinning by uploading skeleton-order matrices
564+ bool enable_gpu_skeleton_instancing = true ; // GPU instanced debug bones/spheres
563565
564566 // Reusable rendering buffers to avoid per-frame allocations
565567 std::vector<MeshInstanceData> render_instance_buffer;
@@ -1294,6 +1296,43 @@ void DrawRenderingPanel(ViewerState& state, VulkanRenderer& renderer) {
12941296 ImGui::TextColored (ImVec4 (1 .f , 1 .f , 0 .f , 1 .f ), " Mode: Regular Animation System" );
12951297 }
12961298
1299+ ImGui::Separator ();
1300+ ImGui::SeparatorText (" GPU Instancing" );
1301+
1302+ bool gpu_mesh_instancing = state.enable_gpu_mesh_instancing ;
1303+ if (ImGui::Checkbox (" Mesh skinning on GPU" , &gpu_mesh_instancing)) {
1304+ state.enable_gpu_mesh_instancing = gpu_mesh_instancing;
1305+ renderer.SetUseGpuMeshInstancing (gpu_mesh_instancing);
1306+ }
1307+ ImGui::SameLine ();
1308+ ImGui::TextDisabled (" (?)" );
1309+ if (ImGui::IsItemHovered ()) {
1310+ ImGui::BeginTooltip ();
1311+ ImGui::TextUnformatted (" Toggle between GPU skeleton-order uploads and CPU palette skinning" );
1312+ ImGui::TextUnformatted (" Disable if you need legacy palette-aligned bone matrices" );
1313+ ImGui::EndTooltip ();
1314+ }
1315+
1316+ bool gpu_skeleton_instancing = state.enable_gpu_skeleton_instancing ;
1317+ if (ImGui::Checkbox (" Skeleton debug GPU instancing" , &gpu_skeleton_instancing)) {
1318+ state.enable_gpu_skeleton_instancing = gpu_skeleton_instancing;
1319+ renderer.SetUseGpuSkeletonInstancing (gpu_skeleton_instancing);
1320+ }
1321+ ImGui::SameLine ();
1322+ ImGui::TextDisabled (" (?)" );
1323+ if (ImGui::IsItemHovered ()) {
1324+ ImGui::BeginTooltip ();
1325+ ImGui::TextUnformatted (" Control GPU batching for skeleton debug bones and joint spheres" );
1326+ ImGui::TextUnformatted (" Disable to fall back to per-primitive CPU drawing" );
1327+ ImGui::EndTooltip ();
1328+ }
1329+ const size_t palette_bones = renderer.GetMeshRenderer ().BonesPerInstance ();
1330+ const size_t skeleton_joints = static_cast <size_t >(state.skeleton .num_joints ());
1331+ ImGui::Text (" Palette bones: %zu" , palette_bones);
1332+ ImGui::Text (" Skeleton joints: %zu" , skeleton_joints);
1333+ ImGui::Text (" Matrices/instance (current): %zu" ,
1334+ state.enable_gpu_mesh_instancing ? skeleton_joints : palette_bones);
1335+
12971336 // Parallel implementation selection (only when ECS is active)
12981337 if (state.use_ecs_rendering && state.instance_count >= 1 ) {
12991338 ImGui::Separator ();
@@ -2436,23 +2475,33 @@ void RenderECSInstances(ViewerState& state, VulkanRenderer& renderer) {
24362475 const int grid_size = static_cast <int >(std::ceil (std::sqrt (static_cast <float >(state.instance_count ))));
24372476 const float spacing = state.instance_grid_spacing ;
24382477
2439- // Get the mesh to know how many bones per instance
24402478 const size_t bones_per_instance = renderer.GetMeshRenderer ().BonesPerInstance ();
2479+ const size_t skeleton_joint_count = static_cast <size_t >(state.skeleton .num_joints ());
2480+ if (bones_per_instance == 0 || skeleton_joint_count == 0 ) {
2481+ return ;
2482+ }
2483+
2484+ const bool use_gpu_skinning = renderer.GetUseGpuMeshInstancing ();
2485+ const auto & joint_remaps = renderer.GetMeshJointRemaps ();
2486+ const auto & inverse_bind_poses = renderer.GetMeshInverseBindPoses ();
2487+
2488+ const size_t matrices_per_instance = use_gpu_skinning
2489+ ? skeleton_joint_count
2490+ : bones_per_instance;
2491+ if (matrices_per_instance == 0 ) {
2492+ return ;
2493+ }
24412494
24422495 // OPTIMIZED: Pre-allocate buffers to exact sizes (avoid reallocation)
24432496 state.render_instance_buffer .resize (state.instance_count );
24442497 state.render_skeleton_transforms_buffer .resize (state.instance_count );
2445- state.render_bone_matrices_buffer .resize (state.instance_count * bones_per_instance );
2498+ state.render_bone_matrices_buffer .resize (state.instance_count * matrices_per_instance );
24462499
24472500 // For ECS multi-instance rendering, we don't use mesh_world_transform for positioning
24482501 // Grid positioning is absolute, not relative to a base transform
24492502 // Only use identity (or rotation if needed in the future)
24502503 const ozz::math::Float4x4 mesh_world_transform = ozz::math::Float4x4::identity ();
24512504
2452- // Get mesh data for skinning (read-only, thread-safe)
2453- const auto & joint_remaps = renderer.GetMeshJointRemaps ();
2454- const auto & inverse_bind_poses = renderer.GetMeshInverseBindPoses ();
2455-
24562505 // PRE-FETCH: Get all component pointers BEFORE parallel loop (EnTT is not thread-safe)
24572506 auto & registry = state.ecs_animation_registry ->GetRegistry ();
24582507 std::vector<AnimationECS::AnimationBuffers*> buffers_ptrs (state.instance_count );
@@ -2516,28 +2565,35 @@ void RenderECSInstances(ViewerState& state, VulkanRenderer& renderer) {
25162565 inst_transform->world_transform = instance_transform;
25172566 }
25182567
2519- if (buffers && buffers->IsInitialized ()) {
2520- // Add instance data at indexed position
2521- state.render_instance_buffer [i].transform = instance_transform;
2522- state.render_instance_buffer [i].bone_matrix_offset = static_cast <uint32_t >(i * bones_per_instance);
2523-
2524- // Compute skinning matrices = model_space_transform * inverse_bind_pose
2525- const size_t bone_base_offset = i * bones_per_instance;
2568+ const size_t bone_base_offset = static_cast <size_t >(i) * matrices_per_instance;
2569+ auto dst_begin = state.render_bone_matrices_buffer .begin () + bone_base_offset;
2570+ auto dst_end = dst_begin + matrices_per_instance;
2571+ const ozz::math::Float4x4 identity = ozz::math::Float4x4::identity ();
2572+ std::fill (dst_begin, dst_end, identity);
25262573
2527- for ( size_t bone_idx = 0 ; bone_idx < bones_per_instance && bone_idx < joint_remaps. size (); ++bone_idx) {
2528- const uint16_t joint = joint_remaps[bone_idx] ;
2529- const size_t write_idx = bone_base_offset + bone_idx ;
2574+ // Add instance data at indexed position
2575+ state. render_instance_buffer [i]. transform = instance_transform ;
2576+ state. render_instance_buffer [i]. bone_matrix_offset = static_cast < uint32_t >( bone_base_offset) ;
25302577
2531- // Get the model-space transform for this joint
2532- if (joint < buffers->models .size ()) {
2533- const ozz::math::Float4x4& model_space = buffers->models [joint];
2534- const ozz::math::Float4x4& inv_bind_pose = inverse_bind_poses[bone_idx];
2535-
2536- // Skinning matrix = model_space * inverse_bind_pose
2537- state.render_bone_matrices_buffer [write_idx] = model_space * inv_bind_pose;
2538- } else {
2539- // Fallback to identity if joint index is out of range
2540- state.render_bone_matrices_buffer [write_idx] = ozz::math::Float4x4::identity ();
2578+ if (buffers && buffers->IsInitialized ()) {
2579+ if (use_gpu_skinning) {
2580+ const size_t copy_count = std::min (buffers->models .size (), skeleton_joint_count);
2581+ if (copy_count > 0 ) {
2582+ std::copy_n (buffers->models .begin (), copy_count, dst_begin);
2583+ }
2584+ } else {
2585+ const size_t palette_count = std::min (
2586+ {bones_per_instance, joint_remaps.empty () ? bones_per_instance : joint_remaps.size (), inverse_bind_poses.size ()});
2587+
2588+ for (size_t bone_idx = 0 ; bone_idx < palette_count; ++bone_idx) {
2589+ const uint16_t joint = joint_remaps.empty ()
2590+ ? static_cast <uint16_t >(bone_idx)
2591+ : joint_remaps[bone_idx];
2592+ if (joint < buffers->models .size ()) {
2593+ const ozz::math::Float4x4& model_space = buffers->models [joint];
2594+ const ozz::math::Float4x4& inv_bind_pose = inverse_bind_poses[bone_idx];
2595+ state.render_bone_matrices_buffer [bone_base_offset + bone_idx] = model_space * inv_bind_pose;
2596+ }
25412597 }
25422598 }
25432599 }
@@ -2625,6 +2681,8 @@ int main(int argc, const char** argv) {
26252681
26262682 ViewerState state;
26272683 state.window = window;
2684+ renderer.SetUseGpuMeshInstancing (state.enable_gpu_mesh_instancing );
2685+ renderer.SetUseGpuSkeletonInstancing (state.enable_gpu_skeleton_instancing );
26282686
26292687 const std::string bundle_argument = ParseBundleArgument (argc, argv);
26302688 const std::string animation_argument = ParseAnimationArgument (argc, argv);
0 commit comments