Skip to content

Commit 6ae62f1

Browse files
committed
Reduce thread divergence in covariance transport
Currently, the covariance transport uses the Jacobian engine which is templated on the frame type. While this makes the code easier to read and write, it requires the compiler to duplicate a lot of code for each of the frame types, and this duplicated code counts as multiple branches for the sake of GPU execution. Thus, this templating increases the amount of thread divergence. This commit refactors the Jacobian engine into smaller parts, some of which are templated on the frame type and some of which are not. Client code and then take a more fine-grained approach to branching and improve divergence.
1 parent 861e70e commit 6ae62f1

File tree

9 files changed

+198
-114
lines changed

9 files changed

+198
-114
lines changed

core/include/detray/geometry/detail/tracking_surface_kernels.hpp

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -71,8 +71,9 @@ struct tracking_surface_kernels : public surface_kernels<algebra_t> {
7171

7272
using frame_t = typename mask_group_t::value_type::local_frame;
7373

74-
return detail::jacobian_engine<frame_t>::free_to_bound_jacobian(
75-
trf3, free_vec);
74+
return detail::jacobian_engine<
75+
algebra_t>::template free_to_bound_jacobian<frame_t>(trf3,
76+
free_vec);
7677
}
7778
};
7879

@@ -87,8 +88,9 @@ struct tracking_surface_kernels : public surface_kernels<algebra_t> {
8788

8889
using frame_t = typename mask_group_t::value_type::local_frame;
8990

90-
return detail::jacobian_engine<frame_t>::bound_to_free_jacobian(
91-
trf3, mask_group[index], bound_vec);
91+
return detail::jacobian_engine<algebra_t>::
92+
template bound_to_free_jacobian<frame_t>(
93+
trf3, mask_group[index], bound_vec);
9294
}
9395
};
9496

@@ -105,8 +107,8 @@ struct tracking_surface_kernels : public surface_kernels<algebra_t> {
105107

106108
using frame_t = typename mask_group_t::value_type::local_frame;
107109

108-
return detail::jacobian_engine<frame_t>::path_correction(
109-
pos, dir, dtds, dqopds, trf3);
110+
return detail::jacobian_engine<algebra_t>::template path_correction<
111+
frame_t>(pos, dir, dtds, dqopds, trf3);
110112
}
111113
};
112114
};

core/include/detray/propagator/actors/parameter_transporter.hpp

Lines changed: 51 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -29,44 +29,45 @@ struct parameter_transporter : actor {
2929
using bound_matrix_t = bound_matrix<algebra_t>;
3030
// Matrix type for bound to free jacobian
3131
using bound_to_free_matrix_t = bound_to_free_matrix<algebra_t>;
32+
// Matrix type for free to bound jacobian
33+
using free_to_bound_matrix_t = free_to_bound_matrix<algebra_t>;
3234
/// @}
3335

34-
struct get_full_jacobian_kernel {
35-
36+
struct get_free_to_bound_jacobian_kernel {
3637
template <typename mask_group_t, typename index_t,
3738
typename stepper_state_t>
38-
DETRAY_HOST_DEVICE inline bound_matrix_t operator()(
39+
DETRAY_HOST_DEVICE inline free_to_bound_matrix_t operator()(
3940
const mask_group_t& /*mask_group*/, const index_t& /*index*/,
4041
const transform3_type& trf3,
41-
const bound_to_free_matrix_t& bound_to_free_jacobian,
42-
const material<scalar_type>* vol_mat_ptr,
4342
const stepper_state_t& stepping) const {
44-
4543
using frame_t = typename mask_group_t::value_type::shape::
4644
template local_frame_type<algebra_t>;
4745

48-
using jacobian_engine_t = detail::jacobian_engine<frame_t>;
49-
50-
using free_matrix_t = free_matrix<algebra_t>;
51-
using free_to_bound_matrix_t =
52-
typename jacobian_engine_t::free_to_bound_matrix_type;
46+
// Declare jacobian for bound to free coordinate transform
47+
free_to_bound_matrix_t jac_to_local =
48+
matrix::zero<free_to_bound_matrix_t>();
5349

54-
// Free to bound jacobian at the destination surface
55-
const free_to_bound_matrix_t free_to_bound_jacobian =
56-
jacobian_engine_t::free_to_bound_jacobian(trf3, stepping());
50+
detail::jacobian_engine<algebra_t>::
51+
template free_to_bound_jacobian_step_1<frame_t>(
52+
jac_to_local, trf3, stepping().pos(), stepping().dir());
5753

58-
// Path correction factor
59-
const free_matrix_t path_correction =
60-
jacobian_engine_t::path_correction(
61-
stepping().pos(), stepping().dir(), stepping.dtds(),
62-
stepping.dqopds(vol_mat_ptr), trf3);
54+
return jac_to_local;
55+
}
56+
};
6357

64-
const free_matrix_t correction_term =
65-
matrix::identity<free_matrix_t>() + path_correction;
58+
struct get_free_to_path_derivative_kernel {
59+
template <typename mask_group_t, typename index_t,
60+
typename stepper_state_t>
61+
DETRAY_HOST_DEVICE inline auto operator()(
62+
const mask_group_t& /*mask_group*/, const index_t& /*index*/,
63+
const transform3_type& trf3,
64+
const stepper_state_t& stepping) const {
65+
using frame_t = typename mask_group_t::value_type::shape::
66+
template local_frame_type<algebra_t>;
6667

67-
return free_to_bound_jacobian *
68-
(correction_term *
69-
(stepping.transport_jacobian() * bound_to_free_jacobian));
68+
return detail::jacobian_engine<algebra_t>::
69+
template free_to_path_derivative<frame_t>(
70+
stepping().pos(), stepping().dir(), stepping.dtds(), trf3);
7071
}
7172
};
7273

@@ -143,9 +144,32 @@ struct parameter_transporter : actor {
143144
? vol.material_parameters(stepping().pos())
144145
: nullptr;
145146

146-
return sf.template visit_mask<get_full_jacobian_kernel>(
147-
sf.transform(gctx), bound_to_free_jacobian, vol_mat_ptr,
148-
propagation._stepping);
147+
auto free_to_bound_jacobian =
148+
sf.template visit_mask<get_free_to_bound_jacobian_kernel>(
149+
sf.transform(gctx), propagation._stepping);
150+
151+
detail::jacobian_engine<algebra_t>::free_to_bound_jacobian_step_2(
152+
free_to_bound_jacobian, stepping().dir());
153+
154+
const auto path_to_free_derivative =
155+
detail::jacobian_engine<algebra_t>::path_to_free_derivative(
156+
stepping().dir(), stepping.dtds(),
157+
stepping.dqopds(vol_mat_ptr));
158+
159+
const auto free_to_path_derivative =
160+
sf.template visit_mask<get_free_to_path_derivative_kernel>(
161+
sf.transform(gctx), propagation._stepping);
162+
163+
const auto path_correction =
164+
path_to_free_derivative * free_to_path_derivative;
165+
166+
const auto correction_term =
167+
matrix::identity<std::decay_t<decltype(path_correction)>>() +
168+
path_correction;
169+
170+
return free_to_bound_jacobian *
171+
(correction_term *
172+
(stepping.transport_jacobian() * bound_to_free_jacobian));
149173
}
150174

151175
}; // namespace detray

core/include/detray/propagator/detail/jacobian_engine.hpp

Lines changed: 108 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -22,15 +22,12 @@
2222
namespace detray::detail {
2323

2424
/// @brief Generate Jacobians
25-
template <typename frame_t>
26-
requires std::is_object_v<typename frame_t::loc_point> struct jacobian_engine {
25+
template <typename algebra_t>
26+
struct jacobian_engine {
2727

2828
/// @name Type definitions for the struct
2929
/// @{
30-
using frame_type = frame_t;
31-
using jacobian_t = jacobian<frame_t>;
32-
33-
using algebra_type = typename frame_t::algebra_type;
30+
using algebra_type = algebra_t;
3431
using transform3_type = dtransform3D<algebra_type>;
3532
using scalar_type = dscalar<algebra_type>;
3633
using point3_type = dpoint3D<algebra_type>;
@@ -42,39 +39,33 @@ requires std::is_object_v<typename frame_t::loc_point> struct jacobian_engine {
4239
using path_to_free_matrix_type = path_to_free_matrix<algebra_type>;
4340
/// @}
4441

45-
template <typename mask_t>
46-
DETRAY_HOST_DEVICE static inline bound_to_free_matrix_type
47-
bound_to_free_jacobian(
48-
const transform3_type& trf3, const mask_t& mask,
49-
const bound_parameters_vector<algebra_type>& bound_vec) {
42+
template <typename frame_t>
43+
requires std::is_object_v<typename frame_t::loc_point>
44+
DETRAY_HOST_DEVICE static inline void bound_to_free_jacobian_step_1(
45+
bound_to_free_matrix_type& jac_to_global,
46+
const transform3_type& trf3, const vector3_type& pos,
47+
const vector3_type& dir) {
48+
using jacobian_t = jacobian<frame_t>;
5049

51-
// Declare jacobian for bound to free coordinate transform
52-
bound_to_free_matrix_type jac_to_global =
53-
matrix::zero<bound_to_free_matrix_type>();
50+
// Set d(x,y,z)/d(loc0, loc1)
51+
jacobian_t::set_bound_pos_to_free_pos_derivative(jac_to_global, trf3,
52+
pos, dir);
53+
}
54+
55+
DETRAY_HOST_DEVICE static inline void bound_to_free_jacobian_step_2(
56+
bound_to_free_matrix_type& jac_to_global,
57+
const bound_parameters_vector<algebra_type>& bound_vec) {
58+
// Set d(bound time)/d(free time)
59+
getter::element(jac_to_global, e_free_time, e_bound_time) = 1.f;
5460

5561
// Get trigonometric values
5662
const scalar_type theta{bound_vec.theta()};
5763
const scalar_type phi{bound_vec.phi()};
58-
5964
const scalar_type cos_theta{math::cos(theta)};
6065
const scalar_type sin_theta{math::sin(theta)};
6166
const scalar_type cos_phi{math::cos(phi)};
6267
const scalar_type sin_phi{math::sin(phi)};
6368

64-
// Global position and direction
65-
const free_track_parameters<algebra_type> free_params =
66-
bound_to_free_vector(trf3, mask, bound_vec);
67-
68-
const vector3_type pos = free_params.pos();
69-
const vector3_type dir = free_params.dir();
70-
71-
// Set d(x,y,z)/d(loc0, loc1)
72-
jacobian_t::set_bound_pos_to_free_pos_derivative(jac_to_global, trf3,
73-
pos, dir);
74-
75-
// Set d(bound time)/d(free time)
76-
getter::element(jac_to_global, e_free_time, e_bound_time) = 1.f;
77-
7869
// Set d(n_x,n_y,n_z)/d(phi, theta)
7970
getter::element(jac_to_global, e_free_dir0, e_bound_phi) =
8071
-sin_theta * sin_phi;
@@ -86,42 +77,71 @@ requires std::is_object_v<typename frame_t::loc_point> struct jacobian_engine {
8677
cos_theta * sin_phi;
8778
getter::element(jac_to_global, e_free_dir2, e_bound_theta) = -sin_theta;
8879
getter::element(jac_to_global, e_free_qoverp, e_bound_qoverp) = 1.f;
80+
}
81+
82+
template <typename frame_t>
83+
requires std::is_object_v<typename frame_t::loc_point>
84+
DETRAY_HOST_DEVICE static inline void bound_to_free_jacobian_step_3(
85+
bound_to_free_matrix_type& jac_to_global,
86+
const transform3_type& trf3, const vector3_type& pos,
87+
const vector3_type& dir) {
88+
using jacobian_t = jacobian<frame_t>;
8989

9090
// Set d(x,y,z)/d(phi, theta)
9191
jacobian_t::set_bound_angle_to_free_pos_derivative(jac_to_global, trf3,
9292
pos, dir);
93-
94-
return jac_to_global;
9593
}
9694

97-
DETRAY_HOST_DEVICE
98-
static inline free_to_bound_matrix_type free_to_bound_jacobian(
99-
const transform3_type& trf3,
100-
const free_track_parameters<algebra_type>& free_params) {
101-
102-
// Declare jacobian for bound to free coordinate transform
103-
free_to_bound_matrix_type jac_to_local =
104-
matrix::zero<free_to_bound_matrix_type>();
95+
template <typename frame_t, typename mask_t>
96+
requires std::is_object_v<typename frame_t::loc_point>
97+
DETRAY_HOST_DEVICE static inline bound_to_free_matrix_type
98+
bound_to_free_jacobian(
99+
const transform3_type& trf3, const mask_t& mask,
100+
const bound_parameters_vector<algebra_type>& bound_vec) {
101+
bound_to_free_matrix_type jac_to_global =
102+
matrix::zero<bound_to_free_matrix_type>();
105103

106104
// Global position and direction
105+
const free_track_parameters<algebra_type> free_params =
106+
bound_to_free_vector(trf3, mask, bound_vec);
107+
107108
const vector3_type pos = free_params.pos();
108109
const vector3_type dir = free_params.dir();
109110

110-
const scalar_type theta{vector::theta(dir)};
111-
const scalar_type phi{vector::phi(dir)};
111+
bound_to_free_jacobian_step_1<frame_t>(jac_to_global, trf3, pos, dir);
112+
bound_to_free_jacobian_step_2(jac_to_global, bound_vec);
113+
bound_to_free_jacobian_step_3<frame_t>(jac_to_global, trf3, pos, dir);
112114

113-
const scalar_type cos_theta{math::cos(theta)};
114-
const scalar_type sin_theta{math::sin(theta)};
115-
const scalar_type cos_phi{math::cos(phi)};
116-
const scalar_type sin_phi{math::sin(phi)};
115+
return jac_to_global;
116+
}
117+
118+
template <typename frame_t>
119+
requires std::is_object_v<typename frame_t::loc_point>
120+
DETRAY_HOST_DEVICE static inline void free_to_bound_jacobian_step_1(
121+
free_to_bound_matrix_type& jac_to_local,
122+
const transform3_type& trf3, const vector3_type& pos,
123+
const vector3_type& dir) {
124+
using jacobian_t = jacobian<frame_t>;
117125

118126
// Set d(loc0, loc1)/d(x,y,z)
119127
jacobian_t::set_free_pos_to_bound_pos_derivative(jac_to_local, trf3,
120128
pos, dir);
129+
}
130+
131+
DETRAY_HOST_DEVICE static inline void free_to_bound_jacobian_step_2(
132+
free_to_bound_matrix_type& jac_to_local, const vector3_type& dir) {
121133

122134
// Set d(free time)/d(bound time)
123135
getter::element(jac_to_local, e_bound_time, e_free_time) = 1.f;
124136

137+
const scalar_type theta{vector::theta(dir)};
138+
const scalar_type phi{vector::phi(dir)};
139+
140+
const scalar_type cos_theta{math::cos(theta)};
141+
const scalar_type sin_theta{math::sin(theta)};
142+
const scalar_type cos_phi{math::cos(phi)};
143+
const scalar_type sin_phi{math::sin(phi)};
144+
125145
// Set d(phi, theta)/d(n_x, n_y, n_z)
126146
// @note This codes have a serious bug when theta is equal to zero...
127147
getter::element(jac_to_local, e_bound_phi, e_free_dir0) =
@@ -136,17 +156,42 @@ requires std::is_object_v<typename frame_t::loc_point> struct jacobian_engine {
136156

137157
// Set d(Free Qop)/d(Bound Qop)
138158
getter::element(jac_to_local, e_bound_qoverp, e_free_qoverp) = 1.f;
159+
}
160+
161+
template <typename frame_t>
162+
requires std::is_object_v<typename frame_t::loc_point>
163+
DETRAY_HOST_DEVICE static inline free_to_bound_matrix_type
164+
free_to_bound_jacobian(
165+
const transform3_type& trf3,
166+
const free_track_parameters<algebra_type>& free_params) {
167+
168+
// Declare jacobian for bound to free coordinate transform
169+
free_to_bound_matrix_type jac_to_local =
170+
matrix::zero<free_to_bound_matrix_type>();
171+
172+
// Global position and direction
173+
const vector3_type pos = free_params.pos();
174+
const vector3_type dir = free_params.dir();
175+
176+
free_to_bound_jacobian_step_1<frame_t>(jac_to_local, trf3, pos, dir);
177+
free_to_bound_jacobian_step_2(jac_to_local, dir);
139178

140179
return jac_to_local;
141180
}
142181

143-
DETRAY_HOST_DEVICE static inline free_matrix<algebra_type> path_correction(
144-
const vector3_type& pos, const vector3_type& dir,
145-
const vector3_type& dtds, const scalar_type dqopds,
146-
const transform3_type& trf3) {
182+
template <typename frame_t>
183+
requires std::is_object_v<typename frame_t::loc_point>
184+
DETRAY_HOST_DEVICE static inline auto free_to_path_derivative(
185+
const vector3_type& pos, const vector3_type& dir,
186+
const vector3_type& dtds, const transform3_type& trf3) {
187+
using jacobian_t = jacobian<frame_t>;
147188

148-
free_to_path_matrix_type path_derivative =
149-
jacobian_t::path_derivative(trf3, pos, dir, dtds);
189+
return jacobian_t::path_derivative(trf3, pos, dir, dtds);
190+
}
191+
192+
DETRAY_HOST_DEVICE static inline auto path_to_free_derivative(
193+
const vector3_type& dir, const vector3_type& dtds,
194+
const scalar_type dqopds) {
150195

151196
path_to_free_matrix_type derivative =
152197
matrix::zero<path_to_free_matrix_type>();
@@ -158,7 +203,18 @@ requires std::is_object_v<typename frame_t::loc_point> struct jacobian_engine {
158203
getter::element(derivative, e_free_dir2, 0u) = dtds[2];
159204
getter::element(derivative, e_free_qoverp, 0u) = dqopds;
160205

161-
return derivative * path_derivative;
206+
return derivative;
207+
}
208+
209+
template <typename frame_t>
210+
requires std::is_object_v<typename frame_t::loc_point>
211+
DETRAY_HOST_DEVICE static inline free_matrix<algebra_type>
212+
path_correction(const vector3_type& pos, const vector3_type& dir,
213+
const vector3_type& dtds, const scalar_type dqopds,
214+
const transform3_type& trf3) {
215+
216+
return path_to_free_derivative(dir, dtds, dqopds) *
217+
free_to_path_derivative<frame_t>(pos, dir, dtds, trf3);
162218
}
163219
};
164220

core/include/detray/utils/curvilinear_frame.hpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,7 @@ struct curvilinear_frame {
2727
using unit_vectors_type = unit_vectors<vector3>;
2828
using bound_to_free_matrix_type = bound_to_free_matrix<algebra_t>;
2929
using bound_vector_type = bound_parameters_vector<algebra_t>;
30-
using jacobian_engine_type =
31-
detail::jacobian_engine<cartesian2D<algebra_t>>;
30+
using jacobian_engine_type = detail::jacobian_engine<algebra_t>;
3231
using free_track_parameters_type = free_track_parameters<algebra_t>;
3332

3433
DETRAY_HOST_DEVICE
@@ -47,8 +46,9 @@ struct curvilinear_frame {
4746

4847
DETRAY_HOST_DEVICE
4948
bound_to_free_matrix_type bound_to_free_jacobian() const {
50-
return jacobian_engine_type().bound_to_free_jacobian(
51-
m_trf, mask<rectangle2D, algebra_t>{}, m_bound_vec);
49+
return jacobian_engine_type()
50+
.template bound_to_free_jacobian<cartesian2D<algebra_t>>(
51+
m_trf, mask<rectangle2D, algebra_t>{}, m_bound_vec);
5252
}
5353

5454
transform3_type m_trf{};

0 commit comments

Comments
 (0)