99#include < sycl/sycl.hpp>
1010#include < sycl/ext/oneapi/experimental/device_architecture.hpp>
1111
12+ #include " ../../../src/common/common.h" // for HumanMemUnit
13+
1214namespace xgboost {
1315namespace sycl {
1416namespace tree {
@@ -31,7 +33,7 @@ class HistDispatcher {
3133
3234 void GetL2Size (const ::sycl::device& device) {
3335 size_t l2_size = device.get_info <::sycl::info::device::global_mem_cache_size>();
34- LOG (INFO) << " Detected L2 Size = " << l2_size / 1024 / 1024 << " MB " ;
36+ LOG (INFO) << " Detected L2 Size = " << :: xgboost::common::HumanMemUnit ( l2_size) ;
3537 l2_size_per_eu = static_cast <float >(l2_size) / max_compute_units;
3638 }
3739
@@ -170,8 +172,9 @@ class HistDispatcher {
170172 */
171173 float th_block_per_eu = 1 + base_block_penalty - atomic_penalty / atomic_efficency;
172174
173- /* The model will failed mostly
174- * if (1 + base_block_penalty) ~ (atomic_penalty / atomic_efficency)
175+ /* We can't trust the decision of the approximate performance model
176+ * when the two penalties are close to each other,
177+ * i.e. (1 + base_block_penalty) ~ (atomic_penalty / atomic_efficency).
175178 * We manually limit the minimal value of th_block_per_eu
176179 * to make the behaviour in this region well defined.
177180 */
@@ -193,7 +196,7 @@ class HistDispatcher {
193196
194197 // For some datasets the buffer is not used; we estimate whether that is the case.
195198 template <typename GradientPairT>
196- size_t GetRequaredBufferSize (size_t max_n_rows, size_t nbins, size_t ncolumns,
199+ size_t GetRequiredBufferSize (size_t max_n_rows, size_t nbins, size_t ncolumns,
197200 size_t max_num_bins, size_t min_num_bins) const {
198201 size_t max_nblocks = kMaxGPUUtilisation * max_compute_units;
199202 auto build_params = GetHistBuildParameters<GradientPairT>
0 commit comments