Skip to content

Commit a5710df

Browse files
authored
Fixes for release_3.2.0 (#83)
* initial * linting --------- Co-authored-by: Dmitry Razdoburdin <>
1 parent 7991260 commit a5710df

File tree

8 files changed

+144
-81
lines changed

8 files changed

+144
-81
lines changed
Lines changed: 25 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,31 +1,42 @@
11
/*!
22
* Copyright by Contributors 2017-2025
33
*/
4-
#include <sycl/sycl.hpp>
5-
64
#include "../../../src/common/optional_weight.h"
75

6+
#include <sycl/sycl.hpp>
7+
88
#include "../device_manager.h"
99

1010
namespace xgboost::common::sycl_impl {
11-
double SumOptionalWeights(Context const* ctx, OptionalWeights const& weights) {
12-
sycl::DeviceManager device_manager;
13-
auto* qu = device_manager.GetQueue(ctx->Device());
1411

12+
template <typename T>
13+
T ElementWiseSum(::sycl::queue* qu, OptionalWeights const& weights) {
1514
const auto* data = weights.Data();
16-
double result = 0;
15+
T result = 0;
1716
{
18-
::sycl::buffer<double> buff(&result, 1);
17+
::sycl::buffer<T> buff(&result, 1);
1918
qu->submit([&](::sycl::handler& cgh) {
20-
auto reduction = ::sycl::reduction(buff, cgh, ::sycl::plus<>());
21-
cgh.parallel_for<>(::sycl::range<1>(weights.Size()), reduction,
22-
[=](::sycl::id<1> pid, auto& sum) {
23-
size_t i = pid[0];
24-
sum += data[i];
25-
});
26-
}).wait_and_throw();
19+
auto reduction = ::sycl::reduction(buff, cgh, ::sycl::plus<>());
20+
cgh.parallel_for<>(::sycl::range<1>(weights.Size()), reduction,
21+
[=](::sycl::id<1> pid, auto& sum) {
22+
size_t i = pid[0];
23+
sum += data[i];
24+
});
25+
}).wait_and_throw();
2726
}
2827

2928
return result;
3029
}
30+
31+
double SumOptionalWeights(Context const* ctx, OptionalWeights const& weights) {
32+
sycl::DeviceManager device_manager;
33+
auto* qu = device_manager.GetQueue(ctx->Device());
34+
35+
bool has_fp64_support = qu->get_device().has(::sycl::aspect::fp64);
36+
if (has_fp64_support) {
37+
return ElementWiseSum<double>(qu, weights);
38+
} else {
39+
return ElementWiseSum<float>(qu, weights);
40+
}
41+
}
3142
} // namespace xgboost::common::sycl_impl

plugin/sycl/common/stats.cc

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
/*!
2+
* Copyright by Contributors 2017-2025
3+
*/
4+
#include "../../../src/common/stats.h"
5+
6+
#include <sycl/sycl.hpp>
7+
8+
#include "../device_manager.h"
9+
10+
namespace xgboost::common::sycl_impl {
11+
void Mean(Context const* ctx, linalg::VectorView<float const> v, linalg::VectorView<float> out) {
12+
sycl::DeviceManager device_manager;
13+
auto* qu = device_manager.GetQueue(ctx->Device());
14+
15+
qu->submit([&](::sycl::handler& cgh) {
16+
auto reduction = ::sycl::reduction(&(out(0)), 0.0f, ::sycl::plus<float>(),
17+
::sycl::property::reduction::initialize_to_identity());
18+
cgh.parallel_for<>(::sycl::range<1>(v.Size()), reduction, [=](::sycl::id<1> pid, auto& sum) {
19+
size_t i = pid[0];
20+
sum += v(i);
21+
});
22+
}).wait_and_throw();
23+
}
24+
} // namespace xgboost::common::sycl_impl

plugin/sycl/context_helper.cc

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,18 +3,21 @@
33
* \file context_helper.cc
44
*/
55

6-
#include <sycl/sycl.hpp>
6+
#include "context_helper.h"
77

8+
#include <sycl/sycl.hpp>
89

910
#include "device_manager.h"
10-
#include "context_helper.h"
1111

1212
namespace xgboost {
1313
namespace sycl {
1414

1515
DeviceOrd DeviceFP64(const DeviceOrd& device) {
1616
DeviceManager device_manager;
17-
bool support_fp64 = device_manager.GetQueue(device)->get_device().has(::sycl::aspect::fp64);
17+
bool support_fp64 = true;
18+
if (device.IsSycl()) {
19+
support_fp64 = device_manager.GetQueue(device)->get_device().has(::sycl::aspect::fp64);
20+
}
1821
if (support_fp64) {
1922
return device;
2023
} else {

plugin/sycl/device_manager.cc

Lines changed: 63 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -10,73 +10,78 @@ namespace xgboost {
1010
namespace sycl {
1111

1212
::sycl::queue* DeviceManager::GetQueue(const DeviceOrd& device_spec) const {
13-
if (!device_spec.IsSycl()) {
14-
LOG(WARNING) << "Sycl kernel is executed with non-sycl context: "
15-
<< device_spec.Name() << ". "
16-
<< "Default sycl device_selector will be used.";
17-
}
13+
if (!device_spec.IsSycl()) {
14+
LOG(WARNING) << "Sycl kernel is executed with non-sycl context: " << device_spec.Name() << ". "
15+
<< "Default sycl device_selector will be used.";
16+
}
1817

19-
size_t queue_idx;
20-
bool not_use_default_selector = (device_spec.ordinal != kDefaultOrdinal) ||
21-
(collective::IsDistributed());
22-
DeviceRegister& device_register = GetDevicesRegister();
23-
if (not_use_default_selector) {
24-
const int device_idx =
25-
collective::IsDistributed() ? collective::GetRank() : device_spec.ordinal;
26-
if (device_spec.IsSyclDefault()) {
27-
auto& devices = device_register.devices;
28-
CHECK_LT(device_idx, devices.size());
29-
queue_idx = device_idx;
30-
} else if (device_spec.IsSyclCPU()) {
31-
auto& cpu_devices_idxes = device_register.cpu_devices_idxes;
32-
CHECK_LT(device_idx, cpu_devices_idxes.size());
33-
queue_idx = cpu_devices_idxes[device_idx];
34-
} else if (device_spec.IsSyclGPU()) {
35-
auto& gpu_devices_idxes = device_register.gpu_devices_idxes;
36-
CHECK_LT(device_idx, gpu_devices_idxes.size());
37-
queue_idx = gpu_devices_idxes[device_idx];
38-
} else {
39-
LOG(WARNING) << device_spec << " is not sycl, sycl:cpu or sycl:gpu";
40-
auto device = ::sycl::queue(::sycl::default_selector_v).get_device();
41-
queue_idx = device_register.devices.at(device);
42-
}
18+
size_t queue_idx;
19+
bool not_use_default_selector =
20+
(device_spec.ordinal != kDefaultOrdinal) || (collective::IsDistributed());
21+
DeviceRegister& device_register = GetDevicesRegister();
22+
if (not_use_default_selector) {
23+
if (device_spec.IsSyclDefault()) {
24+
auto& devices = device_register.devices;
25+
const int device_idx = collective::IsDistributed() ? collective::GetRank() % devices.size()
26+
: device_spec.ordinal;
27+
CHECK_LT(device_idx, devices.size());
28+
queue_idx = device_idx;
29+
} else if (device_spec.IsSyclCPU()) {
30+
auto& cpu_devices_idxes = device_register.cpu_devices_idxes;
31+
const int device_idx = collective::IsDistributed()
32+
? collective::GetRank() % cpu_devices_idxes.size()
33+
: device_spec.ordinal;
34+
CHECK_LT(device_idx, cpu_devices_idxes.size());
35+
queue_idx = cpu_devices_idxes[device_idx];
36+
} else if (device_spec.IsSyclGPU()) {
37+
auto& gpu_devices_idxes = device_register.gpu_devices_idxes;
38+
const int device_idx = collective::IsDistributed()
39+
? collective::GetRank() % gpu_devices_idxes.size()
40+
: device_spec.ordinal;
41+
CHECK_LT(device_idx, gpu_devices_idxes.size());
42+
queue_idx = gpu_devices_idxes[device_idx];
43+
} else {
44+
LOG(WARNING) << device_spec << " is not sycl, sycl:cpu or sycl:gpu";
45+
auto device = ::sycl::queue(::sycl::default_selector_v).get_device();
46+
queue_idx = device_register.devices.at(device);
47+
}
48+
} else {
49+
if (device_spec.IsSyclCPU()) {
50+
auto device = ::sycl::queue(::sycl::cpu_selector_v).get_device();
51+
queue_idx = device_register.devices.at(device);
52+
} else if (device_spec.IsSyclGPU()) {
53+
auto device = ::sycl::queue(::sycl::gpu_selector_v).get_device();
54+
queue_idx = device_register.devices.at(device);
4355
} else {
44-
if (device_spec.IsSyclCPU()) {
45-
auto device = ::sycl::queue(::sycl::cpu_selector_v).get_device();
46-
queue_idx = device_register.devices.at(device);
47-
} else if (device_spec.IsSyclGPU()) {
48-
auto device = ::sycl::queue(::sycl::gpu_selector_v).get_device();
49-
queue_idx = device_register.devices.at(device);
50-
} else {
51-
auto device = ::sycl::queue(::sycl::default_selector_v).get_device();
52-
queue_idx = device_register.devices.at(device);
53-
}
56+
auto device = ::sycl::queue(::sycl::default_selector_v).get_device();
57+
queue_idx = device_register.devices.at(device);
5458
}
55-
return &(device_register.queues[queue_idx]);
59+
}
60+
return &(device_register.queues[queue_idx]);
5661
}
5762

5863
DeviceManager::DeviceRegister& DeviceManager::GetDevicesRegister() const {
59-
static DeviceRegister device_register;
64+
static DeviceRegister device_register;
6065

61-
if (device_register.devices.size() == 0) {
62-
std::lock_guard<std::mutex> guard(device_registering_mutex);
63-
std::vector<::sycl::device> devices = ::sycl::device::get_devices();
64-
for (size_t i = 0; i < devices.size(); i++) {
65-
LOG(INFO) << "device_index = " << i << ", name = "
66-
<< devices[i].get_info<::sycl::info::device::name>();
67-
}
66+
if (device_register.devices.size() == 0) {
67+
std::lock_guard<std::mutex> guard(device_registering_mutex);
68+
std::vector<::sycl::device> devices = ::sycl::device::get_devices();
69+
for (size_t i = 0; i < devices.size(); i++) {
70+
LOG(INFO) << "device_index = " << i
71+
<< ", name = " << devices[i].get_info<::sycl::info::device::name>();
72+
}
6873

69-
for (size_t i = 0; i < devices.size(); i++) {
70-
device_register.devices[devices[i]] = i;
71-
device_register.queues.push_back(::sycl::queue(devices[i]));
72-
if (devices[i].is_cpu()) {
73-
device_register.cpu_devices_idxes.push_back(i);
74-
} else if (devices[i].is_gpu()) {
75-
device_register.gpu_devices_idxes.push_back(i);
76-
}
77-
}
74+
for (size_t i = 0; i < devices.size(); i++) {
75+
device_register.devices[devices[i]] = i;
76+
device_register.queues.push_back(::sycl::queue(devices[i]));
77+
if (devices[i].is_cpu()) {
78+
device_register.cpu_devices_idxes.push_back(i);
79+
} else if (devices[i].is_gpu()) {
80+
device_register.gpu_devices_idxes.push_back(i);
81+
}
7882
}
79-
return device_register;
83+
}
84+
return device_register;
8085
}
8186

8287
} // namespace sycl

src/common/linalg_op.h

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -159,9 +159,14 @@ void ElementWiseKernel(Context const* ctx, TensorView<T, D> t, Fn&& fn) {
159159
#elif defined(SYCL_LANGUAGE_VERSION)
160160
template <typename T, std::int32_t D, typename Fn, auto _tag = detail::SysTag()>
161161
void ElementWiseKernel(Context const* ctx, TensorView<T, D> t, Fn&& fn) {
162-
ctx->DispatchDevice([&] { cpu_impl::ElementWiseKernel(t, ctx->Threads(), std::forward<Fn>(fn)); },
163-
[&] { LOG(FATAL) << "Invalid TU"; },
164-
[&] { ::xgboost::sycl::linalg::ElementWiseKernel(t, std::forward<Fn>(fn)); });
162+
if (t.Device().IsCPU()) {
163+
cpu_impl::ElementWiseKernel(t, ctx->Threads(), std::forward<Fn>(fn));
164+
} else {
165+
ctx->DispatchDevice(
166+
[&] { cpu_impl::ElementWiseKernel(t, ctx->Threads(), std::forward<Fn>(fn)); },
167+
[&] { LOG(FATAL) << "Invalid TU"; },
168+
[&] { ::xgboost::sycl::linalg::ElementWiseKernel(t, std::forward<Fn>(fn)); });
169+
}
165170
}
166171
#else
167172
template <typename T, std::int32_t D, typename Fn, auto _tag = detail::SysTag()>

src/common/stats.cc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,8 @@ void Mean(Context const* ctx, linalg::VectorView<float const> v, linalg::Vector<
5151

5252
if (ctx->IsCUDA()) {
5353
cuda_impl::Mean(ctx, v, out->View(ctx->Device()));
54+
} else if (ctx->IsSycl()) {
55+
sycl_impl::Mean(ctx, v, out->View(ctx->Device()));
5456
} else {
5557
auto h_v = v;
5658
float n = v.Size();

src/common/stats.h

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
#include "xgboost/linalg.h" // TensorView,VectorView
1515
#include "xgboost/logging.h" // CHECK_GE
1616

17-
#if !defined(XGBOOST_USE_CUDA)
17+
#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_SYCL)
1818
#include "common.h" // AssertGPUSupport
1919
#endif
2020

@@ -140,6 +140,17 @@ inline void WeightedSampleMean(Context const*, bool, linalg::MatrixView<float co
140140
#endif // !defined(XGBOOST_USE_CUDA)
141141
} // namespace cuda_impl
142142

143+
namespace sycl_impl {
144+
void Mean(Context const* ctx, linalg::VectorView<float const> v, linalg::VectorView<float> out);
145+
146+
#if !defined(XGBOOST_USE_SYCL)
147+
inline void Mean(Context const*, linalg::VectorView<float const>, linalg::VectorView<float>) {
148+
common::AssertGPUSupport();
149+
}
150+
151+
#endif // !defined(XGBOOST_USE_SYCL)
152+
} // namespace sycl_impl
153+
143154
/**
144155
* @brief Calculate medians for each column of the input matrix.
145156
*/

src/objective/multiclass_obj.cu

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -106,10 +106,12 @@ class SoftmaxMultiClassObj : public ObjFunction {
106106
<< "Number of weights should be equal to number of data points.";
107107
}
108108
info.weights_.SetDevice(device);
109-
auto weights = common::MakeOptionalWeights(this->ctx_->Device(), info.weights_);
109+
auto weights = common::MakeOptionalWeights(device, info.weights_);
110110

111111
preds.SetDevice(device);
112-
auto predt = linalg::MakeTensorView(this->ctx_, &preds, n_samples, n_classes);
112+
Context cpu_context = Context();
113+
auto predt = linalg::MakeTensorView(device == ctx_->Device() ? this->ctx_ : &cpu_context,
114+
&preds, n_samples, n_classes);
113115
CHECK_EQ(labels.Shape(1), 1);
114116
auto y1d = labels.Slice(linalg::All(), 0);
115117
CHECK_EQ(y1d.Shape(0), info.num_row_);

0 commit comments

Comments
 (0)