From cddb23e0a2858897eac33974a0d3adda5bfe047f Mon Sep 17 00:00:00 2001 From: Dmitry Razdoburdin Date: Fri, 7 Mar 2025 16:48:08 +0100 Subject: [PATCH 1/2] Fix for multinode processing --- plugin/sycl/device_manager.cc | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/plugin/sycl/device_manager.cc b/plugin/sycl/device_manager.cc index dc3939934e31..e3b4d9a35e98 100644 --- a/plugin/sycl/device_manager.cc +++ b/plugin/sycl/device_manager.cc @@ -19,20 +19,27 @@ ::sycl::queue* DeviceManager::GetQueue(const DeviceOrd& device_spec) const { size_t queue_idx; bool not_use_default_selector = (device_spec.ordinal != kDefaultOrdinal) || (collective::IsDistributed()); - DeviceRegister& device_register = GetDevicesRegister(); if (not_use_default_selector) { - const int device_idx = - collective::IsDistributed() ? collective::GetRank() : device_spec.ordinal; + DeviceRegister& device_register = GetDevicesRegister(); if (device_spec.IsSyclDefault()) { auto& devices = device_register.devices; + const int device_idx = collective::IsDistributed() + ? collective::GetRank() % devices.size() + : device_spec.ordinal; CHECK_LT(device_idx, devices.size()); queue_idx = device_idx; } else if (device_spec.IsSyclCPU()) { auto& cpu_devices_idxes = device_register.cpu_devices_idxes; + const int device_idx = collective::IsDistributed() + ? collective::GetRank() % cpu_devices_idxes.size() + : device_spec.ordinal; CHECK_LT(device_idx, cpu_devices_idxes.size()); queue_idx = cpu_devices_idxes[device_idx]; } else if (device_spec.IsSyclGPU()) { auto& gpu_devices_idxes = device_register.gpu_devices_idxes; + const int device_idx = collective::IsDistributed() + ? collective::GetRank() % gpu_devices_idxes.size() + : device_spec.ordinal; CHECK_LT(device_idx, gpu_devices_idxes.size()); queue_idx = gpu_devices_idxes[device_idx]; } else { From ae632dd3f2e65938e1ce27295fd14ae0ff0ea764 Mon Sep 17 00:00:00 2001 From: Dmitry Razdoburdin <> Date: Fri, 7 Mar 2025 07:58:29 -0800 Subject: [PATCH 2/2] fix build --- gputreeshap | 2 +- plugin/sycl/device_manager.cc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/gputreeshap b/gputreeshap index 40eae8c4c459..787259b412c1 160000 --- a/gputreeshap +++ b/gputreeshap @@ -1 +1 @@ -Subproject commit 40eae8c4c45974705f8053e4d3d05b88e3cfaefd +Subproject commit 787259b412c18ab8d5f24bf2b8bd6a59ff8208f3 diff --git a/plugin/sycl/device_manager.cc b/plugin/sycl/device_manager.cc index e3b4d9a35e98..ee652065db23 100644 --- a/plugin/sycl/device_manager.cc +++ b/plugin/sycl/device_manager.cc @@ -19,8 +19,8 @@ ::sycl::queue* DeviceManager::GetQueue(const DeviceOrd& device_spec) const { size_t queue_idx; bool not_use_default_selector = (device_spec.ordinal != kDefaultOrdinal) || (collective::IsDistributed()); + DeviceRegister& device_register = GetDevicesRegister(); if (not_use_default_selector) { - DeviceRegister& device_register = GetDevicesRegister(); if (device_spec.IsSyclDefault()) { auto& devices = device_register.devices; const int device_idx = collective::IsDistributed()