Skip to content

Commit 0ce4d8f

Browse files
authored
Merge pull request #98086 from DarioSamo/transfer-queues-semaphores
Rewrite semaphore handling for transfer workers.
2 parents cdf45f7 + 7a936e8 commit 0ce4d8f

File tree

2 files changed

+29
-15
lines changed

2 files changed

+29
-15
lines changed

servers/rendering/rendering_device.cpp

Lines changed: 24 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -5052,7 +5052,6 @@ RenderingDevice::TransferWorker *RenderingDevice::_acquire_transfer_worker(uint3
50525052
// No existing worker was picked, we create a new one.
50535053
transfer_worker = memnew(TransferWorker);
50545054
transfer_worker->command_fence = driver->fence_create();
5055-
transfer_worker->command_semaphore = driver->semaphore_create();
50565055
transfer_worker->command_pool = driver->command_pool_create(transfer_queue_family, RDD::COMMAND_BUFFER_TYPE_PRIMARY);
50575056
transfer_worker->command_buffer = driver->command_buffer_create(transfer_worker->command_pool);
50585057
transfer_worker->index = transfer_worker_pool.size();
@@ -5075,7 +5074,7 @@ RenderingDevice::TransferWorker *RenderingDevice::_acquire_transfer_worker(uint3
50755074
// If there aren't enough bytes available in the staging buffer, we submit everything pending from the worker and wait for the work to be finished.
50765075
if (transfer_worker->recording) {
50775076
_end_transfer_worker(transfer_worker);
5078-
_submit_transfer_worker(transfer_worker, false);
5077+
_submit_transfer_worker(transfer_worker);
50795078
}
50805079

50815080
if (transfer_worker->submitted) {
@@ -5128,12 +5127,12 @@ void RenderingDevice::_end_transfer_worker(TransferWorker *p_transfer_worker) {
51285127
p_transfer_worker->recording = false;
51295128
}
51305129

5131-
void RenderingDevice::_submit_transfer_worker(TransferWorker *p_transfer_worker, bool p_signal_semaphore) {
5132-
const VectorView<RDD::SemaphoreID> execute_semaphore = p_signal_semaphore ? p_transfer_worker->command_semaphore : VectorView<RDD::SemaphoreID>();
5133-
driver->command_queue_execute_and_present(transfer_queue, {}, p_transfer_worker->command_buffer, execute_semaphore, p_transfer_worker->command_fence, {});
5134-
if (p_signal_semaphore) {
5130+
void RenderingDevice::_submit_transfer_worker(TransferWorker *p_transfer_worker, VectorView<RDD::SemaphoreID> p_signal_semaphores) {
5131+
driver->command_queue_execute_and_present(transfer_queue, {}, p_transfer_worker->command_buffer, p_signal_semaphores, p_transfer_worker->command_fence, {});
5132+
5133+
for (uint32_t i = 0; i < p_signal_semaphores.size(); i++) {
51355134
// Indicate the frame should wait on these semaphores before executing the main command buffer.
5136-
frames[frame].semaphores_to_wait_on.push_back(p_transfer_worker->command_semaphore);
5135+
frames[frame].semaphores_to_wait_on.push_back(p_signal_semaphores[i]);
51375136
}
51385137

51395138
p_transfer_worker->submitted = true;
@@ -5196,7 +5195,8 @@ void RenderingDevice::_check_transfer_worker_index_array(IndexArray *p_index_arr
51965195

51975196
void RenderingDevice::_submit_transfer_workers(bool p_operations_used_by_draw) {
51985197
MutexLock transfer_worker_lock(transfer_worker_pool_mutex);
5199-
for (TransferWorker *worker : transfer_worker_pool) {
5198+
for (uint32_t i = 0; i < transfer_worker_pool.size(); i++) {
5199+
TransferWorker *worker = transfer_worker_pool[i];
52005200
if (p_operations_used_by_draw) {
52015201
MutexLock lock(worker->operations_mutex);
52025202
if (worker->operations_processed >= transfer_worker_operation_used_by_draw[worker->index]) {
@@ -5208,8 +5208,9 @@ void RenderingDevice::_submit_transfer_workers(bool p_operations_used_by_draw) {
52085208
{
52095209
MutexLock lock(worker->thread_mutex);
52105210
if (worker->recording) {
5211+
VectorView<RDD::SemaphoreID> semaphores = p_operations_used_by_draw ? frames[frame].transfer_worker_semaphores[i] : VectorView<RDD::SemaphoreID>();
52115212
_end_transfer_worker(worker);
5212-
_submit_transfer_worker(worker, true);
5213+
_submit_transfer_worker(worker, semaphores);
52135214
}
52145215
}
52155216
}
@@ -5228,7 +5229,6 @@ void RenderingDevice::_wait_for_transfer_workers() {
52285229
void RenderingDevice::_free_transfer_workers() {
52295230
MutexLock transfer_worker_lock(transfer_worker_pool_mutex);
52305231
for (TransferWorker *worker : transfer_worker_pool) {
5231-
driver->semaphore_free(worker->command_semaphore);
52325232
driver->fence_free(worker->command_fence);
52335233
driver->buffer_free(worker->staging_buffer);
52345234
driver->command_pool_free(worker->command_pool);
@@ -6014,6 +6014,9 @@ Error RenderingDevice::initialize(RenderingContextDriver *p_context, DisplayServ
60146014
present_queue_family = main_queue_family;
60156015
}
60166016

6017+
// Use the processor count as the maximum number of transfer workers that can be created.
6018+
transfer_worker_pool_max_size = OS::get_singleton()->get_processor_count();
6019+
60176020
// Create data for all the frames.
60186021
for (uint32_t i = 0; i < frames.size(); i++) {
60196022
frames[i].index = 0;
@@ -6041,6 +6044,13 @@ Error RenderingDevice::initialize(RenderingContextDriver *p_context, DisplayServ
60416044

60426045
// Assign the main queue family and command pool to the command buffer pool.
60436046
frames[i].command_buffer_pool.pool = frames[i].command_pool;
6047+
6048+
// Create the semaphores for the transfer workers.
6049+
frames[i].transfer_worker_semaphores.resize(transfer_worker_pool_max_size);
6050+
for (uint32_t j = 0; j < transfer_worker_pool_max_size; j++) {
6051+
frames[i].transfer_worker_semaphores[j] = driver->semaphore_create();
6052+
ERR_FAIL_COND_V(!frames[i].transfer_worker_semaphores[j], FAILED);
6053+
}
60446054
}
60456055

60466056
// Start from frame count, so everything else is immediately old.
@@ -6087,9 +6097,6 @@ Error RenderingDevice::initialize(RenderingContextDriver *p_context, DisplayServ
60876097
ERR_FAIL_COND_V(err, FAILED);
60886098
}
60896099

6090-
// TODO: How should this max size be determined?
6091-
transfer_worker_pool_max_size = OS::get_singleton()->get_processor_count();
6092-
60936100
draw_list = nullptr;
60946101
compute_list = nullptr;
60956102

@@ -6452,6 +6459,10 @@ void RenderingDevice::finalize() {
64526459
for (uint32_t j = 0; j < buffer_pool.buffers.size(); j++) {
64536460
driver->semaphore_free(buffer_pool.semaphores[j]);
64546461
}
6462+
6463+
for (uint32_t j = 0; j < frames[i].transfer_worker_semaphores.size(); j++) {
6464+
driver->semaphore_free(frames[i].transfer_worker_semaphores[j]);
6465+
}
64556466
}
64566467

64576468
if (pipeline_cache_enabled) {

servers/rendering/rendering_device.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1267,7 +1267,6 @@ class RenderingDevice : public RenderingDeviceCommons {
12671267
RDD::CommandBufferID command_buffer;
12681268
RDD::CommandPoolID command_pool;
12691269
RDD::FenceID command_fence;
1270-
RDD::SemaphoreID command_semaphore;
12711270
bool recording = false;
12721271
bool submitted = false;
12731272
BinaryMutex thread_mutex;
@@ -1287,7 +1286,7 @@ class RenderingDevice : public RenderingDeviceCommons {
12871286
TransferWorker *_acquire_transfer_worker(uint32_t p_transfer_size, uint32_t p_required_align, uint32_t &r_staging_offset);
12881287
void _release_transfer_worker(TransferWorker *p_transfer_worker);
12891288
void _end_transfer_worker(TransferWorker *p_transfer_worker);
1290-
void _submit_transfer_worker(TransferWorker *p_transfer_worker, bool p_signal_semaphore);
1289+
void _submit_transfer_worker(TransferWorker *p_transfer_worker, VectorView<RDD::SemaphoreID> p_signal_semaphores = VectorView<RDD::SemaphoreID>());
12911290
void _wait_for_transfer_worker(TransferWorker *p_transfer_worker);
12921291
void _check_transfer_worker_operation(uint32_t p_transfer_worker_index, uint64_t p_transfer_worker_operation);
12931292
void _check_transfer_worker_buffer(Buffer *p_buffer);
@@ -1372,6 +1371,10 @@ class RenderingDevice : public RenderingDeviceCommons {
13721371
// Swap chains prepared for drawing during the frame that must be presented.
13731372
LocalVector<RDD::SwapChainID> swap_chains_to_present;
13741373

1374+
// Semaphores the transfer workers can use to wait before rendering the frame.
1375+
// This must have the same size as the transfer worker pool.
1376+
TightLocalVector<RDD::SemaphoreID> transfer_worker_semaphores;
1377+
13751378
// Extra command buffer pool used for driver workarounds.
13761379
RDG::CommandBufferPool command_buffer_pool;
13771380

0 commit comments

Comments
 (0)