Skip to content

Commit 58a7f9b

Browse files
authored
Merge pull request #98271 from DarioSamo/d3d12-enhanced-barrier-fix
Move transitions of textures from transfer workers to the graphics queue.
2 parents 4631a61 + 8c3e46b commit 58a7f9b

File tree

6 files changed

+48
-12
lines changed

6 files changed

+48
-12
lines changed

drivers/d3d12/rendering_device_driver_d3d12.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2003,6 +2003,8 @@ static D3D12_BARRIER_LAYOUT _rd_texture_layout_to_d3d12_barrier_layout(RDD::Text
20032003
switch (p_texture_layout) {
20042004
case RDD::TEXTURE_LAYOUT_UNDEFINED:
20052005
return D3D12_BARRIER_LAYOUT_UNDEFINED;
2006+
case RDD::TEXTURE_LAYOUT_GENERAL:
2007+
return D3D12_BARRIER_LAYOUT_COMMON;
20062008
case RDD::TEXTURE_LAYOUT_STORAGE_OPTIMAL:
20072009
return D3D12_BARRIER_LAYOUT_UNORDERED_ACCESS;
20082010
case RDD::TEXTURE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
@@ -6175,6 +6177,8 @@ uint64_t RenderingDeviceDriverD3D12::api_trait_get(ApiTrait p_trait) {
61756177
return false;
61766178
case API_TRAIT_CLEARS_WITH_COPY_ENGINE:
61776179
return false;
6180+
case API_TRAIT_USE_GENERAL_IN_COPY_QUEUES:
6181+
return true;
61786182
default:
61796183
return RenderingDeviceDriver::api_trait_get(p_trait);
61806184
}

drivers/vulkan/rendering_device_driver_vulkan.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -266,6 +266,7 @@ static const VkFormat RD_TO_VK_FORMAT[RDD::DATA_FORMAT_MAX] = {
266266

267267
static VkImageLayout RD_TO_VK_LAYOUT[RDD::TEXTURE_LAYOUT_MAX] = {
268268
VK_IMAGE_LAYOUT_UNDEFINED, // TEXTURE_LAYOUT_UNDEFINED
269+
VK_IMAGE_LAYOUT_GENERAL, // TEXTURE_LAYOUT_GENERAL
269270
VK_IMAGE_LAYOUT_GENERAL, // TEXTURE_LAYOUT_STORAGE_OPTIMAL
270271
VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, // TEXTURE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL
271272
VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, // TEXTURE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL

servers/rendering/rendering_device.cpp

Lines changed: 35 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1243,6 +1243,7 @@ Error RenderingDevice::_texture_initialize(RID p_texture, uint32_t p_layer, cons
12431243
TransferWorker *transfer_worker = nullptr;
12441244
const uint8_t *read_ptr = p_data.ptr();
12451245
uint8_t *write_ptr = nullptr;
1246+
const RDD::TextureLayout copy_dst_layout = driver->api_trait_get(RDD::API_TRAIT_USE_GENERAL_IN_COPY_QUEUES) ? RDD::TEXTURE_LAYOUT_GENERAL : RDD::TEXTURE_LAYOUT_COPY_DST_OPTIMAL;
12461247
for (uint32_t pass = 0; pass < 2; pass++) {
12471248
const bool copy_pass = (pass == 1);
12481249
if (copy_pass) {
@@ -1267,7 +1268,7 @@ Error RenderingDevice::_texture_initialize(RID p_texture, uint32_t p_layer, cons
12671268
tb.texture = texture->driver_id;
12681269
tb.dst_access = RDD::BARRIER_ACCESS_COPY_WRITE_BIT;
12691270
tb.prev_layout = RDD::TEXTURE_LAYOUT_UNDEFINED;
1270-
tb.next_layout = RDD::TEXTURE_LAYOUT_COPY_DST_OPTIMAL;
1271+
tb.next_layout = copy_dst_layout;
12711272
tb.subresources.aspect = texture->barrier_aspect_flags;
12721273
tb.subresources.mipmap_count = texture->mipmaps;
12731274
tb.subresources.base_layer = p_layer;
@@ -1313,7 +1314,7 @@ Error RenderingDevice::_texture_initialize(RID p_texture, uint32_t p_layer, cons
13131314
copy_region.texture_subresources.layer_count = 1;
13141315
copy_region.texture_offset = Vector3i(0, 0, z);
13151316
copy_region.texture_region_size = Vector3i(logic_width, logic_height, 1);
1316-
driver->command_copy_buffer_to_texture(transfer_worker->command_buffer, transfer_worker->staging_buffer, texture->driver_id, RDD::TEXTURE_LAYOUT_COPY_DST_OPTIMAL, copy_region);
1317+
driver->command_copy_buffer_to_texture(transfer_worker->command_buffer, transfer_worker->staging_buffer, texture->driver_id, copy_dst_layout, copy_region);
13171318
}
13181319

13191320
staging_local_offset += to_allocate;
@@ -1332,14 +1333,13 @@ Error RenderingDevice::_texture_initialize(RID p_texture, uint32_t p_layer, cons
13321333
RDD::TextureBarrier tb;
13331334
tb.texture = texture->driver_id;
13341335
tb.src_access = RDD::BARRIER_ACCESS_COPY_WRITE_BIT;
1335-
tb.prev_layout = RDD::TEXTURE_LAYOUT_COPY_DST_OPTIMAL;
1336+
tb.prev_layout = copy_dst_layout;
13361337
tb.next_layout = RDD::TEXTURE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
13371338
tb.subresources.aspect = texture->barrier_aspect_flags;
13381339
tb.subresources.mipmap_count = texture->mipmaps;
13391340
tb.subresources.base_layer = p_layer;
13401341
tb.subresources.layer_count = 1;
1341-
1342-
driver->command_pipeline_barrier(transfer_worker->command_buffer, RDD::PIPELINE_STAGE_COPY_BIT, RDD::PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, {}, {}, tb);
1342+
transfer_worker->texture_barriers.push_back(tb);
13431343
}
13441344

13451345
_release_transfer_worker(transfer_worker);
@@ -5152,6 +5152,21 @@ void RenderingDevice::_wait_for_transfer_worker(TransferWorker *p_transfer_worke
51525152
MutexLock lock(p_transfer_worker->operations_mutex);
51535153
p_transfer_worker->operations_processed = p_transfer_worker->operations_submitted;
51545154
}
5155+
5156+
if (!p_transfer_worker->texture_barriers.is_empty()) {
5157+
MutexLock transfer_worker_lock(transfer_worker_pool_mutex);
5158+
_flush_barriers_for_transfer_worker(p_transfer_worker);
5159+
}
5160+
}
5161+
5162+
// Moves every texture barrier queued on the given transfer worker into the
// shared transfer_worker_pool_texture_barriers list, then empties the worker's
// own list. Does nothing when the worker has no queued barriers.
//
// The pooled list is what _submit_transfer_workers() later records with
// command_pipeline_barrier() on the draw command buffer, so the layout
// transitions are issued there instead of on the worker's command buffer.
//
// Both call sites visible in this change hold transfer_worker_pool_mutex while
// calling this function; it takes no lock itself.
void RenderingDevice::_flush_barriers_for_transfer_worker(TransferWorker *p_transfer_worker) {
5163+
if (!p_transfer_worker->texture_barriers.is_empty()) {
5164+
for (uint32_t i = 0; i < p_transfer_worker->texture_barriers.size(); i++) {
5165+
transfer_worker_pool_texture_barriers.push_back(p_transfer_worker->texture_barriers[i]);
5166+
}
5167+
5168+
p_transfer_worker->texture_barriers.clear();
5169+
}
51555170
}
51565171

51575172
void RenderingDevice::_check_transfer_worker_operation(uint32_t p_transfer_worker_index, uint64_t p_transfer_worker_operation) {
@@ -5193,11 +5208,11 @@ void RenderingDevice::_check_transfer_worker_index_array(IndexArray *p_index_arr
51935208
}
51945209
}
51955210

5196-
void RenderingDevice::_submit_transfer_workers(bool p_operations_used_by_draw) {
5211+
void RenderingDevice::_submit_transfer_workers(RDD::CommandBufferID p_draw_command_buffer) {
51975212
MutexLock transfer_worker_lock(transfer_worker_pool_mutex);
51985213
for (uint32_t i = 0; i < transfer_worker_pool.size(); i++) {
51995214
TransferWorker *worker = transfer_worker_pool[i];
5200-
if (p_operations_used_by_draw) {
5215+
if (p_draw_command_buffer) {
52015216
MutexLock lock(worker->operations_mutex);
52025217
if (worker->operations_processed >= transfer_worker_operation_used_by_draw[worker->index]) {
52035218
// The operation used by the draw has already been processed, we don't need to wait on the worker.
@@ -5208,12 +5223,21 @@ void RenderingDevice::_submit_transfer_workers(bool p_operations_used_by_draw) {
52085223
{
52095224
MutexLock lock(worker->thread_mutex);
52105225
if (worker->recording) {
5211-
VectorView<RDD::SemaphoreID> semaphores = p_operations_used_by_draw ? frames[frame].transfer_worker_semaphores[i] : VectorView<RDD::SemaphoreID>();
5226+
VectorView<RDD::SemaphoreID> semaphores = p_draw_command_buffer ? frames[frame].transfer_worker_semaphores[i] : VectorView<RDD::SemaphoreID>();
52125227
_end_transfer_worker(worker);
52135228
_submit_transfer_worker(worker, semaphores);
52145229
}
5230+
5231+
if (p_draw_command_buffer) {
5232+
_flush_barriers_for_transfer_worker(worker);
5233+
}
52155234
}
52165235
}
5236+
5237+
if (p_draw_command_buffer && !transfer_worker_pool_texture_barriers.is_empty()) {
5238+
driver->command_pipeline_barrier(p_draw_command_buffer, RDD::PIPELINE_STAGE_COPY_BIT, RDD::PIPELINE_STAGE_ALL_COMMANDS_BIT, {}, {}, transfer_worker_pool_texture_barriers);
5239+
transfer_worker_pool_texture_barriers.clear();
5240+
}
52175241
}
52185242

52195243
void RenderingDevice::_wait_for_transfer_workers() {
@@ -5807,10 +5831,10 @@ void RenderingDevice::_end_frame() {
58075831
ERR_PRINT("Found open compute list at the end of the frame, this should never happen (further compute will likely not work).");
58085832
}
58095833

5810-
_submit_transfer_workers(true);
5811-
58125834
// The command buffer must be copied into a stack variable as the driver workarounds can change the command buffer in use.
58135835
RDD::CommandBufferID command_buffer = frames[frame].command_buffer;
5836+
_submit_transfer_workers(command_buffer);
5837+
58145838
draw_graph.end(RENDER_GRAPH_REORDER, RENDER_GRAPH_FULL_BARRIERS, command_buffer, frames[frame].command_buffer_pool);
58155839
driver->command_buffer_end(command_buffer);
58165840
driver->end_segment();
@@ -6387,7 +6411,7 @@ void RenderingDevice::finalize() {
63876411
}
63886412

63896413
// Wait for transfer workers to finish.
6390-
_submit_transfer_workers(false);
6414+
_submit_transfer_workers();
63916415
_wait_for_transfer_workers();
63926416

63936417
// Delete everything the graph has created.

servers/rendering/rendering_device.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1267,6 +1267,7 @@ class RenderingDevice : public RenderingDeviceCommons {
12671267
RDD::CommandBufferID command_buffer;
12681268
RDD::CommandPoolID command_pool;
12691269
RDD::FenceID command_fence;
1270+
LocalVector<RDD::TextureBarrier> texture_barriers;
12701271
bool recording = false;
12711272
bool submitted = false;
12721273
BinaryMutex thread_mutex;
@@ -1280,6 +1281,7 @@ class RenderingDevice : public RenderingDeviceCommons {
12801281
uint32_t transfer_worker_pool_max_size = 1;
12811282
LocalVector<uint64_t> transfer_worker_operation_used_by_draw;
12821283
LocalVector<uint32_t> transfer_worker_pool_available_list;
1284+
LocalVector<RDD::TextureBarrier> transfer_worker_pool_texture_barriers;
12831285
BinaryMutex transfer_worker_pool_mutex;
12841286
ConditionVariable transfer_worker_pool_condition;
12851287

@@ -1288,12 +1290,13 @@ class RenderingDevice : public RenderingDeviceCommons {
12881290
void _end_transfer_worker(TransferWorker *p_transfer_worker);
12891291
void _submit_transfer_worker(TransferWorker *p_transfer_worker, VectorView<RDD::SemaphoreID> p_signal_semaphores = VectorView<RDD::SemaphoreID>());
12901292
void _wait_for_transfer_worker(TransferWorker *p_transfer_worker);
1293+
void _flush_barriers_for_transfer_worker(TransferWorker *p_transfer_worker);
12911294
void _check_transfer_worker_operation(uint32_t p_transfer_worker_index, uint64_t p_transfer_worker_operation);
12921295
void _check_transfer_worker_buffer(Buffer *p_buffer);
12931296
void _check_transfer_worker_texture(Texture *p_texture);
12941297
void _check_transfer_worker_vertex_array(VertexArray *p_vertex_array);
12951298
void _check_transfer_worker_index_array(IndexArray *p_index_array);
1296-
void _submit_transfer_workers(bool p_operations_used_by_draw);
1299+
void _submit_transfer_workers(RDD::CommandBufferID p_draw_command_buffer = RDD::CommandBufferID());
12971300
void _wait_for_transfer_workers();
12981301
void _free_transfer_workers();
12991302

servers/rendering/rendering_device_driver.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -374,6 +374,8 @@ uint64_t RenderingDeviceDriver::api_trait_get(ApiTrait p_trait) {
374374
return 1;
375375
case API_TRAIT_CLEARS_WITH_COPY_ENGINE:
376376
return true;
377+
case API_TRAIT_USE_GENERAL_IN_COPY_QUEUES:
378+
return false;
377379
default:
378380
ERR_FAIL_V(0);
379381
}

servers/rendering/rendering_device_driver.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -220,6 +220,7 @@ class RenderingDeviceDriver : public RenderingDeviceCommons {
220220

221221
enum TextureLayout {
222222
TEXTURE_LAYOUT_UNDEFINED,
223+
TEXTURE_LAYOUT_GENERAL,
223224
TEXTURE_LAYOUT_STORAGE_OPTIMAL,
224225
TEXTURE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
225226
TEXTURE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
@@ -750,6 +751,7 @@ class RenderingDeviceDriver : public RenderingDeviceCommons {
750751
API_TRAIT_TEXTURE_DATA_ROW_PITCH_STEP,
751752
API_TRAIT_SECONDARY_VIEWPORT_SCISSOR,
752753
API_TRAIT_CLEARS_WITH_COPY_ENGINE,
754+
API_TRAIT_USE_GENERAL_IN_COPY_QUEUES,
753755
};
754756

755757
enum ShaderChangeInvalidation {

0 commit comments

Comments
 (0)