@@ -1243,6 +1243,7 @@ Error RenderingDevice::_texture_initialize(RID p_texture, uint32_t p_layer, cons
12431243 TransferWorker *transfer_worker = nullptr ;
12441244 const uint8_t *read_ptr = p_data.ptr ();
12451245 uint8_t *write_ptr = nullptr ;
1246+ const RDD::TextureLayout copy_dst_layout = driver->api_trait_get (RDD::API_TRAIT_USE_GENERAL_IN_COPY_QUEUES) ? RDD::TEXTURE_LAYOUT_GENERAL : RDD::TEXTURE_LAYOUT_COPY_DST_OPTIMAL;
12461247 for (uint32_t pass = 0 ; pass < 2 ; pass++) {
12471248 const bool copy_pass = (pass == 1 );
12481249 if (copy_pass) {
@@ -1267,7 +1268,7 @@ Error RenderingDevice::_texture_initialize(RID p_texture, uint32_t p_layer, cons
12671268 tb.texture = texture->driver_id ;
12681269 tb.dst_access = RDD::BARRIER_ACCESS_COPY_WRITE_BIT;
12691270 tb.prev_layout = RDD::TEXTURE_LAYOUT_UNDEFINED;
1270- tb.next_layout = RDD::TEXTURE_LAYOUT_COPY_DST_OPTIMAL ;
1271+ tb.next_layout = copy_dst_layout ;
12711272 tb.subresources .aspect = texture->barrier_aspect_flags ;
12721273 tb.subresources .mipmap_count = texture->mipmaps ;
12731274 tb.subresources .base_layer = p_layer;
@@ -1313,7 +1314,7 @@ Error RenderingDevice::_texture_initialize(RID p_texture, uint32_t p_layer, cons
13131314 copy_region.texture_subresources .layer_count = 1 ;
13141315 copy_region.texture_offset = Vector3i (0 , 0 , z);
13151316 copy_region.texture_region_size = Vector3i (logic_width, logic_height, 1 );
1316- driver->command_copy_buffer_to_texture (transfer_worker->command_buffer , transfer_worker->staging_buffer , texture->driver_id , RDD::TEXTURE_LAYOUT_COPY_DST_OPTIMAL , copy_region);
1317+ driver->command_copy_buffer_to_texture (transfer_worker->command_buffer , transfer_worker->staging_buffer , texture->driver_id , copy_dst_layout , copy_region);
13171318 }
13181319
13191320 staging_local_offset += to_allocate;
@@ -1332,14 +1333,13 @@ Error RenderingDevice::_texture_initialize(RID p_texture, uint32_t p_layer, cons
13321333 RDD::TextureBarrier tb;
13331334 tb.texture = texture->driver_id ;
13341335 tb.src_access = RDD::BARRIER_ACCESS_COPY_WRITE_BIT;
1335- tb.prev_layout = RDD::TEXTURE_LAYOUT_COPY_DST_OPTIMAL ;
1336+ tb.prev_layout = copy_dst_layout ;
13361337 tb.next_layout = RDD::TEXTURE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
13371338 tb.subresources .aspect = texture->barrier_aspect_flags ;
13381339 tb.subresources .mipmap_count = texture->mipmaps ;
13391340 tb.subresources .base_layer = p_layer;
13401341 tb.subresources .layer_count = 1 ;
1341-
1342- driver->command_pipeline_barrier (transfer_worker->command_buffer , RDD::PIPELINE_STAGE_COPY_BIT, RDD::PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, {}, {}, tb);
1342+ transfer_worker->texture_barriers .push_back (tb);
13431343 }
13441344
13451345 _release_transfer_worker (transfer_worker);
@@ -5152,6 +5152,21 @@ void RenderingDevice::_wait_for_transfer_worker(TransferWorker *p_transfer_worke
51525152 MutexLock lock (p_transfer_worker->operations_mutex );
51535153 p_transfer_worker->operations_processed = p_transfer_worker->operations_submitted ;
51545154 }
5155+
5156+ if (!p_transfer_worker->texture_barriers .is_empty ()) {
5157+ MutexLock transfer_worker_lock (transfer_worker_pool_mutex);
5158+ _flush_barriers_for_transfer_worker (p_transfer_worker);
5159+ }
5160+ }
5161+
// Moves the texture barriers queued on a single transfer worker into the shared
// pool-wide list `transfer_worker_pool_texture_barriers`, then empties the
// worker's own list.
//
// p_transfer_worker: worker whose `texture_barriers` list is drained.
//
// Both call sites visible in this change acquire `transfer_worker_pool_mutex`
// before calling this function; the function itself takes no lock.
// The pooled barriers are later recorded on the draw command buffer by
// `_submit_transfer_workers()` through `command_pipeline_barrier()`.
5162+ void RenderingDevice::_flush_barriers_for_transfer_worker (TransferWorker *p_transfer_worker) {
// NOTE(review): this guard looks redundant, since the loop and clear() below
// would presumably be no-ops on an empty list — confirm before removing.
5163+ if (!p_transfer_worker->texture_barriers .is_empty ()) {
// Append each queued barrier to the pool-wide list, preserving order.
5164+ for (uint32_t i = 0 ; i < p_transfer_worker->texture_barriers .size (); i++) {
5165+ transfer_worker_pool_texture_barriers.push_back (p_transfer_worker->texture_barriers [i]);
5166+ }
5167+
// The worker's barriers now live in the pool list; drop them here so they
// are not appended a second time on the next flush.
5168+ p_transfer_worker->texture_barriers .clear ();
5169+ }
51555170}
51565171
51575172void RenderingDevice::_check_transfer_worker_operation (uint32_t p_transfer_worker_index, uint64_t p_transfer_worker_operation) {
@@ -5193,11 +5208,11 @@ void RenderingDevice::_check_transfer_worker_index_array(IndexArray *p_index_arr
51935208 }
51945209}
51955210
5196- void RenderingDevice::_submit_transfer_workers (bool p_operations_used_by_draw ) {
5211+ void RenderingDevice::_submit_transfer_workers (RDD::CommandBufferID p_draw_command_buffer ) {
51975212 MutexLock transfer_worker_lock (transfer_worker_pool_mutex);
51985213 for (uint32_t i = 0 ; i < transfer_worker_pool.size (); i++) {
51995214 TransferWorker *worker = transfer_worker_pool[i];
5200- if (p_operations_used_by_draw ) {
5215+ if (p_draw_command_buffer ) {
52015216 MutexLock lock (worker->operations_mutex );
52025217 if (worker->operations_processed >= transfer_worker_operation_used_by_draw[worker->index ]) {
52035218 // The operation used by the draw has already been processed, we don't need to wait on the worker.
@@ -5208,12 +5223,21 @@ void RenderingDevice::_submit_transfer_workers(bool p_operations_used_by_draw) {
52085223 {
52095224 MutexLock lock (worker->thread_mutex );
52105225 if (worker->recording ) {
5211- VectorView<RDD::SemaphoreID> semaphores = p_operations_used_by_draw ? frames[frame].transfer_worker_semaphores [i] : VectorView<RDD::SemaphoreID>();
5226+ VectorView<RDD::SemaphoreID> semaphores = p_draw_command_buffer ? frames[frame].transfer_worker_semaphores [i] : VectorView<RDD::SemaphoreID>();
52125227 _end_transfer_worker (worker);
52135228 _submit_transfer_worker (worker, semaphores);
52145229 }
5230+
5231+ if (p_draw_command_buffer) {
5232+ _flush_barriers_for_transfer_worker (worker);
5233+ }
52155234 }
52165235 }
5236+
5237+ if (p_draw_command_buffer && !transfer_worker_pool_texture_barriers.is_empty ()) {
5238+ driver->command_pipeline_barrier (p_draw_command_buffer, RDD::PIPELINE_STAGE_COPY_BIT, RDD::PIPELINE_STAGE_ALL_COMMANDS_BIT, {}, {}, transfer_worker_pool_texture_barriers);
5239+ transfer_worker_pool_texture_barriers.clear ();
5240+ }
52175241}
52185242
52195243void RenderingDevice::_wait_for_transfer_workers () {
@@ -5807,10 +5831,10 @@ void RenderingDevice::_end_frame() {
58075831 ERR_PRINT (" Found open compute list at the end of the frame, this should never happen (further compute will likely not work)." );
58085832 }
58095833
5810- _submit_transfer_workers (true );
5811-
58125834 // The command buffer must be copied into a stack variable as the driver workarounds can change the command buffer in use.
58135835 RDD::CommandBufferID command_buffer = frames[frame].command_buffer ;
5836+ _submit_transfer_workers (command_buffer);
5837+
58145838 draw_graph.end (RENDER_GRAPH_REORDER, RENDER_GRAPH_FULL_BARRIERS, command_buffer, frames[frame].command_buffer_pool );
58155839 driver->command_buffer_end (command_buffer);
58165840 driver->end_segment ();
@@ -6387,7 +6411,7 @@ void RenderingDevice::finalize() {
63876411 }
63886412
63896413 // Wait for transfer workers to finish.
6390- _submit_transfer_workers (false );
6414+ _submit_transfer_workers ();
63916415 _wait_for_transfer_workers ();
63926416
63936417 // Delete everything the graph has created.
0 commit comments