@@ -2125,7 +2125,7 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
21252125 return false ;
21262126 }
21272127 pipeline_cache_->AnalyzeShaderUcode (*vertex_shader);
2128- bool memexport_used_vertex = vertex_shader->is_valid_memexport_used ();
2128+ bool memexport_used_vertex = vertex_shader->memexport_eM_written ();
21292129
21302130 // Pixel shader analysis.
21312131 bool primitive_polygonal = draw_util::IsPrimitivePolygonal (regs);
@@ -2154,7 +2154,7 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
21542154 }
21552155 }
21562156 bool memexport_used_pixel =
2157- pixel_shader && pixel_shader->is_valid_memexport_used ();
2157+ pixel_shader && pixel_shader->memexport_eM_written ();
21582158 bool memexport_used = memexport_used_vertex || memexport_used_pixel;
21592159
21602160 if (!BeginSubmission (true )) {
@@ -2341,100 +2341,20 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
23412341 // Gather memexport ranges and ensure the heaps for them are resident, and
23422342 // also load the data surrounding the export and to fill the regions that
23432343 // won't be modified by the shaders.
2344- struct MemExportRange {
2345- uint32_t base_address_dwords;
2346- uint32_t size_dwords;
2347- };
2348- MemExportRange memexport_ranges[512 ];
2349- uint32_t memexport_range_count = 0 ;
2344+ memexport_ranges_.clear ();
23502345 if (memexport_used_vertex) {
2351- for (uint32_t constant_index :
2352- vertex_shader->memexport_stream_constants ()) {
2353- const auto & memexport_stream = regs.Get <xenos::xe_gpu_memexport_stream_t >(
2354- XE_GPU_REG_SHADER_CONSTANT_000_X + constant_index * 4 );
2355- if (memexport_stream.index_count == 0 ) {
2356- continue ;
2357- }
2358- uint32_t memexport_format_size =
2359- GetSupportedMemExportFormatSize (memexport_stream.format );
2360- if (memexport_format_size == 0 ) {
2361- XELOGE (" Unsupported memexport format {}" ,
2362- FormatInfo::Get (
2363- xenos::TextureFormat (uint32_t (memexport_stream.format )))
2364- ->name );
2365- return false ;
2366- }
2367- uint32_t memexport_size_dwords =
2368- memexport_stream.index_count * memexport_format_size;
2369- // Try to reduce the number of shared memory operations when writing
2370- // different elements into the same buffer through different exports
2371- // (happens in 4D5307E6).
2372- bool memexport_range_reused = false ;
2373- for (uint32_t i = 0 ; i < memexport_range_count; ++i) {
2374- MemExportRange& memexport_range = memexport_ranges[i];
2375- if (memexport_range.base_address_dwords ==
2376- memexport_stream.base_address ) {
2377- memexport_range.size_dwords =
2378- std::max (memexport_range.size_dwords , memexport_size_dwords);
2379- memexport_range_reused = true ;
2380- break ;
2381- }
2382- }
2383- // Add a new range if haven't expanded an existing one.
2384- if (!memexport_range_reused) {
2385- MemExportRange& memexport_range =
2386- memexport_ranges[memexport_range_count++];
2387- memexport_range.base_address_dwords = memexport_stream.base_address ;
2388- memexport_range.size_dwords = memexport_size_dwords;
2389- }
2390- }
2346+ draw_util::AddMemExportRanges (regs, *vertex_shader, memexport_ranges_);
23912347 }
23922348 if (memexport_used_pixel) {
2393- for (uint32_t constant_index : pixel_shader->memexport_stream_constants ()) {
2394- const auto & memexport_stream = regs.Get <xenos::xe_gpu_memexport_stream_t >(
2395- XE_GPU_REG_SHADER_CONSTANT_256_X + constant_index * 4 );
2396- if (memexport_stream.index_count == 0 ) {
2397- continue ;
2398- }
2399- uint32_t memexport_format_size =
2400- GetSupportedMemExportFormatSize (memexport_stream.format );
2401- if (memexport_format_size == 0 ) {
2402- XELOGE (" Unsupported memexport format {}" ,
2403- FormatInfo::Get (
2404- xenos::TextureFormat (uint32_t (memexport_stream.format )))
2405- ->name );
2406- return false ;
2407- }
2408- uint32_t memexport_size_dwords =
2409- memexport_stream.index_count * memexport_format_size;
2410- bool memexport_range_reused = false ;
2411- for (uint32_t i = 0 ; i < memexport_range_count; ++i) {
2412- MemExportRange& memexport_range = memexport_ranges[i];
2413- if (memexport_range.base_address_dwords ==
2414- memexport_stream.base_address ) {
2415- memexport_range.size_dwords =
2416- std::max (memexport_range.size_dwords , memexport_size_dwords);
2417- memexport_range_reused = true ;
2418- break ;
2419- }
2420- }
2421- if (!memexport_range_reused) {
2422- MemExportRange& memexport_range =
2423- memexport_ranges[memexport_range_count++];
2424- memexport_range.base_address_dwords = memexport_stream.base_address ;
2425- memexport_range.size_dwords = memexport_size_dwords;
2426- }
2427- }
2349+ draw_util::AddMemExportRanges (regs, *pixel_shader, memexport_ranges_);
24282350 }
2429- for (uint32_t i = 0 ; i < memexport_range_count; ++i) {
2430- const MemExportRange& memexport_range = memexport_ranges[i];
2351+ for (const draw_util::MemExportRange& memexport_range : memexport_ranges_) {
24312352 if (!shared_memory_->RequestRange (memexport_range.base_address_dwords << 2 ,
2432- memexport_range.size_dwords << 2 )) {
2353+ memexport_range.size_bytes )) {
24332354 XELOGE (
24342355 " Failed to request memexport stream at 0x{:08X} (size {}) in the "
24352356 " shared memory" ,
2436- memexport_range.base_address_dwords << 2 ,
2437- memexport_range.size_dwords << 2 );
2357+ memexport_range.base_address_dwords << 2 , memexport_range.size_bytes );
24382358 return false ;
24392359 }
24402360 }
@@ -2594,17 +2514,17 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
25942514 // when memexports should be awaited?
25952515 shared_memory_->MarkUAVWritesCommitNeeded ();
25962516 // Invalidate textures in memexported memory and watch for changes.
2597- for (uint32_t i = 0 ; i < memexport_range_count; ++i) {
2598- const MemExportRange& memexport_range = memexport_ranges[i];
2517+ for (const draw_util::MemExportRange& memexport_range : memexport_ranges_) {
25992518 shared_memory_->RangeWrittenByGpu (
2600- memexport_range.base_address_dwords << 2 ,
2601- memexport_range. size_dwords << 2 , false );
2519+ memexport_range.base_address_dwords << 2 , memexport_range. size_bytes ,
2520+ false );
26022521 }
26032522 if (cvars::d3d12_readback_memexport) {
26042523 // Read the exported data on the CPU.
26052524 uint32_t memexport_total_size = 0 ;
2606- for (uint32_t i = 0 ; i < memexport_range_count; ++i) {
2607- memexport_total_size += memexport_ranges[i].size_dwords << 2 ;
2525+ for (const draw_util::MemExportRange& memexport_range :
2526+ memexport_ranges_) {
2527+ memexport_total_size += memexport_range.size_bytes ;
26082528 }
26092529 if (memexport_total_size != 0 ) {
26102530 ID3D12Resource* readback_buffer =
@@ -2614,9 +2534,9 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
26142534 SubmitBarriers ();
26152535 ID3D12Resource* shared_memory_buffer = shared_memory_->GetBuffer ();
26162536 uint32_t readback_buffer_offset = 0 ;
2617- for (uint32_t i = 0 ; i < memexport_range_count; ++i) {
2618- const MemExportRange& memexport_range = memexport_ranges[i];
2619- uint32_t memexport_range_size = memexport_range.size_dwords << 2 ;
2537+ for (const draw_util::MemExportRange& memexport_range :
2538+ memexport_ranges_) {
2539+ uint32_t memexport_range_size = memexport_range.size_bytes ;
26202540 deferred_command_list_.D3DCopyBufferRegion (
26212541 readback_buffer, readback_buffer_offset, shared_memory_buffer,
26222542 memexport_range.base_address_dwords << 2 , memexport_range_size);
@@ -2629,14 +2549,14 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
26292549 void * readback_mapping;
26302550 if (SUCCEEDED (readback_buffer->Map (0 , &readback_range,
26312551 &readback_mapping))) {
2632- const uint32_t * readback_dwords =
2633- reinterpret_cast <const uint32_t *>(readback_mapping);
2634- for (uint32_t i = 0 ; i < memexport_range_count; ++i) {
2635- const MemExportRange& memexport_range = memexport_ranges[i];
2552+ const uint8_t * readback_bytes =
2553+ reinterpret_cast <const uint8_t *>(readback_mapping);
2554+ for (const draw_util::MemExportRange& memexport_range :
2555+ memexport_ranges_) {
26362556 std::memcpy (memory_->TranslatePhysical (
26372557 memexport_range.base_address_dwords << 2 ),
2638- readback_dwords , memexport_range.size_dwords << 2 );
2639- readback_dwords += memexport_range.size_dwords ;
2558+ readback_bytes , memexport_range.size_bytes );
2559+ readback_bytes += memexport_range.size_bytes ;
26402560 }
26412561 D3D12_RANGE readback_write_range = {};
26422562 readback_buffer->Unmap (0 , &readback_write_range);
@@ -4510,36 +4430,6 @@ bool D3D12CommandProcessor::UpdateBindings(const D3D12Shader* vertex_shader,
45104430 return true ;
45114431}
45124432
4513- uint32_t D3D12CommandProcessor::GetSupportedMemExportFormatSize (
4514- xenos::ColorFormat format) {
4515- switch (format) {
4516- case xenos::ColorFormat::k_8_8_8_8:
4517- case xenos::ColorFormat::k_2_10_10_10:
4518- // TODO(Triang3l): Investigate how k_8_8_8_8_A works - not supported in the
4519- // texture cache currently.
4520- // case xenos::ColorFormat::k_8_8_8_8_A:
4521- case xenos::ColorFormat::k_10_11_11:
4522- case xenos::ColorFormat::k_11_11_10:
4523- case xenos::ColorFormat::k_16_16:
4524- case xenos::ColorFormat::k_16_16_FLOAT:
4525- case xenos::ColorFormat::k_32_FLOAT:
4526- case xenos::ColorFormat::k_8_8_8_8_AS_16_16_16_16:
4527- case xenos::ColorFormat::k_2_10_10_10_AS_16_16_16_16:
4528- case xenos::ColorFormat::k_10_11_11_AS_16_16_16_16:
4529- case xenos::ColorFormat::k_11_11_10_AS_16_16_16_16:
4530- return 1 ;
4531- case xenos::ColorFormat::k_16_16_16_16:
4532- case xenos::ColorFormat::k_16_16_16_16_FLOAT:
4533- case xenos::ColorFormat::k_32_32_FLOAT:
4534- return 2 ;
4535- case xenos::ColorFormat::k_32_32_32_32_FLOAT:
4536- return 4 ;
4537- default :
4538- break ;
4539- }
4540- return 0 ;
4541- }
4542-
45434433ID3D12Resource* D3D12CommandProcessor::RequestReadbackBuffer (uint32_t size) {
45444434 if (size == 0 ) {
45454435 return nullptr ;
0 commit comments