1010#include < set>
1111#include < string>
1212#include < thread>
13+ #include < utility>
1314#include < unordered_map>
1415#include < vector>
15-
16+ # include < numeric >
1617#include " gguf_reader.hpp"
1718#include " model.h"
1819#include " stable-diffusion.h"
@@ -569,6 +570,15 @@ std::string convert_diffusers_name_to_compvis(std::string key, char seq) {
569570}
570571
571572std::string convert_tensor_name (std::string name) {
573+ static thread_local std::unordered_map<std::string, std::string> cache;
574+
575+ auto cached = cache.find (name);
576+ if (cached != cache.end ()) {
577+ return cached->second ;
578+ }
579+
580+ const std::string original = name;
581+
572582 if (starts_with (name, " diffusion_model" )) {
573583 name = " model." + name;
574584 }
@@ -670,33 +680,58 @@ std::string convert_tensor_name(std::string name) {
670680 // if (new_name != name) {
671681 // LOG_DEBUG("%s => %s", name.c_str(), new_name.c_str());
672682 // }
673- return new_name;
683+
684+ auto result = cache.emplace (original, new_name);
685+ return result.first ->second ;
674686}
675687
676- void add_preprocess_tensor_storage_types (String2GGMLType& tensor_storages_types, std::string name, enum ggml_type type) {
677- std::string new_name = convert_tensor_name (name);
688+ template <typename InserterT>
689+ static void for_each_preprocess_tensor_storage_type (const std::string& name,
690+ enum ggml_type type,
691+ InserterT&& insert,
692+ bool name_is_canonical = false ) {
693+ std::string new_name = name_is_canonical ? name : convert_tensor_name (name);
678694
679695 if (new_name.find (" cond_stage_model" ) != std::string::npos && ends_with (new_name, " attn.in_proj_weight" )) {
680- size_t prefix_size = new_name.find (" attn.in_proj_weight" );
681- std::string prefix = new_name.substr (0 , prefix_size);
682- tensor_storages_types[ prefix + " self_attn.q_proj.weight" ] = type;
683- tensor_storages_types[ prefix + " self_attn.k_proj.weight" ] = type;
684- tensor_storages_types[ prefix + " self_attn.v_proj.weight" ] = type;
696+ size_t prefix_size = new_name.find (" attn.in_proj_weight" );
697+ std::string prefix = new_name.substr (0 , prefix_size);
698+ insert ( prefix + " self_attn.q_proj.weight" , type) ;
699+ insert ( prefix + " self_attn.k_proj.weight" , type) ;
700+ insert ( prefix + " self_attn.v_proj.weight" , type) ;
685701 } else if (new_name.find (" cond_stage_model" ) != std::string::npos && ends_with (new_name, " attn.in_proj_bias" )) {
686- size_t prefix_size = new_name.find (" attn.in_proj_bias" );
687- std::string prefix = new_name.substr (0 , prefix_size);
688- tensor_storages_types[ prefix + " self_attn.q_proj.bias" ] = type;
689- tensor_storages_types[ prefix + " self_attn.k_proj.bias" ] = type;
690- tensor_storages_types[ prefix + " self_attn.v_proj.bias" ] = type;
702+ size_t prefix_size = new_name.find (" attn.in_proj_bias" );
703+ std::string prefix = new_name.substr (0 , prefix_size);
704+ insert ( prefix + " self_attn.q_proj.bias" , type) ;
705+ insert ( prefix + " self_attn.k_proj.bias" , type) ;
706+ insert ( prefix + " self_attn.v_proj.bias" , type) ;
691707 } else {
692- tensor_storages_types[ new_name] = type;
708+ insert ( std::move ( new_name), type) ;
693709 }
694710}
695711
712+ void add_preprocess_tensor_storage_types (String2GGMLType& tensor_storages_types,
713+ const std::string& name,
714+ enum ggml_type type,
715+ bool name_is_canonical) {
716+ for_each_preprocess_tensor_storage_type (name, type, [&](std::string key, ggml_type value) {
717+ tensor_storages_types.insert_or_assign (std::move (key), value);
718+ }, name_is_canonical);
719+ }
720+
721+ void add_preprocess_tensor_storage_types (String2GGMLType& tensor_storages_types, const std::string& name, enum ggml_type type) {
722+ add_preprocess_tensor_storage_types (tensor_storages_types, name, type, false );
723+ }
724+
696725void preprocess_tensor (TensorStorage tensor_storage,
697726 std::vector<TensorStorage>& processed_tensor_storages) {
698727 std::vector<TensorStorage> result;
699- std::string new_name = convert_tensor_name (tensor_storage.name );
728+ std::string new_name;
729+ if (tensor_storage.name_is_canonical ) {
730+ new_name = tensor_storage.name ;
731+ } else {
732+ new_name = convert_tensor_name (tensor_storage.name );
733+ tensor_storage.name_is_canonical = true ;
734+ }
700735
701736 // convert unet transformer linear to conv2d 1x1
702737 if (starts_with (new_name, " model.diffusion_model." ) &&
@@ -717,6 +752,7 @@ void preprocess_tensor(TensorStorage tensor_storage,
717752 }
718753
719754 tensor_storage.name = new_name;
755+ tensor_storage.name_is_canonical = true ;
720756
721757 if (new_name.find (" cond_stage_model" ) != std::string::npos &&
722758 ends_with (new_name, " attn.in_proj_weight" )) {
@@ -725,20 +761,25 @@ void preprocess_tensor(TensorStorage tensor_storage,
725761
726762 std::vector<TensorStorage> chunks = tensor_storage.chunk (3 );
727763 chunks[0 ].name = prefix + " self_attn.q_proj.weight" ;
764+ chunks[0 ].name_is_canonical = true ;
728765 chunks[1 ].name = prefix + " self_attn.k_proj.weight" ;
766+ chunks[1 ].name_is_canonical = true ;
729767 chunks[2 ].name = prefix + " self_attn.v_proj.weight" ;
768+ chunks[2 ].name_is_canonical = true ;
730769
731770 processed_tensor_storages.insert (processed_tensor_storages.end (), chunks.begin (), chunks.end ());
732-
733771 } else if (new_name.find (" cond_stage_model" ) != std::string::npos &&
734772 ends_with (new_name, " attn.in_proj_bias" )) {
735773 size_t prefix_size = new_name.find (" attn.in_proj_bias" );
736774 std::string prefix = new_name.substr (0 , prefix_size);
737775
738776 std::vector<TensorStorage> chunks = tensor_storage.chunk (3 );
739777 chunks[0 ].name = prefix + " self_attn.q_proj.bias" ;
778+ chunks[0 ].name_is_canonical = true ;
740779 chunks[1 ].name = prefix + " self_attn.k_proj.bias" ;
780+ chunks[1 ].name_is_canonical = true ;
741781 chunks[2 ].name = prefix + " self_attn.v_proj.bias" ;
782+ chunks[2 ].name_is_canonical = true ;
742783
743784 processed_tensor_storages.insert (processed_tensor_storages.end (), chunks.begin (), chunks.end ());
744785 } else {
@@ -1221,6 +1262,7 @@ bool ModelLoader::init_from_safetensors_file(const std::string& file_path, const
12211262 bool is_f8_e5m2 = false ;
12221263 bool is_f64 = false ;
12231264 bool is_i64 = false ;
1265+ bool name_is_canonical = false ;
12241266 };
12251267
12261268 std::vector<SafetensorTask> tasks;
@@ -1244,7 +1286,6 @@ bool ModelLoader::init_from_safetensors_file(const std::string& file_path, const
12441286 if (dtype == " U8" ) {
12451287 continue ;
12461288 }
1247-
12481289 size_t begin = tensor_info[" data_offsets" ][0 ].get <size_t >();
12491290 size_t end = tensor_info[" data_offsets" ][1 ].get <size_t >();
12501291
@@ -1283,9 +1324,9 @@ bool ModelLoader::init_from_safetensors_file(const std::string& file_path, const
12831324 if (!starts_with (full_name, prefix)) {
12841325 full_name = prefix + full_name;
12851326 }
1286-
12871327 SafetensorTask task;
12881328 task.name = std::move (full_name);
1329+ task.name_is_canonical = false ;
12891330 task.type = type;
12901331 task.ne = ne;
12911332 task.n_dims = n_dims;
@@ -1313,24 +1354,50 @@ bool ModelLoader::init_from_safetensors_file(const std::string& file_path, const
13131354 n_threads = 1 ;
13141355 }
13151356
1316- std::vector<TensorStorage> processed (tasks.size ());
1357+ const size_t prior_size = tensor_storages.size ();
1358+ tensor_storages.resize (prior_size + tasks.size ());
1359+
1360+ std::vector<size_t > read_order (tasks.size ());
1361+ std::iota (read_order.begin (), read_order.end (), 0 );
1362+ std::sort (read_order.begin (), read_order.end (), [&](size_t a, size_t b) {
1363+ return tasks[a].offset < tasks[b].offset ;
1364+ });
13171365
1366+ std::vector<std::vector<std::pair<std::string, ggml_type>>> local_types (n_threads);
13181367 std::vector<std::thread> workers;
13191368 workers.reserve (n_threads);
13201369
1321- for (int i = 0 ; i < n_threads; ++i) {
1322- workers.emplace_back ([&, thread_id = i]() {
1323- for (size_t idx = thread_id; idx < tasks.size (); idx += n_threads) {
1370+ const size_t chunk_size = (read_order.size () + n_threads - 1 ) / n_threads;
1371+ for (int thread_id = 0 ; thread_id < n_threads; ++thread_id) {
1372+ size_t begin = static_cast <size_t >(thread_id) * chunk_size;
1373+ if (begin >= read_order.size ()) {
1374+ continue ;
1375+ }
1376+ size_t end = std::min (begin + chunk_size, read_order.size ());
1377+
1378+ workers.emplace_back ([&, thread_id, begin, end, prior_size]() {
1379+ auto & assignments = local_types[thread_id];
1380+ size_t expected = end - begin;
1381+ if (expected > 0 ) {
1382+ assignments.reserve (expected * 3 );
1383+ }
1384+
1385+ for (size_t ord_idx = begin; ord_idx < end; ++ord_idx) {
1386+ size_t idx = read_order[ord_idx];
13241387 const auto & task = tasks[idx];
13251388
1326- TensorStorage tensor_storage (task.name , task.type , task.ne .data (), task.n_dims , file_index, task.offset );
1389+ TensorStorage tensor_storage (task.name , task.type , task.ne .data (), task.n_dims , file_index, task.offset , task. name_is_canonical );
13271390 tensor_storage.reverse_ne ();
1391+ if (!tensor_storage.name_is_canonical ) {
1392+ tensor_storage.name = convert_tensor_name (tensor_storage.name );
1393+ tensor_storage.name_is_canonical = true ;
1394+ }
13281395
1329- tensor_storage.is_bf16 = task.is_bf16 ;
1396+ tensor_storage.is_bf16 = task.is_bf16 ;
13301397 tensor_storage.is_f8_e4m3 = task.is_f8_e4m3 ;
13311398 tensor_storage.is_f8_e5m2 = task.is_f8_e5m2 ;
1332- tensor_storage.is_f64 = task.is_f64 ;
1333- tensor_storage.is_i64 = task.is_i64 ;
1399+ tensor_storage.is_f64 = task.is_f64 ;
1400+ tensor_storage.is_i64 = task.is_i64 ;
13341401
13351402 size_t tensor_data_size = task.tensor_data_size ;
13361403
@@ -1348,54 +1415,30 @@ bool ModelLoader::init_from_safetensors_file(const std::string& file_path, const
13481415 GGML_ASSERT (tensor_storage.nbytes () == tensor_data_size);
13491416 }
13501417
1351- processed[idx] = std::move (tensor_storage);
1352- }
1353- });
1354- }
1355-
1356- for (auto & worker : workers) {
1357- worker.join ();
1358- }
1418+ size_t target_index = prior_size + idx;
1419+ tensor_storages[target_index] = std::move (tensor_storage);
13591420
1360-
1361- const size_t prior_size = tensor_storages.size ();
1362- tensor_storages.resize (prior_size + processed.size ());
1363-
1364- int append_threads = std::min (num_threads_to_use, (int )processed.size ());
1365- if (append_threads < 1 ) {
1366- append_threads = 1 ;
1367- }
1368-
1369- std::vector<String2GGMLType> local_types (append_threads);
1370- std::vector<std::thread> append_workers;
1371- append_workers.reserve (append_threads);
1372-
1373- for (int thread_id = 0 ; thread_id < append_threads; ++thread_id) {
1374- append_workers.emplace_back ([&, thread_id]() {
1375- auto & local_map = local_types[thread_id];
1376- for (size_t idx = thread_id; idx < processed.size (); idx += append_threads) {
1377- size_t target_index = prior_size + idx;
1378- tensor_storages[target_index] = std::move (processed[idx]);
1379- add_preprocess_tensor_storage_types (local_map,
1380- tensor_storages[target_index].name ,
1381- tensor_storages[target_index].type );
1421+ for_each_preprocess_tensor_storage_type (
1422+ tensor_storages[target_index].name ,
1423+ tensor_storages[target_index].type ,
1424+ [&](std::string key, ggml_type value) {
1425+ assignments.emplace_back (std::move (key), value);
1426+ },
1427+ tensor_storages[target_index].name_is_canonical );
13821428 }
13831429 });
13841430 }
13851431
1386- for (auto & worker : append_workers ) {
1432+ for (auto & worker : workers ) {
13871433 worker.join ();
13881434 }
13891435
1390- for (auto & local_map : local_types) {
1391- for (auto & kv : local_map ) {
1392- tensor_storages_types[ kv.first ] = kv.second ;
1436+ for (auto & assignments : local_types) {
1437+ for (auto & kv : assignments ) {
1438+ tensor_storages_types. insert_or_assign ( std::move ( kv.first ), kv.second ) ;
13931439 }
13941440 }
13951441
1396- processed.clear ();
1397- processed.shrink_to_fit ();
1398-
13991442 return true ;
14001443}
14011444/* ================================================= DiffusersModelLoader ==================================================*/
@@ -1426,8 +1469,9 @@ bool ModelLoader::init_from_diffusers_file(const std::string& file_path, const s
14261469 }
14271470 if (pos != std::string::npos) {
14281471 tensor_storage.name = " model.diffusion_model.output_blocks.2.2.conv" + tensor_storage.name .substr (len);
1472+ tensor_storage.name_is_canonical = true ;
14291473 LOG_DEBUG (" NEW NAME: %s" , tensor_storage.name .c_str ());
1430- add_preprocess_tensor_storage_types (tensor_storages_types, tensor_storage.name , tensor_storage.type );
1474+ add_preprocess_tensor_storage_types (tensor_storages_types, tensor_storage.name , tensor_storage.type , true );
14311475 }
14321476 }
14331477 break ;
0 commit comments