Skip to content

Commit daf5405

Browse files
committed
Fast LoRA safetensors staging: cache tensor-name conversion, track canonical names, and merge per-thread type maps
1 parent f633acf commit daf5405

File tree

2 files changed

+118
-67
lines changed

2 files changed

+118
-67
lines changed

model.cpp

Lines changed: 109 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,10 @@
1010
#include <set>
1111
#include <string>
1212
#include <thread>
13+
#include <utility>
1314
#include <unordered_map>
1415
#include <vector>
15-
16+
#include <numeric>
1617
#include "gguf_reader.hpp"
1718
#include "model.h"
1819
#include "stable-diffusion.h"
@@ -569,6 +570,15 @@ std::string convert_diffusers_name_to_compvis(std::string key, char seq) {
569570
}
570571

571572
std::string convert_tensor_name(std::string name) {
573+
static thread_local std::unordered_map<std::string, std::string> cache;
574+
575+
auto cached = cache.find(name);
576+
if (cached != cache.end()) {
577+
return cached->second;
578+
}
579+
580+
const std::string original = name;
581+
572582
if (starts_with(name, "diffusion_model")) {
573583
name = "model." + name;
574584
}
@@ -670,33 +680,58 @@ std::string convert_tensor_name(std::string name) {
670680
// if (new_name != name) {
671681
// LOG_DEBUG("%s => %s", name.c_str(), new_name.c_str());
672682
// }
673-
return new_name;
683+
684+
auto result = cache.emplace(original, new_name);
685+
return result.first->second;
674686
}
675687

676-
void add_preprocess_tensor_storage_types(String2GGMLType& tensor_storages_types, std::string name, enum ggml_type type) {
677-
std::string new_name = convert_tensor_name(name);
688+
template <typename InserterT>
689+
static void for_each_preprocess_tensor_storage_type(const std::string& name,
690+
enum ggml_type type,
691+
InserterT&& insert,
692+
bool name_is_canonical = false) {
693+
std::string new_name = name_is_canonical ? name : convert_tensor_name(name);
678694

679695
if (new_name.find("cond_stage_model") != std::string::npos && ends_with(new_name, "attn.in_proj_weight")) {
680-
size_t prefix_size = new_name.find("attn.in_proj_weight");
681-
std::string prefix = new_name.substr(0, prefix_size);
682-
tensor_storages_types[prefix + "self_attn.q_proj.weight"] = type;
683-
tensor_storages_types[prefix + "self_attn.k_proj.weight"] = type;
684-
tensor_storages_types[prefix + "self_attn.v_proj.weight"] = type;
696+
size_t prefix_size = new_name.find("attn.in_proj_weight");
697+
std::string prefix = new_name.substr(0, prefix_size);
698+
insert(prefix + "self_attn.q_proj.weight", type);
699+
insert(prefix + "self_attn.k_proj.weight", type);
700+
insert(prefix + "self_attn.v_proj.weight", type);
685701
} else if (new_name.find("cond_stage_model") != std::string::npos && ends_with(new_name, "attn.in_proj_bias")) {
686-
size_t prefix_size = new_name.find("attn.in_proj_bias");
687-
std::string prefix = new_name.substr(0, prefix_size);
688-
tensor_storages_types[prefix + "self_attn.q_proj.bias"] = type;
689-
tensor_storages_types[prefix + "self_attn.k_proj.bias"] = type;
690-
tensor_storages_types[prefix + "self_attn.v_proj.bias"] = type;
702+
size_t prefix_size = new_name.find("attn.in_proj_bias");
703+
std::string prefix = new_name.substr(0, prefix_size);
704+
insert(prefix + "self_attn.q_proj.bias", type);
705+
insert(prefix + "self_attn.k_proj.bias", type);
706+
insert(prefix + "self_attn.v_proj.bias", type);
691707
} else {
692-
tensor_storages_types[new_name] = type;
708+
insert(std::move(new_name), type);
693709
}
694710
}
695711

712+
void add_preprocess_tensor_storage_types(String2GGMLType& tensor_storages_types,
713+
const std::string& name,
714+
enum ggml_type type,
715+
bool name_is_canonical) {
716+
for_each_preprocess_tensor_storage_type(name, type, [&](std::string key, ggml_type value) {
717+
tensor_storages_types.insert_or_assign(std::move(key), value);
718+
}, name_is_canonical);
719+
}
720+
721+
void add_preprocess_tensor_storage_types(String2GGMLType& tensor_storages_types, const std::string& name, enum ggml_type type) {
722+
add_preprocess_tensor_storage_types(tensor_storages_types, name, type, false);
723+
}
724+
696725
void preprocess_tensor(TensorStorage tensor_storage,
697726
std::vector<TensorStorage>& processed_tensor_storages) {
698727
std::vector<TensorStorage> result;
699-
std::string new_name = convert_tensor_name(tensor_storage.name);
728+
std::string new_name;
729+
if (tensor_storage.name_is_canonical) {
730+
new_name = tensor_storage.name;
731+
} else {
732+
new_name = convert_tensor_name(tensor_storage.name);
733+
tensor_storage.name_is_canonical = true;
734+
}
700735

701736
// convert unet transformer linear to conv2d 1x1
702737
if (starts_with(new_name, "model.diffusion_model.") &&
@@ -717,6 +752,7 @@ void preprocess_tensor(TensorStorage tensor_storage,
717752
}
718753

719754
tensor_storage.name = new_name;
755+
tensor_storage.name_is_canonical = true;
720756

721757
if (new_name.find("cond_stage_model") != std::string::npos &&
722758
ends_with(new_name, "attn.in_proj_weight")) {
@@ -725,20 +761,25 @@ void preprocess_tensor(TensorStorage tensor_storage,
725761

726762
std::vector<TensorStorage> chunks = tensor_storage.chunk(3);
727763
chunks[0].name = prefix + "self_attn.q_proj.weight";
764+
chunks[0].name_is_canonical = true;
728765
chunks[1].name = prefix + "self_attn.k_proj.weight";
766+
chunks[1].name_is_canonical = true;
729767
chunks[2].name = prefix + "self_attn.v_proj.weight";
768+
chunks[2].name_is_canonical = true;
730769

731770
processed_tensor_storages.insert(processed_tensor_storages.end(), chunks.begin(), chunks.end());
732-
733771
} else if (new_name.find("cond_stage_model") != std::string::npos &&
734772
ends_with(new_name, "attn.in_proj_bias")) {
735773
size_t prefix_size = new_name.find("attn.in_proj_bias");
736774
std::string prefix = new_name.substr(0, prefix_size);
737775

738776
std::vector<TensorStorage> chunks = tensor_storage.chunk(3);
739777
chunks[0].name = prefix + "self_attn.q_proj.bias";
778+
chunks[0].name_is_canonical = true;
740779
chunks[1].name = prefix + "self_attn.k_proj.bias";
780+
chunks[1].name_is_canonical = true;
741781
chunks[2].name = prefix + "self_attn.v_proj.bias";
782+
chunks[2].name_is_canonical = true;
742783

743784
processed_tensor_storages.insert(processed_tensor_storages.end(), chunks.begin(), chunks.end());
744785
} else {
@@ -1221,6 +1262,7 @@ bool ModelLoader::init_from_safetensors_file(const std::string& file_path, const
12211262
bool is_f8_e5m2 = false;
12221263
bool is_f64 = false;
12231264
bool is_i64 = false;
1265+
bool name_is_canonical = false;
12241266
};
12251267

12261268
std::vector<SafetensorTask> tasks;
@@ -1244,7 +1286,6 @@ bool ModelLoader::init_from_safetensors_file(const std::string& file_path, const
12441286
if (dtype == "U8") {
12451287
continue;
12461288
}
1247-
12481289
size_t begin = tensor_info["data_offsets"][0].get<size_t>();
12491290
size_t end = tensor_info["data_offsets"][1].get<size_t>();
12501291

@@ -1283,9 +1324,9 @@ bool ModelLoader::init_from_safetensors_file(const std::string& file_path, const
12831324
if (!starts_with(full_name, prefix)) {
12841325
full_name = prefix + full_name;
12851326
}
1286-
12871327
SafetensorTask task;
12881328
task.name = std::move(full_name);
1329+
task.name_is_canonical = false;
12891330
task.type = type;
12901331
task.ne = ne;
12911332
task.n_dims = n_dims;
@@ -1313,24 +1354,50 @@ bool ModelLoader::init_from_safetensors_file(const std::string& file_path, const
13131354
n_threads = 1;
13141355
}
13151356

1316-
std::vector<TensorStorage> processed(tasks.size());
1357+
const size_t prior_size = tensor_storages.size();
1358+
tensor_storages.resize(prior_size + tasks.size());
1359+
1360+
std::vector<size_t> read_order(tasks.size());
1361+
std::iota(read_order.begin(), read_order.end(), 0);
1362+
std::sort(read_order.begin(), read_order.end(), [&](size_t a, size_t b) {
1363+
return tasks[a].offset < tasks[b].offset;
1364+
});
13171365

1366+
std::vector<std::vector<std::pair<std::string, ggml_type>>> local_types(n_threads);
13181367
std::vector<std::thread> workers;
13191368
workers.reserve(n_threads);
13201369

1321-
for (int i = 0; i < n_threads; ++i) {
1322-
workers.emplace_back([&, thread_id = i]() {
1323-
for (size_t idx = thread_id; idx < tasks.size(); idx += n_threads) {
1370+
const size_t chunk_size = (read_order.size() + n_threads - 1) / n_threads;
1371+
for (int thread_id = 0; thread_id < n_threads; ++thread_id) {
1372+
size_t begin = static_cast<size_t>(thread_id) * chunk_size;
1373+
if (begin >= read_order.size()) {
1374+
continue;
1375+
}
1376+
size_t end = std::min(begin + chunk_size, read_order.size());
1377+
1378+
workers.emplace_back([&, thread_id, begin, end, prior_size]() {
1379+
auto& assignments = local_types[thread_id];
1380+
size_t expected = end - begin;
1381+
if (expected > 0) {
1382+
assignments.reserve(expected * 3);
1383+
}
1384+
1385+
for (size_t ord_idx = begin; ord_idx < end; ++ord_idx) {
1386+
size_t idx = read_order[ord_idx];
13241387
const auto& task = tasks[idx];
13251388

1326-
TensorStorage tensor_storage(task.name, task.type, task.ne.data(), task.n_dims, file_index, task.offset);
1389+
TensorStorage tensor_storage(task.name, task.type, task.ne.data(), task.n_dims, file_index, task.offset, task.name_is_canonical);
13271390
tensor_storage.reverse_ne();
1391+
if (!tensor_storage.name_is_canonical) {
1392+
tensor_storage.name = convert_tensor_name(tensor_storage.name);
1393+
tensor_storage.name_is_canonical = true;
1394+
}
13281395

1329-
tensor_storage.is_bf16 = task.is_bf16;
1396+
tensor_storage.is_bf16 = task.is_bf16;
13301397
tensor_storage.is_f8_e4m3 = task.is_f8_e4m3;
13311398
tensor_storage.is_f8_e5m2 = task.is_f8_e5m2;
1332-
tensor_storage.is_f64 = task.is_f64;
1333-
tensor_storage.is_i64 = task.is_i64;
1399+
tensor_storage.is_f64 = task.is_f64;
1400+
tensor_storage.is_i64 = task.is_i64;
13341401

13351402
size_t tensor_data_size = task.tensor_data_size;
13361403

@@ -1348,54 +1415,30 @@ bool ModelLoader::init_from_safetensors_file(const std::string& file_path, const
13481415
GGML_ASSERT(tensor_storage.nbytes() == tensor_data_size);
13491416
}
13501417

1351-
processed[idx] = std::move(tensor_storage);
1352-
}
1353-
});
1354-
}
1355-
1356-
for (auto& worker : workers) {
1357-
worker.join();
1358-
}
1418+
size_t target_index = prior_size + idx;
1419+
tensor_storages[target_index] = std::move(tensor_storage);
13591420

1360-
1361-
const size_t prior_size = tensor_storages.size();
1362-
tensor_storages.resize(prior_size + processed.size());
1363-
1364-
int append_threads = std::min(num_threads_to_use, (int)processed.size());
1365-
if (append_threads < 1) {
1366-
append_threads = 1;
1367-
}
1368-
1369-
std::vector<String2GGMLType> local_types(append_threads);
1370-
std::vector<std::thread> append_workers;
1371-
append_workers.reserve(append_threads);
1372-
1373-
for (int thread_id = 0; thread_id < append_threads; ++thread_id) {
1374-
append_workers.emplace_back([&, thread_id]() {
1375-
auto& local_map = local_types[thread_id];
1376-
for (size_t idx = thread_id; idx < processed.size(); idx += append_threads) {
1377-
size_t target_index = prior_size + idx;
1378-
tensor_storages[target_index] = std::move(processed[idx]);
1379-
add_preprocess_tensor_storage_types(local_map,
1380-
tensor_storages[target_index].name,
1381-
tensor_storages[target_index].type);
1421+
for_each_preprocess_tensor_storage_type(
1422+
tensor_storages[target_index].name,
1423+
tensor_storages[target_index].type,
1424+
[&](std::string key, ggml_type value) {
1425+
assignments.emplace_back(std::move(key), value);
1426+
},
1427+
tensor_storages[target_index].name_is_canonical);
13821428
}
13831429
});
13841430
}
13851431

1386-
for (auto& worker : append_workers) {
1432+
for (auto& worker : workers) {
13871433
worker.join();
13881434
}
13891435

1390-
for (auto& local_map : local_types) {
1391-
for (auto& kv : local_map) {
1392-
tensor_storages_types[kv.first] = kv.second;
1436+
for (auto& assignments : local_types) {
1437+
for (auto& kv : assignments) {
1438+
tensor_storages_types.insert_or_assign(std::move(kv.first), kv.second);
13931439
}
13941440
}
13951441

1396-
processed.clear();
1397-
processed.shrink_to_fit();
1398-
13991442
return true;
14001443
}
14011444
/*================================================= DiffusersModelLoader ==================================================*/
@@ -1426,8 +1469,9 @@ bool ModelLoader::init_from_diffusers_file(const std::string& file_path, const s
14261469
}
14271470
if (pos != std::string::npos) {
14281471
tensor_storage.name = "model.diffusion_model.output_blocks.2.2.conv" + tensor_storage.name.substr(len);
1472+
tensor_storage.name_is_canonical = true;
14291473
LOG_DEBUG("NEW NAME: %s", tensor_storage.name.c_str());
1430-
add_preprocess_tensor_storage_types(tensor_storages_types, tensor_storage.name, tensor_storage.type);
1474+
add_preprocess_tensor_storage_types(tensor_storages_types, tensor_storage.name, tensor_storage.type, true);
14311475
}
14321476
}
14331477
break;

model.h

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,7 @@ struct TensorStorage {
114114
bool is_f8_e5m2 = false;
115115
bool is_f64 = false;
116116
bool is_i64 = false;
117+
bool name_is_canonical = false;
117118
int64_t ne[SD_MAX_DIMS] = {1, 1, 1, 1, 1};
118119
int n_dims = 0;
119120

@@ -123,8 +124,14 @@ struct TensorStorage {
123124

124125
TensorStorage() = default;
125126

126-
TensorStorage(const std::string& name, ggml_type type, const int64_t* ne, int n_dims, size_t file_index, size_t offset = 0)
127-
: name(name), type(type), n_dims(n_dims), file_index(file_index), offset(offset) {
127+
TensorStorage(const std::string& name,
128+
ggml_type type,
129+
const int64_t* ne,
130+
int n_dims,
131+
size_t file_index,
132+
size_t offset = 0,
133+
bool name_is_canonical = false)
134+
: name(name), type(type), name_is_canonical(name_is_canonical), n_dims(n_dims), file_index(file_index), offset(offset) {
128135
for (int i = 0; i < n_dims; i++) {
129136
this->ne[i] = ne[i];
130137
}

0 commit comments

Comments
 (0)