Skip to content

Commit 73da9fb

Browse files

Commit message: delay allocation until model loading time
1 parent: 89cc0ab — commit: 73da9fb

File tree

3 files changed

+23
-16
lines changed

3 files changed

+23
-16
lines changed

model.cpp

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1340,7 +1340,7 @@ std::string ModelLoader::load_umt5_tokenizer_json() {
13401340
return json_str;
13411341
}
13421342

1343-
bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_threads_p, bool use_mmap) {
1343+
bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_threads_p, bool use_mmap, alloc_cb_t alloc_cb) {
13441344
int64_t process_time_ms = 0;
13451345
std::atomic<int64_t> read_time_ms(0);
13461346
std::atomic<int64_t> memcpy_time_ms(0);
@@ -1368,6 +1368,10 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_thread
13681368
const int64_t t_start = ggml_time_ms();
13691369
int last_n_threads = 1;
13701370

1371+
if (alloc_cb) {
1372+
alloc_cb();
1373+
}
1374+
13711375
for (size_t file_index = 0; file_index < file_paths_.size(); file_index++) {
13721376
std::string file_path = file_paths_[file_index];
13731377
LOG_DEBUG("loading tensors from %s", file_path.c_str());
@@ -1598,7 +1602,8 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_thread
15981602
bool ModelLoader::load_tensors(std::map<std::string, struct ggml_tensor*>& tensors,
15991603
std::set<std::string> ignore_tensors,
16001604
int n_threads,
1601-
bool use_mmap) {
1605+
bool use_mmap,
1606+
alloc_cb_t alloc_cb) {
16021607
std::set<std::string> tensor_names_in_file;
16031608
std::mutex tensor_names_mutex;
16041609
auto on_new_tensor_cb = [&](const TensorStorage& tensor_storage, ggml_tensor** dst_tensor) -> bool {
@@ -1641,7 +1646,7 @@ bool ModelLoader::load_tensors(std::map<std::string, struct ggml_tensor*>& tenso
16411646
return true;
16421647
};
16431648

1644-
bool success = load_tensors(on_new_tensor_cb, n_threads, use_mmap);
1649+
bool success = load_tensors(on_new_tensor_cb, n_threads, use_mmap, alloc_cb);
16451650
if (!success) {
16461651
LOG_ERROR("load tensors from file failed");
16471652
return false;

model.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -274,6 +274,7 @@ struct TensorStorage {
274274
};
275275

276276
typedef std::function<bool(const TensorStorage&, ggml_tensor**)> on_new_tensor_cb_t;
277+
typedef std::function<void()> alloc_cb_t;
277278

278279
typedef OrderedMap<std::string, TensorStorage> String2TensorStorage;
279280

@@ -310,11 +311,12 @@ class ModelLoader {
310311
std::map<ggml_type, uint32_t> get_vae_wtype_stat();
311312
String2TensorStorage& get_tensor_storage_map() { return tensor_storage_map; }
312313
void set_wtype_override(ggml_type wtype, std::string tensor_type_rules = "");
313-
bool load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_threads = 0, bool use_mmap = false);
314+
bool load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_threads = 0, bool use_mmap = false, alloc_cb_t alloc_cb = nullptr);
314315
bool load_tensors(std::map<std::string, struct ggml_tensor*>& tensors,
315316
std::set<std::string> ignore_tensors = {},
316317
int n_threads = 0,
317-
bool use_mmap = false);
318+
bool use_mmap = false,
319+
alloc_cb_t alloc_cb = nullptr);
318320

319321
std::vector<std::string> get_tensor_names() const {
320322
std::vector<std::string> names;

stable-diffusion.cpp

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -478,7 +478,6 @@ class StableDiffusionGGML {
478478
clip_vision = std::make_shared<FrozenCLIPVisionEmbedder>(backend,
479479
offload_params_to_cpu,
480480
tensor_storage_map);
481-
clip_vision->alloc_params_buffer();
482481
clip_vision->get_param_tensors(tensors);
483482
}
484483
} else if (sd_version_is_qwen_image(version)) {
@@ -541,18 +540,15 @@ class StableDiffusionGGML {
541540
diffusion_model->set_flash_attn_enabled(true);
542541
}
543542

544-
cond_stage_model->alloc_params_buffer();
545543
cond_stage_model->get_param_tensors(tensors);
546544

547-
diffusion_model->alloc_params_buffer();
548545
diffusion_model->get_param_tensors(tensors);
549546

550547
if (sd_version_is_unet_edit(version)) {
551548
vae_decode_only = false;
552549
}
553550

554551
if (high_noise_diffusion_model) {
555-
high_noise_diffusion_model->alloc_params_buffer();
556552
high_noise_diffusion_model->get_param_tensors(tensors);
557553
}
558554

@@ -570,7 +566,6 @@ class StableDiffusionGGML {
570566
"first_stage_model",
571567
vae_decode_only,
572568
version);
573-
first_stage_model->alloc_params_buffer();
574569
first_stage_model->get_param_tensors(tensors, "first_stage_model");
575570
} else if (version == VERSION_CHROMA_RADIANCE) {
576571
first_stage_model = std::make_shared<FakeVAE>(vae_backend,
@@ -596,7 +591,6 @@ class StableDiffusionGGML {
596591
vae_conv_2d_scale);
597592
first_stage_model->set_conv2d_scale(vae_conv_2d_scale);
598593
}
599-
first_stage_model->alloc_params_buffer();
600594
first_stage_model->get_param_tensors(tensors, "first_stage_model");
601595
}
602596
if (use_tiny_autoencoder) {
@@ -666,10 +660,6 @@ class StableDiffusionGGML {
666660
}
667661
}
668662
if (stacked_id) {
669-
if (!pmid_model->alloc_params_buffer()) {
670-
LOG_ERROR(" pmid model params buffer allocation failed");
671-
return false;
672-
}
673663
pmid_model->get_param_tensors(tensors, "pmid");
674664
}
675665
}
@@ -710,7 +700,17 @@ class StableDiffusionGGML {
710700
if (version == VERSION_SVD) {
711701
ignore_tensors.insert("conditioner.embedders.3");
712702
}
713-
bool success = model_loader.load_tensors(tensors, ignore_tensors, n_threads, sd_ctx_params->use_mmap);
703+
704+
auto alloc_cb = [&]() -> void {
705+
if (clip_vision) clip_vision->alloc_params_buffer();
706+
if (cond_stage_model) cond_stage_model->alloc_params_buffer();
707+
if (diffusion_model) diffusion_model->alloc_params_buffer();
708+
if (high_noise_diffusion_model) high_noise_diffusion_model->alloc_params_buffer();
709+
if (first_stage_model) first_stage_model->alloc_params_buffer();
710+
if (pmid_model) pmid_model->alloc_params_buffer();
711+
};
712+
713+
bool success = model_loader.load_tensors(tensors, ignore_tensors, n_threads, sd_ctx_params->use_mmap, alloc_cb);
714714
if (!success) {
715715
LOG_ERROR("load tensors from model loader failed");
716716
ggml_free(ctx);

0 commit comments

Comments (0)