Skip to content

Commit 5c614e4

Browse files
authored
feat: add convert api (#142)
1 parent 2b6ec97 commit 5c614e4

File tree

5 files changed

+167
-25
lines changed

5 files changed

+167
-25
lines changed

README.md

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@ cmake .. -DSD_METAL=ON
126126
cmake --build . --config Release
127127
```
128128
129-
### Using Flash Attention
129+
##### Using Flash Attention
130130
131131
Enabling flash attention reduces memory usage by at least 400 MB. At the moment, it is not supported when CUBLAS is enabled because the kernel implementation is missing.
132132
@@ -142,7 +142,7 @@ usage: ./bin/sd [arguments]
142142

143143
arguments:
144144
-h, --help show this help message and exit
145-
-M, --mode [txt2img or img2img] generation mode (default: txt2img)
145+
-M, --mode [MODE] run mode (txt2img or img2img or convert, default: txt2img)
146146
-t, --threads N number of threads to use during computation (default: -1).
147147
If threads <= 0, then threads will be set to the number of CPU physical cores
148148
-m, --model [MODEL] path to model
@@ -168,7 +168,8 @@ arguments:
168168
-s SEED, --seed SEED RNG seed (default: 42, use random seed for < 0)
169169
-b, --batch-count COUNT number of images to generate.
170170
--schedule {discrete, karras} Denoiser sigma schedule (default: discrete)
171-
--clip-skip N number of layers to skip of clip model (default: 0)
171+
--clip-skip N ignore last layers of CLIP network; 1 ignores none, 2 ignores one layer (default: -1)
172+
<= 0 represents unspecified, will be 1 for SD1.x, 2 for SD2.x
172173
--vae-tiling process vae in tiles to reduce memory usage
173174
-v, --verbose print extra info
174175
```
@@ -183,6 +184,16 @@ You can specify the model weight type using the `--type` parameter. The weights
183184
- `q5_0` or `q5_1` for 5-bit integer quantization
184185
- `q4_0` or `q4_1` for 4-bit integer quantization
185186
187+
#### Convert to GGUF
188+
189+
You can also convert weights in the formats `ckpt/safetensors/diffusers` to gguf and perform quantization in advance, avoiding the need for quantization every time you load them.
190+
191+
For example:
192+
193+
```sh
194+
./bin/sd -M convert -m ../models/v1-5-pruned-emaonly.safetensors -o ../models/v1-5-pruned-emaonly.q8_0.gguf -v --type q8_0
195+
```
196+
186197
#### txt2img example
187198

188199
```sh
@@ -240,7 +251,7 @@ Here's a simple example:
240251
| ---- |---- |
241252
| ![](./assets/without_lcm.png) |![](./assets/with_lcm.png) |
242253

243-
## Using TAESD to faster decoding
254+
#### Using TAESD for faster decoding
244255

245256
You can use TAESD to accelerate the decoding of latent images by following these steps:
246257

@@ -258,7 +269,7 @@ curl -L -O https://huggingface.co/madebyollin/taesd/blob/main/diffusion_pytorch_
258269
sd -m ../models/v1-5-pruned-emaonly.safetensors -p "a lovely cat" --taesd ../models/diffusion_pytorch_model.safetensors
259270
```
260271

261-
## Using ESRGAN to upscale results
272+
#### Using ESRGAN to upscale results
262273

263274
You can use ESRGAN to upscale the generated images. At the moment, only the [RealESRGAN_x4plus_anime_6B.pth](https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.2.4/RealESRGAN_x4plus_anime_6B.pth) model is supported. Support for more models of this architecture will be added soon.
264275

examples/cli/main.cpp

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,11 +42,13 @@ const char* schedule_str[] = {
4242
const char* modes_str[] = {
4343
"txt2img",
4444
"img2img",
45+
"convert",
4546
};
4647

4748
enum SDMode {
4849
TXT2IMG,
4950
IMG2IMG,
51+
CONVERT,
5052
MODE_COUNT
5153
};
5254

@@ -125,7 +127,7 @@ void print_usage(int argc, const char* argv[]) {
125127
printf("\n");
126128
printf("arguments:\n");
127129
printf(" -h, --help show this help message and exit\n");
128-
printf(" -M, --mode [txt2img or img2img] generation mode (default: txt2img)\n");
130+
printf(" -M, --mode [MODE] run mode (txt2img or img2img or convert, default: txt2img)\n");
129131
printf(" -t, --threads N number of threads to use during computation (default: -1).\n");
130132
printf(" If threads <= 0, then threads will be set to the number of CPU physical cores\n");
131133
printf(" -m, --model [MODEL] path to model\n");
@@ -384,7 +386,7 @@ void parse_args(int argc, const char** argv, SDParams& params) {
384386
params.n_threads = get_num_physical_cores();
385387
}
386388

387-
if (params.prompt.length() == 0) {
389+
if (params.mode != CONVERT && params.prompt.length() == 0) {
388390
fprintf(stderr, "error: the following arguments are required: prompt\n");
389391
print_usage(argc, argv);
390392
exit(1);
@@ -432,6 +434,12 @@ void parse_args(int argc, const char** argv, SDParams& params) {
432434
srand((int)time(NULL));
433435
params.seed = rand();
434436
}
437+
438+
if (params.mode == CONVERT) {
439+
if (params.output_path == "output.png") {
440+
params.output_path = "output.gguf";
441+
}
442+
}
435443
}
436444

437445
std::string get_image_params(SDParams params, int64_t seed) {
@@ -479,6 +487,24 @@ int main(int argc, const char* argv[]) {
479487
printf("%s", sd_get_system_info());
480488
}
481489

490+
if (params.mode == CONVERT) {
491+
bool success = convert(params.model_path.c_str(), params.vae_path.c_str(), params.output_path.c_str(), params.wtype);
492+
if (!success) {
493+
fprintf(stderr,
494+
"convert '%s'/'%s' to '%s' failed\n",
495+
params.model_path.c_str(),
496+
params.vae_path.c_str(),
497+
params.output_path.c_str());
498+
return 1;
499+
} else {
500+
printf("convert '%s'/'%s' to '%s' success\n",
501+
params.model_path.c_str(),
502+
params.vae_path.c_str(),
503+
params.output_path.c_str());
504+
return 0;
505+
}
506+
}
507+
482508
bool vae_decode_only = true;
483509
uint8_t* input_image_buffer = NULL;
484510
if (params.mode == IMG2IMG) {

model.cpp

Lines changed: 117 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
#include "ggml/ggml-backend.h"
1616
#include "ggml/ggml.h"
1717

18+
#include "stable-diffusion.h"
19+
1820
#ifdef SD_USE_METAL
1921
#include "ggml-metal.h"
2022
#endif
@@ -609,7 +611,7 @@ bool is_safetensors_file(const std::string& file_path) {
609611
}
610612

611613
size_t header_size_ = read_u64(header_size_buf);
612-
if (header_size_ >= file_size_) {
614+
if (header_size_ >= file_size_ || header_size_ <= 2) {
613615
return false;
614616
}
615617

@@ -1181,6 +1183,9 @@ SDVersion ModelLoader::get_sd_version() {
11811183
if (tensor_storage.name.find("conditioner.embedders.1") != std::string::npos) {
11821184
return VERSION_XL;
11831185
}
1186+
if (tensor_storage.name.find("cond_stage_model.1") != std::string::npos) {
1187+
return VERSION_XL;
1188+
}
11841189
if (tensor_storage.name == "cond_stage_model.transformer.text_model.embeddings.token_embedding.weight" ||
11851190
tensor_storage.name == "cond_stage_model.model.token_embedding.weight" ||
11861191
tensor_storage.name == "text_model.embeddings.token_embedding.weight" ||
@@ -1218,7 +1223,35 @@ std::string ModelLoader::load_merges() {
12181223
return merges_utf8_str;
12191224
}
12201225

1226+
void remove_duplicates(std::vector<TensorStorage>& vec) {
1227+
std::unordered_map<std::string, size_t> name_to_index_map;
1228+
1229+
for (size_t i = 0; i < vec.size(); ++i) {
1230+
const std::string& current_name = vec[i].name;
1231+
auto it = name_to_index_map.find(current_name);
1232+
1233+
if (it != name_to_index_map.end()) {
1234+
vec[it->second] = vec[i];
1235+
} else {
1236+
name_to_index_map[current_name] = i;
1237+
}
1238+
}
1239+
1240+
vec.resize(name_to_index_map.size());
1241+
}
1242+
12211243
bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, ggml_backend_t backend) {
1244+
std::vector<TensorStorage> processed_tensor_storages;
1245+
for (auto& tensor_storage : tensor_storages) {
1246+
// LOG_DEBUG("%s", name.c_str());
1247+
1248+
if (is_unused_tensor(tensor_storage.name)) {
1249+
continue;
1250+
}
1251+
1252+
preprocess_tensor(tensor_storage, processed_tensor_storages);
1253+
}
1254+
remove_duplicates(processed_tensor_storages);
12221255
bool success = true;
12231256
for (size_t file_index = 0; file_index < file_paths_.size(); file_index++) {
12241257
std::string file_path = file_paths_[file_index];
@@ -1276,22 +1309,10 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, ggml_backend
12761309
return true;
12771310
};
12781311

1279-
std::vector<TensorStorage> processed_tensor_storages;
1280-
for (auto& tensor_storage : tensor_storages) {
1312+
for (auto& tensor_storage : processed_tensor_storages) {
12811313
if (tensor_storage.file_index != file_index) {
12821314
continue;
12831315
}
1284-
1285-
// LOG_DEBUG("%s", name.c_str());
1286-
1287-
if (is_unused_tensor(tensor_storage.name)) {
1288-
continue;
1289-
}
1290-
1291-
preprocess_tensor(tensor_storage, processed_tensor_storages);
1292-
}
1293-
1294-
for (auto& tensor_storage : processed_tensor_storages) {
12951316
// LOG_DEBUG("%s", tensor_storage.name.c_str());
12961317

12971318
ggml_tensor* dst_tensor = NULL;
@@ -1437,7 +1458,61 @@ bool ModelLoader::load_tensors(std::map<std::string, struct ggml_tensor*>& tenso
14371458
return true;
14381459
}
14391460

1440-
int64_t ModelLoader::cal_mem_size(ggml_backend_t backend) {
1461+
bool ModelLoader::save_to_gguf_file(const std::string& file_path, ggml_type type) {
1462+
auto backend = ggml_backend_cpu_init();
1463+
size_t mem_size = 1 * 1024 * 1024; // for padding
1464+
mem_size += tensor_storages.size() * ggml_tensor_overhead();
1465+
mem_size += cal_mem_size(backend, type);
1466+
LOG_INFO("model tensors mem size: %.2fMB", mem_size / 1024.f / 1024.f);
1467+
ggml_context* ggml_ctx = ggml_init({mem_size, NULL, false});
1468+
1469+
gguf_context* gguf_ctx = gguf_init_empty();
1470+
1471+
auto on_new_tensor_cb = [&](const TensorStorage& tensor_storage, ggml_tensor** dst_tensor) -> bool {
1472+
const std::string& name = tensor_storage.name;
1473+
1474+
ggml_type tensor_type = tensor_storage.type;
1475+
if (type != GGML_TYPE_COUNT) {
1476+
if (ggml_is_quantized(type) && tensor_storage.ne[0] % 32 != 0) {
1477+
tensor_type = GGML_TYPE_F16;
1478+
} else {
1479+
tensor_type = type;
1480+
}
1481+
}
1482+
1483+
ggml_tensor* tensor = ggml_new_tensor(ggml_ctx, tensor_type, tensor_storage.n_dims, tensor_storage.ne);
1484+
if (tensor == NULL) {
1485+
LOG_ERROR("ggml_new_tensor failed");
1486+
return false;
1487+
}
1488+
ggml_set_name(tensor, name.c_str());
1489+
1490+
// LOG_DEBUG("%s %d %s %d[%d %d %d %d] %d[%d %d %d %d]", name.c_str(),
1491+
// ggml_nbytes(tensor), ggml_type_name(tensor_type),
1492+
// tensor_storage.n_dims,
1493+
// tensor_storage.ne[0], tensor_storage.ne[1], tensor_storage.ne[2], tensor_storage.ne[3],
1494+
// tensor->n_dims, tensor->ne[0], tensor->ne[1], tensor->ne[2], tensor->ne[3]);
1495+
1496+
*dst_tensor = tensor;
1497+
1498+
gguf_add_tensor(gguf_ctx, tensor);
1499+
1500+
return true;
1501+
};
1502+
1503+
bool success = load_tensors(on_new_tensor_cb, backend);
1504+
ggml_backend_free(backend);
1505+
LOG_INFO("load tensors done");
1506+
LOG_INFO("trying to save tensors to %s", file_path.c_str());
1507+
if (success) {
1508+
gguf_write_to_file(gguf_ctx, file_path.c_str(), false);
1509+
}
1510+
ggml_free(ggml_ctx);
1511+
gguf_free(gguf_ctx);
1512+
return success;
1513+
}
1514+
1515+
int64_t ModelLoader::cal_mem_size(ggml_backend_t backend, ggml_type type) {
14411516
size_t alignment = 128;
14421517
if (backend != NULL) {
14431518
alignment = ggml_backend_get_alignment(backend);
@@ -1452,8 +1527,35 @@ int64_t ModelLoader::cal_mem_size(ggml_backend_t backend) {
14521527
}
14531528

14541529
for (auto& tensor_storage : processed_tensor_storages) {
1530+
ggml_type tensor_type = tensor_storage.type;
1531+
if (type != GGML_TYPE_COUNT) {
1532+
if (ggml_is_quantized(type) && tensor_storage.ne[0] % 32 != 0) {
1533+
tensor_type = GGML_TYPE_F16;
1534+
} else {
1535+
tensor_type = type;
1536+
}
1537+
}
1538+
tensor_storage.type = tensor_type;
14551539
mem_size += tensor_storage.nbytes() + alignment;
14561540
}
14571541

14581542
return mem_size;
14591543
}
1544+
1545+
bool convert(const char* input_path, const char* vae_path, const char* output_path, sd_type_t output_type) {
1546+
ModelLoader model_loader;
1547+
1548+
if (!model_loader.init_from_file(input_path)) {
1549+
LOG_ERROR("init model loader from file failed: '%s'", input_path);
1550+
return false;
1551+
}
1552+
1553+
if (vae_path != NULL && strlen(vae_path) > 0) {
1554+
if (!model_loader.init_from_file(vae_path, "vae.")) {
1555+
LOG_ERROR("init model loader from file failed: '%s'", vae_path);
1556+
return false;
1557+
}
1558+
}
1559+
bool success = model_loader.save_to_gguf_file(output_path, (ggml_type)output_type);
1560+
return success;
1561+
}

model.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,9 @@
44
#include <functional>
55
#include <map>
66
#include <memory>
7+
#include <set>
78
#include <string>
89
#include <vector>
9-
#include <set>
1010

1111
#include "ggml/ggml-backend.h"
1212
#include "ggml/ggml.h"
@@ -121,7 +121,8 @@ class ModelLoader {
121121
bool load_tensors(std::map<std::string, struct ggml_tensor*>& tensors,
122122
ggml_backend_t backend,
123123
std::set<std::string> ignore_tensors = {});
124-
int64_t cal_mem_size(ggml_backend_t backend);
124+
bool save_to_gguf_file(const std::string& file_path, ggml_type type);
125+
int64_t cal_mem_size(ggml_backend_t backend, ggml_type type = GGML_TYPE_COUNT);
125126
~ModelLoader() = default;
126127
};
127128
#endif // __MODEL_H__

stable-diffusion.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,9 @@ SD_API upscaler_ctx_t* new_upscaler_ctx(const char* esrgan_path,
148148
enum sd_type_t wtype);
149149
SD_API void free_upscaler_ctx(upscaler_ctx_t* upscaler_ctx);
150150

151-
SD_API sd_image_t upscale(upscaler_ctx_t*, sd_image_t input_image, uint32_t upscale_factor);
151+
SD_API sd_image_t upscale(upscaler_ctx_t* upscaler_ctx, sd_image_t input_image, uint32_t upscale_factor);
152+
153+
SD_API bool convert(const char* input_path, const char* vae_path, const char* output_path, sd_type_t output_type);
152154

153155
#ifdef __cplusplus
154156
}

0 commit comments

Comments
 (0)