Skip to content

Commit 5ef1821

Browse files
committed
format code
1 parent 6600b89 commit 5ef1821

File tree

14 files changed

+212
-241
lines changed

14 files changed

+212
-241
lines changed

clip.hpp

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -343,8 +343,7 @@ class CLIPTokenizer {
343343
}
344344
}
345345

346-
std::string clean_up_tokenization(std::string &text){
347-
346+
std::string clean_up_tokenization(std::string& text) {
348347
std::regex pattern(R"( ,)");
349348
// Replace " ," with ","
350349
std::string result = std::regex_replace(text, pattern, ",");
@@ -359,10 +358,10 @@ class CLIPTokenizer {
359358
std::u32string ts = decoder[t];
360359
// printf("%d, %s \n", t, utf32_to_utf8(ts).c_str());
361360
std::string s = utf32_to_utf8(ts);
362-
if (s.length() >= 4 ){
363-
if(ends_with(s, "</w>")) {
361+
if (s.length() >= 4) {
362+
if (ends_with(s, "</w>")) {
364363
text += s.replace(s.length() - 4, s.length() - 1, "") + " ";
365-
}else{
364+
} else {
366365
text += s;
367366
}
368367
} else {
@@ -768,8 +767,7 @@ class CLIPVisionModel : public GGMLBlock {
768767
blocks["post_layernorm"] = std::shared_ptr<GGMLBlock>(new LayerNorm(hidden_size));
769768
}
770769

771-
struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* pixel_values,
772-
bool return_pooled = true) {
770+
struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* pixel_values, bool return_pooled = true) {
773771
// pixel_values: [N, num_channels, image_size, image_size]
774772
auto embeddings = std::dynamic_pointer_cast<CLIPVisionEmbeddings>(blocks["embeddings"]);
775773
auto pre_layernorm = std::dynamic_pointer_cast<LayerNorm>(blocks["pre_layernorm"]);
@@ -779,11 +777,11 @@ class CLIPVisionModel : public GGMLBlock {
779777
auto x = embeddings->forward(ctx, pixel_values); // [N, num_positions, embed_dim]
780778
x = pre_layernorm->forward(ctx, x);
781779
x = encoder->forward(ctx, x, -1, false);
782-
// print_ggml_tensor(x, true, "ClipVisionModel x: ");
780+
// print_ggml_tensor(x, true, "ClipVisionModel x: ");
783781
auto last_hidden_state = x;
784-
x = post_layernorm->forward(ctx, x); // [N, n_token, hidden_size]
782+
x = post_layernorm->forward(ctx, x); // [N, n_token, hidden_size]
785783

786-
GGML_ASSERT(x->ne[3] == 1);
784+
GGML_ASSERT(x->ne[3] == 1);
787785
if (return_pooled) {
788786
ggml_tensor* pooled = ggml_cont(ctx, ggml_view_2d(ctx, x, x->ne[0], x->ne[2], x->nb[2], 0));
789787
return pooled; // [N, hidden_size]

common.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -304,7 +304,7 @@ class BasicTransformerBlock : public GGMLBlock {
304304
int64_t n_head,
305305
int64_t d_head,
306306
int64_t context_dim,
307-
bool ff_in = false,
307+
bool ff_in = false,
308308
bool flash_attn = false)
309309
: n_head(n_head), d_head(d_head), ff_in(ff_in) {
310310
// disable_self_attn is always False

conditioner.hpp

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
#include "clip.hpp"
55
#include "t5.hpp"
66

7-
87
struct SDCondition {
98
struct ggml_tensor* c_crossattn = NULL; // aka context
109
struct ggml_tensor* c_vector = NULL; // aka y
@@ -44,7 +43,7 @@ struct Conditioner {
4443
// ldm.modules.encoders.modules.FrozenCLIPEmbedder
4544
// Ref: https://github.com/AUTOMATIC1111/stable-diffusion-webui/blob/cad87bf4e3e0b0a759afa94e933527c3123d59bc/modules/sd_hijack_clip.py#L283
4645
struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
47-
SDVersion version = VERSION_SD1;
46+
SDVersion version = VERSION_SD1;
4847
PMVersion pm_version = VERSION_1;
4948
CLIPTokenizer tokenizer;
5049
ggml_type wtype;
@@ -61,7 +60,7 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
6160
ggml_type wtype,
6261
const std::string& embd_dir,
6362
SDVersion version = VERSION_SD1,
64-
PMVersion pv = VERSION_1,
63+
PMVersion pv = VERSION_1,
6564
int clip_skip = -1)
6665
: version(version), pm_version(pv), tokenizer(version == VERSION_SD2 ? 0 : 49407), embd_dir(embd_dir), wtype(wtype) {
6766
if (clip_skip <= 0) {
@@ -162,7 +161,7 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
162161
tokenize_with_trigger_token(std::string text,
163162
int num_input_imgs,
164163
int32_t image_token,
165-
bool padding = false){
164+
bool padding = false) {
166165
return tokenize_with_trigger_token(text, num_input_imgs, image_token,
167166
text_model->model.n_token, padding);
168167
}
@@ -271,7 +270,7 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
271270
std::vector<int> clean_input_ids_tmp;
272271
for (uint32_t i = 0; i < class_token_index[0]; i++)
273272
clean_input_ids_tmp.push_back(clean_input_ids[i]);
274-
for (uint32_t i = 0; i < (pm_version == VERSION_2 ? 2*num_input_imgs: num_input_imgs); i++)
273+
for (uint32_t i = 0; i < (pm_version == VERSION_2 ? 2 * num_input_imgs : num_input_imgs); i++)
275274
clean_input_ids_tmp.push_back(class_token);
276275
for (uint32_t i = class_token_index[0] + 1; i < clean_input_ids.size(); i++)
277276
clean_input_ids_tmp.push_back(clean_input_ids[i]);
@@ -287,11 +286,11 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
287286
// weights.insert(weights.begin(), 1.0);
288287

289288
tokenizer.pad_tokens(tokens, weights, max_length, padding);
290-
int offset = pm_version == VERSION_2 ? 2*num_input_imgs: num_input_imgs;
289+
int offset = pm_version == VERSION_2 ? 2 * num_input_imgs : num_input_imgs;
291290
for (uint32_t i = 0; i < tokens.size(); i++) {
292291
// if (class_idx + 1 <= i && i < class_idx + 1 + 2*num_input_imgs) // photomaker V2 has num_tokens(=2)*num_input_imgs
293-
if (class_idx + 1 <= i && i < class_idx + 1 + offset) // photomaker V2 has num_tokens(=2)*num_input_imgs
294-
// hardcode for now
292+
if (class_idx + 1 <= i && i < class_idx + 1 + offset) // photomaker V2 has num_tokens(=2)*num_input_imgs
293+
// hardcode for now
295294
class_token_mask.push_back(true);
296295
else
297296
class_token_mask.push_back(false);
@@ -536,7 +535,7 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
536535
int height,
537536
int num_input_imgs,
538537
int adm_in_channels = -1,
539-
bool force_zero_embeddings = false){
538+
bool force_zero_embeddings = false) {
540539
auto image_tokens = convert_token_to_id(trigger_word);
541540
// if(image_tokens.size() == 1){
542541
// printf(" image token id is: %d \n", image_tokens[0]);
@@ -964,7 +963,7 @@ struct SD3CLIPEmbedder : public Conditioner {
964963
int height,
965964
int num_input_imgs,
966965
int adm_in_channels = -1,
967-
bool force_zero_embeddings = false){
966+
bool force_zero_embeddings = false) {
968967
GGML_ASSERT(0 && "Not implemented yet!");
969968
}
970969

diffusion_model.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ struct UNetModel : public DiffusionModel {
3333
UNetModel(ggml_backend_t backend,
3434
ggml_type wtype,
3535
SDVersion version = VERSION_SD1,
36-
bool flash_attn = false)
36+
bool flash_attn = false)
3737
: unet(backend, wtype, version, flash_attn) {
3838
}
3939

@@ -135,7 +135,7 @@ struct FluxModel : public DiffusionModel {
135135
FluxModel(ggml_backend_t backend,
136136
ggml_type wtype,
137137
SDVersion version = VERSION_FLUX_DEV,
138-
bool flash_attn = false)
138+
bool flash_attn = false)
139139
: flux(backend, wtype, version, flash_attn) {
140140
}
141141

examples/cli/main.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -483,7 +483,7 @@ void parse_args(int argc, const char** argv, SDParams& params) {
483483
} else if (arg == "--vae-on-cpu") {
484484
params.vae_on_cpu = true; // will slow down latent decoding but necessary for low MEM GPUs
485485
} else if (arg == "--diffusion-fa") {
486-
params.diffusion_flash_attn = true; // can reduce MEM significantly
486+
params.diffusion_flash_attn = true; // can reduce MEM significantly
487487
} else if (arg == "--canny") {
488488
params.canny_preprocess = true;
489489
} else if (arg == "-b" || arg == "--batch-count") {

flux.hpp

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -170,9 +170,9 @@ namespace Flux {
170170
// x: [N, n_token, dim]
171171
// pe: [n_token, d_head/2, 2, 2]
172172
// return [N, n_token, dim]
173-
auto qkv = pre_attention(ctx, x); // q,k,v: [N, n_token, n_head, d_head]
173+
auto qkv = pre_attention(ctx, x); // q,k,v: [N, n_token, n_head, d_head]
174174
x = attention(ctx, qkv[0], qkv[1], qkv[2], pe, flash_attn); // [N, n_token, dim]
175-
x = post_attention(ctx, x); // [N, n_token, dim]
175+
x = post_attention(ctx, x); // [N, n_token, dim]
176176
return x;
177177
}
178178
};
@@ -241,11 +241,12 @@ namespace Flux {
241241

242242
struct DoubleStreamBlock : public GGMLBlock {
243243
bool flash_attn;
244+
244245
public:
245246
DoubleStreamBlock(int64_t hidden_size,
246247
int64_t num_heads,
247248
float mlp_ratio,
248-
bool qkv_bias = false,
249+
bool qkv_bias = false,
249250
bool flash_attn = false)
250251
: flash_attn(flash_attn) {
251252
int64_t mlp_hidden_dim = hidden_size * mlp_ratio;
@@ -322,7 +323,7 @@ namespace Flux {
322323
auto k = ggml_concat(ctx, txt_k, img_k, 2); // [N, n_txt_token + n_img_token, n_head, d_head]
323324
auto v = ggml_concat(ctx, txt_v, img_v, 2); // [N, n_txt_token + n_img_token, n_head, d_head]
324325

325-
auto attn = attention(ctx, q, k, v, pe, flash_attn); // [N, n_txt_token + n_img_token, n_head*d_head]
326+
auto attn = attention(ctx, q, k, v, pe, flash_attn); // [N, n_txt_token + n_img_token, n_head*d_head]
326327
attn = ggml_cont(ctx, ggml_permute(ctx, attn, 0, 2, 1, 3)); // [n_txt_token + n_img_token, N, hidden_size]
327328
auto txt_attn_out = ggml_view_3d(ctx,
328329
attn,
@@ -830,7 +831,7 @@ namespace Flux {
830831
FluxRunner(ggml_backend_t backend,
831832
ggml_type wtype,
832833
SDVersion version = VERSION_FLUX_DEV,
833-
bool flash_attn = false)
834+
bool flash_attn = false)
834835
: GGMLRunner(backend, wtype) {
835836
flux_params.flash_attn = flash_attn;
836837
if (version == VERSION_FLUX_SCHNELL) {

ggml_extend.hpp

Lines changed: 14 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -709,18 +709,18 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_nn_attention_ext(struct ggml_context*
709709

710710
float scale = (1.0f / sqrt((float)d_head));
711711

712-
//if (flash_attn) {
713-
// LOG_DEBUG("attention_ext L_q:%d L_k:%d n_head:%d C:%d d_head:%d N:%d", L_q, L_k, n_head, C, d_head, N);
714-
//}
715-
// is there anything oddly shaped?? ping Green-Sky if you can trip this assert
712+
// if (flash_attn) {
713+
// LOG_DEBUG("attention_ext L_q:%d L_k:%d n_head:%d C:%d d_head:%d N:%d", L_q, L_k, n_head, C, d_head, N);
714+
// }
715+
// is there anything oddly shaped?? ping Green-Sky if you can trip this assert
716716
GGML_ASSERT(((L_k % 256 == 0) && L_q == L_k) || !(L_k % 256 == 0));
717717

718718
bool can_use_flash_attn = true;
719-
can_use_flash_attn = can_use_flash_attn && L_k % 256 == 0;
720-
can_use_flash_attn = can_use_flash_attn && d_head % 64 == 0; // double check
719+
can_use_flash_attn = can_use_flash_attn && L_k % 256 == 0;
720+
can_use_flash_attn = can_use_flash_attn && d_head % 64 == 0; // double check
721721

722722
// cuda max d_head seems to be 256, cpu does seem to work with 512
723-
can_use_flash_attn = can_use_flash_attn && d_head <= 256; // double check
723+
can_use_flash_attn = can_use_flash_attn && d_head <= 256; // double check
724724

725725
if (mask != nullptr) {
726726
// TODO(Green-Sky): figure out if we can bend t5 to work too
@@ -731,9 +731,9 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_nn_attention_ext(struct ggml_context*
731731
// TODO(Green-Sky): more pad or disable for funny tensor shapes
732732

733733
ggml_tensor* kqv = nullptr;
734-
//GGML_ASSERT((flash_attn && can_use_flash_attn) || !flash_attn);
734+
// GGML_ASSERT((flash_attn && can_use_flash_attn) || !flash_attn);
735735
if (can_use_flash_attn && flash_attn) {
736-
//LOG_DEBUG("using flash attention");
736+
// LOG_DEBUG("using flash attention");
737737
k = ggml_cast(ctx, k, GGML_TYPE_F16);
738738

739739
v = ggml_cont(ctx, ggml_permute(ctx, v, 0, 2, 1, 3)); // [N, n_head, L_k, d_head]
@@ -743,7 +743,7 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_nn_attention_ext(struct ggml_context*
743743
kqv = ggml_flash_attn_ext(ctx, q, k, v, mask, scale, 0, 0);
744744
ggml_flash_attn_ext_set_prec(kqv, GGML_PREC_F32);
745745

746-
//kqv = ggml_view_3d(ctx, kqv, d_head, n_head, L_k, kqv->nb[1], kqv->nb[2], 0);
746+
// kqv = ggml_view_3d(ctx, kqv, d_head, n_head, L_k, kqv->nb[1], kqv->nb[2], 0);
747747
kqv = ggml_view_3d(ctx, kqv, d_head, n_head, L_q, kqv->nb[1], kqv->nb[2], 0);
748748
} else {
749749
v = ggml_cont(ctx, ggml_permute(ctx, v, 1, 2, 0, 3)); // [N, n_head, d_head, L_k]
@@ -761,8 +761,8 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_nn_attention_ext(struct ggml_context*
761761

762762
kqv = ggml_mul_mat(ctx, v, kq); // [N * n_head, L_q, d_head]
763763

764-
kqv = ggml_reshape_4d(ctx, kqv, d_head, L_q, n_head, N); // [N, n_head, L_q, d_head]
765-
kqv = ggml_permute(ctx, kqv, 0, 2, 1, 3); // [N, L_q, n_head, d_head]
764+
kqv = ggml_reshape_4d(ctx, kqv, d_head, L_q, n_head, N); // [N, n_head, L_q, d_head]
765+
kqv = ggml_permute(ctx, kqv, 0, 2, 1, 3); // [N, L_q, n_head, d_head]
766766
}
767767

768768
kqv = ggml_cont(ctx, kqv);
@@ -1057,7 +1057,7 @@ struct GGMLRunner {
10571057
// get_desc().c_str(),
10581058
// params_buffer_size / (1024.0 * 1024.0),
10591059
// ggml_backend_is_cpu(backend) ? "RAM" : "VRAM",
1060-
// num_tensors);
1060+
// num_tensors);
10611061
return true;
10621062
}
10631063

@@ -1227,8 +1227,7 @@ class Linear : public UnaryBlock {
12271227
params["weight"] = ggml_new_tensor_2d(ctx, wtype, in_features, out_features);
12281228
if (bias) {
12291229
params["bias"] = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, out_features);
1230-
}
1231-
1230+
}
12321231
}
12331232

12341233
public:

model.cpp

Lines changed: 13 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -148,19 +148,19 @@ std::unordered_map<std::string, std::string> vae_decoder_name_map = {
148148

149149
std::unordered_map<std::string, std::string> pmid_v2_name_map = {
150150
{"pmid.qformer_perceiver.perceiver_resampler.layers.0.1.1.weight",
151-
"pmid.qformer_perceiver.perceiver_resampler.layers.0.1.1.fc1.weight"},
151+
"pmid.qformer_perceiver.perceiver_resampler.layers.0.1.1.fc1.weight"},
152152
{"pmid.qformer_perceiver.perceiver_resampler.layers.0.1.3.weight",
153-
"pmid.qformer_perceiver.perceiver_resampler.layers.0.1.1.fc2.weight"},
153+
"pmid.qformer_perceiver.perceiver_resampler.layers.0.1.1.fc2.weight"},
154154
{"pmid.qformer_perceiver.perceiver_resampler.layers.1.1.1.weight",
155-
"pmid.qformer_perceiver.perceiver_resampler.layers.1.1.1.fc1.weight"},
155+
"pmid.qformer_perceiver.perceiver_resampler.layers.1.1.1.fc1.weight"},
156156
{"pmid.qformer_perceiver.perceiver_resampler.layers.1.1.3.weight",
157157
"pmid.qformer_perceiver.perceiver_resampler.layers.1.1.1.fc2.weight"},
158158
{"pmid.qformer_perceiver.perceiver_resampler.layers.2.1.1.weight",
159-
"pmid.qformer_perceiver.perceiver_resampler.layers.2.1.1.fc1.weight"},
159+
"pmid.qformer_perceiver.perceiver_resampler.layers.2.1.1.fc1.weight"},
160160
{"pmid.qformer_perceiver.perceiver_resampler.layers.2.1.3.weight",
161-
"pmid.qformer_perceiver.perceiver_resampler.layers.2.1.1.fc2.weight"},
161+
"pmid.qformer_perceiver.perceiver_resampler.layers.2.1.1.fc2.weight"},
162162
{"pmid.qformer_perceiver.perceiver_resampler.layers.3.1.1.weight",
163-
"pmid.qformer_perceiver.perceiver_resampler.layers.3.1.1.fc1.weight"},
163+
"pmid.qformer_perceiver.perceiver_resampler.layers.3.1.1.fc1.weight"},
164164
{"pmid.qformer_perceiver.perceiver_resampler.layers.3.1.3.weight",
165165
"pmid.qformer_perceiver.perceiver_resampler.layers.3.1.1.fc2.weight"},
166166
{"pmid.qformer_perceiver.token_proj.0.bias",
@@ -650,33 +650,32 @@ uint16_t f8_e4m3_to_f16(uint8_t f8) {
650650
return ggml_fp32_to_fp16(*reinterpret_cast<const float*>(&result));
651651
}
652652

653-
654653
uint16_t f8_e5m2_to_f16(uint8_t fp8) {
655-
uint8_t sign = (fp8 >> 7) & 0x1;
654+
uint8_t sign = (fp8 >> 7) & 0x1;
656655
uint8_t exponent = (fp8 >> 2) & 0x1F;
657656
uint8_t mantissa = fp8 & 0x3;
658657

659658
uint16_t fp16_sign = sign << 15;
660659
uint16_t fp16_exponent;
661660
uint16_t fp16_mantissa;
662661

663-
if (exponent == 0 && mantissa == 0) { //zero
662+
if (exponent == 0 && mantissa == 0) { // zero
664663
return fp16_sign;
665664
}
666665

667-
if (exponent == 0x1F) { //NAN and INF
666+
if (exponent == 0x1F) { // NAN and INF
668667
fp16_exponent = 0x1F;
669668
fp16_mantissa = mantissa ? (mantissa << 8) : 0;
670669
return fp16_sign | (fp16_exponent << 10) | fp16_mantissa;
671670
}
672671

673-
if (exponent == 0) { //subnormal numbers
672+
if (exponent == 0) { // subnormal numbers
674673
fp16_exponent = 0;
675674
fp16_mantissa = (mantissa << 8);
676675
return fp16_sign | fp16_mantissa;
677676
}
678677

679-
//normal numbers
678+
// normal numbers
680679
int16_t true_exponent = (int16_t)exponent - 15 + 15;
681680
if (true_exponent <= 0) {
682681
fp16_exponent = 0;
@@ -1051,7 +1050,7 @@ bool ModelLoader::init_from_safetensors_file(const std::string& file_path, const
10511050
}
10521051

10531052
TensorStorage tensor_storage(prefix + name, type, ne, n_dims, file_index, ST_HEADER_SIZE_LEN + header_size_ + begin);
1054-
tensor_storage.reverse_ne();
1053+
tensor_storage.reverse_ne();
10551054

10561055
size_t tensor_data_size = end - begin;
10571056

@@ -1434,10 +1433,9 @@ bool ModelLoader::init_from_ckpt_file(const std::string& file_path, const std::s
14341433
std::string name = zip_entry_name(zip);
14351434
size_t pos = name.find("data.pkl");
14361435
if (pos != std::string::npos) {
1437-
14381436
std::string dir = name.substr(0, pos);
14391437
printf("ZIP %d, name = %s, dir = %s \n", i, name.c_str(), dir.c_str());
1440-
void* pkl_data = NULL;
1438+
void* pkl_data = NULL;
14411439
size_t pkl_size;
14421440
zip_entry_read(zip, &pkl_data, &pkl_size);
14431441

model.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -167,7 +167,7 @@ class ModelLoader {
167167
bool load_tensors(std::map<std::string, struct ggml_tensor*>& tensors,
168168
ggml_backend_t backend,
169169
std::set<std::string> ignore_tensors = {});
170-
170+
171171
bool save_to_gguf_file(const std::string& file_path, ggml_type type);
172172
bool tensor_should_be_converted(const TensorStorage& tensor_storage, ggml_type type);
173173
int64_t get_params_mem_size(ggml_backend_t backend, ggml_type type = GGML_TYPE_COUNT);

0 commit comments

Comments
 (0)