@@ -241,7 +241,7 @@ class CLIPTokenizer {
241241 std::vector<int > tokenize (std::string text,
242242 on_new_token_cb_t on_new_token_cb,
243243 size_t max_length = 0 ,
244- bool padding = false ) {
244+ bool padding = false ) {
245245 std::vector<int32_t > tokens = encode (text, on_new_token_cb);
246246 tokens.insert (tokens.begin (), BOS_TOKEN_ID);
247247 if (max_length > 0 ) {
@@ -486,7 +486,6 @@ struct ResidualAttentionBlock {
486486
487487 ln2_w = ggml_new_tensor_1d (ctx, GGML_TYPE_F32, hidden_size);
488488 ln2_b = ggml_new_tensor_1d (ctx, GGML_TYPE_F32, hidden_size);
489-
490489 }
491490
492491 void map_by_name (std::map<std::string, struct ggml_tensor *>& tensors, const std::string prefix) {
@@ -661,8 +660,8 @@ struct CLIPTextModel {
661660 mem_size += ggml_row_size (GGML_TYPE_I32, hidden_size * max_position_embeddings); // position_ids
662661 mem_size += ggml_row_size (wtype, hidden_size * vocab_size); // token_embed_weight
663662 mem_size += ggml_row_size (wtype, hidden_size * max_position_embeddings); // position_embed_weight
664- if (version == OPENAI_CLIP_VIT_L_14) {
665- mem_size += ggml_row_size (wtype, hidden_size * max_position_embeddings); // token_embed_custom
663+ if (version == OPENAI_CLIP_VIT_L_14) {
664+ mem_size += ggml_row_size (wtype, hidden_size * max_position_embeddings); // token_embed_custom
666665 }
667666 for (int i = 0 ; i < num_hidden_layers; i++) {
668667 mem_size += resblocks[i].calculate_mem_size (wtype);
@@ -688,32 +687,32 @@ struct CLIPTextModel {
688687 }
689688 }
690689
691- bool load_embedding (std::string embd_name, std::string embd_path, std::vector<int32_t > & bpe_tokens) {
690+ bool load_embedding (std::string embd_name, std::string embd_path, std::vector<int32_t >& bpe_tokens) {
692691 // the order matters
693692 ModelLoader model_loader;
694- if (!model_loader.init_from_file (embd_path)) {
693+ if (!model_loader.init_from_file (embd_path)) {
695694 LOG_ERROR (" embedding '%s' failed" , embd_name.c_str ());
696695 return false ;
697696 }
698697 struct ggml_init_params params;
699- params.mem_size = 32 * 1024 ; // max for custom embeddings 32 KB
700- params.mem_buffer = NULL ;
701- params.no_alloc = false ;
698+ params.mem_size = 32 * 1024 ; // max for custom embeddings 32 KB
699+ params.mem_buffer = NULL ;
700+ params.no_alloc = false ;
702701 struct ggml_context * embd_ctx = ggml_init (params);
703- struct ggml_tensor * embd = NULL ;
704- auto on_load = [&](const TensorStorage& tensor_storage, ggml_tensor** dst_tensor) {
705- if (tensor_storage.ne [0 ] != hidden_size) {
702+ struct ggml_tensor * embd = NULL ;
703+ auto on_load = [&](const TensorStorage& tensor_storage, ggml_tensor** dst_tensor) {
704+ if (tensor_storage.ne [0 ] != hidden_size) {
706705 LOG_DEBUG (" embedding wrong hidden size, got %i, expected %i" , tensor_storage.ne [0 ], hidden_size);
707706 return false ;
708707 }
709- embd = ggml_new_tensor_2d (embd_ctx, token_embed_weight->type , hidden_size, tensor_storage.n_dims > 1 ? tensor_storage.ne [1 ] : 1 );
708+ embd = ggml_new_tensor_2d (embd_ctx, token_embed_weight->type , hidden_size, tensor_storage.n_dims > 1 ? tensor_storage.ne [1 ] : 1 );
710709 *dst_tensor = embd;
711710 return true ;
712711 };
713712 model_loader.load_tensors (on_load, NULL );
714713 ggml_backend_tensor_set (token_embed_custom, embd->data , num_custom_embeddings * hidden_size * ggml_type_size (token_embed_custom->type ), ggml_nbytes (embd));
715714 readed_embeddings.push_back (embd_name);
716- for (int i = 0 ; i < embd->ne [1 ]; i++) {
715+ for (int i = 0 ; i < embd->ne [1 ]; i++) {
717716 bpe_tokens.push_back (vocab_size + num_custom_embeddings);
718717 // LOG_DEBUG("new custom token: %i", vocab_size + num_custom_embeddings);
719718 num_custom_embeddings++;
@@ -775,7 +774,7 @@ struct CLIPTextModel {
775774
776775 final_ln_b = ggml_new_tensor_1d (ctx, GGML_TYPE_F32, hidden_size);
777776
778- if (version == OPENAI_CLIP_VIT_L_14) {
777+ if (version == OPENAI_CLIP_VIT_L_14) {
779778 token_embed_custom = ggml_new_tensor_2d (ctx, wtype, hidden_size, max_position_embeddings);
780779 }
781780
@@ -878,11 +877,11 @@ struct FrozenCLIPEmbedderWithCustomWords : public GGMLModule {
878877
879878 auto hidden_states2 = text_model2.forward (ctx0, input_ids2, NULL ); // [N, n_token, hidden_size2]
880879 hidden_states2 = ggml_reshape_4d (ctx0,
881- hidden_states2,
882- hidden_states2->ne [0 ],
883- hidden_states2->ne [1 ],
884- hidden_states2->ne [2 ],
885- hidden_states2->ne [3 ]);
880+ hidden_states2,
881+ hidden_states2->ne [0 ],
882+ hidden_states2->ne [1 ],
883+ hidden_states2->ne [2 ],
884+ hidden_states2->ne [3 ]);
886885 hidden_states2 = ggml_cont (ctx0, ggml_permute (ctx0, hidden_states2, 2 , 0 , 1 , 3 ));
887886
888887 hidden_states = ggml_concat (ctx0, hidden_states, hidden_states2); // [N, n_token, hidden_size + hidden_size2]
@@ -913,20 +912,20 @@ struct FrozenCLIPEmbedderWithCustomWords : public GGMLModule {
913912 LOG_DEBUG (" parse '%s' to %s" , text.c_str (), ss.str ().c_str ());
914913 }
915914
916- auto on_new_token_cb = [&] (std::string& str, std::vector<int32_t > & bpe_tokens) -> bool {
917- size_t word_end = str.find (" ," );
915+ auto on_new_token_cb = [&](std::string& str, std::vector<int32_t >& bpe_tokens) -> bool {
916+ size_t word_end = str.find (" ," );
918917 std::string embd_name = word_end == std::string::npos ? str : str.substr (0 , word_end);
919- embd_name = trim (embd_name);
918+ embd_name = trim (embd_name);
920919 std::string embd_path = get_full_path (text_model.embd_dir , embd_name + " .pt" );
921- if (embd_path.size () == 0 ) {
920+ if (embd_path.size () == 0 ) {
922921 embd_path = get_full_path (text_model.embd_dir , embd_name + " .ckpt" );
923922 }
924- if (embd_path.size () == 0 ) {
923+ if (embd_path.size () == 0 ) {
925924 embd_path = get_full_path (text_model.embd_dir , embd_name + " .safetensors" );
926925 }
927- if (embd_path.size () > 0 ) {
928- if (text_model.load_embedding (embd_name, embd_path, bpe_tokens)) {
929- if (word_end != std::string::npos) {
926+ if (embd_path.size () > 0 ) {
927+ if (text_model.load_embedding (embd_name, embd_path, bpe_tokens)) {
928+ if (word_end != std::string::npos) {
930929 str = str.substr (word_end);
931930 } else {
932931 str = " " ;
@@ -1033,7 +1032,7 @@ struct FrozenCLIPEmbedderWithCustomWords : public GGMLModule {
10331032
10341033 struct ggml_tensor * embeddings = NULL ;
10351034
1036- if (text_model.num_custom_embeddings > 0 && version != VERSION_XL) {
1035+ if (text_model.num_custom_embeddings > 0 && version != VERSION_XL) {
10371036 embeddings = ggml_new_tensor_2d (ctx0, wtype, text_model.hidden_size , text_model.vocab_size + text_model.num_custom_embeddings /* custom placeholder */ );
10381037 ggml_allocr_alloc (allocr, embeddings);
10391038 if (!ggml_allocr_is_measure (allocr)) {
0 commit comments