@@ -53,7 +53,8 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
5353 std::string embd_dir;
5454 int32_t num_custom_embeddings = 0 ;
5555 int32_t num_custom_embeddings_2 = 0 ;
56- std::vector<uint8_t > token_embed_custom;
56+ std::vector<uint8_t > token_embed_custom_1;
57+ std::vector<uint8_t > token_embed_custom_2;
5758 std::vector<std::string> readed_embeddings;
5859
5960 FrozenCLIPEmbedderWithCustomWords (ggml_backend_t backend,
@@ -63,7 +64,7 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
6364 SDVersion version = VERSION_SD1,
6465 PMVersion pv = PM_VERSION_1)
6566 : version(version), pm_version(pv), tokenizer(sd_version_is_sd2(version) ? 0 : 49407 ), embd_dir(embd_dir) {
66- bool force_clip_f32 = embd_dir. size () > 0 ;
67+ bool force_clip_f32 = true ;
6768 if (sd_version_is_sd1 (version)) {
6869 text_model = std::make_shared<CLIPTextModelRunner>(backend, offload_params_to_cpu, tensor_types, " cond_stage_model.transformer.text_model" , OPENAI_CLIP_VIT_L_14, true , force_clip_f32);
6970 } else if (sd_version_is_sd2 (version)) {
@@ -145,8 +146,8 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
145146 readed_embeddings.push_back (embd_name);
146147 if (embd) {
147148 int64_t hidden_size = text_model->model .hidden_size ;
148- token_embed_custom .resize (token_embed_custom .size () + ggml_nbytes (embd));
149- memcpy ((void *)(token_embed_custom .data () + num_custom_embeddings * hidden_size * ggml_type_size (embd->type )),
149+ token_embed_custom_1 .resize (token_embed_custom_1 .size () + ggml_nbytes (embd));
150+ memcpy ((void *)(token_embed_custom_1 .data () + num_custom_embeddings * hidden_size * ggml_type_size (embd->type )),
150151 embd->data ,
151152 ggml_nbytes (embd));
152153 for (int i = 0 ; i < embd->ne [1 ]; i++) {
@@ -158,8 +159,8 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
158159 }
159160 if (embd2) {
160161 int64_t hidden_size = text_model2->model .hidden_size ;
161- token_embed_custom .resize (token_embed_custom .size () + ggml_nbytes (embd2));
162- memcpy ((void *)(token_embed_custom .data () + num_custom_embeddings_2 * hidden_size * ggml_type_size (embd2->type )),
162+ token_embed_custom_2 .resize (token_embed_custom_2 .size () + ggml_nbytes (embd2));
163+ memcpy ((void *)(token_embed_custom_2 .data () + num_custom_embeddings_2 * hidden_size * ggml_type_size (embd2->type )),
163164 embd2->data ,
164165 ggml_nbytes (embd2));
165166 for (int i = 0 ; i < embd2->ne [1 ]; i++) {
@@ -631,7 +632,7 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
631632 text_model->compute (n_threads,
632633 input_ids,
633634 num_custom_embeddings,
634- token_embed_custom .data (),
635+ token_embed_custom_1 .data (),
635636 max_token_idx,
636637 false ,
637638 clip_skip,
@@ -641,7 +642,7 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
641642 text_model2->compute (n_threads,
642643 input_ids2,
643644 num_custom_embeddings,
644- token_embed_custom .data (),
645+ token_embed_custom_2 .data (),
645646 max_token_idx,
646647 false ,
647648 clip_skip,
@@ -653,7 +654,7 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
653654 text_model2->compute (n_threads,
654655 input_ids2,
655656 num_custom_embeddings,
656- token_embed_custom .data (),
657+ token_embed_custom_2 .data (),
657658 max_token_idx,
658659 true ,
659660 clip_skip,
0 commit comments