@@ -65,10 +65,16 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
6565 FrozenCLIPEmbedderWithCustomWords (ggml_backend_t backend,
6666 bool offload_params_to_cpu,
6767 const String2TensorStorage& tensor_storage_map,
68- const std::map<std::string, std::string> embedding_map ,
68+ const std::map<std::string, std::string>& orig_embedding_map ,
6969 SDVersion version = VERSION_SD1,
7070 PMVersion pv = PM_VERSION_1)
71- : version(version), pm_version(pv), tokenizer(sd_version_is_sd2(version) ? 0 : 49407 ), embedding_map(embedding_map) {
71+ : version(version), pm_version(pv), tokenizer(sd_version_is_sd2(version) ? 0 : 49407 ) {
72+ for (const auto & kv : orig_embedding_map) {
73+ std::string name = kv.first ;
74+ std::transform (name.begin (), name.end (), name.begin (), [](unsigned char c) { return std::tolower (c); });
75+ embedding_map[name] = kv.second ;
76+ tokenizer.add_special_token (name);
77+ }
7278 bool force_clip_f32 = !embedding_map.empty ();
7379 if (sd_version_is_sd1 (version)) {
7480 text_model = std::make_shared<CLIPTextModelRunner>(backend, offload_params_to_cpu, tensor_storage_map, " cond_stage_model.transformer.text_model" , OPENAI_CLIP_VIT_L_14, true , force_clip_f32);
@@ -78,9 +84,6 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
7884 text_model = std::make_shared<CLIPTextModelRunner>(backend, offload_params_to_cpu, tensor_storage_map, " cond_stage_model.transformer.text_model" , OPENAI_CLIP_VIT_L_14, false , force_clip_f32);
7985 text_model2 = std::make_shared<CLIPTextModelRunner>(backend, offload_params_to_cpu, tensor_storage_map, " cond_stage_model.1.transformer.text_model" , OPEN_CLIP_VIT_BIGG_14, false , force_clip_f32);
8086 }
81- for (const auto & kv : embedding_map) {
82- tokenizer.add_special_token (kv.first );
83- }
8487 }
8588
8689 void get_param_tensors (std::map<std::string, struct ggml_tensor *>& tensors) override {
0 commit comments