@@ -34,6 +34,7 @@ struct Conditioner {
     virtual void free_params_buffer() = 0;
     virtual void get_param_tensors(std::map<std::string, struct ggml_tensor*>& tensors) = 0;
     virtual size_t get_params_buffer_size() = 0;
+    virtual void set_weight_adapter(const std::shared_ptr<WeightAdapter>& adapter) {}
     virtual std::tuple<SDCondition, std::vector<bool>> get_learned_condition_with_trigger(ggml_context* work_ctx,
                                                                                           int n_threads,
                                                                                           const ConditionerParams& conditioner_params) {
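
The hook is introduced with an empty default body, so only conditioners that actually own trainable text encoders need to override it. A minimal sketch of why that default is convenient; `WeightAdapter` comes from this patch, while `attach_adapter` and its call shape are illustrative assumptions, not code from the PR:

    // Sketch only. `cond` may be any Conditioner subclass; embedders without
    // text encoders fall through to the empty base-class implementation.
    void attach_adapter(const std::shared_ptr<Conditioner>& cond,
                        const std::shared_ptr<WeightAdapter>& adapter) {
        cond->set_weight_adapter(adapter);  // virtual dispatch, safe on every subclass
    }
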
@@ -108,10 +109,17 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
         return buffer_size;
     }
 
+    void set_weight_adapter(const std::shared_ptr<WeightAdapter>& adapter) override {
+        text_model->set_weight_adapter(adapter);
+        if (sd_version_is_sdxl(version)) {
+            text_model2->set_weight_adapter(adapter);
+        }
+    }
+
     bool load_embedding(std::string embd_name, std::string embd_path, std::vector<int32_t>& bpe_tokens) {
         // the order matters
         ModelLoader model_loader;
-        if (!model_loader.init_from_file(embd_path)) {
+        if (!model_loader.init_from_file_and_convert_name(embd_path)) {
             LOG_ERROR("embedding '%s' failed", embd_name.c_str());
             return false;
         }
@@ -270,13 +278,30 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
             const std::string& curr_text = item.first;
             float curr_weight = item.second;
             // printf(" %s: %f \n", curr_text.c_str(), curr_weight);
+            int32_t clean_index = 0;
+            if (curr_text == "BREAK" && curr_weight == -1.0f) {
+                // Pad token array up to chunk size at this point.
+                // TODO: This is a hardcoded chunk_len, like in stable-diffusion.cpp, make it a parameter for the future?
+                // Also, this is 75 instead of 77 to leave room for BOS and EOS tokens.
+                int padding_size = 75 - (tokens_acc % 75);
+                for (int j = 0; j < padding_size; j++) {
+                    clean_input_ids.push_back(tokenizer.EOS_TOKEN_ID);
+                    clean_index++;
+                }
+
+                // After padding, continue to the next iteration to process the following text as a new segment
+                tokens.insert(tokens.end(), clean_input_ids.begin(), clean_input_ids.end());
+                weights.insert(weights.end(), padding_size, curr_weight);
+                continue;
+            }
+
+            // Regular token, process normally
             std::vector<int> curr_tokens = tokenizer.encode(curr_text, on_new_token_cb);
-            int32_t clean_index = 0;
             for (uint32_t i = 0; i < curr_tokens.size(); i++) {
                 int token_id = curr_tokens[i];
-                if (token_id == image_token)
+                if (token_id == image_token) {
                     class_token_index.push_back(clean_index - 1);
-                else {
+                } else {
                     clean_input_ids.push_back(token_id);
                     clean_index++;
                 }
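
On BREAK, this first handler flushes the in-progress chunk to the 75-token boundary (77 minus the BOS and EOS slots) with EOS padding. Note that `75 - (tokens_acc % 75)` yields a full 75 when `tokens_acc` is already aligned, so an aligned chunk is followed by one entirely-padding chunk; the second handler below sidesteps that with an extra `% 75`. A self-contained check of the arithmetic (standalone sketch, not part of the patch):

    #include <cassert>

    // Padding needed to reach the next 75-token chunk boundary,
    // exactly as computed in the hunk above.
    static int pad_to_chunk(int tokens_acc) {
        return 75 - (tokens_acc % 75);
    }

    int main() {
        assert(pad_to_chunk(10) == 65);  // 10 + 65 == 75
        assert(pad_to_chunk(74) == 1);   // one EOS completes the chunk
        assert(pad_to_chunk(75) == 75);  // already aligned: a whole extra chunk of padding
        return 0;
    }
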
@@ -379,6 +404,22 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
         for (const auto& item : parsed_attention) {
             const std::string& curr_text = item.first;
             float curr_weight = item.second;
+
+            if (curr_text == "BREAK" && curr_weight == -1.0f) {
+                // Pad token array up to chunk size at this point.
+                // TODO: This is a hardcoded chunk_len, like in stable-diffusion.cpp, make it a parameter for the future?
+                // Also, this is 75 instead of 77 to leave room for BOS and EOS tokens.
+                size_t current_size = tokens.size();
+                size_t padding_size = (75 - (current_size % 75)) % 75;  // Ensure no negative padding
+
+                if (padding_size > 0) {
+                    LOG_DEBUG("BREAK token encountered, padding current chunk by %zu tokens.", padding_size);
+                    tokens.insert(tokens.end(), padding_size, tokenizer.EOS_TOKEN_ID);
+                    weights.insert(weights.end(), padding_size, 1.0f);
+                }
+                continue;  // Skip to the next item after handling BREAK
+            }
+
             std::vector<int> curr_tokens = tokenizer.encode(curr_text, on_new_token_cb);
             tokens.insert(tokens.end(), curr_tokens.begin(), curr_tokens.end());
             weights.insert(weights.end(), curr_tokens.size(), curr_weight);
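
This second handler computes `(75 - (current_size % 75)) % 75`; despite the inline comment, the expression can never be negative — the outer `% 75` instead maps the already-aligned case to zero padding, which the `padding_size > 0` check then skips. A short contrast with the first variant (sketch, assumed values):

    // Alignment-safe variant from this hunk: the outer % 75 turns the
    // "already aligned" case into zero padding instead of a whole chunk.
    static size_t pad_to_chunk_safe(size_t current_size) {
        return (75 - (current_size % 75)) % 75;
    }
    // pad_to_chunk_safe(75) == 0, whereas the first handler's
    // 75 - (75 % 75) evaluates to 75.
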
@@ -764,6 +805,18 @@ struct SD3CLIPEmbedder : public Conditioner {
         return buffer_size;
     }
 
+    void set_weight_adapter(const std::shared_ptr<WeightAdapter>& adapter) override {
+        if (clip_l) {
+            clip_l->set_weight_adapter(adapter);
+        }
+        if (clip_g) {
+            clip_g->set_weight_adapter(adapter);
+        }
+        if (t5) {
+            t5->set_weight_adapter(adapter);
+        }
+    }
+
     std::vector<std::pair<std::vector<int>, std::vector<float>>> tokenize(std::string text,
                                                                           size_t max_length = 0,
                                                                           bool padding = false) {
@@ -1160,6 +1213,15 @@ struct FluxCLIPEmbedder : public Conditioner {
         return buffer_size;
     }
 
+    void set_weight_adapter(const std::shared_ptr<WeightAdapter>& adapter) override {
+        if (clip_l) {
+            clip_l->set_weight_adapter(adapter);
+        }
+        if (t5) {
+            t5->set_weight_adapter(adapter);
+        }
+    }
+
     std::vector<std::pair<std::vector<int>, std::vector<float>>> tokenize(std::string text,
                                                                           size_t max_length = 0,
                                                                           bool padding = false) {
@@ -1400,6 +1462,12 @@ struct T5CLIPEmbedder : public Conditioner {
         return buffer_size;
     }
 
+    void set_weight_adapter(const std::shared_ptr<WeightAdapter>& adapter) override {
+        if (t5) {
+            t5->set_weight_adapter(adapter);
+        }
+    }
+
     std::tuple<std::vector<int>, std::vector<float>, std::vector<float>> tokenize(std::string text,
                                                                                   size_t max_length = 0,
                                                                                   bool padding = false) {
@@ -1589,6 +1657,12 @@ struct Qwen2_5_VLCLIPEmbedder : public Conditioner {
         return buffer_size;
     }
 
+    void set_weight_adapter(const std::shared_ptr<WeightAdapter>& adapter) override {
+        if (qwenvl) {
+            qwenvl->set_weight_adapter(adapter);
+        }
+    }
+
     std::tuple<std::vector<int>, std::vector<float>> tokenize(std::string text,
                                                               size_t max_length = 0,
                                                               size_t system_prompt_length = 0,