From 039324ae167a1d668733407cc63f31fd43590975 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Wed, 14 Jan 2026 14:52:15 +0530 Subject: [PATCH 01/13] switch to transformers main again./ --- .github/workflows/pr_tests.yml | 4 ++-- .github/workflows/pr_tests_gpu.yml | 5 +++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/.github/workflows/pr_tests.yml b/.github/workflows/pr_tests.yml index c0dfa89e776d..85fd9850ee34 100644 --- a/.github/workflows/pr_tests.yml +++ b/.github/workflows/pr_tests.yml @@ -115,8 +115,8 @@ jobs: - name: Install dependencies run: | uv pip install -e ".[quality]" - #uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git - uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1 + uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git + # uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1 uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git --no-deps - name: Environment diff --git a/.github/workflows/pr_tests_gpu.yml b/.github/workflows/pr_tests_gpu.yml index dd20bbe93250..2ae48291be7f 100644 --- a/.github/workflows/pr_tests_gpu.yml +++ b/.github/workflows/pr_tests_gpu.yml @@ -14,6 +14,7 @@ on: - "tests/pipelines/test_pipelines_common.py" - "tests/models/test_modeling_common.py" - "examples/**/*.py" + - ".github/**.yml" workflow_dispatch: concurrency: @@ -131,8 +132,8 @@ jobs: run: | uv pip install -e ".[quality]" uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git - #uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git - uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1 + uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git + # uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1 - name: Environment run: | From c152b1831cef9da6d66b150c7ceec95977961bf3 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Wed, 14 Jan 2026 14:54:39 +0530 Subject: [PATCH 02/13] more --- .github/workflows/pr_tests.yml | 4 ++-- .github/workflows/pr_tests_gpu.yml | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/pr_tests.yml b/.github/workflows/pr_tests.yml index 85fd9850ee34..b3d08dfce01e 100644 --- a/.github/workflows/pr_tests.yml +++ b/.github/workflows/pr_tests.yml @@ -247,8 +247,8 @@ jobs: uv pip install -U peft@git+https://github.com/huggingface/peft.git --no-deps uv pip install -U tokenizers uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git --no-deps - #uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git - uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1 + uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git + # uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1 - name: Environment run: | diff --git a/.github/workflows/pr_tests_gpu.yml b/.github/workflows/pr_tests_gpu.yml index 2ae48291be7f..58c7ba6263b5 100644 --- a/.github/workflows/pr_tests_gpu.yml +++ b/.github/workflows/pr_tests_gpu.yml @@ -203,8 +203,8 @@ jobs: uv pip install -e ".[quality]" uv pip install peft@git+https://github.com/huggingface/peft.git uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git - #uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git - uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1 + uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git + # uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1 - name: Environment run: | @@ -265,8 +265,8 @@ jobs: nvidia-smi - name: Install dependencies run: | - #uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git - uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1 + uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git + # uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1 uv pip install -e ".[quality,training]" - name: Environment From c5e023fbe64641bc1d7ea257bc58fa030137c60a Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 15 Jan 2026 13:02:55 +0530 Subject: [PATCH 03/13] up --- tests/models/test_models_auto.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/models/test_models_auto.py b/tests/models/test_models_auto.py index a70754343f30..c0e9b4494b07 100644 --- a/tests/models/test_models_auto.py +++ b/tests/models/test_models_auto.py @@ -20,7 +20,9 @@ def test_load_from_config_diffusers_with_subfolder(self, mock_load_config): side_effect=[EnvironmentError("File not found"), {"model_type": "clip_text_model"}], ) def test_load_from_config_transformers_with_subfolder(self, mock_load_config): - model = AutoModel.from_pretrained("hf-internal-testing/tiny-stable-diffusion-torch", subfolder="text_encoder") + model = AutoModel.from_pretrained( + "hf-internal-testing/tiny-stable-diffusion-torch", subfolder="text_encoder", use_safetensors=False + ) assert isinstance(model, CLIPTextModel) def test_load_from_config_without_subfolder(self): @@ -28,5 +30,7 @@ def test_load_from_config_without_subfolder(self): assert isinstance(model, LongformerModel) def test_load_from_model_index(self): - model = AutoModel.from_pretrained("hf-internal-testing/tiny-stable-diffusion-torch", subfolder="text_encoder") + model = AutoModel.from_pretrained( + "hf-internal-testing/tiny-stable-diffusion-torch", subfolder="text_encoder", use_safetensors=False + ) assert isinstance(model, CLIPTextModel) From d0f279ce76c587d70e4f8f3074d3fd8d47a0834a Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 15 Jan 2026 16:59:41 +0530 Subject: [PATCH 04/13] up --- examples/custom_diffusion/test_custom_diffusion.py | 4 ++++ src/diffusers/pipelines/kandinsky/text_encoder.py | 2 ++ src/diffusers/pipelines/kolors/text_encoder.py | 5 ++++- 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/examples/custom_diffusion/test_custom_diffusion.py b/examples/custom_diffusion/test_custom_diffusion.py index 9af84ec7598f..ad18eb246777 100644 --- a/examples/custom_diffusion/test_custom_diffusion.py +++ b/examples/custom_diffusion/test_custom_diffusion.py @@ -17,6 +17,9 @@ import os import sys import tempfile +import unittest + +from diffusers.utils import is_transformers_version sys.path.append("..") @@ -30,6 +33,7 @@ logger.addHandler(stream_handler) +@unittest.skipIf(is_transformers_version(">=", "4.57.5"), "Size mismatch") class CustomDiffusion(ExamplesTestsAccelerate): def test_custom_diffusion(self): with tempfile.TemporaryDirectory() as tmpdir: diff --git a/src/diffusers/pipelines/kandinsky/text_encoder.py b/src/diffusers/pipelines/kandinsky/text_encoder.py index caa0029f00ca..58cc9ac4d3ed 100644 --- a/src/diffusers/pipelines/kandinsky/text_encoder.py +++ b/src/diffusers/pipelines/kandinsky/text_encoder.py @@ -20,6 +20,8 @@ def __init__(self, config, *args, **kwargs): self.LinearTransformation = torch.nn.Linear( in_features=config.transformerDimensions, out_features=config.numDims ) + if hasattr(self, "post_init"): + self.post_init() def forward(self, input_ids, attention_mask): embs = self.transformer(input_ids=input_ids, attention_mask=attention_mask)[0] diff --git a/src/diffusers/pipelines/kolors/text_encoder.py b/src/diffusers/pipelines/kolors/text_encoder.py index 6fd17156a116..88c551028968 100644 --- a/src/diffusers/pipelines/kolors/text_encoder.py +++ b/src/diffusers/pipelines/kolors/text_encoder.py @@ -782,6 +782,9 @@ def __init__(self, config: ChatGLMConfig, device=None, empty_init=True): self.prefix_encoder = PrefixEncoder(config) self.dropout = torch.nn.Dropout(0.1) + if hasattr(self, "post_init"): + self.post_init() + def get_input_embeddings(self): return self.embedding.word_embeddings @@ -811,7 +814,7 @@ def forward( output_hidden_states = ( output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states ) - use_cache = use_cache if use_cache is not None else self.config.use_cache + use_cache = use_cache if use_cache is not None else getattr(self.config, "use_cache", None) return_dict = return_dict if return_dict is not None else self.config.use_return_dict batch_size, seq_length = input_ids.shape From 96f08043a392b4ab234dff06e0ddcd511fcdb4eb Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 15 Jan 2026 20:00:45 +0530 Subject: [PATCH 05/13] fix group offloading. --- src/diffusers/hooks/_common.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/diffusers/hooks/_common.py b/src/diffusers/hooks/_common.py index ca7934e5c313..52e3508846f6 100644 --- a/src/diffusers/hooks/_common.py +++ b/src/diffusers/hooks/_common.py @@ -44,6 +44,7 @@ torch.nn.ConvTranspose2d, torch.nn.ConvTranspose3d, torch.nn.Linear, + torch.nn.Embedding, # TODO(aryan): look into torch.nn.LayerNorm, torch.nn.GroupNorm later, seems to be causing some issues with CogVideoX # because of double invocation of the same norm layer in CogVideoXLayerNorm ) From 37cfceef0dda50512d739c02362065b521ba6a11 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Fri, 16 Jan 2026 09:38:48 +0530 Subject: [PATCH 06/13] attributes --- src/diffusers/loaders/textual_inversion.py | 32 +++++++++++++++++----- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/src/diffusers/loaders/textual_inversion.py b/src/diffusers/loaders/textual_inversion.py index 63fc97ed431f..3eca9821bdfe 100644 --- a/src/diffusers/loaders/textual_inversion.py +++ b/src/diffusers/loaders/textual_inversion.py @@ -19,7 +19,13 @@ from torch import nn from ..models.modeling_utils import load_state_dict -from ..utils import _get_model_file, is_accelerate_available, is_transformers_available, logging +from ..utils import ( + _get_model_file, + is_accelerate_available, + is_transformers_available, + is_transformers_version, + logging, +) if is_transformers_available(): @@ -549,17 +555,29 @@ def unload_textual_inversion( # Delete from tokenizer for token_id, token_to_remove in zip(token_ids, tokens): - del tokenizer._added_tokens_decoder[token_id] - del tokenizer._added_tokens_encoder[token_to_remove] + if is_transformers_version("<=", "4.58.0"): + del tokenizer._added_tokens_decoder[token_id] + del tokenizer._added_tokens_encoder[token_to_remove] + elif is_transformers_version(">", "4.58.0"): + del tokenizer.added_tokens_decoder[token_id] + del tokenizer.added_tokens_encoder[token_to_remove] # Make all token ids sequential in tokenizer key_id = 1 for token_id in tokenizer.added_tokens_decoder: if token_id > last_special_token_id and token_id > last_special_token_id + key_id: - token = tokenizer._added_tokens_decoder[token_id] - tokenizer._added_tokens_decoder[last_special_token_id + key_id] = token - del tokenizer._added_tokens_decoder[token_id] - tokenizer._added_tokens_encoder[token.content] = last_special_token_id + key_id + if is_transformers_version("<=", "4.58.0"): + token = tokenizer._added_tokens_decoder[token_id] + tokenizer._added_tokens_decoder[last_special_token_id + key_id] = token + del tokenizer._added_tokens_decoder[token_id] + elif is_transformers_version(">", "4.58.0"): + token = tokenizer.added_tokens_decoder[token_id] + tokenizer.added_tokens_decoder[last_special_token_id + key_id] = token + del tokenizer.added_tokens_decoder[token_id] + if is_transformers_version("<=", "4.58.0"): + tokenizer._added_tokens_encoder[token.content] = last_special_token_id + key_id + elif is_transformers_version(">", "4.58.0"): + tokenizer.added_tokens_encoder[token.content] = last_special_token_id + key_id key_id += 1 tokenizer._update_trie() # set correct total vocab size after removing tokens From 926db24add2661c5a28513b3ff3ee6badac8f799 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Fri, 16 Jan 2026 10:01:44 +0530 Subject: [PATCH 07/13] up --- src/diffusers/pipelines/cosmos/pipeline_cosmos2_5_predict.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/diffusers/pipelines/cosmos/pipeline_cosmos2_5_predict.py b/src/diffusers/pipelines/cosmos/pipeline_cosmos2_5_predict.py index ea9df999ddd6..c7162c6d1859 100644 --- a/src/diffusers/pipelines/cosmos/pipeline_cosmos2_5_predict.py +++ b/src/diffusers/pipelines/cosmos/pipeline_cosmos2_5_predict.py @@ -278,6 +278,9 @@ def _get_prompt_embeds( truncation=True, padding="max_length", ) + input_ids = ( + input_ids["input_ids"] if not isinstance(input_ids, list) and "input_ids" in input_ids else input_ids + ) input_ids = torch.LongTensor(input_ids) input_ids_batch.append(input_ids) From cec020988b6c47a4a26af3fdb4349c6c6b5294ce Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Fri, 16 Jan 2026 10:22:59 +0530 Subject: [PATCH 08/13] up --- tests/pipelines/cogview4/test_cogview4.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/pipelines/cogview4/test_cogview4.py b/tests/pipelines/cogview4/test_cogview4.py index a1f0fc7a715b..5f71b1b296d9 100644 --- a/tests/pipelines/cogview4/test_cogview4.py +++ b/tests/pipelines/cogview4/test_cogview4.py @@ -108,7 +108,7 @@ def get_dummy_inputs(self, device, seed=0): generator = torch.Generator(device=device).manual_seed(seed) inputs = { "prompt": "dance monkey", - "negative_prompt": "", + "negative_prompt": "bad", "generator": generator, "num_inference_steps": 2, "guidance_scale": 6.0, From 3dcb97c9ea0354ff0d91e238b2cabf99eb86a432 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Mon, 19 Jan 2026 13:43:47 +0530 Subject: [PATCH 09/13] tie embedding issue. --- .../controlnet_flux/test_controlnet_flux.py | 5 +++-- .../test_controlnet_flux_img2img.py | 9 ++++----- .../test_controlnet_flux_inpaint.py | 19 ++++--------------- .../test_controlnet_inpaint_sd3.py | 17 +++++++++++------ .../controlnet_sd3/test_controlnet_sd3.py | 12 ++++++++++-- tests/pipelines/flux/test_pipeline_flux.py | 5 +++-- .../flux/test_pipeline_flux_control.py | 5 +++-- .../test_pipeline_flux_control_img2img.py | 5 +++-- .../test_pipeline_flux_control_inpaint.py | 5 +++-- .../pipelines/flux/test_pipeline_flux_fill.py | 5 +++-- .../flux/test_pipeline_flux_img2img.py | 5 +++-- .../flux/test_pipeline_flux_inpaint.py | 5 +++-- .../flux/test_pipeline_flux_kontext.py | 5 +++-- .../test_pipeline_flux_kontext_inpaint.py | 5 +++-- .../test_pipeline_stable_diffusion_3.py | 13 +++++++++++-- ...est_pipeline_stable_diffusion_3_img2img.py | 13 +++++++++++-- ...est_pipeline_stable_diffusion_3_inpaint.py | 13 +++++++++++-- 17 files changed, 92 insertions(+), 54 deletions(-) diff --git a/tests/pipelines/controlnet_flux/test_controlnet_flux.py b/tests/pipelines/controlnet_flux/test_controlnet_flux.py index 0895d9de3581..8607cd6944d9 100644 --- a/tests/pipelines/controlnet_flux/test_controlnet_flux.py +++ b/tests/pipelines/controlnet_flux/test_controlnet_flux.py @@ -19,7 +19,7 @@ import numpy as np import torch from huggingface_hub import hf_hub_download -from transformers import CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel, T5TokenizerFast +from transformers import AutoConfig, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel, T5TokenizerFast from diffusers import ( AutoencoderKL, @@ -97,7 +97,8 @@ def get_dummy_components(self): text_encoder = CLIPTextModel(clip_text_encoder_config) torch.manual_seed(0) - text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5") + text_encoder_2 = T5EncoderModel(config) tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") tokenizer_2 = T5TokenizerFast.from_pretrained("hf-internal-testing/tiny-random-t5") diff --git a/tests/pipelines/controlnet_flux/test_controlnet_flux_img2img.py b/tests/pipelines/controlnet_flux/test_controlnet_flux_img2img.py index 3d8378a5786d..a4749188dfd8 100644 --- a/tests/pipelines/controlnet_flux/test_controlnet_flux_img2img.py +++ b/tests/pipelines/controlnet_flux/test_controlnet_flux_img2img.py @@ -2,7 +2,7 @@ import numpy as np import torch -from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel +from transformers import AutoConfig, AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel from diffusers import ( AutoencoderKL, @@ -13,9 +13,7 @@ ) from diffusers.utils.torch_utils import randn_tensor -from ...testing_utils import ( - torch_device, -) +from ...testing_utils import torch_device from ..test_pipelines_common import PipelineTesterMixin, check_qkv_fused_layers_exist @@ -70,7 +68,8 @@ def get_dummy_components(self): text_encoder = CLIPTextModel(clip_text_encoder_config) torch.manual_seed(0) - text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5") + text_encoder_2 = T5EncoderModel(config) tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5") diff --git a/tests/pipelines/controlnet_flux/test_controlnet_flux_inpaint.py b/tests/pipelines/controlnet_flux/test_controlnet_flux_inpaint.py index 3ba475deb8a8..6eb560d90848 100644 --- a/tests/pipelines/controlnet_flux/test_controlnet_flux_inpaint.py +++ b/tests/pipelines/controlnet_flux/test_controlnet_flux_inpaint.py @@ -3,15 +3,7 @@ import numpy as np import torch - -# torch_device, # {{ edit_1 }} Removed unused import -from transformers import ( - AutoTokenizer, - CLIPTextConfig, - CLIPTextModel, - CLIPTokenizer, - T5EncoderModel, -) +from transformers import AutoConfig, AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel from diffusers import ( AutoencoderKL, @@ -22,11 +14,7 @@ ) from diffusers.utils.torch_utils import randn_tensor -from ...testing_utils import ( - enable_full_determinism, - floats_tensor, - torch_device, -) +from ...testing_utils import enable_full_determinism, floats_tensor, torch_device from ..test_pipelines_common import PipelineTesterMixin @@ -85,7 +73,8 @@ def get_dummy_components(self): text_encoder = CLIPTextModel(clip_text_encoder_config) torch.manual_seed(0) - text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5") + text_encoder_2 = T5EncoderModel(config) tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5") diff --git a/tests/pipelines/controlnet_sd3/test_controlnet_inpaint_sd3.py b/tests/pipelines/controlnet_sd3/test_controlnet_inpaint_sd3.py index 34c34b7a2ce7..072f9aa405d9 100644 --- a/tests/pipelines/controlnet_sd3/test_controlnet_inpaint_sd3.py +++ b/tests/pipelines/controlnet_sd3/test_controlnet_inpaint_sd3.py @@ -17,7 +17,14 @@ import numpy as np import torch -from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModelWithProjection, CLIPTokenizer, T5EncoderModel +from transformers import ( + AutoConfig, + AutoTokenizer, + CLIPTextConfig, + CLIPTextModelWithProjection, + CLIPTokenizer, + T5EncoderModel, +) from diffusers import ( AutoencoderKL, @@ -28,10 +35,7 @@ from diffusers.models import SD3ControlNetModel from diffusers.utils.torch_utils import randn_tensor -from ...testing_utils import ( - enable_full_determinism, - torch_device, -) +from ...testing_utils import enable_full_determinism, torch_device from ..test_pipelines_common import PipelineTesterMixin @@ -103,7 +107,8 @@ def get_dummy_components(self): text_encoder_2 = CLIPTextModelWithProjection(clip_text_encoder_config) torch.manual_seed(0) - text_encoder_3 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5") + text_encoder_3 = T5EncoderModel(config) tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") tokenizer_2 = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") diff --git a/tests/pipelines/controlnet_sd3/test_controlnet_sd3.py b/tests/pipelines/controlnet_sd3/test_controlnet_sd3.py index 2b6cf8d1e8be..82ab4308f3a2 100644 --- a/tests/pipelines/controlnet_sd3/test_controlnet_sd3.py +++ b/tests/pipelines/controlnet_sd3/test_controlnet_sd3.py @@ -19,7 +19,14 @@ import numpy as np import torch -from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModelWithProjection, CLIPTokenizer, T5EncoderModel +from transformers import ( + AutoConfig, + AutoTokenizer, + CLIPTextConfig, + CLIPTextModelWithProjection, + CLIPTokenizer, + T5EncoderModel, +) from diffusers import ( AutoencoderKL, @@ -118,7 +125,8 @@ def get_dummy_components( text_encoder_2 = CLIPTextModelWithProjection(clip_text_encoder_config) torch.manual_seed(0) - text_encoder_3 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5") + text_encoder_3 = T5EncoderModel(config) tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") tokenizer_2 = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") diff --git a/tests/pipelines/flux/test_pipeline_flux.py b/tests/pipelines/flux/test_pipeline_flux.py index 74499bfa607a..281ac5ad3bc9 100644 --- a/tests/pipelines/flux/test_pipeline_flux.py +++ b/tests/pipelines/flux/test_pipeline_flux.py @@ -4,7 +4,7 @@ import numpy as np import torch from huggingface_hub import hf_hub_download -from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel +from transformers import AutoConfig, AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel from diffusers import ( AutoencoderKL, @@ -91,7 +91,8 @@ def get_dummy_components(self, num_layers: int = 1, num_single_layers: int = 1): text_encoder = CLIPTextModel(clip_text_encoder_config) torch.manual_seed(0) - text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5") + text_encoder_2 = T5EncoderModel(config) tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5") diff --git a/tests/pipelines/flux/test_pipeline_flux_control.py b/tests/pipelines/flux/test_pipeline_flux_control.py index 7e966470a336..44efca9b9f0e 100644 --- a/tests/pipelines/flux/test_pipeline_flux_control.py +++ b/tests/pipelines/flux/test_pipeline_flux_control.py @@ -3,7 +3,7 @@ import numpy as np import torch from PIL import Image -from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel +from transformers import AutoConfig, AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler, FluxControlPipeline, FluxTransformer2DModel @@ -53,7 +53,8 @@ def get_dummy_components(self): text_encoder = CLIPTextModel(clip_text_encoder_config) torch.manual_seed(0) - text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5") + text_encoder_2 = T5EncoderModel(config) tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5") diff --git a/tests/pipelines/flux/test_pipeline_flux_control_img2img.py b/tests/pipelines/flux/test_pipeline_flux_control_img2img.py index e56136f2e91b..0f0bc0934115 100644 --- a/tests/pipelines/flux/test_pipeline_flux_control_img2img.py +++ b/tests/pipelines/flux/test_pipeline_flux_control_img2img.py @@ -3,7 +3,7 @@ import numpy as np import torch from PIL import Image -from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel +from transformers import AutoConfig, AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel from diffusers import ( AutoencoderKL, @@ -57,7 +57,8 @@ def get_dummy_components(self): text_encoder = CLIPTextModel(clip_text_encoder_config) torch.manual_seed(0) - text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5") + text_encoder_2 = T5EncoderModel(config) tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5") diff --git a/tests/pipelines/flux/test_pipeline_flux_control_inpaint.py b/tests/pipelines/flux/test_pipeline_flux_control_inpaint.py index e42c5fc2aab5..ae2b6b829e54 100644 --- a/tests/pipelines/flux/test_pipeline_flux_control_inpaint.py +++ b/tests/pipelines/flux/test_pipeline_flux_control_inpaint.py @@ -3,7 +3,7 @@ import numpy as np import torch from PIL import Image -from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel +from transformers import AutoConfig, AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel from diffusers import ( AutoencoderKL, @@ -58,7 +58,8 @@ def get_dummy_components(self): text_encoder = CLIPTextModel(clip_text_encoder_config) torch.manual_seed(0) - text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5") + text_encoder_2 = T5EncoderModel(config) tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5") diff --git a/tests/pipelines/flux/test_pipeline_flux_fill.py b/tests/pipelines/flux/test_pipeline_flux_fill.py index 25a4a3354820..42cd1efad495 100644 --- a/tests/pipelines/flux/test_pipeline_flux_fill.py +++ b/tests/pipelines/flux/test_pipeline_flux_fill.py @@ -3,7 +3,7 @@ import numpy as np import torch -from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel +from transformers import AutoConfig, AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler, FluxFillPipeline, FluxTransformer2DModel @@ -58,7 +58,8 @@ def get_dummy_components(self): text_encoder = CLIPTextModel(clip_text_encoder_config) torch.manual_seed(0) - text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5") + text_encoder_2 = T5EncoderModel(config) tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5") diff --git a/tests/pipelines/flux/test_pipeline_flux_img2img.py b/tests/pipelines/flux/test_pipeline_flux_img2img.py index 6f435760aef5..00587905d337 100644 --- a/tests/pipelines/flux/test_pipeline_flux_img2img.py +++ b/tests/pipelines/flux/test_pipeline_flux_img2img.py @@ -3,7 +3,7 @@ import numpy as np import torch -from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel +from transformers import AutoConfig, AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler, FluxImg2ImgPipeline, FluxTransformer2DModel @@ -55,7 +55,8 @@ def get_dummy_components(self): text_encoder = CLIPTextModel(clip_text_encoder_config) torch.manual_seed(0) - text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5") + text_encoder_2 = T5EncoderModel(config) tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5") diff --git a/tests/pipelines/flux/test_pipeline_flux_inpaint.py b/tests/pipelines/flux/test_pipeline_flux_inpaint.py index 6324ff236e10..14edb9e441b5 100644 --- a/tests/pipelines/flux/test_pipeline_flux_inpaint.py +++ b/tests/pipelines/flux/test_pipeline_flux_inpaint.py @@ -3,7 +3,7 @@ import numpy as np import torch -from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel +from transformers import AutoConfig, AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler, FluxInpaintPipeline, FluxTransformer2DModel @@ -55,7 +55,8 @@ def get_dummy_components(self): text_encoder = CLIPTextModel(clip_text_encoder_config) torch.manual_seed(0) - text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5") + text_encoder_2 = T5EncoderModel(config) tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5") diff --git a/tests/pipelines/flux/test_pipeline_flux_kontext.py b/tests/pipelines/flux/test_pipeline_flux_kontext.py index 5c78964ea54f..1c018f14b522 100644 --- a/tests/pipelines/flux/test_pipeline_flux_kontext.py +++ b/tests/pipelines/flux/test_pipeline_flux_kontext.py @@ -3,7 +3,7 @@ import numpy as np import PIL.Image import torch -from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel +from transformers import AutoConfig, AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel from diffusers import ( AutoencoderKL, @@ -79,7 +79,8 @@ def get_dummy_components(self, num_layers: int = 1, num_single_layers: int = 1): text_encoder = CLIPTextModel(clip_text_encoder_config) torch.manual_seed(0) - text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5") + text_encoder_2 = T5EncoderModel(config) tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5") diff --git a/tests/pipelines/flux/test_pipeline_flux_kontext_inpaint.py b/tests/pipelines/flux/test_pipeline_flux_kontext_inpaint.py index 9a2e32056dcb..b5f8570ebd1a 100644 --- a/tests/pipelines/flux/test_pipeline_flux_kontext_inpaint.py +++ b/tests/pipelines/flux/test_pipeline_flux_kontext_inpaint.py @@ -3,7 +3,7 @@ import numpy as np import torch -from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel +from transformers import AutoConfig, AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel from diffusers import ( AutoencoderKL, @@ -79,7 +79,8 @@ def get_dummy_components(self, num_layers: int = 1, num_single_layers: int = 1): text_encoder = CLIPTextModel(clip_text_encoder_config) torch.manual_seed(0) - text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5") + text_encoder_2 = T5EncoderModel(config) tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5") diff --git a/tests/pipelines/stable_diffusion_3/test_pipeline_stable_diffusion_3.py b/tests/pipelines/stable_diffusion_3/test_pipeline_stable_diffusion_3.py index 3ccefe3de35d..200c832d0941 100644 --- a/tests/pipelines/stable_diffusion_3/test_pipeline_stable_diffusion_3.py +++ b/tests/pipelines/stable_diffusion_3/test_pipeline_stable_diffusion_3.py @@ -3,7 +3,14 @@ import numpy as np import torch -from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModelWithProjection, CLIPTokenizer, T5EncoderModel +from transformers import ( + AutoConfig, + AutoTokenizer, + CLIPTextConfig, + CLIPTextModelWithProjection, + CLIPTokenizer, + T5EncoderModel, +) from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler, SD3Transformer2DModel, StableDiffusion3Pipeline @@ -72,7 +79,9 @@ def get_dummy_components(self): torch.manual_seed(0) text_encoder_2 = CLIPTextModelWithProjection(clip_text_encoder_config) - text_encoder_3 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + torch.manual_seed(0) + config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5") + text_encoder_3 = T5EncoderModel(config) tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") tokenizer_2 = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") diff --git a/tests/pipelines/stable_diffusion_3/test_pipeline_stable_diffusion_3_img2img.py b/tests/pipelines/stable_diffusion_3/test_pipeline_stable_diffusion_3_img2img.py index 9025b1060c9e..3f46b341a09e 100644 --- a/tests/pipelines/stable_diffusion_3/test_pipeline_stable_diffusion_3_img2img.py +++ b/tests/pipelines/stable_diffusion_3/test_pipeline_stable_diffusion_3_img2img.py @@ -4,7 +4,14 @@ import numpy as np import torch -from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModelWithProjection, CLIPTokenizer, T5EncoderModel +from transformers import ( + AutoConfig, + AutoTokenizer, + CLIPTextConfig, + CLIPTextModelWithProjection, + CLIPTokenizer, + T5EncoderModel, +) from diffusers import ( AutoencoderKL, @@ -73,7 +80,9 @@ def get_dummy_components(self): torch.manual_seed(0) text_encoder_2 = CLIPTextModelWithProjection(clip_text_encoder_config) - text_encoder_3 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + torch.manual_seed(0) + config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5") + text_encoder_3 = T5EncoderModel(config) tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") tokenizer_2 = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") diff --git a/tests/pipelines/stable_diffusion_3/test_pipeline_stable_diffusion_3_inpaint.py b/tests/pipelines/stable_diffusion_3/test_pipeline_stable_diffusion_3_inpaint.py index 628930340294..a90ca21a801b 100644 --- a/tests/pipelines/stable_diffusion_3/test_pipeline_stable_diffusion_3_inpaint.py +++ b/tests/pipelines/stable_diffusion_3/test_pipeline_stable_diffusion_3_inpaint.py @@ -3,7 +3,14 @@ import numpy as np import torch -from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModelWithProjection, CLIPTokenizer, T5EncoderModel +from transformers import ( + AutoConfig, + AutoTokenizer, + CLIPTextConfig, + CLIPTextModelWithProjection, + CLIPTokenizer, + T5EncoderModel, +) from diffusers import ( AutoencoderKL, @@ -73,7 +80,9 @@ def get_dummy_components(self): torch.manual_seed(0) text_encoder_2 = CLIPTextModelWithProjection(clip_text_encoder_config) - text_encoder_3 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + torch.manual_seed(0) + config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5") + text_encoder_3 = T5EncoderModel(config) tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") tokenizer_2 = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") From 084c959bdf572e3f77b19004ff11c35cc6df1e26 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Mon, 19 Jan 2026 15:08:55 +0530 Subject: [PATCH 10/13] fix t5 stuff for more. --- tests/pipelines/bria/test_pipeline_bria.py | 5 +++-- tests/pipelines/chroma/test_pipeline_chroma.py | 5 +++-- .../chroma/test_pipeline_chroma_img2img.py | 5 +++-- tests/pipelines/chronoedit/test_chronoedit.py | 4 +++- tests/pipelines/cogvideo/test_cogvideox.py | 5 +++-- .../cogvideo/test_cogvideox_fun_control.py | 5 +++-- .../cogvideo/test_cogvideox_image2video.py | 5 +++-- .../cogvideo/test_cogvideox_video2video.py | 5 +++-- tests/pipelines/cogview3/test_cogview3plus.py | 5 +++-- tests/pipelines/consisid/test_consisid.py | 5 +++-- .../test_controlnet_hunyuandit.py | 7 +++++-- tests/pipelines/cosmos/test_cosmos.py | 5 +++-- tests/pipelines/cosmos/test_cosmos2_text2image.py | 5 +++-- tests/pipelines/cosmos/test_cosmos2_video2world.py | 5 +++-- tests/pipelines/cosmos/test_cosmos_video2world.py | 5 +++-- tests/pipelines/deepfloyd_if/__init__.py | 8 +++++--- tests/pipelines/deepfloyd_if/test_if.py | 4 +--- tests/pipelines/glm_image/test_glm_image.py | 5 +++-- .../hidream_image/test_pipeline_hidream.py | 4 +++- .../pipelines/hunyuan_video1_5/test_hunyuan_1_5.py | 12 ++++++++++-- tests/pipelines/hunyuandit/test_hunyuan_dit.py | 6 ++++-- tests/pipelines/kandinsky3/test_kandinsky3.py | 5 +++-- .../pipelines/kandinsky3/test_kandinsky3_img2img.py | 5 +++-- tests/pipelines/latte/test_latte.py | 5 +++-- tests/pipelines/ltx/test_ltx.py | 5 +++-- tests/pipelines/ltx/test_ltx_condition.py | 5 +++-- tests/pipelines/ltx/test_ltx_image2video.py | 5 +++-- tests/pipelines/mochi/test_mochi.py | 5 +++-- tests/pipelines/pag/test_pag_hunyuan_dit.py | 6 ++++-- tests/pipelines/pag/test_pag_pixart_sigma.py | 5 +++-- tests/pipelines/pag/test_pag_sd3.py | 13 +++++++++++-- tests/pipelines/pag/test_pag_sd3_img2img.py | 13 +++++++++++-- tests/pipelines/pixart_alpha/test_pixart.py | 7 +++++-- tests/pipelines/pixart_sigma/test_pixart.py | 7 +++++-- tests/pipelines/skyreels_v2/test_skyreels_v2.py | 5 +++-- tests/pipelines/skyreels_v2/test_skyreels_v2_df.py | 5 +++-- .../test_skyreels_v2_df_image_to_video.py | 7 +++++-- .../test_skyreels_v2_df_video_to_video.py | 5 +++-- .../skyreels_v2/test_skyreels_v2_image_to_video.py | 4 +++- tests/pipelines/stable_audio/test_stable_audio.py | 8 +++----- .../test_pipeline_visualcloze_combined.py | 5 +++-- .../test_pipeline_visualcloze_generation.py | 5 +++-- tests/pipelines/wan/test_wan.py | 5 +++-- tests/pipelines/wan/test_wan_22.py | 13 ++++++------- tests/pipelines/wan/test_wan_22_image_to_video.py | 8 +++++--- tests/pipelines/wan/test_wan_animate.py | 4 +++- tests/pipelines/wan/test_wan_image_to_video.py | 7 +++++-- tests/pipelines/wan/test_wan_vace.py | 5 +++-- tests/pipelines/wan/test_wan_video_to_video.py | 5 +++-- 49 files changed, 187 insertions(+), 105 deletions(-) diff --git a/tests/pipelines/bria/test_pipeline_bria.py b/tests/pipelines/bria/test_pipeline_bria.py index 844488e76f2e..dac9c428cfc9 100644 --- a/tests/pipelines/bria/test_pipeline_bria.py +++ b/tests/pipelines/bria/test_pipeline_bria.py @@ -19,7 +19,7 @@ import numpy as np import torch from huggingface_hub import hf_hub_download -from transformers import T5EncoderModel, T5TokenizerFast +from transformers import AutoConfig, T5EncoderModel, T5TokenizerFast from diffusers import ( AutoencoderKL, @@ -89,7 +89,8 @@ def get_dummy_components(self): scheduler = FlowMatchEulerDiscreteScheduler() torch.manual_seed(0) - text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5") + text_encoder = T5EncoderModel(config) tokenizer = T5TokenizerFast.from_pretrained("hf-internal-testing/tiny-random-t5") components = { diff --git a/tests/pipelines/chroma/test_pipeline_chroma.py b/tests/pipelines/chroma/test_pipeline_chroma.py index 3edd58b75f82..6b856128dff0 100644 --- a/tests/pipelines/chroma/test_pipeline_chroma.py +++ b/tests/pipelines/chroma/test_pipeline_chroma.py @@ -2,7 +2,7 @@ import numpy as np import torch -from transformers import AutoTokenizer, T5EncoderModel +from transformers import AutoConfig, AutoTokenizer, T5EncoderModel from diffusers import AutoencoderKL, ChromaPipeline, ChromaTransformer2DModel, FlowMatchEulerDiscreteScheduler @@ -41,7 +41,8 @@ def get_dummy_components(self, num_layers: int = 1, num_single_layers: int = 1): ) torch.manual_seed(0) - text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5") + text_encoder = T5EncoderModel(config) tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5") diff --git a/tests/pipelines/chroma/test_pipeline_chroma_img2img.py b/tests/pipelines/chroma/test_pipeline_chroma_img2img.py index 4ed1393037b9..8d991c42c749 100644 --- a/tests/pipelines/chroma/test_pipeline_chroma_img2img.py +++ b/tests/pipelines/chroma/test_pipeline_chroma_img2img.py @@ -3,7 +3,7 @@ import numpy as np import torch -from transformers import AutoTokenizer, T5EncoderModel +from transformers import AutoConfig, AutoTokenizer, T5EncoderModel from diffusers import AutoencoderKL, ChromaImg2ImgPipeline, ChromaTransformer2DModel, FlowMatchEulerDiscreteScheduler @@ -42,7 +42,8 @@ def get_dummy_components(self, num_layers: int = 1, num_single_layers: int = 1): ) torch.manual_seed(0) - text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5") + text_encoder = T5EncoderModel(config) tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5") diff --git a/tests/pipelines/chronoedit/test_chronoedit.py b/tests/pipelines/chronoedit/test_chronoedit.py index 43e5b3159b1c..0b72f93eed3c 100644 --- a/tests/pipelines/chronoedit/test_chronoedit.py +++ b/tests/pipelines/chronoedit/test_chronoedit.py @@ -17,6 +17,7 @@ import torch from PIL import Image from transformers import ( + AutoConfig, AutoTokenizer, CLIPImageProcessor, CLIPVisionConfig, @@ -71,7 +72,8 @@ def get_dummy_components(self): torch.manual_seed(0) # TODO: impl FlowDPMSolverMultistepScheduler scheduler = FlowMatchEulerDiscreteScheduler(shift=7.0) - text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5") + text_encoder = T5EncoderModel(config) tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5") torch.manual_seed(0) diff --git a/tests/pipelines/cogvideo/test_cogvideox.py b/tests/pipelines/cogvideo/test_cogvideox.py index dca1725d8a74..73816dcd3780 100644 --- a/tests/pipelines/cogvideo/test_cogvideox.py +++ b/tests/pipelines/cogvideo/test_cogvideox.py @@ -18,7 +18,7 @@ import numpy as np import torch -from transformers import AutoTokenizer, T5EncoderModel +from transformers import AutoConfig, AutoTokenizer, T5EncoderModel from diffusers import AutoencoderKLCogVideoX, CogVideoXPipeline, CogVideoXTransformer3DModel, DDIMScheduler @@ -117,7 +117,8 @@ def get_dummy_components(self, num_layers: int = 1): torch.manual_seed(0) scheduler = DDIMScheduler() - text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5") + text_encoder = T5EncoderModel(config) tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5") components = { diff --git a/tests/pipelines/cogvideo/test_cogvideox_fun_control.py b/tests/pipelines/cogvideo/test_cogvideox_fun_control.py index 097e8df7b35f..246458a0f453 100644 --- a/tests/pipelines/cogvideo/test_cogvideox_fun_control.py +++ b/tests/pipelines/cogvideo/test_cogvideox_fun_control.py @@ -18,7 +18,7 @@ import numpy as np import torch from PIL import Image -from transformers import AutoTokenizer, T5EncoderModel +from transformers import AutoConfig, AutoTokenizer, T5EncoderModel from diffusers import AutoencoderKLCogVideoX, CogVideoXFunControlPipeline, CogVideoXTransformer3DModel, DDIMScheduler @@ -104,7 +104,8 @@ def get_dummy_components(self): torch.manual_seed(0) scheduler = DDIMScheduler() - text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5") + text_encoder = T5EncoderModel(config) tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5") components = { diff --git a/tests/pipelines/cogvideo/test_cogvideox_image2video.py b/tests/pipelines/cogvideo/test_cogvideox_image2video.py index 1dd5e2ae1405..3eb4f1ef485d 100644 --- a/tests/pipelines/cogvideo/test_cogvideox_image2video.py +++ b/tests/pipelines/cogvideo/test_cogvideox_image2video.py @@ -19,7 +19,7 @@ import numpy as np import torch from PIL import Image -from transformers import AutoTokenizer, T5EncoderModel +from transformers import AutoConfig, AutoTokenizer, T5EncoderModel from diffusers import AutoencoderKLCogVideoX, CogVideoXImageToVideoPipeline, CogVideoXTransformer3DModel, DDIMScheduler from diffusers.utils import load_image @@ -113,7 +113,8 @@ def get_dummy_components(self): torch.manual_seed(0) scheduler = DDIMScheduler() - text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5") + text_encoder = T5EncoderModel(config) tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5") components = { diff --git a/tests/pipelines/cogvideo/test_cogvideox_video2video.py b/tests/pipelines/cogvideo/test_cogvideox_video2video.py index 3a1da7c4e7f7..60424ad2a04e 100644 --- a/tests/pipelines/cogvideo/test_cogvideox_video2video.py +++ b/tests/pipelines/cogvideo/test_cogvideox_video2video.py @@ -18,7 +18,7 @@ import numpy as np import torch from PIL import Image -from transformers import AutoTokenizer, T5EncoderModel +from transformers import AutoConfig, AutoTokenizer, T5EncoderModel from diffusers import AutoencoderKLCogVideoX, CogVideoXTransformer3DModel, CogVideoXVideoToVideoPipeline, DDIMScheduler @@ -99,7 +99,8 @@ def get_dummy_components(self): torch.manual_seed(0) scheduler = DDIMScheduler() - text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5") + text_encoder = T5EncoderModel(config) tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5") components = { diff --git a/tests/pipelines/cogview3/test_cogview3plus.py b/tests/pipelines/cogview3/test_cogview3plus.py index 819d4b952fc7..374cb6a2a295 100644 --- a/tests/pipelines/cogview3/test_cogview3plus.py +++ b/tests/pipelines/cogview3/test_cogview3plus.py @@ -18,7 +18,7 @@ import numpy as np import torch -from transformers import AutoTokenizer, T5EncoderModel +from transformers import AutoConfig, AutoTokenizer, T5EncoderModel from diffusers import AutoencoderKL, CogVideoXDDIMScheduler, CogView3PlusPipeline, CogView3PlusTransformer2DModel @@ -89,7 +89,8 @@ def get_dummy_components(self): torch.manual_seed(0) scheduler = CogVideoXDDIMScheduler() - text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5") + text_encoder = T5EncoderModel(config) tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5") components = { diff --git a/tests/pipelines/consisid/test_consisid.py b/tests/pipelines/consisid/test_consisid.py index 4fd9e536cddc..748fbff6b8a0 100644 --- a/tests/pipelines/consisid/test_consisid.py +++ b/tests/pipelines/consisid/test_consisid.py @@ -19,7 +19,7 @@ import numpy as np import torch from PIL import Image -from transformers import AutoTokenizer, T5EncoderModel +from transformers import AutoConfig, AutoTokenizer, T5EncoderModel from diffusers import AutoencoderKLCogVideoX, ConsisIDPipeline, ConsisIDTransformer3DModel, DDIMScheduler from diffusers.utils import load_image @@ -122,7 +122,8 @@ def get_dummy_components(self): torch.manual_seed(0) scheduler = DDIMScheduler() - text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5") + text_encoder = T5EncoderModel(config) tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5") components = { diff --git a/tests/pipelines/controlnet_hunyuandit/test_controlnet_hunyuandit.py b/tests/pipelines/controlnet_hunyuandit/test_controlnet_hunyuandit.py index bf31f2abcffb..034ef56b0fd3 100644 --- a/tests/pipelines/controlnet_hunyuandit/test_controlnet_hunyuandit.py +++ b/tests/pipelines/controlnet_hunyuandit/test_controlnet_hunyuandit.py @@ -18,7 +18,7 @@ import numpy as np import torch -from transformers import AutoTokenizer, BertModel, T5EncoderModel +from transformers import AutoConfig, AutoTokenizer, BertModel, T5EncoderModel from diffusers import ( AutoencoderKL, @@ -96,7 +96,10 @@ def get_dummy_components(self): scheduler = DDPMScheduler() text_encoder = BertModel.from_pretrained("hf-internal-testing/tiny-random-BertModel") tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-BertModel") - text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + + torch.manual_seed(0) + config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5") + text_encoder_2 = T5EncoderModel(config) tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5") components = { diff --git a/tests/pipelines/cosmos/test_cosmos.py b/tests/pipelines/cosmos/test_cosmos.py index 32eea9c98c2c..3f93723eb341 100644 --- a/tests/pipelines/cosmos/test_cosmos.py +++ b/tests/pipelines/cosmos/test_cosmos.py @@ -20,7 +20,7 @@ import numpy as np import torch -from transformers import AutoTokenizer, T5EncoderModel +from transformers import AutoConfig, AutoTokenizer, T5EncoderModel from diffusers import AutoencoderKLCosmos, CosmosTextToWorldPipeline, CosmosTransformer3DModel, EDMEulerScheduler @@ -107,7 +107,8 @@ def get_dummy_components(self): rho=7.0, final_sigmas_type="sigma_min", ) - text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5") + text_encoder = T5EncoderModel(config) tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5") components = { diff --git a/tests/pipelines/cosmos/test_cosmos2_text2image.py b/tests/pipelines/cosmos/test_cosmos2_text2image.py index 8e3c5e4c29f4..71c61eff0054 100644 --- a/tests/pipelines/cosmos/test_cosmos2_text2image.py +++ b/tests/pipelines/cosmos/test_cosmos2_text2image.py @@ -20,7 +20,7 @@ import numpy as np import torch -from transformers import AutoTokenizer, T5EncoderModel +from transformers import AutoConfig, AutoTokenizer, T5EncoderModel from diffusers import ( AutoencoderKLWan, @@ -95,7 +95,8 @@ def get_dummy_components(self): torch.manual_seed(0) scheduler = FlowMatchEulerDiscreteScheduler(use_karras_sigmas=True) - text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5") + text_encoder = T5EncoderModel(config) tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5") components = { diff --git a/tests/pipelines/cosmos/test_cosmos2_video2world.py b/tests/pipelines/cosmos/test_cosmos2_video2world.py index b0ca0e160d98..1b814257a30a 100644 --- a/tests/pipelines/cosmos/test_cosmos2_video2world.py +++ b/tests/pipelines/cosmos/test_cosmos2_video2world.py @@ -21,7 +21,7 @@ import numpy as np import PIL.Image import torch -from transformers import AutoTokenizer, T5EncoderModel +from transformers import AutoConfig, AutoTokenizer, T5EncoderModel from diffusers import ( AutoencoderKLWan, @@ -96,7 +96,8 @@ def get_dummy_components(self): torch.manual_seed(0) scheduler = FlowMatchEulerDiscreteScheduler(use_karras_sigmas=True) - text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5") + text_encoder = T5EncoderModel(config) tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5") components = { diff --git a/tests/pipelines/cosmos/test_cosmos_video2world.py b/tests/pipelines/cosmos/test_cosmos_video2world.py index 2633c2007ac2..7bad955fc9cb 100644 --- a/tests/pipelines/cosmos/test_cosmos_video2world.py +++ b/tests/pipelines/cosmos/test_cosmos_video2world.py @@ -21,7 +21,7 @@ import numpy as np import PIL.Image import torch -from transformers import AutoTokenizer, T5EncoderModel +from transformers import AutoConfig, AutoTokenizer, T5EncoderModel from diffusers import AutoencoderKLCosmos, CosmosTransformer3DModel, CosmosVideoToWorldPipeline, EDMEulerScheduler @@ -108,7 +108,8 @@ def get_dummy_components(self): rho=7.0, final_sigmas_type="sigma_min", ) - text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5") + text_encoder = T5EncoderModel(config) tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5") components = { diff --git a/tests/pipelines/deepfloyd_if/__init__.py b/tests/pipelines/deepfloyd_if/__init__.py index d47374b07e22..855907b7803c 100644 --- a/tests/pipelines/deepfloyd_if/__init__.py +++ b/tests/pipelines/deepfloyd_if/__init__.py @@ -2,7 +2,7 @@ import numpy as np import torch -from transformers import AutoTokenizer, T5EncoderModel +from transformers import AutoConfig, AutoTokenizer, T5EncoderModel from diffusers import DDPMScheduler, UNet2DConditionModel from diffusers.models.attention_processor import AttnAddedKVProcessor @@ -18,7 +18,8 @@ class IFPipelineTesterMixin: def _get_dummy_components(self): torch.manual_seed(0) - text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5") + text_encoder = T5EncoderModel(config) torch.manual_seed(0) tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5") @@ -75,7 +76,8 @@ def _get_dummy_components(self): def _get_superresolution_dummy_components(self): torch.manual_seed(0) - text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5") + text_encoder = T5EncoderModel(config) torch.manual_seed(0) tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5") diff --git a/tests/pipelines/deepfloyd_if/test_if.py b/tests/pipelines/deepfloyd_if/test_if.py index e1870ddcbae9..0fd1391decd0 100644 --- a/tests/pipelines/deepfloyd_if/test_if.py +++ b/tests/pipelines/deepfloyd_if/test_if.py @@ -18,9 +18,7 @@ import torch -from diffusers import ( - IFPipeline, -) +from diffusers import IFPipeline from diffusers.models.attention_processor import AttnAddedKVProcessor from diffusers.utils.import_utils import is_xformers_available diff --git a/tests/pipelines/glm_image/test_glm_image.py b/tests/pipelines/glm_image/test_glm_image.py index 7a380b99b0fb..36b0841726b7 100644 --- a/tests/pipelines/glm_image/test_glm_image.py +++ b/tests/pipelines/glm_image/test_glm_image.py @@ -16,7 +16,7 @@ import numpy as np import torch -from transformers import AutoTokenizer, T5EncoderModel +from transformers import AutoConfig, AutoTokenizer, T5EncoderModel from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler, GlmImagePipeline, GlmImageTransformer2DModel from diffusers.utils import is_transformers_version @@ -57,7 +57,8 @@ class GlmImagePipelineFastTests(PipelineTesterMixin, unittest.TestCase): def get_dummy_components(self): torch.manual_seed(0) - text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5") + text_encoder = T5EncoderModel(config) tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5") glm_config = GlmImageConfig( diff --git a/tests/pipelines/hidream_image/test_pipeline_hidream.py b/tests/pipelines/hidream_image/test_pipeline_hidream.py index ddf39ba4c1e6..10b2cf1eaf9f 100644 --- a/tests/pipelines/hidream_image/test_pipeline_hidream.py +++ b/tests/pipelines/hidream_image/test_pipeline_hidream.py @@ -18,6 +18,7 @@ import numpy as np import torch from transformers import ( + AutoConfig, AutoTokenizer, CLIPTextConfig, CLIPTextModelWithProjection, @@ -94,7 +95,8 @@ def get_dummy_components(self): text_encoder_2 = CLIPTextModelWithProjection(clip_text_encoder_config) torch.manual_seed(0) - text_encoder_3 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5") + text_encoder_3 = T5EncoderModel(config) torch.manual_seed(0) text_encoder_4 = LlamaForCausalLM.from_pretrained("hf-internal-testing/tiny-random-LlamaForCausalLM") diff --git a/tests/pipelines/hunyuan_video1_5/test_hunyuan_1_5.py b/tests/pipelines/hunyuan_video1_5/test_hunyuan_1_5.py index 993c7ef6e4bb..de20148105bf 100644 --- a/tests/pipelines/hunyuan_video1_5/test_hunyuan_1_5.py +++ b/tests/pipelines/hunyuan_video1_5/test_hunyuan_1_5.py @@ -15,7 +15,14 @@ import unittest import torch -from transformers import ByT5Tokenizer, Qwen2_5_VLTextConfig, Qwen2_5_VLTextModel, Qwen2Tokenizer, T5EncoderModel +from transformers import ( + AutoConfig, + ByT5Tokenizer, + Qwen2_5_VLTextConfig, + Qwen2_5_VLTextModel, + Qwen2Tokenizer, + T5EncoderModel, +) from diffusers import ( AutoencoderKLHunyuanVideo15, @@ -114,7 +121,8 @@ def get_dummy_components(self, num_layers: int = 1): tokenizer = Qwen2Tokenizer.from_pretrained("hf-internal-testing/tiny-random-Qwen2VLForConditionalGeneration") torch.manual_seed(0) - text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5") + text_encoder_2 = T5EncoderModel(config) tokenizer_2 = ByT5Tokenizer() guider = ClassifierFreeGuidance(guidance_scale=1.0) diff --git a/tests/pipelines/hunyuandit/test_hunyuan_dit.py b/tests/pipelines/hunyuandit/test_hunyuan_dit.py index 2a329f10bc80..ba57b6a3599a 100644 --- a/tests/pipelines/hunyuandit/test_hunyuan_dit.py +++ b/tests/pipelines/hunyuandit/test_hunyuan_dit.py @@ -19,7 +19,7 @@ import numpy as np import torch -from transformers import AutoTokenizer, BertModel, T5EncoderModel +from transformers import AutoConfig, AutoTokenizer, BertModel, T5EncoderModel from diffusers import AutoencoderKL, DDPMScheduler, HunyuanDiT2DModel, HunyuanDiTPipeline @@ -74,7 +74,9 @@ def get_dummy_components(self): scheduler = DDPMScheduler() text_encoder = BertModel.from_pretrained("hf-internal-testing/tiny-random-BertModel") tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-BertModel") - text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + torch.manual_seed(0) + config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5") + text_encoder_2 = T5EncoderModel(config) tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5") components = { diff --git a/tests/pipelines/kandinsky3/test_kandinsky3.py b/tests/pipelines/kandinsky3/test_kandinsky3.py index 55500f729bbb..abfd34b8478d 100644 --- a/tests/pipelines/kandinsky3/test_kandinsky3.py +++ b/tests/pipelines/kandinsky3/test_kandinsky3.py @@ -19,7 +19,7 @@ import numpy as np import torch from PIL import Image -from transformers import AutoTokenizer, T5EncoderModel +from transformers import AutoConfig, AutoTokenizer, T5EncoderModel from diffusers import ( AutoPipelineForImage2Image, @@ -108,7 +108,8 @@ def get_dummy_components(self, time_cond_proj_dim=None): torch.manual_seed(0) movq = self.dummy_movq torch.manual_seed(0) - text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5") + text_encoder = T5EncoderModel(config) torch.manual_seed(0) tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5") diff --git a/tests/pipelines/kandinsky3/test_kandinsky3_img2img.py b/tests/pipelines/kandinsky3/test_kandinsky3_img2img.py index 503fdb242dff..4aafa082e9fc 100644 --- a/tests/pipelines/kandinsky3/test_kandinsky3_img2img.py +++ b/tests/pipelines/kandinsky3/test_kandinsky3_img2img.py @@ -20,7 +20,7 @@ import numpy as np import torch from PIL import Image -from transformers import AutoTokenizer, T5EncoderModel +from transformers import AutoConfig, AutoTokenizer, T5EncoderModel from diffusers import ( AutoPipelineForImage2Image, @@ -119,7 +119,8 @@ def get_dummy_components(self, time_cond_proj_dim=None): torch.manual_seed(0) movq = self.dummy_movq torch.manual_seed(0) - text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5") + text_encoder = T5EncoderModel(config) torch.manual_seed(0) tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5") diff --git a/tests/pipelines/latte/test_latte.py b/tests/pipelines/latte/test_latte.py index a40d4bf8eede..873c06e11c5b 100644 --- a/tests/pipelines/latte/test_latte.py +++ b/tests/pipelines/latte/test_latte.py @@ -20,7 +20,7 @@ import numpy as np import torch -from transformers import AutoTokenizer, T5EncoderModel +from transformers import AutoConfig, AutoTokenizer, T5EncoderModel from diffusers import ( AutoencoderKL, @@ -109,7 +109,8 @@ def get_dummy_components(self, num_layers: int = 1): vae = AutoencoderKL() scheduler = DDIMScheduler() - text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5") + text_encoder = T5EncoderModel(config) tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5") diff --git a/tests/pipelines/ltx/test_ltx.py b/tests/pipelines/ltx/test_ltx.py index aaf4161b51fb..9836551d30a1 100644 --- a/tests/pipelines/ltx/test_ltx.py +++ b/tests/pipelines/ltx/test_ltx.py @@ -17,7 +17,7 @@ import numpy as np import torch -from transformers import AutoTokenizer, T5EncoderModel +from transformers import AutoConfig, AutoTokenizer, T5EncoderModel from diffusers import AutoencoderKLLTXVideo, FlowMatchEulerDiscreteScheduler, LTXPipeline, LTXVideoTransformer3DModel @@ -88,7 +88,8 @@ def get_dummy_components(self, num_layers: int = 1): torch.manual_seed(0) scheduler = FlowMatchEulerDiscreteScheduler() - text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5") + text_encoder = T5EncoderModel(config) tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5") components = { diff --git a/tests/pipelines/ltx/test_ltx_condition.py b/tests/pipelines/ltx/test_ltx_condition.py index f5dfb0186209..b469662241fc 100644 --- a/tests/pipelines/ltx/test_ltx_condition.py +++ b/tests/pipelines/ltx/test_ltx_condition.py @@ -17,7 +17,7 @@ import numpy as np import torch -from transformers import AutoTokenizer, T5EncoderModel +from transformers import AutoConfig, AutoTokenizer, T5EncoderModel from diffusers import ( AutoencoderKLLTXVideo, @@ -92,7 +92,8 @@ def get_dummy_components(self): torch.manual_seed(0) scheduler = FlowMatchEulerDiscreteScheduler() - text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5") + text_encoder = T5EncoderModel(config) tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5") components = { diff --git a/tests/pipelines/ltx/test_ltx_image2video.py b/tests/pipelines/ltx/test_ltx_image2video.py index 2702993d4a59..7407c8bef5ea 100644 --- a/tests/pipelines/ltx/test_ltx_image2video.py +++ b/tests/pipelines/ltx/test_ltx_image2video.py @@ -17,7 +17,7 @@ import numpy as np import torch -from transformers import AutoTokenizer, T5EncoderModel +from transformers import AutoConfig, AutoTokenizer, T5EncoderModel from diffusers import ( AutoencoderKLLTXVideo, @@ -91,7 +91,8 @@ def get_dummy_components(self): torch.manual_seed(0) scheduler = FlowMatchEulerDiscreteScheduler() - text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5") + text_encoder = T5EncoderModel(config) tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5") components = { diff --git a/tests/pipelines/mochi/test_mochi.py b/tests/pipelines/mochi/test_mochi.py index 5615720a9343..a2100b5db540 100644 --- a/tests/pipelines/mochi/test_mochi.py +++ b/tests/pipelines/mochi/test_mochi.py @@ -18,7 +18,7 @@ import numpy as np import torch -from transformers import AutoTokenizer, T5EncoderModel +from transformers import AutoConfig, AutoTokenizer, T5EncoderModel from diffusers import AutoencoderKLMochi, FlowMatchEulerDiscreteScheduler, MochiPipeline, MochiTransformer3DModel @@ -89,7 +89,8 @@ def get_dummy_components(self, num_layers: int = 2): torch.manual_seed(0) scheduler = FlowMatchEulerDiscreteScheduler() - text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5") + text_encoder = T5EncoderModel(config) tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5") components = { diff --git a/tests/pipelines/pag/test_pag_hunyuan_dit.py b/tests/pipelines/pag/test_pag_hunyuan_dit.py index f268a614f85c..38686ee448de 100644 --- a/tests/pipelines/pag/test_pag_hunyuan_dit.py +++ b/tests/pipelines/pag/test_pag_hunyuan_dit.py @@ -19,7 +19,7 @@ import numpy as np import torch -from transformers import AutoTokenizer, BertModel, T5EncoderModel +from transformers import AutoConfig, AutoTokenizer, BertModel, T5EncoderModel from diffusers import ( AutoencoderKL, @@ -67,7 +67,9 @@ def get_dummy_components(self): scheduler = DDPMScheduler() text_encoder = BertModel.from_pretrained("hf-internal-testing/tiny-random-BertModel") tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-BertModel") - text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + torch.manual_seed(0) + config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5") + text_encoder_2 = T5EncoderModel(config) tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5") components = { diff --git a/tests/pipelines/pag/test_pag_pixart_sigma.py b/tests/pipelines/pag/test_pag_pixart_sigma.py index c04ebad08fdc..9bc2f6eed395 100644 --- a/tests/pipelines/pag/test_pag_pixart_sigma.py +++ b/tests/pipelines/pag/test_pag_pixart_sigma.py @@ -19,7 +19,7 @@ import numpy as np import torch -from transformers import AutoTokenizer, T5EncoderModel +from transformers import AutoConfig, AutoTokenizer, T5EncoderModel import diffusers from diffusers import ( @@ -80,7 +80,8 @@ def get_dummy_components(self): vae = AutoencoderKL() scheduler = DDIMScheduler() - text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5") + text_encoder = T5EncoderModel(config) tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5") diff --git a/tests/pipelines/pag/test_pag_sd3.py b/tests/pipelines/pag/test_pag_sd3.py index 26e6ca099286..7f755ea8e170 100644 --- a/tests/pipelines/pag/test_pag_sd3.py +++ b/tests/pipelines/pag/test_pag_sd3.py @@ -3,7 +3,14 @@ import numpy as np import torch -from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModelWithProjection, CLIPTokenizer, T5EncoderModel +from transformers import ( + AutoConfig, + AutoTokenizer, + CLIPTextConfig, + CLIPTextModelWithProjection, + CLIPTokenizer, + T5EncoderModel, +) from diffusers import ( AutoencoderKL, @@ -73,7 +80,9 @@ def get_dummy_components(self): torch.manual_seed(0) text_encoder_2 = CLIPTextModelWithProjection(clip_text_encoder_config) - text_encoder_3 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + torch.manual_seed(0) + config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5") + text_encoder_3 = T5EncoderModel(config) tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") tokenizer_2 = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") diff --git a/tests/pipelines/pag/test_pag_sd3_img2img.py b/tests/pipelines/pag/test_pag_sd3_img2img.py index 19a36e283de4..e4146b87803c 100644 --- a/tests/pipelines/pag/test_pag_sd3_img2img.py +++ b/tests/pipelines/pag/test_pag_sd3_img2img.py @@ -5,7 +5,14 @@ import numpy as np import torch -from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModelWithProjection, CLIPTokenizer, T5EncoderModel +from transformers import ( + AutoConfig, + AutoTokenizer, + CLIPTextConfig, + CLIPTextModelWithProjection, + CLIPTokenizer, + T5EncoderModel, +) from diffusers import ( AutoencoderKL, @@ -84,7 +91,9 @@ def get_dummy_components(self): torch.manual_seed(0) text_encoder_2 = CLIPTextModelWithProjection(clip_text_encoder_config) - text_encoder_3 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + torch.manual_seed(0) + config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5") + text_encoder_3 = T5EncoderModel(config) tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") tokenizer_2 = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") diff --git a/tests/pipelines/pixart_alpha/test_pixart.py b/tests/pipelines/pixart_alpha/test_pixart.py index fd41c9887dcc..037a9f44f31e 100644 --- a/tests/pipelines/pixart_alpha/test_pixart.py +++ b/tests/pipelines/pixart_alpha/test_pixart.py @@ -19,7 +19,7 @@ import numpy as np import torch -from transformers import AutoTokenizer, T5EncoderModel +from transformers import AutoConfig, AutoTokenizer, T5EncoderModel from diffusers import ( AutoencoderKL, @@ -77,7 +77,10 @@ def get_dummy_components(self): vae = AutoencoderKL() scheduler = DDIMScheduler() - text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + + torch.manual_seed(0) + config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5") + text_encoder = T5EncoderModel(config) tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5") diff --git a/tests/pipelines/pixart_sigma/test_pixart.py b/tests/pipelines/pixart_sigma/test_pixart.py index 6e8535062a79..51eebadd0ed0 100644 --- a/tests/pipelines/pixart_sigma/test_pixart.py +++ b/tests/pipelines/pixart_sigma/test_pixart.py @@ -19,7 +19,7 @@ import numpy as np import torch -from transformers import AutoTokenizer, T5EncoderModel +from transformers import AutoConfig, AutoTokenizer, T5EncoderModel from diffusers import ( AutoencoderKL, @@ -83,7 +83,10 @@ def get_dummy_components(self): vae = AutoencoderKL() scheduler = DDIMScheduler() - text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + + torch.manual_seed(0) + config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5") + text_encoder = T5EncoderModel(config) tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5") diff --git a/tests/pipelines/skyreels_v2/test_skyreels_v2.py b/tests/pipelines/skyreels_v2/test_skyreels_v2.py index 1bcec877c30d..b3f4e2849378 100644 --- a/tests/pipelines/skyreels_v2/test_skyreels_v2.py +++ b/tests/pipelines/skyreels_v2/test_skyreels_v2.py @@ -16,7 +16,7 @@ import numpy as np import torch -from transformers import AutoTokenizer, T5EncoderModel +from transformers import AutoConfig, AutoTokenizer, T5EncoderModel from diffusers import ( AutoencoderKLWan, @@ -68,7 +68,8 @@ def get_dummy_components(self): torch.manual_seed(0) scheduler = UniPCMultistepScheduler(flow_shift=8.0, use_flow_sigmas=True) - text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5") + text_encoder = T5EncoderModel(config) tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5") torch.manual_seed(0) diff --git a/tests/pipelines/skyreels_v2/test_skyreels_v2_df.py b/tests/pipelines/skyreels_v2/test_skyreels_v2_df.py index 74235d59efd6..35d9852815f7 100644 --- a/tests/pipelines/skyreels_v2/test_skyreels_v2_df.py +++ b/tests/pipelines/skyreels_v2/test_skyreels_v2_df.py @@ -16,7 +16,7 @@ import numpy as np import torch -from transformers import AutoTokenizer, T5EncoderModel +from transformers import AutoConfig, AutoTokenizer, T5EncoderModel from diffusers import ( AutoencoderKLWan, @@ -68,7 +68,8 @@ def get_dummy_components(self): torch.manual_seed(0) scheduler = UniPCMultistepScheduler(flow_shift=8.0, use_flow_sigmas=True) - text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5") + text_encoder = T5EncoderModel(config) tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5") torch.manual_seed(0) diff --git a/tests/pipelines/skyreels_v2/test_skyreels_v2_df_image_to_video.py b/tests/pipelines/skyreels_v2/test_skyreels_v2_df_image_to_video.py index f0cbc710df05..2764bb6dd822 100644 --- a/tests/pipelines/skyreels_v2/test_skyreels_v2_df_image_to_video.py +++ b/tests/pipelines/skyreels_v2/test_skyreels_v2_df_image_to_video.py @@ -18,6 +18,7 @@ import torch from PIL import Image from transformers import ( + AutoConfig, AutoTokenizer, T5EncoderModel, ) @@ -68,7 +69,8 @@ def get_dummy_components(self): torch.manual_seed(0) scheduler = UniPCMultistepScheduler(flow_shift=5.0, use_flow_sigmas=True) - text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5") + text_encoder = T5EncoderModel(config) tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5") torch.manual_seed(0) @@ -159,7 +161,8 @@ def get_dummy_components(self): torch.manual_seed(0) scheduler = UniPCMultistepScheduler(flow_shift=5.0, use_flow_sigmas=True) - text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5") + text_encoder = T5EncoderModel(config) tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5") torch.manual_seed(0) diff --git a/tests/pipelines/skyreels_v2/test_skyreels_v2_df_video_to_video.py b/tests/pipelines/skyreels_v2/test_skyreels_v2_df_video_to_video.py index 1b0b23318e63..b0f384f5a4e6 100644 --- a/tests/pipelines/skyreels_v2/test_skyreels_v2_df_video_to_video.py +++ b/tests/pipelines/skyreels_v2/test_skyreels_v2_df_video_to_video.py @@ -18,7 +18,7 @@ import numpy as np import torch from PIL import Image -from transformers import AutoTokenizer, T5EncoderModel +from transformers import AutoConfig, AutoTokenizer, T5EncoderModel from diffusers import ( AutoencoderKLWan, @@ -70,7 +70,8 @@ def get_dummy_components(self): torch.manual_seed(0) scheduler = UniPCMultistepScheduler(flow_shift=5.0, use_flow_sigmas=True) - text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5") + text_encoder = T5EncoderModel(config) tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5") torch.manual_seed(0) diff --git a/tests/pipelines/skyreels_v2/test_skyreels_v2_image_to_video.py b/tests/pipelines/skyreels_v2/test_skyreels_v2_image_to_video.py index 784f701a29d2..77b6706db1b2 100644 --- a/tests/pipelines/skyreels_v2/test_skyreels_v2_image_to_video.py +++ b/tests/pipelines/skyreels_v2/test_skyreels_v2_image_to_video.py @@ -18,6 +18,7 @@ import torch from PIL import Image from transformers import ( + AutoConfig, AutoTokenizer, CLIPImageProcessor, CLIPVisionConfig, @@ -71,7 +72,8 @@ def get_dummy_components(self): torch.manual_seed(0) scheduler = UniPCMultistepScheduler(flow_shift=5.0, use_flow_sigmas=True) - text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5") + text_encoder = T5EncoderModel(config) tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5") torch.manual_seed(0) diff --git a/tests/pipelines/stable_audio/test_stable_audio.py b/tests/pipelines/stable_audio/test_stable_audio.py index dd03f4d07f07..492aa92252de 100644 --- a/tests/pipelines/stable_audio/test_stable_audio.py +++ b/tests/pipelines/stable_audio/test_stable_audio.py @@ -19,10 +19,7 @@ import numpy as np import torch -from transformers import ( - T5EncoderModel, - T5Tokenizer, -) +from transformers import AutoConfig, T5EncoderModel, T5Tokenizer from diffusers import ( AutoencoderOobleck, @@ -111,7 +108,8 @@ def get_dummy_components(self): ) torch.manual_seed(0) t5_repo_id = "hf-internal-testing/tiny-random-T5ForConditionalGeneration" - text_encoder = T5EncoderModel.from_pretrained(t5_repo_id) + config = AutoConfig.from_pretrained(t5_repo_id) + text_encoder = T5EncoderModel(config) tokenizer = T5Tokenizer.from_pretrained(t5_repo_id, truncation=True, model_max_length=25) torch.manual_seed(0) diff --git a/tests/pipelines/visualcloze/test_pipeline_visualcloze_combined.py b/tests/pipelines/visualcloze/test_pipeline_visualcloze_combined.py index 00ae0441fe99..9471badb20bf 100644 --- a/tests/pipelines/visualcloze/test_pipeline_visualcloze_combined.py +++ b/tests/pipelines/visualcloze/test_pipeline_visualcloze_combined.py @@ -5,7 +5,7 @@ import numpy as np import torch from PIL import Image -from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel +from transformers import AutoConfig, AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel import diffusers from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler, FluxTransformer2DModel, VisualClozePipeline @@ -77,7 +77,8 @@ def get_dummy_components(self): text_encoder = CLIPTextModel(clip_text_encoder_config) torch.manual_seed(0) - text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5") + text_encoder_2 = T5EncoderModel(config) tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5") diff --git a/tests/pipelines/visualcloze/test_pipeline_visualcloze_generation.py b/tests/pipelines/visualcloze/test_pipeline_visualcloze_generation.py index ab6b3ca5c587..13f164ad9059 100644 --- a/tests/pipelines/visualcloze/test_pipeline_visualcloze_generation.py +++ b/tests/pipelines/visualcloze/test_pipeline_visualcloze_generation.py @@ -5,7 +5,7 @@ import numpy as np import torch from PIL import Image -from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel +from transformers import AutoConfig, AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel import diffusers from diffusers import ( @@ -79,7 +79,8 @@ def get_dummy_components(self): text_encoder = CLIPTextModel(clip_text_encoder_config) torch.manual_seed(0) - text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5") + text_encoder_2 = T5EncoderModel(config) tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5") diff --git a/tests/pipelines/wan/test_wan.py b/tests/pipelines/wan/test_wan.py index 106a7b294646..958e1b8c8eaf 100644 --- a/tests/pipelines/wan/test_wan.py +++ b/tests/pipelines/wan/test_wan.py @@ -18,7 +18,7 @@ import numpy as np import torch -from transformers import AutoTokenizer, T5EncoderModel +from transformers import AutoConfig, AutoTokenizer, T5EncoderModel from diffusers import AutoencoderKLWan, FlowMatchEulerDiscreteScheduler, WanPipeline, WanTransformer3DModel @@ -68,7 +68,8 @@ def get_dummy_components(self): torch.manual_seed(0) # TODO: impl FlowDPMSolverMultistepScheduler scheduler = FlowMatchEulerDiscreteScheduler(shift=7.0) - text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5") + text_encoder = T5EncoderModel(config) tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5") torch.manual_seed(0) diff --git a/tests/pipelines/wan/test_wan_22.py b/tests/pipelines/wan/test_wan_22.py index 56ef5ceb97ed..fd17ca414af4 100644 --- a/tests/pipelines/wan/test_wan_22.py +++ b/tests/pipelines/wan/test_wan_22.py @@ -17,14 +17,11 @@ import numpy as np import torch -from transformers import AutoTokenizer, T5EncoderModel +from transformers import AutoConfig, AutoTokenizer, T5EncoderModel from diffusers import AutoencoderKLWan, UniPCMultistepScheduler, WanPipeline, WanTransformer3DModel -from ...testing_utils import ( - enable_full_determinism, - torch_device, -) +from ...testing_utils import enable_full_determinism, torch_device from ..pipeline_params import TEXT_TO_IMAGE_BATCH_PARAMS, TEXT_TO_IMAGE_IMAGE_PARAMS, TEXT_TO_IMAGE_PARAMS from ..test_pipelines_common import PipelineTesterMixin @@ -63,7 +60,8 @@ def get_dummy_components(self): torch.manual_seed(0) scheduler = UniPCMultistepScheduler(prediction_type="flow_prediction", use_flow_sigmas=True, flow_shift=3.0) - text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5") + text_encoder = T5EncoderModel(config) tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5") torch.manual_seed(0) @@ -235,7 +233,8 @@ def get_dummy_components(self): torch.manual_seed(0) scheduler = UniPCMultistepScheduler(prediction_type="flow_prediction", use_flow_sigmas=True, flow_shift=3.0) - text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5") + text_encoder = T5EncoderModel(config) tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5") torch.manual_seed(0) diff --git a/tests/pipelines/wan/test_wan_22_image_to_video.py b/tests/pipelines/wan/test_wan_22_image_to_video.py index 6294d62044f3..4634047ebb73 100644 --- a/tests/pipelines/wan/test_wan_22_image_to_video.py +++ b/tests/pipelines/wan/test_wan_22_image_to_video.py @@ -18,7 +18,7 @@ import numpy as np import torch from PIL import Image -from transformers import AutoTokenizer, T5EncoderModel +from transformers import AutoConfig, AutoTokenizer, T5EncoderModel from diffusers import AutoencoderKLWan, UniPCMultistepScheduler, WanImageToVideoPipeline, WanTransformer3DModel @@ -64,7 +64,8 @@ def get_dummy_components(self): torch.manual_seed(0) scheduler = UniPCMultistepScheduler(prediction_type="flow_prediction", use_flow_sigmas=True, flow_shift=3.0) - text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5") + text_encoder = T5EncoderModel(config) tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5") torch.manual_seed(0) @@ -248,7 +249,8 @@ def get_dummy_components(self): torch.manual_seed(0) scheduler = UniPCMultistepScheduler(prediction_type="flow_prediction", use_flow_sigmas=True, flow_shift=3.0) - text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5") + text_encoder = T5EncoderModel(config) tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5") torch.manual_seed(0) diff --git a/tests/pipelines/wan/test_wan_animate.py b/tests/pipelines/wan/test_wan_animate.py index d6d1b09f3620..5d634fb71849 100644 --- a/tests/pipelines/wan/test_wan_animate.py +++ b/tests/pipelines/wan/test_wan_animate.py @@ -19,6 +19,7 @@ import torch from PIL import Image from transformers import ( + AutoConfig, AutoTokenizer, CLIPImageProcessor, CLIPVisionConfig, @@ -78,7 +79,8 @@ def get_dummy_components(self): torch.manual_seed(0) scheduler = FlowMatchEulerDiscreteScheduler(shift=7.0) - text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5") + text_encoder = T5EncoderModel(config) tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5") torch.manual_seed(0) diff --git a/tests/pipelines/wan/test_wan_image_to_video.py b/tests/pipelines/wan/test_wan_image_to_video.py index 07a9142f2553..7ed263abdcb5 100644 --- a/tests/pipelines/wan/test_wan_image_to_video.py +++ b/tests/pipelines/wan/test_wan_image_to_video.py @@ -19,6 +19,7 @@ import torch from PIL import Image from transformers import ( + AutoConfig, AutoTokenizer, CLIPImageProcessor, CLIPVisionConfig, @@ -68,7 +69,8 @@ def get_dummy_components(self): torch.manual_seed(0) # TODO: impl FlowDPMSolverMultistepScheduler scheduler = FlowMatchEulerDiscreteScheduler(shift=7.0) - text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5") + text_encoder = T5EncoderModel(config) tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5") torch.manual_seed(0) @@ -239,7 +241,8 @@ def get_dummy_components(self): torch.manual_seed(0) # TODO: impl FlowDPMSolverMultistepScheduler scheduler = FlowMatchEulerDiscreteScheduler(shift=7.0) - text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5") + text_encoder = T5EncoderModel(config) tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5") torch.manual_seed(0) diff --git a/tests/pipelines/wan/test_wan_vace.py b/tests/pipelines/wan/test_wan_vace.py index fe078c0deb8a..53becce1685d 100644 --- a/tests/pipelines/wan/test_wan_vace.py +++ b/tests/pipelines/wan/test_wan_vace.py @@ -18,7 +18,7 @@ import numpy as np import torch from PIL import Image -from transformers import AutoTokenizer, T5EncoderModel +from transformers import AutoConfig, AutoTokenizer, T5EncoderModel from diffusers import ( AutoencoderKLWan, @@ -67,7 +67,8 @@ def get_dummy_components(self): torch.manual_seed(0) scheduler = FlowMatchEulerDiscreteScheduler(shift=7.0) - text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5") + text_encoder = T5EncoderModel(config) tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5") torch.manual_seed(0) diff --git a/tests/pipelines/wan/test_wan_video_to_video.py b/tests/pipelines/wan/test_wan_video_to_video.py index 27ada121ca48..3804e972b97f 100644 --- a/tests/pipelines/wan/test_wan_video_to_video.py +++ b/tests/pipelines/wan/test_wan_video_to_video.py @@ -16,7 +16,7 @@ import torch from PIL import Image -from transformers import AutoTokenizer, T5EncoderModel +from transformers import AutoConfig, AutoTokenizer, T5EncoderModel from diffusers import AutoencoderKLWan, UniPCMultistepScheduler, WanTransformer3DModel, WanVideoToVideoPipeline @@ -62,7 +62,8 @@ def get_dummy_components(self): torch.manual_seed(0) scheduler = UniPCMultistepScheduler(flow_shift=3.0) - text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-t5") + text_encoder = T5EncoderModel(config) tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5") torch.manual_seed(0) From 351316328f0126222013cfe4c077f3b384f799e4 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Tue, 20 Jan 2026 10:11:08 +0530 Subject: [PATCH 11/13] matrix configuration to see differences between 4.57.3 and main failures. --- .github/workflows/pr_tests.yml | 25 +++++++++++++------ .github/workflows/pr_tests_gpu.yml | 39 +++++++++++++++++++++--------- 2 files changed, 44 insertions(+), 20 deletions(-) diff --git a/.github/workflows/pr_tests.yml b/.github/workflows/pr_tests.yml index b3d08dfce01e..f0b063c91b58 100644 --- a/.github/workflows/pr_tests.yml +++ b/.github/workflows/pr_tests.yml @@ -92,8 +92,9 @@ jobs: runner: aws-general-8-plus image: diffusers/diffusers-pytorch-cpu report: torch_example_cpu + transformers_version: ["4.57.3", "main"] - name: ${{ matrix.config.name }} + name: ${{ matrix.config.name }} (transformers ${{ matrix.transformers_version }}) runs-on: group: ${{ matrix.config.runner }} @@ -115,8 +116,11 @@ jobs: - name: Install dependencies run: | uv pip install -e ".[quality]" - uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git - # uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1 + if [ "${{ matrix.transformers_version }}" = "main" ]; then + uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git + else + uv pip uninstall transformers huggingface_hub && uv pip install transformers==${{ matrix.transformers_version }} + fi uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git --no-deps - name: Environment @@ -155,7 +159,7 @@ jobs: if: ${{ always() }} uses: actions/upload-artifact@v6 with: - name: pr_${{ matrix.config.framework }}_${{ matrix.config.report }}_test_reports + name: pr_${{ matrix.config.framework }}_${{ matrix.config.report }}_transformers_${{ matrix.transformers_version }}_test_reports path: reports run_staging_tests: @@ -220,8 +224,10 @@ jobs: needs: [check_code_quality, check_repository_consistency] strategy: fail-fast: false + matrix: + transformers_version: ["4.57.3", "main"] - name: LoRA tests with PEFT main + name: LoRA tests with PEFT main (transformers ${{ matrix.transformers_version }}) runs-on: group: aws-general-8-plus @@ -247,8 +253,11 @@ jobs: uv pip install -U peft@git+https://github.com/huggingface/peft.git --no-deps uv pip install -U tokenizers uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git --no-deps - uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git - # uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1 + if [ "${{ matrix.transformers_version }}" = "main" ]; then + uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git + else + uv pip uninstall transformers huggingface_hub && uv pip install transformers==${{ matrix.transformers_version }} + fi - name: Environment run: | @@ -275,6 +284,6 @@ jobs: if: ${{ always() }} uses: actions/upload-artifact@v6 with: - name: pr_main_test_reports + name: pr_lora_transformers_${{ matrix.transformers_version }}_test_reports path: reports diff --git a/.github/workflows/pr_tests_gpu.yml b/.github/workflows/pr_tests_gpu.yml index 58c7ba6263b5..c4007968323d 100644 --- a/.github/workflows/pr_tests_gpu.yml +++ b/.github/workflows/pr_tests_gpu.yml @@ -107,13 +107,14 @@ jobs: path: reports torch_pipelines_cuda_tests: - name: Torch Pipelines CUDA Tests + name: Torch Pipelines CUDA Tests (transformers ${{ matrix.transformers_version }}) needs: setup_torch_cuda_pipeline_matrix strategy: fail-fast: false max-parallel: 8 matrix: module: ${{ fromJson(needs.setup_torch_cuda_pipeline_matrix.outputs.pipeline_test_matrix) }} + transformers_version: ["4.57.3", "main"] runs-on: group: aws-g4dn-2xlarge container: @@ -132,8 +133,11 @@ jobs: run: | uv pip install -e ".[quality]" uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git - uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git - # uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1 + if [ "${{ matrix.transformers_version }}" = "main" ]; then + uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git + else + uv pip uninstall transformers huggingface_hub && uv pip install transformers==${{ matrix.transformers_version }} + fi - name: Environment run: | @@ -173,11 +177,11 @@ jobs: if: ${{ always() }} uses: actions/upload-artifact@v6 with: - name: pipeline_${{ matrix.module }}_test_reports + name: pipeline_${{ matrix.module }}_transformers_${{ matrix.transformers_version }}_test_reports path: reports torch_cuda_tests: - name: Torch CUDA Tests + name: Torch CUDA Tests (transformers ${{ matrix.transformers_version }}) needs: [check_code_quality, check_repository_consistency] runs-on: group: aws-g4dn-2xlarge @@ -192,6 +196,7 @@ jobs: max-parallel: 4 matrix: module: [models, schedulers, lora, others] + transformers_version: ["4.57.3", "main"] steps: - name: Checkout diffusers uses: actions/checkout@v6 @@ -203,8 +208,11 @@ jobs: uv pip install -e ".[quality]" uv pip install peft@git+https://github.com/huggingface/peft.git uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git - uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git - # uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1 + if [ "${{ matrix.transformers_version }}" = "main" ]; then + uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git + else + uv pip uninstall transformers huggingface_hub && uv pip install transformers==${{ matrix.transformers_version }} + fi - name: Environment run: | @@ -242,12 +250,16 @@ jobs: if: ${{ always() }} uses: actions/upload-artifact@v6 with: - name: torch_cuda_test_reports_${{ matrix.module }} + name: torch_cuda_test_reports_${{ matrix.module }}_transformers_${{ matrix.transformers_version }} path: reports run_examples_tests: - name: Examples PyTorch CUDA tests on Ubuntu + name: Examples PyTorch CUDA tests on Ubuntu (transformers ${{ matrix.transformers_version }}) needs: [check_code_quality, check_repository_consistency] + strategy: + fail-fast: false + matrix: + transformers_version: ["4.57.3", "main"] runs-on: group: aws-g4dn-2xlarge @@ -265,8 +277,11 @@ jobs: nvidia-smi - name: Install dependencies run: | - uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git - # uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1 + if [ "${{ matrix.transformers_version }}" = "main" ]; then + uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git + else + uv pip uninstall transformers huggingface_hub && uv pip install transformers==${{ matrix.transformers_version }} + fi uv pip install -e ".[quality,training]" - name: Environment @@ -290,6 +305,6 @@ jobs: if: ${{ always() }} uses: actions/upload-artifact@v6 with: - name: examples_test_reports + name: examples_transformers_${{ matrix.transformers_version }}_test_reports path: reports From 2fe9f9868db4006bc6857d33bf905bcdcd629432 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Tue, 20 Jan 2026 16:56:54 +0530 Subject: [PATCH 12/13] change qwen expected slice because of how init is handled in v5. --- tests/pipelines/qwenimage/test_qwenimage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/pipelines/qwenimage/test_qwenimage.py b/tests/pipelines/qwenimage/test_qwenimage.py index 8ebfe7d08bc1..f4ad5dc47477 100644 --- a/tests/pipelines/qwenimage/test_qwenimage.py +++ b/tests/pipelines/qwenimage/test_qwenimage.py @@ -160,7 +160,7 @@ def test_inference(self): self.assertEqual(generated_image.shape, (3, 32, 32)) # fmt: off - expected_slice = torch.tensor([0.56331, 0.63677, 0.6015, 0.56369, 0.58166, 0.55277, 0.57176, 0.63261, 0.41466, 0.35561, 0.56229, 0.48334, 0.49714, 0.52622, 0.40872, 0.50208]) + expected_slice = torch.tensor([0.5646, 0.6369, 0.6019, 0.5640, 0.5830, 0.5520, 0.5717, 0.6315, 0.4167, 0.3563, 0.5640, 0.4849, 0.4961, 0.5237, 0.4084, 0.5014]) # fmt: on generated_slice = generated_image.flatten() From e1249d26402dceb3efc78eb33b2c8ac9ef1f02d4 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Tue, 20 Jan 2026 18:05:20 +0530 Subject: [PATCH 13/13] same stuff. --- tests/pipelines/hidream_image/test_pipeline_hidream.py | 2 +- tests/pipelines/hunyuan_video/test_hunyuan_image2video.py | 2 +- tests/pipelines/qwenimage/test_qwenimage_edit.py | 2 +- tests/pipelines/qwenimage/test_qwenimage_edit_plus.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/pipelines/hidream_image/test_pipeline_hidream.py b/tests/pipelines/hidream_image/test_pipeline_hidream.py index 10b2cf1eaf9f..607e4b90a0af 100644 --- a/tests/pipelines/hidream_image/test_pipeline_hidream.py +++ b/tests/pipelines/hidream_image/test_pipeline_hidream.py @@ -151,7 +151,7 @@ def test_inference(self): self.assertEqual(generated_image.shape, (128, 128, 3)) # fmt: off - expected_slice = np.array([0.4507, 0.5256, 0.4205, 0.5791, 0.4848, 0.4831, 0.4443, 0.5107, 0.6586, 0.3163, 0.7318, 0.5933, 0.6252, 0.5512, 0.5357, 0.5983]) + expected_slice = np.array([0.4501, 0.5256, 0.4207, 0.5783, 0.4842, 0.4833, 0.4441, 0.5112, 0.6587, 0.3169, 0.7308, 0.5927, 0.6251, 0.5509, 0.5355, 0.5969]) # fmt: on generated_slice = generated_image.flatten() diff --git a/tests/pipelines/hunyuan_video/test_hunyuan_image2video.py b/tests/pipelines/hunyuan_video/test_hunyuan_image2video.py index 27b5bde31050..514579ce28d8 100644 --- a/tests/pipelines/hunyuan_video/test_hunyuan_image2video.py +++ b/tests/pipelines/hunyuan_video/test_hunyuan_image2video.py @@ -233,7 +233,7 @@ def test_inference(self): self.assertEqual(generated_video.shape, (5, 3, 16, 16)) # fmt: off - expected_slice = torch.tensor([0.444, 0.479, 0.4485, 0.5752, 0.3539, 0.1548, 0.2706, 0.3593, 0.5323, 0.6635, 0.6795, 0.5255, 0.5091, 0.345, 0.4276, 0.4128]) + expected_slice = torch.tensor([0.4441, 0.4790, 0.4485, 0.5748, 0.3539, 0.1553, 0.2707, 0.3594, 0.5331, 0.6645, 0.6799, 0.5257, 0.5092, 0.3450, 0.4276, 0.4127]) # fmt: on generated_slice = generated_video.flatten() diff --git a/tests/pipelines/qwenimage/test_qwenimage_edit.py b/tests/pipelines/qwenimage/test_qwenimage_edit.py index 058548cf5f1b..383c11937dd1 100644 --- a/tests/pipelines/qwenimage/test_qwenimage_edit.py +++ b/tests/pipelines/qwenimage/test_qwenimage_edit.py @@ -163,7 +163,7 @@ def test_inference(self): self.assertEqual(generated_image.shape, (3, 32, 32)) # fmt: off - expected_slice = torch.tensor([[0.5637, 0.6341, 0.6001, 0.5620, 0.5794, 0.5498, 0.5757, 0.6389, 0.4174, 0.3597, 0.5649, 0.4894, 0.4969, 0.5255, 0.4083, 0.4986]]) + expected_slice = torch.tensor([0.5640, 0.6350, 0.6003, 0.5606, 0.5801, 0.5502, 0.5757, 0.6388, 0.4174, 0.3590, 0.5647, 0.4891, 0.4975, 0.5256, 0.4088, 0.4991]) # fmt: on generated_slice = generated_image.flatten() diff --git a/tests/pipelines/qwenimage/test_qwenimage_edit_plus.py b/tests/pipelines/qwenimage/test_qwenimage_edit_plus.py index 6faf34728286..e8bc694ced84 100644 --- a/tests/pipelines/qwenimage/test_qwenimage_edit_plus.py +++ b/tests/pipelines/qwenimage/test_qwenimage_edit_plus.py @@ -164,7 +164,7 @@ def test_inference(self): self.assertEqual(generated_image.shape, (3, 32, 32)) # fmt: off - expected_slice = torch.tensor([[0.5637, 0.6341, 0.6001, 0.5620, 0.5794, 0.5498, 0.5757, 0.6389, 0.4174, 0.3597, 0.5649, 0.4894, 0.4969, 0.5255, 0.4083, 0.4986]]) + expected_slice = torch.tensor([0.5640, 0.6339, 0.5997, 0.5607, 0.5799, 0.5496, 0.5760, 0.6393, 0.4172, 0.3595, 0.5655, 0.4896, 0.4971, 0.5255, 0.4088, 0.4987]) # fmt: on generated_slice = generated_image.flatten()