|
| 1 | +""" |
| 2 | +Copyright 2025 Google LLC |
| 3 | +
|
| 4 | +Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | +you may not use this file except in compliance with the License. |
| 6 | +You may obtain a copy of the License at |
| 7 | +
|
| 8 | + https://www.apache.org/licenses/LICENSE-2.0 |
| 9 | +
|
| 10 | +Unless required by applicable law or agreed to in writing, software |
| 11 | +distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | +See the License for the specific language governing permissions and |
| 14 | +limitations under the License. |
| 15 | +""" |
| 16 | + |
| 17 | +import os |
| 18 | +import pytest |
| 19 | +import functools |
| 20 | +import jax |
| 21 | +import jax.numpy as jnp |
| 22 | +from flax.linen import partitioning as nn_partitioning |
| 23 | +from jax.sharding import Mesh |
| 24 | +from .. import pyconfig |
| 25 | +from ..max_utils import ( |
| 26 | + create_device_mesh, |
| 27 | +) |
| 28 | +import numpy as np |
| 29 | +import unittest |
| 30 | +from ..data_preprocessing.wan_txt2vid_data_preprocessing import vae_encode |
| 31 | +from ..checkpointing.wan_checkpointer_2_1 import WanCheckpointer2_1 |
| 32 | +from ..utils import load_video |
| 33 | +from ..video_processor import VideoProcessor |
| 34 | +import flax |
| 35 | + |
| 36 | +THIS_DIR = os.path.dirname(os.path.abspath(__file__)) |
| 37 | + |
| 38 | +CACHE_T = 2 |
| 39 | + |
| 40 | +IN_GITHUB_ACTIONS = os.getenv("GITHUB_ACTIONS") == "true" |
| 41 | + |
| 42 | +flax.config.update("flax_always_shard_variable", False) |
| 43 | + |
| 44 | + |
class DataProcessingTest(unittest.TestCase):
  """Integration tests for WAN 2.1 data preprocessing (VAE encode normalization)."""

  def setUp(self):
    # NOTE(review): stored on the class (not the instance) in the original;
    # nothing in this file reads it — preserved in case sibling tests rely on it.
    DataProcessingTest.dummy_data = {}
    # Load the WAN 14B base config and build the device mesh once per test.
    pyconfig.initialize(
        [
            None,
            os.path.join(THIS_DIR, "..", "configs", "base_wan_14b.yml"),
        ],
        unittest=True,
    )
    self.config = pyconfig.config
    devices_array = create_device_mesh(self.config)
    self.mesh = Mesh(devices_array, self.config.mesh_axes)

  @pytest.mark.skipif(IN_GITHUB_ACTIONS, reason="Don't run smoke tests on Github Actions")
  def test_wan_vae_encode_normalization(self):
    """Test wan vae encode function normalization.

    Encodes a sample video through the pipeline's VAE and checks that the
    resulting latents have the expected channel count and are roughly
    standard-normalized, globally and per channel.
    """
    # Reuse the config and mesh prepared in setUp instead of re-running
    # pyconfig.initialize and rebuilding the mesh (the original duplicated
    # the whole setUp body here).
    config = self.config
    mesh = self.mesh

    checkpoint_loader = WanCheckpointer2_1(config=config)
    pipeline, _, _ = checkpoint_loader.load_checkpoint()

    # Spatial scale factor is 2^(number of temporal downsample stages).
    # ("temperal" is the attribute's actual spelling in the VAE implementation.)
    vae_scale_factor_spatial = 2 ** len(pipeline.vae.temperal_downsample)
    video_processor = VideoProcessor(vae_scale_factor=vae_scale_factor_spatial)

    video_path = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/hiker.mp4"
    video = load_video(video_path)
    videos = [video_processor.preprocess_video([video], height=config.height, width=config.width)]
    # Drop the singleton axis introduced by the list wrapping above before
    # converting to a device array in the configured weights dtype.
    videos = jnp.array(np.squeeze(np.array(videos), axis=1), dtype=config.weights_dtype)
    p_vae_encode = jax.jit(functools.partial(vae_encode, vae=pipeline.vae, vae_cache=pipeline.vae_cache))

    rng = jax.random.key(config.seed)
    with mesh, nn_partitioning.axis_rules(config.logical_axis_rules):
      latents = p_vae_encode(videos, rng=rng)
      # 1. Verify channel count (Wan 2.1 latents have 16 channels).
      self.assertEqual(latents.shape[1], 16, f"Expected 16 channels, got {latents.shape[1]}")

      # 2. Verify global stats: mean near 0, variance near 1.
      # Tolerances are loose (0.2) since this is a single video, not a dataset.
      global_mean = jnp.mean(latents)
      global_var = jnp.var(latents)

      self.assertLess(abs(global_mean), 0.2, f"Global mean {global_mean} is too far from 0")
      self.assertAlmostEqual(global_var, 1.0, delta=0.2, msg=f"Global variance {global_var} is too far from 1.0")

      # 3. Verify channel-wise range: no channel is "dead" or "exploding".
      channel_vars = jnp.var(latents, axis=(0, 2, 3, 4))
      self.assertTrue(jnp.all(channel_vars > 0.1), "One or more channels have near-zero variance")
      self.assertTrue(jnp.all(channel_vars < 5.0), "One or more channels have exploding variance")