diff --git a/docs.json b/docs.json index 3c7e7d0e1..701bbd9ff 100644 --- a/docs.json +++ b/docs.json @@ -197,7 +197,13 @@ { "group": "Video", "pages": [ - "tutorials/video/ltxv", + { + "group": "LTX", + "pages": [ + "tutorials/video/ltxv", + "tutorials/video/ltx/ltx-2" + ] + }, { "group": "Wan Video", "pages": [ @@ -864,7 +870,13 @@ { "group": "视频", "pages": [ - "zh-CN/tutorials/video/ltxv", + { + "group": "LTX", + "pages": [ + "zh-CN/tutorials/video/ltxv", + "zh-CN/tutorials/video/ltx/ltx-2" + ] + }, { "group": "万相视频", "pages": [ diff --git a/tutorials/video/ltx/ltx-2.mdx b/tutorials/video/ltx/ltx-2.mdx new file mode 100644 index 000000000..a0d7f721f --- /dev/null +++ b/tutorials/video/ltx/ltx-2.mdx @@ -0,0 +1,132 @@ +--- +title: "LTX-2" +description: "A DiT-based audio-video foundation model for synchronized video and audio generation" +--- + +import UpdateReminder from "/snippets/tutorials/update-reminder.mdx"; + +[LTX-2](https://huggingface.co/Lightricks/LTX-2) is a 19B parameter DiT-based audio-video foundation model by Lightricks. It generates synchronized video and audio in a single pass, creating cohesive experiences where motion, dialogue, background noise, and music are produced together. 
+ + + +## Key features + +- **Synchronized audio-video generation**: Generates motion, dialogue, SFX, and music together in one pass +- **Multiple generation modes**: Text-to-video, image-to-video, and video-to-video +- **Control options**: Canny, Depth, and Pose video-to-video control via IC-LoRAs +- **Keyframe-driven generation**: Interpolate between keyframe images +- **Native upscaling**: Spatial (2x) and temporal (2x) upscalers for higher resolution and FPS +- **Prompt enhancement**: Automatic prompt enhancement support + +## Model checkpoints + +| Name | Description | +|------|-------------| +| ltx-2-19b-dev | Full model in bf16, flexible and trainable | +| ltx-2-19b-dev-fp8 | Full model in fp8 quantization | +| ltx-2-19b-distilled | Distilled version, 8 steps, CFG=1 | +| ltx-2-spatial-upscaler-x2-1.0 | 2x spatial upscaler for higher resolution | +| ltx-2-temporal-upscaler-x2-1.0 | 2x temporal upscaler for higher FPS | + +## Getting started + +LTX-2 is natively supported in ComfyUI. To get started: + +1. Update ComfyUI to the latest version +2. Go to **Template Library** > **Video** > choose any LTX-2 workflow +3. Follow the pop-up to download models and run the workflow + +## Workflows + +### Text-to-video + +Generate videos from text prompts. + + + + Download workflow + + + Open in cloud + + + +**Distilled version** (faster, 8 steps): + + Download workflow + + +### Image-to-video + +Generate videos from an input image. + + + + Download workflow + + + Open in cloud + + + +**Distilled version** (faster, 8 steps): + + Download workflow + + +### Control-to-video + +Generate videos with structural control using IC-LoRAs. + +**Depth control:** + + + Download workflow + + + Open in cloud + + + +**Canny control:** + + + Download workflow + + + Open in cloud + + + +**Pose control:** + + + Download workflow + + + Open in cloud + + + +## Prompting tips + +When writing prompts for LTX-2, focus on detailed, chronological descriptions of actions and scenes. 
Include specific movements, appearances, camera angles, and environmental details in a single flowing paragraph. Start directly with the action and keep descriptions literal and precise. + +Structure your prompts with: +- Main action in a single sentence +- Specific details about movements and gestures +- Character/object appearances +- Background and environment details +- Camera angles and movements +- Lighting and colors +- Any changes or sudden events + +Keep prompts within 200 words for best results. + +## Resources + +- [GitHub Repository](https://github.com/Lightricks/LTX-2) +- [Hugging Face Model](https://huggingface.co/Lightricks/LTX-2) +- [LTX-2 Prompting Guide](https://ltx.video/blog/how-to-prompt-for-ltx-2) +- [ComfyUI-LTXVideo](https://github.com/Lightricks/ComfyUI-LTXVideo/) + diff --git a/zh-CN/tutorials/video/ltx/ltx-2.mdx b/zh-CN/tutorials/video/ltx/ltx-2.mdx new file mode 100644 index 000000000..95e169e25 --- /dev/null +++ b/zh-CN/tutorials/video/ltx/ltx-2.mdx @@ -0,0 +1,132 @@ +--- +title: "LTX-2" +description: "基于 DiT 的音视频基础模型,支持同步生成视频和音频" +--- + +import UpdateReminder from "/snippets/zh/tutorials/update-reminder.mdx"; + +[LTX-2](https://huggingface.co/Lightricks/LTX-2) 是 Lightricks 推出的 190 亿参数 DiT 音视频基础模型。它可以在单次生成中同步产出视频和音频,将动作、对话、背景音效和音乐融为一体。 + + + +## 主要特性 + +- **音视频同步生成**:一次生成动作、对话、音效和音乐 +- **多种生成模式**:支持文生视频、图生视频和视频转视频 +- **控制选项**:通过 IC-LoRAs 支持 Canny、Depth 和 Pose 视频控制 +- **关键帧驱动生成**:在关键帧图像之间进行插值 +- **原生放大**:空间 (2x) 和时间 (2x) 放大器,提升分辨率和帧率 +- **提示词增强**:支持自动提示词增强 + +## 模型检查点 + +| 名称 | 描述 | +|------|------| +| ltx-2-19b-dev | bf16 完整模型,灵活可训练 | +| ltx-2-19b-dev-fp8 | fp8 量化完整模型 | +| ltx-2-19b-distilled | 蒸馏版本,8 步,CFG=1 | +| ltx-2-spatial-upscaler-x2-1.0 | 2x 空间放大器,提升分辨率 | +| ltx-2-temporal-upscaler-x2-1.0 | 2x 时间放大器,提升帧率 | + +## 快速入门 + +ComfyUI 已原生支持 LTX-2。开始使用: + +1. 将 ComfyUI 更新到最新版本 +2. 进入 **模板库** > **视频** > 选择任意 LTX-2 工作流 +3. 
按照弹窗提示下载模型并运行工作流 + +## 工作流 + +### 文生视频 + +从文本提示词生成视频。 + + + + 下载工作流 + + + 在云端打开 + + + +**蒸馏版本**(更快,8 步): + + 下载工作流 + + +### 图生视频 + +从输入图像生成视频。 + + + + 下载工作流 + + + 在云端打开 + + + +**蒸馏版本**(更快,8 步): + + 下载工作流 + + +### 控制生成视频 + +使用 IC-LoRAs 进行结构控制生成视频。 + +**深度控制:** + + + 下载工作流 + + + 在云端打开 + + + +**Canny 边缘控制:** + + + 下载工作流 + + + 在云端打开 + + + +**姿态控制:** + + + 下载工作流 + + + 在云端打开 + + + +## 提示词技巧 + +编写 LTX-2 提示词时,请专注于详细、按时间顺序描述动作和场景。在一个连贯的段落中包含具体的动作、外观、镜头角度和环境细节。直接从动作开始,保持描述的字面性和精确性。 + +提示词结构建议: +- 用一句话描述主要动作 +- 动作和手势的具体细节 +- 角色/物体外观 +- 背景和环境细节 +- 镜头角度和运动 +- 光线和色彩 +- 任何变化或突发事件 + +提示词最好控制在 200 词以内。 + +## 资源 + +- [GitHub 仓库](https://github.com/Lightricks/LTX-2) +- [Hugging Face 模型](https://huggingface.co/Lightricks/LTX-2) +- [LTX-2 提示词指南](https://ltx.video/blog/how-to-prompt-for-ltx-2) +- [ComfyUI-LTXVideo](https://github.com/Lightricks/ComfyUI-LTXVideo/) +