From 6bf3b5414f6849cff28bbce08b2d2043e6dfb76f Mon Sep 17 00:00:00 2001 From: yusing Date: Sun, 16 Jun 2024 11:51:03 +0800 Subject: [PATCH 1/4] Readme fix, add workflow for Stable Diffusion 3.0 (Save VAE + Create) --- README.md | 56 +- workflows/Create_SD3_Pytorch.json | 849 +++++++++++++++++++++++++ workflows/Create_SD3_TRT_Static.json | 898 +++++++++++++++++++++++++++ workflows/Save_SD3_VAE.json | 123 ++++ 4 files changed, 1894 insertions(+), 32 deletions(-) create mode 100644 workflows/Create_SD3_Pytorch.json create mode 100644 workflows/Create_SD3_TRT_Static.json create mode 100644 workflows/Save_SD3_VAE.json diff --git a/README.md b/README.md index ad780c0..872f205 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ Supports: - SDXL - SDXL Turbo - Stable Video Diffusion -- Stable Video Diffusion-XT  +- Stable Video Diffusion-XT Requirements: @@ -30,7 +30,7 @@ to easily install them to your ComfyUI instance. You can also manually install them by git cloning the repo to your ComfyUI/custom_nodes folder and installing the requirements like: -``` +```shell cd custom_nodes git clone https://github.com/comfyanonymous/ComfyUI_TensorRT cd ComfyUI_TensorRT @@ -68,19 +68,19 @@ These .json files can be loaded in ComfyUI. ### Building A TensorRT Engine From a Checkpoint -1. Add a Load Checkpoint Node -2. Add either a Static Model TensorRT Conversion node or a Dynamic +1. Add a Load Checkpoint Node +2. Add either a Static Model TensorRT Conversion node or a Dynamic Model TensorRT Conversion node to ComfyUI -3. ![](readme_images/image3.png) -4. Connect the Load Checkpoint Model output to the TensorRT Conversion +3. ![](readme_images/image3.png) +4. Connect the Load Checkpoint Model output to the TensorRT Conversion Node Model input. -5. ![](readme_images/image5.png) -6. ![](readme_images/image2.png) -7. To help identify the converted TensorRT model, provide a meaningful +5. ![](readme_images/image5.png) +6. ![](readme_images/image2.png) +7. To help identify the converted TensorRT model, provide a meaningful filename prefix, add this filename after “tensorrt/” -8. ![](readme_images/image9.png) +8. ![](readme_images/image9.png) -9. Click on Queue Prompt to start building the TensorRT Engines +9. Click on Queue Prompt to start building the TensorRT Engines 10. ![](readme_images/image7.png) ![](readme_images/image11.png) @@ -112,33 +112,25 @@ TensorRT Engines are loaded using the TensorRT Loader node. ComfyUI TensorRT engines are not yet compatible with ControlNets or LoRAs. Compatibility will be enabled in a future update. -1. Add a TensorRT Loader node -2. Note, if a TensorRT Engine has been created during a ComfyUI +1. Add a TensorRT Loader node +2. Note, if a TensorRT Engine has been created during a ComfyUI session, it will not show up in the TensorRT Loader until the ComfyUI interface has been refreshed (F5 to refresh browser). -3. ![](readme_images/image6.png) -4. Select a TensorRT Engine from the unet_name dropdown -5. Dynamic Engines will use a filename format of: - -  +3. ![](readme_images/image6.png) +4. Select a TensorRT Engine from the unet_name dropdown +5. Dynamic Engines will use a filename format of: -1. dyn-b-min-max-opt-h-min-max-opt-w-min-max-opt -2. dyn=dynamic, b=batch size, h=height, w=width - -  + 1. dyn-b-min-max-opt-h-min-max-opt-w-min-max-opt + 2. dyn=dynamic, b=batch size, h=height, w=width -6. Static Engine will use a filename format of: +6. Static Engine will use a filename format of: -  - -1. stat-b-opt-h-opt-w-opt -2. stat=static, b=batch size, h=height, w=width - -  + 1. stat-b-opt-h-opt-w-opt + 2. stat=static, b=batch size, h=height, w=width -7. ![](readme_images/image8.png) -8. The model_type must match the model type of the TensorRT engine. -9. ![](readme_images/image10.png) +7. ![](readme_images/image8.png) +8. The model_type must match the model type of the TensorRT engine. +9. ![](readme_images/image10.png) 10. The CLIP and VAE for the workflow will need to be utilized from the original model checkpoint, the MODEL output from the TensorRT Loader will be connected to the Sampler. diff --git a/workflows/Create_SD3_Pytorch.json b/workflows/Create_SD3_Pytorch.json new file mode 100644 index 0000000..8d6230b --- /dev/null +++ b/workflows/Create_SD3_Pytorch.json @@ -0,0 +1,849 @@ +{ + "last_node_id": 278, + "last_link_id": 614, + "nodes": [ + { + "id": 68, + "type": "ConditioningSetTimestepRange", + "pos": [ + -1010, + 167 + ], + "size": { + "0": 317.4000244140625, + "1": 82 + }, + "flags": {}, + "order": 12, + "mode": 0, + "inputs": [ + { + "name": "conditioning", + "type": "CONDITIONING", + "link": 90 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 91 + ], + "shape": 3, + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "ConditioningSetTimestepRange" + }, + "widgets_values": [ + 0.1, + 1 + ] + }, + { + "id": 70, + "type": "ConditioningSetTimestepRange", + "pos": [ + -1006, + 314 + ], + "size": { + "0": 317.4000244140625, + "1": 82 + }, + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "name": "conditioning", + "type": "CONDITIONING", + "link": 93, + "slot_index": 0 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 92 + ], + "shape": 3, + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "ConditioningSetTimestepRange" + }, + "widgets_values": [ + 0, + 0.1 + ] + }, + { + "id": 67, + "type": "ConditioningZeroOut", + "pos": [ + -1370, + 337 + ], + "size": { + "0": 211.60000610351562, + "1": 26 + }, + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "name": "conditioning", + "type": "CONDITIONING", + "link": 580 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 90 + ], + "shape": 3, + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "ConditioningZeroOut" + } + }, + { + "id": 266, + "type": "Note", + "pos": [ + -2352, + 576 + ], + "size": { + "0": 308.061279296875, + "1": 102.86902618408203 + }, + "flags": {}, + "order": 0, + "mode": 0, + "properties": { + "text": "" + }, + "widgets_values": [ + "Resolution should be around 1 megapixel and width/height must be multiple of 64" + ], + "color": "#432", + "bgcolor": "#653" + }, + { + "id": 69, + "type": "ConditioningCombine", + "pos": [ + -662, + 165 + ], + "size": { + "0": 228.39999389648438, + "1": 46 + }, + "flags": {}, + "order": 13, + "mode": 0, + "inputs": [ + { + "name": "conditioning_1", + "type": "CONDITIONING", + "link": 91 + }, + { + "name": "conditioning_2", + "type": "CONDITIONING", + "link": 92 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 592 + ], + "shape": 3, + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "ConditioningCombine" + } + }, + { + "id": 231, + "type": "VAEDecode", + "pos": [ + 141, + -177 + ], + "size": { + "0": 210, + "1": 46 + }, + "flags": {}, + "order": 15, + "mode": 0, + "inputs": [ + { + "name": "samples", + "type": "LATENT", + "link": 596 + }, + { + "name": "vae", + "type": "VAE", + "link": 557 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 599 + ], + "shape": 3, + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "VAEDecode" + } + }, + { + "id": 11, + "type": "TripleCLIPLoader", + "pos": [ + -2067, + -47 + ], + "size": { + "0": 502.6791076660156, + "1": 106 + }, + "flags": {}, + "order": 1, + "mode": 0, + "outputs": [ + { + "name": "CLIP", + "type": "CLIP", + "links": [ + 5, + 94 + ], + "shape": 3, + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "TripleCLIPLoader" + }, + "widgets_values": [ + "clip_g.safetensors", + "clip_l.safetensors", + "t5xxl_fp8_e4m3fn.safetensors" + ] + }, + { + "id": 233, + "type": "PreviewImage", + "pos": [ + 500, + -166 + ], + "size": { + "0": 319.3996887207031, + "1": 688.7749633789062 + }, + "flags": {}, + "order": 16, + "mode": 0, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 599 + } + ], + "properties": { + "Node name for S&R": "PreviewImage" + } + }, + { + "id": 13, + "type": "ModelSamplingSD3", + "pos": [ + -1425, + -202 + ], + "size": { + "0": 315, + "1": 58 + }, + "flags": { + "collapsed": false + }, + "order": 8, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 605 + } + ], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "links": [ + 607 + ], + "shape": 3, + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "ModelSamplingSD3" + }, + "widgets_values": [ + 3 + ] + }, + { + "id": 271, + "type": "KSampler", + "pos": [ + -330, + -201 + ], + "size": { + "0": 315, + "1": 446 + }, + "flags": {}, + "order": 14, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 614 + }, + { + "name": "positive", + "type": "CONDITIONING", + "link": 595 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 592 + }, + { + "name": "latent_image", + "type": "LATENT", + "link": 593 + }, + { + "name": "seed", + "type": "INT", + "link": 597, + "widget": { + "name": "seed" + }, + "slot_index": 4 + } + ], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "links": [ + 596 + ], + "shape": 3, + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "KSampler" + }, + "widgets_values": [ + 108510363512649, + "fixed", + 28, + 4.5, + "dpmpp_2m", + "sgm_uniform", + 1 + ] + }, + { + "id": 135, + "type": "EmptySD3LatentImage", + "pos": [ + -2352, + 438 + ], + "size": { + "0": 315, + "1": 106 + }, + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "links": [ + 593 + ], + "shape": 3, + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "EmptySD3LatentImage" + }, + "widgets_values": [ + 1024, + 1536, + 1 + ] + }, + { + "id": 274, + "type": "LoraLoaderModelOnly", + "pos": [ + -1054, + -203 + ], + "size": { + "0": 315, + "1": 82 + }, + "flags": {}, + "order": 11, + "mode": 4, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 607 + } + ], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "links": [ + 614 + ], + "shape": 3, + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "LoraLoaderModelOnly" + }, + "widgets_values": [ + "Real\\XL绪儿-引魂灯.safetensors", + 1 + ] + }, + { + "id": 71, + "type": "CLIPTextEncode", + "pos": [ + -1869.2871546875003, + 560.071803930664 + ], + "size": { + "0": 380.4615783691406, + "1": 102.07693481445312 + }, + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 94 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 93, + 580 + ], + "shape": 3, + "slot_index": 0 + } + ], + "title": "Negative Prompt", + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "" + ], + "color": "#322", + "bgcolor": "#533" + }, + { + "id": 6, + "type": "CLIPTextEncode", + "pos": [ + -1869, + 284 + ], + "size": { + "0": 389.06927490234375, + "1": 207.84902954101562 + }, + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 5 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 595 + ], + "shape": 3, + "slot_index": 0 + } + ], + "title": "Prompt", + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "" + ], + "color": "#232", + "bgcolor": "#353" + }, + { + "id": 272, + "type": "PrimitiveNode", + "pos": [ + -2342, + 278 + ], + "size": { + "0": 210, + "1": 82 + }, + "flags": {}, + "order": 3, + "mode": 0, + "outputs": [ + { + "name": "INT", + "type": "INT", + "links": [ + 597 + ], + "slot_index": 0, + "widget": { + "name": "seed" + } + } + ], + "title": "Seed", + "properties": { + "Run widget replace on values": false + }, + "widgets_values": [ + 108510363512649, + "randomize" + ] + }, + { + "id": 252, + "type": "CheckpointLoaderSimple", + "pos": [ + -2314, + -203 + ], + "size": { + "0": 746.7357788085938, + "1": 98 + }, + "flags": {}, + "order": 4, + "mode": 0, + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "links": [ + 605 + ], + "shape": 3, + "slot_index": 0 + }, + { + "name": "CLIP", + "type": "CLIP", + "links": [], + "shape": 3, + "slot_index": 1 + }, + { + "name": "VAE", + "type": "VAE", + "links": [ + 557 + ], + "shape": 3, + "slot_index": 2 + } + ], + "properties": { + "Node name for S&R": "CheckpointLoaderSimple" + }, + "widgets_values": [ + "stableDiffusion3SD3_sd3Medium.safetensors" + ] + }, + { + "id": 278, + "type": "Note", + "pos": [ + -1050, + -311 + ], + "size": [ + 309.0594491527561, + 58 + ], + "flags": {}, + "order": 5, + "mode": 0, + "properties": { + "text": "" + }, + "widgets_values": [ + "Right click > bypass to choose a Lora model" + ], + "color": "#432", + "bgcolor": "#653" + } + ], + "links": [ + [ + 5, + 11, + 0, + 6, + 0, + "CLIP" + ], + [ + 90, + 67, + 0, + 68, + 0, + "CONDITIONING" + ], + [ + 91, + 68, + 0, + 69, + 0, + "CONDITIONING" + ], + [ + 92, + 70, + 0, + 69, + 1, + "CONDITIONING" + ], + [ + 93, + 71, + 0, + 70, + 0, + "CONDITIONING" + ], + [ + 94, + 11, + 0, + 71, + 0, + "CLIP" + ], + [ + 557, + 252, + 2, + 231, + 1, + "VAE" + ], + [ + 580, + 71, + 0, + 67, + 0, + "CONDITIONING" + ], + [ + 592, + 69, + 0, + 271, + 2, + "CONDITIONING" + ], + [ + 593, + 135, + 0, + 271, + 3, + "LATENT" + ], + [ + 595, + 6, + 0, + 271, + 1, + "CONDITIONING" + ], + [ + 596, + 271, + 0, + 231, + 0, + "LATENT" + ], + [ + 597, + 272, + 0, + 271, + 4, + "INT" + ], + [ + 599, + 231, + 0, + 233, + 0, + "IMAGE" + ], + [ + 605, + 252, + 0, + 13, + 0, + "MODEL" + ], + [ + 607, + 13, + 0, + 274, + 0, + "MODEL" + ], + [ + 614, + 274, + 0, + 271, + 0, + "MODEL" + ] + ], + "groups": [ + { + "title": "Load Models", + "bounding": [ + -2410, + -339, + 969, + 488 + ], + "color": "#3f789e", + "font_size": 24 + }, + { + "title": "Input", + "bounding": [ + -2409, + 181, + 972, + 523 + ], + "color": "#3f789e", + "font_size": 24 + }, + { + "title": "Output", + "bounding": [ + 464, + -273, + 741, + 814 + ], + "color": "#3f789e", + "font_size": 24 + } + ], + "config": {}, + "extra": { + "ds": { + "scale": 0.6830134553650711, + "offset": { + "0": 2362.777755858448, + "1": -166.11852144523655 + } + } + }, + "version": 0.4 +} \ No newline at end of file diff --git a/workflows/Create_SD3_TRT_Static.json b/workflows/Create_SD3_TRT_Static.json new file mode 100644 index 0000000..ece7a19 --- /dev/null +++ b/workflows/Create_SD3_TRT_Static.json @@ -0,0 +1,898 @@ +{ + "last_node_id": 284, + "last_link_id": 624, + "nodes": [ + { + "id": 233, + "type": "PreviewImage", + "pos": [ + 203.5900231198122, + -194.69003825585946 + ], + "size": { + "0": 319.3996887207031, + "1": 688.7749633789062 + }, + "flags": {}, + "order": 18, + "mode": 0, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 599 + } + ], + "properties": { + "Node name for S&R": "PreviewImage" + } + }, + { + "id": 13, + "type": "ModelSamplingSD3", + "pos": [ + -1425, + -202 + ], + "size": { + "0": 315, + "1": 58 + }, + "flags": { + "collapsed": false + }, + "order": 8, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 617 + } + ], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "links": [ + 607 + ], + "shape": 3, + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "ModelSamplingSD3" + }, + "widgets_values": [ + 3 + ] + }, + { + "id": 274, + "type": "LoraLoaderModelOnly", + "pos": [ + -1054, + -203 + ], + "size": { + "0": 315, + "1": 82 + }, + "flags": {}, + "order": 11, + "mode": 4, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 607 + } + ], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "links": [ + 614 + ], + "shape": 3, + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "LoraLoaderModelOnly" + }, + "widgets_values": [ + "Real\\XL绪儿-引魂灯.safetensors", + 1 + ] + }, + { + "id": 279, + "type": "TensorRTLoader", + "pos": [ + -1826, + -204 + ], + "size": { + "0": 315, + "1": 82 + }, + "flags": {}, + "order": 0, + "mode": 0, + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "links": [ + 617 + ], + "shape": 3, + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "TensorRTLoader" + }, + "widgets_values": [ + "SD3_Medium_$stat-b-1-h-768-w-1344_00001_.engine", + "sd3" + ] + }, + { + "id": 67, + "type": "ConditioningZeroOut", + "pos": [ + -1370, + 340 + ], + "size": { + "0": 211.60000610351562, + "1": 26 + }, + "flags": {}, + "order": 13, + "mode": 0, + "inputs": [ + { + "name": "conditioning", + "type": "CONDITIONING", + "link": 580 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 90 + ], + "shape": 3, + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "ConditioningZeroOut" + } + }, + { + "id": 68, + "type": "ConditioningSetTimestepRange", + "pos": [ + -1101, + 178 + ], + "size": { + "0": 317.4000244140625, + "1": 82 + }, + "flags": {}, + "order": 14, + "mode": 0, + "inputs": [ + { + "name": "conditioning", + "type": "CONDITIONING", + "link": 90 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 91 + ], + "shape": 3, + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "ConditioningSetTimestepRange" + }, + "widgets_values": [ + 0.1, + 1 + ] + }, + { + "id": 69, + "type": "ConditioningCombine", + "pos": [ + -736, + 172 + ], + "size": { + "0": 228.39999389648438, + "1": 46 + }, + "flags": {}, + "order": 15, + "mode": 0, + "inputs": [ + { + "name": "conditioning_1", + "type": "CONDITIONING", + "link": 91 + }, + { + "name": "conditioning_2", + "type": "CONDITIONING", + "link": 92 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 592 + ], + "shape": 3, + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "ConditioningCombine" + } + }, + { + "id": 231, + "type": "VAEDecode", + "pos": [ + -107, + -194 + ], + "size": { + "0": 210, + "1": 46 + }, + "flags": {}, + "order": 17, + "mode": 0, + "inputs": [ + { + "name": "samples", + "type": "LATENT", + "link": 596 + }, + { + "name": "vae", + "type": "VAE", + "link": 620 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 599 + ], + "shape": 3, + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "VAEDecode" + } + }, + { + "id": 271, + "type": "KSampler", + "pos": [ + -496, + -113 + ], + "size": { + "0": 315, + "1": 446 + }, + "flags": {}, + "order": 16, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 614 + }, + { + "name": "positive", + "type": "CONDITIONING", + "link": 595 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 592 + }, + { + "name": "latent_image", + "type": "LATENT", + "link": 593 + }, + { + "name": "seed", + "type": "INT", + "link": 597, + "widget": { + "name": "seed" + }, + "slot_index": 4 + } + ], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "links": [ + 596 + ], + "shape": 3, + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "KSampler" + }, + "widgets_values": [ + 477292933307989, + "fixed", + 28, + 4.5, + "dpmpp_2m", + "sgm_uniform", + 1 + ] + }, + { + "id": 282, + "type": "VAELoader", + "pos": [ + -496, + -241 + ], + "size": { + "0": 315, + "1": 58 + }, + "flags": {}, + "order": 1, + "mode": 0, + "outputs": [ + { + "name": "VAE", + "type": "VAE", + "links": [ + 620 + ], + "shape": 3, + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "VAELoader" + }, + "widgets_values": [ + "SD3_Vae_00001_.safetensors" + ] + }, + { + "id": 272, + "type": "PrimitiveNode", + "pos": [ + -2245, + 551 + ], + "size": { + "0": 210, + "1": 82 + }, + "flags": {}, + "order": 2, + "mode": 0, + "outputs": [ + { + "name": "INT", + "type": "INT", + "links": [ + 597 + ], + "slot_index": 0, + "widget": { + "name": "seed" + } + } + ], + "title": "Seed", + "properties": { + "Run widget replace on values": false + }, + "widgets_values": [ + 477292933307989, + "randomize" + ] + }, + { + "id": 135, + "type": "EmptySD3LatentImage", + "pos": [ + -2352, + 687 + ], + "size": { + "0": 315, + "1": 106 + }, + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "links": [ + 593 + ], + "shape": 3, + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "EmptySD3LatentImage" + }, + "widgets_values": [ + 1344, + 768, + 1 + ] + }, + { + "id": 278, + "type": "DualCLIPLoader", + "pos": [ + -2247, + 388 + ], + "size": { + "0": 210, + "1": 106 + }, + "flags": {}, + "order": 4, + "mode": 0, + "outputs": [ + { + "name": "CLIP", + "type": "CLIP", + "links": [ + 623, + 624 + ], + "shape": 3, + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "DualCLIPLoader" + }, + "widgets_values": [ + "clip_g.safetensors", + "clip_l.safetensors", + "sd3" + ] + }, + { + "id": 284, + "type": "TripleCLIPLoader", + "pos": [ + -2751, + 383 + ], + "size": { + "0": 308.6756286621094, + "1": 106 + }, + "flags": {}, + "order": 5, + "mode": 0, + "outputs": [ + { + "name": "CLIP", + "type": "CLIP", + "links": [], + "shape": 3, + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "TripleCLIPLoader" + }, + "widgets_values": [ + "clip_g.safetensors", + "clip_l.safetensors", + "t5xxl_fp8_e4m3fn.safetensors" + ] + }, + { + "id": 283, + "type": "Note", + "pos": [ + -2677, + 249 + ], + "size": { + "0": 212.2451171875, + "1": 89.32557678222656 + }, + "flags": {}, + "order": 6, + "mode": 0, + "properties": { + "text": "" + }, + "widgets_values": [ + "Replace DualCLIPLoader with Triple if you do not have enough VRAM." + ], + "color": "#432", + "bgcolor": "#653" + }, + { + "id": 71, + "type": "CLIPTextEncode", + "pos": [ + -2012, + 519 + ], + "size": [ + 437.94467296388257, + 131.32229289176644 + ], + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 624 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 93, + 580 + ], + "shape": 3, + "slot_index": 0 + } + ], + "title": "Negative Prompt", + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "" + ], + "color": "#322", + "bgcolor": "#533" + }, + { + "id": 6, + "type": "CLIPTextEncode", + "pos": [ + -2014, + 268 + ], + "size": [ + 438.77332147462494, + 204.28772395560924 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 623 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 595 + ], + "shape": 3, + "slot_index": 0 + } + ], + "title": "Prompt", + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "" + ], + "color": "#232", + "bgcolor": "#353" + }, + { + "id": 266, + "type": "Note", + "pos": [ + -2009, + 692 + ], + "size": { + "0": 210, + "1": 89.53284454345703 + }, + "flags": {}, + "order": 7, + "mode": 0, + "properties": { + "text": "" + }, + "widgets_values": [ + "Resolution should be around 1 megapixel and width/height must be multiple of 64" + ], + "color": "#432", + "bgcolor": "#653" + }, + { + "id": 70, + "type": "ConditioningSetTimestepRange", + "pos": [ + -1100, + 325 + ], + "size": [ + 316.30387720649946, + 98.3692518853423 + ], + "flags": {}, + "order": 12, + "mode": 0, + "inputs": [ + { + "name": "conditioning", + "type": "CONDITIONING", + "link": 93, + "slot_index": 0 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 92 + ], + "shape": 3, + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "ConditioningSetTimestepRange" + }, + "widgets_values": [ + 0, + 0.1 + ] + } + ], + "links": [ + [ + 90, + 67, + 0, + 68, + 0, + "CONDITIONING" + ], + [ + 91, + 68, + 0, + 69, + 0, + "CONDITIONING" + ], + [ + 92, + 70, + 0, + 69, + 1, + "CONDITIONING" + ], + [ + 93, + 71, + 0, + 70, + 0, + "CONDITIONING" + ], + [ + 580, + 71, + 0, + 67, + 0, + "CONDITIONING" + ], + [ + 592, + 69, + 0, + 271, + 2, + "CONDITIONING" + ], + [ + 593, + 135, + 0, + 271, + 3, + "LATENT" + ], + [ + 595, + 6, + 0, + 271, + 1, + "CONDITIONING" + ], + [ + 596, + 271, + 0, + 231, + 0, + "LATENT" + ], + [ + 597, + 272, + 0, + 271, + 4, + "INT" + ], + [ + 599, + 231, + 0, + 233, + 0, + "IMAGE" + ], + [ + 607, + 13, + 0, + 274, + 0, + "MODEL" + ], + [ + 614, + 274, + 0, + 271, + 0, + "MODEL" + ], + [ + 617, + 279, + 0, + 13, + 0, + "MODEL" + ], + [ + 620, + 282, + 0, + 231, + 1, + "VAE" + ], + [ + 623, + 278, + 0, + 6, + 0, + "CLIP" + ], + [ + 624, + 278, + 0, + 71, + 0, + "CLIP" + ] + ], + "groups": [ + { + "title": "Load Models", + "bounding": [ + -2410, + -339, + 969, + 488 + ], + "color": "#3f789e", + "font_size": 24 + }, + { + "title": "Input", + "bounding": [ + -2412, + 180, + 969, + 645 + ], + "color": "#3f789e", + "font_size": 24 + }, + { + "title": "Output", + "bounding": [ + 168, + -302, + 741, + 814 + ], + "color": "#3f789e", + "font_size": 24 + } + ], + "config": {}, + "extra": { + "ds": { + "scale": 0.5644739300537774, + "offset": { + "0": 1916.077935619001, + "1": 185.33911852740795 + } + } + }, + "version": 0.4 +} \ No newline at end of file diff --git a/workflows/Save_SD3_VAE.json b/workflows/Save_SD3_VAE.json new file mode 100644 index 0000000..4c36464 --- /dev/null +++ b/workflows/Save_SD3_VAE.json @@ -0,0 +1,123 @@ +{ + "last_node_id": 3, + "last_link_id": 1, + "nodes": [ + { + "id": 2, + "type": "Note", + "pos": [ + 230, + 11 + ], + "size": { + "0": 210, + "1": 58 + }, + "flags": {}, + "order": 0, + "mode": 0, + "properties": { + "text": "" + }, + "widgets_values": [ + "Select SD3 Base Model here" + ], + "color": "#432", + "bgcolor": "#653" + }, + { + "id": 1, + "type": "CheckpointLoaderSimple", + "pos": [ + 227, + 115 + ], + "size": { + "0": 315, + "1": 98 + }, + "flags": {}, + "order": 1, + "mode": 0, + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "links": null, + "shape": 3 + }, + { + "name": "CLIP", + "type": "CLIP", + "links": null, + "shape": 3 + }, + { + "name": "VAE", + "type": "VAE", + "links": [ + 1 + ], + "shape": 3, + "slot_index": 2 + } + ], + "properties": { + "Node name for S&R": "CheckpointLoaderSimple" + }, + "widgets_values": [ + "stableDiffusion3SD3_sd3Medium.safetensors" + ] + }, + { + "id": 3, + "type": "VAESave", + "pos": [ + 576, + 152 + ], + "size": { + "0": 315, + "1": 58 + }, + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "name": "vae", + "type": "VAE", + "link": 1 + } + ], + "properties": { + "Node name for S&R": "VAESave" + }, + "widgets_values": [ + "vae/SD3_Vae" + ] + } + ], + "links": [ + [ + 1, + 1, + 2, + 3, + 0, + "VAE" + ] + ], + "groups": [], + "config": {}, + "extra": { + "ds": { + "scale": 1, + "offset": [ + -140.800048828125, + 148.8000030517578 + ] + } + }, + "version": 0.4 +} \ No newline at end of file From 0f2d14e9a5b3c291a20b236ed57fdde78191db78 Mon Sep 17 00:00:00 2001 From: yusing Date: Mon, 17 Jun 2024 00:57:21 +0800 Subject: [PATCH 2/4] add unload_before and unload_after option, code refactor --- tensorrt_convert.py | 57 +++++++++++++++++++++++++++++++-------------- 1 file changed, 40 insertions(+), 17 deletions(-) diff --git a/tensorrt_convert.py b/tensorrt_convert.py index ef694ee..d4e4cde 100644 --- a/tensorrt_convert.py +++ b/tensorrt_convert.py @@ -24,6 +24,7 @@ {".engine"}, ) + class TQDMProgressMonitor(trt.IProgressMonitor): def __init__(self): trt.IProgressMonitor.__init__(self) @@ -93,14 +94,18 @@ def step_complete(self, phase_name, step): except KeyboardInterrupt: # There is no need to propagate this exception to TensorRT. We can simply cancel the build. return False - + class TRT_MODEL_CONVERSION_BASE: def __init__(self): self.output_dir = folder_paths.get_output_directory() self.temp_dir = folder_paths.get_temp_directory() self.timing_cache_path = os.path.normpath( - os.path.join(os.path.join(os.path.dirname(os.path.realpath(__file__)), "timing_cache.trt")) + os.path.join( + os.path.join( + os.path.dirname(os.path.realpath(__file__)), "timing_cache.trt" + ) + ) ) RETURN_TYPES = () @@ -148,14 +153,15 @@ def _convert( context_max, num_video_frames, is_static: bool, + unload_before: bool = True, + unload_after: bool = True, ): output_onnx = os.path.normpath( - os.path.join( - os.path.join(self.temp_dir, "{}".format(time.time())), "model.onnx" - ) + os.path.join(self.temp_dir, str(time.time()), "model.onnx") ) - comfy.model_management.unload_all_models() + if unload_before: + comfy.model_management.unload_all_models() comfy.model_management.load_models_gpu([model], force_patch_weights=True) unet = model.model.diffusion_model @@ -163,11 +169,15 @@ def _convert( context_len = 77 context_len_min = context_len - if context_dim is None: #SD3 - context_embedder_config = model.model.model_config.unet_config.get("context_embedder_config", None) + if context_dim is None: # SD3 + context_embedder_config = model.model.model_config.unet_config.get( + "context_embedder_config", None + ) if context_embedder_config is not None: - context_dim = context_embedder_config.get("params", {}).get("in_features", None) - context_len = 154 #NOTE: SD3 can have 77 or 154 depending on which text encoders are used, this is why context_len_min stays 77 + context_dim = context_embedder_config.get("params", {}).get( + "in_features", None + ) + context_len = 154 # NOTE: SD3 can have 77 or 154 depending on which text encoders are used, this is why context_len_min stays 77 if context_dim is not None: input_names = ["x", "timesteps", "context"] @@ -179,7 +189,7 @@ def _convert( "context": {0: "batch", 1: "num_embeds"}, } - transformer_options = model.model_options['transformer_options'].copy() + transformer_options = model.model_options["transformer_options"].copy() if model.model.model_config.unet_config.get( "use_temporal_resblock", False ): # SVD @@ -205,7 +215,13 @@ def forward(self, x, timesteps, context, y): unet = svd_unet context_len_min = context_len = 1 else: + class UNET(torch.nn.Module): + def __init__(self, unet, opts): + super().__init__() + self.unet = unet + self.transformer_options = opts + def forward(self, x, timesteps, context, y=None): return self.unet( x, @@ -214,10 +230,8 @@ def forward(self, x, timesteps, context, y=None): y, transformer_options=self.transformer_options, ) - _unet = UNET() - _unet.unet = unet - _unet.transformer_options = transformer_options - unet = _unet + + unet = UNET(unet, transformer_options) input_channels = model.model.model_config.unet_config.get("in_channels") @@ -272,7 +286,8 @@ def forward(self, x, timesteps, context, y=None): dynamic_axes=dynamic_axes, ) - comfy.model_management.unload_all_models() + if unload_after: + comfy.model_management.unload_all_models() comfy.model_management.soft_empty_cache() # TRT conversion starts here @@ -304,7 +319,9 @@ def forward(self, x, timesteps, context, y=None): profile.set_shape(input_names[k], min_shape, opt_shape, max_shape) # Encode shapes to filename - encode = lambda a: ".".join(map(lambda x: str(x), a)) + def encode(a): + return ".".join(map(str, a)) + prefix_encode += "{}#{}#{}#{};".format( input_names[k], encode(min_shape), encode(opt_shape), encode(max_shape) ) @@ -589,6 +606,8 @@ def INPUT_TYPES(s): "step": 1, }, ), + "unload_before": ("BOOLEAN", {"default": True}), + "unload_after": ("BOOLEAN", {"default": True}), }, } @@ -601,6 +620,8 @@ def convert( width_opt, context_opt, num_video_frames, + unload_before: bool = True, + unload_after: bool = True, ): return super()._convert( model, @@ -619,6 +640,8 @@ def convert( context_opt, num_video_frames, is_static=True, + unload_before=unload_before, + unload_after=unload_after, ) From 505b20484265a9bc7d5ea3072f07a3c5ba72c927 Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Mon, 17 Jun 2024 14:02:10 -0400 Subject: [PATCH 3/4] Fix issue with black images. --- tensorrt_loader.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorrt_loader.py b/tensorrt_loader.py index 3739042..6ebe51e 100644 --- a/tensorrt_loader.py +++ b/tensorrt_loader.py @@ -41,7 +41,6 @@ def __init__(self, engine_path): self.engine = runtime.deserialize_cuda_engine(f.read()) self.context = self.engine.create_execution_context() self.dtype = torch.float16 - self.stream = torch.cuda.Stream() def set_bindings_shape(self, inputs, split_batch): for k in inputs: @@ -91,12 +90,13 @@ def __call__(self, x, timesteps, context, y=None, control=None, transformer_opti dtype=trt_datatype_to_torch(self.engine.get_tensor_dtype(output_binding_name))) model_inputs_converted[output_binding_name] = out + stream = torch.cuda.default_stream(x.device) for i in range(curr_split_batch): for k in model_inputs_converted: x = model_inputs_converted[k] self.context.set_tensor_address(k, x[(x.shape[0] // curr_split_batch) * i:].data_ptr()) - self.context.execute_async_v3(stream_handle=self.stream.cuda_stream) - self.stream.synchronize() + self.context.execute_async_v3(stream_handle=stream.cuda_stream) + stream.synchronize() return out def load_state_dict(self, sd, strict=False): From e0b788fa7c3fe7e4091aa813e26dbd07ff5fa3b8 Mon Sep 17 00:00:00 2001 From: yusing Date: Tue, 18 Jun 2024 04:36:12 +0800 Subject: [PATCH 4/4] removed unload_before and unload_after option --- pyproject.toml | 2 +- tensorrt_convert.py | 13 +------------ 2 files changed, 2 insertions(+), 13 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 0d27a6d..bf651c8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "comfyui_tensorrt" description = "TensorRT Node for ComfyUI\nThis node enables the best performance on NVIDIA RTX™ Graphics Cards (GPUs) for Stable Diffusion by leveraging NVIDIA TensorRT." -version = "0.1.1" +version = "0.1.2" license = "LICENSE" dependencies = [ "tensorrt>=10.0.1", diff --git a/tensorrt_convert.py b/tensorrt_convert.py index d4e4cde..8366b6c 100644 --- a/tensorrt_convert.py +++ b/tensorrt_convert.py @@ -153,15 +153,11 @@ def _convert( context_max, num_video_frames, is_static: bool, - unload_before: bool = True, - unload_after: bool = True, ): output_onnx = os.path.normpath( os.path.join(self.temp_dir, str(time.time()), "model.onnx") ) - if unload_before: - comfy.model_management.unload_all_models() comfy.model_management.load_models_gpu([model], force_patch_weights=True) unet = model.model.diffusion_model @@ -286,8 +282,7 @@ def forward(self, x, timesteps, context, y=None): dynamic_axes=dynamic_axes, ) - if unload_after: - comfy.model_management.unload_all_models() + comfy.model_management.unload_all_models() comfy.model_management.soft_empty_cache() # TRT conversion starts here @@ -606,8 +601,6 @@ def INPUT_TYPES(s): "step": 1, }, ), - "unload_before": ("BOOLEAN", {"default": True}), - "unload_after": ("BOOLEAN", {"default": True}), }, } @@ -620,8 +613,6 @@ def convert( width_opt, context_opt, num_video_frames, - unload_before: bool = True, - unload_after: bool = True, ): return super()._convert( model, @@ -640,8 +631,6 @@ def convert( context_opt, num_video_frames, is_static=True, - unload_before=unload_before, - unload_after=unload_after, )