Added dynamic shape support to SDXL example

cehongwang · cehongwang · commit cef162007cbc · 2025-02-26T00:20:55.000Z
diff --git a/examples/dynamo/mutable_torchtrt_module_example.py b/examples/dynamo/mutable_torchtrt_module_example.py
@@ -22,94 +22,107 @@
 import torch_tensorrt as torch_trt
 import torchvision.models as models
 
-np.random.seed(5)
-torch.manual_seed(5)
-inputs = [torch.rand((1, 3, 224, 224)).to("cuda")]
-
-# %%
-# Initialize the Mutable Torch TensorRT Module with settings.
-# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-settings = {
-    "use_python": False,
-    "enabled_precisions": {torch.float32},
-    "immutable_weights": False,
-}
-
-model = models.resnet18(pretrained=True).eval().to("cuda")
-mutable_module = torch_trt.MutableTorchTensorRTModule(model, **settings)
-# You can use the mutable module just like the original pytorch module. The compilation happens while you first call the mutable module.
-mutable_module(*inputs)
-
-# %%
-# Make modifications to the mutable module.
-# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-# %%
-# Making changes to mutable module can trigger refit or re-compilation. For example, loading a different state_dict and setting new weight values will trigger refit, and adding a module to the model will trigger re-compilation.
-model2 = models.resnet18(pretrained=False).eval().to("cuda")
-mutable_module.load_state_dict(model2.state_dict())
-
-
-# Check the output
-# The refit happens while you call the mutable module again.
-expected_outputs, refitted_outputs = model2(*inputs), mutable_module(*inputs)
-for expected_output, refitted_output in zip(expected_outputs, refitted_outputs):
-    assert torch.allclose(
-        expected_output, refitted_output, 1e-2, 1e-2
-    ), "Refit Result is not correct. Refit failed"
-
-print("Refit successfully!")
-
-# %%
-# Saving Mutable Torch TensorRT Module
-# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-# Currently, saving is only when "use_python" = False in settings
-torch_trt.MutableTorchTensorRTModule.save(mutable_module, "mutable_module.pkl")
-reload = torch_trt.MutableTorchTensorRTModule.load("mutable_module.pkl")
+# np.random.seed(5)
+# torch.manual_seed(5)
+# inputs = [torch.rand((1, 3, 224, 224)).to("cuda")]
+
+# # %%
+# # Initialize the Mutable Torch TensorRT Module with settings.
+# # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+# settings = {
+#     "use_python": False,
+#     "enabled_precisions": {torch.float32},
+#     "immutable_weights": False,
+# }
+
+# model = models.resnet18(pretrained=True).eval().to("cuda")
+# mutable_module = torch_trt.MutableTorchTensorRTModule(model, **settings)
+# # You can use the mutable module just like the original PyTorch module. Compilation happens the first time you call the mutable module.
+# mutable_module(*inputs)
+
+# # %%
+# # Make modifications to the mutable module.
+# # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+# # %%
+# # Making changes to the mutable module can trigger a refit or re-compilation. For example, loading a different state_dict and setting new weight values will trigger a refit, and adding a module to the model will trigger re-compilation.
+# model2 = models.resnet18(pretrained=False).eval().to("cuda")
+# mutable_module.load_state_dict(model2.state_dict())
+
+
+# # Check the output
+# # The refit happens when you call the mutable module again.
+# expected_outputs, refitted_outputs = model2(*inputs), mutable_module(*inputs)
+# for expected_output, refitted_output in zip(expected_outputs, refitted_outputs):
+#     assert torch.allclose(
+#         expected_output, refitted_output, 1e-2, 1e-2
+#     ), "Refit Result is not correct. Refit failed"
+
+# print("Refit successfully!")
+
+# # %%
+# # Saving Mutable Torch TensorRT Module
+# # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+# # Currently, saving is only supported when "use_python" = False in settings
+# torch_trt.MutableTorchTensorRTModule.save(mutable_module, "mutable_module.pkl")
+# reload = torch_trt.MutableTorchTensorRTModule.load("mutable_module.pkl")
 
 # %%
 # Stable Diffusion with Huggingface
 # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-from diffusers import DiffusionPipeline
-
-with torch.no_grad():
-    settings = {
-        "use_python_runtime": True,
-        "enabled_precisions": {torch.float16},
-        "debug": True,
-        "immutable_weights": False,
-    }
-
-    model_id = "stabilityai/stable-diffusion-xl-base-1.0"
-    device = "cuda:0"
-
-    prompt = "cinematic photo elsa, police uniform <lora:princess_xl_v2:0.8>, . 35mm photograph, film, bokeh, professional, 4k, highly detailed"
-    negative = "drawing, painting, crayon, sketch, graphite, impressionist, noisy, blurry, soft, deformed, ugly, nude"
-
-    pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
-    pipe.to(device)
-
-    # The only extra line you need
-    pipe.unet = torch_trt.MutableTorchTensorRTModule(pipe.unet, **settings)
-
-    image = pipe(prompt, negative_prompt=negative, num_inference_steps=30).images[0]
-    image.save("./without_LoRA_mutable.jpg")
-
-    # Standard Huggingface LoRA loading procedure
-    pipe.load_lora_weights(
-        "stablediffusionapi/load_lora_embeddings",
-        weight_name="all-disney-princess-xl-lo.safetensors",
-        adapter_name="lora1",
-    )
-    pipe.set_adapters(["lora1"], adapter_weights=[1])
-    pipe.fuse_lora()
-    pipe.unload_lora_weights()
-
-    # Refit triggered
-    image = pipe(prompt, negative_prompt=negative, num_inference_steps=30).images[0]
-    image.save("./with_LoRA_mutable.jpg")
+# from diffusers import DiffusionPipeline
+
+# with torch.no_grad():
+#     settings = {
+#         "use_python_runtime": True,
+#         "enabled_precisions": {torch.float16},
+#         "debug": True,
+#         "immutable_weights": False,
+#     }
+
+#     model_id = "stabilityai/stable-diffusion-xl-base-1.0"
+#     device = "cuda:0"
+
+#     prompt = "cinematic photo elsa, police uniform <lora:princess_xl_v2:0.8>, . 35mm photograph, film, bokeh, professional, 4k, highly detailed"
+#     negative = "drawing, painting, crayon, sketch, graphite, impressionist, noisy, blurry, soft, deformed, ugly, nude"
+
+#     pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
+#     pipe.to(device)
+
+#     # The only extra line you need
+#     pipe.unet = torch_trt.MutableTorchTensorRTModule(pipe.unet, **settings)
+#     BATCH = torch.export.Dim("BATCH", min=1 * 2, max=12 * 2)
+#     _HEIGHT = torch.export.Dim('_HEIGHT', min=16, max=32)
+#     _WIDTH = torch.export.Dim('_WIDTH', min=16, max=32)
+#     HEIGHT = 4*_HEIGHT
+#     WIDTH = 4*_WIDTH
+#     args_dynamic_shapes = ({0: BATCH, 2: HEIGHT, 3: WIDTH}, {})
+#     kwargs_dynamic_shapes = {
+#         'encoder_hidden_states': {0: BATCH},
+#         'added_cond_kwargs':{
+#             'text_embeds': {0: BATCH},
+#             'time_ids': {0: BATCH},
+#         }
+#     }
+#     pipe.unet.set_expected_dynamic_shape_range(args_dynamic_shapes, kwargs_dynamic_shapes)
+#     image = pipe(prompt, negative_prompt=negative, num_inference_steps=30, height=1024, width=768, num_images_per_prompt=2).images[0]
+#     image.save("./without_LoRA_mutable.jpg")
+
+#     # Standard Huggingface LoRA loading procedure
+#     pipe.load_lora_weights(
+#         "stablediffusionapi/load_lora_embeddings",
+#         weight_name="all-disney-princess-xl-lo.safetensors",
+#         adapter_name="lora1",
+#     )
+#     pipe.set_adapters(["lora1"], adapter_weights=[1])
+#     pipe.fuse_lora()
+#     pipe.unload_lora_weights()
+
+#     # Refit triggered
+#     image = pipe(prompt, negative_prompt=negative, num_inference_steps=30, height=1024, width=1024, num_images_per_prompt=1).images[0]
+#     image.save("./with_LoRA_mutable.jpg")
 
 
 # %%