@@ -27,55 +27,34 @@ def generate_image(pipe, prompt, image_name):


device = "cuda"
-breakpoint()
pipe = FluxPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-dev",
    torch_dtype=torch.float16,
)

-breakpoint()
-pipe.to(device)
-pipe.to(torch.float16)
+pipe.to(device).to(torch.float16)
+config = pipe.transformer.config
+# from diffusers.models.transformers.transformer_flux import FluxTransformer2DModel
+# pipe.transformer = FluxTransformer2DModel(patch_size=1, in_channels=64, num_layers=1, num_single_layers=1, guidance_embeds=True).to("cuda:0").to(torch.float16)
backbone = pipe.transformer
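+# NOTE: `config` is captured above so it can be re-attached once the transformer is
+# swapped for the compiled module below; FluxPipeline reads pipe.transformer.config
+# (e.g. guidance_embeds) during denoising.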
-
+# generate_image(pipe, ["A cat holding a sign that says hello world"], "flux-dev")
+# breakpoint()
# mto.restore(backbone, "./schnell_fp8.pt")

-# dummy_inputs = generate_dummy_inputs("flux-dev", "cuda", True)
batch_size = 2
BATCH = torch.export.Dim("batch", min=1, max=2)
SEQ_LEN = torch.export.Dim("seq_len", min=1, max=512)
IMG_ID = torch.export.Dim("img_id", min=3586, max=4096)
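+# NOTE: these Dims declare the ranges torch.export may treat as dynamic, not concrete
+# shapes: SEQ_LEN covers the T5 text-token axis (FLUX.1-dev uses up to 512 tokens) and
+# IMG_ID the packed-latent token axis (4096 tokens corresponds to a 1024x1024 image).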
-# dynamic_shapes = (
-#     {0: BATCH},
-#     {0: BATCH},
-#     {0: BATCH},
-#     {0: BATCH},
-#     {0: BATCH, 1: SEQ_LEN},
-#     {0: BATCH, 1: SEQ_LEN},
-#     {0: BATCH},
-#     {}
-# )
-#
-# dummy_inputs = (
-#     torch.randn((batch_size, 4096, 64), dtype=torch.float16).to(device),
-#     torch.tensor([1.0, 1.0], dtype=torch.float16).to(device),
-#     torch.tensor([1.0, 1.0], dtype=torch.float16).to(device),
-#     torch.randn((batch_size, 768), dtype=torch.float16).to(device),
-#     torch.randn((batch_size, 512, 4096), dtype=torch.float16).to(device),
-#     torch.randn((batch_size, 512, 3), dtype=torch.float16).to(device),
-#     torch.randn((batch_size, 4096, 3), dtype=torch.float16).to(device),
-# )
+

dynamic_shapes = {
    "hidden_states": {0: BATCH},
    "encoder_hidden_states": {0: BATCH, 1: SEQ_LEN},
    "pooled_projections": {0: BATCH},
    "timestep": {0: BATCH},
-    "txt_ids": {0: BATCH, 1: SEQ_LEN},
-    "img_ids": {0: BATCH, 1: IMG_ID},
+    "txt_ids": {0: SEQ_LEN},
+    "img_ids": {0: IMG_ID},
    "guidance": {0: BATCH},
-    # "joint_attention_kwargs": {},
-    # "return_dict": {}
}
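+# NOTE: newer diffusers releases pass txt_ids/img_ids as unbatched 2D tensors of shape
+# (seq, 3) and warn that the batched 3D form is deprecated, which is presumably why the
+# BATCH dim was dropped from those two entries.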

dummy_inputs = {
@@ -89,39 +68,56 @@ def generate_image(pipe, prompt, image_name):
        device
    ),
    "timestep": torch.tensor([1.0, 1.0], dtype=torch.float16).to(device),
-    "txt_ids": torch.randn((batch_size, 512, 3), dtype=torch.float16).to(device),
-    "img_ids": torch.randn((batch_size, 4096, 3), dtype=torch.float16).to(device),
-    "guidance": torch.tensor([1.0, 1.0], dtype=torch.float16).to(device),
-    # "joint_attention_kwargs": {},
-    # "return_dict": torch.tensor(False)
+    "txt_ids": torch.randn((512, 3), dtype=torch.float16).to(device),
+    "img_ids": torch.randn((4096, 3), dtype=torch.float16).to(device),
+    "guidance": torch.tensor([1.0, 1.0], dtype=torch.float32).to(device),
}
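+# NOTE: guidance is deliberately float32 while the other inputs are float16; this
+# matches FluxPipeline, which builds the guidance-scale tensor as float32 at runtime.
+# Treat the exact dtype behavior as an assumption about the diffusers version in use.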
-with export_torch_mode():
-    ep = _export(
-        backbone,
-        args=(),
-        kwargs=dummy_inputs,
-        dynamic_shapes=dynamic_shapes,
-        strict=False,
-        allow_complex_guards_as_runtime_asserts=True,
-    )
+# with export_torch_mode():
+ep = _export(
+    backbone,
+    args=(),
+    kwargs=dummy_inputs,
+    dynamic_shapes=dynamic_shapes,
+    strict=False,
+    allow_complex_guards_as_runtime_asserts=True,
+)
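+# NOTE: _export is torch.export's private entrypoint, used here because the public
+# torch.export.export does not expose allow_complex_guards_as_runtime_asserts.
+# export_torch_mode (ModelOpt's quantization export context) is commented out since
+# no quantized checkpoint is restored via mto.restore above.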

# breakpoint()
with torch_tensorrt.logging.debug():
    trt_gm = torch_tensorrt.dynamo.compile(
        ep,
        inputs=dummy_inputs,
-        enabled_precisions={torch.float16},
+        enabled_precisions={torch.float32},
        truncate_double=True,
        dryrun=False,
        min_block_size=1,
+        # use_python_runtime=True,
        debug=True,
+        use_fp32_acc=True,
+        use_explicit_typing=True,
    )
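+# NOTE: use_explicit_typing=True builds a strongly typed TensorRT network whose layer
+# dtypes follow the exported graph (that mode expects enabled_precisions to be
+# {torch.float32}), and use_fp32_acc=True casts matmul inputs so accumulation happens
+# in FP32, trading some speed for FP16 accuracy.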
-
+# breakpoint()
+# out_pyt = backbone(**dummy_inputs)
+# out_trt = trt_gm(**dummy_inputs)
breakpoint()
+
+
+class TRTModule(torch.nn.Module):
+    def __init__(self, trt_mod):
+        super(TRTModule, self).__init__()
+        self.trt_mod = trt_mod
+
+    def __call__(self, *args, **kwargs):
+        # breakpoint()
+        kwargs.pop("joint_attention_kwargs")
+        kwargs.pop("return_dict")
+
+        return self.trt_mod(**kwargs)
+
+
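+# NOTE: FluxPipeline calls its transformer with joint_attention_kwargs and return_dict,
+# neither of which the exported graph's signature accepts, so the wrapper strips them
+# before delegating to the compiled module. kwargs.pop(key, None) would be the
+# defensive variant if either key can be absent.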
backbone.to("cpu")
-config = pipe.transformer.config
-pipe.transformer = trt_gm
+pipe.transformer = TRTModule(trt_gm)

pipe.transformer.config = config

# Generate an image
-generate_image(pipe, "A cat holding a sign that says hello world", "flux-dev")
+generate_image(pipe, ["A cat holding a sign that says hello world"], "flux-dev")
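+# NOTE: the prompt is now wrapped in a one-element list; FluxPipeline accepts either a
+# str or List[str], so this presumably matches how generate_image (not shown in this
+# diff) consumes its prompt argument.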