
Commit 34e30d1

Author: Andrew Gu
Added use_activation_hooks: bool to swap
ghstack-source-id: 65dd688 Pull Request resolved: #214
1 parent 0af8433 commit 34e30d1

3 files changed: +18 −8 lines

float8_experimental/float8_linear_utils.py

Lines changed: 12 additions & 4 deletions
@@ -70,8 +70,10 @@ def _update_history_with_new_amax(new_amax, amax_history):
 def swap_linear_with_float8_linear(
     module: nn.Module,
     module_cls: Type[nn.Module],
-    emulate: bool = False,
+    *,
     skip_fqn_list: Optional[List[str]] = None,
+    emulate: bool = False,
+    use_activation_hooks: bool = False,
 ) -> nn.Module:
     """
     Replaces all instances of ``torch.nn.Linear`` in ``module`` with instances
@@ -80,17 +82,20 @@ def swap_linear_with_float8_linear(
     Args:
         module (torch.nn.Module): Module to modify.
         module_cls (Union[Type[Float8Linear], Type[Float8DynamicLinear]]): Float8 linear class for the swap.
-        emulate (bool, optional): Whether to emulate the fp8 matmul logic in fp32.
         skip_fqn_list (List[str], optional): If specified, a list of module FQNs to skip.
             Linear submodules of these skipped modules will also be skipped.
+        emulate (bool): Whether to emulate the fp8 matmul logic in fp32.
+        use_activation_hooks (bool): Whether to cast activations to fp8 using module hooks.
     """
     module_names_to_skip = set(skip_fqn_list or [])
     if isinstance(module, nn.Linear):
         if len(list(module.children())) > 0:
             raise AssertionError(
                 f"Does not support a root nn.Linear with children: {module}"
             )
-        return module_cls.from_float(module, emulate)
+        return module_cls.from_float(
+            module, emulate=emulate, use_activation_hooks=use_activation_hooks
+        )

     # Mark all modules to skip as visited
     root_module = module
@@ -112,7 +117,10 @@ def post_order_traversal(
             assert (
                 parent_module is not None
             ), f"Linear root module should return early: {module}"
-            setattr(parent_module, module_name, module_cls.from_float(module, emulate))
+            float8linear_module = module_cls.from_float(
+                module, emulate=emulate, use_activation_hooks=use_activation_hooks
+            )
+            setattr(parent_module, module_name, float8linear_module)

     post_order_traversal(root_module, "", None)
     # Without this explicit `del`, this set only gets deleted upon an explicit

float8_experimental/float8_python_api.py

Lines changed: 2 additions & 0 deletions
@@ -12,6 +12,8 @@

 from typing import Optional, Tuple

+import float8_experimental.float8_aten_api  # noqa
+
 import torch
 from float8_experimental.float8_tensor import Float8Tensor

test/test_base.py

Lines changed: 4 additions & 4 deletions
@@ -351,7 +351,7 @@ def test_swap_root_linear(self):
             [Float8Linear, Float8DynamicLinear], [True, False]
         ):
             module = nn.Linear(3, 3)
-            module = swap_linear_with_float8_linear(module, module_cls, emulate)
+            module = swap_linear_with_float8_linear(module, module_cls, emulate=emulate)
             self.assertIsInstance(module, module_cls)
             self.assertEqual(module.emulate, emulate)

@@ -365,7 +365,7 @@ def test_swap_root_linear_with_children_raises(self):
             AssertionError,
             "Does not support a root nn.Linear with children",
         ):
-            swap_linear_with_float8_linear(module, module_cls, emulate)
+            swap_linear_with_float8_linear(module, module_cls, emulate=emulate)

     def test_swap_submodule_linears(self):
         class MLP(nn.Module):
@@ -378,7 +378,7 @@ def __init__(self, dim: int):
             [Float8Linear, Float8DynamicLinear], [True, False]
         ):
             model = nn.Sequential(MLP(3), nn.Linear(3, 3), MLP(3))
-            model = swap_linear_with_float8_linear(model, module_cls, emulate)
+            model = swap_linear_with_float8_linear(model, module_cls, emulate=emulate)
             self.assertIsInstance(model[0].lin1, module_cls)
             self.assertIsInstance(model[0].lin2, module_cls)
             self.assertIsInstance(model[1], module_cls)
@@ -398,7 +398,7 @@ def __init__(self, dim: int):
         model = nn.Sequential(MLP(3), nn.Linear(3, 3), MLP(3))
         skip_fqn_list = ["2", "0.lin2"]
         model = swap_linear_with_float8_linear(
-            model, module_cls, emulate, skip_fqn_list
+            model, module_cls, emulate=emulate, skip_fqn_list=skip_fqn_list
        )
        self.assertIsInstance(model[0].lin1, module_cls)
        self.assertNotIsInstance(model[0].lin2, module_cls)
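These tests only exercise how the new flag is plumbed through the swap; the docstring above describes it as casting activations to fp8 "using module hooks". As a rough, generic illustration of that PyTorch mechanism (explicitly not this repository's hook implementation), a forward pre-hook can rewrite a module's inputs right before forward() runs:

import torch
import torch.nn as nn

def cast_inputs_pre_hook(module, args):
    # Illustrative stand-in: a real fp8 hook would construct a Float8Tensor here.
    # This sketch only shows where such a cast executes (just before forward()).
    return tuple(a.to(torch.bfloat16) if torch.is_tensor(a) else a for a in args)

lin = nn.Linear(4, 4).to(torch.bfloat16)
handle = lin.register_forward_pre_hook(cast_inputs_pre_hook)
out = lin(torch.randn(2, 4))  # float32 input is cast by the hook before the matmul
handle.remove()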
