"""
(beta) Running the compiled optimizer with an LR Scheduler
+ ============================================================

**Author:** `Michael Lazos <https://github.com/mlazos>`_
"""
#####################################################################
# Setting up and running the compiled optimizer with LR Scheduler
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ #
# In this section, we'll use the Adam optimizer with LinearLR Scheduler
# and create a helper function to wrap the ``step()`` call for each of them
# in ``torch.compile()``.
# ``torch.compile`` is only supported on CUDA devices that have a compute capability of 7.0 or higher.
- # exit cleanly if we are on a device that doesn't support torch.compile
+ # exit cleanly if we are on a device that doesn't support ``torch.compile``
if torch.cuda.get_device_capability() < (7, 0):
    print("Exiting because torch.compile is not supported on this device.")
    import sys
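######################################################################
# The model/optimizer setup that the hunk below steps through is not shown in
# this diff. As a rough sketch only (the model, sizes, and names here are
# illustrative assumptions, not necessarily the file's exact code), the
# Adam + LinearLR + ``torch.compile`` wiring could look like this:
#
# .. code-block:: python
#
#    import torch
#
#    # A small CUDA model plus one backward pass so the optimizer has gradients to apply.
#    model = torch.nn.Sequential(
#        *[torch.nn.Linear(1024, 1024, False, device="cuda") for _ in range(10)]
#    )
#    inp = torch.rand(1024, device="cuda")
#    model(inp).sum().backward()
#
#    # The LR is assumed to be wrapped in a tensor so that scheduler updates
#    # mutate tensor data instead of a Python constant that the compiled step
#    # guards on (see the non-tensor LR discussion later in this file).
#    opt = torch.optim.Adam(model.parameters(), lr=torch.tensor(0.01))
#    sched = torch.optim.lr_scheduler.LinearLR(opt, total_iters=5)
#
#    # Helper wrapping both ``step()`` calls in ``torch.compile``, as described above.
#    @torch.compile(fullgraph=False)
#    def fn():
#        opt.step()
#        sched.step()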
@@ -70,14 +72,6 @@ def fn():
fn()
print(opt.param_groups[0]["lr"])

- ########################################################################
- # Sample Output:
- #
- # >> tensor(0.0047)
- # >> tensor(0.0060)
- # >> tensor(0.0073)
- # >> tensor(0.0087)
- # >> tensor(0.0100)

######################################################################
# Extension: What happens with a non-tensor LR?
@@ -106,28 +100,30 @@ def fn():
######################################################################
# Sample Output:
+ #
+ # .. code-block:: bash
#
- # >>[DEBUG]:Recompiling function step in /data/users/mlazos/pytorch/torch/optim/adam.py:191
- # >> triggered by the following guard failure(s):
- # >> - L['self'].param_groups[0]['lr'] == 0.003333333333333333
- # >>[DEBUG]:Recompiling function step in /data/users/mlazos/pytorch/torch/optim/adam.py:191
- # >> triggered by the following guard failure(s):
- # >> - L['self'].param_groups[0]['lr'] == 0.004666666666666667
- # >> - L['self'].param_groups[0]['lr'] == 0.003333333333333333
- # >>[DEBUG]:Recompiling function step in /data/users/mlazos/pytorch/torch/optim/adam.py:191
- # >> triggered by the following guard failure(s):
- # >> - L['self'].param_groups[0]['lr'] == 0.006000000000000001
- # >> - L['self'].param_groups[0]['lr'] == 0.004666666666666667
- # >> - L['self'].param_groups[0]['lr'] == 0.003333333333333333
- # >>[DEBUG]:Recompiling function step in /data/users/mlazos/pytorch/torch/optim/adam.py:191
- # >> triggered by the following guard failure(s):
- # >> - L['self'].param_groups[0]['lr'] == 0.007333333333333335
- # >> - L['self'].param_groups[0]['lr'] == 0.006000000000000001
- # >> - L['self'].param_groups[0]['lr'] == 0.004666666666666667
- # >> - L['self'].param_groups[0]['lr'] == 0.003333333333333333
+ #    >>[DEBUG]:Recompiling function step in /data/users/mlazos/pytorch/torch/optim/adam.py:191
+ #    >> triggered by the following guard failure(s):
+ #    >> - L['self'].param_groups[0]['lr'] == 0.003333333333333333
+ #    >>[DEBUG]:Recompiling function step in /data/users/mlazos/pytorch/torch/optim/adam.py:191
+ #    >> triggered by the following guard failure(s):
+ #    >> - L['self'].param_groups[0]['lr'] == 0.004666666666666667
+ #    >> - L['self'].param_groups[0]['lr'] == 0.003333333333333333
+ #    >>[DEBUG]:Recompiling function step in /data/users/mlazos/pytorch/torch/optim/adam.py:191
+ #    >> triggered by the following guard failure(s):
+ #    >> - L['self'].param_groups[0]['lr'] == 0.006000000000000001
+ #    >> - L['self'].param_groups[0]['lr'] == 0.004666666666666667
+ #    >> - L['self'].param_groups[0]['lr'] == 0.003333333333333333
+ #    >>[DEBUG]:Recompiling function step in /data/users/mlazos/pytorch/torch/optim/adam.py:191
+ #    >> triggered by the following guard failure(s):
+ #    >> - L['self'].param_groups[0]['lr'] == 0.007333333333333335
+ #    >> - L['self'].param_groups[0]['lr'] == 0.006000000000000001
+ #    >> - L['self'].param_groups[0]['lr'] == 0.004666666666666667
+ #    >> - L['self'].param_groups[0]['lr'] == 0.003333333333333333
#
- # With this example, we can see that we recompile the optimizer 4 additional
- # due to the guard failure on the 'lr' in param_groups[0]
+ # With this example, we can see that we recompile the optimizer 4 additional times
+ # due to the guard failure on the 'lr' in param_groups[0].
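######################################################################
# The code for this extension section is elided by the hunk above. As a
# minimal sketch of the non-tensor variant (reusing the illustrative ``model``
# from the earlier sketch; names and values are assumptions, not the file's
# exact code), it could look like this:
#
# .. code-block:: python
#
#    # Plain float LR: each scheduler update changes a value that the compiled
#    # step() has guarded on, so every change forces a recompile.
#    opt = torch.optim.Adam(model.parameters(), lr=0.01)
#    sched = torch.optim.lr_scheduler.LinearLR(opt, total_iters=5)
#
#    @torch.compile(fullgraph=False)
#    def fn():
#        opt.step()
#        sched.step()
#
#    # One way to surface recompile/guard-failure messages like the sample output above.
#    torch._logging.set_logs(recompiles=True)
#
#    for _ in range(5):
#        fn()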
######################################################################
# Conclusion
@@ -139,5 +135,6 @@ def fn():
# with a LinearLR scheduler to demonstrate the LR changing across iterations.
#
# See also:
- # * tutorial on the compiled optimizer - `Compiled optimizer tutorial <https://pytorch.org/tutorials/recipes/compiling_optimizer.html>`_
- # * deeper technical details on the compiled optimizer see `Compiling the optimizer with PT2 <https://dev-discuss.pytorch.org/t/compiling-the-optimizer-with-pt2/1669>`_
+ #
+ # * `Compiled optimizer tutorial <https://pytorch.org/tutorials/recipes/compiling_optimizer.html>`__ - an introduction to the compiled optimizer.
+ # * `Compiling the optimizer with PT2 <https://dev-discuss.pytorch.org/t/compiling-the-optimizer-with-pt2/1669>`__ - deeper technical details on the compiled optimizer.