Merge branch 'main' into fix_checkpoint_hyperparameter_tuning

svekars · web-flow · commit f52b7ba9be70 · 2024-03-20T08:31:16.000-07:00
diff --git a/.jenkins/build.sh b/.jenkins/build.sh
@@ -15,6 +15,10 @@ sudo apt-get update || sudo apt-get install libgnutls30
 sudo apt-get update
 sudo apt-get install -y --no-install-recommends unzip p7zip-full sox libsox-dev libsox-fmt-all rsync
 
+# Install pandoc (it cannot be installed from PyPI)
+sudo apt-get update
+sudo apt-get install -y pandoc
+
 # NS: Path to python runtime should already be part of docker container
 # export PATH=/opt/conda/bin:$PATH
 rm -rf src
@@ -63,6 +67,9 @@ if [[ "${JOB_TYPE}" == "worker" ]]; then
   # Step 3: Run `make docs` to generate HTML files and static files for these tutorials
   make docs
 
+  # Step 3.1: Run the post-processing script:
+  python .jenkins/post_process_notebooks.py
+
   # Step 4: If any of the generated files are not related the tutorial files we want to run,
   # then we remove them
   set +x
@@ -140,6 +147,9 @@ elif [[ "${JOB_TYPE}" == "manager" ]]; then
   bash $DIR/remove_invisible_code_block_batch.sh docs
   python .jenkins/validate_tutorials_built.py
 
+  # Step 5.1: Run post-processing script on .ipynb files:
+  python .jenkins/post_process_notebooks.py
+
   # Step 6: Copy generated HTML files and static files to S3
   7z a manager.7z docs
   awsv2 s3 cp manager.7z s3://${BUCKET_NAME}/${COMMIT_ID}/manager.7z
diff --git a/.jenkins/custom_pandoc_filter.py b/.jenkins/custom_pandoc_filter.py
@@ -0,0 +1,139 @@
+from pandocfilters import toJSONFilter, Div, RawBlock, Para, Str, Space, Link, Code, CodeBlock
+import markdown
+import html
+
+def to_markdown(item, skip_octicon=False):
+    """Render a pandoc Str/Space/Link/Code/CodeBlock node as text, else ''.
+
+    ``skip_octicon`` is not consulted here; it is only passed to link children.
+    """
+    kind = item['t']
+    if kind == 'Str':
+        return item['c']
+    if kind == 'Space':
+        return ' '
+    if kind == 'Link':
+        link_text = ''.join(to_markdown(i, skip_octicon) for i in item['c'][1])
+        # Escape the URL so a quote or '&' in it cannot break the attribute.
+        return f'<a href="{html.escape(item["c"][2][0])}">{link_text}</a>'
+    if kind == 'Code':
+        # Octicons don't render in .ipynb: drop code with an 'octicon' attribute value.
+        if any(value == 'octicon' for _, value in item['c'][0][2]):
+            return ''
+        return f'<code>{html.escape(item["c"][1])}</code>'
+    if kind == 'CodeBlock':
+        return f'<pre><code>{html.escape(item["c"][1])}</code></pre>'
+    return ''
+
+
+def process_admonitions(key, value, format, meta):
+    """Pandoc filter action for admonition Divs and raw HTML iframes.
+
+    * A Div with class ``note``/``tip``/``warning`` becomes raw HTML blocks:
+      a colored label bar followed by the rendered body.
+    * An ``html`` RawBlock containing ``iframe`` (embedded video) becomes a
+      CodeBlock tagged ``python``/``jupyter-code-cell`` that the
+      post-processing script turns into a runnable code cell.
+
+    Returns the replacement node(s), or None to leave the node unchanged.
+    """
+    if key == 'Div':
+        [[ident, classes, keyvals], contents] = value
+        if 'note' in classes:
+            color = '#54c7ec'
+            label = 'NOTE:'
+        elif 'tip' in classes:
+            color = '#6bcebb'
+            label = 'TIP:'
+        elif 'warning' in classes:
+            color = '#e94f3b'
+            label = 'WARNING:'
+        else:
+            return
+
+        # Only paragraph inlines and code blocks are rendered; to_markdown()
+        # returns '' for any inline type it does not handle.
+        note_content = []
+        for block in contents:
+            if block.get('t') == 'Para':
+                note_content.extend(block['c'])
+            elif block.get('t') == 'CodeBlock':
+                note_content.append(block)
+
+        note_content_md = ''.join(to_markdown(item) for item in note_content)
+        html_content = markdown.markdown(note_content_md)
+
+        return [{'t': 'RawBlock', 'c': ['html', f'<div style="background-color: {color}; color: #fff; font-weight: 700; padding-left: 10px; padding-top: 5px; padding-bottom: 5px"><strong>{label}</strong></div>']}, {'t': 'RawBlock', 'c': ['html', '<div style="background-color: #f3f4f7; padding-left: 10px; padding-top: 10px; padding-bottom: 10px; padding-right: 10px">']}, {'t': 'RawBlock', 'c': ['html', html_content]}, {'t': 'RawBlock', 'c': ['html', '</div>']}]
+    elif key == 'RawBlock':
+        # `format` is the filter's target format; use separate names for the
+        # raw block's own format and payload so it is not shadowed.
+        [raw_format, content] = value
+        if raw_format == 'html' and 'iframe' in content:
+            # The whole iframe markup is embedded verbatim, so the src URL
+            # does not need to be parsed out (any quoting style is accepted).
+            python_code = f"""
+from IPython.display import display, HTML
+html_code = \"""
+{content}
+\"""
+display(HTML(html_code))
+"""
+
+            return {'t': 'CodeBlock', 'c': [['', ['python', 'jupyter-code-cell'], []], python_code]}
+
+
+def process_images(key, value, format, meta):
+    """Point relative image sources at https://pytorch.org/tutorials/.
+
+    Non-Image nodes return None; sources starting with 'http' are kept.
+    """
+    if key != 'Image':
+        return None
+    (ident, classes, keyvals), caption, (src, title) = value
+    if not src.startswith('http'):
+        while src[:3] == '../':
+            src = src[3:]
+        local = src[1:] if src.startswith('/_static') else src
+        src = f'https://pytorch.org/tutorials/{local}'
+    return {'t': 'Image', 'c': [[ident, classes, keyvals], caption, [src, title]]}
+
+
+def process_grids(key, value, format, meta):
+    """Render a two-card ``grid`` Div as two floated HTML columns.
+
+    Cards alternate between the left and right column; only the two-card
+    layout of the tutorial template is supported. Other nodes return None.
+    """
+    if key != 'Div':
+        return None
+    (ident, classes, keyvals), contents = value
+    if 'grid' not in classes:
+        return None
+    columns = ['<div style="width: 45%; float: left; padding: 20px;">',
+               '<div style="width: 45%; float: right; padding: 20px;">']
+    cards = [b for b in contents
+             if 't' in b and b['t'] == 'Div' and 'grid-item-card' in b['c'][0][1]]
+    for index, card in enumerate(cards):
+        for item in card['c'][1]:
+            if item['t'] == 'Para':
+                text = ''.join(to_markdown(i) for i in item['c'])
+                columns[index % 2] += f'<h2>{text}</h2>'
+            elif item['t'] == 'BulletList':
+                entries = (''.join(to_markdown(i) for i in li[0]['c']) for li in item['c'])
+                columns[index % 2] += '<ul>' + ''.join(f'<li>{e}</li>' for e in entries) + '</ul>'
+    return {'t': 'RawBlock', 'c': ['html', ''.join(c + '</div>' for c in columns)]}
+
+def is_code_block(item):  # True only for inline Code whose text contains 'octicon'
+    return 'octicon' in item['c'][1] if item['t'] == 'Code' else False
+
+
+def process_all(key, value, format, meta):
+    """Return the first non-None result of the filters, or None."""
+    actions = (process_admonitions, process_images, process_grids)
+    results = (action(key, value, format, meta) for action in actions)
+    # next() stops at the first hit, so later filters are not invoked.
+    return next((r for r in results if r is not None), None)
+
+
+if __name__ == "__main__":
+    toJSONFilter(process_all)
diff --git a/.jenkins/post_process_notebooks.py b/.jenkins/post_process_notebooks.py
@@ -0,0 +1,97 @@
+import nbformat as nbf
+import os
+import re
+
+"""
+This post-processing script needs to run after the .ipynb files are
+generated. The script removes extraneous ```{=html} syntax from the
+admonitions and splits the cells that have video iframe into a 
+separate code cell that can be run to load the video directly
+in the notebook. This script is included in build.sh.
+"""
+
+
+# Pattern to search ``` {.python .jupyter-code-cell}
+pattern = re.compile(r'(.*?)``` {.python .jupyter-code-cell}\n\n(from IPython.display import display, HTML\nhtml_code = """\n.*?\n"""\ndisplay\(HTML\(html_code\)\))\n```(.*)', re.DOTALL)
+
+
+def process_video_cell(notebook_path):
+    """
+    Split video blocks out of the markdown cells of one notebook.
+
+    A markdown cell containing a "``` {.python .jupyter-code-cell}" block
+    is split into: the markdown before it, a new code cell that loads the
+    video when run, and (if not blank) a markdown cell with the rest.
+    Extraneous ```{=html} fences are removed from the markdown text.
+
+    The notebook at ``notebook_path`` is rewritten in place.
+    """
+    print(f'Processing file: {notebook_path}')
+    notebook = nbf.read(notebook_path, as_version=4)
+
+    # Cells are inserted while iterating on purpose: the trailing markdown
+    # cell added below is visited later in this same loop, so further video
+    # blocks in it are split (or its ```{=html} fences are cleaned) too.
+    for i, cell in enumerate(notebook.cells):
+        if cell.cell_type != 'markdown':
+            continue
+        match = pattern.search(cell.source)
+        if not match:
+            # Remove ```{=html} from the code block
+            cell.source = remove_html_tag(cell.source)
+            continue
+
+        print(f'Match found in cell {i}: {match.group(0)[:100]}...')
+        before_html_block, code_block, after_html_block = match.groups()
+
+        # Keep the text before the video block in this cell. It never goes
+        # through the no-match branch, so strip its ```{=html} fences here.
+        cell.source = remove_html_tag(before_html_block)
+
+        # Insert the video loader as a code cell right after this one.
+        code_block = "# Run this cell to load the video\n" + code_block
+        new_code_cell = nbf.v4.new_code_cell(source=code_block)
+        notebook.cells.insert(i+1, new_code_cell)
+        print(f'New code cell created with source: {new_code_cell.source}')
+
+        # Anything after the video block goes into its own markdown cell.
+        if after_html_block.strip():
+            new_markdown_cell = nbf.v4.new_markdown_cell(source=after_html_block)
+            notebook.cells.insert(i+2, new_markdown_cell)
+
+    nbf.write(notebook, notebook_path)
+
+
+def remove_html_tag(content):
+    """
+    Strip the ```{=html} fences that Pandoc wraps around raw HTML blocks,
+    which otherwise keep the HTML from rendering in the notebook.
+    """
+    # Order matters: the specific fence shapes go before the catch-all.
+    substitutions = [(r'```{=html}\n<div', '<div'), (r'">\n```', '">'),
+                     (r'<\/div>\n```', '</div>\n'),
+                     (r'```{=html}\n</div>\n```', '</div>\n'),
+                     (r'```{=html}', ''), (r'</p>\n```', '</p>')]
+    for regex, replacement in substitutions:
+        content = re.sub(regex, replacement, content)
+    return content
+
+
+def walk_dir(downloads_dir):
+    """
+    Run process_video_cell on every .ipynb file found under
+    downloads_dir, including its subdirectories.
+    """
+    for folder, _subdirs, names in os.walk(downloads_dir):
+        notebooks = [name for name in names if name.endswith('.ipynb')]
+        for name in notebooks:
+            process_video_cell(os.path.join(folder, name))
+
+
+def main():
+    """Post-process the notebooks under ./docs/_downloads."""
+    walk_dir('./docs/_downloads')
+
+
+if __name__ == "__main__":
+    main()
diff --git a/advanced_source/super_resolution_with_onnxruntime.py b/advanced_source/super_resolution_with_onnxruntime.py
@@ -5,7 +5,7 @@
 .. note::
     As of PyTorch 2.1, there are two versions of ONNX Exporter.
 
-    * ``torch.onnx.dynamo_export`is the newest (still in beta) exporter based on the TorchDynamo technology released with PyTorch 2.0.
+    * ``torch.onnx.dynamo_export`` is the newest (still in beta) exporter based on the TorchDynamo technology released with PyTorch 2.0.
     * ``torch.onnx.export`` is based on TorchScript backend and has been available since PyTorch 1.2.0.
 
 In this tutorial, we describe how to convert a model defined
diff --git a/advanced_source/usb_semisup_learn.py b/advanced_source/usb_semisup_learn.py
@@ -81,7 +81,7 @@
 # algorithm on dataset
 # 
 # Note that a CUDA-enabled backend is required for training with the ``semilearn`` package.
-# See `Enabling CUDA in Google Colab <https://pytorch.org/tutorials/beginner/colab#using-cuda>`__ for instructions
+# See `Enabling CUDA in Google Colab <https://pytorch.org/tutorials/beginner/colab#enabling-cuda>`__ for instructions
 # on enabling CUDA in Google Colab.
 #
 import semilearn
diff --git a/beginner_source/ddp_series_multigpu.rst b/beginner_source/ddp_series_multigpu.rst
@@ -78,15 +78,15 @@ Imports
 Constructing the process group
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
+-  First, before initializing the process group, call `set_device <https://pytorch.org/docs/stable/generated/torch.cuda.set_device.html?highlight=set_device#torch.cuda.set_device>`__,
+   which sets the default GPU for each process. This is important to prevent hangs or excessive memory utilization on ``GPU:0``.
 -  The process group can be initialized by TCP (default) or from a
    shared file-system. Read more on `process group
    initialization <https://pytorch.org/docs/stable/distributed.html#tcp-initialization>`__
 -  `init_process_group <https://pytorch.org/docs/stable/distributed.html?highlight=init_process_group#torch.distributed.init_process_group>`__
    initializes the distributed process group.
 -  Read more about `choosing a DDP
    backend <https://pytorch.org/docs/stable/distributed.html#which-backend-to-use>`__
--  `set_device <https://pytorch.org/docs/stable/generated/torch.cuda.set_device.html?highlight=set_device#torch.cuda.set_device>`__
-   sets the default GPU for each process. This is important to prevent hangs or excessive memory utilization on `GPU:0`
 
 .. code-block:: diff
 
@@ -98,8 +98,9 @@ Constructing the process group
     +   """
     +   os.environ["MASTER_ADDR"] = "localhost"
     +   os.environ["MASTER_PORT"] = "12355"
-    +   init_process_group(backend="nccl", rank=rank, world_size=world_size)
     +   torch.cuda.set_device(rank)
+    +   init_process_group(backend="nccl", rank=rank, world_size=world_size)
+
 
 
 Constructing the DDP model
diff --git a/beginner_source/knowledge_distillation_tutorial.py b/beginner_source/knowledge_distillation_tutorial.py
@@ -324,7 +324,7 @@ def train_knowledge_distillation(teacher, student, train_loader, epochs, learnin
             soft_prob = nn.functional.log_softmax(student_logits / T, dim=-1)
 
             # Calculate the soft targets loss. Scaled by T**2 as suggested by the authors of the paper "Distilling the knowledge in a neural network"
-            soft_targets_loss = -torch.sum(soft_targets * soft_prob) / soft_prob.size()[0] * (T**2)
+            soft_targets_loss = torch.sum(soft_targets * (soft_targets.log() - soft_prob)) / soft_prob.size()[0] * (T**2)
 
             # Calculate the true label loss
             label_loss = ce_loss(student_logits, labels)
diff --git a/beginner_source/saving_loading_models.py b/beginner_source/saving_loading_models.py
@@ -160,7 +160,7 @@
 #     The 1.6 release of PyTorch switched ``torch.save`` to use a new
 #     zip file-based format. ``torch.load`` still retains the ability to
 #     load files in the old format. If for any reason you want ``torch.save``
-#     to use the old format, pass the ``kwarg``parameter ``_use_new_zipfile_serialization=False``.
+#     to use the old format, pass the ``kwarg`` parameter ``_use_new_zipfile_serialization=False``.
 #
 # When saving a model for inference, it is only necessary to save the
 # trained model’s learned parameters. Saving the model’s *state_dict* with
diff --git a/conf.py b/conf.py
@@ -42,7 +42,8 @@
 import distutils.file_util
 import re
 from get_sphinx_filenames import SPHINX_SHOULD_RUN
-
+import pandocfilters
+import pypandoc
 import plotly.io as pio
 pio.renderers.default = 'sphinx_gallery'
 
@@ -74,7 +75,8 @@
     'sphinx.ext.intersphinx',
     'sphinx_copybutton',
     'sphinx_gallery.gen_gallery',
-    'sphinx_design'
+    'sphinx_design',
+    'nbsphinx'
 ]
 
 intersphinx_mapping = {
@@ -107,7 +109,10 @@ def reset_seeds(gallery_conf, fname):
                             "# https://pytorch.org/tutorials/beginner/colab\n"
                             "%matplotlib inline"),
     'reset_modules': (reset_seeds),
-    'ignore_pattern': r'_torch_export_nightly_tutorial.py'
+    'ignore_pattern': r'_torch_export_nightly_tutorial.py',
+    'pypandoc': {'extra_args': ['--mathjax', '--toc'],
+                 'filters': ['.jenkins/custom_pandoc_filter.py'],
+    },
 }
 
 if os.getenv('GALLERY_PATTERN'):
diff --git a/intermediate_source/char_rnn_generation_tutorial.py b/intermediate_source/char_rnn_generation_tutorial.py
@@ -5,7 +5,7 @@
 **Author**: `Sean Robertson <https://github.com/spro>`_
 
 This is our second of three tutorials on "NLP From Scratch".
-In the `first tutorial </intermediate/char_rnn_classification_tutorial>`_
+In the `first tutorial </tutorials/intermediate/char_rnn_classification_tutorial>`_
 we used a RNN to classify names into their language of origin. This time
 we'll turn around and generate names from languages.
 
diff --git a/recipes_source/quantization.rst b/recipes_source/quantization.rst
@@ -81,7 +81,7 @@ The full documentation of the `quantize_dynamic` API call is `here <https://pyto
 3. Post Training Static Quantization
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-This method converts both the weights and the activations to 8-bit integers beforehand so there won't be on-the-fly conversion on the activations during the inference, as the dynamic quantization does, hence improving the performance significantly.
+This method converts both the weights and the activations to 8-bit integers beforehand so there won't be on-the-fly conversion on the activations during the inference, as the dynamic quantization does. While post-training static quantization can significantly enhance inference speed and reduce model size, this method may degrade the original model's accuracy more than post-training dynamic quantization.
 
 To apply static quantization on a model, run the following code:
 
diff --git a/requirements.txt b/requirements.txt

Original file line number	Diff line number	Diff line change
`@@ -81,7 +81,7 @@`
`81`	`81`	`# algorithm on dataset`
`82`	`82`	`#`
`83`	`83`	# Note that a CUDA-enabled backend is required for training with the ``semilearn`` package.
`84`		-# See `Enabling CUDA in Google Colab <https://pytorch.org/tutorials/beginner/colab#using-cuda>`__ for instructions
	`84`	+# See `Enabling CUDA in Google Colab <https://pytorch.org/tutorials/beginner/colab#enabling-cuda>`__ for instructions
`85`	`85`	`# on enabling CUDA in Google Colab.`
`86`	`86`	`#`
`87`	`87`	`import semilearn`
Original file line number	Diff line number	Diff line change
`@@ -160,7 +160,7 @@`
`160`	`160`	# The 1.6 release of PyTorch switched ``torch.save`` to use a new
`161`	`161`	# zip file-based format. ``torch.load`` still retains the ability to
`162`	`162`	# load files in the old format. If for any reason you want ``torch.save``
`163`		-# to use the old format, pass the ``kwarg``parameter ``_use_new_zipfile_serialization=False``.
	`163`	+# to use the old format, pass the ``kwarg`` parameter ``_use_new_zipfile_serialization=False``.
`164`	`164`	`#`
`165`	`165`	`# When saving a model for inference, it is only necessary to save the`
`166`	`166`	`# trained model’s learned parameters. Saving the model’s state_dict with`