ROCm
diff --git a/‎.circleci/docker/build.sh
Lines changed: 12 additions & 4 deletions b/‎.circleci/docker/build.sh
Lines changed: 12 additions & 4 deletions
diff --git a/‎.circleci/docker/centos-rocm/Dockerfile
Lines changed: 2 additions & 0 deletions b/‎.circleci/docker/centos-rocm/Dockerfile
Lines changed: 2 additions & 0 deletions
diff --git a/‎.circleci/docker/common/install_conda.sh
Lines changed: 2 additions & 16 deletions b/‎.circleci/docker/common/install_conda.sh
Lines changed: 2 additions & 16 deletions
diff --git a/‎.circleci/docker/common/install_cudnn.sh
Lines changed: 18 additions & 0 deletions b/‎.circleci/docker/common/install_cudnn.sh
Lines changed: 18 additions & 0 deletions
diff --git a/‎.circleci/docker/common/install_rocm.sh
Lines changed: 2 additions & 2 deletions b/‎.circleci/docker/common/install_rocm.sh
Lines changed: 2 additions & 2 deletions
diff --git a/‎.circleci/docker/requirements-ci.txt
Lines changed: 210 additions & 0 deletions b/‎.circleci/docker/requirements-ci.txt
Lines changed: 210 additions & 0 deletions
diff --git a/‎.circleci/docker/ubuntu-cuda/Dockerfile
Lines changed: 10 additions & 3 deletions b/‎.circleci/docker/ubuntu-cuda/Dockerfile
Lines changed: 10 additions & 3 deletions
diff --git a/‎.circleci/docker/ubuntu-rocm/Dockerfile
Lines changed: 2 additions & 0 deletions b/‎.circleci/docker/ubuntu-rocm/Dockerfile
Lines changed: 2 additions & 0 deletions
diff --git a/‎.circleci/docker/ubuntu/Dockerfile
Lines changed: 2 additions & 0 deletions b/‎.circleci/docker/ubuntu/Dockerfile
Lines changed: 2 additions & 0 deletions
@@ -222,21 +222,21 @@ case "$image" in
     DB=yes
     VISION=yes
     ;;
-  pytorch-linux-bionic-rocm4.3.1-py3.7)
+  pytorch-linux-bionic-rocm4.5-py3.7)
     ANACONDA_PYTHON_VERSION=3.7
     GCC_VERSION=9
     PROTOBUF=yes
     DB=yes
     VISION=yes
-    ROCM_VERSION=4.3.1
+    ROCM_VERSION=4.5.2
     ;;
-  pytorch-linux-bionic-rocm4.5-py3.7)
+  pytorch-linux-bionic-rocm5.0-py3.7)
     ANACONDA_PYTHON_VERSION=3.7
     GCC_VERSION=9
     PROTOBUF=yes
     DB=yes
     VISION=yes
-    ROCM_VERSION=4.5.2
+    ROCM_VERSION=5.0
     ;;
   *)
     # Catch-all for builds that are not hardcoded.
@@ -283,6 +283,13 @@ fi
 
 tmp_tag=$(basename "$(mktemp -u)" | tr '[:upper:]' '[:lower:]')
 
+# When using cuDNN version 8, install it separately instead of taking it from the CUDA base image
+if [[ "$image" == *cuda*  && ${OS} == "ubuntu" ]]; then
+  IMAGE_NAME="nvidia/cuda:${CUDA_VERSION}-cudnn${CUDNN_VERSION}-devel-ubuntu${UBUNTU_VERSION}"
+  if [[ ${CUDNN_VERSION} == 8 ]]; then
+    IMAGE_NAME="nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}"
+  fi
+fi
 
 # Build image
 # TODO: build-arg THRIFT is not turned on for any image, remove it once we confirm
@@ -321,6 +328,7 @@ docker build \
        --build-arg "KATEX=${KATEX:-}" \
        --build-arg "ROCM_VERSION=${ROCM_VERSION:-}" \
        --build-arg "PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH:-gfx900;gfx906}" \
+       --build-arg "IMAGE_NAME=${IMAGE_NAME}" \
        -f $(dirname ${DOCKERFILE})/Dockerfile \
        -t "$tmp_tag" \
        "$@" \
 
@@ -42,8 +42,10 @@ RUN bash ./install_user.sh && rm install_user.sh
 # Install conda and other packages (e.g., numpy, pytest)
 ENV PATH /opt/conda/bin:$PATH
 ARG ANACONDA_PYTHON_VERSION
+ADD requirements-ci.txt /opt/conda/requirements-ci.txt
 ADD ./common/install_conda.sh install_conda.sh
 RUN bash ./install_conda.sh && rm install_conda.sh
+RUN rm /opt/conda/requirements-ci.txt
 
 # (optional) Install protobuf for ONNX
 ARG PROTOBUF
 
@@ -21,7 +21,7 @@ if [ -n "$ANACONDA_PYTHON_VERSION" ]; then
       ;;
   esac
 
-  mkdir /opt/conda
+  mkdir -p /opt/conda
   chown jenkins:jenkins /opt/conda
 
   # Work around bug where devtoolset replaces sudo and breaks it.
@@ -94,21 +94,7 @@ if [ -n "$ANACONDA_PYTHON_VERSION" ]; then
   conda_install nnpack -c killeent
 
   # Install some other packages, including those needed for Python test reporting
-  # Pin SciPy because of failing distribution tests (see #60347)
-  # Pin MyPy version because new errors are likely to appear with each release
-  # Pin hypothesis to avoid flakiness: https://github.com/pytorch/pytorch/issues/31136
-  # Pin unittest-xml-reporting to freeze printing test summary logic, related: https://github.com/pytorch/pytorch/issues/69014
-  as_jenkins pip install --progress-bar off pytest \
-    scipy==1.6.3 \
-    scikit-image \
-    psutil \
-    "unittest-xml-reporting<=3.2.0,>=2.0.0" \
-    boto3==1.16.34 \
-    hypothesis==4.53.2 \
-    expecttest==0.1.3 \
-    mypy==0.812 \
-    tb-nightly \
-    librosa>=0.6.2
+  as_jenkins pip install --progress-bar off -r /opt/conda/requirements-ci.txt
 
   # Install numba only on python-3.8 or below
   # For numba issue see https://github.com/pytorch/pytorch/issues/51511
 
@@ -0,0 +1,18 @@
+#!/bin/bash
+
+if [[ ${CUDNN_VERSION} == 8 ]]; then
+    # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
+    mkdir tmp_cudnn && cd tmp_cudnn
+    CUDNN_NAME="cudnn-linux-x86_64-8.3.2.44_cuda11.5-archive"
+    curl -OLs  https://developer.download.nvidia.com/compute/redist/cudnn/v8.3.2/local_installers/11.5/${CUDNN_NAME}.tar.xz
+    tar xf ${CUDNN_NAME}.tar.xz
+    cp -a ${CUDNN_NAME}/include/* /usr/include/
+    cp -a ${CUDNN_NAME}/include/* /usr/local/cuda/include/
+    cp -a ${CUDNN_NAME}/include/* /usr/include/x86_64-linux-gnu/
+
+    cp -a ${CUDNN_NAME}/lib/* /usr/local/cuda/lib64/
+    cp -a ${CUDNN_NAME}/lib/* /usr/lib/x86_64-linux-gnu/
+    cd ..
+    rm -rf tmp_cudnn
+    ldconfig
+fi
@@ -6,7 +6,7 @@ install_magma() {
     # "install" hipMAGMA into /opt/rocm/magma by copying after build
     git clone https://bitbucket.org/icl/magma.git
     pushd magma
-    # Mar 7 - Fixes memory leaks for many linalg UTs
+    # Fixes memory leaks of magma found while executing linalg UTs
     git checkout 5959b8783e45f1809812ed96ae762f38ee701972
     cp make.inc-examples/make.inc.hip-gcc-mkl make.inc
     echo 'LIBDIR += -L$(MKLROOT)/lib' >> make.inc
@@ -35,7 +35,7 @@ ver() {
 }
 
 # Map ROCm version to AMDGPU version
-declare -A AMDGPU_VERSIONS=( ["4.5.2"]="21.40.2" )
+declare -A AMDGPU_VERSIONS=( ["4.5.2"]="21.40.2" ["5.0"]="21.50" )
 
 install_ubuntu() {
     apt-get update
 
@@ -0,0 +1,210 @@
+# Python dependencies required for unit tests
+
+#awscli==1.6 #this breaks some platforms
+#Description: AWS command line interface
+#Pinned versions: 1.6
+#test that import:
+
+boto3==1.19.12
+#Description: AWS SDK for python
+#Pinned versions: 1.19.12, 1.16.34
+#test that import:
+
+click
+#Description: Command Line Interface Creation Kit
+#Pinned versions:
+#test that import:
+
+coremltools==5.0b5
+#Description: Apple framework for ML integration
+#Pinned versions: 5.0b5
+#test that import:
+
+#dataclasses #this breaks some platforms
+#Description: Provides decorators for auto adding special methods to user classes
+#Pinned versions:
+#test that import:
+
+expecttest==0.1.3
+#Description: method for writing tests where test framework auto populates
+# the expected output based on previous runs
+#Pinned versions: 0.1.3
+#test that import:
+
+flatbuffers==2.0
+#Description: cross platform serialization library
+#Pinned versions: 2.0
+#test that import:
+
+#future #this breaks linux-bionic-rocm4.5-py3.7
+#Description: compatibility layer between python 2 and python 3
+#Pinned versions:
+#test that import:
+
+hypothesis==4.53.2
+# Pin hypothesis to avoid flakiness: https://github.com/pytorch/pytorch/issues/31136
+#Description: advanced library for generating parametrized tests
+#Pinned versions: 3.44.6, 4.53.2
+#test that import: test_xnnpack_integration.py, test_pruning_op.py, test_nn.py
+
+junitparser==2.1.1
+#Description: junitparser handles JUnit/xUnit Result XML files
+#Pinned versions: 2.1.1
+#test that import:
+
+librosa>=0.6.2
+#Description: A python package for music and audio analysis
+#Pinned versions: >=0.6.2
+#test that import: test_spectral_ops.py
+
+#mkl #this breaks linux-bionic-rocm4.5-py3.7
+#Description: Intel oneAPI Math Kernel Library
+#Pinned versions:
+#test that import: test_profiler.py, test_public_bindings.py, test_testing.py,
+#test_nn.py, test_mkldnn.py, test_jit.py, test_fx_experimental.py,
+#test_autograd.py
+
+#mkl-devel
+# see mkl
+
+#mock # breaks ci/circleci: docker-pytorch-linux-xenial-py3-clang5-android-ndk-r19c
+#Description: A testing library that allows you to replace parts of your
+#system under test with mock objects
+#Pinned versions:
+#test that import: test_module_init.py, test_modules.py, test_nn.py,
+#test_testing.py
+
+#MonkeyType # breaks pytorch-xla-linux-bionic-py3.7-clang8
+#Description: collects runtime types of function arguments and return
+#values, and can automatically generate stub files
+#Pinned versions:
+#test that import:
+
+mypy==0.812
+# Pin MyPy version because new errors are likely to appear with each release
+#Description: linter
+#Pinned versions: 0.812
+#test that import: test_typing.py, test_type_hints.py
+
+#networkx
+#Description: creation, manipulation, and study of
+#the structure, dynamics, and functions of complex networks
+#Pinned versions: 2.0
+#test that import:
+
+#ninja
+#Description: build system.  Note that installing it from
+#here breaks things, so it is commented out
+#Pinned versions: 1.10.0.post1
+#test that import: run_test.py, test_cpp_extensions_aot.py,test_determination.py
+
+#numba
+#Description: Just-In-Time Compiler for Numerical Functions
+#Pinned versions: 0.54.1, 0.49.0, <=0.49.1
+#test that import: test_numba_integration.py
+
+#numpy
+#Description: Provides N-dimensional arrays and linear algebra
+#Pinned versions: 1.20
+#test that import: test_view_ops.py, test_unary_ufuncs.py, test_type_promotion.py,
+#test_type_info.py, test_torch.py, test_tensorexpr_pybind.py, test_tensorexpr.py,
+#test_tensorboard.py, test_tensor_creation_ops.py, test_static_runtime.py,
+#test_spectral_ops.py, test_sort_and_select.py, test_shape_ops.py,
+#test_segment_reductions.py, test_reductions.py, test_pruning_op.py,
+#test_overrides.py, test_numpy_interop.py, test_numba_integration.py
+#test_nn.py, test_namedtensor.py, test_linalg.py, test_jit_cuda_fuser.py,
+#test_jit.py, test_indexing.py, test_datapipe.py, test_dataloader.py,
+#test_binary_ufuncs.py
+
+#onnxruntime
+#Description: scoring engine for Open Neural Network Exchange (ONNX) models
+#Pinned versions: 1.9.0
+#test that import:
+
+#pillow
+#Description:  Python Imaging Library fork
+#Pinned versions:
+#test that import:
+
+#protobuf
+#Description:  Google’s data interchange format
+#Pinned versions:
+#test that import: test_tensorboard.py
+
+psutil
+#Description: information on running processes and system utilization
+#Pinned versions:
+#test that import: test_profiler.py, test_openmp.py, test_dataloader.py
+
+pytest
+#Description: testing framework
+#Pinned versions:
+#test that import: test_typing.py, test_cpp_extensions_aot.py, run_test.py
+
+#pytest-benchmark
+#Description: fixture for benchmarking code
+#Pinned versions: 3.2.3
+#test that import:
+
+#pytest-sugar
+#Description: shows failures and errors instantly
+#Pinned versions:
+#test that import:
+
+#PyYAML
+#Description: data serialization format
+#Pinned versions:
+#test that import:
+
+#requests
+#Description: HTTP library
+#Pinned versions:
+#test that import: test_type_promotion.py
+
+#rich
+#Description: rich text and beautiful formatting in the terminal
+#Pinned versions: 10.9.0
+#test that import:
+
+scikit-image
+#Description: image processing routines
+#Pinned versions:
+#test that import: test_nn.py
+
+#scikit-learn
+#Description: machine learning package
+#Pinned versions: 0.20.3
+#test that import:
+
+scipy==1.6.3
+# Pin SciPy because of failing distribution tests (see #60347)
+#Description: scientific python
+#Pinned versions: 1.6.3
+#test that import: test_unary_ufuncs.py, test_torch.py,test_tensor_creation_ops.py
+#test_spectral_ops.py, test_sparse_csr.py, test_reductions.py,test_nn.py
+#test_linalg.py, test_binary_ufuncs.py
+
+#tabulate
+#Description: Pretty-print tabular data
+#Pinned versions:
+#test that import:
+
+tb-nightly
+#Description: TensorBoard
+#Pinned versions:
+#test that import:
+
+#typing-extensions
+#Description: type hints for python
+#Pinned versions:
+#test that import:
+
+#virtualenv
+#Description: virtual environment for python
+#Pinned versions:
+#test that import:
+
+unittest-xml-reporting<=3.2.0,>=2.0.0
+#Description: saves unit test results to xml
+#Pinned versions: >=2.0.0,<=3.2.0
+#test that import:
@@ -1,12 +1,11 @@
 ARG UBUNTU_VERSION
 ARG CUDA_VERSION
-ARG CUDNN_VERSION
+ARG IMAGE_NAME
 
-FROM nvidia/cuda:${CUDA_VERSION}-cudnn${CUDNN_VERSION}-devel-ubuntu${UBUNTU_VERSION}
+FROM ${IMAGE_NAME}
 
 ARG UBUNTU_VERSION
 ARG CUDA_VERSION
-ARG CUDNN_VERSION
 
 ENV DEBIAN_FRONTEND noninteractive
 
@@ -27,8 +26,10 @@ RUN bash ./install_katex.sh && rm install_katex.sh
 # Install conda and other packages (e.g., numpy, pytest)
 ENV PATH /opt/conda/bin:$PATH
 ARG ANACONDA_PYTHON_VERSION
+ADD requirements-ci.txt /opt/conda/requirements-ci.txt
 ADD ./common/install_conda.sh install_conda.sh
 RUN bash ./install_conda.sh && rm install_conda.sh
+RUN rm /opt/conda/requirements-ci.txt
 
 # Install gcc
 ARG GCC_VERSION
@@ -99,5 +100,11 @@ ENV CUDA_PATH /usr/local/cuda
 # Install LLVM dev version (Defined in the pytorch/builder github repository)
 COPY --from=pytorch/llvm:9.0.1 /opt/llvm /opt/llvm
 
+# Install cuDNN when CUDNN_VERSION is 8 (string compare, so an empty value is not a test error)
+ARG CUDNN_VERSION
+ADD ./common/install_cudnn.sh install_cudnn.sh
+RUN if [ "${CUDNN_VERSION}" = "8" ]; then bash install_cudnn.sh; fi
+RUN rm install_cudnn.sh
+
 USER jenkins
 CMD ["bash"]
@@ -28,8 +28,10 @@ RUN bash ./install_user.sh && rm install_user.sh
 # Install conda and other packages (e.g., numpy, pytest)
 ENV PATH /opt/conda/bin:$PATH
 ARG ANACONDA_PYTHON_VERSION
+ADD requirements-ci.txt /opt/conda/requirements-ci.txt
 ADD ./common/install_conda.sh install_conda.sh
 RUN bash ./install_conda.sh && rm install_conda.sh
+RUN rm /opt/conda/requirements-ci.txt
 
 # Install gcc
 ARG GCC_VERSION
 
@@ -36,8 +36,10 @@ RUN bash ./install_katex.sh && rm install_katex.sh
 # Install conda and other packages (e.g., numpy, pytest)
 ENV PATH /opt/conda/bin:$PATH
 ARG ANACONDA_PYTHON_VERSION
+ADD requirements-ci.txt /opt/conda/requirements-ci.txt
 ADD ./common/install_conda.sh install_conda.sh
 RUN bash ./install_conda.sh && rm install_conda.sh
+RUN rm /opt/conda/requirements-ci.txt
 
 # Install gcc
 ARG GCC_VERSION