Skip to content

Commit b39b812

Browse files
committed
Upgrade to PyTorch 1.8.1, TensorFlow 2.5.0 and Rapids 21.08
And CUDA 11.2 http://b/181966788
1 parent bc252a2 commit b39b812

File tree

2 files changed

+26
-22
lines changed

2 files changed

+26
-22
lines changed

Dockerfile

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
ARG BASE_TAG=m78
2-
ARG TENSORFLOW_VERSION=2.4.1
2+
ARG TENSORFLOW_VERSION=2.5.0
33

44
FROM gcr.io/deeplearning-platform-release/base-cpu:${BASE_TAG}
55

@@ -47,7 +47,7 @@ RUN conda config --add channels conda-forge && \
4747
conda install cartopy=0.19 imagemagick=7.0 pyproj==3.1.0 pysal==2.1.0 && \
4848
/tmp/clean-layer.sh
4949

50-
RUN pip install torch==1.7.1+cpu torchvision==0.8.2+cpu torchaudio==0.7.2 torchtext==0.8.1 -f https://download.pytorch.org/whl/torch_stable.html && \
50+
RUN pip install torch==1.8.1+cpu torchvision==0.9.1+cpu torchaudio==0.8.1 torchtext==0.9.1 -f https://download.pytorch.org/whl/torch_stable.html && \
5151
/tmp/clean-layer.sh
5252

5353
RUN pip install seaborn python-dateutil dask python-igraph && \
@@ -60,9 +60,9 @@ RUN pip install seaborn python-dateutil dask python-igraph && \
6060
/tmp/clean-layer.sh
6161

6262
RUN pip install tensorflow==${TENSORFLOW_VERSION} && \
63-
pip install tensorflow-gcs-config==2.4.0 && \
64-
pip install tensorflow-addons==0.12.1 && \
65-
pip install tensorflow_probability==0.12.2 && \
63+
pip install tensorflow-gcs-config==${TENSORFLOW_VERSION} && \
64+
pip install tensorflow-addons==0.13.0 && \
65+
pip install tensorflow_probability==0.13.0 && \
6666
/tmp/clean-layer.sh
6767

6868
RUN apt-get install -y libfreetype6-dev && \
@@ -330,8 +330,7 @@ RUN pip install bleach && \
330330
pip install widgetsnbextension && \
331331
pip install pyarrow && \
332332
pip install feather-format && \
333-
# fastai >= 2.3.1 upgrades pytorch/torchvision. upgrade of pytorch will be handled in b/181966788
334-
pip install fastai==2.2.7 && \
333+
pip install fastai && \
335334
pip install allennlp && \
336335
# https://b.corp.google.com/issues/184685619#comment9: 3.9.0 is causing a major performance degradation with spacy 2.3.5
337336
pip install importlib-metadata==3.4.0 && \

gpu.Dockerfile

Lines changed: 20 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
ARG BASE_TAG=staging
22

3-
FROM nvidia/cuda:11.0-cudnn8-devel-ubuntu18.04 AS nvidia
3+
FROM nvidia/cuda:11.2.2-cudnn8-devel-ubuntu18.04 AS nvidia
44
FROM gcr.io/kaggle-images/python:${BASE_TAG}
55

66
ADD clean-layer.sh /tmp/clean-layer.sh
@@ -13,7 +13,7 @@ COPY --from=nvidia /etc/apt/trusted.gpg /etc/apt/trusted.gpg.d/cuda.gpg
1313
RUN sed -i 's/deb https:\/\/developer.download.nvidia.com/deb http:\/\/developer.download.nvidia.com/' /etc/apt/sources.list.d/*.list
1414

1515
ENV CUDA_MAJOR_VERSION=11
16-
ENV CUDA_MINOR_VERSION=0
16+
ENV CUDA_MINOR_VERSION=2
1717
ENV CUDA_VERSION=$CUDA_MAJOR_VERSION.$CUDA_MINOR_VERSION
1818
LABEL com.nvidia.volumes.needed="nvidia_driver"
1919
LABEL com.nvidia.cuda.version="${CUDA_VERSION}"
@@ -27,7 +27,9 @@ ENV LD_LIBRARY_PATH_NO_STUBS="/usr/local/nvidia/lib64:/usr/local/cuda/lib64:$LD_
2727
ENV LD_LIBRARY_PATH="/usr/local/nvidia/lib64:/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH"
2828
ENV NVIDIA_VISIBLE_DEVICES=all
2929
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
30-
ENV NVIDIA_REQUIRE_CUDA="cuda>=$CUDA_MAJOR_VERSION.$CUDA_MINOR_VERSION"
30+
# With CUDA enhanced compatibility, applications compiled with CUDA 11.1 can be run on the driver associated with CUDA 11.0 (i.e. R450).
31+
# See: https://docs.nvidia.com/deploy/cuda-compatibility/index.html#existing-apps-minor-versions
32+
ENV NVIDIA_REQUIRE_CUDA="cuda>=$CUDA_MAJOR_VERSION"
3133
RUN apt-get update && apt-get install -y --no-install-recommends \
3234
cuda-cupti-$CUDA_VERSION \
3335
cuda-cudart-$CUDA_VERSION \
@@ -37,10 +39,10 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
3739
cuda-nvml-dev-$CUDA_VERSION \
3840
cuda-minimal-build-$CUDA_VERSION \
3941
cuda-command-line-tools-$CUDA_VERSION \
40-
libcudnn8=8.0.4.30-1+cuda$CUDA_VERSION \
41-
libcudnn8-dev=8.0.4.30-1+cuda$CUDA_VERSION \
42-
libnccl2=2.7.8-1+cuda$CUDA_VERSION \
43-
libnccl-dev=2.7.8-1+cuda$CUDA_VERSION && \
42+
libcudnn8=8.1.1.33-1+cuda$CUDA_VERSION \
43+
libcudnn8-dev=8.1.1.33-1+cuda$CUDA_VERSION \
44+
libnccl2=2.8.4-1+cuda$CUDA_VERSION \
45+
libnccl-dev=2.8.4-1+cuda$CUDA_VERSION && \
4446
ln -s /usr/local/cuda-$CUDA_VERSION /usr/local/cuda && \
4547
ln -s /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1 && \
4648
/tmp/clean-layer.sh
@@ -55,15 +57,18 @@ RUN apt-get install -y ocl-icd-libopencl1 clinfo libboost-all-dev && \
5557
# the remaining pip commands: https://www.anaconda.com/using-pip-in-a-conda-environment/
5658
# However, because this image is based on the CPU image, this isn't possible but better
5759
# to put them at the top of this file to minimize conflicts.
58-
RUN conda install cudf=21.06 cuml=21.06 cudatoolkit=$CUDA_VERSION && \
60+
RUN conda install cudf=21.08 cuml=21.08 cudatoolkit=$CUDA_VERSION && \
5961
/tmp/clean-layer.sh
6062

6163
# Install Pytorch and torchvision with GPU support.
62-
# Note: torchtext and torchaudio do not require a separate GPU package.
63-
RUN pip install torch==1.7.1+cu$CUDA_MAJOR_VERSION$CUDA_MINOR_VERSION torchvision==0.8.2+cu$CUDA_MAJOR_VERSION$CUDA_MINOR_VERSION -f https://download.pytorch.org/whl/torch_stable.html && \
64+
# Note: torchtext and torchaudio do not require a separate GPU-specific package.
65+
# Replace `cu111` with `cu$CUDA_MAJOR_VERSION$CUDA_MINOR_VERSION` once a build for CUDA 11.2 is released.
66+
# Introduced in CUDA 11.1, CUDA Enhanced Compatibility leverages semantic versioning across components in the CUDA Toolkit: an application can be built for one CUDA minor release (such as 11.1) and work across all future minor releases within the major family (such as 11.x).
67+
# See: https://docs.nvidia.com/deploy/cuda-compatibility/index.html#overview
68+
RUN pip install torch==1.8.1+cu111 torchvision==0.9.1+cu111 -f https://download.pytorch.org/whl/torch_stable.html && \
6469
/tmp/clean-layer.sh
6570

66-
# Install LightGBM with GPU
71+
# Install LightGBM with GPU support
6772
RUN pip uninstall -y lightgbm && \
6873
cd /usr/local/src && \
6974
git clone --recursive https://github.com/microsoft/LightGBM && \
@@ -79,7 +84,8 @@ RUN pip uninstall -y lightgbm && \
7984
/tmp/clean-layer.sh
8085

8186
# Install JAX (Keep JAX version in sync with CPU image)
82-
RUN pip install jax==0.2.16 jaxlib==0.1.68+cuda$CUDA_MAJOR_VERSION$CUDA_MINOR_VERSION -f https://storage.googleapis.com/jax-releases/jax_releases.html && \
87+
# TODO(b/181966788) Replace `cuda111` with `cuda$CUDA_MAJOR_VERSION$CUDA_MINOR_VERSION` once new version is out.
88+
RUN pip install jax==0.2.16 jaxlib==0.1.68+cuda111 -f https://storage.googleapis.com/jax-releases/jax_releases.html && \
8389
/tmp/clean-layer.sh
8490

8591
# Reinstall packages with a separate version for GPU support.
@@ -90,9 +96,8 @@ RUN pip uninstall -y mxnet && \
9096
# Install GPU-only packages
9197
RUN pip install pycuda && \
9298
pip install pynvrtc && \
93-
# b/190622765 latest version is causing issue. nnabla fixed it in https://github.com/sony/nnabla/issues/892, waiting for new release before we can remove this pin.
94-
pip install pynvml==8.0.4 && \
95-
pip install nnabla-ext-cuda$CUDA_MAJOR_VERSION$CUDA_MINOR_VERSION && \
99+
# TODO(b/181966788) Replace `110` with `$CUDA_MAJOR_VERSION$CUDA_MINOR_VERSION` once a new version of nnabla-ext-cuda is out.
100+
pip install nnabla-ext-cuda110 && \
96101
/tmp/clean-layer.sh
97102

98103
# Re-add TensorBoard Jupyter extension patch

0 commit comments

Comments
 (0)