@@ -12,6 +12,15 @@ ARG TORCHVISION_VERSION
12
12
FROM gcr.io/kaggle-images/python-lightgbm-whl:${GPU_BASE_IMAGE_NAME}-${BASE_IMAGE_TAG}-${LIGHTGBM_VERSION} AS lightgbm_whl
13
13
FROM gcr.io/kaggle-images/python-torch-whl:${GPU_BASE_IMAGE_NAME}-${BASE_IMAGE_TAG}-${TORCH_VERSION} AS torch_whl
14
14
FROM ${BASE_IMAGE_REPO}/${GPU_BASE_IMAGE_NAME}:${BASE_IMAGE_TAG}
15
+ {{ else }}
16
+ FROM ${BASE_IMAGE_REPO}/${CPU_BASE_IMAGE_NAME}:${BASE_IMAGE_TAG}
17
+ {{ end }}
18
+
19
+ # Ensures shared libraries installed with conda can be found by the dynamic link loader.
20
+ ENV LIBRARY_PATH="$LIBRARY_PATH:/opt/conda/lib"
21
+ ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/opt/conda/lib"
22
+
23
+ {{ if eq .Accelerator "gpu" }}
15
24
ARG CUDA_MAJOR_VERSION
16
25
ARG CUDA_MINOR_VERSION
17
26
ENV CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION}
@@ -22,11 +31,10 @@ ENV PATH=/opt/bin:${PATH}
22
31
ENV LD_LIBRARY_PATH_NO_STUBS="$LD_LIBRARY_PATH"
23
32
ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/cuda/lib64/stubs"
24
33
RUN ln -s /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1
25
- {{ else }}
26
- FROM ${BASE_IMAGE_REPO}/${CPU_BASE_IMAGE_NAME}:${BASE_IMAGE_TAG}
27
34
{{ end }}
35
+
28
36
# Keep these variables in sync if base image is updated.
29
- ENV TENSORFLOW_VERSION=2.6.4
37
+ ENV TENSORFLOW_VERSION=2.9.2
30
38
31
39
# We need to redefine the ARG here to get the ARG value defined above the FROM instruction.
32
40
# See: https://docs.docker.com/engine/reference/builder/#understand-how-arg-and-from-interact
@@ -76,33 +84,42 @@ ENV PROJ_LIB=/opt/conda/share/proj
76
84
# the remaining pip commands: https://www.anaconda.com/using-pip-in-a-conda-environment/
77
85
RUN conda config --add channels nvidia && \
78
86
conda config --add channels rapidsai && \
87
+ conda install -c conda-forge mamba && \
79
88
# Base image channel order: conda-forge (highest priority), defaults.
80
89
# End state: rapidsai (highest priority), nvidia, conda-forge, defaults.
81
- conda install mkl cartopy=0.19 imagemagick=7.1 pyproj==3.1.0 && \
90
+ mamba install mkl cartopy=0.19 imagemagick=7.1 pyproj==3.1.0 && \
82
91
/tmp/clean-layer.sh
83
92
84
93
{{ if eq .Accelerator "gpu" }}
85
94
86
95
# b/232247930: uninstall pyarrow to avoid double installation with the GPU specific version.
87
- RUN pip uninstall -y pyarrow && \
88
- conda install cudf=21.10 cuml=21.10 cudatoolkit=$CUDA_MAJOR_VERSION.$CUDA_MINOR_VERSION && \
89
- /tmp/clean-layer.sh
90
- {{ end }}
96
+ # b/267180053: RapidsAI (cudf/cuml) are not compatible with the latest tensorflow cudatoolkit version.
97
+ # RUN pip uninstall -y pyarrow && \
98
+ # mamba install -y cudf cuml && \
99
+ # /tmp/clean-layer.sh
+ # NOTE(review): keep the `end` action on the next line even though it sits behind a Dockerfile `#`.
+ # The template syntax looks like Go text/template, which would still evaluate it, so it is
+ # presumably what closes the GPU `if` opened above the b/232247930 note -- confirm before removing.
100
+ # {{ end }}
91
101
92
102
# Install implicit
93
103
{{ if eq .Accelerator "gpu" }}
94
- RUN conda install implicit implicit-proc=*=gpu && \
104
+ RUN mamba install implicit implicit-proc=*=gpu && \
95
105
/tmp/clean-layer.sh
96
106
{{ else }}
97
- RUN conda install implicit && \
107
+ RUN mamba install implicit && \
98
108
/tmp/clean-layer.sh
99
109
{{ end}}
100
110
101
111
# Install PyTorch
102
112
{{ if eq .Accelerator "gpu" }}
103
113
COPY --from=torch_whl /tmp/whl/*.whl /tmp/torch/
104
- RUN conda install -c pytorch magma-cuda${CUDA_MAJOR_VERSION}${CUDA_MINOR_VERSION} && \
114
+ RUN mamba install -c pytorch magma-cuda${CUDA_MAJOR_VERSION}${CUDA_MINOR_VERSION} && \
105
115
pip install /tmp/torch/*.whl && \
116
+ # b/255757999 openmp (libomp.so) is a dependency of libtorchtext and libtorchaudio but
117
+ # the built from source versions don't seem to properly link it in. This forces the dep
118
+ # which makes sure that libomp is loaded when these libraries are loaded.
119
+ mamba install -y openmp && \
120
+ pip install patchelf && \
121
+ patchelf --add-needed libomp.so /opt/conda/lib/python3.7/site-packages/torchtext/lib/libtorchtext.so && \
122
+ patchelf --add-needed libomp.so /opt/conda/lib/python3.7/site-packages/torchaudio/lib/libtorchaudio.so && \
106
123
rm -rf /tmp/torch && \
107
124
/tmp/clean-layer.sh
108
125
{{ else }}
@@ -141,7 +158,8 @@ RUN pip install jax[cpu] && \
141
158
142
159
# Install mxnet
143
160
{{ if eq .Accelerator "gpu" }}
144
- RUN pip install mxnet-cu$CUDA_MAJOR_VERSION$CUDA_MINOR_VERSION && \
161
+ # No specific package for 11.3 minor versions, using 11.2 instead.
162
+ RUN pip install mxnet-cu112 && \
145
163
/tmp/clean-layer.sh
146
164
{{ else }}
147
165
RUN pip install mxnet && \
@@ -160,10 +178,11 @@ RUN pip install spacy && \
160
178
# Install GPU specific packages
161
179
{{ if eq .Accelerator "gpu" }}
162
180
# Install GPU-only packages
181
+ # No specific package for nnabla-ext-cuda 11.x minor versions, so the name is built from the
+ # major version plus a literal 0 (CUDA_MAJOR_VERSION=11 expands to nnabla-ext-cuda110).
163
182
RUN pip install pycuda \
164
183
pynvrtc \
165
184
pynvml \
166
- nnabla-ext-cuda$CUDA_MAJOR_VERSION$CUDA_MINOR_VERSION && \
185
+ nnabla-ext-cuda${CUDA_MAJOR_VERSION}0 && \
167
186
/tmp/clean-layer.sh
168
187
{{ end }}
169
188
@@ -176,9 +195,9 @@ RUN pip install pysal \
176
195
# Use `conda install -c h2oai h2o` once Python 3.7 version is released to conda.
177
196
apt-get install -y default-jre-headless && \
178
197
pip install -f https://h2o-release.s3.amazonaws.com/h2o/latest_stable_Py.html h2o \
179
- tensorflow-gcs-config==2.6.0 \
180
- tensorflow-addons==0.14.0 \
181
- tensorflow_decision_forests==0.2.0 && \
198
+ " tensorflow-gcs-config<=${TENSORFLOW_VERSION}" \
199
+ tensorflow-addons==0.17.1 \
200
+ tensorflow_decision_forests==0.2.7 && \
182
201
/tmp/clean-layer.sh
183
202
184
203
RUN apt-get install -y libfreetype6-dev && \
@@ -393,6 +412,8 @@ RUN pip install cython \
393
412
mlcrate && \
394
413
/tmp/clean-layer.sh
395
414
415
+
416
+ # Fix qgrid by pinning ipywidgets https://github.com/quantopian/qgrid/issues/376
396
417
RUN pip install bleach \
397
418
certifi \
398
419
cycler \
@@ -402,7 +423,7 @@ RUN pip install bleach \
402
423
ipykernel \
403
424
ipython \
404
425
ipython-genutils \
405
- ipywidgets \
426
+ ipywidgets==7.7.1 \
406
427
isoweek \
407
428
jedi \
408
429
jsonschema \
@@ -459,6 +480,10 @@ RUN pip install bleach \
459
480
#
460
481
###########
461
482
483
+ # dlib has a libmkl incompatibility:
484
+ # test_dlib_face_detector (test_dlib.TestDLib) ... INTEL MKL ERROR: /opt/conda/bin/../lib/libmkl_avx512.so.2: undefined symbol: mkl_sparse_optimize_bsr_trsm_i8.
485
+ # Intel MKL FATAL ERROR: Cannot load libmkl_avx512.so.2 or libmkl_def.so.2.
486
+ # nnabla breaks protobuf compatibility:
462
487
RUN pip install flashtext \
463
488
wandb \
464
489
# b/214080882 blake3 0.3.0 is not compatible with vaex.
@@ -505,10 +530,8 @@ RUN pip install flashtext \
505
530
transformers \
506
531
# b/232247930 >= 2.2.0 requires pyarrow >= 6.0.0 which conflicts with dependencies for rapidsai 0.21.*
507
532
datasets==2.1.0 \
508
- dlib \
509
533
kaggle-environments \
510
534
geopandas \
511
- nnabla \
512
535
vowpalwabbit \
513
536
pydub \
514
537
pydegensac \
@@ -600,6 +623,9 @@ RUN jupyter-nbextension disable nb_conda --py --sys-prefix && \
600
623
jupyter-serverextension disable nb_conda --py --sys-prefix && \
601
624
python -m nb_conda_kernels.install --disable
602
625
626
+ # Force only one libcusolver: swap the copy under /opt/conda/lib for a symlink to the CUDA toolkit's library.
+ # Limited to GPU builds. NOTE(review): /usr/local/cuda is only referenced in the GPU branch of this
+ # template, so on CPU builds the unguarded `rm` would presumably fail the build -- confirm.
627
+ {{ if eq .Accelerator "gpu" }}
+ RUN rm /opt/conda/bin/../lib/libcusolver.so.11 && ln -s /usr/local/cuda/lib64/libcusolver.so.11 /opt/conda/bin/../lib/libcusolver.so.11
+ {{ end }}
628
+
603
629
# Set backend for matplotlib
604
630
ENV MPLBACKEND "agg"
605
631
0 commit comments