Kaggle · djherbis · Jan 3, 2025 · Dec 19, 2024 · Dec 19, 2024 · Dec 20, 2024
diff --git a/Dockerfile.tmpl b/Dockerfile.tmpl
@@ -32,8 +32,16 @@ RUN uv pip uninstall --system google-cloud-bigquery-storage
 # to avoid affecting the larger build, we'll post-install it.
 RUN uv pip install --no-build-isolation --system "git+https://github.com/Kaggle/learntools"
 
+# b/385161357 Latest Colab uses tf 2.17.1, but tensorflow-decision-forests only has a release built for tf 2.17.0.
+# Instead, we install tfdf with --no-deps, listing its deps explicitly, and hope the 2.17.0-compatible tfdf works with tf 2.17.1.
+RUN uv pip install --system --no-deps tensorflow-decision-forests==1.10.0 wurlitzer==3.1.1 ydf==0.9.0
+
+# b/385145217 Latest Colab lacks mkl numpy, install it.
+RUN uv pip install --system --force-reinstall -i https://pypi.anaconda.org/intel/simple numpy
+
 # b/328788268 We install an incompatible pair of libs (shapely<2, libpysal==4.9.2) so we can't put this one in the requirements.txt
-RUN uv pip install --system  "libpysal==4.9.2"
+# Newer daal4py requires tbb>=2022, but installing libpysal downgrades it for some reason, so we pin tbb in the same install.
+RUN uv pip install --system "tbb>=2022" "libpysal==4.9.2"
 
 # Adding non-package dependencies:
 

diff --git a/config.txt b/config.txt
@@ -1,5 +1,5 @@
 BASE_IMAGE=us-docker.pkg.dev/colab-images/public/runtime
-BASE_IMAGE_TAG=release-colab_20240920-060127_RC00
+BASE_IMAGE_TAG=release-colab_20241217-060132_RC00
 LIGHTGBM_VERSION=4.5.0
 CUDA_MAJOR_VERSION=12
 CUDA_MINOR_VERSION=2
diff --git a/kaggle_requirements.txt b/kaggle_requirements.txt
@@ -1,4 +1,5 @@
 # Please keep this in alphabetical order
+--extra-index-url https://pypi.nvidia.com
 Altair>=5.4.0
 Babel
 Boruta
@@ -23,6 +24,7 @@ catboost
 category-encoders
 cesium
 comm
+cuml-cu12
 cytoolz
 dask-expr
 # Older versions of datasets fail with "Loading a dataset cached in a LocalFileSystem is not supported"
@@ -83,6 +85,7 @@ libpysal<=4.9.2
 lime
 line_profiler
 mamba
+matplotlib<3.8
 mlcrate
 mne
 mpld3
@@ -140,7 +143,8 @@ squarify
 tensorflow-cloud
 tensorflow-io
 tensorflow-text
-tensorflow_decision_forests
+# b/385161357: tf 2.17.1 does not have matching tensorflow_decision_forests release
+# tensorflow_decision_forests
 timm
 torchinfo
 torchmetrics

diff --git a/tests/test_cuml.py b/tests/test_cuml.py
@@ -6,7 +6,6 @@
 class TestCuml(unittest.TestCase):
     @gpu_test
     @p100_exempt # b/342143152: cuML(>=24.4v) is inompatible with p100 GPUs.
-    @unittest.skip("b/381287748 cuML is not installed in Colab.")
     def test_pca_fit_transform(self):
         import unittest
         import numpy as np

diff --git a/tests/test_numpy.py b/tests/test_numpy.py
@@ -3,9 +3,10 @@
 from distutils.version import StrictVersion
 
 import numpy as np
-from numpy.distutils.system_info import get_info
+import io
+from contextlib import redirect_stdout
 
-class TestNumpy(unittest.TestCase):   
+class TestNumpy(unittest.TestCase):
     def test_version(self):
         # b/370860329: newer versions are not capable with current tensorflow
         self.assertEqual(StrictVersion(np.__version__), StrictVersion("1.26.4")) 
@@ -18,5 +19,13 @@ def test_array(self):
     # Numpy must be linked to the MKL. (Occasionally, a third-party package will muck up the installation
     # and numpy will be reinstalled with an OpenBLAS backing.)
     def test_mkl(self):
-        # This will throw an exception if the MKL is not linked correctly or return an empty dict.
-        self.assertTrue(get_info("blas_mkl"))
+        try:
+            from numpy.distutils.system_info import get_info
+            # get_info raises if the MKL is not linked correctly, or returns an empty dict.
+            self.assertTrue(get_info("blas_mkl"))
+        except Exception:
+            # Fall back to np.show_config() when numpy.distutils is unavailable or reports no MKL.
+            config_out = io.StringIO()
+            with redirect_stdout(config_out):
+                np.show_config()
+            self.assertIn("mkl_rt", config_out.getvalue())
diff --git a/tpu/Dockerfile b/tpu/Dockerfile
@@ -6,12 +6,8 @@ FROM $BASE_IMAGE
 # See: https://docs.docker.com/engine/reference/builder/#understand-how-arg-and-from-interact
 ARG PYTHON_WHEEL_VERSION
 ARG PYTHON_VERSION_PATH
-ARG TF_LINUX_WHEEL_VERSION
 ARG TORCH_LINUX_WHEEL_VERSION
 ARG TORCH_VERSION
-ARG TENSORFLOW_VERSION
-ARG TF_LIBTPU_VERSION
-ARG JAX_VERSION
 ARG TORCHVISION_VERSION
 ARG TORCHAUDIO_VERSION
 
@@ -67,18 +63,13 @@ RUN envsubst < /kaggle_requirements.txt > /requirements.txt
 RUN curl -LsSf https://astral.sh/uv/install.sh | sh
 RUN export PATH="${HOME}/.local/bin:${PATH}" && uv pip install --system -r /requirements.txt --prerelease=allow --find-links https://storage.googleapis.com/jax-releases/libtpu_releases.html && \
     /tmp/clean-layer.sh
-
-# Tensorflow libtpu:
-RUN curl --output /usr/local/lib/python3.10/site-packages/libtpu/libtpu.so https://storage.googleapis.com/cloud-tpu-tpuvm-artifacts/libtpu/${TF_LIBTPU_VERSION}/libtpu.so
+ENV PATH="/root/.local/bin:${PATH}"
 
 # Kaggle Model Hub patches:
 ADD patches/kaggle_module_resolver.py /usr/local/lib/${PYTHON_VERSION_PATH}/site-packages/tensorflow_hub/kaggle_module_resolver.py
 RUN sed -i '/from tensorflow_hub import uncompressed_module_resolver/a from tensorflow_hub import kaggle_module_resolver' /usr/local/lib/${PYTHON_VERSION_PATH}/site-packages/tensorflow_hub/config.py
 RUN sed -i '/_install_default_resolvers()/a \ \ registry.resolver.add_implementation(kaggle_module_resolver.KaggleFileResolver())' /usr/local/lib/${PYTHON_VERSION_PATH}/site-packages/tensorflow_hub/config.py
 
-# Monkey-patch the default TPU to the local (TPU VM).
-RUN sed -i 's/tpu=None,/tpu="local",/' /usr/local/lib/${PYTHON_VERSION_PATH}/site-packages/tensorflow/python/distribute/cluster_resolver/tpu/tpu_cluster_resolver.py
-
 # Set these env vars so that they don't produce errs calling the metadata server to load them:
 ENV TPU_ACCELERATOR_TYPE=v3-8
 ENV TPU_PROCESS_ADDRESSES=local
@@ -92,7 +83,6 @@ LABEL build-date=$BUILD_DATE
 ENV GIT_COMMIT=${GIT_COMMIT}
 ENV BUILD_DATE=${BUILD_DATE}
 
-LABEL tensorflow-version=$TENSORFLOW_VERSION
 LABEL kaggle-lang=python
 
 # Correlate current release with the git hash inside the kernel editor by running `!cat /etc/git_commit`.

diff --git a/tpu/config.txt b/tpu/config.txt
@@ -1,17 +1,12 @@
 BASE_IMAGE=python:3.10
 PYTHON_WHEEL_VERSION=cp310
 PYTHON_VERSION_PATH=python3.10
-# gsutil ls gs://cloud-tpu-tpuvm-artifacts/tensorflow
-# https://cloud.google.com/tpu/docs/supported-tpu-configurations#libtpu_versions
-TENSORFLOW_VERSION=2.16.1
-TF_LIBTPU_VERSION=1.10.1
 TF_LINUX_WHEEL_VERSION=manylinux_2_17_x86_64.manylinux2014_x86_64
-JAX_VERSION=0.4.23
-# gsutil ls gs://pytorch-xla-releases/wheels/tpuvm/* | grep libtpu | grep -v -E ".*rc[0-9].*"
+# gsutil ls gs://pytorch-xla-releases/wheels/tpuvm/* | grep libtpu | grep torch_xla | grep -v -E ".*rc[0-9].*" | sed 's/.*torch_xla-\(.*\)+libtpu.*/\1/' | sort -rV
 # Supports nightly
-TORCH_VERSION=2.4.0
+TORCH_VERSION=2.5.0
 # https://github.com/pytorch/audio supports nightly
-TORCHAUDIO_VERSION=2.4.0
+TORCHAUDIO_VERSION=2.5.0
 # https://github.com/pytorch/vision supports nightly
-TORCHVISION_VERSION=0.19.0
+TORCHVISION_VERSION=0.20.0
 TORCH_LINUX_WHEEL_VERSION=manylinux_2_28_x86_64
diff --git a/tpu/requirements.txt b/tpu/requirements.txt
@@ -1,5 +1,8 @@
+# TPU Utils
+tpu-info
 # Tensorflow packages
-https://storage.googleapis.com/cloud-tpu-tpuvm-artifacts/tensorflow/tf-${TENSORFLOW_VERSION}/tensorflow-${TENSORFLOW_VERSION}-${PYTHON_WHEEL_VERSION}-${PYTHON_WHEEL_VERSION}-${TF_LINUX_WHEEL_VERSION}.whl
+tensorflow-tpu>=2.18.0
+-f https://storage.googleapis.com/libtpu-tf-releases/index.html
 tensorflow_hub
 tensorflow-io
 tensorflow-probability
@@ -9,7 +12,7 @@ https://storage.googleapis.com/pytorch-xla-releases/wheels/tpuvm/torch_xla-${TOR
 torchaudio==${TORCHAUDIO_VERSION}
 torchvision==${TORCHVISION_VERSION}
 # Jax packages
-jax[tpu]==${JAX_VERSION}
+jax[tpu]>=0.4.34
 distrax
 flax
 git+https://github.com/deepmind/dm-haiku
@@ -34,7 +37,6 @@ albumentations
 diffusers
 einops
 fastparquet
-librosa
 matplotlib
 opencv-python
 opencv-python-headless