Skip to content

Update Colab base image to release-colab_20241217-060132_RC00 #1458

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 15 commits into from
Jan 3, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion Dockerfile.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,16 @@ RUN uv pip uninstall --system google-cloud-bigquery-storage
# to avoid affecting the larger build, we'll post-install it.
RUN uv pip install --no-build-isolation --system "git+https://github.com/Kaggle/learntools"

# b/385161357 Latest Colab uses tf 2.17.1, but tf decision forests only has a version for 2.17.0.
# Instead, we'll install tfdf with --no-deps, listing its deps explicitly, and hope that the tf 2.17.0-compatible tfdf works with tf 2.17.1.
RUN uv pip install --system --no-deps tensorflow-decision-forests==1.10.0 wurlitzer==3.1.1 ydf==0.9.0

# b/385145217 Latest Colab lacks mkl numpy, install it.
RUN uv pip install --system --force-reinstall -i https://pypi.anaconda.org/intel/simple numpy

# b/328788268 We install an incompatible pair of libs (shapely<2, libpysal==4.9.2) so we can't put this one in the requirements.txt
RUN uv pip install --system "libpysal==4.9.2"
# newer daal4py requires tbb>=2022, but libpysal is downgrading it for some reason
RUN uv pip install --system "tbb>=2022" "libpysal==4.9.2"

# Adding non-package dependencies:

Expand Down
2 changes: 1 addition & 1 deletion config.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
BASE_IMAGE=us-docker.pkg.dev/colab-images/public/runtime
BASE_IMAGE_TAG=release-colab_20240920-060127_RC00
BASE_IMAGE_TAG=release-colab_20241217-060132_RC00
LIGHTGBM_VERSION=4.5.0
CUDA_MAJOR_VERSION=12
CUDA_MINOR_VERSION=2
6 changes: 5 additions & 1 deletion kaggle_requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# Please keep this in alphabetical order
--extra-index-url https://pypi.nvidia.com
Altair>=5.4.0
Babel
Boruta
Expand All @@ -23,6 +24,7 @@ catboost
category-encoders
cesium
comm
cuml-cu12
cytoolz
dask-expr
# Older versions of datasets fail with "Loading a dataset cached in a LocalFileSystem is not supported"
Expand Down Expand Up @@ -83,6 +85,7 @@ libpysal<=4.9.2
lime
line_profiler
mamba
matplotlib<3.8
mlcrate
mne
mpld3
Expand Down Expand Up @@ -140,7 +143,8 @@ squarify
tensorflow-cloud
tensorflow-io
tensorflow-text
tensorflow_decision_forests
# b/385161357: tf 2.17.1 does not have matching tensorflow_decision_forests release
# tensorflow_decision_forests
timm
torchinfo
torchmetrics
Expand Down
1 change: 0 additions & 1 deletion tests/test_cuml.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
class TestCuml(unittest.TestCase):
@gpu_test
@p100_exempt # b/342143152: cuML(>=24.4v) is incompatible with p100 GPUs.
@unittest.skip("b/381287748 cuML is not installed in Colab.")
def test_pca_fit_transform(self):
import unittest
import numpy as np
Expand Down
17 changes: 13 additions & 4 deletions tests/test_numpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,10 @@
from distutils.version import StrictVersion

import numpy as np
from numpy.distutils.system_info import get_info
import io
from contextlib import redirect_stdout

class TestNumpy(unittest.TestCase):
class TestNumpy(unittest.TestCase):
def test_version(self):
# b/370860329: newer versions are not compatible with the current tensorflow
self.assertEqual(StrictVersion(np.__version__), StrictVersion("1.26.4"))
Expand All @@ -18,5 +19,13 @@ def test_array(self):
# Numpy must be linked to the MKL. (Occasionally, a third-party package will muck up the installation
# and numpy will be reinstalled with an OpenBLAS backing.)
def test_mkl(self):
# This will throw an exception if the MKL is not linked correctly or return an empty dict.
self.assertTrue(get_info("blas_mkl"))
try:
    # Imported inside the try so that a failing import is handled by the fallback below.
    from numpy.distutils.system_info import get_info
    # This will throw an exception if the MKL is not linked correctly or return an empty dict.
    self.assertTrue(get_info("blas_mkl"))
except Exception:
    # Catch Exception (which includes ImportError and the AssertionError raised by assertTrue)
    # instead of using a bare except, so KeyboardInterrupt/SystemExit still propagate.
    # Fallback to check if mkl is present via show_config()
    config_out = io.StringIO()
    with redirect_stdout(config_out):
        np.show_config()
    self.assertIn("mkl_rt", config_out.getvalue())
12 changes: 1 addition & 11 deletions tpu/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,8 @@ FROM $BASE_IMAGE
# See: https://docs.docker.com/engine/reference/builder/#understand-how-arg-and-from-interact
ARG PYTHON_WHEEL_VERSION
ARG PYTHON_VERSION_PATH
ARG TF_LINUX_WHEEL_VERSION
ARG TORCH_LINUX_WHEEL_VERSION
ARG TORCH_VERSION
ARG TENSORFLOW_VERSION
ARG TF_LIBTPU_VERSION
ARG JAX_VERSION
ARG TORCHVISION_VERSION
ARG TORCHAUDIO_VERSION

Expand Down Expand Up @@ -67,18 +63,13 @@ RUN envsubst < /kaggle_requirements.txt > /requirements.txt
RUN curl -LsSf https://astral.sh/uv/install.sh | sh
RUN export PATH="${HOME}/.local/bin:${PATH}" && uv pip install --system -r /requirements.txt --prerelease=allow --find-links https://storage.googleapis.com/jax-releases/libtpu_releases.html && \
/tmp/clean-layer.sh

# Tensorflow libtpu:
RUN curl --output /usr/local/lib/python3.10/site-packages/libtpu/libtpu.so https://storage.googleapis.com/cloud-tpu-tpuvm-artifacts/libtpu/${TF_LIBTPU_VERSION}/libtpu.so
ENV PATH="${HOME}/.local/bin:${PATH}"

# Kaggle Model Hub patches:
# Copy the Kaggle resolver module into the installed tensorflow_hub package.
# COPY is used rather than ADD because this is a plain local file (no archive extraction or remote fetch is needed).
COPY patches/kaggle_module_resolver.py /usr/local/lib/${PYTHON_VERSION_PATH}/site-packages/tensorflow_hub/kaggle_module_resolver.py
# Append an import of kaggle_module_resolver after the uncompressed_module_resolver import line in tensorflow_hub/config.py.
RUN sed -i '/from tensorflow_hub import uncompressed_module_resolver/a from tensorflow_hub import kaggle_module_resolver' /usr/local/lib/${PYTHON_VERSION_PATH}/site-packages/tensorflow_hub/config.py
# Append a line registering KaggleFileResolver after each line matching _install_default_resolvers() in the same file.
RUN sed -i '/_install_default_resolvers()/a \ \ registry.resolver.add_implementation(kaggle_module_resolver.KaggleFileResolver())' /usr/local/lib/${PYTHON_VERSION_PATH}/site-packages/tensorflow_hub/config.py

# Monkey-patch the default TPU to the local (TPU VM).
RUN sed -i 's/tpu=None,/tpu="local",/' /usr/local/lib/${PYTHON_VERSION_PATH}/site-packages/tensorflow/python/distribute/cluster_resolver/tpu/tpu_cluster_resolver.py

# Set these env vars explicitly so that nothing errors out trying to load them from the metadata server:
ENV TPU_ACCELERATOR_TYPE=v3-8
ENV TPU_PROCESS_ADDRESSES=local
Expand All @@ -92,7 +83,6 @@ LABEL build-date=$BUILD_DATE
ENV GIT_COMMIT=${GIT_COMMIT}
ENV BUILD_DATE=${BUILD_DATE}

LABEL tensorflow-version=$TENSORFLOW_VERSION
LABEL kaggle-lang=python

# Correlate current release with the git hash inside the kernel editor by running `!cat /etc/git_commit`.
Expand Down
13 changes: 4 additions & 9 deletions tpu/config.txt
Original file line number Diff line number Diff line change
@@ -1,17 +1,12 @@
BASE_IMAGE=python:3.10
PYTHON_WHEEL_VERSION=cp310
PYTHON_VERSION_PATH=python3.10
# gsutil ls gs://cloud-tpu-tpuvm-artifacts/tensorflow
# https://cloud.google.com/tpu/docs/supported-tpu-configurations#libtpu_versions
TENSORFLOW_VERSION=2.16.1
TF_LIBTPU_VERSION=1.10.1
TF_LINUX_WHEEL_VERSION=manylinux_2_17_x86_64.manylinux2014_x86_64
JAX_VERSION=0.4.23
# gsutil ls gs://pytorch-xla-releases/wheels/tpuvm/* | grep libtpu | grep -v -E ".*rc[0-9].*"
# gsutil ls gs://pytorch-xla-releases/wheels/tpuvm/* | grep libtpu | grep torch_xla | grep -v -E ".*rc[0-9].*" | sed 's/.*torch_xla-\(.*\)+libtpu.*/\1/' | sort -rV
# Supports nightly
TORCH_VERSION=2.4.0
TORCH_VERSION=2.5.0
# https://github.com/pytorch/audio supports nightly
TORCHAUDIO_VERSION=2.4.0
TORCHAUDIO_VERSION=2.5.0
# https://github.com/pytorch/vision supports nightly
TORCHVISION_VERSION=0.19.0
TORCHVISION_VERSION=0.20.0
TORCH_LINUX_WHEEL_VERSION=manylinux_2_28_x86_64
8 changes: 5 additions & 3 deletions tpu/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# TPU Utils
tpu-info
# Tensorflow packages
https://storage.googleapis.com/cloud-tpu-tpuvm-artifacts/tensorflow/tf-${TENSORFLOW_VERSION}/tensorflow-${TENSORFLOW_VERSION}-${PYTHON_WHEEL_VERSION}-${PYTHON_WHEEL_VERSION}-${TF_LINUX_WHEEL_VERSION}.whl
tensorflow-tpu>=2.18.0
-f https://storage.googleapis.com/libtpu-tf-releases/index.html
tensorflow_hub
tensorflow-io
tensorflow-probability
Expand All @@ -9,7 +12,7 @@ https://storage.googleapis.com/pytorch-xla-releases/wheels/tpuvm/torch_xla-${TOR
torchaudio==${TORCHAUDIO_VERSION}
torchvision==${TORCHVISION_VERSION}
# Jax packages
jax[tpu]==${JAX_VERSION}
jax[tpu]>=0.4.34
distrax
flax
git+https://github.com/deepmind/dm-haiku
Expand All @@ -34,7 +37,6 @@ albumentations
diffusers
einops
fastparquet
librosa
matplotlib
opencv-python
opencv-python-headless
Expand Down
Loading