Kaggle · rosbo · Feb 9, 2023 · Feb 8, 2023 · Feb 9, 2023 · Philmod
diff --git a/Dockerfile.tmpl b/Dockerfile.tmpl
@@ -586,6 +586,11 @@ ADD patches/sitecustomize.py /root/.local/lib/python3.7/site-packages/sitecustom
 # Override default imagemagick policies
 ADD patches/imagemagick-policy.xml /etc/ImageMagick-6/policy.xml
 
+# Add Kaggle module resolver: copy it into tensorflow_hub, then patch tensorflow_hub/config.py to import it and register KaggleFileResolver.
+ADD patches/kaggle_module_resolver.py /opt/conda/lib/python3.7/site-packages/tensorflow_hub/kaggle_module_resolver.py
+RUN sed -i '/from tensorflow_hub import uncompressed_module_resolver/a from tensorflow_hub import kaggle_module_resolver' /opt/conda/lib/python3.7/site-packages/tensorflow_hub/config.py && \
+    sed -i '/_install_default_resolvers()/a \ \ registry.resolver.add_implementation(kaggle_module_resolver.KaggleFileResolver())' /opt/conda/lib/python3.7/site-packages/tensorflow_hub/config.py
+
 # TensorBoard Jupyter extension. Should be replaced with TensorBoard's provided magic once we have
 # worker tunneling support in place.
 # b/139212522 re-enable TensorBoard once solution for slowdown is implemented.

diff --git a/patches/kaggle_module_resolver.py b/patches/kaggle_module_resolver.py
@@ -0,0 +1,45 @@
+"""TensorFlow Hub resolver that maps Kaggle model URLs to directories under /kaggle/input."""
+import os
+import re
+
+from tensorflow_hub import resolver
+
+# Matches Kaggle model URLs: http(s)://[subdomain.]kaggle.com/models/<ignored>/<model>/
+# frameworks/<framework>/variations/<variation>/versions/<numeric version>.
+# The dot in "kaggle.com" is escaped so look-alike hosts such as "kaggleXcom" do not match.
+url_pattern = re.compile(r"https?://([a-z]+\.)?kaggle\.com/models/[^\\/]+/(?P<model>[^\\/]+)/frameworks/(?P<framework>[^\\/]+)/variations/(?P<variation>[^\\/]+)/versions/(?P<version>[0-9]+)$")
+
+
+def _is_on_kaggle_notebook():
+    """Return True when the KAGGLE_CONTAINER_NAME environment variable is set."""
+    return os.getenv("KAGGLE_CONTAINER_NAME") is not None
+
+
+def _is_kaggle_handle(handle):
+    """Return True when `handle` matches `url_pattern`."""
+    return url_pattern.match(handle) is not None
+
+
+class KaggleFileResolver(resolver.HttpResolverBase):
+    """Resolves Kaggle model URLs to the matching local directory under /kaggle/input."""
+
+    def is_supported(self, handle):
+        """Return True only for Kaggle model URLs while KAGGLE_CONTAINER_NAME is set."""
+        return _is_on_kaggle_notebook() and _is_kaggle_handle(handle)
+
+    def __call__(self, handle):
+        """Return the local directory for the model referenced by `handle`.
+
+        Raises:
+            ValueError: if `handle` is not a Kaggle model URL.
+            RuntimeError: if the expected local directory does not exist.
+        """
+        m = url_pattern.match(handle)
+        if m is None:
+            raise ValueError(f"'{handle}' is not a supported Kaggle model URL.")
+        local_path = f"/kaggle/input/{m.group('model')}/{m.group('framework').lower()}/{m.group('variation')}/{m.group('version')}"
+        if not os.path.exists(local_path):
+            # TODO(b/268256777) Attach model & wait until ready instead.
+            raise RuntimeError(f"You have to attach the '{handle}' model to your Kaggle notebook.")
+
+        return local_path
diff --git a/test b/test
@@ -74,6 +74,7 @@ docker rm jupyter_test || true
 mkdir -p /tmp/python-build/tmp
 mkdir -p /tmp/python-build/devshm
 mkdir -p /tmp/python-build/working
+mkdir -p /tmp/python-build/kaggle
 
 # Only run Jupyter server test if no specific test pattern is specified.
 if [ $PATTERN == 'test*.py' ]; then
@@ -108,6 +109,7 @@ docker run --rm -t --read-only --net=none \
     --shm-size=2g \
     -v $PWD:/input:ro -v /tmp/python-build/working:/working \
     -v /tmp/python-build/tmp:/tmp -v /tmp/python-build/devshm:/dev/shm \
+    -v /tmp/python-build/kaggle:/kaggle \
     -w=/working \
     $ADDITONAL_OPTS \
     "$IMAGE_TAG" \

diff --git a/tests/data/model.tar.gz b/tests/data/model.tar.gz
diff --git a/tests/data/saved_model/keras_metadata.pb b/tests/data/saved_model/keras_metadata.pb
@@ -0,0 +1,5 @@
+
+�root"_tf_keras_network*�{"name": "model", "trainable": true, "expects_training_arg": true, "dtype": "float32", "batch_input_shape": null, "must_restore_from_config": false, "class_name": "Functional", "config": {"name": "model", "layers": [{"class_name": "InputLayer", "config": {"batch_input_shape": {"class_name": "__tuple__", "items": [null, 4]}, "dtype": "float32", "sparse": false, "ragged": false, "name": "input_1"}, "name": "input_1", "inbound_nodes": []}, {"class_name": "Dense", "config": {"name": "dense", "trainable": true, "dtype": "float32", "units": 1, "activation": "linear", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "name": "dense", "inbound_nodes": [[["input_1", 0, 0, {}]]]}], "input_layers": [["input_1", 0, 0]], "output_layers": [["dense", 0, 0]]}, "shared_object_id": 4, "input_spec": [{"class_name": "InputSpec", "config": {"dtype": null, "shape": {"class_name": "__tuple__", "items": [null, 4]}, "ndim": 2, "max_ndim": null, "min_ndim": null, "axes": {}}}], "build_input_shape": {"class_name": "TensorShape", "items": [null, 4]}, "is_graph_network": true, "full_save_spec": {"class_name": "__tuple__", "items": [[{"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 4]}, "float32", "input_1"]}], {}]}, "save_spec": {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 4]}, "float32", "input_1"]}, "keras_version": "2.6.0", "backend": "tensorflow", "model_config": {"class_name": "Functional", "config": {"name": "model", "layers": [{"class_name": "InputLayer", "config": {"batch_input_shape": {"class_name": "__tuple__", "items": [null, 4]}, "dtype": "float32", "sparse": false, "ragged": 
false, "name": "input_1"}, "name": "input_1", "inbound_nodes": [], "shared_object_id": 0}, {"class_name": "Dense", "config": {"name": "dense", "trainable": true, "dtype": "float32", "units": 1, "activation": "linear", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 1}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 2}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "name": "dense", "inbound_nodes": [[["input_1", 0, 0, {}]]], "shared_object_id": 3}], "input_layers": [["input_1", 0, 0]], "output_layers": [["dense", 0, 0]]}}, "training_config": {"loss": "mean_squared_error", "metrics": null, "weighted_metrics": null, "loss_weights": null, "optimizer_config": {"class_name": "Adam", "config": {"name": "Adam", "learning_rate": 0.0010000000474974513, "decay": 0.0, "beta_1": 0.8999999761581421, "beta_2": 0.9990000128746033, "epsilon": 1e-07, "amsgrad": false}}}}2
+�root.layer-0"_tf_keras_input_layer*�{"class_name": "InputLayer", "name": "input_1", "dtype": "float32", "sparse": false, "ragged": false, "batch_input_shape": {"class_name": "__tuple__", "items": [null, 4]}, "config": {"batch_input_shape": {"class_name": "__tuple__", "items": [null, 4]}, "dtype": "float32", "sparse": false, "ragged": false, "name": "input_1"}}2
+�root.layer_with_weights-0"_tf_keras_layer*�{"name": "dense", "trainable": true, "expects_training_arg": false, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "class_name": "Dense", "config": {"name": "dense", "trainable": true, "dtype": "float32", "units": 1, "activation": "linear", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 1}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 2}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["input_1", 0, 0, {}]]], "shared_object_id": 3, "input_spec": {"class_name": "InputSpec", "config": {"dtype": null, "shape": null, "ndim": null, "max_ndim": null, "min_ndim": 2, "axes": {"-1": 4}}, "shared_object_id": 6}, "build_input_shape": {"class_name": "TensorShape", "items": [null, 4]}}2
+�root.keras_api.metrics.0"_tf_keras_metric*�{"class_name": "Mean", "name": "loss", "dtype": "float32", "config": {"name": "loss", "dtype": "float32"}, "shared_object_id": 7}2
diff --git a/tests/data/saved_model/saved_model.pb b/tests/data/saved_model/saved_model.pb
diff --git a/tests/data/saved_model/variables/variables.data-00000-of-00001 b/tests/data/saved_model/variables/variables.data-00000-of-00001
diff --git a/tests/data/saved_model/variables/variables.index b/tests/data/saved_model/variables/variables.index
diff --git a/tests/test_kaggle_module_resolver.py b/tests/test_kaggle_module_resolver.py
@@ -0,0 +1,73 @@
+import os
+import shutil
+import threading
+import unittest
+
+from http.server import BaseHTTPRequestHandler, HTTPServer
+from unittest import mock
+
+import tensorflow as tf
+import tensorflow_hub as hub
+
+
+class TestKaggleModuleResolver(unittest.TestCase):
+    """Exercises hub.KerasLayer with Kaggle, HTTP and local-path handles."""
+
+    class HubHTTPHandler(BaseHTTPRequestHandler):
+        """Answers every GET with the test model archive."""
+
+        def do_GET(self):
+            self.send_response(200)
+            self.send_header('Content-Type', 'application/gzip')
+            self.end_headers()
+
+            with open('/input/tests/data/model.tar.gz', 'rb') as model_archive:
+                self.wfile.write(model_archive.read())
+
+    def _test_client(self, client_func, handler):
+        """Run `client_func` while an HTTP server using `handler` serves localhost:8080."""
+        with HTTPServer(('localhost', 8080), handler) as test_server:
+            server_thread = threading.Thread(target=test_server.serve_forever)
+            server_thread.start()
+
+            try:
+                client_func()
+            finally:
+                test_server.shutdown()
+                # Wait for the serving thread so it does not outlive the test.
+                server_thread.join()
+
+    def test_kaggle_resolver_succeeds(self):
+        # Attach model to right directory. Clear leftovers from earlier runs and remove
+        # the directory afterwards so the "not attached" test never sees it.
+        model_root = '/kaggle/input/foomodule'
+        shutil.rmtree(model_root, ignore_errors=True)
+        self.addCleanup(shutil.rmtree, model_root, ignore_errors=True)
+        os.makedirs(f'{model_root}/tensorflow2/barvar')
+        os.symlink('/input/tests/data/saved_model/', f'{model_root}/tensorflow2/barvar/2', target_is_directory=True)
+
+        # Simulates we are inside a Kaggle environment.
+        with mock.patch.dict(os.environ, {'KAGGLE_CONTAINER_NAME': 'foo'}):
+            test_inputs = tf.ones([1,4])
+            layer = hub.KerasLayer("https://kaggle.com/models/foo/foomodule/frameworks/TensorFlow2/variations/barvar/versions/2")
+            self.assertEqual([1, 1], layer(test_inputs).shape)
+
+    def test_kaggle_resolver_not_attached_throws(self):
+        # Simulates we are inside a Kaggle environment.
+        with mock.patch.dict(os.environ, {'KAGGLE_CONTAINER_NAME': 'foo'}):
+            with self.assertRaisesRegex(RuntimeError, '.*attach.*'):
+                hub.KerasLayer("https://kaggle.com/models/foo/foomodule/frameworks/TensorFlow2/variations/barvar/versions/2")
+
+    def test_http_resolver_succeeds(self):
+        def call_hub():
+            test_inputs = tf.ones([1,4])
+            layer = hub.KerasLayer('http://localhost:8080/model.tar.gz')
+            self.assertEqual([1, 1], layer(test_inputs).shape)
+
+        self._test_client(call_hub, TestKaggleModuleResolver.HubHTTPHandler)
+
+    def test_local_path_resolver_succeeds(self):
+        test_inputs = tf.ones([1,4])
+        layer = hub.KerasLayer('/input/tests/data/saved_model')
+
+        self.assertEqual([1, 1], layer(test_inputs).shape)