Skip to content

Add Kaggle Resolver #1213

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Feb 9, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions Dockerfile.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -586,6 +586,11 @@ ADD patches/sitecustomize.py /root/.local/lib/python3.7/site-packages/sitecustom
# Override default imagemagick policies
ADD patches/imagemagick-policy.xml /etc/ImageMagick-6/policy.xml

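# Add the Kaggle module resolver: copy it into the tensorflow_hub package, then patch
# tensorflow_hub's config.py to import it and register a KaggleFileResolver instance.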
ADD patches/kaggle_module_resolver.py /opt/conda/lib/python3.7/site-packages/tensorflow_hub/kaggle_module_resolver.py
RUN sed -i '/from tensorflow_hub import uncompressed_module_resolver/a from tensorflow_hub import kaggle_module_resolver' /opt/conda/lib/python3.7/site-packages/tensorflow_hub/config.py && \
sed -i '/_install_default_resolvers()/a \ \ registry.resolver.add_implementation(kaggle_module_resolver.KaggleFileResolver())' /opt/conda/lib/python3.7/site-packages/tensorflow_hub/config.py

# TensorBoard Jupyter extension. Should be replaced with TensorBoard's provided magic once we have
# worker tunneling support in place.
# b/139212522 re-enable TensorBoard once solution for slowdown is implemented.
Expand Down
25 changes: 25 additions & 0 deletions patches/kaggle_module_resolver.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import os
import re

from tensorflow_hub import resolver

# Matches Kaggle model URLs of the form
#   http(s)://[<subdomain>.]kaggle.com/models/<owner>/<model>/frameworks/<framework>/variations/<variation>/versions/<version>
# and captures the model, framework, variation and (numeric) version segments.
# The dot in "kaggle.com" is escaped so that only a literal "." is accepted;
# unescaped, it would also match look-alike hosts such as "kagglexcom".
url_pattern = re.compile(r"https?://([a-z]+\.)?kaggle\.com/models/[^\\/]+/(?P<model>[^\\/]+)/frameworks/(?P<framework>[^\\/]+)/variations/(?P<variation>[^\\/]+)/versions/(?P<version>[0-9]+)$")

def _is_on_kaggle_notebook():
    """Return True when the KAGGLE_CONTAINER_NAME environment variable is set.

    Any value counts as set, including the empty string; only a missing
    variable yields False.
    """
    # `is not None` is the idiomatic None test and cannot be affected by a
    # custom __ne__ implementation.
    return os.getenv("KAGGLE_CONTAINER_NAME") is not None

def _is_kaggle_handle(handle):
    """Return True when `handle` matches the module-level `url_pattern`."""
    # `is not None` is the idiomatic way to test a regex match result.
    return url_pattern.match(handle) is not None

class KaggleFileResolver(resolver.HttpResolverBase):
    """Resolver that maps Kaggle model URLs to directories under /kaggle/input."""

    def is_supported(self, handle):
        """Return True only for Kaggle model URLs while on a Kaggle notebook."""
        if not _is_on_kaggle_notebook():
            return False
        return _is_kaggle_handle(handle)

    def __call__(self, handle):
        """Return the local directory that corresponds to `handle`.

        The path is built from the model, framework, variation and version
        segments captured by `url_pattern`.

        Raises:
            RuntimeError: if the computed directory does not exist.
        """
        parts = url_pattern.match(handle)
        model = parts.group('model')
        # The framework segment is lower-cased when building the path.
        framework = parts.group('framework').lower()
        variation = parts.group('variation')
        version = parts.group('version')
        local_path = f"/kaggle/input/{model}/{framework}/{variation}/{version}"

        if os.path.exists(local_path):
            return local_path

        # TODO(b/268256777) Attach model & wait until ready instead.
        raise RuntimeError(f"You have to attach the '{handle}' model to your Kaggle notebook.")
2 changes: 2 additions & 0 deletions test
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ docker rm jupyter_test || true
mkdir -p /tmp/python-build/tmp
mkdir -p /tmp/python-build/devshm
mkdir -p /tmp/python-build/working
mkdir -p /tmp/python-build/kaggle

# Only run Jupyter server test if no specific test pattern is specified.
if [ $PATTERN == 'test*.py' ]; then
Expand Down Expand Up @@ -108,6 +109,7 @@ docker run --rm -t --read-only --net=none \
--shm-size=2g \
-v $PWD:/input:ro -v /tmp/python-build/working:/working \
-v /tmp/python-build/tmp:/tmp -v /tmp/python-build/devshm:/dev/shm \
-v /tmp/python-build/kaggle:/kaggle \
-w=/working \
$ADDITONAL_OPTS \
"$IMAGE_TAG" \
Expand Down
Binary file added tests/data/model.tar.gz
Binary file not shown.
5 changes: 5 additions & 0 deletions tests/data/saved_model/keras_metadata.pb
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@

�root"_tf_keras_network*�{"name": "model", "trainable": true, "expects_training_arg": true, "dtype": "float32", "batch_input_shape": null, "must_restore_from_config": false, "class_name": "Functional", "config": {"name": "model", "layers": [{"class_name": "InputLayer", "config": {"batch_input_shape": {"class_name": "__tuple__", "items": [null, 4]}, "dtype": "float32", "sparse": false, "ragged": false, "name": "input_1"}, "name": "input_1", "inbound_nodes": []}, {"class_name": "Dense", "config": {"name": "dense", "trainable": true, "dtype": "float32", "units": 1, "activation": "linear", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "name": "dense", "inbound_nodes": [[["input_1", 0, 0, {}]]]}], "input_layers": [["input_1", 0, 0]], "output_layers": [["dense", 0, 0]]}, "shared_object_id": 4, "input_spec": [{"class_name": "InputSpec", "config": {"dtype": null, "shape": {"class_name": "__tuple__", "items": [null, 4]}, "ndim": 2, "max_ndim": null, "min_ndim": null, "axes": {}}}], "build_input_shape": {"class_name": "TensorShape", "items": [null, 4]}, "is_graph_network": true, "full_save_spec": {"class_name": "__tuple__", "items": [[{"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 4]}, "float32", "input_1"]}], {}]}, "save_spec": {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 4]}, "float32", "input_1"]}, "keras_version": "2.6.0", "backend": "tensorflow", "model_config": {"class_name": "Functional", "config": {"name": "model", "layers": [{"class_name": "InputLayer", "config": {"batch_input_shape": {"class_name": "__tuple__", "items": [null, 4]}, "dtype": "float32", "sparse": false, "ragged": 
false, "name": "input_1"}, "name": "input_1", "inbound_nodes": [], "shared_object_id": 0}, {"class_name": "Dense", "config": {"name": "dense", "trainable": true, "dtype": "float32", "units": 1, "activation": "linear", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 1}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 2}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "name": "dense", "inbound_nodes": [[["input_1", 0, 0, {}]]], "shared_object_id": 3}], "input_layers": [["input_1", 0, 0]], "output_layers": [["dense", 0, 0]]}}, "training_config": {"loss": "mean_squared_error", "metrics": null, "weighted_metrics": null, "loss_weights": null, "optimizer_config": {"class_name": "Adam", "config": {"name": "Adam", "learning_rate": 0.0010000000474974513, "decay": 0.0, "beta_1": 0.8999999761581421, "beta_2": 0.9990000128746033, "epsilon": 1e-07, "amsgrad": false}}}}2
� root.layer-0"_tf_keras_input_layer*�{"class_name": "InputLayer", "name": "input_1", "dtype": "float32", "sparse": false, "ragged": false, "batch_input_shape": {"class_name": "__tuple__", "items": [null, 4]}, "config": {"batch_input_shape": {"class_name": "__tuple__", "items": [null, 4]}, "dtype": "float32", "sparse": false, "ragged": false, "name": "input_1"}}2
�root.layer_with_weights-0"_tf_keras_layer*�{"name": "dense", "trainable": true, "expects_training_arg": false, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "class_name": "Dense", "config": {"name": "dense", "trainable": true, "dtype": "float32", "units": 1, "activation": "linear", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 1}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 2}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["input_1", 0, 0, {}]]], "shared_object_id": 3, "input_spec": {"class_name": "InputSpec", "config": {"dtype": null, "shape": null, "ndim": null, "max_ndim": null, "min_ndim": 2, "axes": {"-1": 4}}, "shared_object_id": 6}, "build_input_shape": {"class_name": "TensorShape", "items": [null, 4]}}2
�root.keras_api.metrics.0"_tf_keras_metric*�{"class_name": "Mean", "name": "loss", "dtype": "float32", "config": {"name": "loss", "dtype": "float32"}, "shared_object_id": 7}2
Binary file added tests/data/saved_model/saved_model.pb
Binary file not shown.
Binary file not shown.
Binary file added tests/data/saved_model/variables/variables.index
Binary file not shown.
66 changes: 66 additions & 0 deletions tests/test_kaggle_module_resolver.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
import unittest

import os
import threading

import tensorflow as tf
import tensorflow_hub as hub

from http.server import BaseHTTPRequestHandler, HTTPServer
from test.support import EnvironmentVarGuard


class TestKaggleModuleResolver(unittest.TestCase):
class HubHTTPHandler(BaseHTTPRequestHandler):
    """HTTP handler that answers every GET request with the test model archive."""

    def do_GET(self):
        """Send a 200 response whose body is the gzip'd model archive."""
        # The leftover debug print was dropped; it only added noise to test output.
        self.send_response(200)
        self.send_header('Content-Type', 'application/gzip')
        self.end_headers()

        with open('/input/tests/data/model.tar.gz', 'rb') as model_archive:
            self.wfile.write(model_archive.read())

def _test_client(self, client_func, handler):
    """Run `client_func` while an HTTP server using `handler` serves on localhost:8080.

    Args:
        client_func: zero-argument callable executed while the server is up.
        handler: request handler class passed to `HTTPServer`.

    The server is always shut down, even when `client_func` raises; the
    exception then propagates to the caller.
    """
    with HTTPServer(('localhost', 8080), handler) as test_server:
        # Daemon thread: a stuck server loop must not block interpreter exit.
        server_thread = threading.Thread(target=test_server.serve_forever, daemon=True)
        server_thread.start()

        try:
            client_func()
        finally:
            test_server.shutdown()
            # Wait for the serving thread to finish before the `with` block
            # closes the listening socket, so no thread outlives the server.
            server_thread.join()

def test_kaggle_resolver_succeeds(self):
# Simulates we are inside a Kaggle environment.
env = EnvironmentVarGuard()
env.set('KAGGLE_CONTAINER_NAME', 'foo')
# Attach model to right directory.
os.makedirs('/kaggle/input/foomodule/tensorflow2/barvar')
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't it have the VersionNumber at the end?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I create the VersionNumber directory as a symlink on the line below.

os.symlink('/input/tests/data/saved_model/', '/kaggle/input/foomodule/tensorflow2/barvar/2', target_is_directory=True)

with env:
test_inputs = tf.ones([1,4])
layer = hub.KerasLayer("https://kaggle.com/models/foo/foomodule/frameworks/TensorFlow2/variations/barvar/versions/2")
self.assertEqual([1, 1], layer(test_inputs).shape)

def test_kaggle_resolver_not_attached_throws(self):
    """Loading a Kaggle model URL with no attached model raises RuntimeError."""
    # Pretend we are running inside a Kaggle environment.
    kaggle_env = EnvironmentVarGuard()
    kaggle_env.set('KAGGLE_CONTAINER_NAME', 'foo')
    with kaggle_env, self.assertRaisesRegex(RuntimeError, '.*attach.*'):
        hub.KerasLayer("https://kaggle.com/models/foo/foomodule/frameworks/TensorFlow2/variations/barvar/versions/2")

def test_http_resolver_succeeds(self):
    """A model archive served over HTTP loads and yields a [1, 1] output."""
    def load_and_check():
        # Executed by _test_client while the local HTTP server is running.
        ones = tf.ones([1,4])
        keras_layer = hub.KerasLayer('http://localhost:8080/model.tar.gz')
        output = keras_layer(ones)
        self.assertEqual([1, 1], output.shape)

    handler_cls = TestKaggleModuleResolver.HubHTTPHandler
    self._test_client(load_and_check, handler_cls)

def test_local_path_resolver_succeeds(self):
    """A saved model referenced by local path loads and yields a [1, 1] output."""
    ones = tf.ones([1,4])
    keras_layer = hub.KerasLayer('/input/tests/data/saved_model')
    output = keras_layer(ones)

    self.assertEqual([1, 1], output.shape)