Skip to content

Commit 3ff0017

Browse files
authored
Add Kaggle Resolver (#1213)
http://b/268256777
1 parent 739e1b0 commit 3ff0017

File tree

9 files changed

+102
-0
lines changed

9 files changed

+102
-0
lines changed

Dockerfile.tmpl

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -609,6 +609,11 @@ ADD patches/sitecustomize.py /root/.local/lib/python3.7/site-packages/sitecustom
609609
# Override default imagemagick policies
610610
ADD patches/imagemagick-policy.xml /etc/ImageMagick-6/policy.xml
611611

612+
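# Add Kaggle module resolver: copy it into the installed tensorflow_hub package, then patch config.py (via sed) to import it and register KaggleFileResolver.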
613+
ADD patches/kaggle_module_resolver.py /opt/conda/lib/python3.7/site-packages/tensorflow_hub/kaggle_module_resolver.py
614+
RUN sed -i '/from tensorflow_hub import uncompressed_module_resolver/a from tensorflow_hub import kaggle_module_resolver' /opt/conda/lib/python3.7/site-packages/tensorflow_hub/config.py && \
615+
sed -i '/_install_default_resolvers()/a \ \ registry.resolver.add_implementation(kaggle_module_resolver.KaggleFileResolver())' /opt/conda/lib/python3.7/site-packages/tensorflow_hub/config.py
616+
612617
# TensorBoard Jupyter extension. Should be replaced with TensorBoard's provided magic once we have
613618
# worker tunneling support in place.
614619
# b/139212522 re-enable TensorBoard once solution for slowdown is implemented.

patches/kaggle_module_resolver.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
import os
2+
import re
3+
4+
from tensorflow_hub import resolver
5+
6+
# Matches Kaggle model URLs of the form
#   http(s)://[<subdomain>.]kaggle.com/models/<owner>/<model>/frameworks/<framework>/variations/<variation>/versions/<version>
# and captures the model, framework, variation and numeric version segments.
# The dot in "kaggle.com" is escaped so look-alike hosts (e.g. "kagglexcom") are rejected.
url_pattern = re.compile(
    r"https?://([a-z]+\.)?kaggle\.com/models/[^\\/]+/(?P<model>[^\\/]+)"
    r"/frameworks/(?P<framework>[^\\/]+)/variations/(?P<variation>[^\\/]+)"
    r"/versions/(?P<version>[0-9]+)$"
)
7+
8+
def _is_on_kaggle_notebook():
    """Return True when the KAGGLE_CONTAINER_NAME environment variable is set."""
    # Identity comparison is the idiomatic (and unambiguous) way to test for None.
    return os.getenv("KAGGLE_CONTAINER_NAME") is not None
10+
11+
def _is_kaggle_handle(handle):
    """Return True when `handle` matches the module-level `url_pattern`."""
    # Identity comparison is the idiomatic (and unambiguous) way to test for None.
    return url_pattern.match(handle) is not None
13+
14+
class KaggleFileResolver(resolver.HttpResolverBase):
    """Resolves Kaggle model URLs to model directories under /kaggle/input."""

    def is_supported(self, handle):
        """Return True when running in a Kaggle notebook and `handle` is a Kaggle model URL."""
        return _is_on_kaggle_notebook() and _is_kaggle_handle(handle)

    def __call__(self, handle):
        """Map a Kaggle model URL to its local path.

        Args:
            handle: A Kaggle model URL matching `url_pattern`.

        Returns:
            The path /kaggle/input/<model>/<framework, lower-cased>/<variation>/<version>.

        Raises:
            ValueError: If `handle` does not match `url_pattern`.
            RuntimeError: If the computed local path does not exist.
        """
        m = url_pattern.match(handle)
        if m is None:
            # Fail with a clear message instead of an AttributeError on None.
            raise ValueError(f"'{handle}' is not a valid Kaggle model URL.")

        # The framework segment is lower-cased to build the local directory name.
        local_path = f"/kaggle/input/{m.group('model')}/{m.group('framework').lower()}/{m.group('variation')}/{m.group('version')}"
        if not os.path.exists(local_path):
            # TODO(b/268256777) Attach model & wait until ready instead.
            raise RuntimeError(f"You have to attach the '{handle}' model to your Kaggle notebook.")

        return local_path

test

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@ docker rm jupyter_test || true
7474
mkdir -p /tmp/python-build/tmp
7575
mkdir -p /tmp/python-build/devshm
7676
mkdir -p /tmp/python-build/working
77+
mkdir -p /tmp/python-build/kaggle
7778

7879
# Only run Jupyter server test if no specific test pattern is specified.
7980
if [ $PATTERN == 'test*.py' ]; then
@@ -108,6 +109,7 @@ docker run --rm -t --read-only --net=none \
108109
--shm-size=2g \
109110
-v $PWD:/input:ro -v /tmp/python-build/working:/working \
110111
-v /tmp/python-build/tmp:/tmp -v /tmp/python-build/devshm:/dev/shm \
112+
-v /tmp/python-build/kaggle:/kaggle \
111113
-w=/working \
112114
$ADDITONAL_OPTS \
113115
"$IMAGE_TAG" \

tests/data/model.tar.gz

8.38 KB
Binary file not shown.
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
2+
�root"_tf_keras_network*�{"name": "model", "trainable": true, "expects_training_arg": true, "dtype": "float32", "batch_input_shape": null, "must_restore_from_config": false, "class_name": "Functional", "config": {"name": "model", "layers": [{"class_name": "InputLayer", "config": {"batch_input_shape": {"class_name": "__tuple__", "items": [null, 4]}, "dtype": "float32", "sparse": false, "ragged": false, "name": "input_1"}, "name": "input_1", "inbound_nodes": []}, {"class_name": "Dense", "config": {"name": "dense", "trainable": true, "dtype": "float32", "units": 1, "activation": "linear", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "name": "dense", "inbound_nodes": [[["input_1", 0, 0, {}]]]}], "input_layers": [["input_1", 0, 0]], "output_layers": [["dense", 0, 0]]}, "shared_object_id": 4, "input_spec": [{"class_name": "InputSpec", "config": {"dtype": null, "shape": {"class_name": "__tuple__", "items": [null, 4]}, "ndim": 2, "max_ndim": null, "min_ndim": null, "axes": {}}}], "build_input_shape": {"class_name": "TensorShape", "items": [null, 4]}, "is_graph_network": true, "full_save_spec": {"class_name": "__tuple__", "items": [[{"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 4]}, "float32", "input_1"]}], {}]}, "save_spec": {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 4]}, "float32", "input_1"]}, "keras_version": "2.6.0", "backend": "tensorflow", "model_config": {"class_name": "Functional", "config": {"name": "model", "layers": [{"class_name": "InputLayer", "config": {"batch_input_shape": {"class_name": "__tuple__", "items": [null, 4]}, "dtype": "float32", "sparse": false, "ragged": 
false, "name": "input_1"}, "name": "input_1", "inbound_nodes": [], "shared_object_id": 0}, {"class_name": "Dense", "config": {"name": "dense", "trainable": true, "dtype": "float32", "units": 1, "activation": "linear", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 1}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 2}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "name": "dense", "inbound_nodes": [[["input_1", 0, 0, {}]]], "shared_object_id": 3}], "input_layers": [["input_1", 0, 0]], "output_layers": [["dense", 0, 0]]}}, "training_config": {"loss": "mean_squared_error", "metrics": null, "weighted_metrics": null, "loss_weights": null, "optimizer_config": {"class_name": "Adam", "config": {"name": "Adam", "learning_rate": 0.0010000000474974513, "decay": 0.0, "beta_1": 0.8999999761581421, "beta_2": 0.9990000128746033, "epsilon": 1e-07, "amsgrad": false}}}}2
3+
� root.layer-0"_tf_keras_input_layer*�{"class_name": "InputLayer", "name": "input_1", "dtype": "float32", "sparse": false, "ragged": false, "batch_input_shape": {"class_name": "__tuple__", "items": [null, 4]}, "config": {"batch_input_shape": {"class_name": "__tuple__", "items": [null, 4]}, "dtype": "float32", "sparse": false, "ragged": false, "name": "input_1"}}2
4+
�root.layer_with_weights-0"_tf_keras_layer*�{"name": "dense", "trainable": true, "expects_training_arg": false, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "class_name": "Dense", "config": {"name": "dense", "trainable": true, "dtype": "float32", "units": 1, "activation": "linear", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 1}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 2}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["input_1", 0, 0, {}]]], "shared_object_id": 3, "input_spec": {"class_name": "InputSpec", "config": {"dtype": null, "shape": null, "ndim": null, "max_ndim": null, "min_ndim": 2, "axes": {"-1": 4}}, "shared_object_id": 6}, "build_input_shape": {"class_name": "TensorShape", "items": [null, 4]}}2
5+
�root.keras_api.metrics.0"_tf_keras_metric*�{"class_name": "Mean", "name": "loss", "dtype": "float32", "config": {"name": "loss", "dtype": "float32"}, "shared_object_id": 7}2

tests/data/saved_model/saved_model.pb

43.6 KB
Binary file not shown.
Binary file not shown.
895 Bytes
Binary file not shown.

tests/test_kaggle_module_resolver.py

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
import unittest
2+
3+
import os
4+
import threading
5+
6+
import tensorflow as tf
7+
import tensorflow_hub as hub
8+
9+
from http.server import BaseHTTPRequestHandler, HTTPServer
10+
from test.support import EnvironmentVarGuard
11+
12+
13+
class TestKaggleModuleResolver(unittest.TestCase):
    """Checks that Kaggle, HTTP and local-path handles all load through hub.KerasLayer."""

    class HubHTTPHandler(BaseHTTPRequestHandler):
        """Serves the test model archive for every GET request."""

        def do_GET(self):
            self.send_response(200)
            self.send_header('Content-Type', 'application/gzip')
            self.end_headers()

            with open('/input/tests/data/model.tar.gz', 'rb') as model_archive:
                self.wfile.write(model_archive.read())

    def _test_client(self, client_func, handler):
        """Run `client_func` while an HTTP server using `handler` listens on localhost:8080."""
        with HTTPServer(('localhost', 8080), handler) as test_server:
            server_thread = threading.Thread(target=test_server.serve_forever)
            server_thread.start()

            try:
                client_func()
            finally:
                test_server.shutdown()
                # Wait for the serving thread so it does not outlive the test.
                server_thread.join()

    def test_kaggle_resolver_succeeds(self):
        # Local import keeps this block self-contained.
        import shutil

        # Attach model to right directory. The fixture is made idempotent and is
        # removed afterwards so that repeated runs (and the "not attached" test)
        # do not see stale state left behind under /kaggle.
        variation_dir = '/kaggle/input/foomodule/tensorflow2/barvar'
        version_link = os.path.join(variation_dir, '2')
        self.addCleanup(shutil.rmtree, '/kaggle/input/foomodule', ignore_errors=True)
        os.makedirs(variation_dir, exist_ok=True)
        if os.path.lexists(version_link):
            os.remove(version_link)
        os.symlink('/input/tests/data/saved_model/', version_link, target_is_directory=True)

        with EnvironmentVarGuard() as env:
            # Simulates we are inside a Kaggle environment. Setting the variable inside
            # the guard ensures it is always restored on exit.
            env.set('KAGGLE_CONTAINER_NAME', 'foo')
            test_inputs = tf.ones([1,4])
            layer = hub.KerasLayer("https://kaggle.com/models/foo/foomodule/frameworks/TensorFlow2/variations/barvar/versions/2")
            self.assertEqual([1, 1], layer(test_inputs).shape)

    def test_kaggle_resolver_not_attached_throws(self):
        with EnvironmentVarGuard() as env:
            # Simulates we are inside a Kaggle environment.
            env.set('KAGGLE_CONTAINER_NAME', 'foo')
            with self.assertRaisesRegex(RuntimeError, '.*attach.*'):
                hub.KerasLayer("https://kaggle.com/models/foo/foomodule/frameworks/TensorFlow2/variations/barvar/versions/2")

    def test_http_resolver_succeeds(self):
        def call_hub():
            test_inputs = tf.ones([1,4])
            layer = hub.KerasLayer('http://localhost:8080/model.tar.gz')
            self.assertEqual([1, 1], layer(test_inputs).shape)

        self._test_client(call_hub, TestKaggleModuleResolver.HubHTTPHandler)

    def test_local_path_resolver_succeeds(self):
        test_inputs = tf.ones([1,4])
        layer = hub.KerasLayer('/input/tests/data/saved_model')

        self.assertEqual([1, 1], layer(test_inputs).shape)

0 commit comments

Comments
 (0)