Skip to content

Commit 6e33ece

Browse files
committed
Add Kaggle Resolver
http://b/268256777
1 parent eca5485 commit 6e33ece

File tree

9 files changed

+103
-0
lines changed

9 files changed

+103
-0
lines changed

Dockerfile.tmpl

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -586,6 +586,11 @@ ADD patches/sitecustomize.py /root/.local/lib/python3.7/site-packages/sitecustom
586586
# Override default imagemagick policies
587587
ADD patches/imagemagick-policy.xml /etc/ImageMagick-6/policy.xml
588588

589+
# Add Kaggle module resolver
590+
ADD patches/kaggle_module_resolver.py /opt/conda/lib/python3.7/site-packages/tensorflow_hub/kaggle_module_resolver.py
591+
RUN sed -i '/from tensorflow_hub import uncompressed_module_resolver/a from tensorflow_hub import kaggle_module_resolver' /opt/conda/lib/python3.7/site-packages/tensorflow_hub/config.py && \
592+
sed -i '/_install_default_resolvers()/a \ \ registry.resolver.add_implementation(kaggle_module_resolver.KaggleFileResolver())' /opt/conda/lib/python3.7/site-packages/tensorflow_hub/config.py
593+
589594
# TensorBoard Jupyter extension. Should be replaced with TensorBoard's provided magic once we have
590595
# worker tunneling support in place.
591596
# b/139212522 re-enable TensorBoard once solution for slowdown is implemented.

patches/kaggle_module_resolver.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
import os
2+
import re
3+
4+
from tensorflow_hub import resolver
5+
6+
# Matches Kaggle model handles of the form
#   http(s)://[<subdomain>.]kaggle.com/models/<segment>/<model>/frameworks/<framework>/variations/<variation>/versions/<version>
# and captures the model, framework, variation and numeric version segments
# as named groups. The path segment right after "/models/" is matched but not
# captured. The dot in "kaggle.com" is escaped so that only a literal "."
# is accepted (an unescaped "." would also match e.g. "kaggleXcom").
url_pattern = re.compile(
    r"https?://([a-z]+\.)?kaggle\.com/models/[^\\/]+/(?P<model>[^\\/]+)"
    r"/frameworks/(?P<framework>[^\\/]+)/variations/(?P<variation>[^\\/]+)"
    r"/versions/(?P<version>[0-9]+)$"
)
7+
8+
def _is_on_kaggle_notebook():
    """Return True when the KAGGLE_CONTAINER_NAME environment variable is set.

    Any value counts, including the empty string; only an unset variable
    yields False.
    """
    # Identity comparison is the idiomatic (and overload-proof) None check.
    return os.getenv("KAGGLE_CONTAINER_NAME") is not None
10+
11+
def _is_kaggle_handle(handle):
    """Return True when `handle` matches the module-level `url_pattern`.

    Args:
        handle: the model handle string to test.
    """
    # Identity comparison is the idiomatic None check for a regex match result.
    return url_pattern.match(handle) is not None
13+
14+
class KaggleFileResolver(resolver.HttpResolverBase):
    """Resolves Kaggle model URLs to model directories under /kaggle/input."""

    def is_supported(self, handle):
        """Return True when running on Kaggle and `handle` is a Kaggle model URL."""
        return _is_on_kaggle_notebook() and _is_kaggle_handle(handle)

    def __call__(self, handle):
        """Map a Kaggle model URL to its local path.

        Args:
            handle: a Kaggle model URL matching `url_pattern`.

        Returns:
            The path
            /kaggle/input/<model>/<framework, lower-cased>/<variation>/<version>.

        Raises:
            ValueError: if `handle` does not match `url_pattern`.
            RuntimeError: if the computed path does not exist, i.e. the model
                has not been attached to the notebook.
        """
        m = url_pattern.match(handle)
        if m is None:
            # Fail with a clear message instead of an AttributeError on
            # `None.group(...)` when called with an unsupported handle.
            raise ValueError(f"'{handle}' is not a valid Kaggle model URL.")

        local_path = f"/kaggle/input/{m.group('model')}/{m.group('framework').lower()}/{m.group('variation')}/{m.group('version')}"
        if not os.path.exists(local_path):
            # TODO(b/268256777) Attach model & wait until ready instead.
            raise RuntimeError(f"You have to attach the '{handle}' model to your Kaggle notebook.")

        return local_path

test

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@ docker rm jupyter_test || true
7474
mkdir -p /tmp/python-build/tmp
7575
mkdir -p /tmp/python-build/devshm
7676
mkdir -p /tmp/python-build/working
77+
mkdir -p /tmp/python-build/kaggle
7778

7879
# Only run Jupyter server test if no specific test pattern is specified.
7980
if [ $PATTERN == 'test*.py' ]; then
@@ -108,6 +109,7 @@ docker run --rm -t --read-only --net=none \
108109
--shm-size=2g \
109110
-v $PWD:/input:ro -v /tmp/python-build/working:/working \
110111
-v /tmp/python-build/tmp:/tmp -v /tmp/python-build/devshm:/dev/shm \
112+
-v /tmp/python-build/kaggle:/kaggle \
111113
-w=/working \
112114
$ADDITONAL_OPTS \
113115
"$IMAGE_TAG" \

tests/data/model.tar.gz

8.38 KB
Binary file not shown.
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
2+
�root"_tf_keras_network*�{"name": "model", "trainable": true, "expects_training_arg": true, "dtype": "float32", "batch_input_shape": null, "must_restore_from_config": false, "class_name": "Functional", "config": {"name": "model", "layers": [{"class_name": "InputLayer", "config": {"batch_input_shape": {"class_name": "__tuple__", "items": [null, 4]}, "dtype": "float32", "sparse": false, "ragged": false, "name": "input_1"}, "name": "input_1", "inbound_nodes": []}, {"class_name": "Dense", "config": {"name": "dense", "trainable": true, "dtype": "float32", "units": 1, "activation": "linear", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "name": "dense", "inbound_nodes": [[["input_1", 0, 0, {}]]]}], "input_layers": [["input_1", 0, 0]], "output_layers": [["dense", 0, 0]]}, "shared_object_id": 4, "input_spec": [{"class_name": "InputSpec", "config": {"dtype": null, "shape": {"class_name": "__tuple__", "items": [null, 4]}, "ndim": 2, "max_ndim": null, "min_ndim": null, "axes": {}}}], "build_input_shape": {"class_name": "TensorShape", "items": [null, 4]}, "is_graph_network": true, "full_save_spec": {"class_name": "__tuple__", "items": [[{"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 4]}, "float32", "input_1"]}], {}]}, "save_spec": {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 4]}, "float32", "input_1"]}, "keras_version": "2.6.0", "backend": "tensorflow", "model_config": {"class_name": "Functional", "config": {"name": "model", "layers": [{"class_name": "InputLayer", "config": {"batch_input_shape": {"class_name": "__tuple__", "items": [null, 4]}, "dtype": "float32", "sparse": false, "ragged": 
false, "name": "input_1"}, "name": "input_1", "inbound_nodes": [], "shared_object_id": 0}, {"class_name": "Dense", "config": {"name": "dense", "trainable": true, "dtype": "float32", "units": 1, "activation": "linear", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 1}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 2}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "name": "dense", "inbound_nodes": [[["input_1", 0, 0, {}]]], "shared_object_id": 3}], "input_layers": [["input_1", 0, 0]], "output_layers": [["dense", 0, 0]]}}, "training_config": {"loss": "mean_squared_error", "metrics": null, "weighted_metrics": null, "loss_weights": null, "optimizer_config": {"class_name": "Adam", "config": {"name": "Adam", "learning_rate": 0.0010000000474974513, "decay": 0.0, "beta_1": 0.8999999761581421, "beta_2": 0.9990000128746033, "epsilon": 1e-07, "amsgrad": false}}}}2
3+
� root.layer-0"_tf_keras_input_layer*�{"class_name": "InputLayer", "name": "input_1", "dtype": "float32", "sparse": false, "ragged": false, "batch_input_shape": {"class_name": "__tuple__", "items": [null, 4]}, "config": {"batch_input_shape": {"class_name": "__tuple__", "items": [null, 4]}, "dtype": "float32", "sparse": false, "ragged": false, "name": "input_1"}}2
4+
�root.layer_with_weights-0"_tf_keras_layer*�{"name": "dense", "trainable": true, "expects_training_arg": false, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "class_name": "Dense", "config": {"name": "dense", "trainable": true, "dtype": "float32", "units": 1, "activation": "linear", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 1}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 2}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["input_1", 0, 0, {}]]], "shared_object_id": 3, "input_spec": {"class_name": "InputSpec", "config": {"dtype": null, "shape": null, "ndim": null, "max_ndim": null, "min_ndim": 2, "axes": {"-1": 4}}, "shared_object_id": 6}, "build_input_shape": {"class_name": "TensorShape", "items": [null, 4]}}2
5+
�root.keras_api.metrics.0"_tf_keras_metric*�{"class_name": "Mean", "name": "loss", "dtype": "float32", "config": {"name": "loss", "dtype": "float32"}, "shared_object_id": 7}2

tests/data/saved_model/saved_model.pb

43.6 KB
Binary file not shown.
Binary file not shown.
895 Bytes
Binary file not shown.

tests/test_kaggle_module_resolver.py

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
import unittest
2+
3+
import os
4+
import threading
5+
6+
import tensorflow as tf
7+
import tensorflow_hub as hub
8+
9+
from http.server import BaseHTTPRequestHandler, HTTPServer
10+
from test.support import EnvironmentVarGuard
11+
12+
13+
class TestKaggleModuleResolver(unittest.TestCase):
    """Exercises tensorflow_hub handle resolution for Kaggle URLs, HTTP and local paths."""

    # Directory under which the fake Kaggle model is attached by the tests.
    _ATTACHED_MODEL_ROOT = '/kaggle/input/foomodule'

    class HubHTTPHandler(BaseHTTPRequestHandler):
        """Answers every GET request with the test model archive."""

        def do_GET(self):
            self.send_response(200)
            self.send_header('Content-Type', 'application/gzip')
            self.end_headers()

            with open('/input/tests/data/model.tar.gz', 'rb') as model_archive:
                self.wfile.write(model_archive.read())

    def setUp(self):
        # /kaggle may persist between test runs, so start every test (and
        # leave every test) without an attached model. Otherwise a stale
        # attachment breaks both the "succeeds" test (makedirs would raise
        # FileExistsError) and the "not attached" test (the path would exist).
        self._detach_model()
        self.addCleanup(self._detach_model)

    def _detach_model(self):
        """Remove the fake attached model directory, if present."""
        import shutil  # Local import: only needed for test clean-up.

        # rmtree unlinks the symlink inside the tree without following it.
        shutil.rmtree(self._ATTACHED_MODEL_ROOT, ignore_errors=True)

    def _test_client(self, client_func, handler):
        """Run `client_func` while an HTTP server using `handler` listens on localhost:8080."""
        with HTTPServer(('localhost', 8080), handler) as test_server:
            # Daemon thread so a hung server can never block interpreter exit.
            server_thread = threading.Thread(target=test_server.serve_forever, daemon=True)
            server_thread.start()

            try:
                client_func()
            finally:
                test_server.shutdown()
                # Wait for serve_forever to return before the socket is closed.
                server_thread.join()

    def test_kaggle_resolver_succeeds(self):
        # Simulate running inside a Kaggle environment.
        env = EnvironmentVarGuard()
        env.set('KAGGLE_CONTAINER_NAME', 'foo')
        # Attach the model at the directory the resolver computes for the URL below.
        os.makedirs('/kaggle/input/foomodule/tensorflow2/barvar')
        os.symlink('/input/tests/data/saved_model/', '/kaggle/input/foomodule/tensorflow2/barvar/2', target_is_directory=True)

        with env:
            test_inputs = tf.ones([1,4])
            layer = hub.KerasLayer("https://kaggle.com/models/foo/foomodule/frameworks/TensorFlow2/variations/barvar/versions/2")
            self.assertEqual([1, 1], layer(test_inputs).shape)

    def test_kaggle_resolver_not_attached_throws(self):
        # Simulate running inside a Kaggle environment.
        env = EnvironmentVarGuard()
        env.set('KAGGLE_CONTAINER_NAME', 'foo')
        with env:
            with self.assertRaisesRegex(RuntimeError, '.*attach.*'):
                hub.KerasLayer("https://kaggle.com/models/foo/foomodule/frameworks/TensorFlow2/variations/barvar/versions/2")

    def test_http_resolver_succeeds(self):
        def call_hub():
            test_inputs = tf.ones([1,4])
            layer = hub.KerasLayer('http://localhost:8080/model.tar.gz')
            self.assertEqual([1, 1], layer(test_inputs).shape)

        self._test_client(call_hub, TestKaggleModuleResolver.HubHTTPHandler)

    def test_local_path_resolver_succeeds(self):
        test_inputs = tf.ones([1,4])
        layer = hub.KerasLayer('/input/tests/data/saved_model')

        self.assertEqual([1, 1], layer(test_inputs).shape)

0 commit comments

Comments
 (0)