Use a CUDA 11.0-compatible JAX version. (#1124)

rosbo · web-flow · commit 9e8db9defcb1 · 2022-01-20T12:57:42.000-08:00
- Add additional smoke tests for JAX and Flax.

http://215555626
diff --git a/Dockerfile.tmpl b/Dockerfile.tmpl
@@ -114,7 +114,7 @@ RUN pip install lightgbm==$LIGHTGBM_VERSION && \
 
 # Install JAX
 {{ if eq .Accelerator "gpu" }}
-RUN pip install jax[cuda] -f https://storage.googleapis.com/jax-releases/jax_releases.html && \
+RUN pip install jax[cuda11_cudnn805] -f https://storage.googleapis.com/jax-releases/jax_releases.html && \
     /tmp/clean-layer.sh
 {{ else }}
 RUN pip install jax[cpu] && \
diff --git a/tests/test_flax.py b/tests/test_flax.py
@@ -1,15 +1,52 @@
 import unittest
 
+import jax
 import jax.numpy as jnp
 import numpy as np
+import optax
 
 from flax import linen as nn
+from flax.training import train_state 
 
 
 class TestFlax(unittest.TestCase):
 
-    def test_bla(self):
+    def test_pooling(self):
         x = jnp.full((1, 3, 3, 1), 2.)
         mul_reduce = lambda x, y: x * y
         y = nn.pooling.pool(x, 1., mul_reduce, (2, 2), (1, 1), 'VALID')
         np.testing.assert_allclose(y, np.full((1, 2, 2, 1), 2. ** 4))
+
+    def test_cnn(self):
+        """Smoke test: build a small CNN and create its initial TrainState."""
+        class CNN(nn.Module):
+            @nn.compact
+            def __call__(self, x):
+                # Two conv -> relu -> 2x2 average-pool stages.
+                x = nn.Conv(features=32, kernel_size=(3, 3))(x)
+                x = nn.relu(x)
+                x = nn.avg_pool(x, window_shape=(2, 2), strides=(2, 2))
+                x = nn.Conv(features=64, kernel_size=(3, 3))(x)
+                x = nn.relu(x)
+                x = nn.avg_pool(x, window_shape=(2, 2), strides=(2, 2))
+                # Flatten all axes after the first one.
+                x = x.reshape((x.shape[0], -1))
+                x = nn.Dense(features=256)(x)
+                x = nn.relu(x)
+                x = nn.Dense(features=120)(x)
+                return nn.log_softmax(x)
+
+        def create_train_state(rng, learning_rate, momentum):
+            """Initialize CNN parameters and wrap them with an SGD optimizer."""
+            cnn = CNN()
+            params = cnn.init(rng, jnp.ones([1, 224, 224, 3]))['params']
+            tx = optax.sgd(learning_rate, momentum)
+            return train_state.TrainState.create(
+                apply_fn=cnn.apply, params=params, tx=tx)
+
+        # Only the second half of the split key is used by this test.
+        _, init_rng = jax.random.split(jax.random.PRNGKey(0))
+
+        state = create_train_state(init_rng, learning_rate=2e-5, momentum=0.9)
+        # No optimizer step has been applied yet, so the counter should be 0.
+        self.assertEqual(0, state.step)
diff --git a/tests/test_jax.py b/tests/test_jax.py
@@ -1,4 +1,6 @@
 import unittest
+
+import os
 import time
 
 import jax.numpy as np
@@ -16,3 +18,14 @@ def test_grad(self):
         grad_tanh = grad(self.tanh)
         ag = grad_tanh(1.0)
         self.assertEqual(0.4199743, ag)
+
+    def test_backend(self):
+        """Check that JAX's default backend agrees with CUDA_VERSION."""
+        # Imported locally so the test does not rely on a module-level
+        # `import jax`, which the visible part of this file does not show.
+        import jax
+
+        # An unset or empty CUDA_VERSION means 'cpu' is expected; any other
+        # value means 'gpu'.
+        expected_backend = 'gpu' if os.environ.get('CUDA_VERSION') else 'cpu'
+        self.assertEqual(expected_backend, jax.default_backend())