30
30
31
31
public class SubmitHadoopFSJob {
32
32
33
- public static ArrayList <String > stringToList (String s ) {
34
- return new ArrayList <>(Arrays .asList (s .split (" " )));
35
- }
36
-
37
- public static void submitHadoopFSQuery () throws IOException , InterruptedException {
38
- // TODO(developer): Replace these variables before running the sample.
39
- String projectId = "your-project-id" ;
40
- String region = "your-project-region" ;
41
- String clusterName = "your-cluster-name" ;
42
- String hadoopFSQuery = "your-hadoop-fs-query" ;
43
- submitHadoopFSJob (projectId , region , clusterName , hadoopFSQuery );
44
- }
45
-
46
- public static void submitHadoopFSJob (
47
- String projectId , String region , String clusterName , String hadoopFSQuery )
48
- throws IOException , InterruptedException {
49
- String myEndpoint = String .format ("%s-dataproc.googleapis.com:443" , region );
50
-
51
- // Configure the settings for the job controller client.
52
- JobControllerSettings jobControllerSettings =
53
- JobControllerSettings .newBuilder ().setEndpoint (myEndpoint ).build ();
54
-
55
- // Create a job controller client with the configured settings. Using a try-with-resources closes the client,
56
- // but this can also be done manually with the .close() method.
57
- try (JobControllerClient jobControllerClient =
58
- JobControllerClient .create (jobControllerSettings )) {
59
-
60
- // Configure cluster placement for the job.
61
- JobPlacement jobPlacement = JobPlacement .newBuilder ().setClusterName (clusterName ).build ();
62
-
63
- // Configure Hadoop job settings. The HadoopFS query is set here.
64
- HadoopJob hadoopJob = HadoopJob .newBuilder ()
65
- .setMainClass ("org.apache.hadoop.fs.FsShell" )
66
- .addAllArgs (stringToList (hadoopFSQuery ))
67
- .build ();
68
-
69
- Job job = Job .newBuilder ().setPlacement (jobPlacement ).setHadoopJob (hadoopJob ).build ();
70
-
71
- // Submit an asynchronous request to execute the job.
72
- OperationFuture <Job , JobMetadata > submitJobAsOperationAsyncRequest =
73
- jobControllerClient .submitJobAsOperationAsync (projectId , region , job );
74
-
75
- Job response = submitJobAsOperationAsyncRequest .get ();
76
-
77
- // Print output from Google Cloud Storage
78
- Matcher matches = Pattern .compile ("gs://(.*?)/(.*)" ).matcher (response .getDriverOutputResourceUri ());
79
- matches .matches ();
80
-
81
- Storage storage = StorageOptions .getDefaultInstance ().getService ();
82
- Blob blob = storage .get (matches .group (1 ), String .format ("%s.000000000" , matches .group (2 )));
83
-
84
- System .out .println (String .format ("Job \" %s\" finished: %s" ,
85
- response .getReference ().getJobId (),
86
- new String (blob .getContent ())));
87
-
88
- } catch (ExecutionException e ) {
89
- System .err .println (String .format ("submitHadoopFSJob: %s " , e .getMessage ()));
90
- }
33
+ public static ArrayList <String > stringToList (String s ) {
34
+ return new ArrayList <>(Arrays .asList (s .split (" " )));
35
+ }
36
+
37
+ public static void submitHadoopFSQuery () throws IOException , InterruptedException {
38
+ // TODO(developer): Replace these variables before running the sample.
39
+ String projectId = "your-project-id" ;
40
+ String region = "your-project-region" ;
41
+ String clusterName = "your-cluster-name" ;
42
+ String hadoopFSQuery = "your-hadoop-fs-query" ;
43
+ submitHadoopFSJob (projectId , region , clusterName , hadoopFSQuery );
44
+ }
45
+
46
+ public static void submitHadoopFSJob (
47
+ String projectId , String region , String clusterName , String hadoopFSQuery )
48
+ throws IOException , InterruptedException {
49
+ String myEndpoint = String .format ("%s-dataproc.googleapis.com:443" , region );
50
+
51
+ // Configure the settings for the job controller client.
52
+ JobControllerSettings jobControllerSettings =
53
+ JobControllerSettings .newBuilder ().setEndpoint (myEndpoint ).build ();
54
+
55
+ // Create a job controller client with the configured settings. Using a try-with-resources
56
+ // closes the client,
57
+ // but this can also be done manually with the .close() method.
58
+ try (JobControllerClient jobControllerClient =
59
+ JobControllerClient .create (jobControllerSettings )) {
60
+
61
+ // Configure cluster placement for the job.
62
+ JobPlacement jobPlacement = JobPlacement .newBuilder ().setClusterName (clusterName ).build ();
63
+
64
+ // Configure Hadoop job settings. The HadoopFS query is set here.
65
+ HadoopJob hadoopJob =
66
+ HadoopJob .newBuilder ()
67
+ .setMainClass ("org.apache.hadoop.fs.FsShell" )
68
+ .addAllArgs (stringToList (hadoopFSQuery ))
69
+ .build ();
70
+
71
+ Job job = Job .newBuilder ().setPlacement (jobPlacement ).setHadoopJob (hadoopJob ).build ();
72
+
73
+ // Submit an asynchronous request to execute the job.
74
+ OperationFuture <Job , JobMetadata > submitJobAsOperationAsyncRequest =
75
+ jobControllerClient .submitJobAsOperationAsync (projectId , region , job );
76
+
77
+ Job response = submitJobAsOperationAsyncRequest .get ();
78
+
79
+ // Print output from Google Cloud Storage
80
+ Matcher matches =
81
+ Pattern .compile ("gs://(.*?)/(.*)" ).matcher (response .getDriverOutputResourceUri ());
82
+ matches .matches ();
83
+
84
+ Storage storage = StorageOptions .getDefaultInstance ().getService ();
85
+ Blob blob = storage .get (matches .group (1 ), String .format ("%s.000000000" , matches .group (2 )));
86
+
87
+ System .out .println (
88
+ String .format ("Job finished successfully: %s" , new String (blob .getContent ())));
89
+
90
+ } catch (ExecutionException e ) {
91
+ System .err .println (String .format ("submitHadoopFSJob: %s " , e .getMessage ()));
91
92
}
93
+ }
92
94
}
93
// [END dataproc_submit_hadoop_fs_job]