Commit 6dd3dba

feat: add cli functionality to dataproc quickstart (#2047)
* Changed quickstart to be an executable program
1 parent 4dc3deb commit 6dd3dba

File tree

3 files changed: 47 additions & 17 deletions

dataproc/pom.xml

Lines changed: 10 additions & 1 deletion
@@ -34,6 +34,16 @@
         </plugin>
       </plugins>
     </pluginManagement>
+    <plugins>
+      <plugin>
+        <groupId>org.codehaus.mojo</groupId>
+        <artifactId>exec-maven-plugin</artifactId>
+        <version>1.6.0</version>
+        <configuration>
+          <mainClass>Quickstart</mainClass>
+        </configuration>
+      </plugin>
+    </plugins>
   </build>

   <dependencyManagement>
@@ -65,5 +75,4 @@
       <scope>test</scope>
     </dependency>
   </dependencies>
-
 </project>
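With exec-maven-plugin pointed at the Quickstart main class, the sample can now be launched straight from Maven, as the usage comment added in Quickstart.java describes. A minimal invocation might look like the following; the project ID, region, cluster name, and GCS path are placeholder values for illustration, not values taken from this commit:

    mvn clean package -DskipTests
    mvn exec:java -Dexec.args="my-project-id us-central1 my-demo-cluster gs://my-bucket/jobs/sort.py"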

dataproc/src/main/java/Quickstart.java

Lines changed: 36 additions & 15 deletions
@@ -15,6 +15,19 @@
  */

 // [START dataproc_quickstart]
+/* This quickstart sample walks a user through creating a Cloud Dataproc
+ * cluster, submitting a PySpark job from Google Cloud Storage to the
+ * cluster, reading the output of the job and deleting the cluster, all
+ * using the Java client library.
+ *
+ * Usage:
+ *   mvn clean package -DskipTests
+ *
+ *   mvn exec:java -Dexec.args="<PROJECT_ID> <REGION> <CLUSTER_NAME> <GCS_JOB_FILE_PATH>"
+ *
+ * You can also set these arguments in the main function instead of providing them via the CLI.
+ */
+
 import com.google.api.gax.longrunning.OperationFuture;
 import com.google.cloud.dataproc.v1.Cluster;
 import com.google.cloud.dataproc.v1.ClusterConfig;
@@ -60,15 +73,6 @@ public static Job waitForJobCompletion(
     }
   }

-  public static void quickstart() throws IOException, InterruptedException {
-    // TODO(developer): Replace these variables before running the sample.
-    String projectId = "your-project-id";
-    String region = "your-project-region";
-    String clusterName = "your-cluster-name";
-    String jobFilePath = "your-job-file-path";
-    quickstart(projectId, region, clusterName, jobFilePath);
-  }
-
   public static void quickstart(
       String projectId, String region, String clusterName, String jobFilePath)
       throws IOException, InterruptedException {
@@ -82,10 +86,10 @@ public static void quickstart(
     JobControllerSettings jobControllerSettings =
         JobControllerSettings.newBuilder().setEndpoint(myEndpoint).build();

-    // Create both a cluster controller client and job controller client with the configured
-    // settings. The client only needs to be created once and can be reused for multiple requests.
-    // Using a try-with-resources closes the client, but this can also be done manually with
-    // the .close() method.
+    // Create both a cluster controller client and job controller client with the
+    // configured settings. The client only needs to be created once and can be reused for
+    // multiple requests. Using a try-with-resources closes the client, but this can also be done
+    // manually with the .close() method.
     try (ClusterControllerClient clusterControllerClient =
             ClusterControllerClient.create(clusterControllerSettings);
         JobControllerClient jobControllerClient =
@@ -114,7 +118,8 @@ public static void quickstart(
       OperationFuture<Cluster, ClusterOperationMetadata> createClusterAsyncRequest =
           clusterControllerClient.createClusterAsync(projectId, region, cluster);
       Cluster response = createClusterAsyncRequest.get();
-      System.out.printf("Cluster created successfully: %s", response.getClusterName());
+      System.out.println(
+          String.format("Cluster created successfully: %s", response.getClusterName()));

       // Configure the settings for our job.
       JobPlacement jobPlacement = JobPlacement.newBuilder().setClusterName(clusterName).build();
@@ -133,7 +138,7 @@ public static void quickstart(
       int timeout = 10;
       try {
         Job jobInfo = finishedJobFuture.get(timeout, TimeUnit.MINUTES);
-        System.out.printf("Job %s finished successfully.", jobId);
+        System.out.println(String.format("Job %s finished successfully.", jobId));

         // Cloud Dataproc job output gets saved to a GCS bucket allocated to it.
         Cluster clusterInfo = clusterControllerClient.getCluster(projectId, region, clusterName);
@@ -163,5 +168,21 @@ public static void quickstart(
         System.err.println(String.format("Error executing quickstart: %s ", e.getMessage()));
       }
     }
+
+  public static void main(String... args) throws IOException, InterruptedException {
+    if (args.length != 4) {
+      System.err.println(
+          "Insufficient number of parameters provided. Please make sure a "
+              + "PROJECT_ID, REGION, CLUSTER_NAME and JOB_FILE_PATH are provided, in this order.");
+      return;
+    }
+
+    String projectId = args[0]; // project-id of project to create the cluster in
+    String region = args[1]; // region to create the cluster
+    String clusterName = args[2]; // name of the cluster
+    String jobFilePath = args[3]; // location in GCS of the PySpark job
+
+    quickstart(projectId, region, clusterName, jobFilePath);
+  }
 }
 // [END dataproc_quickstart]
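Because the new entry point is declared as main(String... args), it can also be called programmatically with the four arguments passed directly, which is what the updated test below does. A minimal sketch of such a caller, using a hypothetical wrapper class and placeholder argument values:

public class QuickstartRunner {

  public static void main(String[] args) throws Exception {
    // Placeholder values for illustration only; replace with a real project ID,
    // region, cluster name, and a GCS path to a PySpark job before running.
    Quickstart.main(
        "my-project-id",           // PROJECT_ID
        "us-central1",             // REGION
        "my-demo-cluster",         // CLUSTER_NAME
        "gs://my-bucket/job.py");  // GCS path to the PySpark job
  }
}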

dataproc/src/test/java/QuickstartTest.java

Lines changed: 1 addition & 1 deletion
@@ -88,7 +88,7 @@ public void setUp() {

   @Test
   public void quickstartTest() throws IOException, InterruptedException {
-    Quickstart.quickstart(PROJECT_ID, REGION, CLUSTER_NAME, JOB_FILE_PATH);
+    Quickstart.main(PROJECT_ID, REGION, CLUSTER_NAME, JOB_FILE_PATH);
     String output = bout.toString();

     assertThat(output, CoreMatchers.containsString("Cluster created successfully"));
