15
15
*/
16
16
17
17
// [START dataproc_quickstart]
18
+ /* This quickstart sample walks a user through creating a Cloud Dataproc
19
+ * cluster, submitting a PySpark job from Google Cloud Storage to the
20
+ * cluster, reading the output of the job and deleting the cluster, all
21
+ * using the Java client library.
22
+ *
23
+ * Usage:
24
+ * mvn clean package -DskipTests
25
+ *
26
+ * mvn exec:java -Dexec.args="<PROJECT_ID> <REGION> <CLUSTER_NAME> <GCS_JOB_FILE_PATH>"
27
+ *
28
+ * You can also set these arguments in the main function instead of providing them via the CLI.
29
+ */
30
+
18
31
import com .google .api .gax .longrunning .OperationFuture ;
19
32
import com .google .cloud .dataproc .v1 .Cluster ;
20
33
import com .google .cloud .dataproc .v1 .ClusterConfig ;
@@ -60,15 +73,6 @@ public static Job waitForJobCompletion(
60
73
}
61
74
}
62
75
63
- public static void quickstart () throws IOException , InterruptedException {
64
- // TODO(developer): Replace these variables before running the sample.
65
- String projectId = "your-project-id" ;
66
- String region = "your-project-region" ;
67
- String clusterName = "your-cluster-name" ;
68
- String jobFilePath = "your-job-file-path" ;
69
- quickstart (projectId , region , clusterName , jobFilePath );
70
- }
71
-
72
76
public static void quickstart (
73
77
String projectId , String region , String clusterName , String jobFilePath )
74
78
throws IOException , InterruptedException {
@@ -82,10 +86,10 @@ public static void quickstart(
82
86
JobControllerSettings jobControllerSettings =
83
87
JobControllerSettings .newBuilder ().setEndpoint (myEndpoint ).build ();
84
88
85
- // Create both a cluster controller client and job controller client with the configured
86
- // settings. The client only needs to be created once and can be reused for multiple requests.
87
- // Using a try-with-resources closes the client, but this can also be done manually with
88
- // the .close() method.
89
+ // Create both a cluster controller client and job controller client with the
90
+ // configured settings. Each client only needs to be created once and can be reused for
91
+ // multiple requests. Using a try-with-resources closes both clients, but this can also be done
92
+ // manually with the .close() method.
89
93
try (ClusterControllerClient clusterControllerClient =
90
94
ClusterControllerClient .create (clusterControllerSettings );
91
95
JobControllerClient jobControllerClient =
@@ -114,7 +118,8 @@ public static void quickstart(
114
118
OperationFuture <Cluster , ClusterOperationMetadata > createClusterAsyncRequest =
115
119
clusterControllerClient .createClusterAsync (projectId , region , cluster );
116
120
Cluster response = createClusterAsyncRequest .get ();
117
- System .out .printf ("Cluster created successfully: %s" , response .getClusterName ());
121
+ System .out .println (
122
+ String .format ("Cluster created successfully: %s" , response .getClusterName ()));
118
123
119
124
// Configure the settings for our job.
120
125
JobPlacement jobPlacement = JobPlacement .newBuilder ().setClusterName (clusterName ).build ();
@@ -133,7 +138,7 @@ public static void quickstart(
133
138
int timeout = 10 ;
134
139
try {
135
140
Job jobInfo = finishedJobFuture .get (timeout , TimeUnit .MINUTES );
136
- System .out .printf ( "Job %s finished successfully." , jobId );
141
+ System .out .println ( String . format ( "Job %s finished successfully." , jobId ) );
137
142
138
143
// Cloud Dataproc job output gets saved to a GCS bucket allocated to it.
139
144
Cluster clusterInfo = clusterControllerClient .getCluster (projectId , region , clusterName );
@@ -163,5 +168,21 @@ public static void quickstart(
163
168
System .err .println (String .format ("Error executing quickstart: %s " , e .getMessage ()));
164
169
}
165
170
}
171
+
172
+ public static void main (String ... args ) throws IOException , InterruptedException {
173
+ if (args .length != 4 ) {
174
+ System .err .println (
175
+ "Insufficient number of parameters provided. Please make sure a "
176
+ + "PROJECT_ID, REGION, CLUSTER_NAME and JOB_FILE_PATH are provided, in this order." );
177
+ return ;
178
+ }
179
+
180
+ String projectId = args [0 ]; // project-id of project to create the cluster in
181
+ String region = args [1 ]; // region to create the cluster
182
+ String clusterName = args [2 ]; // name of the cluster
183
+ String jobFilePath = args [3 ]; // location in GCS of the PySpark job
184
+
185
+ quickstart (projectId , region , clusterName , jobFilePath );
186
+ }
166
187
}
167
188
// [END dataproc_quickstart]
0 commit comments