16
16
17
17
package com .example .speech ;
18
18
19
+ import com .google .api .gax .core .ApiStreamObserver ;
19
20
import com .google .api .gax .grpc .OperationFuture ;
21
+ import com .google .api .gax .grpc .StreamingCallable ;
20
22
import com .google .cloud .speech .spi .v1 .SpeechClient ;
21
23
import com .google .cloud .speech .v1 .LongRunningRecognizeResponse ;
22
24
import com .google .cloud .speech .v1 .RecognitionAudio ;
25
27
import com .google .cloud .speech .v1 .RecognizeResponse ;
26
28
import com .google .cloud .speech .v1 .SpeechRecognitionAlternative ;
27
29
import com .google .cloud .speech .v1 .SpeechRecognitionResult ;
30
+ import com .google .cloud .speech .v1 .StreamingRecognitionConfig ;
31
+ import com .google .cloud .speech .v1 .StreamingRecognitionResult ;
32
+ import com .google .cloud .speech .v1 .StreamingRecognizeRequest ;
33
+ import com .google .cloud .speech .v1 .StreamingRecognizeResponse ;
34
+ import com .google .common .util .concurrent .SettableFuture ;
28
35
import com .google .protobuf .ByteString ;
29
36
30
37
import java .io .IOException ;
@@ -40,7 +47,7 @@ public static void main(String... args) throws Exception {
40
47
System .out .printf (
41
48
"\t java %s \" <command>\" \" <path-to-image>\" \n "
42
49
+ "Commands:\n "
43
- + "\t syncrecognize | asyncrecognize\n "
50
+ + "\t syncrecognize | asyncrecognize | streamrecognize \n "
44
51
+ "Path:\n \t A file path (ex: ./resources/audio.raw) or a URI "
45
52
+ "for a Cloud Storage resource (gs://...)\n " ,
46
53
Recognize .class .getCanonicalName ());
@@ -62,7 +69,11 @@ public static void main(String... args) throws Exception {
62
69
} else {
63
70
asyncRecognizeFile (path );
64
71
}
72
+ } else if (command .equals ("streamrecognize" )) {
73
+ streamingRecognizeFile (path );
74
+ //streamingRecognizeEasy(path);
65
75
}
76
+
66
77
}
67
78
68
79
/**
@@ -180,13 +191,13 @@ public static void asyncRecognizeFile(String fileName) throws Exception, IOExcep
180
191
* Performs non-blocking speech recognition on remote FLAC file and prints
181
192
* the transcription.
182
193
*
183
- * @param gcsUri the path to the remote FLAC audio file to transcribe.
194
+ * @param gcsUri the path to the remote FLAC audio file to transcribe.
184
195
*/
185
196
public static void asyncRecognizeGcs (String gcsUri ) throws Exception , IOException {
186
197
// Instantiates a client with GOOGLE_APPLICATION_CREDENTIALS
187
198
SpeechClient speech = SpeechClient .create ();
188
199
189
- // Configure remote file request for FLAC file
200
+ // Configure remote file request for FLAC file
190
201
RecognitionConfig config = RecognitionConfig .newBuilder ()
191
202
.setEncoding (AudioEncoding .FLAC )
192
203
.setLanguageCode ("en-US" )
@@ -214,4 +225,81 @@ public static void asyncRecognizeGcs(String gcsUri) throws Exception, IOExceptio
214
225
}
215
226
speech .close ();
216
227
}
228
+
229
+ public static void streamingRecognizeFile (String fileName ) throws Exception , IOException {
230
+ Path path = Paths .get (fileName );
231
+ byte [] data = Files .readAllBytes (path );
232
+
233
+ // Instantiates a client with GOOGLE_APPLICATION_CREDENTIALS
234
+ SpeechClient speech = SpeechClient .create ();
235
+
236
+ // Configure request with local raw PCM audio
237
+ RecognitionConfig recConfig = RecognitionConfig .newBuilder ()
238
+ .setEncoding (AudioEncoding .LINEAR16 )
239
+ .setLanguageCode ("en-US" )
240
+ .setSampleRateHertz (16000 )
241
+ .build ();
242
+ StreamingRecognitionConfig config = StreamingRecognitionConfig .newBuilder ()
243
+ .setConfig (recConfig )
244
+ .build ();
245
+
246
+ class ResponseApiStreamingObserver <T > implements ApiStreamObserver <T > {
247
+ private final SettableFuture <List <T >> future = SettableFuture .create ();
248
+ private final List <T > messages = new java .util .ArrayList <T >();
249
+
250
+ @ Override
251
+ public void onNext (T message ) {
252
+ messages .add (message );
253
+ }
254
+
255
+ @ Override
256
+ public void onError (Throwable t ) {
257
+ future .setException (t );
258
+ }
259
+
260
+ @ Override
261
+ public void onCompleted () {
262
+ future .set (messages );
263
+ }
264
+
265
+ // Returns the SettableFuture object to get received messages / exceptions.
266
+ public SettableFuture <List <T >> future () {
267
+ return future ;
268
+ }
269
+ }
270
+
271
+ ResponseApiStreamingObserver <StreamingRecognizeResponse > responseObserver =
272
+ new ResponseApiStreamingObserver <StreamingRecognizeResponse >();
273
+
274
+ StreamingCallable <StreamingRecognizeRequest ,StreamingRecognizeResponse > callable =
275
+ speech .streamingRecognizeCallable ();
276
+
277
+ ApiStreamObserver <StreamingRecognizeRequest > requestObserver =
278
+ callable .bidiStreamingCall (responseObserver );
279
+
280
+ // The first request must **only** contain the audio configuration:
281
+ requestObserver .onNext (StreamingRecognizeRequest .newBuilder ()
282
+ .setStreamingConfig (config )
283
+ .build ());
284
+
285
+ // Subsequent requests must **only** contain the audio data.
286
+ requestObserver .onNext (StreamingRecognizeRequest .newBuilder ()
287
+ .setAudioContent (ByteString .copyFrom (data ))
288
+ .build ());
289
+
290
+ // Mark transmission as completed after sending the data.
291
+ requestObserver .onCompleted ();
292
+
293
+ List <StreamingRecognizeResponse > responses = responseObserver .future ().get ();
294
+
295
+ for (StreamingRecognizeResponse response : responses ) {
296
+ for (StreamingRecognitionResult result : response .getResultsList ()) {
297
+ for (SpeechRecognitionAlternative alternative : result .getAlternativesList ()) {
298
+ System .out .println (alternative .getTranscript ());
299
+ }
300
+ }
301
+ }
302
+ speech .close ();
303
+ }
304
+
217
305
}
0 commit comments