Skip to content

Commit f48fff1

Browse files
committed
Adds support for word time offset
1 parent fd0ef5c commit f48fff1

File tree

3 files changed

+24
-11
lines changed

3 files changed

+24
-11
lines changed

speech/cloud-client/pom.xml

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434
</properties>
3535

3636
<!-- FIXME(lesv) - temp to fix an issue w/ GA Datastore -->
37-
<!--
37+
<!--
3838
<dependencyManagement>
3939
<dependencies>
4040
<dependency>
@@ -45,13 +45,12 @@
4545
</dependencies>
4646
</dependencyManagement>
4747
-->
48-
4948
<dependencies>
5049
<!-- [START dependencies] -->
5150
<dependency>
5251
<groupId>com.google.cloud</groupId>
5352
<artifactId>google-cloud</artifactId>
54-
<version>0.20.3-alpha</version>
53+
<version>0.21.1-alpha</version>
5554
</dependency>
5655
<!-- [END dependencies] -->
5756

speech/cloud-client/src/main/java/com/example/speech/Recognize.java

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,9 @@
1515
*/
1616

1717
package com.example.speech;
18-
19-
import com.google.api.gax.grpc.ApiStreamObserver;
20-
import com.google.api.gax.grpc.OperationFuture;
21-
import com.google.api.gax.grpc.StreamingCallable;
18+
import com.google.api.gax.rpc.ApiStreamObserver;
19+
import com.google.api.gax.rpc.OperationFuture;
20+
import com.google.api.gax.rpc.StreamingCallable;
2221
import com.google.cloud.speech.v1.LongRunningRecognizeMetadata;
2322
import com.google.cloud.speech.v1.LongRunningRecognizeResponse;
2423
import com.google.cloud.speech.v1.RecognitionAudio;
@@ -32,9 +31,10 @@
3231
import com.google.cloud.speech.v1.StreamingRecognitionResult;
3332
import com.google.cloud.speech.v1.StreamingRecognizeRequest;
3433
import com.google.cloud.speech.v1.StreamingRecognizeResponse;
34+
import com.google.cloud.speech.v1.WordInfo;
3535
import com.google.common.util.concurrent.SettableFuture;
36+
import com.google.longrunning.Operation;
3637
import com.google.protobuf.ByteString;
37-
3838
import java.io.IOException;
3939
import java.nio.file.Files;
4040
import java.nio.file.Path;
@@ -169,8 +169,9 @@ public static void asyncRecognizeFile(String fileName) throws Exception, IOExcep
169169
.build();
170170

171171
// Use non-blocking call for getting file transcription
172-
OperationFuture<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> response =
172+
OperationFuture<LongRunningRecognizeResponse, LongRunningRecognizeMetadata, Operation> response =
173173
speech.longRunningRecognizeAsync(config, audio);
174+
174175
while (!response.isDone()) {
175176
System.out.println("Waiting for response...");
176177
Thread.sleep(10000);
@@ -202,13 +203,14 @@ public static void asyncRecognizeGcs(String gcsUri) throws Exception, IOExceptio
202203
.setEncoding(AudioEncoding.FLAC)
203204
.setLanguageCode("en-US")
204205
.setSampleRateHertz(16000)
206+
.setEnableWordTimeOffsets(true)
205207
.build();
206208
RecognitionAudio audio = RecognitionAudio.newBuilder()
207209
.setUri(gcsUri)
208210
.build();
209211

210212
// Use non-blocking call for getting file transcription
211-
OperationFuture<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> response =
213+
OperationFuture<LongRunningRecognizeResponse, LongRunningRecognizeMetadata, Operation> response =
212214
speech.longRunningRecognizeAsync(config, audio);
213215
while (!response.isDone()) {
214216
System.out.println("Waiting for response...");
@@ -220,7 +222,12 @@ public static void asyncRecognizeGcs(String gcsUri) throws Exception, IOExceptio
220222
for (SpeechRecognitionResult result: results) {
221223
List<SpeechRecognitionAlternative> alternatives = result.getAlternativesList();
222224
for (SpeechRecognitionAlternative alternative: alternatives) {
223-
System.out.printf("Transcription: %s%n", alternative.getTranscript());
225+
System.out.printf("Transcription: %s\n",alternative.getTranscript());
226+
for (WordInfo wordInfo: alternative.getWordsList()) {
227+
System.out.println(wordInfo.getWord());
228+
System.out.printf("\t%s ns - %s ns\n",
229+
wordInfo.getStartTime().getNanos(), wordInfo.getEndTime().getNanos());
230+
}
224231
}
225232
}
226233
speech.close();

speech/cloud-client/src/test/java/com/example/speech/RecognizeIT.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,13 @@ public void testAsyncRecognizeGcs() throws Exception {
8383
assertThat(got).contains("how old is the Brooklyn Bridge");
8484
}
8585

86+
@Test
87+
public void testAsyncWordoffset() throws Exception {
88+
Recognize.asyncRecognizeGcs(gcsPath);
89+
String got = bout.toString();
90+
assertThat(got).contains("\t0.0 sec -");
91+
}
92+
8693
@Test
8794
public void testStreamRecognize() throws Exception {
8895
Recognize.streamingRecognizeFile(fileName);

0 commit comments

Comments
 (0)