Skip to content

Commit efeca49

Browse files
authored
docs(samples): Add & update samples for STT v2 (#10388)
docs(samples): Add & update samples for STT v2

Added samples for:
* Changing STT endpoint location
* Automatic punctuation
* Profanity filter
* Selecting a model
* Multi-channel
* Word level confidence
* Word time offsets
* Chirp

Updated existing samples:
* Added command-line args
* Removed recognizer creation where not needed
1 parent d5dab1f commit efeca49

37 files changed

+1052
-318
lines changed

speech/snippets/adaptation_v2_custom_class_reference.py

Lines changed: 20 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -13,15 +13,15 @@
1313
# limitations under the License.
1414

1515

16-
# [START speech_adaptation_v2_custom_class_reference]
16+
import argparse
1717

18+
# [START speech_adaptation_v2_custom_class_reference]
1819
from google.cloud.speech_v2 import SpeechClient
1920
from google.cloud.speech_v2.types import cloud_speech
2021

2122

2223
def adaptation_v2_custom_class_reference(
2324
project_id: str,
24-
recognizer_id: str,
2525
phrase_set_id: str,
2626
custom_class_id: str,
2727
audio_file: str,
@@ -30,7 +30,6 @@ def adaptation_v2_custom_class_reference(
3030
3131
Args:
3232
project_id: The GCP project ID.
33-
recognizer_id: The ID of the recognizer to use.
3433
phrase_set_id: The ID of the phrase set to use.
3534
custom_class_id: The ID of the custom class to use.
3635
audio_file: The audio file to transcribe.
@@ -41,18 +40,6 @@ def adaptation_v2_custom_class_reference(
4140
# Instantiates a client
4241
client = SpeechClient()
4342

44-
request = cloud_speech.CreateRecognizerRequest(
45-
parent=f"projects/{project_id}/locations/global",
46-
recognizer_id=recognizer_id,
47-
recognizer=cloud_speech.Recognizer(
48-
language_codes=["en-US"], model="latest_short"
49-
),
50-
)
51-
52-
# Creates a Recognizer
53-
operation = client.create_recognizer(request=request)
54-
recognizer = operation.result()
55-
5643
# Reads a file as bytes
5744
with open(audio_file, "rb") as f:
5845
content = f.read()
@@ -88,11 +75,16 @@ def adaptation_v2_custom_class_reference(
8875
]
8976
)
9077
config = cloud_speech.RecognitionConfig(
91-
auto_decoding_config={}, adaptation=adaptation
78+
auto_decoding_config=cloud_speech.AutoDetectDecodingConfig(),
79+
adaptation=adaptation,
80+
language_codes=["en-US"],
81+
model="latest_short",
9282
)
9383

9484
request = cloud_speech.RecognizeRequest(
95-
recognizer=recognizer.name, config=config, content=content
85+
recognizer=f"projects/{project_id}/locations/global/recognizers/_",
86+
config=config,
87+
content=content,
9688
)
9789

9890
# Transcribes the audio into text
@@ -108,4 +100,14 @@ def adaptation_v2_custom_class_reference(
108100

109101

110102
if __name__ == "__main__":
111-
adaptation_v2_custom_class_reference()
103+
parser = argparse.ArgumentParser(
104+
description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
105+
)
106+
parser.add_argument("project_id", help="GCP Project ID")
107+
parser.add_argument("phrase_set_id", help="ID for the phrase set to create")
108+
parser.add_argument("custom_class_id", help="ID for the custom class to create")
109+
parser.add_argument("audio_file", help="Audio file to stream")
110+
args = parser.parse_args()
111+
adaptation_v2_custom_class_reference(
112+
args.project_id, args.phrase_set_id, args.custom_class_id, args.audio_file
113+
)

speech/snippets/adaptation_v2_custom_class_reference_test.py

Lines changed: 2 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,7 @@
2222

2323
import adaptation_v2_custom_class_reference
2424

25-
RESOURCES = os.path.join(os.path.dirname(__file__), "resources")
26-
27-
28-
def delete_recognizer(name: str) -> None:
29-
client = SpeechClient()
30-
request = cloud_speech.DeleteRecognizerRequest(name=name)
31-
client.delete_recognizer(request=request)
25+
_RESOURCES = os.path.join(os.path.dirname(__file__), "resources")
3226

3327

3428
def delete_phrase_set(name: str) -> None:
@@ -47,16 +41,14 @@ def delete_custom_class(name: str) -> None:
4741
def test_adaptation_v2_custom_class_reference() -> None:
4842
project_id = os.getenv("GOOGLE_CLOUD_PROJECT")
4943

50-
recognizer_id = "recognizer-" + str(uuid4())
5144
phrase_set_id = "phrase-set-" + str(uuid4())
5245
custom_class_id = "custom-class-" + str(uuid4())
5346
response = (
5447
adaptation_v2_custom_class_reference.adaptation_v2_custom_class_reference(
5548
project_id,
56-
recognizer_id,
5749
phrase_set_id,
5850
custom_class_id,
59-
os.path.join(RESOURCES, "fair.wav"),
51+
os.path.join(_RESOURCES, "fair.wav"),
6052
)
6153
)
6254

@@ -66,10 +58,6 @@ def test_adaptation_v2_custom_class_reference() -> None:
6658
re.DOTALL | re.I,
6759
)
6860

69-
delete_recognizer(
70-
f"projects/{project_id}/locations/global/recognizers/{recognizer_id}"
71-
)
72-
7361
delete_phrase_set(
7462
f"projects/{project_id}/locations/global/phraseSets/{phrase_set_id}"
7563
)

speech/snippets/adaptation_v2_inline_custom_class.py

Lines changed: 16 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -13,22 +13,21 @@
1313
# limitations under the License.
1414

1515

16-
# [START speech_adaptation_v2_inline_custom_class]
16+
import argparse
1717

18+
# [START speech_adaptation_v2_inline_custom_class]
1819
from google.cloud.speech_v2 import SpeechClient
1920
from google.cloud.speech_v2.types import cloud_speech
2021

2122

2223
def adaptation_v2_inline_custom_class(
2324
project_id: str,
24-
recognizer_id: str,
2525
audio_file: str,
2626
) -> cloud_speech.RecognizeResponse:
2727
"""Transcribe audio file using inline custom class
2828
2929
Args:
3030
project_id: The GCP project ID.
31-
recognizer_id: The ID of the recognizer.
3231
audio_file: The audio file to transcribe.
3332
3433
Returns:
@@ -37,18 +36,6 @@ def adaptation_v2_inline_custom_class(
3736
# Instantiates a client
3837
client = SpeechClient()
3938

40-
request = cloud_speech.CreateRecognizerRequest(
41-
parent=f"projects/{project_id}/locations/global",
42-
recognizer_id=recognizer_id,
43-
recognizer=cloud_speech.Recognizer(
44-
language_codes=["en-US"], model="latest_short"
45-
),
46-
)
47-
48-
# Creates a Recognizer
49-
operation = client.create_recognizer(request=request)
50-
recognizer = operation.result()
51-
5239
# Reads a file as bytes
5340
with open(audio_file, "rb") as f:
5441
content = f.read()
@@ -65,11 +52,16 @@ def adaptation_v2_inline_custom_class(
6552
custom_classes=[custom_class],
6653
)
6754
config = cloud_speech.RecognitionConfig(
68-
auto_decoding_config={}, adaptation=adaptation
55+
auto_decoding_config=cloud_speech.AutoDetectDecodingConfig(),
56+
adaptation=adaptation,
57+
language_codes=["en-US"],
58+
model="latest_short",
6959
)
7060

7161
request = cloud_speech.RecognizeRequest(
72-
recognizer=recognizer.name, config=config, content=content
62+
recognizer=f"projects/{project_id}/locations/global/recognizers/_",
63+
config=config,
64+
content=content,
7365
)
7466

7567
# Transcribes the audio into text
@@ -85,4 +77,10 @@ def adaptation_v2_inline_custom_class(
8577

8678

8779
if __name__ == "__main__":
88-
adaptation_v2_inline_custom_class()
80+
parser = argparse.ArgumentParser(
81+
description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
82+
)
83+
parser.add_argument("project_id", help="GCP Project ID")
84+
parser.add_argument("audio_file", help="Audio file to stream")
85+
args = parser.parse_args()
86+
adaptation_v2_inline_custom_class(args.project_id, args.audio_file)

speech/snippets/adaptation_v2_inline_custom_class_test.py

Lines changed: 2 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -14,38 +14,24 @@
1414

1515
import os
1616
import re
17-
from uuid import uuid4
1817

1918
from google.api_core.retry import Retry
20-
from google.cloud.speech_v2 import SpeechClient
21-
from google.cloud.speech_v2.types import cloud_speech
2219

2320
import adaptation_v2_inline_custom_class
2421

25-
RESOURCES = os.path.join(os.path.dirname(__file__), "resources")
26-
27-
28-
def delete_recognizer(name: str) -> None:
29-
client = SpeechClient()
30-
request = cloud_speech.DeleteRecognizerRequest(name=name)
31-
client.delete_recognizer(request=request)
22+
_RESOURCES = os.path.join(os.path.dirname(__file__), "resources")
3223

3324

3425
@Retry()
3526
def test_adaptation_v2_inline_custom_class() -> None:
3627
project_id = os.getenv("GOOGLE_CLOUD_PROJECT")
3728

38-
recognizer_id = "recognizer-" + str(uuid4())
3929
response = adaptation_v2_inline_custom_class.adaptation_v2_inline_custom_class(
40-
project_id, recognizer_id, os.path.join(RESOURCES, "fair.wav")
30+
project_id, os.path.join(_RESOURCES, "fair.wav")
4131
)
4232

4333
assert re.search(
4434
r"the word",
4535
response.results[0].alternatives[0].transcript,
4636
re.DOTALL | re.I,
4737
)
48-
49-
delete_recognizer(
50-
f"projects/{project_id}/locations/global/recognizers/{recognizer_id}"
51-
)

speech/snippets/adaptation_v2_inline_phrase_set.py

Lines changed: 19 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -13,32 +13,20 @@
1313
# limitations under the License.
1414

1515

16-
# [START speech_adaptation_v2_inline_phrase_set]
16+
import argparse
1717

18+
# [START speech_adaptation_v2_inline_phrase_set]
1819
from google.cloud.speech_v2 import SpeechClient
1920
from google.cloud.speech_v2.types import cloud_speech
2021

2122

2223
def adaptation_v2_inline_phrase_set(
2324
project_id: str,
24-
recognizer_id: str,
2525
audio_file: str,
2626
) -> cloud_speech.RecognizeResponse:
2727
# Instantiates a client
2828
client = SpeechClient()
2929

30-
request = cloud_speech.CreateRecognizerRequest(
31-
parent=f"projects/{project_id}/locations/global",
32-
recognizer_id=recognizer_id,
33-
recognizer=cloud_speech.Recognizer(
34-
language_codes=["en-US"], model="latest_short"
35-
),
36-
)
37-
38-
# Creates a Recognizer
39-
operation = client.create_recognizer(request=request)
40-
recognizer = operation.result()
41-
4230
# Reads a file as bytes
4331
with open(audio_file, "rb") as f:
4432
content = f.read()
@@ -53,11 +41,16 @@ def adaptation_v2_inline_phrase_set(
5341
]
5442
)
5543
config = cloud_speech.RecognitionConfig(
56-
auto_decoding_config={}, adaptation=adaptation
44+
auto_decoding_config=cloud_speech.AutoDetectDecodingConfig(),
45+
adaptation=adaptation,
46+
language_codes=["en-US"],
47+
model="latest_short",
5748
)
5849

5950
request = cloud_speech.RecognizeRequest(
60-
recognizer=recognizer.name, config=config, content=content
51+
recognizer=f"projects/{project_id}/locations/global/recognizers/_",
52+
config=config,
53+
content=content,
6154
)
6255

6356
# Transcribes the audio into text
@@ -73,4 +66,13 @@ def adaptation_v2_inline_phrase_set(
7366

7467

7568
if __name__ == "__main__":
76-
adaptation_v2_inline_phrase_set()
69+
parser = argparse.ArgumentParser(
70+
description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
71+
)
72+
parser.add_argument("project_id", help="GCP Project ID")
73+
parser.add_argument("audio_file", help="Audio file to stream")
74+
args = parser.parse_args()
75+
adaptation_v2_inline_phrase_set(
76+
args.project_id,
77+
args.audio_file,
78+
)

speech/snippets/adaptation_v2_inline_phrase_set_test.py

Lines changed: 2 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -14,38 +14,24 @@
1414

1515
import os
1616
import re
17-
from uuid import uuid4
1817

1918
from google.api_core.retry import Retry
20-
from google.cloud.speech_v2 import SpeechClient
21-
from google.cloud.speech_v2.types import cloud_speech
2219

2320
import adaptation_v2_inline_phrase_set
2421

25-
RESOURCES = os.path.join(os.path.dirname(__file__), "resources")
26-
27-
28-
def delete_recognizer(name: str) -> None:
29-
client = SpeechClient()
30-
request = cloud_speech.DeleteRecognizerRequest(name=name)
31-
client.delete_recognizer(request=request)
22+
_RESOURCES = os.path.join(os.path.dirname(__file__), "resources")
3223

3324

3425
@Retry()
3526
def test_adaptation_v2_inline_phrase_set() -> None:
3627
project_id = os.getenv("GOOGLE_CLOUD_PROJECT")
3728

38-
recognizer_id = "recognizer-" + str(uuid4())
3929
response = adaptation_v2_inline_phrase_set.adaptation_v2_inline_phrase_set(
40-
project_id, recognizer_id, os.path.join(RESOURCES, "fair.wav")
30+
project_id, os.path.join(_RESOURCES, "fair.wav")
4131
)
4232

4333
assert re.search(
4434
r"the word is fare",
4535
response.results[0].alternatives[0].transcript,
4636
re.DOTALL | re.I,
4737
)
48-
49-
delete_recognizer(
50-
f"projects/{project_id}/locations/global/recognizers/{recognizer_id}"
51-
)

0 commit comments

Comments
 (0)