Skip to content

Commit 4a30f92

Browse files
authored
Merge pull request #287 from watson-developer-cloud/gh279-streaming-example
Gh279 streaming example
2 parents 6447b88 + 2f774ea commit 4a30f92

File tree

8 files changed

+805
-120
lines changed

8 files changed

+805
-120
lines changed

Config.json.enc

0 Bytes
Binary file not shown.

Examples/ServiceExamples/ExampleStreaming.unity

Lines changed: 342 additions & 31 deletions
Large diffs are not rendered by default.

Examples/ServiceExamples/Scripts/ExampleSpeechToText.cs

Lines changed: 64 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@ public class ExampleSpeechToText : MonoBehaviour
2929
private string _password = null;
3030
private string _url = null;
3131

32-
private AudioClip _audioClip;
3332
private SpeechToText _speechToText;
3433

3534
private string _modelNameToGet;
@@ -38,13 +37,19 @@ public class ExampleSpeechToText : MonoBehaviour
3837
private string _customCorpusFilePath;
3938
private string _customWordsFilePath;
4039
private string _acousticResourceUrl = "https://ia802302.us.archive.org/10/items/Greatest_Speeches_of_the_20th_Century/TheFirstAmericaninEarthOrbit.mp3";
40+
private string _oggResourceUrl = "https://ia802302.us.archive.org/10/items/Greatest_Speeches_of_the_20th_Century/InauguralAddress-1981.ogg";
4141
private bool _isAudioLoaded = false;
4242
private string _createdAcousticModelId;
4343
private string _acousticResourceName = "unity-acoustic-resource";
4444
private string _createdAcousticModelName = "unity-example-acoustic-model";
4545
private byte[] _acousticResourceData;
46+
private string _acousticResourceMimeType;
47+
private byte[] _oggResourceData;
48+
private string _oggResourceMimeType;
49+
private bool _isOggLoaded = false;
4650

4751
private bool _recognizeTested = false;
52+
private bool _recognizeOggTested = false;
4853
private bool _getModelsTested = false;
4954
private bool _getModelTested = false;
5055
private bool _getCustomizationsTested = false;
@@ -87,24 +92,41 @@ void Start()
8792
_speechToText = new SpeechToText(credentials);
8893
_customCorpusFilePath = Application.dataPath + "/Watson/Examples/ServiceExamples/TestData/theJabberwocky-utf8.txt";
8994
_customWordsFilePath = Application.dataPath + "/Watson/Examples/ServiceExamples/TestData/test-stt-words.json";
95+
_acousticResourceMimeType = Utility.GetMimeType(Path.GetExtension(_acousticResourceUrl));
96+
_oggResourceMimeType = Utility.GetMimeType(Path.GetExtension(_oggResourceUrl));
97+
98+
_speechToText.StreamMultipart = true;
9099

91100
Runnable.Run(Examples());
92101
}
93102

94103
private IEnumerator Examples()
95104
{
96105
Runnable.Run(DownloadAcousticResource());
106+
while (!_isAudioLoaded)
107+
yield return null;
108+
109+
Runnable.Run(DownloadOggResource());
110+
while (!_isOggLoaded)
111+
yield return null;
97112

98113
// Recognize
99114
Log.Debug("ExampleSpeechToText.Examples()", "Attempting to recognize");
100115
List<string> keywords = new List<string>();
101116
keywords.Add("speech");
102117
_speechToText.KeywordsThreshold = 0.5f;
103118
_speechToText.Keywords = keywords.ToArray();
104-
_speechToText.Recognize(_audioClip, HandleOnRecognize);
119+
_speechToText.Recognize(_acousticResourceData, _acousticResourceMimeType, HandleOnRecognize);
105120
while (!_recognizeTested)
106121
yield return null;
107122

123+
// Recognize ogg
124+
_speechToText.StreamMultipart = true;
125+
Log.Debug("ExampleSpeechToText", "Attempting to recognize ogg: mimeType: {0} | _speechTText.StreamMultipart: {1}", _oggResourceMimeType, _speechToText.StreamMultipart);
126+
_speechToText.Recognize(_oggResourceData, _oggResourceMimeType + ";codecs=vorbis", HandleOnRecognizeOgg);
127+
while (!_recognizeOggTested)
128+
yield return null;
129+
108130
// Get models
109131
Log.Debug("ExampleSpeechToText.Examples()", "Attempting to get models");
110132
_speechToText.GetModels(HandleGetModels);
@@ -426,6 +448,32 @@ private void HandleOnRecognize(SpeechRecognitionEvent result)
426448
}
427449
}
428450

451+
/// <summary>
/// Callback for the ogg recognize request. Logs each transcript alternative and
/// each keyword hit carried by the event, and marks the ogg recognize test as
/// done as soon as an alternative belonging to a final result is seen.
/// </summary>
/// <param name="result">Recognition event passed to the callback; a null or empty event is ignored.</param>
private void HandleOnRecognizeOgg(SpeechRecognitionEvent result)
{
    // Nothing to log for a missing event or one without results.
    if (result == null || result.results.Length <= 0)
        return;

    foreach (var recognition in result.results)
    {
        foreach (var alternative in recognition.alternatives)
        {
            string stage;
            if (recognition.final)
                stage = "Final";
            else
                stage = "Interim";

            string transcript = alternative.transcript;
            string message = string.Format("{0} ({1}, {2:0.00})\n", transcript, stage, alternative.confidence);
            Log.Debug("ExampleSpeechToText", message);

            // Examples() polls this flag; only a final result ends the wait.
            if (recognition.final)
                _recognizeOggTested = true;
        }

        // Keyword hits are optional; move on when none are attached to this result.
        if (recognition.keywords_result == null || recognition.keywords_result.keyword == null)
            continue;

        foreach (var hit in recognition.keywords_result.keyword)
        {
            Log.Debug("ExampleSpeechToText", "keyword: {0}, confidence: {1}, start time: {2}, end time: {3}", hit.normalized_text, hit.confidence, hit.start_time, hit.end_time);
        }
    }
}
476+
429477
private void HandleGetCustomizations(Customizations customizations, string customData)
430478
{
431479
Log.Debug("ExampleSpeechToText.HandleGetCustomizations()", "Speech to Text - Get customizations response: {0}", customData);
@@ -724,5 +772,18 @@ private IEnumerator DownloadAcousticResource()
724772
Log.Debug("ExampleSpeechToText.DownloadAcousticResource()", "acoustic resource downloaded");
725773
_acousticResourceData = www.bytes;
726774
_isAudioLoaded = true;
775+
www.Dispose();
776+
}
777+
778+
/// <summary>
/// Coroutine that downloads the ogg sample from <c>_oggResourceUrl</c>, stores the
/// received bytes in <c>_oggResourceData</c> and then sets <c>_isOggLoaded</c>.
/// </summary>
/// <remarks>
/// A download error is logged instead of being reported as a success. The flag is
/// still set in that case so that the wait loop in Examples() does not spin forever.
/// </remarks>
/// <returns>Enumerator that yields until the request has completed.</returns>
private IEnumerator DownloadOggResource()
{
    Log.Debug("ExampleSpeechToText", "downloading ogg resource from {0}", _oggResourceUrl);

    // The using block disposes the request on every exit path of the coroutine.
    using (WWW www = new WWW(_oggResourceUrl))
    {
        yield return www;

        if (string.IsNullOrEmpty(www.error))
            Log.Debug("ExampleSpeechToText", "ogg resource downloaded");
        else
            Log.Debug("ExampleSpeechToText", "failed to download ogg resource: {0}", www.error);

        // Keep whatever payload was received, as before, so the caller's contract is unchanged.
        _oggResourceData = www.bytes;
    }

    _isOggLoaded = true;
}
728-
}
789+
}

Examples/ServiceExamples/Scripts/ExampleStreaming.cs

Lines changed: 13 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -22,17 +22,20 @@
2222
using IBM.Watson.DeveloperCloud.Utilities;
2323
using IBM.Watson.DeveloperCloud.DataTypes;
2424
using System.Collections.Generic;
25+
using UnityEngine.UI;
2526

2627
public class ExampleStreaming : MonoBehaviour
2728
{
2829
private string _username = null;
2930
private string _password = null;
3031
private string _url = null;
3132

33+
public Text ResultsField;
34+
3235
private int _recordingRoutine = 0;
3336
private string _microphoneID = null;
3437
private AudioClip _recording = null;
35-
private int _recordingBufferSize = 2;
38+
private int _recordingBufferSize = 1;
3639
private int _recordingHZ = 22050;
3740

3841
private SpeechToText _speechToText;
@@ -60,21 +63,15 @@ public bool Active
6063
_speechToText.DetectSilence = true;
6164
_speechToText.EnableWordConfidence = true;
6265
_speechToText.EnableTimestamps = true;
63-
_speechToText.SilenceThreshold = 0.1f;
64-
_speechToText.MaxAlternatives = 5;
66+
_speechToText.SilenceThreshold = 0.01f;
67+
_speechToText.MaxAlternatives = 0;
6568
_speechToText.EnableInterimResults = true;
6669
_speechToText.OnError = OnError;
6770
_speechToText.InactivityTimeout = -1;
68-
_speechToText.ProfanityFilter = true;
71+
_speechToText.ProfanityFilter = false;
6972
_speechToText.SmartFormatting = true;
70-
_speechToText.SpeakerLabels = true;
73+
_speechToText.SpeakerLabels = false;
7174
_speechToText.WordAlternativesThreshold = null;
72-
List<string> keywords = new List<string>();
73-
keywords.Add("hello");
74-
keywords.Add("testing");
75-
keywords.Add("watson");
76-
_speechToText.KeywordsThreshold = 0.5f;
77-
_speechToText.Keywords = keywords.ToArray();
7875
_speechToText.StartListening(OnRecognize, OnRecognizeSpeaker);
7976
}
8077
else if (!value && _speechToText.IsListening)
@@ -145,7 +142,7 @@ private IEnumerator RecordingHandler()
145142
_recording.GetData(samples, bFirstBlock ? 0 : midPoint);
146143

147144
AudioData record = new AudioData();
148-
record.MaxLevel = Mathf.Abs(Mathf.Max(samples));
145+
record.MaxLevel = Mathf.Max(Mathf.Abs(Mathf.Min(samples)), Mathf.Max(samples));
149146
record.Clip = AudioClip.Create("Recording", midPoint, _recording.channels, _recordingHZ, false);
150147
record.Clip.SetData(samples, 0);
151148

@@ -176,8 +173,9 @@ private void OnRecognize(SpeechRecognitionEvent result)
176173
{
177174
foreach (var alt in res.alternatives)
178175
{
179-
string text = alt.transcript;
180-
Log.Debug("ExampleStreaming.OnRecognize()", string.Format("{0} ({1}, {2:0.00})\n", text, res.final ? "Final" : "Interim", alt.confidence));
176+
string text = string.Format("{0} ({1}, {2:0.00})\n", alt.transcript, res.final ? "Final" : "Interim", alt.confidence);
177+
Log.Debug("ExampleStreaming.OnRecognize()", text);
178+
ResultsField.text = text;
181179
}
182180

183181
if (res.keywords_result != null && res.keywords_result.keyword != null)
@@ -210,6 +208,5 @@ private void OnRecognizeSpeaker(SpeakerRecognitionEvent result)
210208
Log.Debug("ExampleStreaming.OnRecognize()", string.Format("speaker result: {0} | confidence: {3} | from: {1} | to: {2}", labelResult.speaker, labelResult.from, labelResult.to, labelResult.confidence));
211209
}
212210
}
213-
214211
}
215-
}
212+
}

0 commit comments

Comments
 (0)