@@ -29,7 +29,6 @@ public class ExampleSpeechToText : MonoBehaviour
29
29
private string _password = null ;
30
30
private string _url = null ;
31
31
32
- private AudioClip _audioClip ;
33
32
private SpeechToText _speechToText ;
34
33
35
34
private string _modelNameToGet ;
@@ -38,13 +37,19 @@ public class ExampleSpeechToText : MonoBehaviour
38
37
private string _customCorpusFilePath ;
39
38
private string _customWordsFilePath ;
40
39
private string _acousticResourceUrl = "https://ia802302.us.archive.org/10/items/Greatest_Speeches_of_the_20th_Century/TheFirstAmericaninEarthOrbit.mp3" ;
40
+ private string _oggResourceUrl = "https://ia802302.us.archive.org/10/items/Greatest_Speeches_of_the_20th_Century/InauguralAddress-1981.ogg" ;
41
41
private bool _isAudioLoaded = false ;
42
42
private string _createdAcousticModelId ;
43
43
private string _acousticResourceName = "unity-acoustic-resource" ;
44
44
private string _createdAcousticModelName = "unity-example-acoustic-model" ;
45
45
private byte [ ] _acousticResourceData ;
46
+ private string _acousticResourceMimeType ;
47
+ private byte [ ] _oggResourceData ;
48
+ private string _oggResourceMimeType ;
49
+ private bool _isOggLoaded = false ;
46
50
47
51
private bool _recognizeTested = false ;
52
+ private bool _recognizeOggTested = false ;
48
53
private bool _getModelsTested = false ;
49
54
private bool _getModelTested = false ;
50
55
private bool _getCustomizationsTested = false ;
@@ -87,24 +92,41 @@ void Start()
87
92
_speechToText = new SpeechToText ( credentials ) ;
88
93
_customCorpusFilePath = Application . dataPath + "/Watson/Examples/ServiceExamples/TestData/theJabberwocky-utf8.txt" ;
89
94
_customWordsFilePath = Application . dataPath + "/Watson/Examples/ServiceExamples/TestData/test-stt-words.json" ;
95
+ _acousticResourceMimeType = Utility . GetMimeType ( Path . GetExtension ( _acousticResourceUrl ) ) ;
96
+ _oggResourceMimeType = Utility . GetMimeType ( Path . GetExtension ( _oggResourceUrl ) ) ;
97
+
98
+ _speechToText . StreamMultipart = true ;
90
99
91
100
Runnable . Run ( Examples ( ) ) ;
92
101
}
93
102
94
103
private IEnumerator Examples ( )
95
104
{
96
105
Runnable . Run ( DownloadAcousticResource ( ) ) ;
106
+ while ( ! _isAudioLoaded )
107
+ yield return null ;
108
+
109
+ Runnable . Run ( DownloadOggResource ( ) ) ;
110
+ while ( ! _isOggLoaded )
111
+ yield return null ;
97
112
98
113
// Recognize
99
114
Log . Debug ( "ExampleSpeechToText.Examples()" , "Attempting to recognize" ) ;
100
115
List < string > keywords = new List < string > ( ) ;
101
116
keywords . Add ( "speech" ) ;
102
117
_speechToText . KeywordsThreshold = 0.5f ;
103
118
_speechToText . Keywords = keywords . ToArray ( ) ;
104
- _speechToText . Recognize ( _audioClip , HandleOnRecognize ) ;
119
+ _speechToText . Recognize ( _acousticResourceData , _acousticResourceMimeType , HandleOnRecognize ) ;
105
120
while ( ! _recognizeTested )
106
121
yield return null ;
107
122
123
+ // Recognize ogg
124
+ _speechToText . StreamMultipart = true ;
125
+ Log . Debug ( "ExampleSpeechToText" , "Attempting to recognize ogg: mimeType: {0} | _speechTText.StreamMultipart: {1}" , _oggResourceMimeType , _speechToText . StreamMultipart ) ;
126
+ _speechToText . Recognize ( _oggResourceData , _oggResourceMimeType + ";codecs=vorbis" , HandleOnRecognizeOgg ) ;
127
+ while ( ! _recognizeOggTested )
128
+ yield return null ;
129
+
108
130
// Get models
109
131
Log . Debug ( "ExampleSpeechToText.Examples()" , "Attempting to get models" ) ;
110
132
_speechToText . GetModels ( HandleGetModels ) ;
@@ -426,6 +448,32 @@ private void HandleOnRecognize(SpeechRecognitionEvent result)
426
448
}
427
449
}
428
450
451
/// <summary>
/// Callback for the ogg recognize request. Logs each transcript alternative and
/// each keyword hit, then sets _recognizeOggTested, the flag a wait loop in
/// Examples() polls before moving on.
/// </summary>
/// <param name="result">Recognition event passed back by the Recognize call; null and empty results are tolerated.</param>
private void HandleOnRecognizeOgg(SpeechRecognitionEvent result)
{
    if (result == null || result.results == null || result.results.Length == 0)
    {
        // Nothing usable came back. Still mark the step as done: the polling loop
        // in Examples() has no other exit and would otherwise spin forever.
        Log.Debug("ExampleSpeechToText", "Recognize ogg returned no results");
        _recognizeOggTested = true;
        return;
    }

    foreach (var res in result.results)
    {
        // Guard against a result that carries no alternatives at all.
        if (res.alternatives != null)
        {
            foreach (var alt in res.alternatives)
            {
                string text = alt.transcript;
                Log.Debug("ExampleSpeechToText", string.Format("{0} ({1}, {2:0.00})\n ", text, res.final ? "Final" : "Interim", alt.confidence));
            }
        }

        // "final" is a property of the result, not of an alternative, so it is
        // checked once per result; a final result with zero alternatives still counts.
        if (res.final)
            _recognizeOggTested = true;

        if (res.keywords_result != null && res.keywords_result.keyword != null)
        {
            foreach (var keyword in res.keywords_result.keyword)
            {
                Log.Debug("ExampleSpeechToText", "keyword: {0}, confidence: {1}, start time: {2}, end time: {3}", keyword.normalized_text, keyword.confidence, keyword.start_time, keyword.end_time);
            }
        }
    }
}
476
+
429
477
private void HandleGetCustomizations ( Customizations customizations , string customData )
430
478
{
431
479
Log . Debug ( "ExampleSpeechToText.HandleGetCustomizations()" , "Speech to Text - Get customizations response: {0}" , customData ) ;
@@ -724,5 +772,18 @@ private IEnumerator DownloadAcousticResource()
724
772
Log . Debug ( "ExampleSpeechToText.DownloadAcousticResource()" , "acoustic resource downloaded" ) ;
725
773
_acousticResourceData = www . bytes ;
726
774
_isAudioLoaded = true ;
775
+ www . Dispose ( ) ;
776
+ }
777
+
778
/// <summary>
/// Coroutine that fetches the ogg sample at _oggResourceUrl, stores the received
/// bytes in _oggResourceData and then sets _isOggLoaded.
/// </summary>
/// <returns>An enumerator that yields once, on the WWW request, until it completes.</returns>
private IEnumerator DownloadOggResource()
{
    Log.Debug("ExampleSpeechToText", "downloading ogg resource from {0}", _oggResourceUrl);

    // The using block calls Dispose() on every exit path of the block, including
    // an exception thrown after the yield.
    using (WWW www = new WWW(_oggResourceUrl))
    {
        yield return www;

        // Report a failed download instead of claiming success.
        if (!string.IsNullOrEmpty(www.error))
            Log.Debug("ExampleSpeechToText", "ogg resource download failed: {0}", www.error);
        else
            Log.Debug("ExampleSpeechToText", "ogg resource downloaded");

        // As before, the bytes and the loaded flag are set in both cases so the
        // polling loop on _isOggLoaded always ends.
        _oggResourceData = www.bytes;
        _isOggLoaded = true;
    }
}
728
- }
789
+ }
0 commit comments