watson-developer-cloud · mediumTaj · Nov 13, 2017 · Oct 28, 2017 · Oct 29, 2017 · Nov 1, 2017
diff --git a/Config.json.enc b/Config.json.enc
diff --git a/Examples/ServiceExamples/ExampleStreaming.unity b/Examples/ServiceExamples/ExampleStreaming.unity
diff --git a/Examples/ServiceExamples/Scripts/ExampleSpeechToText.cs b/Examples/ServiceExamples/Scripts/ExampleSpeechToText.cs
@@ -29,7 +29,6 @@ public class ExampleSpeechToText : MonoBehaviour
     private string _password = null;
     private string _url = null;
 
-    private AudioClip _audioClip;
     private SpeechToText _speechToText;
 
     private string _modelNameToGet;
@@ -38,13 +37,19 @@ public class ExampleSpeechToText : MonoBehaviour
     private string _customCorpusFilePath;
     private string _customWordsFilePath;
     private string _acousticResourceUrl = "https://ia802302.us.archive.org/10/items/Greatest_Speeches_of_the_20th_Century/TheFirstAmericaninEarthOrbit.mp3";
+    private string _oggResourceUrl = "https://ia802302.us.archive.org/10/items/Greatest_Speeches_of_the_20th_Century/InauguralAddress-1981.ogg";
     private bool _isAudioLoaded = false;
     private string _createdAcousticModelId;
     private string _acousticResourceName = "unity-acoustic-resource";
     private string _createdAcousticModelName = "unity-example-acoustic-model";
     private byte[] _acousticResourceData;
+    private string _acousticResourceMimeType;
+    private byte[] _oggResourceData;
+    private string _oggResourceMimeType;
+    private bool _isOggLoaded = false;
 
     private bool _recognizeTested = false;
+    private bool _recognizeOggTested = false;
     private bool _getModelsTested = false;
     private bool _getModelTested = false;
     private bool _getCustomizationsTested = false;
@@ -87,24 +92,41 @@ void Start()
         _speechToText = new SpeechToText(credentials);
         _customCorpusFilePath = Application.dataPath + "/Watson/Examples/ServiceExamples/TestData/theJabberwocky-utf8.txt";
         _customWordsFilePath = Application.dataPath + "/Watson/Examples/ServiceExamples/TestData/test-stt-words.json";
+        _acousticResourceMimeType = Utility.GetMimeType(Path.GetExtension(_acousticResourceUrl));
+        _oggResourceMimeType = Utility.GetMimeType(Path.GetExtension(_oggResourceUrl));
+
+        _speechToText.StreamMultipart = true;
 
         Runnable.Run(Examples());
     }
 
     private IEnumerator Examples()
     {
         Runnable.Run(DownloadAcousticResource());
+        while (!_isAudioLoaded)
+            yield return null;
+
+        Runnable.Run(DownloadOggResource());
+        while (!_isOggLoaded)
+            yield return null;
 
         //  Recognize
         Log.Debug("ExampleSpeechToText.Examples()", "Attempting to recognize");
         List<string> keywords = new List<string>();
         keywords.Add("speech");
         _speechToText.KeywordsThreshold = 0.5f;
         _speechToText.Keywords = keywords.ToArray();
-        _speechToText.Recognize(_audioClip, HandleOnRecognize);
+        _speechToText.Recognize(_acousticResourceData, _acousticResourceMimeType, HandleOnRecognize);
         while (!_recognizeTested)
             yield return null;
 
+        //  Recognize ogg
+        _speechToText.StreamMultipart = true;
+        Log.Debug("ExampleSpeechToText", "Attempting to recognize ogg: mimeType: {0} | _speechTText.StreamMultipart: {1}", _oggResourceMimeType, _speechToText.StreamMultipart);
+        _speechToText.Recognize(_oggResourceData, _oggResourceMimeType + ";codecs=vorbis", HandleOnRecognizeOgg);
+        while (!_recognizeOggTested)
+            yield return null;
+
         //  Get models
         Log.Debug("ExampleSpeechToText.Examples()", "Attempting to get models");
         _speechToText.GetModels(HandleGetModels);
@@ -426,6 +448,32 @@ private void HandleOnRecognize(SpeechRecognitionEvent result)
         }
     }
 
+    /// <summary>
+    /// Callback for the ogg recognize request issued from Examples(). Logs every transcript
+    /// alternative and every keyword hit, and sets _recognizeOggTested once a final result
+    /// carrying at least one alternative has been seen.
+    /// </summary>
+    /// <param name="result">Recognition event passed to the callback; a null event, or one with no results, is ignored.</param>
+    private void HandleOnRecognizeOgg(SpeechRecognitionEvent result)
+    {
+        //  Guard the results array as well as the event itself: reading Length on a null array would throw inside the callback.
+        if (result == null || result.results == null || result.results.Length == 0)
+            return;
+
+        foreach (var res in result.results)
+        {
+            //  alternatives is enumerated directly, so skip the loop when it is missing.
+            if (res.alternatives != null)
+            {
+                foreach (var alt in res.alternatives)
+                {
+                    string text = alt.transcript;
+                    Log.Debug("ExampleSpeechToText", string.Format("{0} ({1}, {2:0.00})\n", text, res.final ? "Final" : "Interim", alt.confidence));
+
+                    //  Examples() spins on this flag after calling Recognize for the ogg data.
+                    if (res.final)
+                        _recognizeOggTested = true;
+                }
+            }
+
+            if (res.keywords_result != null && res.keywords_result.keyword != null)
+            {
+                foreach (var keyword in res.keywords_result.keyword)
+                {
+                    Log.Debug("ExampleSpeechToText", "keyword: {0}, confidence: {1}, start time: {2}, end time: {3}", keyword.normalized_text, keyword.confidence, keyword.start_time, keyword.end_time);
+                }
+            }
+        }
+    }
+
     private void HandleGetCustomizations(Customizations customizations, string customData)
     {
         Log.Debug("ExampleSpeechToText.HandleGetCustomizations()", "Speech to Text - Get customizations response: {0}", customData);
@@ -724,5 +772,18 @@ private IEnumerator DownloadAcousticResource()
         Log.Debug("ExampleSpeechToText.DownloadAcousticResource()", "acoustic resource downloaded");
         _acousticResourceData = www.bytes;
         _isAudioLoaded = true;
+        www.Dispose();
+    }
+
+    /// <summary>
+    /// Coroutine that downloads the ogg sample at _oggResourceUrl, stores the received bytes
+    /// in _oggResourceData and then sets _isOggLoaded.
+    /// </summary>
+    /// <returns>Enumerator that yields until the WWW request has finished.</returns>
+    private IEnumerator DownloadOggResource()
+    {
+        Log.Debug("ExampleSpeechToText", "downloading ogg resource from {0}", _oggResourceUrl);
+
+        //  using guarantees the request is disposed however the block is left.
+        using (WWW www = new WWW(_oggResourceUrl))
+        {
+            yield return www;
+
+            //  Report a failed request instead of claiming the download succeeded.
+            if (!string.IsNullOrEmpty(www.error))
+                Log.Debug("ExampleSpeechToText", "failed to download ogg resource: {0}", www.error);
+            else
+                Log.Debug("ExampleSpeechToText", "ogg resource downloaded");
+
+            _oggResourceData = www.bytes;
+            //  Set even after a failure: Examples() spins on this flag and would otherwise never continue.
+            _isOggLoaded = true;
+        }
+    }
-}
+}
diff --git a/Examples/ServiceExamples/Scripts/ExampleStreaming.cs b/Examples/ServiceExamples/Scripts/ExampleStreaming.cs
@@ -22,17 +22,20 @@
 using IBM.Watson.DeveloperCloud.Utilities;
 using IBM.Watson.DeveloperCloud.DataTypes;
 using System.Collections.Generic;
+using UnityEngine.UI;
 
 public class ExampleStreaming : MonoBehaviour
 {
     private string _username = null;
     private string _password = null;
     private string _url = null;
 
+    public Text ResultsField;
+
     private int _recordingRoutine = 0;
     private string _microphoneID = null;
     private AudioClip _recording = null;
-    private int _recordingBufferSize = 2;
+    private int _recordingBufferSize = 1;
     private int _recordingHZ = 22050;
 
     private SpeechToText _speechToText;
@@ -60,21 +63,15 @@ public bool Active
                 _speechToText.DetectSilence = true;
                 _speechToText.EnableWordConfidence = true;
                 _speechToText.EnableTimestamps = true;
-                _speechToText.SilenceThreshold = 0.1f;
-                _speechToText.MaxAlternatives = 5;
+                _speechToText.SilenceThreshold = 0.01f;
+                _speechToText.MaxAlternatives = 0;
                 _speechToText.EnableInterimResults = true;
                 _speechToText.OnError = OnError;
                 _speechToText.InactivityTimeout = -1;
-                _speechToText.ProfanityFilter = true;
+                _speechToText.ProfanityFilter = false;
                 _speechToText.SmartFormatting = true;
-                _speechToText.SpeakerLabels = true;
+                _speechToText.SpeakerLabels = false;
                 _speechToText.WordAlternativesThreshold = null;
-                List<string> keywords = new List<string>();
-                keywords.Add("hello");
-                keywords.Add("testing");
-                keywords.Add("watson");
-                _speechToText.KeywordsThreshold = 0.5f;
-                _speechToText.Keywords = keywords.ToArray();
                 _speechToText.StartListening(OnRecognize, OnRecognizeSpeaker);
             }
             else if (!value && _speechToText.IsListening)
@@ -145,7 +142,7 @@ private IEnumerator RecordingHandler()
                 _recording.GetData(samples, bFirstBlock ? 0 : midPoint);
 
                 AudioData record = new AudioData();
-                record.MaxLevel = Mathf.Abs(Mathf.Max(samples));
+				record.MaxLevel = Mathf.Max(Mathf.Abs(Mathf.Min(samples)), Mathf.Max(samples));
                 record.Clip = AudioClip.Create("Recording", midPoint, _recording.channels, _recordingHZ, false);
                 record.Clip.SetData(samples, 0);
 
@@ -176,8 +173,9 @@ private void OnRecognize(SpeechRecognitionEvent result)
             {
                 foreach (var alt in res.alternatives)
                 {
-                    string text = alt.transcript;
-                    Log.Debug("ExampleStreaming.OnRecognize()", string.Format("{0} ({1}, {2:0.00})\n", text, res.final ? "Final" : "Interim", alt.confidence));
+                    string text = string.Format("{0} ({1}, {2:0.00})\n", alt.transcript, res.final ? "Final" : "Interim", alt.confidence);
+                    Log.Debug("ExampleStreaming.OnRecognize()", text);
+                    ResultsField.text = text;
                 }
 
                 if (res.keywords_result != null && res.keywords_result.keyword != null)
@@ -210,6 +208,5 @@ private void OnRecognizeSpeaker(SpeakerRecognitionEvent result)
                 Log.Debug("ExampleStreaming.OnRecognize()", string.Format("speaker result: {0} | confidence: {3} | from: {1} | to: {2}", labelResult.speaker, labelResult.from, labelResult.to, labelResult.confidence));
             }
         }
-
     }
-}
+}