Skip to content

Gh279 streaming example #287

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 30 commits into from
Nov 13, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
6f07b4d
add overload to stt Recognize that takes a byte array. add example re…
mediumTaj Oct 28, 2017
4538ced
break down buffer into chunks and send chunks smaller than 1/2 second
mediumTaj Oct 29, 2017
41c52c4
Merge branch 'develop' into gh279-streaming-example
mediumTaj Nov 1, 2017
bf4f261
Merge branch 'gh279-streaming-example' of github.com:watson-developer…
mediumTaj Nov 1, 2017
3de242f
scene changes
mediumTaj Nov 1, 2017
43cb9ea
Merge branch 'develop' into gh279-streaming-example
mediumTaj Nov 1, 2017
40645ae
update credentials
mediumTaj Nov 1, 2017
1f2056f
merge in develop and update credentials
mediumTaj Nov 1, 2017
d3c0b87
rename ifdef
mediumTaj Nov 2, 2017
2c274b2
@kimberlysiva's recommended changes
mediumTaj Nov 3, 2017
5a5d9ce
remove half second delay on sending
mediumTaj Nov 7, 2017
6889a56
WaitOne(0) --> WaitOne(50)
mediumTaj Nov 7, 2017
701e356
encrypt credentials
mediumTaj Nov 7, 2017
82096c6
Fix detect silence to work with smaller audio chunks
kimberlysiva Nov 9, 2017
c935baa
Fix max level for audio clip
kimberlysiva Nov 9, 2017
3b09eeb
Merge pull request #296 from kimberlysiva/279-detectSilence
mediumTaj Nov 9, 2017
6949f27
update credentials
mediumTaj Nov 9, 2017
4954939
merge in develop, split streaming and streaming chunked examples
mediumTaj Nov 9, 2017
35fe4e1
update example streaming scene to remove one text field
mediumTaj Nov 9, 2017
1f3ec4e
merge in develop, encrypt credentials
mediumTaj Nov 9, 2017
d5e572d
add @ksiva's fixes that were lost in merge
mediumTaj Nov 10, 2017
c66d749
clip.maxlevel and waitone(50) were reverted - bringing them back
mediumTaj Nov 13, 2017
43537d4
Update WSConnector send loop on .NET Core
kimberlysiva Nov 13, 2017
9f83f40
Merge pull request #298 from kimberlysiva/279-hololens
mediumTaj Nov 13, 2017
46289a8
update credentials
mediumTaj Nov 13, 2017
d54f099
changed OnListen to return a bool, true if audio was sent/enqueued
kimberlysiva Nov 13, 2017
30b6479
Merge pull request #299 from kimberlysiva/279-onListen
mediumTaj Nov 13, 2017
6ab2f81
refactor from streaming example
mediumTaj Nov 13, 2017
ec4868a
Merge branch 'gh279-streaming-example' of github.com:watson-develope…
mediumTaj Nov 13, 2017
2f774ea
encrypt credentials
mediumTaj Nov 13, 2017
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file modified Config.json.enc
Binary file not shown.
373 changes: 342 additions & 31 deletions Examples/ServiceExamples/ExampleStreaming.unity

Large diffs are not rendered by default.

67 changes: 64 additions & 3 deletions Examples/ServiceExamples/Scripts/ExampleSpeechToText.cs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ public class ExampleSpeechToText : MonoBehaviour
private string _password = null;
private string _url = null;

private AudioClip _audioClip;
private SpeechToText _speechToText;

private string _modelNameToGet;
Expand All @@ -38,13 +37,19 @@ public class ExampleSpeechToText : MonoBehaviour
private string _customCorpusFilePath;
private string _customWordsFilePath;
private string _acousticResourceUrl = "https://ia802302.us.archive.org/10/items/Greatest_Speeches_of_the_20th_Century/TheFirstAmericaninEarthOrbit.mp3";
private string _oggResourceUrl = "https://ia802302.us.archive.org/10/items/Greatest_Speeches_of_the_20th_Century/InauguralAddress-1981.ogg";
private bool _isAudioLoaded = false;
private string _createdAcousticModelId;
private string _acousticResourceName = "unity-acoustic-resource";
private string _createdAcousticModelName = "unity-example-acoustic-model";
private byte[] _acousticResourceData;
private string _acousticResourceMimeType;
private byte[] _oggResourceData;
private string _oggResourceMimeType;
private bool _isOggLoaded = false;

private bool _recognizeTested = false;
private bool _recognizeOggTested = false;
private bool _getModelsTested = false;
private bool _getModelTested = false;
private bool _getCustomizationsTested = false;
Expand Down Expand Up @@ -87,24 +92,41 @@ void Start()
_speechToText = new SpeechToText(credentials);
_customCorpusFilePath = Application.dataPath + "/Watson/Examples/ServiceExamples/TestData/theJabberwocky-utf8.txt";
_customWordsFilePath = Application.dataPath + "/Watson/Examples/ServiceExamples/TestData/test-stt-words.json";
_acousticResourceMimeType = Utility.GetMimeType(Path.GetExtension(_acousticResourceUrl));
_oggResourceMimeType = Utility.GetMimeType(Path.GetExtension(_oggResourceUrl));

_speechToText.StreamMultipart = true;

Runnable.Run(Examples());
}

private IEnumerator Examples()
{
Runnable.Run(DownloadAcousticResource());
while (!_isAudioLoaded)
yield return null;

Runnable.Run(DownloadOggResource());
while (!_isOggLoaded)
yield return null;

// Recognize
Log.Debug("ExampleSpeechToText.Examples()", "Attempting to recognize");
List<string> keywords = new List<string>();
keywords.Add("speech");
_speechToText.KeywordsThreshold = 0.5f;
_speechToText.Keywords = keywords.ToArray();
_speechToText.Recognize(_audioClip, HandleOnRecognize);
_speechToText.Recognize(_acousticResourceData, _acousticResourceMimeType, HandleOnRecognize);
while (!_recognizeTested)
yield return null;

// Recognize ogg
_speechToText.StreamMultipart = true;
Log.Debug("ExampleSpeechToText", "Attempting to recognize ogg: mimeType: {0} | _speechTText.StreamMultipart: {1}", _oggResourceMimeType, _speechToText.StreamMultipart);
_speechToText.Recognize(_oggResourceData, _oggResourceMimeType + ";codecs=vorbis", HandleOnRecognizeOgg);
while (!_recognizeOggTested)
yield return null;

// Get models
Log.Debug("ExampleSpeechToText.Examples()", "Attempting to get models");
_speechToText.GetModels(HandleGetModels);
Expand Down Expand Up @@ -426,6 +448,32 @@ private void HandleOnRecognize(SpeechRecognitionEvent result)
}
}

/// <summary>
/// Callback for the ogg recognize request. Logs every alternative transcript and
/// every keyword hit, and marks the ogg recognize example as finished once a final
/// result with at least one alternative has been received.
/// </summary>
/// <param name="result">Recognition event passed to the callback; a null event or one without results is ignored.</param>
private void HandleOnRecognizeOgg(SpeechRecognitionEvent result)
{
    // Guard the whole chain: a null event or a null/empty results array has nothing to log.
    if (result == null || result.results == null || result.results.Length == 0)
        return;

    foreach (var res in result.results)
    {
        // A result without alternatives would otherwise throw when enumerated.
        if (res.alternatives != null)
        {
            foreach (var alt in res.alternatives)
            {
                // Pass the format arguments straight to Log.Debug, as the keyword line below does,
                // so the transcript text is never itself treated as a format string.
                Log.Debug("ExampleSpeechToText", "{0} ({1}, {2:0.00})\n", alt.transcript, res.final ? "Final" : "Interim", alt.confidence);

                if (res.final)
                    _recognizeOggTested = true;
            }
        }

        if (res.keywords_result != null && res.keywords_result.keyword != null)
        {
            foreach (var keyword in res.keywords_result.keyword)
            {
                Log.Debug("ExampleSpeechToText", "keyword: {0}, confidence: {1}, start time: {2}, end time: {3}", keyword.normalized_text, keyword.confidence, keyword.start_time, keyword.end_time);
            }
        }
    }
}

private void HandleGetCustomizations(Customizations customizations, string customData)
{
Log.Debug("ExampleSpeechToText.HandleGetCustomizations()", "Speech to Text - Get customizations response: {0}", customData);
Expand Down Expand Up @@ -724,5 +772,18 @@ private IEnumerator DownloadAcousticResource()
Log.Debug("ExampleSpeechToText.DownloadAcousticResource()", "acoustic resource downloaded");
_acousticResourceData = www.bytes;
_isAudioLoaded = true;
www.Dispose();
}

/// <summary>
/// Coroutine that downloads the ogg sample from <c>_oggResourceUrl</c>, stores the
/// received bytes in <c>_oggResourceData</c> and then sets <c>_isOggLoaded</c> so the
/// waiting example sequence can continue.
/// </summary>
/// <returns>Enumerator that yields until the download has completed.</returns>
private IEnumerator DownloadOggResource()
{
    Log.Debug("ExampleSpeechToText", "downloading ogg resource from {0}", _oggResourceUrl);

    // The using block releases the request even if an exception is thrown after the download.
    using (WWW www = new WWW(_oggResourceUrl))
    {
        yield return www;

        // Report a failed download instead of claiming success; the flag is still set below
        // so the caller's wait loop does not spin forever.
        if (!string.IsNullOrEmpty(www.error))
            Log.Debug("ExampleSpeechToText", "ogg resource download failed: {0}", www.error);
        else
            Log.Debug("ExampleSpeechToText", "ogg resource downloaded");

        _oggResourceData = www.bytes;
        _isOggLoaded = true;
    }
}
}
}
29 changes: 13 additions & 16 deletions Examples/ServiceExamples/Scripts/ExampleStreaming.cs
Original file line number Diff line number Diff line change
Expand Up @@ -22,17 +22,20 @@
using IBM.Watson.DeveloperCloud.Utilities;
using IBM.Watson.DeveloperCloud.DataTypes;
using System.Collections.Generic;
using UnityEngine.UI;

public class ExampleStreaming : MonoBehaviour
{
private string _username = null;
private string _password = null;
private string _url = null;

public Text ResultsField;

private int _recordingRoutine = 0;
private string _microphoneID = null;
private AudioClip _recording = null;
private int _recordingBufferSize = 2;
private int _recordingBufferSize = 1;
private int _recordingHZ = 22050;

private SpeechToText _speechToText;
Expand Down Expand Up @@ -60,21 +63,15 @@ public bool Active
_speechToText.DetectSilence = true;
_speechToText.EnableWordConfidence = true;
_speechToText.EnableTimestamps = true;
_speechToText.SilenceThreshold = 0.1f;
_speechToText.MaxAlternatives = 5;
_speechToText.SilenceThreshold = 0.01f;
_speechToText.MaxAlternatives = 0;
_speechToText.EnableInterimResults = true;
_speechToText.OnError = OnError;
_speechToText.InactivityTimeout = -1;
_speechToText.ProfanityFilter = true;
_speechToText.ProfanityFilter = false;
_speechToText.SmartFormatting = true;
_speechToText.SpeakerLabels = true;
_speechToText.SpeakerLabels = false;
_speechToText.WordAlternativesThreshold = null;
List<string> keywords = new List<string>();
keywords.Add("hello");
keywords.Add("testing");
keywords.Add("watson");
_speechToText.KeywordsThreshold = 0.5f;
_speechToText.Keywords = keywords.ToArray();
_speechToText.StartListening(OnRecognize, OnRecognizeSpeaker);
}
else if (!value && _speechToText.IsListening)
Expand Down Expand Up @@ -145,7 +142,7 @@ private IEnumerator RecordingHandler()
_recording.GetData(samples, bFirstBlock ? 0 : midPoint);

AudioData record = new AudioData();
record.MaxLevel = Mathf.Abs(Mathf.Max(samples));
record.MaxLevel = Mathf.Max(Mathf.Abs(Mathf.Min(samples)), Mathf.Max(samples));
record.Clip = AudioClip.Create("Recording", midPoint, _recording.channels, _recordingHZ, false);
record.Clip.SetData(samples, 0);

Expand Down Expand Up @@ -176,8 +173,9 @@ private void OnRecognize(SpeechRecognitionEvent result)
{
foreach (var alt in res.alternatives)
{
string text = alt.transcript;
Log.Debug("ExampleStreaming.OnRecognize()", string.Format("{0} ({1}, {2:0.00})\n", text, res.final ? "Final" : "Interim", alt.confidence));
string text = string.Format("{0} ({1}, {2:0.00})\n", alt.transcript, res.final ? "Final" : "Interim", alt.confidence);
Log.Debug("ExampleStreaming.OnRecognize()", text);
ResultsField.text = text;
}

if (res.keywords_result != null && res.keywords_result.keyword != null)
Expand Down Expand Up @@ -210,6 +208,5 @@ private void OnRecognizeSpeaker(SpeakerRecognitionEvent result)
Log.Debug("ExampleStreaming.OnRecognize()", string.Format("speaker result: {0} | confidence: {3} | from: {1} | to: {2}", labelResult.speaker, labelResult.from, labelResult.to, labelResult.confidence));
}
}

}
}
}
Loading