Skip to content

Commit 3b09eeb

Browse files
authored
Merge pull request #296 from kimberlysiva/279-detectSilence
Fix detect silence to work with smaller audio chunks
2 parents 701e356 + c935baa commit 3b09eeb

File tree

2 files changed

+16
-4
lines changed

2 files changed

+16
-4
lines changed

Examples/ServiceExamples/Scripts/ExampleStreaming.cs

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -63,10 +63,10 @@ public bool Active
6363
{
6464
if (value && !_speechToText.IsListening)
6565
{
66-
_speechToText.DetectSilence = false;
66+
_speechToText.DetectSilence = true;
6767
_speechToText.EnableWordConfidence = true;
6868
_speechToText.EnableTimestamps = true;
69-
_speechToText.SilenceThreshold = 0.1f;
69+
_speechToText.SilenceThreshold = 0.01f;
7070
_speechToText.MaxAlternatives = 0;
7171
_speechToText.EnableInterimResults = true;
7272
_speechToText.OnError = OnError;
@@ -177,7 +177,7 @@ private IEnumerator RecordingHandler()
177177

178178
// Create AudioData and use the samples we just created
179179
AudioData record = new AudioData();
180-
record.MaxLevel = Mathf.Max(samples);
180+
record.MaxLevel = Mathf.Max(Mathf.Abs(Mathf.Min(samples)), Mathf.Max(samples));
181181
record.Clip = AudioClip.Create("Recording", chunkSize, _recording.channels, _recordingHZ, false);
182182
record.Clip.SetData(samples, 0);
183183

Scripts/Services/SpeechToText/v1/SpeechToText.cs

Lines changed: 13 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -109,6 +109,8 @@ public class SpeechToText : IWatsonService
109109
private string _acoustic_customization_id = null;
110110
private float _customization_weight = 0.3f;
111111
private bool _streamMultipart = false; // If true sets `Transfer-Encoding` header of multipart request to `chunked`.
112+
private float _silenceDuration = 0.0f;
113+
private float _silenceCutoff = 1.0f;
112114

113115
private fsSerializer _serializer = new fsSerializer();
114116
private Credentials _credentials = null;
@@ -465,7 +467,17 @@ public void OnListen(AudioData clip)
465467
SendStart();
466468
}
467469

468-
if (!DetectSilence || clip.MaxLevel >= _silenceThreshold)
470+
// If silence persists for _silenceCutoff seconds, send stop and discard clips until audio resumes
471+
if (DetectSilence && clip.MaxLevel < _silenceThreshold)
472+
{
473+
_silenceDuration += clip.Clip.length;
474+
}
475+
else
476+
{
477+
_silenceDuration = 0.0f;
478+
}
479+
480+
if (!DetectSilence || _silenceDuration < _silenceCutoff)
469481
{
470482
if (_listenActive)
471483
{

0 commit comments

Comments
 (0)