Skip to content

Commit 99870cc

Browse files
authored
go : exposed various parts to the Go Interface (ggml-org#697)
1 parent 23ae1f1 commit 99870cc

File tree

4 files changed

+30
-7
lines changed

4 files changed

+30
-7
lines changed

bindings/go/params.go

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -105,6 +105,10 @@ func (p *Params) SetMaxSegmentLength(n int) {
105105
p.max_len = C.int(n)
106106
}
107107

108+
// Set token timestamps flag: converts b with toBool and stores it in p.token_timestamps.
// NOTE(review): presumably this enables per-token timestamps in whisper.cpp — confirm against whisper.h.
func (p *Params) SetTokenTimestamps(b bool) {
109+
p.token_timestamps = toBool(b)
110+
}
111+
108112
// Set max tokens per segment (0 = no limit)
109113
func (p *Params) SetMaxTokensPerSegment(n int) {
110114
p.max_tokens = C.int(n)

bindings/go/pkg/whisper/context.go

Lines changed: 12 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -111,6 +111,11 @@ func (context *context) SetMaxSegmentLength(n uint) {
111111
context.params.SetMaxSegmentLength(int(n))
112112
}
113113

114+
// Set token timestamps flag (forwards b unchanged to context.params.SetTokenTimestamps)
115+
func (context *context) SetTokenTimestamps(b bool) {
116+
context.params.SetTokenTimestamps(b)
117+
}
118+
114119
// Set max tokens per segment (0 = no limit)
115120
func (context *context) SetMaxTokensPerSegment(n uint) {
116121
context.params.SetMaxTokensPerSegment(int(n))
@@ -280,10 +285,14 @@ func toSegment(ctx *whisper.Context, n int) Segment {
280285
func toTokens(ctx *whisper.Context, n int) []Token {
281286
result := make([]Token, ctx.Whisper_full_n_tokens(n))
282287
for i := 0; i < len(result); i++ {
288+
data := ctx.Whisper_full_get_token_data(n, i)
289+
283290
result[i] = Token{
284-
Id: int(ctx.Whisper_full_get_token_id(n, i)),
285-
Text: strings.TrimSpace(ctx.Whisper_full_get_token_text(n, i)),
286-
P: ctx.Whisper_full_get_token_p(n, i),
291+
Id: int(ctx.Whisper_full_get_token_id(n, i)),
292+
Text: ctx.Whisper_full_get_token_text(n, i),
293+
P: ctx.Whisper_full_get_token_p(n, i),
294+
Start: time.Duration(data.T0()) * time.Millisecond * 10,
295+
End: time.Duration(data.T1()) * time.Millisecond * 10,
287296
}
288297
}
289298
return result

bindings/go/pkg/whisper/interface.go

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -41,6 +41,7 @@ type Context interface {
4141
SetTokenThreshold(float32) // Set timestamp token probability threshold
4242
SetTokenSumThreshold(float32) // Set timestamp token sum probability threshold
4343
SetMaxSegmentLength(uint) // Set max segment length in characters
44+
SetTokenTimestamps(bool) // Set token timestamps flag
4445
SetMaxTokensPerSegment(uint) // Set max tokens per segment (0 = no limit)
4546

4647
// Process mono audio data and return any errors.
@@ -85,7 +86,8 @@ type Segment struct {
8586

8687
// Token is a text or special token
8788
type Token struct {
88-
Id int
89-
Text string
90-
P float32
89+
Id int
90+
Text string
91+
P float32
92+
Start, End time.Duration
9193
}

bindings/go/whisper.go

Lines changed: 9 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -356,7 +356,7 @@ func (ctx *Context) Whisper_full_get_token_id(segment int, token int) Token {
356356

357357
// Get token data for the specified token in the specified segment.
358358
// This contains probabilities, timestamps, etc.
359-
func (ctx *Context) whisper_full_get_token_data(segment int, token int) TokenData {
359+
func (ctx *Context) Whisper_full_get_token_data(segment int, token int) TokenData {
360360
return TokenData(C.whisper_full_get_token_data((*C.struct_whisper_context)(ctx), C.int(segment), C.int(token)))
361361
}
362362

@@ -407,3 +407,11 @@ func callEncoderBegin(user_data unsafe.Pointer) C.bool {
407407
}
408408
return true
409409
}
410+
411+
// T0 returns the t0 field of the token data, converted to int64.
// NOTE(review): toTokens assigns this value, scaled by 10*time.Millisecond, to Token.Start,
// so it is presumably the token start time in 10 ms units — confirm against whisper.h.
func (t TokenData) T0() int64 {
412+
return int64(t.t0)
413+
}
414+
415+
// T1 returns the t1 field of the token data, converted to int64.
// NOTE(review): toTokens assigns this value, scaled by 10*time.Millisecond, to Token.End,
// so it is presumably the token end time in 10 ms units — confirm against whisper.h.
func (t TokenData) T1() int64 {
416+
return int64(t.t1)
417+
}

0 commit comments

Comments
 (0)