@@ -9,6 +9,7 @@
 	"slices"
 	"sort"
 	"strings"
+	"time"
 
 	openai "github.com/gptscript-ai/chat-completion-client"
 	"github.com/gptscript-ai/gptscript/pkg/cache"
@@ -212,15 +213,15 @@ func (c *Client) seed(request openai.ChatCompletionRequest) int {
 	return hash.Seed(newRequest)
 }
 
-func (c *Client) fromCache(ctx context.Context, messageRequest types.CompletionRequest, request openai.ChatCompletionRequest) (result []openai.ChatCompletionStreamResponse, _ bool, _ error) {
+func (c *Client) fromCache(ctx context.Context, messageRequest types.CompletionRequest, request openai.ChatCompletionRequest) (result types.CompletionMessage, _ bool, _ error) {
 	if !messageRequest.GetCache() {
-		return nil, false, nil
+		return types.CompletionMessage{}, false, nil
 	}
 	found, err := c.cache.Get(ctx, c.cacheKey(request), &result)
 	if err != nil {
-		return nil, false, err
+		return types.CompletionMessage{}, false, err
 	} else if !found {
-		return nil, false, nil
+		return types.CompletionMessage{}, false, nil
 	}
 	return result, true, nil
 }
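
Note: the cache now round-trips the folded types.CompletionMessage rather than raw stream chunks. The pkg/cache implementation is not part of this diff; the sketch below is a hypothetical in-memory stand-in that only assumes the Get(ctx, key, &out) (found, error) / Store(ctx, key, value) shapes visible above, with JSON for serialization.

package main

import (
	"context"
	"encoding/json"
)

// memCache is a hypothetical in-memory stand-in for pkg/cache,
// matching the Get/Store call shapes used in fromCache above.
type memCache struct {
	data map[string][]byte
}

// Get unmarshals the cached value into out and reports whether the key existed.
func (m *memCache) Get(_ context.Context, key string, out any) (bool, error) {
	b, ok := m.data[key]
	if !ok {
		return false, nil
	}
	return true, json.Unmarshal(b, out)
}

// Store marshals the value and saves it under key.
func (m *memCache) Store(_ context.Context, key string, val any) error {
	b, err := json.Marshal(val)
	if err != nil {
		return err
	}
	m.data[key] = b
	return nil
}
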
@@ -396,33 +397,27 @@ func (c *Client) Call(ctx context.Context, messageRequest types.CompletionReques
 			IncludeUsage: true,
 		}
 	}
-	response, ok, err := c.fromCache(ctx, messageRequest, request)
+	result, ok, err := c.fromCache(ctx, messageRequest, request)
 	if err != nil {
 		return nil, err
 	} else if !ok {
-		response, err = c.call(ctx, request, id, status)
+		result, err = c.call(ctx, request, id, status)
 
 		// If we got back a context length exceeded error, keep retrying and shrinking the message history until we pass.
 		var apiError *openai.APIError
 		if errors.As(err, &apiError) && apiError.Code == "context_length_exceeded" && messageRequest.Chat {
 			// Decrease maxTokens by 10% to make garbage collection more aggressive.
 			// The retry loop will further decrease maxTokens if needed.
 			maxTokens := decreaseTenPercent(messageRequest.MaxTokens)
-			response, err = c.contextLimitRetryLoop(ctx, request, id, maxTokens, status)
+			result, err = c.contextLimitRetryLoop(ctx, request, id, maxTokens, status)
 		}
-
 		if err != nil {
 			return nil, err
 		}
 	} else {
 		cacheResponse = true
 	}
 
-	result := types.CompletionMessage{}
-	for _, response := range response {
-		result = appendMessage(result, response)
-	}
-
 	for i, content := range result.Content {
 		if content.ToolCall != nil && content.ToolCall.ID == "" {
 			content.ToolCall.ID = "call_" + hash.ID(content.ToolCall.Function.Name, content.ToolCall.Function.Arguments)[:8]
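
Note: decreaseTenPercent's body is not shown in this diff; assuming integer decay along the lines of maxTokens * 9 / 10, the retry budget shrinks geometrically, e.g. 128000 → 115200 → 103680 → 93312. A standalone sketch of that decay:

package main

import "fmt"

// decreaseTenPercent mirrors the decay the retry loop relies on
// (assumed implementation; the real body is not part of this diff).
func decreaseTenPercent(maxTokens int) int {
	return maxTokens * 9 / 10
}

func main() {
	// Starting from a 128k-token budget, a few retries shrink it quickly.
	tokens := 128000
	for i := 0; i < 4; i++ {
		tokens = decreaseTenPercent(tokens)
		fmt.Println(tokens) // 115200, 103680, 93312, 83980
	}
}
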
@@ -440,7 +435,6 @@ func (c *Client) Call(ctx context.Context, messageRequest types.CompletionReques
 
 	status <- types.CompletionStatus{
 		CompletionID: id,
-		Chunks:       response,
 		Response:     result,
 		Usage:        result.Usage,
 		Cached:       cacheResponse,
@@ -449,9 +443,9 @@ func (c *Client) Call(ctx context.Context, messageRequest types.CompletionReques
 	return &result, nil
 }
 
-func (c *Client) contextLimitRetryLoop(ctx context.Context, request openai.ChatCompletionRequest, id string, maxTokens int, status chan<- types.CompletionStatus) ([]openai.ChatCompletionStreamResponse, error) {
+func (c *Client) contextLimitRetryLoop(ctx context.Context, request openai.ChatCompletionRequest, id string, maxTokens int, status chan<- types.CompletionStatus) (types.CompletionMessage, error) {
 	var (
-		response []openai.ChatCompletionStreamResponse
+		response types.CompletionMessage
 		err      error
 	)
 
@@ -469,10 +463,10 @@ func (c *Client) contextLimitRetryLoop(ctx context.Context, request openai.ChatC
 			maxTokens = decreaseTenPercent(maxTokens)
 			continue
 		}
-		return nil, err
+		return types.CompletionMessage{}, err
 	}
 
-	return nil, err
+	return types.CompletionMessage{}, err
 }
 
 func appendMessage(msg types.CompletionMessage, response openai.ChatCompletionStreamResponse) types.CompletionMessage {
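
Note: appendMessage itself is unchanged here and its body is not shown, but after this commit both the streaming loop and the non-streaming branch funnel through it, folding each ChatCompletionStreamResponse delta into a single types.CompletionMessage. A toy sketch of that kind of fold, with deliberately simplified stand-in types, just to illustrate the accumulation:

package main

import "fmt"

// Simplified stand-ins for the real stream/message types.
type delta struct{ Content string }

type message struct{ Content string }

// appendDelta folds one stream delta into the accumulated message,
// the same shape of fold appendMessage performs on real responses.
func appendDelta(msg message, d delta) message {
	msg.Content += d.Content
	return msg
}

func main() {
	var msg message
	for _, d := range []delta{{"Hel"}, {"lo, "}, {"world"}} {
		msg = appendDelta(msg, d)
	}
	fmt.Println(msg.Content) // Hello, world
}
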
@@ -548,7 +542,7 @@ func override(left, right string) string {
 	return left
 }
 
-func (c *Client) call(ctx context.Context, request openai.ChatCompletionRequest, transactionID string, partial chan<- types.CompletionStatus) (responses []openai.ChatCompletionStreamResponse, _ error) {
+func (c *Client) call(ctx context.Context, request openai.ChatCompletionRequest, transactionID string, partial chan<- types.CompletionStatus) (types.CompletionMessage, error) {
 	streamResponse := os.Getenv("GPTSCRIPT_INTERNAL_OPENAI_STREAMING") != "false"
 
 	partial <- types.CompletionStatus{
@@ -565,56 +559,58 @@ func (c *Client) call(ctx context.Context, request openai.ChatCompletionRequest,
 		request.StreamOptions = nil
 		resp, err := c.c.CreateChatCompletion(ctx, request)
 		if err != nil {
-			return nil, err
+			return types.CompletionMessage{}, err
 		}
-		return []openai.ChatCompletionStreamResponse{
-			{
-				ID:      resp.ID,
-				Object:  resp.Object,
-				Created: resp.Created,
-				Model:   resp.Model,
-				Usage:   resp.Usage,
-				Choices: []openai.ChatCompletionStreamChoice{
-					{
-						Index: resp.Choices[0].Index,
-						Delta: openai.ChatCompletionStreamChoiceDelta{
-							Content:      resp.Choices[0].Message.Content,
-							Role:         resp.Choices[0].Message.Role,
-							FunctionCall: resp.Choices[0].Message.FunctionCall,
-							ToolCalls:    resp.Choices[0].Message.ToolCalls,
-						},
-						FinishReason: resp.Choices[0].FinishReason,
+		return appendMessage(types.CompletionMessage{}, openai.ChatCompletionStreamResponse{
+			ID:      resp.ID,
+			Object:  resp.Object,
+			Created: resp.Created,
+			Model:   resp.Model,
+			Usage:   resp.Usage,
+			Choices: []openai.ChatCompletionStreamChoice{
+				{
+					Index: resp.Choices[0].Index,
+					Delta: openai.ChatCompletionStreamChoiceDelta{
+						Content:      resp.Choices[0].Message.Content,
+						Role:         resp.Choices[0].Message.Role,
+						FunctionCall: resp.Choices[0].Message.FunctionCall,
+						ToolCalls:    resp.Choices[0].Message.ToolCalls,
 					},
+					FinishReason: resp.Choices[0].FinishReason,
 				},
 			},
-		}, nil
+		}), nil
 	}
 
 	stream, err := c.c.CreateChatCompletionStream(ctx, request)
 	if err != nil {
-		return nil, err
+		return types.CompletionMessage{}, err
 	}
 	defer stream.Close()
 
-	var partialMessage types.CompletionMessage
+	var (
+		partialMessage types.CompletionMessage
+		start          = time.Now()
+		last           []string
+	)
 	for {
 		response, err := stream.Recv()
 		if err == io.EOF {
-			return responses, c.cache.Store(ctx, c.cacheKey(request), responses)
+			return partialMessage, c.cache.Store(ctx, c.cacheKey(request), partialMessage)
 		} else if err != nil {
-			return nil, err
-		}
-		if len(response.Choices) > 0 {
-			slog.Debug("stream", "content", response.Choices[0].Delta.Content)
+			return types.CompletionMessage{}, err
 		}
+		partialMessage = appendMessage(partialMessage, response)
 		if partial != nil {
-			partialMessage = appendMessage(partialMessage, response)
-			partial <- types.CompletionStatus{
-				CompletionID:    transactionID,
-				PartialResponse: &partialMessage,
+			if time.Since(start) > 500*time.Millisecond {
+				last = last[:0]
+				partial <- types.CompletionStatus{
+					CompletionID:    transactionID,
+					PartialResponse: &partialMessage,
+				}
+				start = time.Now()
 			}
 		}
-		responses = append(responses, response)
 	}
 }
 
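Note: the streaming loop now throttles partial status updates to at most one send per 500ms instead of one per chunk, while still folding every chunk into partialMessage so nothing is lost between sends. The pattern in isolation (a generic sketch, not the project's API):

package main

import (
	"fmt"
	"time"
)

// throttle emits the latest accumulated value at most once per interval.
// Every input is folded into acc; only the sends are rate-limited.
func throttle(in <-chan string, out chan<- string, interval time.Duration) {
	var acc string
	start := time.Now()
	for s := range in {
		acc += s // never drop data, only delay reporting it
		if time.Since(start) > interval {
			out <- acc
			start = time.Now()
		}
	}
	out <- acc // final flush, like the io.EOF return above
	close(out)
}

func main() {
	in := make(chan string)
	out := make(chan string, 16)
	go throttle(in, out, 500*time.Millisecond)
	for _, s := range []string{"a", "b", "c"} {
		in <- s
	}
	close(in)
	for s := range out {
		fmt.Println(s)
	}
}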