Skip to content

Commit 5258a3b

Browse files
chore: increase memory efficiency
1 parent 886f944 commit 5258a3b

File tree

4 files changed

+48
-52
lines changed

4 files changed

+48
-52
lines changed

pkg/cache/cache.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@ func (c *Client) CacheDir() string {
9292

9393
func (c *Client) cacheKey(key any) (string, error) {
9494
hash := sha256.New()
95+
hash.Write([]byte("v2"))
9596
if err := json.NewEncoder(hash).Encode(key); err != nil {
9697
return "", err
9798
}

pkg/engine/http.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ func (e *Engine) runHTTP(ctx context.Context, prg *types.Program, tool types.Too
7676
}
7777

7878
for _, env := range e.Env {
79-
if strings.HasPrefix(env, "GPTSCRIPT_") {
79+
if strings.HasPrefix(env, "GPTSCRIPT_WORKSPACE_") {
8080
req.Header.Add("X-GPTScript-Env", env)
8181
}
8282
}

pkg/openai/client.go

Lines changed: 46 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import (
99
"slices"
1010
"sort"
1111
"strings"
12+
"time"
1213

1314
openai "github.com/gptscript-ai/chat-completion-client"
1415
"github.com/gptscript-ai/gptscript/pkg/cache"
@@ -212,15 +213,15 @@ func (c *Client) seed(request openai.ChatCompletionRequest) int {
212213
return hash.Seed(newRequest)
213214
}
214215

215-
func (c *Client) fromCache(ctx context.Context, messageRequest types.CompletionRequest, request openai.ChatCompletionRequest) (result []openai.ChatCompletionStreamResponse, _ bool, _ error) {
216+
func (c *Client) fromCache(ctx context.Context, messageRequest types.CompletionRequest, request openai.ChatCompletionRequest) (result types.CompletionMessage, _ bool, _ error) {
216217
if !messageRequest.GetCache() {
217-
return nil, false, nil
218+
return types.CompletionMessage{}, false, nil
218219
}
219220
found, err := c.cache.Get(ctx, c.cacheKey(request), &result)
220221
if err != nil {
221-
return nil, false, err
222+
return types.CompletionMessage{}, false, err
222223
} else if !found {
223-
return nil, false, nil
224+
return types.CompletionMessage{}, false, nil
224225
}
225226
return result, true, nil
226227
}
@@ -396,33 +397,27 @@ func (c *Client) Call(ctx context.Context, messageRequest types.CompletionReques
396397
IncludeUsage: true,
397398
}
398399
}
399-
response, ok, err := c.fromCache(ctx, messageRequest, request)
400+
result, ok, err := c.fromCache(ctx, messageRequest, request)
400401
if err != nil {
401402
return nil, err
402403
} else if !ok {
403-
response, err = c.call(ctx, request, id, status)
404+
result, err = c.call(ctx, request, id, status)
404405

405406
// If we got back a context length exceeded error, keep retrying and shrinking the message history until we pass.
406407
var apiError *openai.APIError
407408
if errors.As(err, &apiError) && apiError.Code == "context_length_exceeded" && messageRequest.Chat {
408409
// Decrease maxTokens by 10% to make garbage collection more aggressive.
409410
// The retry loop will further decrease maxTokens if needed.
410411
maxTokens := decreaseTenPercent(messageRequest.MaxTokens)
411-
response, err = c.contextLimitRetryLoop(ctx, request, id, maxTokens, status)
412+
result, err = c.contextLimitRetryLoop(ctx, request, id, maxTokens, status)
412413
}
413-
414414
if err != nil {
415415
return nil, err
416416
}
417417
} else {
418418
cacheResponse = true
419419
}
420420

421-
result := types.CompletionMessage{}
422-
for _, response := range response {
423-
result = appendMessage(result, response)
424-
}
425-
426421
for i, content := range result.Content {
427422
if content.ToolCall != nil && content.ToolCall.ID == "" {
428423
content.ToolCall.ID = "call_" + hash.ID(content.ToolCall.Function.Name, content.ToolCall.Function.Arguments)[:8]
@@ -440,7 +435,6 @@ func (c *Client) Call(ctx context.Context, messageRequest types.CompletionReques
440435

441436
status <- types.CompletionStatus{
442437
CompletionID: id,
443-
Chunks: response,
444438
Response: result,
445439
Usage: result.Usage,
446440
Cached: cacheResponse,
@@ -449,9 +443,9 @@ func (c *Client) Call(ctx context.Context, messageRequest types.CompletionReques
449443
return &result, nil
450444
}
451445

452-
func (c *Client) contextLimitRetryLoop(ctx context.Context, request openai.ChatCompletionRequest, id string, maxTokens int, status chan<- types.CompletionStatus) ([]openai.ChatCompletionStreamResponse, error) {
446+
func (c *Client) contextLimitRetryLoop(ctx context.Context, request openai.ChatCompletionRequest, id string, maxTokens int, status chan<- types.CompletionStatus) (types.CompletionMessage, error) {
453447
var (
454-
response []openai.ChatCompletionStreamResponse
448+
response types.CompletionMessage
455449
err error
456450
)
457451

@@ -469,10 +463,10 @@ func (c *Client) contextLimitRetryLoop(ctx context.Context, request openai.ChatC
469463
maxTokens = decreaseTenPercent(maxTokens)
470464
continue
471465
}
472-
return nil, err
466+
return types.CompletionMessage{}, err
473467
}
474468

475-
return nil, err
469+
return types.CompletionMessage{}, err
476470
}
477471

478472
func appendMessage(msg types.CompletionMessage, response openai.ChatCompletionStreamResponse) types.CompletionMessage {
@@ -548,7 +542,7 @@ func override(left, right string) string {
548542
return left
549543
}
550544

551-
func (c *Client) call(ctx context.Context, request openai.ChatCompletionRequest, transactionID string, partial chan<- types.CompletionStatus) (responses []openai.ChatCompletionStreamResponse, _ error) {
545+
func (c *Client) call(ctx context.Context, request openai.ChatCompletionRequest, transactionID string, partial chan<- types.CompletionStatus) (types.CompletionMessage, error) {
552546
streamResponse := os.Getenv("GPTSCRIPT_INTERNAL_OPENAI_STREAMING") != "false"
553547

554548
partial <- types.CompletionStatus{
@@ -565,56 +559,58 @@ func (c *Client) call(ctx context.Context, request openai.ChatCompletionRequest,
565559
request.StreamOptions = nil
566560
resp, err := c.c.CreateChatCompletion(ctx, request)
567561
if err != nil {
568-
return nil, err
562+
return types.CompletionMessage{}, err
569563
}
570-
return []openai.ChatCompletionStreamResponse{
571-
{
572-
ID: resp.ID,
573-
Object: resp.Object,
574-
Created: resp.Created,
575-
Model: resp.Model,
576-
Usage: resp.Usage,
577-
Choices: []openai.ChatCompletionStreamChoice{
578-
{
579-
Index: resp.Choices[0].Index,
580-
Delta: openai.ChatCompletionStreamChoiceDelta{
581-
Content: resp.Choices[0].Message.Content,
582-
Role: resp.Choices[0].Message.Role,
583-
FunctionCall: resp.Choices[0].Message.FunctionCall,
584-
ToolCalls: resp.Choices[0].Message.ToolCalls,
585-
},
586-
FinishReason: resp.Choices[0].FinishReason,
564+
return appendMessage(types.CompletionMessage{}, openai.ChatCompletionStreamResponse{
565+
ID: resp.ID,
566+
Object: resp.Object,
567+
Created: resp.Created,
568+
Model: resp.Model,
569+
Usage: resp.Usage,
570+
Choices: []openai.ChatCompletionStreamChoice{
571+
{
572+
Index: resp.Choices[0].Index,
573+
Delta: openai.ChatCompletionStreamChoiceDelta{
574+
Content: resp.Choices[0].Message.Content,
575+
Role: resp.Choices[0].Message.Role,
576+
FunctionCall: resp.Choices[0].Message.FunctionCall,
577+
ToolCalls: resp.Choices[0].Message.ToolCalls,
587578
},
579+
FinishReason: resp.Choices[0].FinishReason,
588580
},
589581
},
590-
}, nil
582+
}), nil
591583
}
592584

593585
stream, err := c.c.CreateChatCompletionStream(ctx, request)
594586
if err != nil {
595-
return nil, err
587+
return types.CompletionMessage{}, err
596588
}
597589
defer stream.Close()
598590

599-
var partialMessage types.CompletionMessage
591+
var (
592+
partialMessage types.CompletionMessage
593+
start = time.Now()
594+
last []string
595+
)
600596
for {
601597
response, err := stream.Recv()
602598
if err == io.EOF {
603-
return responses, c.cache.Store(ctx, c.cacheKey(request), responses)
599+
return partialMessage, c.cache.Store(ctx, c.cacheKey(request), partialMessage)
604600
} else if err != nil {
605-
return nil, err
606-
}
607-
if len(response.Choices) > 0 {
608-
slog.Debug("stream", "content", response.Choices[0].Delta.Content)
601+
return types.CompletionMessage{}, err
609602
}
603+
partialMessage = appendMessage(partialMessage, response)
610604
if partial != nil {
611-
partialMessage = appendMessage(partialMessage, response)
612-
partial <- types.CompletionStatus{
613-
CompletionID: transactionID,
614-
PartialResponse: &partialMessage,
605+
if time.Since(start) > 500*time.Millisecond {
606+
last = last[:0]
607+
partial <- types.CompletionStatus{
608+
CompletionID: transactionID,
609+
PartialResponse: &partialMessage,
610+
}
611+
start = time.Now()
615612
}
616613
}
617-
responses = append(responses, response)
618614
}
619615
}
620616

pkg/types/completion.go

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,6 @@ type CompletionStatus struct {
8282
Response any
8383
Usage Usage
8484
Cached bool
85-
Chunks any
8685
PartialResponse *CompletionMessage
8786
}
8887

0 commit comments

Comments (0)