Skip to content

Commit 9a0709f

Browse files
committed
enhance: avoid context limit
Signed-off-by: Grant Linville <[email protected]>
1 parent fbb8f5d commit 9a0709f

File tree

2 files changed

+76
-9
lines changed

2 files changed

+76
-9
lines changed

pkg/openai/client.go

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,10 @@ package openai
22

33
import (
44
"context"
5+
"errors"
56
"io"
67
"log/slog"
8+
"math"
79
"os"
810
"slices"
911
"sort"
@@ -24,6 +26,7 @@ import (
2426
const (
2527
DefaultModel = openai.GPT4o
2628
BuiltinCredName = "sys.openai"
29+
TooLongMessage = "Error: tool call output is too long"
2730
)
2831

2932
var (
@@ -317,6 +320,14 @@ func (c *Client) Call(ctx context.Context, messageRequest types.CompletionReques
317320
}
318321

319322
if messageRequest.Chat {
323+
// Check the last message. If it is from a tool call, and if it takes up more than 80% of the budget on its own, reject it.
324+
lastMessage := msgs[len(msgs)-1]
325+
if lastMessage.Role == string(types.CompletionMessageRoleTypeTool) && countMessage(lastMessage) > int(math.Round(float64(getBudget(messageRequest.MaxTokens))*0.8)) {
326+
// We need to update it in the msgs slice for right now and in the messageRequest for future calls.
327+
msgs[len(msgs)-1].Content = TooLongMessage
328+
messageRequest.Messages[len(messageRequest.Messages)-1].Content = types.Text(TooLongMessage)
329+
}
330+
320331
msgs = dropMessagesOverCount(messageRequest.MaxTokens, msgs)
321332
}
322333

@@ -383,6 +394,16 @@ func (c *Client) Call(ctx context.Context, messageRequest types.CompletionReques
383394
return nil, err
384395
} else if !ok {
385396
response, err = c.call(ctx, request, id, status)
397+
398+
// If we got back a context length exceeded error, keep retrying and shrinking the message history until we pass.
399+
var apiError *openai.APIError
400+
if err != nil && errors.As(err, &apiError) && apiError.Code == "context_length_exceeded" && messageRequest.Chat {
401+
// Decrease maxTokens by 10% to make garbage collection more aggressive.
402+
// The retry loop will further decrease maxTokens if needed.
403+
maxTokens := decreaseTenPercent(messageRequest.MaxTokens)
404+
response, err = c.contextLimitRetryLoop(ctx, request, id, maxTokens, status)
405+
}
406+
386407
if err != nil {
387408
return nil, err
388409
}
@@ -421,6 +442,32 @@ func (c *Client) Call(ctx context.Context, messageRequest types.CompletionReques
421442
return &result, nil
422443
}
423444

445+
func (c *Client) contextLimitRetryLoop(ctx context.Context, request openai.ChatCompletionRequest, id string, maxTokens int, status chan<- types.CompletionStatus) ([]openai.ChatCompletionStreamResponse, error) {
446+
var (
447+
response []openai.ChatCompletionStreamResponse
448+
err error
449+
)
450+
451+
for range 10 { // maximum 10 tries
452+
// Try to drop older messages again, with a decreased max tokens.
453+
request.Messages = dropMessagesOverCount(maxTokens, request.Messages)
454+
response, err = c.call(ctx, request, id, status)
455+
if err == nil {
456+
break
457+
}
458+
459+
var apiError *openai.APIError
460+
if errors.As(err, &apiError) && apiError.Code == "context_length_exceeded" {
461+
// Decrease maxTokens and try again
462+
maxTokens = decreaseTenPercent(maxTokens)
463+
continue
464+
}
465+
return nil, err
466+
}
467+
468+
return response, nil
469+
}
470+
424471
func appendMessage(msg types.CompletionMessage, response openai.ChatCompletionStreamResponse) types.CompletionMessage {
425472
msg.Usage.CompletionTokens = types.FirstSet(msg.Usage.CompletionTokens, response.Usage.CompletionTokens)
426473
msg.Usage.PromptTokens = types.FirstSet(msg.Usage.PromptTokens, response.Usage.PromptTokens)

pkg/openai/count.go

Lines changed: 29 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,32 @@
11
package openai
22

3-
import openai "github.com/gptscript-ai/chat-completion-client"
3+
import (
4+
"math"
5+
6+
openai "github.com/gptscript-ai/chat-completion-client"
7+
)
8+
9+
// DefaultMaxTokens is the token budget assumed when a request does not
// specify an explicit limit of its own.
const DefaultMaxTokens = 128_000

// decreaseTenPercent returns 90% of the effective token budget (rounded to
// the nearest integer), used to make message garbage collection more
// aggressive on each retry.
func decreaseTenPercent(maxTokens int) int {
	budget := float64(getBudget(maxTokens))
	return int(math.Round(budget * 0.9))
}

// getBudget resolves the effective token budget: a zero maxTokens means
// "unspecified" and falls back to DefaultMaxTokens.
func getBudget(maxTokens int) int {
	if maxTokens != 0 {
		return maxTokens
	}
	return DefaultMaxTokens
}
422

523
func dropMessagesOverCount(maxTokens int, msgs []openai.ChatCompletionMessage) (result []openai.ChatCompletionMessage) {
624
var (
725
lastSystem int
826
withinBudget int
9-
budget = maxTokens
27+
budget = getBudget(maxTokens)
1028
)
1129

12-
if maxTokens == 0 {
13-
budget = 300_000
14-
} else {
15-
budget *= 3
16-
}
17-
1830
for i, msg := range msgs {
1931
if msg.Role == openai.ChatMessageRoleSystem {
2032
budget -= countMessage(msg)
@@ -33,7 +45,15 @@ func dropMessagesOverCount(maxTokens int, msgs []openai.ChatCompletionMessage) (
3345
}
3446
}
3547

36-
if withinBudget == len(msgs)-1 {
48+
// OpenAI gets upset if there is a tool message without a tool call preceding it.
49+
// Check the oldest message within budget, and if it is a tool message, just drop it.
50+
// We do this in a loop because it is possible for multiple tool messages to be in a row,
51+
// due to parallel tool calls.
52+
for withinBudget < len(msgs) && msgs[withinBudget].Role == openai.ChatMessageRoleTool {
53+
withinBudget++
54+
}
55+
56+
if withinBudget >= len(msgs)-1 {
3757
// We are going to drop all non system messages, which seems useless, so just return them
3858
// all and let it fail
3959
return msgs

0 commit comments

Comments
 (0)