Skip to content

Commit f7a3ab3

Browse files
committed
test: add smoke test based on examples/samples-readme.md
Signed-off-by: Nick Hale <[email protected]>
1 parent b77cd13 commit f7a3ab3

File tree

7 files changed

+11566
-3
lines changed

7 files changed

+11566
-3
lines changed

pkg/tests/judge/judge.go

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -84,10 +84,10 @@ func New[T any](client *openai.Client) (*Judge[T], error) {
8484
}
8585

8686
func (j *Judge[T]) Equal(ctx context.Context, expected, actual T, criteria string) (equal bool, reasoning string, err error) {
87-
comparisonJSON, err := json.MarshalIndent(&comparison[T]{
87+
comparisonJSON, err := json.Marshal(&comparison[T]{
8888
Expected: expected,
8989
Actual: actual,
90-
}, "", " ")
90+
})
9191
if err != nil {
9292
return false, "", fmt.Errorf("failed to marshal judge testcase JSON: %w", err)
9393
}

pkg/tests/smoke/smoke_test.go

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -83,7 +83,6 @@ func TestSmoke(t *testing.T) {
8383
actualEvents,
8484
`
8585
- disregard differences in timestamps, generated IDs, natural language verbiage, and event order
86-
- omit callProgress events from the comparision
8786
- the overall stream of events and set of tools called should roughly match
8887
- arguments passed in tool calls should be roughly the same
8988
- the final callFinish event should be semantically similar
@@ -175,6 +174,11 @@ func getActualEvents(t *testing.T, eventsFile string) []event {
175174

176175
var e event
177176
require.NoError(t, json.Unmarshal([]byte(line), &e))
177+
178+
if e.Type == runner.EventTypeCallProgress {
179+
continue
180+
}
181+
178182
events = append(events, e)
179183
}
180184

pkg/tests/smoke/testdata/SamplesReadme/claude-3-5-sonnet-20240620-expected.json

Lines changed: 3871 additions & 0 deletions
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)