gptscript-ai
diff --git a/pkg/tests/judge/judge.go b/pkg/tests/judge/judge.go
Lines changed: 2 additions & 2 deletions
diff --git a/pkg/tests/smoke/smoke_test.go b/pkg/tests/smoke/smoke_test.go
Lines changed: 5 additions & 1 deletion
diff --git a/pkg/tests/smoke/testdata/SamplesReadme/claude-3-5-sonnet-20240620-expected.json b/pkg/tests/smoke/testdata/SamplesReadme/claude-3-5-sonnet-20240620-expected.json
Lines changed: 3871 additions & 0 deletions
@@ -84,10 +84,10 @@ func New[T any](client *openai.Client) (*Judge[T], error) {
 }
 
 func (j *Judge[T]) Equal(ctx context.Context, expected, actual T, criteria string) (equal bool, reasoning string, err error) {
-	comparisonJSON, err := json.MarshalIndent(&comparison[T]{
+	comparisonJSON, err := json.Marshal(&comparison[T]{
 		Expected: expected,
 		Actual:   actual,
-	}, "", "    ")
+	})
 	if err != nil {
 		return false, "", fmt.Errorf("failed to marshal judge testcase JSON: %w", err)
 	}
 
@@ -83,7 +83,6 @@ func TestSmoke(t *testing.T) {
 				actualEvents,
 				`
 - disregard differences in timestamps, generated IDs, natural language verbiage, and event order
-- omit callProgress events from the comparision
 - the overall stream of events and set of tools called should roughly match
 - arguments passed in tool calls should be roughly the same
 - the final callFinish event should be semantically similar
@@ -175,6 +174,11 @@ func getActualEvents(t *testing.T, eventsFile string) []event {
 
 		var e event
 		require.NoError(t, json.Unmarshal([]byte(line), &e))
+
+		if e.Type == runner.EventTypeCallProgress {
+			continue
+		}
+
 		events = append(events, e)
 	}
Original file line number	Diff line number	Diff line change
`@@ -84,10 +84,10 @@ func New[T any](client *openai.Client) (*Judge[T], error) {`
`84`	`84`	`}`
`85`	`85`
`86`	`86`	`func (j *Judge[T]) Equal(ctx context.Context, expected, actual T, criteria string) (equal bool, reasoning string, err error) {`
`87`		`- comparisonJSON, err := json.MarshalIndent(&comparison[T]{`
	`87`	`+ comparisonJSON, err := json.Marshal(&comparison[T]{`
`88`	`88`	`Expected: expected,`
`89`	`89`	`Actual: actual,`
`90`		`- }, "", "    ")`
	`90`	`+ })`
`91`	`91`	`if err != nil {`
`92`	`92`	`return false, "", fmt.Errorf("failed to marshal judge testcase JSON: %w", err)`
`93`	`93`	`}`