@@ -68,6 +68,10 @@ def questions_wrong_file():
68
68
def questions_answers_file():
    """Return whatever ``_get_file`` resolves for ``questions_answers.jsonl``."""
    file_name = "questions_answers.jsonl"
    return _get_file(file_name)
70
70
71
@pytest.fixture
def questions_answers_basic_file():
    """Fixture returning whatever ``_get_file`` resolves for ``questions_answers_basic.jsonl``."""
    file_name = "questions_answers_basic.jsonl"
    return _get_file(file_name)
74
+
71
75
72
76
def _target_fn (query ):
73
77
"""An example target function."""
@@ -90,6 +94,15 @@ def _target_fn2(query):
90
94
response ["query" ] = f"The query is as follows: { query } "
91
95
return response
92
96
97
+ def _new_answer_target ():
98
+ return {"response" : "new response" }
99
+
100
+ def _question_override_target (query ):
101
+ return {"query" : "new query" }
102
+
103
+ def _question_answer_override_target (query , response ):
104
+ return {"query" : "new query" , "response" : "new response" }
105
+
93
106
94
107
@pytest .mark .usefixtures ("mock_model_config" )
95
108
@pytest .mark .unittest
@@ -508,3 +521,103 @@ def test_general_aggregation(self):
508
521
assert aggregation ["thing.metric" ] == 3
509
522
assert aggregation ["other_thing.other_meteric" ] == - 3
510
523
assert aggregation ["final_thing.final_metric" ] == 0.4
524
+
525
@pytest.mark.parametrize("use_pf_client", [True, False])
def test_optional_inputs_with_data(self, questions_file, questions_answers_basic_file, use_pf_client):
    """Exercise evaluators with required, partly-optional and optional inputs using data only.

    Runs ``evaluate`` three times, once per scenario below, for both values of
    ``use_pf_client``:

    1. Data from ``questions_answers_basic_file``: all four evaluators run and
       their first-row scores are checked.
    2. Data from ``questions_file``: the evaluator without defaults must raise
       ``EvaluationException`` naming the missing ``response`` input.
    3. Data from ``questions_file``: the evaluators that have defaults still
       produce scores.

    :param questions_file: Fixture value passed as ``data`` for the question-only runs.
    :param questions_answers_basic_file: Fixture value passed as ``data`` for the full-input run.
    :param use_pf_client: Forwarded to ``evaluate`` as ``_use_pf_client``.
    """
    from test_evaluators.test_inputs_evaluators import (
        NonOptionalEval,
        HalfOptionalEval,
        OptionalEval,
        NoInputEval,
    )

    # All variants work with both keyworded inputs
    results = evaluate(
        data=questions_answers_basic_file,
        evaluators={
            "non": NonOptionalEval(),
            "half": HalfOptionalEval(),
            "opt": OptionalEval(),
            "no": NoInputEval(),
        },
        _use_pf_client=use_pf_client,
    )  # type: ignore

    first_row = results["rows"][0]
    assert first_row["outputs.non.non_score"] == 0
    assert first_row["outputs.half.half_score"] == 1
    assert first_row["outputs.opt.opt_score"] == 3
    # CodeClient doesn't like no-input evals.
    if use_pf_client:
        assert first_row["outputs.no.no_score"] == 0

    # Variant with no default inputs fails on single input
    with pytest.raises(EvaluationException) as exc_info:
        evaluate(
            data=questions_file,
            evaluators={
                "non": NonOptionalEval(),
            },
            _use_pf_client=use_pf_client,
        )  # type: ignore
    # Checked after the ``with`` block: inside it, nothing runs once evaluate() raises.
    # ``exc_info.value`` is the public accessor for the captured exception.
    assert str(exc_info.value) == "Missing required inputs for evaluator non : ['response']."

    # Variants with default answer work when only question is inputted
    only_question_results = evaluate(
        data=questions_file,
        evaluators={
            "half": HalfOptionalEval(),
            "opt": OptionalEval(),
            "no": NoInputEval(),
        },
        _use_pf_client=use_pf_client,
    )  # type: ignore

    first_row_2 = only_question_results["rows"][0]
    assert first_row_2["outputs.half.half_score"] == 0
    assert first_row_2["outputs.opt.opt_score"] == 1
    # CodeClient doesn't like no-input evals.
    if use_pf_client:
        # Inspect the question-only run's row; the first run's row was already
        # verified above and re-checking it would leave this run untested.
        assert first_row_2["outputs.no.no_score"] == 0
581
+
582
@pytest.mark.parametrize("use_pf_client", [True, False])
def test_optional_inputs_with_target(self, questions_file, questions_answers_basic_file, use_pf_client):
    """Verify which values reach the evaluator when a target function is supplied.

    Three data/target pairings are run through ``evaluate`` in turn with a single
    ``EchoEval`` evaluator, and the echoed query/response of the first result row
    is compared against the expected source (data or target) for each input.
    """
    from test_evaluators.test_inputs_evaluators import EchoEval

    query_key = 'outputs.echo.echo_query'
    response_key = 'outputs.echo.echo_response'

    def first_echo_row(data_file, target_fn):
        # Run one evaluation with a fresh EchoEval and hand back the first result row.
        outcome = evaluate(
            data=data_file,
            target=target_fn,
            evaluators={"echo": EchoEval()},
            _use_pf_client=use_pf_client,
        )  # type: ignore
        return outcome['rows'][0]

    # Check that target overrides default inputs
    answer_row = first_echo_row(questions_file, _new_answer_target)
    assert answer_row[query_key] == 'How long is flight from Earth to LV-426?'
    assert answer_row[response_key] == 'new response'

    # Check that target replaces inputs from data (i.e. if both data and target
    # provide the same output, the target output is sent to the evaluator).
    override_row = first_echo_row(questions_answers_basic_file, _question_override_target)
    assert override_row[query_key] == "new query"
    assert override_row[response_key] == 'There is nothing good there.'

    # Check that target can replace default and data inputs at the same time.
    double_row = first_echo_row(questions_answers_basic_file, _question_answer_override_target)
    assert double_row[query_key] == "new query"
    assert double_row[response_key] == "new response"
0 commit comments