@@ -463,6 +463,9 @@ def test_init(self):
463
463
self .assertAlmostEqual (rc .ratio , 12325.0 / 11616.0 )
464
464
self .assertAlmostEqual (rc .delta , (((11616.0 / 12325.0 ) - 1 ) * 100 ),
465
465
places = 3 )
466
+ self .assertEqual (rc .location , 'MIN' )
467
+ self .assertEqual ([rc .old_location , rc .new_location ],
468
+ [rc .old .min , rc .new .min ])
466
469
# handle test results that sometimes change to zero, when compiler
467
470
# optimizes out the body of the incorrectly written test
468
471
rc = ResultComparison (self .r0 , self .r0 )
@@ -489,6 +492,94 @@ def test_values_is_dubious(self):
489
492
# other way around: old.min < new.min < old.max
490
493
self .assertTrue (ResultComparison (self .r2 , self .r1 ).is_dubious )
491
494
495
def test_use_most_stable_location_for_comparison(self):
    """
    Select the most stable location estimate (MIN, P05, P10, Q1, MED).

    When the results contain samples from multiple independent runs, use
    the empirical distribution to select the location estimate with lowest
    variance and use it in result comparison.
    """
    def compare(log):
        """Build a ResultComparison from a four-row quantile log.

        The first two rows are merged into the old result and the last
        two rows into the new result. Surrounding blank lines and any
        indentation of the rows are ignored, so the log literal can be
        laid out freely in the test body.
        """
        results = [
            PerformanceTestResult(
                line.strip().split(','), quantiles=True, delta=True)
            for line in log.strip().splitlines()]
        old, old_extra, new, new_extra = results
        old.merge(old_extra)
        new.merge(new_extra)
        return ResultComparison(old, new)

    # --quantile=20 --delta FlattenListFlatMap
    rc = compare("""
0,F,21,3721,147,1689,83,44,138,95,6,3,19,19,26,23,45,64,88,84,34,56,233,1
0,F,21,5284,16,560,23,25,15,29,13,6,4,19,16,15,26,32,31,72,23,26,104,1
0,F,21,5318,82,507,25,13,23,12,4,9,25,23,27,14,27,13,6,8,38,89,265,1
0,F,21,4701,12,1151,28,13,32,5,11,26,9,33,16,28,14,13,46,185,126,16,64,1
""")
    self.assertTrue(rc.is_dubious)
    self.assertEqual([rc.old.min, rc.new.min], [3721, 4701])
    self.assertEqual([rc.old.median, rc.new.median], [5975, 5988])
    self.assertEqual(rc.location, 'MED')
    self.assertEqual([rc.old_location, rc.new_location], [5975, 5988])
    # The delta is computed from MED; computed from MIN it would have
    # been 26.34 instead.
    self.assertAlmostEqual(rc.delta, 0.22, places=2)

    # --quantile=20 --delta ObjectiveCBridgeStubToNSDateRef O
    rc = compare("""
0,O,21,128,8,1,,4,3,3,2,,,3,,1,,,,3,,1,2,1
0,O,21,119,16,2,,3,3,1,5,,,,1,2,1,,2,1,1,,2,1
0,O,21,125,7,5,,1,5,1,3,2,,,,2,1,1,1,2,1,1,2,1
0,O,21,119,17,,1,,2,5,4,1,,,,1,1,,1,,,3,3,1
""")
    self.assertTrue(rc.is_dubious)
    self.assertEqual([rc.old.min, rc.new.min], [119, 119])
    self.assertEqual([rc.old.samples.quantile(0.1),
                      rc.new.samples.quantile(0.1)], [137, 136])
    self.assertEqual(rc.location, 'P10')
    self.assertEqual([rc.old_location, rc.new_location], [137, 136])
    self.assertAlmostEqual(rc.delta, -0.73, places=2)

    # --quantile=20 --delta DictionaryBridgeToObjC_Bridge -Onone
    rc = compare("""
0,D,21,15,,,,,,,,,,,,,1,,,,,,,
0,D,21,15,,,,,,,,,,,,,,,,,,,,
0,D,21,14,1,,,,,,,,,,,,,,,,,,,
0,D,21,15,,,,,,,,,,1,,,,,,,1,,,
""")
    self.assertTrue(rc.is_dubious)
    self.assertEqual([rc.old.min, rc.new.min], [15, 14])
    self.assertEqual([rc.old.samples.quantile(0.05),
                      rc.new.samples.quantile(0.05)], [15, 15])
    self.assertEqual(rc.location, 'P05')
    self.assertEqual([rc.old_location, rc.new_location], [15, 15])
555
+
556
def test_stable_location_vs_outlier_runs(self):
    """Location estimate should be robust in presence of outlier runs."""
    def synth(minimum):
        """Return a synthetic 21-sample result starting at `minimum`.

        The row is in quantile/delta form: the minimum is followed by a
        fixed sequence of deltas (empty fields included), so every
        synthetic run has the same shape and differs only in its offset.
        """
        row = ('0,S,21,' + str(minimum) +
               ',1,1,,,1,,,,,1,,,,,2,,,,,').split(',')
        return PerformanceTestResult(row, quantiles=True, delta=True)

    s, t, u = synth(100), synth(100), synth(100)
    # Sanity-check the shape of a single synthetic run.
    self.assertEqual(
        [s.min, s.samples.quantile(0.05), s.samples.quantile(0.1),
         s.samples.q1, s.median, s.samples.q3],
        [100, 101, 102, 103, 104, 106])

    # Every result ends up aggregating 11 runs; they differ only in how
    # many of those runs are outliers (minimum 94 instead of 100).
    for _ in range(10):
        s.merge(synth(100))  # no outlier runs
    for _ in range(9):
        t.merge(synth(100))
    for _ in range(8):
        u.merge(synth(100))
    t.merge(synth(94))  # one outlier run
    u.merge(synth(94))  # two outlier runs
    u.merge(synth(94))

    rst = ResultComparison(s, t)
    self.assertEqual(rst.location, 'Q1')
    self.assertEqual([rst.old_location, rst.new_location], [103, 102])

    rsu = ResultComparison(s, u)
    self.assertEqual(rsu.location, 'MED')
    self.assertEqual([rsu.old_location, rsu.new_location], [104, 103])
582
+
492
583
493
584
class FileSystemIntegration (unittest .TestCase ):
494
585
def setUp (self ):
0 commit comments