1
1
import logging
2
2
import os
3
+ import traceback
3
4
from collections import Counter
4
5
5
- import docker
6
6
from concurrent .futures import ThreadPoolExecutor , as_completed
7
7
from datasets import load_dataset
8
8
from tqdm import tqdm
18
18
logger = logging .getLogger (__name__ )
19
19
20
20
21
- def main (dataset_name : str , dataset_split : str , repo_split : str , base_dir : str , branch : str , backend : str , timeout : int , num_workers : int ) -> None :
21
+ def main (
22
+ dataset_name : str ,
23
+ dataset_split : str ,
24
+ repo_split : str ,
25
+ base_dir : str ,
26
+ branch : str ,
27
+ backend : str ,
28
+ timeout : int ,
29
+ num_workers : int ,
30
+ ) -> None :
22
31
dataset : Iterator [RepoInstance ] = load_dataset (dataset_name , split = dataset_split ) # type: ignore
23
32
repos = SPLIT [repo_split ]
24
33
pairs = []
@@ -54,15 +63,15 @@ def main(dataset_name: str, dataset_split: str, repo_split: str, base_dir: str,
54
63
# Update progress bar, check if instance ran successfully
55
64
result = future .result ()
56
65
log_dirs .append (result )
57
- except Exception as e :
66
+ except Exception :
58
67
traceback .print_exc ()
59
68
continue
60
69
61
70
# get numbers
62
71
out = []
63
72
for name in tqdm (log_dirs ):
64
73
report_file = os .path .join (name , "report.json" )
65
- name = name .split ('/' )[2 ]
74
+ name = name .split ("/" )[2 ]
66
75
if not os .path .exists (report_file ):
67
76
out .append (
68
77
{
@@ -73,9 +82,9 @@ def main(dataset_name: str, dataset_split: str, repo_split: str, base_dir: str,
73
82
}
74
83
)
75
84
continue
76
- dataset : Iterator [ RepoInstance ] = load_dataset ("json" , data_files = report_file , split = "train" )
85
+ report = load_dataset ("json" , data_files = report_file , split = "train" ) # type: ignore
77
86
test_ids = get_tests (name , stdout = False )
78
- tests = {x [' nodeid' ]: x [' call' ] for x in dataset ["tests" ][0 ]}
87
+ tests = {x [" nodeid" ]: x [" call" ] for x in report ["tests" ][0 ]} # type: ignore
79
88
status = []
80
89
runtimes = []
81
90
no_runs = 0
@@ -100,18 +109,16 @@ def main(dataset_name: str, dataset_split: str, repo_split: str, base_dir: str,
100
109
"name" : name ,
101
110
"sum" : total ,
102
111
"passed" : passed ,
103
- "num_passed" : status ["passed" ]+ status ["xfail" ],
104
- "num_tests" : sum (status .values ())
112
+ "num_passed" : status ["passed" ] + status ["xfail" ],
113
+ "num_tests" : sum (status .values ()),
105
114
}
106
115
)
107
116
print ("repo,runtime,num_passed/num_tests" )
108
117
out = sorted (out , key = lambda x : x ["sum" ], reverse = True )
109
118
for x in out :
110
- print (
111
- f"{ x ['name' ]} ,{ x ['sum' ]} ,{ x ['num_passed' ]} /{ x ['num_tests' ]} "
112
- )
119
+ print (f"{ x ['name' ]} ,{ x ['sum' ]} ,{ x ['num_passed' ]} /{ x ['num_tests' ]} " )
113
120
total_runtime = sum ([x ["sum" ] for x in out ])
114
- averaged_passed = sum ([x ["passed" ] for x in out ])/ len (out )
121
+ averaged_passed = sum ([x ["passed" ] for x in out ]) / len (out )
115
122
print (f"total runtime: { total_runtime } " )
116
123
print (f"average pass rate: { averaged_passed } " )
117
124
0 commit comments