Skip to content

Commit c3300bc

Browse files
authored
Merge pull request #11954 from graydon/yet-more-process-stats-dir-improvements
Yet more process stats dir improvements
2 parents 94c596e + 11858e0 commit c3300bc

File tree

2 files changed, with 42 additions and 71 deletions.

utils/jobstats/jobstats.py

Lines changed: 19 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -61,11 +61,22 @@ def driver_jobs_total(self):
6161
assert(self.is_driver_job())
6262
return self.driver_jobs_ran() + self.driver_jobs_skipped()
6363

64-
def merged_with(self, other):
64+
def merged_with(self, other, merge_by="sum"):
6565
"""Return a new JobStats, holding the merger of self and other"""
6666
merged_stats = {}
67+
ops = {"sum": lambda a, b: a + b,
68+
# Because 0 is also a sentinel on counters we do a modified
69+
# "nonzero-min" here. Not ideal but best we can do.
70+
"min": lambda a, b: (min(a, b)
71+
if a != 0 and b != 0
72+
else max(a, b)),
73+
"max": lambda a, b: max(a, b)}
74+
op = ops[merge_by]
6775
for k, v in self.stats.items() + other.stats.items():
68-
merged_stats[k] = v + merged_stats.get(k, 0.0)
76+
if k in merged_stats:
77+
merged_stats[k] = op(v, merged_stats[k])
78+
else:
79+
merged_stats[k] = v
6980
merged_kind = self.jobkind
7081
if other.jobkind != merged_kind:
7182
merged_kind = "<merged>"
@@ -160,7 +171,7 @@ def to_lnt_test_obj(self, args):
160171

161172

162173
def load_stats_dir(path, select_module=[], select_stat=[],
163-
exclude_timers=False):
174+
exclude_timers=False, **kwargs):
164175
"""Loads all stats-files found in path into a list of JobStats objects"""
165176
jobstats = []
166177
auxpat = (r"(?P<module>[^-]+)-(?P<input>[^-]+)-(?P<triple>[^-]+)" +
@@ -213,23 +224,25 @@ def load_stats_dir(path, select_module=[], select_stat=[],
213224
return jobstats
214225

215226

216-
def merge_all_jobstats(jobstats, select_module=[], group_by_module=False):
227+
def merge_all_jobstats(jobstats, select_module=[], group_by_module=False,
228+
merge_by="sum", **kwargs):
217229
"""Does a pairwise merge of the elements of list of jobs"""
218230
m = None
219231
if len(select_module) > 0:
220232
jobstats = filter(lambda j: j.module in select_module, jobstats)
221233
if group_by_module:
222234
def keyfunc(j):
223235
return j.module
236+
jobstats = list(jobstats)
224237
jobstats.sort(key=keyfunc)
225238
prefixed = []
226239
for mod, group in itertools.groupby(jobstats, keyfunc):
227-
groupmerge = merge_all_jobstats(group)
240+
groupmerge = merge_all_jobstats(group, merge_by=merge_by)
228241
prefixed.append(groupmerge.prefixed_by(mod))
229242
jobstats = prefixed
230243
for j in jobstats:
231244
if m is None:
232245
m = j
233246
else:
234-
m = m.merged_with(j)
247+
m = m.merged_with(j, merge_by=merge_by)
235248
return m

utils/process-stats-dir.py

Lines changed: 23 additions & 65 deletions
Original file line number | Diff line number | Diff line change
@@ -36,24 +36,15 @@
3636
def load_paired_stats_dirs(args):
3737
assert(len(args.remainder) == 2)
3838
paired_stats = []
39-
mod = args.select_module
40-
stat = args.select_stat
41-
xt = args.exclude_timers
4239
(old, new) = args.remainder
4340
for p in sorted(os.listdir(old)):
4441
full_old = os.path.join(old, p)
4542
full_new = os.path.join(new, p)
4643
if not (os.path.exists(full_old) and os.path.isdir(full_old) and
4744
os.path.exists(full_new) and os.path.isdir(full_new)):
4845
continue
49-
old_stats = load_stats_dir(full_old,
50-
select_module=mod,
51-
select_stat=stat,
52-
exclude_timers=xt)
53-
new_stats = load_stats_dir(full_new,
54-
select_module=mod,
55-
select_stat=stat,
56-
exclude_timers=xt)
46+
old_stats = load_stats_dir(full_old, **vars(args))
47+
new_stats = load_stats_dir(full_new, **vars(args))
5748
if len(old_stats) == 0 or len(new_stats) == 0:
5849
continue
5950
paired_stats.append((p, (old_stats, new_stats)))
@@ -63,22 +54,14 @@ def load_paired_stats_dirs(args):
6354
def write_catapult_trace(args):
6455
allstats = []
6556
for path in args.remainder:
66-
allstats += load_stats_dir(path,
67-
select_module=args.select_module,
68-
select_stat=args.select_stat,
69-
exclude_timers=args.exclude_timers)
57+
allstats += load_stats_dir(path, **vars(args))
7058
json.dump([s.to_catapult_trace_obj() for s in allstats], args.output)
7159

7260

7361
def write_lnt_values(args):
7462
for d in args.remainder:
75-
stats = load_stats_dir(d,
76-
select_module=args.select_module,
77-
select_stat=args.select_stat,
78-
exclude_timers=args.exclude_timers)
79-
merged = merge_all_jobstats(stats,
80-
select_module=args.select_module,
81-
group_by_module=args.group_by_module)
63+
stats = load_stats_dir(d, **vars(args))
64+
merged = merge_all_jobstats(stats, **vars(args))
8265
j = merged.to_lnt_test_obj(args)
8366
if args.lnt_submit is None:
8467
json.dump(j, args.output, indent=4)
@@ -107,16 +90,11 @@ def show_paired_incrementality(args):
10790
out = csv.DictWriter(args.output, fieldnames, dialect='excel-tab')
10891
out.writeheader()
10992

110-
sel = args.select_module
11193
for (name, (oldstats, newstats)) in load_paired_stats_dirs(args):
11294
olddriver = merge_all_jobstats((x for x in oldstats
113-
if x.is_driver_job()),
114-
select_module=sel,
115-
group_by_module=args.group_by_module)
95+
if x.is_driver_job()), **vars(args))
11696
newdriver = merge_all_jobstats((x for x in newstats
117-
if x.is_driver_job()),
118-
select_module=sel,
119-
group_by_module=args.group_by_module)
97+
if x.is_driver_job()), **vars(args))
12098
if olddriver is None or newdriver is None:
12199
continue
122100
oldpct = olddriver.incrementality_percentage()
@@ -137,10 +115,7 @@ def show_incrementality(args):
137115
out.writeheader()
138116

139117
for path in args.remainder:
140-
stats = load_stats_dir(path,
141-
select_module=args.select_module,
142-
select_stat=args.select_stat,
143-
exclude_timers=args.exclude_timers)
118+
stats = load_stats_dir(path, **vars(args))
144119
for s in stats:
145120
if s.is_driver_job():
146121
pct = s.incrementality_percentage()
@@ -223,16 +198,12 @@ def set_csv_baseline(args):
223198
with open(args.set_csv_baseline, "wb") as f:
224199
out = csv.DictWriter(f, fieldnames, dialect='excel-tab',
225200
quoting=csv.QUOTE_NONNUMERIC)
226-
mod = args.select_module
227-
stat = args.select_stat
228-
xt = args.exclude_timers
229201
m = merge_all_jobstats((s for d in args.remainder
230-
for s in load_stats_dir(d,
231-
select_module=mod,
232-
select_stat=stat,
233-
exclude_timers=xt)),
234-
select_module=mod,
235-
group_by_module=args.group_by_module)
202+
for s in load_stats_dir(d, **vars(args))),
203+
**vars(args))
204+
if m is None:
205+
print "no stats found"
206+
return 1
236207
changed = 0
237208
newepoch = int(time.time())
238209
for name in sorted(m.stats.keys()):
@@ -303,16 +274,9 @@ def write_comparison(args, old_stats, new_stats):
303274

304275
def compare_to_csv_baseline(args):
305276
old_stats = read_stats_dict_from_csv(args.compare_to_csv_baseline)
306-
mod = args.select_module
307-
stat = args.select_stat
308-
xt = args.exclude_timers
309277
m = merge_all_jobstats((s for d in args.remainder
310-
for s in load_stats_dir(d,
311-
select_module=mod,
312-
select_stat=stat,
313-
exclude_timers=xt)),
314-
select_module=mod,
315-
group_by_module=args.group_by_module)
278+
for s in load_stats_dir(d, **vars(args))),
279+
**vars(args))
316280
old_stats = dict((k, v) for (k, (_, v)) in old_stats.items())
317281
new_stats = m.stats
318282

@@ -325,20 +289,10 @@ def compare_stats_dirs(args):
325289
raise ValueError("Expected exactly 2 stats-dirs")
326290

327291
(old, new) = args.remainder
328-
old_stats = merge_all_jobstats(
329-
load_stats_dir(old,
330-
select_module=args.select_module,
331-
select_stat=args.select_stat,
332-
exclude_timers=args.exclude_timers),
333-
select_module=args.select_module,
334-
group_by_module=args.group_by_module)
335-
new_stats = merge_all_jobstats(
336-
load_stats_dir(new,
337-
select_module=args.select_module,
338-
select_stat=args.select_stat,
339-
exclude_timers=args.exclude_timers),
340-
select_module=args.select_module,
341-
group_by_module=args.group_by_module)
292+
old_stats = merge_all_jobstats(load_stats_dir(old, **vars(args)),
293+
**vars(args))
294+
new_stats = merge_all_jobstats(load_stats_dir(new, **vars(args)),
295+
**vars(args))
342296

343297
return write_comparison(args, old_stats.stats, new_stats.stats)
344298

@@ -395,6 +349,10 @@ def main():
395349
default=False,
396350
action="store_true",
397351
help="Sort comparison results in descending order")
352+
parser.add_argument("--merge-by",
353+
default="sum",
354+
type=str,
355+
help="Merge identical metrics by (sum|min|max)")
398356
parser.add_argument("--markdown",
399357
default=False,
400358
action="store_true",

0 commit comments

Comments
 (0)