[stats] Move jobstats support code to its own module.

graydon · graydon · commit 93a832de7d70 · 2017-09-01T16:26:17.000-07:00
diff --git a/utils/jobstats/__init__.py b/utils/jobstats/__init__.py
@@ -0,0 +1,23 @@
+#!/usr/bin/python
+#
+# ==-- jobstats - support for reading the contents of stats dirs --==#
+#
+# This source file is part of the Swift.org open source project
+#
+# Copyright (c) 2014-2017 Apple Inc. and the Swift project authors
+# Licensed under Apache License v2.0 with Runtime Library Exception
+#
+# See https://swift.org/LICENSE.txt for license information
+# See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
+#
+# ==------------------------------------------------------------------------==#
+#
+# This module contains subroutines for loading object-representations of one or
+# more directories generated by `swiftc -stats-output-dir`.
+
+__author__ = 'Graydon Hoare'
+__email__ = 'ghoare@apple.com'
+__versioninfo__ = (0, 1, 0)
+__version__ = '.'.join(str(v) for v in __versioninfo__)
+
+from .jobstats import JobStats, load_stats_dir, merge_all_jobstats # noqa
diff --git a/utils/jobstats/jobstats.py b/utils/jobstats/jobstats.py
@@ -0,0 +1,204 @@
+#!/usr/bin/python
+#
+# ==-- jobstats - support for reading the contents of stats dirs --==#
+#
+# This source file is part of the Swift.org open source project
+#
+# Copyright (c) 2014-2017 Apple Inc. and the Swift project authors
+# Licensed under Apache License v2.0 with Runtime Library Exception
+#
+# See https://swift.org/LICENSE.txt for license information
+# See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
+#
+# ==------------------------------------------------------------------------==#
+#
+# This file contains subroutines for loading object-representations of one or
+# more directories generated by `swiftc -stats-output-dir`.
+
+import datetime
+import json
+import os
+import random
+import re
+
+
+class JobStats:
+    """Object holding the stats of a single job run during a compilation,
+    corresponding to a single JSON file produced by a single job process
+    passed -stats-output-dir."""
+
+    def __init__(self, jobkind, jobid, module, start_usec, dur_usec,
+                 jobargs, stats):
+        self.jobkind = jobkind
+        self.jobid = jobid
+        self.module = module
+        self.start_usec = start_usec
+        self.dur_usec = dur_usec
+        self.jobargs = jobargs
+        self.stats = stats
+
+    def is_driver_job(self):
+        """Return true iff self measures a driver job"""
+        return self.jobkind == 'driver'
+
+    def is_frontend_job(self):
+        """Return true iff self measures a frontend job"""
+        return self.jobkind == 'frontend'
+
+    def driver_jobs_ran(self):
+        """Return the count of a driver job's ran sub-jobs"""
+        assert(self.is_driver_job())
+        return self.stats.get("Driver.NumDriverJobsRun", 0)
+
+    def driver_jobs_skipped(self):
+        """Return the count of a driver job's skipped sub-jobs"""
+        assert(self.is_driver_job())
+        return self.stats.get("Driver.NumDriverJobsSkipped", 0)
+
+    def driver_jobs_total(self):
+        """Return the total count of a driver job's ran + skipped sub-jobs"""
+        assert(self.is_driver_job())
+        return self.driver_jobs_ran() + self.driver_jobs_skipped()
+
+    def merged_with(self, other):
+        """Return a new JobStats, holding the merger of self and other"""
+        merged_stats = {}
+        for k, v in self.stats.items() + other.stats.items():
+            merged_stats[k] = v + merged_stats.get(k, 0.0)
+        merged_kind = self.jobkind
+        if other.jobkind != merged_kind:
+            merged_kind = "<merged>"
+        merged_module = self.module
+        if other.module != merged_module:
+            merged_module = "<merged>"
+        merged_start = min(self.start_usec, other.start_usec)
+        merged_end = max(self.start_usec + self.dur_usec,
+                         other.start_usec + other.dur_usec)
+        merged_dur = merged_end - merged_start
+        return JobStats(merged_kind, random.randint(0, 1000000000),
+                        merged_module, merged_start, merged_dur,
+                        self.jobargs + other.jobargs, merged_stats)
+
+    def incrementality_percentage(self):
+        """Assuming the job is a driver job, return the amount of
+        jobs that actually ran, as a percentage of the total number."""
+        assert(self.is_driver_job())
+        ran = self.driver_jobs_ran()
+        total = self.driver_jobs_total()
+        return round((float(ran) / float(total)) * 100.0, 2)
+
+    def to_catapult_trace_obj(self):
+        """Return a JSON-formattable object fitting chrome's
+        'catapult' trace format"""
+        return {"name": self.module,
+                "cat": self.jobkind,
+                "ph": "X",              # "X" == "complete event"
+                "pid": self.jobid,
+                "tid": 1,
+                "ts": self.start_usec,
+                "dur": self.dur_usec,
+                "args": self.jobargs}
+
+    def start_timestr(self):
+        """Return a formatted timestamp of the job's start-time"""
+        t = datetime.datetime.fromtimestamp(self.start_usec / 1000000.0)
+        return t.strftime("%Y-%m-%d %H:%M:%S")
+
+    def end_timestr(self):
+        """Return a formatted timestamp of the job's end-time"""
+        t = datetime.datetime.fromtimestamp((self.start_usec +
+                                             self.dur_usec) / 1000000.0)
+        return t.strftime("%Y-%m-%d %H:%M:%S")
+
+    def pick_lnt_metric_suffix(self, metric_name):
+        """Guess an appropriate LNT metric type for a given metric name"""
+        if "BytesOutput" in metric_name:
+            return "code_size"
+        if "RSS" in metric_name or "BytesAllocated" in metric_name:
+            return "mem"
+        return "compile"
+
+    def to_lnt_test_obj(self, args):
+        """Return a JSON-formattable object fitting LNT's 'submit' format"""
+        run_info = {
+            "run_order": str(args.lnt_order),
+            "tag": str(args.lnt_tag),
+        }
+        run_info.update(dict(args.lnt_run_info))
+        stats = self.stats
+        return {
+            "Machine":
+            {
+                "Name": args.lnt_machine,
+                "Info": dict(args.lnt_machine_info)
+            },
+            "Run":
+            {
+                "Start Time": self.start_timestr(),
+                "End Time": self.end_timestr(),
+                "Info": run_info
+            },
+            "Tests":
+            [
+                {
+                    "Data": [v],
+                    "Info": {},
+                    "Name": "%s.%s.%s.%s" % (args.lnt_tag, self.module,
+                                             k, self.pick_lnt_metric_suffix(k))
+                }
+                for (k, v) in stats.items()
+            ]
+        }
+
+
+def load_stats_dir(path):
+    """Loads all stats-files found in path into a list of JobStats objects"""
+    jobstats = []
+    auxpat = (r"(?P<module>[^-]+)-(?P<input>[^-]+)-(?P<triple>[^-]+)" +
+              r"-(?P<out>[^-]+)-(?P<opt>[^-]+)")
+    fpat = (r"^stats-(?P<start>\d+)-swift-(?P<kind>\w+)-" +
+            auxpat +
+            r"-(?P<pid>\d+)(-.*)?.json$")
+    for root, dirs, files in os.walk(path):
+        for f in files:
+            m = re.match(fpat, f)
+            if m:
+                # NB: "pid" in fpat is a random number, not unix pid.
+                mg = m.groupdict()
+                jobkind = mg['kind']
+                jobid = int(mg['pid'])
+                start_usec = int(mg['start'])
+                module = mg["module"]
+                jobargs = [mg["input"], mg["triple"], mg["out"], mg["opt"]]
+
+                j = json.load(open(os.path.join(root, f)))
+                dur_usec = 1
+                patstr = (r"time\.swift-" + jobkind + r"\." + auxpat +
+                          r"\.wall$")
+                pat = re.compile(patstr)
+                stats = dict()
+                for (k, v) in j.items():
+                    if k.startswith("time."):
+                        v = int(1000000.0 * float(v))
+                    stats[k] = v
+                    tm = re.match(pat, k)
+                    if tm:
+                        dur_usec = v
+
+                e = JobStats(jobkind=jobkind, jobid=jobid,
+                             module=module, start_usec=start_usec,
+                             dur_usec=dur_usec, jobargs=jobargs,
+                             stats=stats)
+                jobstats.append(e)
+    return jobstats
+
+
+def merge_all_jobstats(jobstats):
+    """Does a pairwise merge of the elements of list of jobs"""
+    m = None
+    for j in jobstats:
+        if m is None:
+            m = j
+        else:
+            m = m.merged_with(j)
+    return m
diff --git a/utils/process-stats-dir.py b/utils/process-stats-dir.py