Skip to content

[Stress Tester] Use new duration measurement capability of stress tester to analyse code completion durations #539

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
140 changes: 140 additions & 0 deletions analyze_request_durations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
#!/usr/bin/env python

from __future__ import print_function

import argparse
import json
import os

# This value was empirically determined on an iMac Pro with a fairly small sample size.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"emirically" :)

# All measurements in seconds should be taken with extreme care
# Estimated number of instructions a machine retires per second; used only to
# convert instruction counts into rough wall-clock estimates for display.
INSTRUCTIONS_PER_SECOND = 6693088549.09358
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could probably just round it if it doesn't really matter anyway 🤷

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, I’ve removed the decimal digits. Not sure if rounding it any further gives much benefit. Might as well use the most accurate number we have at the moment.


# The length of the bars in the histograms, in characters (a bucket holding
# the maximum count gets a full-length bar).
HISTOGRAM_BAR_LENGTH = 40


def parse_args():
    """Parse command-line arguments.

    The single positional argument is the path to the JSON file containing
    the recorded request durations.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('request_durations_file')
    parsed = arg_parser.parse_args()
    return parsed


def print_stats(entries):
    """
    Print aggregate statistics and a log-scale histogram for a set of
    request-duration entries.

    entries is a list of dictionaries from the JSON file under the
    files.<file-name>.<request-name> path containing the following keys
    - totalInstructions
    - logHistogram
    - requestsExecuted

    Prints 'No requests executed' and returns early if no entry recorded
    any executed request.
    """

    # Create a log histogram for all entries (bucket -> aggregated count)
    aggregated_completion_log_histogram = {}
    # Adds totalInstruction for each entry
    total_instructions = 0
    # Adds requestsExecuted for each entry
    total_requests_executed = 0
    for entry in entries:
        total_instructions += int(entry['totalInstructions'])
        total_requests_executed += int(entry['requestsExecuted'])
        # items() instead of the Python-2-only iteritems() so the script
        # runs under both Python 2 and Python 3.
        for (bucket, count) in entry['logHistogram'].items():
            bucket = int(bucket)
            count = int(count)
            if bucket not in aggregated_completion_log_histogram:
                aggregated_completion_log_histogram[bucket] = 0
            aggregated_completion_log_histogram[bucket] += count

    if total_requests_executed == 0:
        print('No requests executed')
        return

    print('Requests: {}'.format(total_requests_executed))

    average_instructions_per_request = total_instructions / total_requests_executed
    seconds_estimate = average_instructions_per_request / INSTRUCTIONS_PER_SECOND
    print('Average: {:.0e} instr (ca. {:.3f}s)'
          .format(average_instructions_per_request, seconds_estimate))

    print('\nHistogram:')

    min_bucket = min(aggregated_completion_log_histogram)
    max_bucket = max(aggregated_completion_log_histogram)
    max_count = max(aggregated_completion_log_histogram.values())

    # Sums up the percentages for each entry to make it easier to spot the
    # n-th percentile in the histogram
    running_percentage_sum = 0
    for bucket in range(min_bucket, max_bucket + 1):
        count = aggregated_completion_log_histogram.get(bucket, 0)
        # Bucket b counts requests that took < 2**b instructions.
        executed_instructions = pow(2, bucket)
        estimated_seconds = executed_instructions / INSTRUCTIONS_PER_SECOND
        # Floor division: the result is used as a string repeat count, which
        # must be an int. True division would yield a float on Python 3 and
        # make `'*' * bar_fill` raise a TypeError.
        bar_fill = count * HISTOGRAM_BAR_LENGTH // max_count
        bar = '*' * bar_fill + ' ' * (HISTOGRAM_BAR_LENGTH - bar_fill)
        percentage = float(count) / total_requests_executed * 100
        running_percentage_sum += percentage
        print('< {:5.0e} instr (ca. {:6.3f}s): {} {:4.1f}% (< {:5.1f}%)'
              .format(executed_instructions, estimated_seconds, bar, percentage,
                      running_percentage_sum))


def print_stats_for_all_files(request_durations, request_name):
    """
    Print aggregated statistics for request_name across all analyzed files.

    request_durations is the parsed top-level JSON object; the per-file
    stats live under its 'files' key.
    """
    print('\n---------- All {} requests ----------\n'.format(request_name))
    # values() instead of the Python-2-only iteritems(): the file name is not
    # needed here, and items() iteration would crash under Python 3.
    all_request_entries = [file_stats[request_name]
                           for file_stats in request_durations['files'].values()]
    print_stats(all_request_entries)


def print_slowest_files(request_durations, request_name, num_files):
    """
    Print per-file statistics for the num_files files with the highest
    average number of instructions per request_name request.

    Files in which no request was executed are skipped (this also avoids a
    division by zero when computing the average).
    """
    print('\n---------- {} slowest files for {} requests ----------\n'
          .format(num_files, request_name))

    file_and_average = []
    # items() instead of the Python-2-only iteritems() so the script runs
    # under both Python 2 and Python 3.
    for (file_name, file_stats) in request_durations['files'].items():
        instr_executed_in_file = int(file_stats[request_name]['totalInstructions'])
        requests_in_file = int(file_stats[request_name]['requestsExecuted'])
        if requests_in_file > 0:
            average = instr_executed_in_file / requests_in_file
            file_and_average.append((file_name, average))
    # Sort by average descending so the slowest files come first.
    file_and_average = sorted(file_and_average, key=lambda x: x[1], reverse=True)

    for (file_name, average_request_duration) in file_and_average[0:num_files]:
        print('-- {} --\n'.format(file_name))
        print_stats([request_durations['files'][file_name][request_name]])
        print('\n')


def print_seconds_disclaimer():
    """Print a warning about the accuracy of the seconds-based estimates."""
    # Fixed the duplicated words "number of number of" in the original
    # user-facing message.
    print('''IMPORTANT:
All measurements printed by this script were done in number of
instructions. Measurements in seconds are computed assuming that all machines
execute the same number of instructions per second at all times, which is not
correct. Thus all measurements in seconds should be taken with extreme care.
As they scale linearly with the number of instructions, it is safe to compare
values in seconds output by this script.
''')


def analyze_request_durations(file):
    """
    Analyze the request durations recorded in the JSON file at the given
    path: dump the raw JSON, then print aggregated statistics and the
    slowest files for CodeComplete requests.

    Prints a notice and returns early if the file does not exist.
    """
    # NOTE: the parameter name 'file' shadows the Python 2 builtin; kept for
    # backward compatibility with callers using the keyword.
    if not os.path.exists(file):
        print('{} does not exist. No request durations to analyze.'.format(file))
        return
    print_seconds_disclaimer()
    print('\n---------- Raw durations for further analysis ----------\n')
    # Read the file once: dump the raw JSON for offline analysis, then parse
    # the same contents for the aggregated reports below.
    with open(file, 'r') as json_file:
        contents = json_file.read()
    print(contents)

    # json.loads without an 'encoding' argument: that parameter was ignored
    # on Python 3 and removed in 3.9, where passing it raises a TypeError.
    request_durations = json.loads(contents)
    print_stats_for_all_files(request_durations, 'CodeComplete')
    print_slowest_files(request_durations, 'CodeComplete', 10)
print_slowest_files(request_durations, 'CodeComplete', 10)


def main():
    """Script entry point: analyze the durations file named on the command line."""
    analyze_request_durations(parse_args().request_durations_file)


if __name__ == '__main__':
    # Run only when executed as a script, so the module can be imported
    # (e.g. for analyze_request_durations) without side effects.
    main()
10 changes: 8 additions & 2 deletions run_sk_stress_test
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import subprocess
import argparse
import platform
import codecs
from analyze_request_durations import analyze_request_durations

import common

Expand Down Expand Up @@ -230,9 +231,10 @@ class StressTesterRunner(object):
# temporary file paths
filtered_projects = os.path.join(script_dir, 'stress-tester-projects.json')
results = os.path.join(script_dir, 'stress-tester-results.json')
request_durations = os.path.join(script_dir, 'stress-tester-request-durations.json')

# remove temporary files if they already exist
self._cleanup([filtered_projects, results])
self._cleanup([filtered_projects, results, request_durations])

run_env = {
'SK_STRESS_TEST': self.stress_tester,
Expand All @@ -243,7 +245,9 @@ class StressTesterRunner(object):
'SK_XFAILS_PATH': self.xfails_path,
'SK_STRESS_ACTIVE_CONFIG': self.swift_branch,
'SK_STRESS_SUPPRESS_OUTPUT': 'true',
'SK_STRESS_REWRITE_MODES': 'none concurrent insideout'}
'SK_STRESS_REWRITE_MODES': 'none concurrent insideout',
'SK_STRESS_REQUEST_DURATIONS_FILE': request_durations,
}
run_env.update(os.environ)
run_cmd = ['./runner.py',
'--projects', filtered_projects,
Expand All @@ -264,6 +268,8 @@ class StressTesterRunner(object):
except common.ExecuteCommandFailure:
self.compat_runner_failed = True

analyze_request_durations(request_durations)

success = self._process_output(results, self.xfails_path)

return success
Expand Down