Skip to content

Commit 7371d27

Browse files
authored
Merge pull request #17801 from gottesmm/pr-0bba02517c6062d9cf88f1fe694e0d757716e79b
2 parents 472031f + 31b8c29 commit 7371d27

File tree

2 files changed

+143
-0
lines changed

2 files changed

+143
-0
lines changed
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
#!/usr/bin/env python
2+
3+
# This is a simple script that reads in a csv file, selects a before column and
4+
# an after column, and prints the "s-curve" data of their after/before ratios.
5+
6+
import argparse
7+
import csv
8+
import sys
9+
10+
11+
def get_data(input_file, before_column, after_column):
    """Return the s-curve points for a before/after column pair of a csv.

    Every csv row contributes the ratio ``after / before``. The ratios are
    sorted in ascending order and each one is paired with its normalized
    rank, giving a list of ``(rank / (n - 1), ratio)`` tuples.

    input_file: an open text file (or any iterable of csv lines) whose first
        line is a header naming the columns.
    before_column: header name of the column holding the "before" value.
    after_column: header name of the column holding the "after" value.

    Returns [] when the csv has no data rows and [(0.0, ratio)] when it has
    exactly one.

    Raises KeyError if a column name is not in the header, ValueError if a
    cell cannot be parsed as a float, and ZeroDivisionError if a "before"
    value is 0.
    """

    def get_selected_csv_rows(input_file, before_column, after_column):
        # Lazily produce the after/before ratio of each row.
        for row in csv.DictReader(input_file):
            before = float(row[before_column])
            after = float(row[after_column])
            yield after / before

    def f(input_data):
        result = list(enumerate(sorted(input_data)))
        # With a single row, len(result) - 1 is 0 and the normalization
        # below would divide by zero. Clamp the divisor to 1 so the lone
        # point is placed at x == 0.0.
        count = float(max(len(result) - 1, 1))
        return [(x[0] / count, x[1]) for x in result]

    return f(get_selected_csv_rows(input_file, before_column, after_column))
26+
27+
28+
def main():
    """Command-line entry point: print the s-curve of two csv columns.

    Parses the input csv file and the before/after column names from the
    command line, computes the s-curve points with get_data and writes them
    to stdout as a csv with the columns 'N/total' and 'New/Old'.
    """
    parser = argparse.ArgumentParser(description="""

A script that reads in a csv file, splices out selected before/after
column, and then outputs a new csv file with that data in s-curve form. An
s-curve is a graph where one sorts the output %-change and graphs the %-n
vs %-change.

NOTE: We assume that the csv has a csv header that maps to the before and
after column names passed in.
""")

    parser.add_argument('input_file', type=argparse.FileType('r'))
    parser.add_argument('before_column_name', type=str)
    parser.add_argument('after_column_name', type=str)

    options = parser.parse_args()

    # Compute all points before emitting anything so that a failure while
    # reading the input leaves stdout untouched.
    scurve = get_data(options.input_file, options.before_column_name,
                      options.after_column_name)

    writer = csv.DictWriter(sys.stdout, fieldnames=['N/total', 'New/Old'])
    writer.writeheader()
    for n_over_total, new_over_old in scurve:
        writer.writerow({'N/total': n_over_total, 'New/Old': new_over_old})
52+
53+
54+
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()

utils/dev-scripts/scurve_printer.py

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
#!/usr/bin/env python
2+
3+
# This is a simple script that takes in an scurve file produced by
4+
# csvcolumn_to_scurve and produces a png graph of the scurve.
5+
6+
import argparse
7+
import csv
8+
9+
import matplotlib.pyplot as plt
10+
11+
import numpy as np
12+
13+
# Column headers of the s-curve csv, in (x, y) order.
FIELDS = ['N/total', 'New/Old']


def get_data(input_file):
    """Yield one (x, y) float pair per data row of an s-curve csv.

    input_file: an open text file (or any iterable of csv lines) whose header
        contains the two FIELDS columns.

    Raises KeyError if a FIELDS column is missing from the header and
    ValueError if a cell cannot be parsed as a float.
    """
    # FIELDS is only read, never rebound, so no `global` statement is needed.
    x_field, y_field = FIELDS
    for row in csv.DictReader(input_file):
        yield (float(row[x_field]), float(row[y_field]))
20+
21+
22+
def main():
    """Command-line entry point: plot an s-curve csv and save the graph.

    Reads the (x, y) points of the input csv via get_data, draws them as a
    line with scatter markers, chooses y bounds that are symmetric around 1.0
    (unless overridden with -y-axis-min / -y-axis-max) and saves the figure to
    output_file.
    """
    p = argparse.ArgumentParser()
    p.add_argument('input_csv_file', type=argparse.FileType('r'))
    p.add_argument('output_file', type=str)
    p.add_argument('-y-axis-num-tick-marks', type=int,
                   help='The number of y tick marks to use above/below zero.')
    p.add_argument('-y-axis-min', type=float,
                   help='Override the min y axis that we use')
    p.add_argument('-y-axis-max', type=float,
                   help='Override the max y axis that we use')
    p.add_argument('-title', type=str,
                   help='Title of the graph')
    p.add_argument('-x-axis-title', type=str,
                   help='The title to use on the x-axis of the graph')
    p.add_argument('-y-axis-title', type=str,
                   help='The title to use on the y-axis of the graph')

    args = p.parse_args()

    data = np.array(list(get_data(args.input_csv_file)))
    if data.size == 0:
        # Without rows the array is 1-D and the column slicing below would
        # fail with an opaque IndexError; report the real problem instead.
        p.error('input csv file contains no data rows')
    assert np.all(data >= 0)

    x = data[:, 0]
    y = data[:, 1]

    x_axis_title = args.x_axis_title or FIELDS[0]
    y_axis_title = args.y_axis_title or FIELDS[1]
    title = args.title or "{} vs {}".format(x_axis_title, y_axis_title)

    fig, ax = plt.subplots()
    fig.set_size_inches(18.5, 18.5)

    fig.suptitle(title, fontsize=20)
    ax.set_xlabel(x_axis_title, fontsize=20)
    ax.set_ylabel(y_axis_title, fontsize=20)
    ax.plot(x, y)
    ax.scatter(x, y)

    # To get good bounds, we:
    #
    # 1. Re-center our data at 0 by subtracting 1. This will give us the %
    #    difference in between new and old (i.e. (new - old)/old)
    #
    # 2. Then we take the maximum absolute delta from zero and round to a
    #    multiple of 5 away from zero. Let's call this value limit.
    #
    # 3. We set [min_y, max_y] = [1.0 - limit, 1.0 + limit]
    recentered_data = y - 1.0
    max_magnitude = int(np.max(np.abs(recentered_data)) * 100.0)
    y_limit = float(((max_magnitude // 5) + 1) * 5) * 0.01

    ax.set_xlim(0.0, 1.0)
    # Test the overrides against None instead of relying on truthiness: an
    # explicit override of 0 is falsy and would otherwise be ignored.
    y_min = args.y_axis_min
    if y_min is None:
        y_min = 1.0 - y_limit
    y_max = args.y_axis_max
    if y_max is None:
        y_max = 1.0 + y_limit
    assert(y_min <= y_max)
    ax.set_ylim(y_min, y_max)
    ax.grid(True)
    ax.xaxis.set_ticks(np.arange(0.0, 1.0, 0.05))
    if args.y_axis_num_tick_marks:
        y_delta = y_max - y_min
        y_tickmark_frequency = y_delta / float(args.y_axis_num_tick_marks)
        ax.yaxis.set_ticks(np.arange(y_min, y_max, y_tickmark_frequency))
    plt.savefig(args.output_file)
85+
86+
87+
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)