@@ -26,7 +26,67 @@ class JobMetrics:
26
26
workflow_id : int
27
27
28
28
29
- def get_metrics (github_repo : github .Repository , workflows_to_track : dict [str , int ]):
29
@dataclass
class GaugeMetric:
    """A single point-in-time gauge sample to be pushed to the metrics backend.

    Used alongside JobMetrics as the element type of the metric batches
    produced by the collection functions in this module.
    """

    # Metric identifier as it will be reported (lowercased/underscored later
    # by the uploader).
    name: str
    # The sampled gauge value.
    value: int
    # Sample timestamp, in nanoseconds since the epoch (time.time_ns()).
    time_ns: int
34
+
35
+
36
def get_sampled_workflow_metrics(github_repo: github.Repository):
    """Gets global statistics about the Github workflow queue.

    Args:
      github_repo: A github repo object to use to query the relevant information.

    Returns:
      Returns a list of GaugeMetric objects, containing the relevant metrics about
      the workflow.
    """

    # Other states are available (pending, waiting, etc), but the meaning
    # is not documented (See #70540).
    # "queued" seems to be the info we want.
    # Count lazily with sum() rather than materializing a throwaway list
    # just to take its len().
    queued_workflow_count = sum(
        1
        for run in github_repo.get_workflow_runs(status="queued")
        if run.name in WORKFLOWS_TO_TRACK
    )
    running_workflow_count = sum(
        1
        for run in github_repo.get_workflow_runs(status="in_progress")
        if run.name in WORKFLOWS_TO_TRACK
    )

    workflow_metrics = []
    workflow_metrics.append(
        GaugeMetric(
            "workflow_queue_size",
            queued_workflow_count,
            time.time_ns(),
        )
    )
    workflow_metrics.append(
        GaugeMetric(
            "running_workflow_count",
            running_workflow_count,
            time.time_ns(),
        )
    )
    # Always send a heartbeat metric so we can monitor if this container is
    # still able to log to Grafana.
    # NOTE(review): main() appends the same heartbeat metric each cycle as
    # well, so it is currently emitted twice per scrape — confirm which one
    # should be removed.
    workflow_metrics.append(
        GaugeMetric("metrics_container_heartbeat", 1, time.time_ns())
    )
    return workflow_metrics
85
+
86
+
87
+ def get_per_workflow_metrics (
88
+ github_repo : github .Repository , workflows_to_track : dict [str , int ]
89
+ ):
30
90
"""Gets the metrics for specified Github workflows.
31
91
32
92
This function takes in a list of workflows to track, and optionally the
@@ -43,14 +103,14 @@ def get_metrics(github_repo: github.Repository, workflows_to_track: dict[str, in
43
103
Returns a list of JobMetrics objects, containing the relevant metrics about
44
104
the workflow.
45
105
"""
46
- workflow_runs = iter (github_repo .get_workflow_runs ())
47
-
48
106
workflow_metrics = []
49
107
50
108
workflows_to_include = set (workflows_to_track .keys ())
51
109
52
- while len (workflows_to_include ) > 0 :
53
- workflow_run = next (workflow_runs )
110
+ for workflow_run in iter (github_repo .get_workflow_runs ()):
111
+ if len (workflows_to_include ) == 0 :
112
+ break
113
+
54
114
if workflow_run .status != "completed" :
55
115
continue
56
116
@@ -139,12 +199,27 @@ def upload_metrics(workflow_metrics, metrics_userid, api_key):
139
199
metrics_userid: The userid to use for the upload.
140
200
api_key: The API key to use for the upload.
141
201
"""
202
+
203
+ if len (workflow_metrics ) == 0 :
204
+ print ("No metrics found to upload." , file = sys .stderr )
205
+ return
206
+
142
207
metrics_batch = []
143
208
for workflow_metric in workflow_metrics :
144
- workflow_formatted_name = workflow_metric .job_name .lower ().replace (" " , "_" )
145
- metrics_batch .append (
146
- f"{ workflow_formatted_name } queue_time={ workflow_metric .queue_time } ,run_time={ workflow_metric .run_time } ,status={ workflow_metric .status } { workflow_metric .created_at_ns } "
147
- )
209
+ if isinstance (workflow_metric , GaugeMetric ):
210
+ name = workflow_metric .name .lower ().replace (" " , "_" )
211
+ metrics_batch .append (
212
+ f"{ name } value={ workflow_metric .value } { workflow_metric .time_ns } "
213
+ )
214
+ elif isinstance (workflow_metric , JobMetrics ):
215
+ name = workflow_metric .job_name .lower ().replace (" " , "_" )
216
+ metrics_batch .append (
217
+ f"{ name } queue_time={ workflow_metric .queue_time } ,run_time={ workflow_metric .run_time } ,status={ workflow_metric .status } { workflow_metric .created_at_ns } "
218
+ )
219
+ else :
220
+ raise ValueError (
221
+ f"Unsupported object type { type (workflow_metric )} : { str (workflow_metric )} "
222
+ )
148
223
149
224
request_data = "\n " .join (metrics_batch )
150
225
response = requests .post (
@@ -176,16 +251,21 @@ def main():
176
251
# Enter the main loop. Every five minutes we wake up and dump metrics for
177
252
# the relevant jobs.
178
253
while True :
179
- current_metrics = get_metrics (github_repo , workflows_to_track )
180
- if len (current_metrics ) == 0 :
181
- print ("No metrics found to upload." , file = sys .stderr )
182
- continue
254
+ current_metrics = get_per_workflow_metrics (github_repo , workflows_to_track )
255
+ current_metrics += get_sampled_workflow_metrics (github_repo )
256
+ # Always send a heartbeat metric so we can monitor if this container is still able to log to Grafana.
257
+ current_metrics .append (
258
+ GaugeMetric ("metrics_container_heartbeat" , 1 , time .time_ns ())
259
+ )
183
260
184
261
upload_metrics (current_metrics , grafana_metrics_userid , grafana_api_key )
185
262
print (f"Uploaded { len (current_metrics )} metrics" , file = sys .stderr )
186
263
187
264
for workflow_metric in reversed (current_metrics ):
188
- workflows_to_track [workflow_metric .job_name ] = workflow_metric .workflow_id
265
+ if isinstance (workflow_metric , JobMetrics ):
266
+ workflows_to_track [
267
+ workflow_metric .job_name
268
+ ] = workflow_metric .workflow_id
189
269
190
270
time .sleep (SCRAPE_INTERVAL_SECONDS )
191
271
0 commit comments