5
5
import os
6
6
from dataclasses import dataclass
7
7
import sys
8
+ import collections
8
9
import logging
9
10
10
11
import github
@@ -232,6 +233,44 @@ def buildkite_get_metrics(
232
233
return output , last_recorded_build
233
234
234
235
236
def github_job_name_to_metric_name(workflow_name, job_name):
    """Map a GitHub workflow/job name pair to its pretty metric name.

    Args:
      workflow_name: the workflow name as reported by GitHub.
      job_name: the name of a job inside that workflow.

    Returns:
      The metric name string "<workflow_key>_<job_key>", where both keys
      come from the GITHUB_WORKFLOW_TO_TRACK / GITHUB_JOB_TO_TRACK
      lookup tables.

    Raises:
      KeyError: if the workflow or the job is not tracked.
    """
    wf_key = GITHUB_WORKFLOW_TO_TRACK[workflow_name]
    return "{}_{}".format(wf_key, GITHUB_JOB_TO_TRACK[wf_key][job_name])
240
+
241
+
242
def github_count_queued_running_workflows(workflow_list):
    """Returns the per-job count of running & queued jobs in the passed
    workflow list.

    Args:
      workflow_list: an iterable of workflow runs (e.g. the result of
        Repository.get_workflow_runs).

    Returns:
      A tuple, (per-job-queue-size, per-job-running-count), of
      collections.Counter objects. The key is the pretty metric name
      (see github_job_name_to_metric_name), and the value the count of
      jobs in the "queued" / "in_progress" state respectively.
    """
    queued_count = collections.Counter()
    running_count = collections.Counter()

    for workflow in workflow_list:
        if workflow.name not in GITHUB_WORKFLOW_TO_TRACK:
            continue

        workflow_key = GITHUB_WORKFLOW_TO_TRACK[workflow.name]
        for job in workflow.jobs():
            if job.name not in GITHUB_JOB_TO_TRACK[workflow_key]:
                continue
            # Reuse the shared mapping helper instead of rebuilding the
            # f-string inline, so metric names stay consistent with the
            # rest of the module.
            metric_name = github_job_name_to_metric_name(workflow.name, job.name)

            # A workflow reported as "queued" can still have some of its
            # jobs already running (and vice versa), so bucket on the
            # per-job status rather than the workflow status.
            if job.status == "queued":
                queued_count[metric_name] += 1
            elif job.status == "in_progress":
                running_count[metric_name] += 1
    return queued_count, running_count
272
+
273
+
235
274
def get_sampled_workflow_metrics (github_repo : github .Repository ):
236
275
"""Gets global statistics about the Github workflow queue
237
276
@@ -242,60 +281,26 @@ def get_sampled_workflow_metrics(github_repo: github.Repository):
242
281
Returns a list of GaugeMetric objects, containing the relevant metrics about
243
282
the workflow
244
283
"""
245
- queued_job_counts = {}
246
- running_job_counts = {}
247
-
248
284
# Other states are available (pending, waiting, etc), but the meaning
249
285
# is not documented (See #70540).
250
286
# "queued" seems to be the info we want.
251
- for queued_workflow in github_repo .get_workflow_runs (status = "queued" ):
252
- if queued_workflow .name not in GITHUB_WORKFLOW_TO_TRACK :
253
- continue
254
- for queued_workflow_job in queued_workflow .jobs ():
255
- job_name = queued_workflow_job .name
256
- # Workflows marked as queued can potentially only have some jobs
257
- # queued, so make sure to also count jobs currently in progress.
258
- if queued_workflow_job .status == "queued" :
259
- if job_name not in queued_job_counts :
260
- queued_job_counts [job_name ] = 1
261
- else :
262
- queued_job_counts [job_name ] += 1
263
- elif queued_workflow_job .status == "in_progress" :
264
- if job_name not in running_job_counts :
265
- running_job_counts [job_name ] = 1
266
- else :
267
- running_job_counts [job_name ] += 1
268
-
269
- for running_workflow in github_repo .get_workflow_runs (status = "in_progress" ):
270
- if running_workflow .name not in GITHUB_WORKFLOW_TO_TRACK :
271
- continue
272
- for running_workflow_job in running_workflow .jobs ():
273
- job_name = running_workflow_job .name
274
- if running_workflow_job .status != "in_progress" :
275
- continue
276
-
277
- if job_name not in running_job_counts :
278
- running_job_counts [job_name ] = 1
279
- else :
280
- running_job_counts [job_name ] += 1
287
+ queued_1 , running_1 = github_count_queued_running_workflows (
288
+ github_repo .get_workflow_runs (status = "queued" )
289
+ )
290
+ queued_2 , running_2 = github_count_queued_running_workflows (
291
+ github_repo .get_workflow_runs (status = "in_progress" )
292
+ )
281
293
282
294
workflow_metrics = []
283
- for queued_job in queued_job_counts :
295
+ for key , value in ( queued_1 + queued_2 ). items () :
284
296
workflow_metrics .append (
285
- GaugeMetric (
286
- f"workflow_queue_size_{ queued_job } " ,
287
- queued_job_counts [queued_job ],
288
- time .time_ns (),
289
- )
297
+ GaugeMetric (f"workflow_queue_size_{ key } " , value , time .time_ns ())
290
298
)
291
- for running_job in running_job_counts :
299
+ for key , value in ( running_1 + running_2 ). items () :
292
300
workflow_metrics .append (
293
- GaugeMetric (
294
- f"running_workflow_count_{ running_job } " ,
295
- running_job_counts [running_job ],
296
- time .time_ns (),
297
- )
301
+ GaugeMetric (f"running_workflow_count_{ key } " , value , time .time_ns ())
298
302
)
303
+
299
304
# Always send a hearbeat metric so we can monitor is this container is
300
305
# still able to log to Grafana.
301
306
workflow_metrics .append (
@@ -325,10 +330,12 @@ def get_per_workflow_metrics(github_repo: github.Repository, last_workflow_id: s
325
330
the workflow.
326
331
"""
327
332
workflow_metrics = []
328
- last_checked_workflow_id = last_workflow_id
329
-
333
+ last_recorded_workflow = None
330
334
for workflow_run in iter (github_repo .get_workflow_runs (status = "completed" )):
331
- last_checked_workflow_id = workflow_run .id
335
+ # Record the first workflow of this list as the most recent one.
336
+ if last_recorded_workflow is None :
337
+ last_recorded_workflow = workflow_run .id
338
+
332
339
# If we saw this workflow already, break. We also break if no
333
340
# workflow has been seen, as this means the script just started.
334
341
if last_workflow_id == workflow_run .id or last_workflow_id is None :
@@ -390,7 +397,7 @@ def get_per_workflow_metrics(github_repo: github.Repository, last_workflow_id: s
390
397
)
391
398
)
392
399
393
- return workflow_metrics , last_checked_workflow_id
400
+ return workflow_metrics , last_recorded_workflow
394
401
395
402
def upload_metrics (workflow_metrics , metrics_userid , api_key ):
396
403
"""Upload metrics to Grafana.
0 commit comments