Skip to content

Commit a192cad

Browse files
committed
big rewrite of metric sampling
1 parent 07da03d commit a192cad

File tree

1 file changed

+56
-49
lines changed

1 file changed

+56
-49
lines changed

.ci/metrics/metrics.py

Lines changed: 56 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import os
66
from dataclasses import dataclass
77
import sys
8+
import collections
89
import logging
910

1011
import github
@@ -232,6 +233,44 @@ def buildkite_get_metrics(
232233
return output, last_recorded_build
233234

234235

236+
def github_job_name_to_metric_name(workflow_name, job_name):
    """Build the canonical metric name for a tracked workflow/job pair.

    Args:
      workflow_name: the workflow name as reported by Github.
      job_name: the job name within that workflow.

    Returns:
      The metric name, formed as "<workflow key>_<job key>" from the
      tracking tables.
    """
    workflow_key = GITHUB_WORKFLOW_TO_TRACK[workflow_name]
    return "_".join((workflow_key, GITHUB_JOB_TO_TRACK[workflow_key][job_name]))
240+
241+
242+
def github_count_queued_running_workflows(workflow_list):
    """Tally queued and in-progress jobs, per tracked job, in the passed
    workflow list.

    Workflows and jobs absent from the tracking tables are ignored.

    Args:
      workflow_list: an iterable of workflows.

    Returns:
      A tuple, (per-job-queue-size, per-job-running-count). The key
      is the pretty job name, and the value the count of jobs.
    """
    queued = collections.Counter()
    running = collections.Counter()
    # Dispatch each job status straight to the counter it belongs in;
    # any other status (pending, waiting, ...) is dropped.
    counter_for_status = {"queued": queued, "in_progress": running}

    for workflow in workflow_list:
        workflow_key = GITHUB_WORKFLOW_TO_TRACK.get(workflow.name)
        if workflow_key is None:
            continue

        tracked_jobs = GITHUB_JOB_TO_TRACK[workflow_key]
        for job in workflow.jobs():
            job_key = tracked_jobs.get(job.name)
            if job_key is None:
                continue

            counter = counter_for_status.get(job.status)
            if counter is not None:
                counter[f"{workflow_key}_{job_key}"] += 1

    return queued, running
272+
273+
235274
def get_sampled_workflow_metrics(github_repo: github.Repository):
236275
"""Gets global statistics about the Github workflow queue
237276
@@ -242,60 +281,26 @@ def get_sampled_workflow_metrics(github_repo: github.Repository):
242281
Returns a list of GaugeMetric objects, containing the relevant metrics about
243282
the workflow
244283
"""
245-
queued_job_counts = {}
246-
running_job_counts = {}
247-
248284
# Other states are available (pending, waiting, etc), but the meaning
249285
# is not documented (See #70540).
250286
# "queued" seems to be the info we want.
251-
for queued_workflow in github_repo.get_workflow_runs(status="queued"):
252-
if queued_workflow.name not in GITHUB_WORKFLOW_TO_TRACK:
253-
continue
254-
for queued_workflow_job in queued_workflow.jobs():
255-
job_name = queued_workflow_job.name
256-
# Workflows marked as queued can potentially only have some jobs
257-
# queued, so make sure to also count jobs currently in progress.
258-
if queued_workflow_job.status == "queued":
259-
if job_name not in queued_job_counts:
260-
queued_job_counts[job_name] = 1
261-
else:
262-
queued_job_counts[job_name] += 1
263-
elif queued_workflow_job.status == "in_progress":
264-
if job_name not in running_job_counts:
265-
running_job_counts[job_name] = 1
266-
else:
267-
running_job_counts[job_name] += 1
268-
269-
for running_workflow in github_repo.get_workflow_runs(status="in_progress"):
270-
if running_workflow.name not in GITHUB_WORKFLOW_TO_TRACK:
271-
continue
272-
for running_workflow_job in running_workflow.jobs():
273-
job_name = running_workflow_job.name
274-
if running_workflow_job.status != "in_progress":
275-
continue
276-
277-
if job_name not in running_job_counts:
278-
running_job_counts[job_name] = 1
279-
else:
280-
running_job_counts[job_name] += 1
287+
queued_1, running_1 = github_count_queued_running_workflows(
288+
github_repo.get_workflow_runs(status="queued")
289+
)
290+
queued_2, running_2 = github_count_queued_running_workflows(
291+
github_repo.get_workflow_runs(status="in_progress")
292+
)
281293

282294
workflow_metrics = []
283-
for queued_job in queued_job_counts:
295+
for key, value in (queued_1 + queued_2).items():
284296
workflow_metrics.append(
285-
GaugeMetric(
286-
f"workflow_queue_size_{queued_job}",
287-
queued_job_counts[queued_job],
288-
time.time_ns(),
289-
)
297+
GaugeMetric(f"workflow_queue_size_{key}", value, time.time_ns())
290298
)
291-
for running_job in running_job_counts:
299+
for key, value in (running_1 + running_2).items():
292300
workflow_metrics.append(
293-
GaugeMetric(
294-
f"running_workflow_count_{running_job}",
295-
running_job_counts[running_job],
296-
time.time_ns(),
297-
)
301+
GaugeMetric(f"running_workflow_count_{key}", value, time.time_ns())
298302
)
303+
299304
# Always send a heartbeat metric so we can monitor whether this container is
300305
# still able to log to Grafana.
301306
workflow_metrics.append(
@@ -325,10 +330,12 @@ def get_per_workflow_metrics(github_repo: github.Repository, last_workflow_id: s
325330
the workflow.
326331
"""
327332
workflow_metrics = []
328-
last_checked_workflow_id = last_workflow_id
329-
333+
last_recorded_workflow = None
330334
for workflow_run in iter(github_repo.get_workflow_runs(status="completed")):
331-
last_checked_workflow_id = workflow_run.id
335+
# Record the first workflow of this list as the most recent one.
336+
if last_recorded_workflow is None:
337+
last_recorded_workflow = workflow_run.id
338+
332339
# If we saw this workflow already, break. We also break if no
333340
# workflow has been seen, as this means the script just started.
334341
if last_workflow_id == workflow_run.id or last_workflow_id is None:
@@ -390,7 +397,7 @@ def get_per_workflow_metrics(github_repo: github.Repository, last_workflow_id: s
390397
)
391398
)
392399

393-
return workflow_metrics, last_checked_workflow_id
400+
return workflow_metrics, last_recorded_workflow
394401

395402
def upload_metrics(workflow_metrics, metrics_userid, api_key):
396403
"""Upload metrics to Grafana.

0 commit comments

Comments
 (0)