Skip to content

Commit 2d878cc

Browse files
[CI] Track Queue/In Progress Metrics By Job Rather Than Workflow
This patch makes the metrics container count in-progress and queued jobs at the job level rather than at the workflow level. This helps us distinguish Windows load from Linux load and also lets us filter out the macOS jobs that only run on the release branch. Reviewers: Keenuts, lnihlen. Reviewed By: lnihlen. Pull Request: #127274
1 parent 7a6d150 commit 2d878cc

File tree

1 file changed

+46
-26
lines changed

1 file changed

+46
-26
lines changed

.ci/metrics/metrics.py

Lines changed: 46 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -43,40 +43,60 @@ def get_sampled_workflow_metrics(github_repo: github.Repository):
4343
Returns a list of GaugeMetric objects, containing the relevant metrics about
4444
the workflow
4545
"""
46+
queued_job_counts = {}
47+
running_job_counts = {}
4648

4749
# Other states are available (pending, waiting, etc), but the meaning
4850
# is not documented (See #70540).
4951
# "queued" seems to be the info we want.
50-
queued_workflow_count = len(
51-
[
52-
x
53-
for x in github_repo.get_workflow_runs(status="queued")
54-
if x.name in WORKFLOWS_TO_TRACK
55-
]
56-
)
57-
running_workflow_count = len(
58-
[
59-
x
60-
for x in github_repo.get_workflow_runs(status="in_progress")
61-
if x.name in WORKFLOWS_TO_TRACK
62-
]
63-
)
52+
for queued_workflow in github_repo.get_workflow_runs(status="queued"):
53+
if queued_workflow.name not in WORKFLOWS_TO_TRACK:
54+
continue
55+
for queued_workflow_job in queued_workflow.jobs():
56+
job_name = queued_workflow_job.name
57+
# Workflows marked as queued can potentially only have some jobs
58+
# queued, so make sure to also count jobs currently in progress.
59+
if queued_workflow_job.status == "queued":
60+
if job_name not in queued_job_counts:
61+
queued_job_counts[job_name] = 1
62+
else:
63+
queued_job_counts[job_name] += 1
64+
elif queued_workflow_job.status == "in_progress":
65+
if job_name not in running_job_counts:
66+
running_job_counts[job_name] = 1
67+
else:
68+
running_job_counts[job_name] += 1
69+
70+
for running_workflow in github_repo.get_workflow_runs(status="in_progress"):
71+
if running_workflow.name not in WORKFLOWS_TO_TRACK:
72+
continue
73+
for running_workflow_job in running_workflow.jobs():
74+
job_name = running_workflow_job.name
75+
if running_workflow_job.status != "in_progress":
76+
continue
77+
78+
if job_name not in running_job_counts:
79+
running_job_counts[job_name] = 1
80+
else:
81+
running_job_counts[job_name] += 1
6482

6583
workflow_metrics = []
66-
workflow_metrics.append(
67-
GaugeMetric(
68-
"workflow_queue_size",
69-
queued_workflow_count,
70-
time.time_ns(),
84+
for queued_job in queued_job_counts:
85+
workflow_metrics.append(
86+
GaugeMetric(
87+
f"workflow_queue_size_{queued_job}",
88+
queued_job_counts[queued_job],
89+
time.time_ns(),
90+
)
7191
)
72-
)
73-
workflow_metrics.append(
74-
GaugeMetric(
75-
"running_workflow_count",
76-
running_workflow_count,
77-
time.time_ns(),
92+
for running_job in running_job_counts:
93+
workflow_metrics.append(
94+
GaugeMetric(
95+
f"running_workflow_count_{running_job}",
96+
running_job_counts[running_job],
97+
time.time_ns(),
98+
)
7899
)
79-
)
80100
# Always send a heartbeat metric so we can monitor if this container is still able to log to Grafana.
81101
workflow_metrics.append(
82102
GaugeMetric("metrics_container_heartbeat", 1, time.time_ns())

0 commit comments

Comments
 (0)