@@ -91,25 +91,26 @@ spec:
91
91
expr : |
92
92
sum(time() - kube_pod_deletion_timestamp{namespace="default", pod=~"^ws-.*", cluster!~"ephemeral.*"}) by (pod) > 24 * 60 * 60
93
93
94
- - alert : GitpodImagebuildSuccessRate
94
+ - alert : GitpodImagebuildDoneSuccess
95
95
labels :
96
- severity : warning
96
+ severity : critical
97
97
team : workspace
98
+ for : 4h
98
99
annotations :
99
- runbook_url : https://github.com/gitpod-io/runbooks/blob/main/runbooks/GitpodImagebuildSuccessRate .md
100
- summary : imagebuild success rate is low in cluster {{ $labels.cluster }}.
101
- description : imagebuild are failing at too high of a rate in cluster {{ $labels.cluster }}.
100
+ runbook_url : https://github.com/gitpod-io/runbooks/blob/main/runbooks/GitpodImagebuildDoneSuccess.md
101
+ summary : imagebuilds done are failing at a high rate in cluster {{ $labels.cluster }}.
102
+ description : imagebuilds are not reaching done at too high of a rate in cluster {{ $labels.cluster }}.
102
103
expr : |
103
- (1 - (sum(rate(gitpod_image_builder_builds_done_total{success="false", cluster!~"ephemeral.*"}[4h])) / sum(rate(gitpod_image_builder_builds_done_total{cluster!~"ephemeral.*"}[4h])))) < 0.75
104
+ (1 - (sum(rate(gitpod_image_builder_builds_done_total{success="false", cluster!~"ephemeral.*"}[4h])) / sum(rate(gitpod_image_builder_builds_done_total{cluster!~"ephemeral.*"}[4h])))) < 0.60
104
105
105
- - alert : GitpodImagebuildSuccessFailing
106
+ - alert : GitpodImagebuildStartSuccess
106
107
labels :
107
108
severity : critical
108
109
team : workspace
109
- for : 3h
110
+ for : 2h
110
111
annotations :
111
- runbook_url : https://github.com/gitpod-io/runbooks/blob/main/runbooks/GitpodImagebuildSuccessRate .md
112
- summary : imagebuild success rate is failing in cluster {{ $labels.cluster }}.
113
- description : imagebuild are failing at too high of a rate in cluster {{ $labels.cluster }}.
112
+ runbook_url : https://github.com/gitpod-io/runbooks/blob/main/runbooks/GitpodImagebuildStartSuccess.md
113
+ summary : imagebuild start success rate is failing in cluster {{ $labels.cluster }}.
114
+ description : imagebuild starts are failing at too high of a rate in cluster {{ $labels.cluster }}.
114
115
expr : |
115
- (1 - (sum(rate(gitpod_image_builder_builds_done_total{success="false", cluster!~"ephemeral.*"}[4h])) / sum(rate(gitpod_image_builder_builds_done_total{ cluster!~"ephemeral.*"}[4h])))) < 0.50
116
+ (1 - (sum(rate(gitpod_ws_manager_workspace_starts_failure_total{type="IMAGEBUILD", cluster!~"ephemeral.*"}[4h])) / sum(rate(gitpod_ws_manager_workspace_starts_total{type="IMAGEBUILD", cluster!~"ephemeral.*"}[4h])))) < 0.99
0 commit comments