Skip to content
This repository was archived by the owner on Sep 30, 2024. It is now read-only.

Commit 738a37c

Browse files
authored
sg/msp: add alert policy documentation to generated ops pages (#61939)
1 parent b64422d commit 738a37c

File tree

14 files changed

+330
-46
lines changed

14 files changed

+330
-46
lines changed

dev/managedservicesplatform/operationdocs/BUILD.bazel

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ go_library(
1212
visibility = ["//visibility:public"],
1313
deps = [
1414
"//dev/managedservicesplatform/operationdocs/internal/markdown",
15+
"//dev/managedservicesplatform/operationdocs/terraform",
1516
"//dev/managedservicesplatform/spec",
1617
"//lib/errors",
1718
"//lib/pointers",
@@ -29,6 +30,7 @@ go_test(
2930
data = glob(["testdata/**"]),
3031
embed = [":operationdocs"],
3132
deps = [
33+
"//dev/managedservicesplatform/operationdocs/terraform",
3234
"//dev/managedservicesplatform/spec",
3335
"//lib/pointers",
3436
"@com_github_hexops_autogold_v2//:autogold",

dev/managedservicesplatform/operationdocs/links.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,3 +62,9 @@ func ServiceLogsURL(serviceKind spec.ServiceKind, envProjectID string) string {
6262
}).String()
6363
}
6464
}
65+
66+
// AlertPolicyDashboardURL creates a link to the GCP monitoring dashboards page
67+
// with a filter for `custom` type (user created) dashboards
68+
func AlertPolicyDashboardURL(envProjectID string) string {
69+
return markdown.Linkf("Dashboard", "https://console.cloud.google.com/monitoring/dashboards?project=%s&pageState=(%%22dashboards%%22:(%%22t%%22:%%22All%%22),%%22dashboardList%%22:(%%22f%%22:%%22%%255B%%257B_22k_22_3A_22Type_22_2C_22t_22_3A10_2C_22v_22_3A_22_5C_22Custom_5C_22_22_2C_22s_22_3Atrue_2C_22i_22_3A_22category_22%%257D%%255D%%22))", envProjectID)
70+
}

dev/managedservicesplatform/operationdocs/operationdocs.go

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,19 @@
11
package operationdocs
22

33
import (
4+
"bytes"
45
"fmt"
56
"path"
67
"slices"
78
"strings"
89
"time"
910

1011
"github.com/sourcegraph/sourcegraph/dev/managedservicesplatform/operationdocs/internal/markdown"
12+
"github.com/sourcegraph/sourcegraph/dev/managedservicesplatform/operationdocs/terraform"
1113
"github.com/sourcegraph/sourcegraph/dev/managedservicesplatform/spec"
1214
"github.com/sourcegraph/sourcegraph/lib/errors"
1315
"github.com/sourcegraph/sourcegraph/lib/pointers"
16+
"golang.org/x/exp/maps"
1417
)
1518

1619
type Options struct {
@@ -22,6 +25,9 @@ type Options struct {
2225
GenerateCommand string
2326
// Handbook indicates we are generating output for sourcegraph/handbook.
2427
Handbook bool
28+
// AlertPolicies is a deduplicated map of alert policies defined for all
29+
// environments of a service
30+
AlertPolicies map[string]terraform.AlertPolicy
2531
}
2632

2733
// AddDocumentComment adds a comment to the markdown document with details about
@@ -182,7 +188,10 @@ This service is operated on the %s.`,
182188
return l
183189
}), ", ")},
184190
{"Slack notifications", markdown.Linkf("#"+slackChannelName, "https://sourcegraph.slack.com/archives/"+slackChannelName)},
185-
{"Alerts", markdown.Linkf("GCP monitoring", "https://console.cloud.google.com/monitoring/alerting?project=%s", env.ProjectID)},
191+
{"Alert policies",
192+
fmt.Sprintf("%s, %s",
193+
markdown.Linkf("GCP Monitoring alert policies list", "https://console.cloud.google.com/monitoring/alerting/policies?project=%s", env.ProjectID),
194+
AlertPolicyDashboardURL(env.ProjectID))},
186195
{"Errors", sentryLink},
187196
}
188197
if env.EnvironmentServiceSpec != nil {
@@ -331,6 +340,28 @@ If you make your Entitle request, then log in, you will be removed from any team
331340
md.CodeBlockf("bash", `sg msp tfc view %s %s`, s.Service.ID, env.ID)
332341
}
333342

343+
md.Headingf(3, "Alert Policies")
344+
345+
md.Paragraphf("The following alert policies are defined for each of this service's environments.")
346+
347+
// Render alerts
348+
// Sort the map keys to make order deterministic
349+
alertKeys := maps.Keys(opts.AlertPolicies)
350+
slices.Sort(alertKeys)
351+
for _, key := range alertKeys {
352+
policy := opts.AlertPolicies[key]
353+
md.Headingf(4, policy.DisplayName)
354+
// We need to remove the footer text we add to each alert policy description
355+
b := []byte(policy.Documentation.Content)
356+
lastParagraphIndex := bytes.LastIndex(b, []byte("\n\n"))
357+
if lastParagraphIndex != -1 {
358+
b = b[:lastParagraphIndex]
359+
}
360+
361+
md.CodeBlock("md", string(b))
362+
md.Paragraphf("Severity: %s", policy.Severity)
363+
}
364+
334365
return md.String(), nil
335366
}
336367

dev/managedservicesplatform/operationdocs/operationdocs_test.go

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import (
66
"github.com/hexops/autogold/v2"
77
"github.com/stretchr/testify/require"
88

9+
"github.com/sourcegraph/sourcegraph/dev/managedservicesplatform/operationdocs/terraform"
910
"github.com/sourcegraph/sourcegraph/dev/managedservicesplatform/spec"
1011
"github.com/sourcegraph/sourcegraph/lib/pointers"
1112
)
@@ -53,6 +54,24 @@ func TestRender(t *testing.T) {
5354
Stages: []spec.RolloutStageSpec{{EnvironmentID: testServiceEnvironment}},
5455
},
5556
},
57+
opts: Options{
58+
AlertPolicies: map[string]terraform.AlertPolicy{
59+
"monitoring-common-cpu": {
60+
DisplayName: "High Container CPU Utilization",
61+
Documentation: terraform.Documentation{
62+
Content: "High CPU Usage - it may be neccessary to reduce load or increase CPU allocation",
63+
},
64+
Severity: "WARNING",
65+
},
66+
"monitoring-common-memory": {
67+
DisplayName: "High Container Memory Utilization",
68+
Documentation: terraform.Documentation{
69+
Content: "High Memory Usage - it may be neccessary to reduce load or increase memory allocation",
70+
},
71+
Severity: "WARNING",
72+
},
73+
},
74+
},
5675
}, {
5776
name: "resources",
5877
spec: spec.Spec{
@@ -86,6 +105,24 @@ func TestRender(t *testing.T) {
86105
},
87106
}},
88107
},
108+
opts: Options{
109+
AlertPolicies: map[string]terraform.AlertPolicy{
110+
"monitoring-common-cpu": {
111+
DisplayName: "High Container CPU Utilization",
112+
Documentation: terraform.Documentation{
113+
Content: "High CPU Usage - it may be neccessary to reduce load or increase CPU allocation",
114+
},
115+
Severity: "WARNING",
116+
},
117+
"monitoring-common-memory": {
118+
DisplayName: "High Container Memory Utilization",
119+
Documentation: terraform.Documentation{
120+
Content: "High Memory Usage - it may be neccessary to reduce load or increase memory allocation",
121+
},
122+
Severity: "WARNING",
123+
},
124+
},
125+
},
89126
}, {
90127
name: "with README",
91128
spec: spec.Spec{
@@ -115,6 +152,24 @@ func TestRender(t *testing.T) {
115152
116153
Some additional operations!`),
117154
},
155+
opts: Options{
156+
AlertPolicies: map[string]terraform.AlertPolicy{
157+
"monitoring-common-cpu": {
158+
DisplayName: "High Container CPU Utilization",
159+
Documentation: terraform.Documentation{
160+
Content: "High CPU Usage - it may be neccessary to reduce load or increase CPU allocation",
161+
},
162+
Severity: "WARNING",
163+
},
164+
"monitoring-common-memory": {
165+
DisplayName: "High Container Memory Utilization",
166+
Documentation: terraform.Documentation{
167+
Content: "High Memory Usage - it may be neccessary to reduce load or increase memory allocation",
168+
},
169+
Severity: "WARNING",
170+
},
171+
},
172+
},
118173
}, {
119174
name: "multi env rollout",
120175
spec: spec.Spec{
@@ -149,6 +204,24 @@ Some additional operations!`),
149204
Stages: []spec.RolloutStageSpec{{EnvironmentID: testServiceEnvironment}, {EnvironmentID: robertServiceEnvironment}},
150205
},
151206
},
207+
opts: Options{
208+
AlertPolicies: map[string]terraform.AlertPolicy{
209+
"monitoring-common-cpu": {
210+
DisplayName: "High Container CPU Utilization",
211+
Documentation: terraform.Documentation{
212+
Content: "High CPU Usage - it may be neccessary to reduce load or increase CPU allocation",
213+
},
214+
Severity: "WARNING",
215+
},
216+
"monitoring-common-memory": {
217+
DisplayName: "High Container Memory Utilization",
218+
Documentation: terraform.Documentation{
219+
Content: "High Memory Usage - it may be neccessary to reduce load or increase memory allocation",
220+
},
221+
Severity: "WARNING",
222+
},
223+
},
224+
},
152225
}} {
153226
t.Run(tc.name, func(t *testing.T) {
154227
doc, err := Render(tc.spec, tc.opts)
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
load("@io_bazel_rules_go//go:def.bzl", "go_library")
2+
3+
go_library(
4+
name = "terraform",
5+
srcs = ["terraform.go"],
6+
importpath = "github.com/sourcegraph/sourcegraph/dev/managedservicesplatform/operationdocs/terraform",
7+
visibility = ["//visibility:public"],
8+
)
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
package terraform
2+
3+
import (
4+
"encoding/json"
5+
"io"
6+
"os"
7+
)
8+
9+
// The types below model the parts of the monitoring stack's generated CDKTF
// Terraform JSON that are decoded by this package. Only the fields declared
// here are read; everything else in the document is ignored by encoding/json.
type (
	// Monitoring is the top-level document of the monitoring stack's CDKTF
	// JSON. Its "resource" object is decoded into ResourceType.
	Monitoring struct {
		ResourceType ResourceType `json:"resource"`
	}

	// ResourceType holds resources grouped by Terraform resource type, e.g.
	// `google_monitoring_alert_policy`. Each map is keyed by the name under
	// which the resource appears in the JSON.
	ResourceType struct {
		GoogleMonitoringAlertPolicy map[string]AlertPolicy `json:"google_monitoring_alert_policy"`
	}

	// AlertPolicy is the configuration of a single alert policy.
	AlertPolicy struct {
		DisplayName   string        `json:"display_name,omitempty"`
		Documentation Documentation `json:"documentation"`
		Severity      string        `json:"severity"`
	}

	// Documentation is the markdown formatted documentation attached to an
	// alert policy.
	Documentation struct {
		Content string `json:"content"`
	}
)
30+
31+
// ParseMonitoringCDKTF parses the generated terraform json
32+
func ParseMonitoringCDKTF(path string) (*Monitoring, error) {
33+
jsonFile, err := os.Open(path)
34+
if err != nil {
35+
return nil, err
36+
}
37+
38+
defer jsonFile.Close()
39+
40+
bytes, err := io.ReadAll(jsonFile)
41+
if err != nil {
42+
return nil, err
43+
}
44+
45+
var monitoring Monitoring
46+
err = json.Unmarshal(bytes, &monitoring)
47+
if err != nil {
48+
return nil, err
49+
}
50+
return &monitoring, nil
51+
}

dev/managedservicesplatform/operationdocs/testdata/TestRender/basic.golden

Lines changed: 29 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -36,15 +36,15 @@ Changes to MSP Testbed are continuously delivered to the first stage ([test](#te
3636

3737
### test
3838

39-
| PROPERTY | DETAILS |
40-
|---------------------|---------------------------------------------------------------------------------------------------------------|
41-
| Project ID | [`msp-testbed-test-77589aae45d0`](https://console.cloud.google.com/run?project=msp-testbed-test-77589aae45d0) |
42-
| Category | **test** |
43-
| Deployment type | `rollout` |
44-
| Resources | |
45-
| Slack notifications | [#alerts-msp-testbed-test](https://sourcegraph.slack.com/archives/alerts-msp-testbed-test) |
46-
| Alerts | [GCP monitoring](https://console.cloud.google.com/monitoring/alerting?project=msp-testbed-test-77589aae45d0) |
47-
| Errors | [Sentry `msp-testbed-test`](https://sourcegraph.sentry.io/projects/msp-testbed-test/) |
39+
| PROPERTY | DETAILS |
40+
|---------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
41+
| Project ID | [`msp-testbed-test-77589aae45d0`](https://console.cloud.google.com/run?project=msp-testbed-test-77589aae45d0) |
42+
| Category | **test** |
43+
| Deployment type | `rollout` |
44+
| Resources | |
45+
| Slack notifications | [#alerts-msp-testbed-test](https://sourcegraph.slack.com/archives/alerts-msp-testbed-test) |
46+
| Alert policies | [GCP Monitoring alert policies list](https://console.cloud.google.com/monitoring/alerting/policies?project=msp-testbed-test-77589aae45d0), [Dashboard](https://console.cloud.google.com/monitoring/dashboards?pageState=%28%22dashboards%22%3A%28%22t%22%3A%22All%22%29%2C%22dashboardList%22%3A%28%22f%22%3A%22%255B%257B_22k_22_3A_22Type_22_2C_22t_22_3A10_2C_22v_22_3A_22_5C_22Custom_5C_22_22_2C_22s_22_3Atrue_2C_22i_22_3A_22category_22%257D%255D%22%29%29&project=msp-testbed-test-77589aae45d0) |
47+
| Errors | [Sentry `msp-testbed-test`](https://sourcegraph.sentry.io/projects/msp-testbed-test/) |
4848

4949
MSP infrastructure access needs to be requested using Entitle for time-bound privileges. Test environments may have less stringent requirements.
5050

@@ -96,3 +96,23 @@ The Terraform Cloud workspaces for this service environment are [grouped under t
9696
```bash
9797
sg msp tfc view msp-testbed test
9898
```
99+
100+
### Alert Policies
101+
102+
The following alert policies are defined for each of this service's environments.
103+
104+
#### High Container CPU Utilization
105+
106+
```md
107+
High CPU Usage - it may be neccessary to reduce load or increase CPU allocation
108+
```
109+
110+
Severity: WARNING
111+
112+
#### High Container Memory Utilization
113+
114+
```md
115+
High Memory Usage - it may be neccessary to reduce load or increase memory allocation
116+
```
117+
118+
Severity: WARNING

0 commit comments

Comments
 (0)