Skip to content

Commit 2565a1c

Browse files
authored
[supervisor] add ssh tunnel metrics (#18795)
1 parent 7416680 commit 2565a1c

File tree

4 files changed

+76
-5
lines changed

4 files changed

+76
-5
lines changed

components/supervisor/pkg/metrics/metrics.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@ import (
1313
// SupervisorMetrics groups the Prometheus collectors declared for the
// supervisor. NewMetrics constructs the collectors and Register adds each of
// them to a prometheus.Registry.
type SupervisorMetrics struct {
1414
// IDEReadyDurationTotal is a histogram vector.
// NOTE(review): its name, help text and labels are set in NewMetrics, outside the visible hunk — confirm there.
IDEReadyDurationTotal *prometheus.HistogramVec
1515
// InitializerHistogram is a histogram vector.
// NOTE(review): presumably the "initializer speed in bytes per second" histogram labelled by "kind" — confirm against NewMetrics.
InitializerHistogram *prometheus.HistogramVec
16+
// SSHTunnelOpenedTotal counts SSH tunnels opened by the supervisor
// (metric "supervisor_ssh_tunnel_opened_total", declared without labels).
SSHTunnelOpenedTotal *prometheus.CounterVec
17+
// SSHTunnelClosedTotal counts SSH tunnels closed by the supervisor
// (metric "supervisor_ssh_tunnel_closed_total", labelled by "code").
SSHTunnelClosedTotal *prometheus.CounterVec
1618
}
1719

1820
func NewMetrics() *SupervisorMetrics {
@@ -27,13 +29,23 @@ func NewMetrics() *SupervisorMetrics {
2729
Help: "initializer speed in bytes per second",
2830
Buckets: prometheus.ExponentialBuckets(1024*1024, 2, 12),
2931
}, []string{"kind"}),
32+
SSHTunnelOpenedTotal: prometheus.NewCounterVec(prometheus.CounterOpts{
33+
Name: "supervisor_ssh_tunnel_opened_total",
34+
Help: "Total number of SSH tunnels opened by the supervisor",
35+
}, []string{}),
36+
SSHTunnelClosedTotal: prometheus.NewCounterVec(prometheus.CounterOpts{
37+
Name: "supervisor_ssh_tunnel_closed_total",
38+
Help: "Total number of SSH tunnels closed by the supervisor",
39+
}, []string{"code"}),
3040
}
3141
}
3242

3343
func (m *SupervisorMetrics) Register(registry *prometheus.Registry) error {
3444
metrics := []prometheus.Collector{
3545
m.IDEReadyDurationTotal,
3646
m.InitializerHistogram,
47+
m.SSHTunnelOpenedTotal,
48+
m.SSHTunnelClosedTotal,
3749
}
3850

3951
for _, metric := range metrics {

components/supervisor/pkg/metrics/reporter.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,8 @@ func NewGrpcMetricsReporter(gitpodHost string) *GrpcMetricsReporter {
4343
"supervisor_initializer_bytes_second": true,
4444
"supervisor_client_handled_total": true,
4545
"supervisor_client_handling_seconds": true,
46+
"supervisor_ssh_tunnel_opened_total": true,
47+
"supervisor_ssh_tunnel_closed_total": true,
4648
},
4749
values: make(map[string]float64),
4850
addCounter: func(name string, labels map[string]string, value uint64) {

components/supervisor/pkg/supervisor/supervisor.go

Lines changed: 46 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import (
2323
"os/exec"
2424
"os/signal"
2525
"path/filepath"
26+
"regexp"
2627
"runtime"
2728
"runtime/debug"
2829
"strconv"
@@ -418,7 +419,7 @@ func Run(options ...RunOption) {
418419
}
419420

420421
wg.Add(1)
421-
go startAPIEndpoint(ctx, cfg, &wg, apiServices, tunneledPortsService, metricsReporter, apiEndpointOpts...)
422+
go startAPIEndpoint(ctx, cfg, &wg, apiServices, tunneledPortsService, metricsReporter, supervisorMetrics, topService, apiEndpointOpts...)
422423

423424
wg.Add(1)
424425
go startSSHServer(ctx, cfg, &wg)
@@ -1187,7 +1188,28 @@ func isBlacklistedEnvvar(name string) bool {
11871188
return false
11881189
}
11891190

1190-
func startAPIEndpoint(ctx context.Context, cfg *Config, wg *sync.WaitGroup, services []RegisterableService, tunneled *ports.TunneledPortsService, metricsReporter *metrics.GrpcMetricsReporter, opts ...grpc.ServerOption) {
1191+
// websocketCloseErrorPattern captures the numeric close code from an error
// message containing the text "websocket: close <code>".
var websocketCloseErrorPattern = regexp.MustCompile(`websocket: close (\d+)`)

// extractCloseErrorCode returns the WebSocket close code found in errStr as a
// decimal string (for example "1006"). It returns "unknown" when errStr does
// not contain a "websocket: close <code>" fragment, which includes the empty
// string. Only the first occurrence in errStr is considered.
func extractCloseErrorCode(errStr string) string {
	matches := websocketCloseErrorPattern.FindStringSubmatch(errStr)
	// FindStringSubmatch yields nil when nothing matches; a successful match
	// always has the full match at index 0 and the captured code at index 1.
	if len(matches) < 2 {
		return "unknown"
	}

	return matches[1]
}
1201+
1202+
func startAPIEndpoint(
1203+
ctx context.Context,
1204+
cfg *Config,
1205+
wg *sync.WaitGroup,
1206+
services []RegisterableService,
1207+
tunneled *ports.TunneledPortsService,
1208+
metricsReporter *metrics.GrpcMetricsReporter,
1209+
supervisorMetrics *metrics.SupervisorMetrics,
1210+
topService *TopService,
1211+
opts ...grpc.ServerOption,
1212+
) {
11911213
defer wg.Done()
11921214
defer log.Debug("startAPIEndpoint shutdown")
11931215

@@ -1308,6 +1330,17 @@ func startAPIEndpoint(ctx context.Context, cfg *Config, wg *sync.WaitGroup, serv
13081330
tunnelOverWebSocket(tunneled, conn)
13091331
}))
13101332
routes.Handle("/_supervisor/tunnel/ssh", http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
1333+
var err error
1334+
supervisorMetrics.SSHTunnelOpenedTotal.WithLabelValues().Inc()
1335+
defer func() {
1336+
code := "unknown"
1337+
if err != nil {
1338+
code = extractCloseErrorCode(err.Error())
1339+
}
1340+
supervisorMetrics.SSHTunnelClosedTotal.WithLabelValues(code).Inc()
1341+
}()
1342+
startTime := time.Now()
1343+
log := log.WithField("userAgent", r.Header.Get("user-agent")).WithField("remoteAddr", r.RemoteAddr)
13111344
wsConn, err := upgrader.Upgrade(rw, r, nil)
13121345
if err != nil {
13131346
log.WithError(err).Error("tunnel ssh: upgrade to the WebSocket protocol failed")
@@ -1331,13 +1364,21 @@ func startAPIEndpoint(ctx context.Context, cfg *Config, wg *sync.WaitGroup, serv
13311364

13321365
go io.Copy(conn, conn2)
13331366
_, err = io.Copy(conn2, conn)
1334-
if err != nil && !websocket.IsUnexpectedCloseError(err, websocket.CloseGoingAway, websocket.CloseAbnormalClosure) {
1335-
log.WithError(err).Error("tunnel ssh: error returned from io.copy")
1367+
if err != nil {
1368+
var usedCpu, usedMemory int64
1369+
data := topService.data
1370+
if data != nil && data.Cpu != nil {
1371+
usedCpu = data.Cpu.Used
1372+
}
1373+
if data != nil && data.Memory != nil {
1374+
usedMemory = data.Memory.Used
1375+
}
1376+
log.WithField("usedCpu", usedCpu).WithField("usedMemory", usedMemory).WithError(err).Error("tunnel ssh: error returned from io.copy")
13361377
}
13371378

13381379
conn.Close()
13391380
conn2.Close()
1340-
log.Infof("tunnel ssh: Disconnect from %s", conn.RemoteAddr())
1381+
log.WithField("duration", time.Since(startTime).Seconds()).Infof("tunnel ssh: Disconnect from %s", conn.RemoteAddr())
13411382
}))
13421383
if cfg.DebugEnable {
13431384
routes.Handle("/_supervisor/debug/tunnels", http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {

install/installer/pkg/components/ide-metrics/configmap.go

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -336,6 +336,22 @@ func configmap(ctx *common.RenderContext) ([]runtime.Object, error) {
336336
},
337337
},
338338
},
339+
{
340+
Name: "supervisor_ssh_tunnel_opened_total",
341+
Help: "Total number of SSH tunnels opened by the supervisor",
342+
Labels: []config.LabelAllowList{},
343+
},
344+
{
345+
Name: "supervisor_ssh_tunnel_closed_total",
346+
Help: "Total number of SSH tunnels closed by the supervisor",
347+
Labels: []config.LabelAllowList{
348+
{
349+
Name: "code",
350+
AllowValues: []string{"*"},
351+
DefaultValue: "unknown",
352+
},
353+
},
354+
},
339355
}
340356

341357
histogramMetrics := []config.HistogramMetricsConfiguration{

0 commit comments

Comments
 (0)