Skip to content

[supervisor] add ssh tunnel metrics #18795

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Sep 25, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions components/supervisor/pkg/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ import (
// SupervisorMetrics groups the Prometheus metric vectors owned by the
// supervisor. Instances are built by NewMetrics.
type SupervisorMetrics struct {
// IDEReadyDurationTotal is a histogram vector.
// NOTE(review): the name suggests it tracks how long the IDE takes to become
// ready — confirm against its definition in NewMetrics.
IDEReadyDurationTotal *prometheus.HistogramVec
// InitializerHistogram is a histogram vector.
// NOTE(review): presumably the "initializer speed in bytes per second"
// metric labelled by "kind" — verify against NewMetrics.
InitializerHistogram *prometheus.HistogramVec
// SSHTunnelOpenedTotal counts SSH tunnels opened by the supervisor
// (metric name supervisor_ssh_tunnel_opened_total, no labels).
SSHTunnelOpenedTotal *prometheus.CounterVec
// SSHTunnelClosedTotal counts SSH tunnels closed by the supervisor
// (metric name supervisor_ssh_tunnel_closed_total), partitioned by the
// "code" label.
SSHTunnelClosedTotal *prometheus.CounterVec
}

func NewMetrics() *SupervisorMetrics {
Expand All @@ -27,13 +29,23 @@ func NewMetrics() *SupervisorMetrics {
Help: "initializer speed in bytes per second",
Buckets: prometheus.ExponentialBuckets(1024*1024, 2, 12),
}, []string{"kind"}),
SSHTunnelOpenedTotal: prometheus.NewCounterVec(prometheus.CounterOpts{
Name: "supervisor_ssh_tunnel_opened_total",
Help: "Total number of SSH tunnels opened by the supervisor",
}, []string{}),
SSHTunnelClosedTotal: prometheus.NewCounterVec(prometheus.CounterOpts{
Name: "supervisor_ssh_tunnel_closed_total",
Help: "Total number of SSH tunnels closed by the supervisor",
}, []string{"code"}),
}
}

func (m *SupervisorMetrics) Register(registry *prometheus.Registry) error {
metrics := []prometheus.Collector{
m.IDEReadyDurationTotal,
m.InitializerHistogram,
m.SSHTunnelOpenedTotal,
m.SSHTunnelClosedTotal,
}

for _, metric := range metrics {
Expand Down
2 changes: 2 additions & 0 deletions components/supervisor/pkg/metrics/reporter.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ func NewGrpcMetricsReporter(gitpodHost string) *GrpcMetricsReporter {
"supervisor_initializer_bytes_second": true,
"supervisor_client_handled_total": true,
"supervisor_client_handling_seconds": true,
"supervisor_ssh_tunnel_opened_total": true,
"supervisor_ssh_tunnel_closed_total": true,
},
values: make(map[string]float64),
addCounter: func(name string, labels map[string]string, value uint64) {
Expand Down
51 changes: 46 additions & 5 deletions components/supervisor/pkg/supervisor/supervisor.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"os/exec"
"os/signal"
"path/filepath"
"regexp"
"runtime"
"runtime/debug"
"strconv"
Expand Down Expand Up @@ -418,7 +419,7 @@ func Run(options ...RunOption) {
}

wg.Add(1)
go startAPIEndpoint(ctx, cfg, &wg, apiServices, tunneledPortsService, metricsReporter, apiEndpointOpts...)
go startAPIEndpoint(ctx, cfg, &wg, apiServices, tunneledPortsService, metricsReporter, supervisorMetrics, topService, apiEndpointOpts...)

wg.Add(1)
go startSSHServer(ctx, cfg, &wg)
Expand Down Expand Up @@ -1187,7 +1188,28 @@ func isBlacklistedEnvvar(name string) bool {
return false
}

func startAPIEndpoint(ctx context.Context, cfg *Config, wg *sync.WaitGroup, services []RegisterableService, tunneled *ports.TunneledPortsService, metricsReporter *metrics.GrpcMetricsReporter, opts ...grpc.ServerOption) {
// websocketCloseErrorPattern captures the run of digits that follows the
// literal text "websocket: close " inside an error message.
var websocketCloseErrorPattern = regexp.MustCompile(`websocket: close (\d+)`)

// extractCloseErrorCode returns the digits of the first "websocket: close <N>"
// occurrence found in errStr. When errStr contains no such occurrence it
// returns the placeholder "unknown".
func extractCloseErrorCode(errStr string) string {
	// A successful match yields the full match plus one capture group.
	if groups := websocketCloseErrorPattern.FindStringSubmatch(errStr); len(groups) >= 2 {
		return groups[1]
	}

	return "unknown"
}

func startAPIEndpoint(
ctx context.Context,
cfg *Config,
wg *sync.WaitGroup,
services []RegisterableService,
tunneled *ports.TunneledPortsService,
metricsReporter *metrics.GrpcMetricsReporter,
supervisorMetrics *metrics.SupervisorMetrics,
topService *TopService,
opts ...grpc.ServerOption,
) {
defer wg.Done()
defer log.Debug("startAPIEndpoint shutdown")

Expand Down Expand Up @@ -1308,6 +1330,17 @@ func startAPIEndpoint(ctx context.Context, cfg *Config, wg *sync.WaitGroup, serv
tunnelOverWebSocket(tunneled, conn)
}))
routes.Handle("/_supervisor/tunnel/ssh", http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
var err error
supervisorMetrics.SSHTunnelOpenedTotal.WithLabelValues().Inc()
defer func() {
code := "unknown"
if err != nil {
code = extractCloseErrorCode(err.Error())
}
supervisorMetrics.SSHTunnelClosedTotal.WithLabelValues(code).Inc()
}()
startTime := time.Now()
log := log.WithField("userAgent", r.Header.Get("user-agent")).WithField("remoteAddr", r.RemoteAddr)
wsConn, err := upgrader.Upgrade(rw, r, nil)
if err != nil {
log.WithError(err).Error("tunnel ssh: upgrade to the WebSocket protocol failed")
Expand All @@ -1331,13 +1364,21 @@ func startAPIEndpoint(ctx context.Context, cfg *Config, wg *sync.WaitGroup, serv

go io.Copy(conn, conn2)
_, err = io.Copy(conn2, conn)
if err != nil && !websocket.IsUnexpectedCloseError(err, websocket.CloseGoingAway, websocket.CloseAbnormalClosure) {
log.WithError(err).Error("tunnel ssh: error returned from io.copy")
if err != nil {
var usedCpu, usedMemory int64
data := topService.data
if data != nil && data.Cpu != nil {
usedCpu = data.Cpu.Used
}
if data != nil && data.Memory != nil {
usedMemory = data.Memory.Used
}
log.WithField("usedCpu", usedCpu).WithField("usedMemory", usedMemory).WithError(err).Error("tunnel ssh: error returned from io.copy")
}

conn.Close()
conn2.Close()
log.Infof("tunnel ssh: Disconnect from %s", conn.RemoteAddr())
log.WithField("duration", time.Since(startTime).Seconds()).Infof("tunnel ssh: Disconnect from %s", conn.RemoteAddr())
}))
if cfg.DebugEnable {
routes.Handle("/_supervisor/debug/tunnels", http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
Expand Down
16 changes: 16 additions & 0 deletions install/installer/pkg/components/ide-metrics/configmap.go
Original file line number Diff line number Diff line change
Expand Up @@ -336,6 +336,22 @@ func configmap(ctx *common.RenderContext) ([]runtime.Object, error) {
},
},
},
{
Name: "supervisor_ssh_tunnel_opened_total",
Help: "Total number of SSH tunnels opened by the supervisor",
Labels: []config.LabelAllowList{},
},
{
Name: "supervisor_ssh_tunnel_closed_total",
Help: "Total number of SSH tunnels closed by the supervisor",
Labels: []config.LabelAllowList{
{
Name: "code",
AllowValues: []string{"*"},
DefaultValue: "unknown",
},
},
},
}

histogramMetrics := []config.HistogramMetricsConfiguration{
Expand Down