Skip to content

Commit 4a7e71d

Browse files
committed
[spicedb] fix stale connection handling
1 parent 687f337 commit 4a7e71d

File tree

16 files changed

+140
-125
lines changed

16 files changed

+140
-125
lines changed

components/content-service-api/typescript/package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
"lib"
1212
],
1313
"dependencies": {
14-
"@grpc/grpc-js": "1.8.8",
14+
"@grpc/grpc-js": "1.9.1",
1515
"google-protobuf": "^3.19.1",
1616
"inversify": "^6.0.1",
1717
"opentracing": "^0.14.4"

components/gitpod-protocol/package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
"src"
1111
],
1212
"devDependencies": {
13-
"@grpc/grpc-js": "1.8.8",
13+
"@grpc/grpc-js": "1.9.1",
1414
"@testdeck/mocha": "^0.3.3",
1515
"@types/analytics-node": "^3.1.9",
1616
"@types/chai-subset": "^1.3.3",

components/ide-metrics-api/typescript-grpc/package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
"lib"
1010
],
1111
"dependencies": {
12-
"@grpc/grpc-js": "1.8.8",
12+
"@grpc/grpc-js": "1.9.1",
1313
"google-protobuf": "^3.19.1"
1414
},
1515
"devDependencies": {

components/image-builder-api/typescript/package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
"dependencies": {
1414
"@gitpod/content-service": "0.1.5",
1515
"@gitpod/gitpod-protocol": "0.1.5",
16-
"@grpc/grpc-js": "1.8.8",
16+
"@grpc/grpc-js": "1.9.1",
1717
"google-protobuf": "^3.19.1",
1818
"inversify": "^6.0.1",
1919
"opentracing": "^0.14.4"

components/server/debug.sh

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,11 @@ kubectl scale deployment $deploymentName --replicas=1
2626
echo "Wait for the pod to be ready"
2727
kubectl wait --for=condition=ready pod -l component=$deploymentName
2828

29+
while [[ $(kubectl get pods -l component=$deploymentName -o 'jsonpath={..status.phase}' | grep -o 'Running' | wc -w) -ne 1 ]]; do
30+
echo "Waiting for scale down operation to complete..."
31+
sleep 1
32+
done
33+
2934
podName=$(kubectl get pods -l component=$deploymentName -o=jsonpath='{.items[0].metadata.name}')
3035
echo "Forward $podName port $debugPort to localhost. Waiting for a debugger to attach ..."
3136
kubectl port-forward pod/"$podName" $debugPort

components/server/package.json

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,5 +136,8 @@
136136
"ts-node": "^10.4.0",
137137
"typescript": "~4.4.2",
138138
"watch": "^1.0.2"
139+
},
140+
"resolutions": {
141+
"@authzed/authzed-node/**/@grpc/grpc-js": "1.9.1"
139142
}
140143
}

components/server/src/authorization/spicedb-authorizer.ts

Lines changed: 10 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@ import { getExperimentsClientForBackend } from "@gitpod/gitpod-protocol/lib/expe
1212
import { inject, injectable } from "inversify";
1313
import { observeSpicedbClientLatency, spicedbClientLatency } from "../prometheus-metrics";
1414
import { SpiceDBClientProvider } from "./spicedb";
15-
import * as grpc from "@grpc/grpc-js";
1615

1716
@injectable()
1817
export class SpiceDBAuthorizer {
@@ -21,6 +20,10 @@ export class SpiceDBAuthorizer {
2120
private readonly clientProvider: SpiceDBClientProvider,
2221
) {}
2322

23+
private get client(): v1.ZedPromiseClientInterface {
24+
return this.clientProvider.getClient();
25+
}
26+
2427
async check(
2528
req: v1.CheckPermissionRequest,
2629
experimentsFields: {
@@ -39,8 +42,7 @@ export class SpiceDBAuthorizer {
3942
const timer = spicedbClientLatency.startTimer();
4043
let error: Error | undefined;
4144
try {
42-
const client = this.clientProvider.getClient();
43-
const response = await client.checkPermission(req, this.callOptions);
45+
const response = await this.client.checkPermission(req);
4446
const permitted = response.permissionship === v1.CheckPermissionResponse_Permissionship.HAS_PERMISSION;
4547

4648
return permitted;
@@ -59,12 +61,10 @@ export class SpiceDBAuthorizer {
5961
const timer = spicedbClientLatency.startTimer();
6062
let error: Error | undefined;
6163
try {
62-
const client = this.clientProvider.getClient();
63-
const response = await client.writeRelationships(
64+
const response = await this.client.writeRelationships(
6465
v1.WriteRelationshipsRequest.create({
6566
updates,
6667
}),
67-
this.callOptions,
6868
);
6969
log.info("[spicedb] Successfully wrote relationships.", { response, updates });
7070

@@ -81,11 +81,10 @@ export class SpiceDBAuthorizer {
8181
const timer = spicedbClientLatency.startTimer();
8282
let error: Error | undefined;
8383
try {
84-
const client = this.clientProvider.getClient();
85-
const existing = await client.readRelationships(v1.ReadRelationshipsRequest.create(req), this.callOptions);
84+
const existing = await this.client.readRelationships(v1.ReadRelationshipsRequest.create(req));
8685
if (existing.length > 0) {
87-
const response = await client.deleteRelationships(req, this.callOptions);
88-
const after = await client.readRelationships(v1.ReadRelationshipsRequest.create(req), this.callOptions);
86+
const response = await this.client.deleteRelationships(req);
87+
const after = await this.client.readRelationships(v1.ReadRelationshipsRequest.create(req));
8988
if (after.length > 0) {
9089
log.error("[spicedb] Failed to delete relationships.", { existing, after, request: req });
9190
}
@@ -108,21 +107,6 @@ export class SpiceDBAuthorizer {
108107
}
109108

110109
async readRelationships(req: v1.ReadRelationshipsRequest): Promise<v1.ReadRelationshipsResponse[]> {
111-
const client = this.clientProvider.getClient();
112-
if (!client) {
113-
return [];
114-
}
115-
return client.readRelationships(req, this.callOptions);
116-
}
117-
118-
/**
119-
* permission_service.grpc-client.d.ts has all methods overloaded with this pattern:
120-
* - xyzRelationships(input: Request, metadata?: grpc.Metadata | grpc.CallOptions, options?: grpc.CallOptions): grpc.ClientReadableStream<ReadRelationshipsResponse>;
121-
* But the promisified client somehow does not have the same overloads. Thus we convince it here that options may be passed as 2nd argument.
122-
*/
123-
private get callOptions(): grpc.Metadata {
124-
return (<grpc.CallOptions>{
125-
deadline: Date.now() + 8000,
126-
}) as any as grpc.Metadata;
110+
return this.client.readRelationships(req);
127111
}
128112
}

components/server/src/authorization/spicedb.ts

Lines changed: 53 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -5,22 +5,16 @@
55
*/
66

77
import { v1 } from "@authzed/authzed-node";
8-
import { IClientCallMetrics, createClientCallMetricsInterceptor } from "@gitpod/gitpod-protocol/lib/util/grpc";
98
import { log } from "@gitpod/gitpod-protocol/lib/util/logging";
109
import * as grpc from "@grpc/grpc-js";
1110

12-
export const SpiceDBClientProvider = Symbol("SpiceDBClientProvider");
13-
1411
export interface SpiceDBClientConfig {
1512
address: string;
1613
token: string;
1714
}
1815

1916
export type SpiceDBClient = v1.ZedPromiseClientInterface;
2017
type Client = v1.ZedClientInterface & grpc.Client;
21-
export interface SpiceDBClientProvider {
22-
getClient(): SpiceDBClient;
23-
}
2418

2519
export function spiceDBConfigFromEnv(): SpiceDBClientConfig | undefined {
2620
const token = process.env["SPICEDB_PRESHARED_KEY"];
@@ -40,63 +34,67 @@ export function spiceDBConfigFromEnv(): SpiceDBClientConfig | undefined {
4034
};
4135
}
4236

43-
function spicedbClientFromConfig(config: SpiceDBClientConfig): Client {
44-
const clientOptions: grpc.ClientOptions = {
45-
"grpc.client_idle_timeout_ms": 10000, // this ensures a connection is not stuck in the "READY" state for too long. Required for the reconnect logic below.
46-
"grpc.max_reconnect_backoff_ms": 5000, // default: 12000
47-
};
48-
49-
return v1.NewClient(
50-
config.token,
51-
config.address,
52-
v1.ClientSecurity.INSECURE_PLAINTEXT_CREDENTIALS,
53-
undefined,
54-
clientOptions,
55-
) as Client;
56-
}
57-
58-
export class CachingSpiceDBClientProvider implements SpiceDBClientProvider {
37+
export class SpiceDBClientProvider {
5938
private client: Client | undefined;
6039

61-
private readonly interceptors: grpc.Interceptor[] = [];
62-
6340
constructor(
64-
private readonly _clientConfig: SpiceDBClientConfig,
65-
private readonly clientCallMetrics?: IClientCallMetrics | undefined,
66-
) {
67-
if (this.clientCallMetrics) {
68-
this.interceptors.push(createClientCallMetricsInterceptor(this.clientCallMetrics));
69-
}
70-
}
41+
private readonly clientConfig: SpiceDBClientConfig,
42+
private readonly interceptors: grpc.Interceptor[] = [],
43+
) {}
7144

7245
getClient(): SpiceDBClient {
73-
let client = this.client;
74-
if (!client) {
75-
client = spicedbClientFromConfig(this.clientConfig);
76-
} else if (client.getChannel().getConnectivityState(true) !== grpc.connectivityState.READY) {
77-
// (gpl): We need this check and explicit re-connect because we observe a ~120s connection timeout without it.
78-
// It's not entirely clear where that timeout comes from, but likely from the underlying transport, that is not exposed by grpc/grpc-js
79-
client.close();
80-
81-
log.warn("[spicedb] Lost connection to SpiceDB - reconnecting...");
82-
83-
client = spicedbClientFromConfig(this.clientConfig);
46+
if (this.client) {
47+
const state = this.client.getChannel().getConnectivityState(true);
48+
if (state === grpc.connectivityState.TRANSIENT_FAILURE || state === grpc.connectivityState.SHUTDOWN) {
49+
log.warn("[spicedb] Lost connection to SpiceDB - reconnecting...");
50+
try {
51+
this.client.close();
52+
} catch (error) {
53+
log.error("[spicedb] Failed to close client", error);
54+
} finally {
55+
this.client = undefined;
56+
}
57+
}
8458
}
85-
this.client = client;
86-
87-
return client.promises;
88-
}
8959

90-
protected get clientConfig() {
91-
const config = this._clientConfig;
92-
if (this.interceptors) {
93-
return {
94-
...config,
95-
options: {
96-
interceptors: [...this.interceptors],
60+
if (!this.client) {
61+
this.client = v1.NewClient(
62+
this.clientConfig.token,
63+
this.clientConfig.address,
64+
v1.ClientSecurity.INSECURE_PLAINTEXT_CREDENTIALS,
65+
undefined, //
66+
{
67+
// wie ping frequently to check if the connection is still alive
68+
"grpc.keepalive_time_ms": 1000,
69+
"grpc.keepalive_timeout_ms": 1000,
70+
71+
"grpc.max_reconnect_backoff_ms": 5000,
72+
"grpc.initial_reconnect_backoff_ms": 500,
73+
"grpc.service_config": JSON.stringify({
74+
methodConfig: [
75+
{
76+
name: [{}],
77+
deadline: {
78+
seconds: 1,
79+
nanos: 0,
80+
},
81+
retryPolicy: {
82+
maxAttempts: 5,
83+
initialBackoff: "0.1s",
84+
maxBackoff: "1s",
85+
backoffMultiplier: 2.0,
86+
retryableStatusCodes: ["UNAVAILABLE", "DEADLINE_EXCEEDED"],
87+
},
88+
},
89+
],
90+
}),
91+
92+
// "grpc.client_idle_timeout_ms": 10000, // this ensures a connection is not stuck in the "READY" state for too long. Required for the reconnect logic below.
93+
"grpc.enable_retries": 1, //TODO enabled by default
94+
interceptors: this.interceptors,
9795
},
98-
};
96+
) as Client;
9997
}
100-
return config;
98+
return this.client.promises;
10199
}
102100
}

components/server/src/container-module.ts

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,11 @@ import { GitpodFileParser } from "@gitpod/gitpod-protocol/lib/gitpod-file-parser
1111
import { PrometheusClientCallMetrics } from "@gitpod/gitpod-protocol/lib/messaging/client-call-metrics";
1212
import { newAnalyticsWriterFromEnv } from "@gitpod/gitpod-protocol/lib/util/analytics";
1313
import { DebugApp } from "@gitpod/gitpod-protocol/lib/util/debug-app";
14-
import { IClientCallMetrics, defaultGRPCOptions } from "@gitpod/gitpod-protocol/lib/util/grpc";
14+
import {
15+
IClientCallMetrics,
16+
createClientCallMetricsInterceptor,
17+
defaultGRPCOptions,
18+
} from "@gitpod/gitpod-protocol/lib/util/grpc";
1519
import { prometheusClientMiddleware } from "@gitpod/gitpod-protocol/lib/util/nice-grpc";
1620
import { IDEServiceClient, IDEServiceDefinition } from "@gitpod/ide-service-api/lib/ide.pb";
1721
import { ImageBuilderClientCallMetrics, ImageBuilderClientProvider } from "@gitpod/image-builder/lib";
@@ -46,7 +50,7 @@ import { AuthJWT, SignInJWT } from "./auth/jwt";
4650
import { LoginCompletionHandler } from "./auth/login-completion-handler";
4751
import { VerificationService } from "./auth/verification-service";
4852
import { Authorizer, createInitializingAuthorizer } from "./authorization/authorizer";
49-
import { CachingSpiceDBClientProvider, SpiceDBClientProvider, spiceDBConfigFromEnv } from "./authorization/spicedb";
53+
import { SpiceDBClientProvider, spiceDBConfigFromEnv } from "./authorization/spicedb";
5054
import { BillingModes } from "./billing/billing-mode";
5155
import { EntitlementService, EntitlementServiceImpl } from "./billing/entitlement-service";
5256
import { EntitlementServiceUBP } from "./billing/entitlement-service-ubp";
@@ -309,7 +313,10 @@ export const productionContainerModule = new ContainerModule(
309313
throw new Error("[spicedb] Missing configuration expected in env vars!");
310314
}
311315
const clientCallMetrics = ctx.container.get<IClientCallMetrics>(IClientCallMetrics);
312-
return new CachingSpiceDBClientProvider(config, clientCallMetrics);
316+
return new SpiceDBClientProvider(
317+
config, //
318+
[createClientCallMetricsInterceptor(clientCallMetrics)],
319+
);
313320
})
314321
.inSingletonScope();
315322
bind(SpiceDBAuthorizer).toSelf().inSingletonScope();

components/server/src/test/service-testing-container-module.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ import { v4 } from "uuid";
1313
import { AuthProviderParams } from "../auth/auth-provider";
1414
import { HostContextProvider, HostContextProviderFactory } from "../auth/host-context-provider";
1515
import { HostContextProviderImpl } from "../auth/host-context-provider-impl";
16-
import { CachingSpiceDBClientProvider, SpiceDBClientProvider } from "../authorization/spicedb";
16+
import { SpiceDBClientProvider } from "../authorization/spicedb";
1717
import { Config } from "../config";
1818
import { StorageClient } from "../storage/storage-client";
1919
import { testContainer } from "@gitpod/gitpod-db/lib";
@@ -194,7 +194,7 @@ const mockApplyingContainerModule = new ContainerModule((bind, unbound, isbound,
194194
token: v4(),
195195
address: "localhost:50051",
196196
};
197-
return new CachingSpiceDBClientProvider(config);
197+
return new SpiceDBClientProvider(config);
198198
})
199199
.inSingletonScope();
200200
});

components/supervisor-api/typescript-grpc/package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
"lib"
1010
],
1111
"dependencies": {
12-
"@grpc/grpc-js": "1.8.8",
12+
"@grpc/grpc-js": "1.9.1",
1313
"google-protobuf": "^3.19.1"
1414
},
1515
"devDependencies": {

components/ws-daemon-api/typescript/package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
],
1414
"dependencies": {
1515
"@gitpod/content-service": "0.1.5",
16-
"@grpc/grpc-js": "1.8.8",
16+
"@grpc/grpc-js": "1.9.1",
1717
"google-protobuf": "^3.19.1",
1818
"inversify": "^6.0.1",
1919
"opentracing": "^0.14.4"

components/ws-manager-api/typescript/package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
"dependencies": {
1717
"@gitpod/content-service": "0.1.5",
1818
"@gitpod/gitpod-protocol": "0.1.5",
19-
"@grpc/grpc-js": "1.8.8",
19+
"@grpc/grpc-js": "1.9.1",
2020
"google-protobuf": "^3.19.1",
2121
"inversify": "^6.0.1",
2222
"opentracing": "^0.14.4"

components/ws-manager-bridge-api/typescript/package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
"lib"
1212
],
1313
"dependencies": {
14-
"@grpc/grpc-js": "1.8.8",
14+
"@grpc/grpc-js": "1.9.1",
1515
"google-protobuf": "^3.19.1",
1616
"inversify": "^6.0.1",
1717
"opentracing": "^0.14.4"

install/installer/pkg/components/spicedb/deployment.go

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -154,13 +154,13 @@ func deployment(ctx *common.RenderContext) ([]runtime.Object, error) {
154154
Command: []string{"grpc_health_probe", "-v", fmt.Sprintf("-addr=localhost:%d", ContainerGRPCPort)},
155155
},
156156
},
157-
InitialDelaySeconds: 5,
158-
// try again every 10 seconds
159-
PeriodSeconds: 10,
160-
// fail after 6 * 10 + 5 = 65 seconds
161-
FailureThreshold: 6,
157+
InitialDelaySeconds: 1,
158+
// try again every 2 seconds
159+
PeriodSeconds: 2,
160+
// fail after 30 * 2 + 1 = 61
161+
FailureThreshold: 30,
162162
SuccessThreshold: 1,
163-
TimeoutSeconds: 3,
163+
TimeoutSeconds: 1,
164164
},
165165
VolumeMounts: []v1.VolumeMount{
166166
bootstrapVolumeMount,

0 commit comments

Comments
 (0)