Skip to content

Commit

Permalink
[ws-manager-bridge] Add status update metric
Browse files Browse the repository at this point in the history
  • Loading branch information
csweichel authored and roboquat committed Sep 20, 2021
1 parent afb019f commit b031cf3
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 6 deletions.
19 changes: 13 additions & 6 deletions components/ws-manager-bridge/src/bridge.ts
Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,12 @@ export class WorkspaceManagerBridge implements Disposable {
protected readonly disposables: Disposable[] = [];
protected readonly queues = new Map<string, Queue>();

protected cluster: WorkspaceClusterInfo;

public start(cluster: WorkspaceClusterInfo, clientProvider: ClientProvider) {
const logPayload = { name: cluster.name, url: cluster.url };
log.info(`starting bridge to cluster...`, logPayload);
this.cluster = cluster;

if (cluster.govern) {
log.debug(`starting DB updater: ${cluster.name}`, logPayload);
Expand All @@ -70,7 +73,7 @@ export class WorkspaceManagerBridge implements Disposable {
throw new Error("controllerInterval <= 0!");
}
log.debug(`starting controller: ${cluster.name}`, logPayload);
this.startController(clientProvider, cluster.name, controllerInterval, this.config.controllerMaxDisconnectSeconds);
this.startController(clientProvider, controllerInterval, this.config.controllerMaxDisconnectSeconds);
}
log.info(`started bridge to cluster.`, logPayload);
}
Expand Down Expand Up @@ -129,7 +132,10 @@ export class WorkspaceManagerBridge implements Disposable {
const logCtx = { instanceId, workspaceId, userId };

const instance = await this.workspaceDB.trace({span}).findInstanceById(instanceId);
if (!instance) {
if (instance) {
this.prometheusExporter.statusUpdateReceived(this.cluster.name, true);
} else {
this.prometheusExporter.statusUpdateReceived(this.cluster.name, false);
log.warn(logCtx, "Received a status update for an unknown instance", { status });
return;
}
Expand Down Expand Up @@ -259,17 +265,17 @@ export class WorkspaceManagerBridge implements Disposable {
}
}

protected startController(clientProvider: ClientProvider, installation: string, controllerIntervalSeconds: number, controllerMaxDisconnectSeconds: number, maxTimeToRunningPhaseSeconds = 60 * 60) {
protected startController(clientProvider: ClientProvider, controllerIntervalSeconds: number, controllerMaxDisconnectSeconds: number, maxTimeToRunningPhaseSeconds = 60 * 60) {
let disconnectStarted = Number.MAX_SAFE_INTEGER;
const timer = setInterval(async () => {
try {
const client = await clientProvider();
await this.controlInstallationInstances(client, installation, maxTimeToRunningPhaseSeconds);
await this.controlInstallationInstances(client, maxTimeToRunningPhaseSeconds);

disconnectStarted = Number.MAX_SAFE_INTEGER; // Reset disconnect period
} catch (e) {
if (durationLongerThanSeconds(disconnectStarted, controllerMaxDisconnectSeconds)) {
log.warn("error while controlling installation's workspaces", e, { installation });
log.warn("error while controlling installation's workspaces", e, { installation: this.cluster.name });
} else if (disconnectStarted > Date.now()) {
disconnectStarted = Date.now();
}
Expand All @@ -278,7 +284,8 @@ export class WorkspaceManagerBridge implements Disposable {
this.disposables.push({ dispose: () => clearTimeout(timer) });
}

protected async controlInstallationInstances(client: PromisifiedWorkspaceManagerClient, installation: string, maxTimeToRunningPhaseSeconds: number) {
protected async controlInstallationInstances(client: PromisifiedWorkspaceManagerClient, maxTimeToRunningPhaseSeconds: number) {
const installation = this.cluster.name;
log.debug("controlling instances", { installation });
let ctx: TraceContext = {};

Expand Down
10 changes: 10 additions & 0 deletions components/ws-manager-bridge/src/prometheus-metrics-exporter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ export class PrometheusMetricsExporter {
protected readonly timeToFirstUserActivityHistogram: prom.Histogram<string>;
protected readonly clusterScore: prom.Gauge<string>;
protected readonly clusterCordoned: prom.Gauge<string>;
protected readonly statusUpdatesTotal: prom.Counter<string>;

constructor() {
this.workspaceStartupTimeHistogram = new prom.Histogram({
Expand All @@ -39,6 +40,11 @@ export class PrometheusMetricsExporter {
help: 'Cordoned status of the individual registered workspace cluster',
labelNames: ["workspace_cluster"]
});
this.statusUpdatesTotal = new prom.Counter({
name: 'gitpod_ws_manager_bridge_status_updates_total',
help: 'Total workspace status updates received',
labelNames: ["workspace_cluster", "known_instance"]
});
}

observeWorkspaceStartupTime(instance: WorkspaceInstance): void {
Expand Down Expand Up @@ -69,5 +75,9 @@ export class PrometheusMetricsExporter {
this.clusterScore.labels(cluster.name).set(cluster.score);
});
}

/**
 * Counts one received workspace status update on the `statusUpdatesTotal` counter.
 *
 * @param installation value for the counter's first label (`workspace_cluster`).
 * @param knownInstance whether the update referred to a known instance; recorded
 *   as the string `"true"` or `"false"` in the second label (`known_instance`).
 */
statusUpdateReceived(installation: string, knownInstance: boolean): void {
    // Label values are passed as strings, so spell the boolean out explicitly.
    const knownInstanceLabel = knownInstance ? "true" : "false";
    const counter = this.statusUpdatesTotal.labels(installation, knownInstanceLabel);
    counter.inc();
}
}

0 comments on commit b031cf3

Please sign in to comment.