Skip to content

Commit 6495e99

Browse files
committed
fix(run-engine): observe only managed worker groups in the queue-length gauge
1 parent 9c5633b commit 6495e99

2 files changed

Lines changed: 23 additions & 2 deletions

File tree

internal-packages/run-engine/src/engine/index.ts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -485,6 +485,11 @@ export class RunEngine {
485485
* length even when nothing is dequeuing from it. Includes hidden groups; excludes
486486
* groups whose cloud provider is configured to be excluded (groups with no cloud
487487
* provider are always included).
488+
*
489+
* Only MANAGED groups are observed. UNMANAGED groups are created per project
490+
* (masterQueue `<projectId>-<name>`), so observing them would grow the set, and the
491+
* per-tick Redis fanout, with the number of self-hosted-worker projects rather than
492+
* with the managed regions this gauge is meant to track.
488493
*/
489494
async refreshWorkerQueueObservation() {
490495
const suffixes = this.options.workerQueueObserver?.additionalQueueSuffixes ?? [];
@@ -495,6 +500,7 @@ export class RunEngine {
495500
// Read from the replica: this is a periodic metrics-only read and worker groups change
496501
// rarely, so a little replication lag is fine and keeps it off the primary.
497502
const workerGroups = await this.readOnlyPrisma.workerInstanceGroup.findMany({
503+
where: { type: "MANAGED" },
498504
select: { masterQueue: true, cloudProvider: true },
499505
});
500506

internal-packages/run-engine/src/engine/tests/workerQueueObservation.test.ts

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,16 @@ describe("RunEngine worker queue observation", () => {
4343
},
4444
});
4545

46+
// An UNMANAGED (per-project, self-hosted) worker group should not be observed.
47+
await prisma.workerInstanceGroup.create({
48+
data: {
49+
name: "unmanaged-region",
50+
masterQueue: "unmanaged-region",
51+
type: "UNMANAGED",
52+
token: { create: { tokenHash: "unmanaged_region_token_hash" } },
53+
},
54+
});
55+
4656
const engine = new RunEngine({
4757
prisma,
4858
// This test only exercises enqueue + processMasterQueue + the observer gauge, so keep
@@ -118,17 +128,19 @@ describe("RunEngine worker queue observation", () => {
118128
const defaultBacklog = 3;
119129
const scheduledBacklog = 2;
120130
const hiddenBacklog = 2;
121-
const doBacklog = 2;
131+
const doBacklog = 1;
132+
const unmanagedBacklog = 1;
122133

123134
// Build a backlog across several worker queues, then move them into the worker queue
124135
// lists, but never dequeue.
125136
await enqueueTo("default", defaultBacklog, "r_default");
126137
await enqueueTo("default:scheduled", scheduledBacklog, "r_scheduled");
127138
await enqueueTo("hidden-region", hiddenBacklog, "r_hidden");
128139
await enqueueTo("do-region", doBacklog, "r_do");
140+
await enqueueTo("unmanaged-region", unmanagedBacklog, "r_unmanaged");
129141
await engine.runQueue.processMasterQueueForEnvironment(
130142
authenticatedEnvironment.id,
131-
defaultBacklog + scheduledBacklog + hiddenBacklog + doBacklog
143+
defaultBacklog + scheduledBacklog + hiddenBacklog + doBacklog + unmanagedBacklog
132144
);
133145

134146
// Observe the worker queues derived from the WorkerInstanceGroup records. No dequeue
@@ -142,6 +154,9 @@ describe("RunEngine worker queue observation", () => {
142154

143155
// Excluded: the DigitalOcean group is not observed even though it has a backlog.
144156
expect(await lengthOf("do-region")).toBe(0);
157+
158+
// Excluded: the UNMANAGED (per-project) group is not observed even with a backlog.
159+
expect(await lengthOf("unmanaged-region")).toBe(0);
145160
} finally {
146161
await engine.quit();
147162
}

0 commit comments

Comments
 (0)