diff --git a/nginx.conf b/nginx.conf
index 2da48c6af..8466fd86c 100644
--- a/nginx.conf
+++ b/nginx.conf
@@ -120,6 +120,25 @@ server {
         proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
         proxy_set_header X-Forwarded-Proto $scheme;
     }
+
+    # /api/health endpoint - No caching, always hit the backend
+    location = /api/health {
+        proxy_pass http://127.0.0.1:3000;
+        proxy_http_version 1.1;
+
+        # Cache configuration - No caching; "always" keeps these headers on 503 responses too
+        proxy_cache off;
+        add_header X-Cache-Status "BYPASS" always;
+        add_header Cache-Control "no-store, no-cache, must-revalidate, proxy-revalidate" always;
+        add_header Pragma "no-cache" always;
+        add_header Expires "0" always;
+
+        # Standard proxy headers
+        proxy_set_header Host $host;
+        proxy_set_header X-Real-IP $remote_addr;
+        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+        proxy_set_header X-Forwarded-Proto $scheme;
+    }
 
     # /commit.txt endpoint - Cache for 5 seconds
     location = /commit.txt {
@@ -250,4 +269,4 @@ server {
         proxy_set_header X-Forwarded-Proto $scheme;
     }
 
-}
\ No newline at end of file
+}
diff --git a/src/server/Master.ts b/src/server/Master.ts
index a9322d8ff..90b9cf48f 100644
--- a/src/server/Master.ts
+++ b/src/server/Master.ts
@@ -145,6 +145,15 @@ app.get("/api/env", async (req, res) => {
   res.json(envConfig);
 });
 
+app.get("/api/health", (_req, res) => {
+  const ready = lobbyService?.isHealthy() ?? false;
+  if (ready) {
+    res.json({ status: "ok" });
+  } else {
+    res.status(503).json({ status: "unavailable" });
+  }
+});
+
 // SPA fallback route
 app.get("*", async function (_req, res) {
   try {
diff --git a/src/server/MasterLobbyService.ts b/src/server/MasterLobbyService.ts
index 0dfe89285..31e58b55d 100644
--- a/src/server/MasterLobbyService.ts
+++ b/src/server/MasterLobbyService.ts
@@ -59,6 +59,22 @@ export class MasterLobbyService {
     this.readyWorkers.delete(workerId);
   }
 
+  /**
+   * Reports whether the lobby service is able to serve traffic.
+   *
+   * The service counts as healthy once `started` is set and at least half of
+   * the configured workers (never fewer than one) are ready, which leaves some
+   * leeway if a worker crashes.
+   *
+   * @returns `true` when started and enough workers are ready, else `false`.
+   */
+  isHealthy(): boolean {
+    // The halved count may be fractional (1.5 for 3 workers); comparing the
+    // integer ready count against it effectively rounds the threshold up.
+    const minWorkers = Math.max(this.config.numWorkers() / 2, 1);
+    return this.started && this.readyWorkers.size >= minWorkers;
+  }
+
   private handleWorkerReady(workerId: number) {
     this.readyWorkers.add(workerId);
     this.log.info(
diff --git a/tests/server/MasterLobbyServiceHealth.test.ts b/tests/server/MasterLobbyServiceHealth.test.ts
new file mode 100644
index 000000000..fe5800eac
--- /dev/null
+++ b/tests/server/MasterLobbyServiceHealth.test.ts
@@ -0,0 +1,119 @@
+import EventEmitter from "events";
+import { describe, expect, it, vi } from "vitest";
+import { MasterLobbyService } from "../../src/server/MasterLobbyService";
+import { TestServerConfig } from "../util/TestServerConfig";
+
+vi.mock("../../src/server/Logger", () => ({
+  logger: {
+    child: () => ({
+      error: vi.fn(),
+      info: vi.fn(),
+    }),
+  },
+}));
+
+vi.mock("../../src/server/PollingLoop", () => ({
+  startPolling: vi.fn(),
+}));
+
+function createMockWorker(): EventEmitter {
+  const emitter = new EventEmitter();
+  (emitter as any).send = vi.fn();
+  return emitter;
+}
+
+function sendWorkerReady(worker: EventEmitter, workerId: number) {
+  worker.emit("message", { type: "workerReady", workerId });
+}
+
+function createService(numWorkers: number): MasterLobbyService {
+  const config = new TestServerConfig();
+  vi.spyOn(config, "numWorkers").mockReturnValue(numWorkers);
+  const log = { info: vi.fn(), error: vi.fn() } as any;
+  return new MasterLobbyService(config, {} as any, log);
+}
+
+function startAllWorkers(
+  service: MasterLobbyService,
+  count: number,
+): { id: number; w: EventEmitter }[] {
+  const workers = Array.from({ length: count }, (_, i) => {
+    const id = i + 1;
+    const w = createMockWorker();
+    service.registerWorker(id, w as any);
+    return { id, w };
+  });
+  for (const { w, id } of workers) {
+    sendWorkerReady(w, id);
+  }
+  return workers;
+}
+
+describe("MasterLobbyService.isHealthy", () => {
+  it("unhealthy before any workers register", () => {
+    const service = createService(4);
+    expect(service.isHealthy()).toBe(false);
+  });
+
+  it("unhealthy when workers registered but not ready", () => {
+    const service = createService(2);
+    service.registerWorker(1, createMockWorker() as any);
+    expect(service.isHealthy()).toBe(false);
+  });
+
+  it("unhealthy when only some workers are ready (server not started)", () => {
+    const service = createService(4);
+
+    // 1 of 4 ready -- not enough to flip `started`
+    const w1 = createMockWorker();
+    service.registerWorker(1, w1 as any);
+    sendWorkerReady(w1, 1);
+
+    expect(service.isHealthy()).toBe(false);
+  });
+
+  it("healthy once all workers are ready", () => {
+    const service = createService(2);
+    startAllWorkers(service, 2);
+    expect(service.isHealthy()).toBe(true);
+  });
+
+  it("stays healthy after a single worker crash", () => {
+    const service = createService(4);
+    startAllWorkers(service, 4);
+
+    service.removeWorker(4); // 3 of 4 left, threshold is 2
+    expect(service.isHealthy()).toBe(true);
+  });
+
+  it("goes unhealthy when too many workers crash", () => {
+    const service = createService(4);
+    startAllWorkers(service, 4);
+
+    service.removeWorker(2);
+    service.removeWorker(3);
+    service.removeWorker(4); // 1 of 4 left, threshold is 2
+    expect(service.isHealthy()).toBe(false);
+  });
+
+  it("single-worker setup goes unhealthy on crash", () => {
+    const service = createService(1);
+    startAllWorkers(service, 1);
+    expect(service.isHealthy()).toBe(true);
+
+    service.removeWorker(1);
+    expect(service.isHealthy()).toBe(false);
+  });
+
+  it("odd worker count: threshold rounds up (3 workers)", () => {
+    const service = createService(3);
+    startAllWorkers(service, 3);
+
+    // min = 3/2 = 1.5, so 2 ready is enough, 1 is not
+    service.removeWorker(3);
+    expect(service.isHealthy()).toBe(true);
+
+    service.removeWorker(2);
+    expect(service.isHealthy()).toBe(false);
+  });
+});