Export prometheus metrics (#286)

This commit is contained in:
evanpelle
2025-03-18 09:00:05 -07:00
committed by GitHub
parent fe3b4fb8cc
commit 70e348c94b
8 changed files with 208 additions and 1 deletions
+38
View File
@@ -43,6 +43,7 @@
"page": "^1.11.6",
"pg": "^8.13.3",
"priority-queue-typescript": "^1.0.1",
"prom-client": "^15.1.3",
"protobufjs": "^7.3.2",
"pureimage": "^0.4.13",
"raphael": "^2.3.0",
@@ -4890,6 +4891,15 @@
"node": ">=10"
}
},
"node_modules/@opentelemetry/api": {
"version": "1.9.0",
"resolved": "https://registry.npmjs.org/@opentelemetry/api/-/api-1.9.0.tgz",
"integrity": "sha512-3giAOQvZiH5F9bMlMiv8+GSPMeqg0dbaeo58/0SlA9sxSqZhnUtxzX9/2FzyhS9sWQf5S0GJE0AKBrFqjpeYcg==",
"license": "Apache-2.0",
"engines": {
"node": ">=8.0.0"
}
},
"node_modules/@pkgjs/parseargs": {
"version": "0.11.0",
"resolved": "https://registry.npmjs.org/@pkgjs/parseargs/-/parseargs-0.11.0.tgz",
@@ -7631,6 +7641,12 @@
"resolved": "https://registry.npmjs.org/binary-loader/-/binary-loader-0.0.1.tgz",
"integrity": "sha512-LujAJL3IhYn4zVWZWAct9B6G5hdonNle+fWsG/u9QyY1OhOVp00jFgygUVr9SrBVw8doddyx9A/VS1/T9co4Dg=="
},
"node_modules/bintrees": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/bintrees/-/bintrees-1.0.2.tgz",
"integrity": "sha512-VOMgTMwjAaUG580SXn3LacVgjurrbMme7ZZNYGSSV7mmtY6QQRh0Eg3pwIcntQ77DErK1L0NxkbetjcoXzVwKw==",
"license": "MIT"
},
"node_modules/bl": {
"version": "6.0.16",
"resolved": "https://registry.npmjs.org/bl/-/bl-6.0.16.tgz",
@@ -15845,6 +15861,19 @@
"integrity": "sha512-3ouUOpQhtgrbOa17J7+uxOTpITYWaGP7/AhoR3+A+/1e9skrzelGi/dXzEYyvbxubEF6Wn2ypscTKiKJFFn1ag==",
"license": "MIT"
},
"node_modules/prom-client": {
"version": "15.1.3",
"resolved": "https://registry.npmjs.org/prom-client/-/prom-client-15.1.3.tgz",
"integrity": "sha512-6ZiOBfCywsD4k1BN9IX0uZhF+tJkV8q8llP64G5Hajs4JOeVLPCwpPVcpXy3BwYiUGgyJzsJJQeOIv7+hDSq8g==",
"license": "Apache-2.0",
"dependencies": {
"@opentelemetry/api": "^1.4.0",
"tdigest": "^0.1.1"
},
"engines": {
"node": "^16 || ^18 || >=20"
}
},
"node_modules/promise-retry": {
"version": "2.0.1",
"resolved": "https://registry.npmjs.org/promise-retry/-/promise-retry-2.0.1.tgz",
@@ -17579,6 +17608,15 @@
"integrity": "sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A==",
"license": "ISC"
},
"node_modules/tdigest": {
"version": "0.1.2",
"resolved": "https://registry.npmjs.org/tdigest/-/tdigest-0.1.2.tgz",
"integrity": "sha512-+G0LLgjjo9BZX2MfdvPfH+MKLCrxlXSYec5DaPYP1fe6Iyhf0/fSmJ0bFiZ1F8BT6cGXl2LpltQptzjXKWEkKA==",
"license": "MIT",
"dependencies": {
"bintrees": "1.0.2"
}
},
"node_modules/teeny-request": {
"version": "9.0.0",
"resolved": "https://registry.npmjs.org/teeny-request/-/teeny-request-9.0.0.tgz",
+1
View File
@@ -110,6 +110,7 @@
"page": "^1.11.6",
"pg": "^8.13.3",
"priority-queue-typescript": "^1.0.1",
"prom-client": "^15.1.3",
"protobufjs": "^7.3.2",
"pureimage": "^0.4.13",
"raphael": "^2.3.0",
+12
View File
@@ -40,6 +40,18 @@ export class GameManager {
return game;
}
activeGames(): number {
return this.games.size;
}
activeClients(): number {
let totalClients = 0;
this.games.forEach((game: GameServer) => {
totalClients += game.activeClients.length;
});
return totalClients;
}
tick() {
const active = new Map<GameID, GameServer>();
for (const [id, game] of this.games) {
+8
View File
@@ -13,6 +13,7 @@ import path from "path";
import rateLimit from "express-rate-limit";
import { fileURLToPath } from "url";
import { gatekeeper, LimiterType } from "./Gatekeeper";
import { setupMetricsServer } from "./MasterMetrics";
const config = getServerConfigFromServer();
const readyWorkers = new Set();
@@ -20,6 +21,10 @@ const readyWorkers = new Set();
const app = express();
const server = http.createServer(app);
// Create a separate metrics server on port 9090
const metricsApp = express();
const metricsServer = http.createServer(metricsApp);
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
app.use(express.json());
@@ -135,6 +140,9 @@ export async function startMaster() {
server.listen(PORT, () => {
console.log(`Master HTTP server listening on port ${PORT}`);
});
// Setup the metrics server
setupMetricsServer();
}
app.get(
+79
View File
@@ -0,0 +1,79 @@
import express from "express";
import http from "http";
import promClient from "prom-client";
import { getServerConfigFromServer } from "../core/configuration/Config";
const config = getServerConfigFromServer();
// Create a separate metrics server on port 9090
const metricsApp = express();
const metricsServer = http.createServer(metricsApp);
// Initialize the Prometheus registry for the master's own metrics
const register = new promClient.Registry();
// Prometheus metrics endpoint that gathers metrics from workers
export function setupMetricsServer() {
metricsApp.get("/metrics", async (req, res) => {
console.log("Metrics requested");
try {
// Get the master's metrics
const masterMetrics = await register.metrics();
// Collect metrics from all workers
const workerMetricsPromises = [];
// For each worker, fetch their metrics
for (let i = 0; i < config.numWorkers(); i++) {
const workerPort = config.workerPortByIndex(i);
const workerUrl = `http://localhost:${workerPort}/metrics`;
console.log(`Fetching metrics from worker ${i} at ${workerUrl}`);
const workerMetricsPromise = fetch(workerUrl, {
headers: {
[config.adminHeader()]: config.adminToken(),
},
})
.then((response) => {
if (!response.ok) {
throw new Error(`Worker ${i} returned status ${response.status}`);
}
return response.text();
})
.then((metricsText) => {
// Add worker label to each metric line
return metricsText.replace(
/^([a-z][a-z0-9_]*(?:{[^}]*})?)\s/gm,
`$1{worker="worker-${i}"} `,
);
})
.catch((error) => {
console.error(`Error fetching metrics from worker ${i}:`, error);
return `# Error fetching metrics from worker ${i}: ${error.message}`;
});
workerMetricsPromises.push(workerMetricsPromise);
}
// Wait for all worker metrics to be fetched
const workerMetricsArray = await Promise.all(workerMetricsPromises);
// Add worker label to the master metrics
const masterMetricsWithLabel = masterMetrics.replace(
/^([a-z][a-z0-9_]*(?:{[^}]*})?)\s/gm,
`$1{worker="master"} `,
);
// Combine all metrics and send the response
res.set("Content-Type", register.contentType);
res.end(`${masterMetricsWithLabel}\n${workerMetricsArray.join("\n")}`);
} catch (error) {
console.error("Error collecting metrics:", error);
res.status(500).end(`# Error collecting metrics: ${error.message}`);
}
});
// Start the metrics server on port 9090
const METRICS_PORT = 9090;
metricsServer.listen(METRICS_PORT, () => {
console.log(`Metrics server listening on port ${METRICS_PORT}`);
});
}
+24
View File
@@ -17,6 +17,7 @@ import { slog } from "./StructuredLog";
import { GameType } from "../core/game/Game";
import { archive, readGameRecord } from "./Archive";
import { gatekeeper, LimiterType } from "./Gatekeeper";
import { metrics } from "./WorkerMetrics";
const config = getServerConfigFromServer();
@@ -35,6 +36,11 @@ export function startWorker() {
const gm = new GameManager(config);
// Set up periodic metrics updates
setInterval(() => {
metrics.updateGameMetrics(gm);
}, 15000); // Update every 15 seconds
// Middleware to handle /wX path prefix
app.use((req, res, next) => {
// Extract the original path without the worker prefix
@@ -241,6 +247,24 @@ export function startWorker() {
}),
);
app.get(
"/metrics",
gatekeeper.httpHandler(LimiterType.Get, async (req, res) => {
if (req.headers[config.adminHeader()] !== config.adminToken()) {
return res.status(403).end("Access denied");
}
console.log(`metrics requested on worker ${workerId}`);
try {
const metricsData = await metrics.register.metrics();
res.set("Content-Type", metrics.register.contentType);
res.end(metricsData);
} catch (error) {
res.status(500).end(error.message);
}
}),
);
// WebSocket handling
wss.on("connection", (ws: WebSocket, req) => {
ws.on(
+45
View File
@@ -0,0 +1,45 @@
import promClient from "prom-client";
import { GameManager } from "./GameManager";
// Initialize the Prometheus registry
const register = new promClient.Registry();
// Enable default Node.js metrics collection
promClient.collectDefaultMetrics({ register });
// Add worker-specific metrics
const activeGamesGauge = new promClient.Gauge({
name: "active_games_count",
help: "Number of active games on this worker",
registers: [register],
});
const connectedClientsGauge = new promClient.Gauge({
name: "connected_clients_count",
help: "Number of connected clients on this worker",
registers: [register],
});
const memoryUsageGauge = new promClient.Gauge({
name: "memory_usage_bytes",
help: "Current memory usage of the worker process in bytes",
registers: [register],
});
// Export the metrics for use in the worker
export const metrics = {
register,
activeGamesGauge,
connectedClientsGauge,
memoryUsageGauge,
// Function to update game-related metrics
updateGameMetrics: (gameManager: GameManager) => {
activeGamesGauge.set(gameManager.activeGames());
connectedClientsGauge.set(gameManager.activeClients());
// Update memory usage metrics
const memoryUsage = process.memoryUsage();
memoryUsageGauge.set(memoryUsage.heapUsed);
},
};
+1 -1
View File
@@ -77,7 +77,7 @@ if [ "$REGION" == "staging" ]; then
fi
echo "Starting new container for ${REGION} environment..."
docker run -d -p 80:80 \
docker run -d -p 80:80 -p 127.0.0.1:9090:9090 \
--restart=always \
$VOLUME_MOUNTS \
--log-driver json-file \