From 5e4b2791aa50e918d473ec1006f8d80bff4f320a Mon Sep 17 00:00:00 2001 From: Evan Date: Fri, 3 Jul 2026 12:30:28 -0700 Subject: [PATCH] perf: reduce core-sim GC churn 42% and add GC-churn profiling to the perf harness (#4494) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary Reduces core-simulation GC churn by **42%** on a 20-game-minute Giant World Map run, and extends the headless full-game perf harness so churn is measurable and regressions are visible. ### 1. GC-churn measurement (`tests/perf/fullgame/GcProfiler.ts`) `npm run perf:game` now reports: - **GC pauses** by kind (minor/major/incremental) via a `PerformanceObserver` on `'gc'` entries, bucketed into tick windows by timestamp (V8 only delivers these entries on a timer task, so they're flushed after the run) - **Allocation rate** per `--window N` ticks (default 1000) from used-heap deltas sampled every tick, so churn can be tracked across game phases - **Top allocating functions** from the V8 sampling heap profiler with `includeObjectsCollectedBy{Major,Minor}GC` — i.e. actual churn including short-lived garbage, not live memory — plus a `.heapprofile` loadable in Chrome DevTools (Memory → Allocation sampling) New flags: `--window N`, `--no-gc-profile`, `--no-alloc-profile`. ### 2. 
Allocation reductions in the hot paths it found | Site | Change | |---|---| | `GameMap.bfs` | inline neighbor enumeration instead of an array per visited tile | | `GameMap`/`Game` | new `forEachNeighborNSWE` — allocation-free iterator matching `neighbors()` N,S,W,E order for order-sensitive callers (`forEachNeighbor` visits W,E,N,S, so substituting it would change sim behavior) | | `PlayerImpl.nearby` / `sharesBorderWith` / `shoreReachableNeighbors` | no per-call neighbor arrays; no materialized shore-tile array | | `PlayerImpl.units(types)` | gather into a reusable scratch buffer, return one exact-size slice (still a fresh snapshot array per call) | | `AiAttackBehavior.maybeAttack` | single pass over border neighbors replacing the `flatMap`/`filter`/`map` chain over every border tile | | `AiAttackBehavior.isBorderingNukedTerritory` | reusable `neighbors4` buffer with early exit | | `SharedWaterCache.build` | allocation-free neighbor iteration | | `SpatialQuery.bfsNearest` | first-minimum scan instead of collect-then-stable-sort (identical result incl. tie-breaking) | ### Results (Giant World Map, 400 bots, 12,000 ticks ≈ 20 game-minutes, seed `perf-default`) | Metric | Before | After | |---|---|---| | Sampled allocations (incl. collected) | 97.7 GB | **56.9 GB (−42%)** | | GC count / total pause | 1,682 / 3,313 ms (1.8% of wall) | 1,058 / 2,087 ms (1.2%) | | Ticks/sec | 66 | 70 | | p99 / max tick | 49.9 ms / 988 ms | 43.5 ms / 689 ms | | Ticks over 100 ms budget | 31 | 19 | ## Determinism Every rewrite preserves exact iteration order (the new NSWE iterator exists precisely for the order-sensitive sites). Verified by identical final game-state hashes on three runs: Giant World Map 12,000 ticks (`67286276735690560`), Giant World Map 2,000 ticks, and World 1,800 ticks. 
## Test plan - [x] Full suite green (1,896 tests) - [x] New tests: `forEachNeighborNSWE` order contract vs `neighbors()` over every tile; `units()` filtering semantics (insertion order, fresh-array guarantee, duplicate types, Set path) - [x] Final-hash equality on 3 seeded headless runs (2 maps) 🤖 Generated with [Claude Code](https://claude.com/claude-code) --------- Co-authored-by: Claude Fable 5 --- src/client/view/GameView.ts | 6 + src/core/execution/nation/SharedWaterCache.ts | 21 +- src/core/execution/utils/AiAttackBehavior.ts | 42 ++- src/core/game/Game.ts | 8 + src/core/game/GameImpl.ts | 6 + src/core/game/GameMap.ts | 42 ++- src/core/game/PlayerImpl.ts | 112 ++++--- src/core/pathfinding/spatial/SpatialQuery.ts | 20 +- tests/NeighborIteration.test.ts | 11 + tests/PlayerImpl.test.ts | 59 ++++ tests/perf/fullgame/FullGamePerf.ts | 153 +++++++++- tests/perf/fullgame/GcProfiler.ts | 276 ++++++++++++++++++ 12 files changed, 655 insertions(+), 101 deletions(-) create mode 100644 tests/perf/fullgame/GcProfiler.ts diff --git a/src/client/view/GameView.ts b/src/client/view/GameView.ts index 3e02af3cb..3e4eb7244 100644 --- a/src/client/view/GameView.ts +++ b/src/client/view/GameView.ts @@ -1172,6 +1172,12 @@ export class GameView implements GameMap { forEachNeighbor(ref: TileRef, callback: (neighbor: TileRef) => void): void { this._map.forEachNeighbor(ref, callback); } + forEachNeighborNSWE( + ref: TileRef, + callback: (neighbor: TileRef) => void, + ): void { + this._map.forEachNeighborNSWE(ref, callback); + } neighbors4(ref: TileRef, out: TileRef[]): number { return this._map.neighbors4(ref, out); } diff --git a/src/core/execution/nation/SharedWaterCache.ts b/src/core/execution/nation/SharedWaterCache.ts index adcc8af16..b2026d43c 100644 --- a/src/core/execution/nation/SharedWaterCache.ts +++ b/src/core/execution/nation/SharedWaterCache.ts @@ -48,17 +48,20 @@ export class SharedWaterCache { let hasOcean = false; const lakes = new Set(); + // The lake set is only 
membership-tested, so neighbor visit order does + // not matter — use the allocation-free iterator. + const visit = (neighbor: number) => { + if (!game.isWater(neighbor)) return; + if (game.isOcean(neighbor)) { + hasOcean = true; + return; + } + const comp = game.getWaterComponent(neighbor); + if (comp !== null) lakes.add(comp); + }; for (const tile of player.borderTiles()) { if (!game.isShore(tile)) continue; - for (const neighbor of game.neighbors(tile)) { - if (!game.isWater(neighbor)) continue; - if (game.isOcean(neighbor)) { - hasOcean = true; - continue; - } - const comp = game.getWaterComponent(neighbor); - if (comp !== null) lakes.add(comp); - } + game.forEachNeighbor(tile, visit); } playerToWater.set(player, { hasOcean, lakes }); diff --git a/src/core/execution/utils/AiAttackBehavior.ts b/src/core/execution/utils/AiAttackBehavior.ts index 70efd5a76..9589046e7 100644 --- a/src/core/execution/utils/AiAttackBehavior.ts +++ b/src/core/execution/utils/AiAttackBehavior.ts @@ -33,6 +33,9 @@ import { import { TransportShipExecution } from "../TransportShipExecution"; import { closestTwoTiles } from "../Util"; +// Reusable neighbor buffer for hot loops; the simulation is single-threaded. +const NEIGHBOR_SCRATCH: TileRef[] = [0, 0, 0, 0]; + export class AiAttackBehavior { private botAttackTroopsSent: number = 0; @@ -52,20 +55,24 @@ export class AiAttackBehavior { throw new Error("not initialized"); } - const border = Array.from(this.player.borderTiles()) - .flatMap((t) => this.game.neighbors(t)) - .filter( - (t) => - this.game.isLand(t) && - !this.game.isImpassable(t) && - this.game.ownerID(t) !== this.player?.smallID(), - ); + // Neighbor visit order matters here: the set's insertion order feeds the + // stable troop-count sort below, so ties keep border-discovery order. 
+ const borderingPlayerSet = new Set(); + let borderHasNonNukedTerraNullius = false; + const smallID = this.player.smallID(); + const visit = (t: number) => { + if (!this.game.isLand(t) || this.game.isImpassable(t)) return; + if (this.game.ownerID(t) === smallID) return; + const owner = this.game.playerBySmallID(this.game.ownerID(t)); + if (owner.isPlayer()) borderingPlayerSet.add(owner); + if (!this.game.hasOwner(t) && !this.game.hasFallout(t)) { + borderHasNonNukedTerraNullius = true; + } + }; + for (const t of this.player.borderTiles()) { + this.game.forEachNeighborNSWE(t, visit); + } const playerNeighbors = this.player.nearby(); - const borderingPlayerSet = new Set( - border - .map((t) => this.game.playerBySmallID(this.game.ownerID(t))) - .filter((o): o is Player => o.isPlayer()), - ); for (const n of playerNeighbors) { if (n.isPlayer()) borderingPlayerSet.add(n); } @@ -81,7 +88,7 @@ export class AiAttackBehavior { // Attack TerraNullius but not nuked territory (direct border or across a river) const hasNonNukedTerraNullius = - border.some((t) => !this.game.hasOwner(t) && !this.game.hasFallout(t)) || + borderHasNonNukedTerraNullius || playerNeighbors.some((n) => !n.isPlayer()); if (hasNonNukedTerraNullius) { if (this.sendAttack(this.game.terraNullius())) return; @@ -548,8 +555,13 @@ export class AiAttackBehavior { return false; } + // Boolean result, so neighbor order doesn't matter; a reused scratch + // buffer keeps this allocation-free and allows early exit. 
+ const nbuf = NEIGHBOR_SCRATCH; for (const tile of this.player.borderTiles()) { - for (const neighbor of this.game.neighbors(tile)) { + const n = this.game.neighbors4(tile, nbuf); + for (let i = 0; i < n; i++) { + const neighbor = nbuf[i]; if ( this.game.isLand(neighbor) && !this.game.hasOwner(neighbor) && diff --git a/src/core/game/Game.ts b/src/core/game/Game.ts index 1a026c724..f2b18c3b2 100644 --- a/src/core/game/Game.ts +++ b/src/core/game/Game.ts @@ -705,6 +705,14 @@ export interface Game extends GameMap { forEachTile(fn: (tile: TileRef) => void): void; // Zero-allocation neighbor iteration (cardinal only) to avoid creating arrays forEachNeighbor(tile: TileRef, callback: (neighbor: TileRef) => void): void; + // Same, but in neighbors() N, S, W, E order — for order-sensitive code. + forEachNeighborNSWE( + tile: TileRef, + callback: (neighbor: TileRef) => void, + ): void; + // Writes the cardinal neighbors of ref into out (W, E, N, S order) and + // returns the count. Reuse out across calls to avoid allocation. 
+ neighbors4(ref: TileRef, out: TileRef[]): number; // Zero-allocation neighbor iteration for performance-critical cluster calculation // Alternative to neighborsWithDiag() that returns arrays // Avoids creating intermediate arrays and uses a callback for better performance diff --git a/src/core/game/GameImpl.ts b/src/core/game/GameImpl.ts index db3a9e57c..98e70de5c 100644 --- a/src/core/game/GameImpl.ts +++ b/src/core/game/GameImpl.ts @@ -1134,6 +1134,12 @@ export class GameImpl implements Game { forEachNeighbor(tile: TileRef, callback: (neighbor: TileRef) => void): void { this._map.forEachNeighbor(tile, callback); } + forEachNeighborNSWE( + tile: TileRef, + callback: (neighbor: TileRef) => void, + ): void { + this._map.forEachNeighborNSWE(tile, callback); + } neighbors4(ref: TileRef, out: TileRef[]): number { return this._map.neighbors4(ref, out); } diff --git a/src/core/game/GameMap.ts b/src/core/game/GameMap.ts index fa9e58184..3bc0013ac 100644 --- a/src/core/game/GameMap.ts +++ b/src/core/game/GameMap.ts @@ -39,6 +39,14 @@ export interface GameMap { neighbors(ref: TileRef): TileRef[]; // Zero-allocation neighbor iteration (cardinal only), in W, E, N, S order. forEachNeighbor(ref: TileRef, callback: (neighbor: TileRef) => void): void; + // Zero-allocation neighbor iteration (cardinal only) in the same N, S, W, E + // order as neighbors(). Use this in order-sensitive code — anything feeding + // sets/arrays whose iteration order affects the simulation — where + // forEachNeighbor's W, E, N, S order would change behavior. + forEachNeighborNSWE( + ref: TileRef, + callback: (neighbor: TileRef) => void, + ): void; // Writes the cardinal neighbors of ref into out (W, E, N, S order) and // returns the count. out must have length >= 4; reuse it across calls to // avoid allocation in hot loops. 
@@ -395,6 +403,19 @@ export class GameMapImpl implements GameMap { if (ref < (this.height_ - 1) * w) callback(ref + w); } + forEachNeighborNSWE( + ref: TileRef, + callback: (neighbor: TileRef) => void, + ): void { + const w = this.width_; + const x = this.refToX[ref]; + + if (ref >= w) callback(ref - w); + if (ref < (this.height_ - 1) * w) callback(ref + w); + if (x !== 0) callback(ref - 1); + if (x !== w - 1) callback(ref + 1); + } + neighbors4(ref: TileRef, out: TileRef[]): number { const w = this.width_; const x = this.refToX[ref]; @@ -480,15 +501,24 @@ export class GameMapImpl implements GameMap { q.push(tile); } + // Neighbors are enumerated inline in the same order as neighbors() to + // avoid allocating an array per visited tile. + const w = this.width_; + const southLimit = (this.height_ - 1) * w; + const visit = (n: TileRef) => { + if (!seen.has(n) && filter(this, n)) { + seen.add(n); + q.push(n); + } + }; while (q.length > 0) { const curr = q.pop(); if (curr === undefined) continue; - for (const n of this.neighbors(curr)) { - if (!seen.has(n) && filter(this, n)) { - seen.add(n); - q.push(n); - } - } + const x = this.refToX[curr]; + if (curr >= w) visit(curr - w); + if (curr < southLimit) visit(curr + w); + if (x !== 0) visit(curr - 1); + if (x !== w - 1) visit(curr + 1); } return seen; } diff --git a/src/core/game/PlayerImpl.ts b/src/core/game/PlayerImpl.ts index c65f58fda..41aba0a35 100644 --- a/src/core/game/PlayerImpl.ts +++ b/src/core/game/PlayerImpl.ts @@ -84,6 +84,13 @@ const EMPTY_ATTACK_UPDATES: AttackUpdate[] = []; const EMPTY_ALLIANCE_VIEWS: AllianceView[] = []; const EMPTY_EMOJIS: EmojiMessage[] = []; const EMPTY_EMBARGOES = new Set(); +// Reusable buffers for hot loops. The simulation is single-threaded and these +// are fully consumed before any re-entrant call, so sharing is safe. +const NEIGHBOR_SCRATCH: TileRef[] = [0, 0, 0, 0]; +const UNITS_SCRATCH: Unit[] = []; +// N, S, W, E — the sampling directions used by shoreReachableNeighbors(). 
+const SHORE_DIRECTIONS_DX = [0, 0, -1, 1]; +const SHORE_DIRECTIONS_DY = [-1, 1, 0, 0]; Object.freeze(EMPTY_NUMBER_ARRAY); Object.freeze(EMPTY_STRING_ARRAY); Object.freeze(EMPTY_ATTACK_UPDATES); @@ -361,53 +368,41 @@ export class PlayerImpl implements Player { return this._units; } + // Hot path. Matches are gathered into a reusable scratch buffer and + // copied out with an exact-size slice, so each call allocates exactly + // one right-sized result array. + const scratch = UNITS_SCRATCH; + let n = 0; + // Fast paths for common small arity calls to avoid Set allocation. if (len === 1) { const t0 = types[0]!; - const out: Unit[] = []; for (const u of this._units) { - if (u.type() === t0) out.push(u); + if (u.type() === t0) scratch[n++] = u; } - return out; - } - - if (len === 2) { + } else if (len === 2) { const t0 = types[0]!; const t1 = types[1]!; - if (t0 === t1) { - const out: Unit[] = []; - for (const u of this._units) { - if (u.type() === t0) out.push(u); - } - return out; - } - const out: Unit[] = []; for (const u of this._units) { const t = u.type(); - if (t === t0 || t === t1) out.push(u); + if (t === t0 || t === t1) scratch[n++] = u; } - return out; - } - - if (len === 3) { + } else if (len === 3) { const t0 = types[0]!; const t1 = types[1]!; const t2 = types[2]!; // Keep semantics identical for duplicates in types by using direct comparisons. 
- const out: Unit[] = []; for (const u of this._units) { const t = u.type(); - if (t === t0 || t === t1 || t === t2) out.push(u); + if (t === t0 || t === t1 || t === t2) scratch[n++] = u; + } + } else { + const ts = new Set(types); + for (const u of this._units) { + if (ts.has(u.type())) scratch[n++] = u; } - return out; } - - const ts = new Set(types); - const out: Unit[] = []; - for (const u of this._units) { - if (ts.has(u.type())) out.push(u); - } - return out; + return scratch.slice(0, n); } private numUnitsConstructed: Partial> = {}; @@ -454,9 +449,13 @@ export class PlayerImpl implements Player { } sharesBorderWith(other: Player | TerraNullius): boolean { + const map = this.mg.map(); + const otherID = other.smallID(); + const nbuf = NEIGHBOR_SCRATCH; for (const border of this._borderTiles) { - for (const neighbor of this.mg.map().neighbors(border)) { - if (this.mg.map().ownerID(neighbor) === other.smallID()) { + const n = map.neighbors4(border, nbuf); + for (let i = 0; i < n; i++) { + if (map.ownerID(nbuf[i]) === otherID) { return true; } } @@ -478,26 +477,23 @@ export class PlayerImpl implements Player { nearby(): (Player | TerraNullius)[] { const ns: Set = new Set(); - for (const border of this.borderTiles()) { - for (const neighbor of this.mg.map().neighbors(border)) { - if ( - this.mg.map().isLand(neighbor) && - !this.mg.map().isImpassable(neighbor) - ) { - if ( - !this.mg.map().hasOwner(neighbor) && - this.mg.map().hasFallout(neighbor) - ) { - continue; - } - const owner = this.mg.map().ownerID(neighbor); - if (owner !== this.smallID()) { - ns.add( - this.mg.playerBySmallID(owner) satisfies Player | TerraNullius, - ); - } + const map = this.mg.map(); + const smallID = this.smallID(); + const visit = (neighbor: TileRef) => { + if (map.isLand(neighbor) && !map.isImpassable(neighbor)) { + if (!map.hasOwner(neighbor) && map.hasFallout(neighbor)) { + return; + } + const owner = map.ownerID(neighbor); + if (owner !== smallID) { + ns.add( + 
this.mg.playerBySmallID(owner) satisfies Player | TerraNullius, + ); } } + }; + for (const border of this.borderTiles()) { + map.forEachNeighborNSWE(border, visit); } for (const n of this.shoreReachableNeighbors()) { ns.add(n); @@ -511,21 +507,19 @@ export class PlayerImpl implements Player { private shoreReachableNeighbors(): Set { const ns: Set = new Set(); const map = this.mg.map(); - const shores = Array.from(this.borderTiles()).filter((t) => map.isShore(t)); - const directions: [number, number][] = [ - [0, -1], - [0, 1], - [-1, 0], - [1, 0], - ]; - for (let i = 0; i < shores.length; i += 10) { - const border = shores[i]; + let shoreIdx = 0; + for (const border of this.borderTiles()) { + if (!map.isShore(border)) continue; + // Visit every 10th shore tile. + if (shoreIdx++ % 10 !== 0) continue; const bx = map.x(border); const by = map.y(border); - for (const [dx, dy] of directions) { + for (let d = 0; d < 4; d++) { + const dx = SHORE_DIRECTIONS_DX[d]; + const dy = SHORE_DIRECTIONS_DY[d]; // Only follow directions that immediately enter water; land-adjacent // directions are already covered by the direct neighbors() loop. const x1 = bx + dx; diff --git a/src/core/pathfinding/spatial/SpatialQuery.ts b/src/core/pathfinding/spatial/SpatialQuery.ts index 9128dd0b4..7b4fe7cc9 100644 --- a/src/core/pathfinding/spatial/SpatialQuery.ts +++ b/src/core/pathfinding/spatial/SpatialQuery.ts @@ -32,25 +32,25 @@ export class SpatialQuery { predicate: (t: TileRef) => boolean, ): TileRef | null { const map = this.game.map(); - const candidates: TileRef[] = []; + // Strict < keeps the first candidate on distance ties, so the winner + // depends only on the deterministic BFS visit order. 
+ let best: TileRef | null = null; + let bestDist = Infinity; for (const tile of map.bfs( from, (_, t) => map.manhattanDist(from, t) <= maxDist, )) { if (predicate(tile)) { - candidates.push(tile); + const dist = map.manhattanDist(from, tile); + if (dist < bestDist) { + best = tile; + bestDist = dist; + } } } - if (candidates.length === 0) return null; - - // Sort by Manhattan distance to find actual nearest - candidates.sort( - (a, b) => map.manhattanDist(from, a) - map.manhattanDist(from, b), - ); - - return candidates[0]; + return best; } /** diff --git a/tests/NeighborIteration.test.ts b/tests/NeighborIteration.test.ts index 754e66baf..3c95f5160 100644 --- a/tests/NeighborIteration.test.ts +++ b/tests/NeighborIteration.test.ts @@ -70,6 +70,17 @@ describe("Neighbor iteration", () => { }); }); + // forEachNeighborNSWE's contract is exact order equality with neighbors(), + // including at edges and corners, so order-sensitive code can use the two + // interchangeably. + test("forEachNeighborNSWE matches map.neighbors() exactly (contents and order) for every tile", () => { + game.forEachTile((tile) => { + const out: TileRef[] = []; + game.forEachNeighborNSWE(tile, (n) => out.push(n)); + expect(out).toEqual(game.map().neighbors(tile)); + }); + }); + test("forEachNeighborWithDiag visits all 8 neighbors in dx-major order", () => { const tile = game.ref(5, 7); expect(collectNeighborsWithDiag(tile)).toEqual([ diff --git a/tests/PlayerImpl.test.ts b/tests/PlayerImpl.test.ts index 1cb05ea19..00f4cd87f 100644 --- a/tests/PlayerImpl.test.ts +++ b/tests/PlayerImpl.test.ts @@ -76,6 +76,65 @@ describe("PlayerImpl", () => { expect(cityToUpgrade).toBe(false); }); + describe("units() type filtering", () => { + beforeEach(() => { + player.buildUnit(UnitType.City, game.ref(0, 0), {}); + player.buildUnit(UnitType.DefensePost, game.ref(11, 0), {}); + player.buildUnit(UnitType.City, game.ref(0, 11), {}); + player.buildUnit(UnitType.MissileSilo, game.ref(11, 11), {}); + }); + + // 
Reference implementation: filter _units preserving insertion order. + function expected(...types: UnitType[]) { + const ts = new Set(types); + return player.units().filter((u) => ts.has(u.type())); + } + + test("single type returns matching units in insertion order", () => { + expect(player.units(UnitType.City)).toEqual(expected(UnitType.City)); + expect(player.units(UnitType.City)).toHaveLength(2); + }); + + test("returns a fresh array, not the internal or shared buffer", () => { + const a = player.units(UnitType.City); + const b = player.units(UnitType.City); + expect(a).not.toBe(b); + expect(a).not.toBe(player.units()); + // Mutating one result must not affect a later query. + a.length = 0; + expect(player.units(UnitType.City)).toHaveLength(2); + }); + + test("two and three types return the union in insertion order", () => { + expect(player.units(UnitType.City, UnitType.MissileSilo)).toEqual( + expected(UnitType.City, UnitType.MissileSilo), + ); + expect( + player.units(UnitType.City, UnitType.DefensePost, UnitType.MissileSilo), + ).toEqual( + expected(UnitType.City, UnitType.DefensePost, UnitType.MissileSilo), + ); + // Duplicate types don't duplicate results. + expect(player.units(UnitType.City, UnitType.City)).toEqual( + expected(UnitType.City), + ); + }); + + test("four or more types (Set path) and no match", () => { + expect( + player.units( + UnitType.City, + UnitType.DefensePost, + UnitType.MissileSilo, + UnitType.Port, + ), + ).toEqual( + expected(UnitType.City, UnitType.DefensePost, UnitType.MissileSilo), + ); + expect(player.units(UnitType.Port)).toEqual([]); + }); + }); + test("Can't send alliance requests when dead", () => { // conquer other const otherTiles = other.tiles(); diff --git a/tests/perf/fullgame/FullGamePerf.ts b/tests/perf/fullgame/FullGamePerf.ts index df2e06f08..d097394c6 100644 --- a/tests/perf/fullgame/FullGamePerf.ts +++ b/tests/perf/fullgame/FullGamePerf.ts @@ -10,6 +10,10 @@ * 3. 
Top functions by self time from the V8 sampling profiler, plus a * .cpuprofile loadable in Chrome DevTools (Performance tab) as a * flame graph. + * 4. GC churn: GC pause counts/time by kind, allocation rate per + * time window across the game, and top allocating functions from the + * V8 sampling heap profiler (plus a .heapprofile loadable in Chrome + * DevTools > Memory > Allocation sampling). * * The run is deterministic for a given --seed/--map/--bots, and the final * game-state hash is printed so optimizations can be verified to not change @@ -17,8 +21,9 @@ * * Usage: * npm run perf:game -- [--map world] [--ticks 1800] [--bots 400] - * [--seed perf-default] [--top 30] + * [--seed perf-default] [--top 30] [--window 1000] * [--no-cpu-profile] [--no-exec-profile] + * [--no-gc-profile] [--no-alloc-profile] */ import fs from "fs"; import path from "path"; @@ -40,6 +45,14 @@ import { GameRunner } from "../../../src/core/GameRunner"; import { PseudoRandom } from "../../../src/core/PseudoRandom"; import { GameConfig, GameStartInfo } from "../../../src/core/Schemas"; import { simpleHash } from "../../../src/core/Util"; +import { + AllocationSampler, + GcTracker, + HeapSampler, + HeapWindow, + summarizeAllocationProfile, + summarizeGcEvents, +} from "./GcProfiler"; import { NodeGameMapLoader } from "./NodeGameMapLoader"; import { CpuProfiler, @@ -63,8 +76,11 @@ interface Options { nations: "default" | "disabled" | number; seed: string; top: number; + window: number; cpuProfile: boolean; execProfile: boolean; + gcProfile: boolean; + allocProfile: boolean; } function resolveMap(name: string): GameMapType { @@ -88,8 +104,11 @@ function parseArgs(argv: string[]): Options { nations: "default", seed: "perf-default", top: 30, + window: 1000, cpuProfile: true, execProfile: true, + gcProfile: true, + allocProfile: true, }; for (let i = 0; i < argv.length; i++) { const arg = argv[i]; @@ -120,12 +139,21 @@ function parseArgs(argv: string[]): Options { case "--top": opts.top = 
parseInt(next(), 10); break; + case "--window": + opts.window = parseInt(next(), 10); + break; case "--no-cpu-profile": opts.cpuProfile = false; break; case "--no-exec-profile": opts.execProfile = false; break; + case "--no-gc-profile": + opts.gcProfile = false; + break; + case "--no-alloc-profile": + opts.allocProfile = false; + break; default: throw new Error(`unknown argument: ${arg}`); } @@ -139,6 +167,11 @@ function fmtMs(ms: number): string { return ms >= 100 ? ms.toFixed(0) : ms >= 10 ? ms.toFixed(1) : ms.toFixed(2); } +function fmtMB(bytes: number): string { + const mb = bytes / 1024 / 1024; + return mb >= 100 ? mb.toFixed(0) : mb >= 10 ? mb.toFixed(1) : mb.toFixed(2); +} + function table(headers: string[], rows: string[][]): string { const widths = headers.map((h, c) => Math.max(h.length, ...rows.map((r) => r[c].length)), @@ -235,6 +268,10 @@ async function main(): Promise { ); runner.init(); + const gcTracker = opts.gcProfile ? new GcTracker() : null; + gcTracker?.start(); + const heapSampler = opts.gcProfile ? new HeapSampler() : null; + let turnNumber = 0; const runTick = (stats: TickStats): boolean => { runner.addTurn({ turnNumber: turnNumber++, intents: [] }); @@ -242,6 +279,7 @@ async function main(): Promise { const start = performance.now(); const ok = runner.executeNextTick(); stats.record(tick, performance.now() - start); + heapSampler?.tick(); return ok && fatalError === undefined; }; @@ -263,14 +301,21 @@ async function main(): Promise { `${game.players().filter((p) => p.isAlive()).length} players spawned.`, ); - // Main game phase, under the CPU profiler. + heapSampler?.closeWindow("spawn"); + + // Main game phase, under the CPU profiler and allocation sampler. const cpuProfiler = opts.cpuProfile ? new CpuProfiler() : null; if (cpuProfiler) { await cpuProfiler.start(); } + const allocSampler = opts.allocProfile ? 
new AllocationSampler() : null; + if (allocSampler) { + await allocSampler.start(); + } const gameStats = new TickStats(); const gameStart_ = performance.now(); let heapPeak = 0; + let windowStartTick = game.ticks(); for (let i = 0; i < opts.ticks; i++) { if (!runTick(gameStats)) { console.error(`game errored at tick ${game.ticks()}:\n${fatalError}`); @@ -280,9 +325,15 @@ async function main(): Promise { if (i % 50 === 0) { heapPeak = Math.max(heapPeak, process.memoryUsage().heapUsed); } + if ((i + 1) % opts.window === 0 || i === opts.ticks - 1) { + heapSampler?.closeWindow(`${windowStartTick}-${game.ticks() - 1}`); + windowStartTick = game.ticks(); + } } const gamePhaseMs = performance.now() - gameStart_; const profile = cpuProfiler ? await cpuProfiler.stop() : null; + const allocProfile = allocSampler ? await allocSampler.stop() : null; + const gcEvents = gcTracker ? await gcTracker.stop() : null; // ── Report ── @@ -355,6 +406,104 @@ async function main(): Promise { ); } + if (gcEvents && heapSampler) { + const gamePhaseEvents = gcEvents.filter((e) => e.startTime >= gameStart_); + const gc = summarizeGcEvents(gamePhaseEvents); + + console.log(`\n--- GC (game phase) ---`); + console.log( + table( + ["kind", "count", "total ms", "avg ms", "max ms"], + (["minor", "major", "incremental", "weakcb", "all"] as const).map( + (kind) => [ + kind, + String(gc[kind].count), + fmtMs(gc[kind].totalMs), + fmtMs(gc[kind].count > 0 ? gc[kind].totalMs / gc[kind].count : 0), + fmtMs(gc[kind].maxMs), + ], + ), + ), + ); + console.log( + `GC time: ${fmtMs(gc.all.totalMs)}ms = ` + + `${((gc.all.totalMs * 100) / gamePhaseMs).toFixed(1)}% of game-phase wall time`, + ); + + console.log(`\n--- Allocation & GC by window ---`); + const windowRow = (w: HeapWindow): string[] => { + const wgc = summarizeGcEvents( + gcTracker!.eventsBetween(w.startTime, w.endTime), + ); + return [ + w.label, + fmtMB(w.allocatedBytes), + w.ticks > 0 ? 
((w.allocatedBytes / w.ticks) * 1e-3).toFixed(0) : "0", + String(wgc.minor.count), + fmtMs(wgc.minor.totalMs), + String(wgc.major.count), + fmtMs(wgc.major.totalMs), + fmtMs(wgc.incremental.totalMs), + fmtMB(w.heapUsedEnd), + ]; + }; + console.log( + table( + [ + "ticks", + "alloc MB", + "KB/tick", + "minor#", + "minor ms", + "major#", + "major ms", + "incr ms", + "heap MB", + ], + heapSampler.all().map(windowRow), + ), + ); + console.log( + `(alloc = sum of positive used-heap deltas between ticks; a lower bound on churn)`, + ); + } + + if (allocProfile) { + const { sites, totalBytes } = summarizeAllocationProfile( + allocProfile, + PROJECT_ROOT, + ); + console.log( + `\n--- Top allocating functions (game phase, sampled; ` + + `~${fmtMB(totalBytes)} MB total incl. collected) ---`, + ); + console.log( + table( + ["alloc MB", "%", "function", "location"], + sites + .slice(0, opts.top) + .map((s) => [ + fmtMB(s.selfBytes), + s.selfPct.toFixed(1), + s.functionName, + s.location, + ]), + ), + ); + + const outDir = path.join(PROJECT_ROOT, "tests/perf/output"); + fs.mkdirSync(outDir, { recursive: true }); + const outFile = path.join( + outDir, + `fullgame-${opts.map.replace(/\W+/g, "_")}-${opts.seed}.heapprofile`, + ); + fs.writeFileSync(outFile, JSON.stringify(allocProfile)); + console.log( + `Heap profile written to ${path.relative(PROJECT_ROOT, outFile)}` + + ` (open in Chrome DevTools > Memory > Allocation sampling)`, + ); + } + if (profile) { console.log(`\n--- Top functions by self time (V8 sampling profiler) ---`); const fns = summarizeCpuProfile(profile, PROJECT_ROOT); diff --git a/tests/perf/fullgame/GcProfiler.ts b/tests/perf/fullgame/GcProfiler.ts new file mode 100644 index 000000000..31fbf4420 --- /dev/null +++ b/tests/perf/fullgame/GcProfiler.ts @@ -0,0 +1,276 @@ +import { Session } from "node:inspector"; +import { PerformanceObserver } from "node:perf_hooks"; +import v8 from "node:v8"; + +// ── GC pause tracking (PerformanceObserver on 'gc' entries) ── + 
+export type GcKind = "minor" | "major" | "incremental" | "weakcb"; + +const KIND_NAMES: Record<number, GcKind> = { + 1: "minor", // NODE_PERFORMANCE_GC_MINOR (scavenge) + 4: "major", // NODE_PERFORMANCE_GC_MAJOR (mark-sweep-compact) + 8: "incremental", // NODE_PERFORMANCE_GC_INCREMENTAL (marking steps) + 16: "weakcb", // NODE_PERFORMANCE_GC_WEAKCB (weak callbacks) +}; + +export interface GcEvent { + kind: GcKind; + /** When the GC started, on the performance.now() timeline. */ + startTime: number; + durationMs: number; +} + +export interface GcKindSummary { + count: number; + totalMs: number; + maxMs: number; +} + +export type GcSummary = Record<GcKind, GcKindSummary> & { + all: GcKindSummary; +}; + +export function summarizeGcEvents(events: GcEvent[]): GcSummary { + const empty = (): GcKindSummary => ({ count: 0, totalMs: 0, maxMs: 0 }); + const summary: GcSummary = { + minor: empty(), + major: empty(), + incremental: empty(), + weakcb: empty(), + all: empty(), + }; + for (const e of events) { + for (const bucket of [summary[e.kind], summary.all]) { + bucket.count++; + bucket.totalMs += e.durationMs; + bucket.maxMs = Math.max(bucket.maxMs, e.durationMs); + } + } + return summary; +} + +/** + * Records every GC the process performs, with timestamps, so pauses can be + * attributed to time windows after the fact. The tick loop is synchronous and + * V8 only dispatches buffered GC entries to observers on a later timer task + * (setImmediate and takeRecords() both see nothing), so stop() awaits timer + * ticks until no new entries arrive. + */ +export class GcTracker { + private observer: PerformanceObserver | null = null; + readonly events: GcEvent[] = []; + + start(): void { + this.observer = new PerformanceObserver((list) => { + for (const entry of list.getEntries()) { + // Node's PerformanceEntry has .detail; the bundled DOM type does not. + const detail = (entry as { detail?: { kind?: number } }).detail; + const kind = KIND_NAMES[detail?.kind ?? 
0]; + if (kind === undefined) continue; + this.events.push({ + kind, + startTime: entry.startTime, + durationMs: entry.duration, + }); + } + }); + this.observer.observe({ entryTypes: ["gc"] }); + } + + async stop(): Promise { + let idleRounds = 0; + let lastCount = this.events.length; + while (idleRounds < 3) { + await new Promise((resolve) => setTimeout(resolve, 0)); + if (this.events.length === lastCount) { + idleRounds++; + } else { + idleRounds = 0; + lastCount = this.events.length; + } + } + this.observer?.disconnect(); + this.observer = null; + return this.events; + } + + /** Events whose start falls in [fromTime, toTime) on the performance.now() timeline. */ + eventsBetween(fromTime: number, toTime: number): GcEvent[] { + return this.events.filter( + (e) => e.startTime >= fromTime && e.startTime < toTime, + ); + } +} + +// ── Per-window heap sampling (allocation-rate proxy) ── + +export interface HeapWindow { + label: string; + ticks: number; + wallMs: number; + /** + * Sum of positive used-heap deltas between consecutive ticks. This is a + * lower bound on bytes allocated (allocation and collection inside a single + * tick cancel out), but tracks churn trends well at ~10ms ticks. + */ + allocatedBytes: number; + heapUsedEnd: number; + /** Filled in after the run from GcTracker events. */ + startTime: number; + endTime: number; +} + +/** + * Call tick() after every simulation tick and closeWindow() at reporting + * boundaries. Uses v8.getHeapStatistics() (no /proc reads, unlike + * process.memoryUsage()). 
+ */ +export class HeapSampler { + private windows: HeapWindow[] = []; + private lastHeapUsed: number; + private windowStartTime: number; + private windowAllocated = 0; + private windowTicks = 0; + + constructor() { + this.lastHeapUsed = v8.getHeapStatistics().used_heap_size; + this.windowStartTime = performance.now(); + } + + tick(): void { + const used = v8.getHeapStatistics().used_heap_size; + const delta = used - this.lastHeapUsed; + if (delta > 0) { + this.windowAllocated += delta; + } + this.lastHeapUsed = used; + this.windowTicks++; + } + + closeWindow(label: string): HeapWindow { + const now = performance.now(); + const window: HeapWindow = { + label, + ticks: this.windowTicks, + wallMs: now - this.windowStartTime, + allocatedBytes: this.windowAllocated, + heapUsedEnd: v8.getHeapStatistics().used_heap_size, + startTime: this.windowStartTime, + endTime: now, + }; + this.windows.push(window); + this.windowStartTime = now; + this.windowAllocated = 0; + this.windowTicks = 0; + return window; + } + + all(): HeapWindow[] { + return this.windows; + } +} + +// ── V8 sampling heap profiler (allocation sites, includes collected objects) ── + +interface SamplingHeapProfileNode { + callFrame: { + functionName: string; + url: string; + lineNumber: number; + }; + selfSize: number; + children?: SamplingHeapProfileNode[]; +} + +export interface SamplingHeapProfile { + head: SamplingHeapProfileNode; + samples: unknown[]; +} + +export interface AllocationSite { + functionName: string; + location: string; + selfBytes: number; + selfPct: number; +} + +/** + * Samples allocations (including objects already collected, i.e. churn) and + * attributes bytes to the allocating function. Sampled — low overhead, sizes + * are statistical estimates. + */ +export class AllocationSampler { + private session = new Session(); + + private post(method: string, params?: object): Promise { + return new Promise((resolve, reject) => { + this.session.post(method, params, (err, result) => + err ? 
reject(err) : resolve(result), + ); + }); + } + + async start(samplingIntervalBytes = 65536): Promise { + this.session.connect(); + await this.post("HeapProfiler.enable"); + await this.post("HeapProfiler.startSampling", { + samplingInterval: samplingIntervalBytes, + includeObjectsCollectedByMajorGC: true, + includeObjectsCollectedByMinorGC: true, + }); + } + + async stop(): Promise { + const { profile } = (await this.post("HeapProfiler.stopSampling")) as { + profile: SamplingHeapProfile; + }; + this.session.disconnect(); + return profile; + } +} + +/** Aggregates self-allocated bytes per function from a sampling heap profile. */ +export function summarizeAllocationProfile( + profile: SamplingHeapProfile, + projectRoot: string, +): { sites: AllocationSite[]; totalBytes: number } { + const bySite = new Map(); + let totalBytes = 0; + + const visit = (node: SamplingHeapProfileNode): void => { + if (node.selfSize > 0) { + totalBytes += node.selfSize; + const { functionName, url, lineNumber } = node.callFrame; + const name = functionName || "(anonymous)"; + let location = url.replace(/^file:\/\//, ""); + if (location.startsWith(projectRoot)) { + location = location.slice(projectRoot.length + 1); + } + if (location !== "" && lineNumber > 0) { + location += `:${lineNumber + 1}`; + } + const key = `${name}@${location}`; + const site = bySite.get(key); + if (site) { + site.selfBytes += node.selfSize; + } else { + bySite.set(key, { + functionName: name, + location, + selfBytes: node.selfSize, + } as AllocationSite); + } + } + for (const child of node.children ?? []) { + visit(child); + } + }; + visit(profile.head); + + const sites = [...bySite.values()]; + for (const site of sites) { + site.selfPct = totalBytes > 0 ? (site.selfBytes * 100) / totalBytes : 0; + } + sites.sort((a, b) => b.selfBytes - a.selfBytes); + return { sites, totalBytes }; +}