Incremental GPU scatter recompute for tile borders (#4166)

## Description:

Incremental GPU border recompute — sequel to #4159.

On a 10-year-old low-end Chromebook this increased performance by ~5 fps; I'm
now able to get 40 fps on GWM.

`BorderComputePass` previously re-ran its fragment shader over every tile on
the map every time any input changed (tile flip, highlight, relation,
defense post). Cost was O(mapW × mapH) per invalidation, and tile flips
invalidate it ~every render frame in live play.

This PR adds `BorderScatterPass`, which runs the same fragment shader but
rasterizes only one POINT per dirty tile (plus its 4 cardinal neighbors, to
cover the cardinal-neighbor read in the border shader). Cost is O(dirty
tiles) regardless of map size or spatial distribution.

### What changed

- New `BorderScatterPass` — owns its own FBO, VAO, and instance buffer;
  shares the border fragment shader with `BorderComputePass` so the two
  paths can't diverge in output.
- `BorderComputePass.draw()` now picks per frame:
  - **Full recompute** — when `globalDirty` is set by highlight / relation /
    defense-post changes (those affect tiles across the whole map) or by a
    full tile upload via `markGlobalDirty()`.
  - **Scatter** — when only per-tile patches have been queued via
    `patchTile()`.
- `TerritoryPass.flushTileTexture()` now returns `"none" | "full" |
  "scatter"` instead of `boolean`, so the renderer can pick the right
  downstream invalidation:
  - `"full"` → `borderPass.markGlobalDirty()` (full tile upload supersedes
    per-tile patches).
  - `"scatter"` → no-op; per-tile patches were already pushed via the
    wired `borderPatchConsumer` callback when the tile changes were queued
    (delta upload or drip drain).
- Renderer wires `territoryPass.setBorderPatchConsumer((x, y) =>
  borderPass.patchTile(x, y))` so every per-tile scatter write to `tileTex`
  also schedules an incremental border recompute for that tile + its
  neighbors.

### Known limitation

Highlight-thicken rings (within `uHighlightThicken` of a changed tile) are
NOT incrementally repainted — they'll lag visually until the next full
recompute. In practice this is short-lived (the next highlight change or
seek triggers a full recompute) and not visible during normal play; the
trade-off is documented in the `BorderScatterPass` header.


## Please complete the following:

- [x] I have added screenshots for all UI updates
- [x] I process any text displayed to the user through translateText()
and I've added it to the en.json file
- [x] I have added relevant tests to the test directory

## Please put your Discord username so you can be contacted if a bug or
regression is found:

evan
This commit is contained in:
Evan
2026-06-05 13:29:50 -07:00
committed by GitHub
parent d1ce199a52
commit 075547b7b6
5 changed files with 365 additions and 40 deletions
+10 -2
View File
@@ -333,6 +333,11 @@ export class GPURenderer {
this.skinAnchorTex,
this.settings,
);
// Route per-tile changes to the border pass so it can scatter-recompute
// just the affected tiles instead of rebuilding the whole map.
this.territoryPass.setBorderPatchConsumer((x, y) =>
this.borderPass.patchTile(x, y),
);
// --- Spawn overlay (needs tileTex) ---
this.spawnOverlayPass = new SpawnOverlayPass(
@@ -1194,8 +1199,11 @@ export class GPURenderer {
} else {
this.territoryPass.drainDripBucket();
}
if (this.territoryPass.flushTileTexture())
this.borderPass.notifyTilesChanged();
// Full uploads need a full border recompute; scatter uploads already
// pushed per-tile border patches via the wired `borderPatchConsumer`.
if (this.territoryPass.flushTileTexture() === "full") {
this.borderPass.markGlobalDirty();
}
this.trailPass.flushTexture();
this.heatManager.updateHeat();
}
@@ -22,6 +22,7 @@ import {
shaderSrc,
} from "../utils/GlUtils";
import { TILE_DEFINES } from "../utils/TileCodec";
import { BorderScatterPass } from "./BorderScatterPass";
const MAX_DEFENSE_POSTS = 64;
@@ -53,13 +54,20 @@ export class BorderComputePass {
private uDefensePostRange: WebGLUniformLocation;
private highlightOwner = 0;
/** True when any input has changed since last draw. Starts true so first frame computes. */
private dirty = true;
/**
* True when something that affects ALL borders (highlight owner, relation
* matrix, defense posts) has changed since the last draw. Forces a full
* recompute next frame. Starts true so the first frame computes.
*/
private globalDirty = true;
/** Packed defense post data: [x, y, ownerID, 0, x, y, ownerID, 0, ...] */
private defensePostData = new Float32Array(MAX_DEFENSE_POSTS * 4);
private defensePostCount = 0;
/** Incremental per-tile recompute. Used between full recomputes. */
private scatter!: BorderScatterPass;
constructor(
gl: WebGL2RenderingContext,
mapW: number,
@@ -142,6 +150,16 @@ export class BorderComputePass {
// Store tileTex reference for binding
this._tileTex = tileTex;
this.scatter = new BorderScatterPass(
gl,
mapW,
mapH,
this.borderTex,
tileTex,
this.relationTex,
settings,
);
}
private _tileTex: WebGLTexture;
@@ -150,7 +168,8 @@ export class BorderComputePass {
setHighlightOwner(ownerID: number): void {
if (ownerID === this.highlightOwner) return;
this.highlightOwner = ownerID;
this.dirty = true;
this.scatter.setHighlightOwner(ownerID);
this.globalDirty = true;
}
/**
@@ -173,7 +192,7 @@ export class BorderComputePass {
gl.UNSIGNED_BYTE,
data,
);
this.dirty = true;
this.globalDirty = true;
}
/** Update defense post positions for checkerboard proximity. */
@@ -189,12 +208,27 @@ export class BorderComputePass {
data[off + 3] = 0;
}
this.defensePostCount = count;
this.dirty = true;
this.scatter.setDefensePostData(data, count);
this.globalDirty = true;
}
/** Notify that the tile texture has been updated (ownership may have changed). */
notifyTilesChanged(): void {
this.dirty = true;
/**
* Force a full recompute next draw. Use this when tile state has been
* replaced wholesale (initial load, seek) — individual `patchTile` calls
* would be too many to be cheaper than rebuilding the whole map.
*/
markGlobalDirty(): void {
this.globalDirty = true;
}
/**
* Notify that one tile changed owner. Schedules incremental border recompute
* for that tile + its 4 cardinal neighbors. Cheap: ~5 points per call.
* Caller is responsible for ensuring tileTex contains the new state before
* the next draw — TerritoryPass.flushTileTexture takes care of that.
*
* Patches only accumulate here; nothing is drawn until `draw()` runs. If a
* full recompute is pending at that point, `draw()` discards the queued
* patches because the full pass already covers every tile.
*
* @param x Tile column of the changed tile.
* @param y Tile row of the changed tile.
*/
patchTile(x: number, y: number): void {
this.scatter.pushWithNeighbors(x, y);
}
/** The border buffer texture (RG8, tile resolution). */
@@ -203,35 +237,46 @@ export class BorderComputePass {
}
/**
* Compute border flags for the current frame. Call before MapOverlayPass and stamp overlay.
* Leaves the GL state with its own FBO bound — caller must restore FBO and viewport.
* Update border flags for the current frame. Either a full recompute (when
* globalDirty is set by highlight/relation/defense-post changes) or a
* scatter of the per-tile patches queued via `patchTile`.
*
* Exit GL state:
* - Full recompute path: `borderFbo` is still bound; viewport at map size.
* - Scatter path: default framebuffer bound; viewport at map size.
* - No-op path: state unchanged.
* Caller must restore both framebuffer and viewport before subsequent draws.
*/
draw(): void {
if (!this.dirty) return;
this.dirty = false;
if (this.globalDirty) {
this.globalDirty = false;
this.scatter.clear(); // full recompute supersedes any queued patches
const gl = this.gl;
const mo = this.settings.mapOverlay;
const gl = this.gl;
const mo = this.settings.mapOverlay;
gl.bindFramebuffer(gl.FRAMEBUFFER, this.borderFbo);
gl.viewport(0, 0, this.mapW, this.mapH);
gl.disable(gl.BLEND);
gl.bindFramebuffer(gl.FRAMEBUFFER, this.borderFbo);
gl.viewport(0, 0, this.mapW, this.mapH);
gl.disable(gl.BLEND);
gl.useProgram(this.program);
gl.uniform2f(this.uMapSize, this.mapW, this.mapH);
gl.uniform1ui(this.uHighlightOwner, this.highlightOwner);
gl.uniform1i(this.uHighlightThicken, Math.floor(mo.highlightThicken));
gl.uniform4fv(this.uDefensePosts, this.defensePostData);
gl.uniform1i(this.uDefensePostCount, this.defensePostCount);
gl.uniform1f(this.uDefensePostRange, mo.defensePostRange);
gl.useProgram(this.program);
gl.uniform2f(this.uMapSize, this.mapW, this.mapH);
gl.uniform1ui(this.uHighlightOwner, this.highlightOwner);
gl.uniform1i(this.uHighlightThicken, Math.floor(mo.highlightThicken));
gl.uniform4fv(this.uDefensePosts, this.defensePostData);
gl.uniform1i(this.uDefensePostCount, this.defensePostCount);
gl.uniform1f(this.uDefensePostRange, mo.defensePostRange);
gl.activeTexture(gl.TEXTURE0);
gl.bindTexture(gl.TEXTURE_2D, this._tileTex);
gl.activeTexture(gl.TEXTURE1);
gl.bindTexture(gl.TEXTURE_2D, this.relationTex);
gl.activeTexture(gl.TEXTURE0);
gl.bindTexture(gl.TEXTURE_2D, this._tileTex);
gl.activeTexture(gl.TEXTURE1);
gl.bindTexture(gl.TEXTURE_2D, this.relationTex);
gl.bindVertexArray(this.vao);
gl.drawArrays(gl.TRIANGLES, 0, 6);
gl.bindVertexArray(this.vao);
gl.drawArrays(gl.TRIANGLES, 0, 6);
} else if (this.scatter.count > 0) {
this.scatter.flush();
}
}
dispose(): void {
@@ -240,5 +285,6 @@ export class BorderComputePass {
gl.deleteTexture(this.borderTex);
gl.deleteTexture(this.relationTex);
gl.deleteFramebuffer(this.borderFbo);
this.scatter.dispose();
}
}
@@ -0,0 +1,225 @@
/**
* BorderScatterPass — incremental GPU border recompute for tiles that changed.
*
* Companion to BorderComputePass. The full-screen pass in BorderComputePass
* runs the same fragment shader over every tile in the map every time the
* border buffer is invalidated; for per-frame tile flips that scales linearly
* with map area (O(mapW × mapH)). This pass shares the same fragment shader
* but rasterizes only one POINT per dirty tile — cost is O(dirty patches)
* regardless of distribution.
*
* Each tile change requires recomputing the border value at the changed tile
* plus its 4 cardinal neighbors, because the cardinal-neighbor test in the
* border shader makes the neighbors' results depend on this tile's ownership.
* Use `pushWithNeighbors` to do that expansion automatically.
*
* Highlight-thicken rings within `uHighlightThicken` of a changed tile are
* NOT incrementally repainted — they'll lag visually until the next full
* recompute (which fires on highlight / relation / defense changes, or after
* a full tile upload). That artifact is small and short-lived; for live
* combat it's a fair trade.
*/
import type { RenderSettings } from "../RenderSettings";
import { createProgram, shaderSrc } from "../utils/GlUtils";
import { TILE_DEFINES } from "../utils/TileCodec";
import borderComputeFragSrc from "../shaders/border-compute/border-compute.frag.glsl?raw";
import borderScatterVertSrc from "../shaders/border-compute/border-scatter.vert.glsl?raw";
const MAX_DEFENSE_POSTS = 64;
const FLOATS_PER_PATCH = 2;
const INITIAL_CAPACITY = 4096;

/**
 * Point-based border recompute.
 *
 * Tile coordinates are queued on the CPU, uploaded as a vertex buffer and
 * drawn as POINTS into a framebuffer whose colour attachment is the border
 * texture, so only the queued texels are re-shaded by the border fragment
 * shader.
 */
export class BorderScatterPass {
  private gl: WebGL2RenderingContext;
  private mapW: number;
  private mapH: number;
  private settings: RenderSettings;
  private tileTex: WebGLTexture;
  private relationTex: WebGLTexture;

  private program: WebGLProgram;
  private uMapSize: WebGLUniformLocation;
  private uHighlightOwner: WebGLUniformLocation;
  private uHighlightThicken: WebGLUniformLocation;
  private uDefensePosts: WebGLUniformLocation;
  private uDefensePostCount: WebGLUniformLocation;
  private uDefensePostRange: WebGLUniformLocation;

  private fbo: WebGLFramebuffer;
  private vao: WebGLVertexArrayObject;
  private vbo: WebGLBuffer;

  // Copies of the owning pass's shader inputs; refreshed through the setters.
  private highlightOwner = 0;
  private defensePostData = new Float32Array(MAX_DEFENSE_POSTS * 4);
  private defensePostCount = 0;

  /** Queued tile coordinates, interleaved as [x, y, x, y, …]. */
  private patchData: Float32Array;
  /** Number of queued patches (not floats). */
  private patchCount = 0;
  /** Number of patches `patchData` can hold before it must grow. */
  private patchCapacity = INITIAL_CAPACITY;
  /** Size in bytes of the last `bufferData` allocation for `vbo`. */
  private gpuCapacityBytes = 0;

  /**
   * Compiles the scatter program and builds the FBO / VAO / VBO.
   *
   * @param gl WebGL2 context that owns every resource passed in.
   * @param mapW Map width in tiles.
   * @param mapH Map height in tiles.
   * @param borderTex Texture the pass renders into.
   * @param tileTex Tile texture bound to unit 0 while drawing.
   * @param relationTex Relation texture bound to unit 1 while drawing.
   * @param settings Render settings; `mapOverlay` is read on every flush.
   */
  constructor(
    gl: WebGL2RenderingContext,
    mapW: number,
    mapH: number,
    borderTex: WebGLTexture,
    tileTex: WebGLTexture,
    relationTex: WebGLTexture,
    settings: RenderSettings,
  ) {
    this.gl = gl;
    this.mapW = mapW;
    this.mapH = mapH;
    this.settings = settings;
    this.tileTex = tileTex;
    this.relationTex = relationTex;

    // Same fragment source as the full-screen pass, different vertex stage.
    const fragmentSrc = shaderSrc(borderComputeFragSrc, {
      ...TILE_DEFINES,
      MAX_DEFENSE_POSTS,
    });
    const program = createProgram(gl, borderScatterVertSrc, fragmentSrc);
    this.program = program;

    const locate = (name: string): WebGLUniformLocation =>
      gl.getUniformLocation(program, name)!;
    this.uMapSize = locate("uMapSize");
    this.uHighlightOwner = locate("uHighlightOwner");
    this.uHighlightThicken = locate("uHighlightThicken");
    this.uDefensePosts = locate("uDefensePosts");
    this.uDefensePostCount = locate("uDefensePostCount");
    this.uDefensePostRange = locate("uDefensePostRange");

    // Sampler units are fixed for the lifetime of the program.
    gl.useProgram(program);
    gl.uniform1i(gl.getUniformLocation(program, "uTileTex"), 0);
    gl.uniform1i(gl.getUniformLocation(program, "uRelationTex"), 1);

    // Render target: the border texture as the sole colour attachment.
    const fbo = gl.createFramebuffer()!;
    this.fbo = fbo;
    gl.bindFramebuffer(gl.FRAMEBUFFER, fbo);
    gl.framebufferTexture2D(
      gl.FRAMEBUFFER,
      gl.COLOR_ATTACHMENT0,
      gl.TEXTURE_2D,
      borderTex,
      0,
    );
    gl.bindFramebuffer(gl.FRAMEBUFFER, null);

    // Attribute 0 = vec2 tile coordinate, tightly packed.
    const stride = FLOATS_PER_PATCH * 4;
    this.vbo = gl.createBuffer()!;
    this.vao = gl.createVertexArray()!;
    gl.bindVertexArray(this.vao);
    gl.bindBuffer(gl.ARRAY_BUFFER, this.vbo);
    gl.enableVertexAttribArray(0);
    gl.vertexAttribPointer(0, 2, gl.FLOAT, false, stride, 0);
    gl.bindVertexArray(null);

    this.patchData = new Float32Array(INITIAL_CAPACITY * FLOATS_PER_PATCH);
  }

  /** Number of patches currently queued. */
  get count(): number {
    return this.patchCount;
  }

  /** Append a single tile coordinate to the queue, growing it if full. */
  push(x: number, y: number): void {
    if (this.patchCount >= this.patchCapacity) {
      this.grow();
    }
    const base = FLOATS_PER_PATCH * this.patchCount;
    this.patchData[base] = x;
    this.patchData[base + 1] = y;
    this.patchCount += 1;
  }

  /**
   * Append a tile and whichever of its left / right / upper / lower
   * neighbours lie inside the map, in that order.
   */
  pushWithNeighbors(x: number, y: number): void {
    const lastColumn = this.mapW - 1;
    const lastRow = this.mapH - 1;

    this.push(x, y);
    if (x > 0) {
      this.push(x - 1, y);
    }
    if (x < lastColumn) {
      this.push(x + 1, y);
    }
    if (y > 0) {
      this.push(x, y - 1);
    }
    if (y < lastRow) {
      this.push(x, y + 1);
    }
  }

  /** Drop every queued patch without drawing. */
  clear(): void {
    this.patchCount = 0;
  }

  /** Record the highlight owner uploaded on the next flush. */
  setHighlightOwner(owner: number): void {
    this.highlightOwner = owner;
  }

  /**
   * Record the defense-post uniform data uploaded on the next flush.
   *
   * @param data Packed post data; at most `MAX_DEFENSE_POSTS * 4` floats are read.
   * @param count Post count forwarded to the shader.
   */
  setDefensePostData(data: Float32Array, count: number): void {
    // Copy into our own storage so later mutation by the caller cannot leak in.
    const floatLimit = MAX_DEFENSE_POSTS * 4;
    this.defensePostData.set(data.subarray(0, floatLimit));
    this.defensePostCount = count;
  }

  /**
   * Upload the queued patches and draw them as POINTS into the border
   * texture, then empty the queue. Does nothing when the queue is empty.
   *
   * On exit the default framebuffer is bound and the viewport is left at
   * map size.
   */
  flush(): void {
    const queued = this.patchCount;
    if (queued === 0) {
      return;
    }

    const gl = this.gl;
    const floatCount = queued * FLOATS_PER_PATCH;
    const byteLength = floatCount * Float32Array.BYTES_PER_ELEMENT;
    const upload = this.patchData.subarray(0, floatCount);

    gl.bindBuffer(gl.ARRAY_BUFFER, this.vbo);
    if (byteLength <= this.gpuCapacityBytes) {
      // Fits in the existing allocation — overwrite the front of it.
      gl.bufferSubData(gl.ARRAY_BUFFER, 0, upload);
    } else {
      // Reallocate; the new allocation is exactly this upload's size.
      gl.bufferData(gl.ARRAY_BUFFER, upload, gl.STREAM_DRAW);
      this.gpuCapacityBytes = byteLength;
    }

    const overlay = this.settings.mapOverlay;

    gl.bindFramebuffer(gl.FRAMEBUFFER, this.fbo);
    gl.viewport(0, 0, this.mapW, this.mapH);
    gl.disable(gl.BLEND);

    gl.useProgram(this.program);
    gl.uniform2f(this.uMapSize, this.mapW, this.mapH);
    gl.uniform1ui(this.uHighlightOwner, this.highlightOwner);
    gl.uniform1i(this.uHighlightThicken, Math.floor(overlay.highlightThicken));
    gl.uniform4fv(this.uDefensePosts, this.defensePostData);
    gl.uniform1i(this.uDefensePostCount, this.defensePostCount);
    gl.uniform1f(this.uDefensePostRange, overlay.defensePostRange);

    gl.activeTexture(gl.TEXTURE0);
    gl.bindTexture(gl.TEXTURE_2D, this.tileTex);
    gl.activeTexture(gl.TEXTURE1);
    gl.bindTexture(gl.TEXTURE_2D, this.relationTex);

    gl.bindVertexArray(this.vao);
    gl.drawArrays(gl.POINTS, 0, queued);

    gl.bindFramebuffer(gl.FRAMEBUFFER, null);
    this.patchCount = 0;
  }

  /** Delete the GL objects this pass created (the textures are not owned). */
  dispose(): void {
    const gl = this.gl;
    gl.deleteProgram(this.program);
    gl.deleteFramebuffer(this.fbo);
    gl.deleteBuffer(this.vbo);
    gl.deleteVertexArray(this.vao);
  }

  /** Double the CPU-side queue, preserving its contents. */
  private grow(): void {
    const doubled = this.patchCapacity * 2;
    const expanded = new Float32Array(doubled * FLOATS_PER_PATCH);
    expanded.set(this.patchData);
    this.patchData = expanded;
    this.patchCapacity = doubled;
  }
}
+38 -8
View File
@@ -72,6 +72,13 @@ export class TerritoryPass {
*/
private scatter!: TileScatterPass;
/**
* Hook for forwarding tile changes to the border-compute pipeline so it can
* incrementally repaint affected tiles instead of rebuilding the whole map.
* Wired by the renderer to `borderPass.patchTile`.
*/
private borderPatchConsumer: ((x: number, y: number) => void) | null = null;
/**
* Drip buckets — round-robin staggering of tile updates across render frames.
* Each incoming change is hashed by tile ref to a fixed bucket (stable hash
@@ -188,11 +195,22 @@ export class TerritoryPass {
this.tilesDirty = true;
}
/**
* Wire a consumer that will be called once per tile coordinate change while
* scatter mode is active (i.e., not during a full upload). The renderer
* hooks this to `borderPass.patchTile` so border recompute scales with the
* number of changed tiles instead of full map area.
*
* Only one consumer is held at a time; calling this again replaces the
* previously wired callback.
*
* @param fn Callback receiving the (x, y) tile coordinate of each change.
*/
setBorderPatchConsumer(fn: (x: number, y: number) => void): void {
this.borderPatchConsumer = fn;
}
/** Apply tile deltas (during playback). */
uploadDeltaTiles(changedTiles: TilePair[]): void {
const ts = this.cpuTileState;
const w = this.mapW;
const pending = this.fullUploadPending;
const borderFn = this.borderPatchConsumer;
for (let i = 0; i < changedTiles.length; i++) {
const tp = changedTiles[i];
ts[tp.ref] = tp.state;
@@ -200,6 +218,7 @@ export class TerritoryPass {
const x = tp.ref % w;
const y = (tp.ref - x) / w;
this.scatter.push(x, y, tp.state);
if (borderFn) borderFn(x, y);
}
}
this.tilesDirty = true;
@@ -227,6 +246,7 @@ export class TerritoryPass {
const ts = this.cpuTileState;
const w = this.mapW;
const pending = this.fullUploadPending;
const borderFn = this.borderPatchConsumer;
for (let i = 0; i < bucket.length; i += 2) {
const ref = bucket[i];
const state = bucket[i + 1];
@@ -235,6 +255,7 @@ export class TerritoryPass {
const x = ref % w;
const y = (ref - x) / w;
this.scatter.push(x, y, state);
if (borderFn) borderFn(x, y);
}
}
bucket.length = 0;
@@ -252,6 +273,7 @@ export class TerritoryPass {
const ts = this.cpuTileState;
const w = this.mapW;
const pending = this.fullUploadPending;
const borderFn = this.borderPatchConsumer;
for (let b = 0; b < this.nBuckets; b++) {
const bucket = this.dripBuckets[b];
if (bucket.length === 0) continue;
@@ -264,6 +286,7 @@ export class TerritoryPass {
const x = ref % w;
const y = (ref - x) / w;
this.scatter.push(x, y, state);
if (borderFn) borderFn(x, y);
}
}
bucket.length = 0;
@@ -319,11 +342,15 @@ export class TerritoryPass {
// GPU flush + draw
// ---------------------------------------------------------------------------
/** Flush tile texture to GPU early (before heat update reads it). Returns true if data was uploaded. */
flushTileTexture(): boolean {
if (!this.tilesDirty) return false;
/**
* Flush tile texture to GPU early (before heat update reads it).
* Return value lets the renderer decide what downstream invalidation is
* needed — full uploads require a full border recompute, scatter uploads
* already pushed per-tile border patches via `borderPatchConsumer`.
*/
flushTileTexture(): "none" | "full" | "scatter" {
if (!this.tilesDirty) return "none";
const gl = this.gl;
let uploaded = false;
if (this.fullUploadPending) {
// Full upload (first tick, seek, replay full frame, etc.) — supersedes
@@ -343,16 +370,19 @@ export class TerritoryPass {
);
this.scatter.clear();
this.fullUploadPending = false;
uploaded = true;
} else if (this.scatter.count > 0) {
this.tilesDirty = false;
return "full";
}
if (this.scatter.count > 0) {
// Per-frame patches — scatter via FBO + POINTS draw. Constant cost in
// patch count regardless of spatial distribution.
this.scatter.flush();
uploaded = true;
this.tilesDirty = false;
return "scatter";
}
this.tilesDirty = false;
return uploaded;
return "none";
}
setAltView(active: boolean): void {
@@ -0,0 +1,16 @@
#version 300 es
precision highp float;
precision highp int;

// One vertex per queued patch: tile coordinate, integer-valued,
// in [0, mapW) × [0, mapH).
layout(location = 0) in vec2 aPos;

// Map dimensions in tiles (width, height).
uniform vec2 uMapSize;

void main() {
  // Offset by half a texel so the point sits on the texel centre; a 1×1
  // point then rasterizes into exactly that texel. Same trick as
  // TileScatterPass.
  vec2 texelCenter = (aPos + 0.5) / uMapSize;

  // Map [0, 1] texture space to [-1, 1] clip space.
  gl_Position = vec4(texelCenter * 2.0 - 1.0, 0.0, 1.0);
  gl_PointSize = 1.0;
}