From d1ce199a521afc8e02b54670a040a19d36a4d45a Mon Sep 17 00:00:00 2001
From: Evan
Date: Fri, 5 Jun 2026 07:07:03 -0700
Subject: [PATCH] Upload tile delta to GPU (#4159)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

## Description

Reduces the amount of tile data sent to the GPU each tick, giving a roughly 10 fps frame-rate increase on a 10-year-old Chromebook.

Two changes to the territory rendering path:

### 1. Split `passEnabled.mapOverlay` into four flags

The single `mapOverlay` toggle controlled four unrelated passes (territory fill, border compute, border stamp, trail). Splits it into `territory`, `borderCompute`, `borderStamp`, `trail` so each can be toggled independently in the debug GUI. No functional change by default — all four flags default to `true`, which matches the old `mapOverlay: true` behavior.

### 2. GPU scatter for per-frame tile texture updates

Replaces the dirty-row bbox `texSubImage2D` upload in `TerritoryPass` with a new `TileScatterPass` that uploads a small attribute buffer of `(x, y, state)` patches and runs a single `POINTS` draw into an FBO bound to `tileTex`. Each patch rasterizes as a 1×1 point into exactly its target texel.

**Why:** the old path's cost scaled with the bounding box of the dirty rows, not the number of changed tiles. In typical play, tile changes are spread across the whole map (multiple players fighting in different regions, scattered trails/fallout), so the bbox covered most of the map's rows and we re-uploaded mostly-unchanged data every frame. The new path's cost is proportional to the number of patches, regardless of their spatial distribution, and no longer scales with map size.

The full-upload path (initial load / seek / spawn-phase flush) is unchanged. A pending full upload (`fullUploadPending`) supersedes any queued scatter patches: they are discarded because the full upload already contains them.

## Please complete the following: - [x] I have added screenshots for all UI updates *(N/A — no UI changes)* - [x] I process any text displayed to the user through translateText() and I've added it to the en.json file *(N/A — no user-facing text)* - [x] I have added relevant tests to the test directory *(renderer code, not covered by unit tests; verified visually)* ## Please put your Discord username so you can be contacted if a bug or regression is found: evan --- src/client/render/gl/RenderSettings.ts | 5 +- src/client/render/gl/Renderer.ts | 8 +- src/client/render/gl/debug/Layout.ts | 5 +- src/client/render/gl/passes/TerritoryPass.ts | 154 ++++++++---------- .../render/gl/passes/TileScatterPass.ts | 144 ++++++++++++++++ src/client/render/gl/render-settings.json | 5 +- .../map-overlay/tile-scatter.frag.glsl | 12 ++ .../map-overlay/tile-scatter.vert.glsl | 19 +++ 8 files changed, 260 insertions(+), 92 deletions(-) create mode 100644 src/client/render/gl/passes/TileScatterPass.ts create mode 100644 src/client/render/gl/shaders/map-overlay/tile-scatter.frag.glsl create mode 100644 src/client/render/gl/shaders/map-overlay/tile-scatter.vert.glsl diff --git a/src/client/render/gl/RenderSettings.ts b/src/client/render/gl/RenderSettings.ts index 71c423868..50fe28db3 100644 --- a/src/client/render/gl/RenderSettings.ts +++ b/src/client/render/gl/RenderSettings.ts @@ -3,7 +3,10 @@ import defaults from "./render-settings.json"; export interface RenderSettings { passEnabled: { terrain: boolean; - mapOverlay: boolean; + territory: boolean; + borderCompute: boolean; + borderStamp: boolean; + trail: boolean; territoryPatterns: boolean; structure: boolean; unit: boolean; diff --git a/src/client/render/gl/Renderer.ts b/src/client/render/gl/Renderer.ts index 41110b3ed..3e73da8a4 100644 --- a/src/client/render/gl/Renderer.ts +++ b/src/client/render/gl/Renderer.ts @@ -1201,7 +1201,7 @@ export class GPURenderer { } private computeTextures(): void { - if 
(this.settings.passEnabled.mapOverlay) this.borderPass.draw(); + if (this.settings.passEnabled.borderCompute) this.borderPass.draw(); } private renderFrame(): void { @@ -1259,7 +1259,7 @@ export class GPURenderer { if (pe.terrain) this.terrainPass.draw(cam); gl.enable(gl.BLEND); gl.blendFunc(gl.SRC_ALPHA, gl.ONE_MINUS_SRC_ALPHA); - if (pe.mapOverlay) this.territoryPass.draw(cam); + if (pe.territory) this.territoryPass.draw(cam); } private renderOverlays(cam: Float32Array, zoom: number): void { @@ -1270,7 +1270,7 @@ export class GPURenderer { gl.blendFunc(gl.SRC_ALPHA, gl.ONE_MINUS_SRC_ALPHA); this.spawnOverlayPass.draw(cam); - if (pe.mapOverlay) this.borderStampPass.draw(cam); + if (pe.borderStamp) this.borderStampPass.draw(cam); if (pe.railroad) this.railroadPass.draw(cam, zoom); if (pe.unit) this.unitPass.drawGround(cam); this.samRadiusPass.draw(cam); @@ -1285,7 +1285,7 @@ export class GPURenderer { this.moveIndicatorPass.draw(cam, zoom); this.nukeTelegraphPass.draw(cam); if (pe.falloutBloom) this.bloomPass.draw(cam, this.frameTick); - if (pe.mapOverlay) this.trailPass.draw(cam); + if (pe.trail) this.trailPass.draw(cam); if (pe.unit) this.unitPass.drawMissiles(cam); if (pe.fx) { diff --git a/src/client/render/gl/debug/Layout.ts b/src/client/render/gl/debug/Layout.ts index c5c6dc01c..3e9074c2a 100644 --- a/src/client/render/gl/debug/Layout.ts +++ b/src/client/render/gl/debug/Layout.ts @@ -9,7 +9,10 @@ export function buildTree(s: RenderSettings, d: RenderSettings): DebugNode[] { return [ folder("Pass Enables", [ toggle(s.passEnabled, "terrain", d.passEnabled), - toggle(s.passEnabled, "mapOverlay", d.passEnabled), + toggle(s.passEnabled, "territory", d.passEnabled), + toggle(s.passEnabled, "borderCompute", d.passEnabled), + toggle(s.passEnabled, "borderStamp", d.passEnabled), + toggle(s.passEnabled, "trail", d.passEnabled), toggle(s.passEnabled, "structure", d.passEnabled), toggle(s.passEnabled, "unit", d.passEnabled), toggle(s.passEnabled, "name", d.passEnabled), 
diff --git a/src/client/render/gl/passes/TerritoryPass.ts b/src/client/render/gl/passes/TerritoryPass.ts index ffa39640c..dd07f7aad 100644 --- a/src/client/render/gl/passes/TerritoryPass.ts +++ b/src/client/render/gl/passes/TerritoryPass.ts @@ -20,6 +20,7 @@ import { OWNER_MASK, TILE_DEFINES } from "../utils/TileCodec"; import overlayVertSrc from "../shaders/map-overlay/overlay.vert.glsl?raw"; import territoryFragSrc from "../shaders/map-overlay/territory.frag.glsl?raw"; +import { TileScatterPass } from "./TileScatterPass"; export class TerritoryPass { private gl: WebGL2RenderingContext; @@ -58,9 +59,18 @@ export class TerritoryPass { private cpuTileState: Uint16Array; private tilesDirty = false; - /** Dirty row range for partial tile upload. Infinity/-1 = full upload. */ - private dirtyRowMin = Infinity; - private dirtyRowMax = -1; + /** + * True after a full state replacement (initial load / seek). flushTileTexture + * uploads the full cpuTileState via texSubImage2D and discards any queued + * scatter patches — those are already covered by the full upload. + */ + private fullUploadPending = false; + + /** + * GPU scatter pass for per-frame patches. Replaces the old dirty-row bbox + * upload — constant cost regardless of how spatially scattered patches are. + */ + private scatter!: TileScatterPass; /** * Drip buckets — round-robin staggering of tile updates across render frames. 
@@ -152,6 +162,8 @@ export class TerritoryPass { gl.uniform1i(gl.getUniformLocation(this.program, "uSkinAnchor"), 6); this.vao = createMapQuad(gl, mapW, mapH); + + this.scatter = new TileScatterPass(gl, mapW, mapH, tileTex); } // --------------------------------------------------------------------------- @@ -162,8 +174,8 @@ export class TerritoryPass { uploadFullTileState(tileState: Uint16Array): void { this.cpuTileState.set(tileState); this.clearDripBuckets(); - this.dirtyRowMin = Infinity; - this.dirtyRowMax = -1; + this.scatter.clear(); + this.fullUploadPending = true; this.tilesDirty = true; } @@ -171,8 +183,8 @@ export class TerritoryPass { setLiveRef(tileState: Uint16Array): void { this.cpuTileState.set(tileState); this.clearDripBuckets(); - this.dirtyRowMin = Infinity; - this.dirtyRowMax = -1; + this.scatter.clear(); + this.fullUploadPending = true; this.tilesDirty = true; } @@ -180,12 +192,15 @@ export class TerritoryPass { uploadDeltaTiles(changedTiles: TilePair[]): void { const ts = this.cpuTileState; const w = this.mapW; + const pending = this.fullUploadPending; for (let i = 0; i < changedTiles.length; i++) { const tp = changedTiles[i]; ts[tp.ref] = tp.state; - const row = (tp.ref / w) | 0; - if (row < this.dirtyRowMin) this.dirtyRowMin = row; - if (row > this.dirtyRowMax) this.dirtyRowMax = row; + if (!pending) { + const x = tp.ref % w; + const y = (tp.ref - x) / w; + this.scatter.push(x, y, tp.state); + } } this.tilesDirty = true; } @@ -209,28 +224,19 @@ export class TerritoryPass { drainDripBucket(): void { const bucket = this.dripBuckets[this.currentBucket]; if (bucket.length > 0) { - const isFullUploadPending = this.tilesDirty && this.dirtyRowMax < 0; - - if (isFullUploadPending) { - // Full upload pending: skip tracking dirty rows, just flush data - for (let i = 0; i < bucket.length; i += 2) { - this.cpuTileState[bucket[i]] = bucket[i + 1]; + const ts = this.cpuTileState; + const w = this.mapW; + const pending = this.fullUploadPending; + for (let i 
= 0; i < bucket.length; i += 2) { + const ref = bucket[i]; + const state = bucket[i + 1]; + ts[ref] = state; + if (!pending) { + const x = ref % w; + const y = (ref - x) / w; + this.scatter.push(x, y, state); } - } else { - const w = this.mapW; - let minRow = this.dirtyRowMin; - let maxRow = this.dirtyRowMax; - for (let i = 0; i < bucket.length; i += 2) { - const ref = bucket[i]; - this.cpuTileState[ref] = bucket[i + 1]; - const row = (ref / w) | 0; - if (row < minRow) minRow = row; - if (row > maxRow) maxRow = row; - } - this.dirtyRowMin = minRow; - this.dirtyRowMax = maxRow; } - bucket.length = 0; this.tilesDirty = true; } @@ -243,39 +249,25 @@ export class TerritoryPass { */ flushAllDripBuckets(): void { let any = false; - const isFullUploadPending = this.tilesDirty && this.dirtyRowMax < 0; - - if (isFullUploadPending) { - for (let b = 0; b < this.nBuckets; b++) { - const bucket = this.dripBuckets[b]; - if (bucket.length === 0) continue; - any = true; - for (let i = 0; i < bucket.length; i += 2) { - this.cpuTileState[bucket[i]] = bucket[i + 1]; + const ts = this.cpuTileState; + const w = this.mapW; + const pending = this.fullUploadPending; + for (let b = 0; b < this.nBuckets; b++) { + const bucket = this.dripBuckets[b]; + if (bucket.length === 0) continue; + any = true; + for (let i = 0; i < bucket.length; i += 2) { + const ref = bucket[i]; + const state = bucket[i + 1]; + ts[ref] = state; + if (!pending) { + const x = ref % w; + const y = (ref - x) / w; + this.scatter.push(x, y, state); } - bucket.length = 0; } - } else { - const w = this.mapW; - let minRow = this.dirtyRowMin; - let maxRow = this.dirtyRowMax; - for (let b = 0; b < this.nBuckets; b++) { - const bucket = this.dripBuckets[b]; - if (bucket.length === 0) continue; - any = true; - for (let i = 0; i < bucket.length; i += 2) { - const ref = bucket[i]; - this.cpuTileState[ref] = bucket[i + 1]; - const row = (ref / w) | 0; - if (row < minRow) minRow = row; - if (row > maxRow) maxRow = row; - } - 
bucket.length = 0; - } - this.dirtyRowMin = minRow; - this.dirtyRowMax = maxRow; + bucket.length = 0; } - if (any) { this.tilesDirty = true; } @@ -331,28 +323,13 @@ export class TerritoryPass { flushTileTexture(): boolean { if (!this.tilesDirty) return false; const gl = this.gl; - const src = this.cpuTileState; - gl.activeTexture(gl.TEXTURE0); - gl.bindTexture(gl.TEXTURE_2D, this.tileTex); + let uploaded = false; - if (this.dirtyRowMax >= 0) { - // Partial upload — only dirty rows - const minRow = this.dirtyRowMin; - const rowCount = this.dirtyRowMax - minRow + 1; - const offset = minRow * this.mapW; - gl.texSubImage2D( - gl.TEXTURE_2D, - 0, - 0, - minRow, - this.mapW, - rowCount, - gl.RED_INTEGER, - gl.UNSIGNED_SHORT, - src.subarray(offset, offset + rowCount * this.mapW), - ); - } else { - // Full upload (first tick, seek, replay full frame, etc.) + if (this.fullUploadPending) { + // Full upload (first tick, seek, replay full frame, etc.) — supersedes + // any queued scatter patches. + gl.activeTexture(gl.TEXTURE0); + gl.bindTexture(gl.TEXTURE_2D, this.tileTex); gl.texSubImage2D( gl.TEXTURE_2D, 0, @@ -362,14 +339,20 @@ export class TerritoryPass { this.mapH, gl.RED_INTEGER, gl.UNSIGNED_SHORT, - src, + this.cpuTileState, ); + this.scatter.clear(); + this.fullUploadPending = false; + uploaded = true; + } else if (this.scatter.count > 0) { + // Per-frame patches — scatter via FBO + POINTS draw. Constant cost in + // patch count regardless of spatial distribution. 
+ this.scatter.flush(); + uploaded = true; + } - this.dirtyRowMin = Infinity; - this.dirtyRowMax = -1; this.tilesDirty = false; - return true; + return uploaded; } setAltView(active: boolean): void { @@ -449,6 +432,7 @@ export class TerritoryPass { const gl = this.gl; gl.deleteProgram(this.program); gl.deleteVertexArray(this.vao); + this.scatter.dispose(); // tileTex, paletteTex, patternMetaTex, patternDataTex owned by GPUResources / renderer } }
diff --git a/src/client/render/gl/passes/TileScatterPass.ts b/src/client/render/gl/passes/TileScatterPass.ts
new file mode 100644
index 000000000..d998d6f0d
--- /dev/null
+++ b/src/client/render/gl/passes/TileScatterPass.ts
@@ -0,0 +1,168 @@
+/**
+ * TileScatterPass — GPU-side scatter writes into the R16UI tile texture.
+ *
+ * Replaces per-frame texSubImage2D bbox uploads with a single small attribute
+ * buffer upload + one POINTS draw call into an FBO bound to tileTex. Cost is
+ * proportional to the number of queued patches and independent of their
+ * spatial distribution — unlike row-range uploads, which scale with the
+ * bounding box of dirty rows.
+ *
+ * Each patch occupies 12 bytes of buffer space (3 floats: x, y, state). A
+ * flush issues one buffer upload and one draw call, however many patches are
+ * queued.
+ */
+
+import { createProgram } from "../utils/GlUtils";
+
+import fragSrc from "../shaders/map-overlay/tile-scatter.frag.glsl?raw";
+import vertSrc from "../shaders/map-overlay/tile-scatter.vert.glsl?raw";
+
+const FLOATS_PER_PATCH = 3;
+const INITIAL_CAPACITY = 4096;
+
+export class TileScatterPass {
+  private readonly gl: WebGL2RenderingContext;
+  private readonly mapW: number;
+  private readonly mapH: number;
+
+  private readonly program: WebGLProgram;
+  private readonly uMapSize: WebGLUniformLocation;
+
+  private readonly fbo: WebGLFramebuffer;
+  private readonly vao: WebGLVertexArrayObject;
+  private readonly vbo: WebGLBuffer;
+
+  /** CPU-side patch buffer: [x, y, state, x, y, state, …]. */
+  private patchData: Float32Array;
+  private patchCount = 0;
+  private patchCapacity = INITIAL_CAPACITY;
+  /** Bytes allocated for the GPU vertex buffer; 0 until the first flush. */
+  private gpuCapacityBytes = 0;
+
+  /**
+   * @param gl WebGL2 context used for all GL calls of this pass.
+   * @param mapW Map width; used as the scatter viewport width and uMapSize.x.
+   * @param mapH Map height; used as the scatter viewport height and uMapSize.y.
+   * @param tileTex Texture attached as the color target of the scatter FBO.
+   */
+  constructor(
+    gl: WebGL2RenderingContext,
+    mapW: number,
+    mapH: number,
+    tileTex: WebGLTexture,
+  ) {
+    this.gl = gl;
+    this.mapW = mapW;
+    this.mapH = mapH;
+
+    this.program = createProgram(gl, vertSrc, fragSrc);
+    this.uMapSize = gl.getUniformLocation(this.program, "uMapSize")!;
+
+    // FBO with tileTex as its only color attachment: draws land in the texture.
+    this.fbo = gl.createFramebuffer()!;
+    gl.bindFramebuffer(gl.FRAMEBUFFER, this.fbo);
+    gl.framebufferTexture2D(
+      gl.FRAMEBUFFER,
+      gl.COLOR_ATTACHMENT0,
+      gl.TEXTURE_2D,
+      tileTex,
+      0,
+    );
+    gl.bindFramebuffer(gl.FRAMEBUFFER, null);
+
+    // Interleaved layout per patch: location 0 = (x, y), location 1 = state.
+    this.vbo = gl.createBuffer()!;
+    this.vao = gl.createVertexArray()!;
+    gl.bindVertexArray(this.vao);
+    gl.bindBuffer(gl.ARRAY_BUFFER, this.vbo);
+    const stride = FLOATS_PER_PATCH * 4;
+    gl.enableVertexAttribArray(0);
+    gl.vertexAttribPointer(0, 2, gl.FLOAT, false, stride, 0);
+    gl.enableVertexAttribArray(1);
+    gl.vertexAttribPointer(1, 1, gl.FLOAT, false, stride, 8);
+    gl.bindVertexArray(null);
+
+    this.patchData = new Float32Array(INITIAL_CAPACITY * FLOATS_PER_PATCH);
+  }
+
+  /** Queue one tile patch; the CPU array is doubled first when it is full. */
+  push(x: number, y: number, state: number): void {
+    if (this.patchCount >= this.patchCapacity) this.grow();
+    const p = this.patchCount * FLOATS_PER_PATCH;
+    this.patchData[p] = x;
+    this.patchData[p + 1] = y;
+    this.patchData[p + 2] = state;
+    this.patchCount++;
+  }
+
+  /** Number of patches queued since the last flush() or clear(). */
+  get count(): number {
+    return this.patchCount;
+  }
+
+  /** Drop any pending patches without writing (used on seek / full upload). */
+  clear(): void {
+    this.patchCount = 0;
+  }
+
+  /**
+   * Upload patches and run the scatter draw. Resets the queue.
+   *
+   * Leaves the default framebuffer bound, the viewport set to the map size and
+   * BLEND disabled; callers must re-establish their own viewport / blend state.
+   */
+  flush(): void {
+    if (this.patchCount === 0) return;
+    const gl = this.gl;
+
+    const floats = this.patchCount * FLOATS_PER_PATCH;
+    const view = this.patchData.subarray(0, floats);
+
+    gl.bindBuffer(gl.ARRAY_BUFFER, this.vbo);
+    // (Re)allocate GPU storage to the full CPU capacity, not the current patch
+    // count, so the buffer is only reallocated when the CPU array has doubled
+    // rather than every time the per-frame patch count sets a new maximum.
+    const capacityBytes = this.patchData.byteLength;
+    if (capacityBytes > this.gpuCapacityBytes) {
+      gl.bufferData(gl.ARRAY_BUFFER, capacityBytes, gl.STREAM_DRAW);
+      this.gpuCapacityBytes = capacityBytes;
+    }
+    gl.bufferSubData(gl.ARRAY_BUFFER, 0, view);
+
+    gl.bindFramebuffer(gl.FRAMEBUFFER, this.fbo);
+    gl.viewport(0, 0, this.mapW, this.mapH);
+    gl.disable(gl.BLEND);
+
+    gl.useProgram(this.program);
+    gl.uniform2f(this.uMapSize, this.mapW, this.mapH);
+
+    gl.bindVertexArray(this.vao);
+    gl.drawArrays(gl.POINTS, 0, this.patchCount);
+
+    // Release bindings so later passes cannot modify this pass's VAO/VBO by
+    // accident (the constructor unbinds the VAO the same way).
+    gl.bindVertexArray(null);
+    gl.bindBuffer(gl.ARRAY_BUFFER, null);
+    gl.bindFramebuffer(gl.FRAMEBUFFER, null);
+
+    this.patchCount = 0;
+  }
+
+  /** Delete the GL objects created by this pass (tileTex is not deleted). */
+  dispose(): void {
+    const gl = this.gl;
+    gl.deleteProgram(this.program);
+    gl.deleteFramebuffer(this.fbo);
+    gl.deleteBuffer(this.vbo);
+    gl.deleteVertexArray(this.vao);
+  }
+
+  /** Double the CPU patch array, preserving the patches queued so far. */
+  private grow(): void {
+    const newCapacity = this.patchCapacity * 2;
+    const newBuf = new Float32Array(newCapacity * FLOATS_PER_PATCH);
+    newBuf.set(this.patchData);
+    this.patchData = newBuf;
+    this.patchCapacity = newCapacity;
+  }
+}
diff --git a/src/client/render/gl/render-settings.json b/src/client/render/gl/render-settings.json index 988138317..9ddcdc6c7 100644 --- a/src/client/render/gl/render-settings.json +++ b/src/client/render/gl/render-settings.json @@ -1,7 +1,10 @@ { "passEnabled": { "terrain": true, - "mapOverlay": true, + "territory": true, + "borderCompute": true, + "borderStamp": true, + "trail": true, "territoryPatterns": true, "structure": true, "unit": true,
diff --git a/src/client/render/gl/shaders/map-overlay/tile-scatter.frag.glsl b/src/client/render/gl/shaders/map-overlay/tile-scatter.frag.glsl
new file mode 100644
index 000000000..a82380c80
--- /dev/null
+++ b/src/client/render/gl/shaders/map-overlay/tile-scatter.frag.glsl
@@ -0,0 +1,12 @@
+#version 300 es
+precision highp float;
+precision highp int;
+
+flat in uint vState;
+
+// R16UI color attachment — integer output type required.
+layout(location = 0) out uvec4 fragColor;
+
+void main() {
+  fragColor = uvec4(vState, 0u, 0u, 0u);
+}
diff --git a/src/client/render/gl/shaders/map-overlay/tile-scatter.vert.glsl b/src/client/render/gl/shaders/map-overlay/tile-scatter.vert.glsl
new file mode 100644
index 000000000..c64218211
--- /dev/null
+++ b/src/client/render/gl/shaders/map-overlay/tile-scatter.vert.glsl
@@ -0,0 +1,19 @@
+#version 300 es
+precision highp float;
+precision highp int;
+
+layout(location = 0) in vec2 aPos; // tile coord (integer in [0, mapW) × [0, mapH))
+layout(location = 1) in float aState; // R16UI state value (passed as float, fits in 16 bits exactly)
+
+uniform vec2 uMapSize;
+
+flat out uint vState;
+
+void main() {
+  // Position the point at the center of the target pixel so a 1×1 point
+  // rasterizes into exactly that texel.
+  vec2 ndc = ((aPos + 0.5) / uMapSize) * 2.0 - 1.0;
+  gl_Position = vec4(ndc, 0.0, 1.0);
+  gl_PointSize = 1.0;
+  vState = uint(aState);
+}