Refactor recency penalty and profit estimation in TrainStation

- Adjusted recency penalties to be softer and decay faster, with a maximum penalty of 40% for immediate revisits.
- Introduced a new method to estimate expected profit for a station based on passenger demand and relationship with the train owner.
- Updated edge score calculation to incorporate expected profit and travel time cost, enhancing routing decisions.
2026-07-01 09:23:24 +00:00 · 2025-11-22 20:45:39 +01:00
parent ad5d5eecd0
commit 21c9594788
2 changed files with 63 additions and 20 deletions
@@ -75,7 +75,7 @@ export class Railroad {

  getFare(): bigint {
    const baseLengthFare = 10;
-    const baseCongestionFare = BigInt(5000);
+    const baseCongestionFare = BigInt(2000);
    const lengthFare = BigInt(this.getLength() * baseLengthFare); // Base fare proportional to length
    // Busy railroads should be more expensive: each train adds a congestion premium
    const effectiveCongestion = Math.max(0, Math.round(this.congestionEma));
@@ -177,8 +177,11 @@ export class TrainStation {
  private readonly stationDemandSensitivity: number = 0.1; // How strongly passenger demand boosts scores
  private readonly heatDecayInterval: number = 60; // How often heat decays (ticks)
  private readonly heatDecayFactor: number = 1 - 0.1; // How much heat decays per time (0.9 = 10% decay)
-  private readonly recencyDecayFactor: number = 1 - 0.2; // How much recency penalties decay per time (0.8 = 20% decay)
-  private readonly maxRecencyPenalty: number = 1; // Maximum penalty for immediate revisits
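+  // Softer recency penalties now that profit-based routing discourages loops:
  // - Immediate revisit gets at most ~40% penalty
  // - Penalty is meant to shrink quickly for older visits
  // NOTE(review): the factor goes from 0.8 (20% decay) to 0.9 (10% decay). If the penalty is
  // multiplied by this factor per step, it now fades more slowly, not faster — confirm intent.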
+  private readonly recencyDecayFactor: number = 1 - 0.1; // 0.9
+  private readonly maxRecencyPenalty: number = 0.4; // 40% max penalty for immediate revisits

  private readonly randomChoiceProbability: number = 0.1; // Probability of making random choice instead of best (0.1 = 10%)

@@ -527,15 +530,46 @@ export class TrainStation {
  }

  /**
-   * Calculate edge score for local greedy routing with graduated recency penalties
+   * Roughly estimate the gold this train owner can expect from visiting a station,
+   * using the same config values as the real payout (but without mutating state).
+   *
+   * Real payout at a stop is:
+   *   perLevelMax(rel) * level * passengerFullness  (capped by the pool)
+   *
+   * We approximate that as:
+   *   expectedProfit ≈ perLevelMax(rel) * (level * passengerFullness)
+   *                  = perLevelMax(rel) * demandScore
+   */
+  private estimateExpectedProfitForStation(
+    trainOwner: Player,
+    station: TrainStation,
+  ): number {
+    const stationOwner = station.unit.owner();
+    const relationship = rel(trainOwner, stationOwner);
+    const perLevelMax = this.mg.config().trainGold(relationship); // Gold (BigInt)
+
+    const demandScore = station.getPassengerDemandScore(); // ≈ level * fullness (0..level)
+
+    // Convert to number for scoring; we only care about relative ordering.
+    const basePerLevel = Number(perLevelMax);
+    if (!Number.isFinite(basePerLevel) || basePerLevel <= 0) {
+      return 0;
+    }
+
+    return basePerLevel * demandScore;
+  }
+
+  /**
   * Calculate edge score for local greedy routing with graduated recency penalties.
   * Uses an approximate profit-to-cost ratio: expected gold divided by a gold-equivalent
   * cost, where travel time is converted to gold at a fixed per-tick rate:
   *
   *   score ≈ expectedProfit(trainOwner, neighbor) / (fare + travelTimeCost)
   */
  private calculateEdgeScore(
    neighbor: TrainStation,
    stationsAgo: number, // -1 = never visited, 1 = immediate previous, 2 = 2 ago, etc.
+    trainOwner: Player,
  ): number {
-    // Heuristic:
-    // - Estimate expected profit as (demand - normalized fare)
-    // - Divide by estimated travel time (tiles / train speed) to get profit per tick
    const railroad = this.getRailroadTo(neighbor);
    if (!railroad) {
      return -Infinity;
@@ -547,18 +581,30 @@ export class TrainStation {
    }

    const lengthTiles = railroad.getLength();
-    const travelTime =
+    const travelTimeTicks =
      lengthTiles > 0 ? lengthTiles / this.approxTrainSpeedTilesPerTick : 1;

-    // Pull current demand from the neighbor station.
-    const neighborDemandScore = neighbor.getPassengerDemandScore();
+    // Translate time into an approximate gold-cost so that long detours
+    // are less attractive even when fare is low.
+    const timeCostPerTick = 500; // tuning knob: "opportunity cost" of a tick
+    const travelTimeCost = timeCostPerTick * travelTimeTicks;

-    // Normalize fare into the same rough magnitude as demand.
-    const normalizedFare = fare / this.fareNormalizationFactor;
-    const expectedValue = neighborDemandScore - normalizedFare;
+    const expectedProfit = this.estimateExpectedProfitForStation(
+      trainOwner,
+      neighbor,
+    );

-    // Base score: expected profit per unit of travel time.
-    let score = expectedValue / Math.max(1, travelTime);
+    if (expectedProfit <= 0) {
+      return -Infinity;
+    }
+
+    const effectiveCost = fare + travelTimeCost;
+    if (effectiveCost <= 0) {
+      return expectedProfit;
+    }
+
+    // Base score: approximate gold per unit of (fare + time cost).
+    let score = expectedProfit / effectiveCost;

    // Apply graduated recency penalty based on stations ago
    if (stationsAgo > 0) {
@@ -572,11 +618,8 @@ export class TrainStation {
      score *= recencyPenalty;
    }

-    // Apply station demand preference (higher demand => higher score)
-    score *= 1 + this.stationDemandSensitivity * neighborDemandScore;
-
    // Ensure unvisited stations get a minimum exploration score
-    // This prevents zero-profit unvisited stations(factories) from being ignored
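+    // This prevents unknown stations from being ignored forever.
    // NOTE(review): this function now returns -Infinity when expectedProfit <= 0, and the
    // base score is a ratio of two positive numbers, so for unvisited stations this
    // fallback looks unreachable — zero-profit unvisited stations are no longer rescued.
    // Confirm whether the early return or this fallback should win.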
    if (stationsAgo < 0 && score <= 0) {
      score = 0.2; // Small positive score to encourage exploration
    }
@@ -686,7 +729,7 @@ export class TrainStation {

    for (const neighbor of neighbors) {
      const stationsAgo = this.getStationsAgo(neighbor, recentStations);
-      const score = this.calculateEdgeScore(neighbor, stationsAgo);
+      const score = this.calculateEdgeScore(neighbor, stationsAgo, trainOwner);

      validNeighbors.push({ station: neighbor, score });
    }