fix: catch punctuation-separated slurs in username censor

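Re-add skipNonAlphabeticTransformer to both matcher chains so bypass attempts like "n.i.g.g.e.r" are detected. This reverses the earlier intentional removal of the transformer, so the tests that asserted such inputs were not matched now expect a match.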
2026-06-21 09:30:22 +00:00 · 2026-05-04 18:18:36 -06:00
parent 08b9fd96e6
commit ffbe48ad10
2 changed files with 24 additions and 6 deletions
@@ -6,6 +6,7 @@ import {
  pattern,
  resolveConfusablesTransformer,
  resolveLeetSpeakTransformer,
+  skipNonAlphabeticTransformer,
  toAsciiLowerCaseTransformer,
 } from "obscenity";
 import countries from "resources/countries.json";
@@ -71,15 +72,21 @@ export function createMatcher(bannedWords: string[]): RegExpMatcher {
  ];
  // substringMatcher: literal patterns, no collapse — catches "niggertesting" as a substring
  // collapseMatcher: deduped patterns + collapse transformer — catches "niiiigger", "hiiitler"
+  // skipNonAlphabeticTransformer is appended last in both chains; it drops non-letter characters
+  // so separator-based bypasses like "n.i.g.g.e.r" or "hi_tler" still match.
  const substringMatcher = new RegExpMatcher({
    ...buildDataset(bannedWords, false),
-    blacklistMatcherTransformers: baseTransformers,
+    blacklistMatcherTransformers: [
+      ...baseTransformers,
+      skipNonAlphabeticTransformer(),
+    ],
  });
  const collapseMatcher = new RegExpMatcher({
    ...buildDataset(bannedWords, true),
    blacklistMatcherTransformers: [
      ...baseTransformers,
      collapseDuplicatesTransformer(),
+      skipNonAlphabeticTransformer(),
    ],
  });
  return {
@@ -114,11 +114,9 @@ describe("UsernameCensor", () => {
      expect(matcher.hasMatch("MyChairName")).toBe(true);
    });

-    test("detects banned words with underscores/dots/numbers mixed in", () => {
-      // These should NOT bypass the filter (skipNonAlphabetic was intentionally removed)
-      // Words separated by non-alpha chars are treated as separate tokens
-      expect(matcher.hasMatch("n.i.g.g.e.r")).toBe(false); // dots break the word
-      expect(matcher.hasMatch("hi_tler")).toBe(false); // underscore breaks it
+    test("detects banned words with non-alphabetic characters mixed in", () => {
+      expect(matcher.hasMatch("n.i.g.g.e.r")).toBe(true);
+      expect(matcher.hasMatch("hi_tler")).toBe(true);
    });

    test("allows clean usernames", () => {
@@ -141,6 +139,19 @@ describe("UsernameCensor", () => {
      expect(matcher.hasMatch("kkklover")).toBe(true);
      expect(matcher.hasMatch("ilovekkkboys")).toBe(true);
    });
+
+    test("catches slurs separated by periods (bypass attempt)", () => {
+      expect(matcher.hasMatch("n.i.g.g.e.r")).toBe(true);
+      expect(matcher.hasMatch("N.I.G.G.E.R")).toBe(true);
+      expect(matcher.hasMatch("n.i.g.g.a")).toBe(true);
+      expect(matcher.hasMatch("h.i.t.l.e.r")).toBe(true);
+      expect(matcher.hasMatch("hello n.i.g.g.e.r world")).toBe(true);
+    });
+
+    test("censor replaces period-separated slur usernames", () => {
+      const result = checker.censor("n.i.g.g.e.r", null);
+      expect(shadowNames).toContain(result.username);
+    });
  });

  describe("censor", () => {