feat: Slugify strings TDE-1019 (#827)

#### Motivation

Allow us to easily slugify arbitrary strings for use in URLs.

#### Modification

This assumes that each part of a path will be slugified separately. This is less convenient than slugifying the entire path in one go, but ensures that no path segment contains a slash.

#### Checklist

_If not applicable, provide explanation of why._

- [x] Tests updated
- [x] Docs updated (comments only)
- [x] Issue linked in Title
linz · Jan 19, 2024 · b0a7874 · b0a7874
1 parent 8923fce
commit b0a7874
Show file tree

Hide file tree

Showing 3 changed files with 85 additions and 0 deletions.
diff --git a/src/utils/__test__/slugify.test.ts b/src/utils/__test__/slugify.test.ts
@@ -0,0 +1,53 @@
+import assert from 'node:assert';
+import { describe, it } from 'node:test';
+
+import { slugify } from '../slugify.js';
+
+describe('slugify', () => {
+  // Characters that are already part of the slug alphabet must come back untouched.
+  it('should pass through output alphabet unchanged', () => {
+    const outputAlphabet = 'abcdefghijklmnopqrstuvwxyz0123456789_.-';
+    assert.equal(slugify(outputAlphabet), outputAlphabet);
+  });
+
+  it('should lowercase uppercase ASCII characters', () => {
+    const lowercased = slugify('ABCDEFGHIJKLMNOPQRSTUVWXYZ');
+    assert.equal(lowercased, 'abcdefghijklmnopqrstuvwxyz');
+  });
+
+  it('should replace spaces with hyphens', () => {
+    const hyphenated = slugify('Upper North Island');
+    assert.equal(hyphenated, 'upper-north-island');
+  });
+
+  // Each row pairs the expected base letter with the accented variants that must collapse to it.
+  it('should remove diacritics', () => {
+    const variantsByBaseLetter: Array<[string, string[]]> = [
+      ['a', ['á', 'Á', 'ä', 'Ä', 'ā', 'Ā']],
+      ['e', ['é', 'É', 'ē', 'Ē']],
+      ['i', ['ì', 'Ì', 'ī', 'Ī']],
+      ['o', ['ó', 'Ó', 'ô', 'Ô', 'ö', 'Ö', 'ō', 'Ō']],
+      ['u', ['ü', 'Ü', 'ū', 'Ū']],
+    ];
+    for (const [baseLetter, variants] of variantsByBaseLetter) {
+      for (const variant of variants) {
+        assert.equal(slugify(variant), baseLetter);
+      }
+    }
+  });
+
+  it('should convert "ø" (U+00F8) and "Ø" (U+00D8) to "o"', () => {
+    for (const strokedO of ['ø', 'Ø']) {
+      assert.equal(slugify(strokedO), 'o');
+    }
+  });
+
+  // "A" followed by a combining macron, i.e. an already-decomposed "Ā".
+  it('should handle decomposed characters', () => {
+    const decomposedInput = '\u0041\u0304';
+    assert.equal(slugify(decomposedInput), 'a');
+  });
+
+  // The error must list every distinct offending character once, in sorted order.
+  it('should treat any unhandled characters as an error', () => {
+    const expectedError = {
+      name: 'Error',
+      message: 'Unhandled characters: "\\n", "/", ";", "\\", "—", "“", "”"',
+      cause: { characters: ['\n', '/', ';', '\\', '—', '“', '”'] },
+    };
+    assert.throws(() => {
+      slugify('“a\\b//c—;\n”');
+    }, expectedError);
+  });
+});
diff --git a/src/utils/slugify.ts b/src/utils/slugify.ts
@@ -0,0 +1,30 @@
+/**
+ * Convert a human-readable string into a slug intended for use in URLs.
+ *
+ * Diacritics are stripped, "ø"/"Ø" become "o", spaces become hyphens and the result is lowercased. Any remaining
+ * character outside `a-z`, `0-9`, `_`, `.` and `-` is treated as an error rather than being silently dropped.
+ *
+ * @param input Human-readable string
+ * @returns String slug. See src/utils/__test__/slugify.test.ts for examples.
+ * @throws Error whose message lists the sorted, de-duplicated unhandled characters; the same characters are
+ * available unformatted as `cause.characters`.
+ */
+export function slugify(input: string): string {
+  const result = removeDiacritics(input).replaceAll('ø', 'o').replaceAll('Ø', 'O').replaceAll(' ', '-').toLowerCase();
+
+  // The `u` flag makes the regex match whole code points, so a character outside the Basic Multilingual Plane
+  // (for example an emoji) is reported as one character instead of as two lone surrogate halves.
+  const unhandledCharacters = result.match(/[^abcdefghijklmnopqrstuvwxyz0123456789_.-]/gu);
+  if (unhandledCharacters) {
+    const sortedUniqueCharacters = Array.from(new Set(unhandledCharacters)).sort();
+    const formattedCharacters = sortedUniqueCharacters.map((character) => {
+      // JSON.stringify quotes the character and escapes control characters such as "\n"; the doubled backslash it
+      // produces for a literal backslash is collapsed back to a single one for readability.
+      return JSON.stringify(character).replaceAll('\\\\', '\\');
+    });
+    throw Error(`Unhandled characters: ${formattedCharacters.join(', ')}`, {
+      cause: { characters: sortedUniqueCharacters },
+    });
+  }
+
+  return result;
+}
+
+/**
+ * Strip combining diacritical marks from a string.
+ *
+ * The input is first converted to normalization form D (NFD), which splits precomposed characters such as "ā" into
+ * the base letter followed by its
+ * [combining diacritical mark](https://www.unicode.org/charts/PDF/U0300.pdf). Every code point in that block
+ * (U+0300 to U+036F) is then dropped, which removes the macron from "ā", the accent from "é", and the like.
+ */
+function removeDiacritics(input: string): string {
+  const decomposed = input.normalize('NFD');
+  return decomposed.replace(/[\u0300-\u036F]/g, '');
+}
diff --git a/tsconfig.json b/tsconfig.json
@@ -1,6 +1,8 @@
 {
   "extends": "@linzjs/style/tsconfig.base.json",
   "compilerOptions": {
+    "lib": ["ES2022"],
+    "target": "ES2022",
     "outDir": "build"
   }
 }