Skip to content

Commit

Permalink
feat: Slugify strings TDE-1019 (#827)
Browse files Browse the repository at this point in the history
#### Motivation

Allow us to easily slugify arbitrary strings for use in URLs.

#### Modification

This assumes that each part of a path will be slugified separately. This
is less convenient than slugifying the entire path in one go, but
ensures that no path segment contains a slash.

#### Checklist

_If not applicable, provide explanation of why._

- [x] Tests updated
- [x] Docs updated (comments only)
- [x] Issue linked in Title
  • Loading branch information
l0b0 authored Jan 19, 2024
1 parent 8923fce commit b0a7874
Show file tree
Hide file tree
Showing 3 changed files with 85 additions and 0 deletions.
53 changes: 53 additions & 0 deletions src/utils/__test__/slugify.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
import assert from 'node:assert';
import { describe, it } from 'node:test';

import { slugify } from '../slugify.js';

describe('slugify', () => {
  // Characters that are already part of the slug alphabet must come back untouched.
  it('should pass through output alphabet unchanged', () => {
    const outputAlphabet = 'abcdefghijklmnopqrstuvwxyz0123456789_.-';
    assert.equal(slugify(outputAlphabet), outputAlphabet);
  });

  it('should lowercase uppercase ASCII characters', () => {
    const actual = slugify('ABCDEFGHIJKLMNOPQRSTUVWXYZ');
    assert.equal(actual, 'abcdefghijklmnopqrstuvwxyz');
  });

  it('should replace spaces with hyphens', () => {
    const actual = slugify('Upper North Island');
    assert.equal(actual, 'upper-north-island');
  });

  // Each row pairs the expected base letter with the accented variants that must collapse to it.
  it('should remove diacritics', () => {
    const variantsByBaseLetter: [string, string[]][] = [
      ['a', ['á', 'Á', 'ä', 'Ä', 'ā', 'Ā']],
      ['e', ['é', 'É', 'ē', 'Ē']],
      ['i', ['ì', 'Ì', 'ī', 'Ī']],
      ['o', ['ó', 'Ó', 'ô', 'Ô', 'ö', 'Ö', 'ō', 'Ō']],
      ['u', ['ü', 'Ü', 'ū', 'Ū']],
    ];
    for (const [baseLetter, variants] of variantsByBaseLetter) {
      for (const variant of variants) {
        assert.equal(slugify(variant), baseLetter);
      }
    }
  });

  it('should convert "ø" (U+00F8) and "Ø" (U+00D8) to "o"', () => {
    for (const value of ['ø', 'Ø']) {
      assert.equal(slugify(value), 'o');
    }
  });

  // "A" followed by a combining macron (U+0304) is the decomposed spelling of "Ā".
  it('should handle decomposed characters', () => {
    assert.equal(slugify('\u0041\u0304'), 'a');
  });

  // The error must name every distinct offending character once, in sorted order.
  it('should treat any unhandled characters as an error', () => {
    const expectedError = {
      name: 'Error',
      message: 'Unhandled characters: "\\n", "/", ";", "\\", "—", "“", "”"',
      cause: { characters: ['\n', '/', ';', '\\', '—', '“', '”'] },
    };
    assert.throws(() => {
      slugify('“a\\b//c—;\n”');
    }, expectedError);
  });
});
30 changes: 30 additions & 0 deletions src/utils/slugify.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
/**
 * Convert a human-readable string into a slug restricted to the characters `a-z`, `0-9`, `_`, `.` and `-`.
 *
 * Diacritics are removed, "ø"/"Ø" become "o", spaces become hyphens and the result is lowercased. Any character
 * that is still outside the output alphabet after those steps is reported as an error rather than silently dropped.
 *
 * @param input Human-readable string
 * @returns String slug. See src/utils/__test__/slugify.test.ts for examples.
 * @throws Error whose message lists every distinct unhandled character in sorted order; the same characters are
 * available unformatted as `cause.characters`.
 */
export function slugify(input: string): string {
  const result = removeDiacritics(input).replaceAll('ø', 'o').replaceAll('Ø', 'O').replaceAll(' ', '-').toLowerCase();

  // The `u` flag makes the pattern match whole code points, so a character outside the Basic Multilingual Plane
  // (for example an emoji) is reported once and intact instead of as two unpaired surrogate halves.
  const unhandledCharacters = result.match(/[^abcdefghijklmnopqrstuvwxyz0123456789_.-]/gu);
  if (unhandledCharacters !== null) {
    const sortedUniqueCharacters = Array.from(new Set(unhandledCharacters)).sort();
    // JSON.stringify quotes each character and escapes control characters (a newline is shown as "\n"). Its doubled
    // backslash is collapsed again so that a literal backslash is shown as the single character it is.
    const formattedCharacters = sortedUniqueCharacters.map((character) => {
      return JSON.stringify(character).replaceAll('\\\\', '\\');
    });
    throw new Error(`Unhandled characters: ${formattedCharacters.join(', ')}`, {
      cause: { characters: sortedUniqueCharacters },
    });
  }

  return result;
}

/**
 * Strip diacritics by decomposing first. In normalization form D (NFD) a character such as "ā" is represented as
 * its base letter followed by a separate
 * [combining diacritical mark](https://www.unicode.org/charts/PDF/U0300.pdf) in the range U+0300 to U+036F.
 * Deleting those marks leaves only the base letter, which turns "ā" into "a", "é" into "e", and so on.
 */
function removeDiacritics(input: string): string {
  const decomposed = input.normalize('NFD');
  return decomposed.replace(/[\u0300-\u036F]/g, '');
}
2 changes: 2 additions & 0 deletions tsconfig.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
{
"extends": "@linzjs/style/tsconfig.base.json",
"compilerOptions": {
"lib": ["ES2022"],
"target": "ES2022",
"outDir": "build"
}
}

0 comments on commit b0a7874

Please sign in to comment.