Website Structure

This commit is contained in:
supalerk-ar66 2026-01-13 10:46:40 +07:00
parent 62812f2090
commit 71f0676a62
22365 changed files with 4265753 additions and 791 deletions

View file

@ -0,0 +1,16 @@
/**
 * Masks used to pick apart one 16-bit node header of the binary trie that
 * drives entity decoding.
 *
 * Header layout, most significant bit first:
 *
 * | Bits   | Mask            | Meaning                                              |
 * | ------ | --------------- | ---------------------------------------------------- |
 * | 15..14 | `VALUE_LENGTH`  | Value length, stored +1 (0 means "no value").        |
 * | 13     | `FLAG13`        | With a value: semicolon-required flag (implicit ';').|
 * |        |                 | Without a value: compact-run flag.                   |
 * | 12..7  | `BRANCH_LENGTH` | Branch length (0 => single branch held in bits 6..0  |
 * |        |                 | if jumpOffset==char), or run length for compact runs.|
 * | 6..0   | `JUMP_TABLE`    | Jump-table offset, single-branch char code, or the   |
 * |        |                 | first char of a run.                                 |
 */
export enum BinTrieFlags {
  /** Bits 15..14. */
  VALUE_LENGTH = 0xc000,
  /** Bit 13. */
  FLAG13 = 0x2000,
  /** Bits 12..7. */
  BRANCH_LENGTH = 0x1f80,
  /** Bits 6..0. */
  JUMP_TABLE = 0x007f,
}

View file

@ -0,0 +1,30 @@
/**
 * Shared base64 decode helper for generated decode data.
 *
 * The base64 payload is turned into a binary string with the global `atob`
 * when that function exists; otherwise `Buffer.from` is used, and finally the
 * deprecated `Buffer` constructor. Consecutive byte pairs are then combined
 * into 16-bit words, low byte first. A trailing unpaired byte is ignored.
 *
 * @param input Base64-encoded data.
 * @returns The decoded data as 16-bit words.
 */
export function decodeBase64(input: string): Uint16Array {
  let bytes: string;
  /* eslint-disable n/no-unsupported-features/node-builtins, unicorn/no-new-buffer, n/no-deprecated-api */
  if (typeof atob === "function") {
    // Browser (and Node >=16)
    bytes = atob(input);
  } else if (typeof Buffer.from === "function") {
    // Older Node versions (<16)
    bytes = Buffer.from(input, "base64").toString("binary");
  } else {
    // Last resort: legacy Buffer constructor
    bytes = new Buffer(input, "base64").toString("binary");
  }
  /* eslint-enable n/no-unsupported-features/node-builtins, unicorn/no-new-buffer, n/no-deprecated-api */

  // Two bytes per word; an odd trailing byte has no partner and is skipped.
  const wordCount = bytes.length >> 1;
  const words = new Uint16Array(wordCount);
  for (let wordIndex = 0; wordIndex < wordCount; wordIndex++) {
    const byteOffset = wordIndex * 2;
    const low = bytes.charCodeAt(byteOffset);
    const high = bytes.charCodeAt(byteOffset + 1);
    words[wordIndex] = low | (high << 8);
  }
  return words;
}

View file

@ -0,0 +1,121 @@
/**
 * Object form of an encode-trie node: a node that has at least one child and
 * may also carry an entity of its own.
 */
type EncodeTrieBranch = {
  /**
   * Wrapped entity (`&...;`) for the code point sequence that ends at this
   * node, or `undefined` when that sequence is not itself a named entity.
   */
  value: string | undefined;
  /**
   * Either the code unit of the single possible next character (a number), or
   * a map from next code unit to child node.
   */
  next: number | Map<number, EncodeTrieNode>;
  /** Entity value of the single child; only set when `next` is a number. */
  nextValue?: string;
};

/**
 * A node inside the encoding trie used by `encode.ts`.
 *
 * Nodes come in two physical shapes to minimize allocations and lookup cost:
 *
 * 1. Leaf node: a plain string, already in the form `"&name;"`. It is a
 *    terminal match with no children.
 *
 * 2. Branch / value node: an object holding an optional entity for the
 *    current path (`value`) plus its children (`next`, `nextValue`).
 */
export type EncodeTrieNode = string | EncodeTrieBranch;
/**
 * Build the encoding lookup Map from its compact serialized form.
 *
 * Entries are listed in ascending key order and each has the shape
 *
 *     <deltaBase36>[&name;][{<children>}]
 *
 * - `deltaBase36` is `currentKey - previousKey - 1` written in base 36
 *   (digits `0-9a-z`). It is left out when the delta is 0. The first entry of
 *   a list stores its key directly.
 * - `&name;` is the already-wrapped entity for the key.
 * - `{...}` holds the children, encoded the same way. Every child must have
 *   an entity and may not open another `{`.
 *
 * Resulting node shapes:
 * - no child block: the entity string itself;
 * - exactly one child: `{ value, next: <child key>, nextValue: <child entity> }`;
 * - several children: `{ value, next: Map<child key, child entity> }`.
 *
 * @param serialized The compact trie string.
 * @returns Map from top-level key to its trie node.
 * @throws {Error} If the input is malformed (missing or unterminated entity,
 *   unterminated or nested child block, entry without value or children).
 */
export function parseEncodeTrie(
  serialized: string,
): Map<number, EncodeTrieNode> {
  const end = serialized.length;
  const result = new Map<number, EncodeTrieNode>();
  let pos = 0;

  /** True for the char codes of `0-9` and `a-z`. */
  const isBase36Digit = (code: number): boolean =>
    (code >= 48 && code <= 57) || (code >= 97 && code <= 122);

  /** Consume a run of base-36 digits; an empty run counts as 0. */
  const takeDelta = (): number => {
    const from = pos;
    while (pos < end && isBase36Digit(serialized.charCodeAt(pos))) {
      pos++;
    }
    return pos === from
      ? 0
      : Number.parseInt(serialized.slice(from, pos), 36);
  };

  /** Consume `&...;` starting at the current position and return it whole. */
  const takeEntity = (): string => {
    if (serialized[pos] !== "&") {
      throw new Error(`Child entry missing value near index ${pos}`);
    }
    const from = pos;
    const semicolon = serialized.indexOf(";", from + 1);
    if (semicolon === -1) {
      throw new Error(`Unterminated entity starting at index ${from}`);
    }
    pos = semicolon + 1; // Step over ';'
    return serialized.slice(from, pos);
  };

  /**
   * Consume the children of a block whose '{' was already skipped, including
   * the closing '}'. At least one child is required.
   */
  const takeChildren = (): Array<[number, string]> => {
    const children: Array<[number, string]> = [];
    let previousChildKey = -1;
    do {
      const delta = takeDelta();
      // The first child stores its key directly; later ones store a delta.
      const childKey =
        children.length === 0 ? delta : previousChildKey + delta + 1;
      const childEntity = takeEntity();
      if (serialized[pos] === "{") {
        throw new Error("Unexpected nested '{' beyond depth 2");
      }
      children.push([childKey, childEntity]);
      previousChildKey = childKey;
    } while (pos < end && serialized[pos] !== "}");

    if (serialized[pos] !== "}") {
      throw new Error("Unterminated child block");
    }
    pos++; // Step over '}'
    return children;
  };

  let previousKey = -1;
  while (pos < end) {
    const delta = takeDelta();
    const key = previousKey === -1 ? delta : previousKey + delta + 1;
    const value = serialized[pos] === "&" ? takeEntity() : undefined;

    if (serialized[pos] === "{") {
      pos++; // Step over '{'
      const children = takeChildren();
      const onlyChild = children.length === 1 ? children[0] : undefined;
      if (onlyChild !== undefined) {
        // Single child: store it inline instead of allocating a Map.
        result.set(key, {
          value,
          next: onlyChild[0],
          nextValue: onlyChild[1],
        });
      } else {
        result.set(key, {
          value,
          next: new Map<number, EncodeTrieNode>(children),
        });
      }
    } else if (value === undefined) {
      throw new Error(
        `Malformed encode trie: missing value at index ${pos}`,
      );
    } else {
      result.set(key, value);
    }
    previousKey = key;
  }
  return result;
}