import { AST, RegExpParser } from "@eslint-community/regexpp"; /** * A span described by a `start` and an `end` number. * * NOTE(review): the unit of these numbers and whether `end` is inclusive are not visible here - confirm. */ export interface SourceLocation { start: number; end: number; } /** * Properties shared by every AST node: the discriminating `type`, the `parent` link, and an optional `source` * location. */ interface NodeBase { type: Node["type"]; parent: Node["parent"]; source?: SourceLocation; } /** * The node types that can appear in the `elements` of a {@link Concatenation}. */ export type Element = CharacterClass | Alternation | Quantifier | Assertion | Unknown; /** * The node types that can be the `parent` of a {@link Concatenation}. */ export type Parent = Expression | Alternation | Quantifier | Assertion; /** * The union of all AST node types. */ export type Node = Expression | CharacterClass | Alternation | Quantifier | Assertion | Concatenation | Unknown; export interface Alternation extends NodeBase { type: "Alternation"; parent: Concatenation; alternatives: Concatenation[]; } export interface Assertion extends NodeBase { type: "Assertion"; parent: Concatenation; alternatives: Concatenation[]; kind: "ahead" | "behind"; negate: boolean; } export interface Quantifier extends NodeBase { type: "Quantifier"; parent: Concatenation; alternatives: Concatenation[]; lazy: boolean; min: number; max: number; } export interface CharacterClass extends NodeBase { type: "CharacterClass"; parent: Concatenation; characters: CharSet; } export interface Unknown extends NodeBase { type: "Unknown"; parent: Concatenation; id: string; } export interface Expression extends NodeBase { type: "Expression"; parent: null; alternatives: Concatenation[]; } export interface Concatenation extends NodeBase { type: "Concatenation"; parent: Parent; elements: Element[]; } /** * NOTE(review): the helper types below reference `T` without declaring it and contain unbalanced `>` (e.g. `Pick>`); * their type parameter lists appear to have been lost - restore them before compiling. */ type NodeIdent = { type: Node["type"]; }; type NoParentArray = { [K in keyof T]: NoParent; }; type NoParentNode = { [K in keyof NoParentNodePick]: NoParent[K]>; }; type NoParentNodePick = Pick>; /** * A view of an AST node that hides the `parent` property. */ export type NoParent = T extends NodeIdent ? NoParentNode : T extends unknown[] ? NoParentArray : T; /** * Sets the `parent` properties of the given node and all of its child nodes. * * @param node * @param parent The parent of `node`. 
*/ export function setParent(node: T | NoParent, parent: T["parent"]): asserts node is T; /** * Sets the `source` property of the given node and all of its child nodes. * * If `source` is not a function, then the source object will be copied for all `source` properties to be set. The * object will be copied using the `start` and `end` properties alone; other properties will not be copied. * * @param node * @param source * @param overwrite */ export function setSource( node: NoParent, source: SourceLocation | (() => SourceLocation), overwrite?: boolean ): void; /** * Optional enter and leave callbacks for each type of AST node. * * Every callback is optional; each one receives the node of the type it is named after. */ export interface VisitAstHandler { onAlternationEnter?(node: Alternation): void; onAlternationLeave?(node: Alternation): void; onAssertionEnter?(node: Assertion): void; onAssertionLeave?(node: Assertion): void; onCharacterClassEnter?(node: CharacterClass): void; onCharacterClassLeave?(node: CharacterClass): void; onConcatenationEnter?(node: Concatenation): void; onConcatenationLeave?(node: Concatenation): void; onExpressionEnter?(node: Expression): void; onExpressionLeave?(node: Expression): void; onQuantifierEnter?(node: Quantifier): void; onQuantifierLeave?(node: Quantifier): void; onUnknownEnter?(node: Unknown): void; onUnknownLeave?(node: Unknown): void; } /** * The same set of optional callbacks as {@link VisitAstHandler}, but the nodes are typed as `NoParent` views. */ export interface VisitNoParentAstHandler { onAlternationEnter?(node: NoParent): void; onAlternationLeave?(node: NoParent): void; onAssertionEnter?(node: NoParent): void; onAssertionLeave?(node: NoParent): void; onCharacterClassEnter?(node: NoParent): void; onCharacterClassLeave?(node: NoParent): void; onConcatenationEnter?(node: NoParent): void; onConcatenationLeave?(node: NoParent): void; onExpressionEnter?(node: NoParent): void; onExpressionLeave?(node: NoParent): void; onQuantifierEnter?(node: NoParent): void; onQuantifierLeave?(node: NoParent): void; onUnknownEnter?(node: NoParent): void; onUnknownLeave?(node: NoParent): void; } /** * Calls the given visitor on the given node and all of its children. 
* * If the given visitor throws an error, the traversal will stop and the error will be re-thrown. * * @param node * @param visitor */ export function visitAst(node: Node, visitor: VisitAstHandler): void; export function visitAst(node: NoParent, visitor: VisitNoParentAstHandler): void; /** * A transformer is some algorithm that takes an AST subtree and makes any number of modifications to the given subtree. * They cannot see or modify anything outside the given subtree. Transformers are assumed to behave like a set of pure * functions. * * Transformers are always applied bottom-up. * * The simplest transformer is an empty object (`{}`). This is equivalent to a no-op transformer that does not change * the given AST. */ export interface Transformer { /** * An optional name useful for diagnostics. */ readonly name?: string; onAlternation?(node: NoParent, context: TransformContext): void; onAssertion?(node: NoParent, context: TransformContext): void; onCharacterClass?(node: NoParent, context: TransformContext): void; onConcatenation?(node: NoParent, context: TransformContext): void; onExpression?(node: NoParent, context: TransformContext): void; onQuantifier?(node: NoParent, context: TransformContext): void; onUnknown?(node: NoParent, context: TransformContext): void; } /** * The context object that is passed as the second argument to every {@link Transformer} callback. */ export interface TransformContext { /** * The maximum character of all character sets in the AST. * * If the expression to transform does not contain any characters at the start of the transformation, then this * value will be `0`. */ readonly maxCharacter: Char; /** * Signals that the transformer changed the AST. */ readonly signalMutation: () => void; } /** * A transformer that runs all given transformers in sequential order. * * The combined transformer is a special transformer in that the {@link transform} function knows about it. */ export class CombinedTransformer implements Transformer { readonly name = "CombinedTransformer"; /** * The transformers that will be applied in order. 
* * Note: These transformers are not necessarily the ones given to the constructor. If a transformer is a * `CombinedTransformer`, then its transformers will be used instead (think of it as flattening combined * transformers). */ readonly transformers: readonly Transformer[]; constructor(transformers: Iterable); onAlternation(node: NoParent, context: TransformContext): void; onAssertion(node: NoParent, context: TransformContext): void; onCharacterClass(node: NoParent, context: TransformContext): void; onConcatenation(node: NoParent, context: TransformContext): void; onExpression(node: NoParent, context: TransformContext): void; onQuantifier(node: NoParent, context: TransformContext): void; onUnknown(node: NoParent, context: TransformContext): void; } /** * Creates a new transformer that applies all given transformers in sequential order. * * If only one transformer is given, the returned transformer will be functionally equivalent. If no transformers are * given, the returned transformer will be equivalent to a no-op transformer. * * The given iterable can be changed and reused after this function returns. * * @param transformers * @deprecated Use `new CombinedTransformer(transformers)` instead. */ export function combineTransformers(transformers: Iterable): CombinedTransformer; /** * Optional callbacks for observing a transformation. Both callbacks are optional. */ export interface TransformEvents { /** * An optional callback that will be called at the start of every pass. * * @param ast The AST that will be transformed. * @param pass The number of the pass that will be performed. Starts at `1`. */ onPassStart?: (ast: NoParent, pass: number) => void; /** * An optional callback that will be called every time a transformer mutates the AST. * * @param ast The AST that was transformed. * @param node The node that was mutated by the transformer. Descendants of this node may have been mutated as well. * @param transformer The transformer that mutated the AST. 
*/ onChange?: (ast: NoParent, node: NoParent, transformer: Transformer) => void; } /** * Options accepted by {@link transform}. All options are optional. */ export interface TransformOptions { /** * The maximum number of times the transformer will be applied to the AST. * * This is only a maximum. The transformer will be stopped before this number is reached if the AST isn't modified * anymore. * * @default 10 */ maxPasses?: number; /** * Optional events to observe the transformation process. */ events?: TransformEvents; } /** * Transforms the given expression according to the given transformer. * * __Do not__ use the given `ast` object again after calling this function; the object will be in an undefined state. * * Note: This function knows about {@link CombinedTransformer} and will give it special treatment. Instead of applying * the transformer as is, it will apply all of its transformers instead. While this does not change the behavior of the * transformer, it does change which transformers the {@link TransformEvents} will see. Instead of seeing the combined * transformer, they will see the individual transformers. * * @param transformer * @param ast * @param options */ export function transform( transformer: Transformer, ast: NoParent, options?: Readonly ): NoParent; /** * A character base is constructed from a collection of character sets. It holds a list of disjoint, non-empty * character sets - the base sets - that can be used to construct every character set in the collection it was * constructed from. * * ## Guarantees * * - The base sets are guaranteed to be mutually disjoint and non-empty. * * - Every character set in the collection can be constructed by combining (union) a unique set of base sets. * * - The list of base sets is guaranteed to be as small as possible. There are at most `min(n^2, o)` base sets where `n` * is the number of unique, non-empty character sets in the collection, and `o` is the number of characters in the * union of all character sets in the collection. 
* * ## Use case * * The primary purpose of base sets is to remap alphabets. Some FA operations scale with the number of characters in the * alphabet of the FA (e.g. DFA minimization). * * Base sets can be used to determine which characters in an FA's alphabet *Σ* cannot be distinguished by the FA *A*. * Two characters *a,b* in *Σ* are indistinguishable if for all inputs *w* the following hold true: * * 1. *w* is accepted by *A* iff *w* with all occurrences of *a* replaced with *b* is accepted by *A*. * 2. *w* is accepted by *A* iff *w* with all occurrences of *b* replaced with *a* is accepted by *A*. * * Two indistinguishable characters are guaranteed to be in the same base set. * * By treating each base set as a character, it is possible to create a new (smaller) alphabet *Γ* (*|Γ| <= |Σ|*) such * that the FA *A* still behaves the same. * * Since *Γ* is typically (several orders of magnitude) smaller, operations that scale with the size of the alphabet * can be done more quickly. */ export class CharBase { /** * A list of disjoint, non-empty character sets. * * See {@link CharBase} to learn more. */ readonly sets: readonly CharSet[]; /** * Create the base sets of the given collection of character sets. * * See {@link CharBase} to learn more. * * @param charSets * @throws `RangeError` if the collection contains two character sets with different maximums. */ constructor(charSets: Iterable); /** * Splits the given character set into its base sets. * * The returned array will be a list of indexes of base sets necessary to construct the given character set. The * indexes will be sorted and occur at most once. * * **Note**: This assumes that `charSet` is either empty or can be constructed from the base sets. If the * assumption is not met, the output of this function will be undefined. * * @param charSet */ split(charSet: CharSet): number[]; } /** * The read-only operations of a map from characters to values. */ export interface ReadonlyCharMap extends Iterable<[CharRange, T]> { /** * Returns whether this map is empty. 
* * This is equivalent to `this.size === 0` and `this.entryCount === 0`. */ readonly isEmpty: boolean; /** * The number of characters in this map. This is different from {@link entryCount}. * * This is equivalent to `[...this.keys()].reduce((count, range) => count + range.max - range.min + 1, 0)`. */ readonly size: number; /** * The number of entries in this map. * * This is different from {@link size}. In general, you should use {@link size}, because it has the same semantics * as `Set#size` and `Map#size`. * * This is equivalent to `[...this.entries()].length`. */ readonly entryCount: number; /** * Returns whether the given character is a key in the map. * * @param char */ has(char: Char): boolean; /** * Returns whether every character in the given range is a key in the map. * * This is equivalent to: `[...chars].every(char => this.has(char))`. * * @param chars */ hasEvery(chars: CharRange): boolean; /** * Returns whether some character in the given range is a key in the map. * * This is equivalent to: `[...chars].some(char => this.has(char))`. * * @param chars */ hasSome(chars: CharRange): boolean; /** * Returns the value associated with the given character or `undefined` if the character is not a key in the map. * * @param char */ get(char: Char): T | undefined; /** * Invokes the given callback for every item of the character map. * * This method is implemented more efficiently than other iterator-based methods, so choose `forEach` wherever * possible. * * @param callback */ forEach(callback: (value: T, chars: CharRange, map: ReadonlyCharMap) => void): void; /** * Returns all ranges of characters that are keys in the map. * * Keys will be returned in the same order as `this.entries()`. */ keys(): Iterable; /** * Returns all values in the map. Values might not be unique if more than one range maps to the same value. * * Values will be returned in the same order as `this.entries()`. */ values(): Iterable; /** * Returns all key-value pairs in the map. 
* * Entries will be returned in the order of ascending ranges. */ entries(): Iterable<[CharRange, T]>; /** * Returns a mapping from the values of this map to its keys. * * @param maxCharacter */ invert(maxCharacter: Char): Map; /** * Returns a new map with all values mapped by the given function. * * If no function is given, the identity function is used. */ copy(): CharMap; copy(mapFn: (value: T) => U): CharMap; } /** * A map from characters to generic values. * * The map guarantees that there are no adjacent character ranges that map to equal values; such ranges will always be * iterated as one character range. The equality of values is given by JavaScript's strict equality operator (`===`). */ export class CharMap implements ReadonlyCharMap { get isEmpty(): boolean; get size(): number; get entryCount(): number; has(char: Char): boolean; hasEvery(chars: CharRange): boolean; hasSome(chars: CharRange): boolean; get(char: Char): T | undefined; set(char: Char, value: T): void; /** * Sets the value for all characters in the given range. * * This is equivalent to `[...chars].forEach(char => this.set(char, value))`. * * @param chars * @param value */ setRange(chars: CharRange, value: T): void; /** * Sets the value for all characters in the given character set. * * This is equivalent to `[...charSet.characters()].forEach(char => this.set(char, value))`. * * @param charSet * @param value */ setCharSet(charSet: CharSet, value: T): void; delete(char: Char): boolean; /** * Deletes all characters in the given range. * * This is equivalent to `[...range].forEach(char => this.delete(char))`. * * @param range */ deleteRange(range: CharRange): void; /** * Deletes all entries in the map. 
*/ clear(): void; copy(): CharMap; copy(mapFn: (value: T) => U): CharMap; map(mapFn: (value: T, chars: CharRange, map: ReadonlyCharMap) => T): void; mapRange( range: CharRange, mapFn: (value: T | undefined, chars: CharRange, map: ReadonlyCharMap) => T | undefined ): void; filter(conditionFn: (value: T, chars: CharRange, map: ReadonlyCharMap) => boolean): void; invert(maxCharacter: Char): Map; forEach(callback: (value: T, chars: CharRange, map: ReadonlyCharMap) => void): void; keys(): IterableIterator; values(): IterableIterator; entries(): IterableIterator<[CharRange, T]>; [Symbol.iterator](): IterableIterator<[CharRange, T]>; } /** * An immutable interval of {@link Char}s with inclusive ends. * * Each interval contains all characters `x` with `min <= x <= max`. */ export interface CharRange { /** * The inclusive minimum of the interval. * * This value has to be less than or equal to {@link max}. */ readonly min: Char; /** * The inclusive maximum of the interval. * * This value has to be greater than or equal to {@link min}. */ readonly max: Char; } /** * An immutable set of {@link Char}s represented as a sorted set of disjoint non-adjacent intervals ({@link CharRange}). * * All characters in the set have to be between 0 (inclusive) and the maximum of the set (inclusive). */ export class CharSet { /** * The greatest character which can be an element of the set. */ readonly maximum: Char; /** * An array of ranges representing this character set. * * The array must be guaranteed to have the following properties at all times: * * 1. Any two ranges are disjoint. * 2. Any two ranges are non-adjacent. * 3. 0 <= `min` <= `max` <= `this.maximum` for all ranges. * 4. All ranges are sorted by ascending `min`. */ readonly ranges: readonly CharRange[]; /** * Returns `true` if this set doesn't contain any characters. */ get isEmpty(): boolean; /** * Returns `true` if all characters in the range from 0 to `this.maximum`, including 0 and `this.maximum`, are in * the set. 
*/ get isAll(): boolean; /** * Returns the number of unique characters in the set. * * The returned number will be at least `0` and at most `this.maximum + 1`. */ get size(): number; /** * Returns an iterable of all characters in this set. * * Characters are sorted in ascending order and each character is yielded exactly once. * * Note: The iterable is stable. It can be iterated multiple times. */ characters(): Iterable; /** * Returns a string representation of the character set. */ toString(): string; /** * Returns a string representation of the ranges of this character set. * * The string representation has the following rules: * * 1. Each character is represented as a hexadecimal number. * 2. Each range where `min == max` will be represented by the `min` character. * 3. Each range where `min != max` will be represented by `min` followed by `".."` followed by `max`. * 4. The sequence of ranges will be joined together using `", "`. * * The returned string representation will have the following format: * * ``` * string = [ ranges ] * ranges = range *( ", " range ) * range = +hex [ ".." +hex ] * hex = "a" | "b" | "c" | "d" | "e" | "f" | digit * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" * ``` */ toRangesString(): string; /** * Returns a string representation of the Unicode ranges of this character set. * * The primary purpose of this function is provide an easy way to get a readable representation of a Unicode or * Unicode-like character set. The format is optimized for ease of reading for humans. * * The format follows these rules: * - If the character set is empty, `empty` will be returned. * - If the character set contains all characters, `all` will be returned. * - Ranges may be negated, which is indicated with `not`. E.g. `not a b` is the character set that contains all * characters except for a and b. * - A contiguous range of characters is represented using `min-max` where `min` and `max` are formatted characters. 
* - Single characters are formatted as either: * - a Unicode character (e.g. `a`), * - a quoted Unicode character (e.g. `'-'`), or * - a Unicode escape (e.g. `U+FF`). * * The returned string representation will have the following format: * * ``` * string = "all" | "empty" | ranges | "not " ranges * ranges = range *( " " range ) * range = char [ "-" char ] * char = literal | quoted | escape * literal = ?Printable Unicode characters? * literal = "'" ?any character? "'" * escape = "U+" +hex * hex = "A" | "B" | "C" | "D" | "E" | "F" | digit * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" * ``` */ toUnicodeString(): string; /** * Returns an empty character set with the given maximum. * * @param maximum The greatest character which can be an element of the set. */ static empty(maximum: Char): CharSet; /** * Returns a complete character set with the given maximum. * * @param maximum The greatest character which will be an element of the set. */ static all(maximum: Char): CharSet; /** * Returns a character set which contains the given characters. * * @param maximum The greatest character which can be an element of the set. * @param characters A sorted collection of characters. * @throws `RangeError` if the given collection is not sorted or contains characters greater than `maximum`. */ static fromCharacters(maximum: Char, characters: Iterable): CharSet; /** * Returns a character set which contains the given range. * * @param maximum The greatest character which can be an element of the set. * @param range * @throws `RangeError` if the maximum of the given range is greater than `maximum`. */ static fromRange(maximum: Char, range: CharRange): CharSet; /** * Returns a character set which contains the given character. * * @param maximum The greatest character which will be element of the set. * @param char * @throws `RangeError` if the maximum of the given range is greater than `maximum`. 
*/ static fromCharacter(maximum: Char, char: Char): CharSet; /** * Returns whether this and the given character set are equivalent. * * Two `CharSet`s are equal if and only if: * * 1. They have the same maximum. * 2. They contain the same characters. * * Since each set of characters has a unique range representation, 2 equal `CharSet`s are guaranteed to have equal * ranges. * * A `CharSet` and a `CharRange` are equal if and only if they contain the same characters. * * @param other */ equals(other: CharSet | CharRange): boolean; /** * Compares this set with the given set and returns an integer value describing their relation. Two equivalent sets * are always guaranteed to return 0. * * The order defined by this function is guaranteed to be a * [total order](https://en.wikipedia.org/wiki/Total_order). Apart from this, no other guarantees are given. * * @param other */ compare(other: CharSet): number; /** * Returns a character set with the given maximum. * * The ranges of the returned character set are equivalent to the ranges of * `this.intersect({ min: 0, max: newMaximum })`. * * @param newMaximum * @returns */ resize(newMaximum: Char): CharSet; /** * Returns [the complement](https://en.wikipedia.org/wiki/Complement_(set_theory)) of this set. * * The returned set will have the same maximum as this set. */ negate(): CharSet; /** * Returns [the union](https://en.wikipedia.org/wiki/Union_(set_theory)) of this set and all given sets and * character ranges. * * The returned set will have the same maximum as this set. * * @param data * @throws `RangeError` If the maximum of one of the given sets differs from the maximum of this set or if the * maximum of one of the given ranges is greater than the maximum of this set. */ union(...data: (Iterable | CharSet)[]): CharSet; /** * Returns [the intersection](https://en.wikipedia.org/wiki/Intersection_(set_theory)) of this set and the given * set/ranges of characters. * * The returned set will have the same maximum as this set. 
* * @param other * @throws `RangeError` If the maximum of the given set differs from the maximum of this set. */ intersect(other: CharSet | CharRange): CharSet; /** * Returns a set that contains all characters of this set that are not in the given set/range. * * The returned set will have the same maximum as this set. * * @param other * @throws `RangeError` If the maximum of the given set differs from the maximum of this set. */ without(other: CharSet | CharRange): CharSet; /** * Returns whether this set contains the given character. * * @param character */ has(character: Char): boolean; /** * Returns whether `this ⊇ other`. * * @param other */ isSupersetOf(other: CharSet | CharRange): boolean; /** * Returns whether `this ⊆ other`. * * @param other */ isSubsetOf(other: CharSet | CharRange): boolean; /** * Returns whether `this ⊃ other`. * * @param other */ isProperSupersetOf(other: CharSet | CharRange): boolean; /** * Returns whether `this ⊂ other`. * * @param other */ isProperSubsetOf(other: CharSet | CharRange): boolean; /** * Returns whether this set and the given set (or range) are disjoint, i.e. whether they have no character in * common. * * @param other */ isDisjointWith(other: CharSet | CharRange): boolean; /** * Returns any one of the common characters of this set and the given set or range. * * If this character set is disjoint with the given character set/range, then `undefined` will be returned. * * @param other */ commonCharacter(other: CharSet | CharRange): Char | undefined; } /** * A character is a non-negative integer. * * This is one of the core concepts of refa. Instead of operating on JavaScript strings, UTF16 character codes, or * Unicode code points, this library uses plain numbers instead. This makes refa agnostic to text encodings and even * text in general since the integers used as character may represent arbitrary concepts. * * There are only 2 restrictions on the numbers that can be characters: * * 1. They have to be non-negative integers. * 2. 
They can be at most `Number.MAX_SAFE_INTEGER`. * * --- * * This type serves as a way to document characters. It is a clear way to signal that a value is not just any number. */ export type Char = number & { __char?: never; }; /** * A word is a finite sequence of {@link Char}s. * * This is one of the core concepts of refa. Instead of operating on JavaScript strings, all functions operate on * {@link Char}s and char arrays (= words). This means that refa is agnostic to text encodings, the string * representation of JavaScript, and even text itself. * * This type serves as a way to document words. It should _not_ be used interchangeably with `Char[]` or `number[]`. */ export type Word = Char[]; /** * An immutable finite sequence of {@link Char}s. * * This is an immutable view on a {@link Word}. */ export type ReadonlyWord = readonly Char[]; /** * The operations shared by finite automata (FA): querying the accepted language, testing and enumerating words, and * converting the FA to other representations. */ export interface FiniteAutomaton { /** * Returns whether this FA accepts the empty language meaning that it doesn't accept any words. */ readonly isEmpty: boolean; /** * Returns whether the formal language accepted by this FA contains finitely many words. * * __Note__: Finite does not mean that all words can be iterated in practice. E.g. the set of all Unicode words with * 10 or less characters contains 2.6e54 many words and can be accepted by a DFA with only 11 states. */ readonly isFinite: boolean; /** * The maximum character that is part of the alphabet of the words that this FA can accept. */ readonly maxCharacter: Char; /** * Returns whether this FA accepts the given word. * * @param word The characters of the word to test. */ test(word: ReadonlyWord): boolean; /** * Returns an iterable that will yield all words accepted by this FA. Words are yielded by ascending length. * * If this FA accepts infinitely many words, the iterable will never end. */ words(): Iterable; /** * Returns an iterable that will yield all word sets accepted by this FA. Word sets are yielded by ascending length. 
* * If this FA accepts infinitely many words, the iterable will never end. If this FA is finite, the iterable will * end after at most `2^O(n)` word sets (`n` = number of states). * * If you analyse the words of an FA, consider using this method instead of `words`. If this method yields `k` word * sets, then `words` will yield up to `O(k * m ^ l)` words (`m` = number of possible characters, `l` = the maximum * length of any of the `k` word sets). */ wordSets(): Iterable; /** * Returns a string representation of this FA. */ toString(): string; /** * Returns the AST of a regular expression that accepts the same language as this FA. * * @param options */ toRegex(options?: Readonly): NoParent; /** * Returns the string representation of this FA in the * [DOT format](https://en.wikipedia.org/wiki/DOT_(graph_description_language)). * * The output of this function can be passed to any graph visualization program. This can be a * [local installation](https://graphviz.org/download/) or an [online editor](https://edotor.net/). * * By default, {@link CharSet#toUnicodeString} is used to represent {@link CharSet}s. It's possible to provide a * custom stringify function using the `charSetToString` parameter. * * @param charSetToString */ toDot(charSetToString?: (charSet: CharSet) => string): string; /** * Returns the string representation of this FA in the [Mermaid format](https://mermaid.js.org/). * * By default, {@link CharSet#toUnicodeString} is used to represent {@link CharSet}s. It's possible to provide a * custom stringify function using the `charSetToString` parameter. * * @param charSetToString */ toMermaid(charSetToString?: (charSet: CharSet) => string): string; } /** * A graph iterator for all states of an FA with final states. * * @template S The type of a state in the FA to iterate. * @template O The type of the value each state maps to. */ export interface FAIterator> { /** * The initial state of the FA. */ readonly initial: S; /** * Returns the value a state maps to. 
* * Callers of this function are allowed to call the function **without** a `this` argument. * * @see {@link stableOut} */ readonly getOut: (state: S) => O; /** * Whether the {@link getOut} function is stable during the lifetime of the iterator. * * Stable means that if `getOut` gets called for the same state more than once, it will always return the same * value. * * The sameness of states is defined by * [the key equality of the Map class](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map#key_equality). * * The sameness of returned values is not defined by this interface and depends on the iterator. * * I.e. a stable `getOut` function may return new collections/iterators on subsequent invocations as long as the * collections/iterators are considered equal (e.g. equal could be defined as "same elements") by the consumer of * the iterator. * * @default false */ readonly stableOut?: boolean; /** * Returns whether the given state is a final state. * * This function is guaranteed to be deterministic during the time the iterator is used. It is also guaranteed to be * sufficiently fast, usually `O(1)` can be assumed. * * Callers of this function are allowed to call the function **without** a `this` argument. */ readonly isFinal: (state: S) => boolean; } /** * A factory for the nodes of finite automata. */ export interface NodeFactory { /** * Creates a new state. * * @throws {@link TooManyNodesError} * May be thrown if the number of created nodes exceeds some limit. */ readonly createNode: () => S; } /** * An FA builder has the responsibility of constructing a finite automaton. * * The constructed FA is always owned by the builder. * * @template S The type of a state. * @template T The transition type of the values linking states. */ export interface FABuilder extends NodeFactory { /** * The initial state of the FA. */ readonly initial: S; /** * Makes the given state behave like a final state of this FA. 
* * This does not necessarily mean that the given state will be a final state. I.e. calling `makeFinal(s)` does not * necessitate that `isFinal(s)` is true. * * The implementation has to guarantee that calling this method for the same state more than once is allowed. */ readonly makeFinal: (state: S) => void; /** * Returns whether the given state is a final state. * * This operation is assumed to be semantically equivalent to {@link FAIterator.isFinal}. */ readonly isFinal: (state: S) => boolean; /** * Links the two given states using the given transition. * * Calling this operation more than once for the given `from` and `to` states is not guaranteed to succeed. */ readonly linkNodes: (from: S, to: S, transition: T) => void; } /** * An {@link FAIterator} where transitions are a map of states to character sets. * * This is a commonly used interface when dealing with FA. It's the common core all currently implemented FA support. */ export type TransitionIterator = FAIterator>; /** * A graph or FA that can create a {@link TransitionIterator}. */ export interface TransitionIterable { readonly maxCharacter: Char; readonly transitionIterator: () => TransitionIterator; } /** * Options for converting an FA to a regular expression AST. All options are optional. */ export interface ToRegexOptions { /** * The maximum number of RE AST nodes the implementation is allowed to create. * * If the implementation has to create more nodes to create the RE, a `TooManyNodesError` will be thrown. This * maximum will be checked before any optimization passes. * * @default 10000 */ maxNodes?: number; /** * The maximum number of optimization passes that will be done after the initial RE AST was created. * * The initial AST is usually a lot more complex than necessary. Optimizations are then applied in order to minimize * the AST until this limit is reached or the AST can be optimized no further. * * The default number of passes is implementation defined. */ maxOptimizationPasses?: number; } /** * A readonly {@link DFA}. 
*/ export interface ReadonlyDFA extends FiniteAutomaton, TransitionIterable { /** * The initial state of the DFA. */ readonly initial: DFA.ReadonlyNode; /** * The set of final states of the DFA. * * This set may be empty or contain nodes not reachable from the initial state. */ readonly finals: ReadonlySet; stateIterator(): FAIterator; /** * Yields all nodes reachable from the initial state including the initial state. * * This may include trap states, but it will not include unreachable final states. * * The order in which nodes will be returned is implementation defined and may change after any operation that * modifies the DFA. * * Modifying the DFA while iterating will result in implementation-defined behavior. The implementation may stop the * iteration or yield any nodes. * * This operation runs in _O(V + E)_ where _V_ is the number of nodes reachable from the initial state and _E_ is * the number of transitions. */ nodes(): Iterable; /** * Returns the number of nodes reachable from the initial state including the initial state. * * This returns the number of nodes returned by {@link nodes}. */ countNodes(): number; /** * Creates a new DFA equivalent to this one. */ copy(factory?: NodeFactory): DFA; /** * Returns whether this and the given DFA are structurally equal meaning that all nodes and all transitions are * equal. * * @param other */ structurallyEqual(other: ReadonlyDFA): boolean; } /** * A [deterministic finite automaton](https://en.wikipedia.org/wiki/Deterministic_finite_automaton). * * This class implements DFAs with the following properties: * * - There is exactly one initial state. * * - There may be any number of final states. * * This is implemented using a `Set` of states. * * - No epsilon transitions. * * - A transitions always consumes a character. * * (All character sets are guaranteed to be non-empty.) * * - Transitions are unordered. * * As a consequence, `/aa|bb/` and `/bb|aa/` have the same state machine.
* * - Between any two states, there can at most be one transition. */ export class DFA implements ReadonlyDFA { /** * The initial state of the DFA. */ readonly initial: DFA.Node; /** * The set of final states of the DFA. */ readonly finals: Set; readonly maxCharacter: Char; get isEmpty(): boolean; get isFinite(): boolean; stateIterator(): FAIterator; transitionIterator(): TransitionIterator; nodes(): Iterable; countNodes(): number; test(word: ReadonlyWord): boolean; wordSets(): Iterable; words(): Iterable; toString(): string; toRegex(options?: Readonly): NoParent; toDot(charSetToString?: (charSet: CharSet) => string): string; toMermaid(charSetToString?: (charSet: CharSet) => string): string; copy(factory?: NodeFactory): DFA; structurallyEqual(other: ReadonlyDFA): boolean; removeUnreachable(): void; /** * [Minimizes](https://en.wikipedia.org/wiki/DFA_minimization) this DFA. */ minimize(): void; /** * Complements this DFA. * * After this function returns, this DFA will accept all words that it did not accept before the call. * * This operation will create at most 1 node with the given factory. * * @param factory */ complement(factory?: NodeFactory): void; /** * Modifies this DFA such that all prefixes of all accepted words are also accepted. * * If the language of this DFA is empty, then it will remain empty. * * Unreachable states will be removed by this operation. */ prefixes(): void; /** * Creates a new DFA which matches no words. The language of the returned DFA is empty. * * This operation will create exactly 1 node with the given factory. * * @param options * @param factory */ static empty(options: Readonly, factory?: NodeFactory): DFA; /** * Creates a new DFA which matches only the empty word. * * This operation will create exactly 1 node with the given factory. * * @param options * @param factory */ static emptyWord(options: Readonly, factory?: NodeFactory): DFA; /** * Creates a new DFA which matches all words. * * This operation will create exactly 1 node with the given factory.
* * @param options * @param factory */ static all(options: Readonly, factory?: NodeFactory): DFA; /** * Creates a new DFA which matches the given characters. * * This operation will create at most 2 nodes with the given factory. * * @param charSet * @param factory */ static fromCharSet(charSet: CharSet, factory?: NodeFactory): DFA; /** * Returns a new DFA which is equivalent to the intersection of the two given FA. * * @param left * @param right * @param factory */ static fromIntersection( left: TransitionIterable, right: TransitionIterable, factory?: NodeFactory ): DFA; /** * Creates a new DFA which matches exactly the given words. * * @param words * @param options * @param factory */ static fromWords( words: Iterable, options: Readonly, factory?: NodeFactory ): DFA; /** * Creates a new DFA which matches exactly the given word sets. * * @param wordSets * @param options * @param factory */ static fromWordSets( wordSets: Iterable, options: Readonly, factory?: NodeFactory ): DFA; static fromFA(fa: TransitionIterable, factory?: NodeFactory): DFA; static fromTransitionIterator( iter: TransitionIterator, options: Readonly, factory?: NodeFactory ): DFA; static fromBuilder(builder: DFA.Builder, options: Readonly): DFA; } /** * A namespace for DFA-specific classes and interfaces. * * @see {@link DFA} (class) */ export namespace DFA { interface ReadonlyNode { readonly out: ReadonlyCharMap; } class Node implements ReadonlyNode { readonly out: CharMap; link(to: Node, via: CharSet): void; unlink(to: Node): void; /** * Unlinks all outgoing and incoming transitions of this node. */ unlinkAll(): void; } /** * An unlimited node factory that will simply call the {@link Node} constructor.
*/ const nodeFactory: NodeFactory; class LimitedNodeFactory implements NodeFactory { readonly limit: number; constructor(limit?: number); createNode(): Node; } class Builder implements FABuilder { readonly initial: Node; readonly finals: Set; readonly factory: NodeFactory; constructor(factory: NodeFactory); makeFinal(state: Node): void; isFinal(state: Node): boolean; linkNodes(from: Node, to: Node, transition: CharSet): void; createNode(): Node; } interface Options { /** * The maximum numerical value any character can have. * * This will be the maximum of all underlying {@link CharSet}s. */ maxCharacter: Char; } } /** * A readonly {@link ENFA}. */ export interface ReadonlyENFA extends FiniteAutomaton, TransitionIterable { /** * The initial state of the ENFA. */ readonly initial: ENFA.ReadonlyNode; /** * The final state of the ENFA. * * This state may not be reachable from the initial state. */ readonly final: ENFA.ReadonlyNode; /** * Whether this ENFA is in its normal form. * * @see {@link ENFA} */ readonly isNormalized: boolean; stateIterator(resolveEpsilon: boolean): FAIterator; /** * Yields all nodes reachable from the initial state including the initial state. * * This may include trap states, but it will not include the final state if it is unreachable from the initial * state. * * The order in which nodes will be returned is implementation defined and may change after any operation that * modifies the ENFA. * * Modifying the ENFA while iterating will result in implementation-defined behavior. The implementation may stop * the iteration or yield any nodes. * * This operation runs in _O(V + E)_ where _V_ is the number of nodes reachable from the initial state and _E_ is * the number of transitions. */ nodes(): Iterable; /** * Returns the number of nodes reachable from the initial state including the initial state. * * This returns the number of nodes returned by {@link nodes}. */ countNodes(): number; /** * Create a mutable copy of this ENFA.
*/ copy(factory?: NodeFactory): ENFA; } /** * A [nondeterministic finite automaton](https://en.wikipedia.org/wiki/Nondeterministic_finite_automaton) with epsilon * transitions. * * This class implements NFAs with the following properties: * * - There is exactly one initial state. * * - There is exactly one final state. * * - There are epsilon transitions. * * - A transition is either an epsilon transition or consumes a character. * * Epsilon transitions are represented using `null` and characters are represented using non-empty `CharSet`s. * * - Transitions are ordered. * * As a consequence, `/aa|bb/` and `/bb|aa/` have different state machines in this NFA implementation. * * Order is only guaranteed as long as no transitions are removed. Order is defined by the key order of the JavaScript * `Map` class. * * - Between any two states, there can at most be one transition. * * Unlike the {@link NFA} class, transitions cannot be merged. As a consequence, `/a|a/` and `/a/` have different * state machines in this NFA implementation. * * ## Normal form * * The normal form of this ENFA implementation has the following restrictions: * * - The initial state must not have incoming transitions. * - The final state must not have outgoing transitions. * - The initial state and final state are different states. * * Non-normalized ENFAs will either be tolerated or normalized by operations. */ export class ENFA implements ReadonlyENFA { initial: ENFA.Node; final: ENFA.Node; readonly maxCharacter: Char; get isEmpty(): boolean; get isFinite(): boolean; get isNormalized(): boolean; /** * Brings this ENFA is in its normal form. * * This operation will create at most 2 nodes with the given factory.
* * @param factory * @see {@link ENFA} */ normalize(factory?: NodeFactory): void; stateIterator(resolveEpsilon: boolean): FAIterator; transitionIterator(): TransitionIterator; nodes(): Iterable; countNodes(): number; copy(factory?: NodeFactory): ENFA; test(word: ReadonlyWord): boolean; wordSets(): Iterable; words(): Iterable; toString(): string; toRegex(options?: Readonly): NoParent; toDot(charSetToString?: (charSet: CharSet) => string): string; toMermaid(charSetToString?: (charSet: CharSet) => string): string; /** * Modifies this ENFA to accept the concatenation of this ENFA and the given FA. * * @param other * @param factory */ append(other: TransitionIterable, factory?: NodeFactory): void; /** * Modifies this ENFA to accept the concatenation of this ENFA and the other ENFA. * * This operation is implemented by moving (not copying) the states from the other ENFA into this ENFA. The other * ENFA will be in an **invalid state** after this operation completes. The initial and final states of the other * ENFA will be random nodes of this ENFA. Make sure that you never use the other ENFA again. * * This operation will create at most 4 nodes with the given factory. * * @param other * @param factory */ appendInto(other: ENFA, factory?: NodeFactory): void; /** * Modifies this ENFA to accept the concatenation of the given FA and this ENFA. * * @param other * @param factory */ prepend(other: TransitionIterable, factory?: NodeFactory): void; /** * Modifies this ENFA to accept the concatenation of the other ENFA and this ENFA. * * This operation is implemented by moving (not copying) the states from the other ENFA into this ENFA. The other * ENFA will be in an **invalid state** after this operation completes. The initial and final states of the other * ENFA will be random nodes of this ENFA. Makes sure that you never use the other ENFA again. * * This operation will create at most 4 nodes with the given factory.
* * @param other * @param factory */ prependInto(other: ENFA, factory?: NodeFactory): void; /** * Modifies this ENFA to accept the language of this ENFA and the language of the given FA. * * If the union kind is `left`, then this ENFA will be modified to accept `other|this`. Otherwise, it will be * modified to accept `this|other`. * * @param other * @param kind * @param factory */ union(other: TransitionIterable, kind?: "left" | "right", factory?: NodeFactory): void; /** * Modifies this ENFA to accept the language of this ENFA and the language of the other ENFA. * * If the union kind is `left`, then this ENFA will be modified to accept `other|this`. Otherwise, it will be * modified to accept `this|other`. * * This operation is implemented by moving (not copying) the states from the other ENFA into this ENFA. The other * ENFA will be in an **invalid state** after this operation completes. The initial and final states of the other * ENFA will be random nodes of this ENFA. Make sure that you never use the other ENFA again. * * This operation will create at most 6 nodes with the given factory. * * @param other * @param kind * @param factory */ unionInto(other: ENFA, kind?: "left" | "right", factory?: NodeFactory): void; /** * Modifies this ENFA to accept at least `min` and at most `max` concatenations of itself. * * `min` and `max` both have to be non-negative integers with `min <= max`. * `max` is also allowed to be `Infinity`. * * @param min * @param max * @param lazy * @param factory */ quantify(min: number, max: number, lazy?: boolean, factory?: NodeFactory): void; /** * Removes the empty word from the accepted language of this ENFA. * * Unreachable states will be removed by this operation. * * @param factory */ withoutEmptyWord(factory?: NodeFactory): void; /** * All states which cannot be reached from the initial state or cannot reach (or are) a final state, will be * removed.
*/ removeUnreachable(): void; /** * Modifies this ENFA such that all prefixes of all accepted words are also accepted. * * If the language of this ENFA is empty, then it will remain empty. * * Unreachable states will be removed by this operation. * * @param factory */ prefixes(factory?: NodeFactory): void; /** * Modifies this ENFA such that all suffixes of all accepted words are also accepted. * * If the language of this ENFA is empty, then it will remain empty. * * Unreachable states will be removed by this operation. * * @param factory */ suffixes(factory?: NodeFactory): void; /** * Creates a new ENFA which matches no words. The language of the returned ENFA is empty. * * This operation will create exactly 2 nodes with the given factory. * * @param options * @param factory */ static empty(options: Readonly, factory?: NodeFactory): ENFA; /** * Creates a new ENFA which matches only the empty word. * * This operation will create exactly 1 node with the given factory. * * @param options * @param factory */ static emptyWord(options: Readonly, factory?: NodeFactory): ENFA; /** * Creates a new ENFA which matches all words. * * This operation will create exactly 1 node with the given factory. * * @param options * @param factory */ static all(options: Readonly, factory?: NodeFactory): ENFA; /** * Creates a new ENFA which matches the given characters. * * This operation will create exactly 2 nodes with the given factory. * * @param charSet * @param factory */ static fromCharSet(charSet: CharSet, factory?: NodeFactory): ENFA; /** * Creates a new ENFA from the given RE AST concatenation. * * @param concat * @param options * @param creationOptions * @param factory */ static fromRegex( concat: NoParent, options: Readonly, creationOptions?: Readonly, factory?: NodeFactory ): ENFA; /** * Creates a new ENFA from the given RE AST alternatives. * * @param alternatives * @param options * @param creationOptions * @param factory */ static fromRegex( alternatives: readonly NoParent[], options: Readonly, creationOptions?: Readonly, factory?: NodeFactory ): ENFA; /** * Creates a new ENFA which matches all and only all of the given words.
* * @param words * @param options * @param factory */ static fromWords( words: Iterable, options: Readonly, factory?: NodeFactory ): ENFA; /** * Creates a new ENFA which matches exactly the given word sets. * * @param wordSets * @param options * @param factory */ static fromWordSets( wordSets: Iterable, options: Readonly, factory?: NodeFactory ): ENFA; static fromFA(fa: TransitionIterable, factory?: NodeFactory): ENFA; static fromTransitionIterator( iter: TransitionIterator, options: Readonly, factory?: NodeFactory ): ENFA; static fromBuilder(builder: ENFA.Builder, options: Readonly): ENFA; } /** * A namespace for ENFA-specific classes and interfaces. * * @see {@link ENFA} (class) */ export namespace ENFA { interface ReadonlyNode { readonly out: ReadonlyMap; readonly in: ReadonlyMap; /** * Calls the given consumer function on every non-epsilon transition directly reachable from the given node. * * Epsilon transitions will be resolved using a DFS algorithm. This means that for the following graph: * * ```text * (0) -> (1) : "a" * -> (2) : epsilon * -> (3) : "b" * * (1) -> (3) : "c" * * (2) -> (4) : "d" * -> (1) : "e" * -> (2) : epsilon * * (3) -> (1) : epsilon * * (4) -> empty * ``` * * For the node `(0)`, the consumer function will be called with the following resolved list: * * ```text * [(1), "a"] * [(4), "d"] * [(1), "e"] * [(3), "b"] * ``` */ resolveEpsilon(direction: "in" | "out", consumerFn: (charSet: CharSet, node: ReadonlyNode) => void): void; /** * Returns a set of all nodes that are reachable from the given node by only following epsilon transitions in * the given direction. The returned set is guaranteed to always contain the given node. * * The order of the nodes in the returned set in implementation-defined and cannot be relied upon. * * --- * * This method can be used to determine the set of all effectively final states.
* * ``` * const effectivelyFinal = final.reachableViaEpsilon("in"); * ``` */ reachableViaEpsilon(direction: "in" | "out"): Set; } class Node implements ReadonlyNode { readonly out: Map; readonly in: Map; /** * Adds a transition from `this` to `to` using the given non-empty set of characters. A `via` of `null` denotes an * epsilon transition. * * If the two nodes are already linked, an error will be thrown. * * @param to * @param via */ link(to: Node, via: CharSet | null): void; /** * Removes the transition from `this` to `to`. * * This will do nothing if `this` isn't linked to `to`. * * @param to */ unlink(to: Node): void; /** * Unlinks all outgoing and incoming transitions of this node. */ unlinkAll(): void; /** * Unlinks all outgoing transitions of this node. */ unlinkAllOut(): void; /** * Unlinks all incoming transitions of this node. */ unlinkAllIn(): void; resolveEpsilon(direction: "in" | "out", consumerFn: (charSet: CharSet, node: Node) => void): void; reachableViaEpsilon(direction: "in" | "out"): Set; } /** * An unlimited node factory that will simply call the {@link Node} constructor. */ const nodeFactory: NodeFactory; class LimitedNodeFactory implements NodeFactory { readonly limit: number; constructor(limit?: number); createNode(): Node; } class Builder implements FABuilder { readonly initial: Node; readonly final: Node; readonly factory: NodeFactory; constructor(factory: NodeFactory); makeFinal(state: Node): void; isFinal(state: Node): boolean; linkNodes(from: Node, to: Node, transition: CharSet | null): void; createNode(): Node; } interface Options { /** * The maximum numerical value any character can have. * * This will be the maximum of all underlying {@link CharSet}s. */ maxCharacter: Char; } interface FromRegexOptions { /** * How to handle assertions when construction the ENFA. * * - `"throw"` * * This method will throw an error when encountering an assertion. * * - `"disable"` * * This method will replace any assertion with an empty character class, effectively removing it.
* * - `"ignore"` * * This method will replace any assertion with an empty group. * * @default "throw" */ assertions?: "disable" | "ignore" | "throw"; /** * How to handle unknowns when constructing the ENFA. * * - `"throw"` * * This method will throw an error when encountering an unknown. * * - `"disable"` * * This method will replace any unknown with an empty character class, effectively removing it. * * @default "throw" */ unknowns?: "disable" | "throw"; /** * The number at which the maximum of a quantifier will be assumed to be infinity. * * Quantifiers with a large finite maximum (e.g. `a{1,10000}`) can create huge NFAs with thousands of states. * Any Quantifier with a maximum greater than or equal to this threshold will be assumed to be infinite. * * @default Infinity */ infinityThreshold?: number; } } /** * An error that is thrown when the max characters of two or more FA or transition iterables are not the same. * * Operations on FA and transition iterables require the max characters of all given FA and transition iterables to be * the same and will throw this error if they are not. */ export class MaxCharacterError extends Error { /** * Asserts the two given max characters are the same. * * @param a * @param b * @param kind */ static assert( a: | Char | { maxCharacter: Char; }, b: | Char | { maxCharacter: Char; }, kind?: string ): void; } /** * An error that is thrown when an operation causes too many nodes to be created. * * Many FA operations have the potential to create a huge number of nodes (thousands and millions) which may result in * the JavaScript runtime running out of memory and/or crashing. This error will be thrown before that happens to safely * abort an otherwise resource-intensive operation. */ export class TooManyNodesError extends Error { /** * Asserts that the current number of created nodes does not exceed the limit.
* * @param current * @param limit * @param kind */ static assert(current: number, limit: number, kind: string): void; } /** * Returns a lazily-created {@link TransitionIterator} for the intersection of the two given FA. * * The iterator will create states as it is traversed. * * @param left * @param right * @param maxNodes */ export function getIntersectionIterator( left: TransitionIterable, right: TransitionIterable, maxNodes?: number ): TransitionIterator; /** * Returns whether the languages of the two given FA are disjoint. * * The runtime of this algorithm is `O(n * m)` (n = number of states of `left`, m = number of states of `right`) * but it's a lot faster in practice with the worst case being very rare. * * Since this uses the intersection operation, you can supply the `maxNodes` option of the intersection. * * This is equivalent to `NFA.fromIntersection(left, right).isEmpty` but implemented more efficiently. * * @param left * @param right * @param maxNodes */ export function isDisjointWith( left: TransitionIterable, right: TransitionIterable, maxNodes?: number ): boolean; /** * Returns a potentially infinite iterable of word sets accepted by both given transition iterables. * * This function provides the following guarantees: * * 1. Word sets are guaranteed to be yielded in the order of increasing length. (Word sets of equal lengths may be * yielded in any order.) * 2. No character set of the yielded word sets is empty. * * This is roughly equivalent to `NFA.fromIntersection(left, right).wordSets()` but implemented more efficiently. * * @param left * @param right * @param maxNodes */ export function getIntersectionWordSets( left: TransitionIterable, right: TransitionIterable, maxNodes?: number ): Iterable; /** * Returns a potentially infinite iterable of words accepted by both given transition iterables. * * This function provides the following guarantees: * * 1. Words are guaranteed to be yielded in the order of increasing length.
(Words of equal lengths may be yielded in * any order.) * * This is roughly equivalent to `NFA.fromIntersection(left, right).words()` but implemented more efficiently. * * @param left * @param right * @param maxNodes */ export function getIntersectionWords( left: TransitionIterable, right: TransitionIterable, maxNodes?: number ): Iterable; /** * A readonly {@link NFA}. */ export interface ReadonlyNFA extends FiniteAutomaton, TransitionIterable { /** * The initial state of the NFA. */ readonly initial: NFA.ReadonlyNode; /** * The set of final states of the NFA. * * This set may be empty or contain nodes not reachable from the initial state. */ readonly finals: ReadonlySet; /** * Whether this NFA is in its normal form. * * @see {@link NFA} */ readonly isNormalized: boolean; stateIterator(): FAIterator; /** * Yields all nodes reachable from the initial state including the initial state. * * This may include trap states, but it will not include unreachable final states. * * The order in which nodes will be returned is implementation defined and may change after any operation that * modifies the NFA. * * Modifying the NFA while iterating will result in implementation-defined behavior. The implementation may stop the * iteration or yield any nodes. * * This operation runs in _O(V + E)_ where _V_ is the number of nodes reachable from the initial state and _E_ is * the number of transitions. */ nodes(): Iterable; /** * Returns the number of nodes reachable from the initial state including the initial state. * * This returns the number of nodes returned by {@link nodes}. */ countNodes(): number; /** * Creates a mutable copy of this NFA. * * @param factory */ copy(factory?: NodeFactory): NFA; } /** * A [nondeterministic finite automaton](https://en.wikipedia.org/wiki/Nondeterministic_finite_automaton). * * This class implements NFAs with the following properties: * * - There is exactly one initial state. * * - There may be any number of final states.
* * This is implemented using a `Set` of states. * * - No epsilon transitions. * * - A transitions always consumes a character. * * (All character sets are guaranteed to be non-empty.) * * - Transitions are unordered. * * As a consequence, `/aa|bb/` and `/bb|aa/` have the same state machine in this NFA implementation. * * (The underlying data structure may be a JavaScript `Map` but the key order is ignored.) * * - Between any two states, there can at most be one transition. * * This means that all transitions between two nodes will be merged into one. This is implemented as a simple * {@link CharSet.union}. As a consequence, `/a|a/` and `/a/` have the same state machine in this NFA implementation. * * ## Normal form * * The normal form of this NFA implementation has the following restriction: * * - The initial state must not have incoming transitions. * * Non-normalized NFAs will either be tolerated or normalized by operations. */ export class NFA implements ReadonlyNFA { readonly initial: NFA.Node; readonly finals: Set; readonly maxCharacter: Char; get isEmpty(): boolean; get isFinite(): boolean; get isNormalized(): boolean; /** * Brings this NFA into its normal form. * * This operation will create at most 1 node with the given factory. * * @param factory * @see {@link NFA} */ normalize(factory?: NodeFactory): void; stateIterator(): FAIterator; transitionIterator(): TransitionIterator; nodes(): Iterable; countNodes(): number; copy(factory?: NodeFactory): NFA; test(word: ReadonlyWord): boolean; wordSets(): Iterable; words(): Iterable; toString(): string; toRegex(options?: Readonly): NoParent; toDot(charSetToString?: (charSet: CharSet) => string): string; toMermaid(charSetToString?: (charSet: CharSet) => string): string; /** * Modifies this NFA to accept all words from this NFA and the given FA.
* * @param other * @param factory */ union(other: TransitionIterable, factory?: NodeFactory): void; /** * Modifies this NFA to accept all words from this NFA and the given NFA. * * This is implemented by simply moving the nodes from the given NFA into this NFA. The given NFA will be empty * after this operation as nodes are moved, not shared. * * @param other * @param factory */ unionInto(other: NFA, factory?: NodeFactory): void; /** * Modifies this NFA to accept the concatenation of this NFA and the given FA. * * @param other * @param factory */ append(other: TransitionIterable, factory?: NodeFactory): void; /** * Modifies this NFA to accept the concatenation of this NFA and the given NFA. * * This is implemented by simply moving the nodes from the given NFA into this NFA. The given NFA will be empty * after this operation as nodes are moved, not shared. * * @param other * @param factory */ appendInto(other: NFA, factory?: NodeFactory): void; /** * Modifies this NFA to accept the concatenation of the given FA and this NFA. * * @param other * @param factory */ prepend(other: TransitionIterable, factory?: NodeFactory): void; /** * Modifies this NFA to accept the concatenation of the given NFA and this NFA. * * This is implemented by simply moving the nodes from the given NFA into this NFA. The given NFA will be empty * after this operation as nodes are moved, not shared. * * @param other * @param factory */ prependInto(other: NFA, factory?: NodeFactory): void; /** * Modifies this NFA to accept at least `min` and at most `max` concatenations of itself. * * `min` and `max` both have to be non-negative integers with `min <= max`. * `max` is also allowed to be `Infinity`. * * @param min * @param max * @param factory */ quantify(min: number, max: number, factory?: NodeFactory): void; /** * Removes the empty word from the accepted language of this NFA. */ withoutEmptyWord(): void; /** * Removes all states that are unreachable.
* * Only the following states will remain after this operation: * * 1. The initial state. * 2. All states that are reachable from the initial state and can reach one of the final states. */ removeUnreachable(): void; /** * Modifies this NFA such that all prefixes of all accepted words are also accepted. * * If the language of this NFA is empty, then it will remain empty. * * Unreachable states will be removed by this operation. */ prefixes(): void; /** * Modifies this NFA such that all suffixes of all accepted words are also accepted. * * If the language of this NFA is empty, then it will remain empty. * * Unreachable states will be removed by this operation. */ suffixes(): void; /** * Modifies this NFA such that it accepts the reverse of all words it currently accepts. * * If the language of this NFA is empty, then it will remain empty. * * Unreachable states will be removed by this operation. */ reverse(): void; /** * Creates a new NFA which matches no words. The language of the returned NFA is empty. * * This operation will create exactly 1 node with the given factory. * * @param options * @param factory The factory used to create the node of the new NFA. */ static empty(options: Readonly, factory?: NodeFactory): NFA; /** * Creates a new NFA which matches only the empty word. * * This operation will create exactly 1 node with the given factory. * * @param options * @param factory The factory used to create the node of the new NFA. */ static emptyWord(options: Readonly, factory?: NodeFactory): NFA; /** * Creates a new NFA which matches all words. * * This operation will create exactly 1 node with the given factory. * * @param options * @param factory The factory used to create the node of the new NFA. */ static all(options: Readonly, factory?: NodeFactory): NFA; /** * Creates a new NFA which matches the given characters. * * This operation will create at most 2 nodes with the given factory. * * @param charSet * @param factory The factory used to create the nodes of the new NFA. */ static fromCharSet(charSet: CharSet, factory?: NodeFactory): NFA; /** * Returns a new NFA which is equivalent to the intersection of the two given FA.
* * @param left * @param right * @param factory */ static fromIntersection( left: TransitionIterable, right: TransitionIterable, factory?: NodeFactory ): NFA; static fromRegex( concat: NoParent, options: Readonly, creationOptions?: Readonly, factory?: NodeFactory ): NFA; static fromRegex( alternatives: readonly NoParent[], options: Readonly, creationOptions?: Readonly, factory?: NodeFactory ): NFA; /** * Creates a new NFA which matches exactly the given words. * * @param words * @param options * @param factory */ static fromWords( words: Iterable, options: Readonly, factory?: NodeFactory ): NFA; /** * Creates a new NFA which matches exactly the given word sets. * * @param wordSets * @param options * @param factory */ static fromWordSets( wordSets: Iterable, options: Readonly, factory?: NodeFactory ): NFA; static fromFA(fa: TransitionIterable, factory?: NodeFactory): NFA; static fromTransitionIterator( iter: TransitionIterator, options: Readonly, factory?: NodeFactory ): NFA; static fromBuilder(builder: NFA.Builder, options: Readonly): NFA; } /** * A namespace for NFA-specific classes and interfaces. * * @see {@link NFA} (class) */ export namespace NFA { interface ReadonlyNode { readonly out: ReadonlyMap; readonly in: ReadonlyMap; } class Node implements ReadonlyNode { readonly out: Map; readonly in: Map; link(to: Node, via: CharSet): void; unlink(to: Node): void; /** * Unlinks all outgoing and incoming transitions of this node. */ unlinkAll(): void; /** * Unlinks all outgoing transitions of this node. */ unlinkAllOut(): void; /** * Unlinks all incoming transitions of this node. */ unlinkAllIn(): void; } /** * An unlimited node factory that will simply call the {@link Node} constructor.
*/ const nodeFactory: NodeFactory; class LimitedNodeFactory implements NodeFactory { readonly limit: number; constructor(limit?: number); createNode(): Node; } class Builder implements FABuilder { readonly initial: Node; readonly finals: Set; readonly factory: NodeFactory; constructor(factory: NodeFactory); makeFinal(state: Node): void; isFinal(state: Node): boolean; linkNodes(from: Node, to: Node, transition: CharSet): void; createNode(): Node; } interface Options { /** * The maximum numerical value any character can have. * * This will be the maximum of all underlying {@link CharSet}s. */ maxCharacter: Char; } interface FromRegexOptions { /** * How to handle assertions when constructing the NFA. * * - `"throw"` * * This method will throw an error when encountering an assertion. * * - `"disable"` * * This method will replace any assertion with an empty character class, effectively removing it. * * - `"ignore"` * * This method will replace any assertion with an empty group. * * @default "throw" */ assertions?: "disable" | "ignore" | "throw"; /** * How to handle unknowns when constructing the NFA. * * - `"throw"` * * This method will throw an error when encountering an unknown. * * - `"disable"` * * This method will replace any unknown with an empty character class, effectively removing it. * * @default "throw" */ unknowns?: "disable" | "throw"; /** * The number at which the maximum of a quantifier will be assumed to be infinity. * * Quantifiers with a large finite maximum (e.g. `a{1,10000}`) can create huge NFAs with thousands of states. * Any quantifier with a maximum greater than or equal to this threshold will be assumed to be infinite. * * @default Infinity */ infinityThreshold?: number; } } /** * A word set is a finite sequence of non-empty {@link CharSet}s. * * All {@link CharSet}s are guaranteed to be non-empty and to have the same maximum. * * All FA and regex implementations are based on either {@link CharSet}s or {@link CharRange}s.
This is necessary * because it's not practical to represent the large character sets used in everyday regexes using single characters. * Consequently, it is more efficient to work with {@link CharSet}s for them, so operations that yield the words of an * FA or regex typically yield {@link WordSet}s instead of {@link Word}s. * * This type serves as a way to document word sets. It should _not_ be used interchangeably with `CharSet[]`. */ export type WordSet = CharSet[]; /** * An immutable finite sequence of non-empty {@link CharSet}s. * * This is an immutable view on a {@link WordSet}. */ export type ReadonlyWordSet = readonly CharSet[]; /** * Contains all AST transformer implementations of refa. * * All transformer factory functions implemented here will optionally take {@link CreationOptions} or a sub-class of it. * This can be used to control the behavior of the created transformers. * * For a simple transformer that applies most transformers while preserving the semantics of the given AST, * see {@link simplify}. * * @module */ export namespace Transformers { interface CreationOptions { /** * If `true`, transformers are allowed to reorder alternatives and to change/ignore the laziness of quantifiers. * This may cause the behavior of the regex to change. * * @default false */ ignoreOrder?: boolean; /** * If `true`, transformers are allowed to reduce or increase the ambiguity of the regular expression. * * @default false */ ignoreAmbiguity?: boolean; } /** * This transformer will apply all trivial assertions (e.g. `/(?!0)\d/` => `/[1-9]/`) and remove all branches in * assertions that are guaranteed to reject (e.g. `(?=\d+=|-)\w` => `(?=\d+=)\w`). * * @param _options */ function applyAssertions(_options?: Readonly): Transformer; /** * This will factor out common prefixes and suffixes in parent nodes. * * Examples: * * - `(?:abc|aba)` => `(?:ab(?:c|a))` * - `(? `(? 
`(?:abc(?:||))` * * @param options */ function factorOut(options?: Readonly): Transformer; /** * This transformer will simplify the AST by doing trivial inlining operations. * * It will: * * 1. Inline single-alternative alternations in concatenation (e.g. `a(?:b)c` => `abc`). * 2. Inline single-alternation concatenations (e.g. `(?:(?:a|b)|c)` => `(?:a|b|c)`). * 3. Inline constant-one quantifiers (e.g. `ab{1}c` => `abc`). * 4. Remove constant-zero quantifiers (e.g. `ab{0}c` => `ac`). * 5. Inline trivially nested assertions (e.g. `(?!(?<!a))` => `(?<=a)`). * 6. Inline nested assertions at the end of the expression tree (e.g. `(?!a(?=b))` => `(?!ab)`). * * --- * * This transformer should be used in combination with {@link removeDeadBranches} to handle trivial simplifications. * * @param _options */ function inline(_options?: Readonly): Transformer; /** * This transformer will try to make quantifiers greedy whenever possible. * * Note: If `ignoreOrder` is `true`, then quantifiers will always be made greedy. * * @param options */ function makeGreedy(options?: Readonly): Transformer; /** * This operation tries to merge as many elements as possible with existing quantifiers. * * Examples: * * - `/a*a/` => `/a+/` * - `/a*(?:a+|c)/` => `/a*(?:a|c)/` * * @param options */ function mergeWithQuantifier(options?: Readonly): Transformer; /** * This tries to simplify how a given sub-expression accepts the empty string. The goal is to modify the sub-expression * such that exactly one path accepts the empty string. This has the emergent result that the operator that causes the * sub-expression to accept the empty string moves closer to the root of the tree. * * Examples: * * - `a(?:b*|d?)` => `a(?:b+|d)?` * - `||a*|b` => `(?:a+|b)?` * * This operation largely ignores the order of alternatives and usually reduces the ambiguity of the expression. If * order or ambiguity have to be preserved, then the effectiveness of this transformer will be greatly reduced.
* * @param options */ function moveUpEmpty(options?: Readonly): Transformer; /** * This merges/optimizes nested quantifiers. * * Examples: * * - `(?:a+)*` => `a*` * - `(?:a{2,4})+` => `a{2,}` * - `(?:a{4}){8}` => `a{32}` * - `(?:a*|b+c|f+)*` => `(?:a{1}|b+c|f{1})*` * * This operation largely ignores the order of alternatives and usually reduces the ambiguity of the expression. If * order or ambiguity have to be preserved, then the effectiveness of this transformer will be greatly reduced. * * @param options */ function nestedQuantifiers(options?: Readonly): Transformer; interface PatternEdgeAssertionsCreationOptions extends CreationOptions { /** * Whether inlining of pattern edge assertions is active. * * @default true */ inline?: boolean; /** * Whether removal of pattern edge assertions is active. * * @default false */ remove?: boolean; } /** * This transformer will only touch assertions that assert characters beyond the edge of the pattern. * * E.g. in `/(? `/(? `/(?!\d)\w+\s* `a(?!\d)\w`, which * may prevent some removal. Some assertions will not be removed because another may be inlined later. E.g. the `(?!\d)` * in `(?=\w)a?(?!\d)` will not be removed because the pattern may be transformed to `(?:a|(?=\w))(?!\d)` => * `a(?!\d)|(?=\w)(?!\d)` which can be inlined to `a(?!\d)|(?!\d)\w` and transformed to `a(?!\d)|[A-Z_a-z]`. * * If neither inlining nor removal is active, then this transformer won't do anything. * * @param options */ function patternEdgeAssertions(options?: Readonly): Transformer; /** * This removes dead branches in the AST. * * Dead branches are parts of the regex that can never accept on any given input string (e.g. `[]a|b` => `b`). * * This operation may produce parent nodes with 0 alternatives. Quantifiers with 0 alternatives and a minimum of 0 will * be replaced with the empty concatenation (e.g. `a(?:[]b)?c` => `ac`). * * --- * * This transformer should be used in combination with {@link inline} to handle trivial simplifications.
* * @param _options */ function removeDeadBranches(_options?: Readonly): Transformer; /** * This will remove all assertions that are known to always reject/accept no matter the input string. * * @param _options */ function removeUnnecessaryAssertions(_options?: Readonly): Transformer; interface RemoveAssertionsCreationOptions extends CreationOptions { /** * What assertions are replaced with: the empty set (`"empty-set"`) or the empty word (`"empty-word"`). * * @default "empty-set" */ replacement?: "empty-set" | "empty-word"; } /** * This transformer will replace all assertions with either the empty set or the empty word. * * @param options */ function replaceAssertions(options?: Readonly): Transformer; /** * Sorts adjacent assertions such that lookbehinds are always to the right of lookaheads. * * This operation may be necessary for other transformers to pick up on certain patterns. * * E.g. `(?=a)(?!b)(? `(?): Transformer; /** * Combines single-character alternatives. * * This rule will try to combine as many character classes as possible to simplify the regular expression. * * E.g. `a|b|c` => `[abc]`. * * @param options */ function unionCharacters(options?: Readonly): Transformer; /** * This transformer is a combined transformer with the goal of simplifying the AST as much as possible without * changing the semantics. * * The main purpose of this transformer is to provide a stable API. The specific functionality of individual * transformers may change over time, and transformers may depend on each other. This transformer will always * provide the same functionality. Namely, it will always simplify the AST. * * As with all transformers, creation options can be provided. Depending on the options, a different set of * underlying transformers may be used. * * @param options */ function simplify(options?: Readonly): CombinedTransformer; } /** * Contains algorithms consuming and producing {@link FAIterator}s. * * @module */ export namespace FAIterators { /** * This eagerly creates an FA that accepts exactly all the given words.
* * The construction is already finished when this method returns, so the returned FA iterator does not have to be used. * * The construction will create a DFA by default. However, the FA builder implementation has to be carefully chosen to * preserve the determinism. In order for the determinism to be preserved, `builder` and `getOutState` have to fulfill * the following conditions: * * - Let `x`, `y` be any 2 states of `builder` and `c` be any valid character `<= maxCharacter`. Iff this function * called `builder.linkNodes(x, y, c)`, then `getOutState(x, c) == y`. * - `builder` has to be an empty FA when given to this method. * - `builder.makeFinal(x)` must have no effect on `getOutState`. * * @param builder * @param getOutState * @param words * @param maxCharacter * @returns */ function fromWords( builder: FABuilder, getOutState: (state: S, char: Char) => S | undefined, words: Iterable, maxCharacter: Char ): FAIterator; /** * This eagerly creates an FA that accepts exactly all the given word sets. * * The construction is already finished when this method returns, so the returned FA iterator does not have to be used. * * The construction will create a DFA by default. * * @param builder * @param wordSets * @param maxCharacter * @returns */ function fromWordSets( builder: FABuilder, wordSets: Iterable, maxCharacter: Char ): FAIterator; /** * A lazy intersection algorithm that will use the given FA builder to construct the intersection FA as the returned * iterator is used to traverse the FA. * * To construct the whole intersection FA, simply traverse the entire iterator. * * @param builder * @param left * @param right */ function intersection( builder: FABuilder, left: TransitionIterator, right: TransitionIterator ): FAIterator; /** * Creates a new iterator that is equivalent to the given iterator with the given initial state.
* * @param iter * @param initial * @returns */ function withInitial(iter: FAIterator, initial: S): FAIterator; /** * Creates a new iterator that is equivalent to the given iterator with the given `getOut` function. * * @param iter * @param getOut * @param stableOut * @returns */ function withGetOut( iter: FAIterator, getOut: (state: S) => T, stableOut?: boolean ): FAIterator; /** * Creates a new iterator that is equivalent to the given iterator with the given `isFinal` function. * * @param iter * @param isFinal * @returns */ function withIsFinal(iter: FAIterator, isFinal: (state: S) => boolean): FAIterator; /** * Maps the out type of the given iterator and returns a new iterator. * * @param iter * @param mapFn */ function mapOut(iter: FAIterator, mapFn: (out: O) => T): FAIterator; /** * Maps the out type of the given iterator and returns a new iterator. * * @param iter * @param mapFn */ function mapOutIter(iter: FAIterator>, mapFn: (out: O) => T): FAIterator>; /** * Filters the out type of the given iterator with the given condition function and returns a new iterator. * * NOTE(review): presumably only the out items for which `conditionFn` returns `true` are kept - confirm. * * @param iter * @param conditionFn */ function filterOutIter( iter: FAIterator>, conditionFn: (out: O) => boolean ): FAIterator>; /** * This will traverse the whole iterator and call the given consumer function (optional) on each reachable state * exactly once. * * The order in which states are traversed is implementation-defined. * * @param iter * @param consumerFn */ function forEach(iter: FAIterator, consumerFn?: (state: S) => void): void; /** * Returns the number of nodes reachable from the initial state (including the initial state itself). * * @param iter */ function count(iter: FAIterator): number; /** * The returned iterator is guaranteed to be stable. * * @param iter */ function ensureStableOut(iter: FAIterator): FAIterator; /** * Iterates all states reachable from the initial state of the given iterator in BFS order. * * The returned iterable cannot be empty and will always contain the initial state.
* * @param iter */ function iterateStates(iter: FAIterator): Iterable; /** * Returns whether the initial state can reach (or is) a final state. * * @param iter */ function canReachFinal(iter: FAIterator): boolean; /** * Returns whether the given graph contains a cycle reachable from the initial state. * * @param iter */ function hasCycle(iter: FAIterator): boolean; /** * Returns whether the given FA only has finitely many paths that lead to a final state. * * @param iter */ function languageIsFinite(iter: FAIterator): boolean; /** * Creates a new iterator that is equivalent to the given iterator except that the initial state is guaranteed to be * final. * * @param iter */ function makeInitialFinal(iter: FAIterator): FAIterator; /** * Creates a new iterator that is equivalent to the given iterator except that the initial state is guaranteed to be * non-final. * * @param iter */ function makeInitialNonFinal(iter: FAIterator): FAIterator; /** * Returns any one of the shortest paths accepted by the given iterator. * * E.g. for the regex `a|b|cd`, the returned path may be `a` or `b` but not `cd`. * * If the iterator does not accept any path, `undefined` will be returned. * * @param iter * @param selectState */ function shortestAcceptingPath( iter: FAIterator>, selectState: (item: T) => S ): T[] | undefined; /** * This will return an iterator that iteratively creates a DFA using the given {@link FABuilder}. * * This operation may produce up to _2^O(n)_ many states. The builder should limit the number of states created. * * @param builder * @param iter */ function makeDeterministic( builder: FABuilder, iter: FAIterator> ): FAIterator; /** * An FA builder that uses `Map` objects as nodes. Each node is the map of its outgoing transitions.
*/ class MapFABuilder implements FABuilder { readonly initial: MapFABuilderNode; readonly finals: Set; constructor(maxNodes?: number); makeFinal(state: MapFABuilderNode): void; isFinal(state: MapFABuilderNode): boolean; createNode(): MapFABuilderNode; linkNodes(from: MapFABuilderNode, to: MapFABuilderNode, transition: CharSet): void; } /** * A node of a {@link MapFABuilder}: a `Map` of the node's outgoing transitions. */ type MapFABuilderNode = Map; /** * Removes all dead states (and trap states) from the given iterator. * * Note: This will iteratively create a complete copy of the given FA. This method is an expensive operation. * * @param iter * @param select */ function removeDeadStates(iter: FAIterator>, select: (item: O) => S): FAIterator; /** * Returns a string representation of the given iterator. * * NOTE(review): judging by the name, the output is presumably in the Graphviz DOT format - confirm. * * @param iter * @param options */ function toDot( iter: FAIterator>, options: ToDotOptions | SimplePrintOptions ): string; type ToDotAttrs = Record; interface ToDotOptions { getEdgeAttributes: (transition: T, nth: number, from: S, to: S, info: NodeInfo) => Readonly; getGraphAttributes?: () => Readonly; getNodeAttributes?: (node: S, info: NodeInfo) => Readonly; } /** * Returns a string representation of the given iterator. * * NOTE(review): judging by the name, the output is presumably a Mermaid diagram definition - confirm. * * @param iter * @param options */ function toMermaid( iter: FAIterator>, options: ToMermaidOptions | SimplePrintOptions ): string; interface ToMermaidOptions { getNodeAttributes?: (node: S, info: NodeInfo) => Readonly; getEdgeAttributes: (transition: T, nth: number, from: S, to: S, info: NodeInfo) => MermaidEdge; } interface MermaidNode { label: string; shape: [string, string]; } interface MermaidEdge { label?: string; length?: number; } interface NodeInfo { isInitial(node: S): boolean; isFinal(node: S): boolean; getId(node: S): number; getNumberOfOutgoingEdges(node: S): number; } interface SimplePrintOptions { /** * Returns the string representation of the given transition. * * @param transition * @returns */ transitionToString: (transition: T) => string; /** * Whether transitions are ordered. * * @default false */ ordered?: boolean; } /** * Returns a regular expression for the given iterator. * * `null` transitions are assumed to be epsilon transitions.
* * @param iter * @param options */ function toRegex( iter: FAIterator>, options?: Readonly ): NoParent; /** * Returns a human-readable string representation of the given FA. The FA has to have exactly one initial state. * * All states will be labeled with numbers. The initial state will **always** have the number 0. Each state will be * mapped to its outgoing states. The outgoing states may contain duplicates and are sorted alphabetically by their * transition string. The number of each state will be surrounded by brackets - square brackets for final states and round * brackets for non-final states. * * A conversion function for the transitions may optionally be given. If no transition function is given, the native * `String` function will be used. * * --- * * Example output for an NFA of `a*d|bb*` * * ```text * (0) -> (1) : 'a' * -> [2] : 'b' * -> [3] : 'd' * * (1) -> [3] : 'd' * * [2] -> [2] : 'b' * * [3] -> none * ``` * * @param iter * @param toString * @param ordered */ function toString( iter: FAIterator>, toString?: (value: T) => string, ordered?: boolean ): string; /** * Iterates all word sets of the given FA. * * Word sets are guaranteed to be iterated ordered by ascending length. Word sets might overlap. * * This function assumes that all character sets in the given iterator are non-empty. * * @param iter */ function iterateWordSets(iter: FAIterator>): Iterable; /** * Returns any one of the shortest word sets accepted by the given iterator. * * If the iterator does not accept any words, `undefined` will be returned. * * This function assumes that all character sets in the given iterator are non-empty. * * --- * * This operation is roughly equivalent to `firstOf(iterateWordSets(iter))` but implemented **much more** efficiently. * * @param iter */ function shortestWordSet(iter: FAIterator>): WordSet | undefined; /** * Returns a set of inputs rejected by the given iterator using the given input character set.
* * If the iterator accepts all words, `undefined` is guaranteed to be returned. * * This algorithm implements an approximation to determine rejecting inputs in order to guarantee non-exponential * worst-case execution time. Consequently, the algorithm can't find rejecting inputs for some iterators and returns * `undefined` instead. * * @param iter * @param inputCharacters The set of input characters. * * All character sets in the returned word set will be subsets of the set of input characters. * * If all characters are allowed, use `CharSet.all(maxCharacter)`. */ function approximateRejectingWordSet( iter: FAIterator>, inputCharacters: CharSet ): WordSet | undefined; } /** * Classes and functions to convert JavaScript RegExp to refa AST and vice versa. * * All classes and functions in this module/namespace are specific to JavaScript regular expressions as defined by the * ECMAScript standard. * * @see {@link Parser}: A class to convert from JS RegExp to refa AST. * @see {@link toLiteral}: A function to convert from refa AST to JS RegExp.
* * @module */ export namespace JS { type BoundaryAssertion = WordBoundaryAssertion | TextBoundaryAssertion; interface WordBoundaryAssertion { kind: "word"; negate: boolean; } interface TextBoundaryAssertion { kind: "end" | "start"; } function createAssertion(assertion: Readonly, flags: Readonly): NoParent; type PredefinedCharacterSet = | AnyCharacterSet | DigitCharacterSet | PropertyCharacterSet | SpaceCharacterSet | WordCharacterSet; interface AnyCharacterSet { kind: "any"; } interface DigitCharacterSet { kind: "digit"; negate: boolean; } type PropertyCharacterSet = CharacterPropertyCharacterSet | StringPropertyCharacterSet; interface CharacterPropertyCharacterSet { kind: "property"; key: string; value: string | null; strings: false; negate: boolean; } interface StringPropertyCharacterSet { kind: "property"; key: string; value: null; strings: true; negate: false; } interface SpaceCharacterSet { kind: "space"; negate: boolean; } interface WordCharacterSet { kind: "word"; negate: boolean; } /** * Creates a new character set with the characters equivalent to a JavaScript regular expression character set. * * @param chars The characters in the set. * @param flags The flags of the pattern. */ function createCharSet( chars: Iterable>>, flags: Readonly ): CharSet; interface ToLiteralOptions { /** * An optional template for the flags of the JavaScript RegExp literal to be created. * * All flags that are set to `false` are guaranteed to be disabled in the created literal. Likewise, all flags that * are set to `true` are guaranteed to be enabled in the created literal. * * Flags that are `undefined` will be enabled/disabled depending on the implementation. While no guarantees are * given, the implementation will generally try to choose flags such that it can create a literal that is as * small/simple as possible. * * If the constraints on flags defined here make it impossible to create a literal, an error will be thrown.
*/ flags?: Flags; /** * This will force the function to print characters as fast as possible. * * Literals created with this option will usually be created about 10x faster but the result will usually be very * hard to read. This option is intended to provide performance benefits when readability is not a concern. * * @default false */ fastCharacters?: boolean; } /** * Converts the given AST or AST subtree into a JS literal. * * The returned literal will be a literal representation of the given AST. However, assertions may be converted to * builtin JS RegExp assertions (e.g. `\b`, `$`) instead of using the literal lookahead/lookbehind form. * * @param node * @param options */ function toLiteral(node: NoParent, options?: Readonly): Literal; function toLiteral(alternatives: readonly NoParent[], options?: Readonly): Literal; /** * An unchecked partial set of RegExp flags. * * Flags are not validated by TypeScript. You must ensure that the flags are valid. * Whenever possible, use the {@link Flags} type instead. */ interface UncheckedFlags { /** @default false */ dotAll?: boolean; /** @default false */ global?: boolean; /** @default false */ hasIndices?: boolean; /** @default false */ ignoreCase?: boolean; /** @default false */ multiline?: boolean; /** @default false */ sticky?: boolean; /** @default false */ unicode?: boolean; /** @default false */ unicodeSets?: boolean; } /** * Returns whether the given flags are valid. * * @param flags */ function isFlags(flags: UncheckedFlags): flags is Flags; /** * A partial set of non-Unicode-sets RegExp flags. The `v` flag is guaranteed to be unset. */ interface NonUnicodeSetsFlags extends UncheckedFlags { /** @default false */ unicode?: boolean; /** @default false */ unicodeSets?: false; } /** * A partial set of Unicode-sets RegExp flags. The `v` flag is guaranteed to be set.
*/ interface UnicodeSetsFlags extends UncheckedFlags { /** @default false */ unicode?: false; /** Always `true`: this property is required, so it has no default. */ unicodeSets: true; } /** * A partial set of RegExp flags. */ type Flags = NonUnicodeSetsFlags | UnicodeSetsFlags; /** * A light-weight representation of a * [JavaScript RegExp](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp) object. * * This interface only requires the `source` and `flags` properties of a RegExp object. */ interface Literal { readonly source: string; readonly flags: string; } interface ParseOptions { /** * The maximum number of words a backreference can be replaced by. * * Set this to 0 to disable resolving backreferences. * * @default 100 */ maxBackreferenceWords?: number; /** * How the parser will handle unresolved backreferences. * * - `"disable"` * * The parser will replace all backreferences with an empty character class. This will cause all paths containing * a backreference to be (effectively) removed. * * E.g. `(a*)(\1|b)` will be parsed as `(a*)(([])|b)` which is equivalent to `a*b`. * * - `"throw"` * * The parser will throw an error when encountering a backreference that cannot be removed. * * E.g. `(a*)b\1` will throw but `(a*)[^\s\S]\1` will not because the backreference will be removed anyway because * of the empty character class. * * - `"unknown"` * * The parser will create an `Unknown` node for each backreference that cannot be removed. The id of the node will * be the raw string of the backreference. * * Backreferences that have been resolved are not affected by this option. * * @default "throw" */ backreferences?: "disable" | "throw" | "unknown"; /** * How the parser will handle assertions. * * - `"parse"` * * The parser will translate every assertion literally to an equivalent RE AST representation. Builtin assertions * (e.g. `\b`, `$`) will be transformed into equivalent assertions.
* * - `"disable"` * * The parser will disable all assertions by replacing them with an empty character class. This will cause all * paths containing an assertion to be (effectively) removed. * * - `"ignore"` * * The parser will ignore all assertions by replacing them with an empty group. * * - `"throw"` * * The parser will throw an error when encountering an assertion that cannot be removed. * * E.g. `a\B` will throw but `a([]\b)(\b){0}` will not because none of the `\b`s can be reached. * * - `"unknown"` * * The parser will create an `Unknown` node for each assertion. The id of the node will be the raw string of the * assertion. * * @default "parse" */ assertions?: "parse" | "disable" | "ignore" | "throw" | "unknown"; /** * By default, the parser will try to simplify the generated RE as much as possible. * * If set to `false`, all trivial simplifications will be disabled. This includes: * * - Removing alternatives where all paths go through an empty character class, an alternation with 0 alternatives, * or a disabled backreference/assertion. * - Removing constant 0 and constant 1 quantifiers. * - Inlining single-alternative groups. * * These simplifications might prevent certain backreferences or assertions from throwing an error. It's usually * good to have them enabled since parsing is usually faster and the produced RE AST is smaller. * * If the produced RE AST is supposed to be a literal translation, then simplifications have to be disabled. * * @default true */ simplify?: boolean; /** * The maximum number of nodes the parser is allowed to create. * * If the regex requires more nodes, a {@link TooManyNodesError} will be thrown. * * @default 10000 */ maxNodes?: number; /** * `Unknown` nodes have an `id` property that can be used to identify the element that created the unknown. This * function can be used to control the `id` value. * * By default, the raw of the element will be used as its id.
*/ getUnknownId?: (element: AST.Backreference | AST.Assertion) => string; } /** * A regexpp AST: the parsed pattern together with its parsed flags. */ interface RegexppAst { readonly pattern: AST.Pattern; readonly flags: AST.Flags; } type ParsableElement = AST.Element | AST.Pattern | AST.Alternative; /** * The result returned by {@link Parser.parse} and {@link Parser.parseElement}. */ interface ParseResult { expression: Expression; maxCharacter: Char; } /** * Converts JS RegExp to refa's RE AST format. */ class Parser { /** * The literal of the parser instance. */ readonly literal: Literal; /** * The parsed AST of the literal this parser works on. * * While not explicitly typed that way, the parser will assume that the AST is readonly and makes optimizations * based on that assumption. It is not safe to change the AST in any way. */ readonly ast: RegexppAst; /** * This contains the same flags as `ast.flags` but with a better type. */ readonly flags: Required; /** * The maximum character of all character sets in the parsed AST. * * This value will also be returned as part of the {@link ParseResult}. */ readonly maxCharacter: Char; /** * Creates a new parser from the given literal. * * This function will throw a `SyntaxError` if the given literal is not a valid RegExp literal according to the * given RegExp parser options. * * If a string is given as the literal, it must be of the form `/pattern/flags`. If possible, use the * object form with {@link Literal} instead. * * @param literal * @param parserOptions */ static fromLiteral(literal: Literal | string, parserOptions?: RegExpParser.Options): Parser; /** * Creates a new parser from the given [regexpp](https://github.com/eslint-community/regexpp) AST. * * When the JS RegExp has already been parsed using regexpp, this method can be used to avoid parsing the regex * again. * * The given AST is not allowed to be changed during the lifetime of the returned parser. * * @param ast */ static fromAst(ast: RegexppAst): Parser; /** * Parses the entire literal. * * For more information on parsing, see {@link parseElement}.
* * @param options */ parse(options?: Readonly): ParseResult; /** * Parses a specific element of the literal. * * Use {@link ParseOptions} to control how the element is parsed. * * @param element * @param options */ parseElement(element: ParsableElement, options?: Readonly): ParseResult; } type CharacterElement = | AST.CharacterClass | AST.Character | AST.CharacterClassRange | AST.CharacterSet | AST.ClassSetOperand | AST.StringAlternative | AST.ExpressionCharacterClass["expression"]; function parseUnicodeSet(element: CharacterElement, flags: Readonly): UnicodeSet; function parseCharSet( element: | AST.ClassRangesCharacterClass | AST.Character | AST.CharacterClassRange | Exclude, flags: Readonly ): CharSet; /** * A set of words. * * Words are stored as a sorted list of canonicalized words. The actual value of the set is {@link wordSets}. */ class StringSet { /** * `true` if this set is empty. */ get isEmpty(): boolean; /** * `true` if this set contains the empty word. */ get hasEmptyWord(): boolean; /** * `true` if this set contains at least one single-character word. * * This is equivalent to `this.getSingleCharacters() !== undefined`. */ get hasSingleCharacter(): boolean; /** * The words of this set with {@link CharCaseFolding#toCharSet} applied to each character. * * Word sets are guaranteed to be sorted by ascending length. * * Note: This is a lazy getter. Try to avoid calling it for best performance. */ get wordSets(): readonly ReadonlyWordSet[]; static readonly empty: StringSet; static from(words: Iterable, caseFolding: CharCaseFolding): StringSet; static fromWord(word: ReadonlyWord, caseFolding: CharCaseFolding): StringSet; /** * Returns whether this set is compatible with the given set. Compatibility is defined as follows: * * 1. The empty set is compatible with all sets. * 2. Sets with different case folding are incompatible. * * @param other */ isCompatibleWith(other: StringSet): boolean; /** * Returns whether this set is equal to the given set.
* * Equality is defined as the `wordSets` of both sets being the same formal language. * * @param other */ equals(other: StringSet): boolean; /** * Returns the union of all given sets. * * Note: This operation is only allowed if all sets are compatible. * * @param others */ union(...others: StringSet[]): StringSet; /** * Returns the intersection of this set and the given set. * * Note: This operation is only allowed if all sets are compatible. * * @param other */ intersect(other: StringSet): StringSet; /** * Returns this set without the strings of the given set. * * Note: This operation is only allowed if all sets are compatible. * * @param other */ without(other: StringSet): StringSet; /** * Returns whether `this ⊇ other`. * * @param other */ isSupersetOf(other: StringSet): boolean; /** * Returns whether `this ⊆ other`. * * @param other */ isSubsetOf(other: StringSet): boolean; /** * Returns whether `this ⊃ other`. * * @param other */ isProperSupersetOf(other: StringSet): boolean; /** * Returns whether `this ⊂ other`. * * @param other */ isProperSubsetOf(other: StringSet): boolean; isDisjointWith(other: StringSet): boolean; /** * Returns a set of all single-character words in this set or `undefined` if this set contains no single-character * words. */ getSingleCharacters(): CharSet | undefined; /** * Removes all single-character words from this set. */ withoutSingleCharacters(): StringSet; /** * Removes the empty word from this set. */ withoutEmptyWord(): StringSet; /** * Returns the minimum and maximum length of words in this set. * * If this set is empty, `undefined` will be returned. */ getLengthRange(): | { min: number; max: number; } | undefined; } /** * A mathematical set of characters and strings. * * Despite the name, the characters in this set are not necessarily Unicode characters. * So `chars.maximum` is not necessarily `0x10FFFF`. * * The set is represented as a union of a {@link CharSet} and a {@link StringSet}.
*/ class UnicodeSet { /** * All single characters in the set. */ readonly chars: CharSet; /** * A sorted set of words. * * In addition to the usual guarantees of `StringSet`, this set is also guaranteed to not contain any * single-character words. */ readonly accept: StringSet; /** * The maximum character in the set. * * This is equivalent to `this.chars.maximum`. */ get maximum(): Char; /** * `true` if the set is empty (=accepts no words). */ get isEmpty(): boolean; /** * `true` if the set contains the empty word. * * This is equivalent to `this.accept.hasEmptyWord`. */ get hasEmptyWord(): boolean; /** * All word sets accepted by this set. * * Word sets are guaranteed to be sorted by **descending** length and code points. This means that word sets are in * the order in which the ECMAScript RegExp engine would try matching them. * * Note: This is a lazy getter. Try to avoid calling it for best performance. */ get wordSets(): readonly ReadonlyWordSet[]; static empty(maximum: Char): UnicodeSet; static fromChars(chars: CharSet): UnicodeSet; static from(chars: CharSet, accept: StringSet): UnicodeSet; /** * Returns whether this set and the other set contain the same formal language. * * @param other */ equals(other: UnicodeSet | CharSet | CharRange): boolean; isCompatibleWith(other: UnicodeSet): boolean; union(...others: (UnicodeSet | CharSet)[]): UnicodeSet; intersect(other: UnicodeSet | CharSet | CharRange): UnicodeSet; without(other: UnicodeSet | CharSet | CharRange): UnicodeSet; /** * Returns whether `this ⊇ other`. * * @param other */ isSupersetOf(other: UnicodeSet | CharSet | CharRange): boolean; /** * Returns whether `this ⊆ other`. * * @param other */ isSubsetOf(other: UnicodeSet | CharSet | CharRange): boolean; /** * Returns whether `this ⊃ other`. * * @param other */ isProperSupersetOf(other: UnicodeSet | CharSet | CharRange): boolean; /** * Returns whether `this ⊂ other`. 
* * @param other */ isProperSubsetOf(other: UnicodeSet | CharSet | CharRange): boolean; isDisjointWith(other: UnicodeSet | CharSet | CharRange): boolean; /** * Returns the minimum and maximum length of words in this set. * * If this set is empty, `undefined` will be returned returned. */ getLengthRange(): | { min: number; max: number; } | undefined; } /** * A set of functions that can be used to perform case-insensitive matching. * * It must fulfill the following conditions: * * 1. `canonicalize` must be idempotent, i.e. `canonicalize(canonicalize(char)) === canonicalize(char)`. * 2. `toCharSet(canonicalize(a))` is the set of all characters `c` such that `canonicalize(a) === canonicalize(c)`. */ interface CharCaseFolding { /** * The canonicalization function. This typically maps characters to their lowercase form. * * If no function is given, then the identity function is used. This also implies that `toCharSet` must return a * set containing only the given character. * * @default char => char */ readonly canonicalize?: (char: Char) => Char; readonly toCharSet: (char: Char) => CharSet; } function getCharCaseFolding(unicode: boolean, ignoreCase: boolean): CharCaseFolding; function getCharCaseFolding(flags: Readonly): CharCaseFolding; } export namespace Words { /** * Converts the given array of UTF16 character codes into a string. * * All numbers in the given array must be between 0 (inclusive) and 65535 = 0xFFFF (inclusive). * * @param word */ function fromUTF16ToString(word: ReadonlyWord): string; /** * Converts the given array of Unicode code points into a string. * * All numbers in the given array must be between 0 (inclusive) and 1114111 = 0x10FFFF (inclusive). * * @param word */ function fromUnicodeToString(word: ReadonlyWord): string; /** * Converts the given string into an array of UTF16 character codes. * * All numbers in the returned array are guaranteed to be between 0 (inclusive) and 65535 = 0xFFFF (inclusive). 
* * @param string */ function fromStringToUTF16(string: string): Word; /** * Converts the given string into an array of Unicode code points. * * All numbers in the returned array are guaranteed to be between 0 (inclusive) and 1114111 = 0x10FFFF (inclusive). * * @param string */ function fromStringToUnicode(string: string): Word; /** * Returns the most humanly readable character in the given character set. Which character is picked is entirely * implementation-defined but, generally, word characters will be picked over non-word characters and printable * characters will be picked over non-printable characters. * * If the given character set is empty, `undefined` will be returned. * * @param set */ function pickMostReadableCharacter(set: CharSet): Char | undefined; /** * Returns a word of the given word set that is the most humanly readable. * * @param wordSet */ function pickMostReadableWord(wordSet: ReadonlyWordSet): Word; /** * Returns an iterable yielding all words that can be constructed from the given word sets. * * @param wordSets */ function wordSetsToWords(wordSets: Iterable): Iterable; /** * Returns an iterable yielding all words that can be constructed from the given word set. * * @param wordSet * @deprecated Use {@link wordSetsToWords} instead. */ function wordSetToWords(wordSet: ReadonlyWordSet): Iterable; }