elearning/Frontend-Learner/node_modules/regexp-ast-analysis/index.d.ts

1351 lines
50 KiB
TypeScript
Raw Normal View History

2026-01-13 10:46:40 +07:00
// Generated by dts-bundle-generator v5.9.0
import {
Alternative,
Backreference,
CapturingGroup,
Character,
CharacterClass,
CharacterClassElement,
CharacterClassRange,
CharacterSet,
ClassIntersection,
ClassRangesCharacterClass,
ClassRangesCharacterClassElement,
ClassSetOperand,
ClassStringDisjunction,
ClassSubtraction,
EdgeAssertion,
Element,
ExpressionCharacterClass,
Flags,
Group,
LookaroundAssertion,
Node,
Pattern,
Quantifier,
RegExpLiteral,
StringAlternative,
StringsUnicodePropertyCharacterSet,
UnicodeSetsCharacterClass,
UnicodeSetsCharacterClassElement,
WordBoundaryAssertion,
} from "@eslint-community/regexpp/ast";
import { Char, CharSet, JS } from "refa";
/**
* A simple interface to represent JS RegExp flags.
*
* Each property stands for exactly one flag letter; the letter is named in the comment of that property.
*
* All properties are optional and assumed to be `false` by default.
*/
export interface ReadonlyFlags {
/**
* The `s` flag.
*
* @default false
*/
readonly dotAll?: boolean;
/**
* The `g` flag.
*
* @default false
*/
readonly global?: boolean;
/**
* The `d` flag.
*
* @default false
*/
readonly hasIndices?: boolean;
/**
* The `i` flag.
*
* @default false
*/
readonly ignoreCase?: boolean;
/**
* The `m` flag.
*
* @default false
*/
readonly multiline?: boolean;
/**
* The `y` flag.
*
* @default false
*/
readonly sticky?: boolean;
/**
* The `u` flag.
*
* @default false
*/
readonly unicode?: boolean;
/**
* The `v` flag.
*
* @default false
*/
readonly unicodeSets?: boolean;
}
/**
* The character-level AST node types that are accepted, in addition to elements and alternatives, by the
* length/emptiness functions of this module (e.g. {@link isZeroLength}, {@link isEmpty}, and {@link getLengthRange}).
*/
export type CharacterElement =
| CharacterSet
| ClassIntersection
| ClassSubtraction
| CharacterClassElement
| CharacterClass
| StringAlternative;
/**
* Returns whether all (but at least one of the) paths of the given element do not consume characters.
*
* If this function returns `true`, then {@link isPotentiallyZeroLength} is guaranteed to return `true`.
*
* ## Backreferences
*
* This function uses the same condition for backreferences as {@link isEmpty}.
*
* ## Relations
*
* - `isZeroLength(e) -> isPotentiallyZeroLength(e)`
* - `isZeroLength(e) -> getLengthRange(e).max == 0`
*
* @param element The element, alternative, or array of alternatives to check.
* @param flags The JS RegExp flags to assume for the check.
* @returns `true` if all (but at least one of the) paths of `element` do not consume characters.
*
* @see {@link isPotentiallyZeroLength}
* @see {@link isEmpty}
* @see {@link isPotentiallyEmpty}
* @see {@link getLengthRange}
*/
export declare function isZeroLength(
element: Element | CharacterElement | Alternative | readonly Alternative[],
flags: ReadonlyFlags
): boolean;
/**
* Returns whether at least one path of the given element does not consume characters.
*
* ## Backreferences
*
* This function uses the same condition for backreferences as {@link isPotentiallyEmpty}.
*
* ## Relations
*
* - `isPotentiallyZeroLength(e) -> getLengthRange(e).min == 0`
*
* @param element The element, alternative, or array of alternatives to check.
* @param flags The JS RegExp flags to assume for the check.
* @returns `true` if at least one path of `element` does not consume characters.
*
* @see {@link isZeroLength}
* @see {@link isEmpty}
* @see {@link isPotentiallyEmpty}
* @see {@link getLengthRange}
*/
export declare function isPotentiallyZeroLength(
element: Element | CharacterElement | Alternative | readonly Alternative[],
flags: ReadonlyFlags
): boolean;
/**
* Returns whether all (but at least one of the) paths of the given element neither consume characters nor assert
* characters.
*
* If this function returns `true`, then {@link isZeroLength} and {@link isPotentiallyEmpty} are guaranteed to return
* `true`.
*
* ## Backreferences
*
* A backreference will only be considered empty iff it is empty by the definition of
* {@link isEmptyBackreference}.
*
* ## Relations
*
* - `isEmpty(e) -> isZeroLength(e)`
* - `isEmpty(e) -> isPotentiallyEmpty(e)`
*
* @param element The element, alternative, or array of alternatives to check.
* @param flags The JS RegExp flags to assume for the check.
* @returns `true` if all (but at least one of the) paths of `element` neither consume nor assert characters.
*
* @see {@link isZeroLength}
* @see {@link isPotentiallyZeroLength}
* @see {@link isPotentiallyEmpty}
* @see {@link getLengthRange}
*/
export declare function isEmpty(
element: Element | CharacterElement | Alternative | readonly Alternative[],
flags: ReadonlyFlags
): boolean;
/**
* Returns whether at least one path of the given element neither consumes characters nor asserts characters.
*
* ## Backreferences
*
* A backreference will only be considered potentially empty iff at least one of the following conditions is true:
*
* - The backreference is trivially always empty. (see {@link isEmptyBackreference})
* - The referenced capturing group is a descendant of the given element and at least one of the following conditions is
*   true:
*   * The referenced capturing group is potentially zero-length.
*   * The backreference is not always after its referenced capturing group.
*     (see {@link isStrictBackreference})
*
* ## Relations
*
* - `isPotentiallyEmpty(e) -> isPotentiallyZeroLength(e)`
*
* @param element The element, alternative, or array of alternatives to check.
* @param flags The JS RegExp flags to assume for the check.
* @returns `true` if at least one path of `element` neither consumes nor asserts characters.
*
* @see {@link isZeroLength}
* @see {@link isPotentiallyZeroLength}
* @see {@link isEmpty}
* @see {@link getLengthRange}
*/
export declare function isPotentiallyEmpty(
element: Element | CharacterElement | Alternative | readonly Alternative[],
flags: ReadonlyFlags
): boolean;
/**
* Returns the type of all possible ancestor nodes of the given node type.
*
* The type is computed by a bounded walk over the `parent` property types (see the helper types below), so it is
* an approximation rather than an unbounded recursion.
*
* @see {@link hasSomeAncestor}
*/
export type Ancestor<T extends Node> = AncestorImpl<T>;
// The parent of `T` plus two further parent levels (`Anc2`), extended by `ExtendApproximation`.
type AncestorImpl<T extends Node> = ExtendApproximation<Anc2<GetParent<T>>>;
// Adds the (deeper) ancestors of `UnicodeSetsCharacterClass` and `Alternative` whenever one of them is part of `T`.
// NOTE(review): presumably these two node types are the ones that can nest recursively — confirm against the AST.
type ExtendApproximation<T extends Node> =
| T
| (T extends UnicodeSetsCharacterClass ? CharacterClassAnc : never)
| (T extends Alternative ? AlternativeAnc : never);
// The ancestors of `Alternative` as computed by `TrueAnc`.
type AlternativeAnc = TrueAnc<Alternative>;
// The ancestors of `UnicodeSetsCharacterClass` as computed by `TrueAnc`.
type CharacterClassAnc = TrueAnc<UnicodeSetsCharacterClass>;
// The parent of `T` plus six further parent levels (`Anc6`).
type TrueAnc<T extends Node> = Anc6<GetParent<T>>;
// The type of the `parent` property of `T` without `null`/`undefined`.
type GetParent<T extends Node> = NonNullable<T["parent"]>;
// `AncN<T>` is `T` itself plus the next N levels of parents of `T`. Each level is defined via the level below it.
type Anc6<T extends Node> = T | Anc5<GetParent<T>>;
type Anc5<T extends Node> = T | Anc4<GetParent<T>>;
type Anc4<T extends Node> = T | Anc3<GetParent<T>>;
type Anc3<T extends Node> = T | Anc2<GetParent<T>>;
type Anc2<T extends Node> = T | Anc1<GetParent<T>>;
type Anc1<T extends Node> = T | GetParent<T>;
/**
* Returns whether any of the ancestors of the given node fulfills the given condition.
*
* If the given condition is an AST node instead of a function, `hasSomeAncestor` will behave as if the condition
* function was `d => d === conditionNode`.
*
* The ancestors will be iterated in the order from closest to farthest.
* The condition function will not be called on the given node.
*
* @param node The node whose ancestors are checked. The node itself is not checked.
* @param condition A condition function that is called for ancestors of `node`, or an AST node that ancestors are
* compared against using `===`.
* @returns `true` if any ancestor of `node` fulfills the condition.
*/
export declare function hasSomeAncestor<T extends Node>(
node: T,
condition: ((ancestor: Ancestor<T>) => boolean) | Node
): boolean;
/**
* Returns the type of all possible descendant nodes of the given node type. This trivially includes the given type.
*
* The type is computed by a bounded walk over the possible child types (see the helper types below), so it is an
* approximation rather than an unbounded recursion.
*
* @see {@link hasSomeDescendant}
*/
export type Descendant<T extends Node> = T | DescendantsImpl<T>;
// The children of `T` plus two further levels of children (`Dec1` -> `Dec2` -> `GetChildren`).
type DescendantsImpl<T extends Node> = Dec1<GetChildren<T>>;
type Dec1<T extends Node> = T | Dec2<GetChildren<T>>;
type Dec2<T extends Node> = T | GetChildren<T>;
// Maps a node type to the node types used as its children in this approximation. Node types that match none of the
// branches map to `never`.
type GetChildren<T extends Node> =
| (T extends RegExpLiteral ? Flags | Pattern | Element : never)
| (T extends Alternative | CapturingGroup | Group | LookaroundAssertion | Quantifier | Pattern
? Alternative | Element
: never)
| (T extends Alternative ? Element : never)
| (T extends ClassRangesCharacterClass ? ClassRangesCharacterClassElement : never)
| (T extends CharacterClassRange ? Character : never)
| (T extends UnicodeSetsCharacterClass | ExpressionCharacterClass | ExpressionCharacterClass["expression"]
? UnicodeSetsDescendants
: never)
| (T extends ClassStringDisjunction ? StringAlternative : never)
| (T extends StringAlternative ? Character : never);
// The node types used as children of `v`-flag character classes and of their expressions in `GetChildren`.
type UnicodeSetsDescendants =
| ClassSetOperand
| UnicodeSetsCharacterClassElement
| UnicodeSetsCharacterClass
| ExpressionCharacterClass
| ExpressionCharacterClass["expression"];
/**
* Returns whether any of the descendants of the given node fulfill the given condition.
*
* The descendants will be iterated in a DFS top-to-bottom manner from left to right with the first node being the
* given node.
*
* If the given condition is an AST node instead of a function, `hasSomeDescendant` will behave as if the condition
* function was `d => d === conditionNode`.
*
* This function is short-circuited, so as soon as any `condition` returns `true`, `true` will be returned.
*
* @param node The node at which the iteration starts. The node itself is the first node to be checked.
* @param condition A condition function that is called for the visited nodes, or an AST node that the visited nodes
* are compared against using `===`.
* @param descentConditionFn An optional function to decide whether the descendant of the given node will be checked as
* well.
*
* This function will be called with some node only after `condition` has returned `false` for this node.
* @returns `true` if `node` or any of its checked descendants fulfills the condition.
*/
export declare function hasSomeDescendant<T extends Node>(
node: T,
condition: ((descendant: Descendant<T>) => boolean) | Node,
descentConditionFn?: (descendant: Descendant<T>) => boolean
): boolean;
/**
* Returns the one-based number of the given capturing group.
*
* This is the number needed to refer to the capturing group via backreferences.
*
* @param group The capturing group to get the number of.
* @returns The one-based number of `group`.
*/
export declare function getCapturingGroupNumber(group: CapturingGroup): number;
/**
* Returns the pattern node of the JS RegExp of a given node.
*
* This operation is guaranteed to always succeed for all node types except for flags nodes. Flags nodes have an
* optional `parent` which, if not set, means that this function can't access the pattern node. If the function can't
* access the pattern node from a flags node, an error will be thrown.
*
* @param node The node to get the pattern of.
* @returns The pattern node of the JS RegExp that `node` is a part of.
* @throws if `node` is a flags node from which the pattern node cannot be accessed.
*/
export declare function getPattern(node: Node): Pattern;
/**
* The direction in which alternatives are matched. This can be either `ltr` (left to right) or `rtl` (right to left).
*
* `ltr` is the matching direction of lookaheads and the default matching direction of JavaScript RegExps. `rtl` is the
* matching direction of lookbehinds.
*
* The current matching direction of an element is determined by the closest lookaround (lookahead or lookbehind)
* ancestor. If the closest lookaround ancestor is a lookahead, the matching direction is `ltr`. Likewise, if it's a
* lookbehind, it's `rtl`. If an element is not a descendant of a lookaround, the default matching direction `ltr` is
* assumed.
*
* @see {@link getMatchingDirection}
* @see {@link invertMatchingDirection}
* @see {@link getMatchingDirectionFromAssertionKind}
*/
export type MatchingDirection = "ltr" | "rtl";
/**
* This extends the {@link MatchingDirection} type to allow unknown matching
* directions.
*
* This is useful when the matching direction of an element/alternative cannot
* be known with 100% certainty. The additional value `"unknown"` represents
* this case.
*/
export type OptionalMatchingDirection = MatchingDirection | "unknown";
/**
* Returns the direction in which the given node will be matched relative to the closest parent alternative.
*
* If the given node is a lookaround, then the result of `getMatchingDirection(lookaround)` will be the same as
* `getMatchingDirection(lookaround.parent)`.
*
* @param node The node to get the matching direction of.
* @returns The matching direction of `node`.
*/
export declare function getMatchingDirection(node: Node): MatchingDirection;
/**
* Returns the opposite matching direction of the given matching direction.
*
* If `ltr` is given, `rtl` will be returned and vice versa.
*
* @param direction The matching direction to invert.
* @returns The opposite of `direction`.
*/
export declare function invertMatchingDirection(direction: MatchingDirection): MatchingDirection;
/**
* Converts a given assertion kind into a matching direction.
*
* For lookaheads and lookbehinds, the returned matching direction will be the matching direction of their children.
* I.e. the result of `lookahead` is `ltr` and the result of `lookbehind` is `rtl`.
*
* For edge assertions (`^` and `$`), the returned value is the direction of the character the edge assertion asserts.
* I.e. the result of `^` is `rtl` (because it asserts the previous character) and the result of `$` is `ltr` (because
* it asserts the next character).
*
* @param kind The `kind` of a lookaround assertion or of an edge assertion.
* @returns The matching direction that corresponds to `kind` as described above.
*/
export declare function getMatchingDirectionFromAssertionKind(
kind: LookaroundAssertion["kind"] | EdgeAssertion["kind"]
): MatchingDirection;
/**
* Returns whether the given backreference will always be replaced with the empty string.
*
* There are two reasons why a backreference might always be replaced with the empty string:
*
* 1. The referenced capturing group does not consume characters.
*
*    This is the trivial case. If the referenced capturing group never consumes any characters, then a backreference to
*    that group must be replaced with the empty string.
*
*    E.g. `/(\b)a\1/`
*
* 2. The backreference is not after the referenced capturing group.
*
*    A backreference can only be replaced with a non-empty string if the referenced capturing group has captured text
*    before the backreference is matched. There are multiple reasons why the capturing group might be unable to capture
*    text before a backreference to it is reached.
*
*    - The capturing group might be in a different alternative. E.g. `/(a)b|\1/`.
*    - The backreference might be *inside* the capturing group. E.g. `/(a\1)/`.
*    - The backreference might be before the capturing group. E.g. `/\1(a)/`, `/(?:\1(a))+/`, `/(?<=(a)\1)b/`
*
* @param backreference The backreference to check.
* @param flags The JS RegExp flags to assume for the check.
* @returns `true` if `backreference` will always be replaced with the empty string.
*/
export declare function isEmptyBackreference(backreference: Backreference, flags: ReadonlyFlags): boolean;
/**
* Returns whether the given backreference is a strict backreference.
*
* Strict backreferences are backreferences that are always matched __after__ the referenced group was matched. If there
* exists any path that goes through a backreference but not through the referenced capturing group, that backreference
* is not strict.
*
* ## Examples
*
* In the following examples, `\1` is a strict backreference:
*
* - `/(a)\1/`
* - `/(a)(?:b|\1)/`
* - `/(a)\1?/`
* - `/(?<=\1(a))b/`
*
* In the following examples, `\1` is not a strict backreference:
*
* - `/(a)|\1/`
* - `/(?:(a)|b)\1/`
* - `/(a)?\1/`
* - `/(?<=(a)\1)b/`
* - `/(?!(a)).\1/`
*
* @param backreference The backreference to check.
* @returns `true` if `backreference` is a strict backreference.
*/
export declare function isStrictBackreference(backreference: Backreference): boolean;
/**
* Given a node type `N`, this will map to whether a node of type `N` can contain a capturing group.
*
* The result is:
*
* - `false` for the node types listed in the first condition (character class elements, character classes, character
*   sets, backreferences, edge assertions, word boundary assertions, and flags),
* - `true` for `CapturingGroup`, and
* - `boolean` for all other node types.
*
* @see {@link containsCapturingGroup}
*/
export type ContainsCapturingGroup<N extends Node> = N extends
| CharacterClassElement
| CharacterClass
| CharacterSet
| Backreference
| EdgeAssertion
| WordBoundaryAssertion
| Flags
? false
: N extends CapturingGroup
? true
: boolean;
/**
* Returns whether the given node contains or is a capturing group.
*
* This function is guaranteed to behave in the same way as:
*
* ```js
* hasSomeDescendant(node, d => d.type === "CapturingGroup")
* ```
*
* @param node The node to check.
* @returns Whether `node` contains or is a capturing group. The return type is narrowed according to
* {@link ContainsCapturingGroup}.
*/
export declare function containsCapturingGroup<N extends Node>(node: N): ContainsCapturingGroup<N>;
/**
* The length range of accepted strings. All strings that are accepted have a length of `min <= length <= max`.
*
* @see {@link getLengthRange}
*/
export interface LengthRange {
/** The inclusive lower bound of the length of accepted strings. */
readonly min: number;
/** The inclusive upper bound of the length of accepted strings. */
readonly max: number;
}
/**
* Returns how many characters the given element can consume at most and has to consume at least.
*
* Note that character classes are not parsed by this function and are assumed to be non-empty.
*
* ## Backreferences
*
* While {@link isPotentiallyZeroLength} generally assumes the worst-case for backreferences that reference capturing
* groups outside the given element, this function does not/cannot. The length range of a backreference only depends on
* the referenced capturing group and the relative positions of the backreference and the capturing group within the
* pattern. It does not depend on the given element.
*
* This is an important distinction because it means that `isPotentiallyZeroLength(e) -> getLengthRange(e).min == 0` is
* guaranteed but `getLengthRange(e).min == 0 -> isPotentiallyZeroLength(e)` is only guaranteed if `e` does not contain
* backreferences.
*
* @param element The element, alternative, or array of alternatives to get the length range of.
* @param flags The JS RegExp flags to assume.
* @returns The minimum and maximum number of characters `element` can consume.
* @throws {RangeError} if an empty array of alternatives is given.
*
* @see {@link isZeroLength}
* @see {@link isPotentiallyZeroLength}
* @see {@link isEmpty}
* @see {@link isPotentiallyEmpty}
*/
export declare function getLengthRange(
element: Element | CharacterElement | Alternative | readonly Alternative[],
flags: ReadonlyFlags
): LengthRange;
/**
* Returns whether `getLengthRange(e).min == 0`.
*
* This function is slightly different from {@link isPotentiallyZeroLength} in how it handles backreferences. See the
* notes on backreferences in the documentation of {@link isPotentiallyZeroLength} and {@link getLengthRange} for more
* information.
*
* ## Relations
*
* - `isLengthRangeMinZero(e) <-> getLengthRange(e).min == 0`
*
* @param element The element, alternative, or array of alternatives to check.
* @param flags The JS RegExp flags to assume.
* @returns `true` if and only if `getLengthRange(element, flags).min == 0`.
* @throws {RangeError} if an empty array of alternatives is given.
*
* @see {@link getLengthRange}
*/
export declare function isLengthRangeMinZero(
element: Element | CharacterElement | Alternative | readonly Alternative[],
flags: ReadonlyFlags
): boolean;
/**
* The type of the closest ancestor of two nodes with the given types.
*
* The base result is the intersection of "`A` or an ancestor of `A`" and "`B` or an ancestor of `B`". If both `A` and
* `B` are descendants of a pattern (i.e. `Exclude<A | B, Descendant<Pattern>>` is `never`), then `RegExpLiteral` is
* additionally removed from the result.
*
* @see {@link getClosestAncestor}
*/
export type ClosestAncestor<A extends Node, B extends Node> = Exclude<A | B, Descendant<Pattern>> extends never
? Exclude<(A | Ancestor<A>) & (B | Ancestor<B>), RegExpLiteral>
: (A | Ancestor<A>) & (B | Ancestor<B>);
/**
* Returns the closest ancestor of the given nodes.
*
* Since only one node is given, the node will be returned as is.
*
* @param a The only node.
* @returns `a`.
*/
export declare function getClosestAncestor<A extends Node>(a: A): A;
/**
* Returns the closest ancestor of the given nodes.
*
* If the nodes are all the same node, the given node will be returned.
*
* If the given nodes are not part of the same AST tree, an error will be thrown.
*
* @param a The first node.
* @param b The second node.
* @returns The closest node that is `a` or an ancestor of `a` and also `b` or an ancestor of `b`.
*/
export declare function getClosestAncestor<A extends Node, B extends Node>(a: A, b: B): ClosestAncestor<A, B>;
/**
* Returns the closest ancestor of the given nodes.
*
* If the nodes are all the same node, the given node will be returned.
*
* If the given nodes are not part of the same AST tree, an error will be thrown.
*
* @param a The first node.
* @param b Any number of further nodes.
* @returns The closest ancestor of all given nodes.
*/
export declare function getClosestAncestor<A extends Node, B extends Node>(a: A, ...b: B[]): ClosestAncestor<A, B>;
/**
* Returns the closest ancestor of the given nodes.
*
* If the nodes are all the same node, the given node will be returned.
*
* If the given nodes are not part of the same AST tree, an error will be thrown.
*
* NOTE(review): the `undefined` in the return type presumably covers a call without any nodes — confirm against the
* implementation.
*
* @param args The nodes to get the closest ancestor of.
* @returns The closest ancestor of all given nodes, or `undefined`.
*/
export declare function getClosestAncestor<T extends Node>(...args: T[]): ClosestAncestor<T, T> | undefined;
/**
* Returns how many times the regex engine can match the given element at most.
*
* This method will treat elements inside lookarounds differently. Elements inside lookarounds will ignore everything
* outside the lookaround.
*
* ## Examples
*
* - `/a?/`: This will return 1 for `a`.
* - `/a+/`: This will return infinity for `a` and 1 for the quantifier `a+`.
* - `/((a{0,8}){0,8}){0,8}/`: This will return 512 for `a`.
* - `/(ba{0})+/`: This will return 0 for `a` and infinity for the quantifier `a{0}`.
* - `/(\w(?!a{3}b))+/`: This will return 3 for `a` because `a` is inside a lookaround and therefore unaffected by the
*   `(\w(?!a{3}b))+` quantifier.
*
* @param element The node to get the effective maximum repetition of.
* @returns The maximum number of times `element` can be matched. This may be infinity (see the examples above).
*/
export declare function getEffectiveMaximumRepetition(element: Node): number;
/**
* A cache that functions may use to store results.
*
* A cache implements the {@link ReadonlyFlags} interface. All functions that take a {@link ReadonlyFlags} object can
* be given a cache instead to utilize the cache. Example:
*
* ```ts
* const flags: ReadonlyFlags = getFlags();
* const cache = toCache(flags);
*
* toCharSet(element, flags); // uncached
* toCharSet(element, cache); // cached
* ```
*
* Whether the cache is actually utilized depends on the implementation of the function.
*
* To get a cache for some flags, use the {@link toCache} function.
*
* ### Assumption
*
* Caches assume that the regexpp AST of cached nodes is immutable. If this assumption is broken, then the cache may
* return old or incorrect results.
*
* The AST may be changed before the cache first sees a node of the AST and after the cache last sees a node of the
* AST. Changes are allowed as long as the AST appears to be immutable from the perspective of the cache.
*
* ### Memory
*
* The cache uses regexpp `Node` objects as keys in
* [`WeakMap`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/WeakMap)s internally.
* They will not cause memory leaks.
*
* This means that caches may out-live the nodes they cache information for.
*
* @see {@link toCache}
* @see {@link createCache}
*/
export interface Cache extends Required<ReadonlyFlags> {
/**
* An optional property of type `never`, i.e. it can never hold a value.
*
* NOTE(review): presumably this only serves to make `Cache` distinguishable from a plain flags object at the type
* level — confirm against the implementation.
*
* @internal
*/
readonly __cache?: never;
}
/**
* This will create a new cache instance for the given flags.
*
* This operation will always create a new cache. If you want to transparently reuse cache instances, use
* {@link toCache} instead.
*
* See {@link Cache} for more information about using caches.
*
* @param flags The flags to create a cache for.
* @returns A new cache instance.
*
* @see {@link Cache}
* @see {@link toCache}
*/
export declare function createCache(flags: ReadonlyFlags): Cache;
/**
* Returns a cache instance for the given flags.
*
* If the given flags are a cache instance, the cache instance will be returned. Otherwise a new cache instance will
* be created using {@link createCache}.
*
* See {@link Cache} for more information about using caches.
*
* @param flags The flags (or cache instance) to get a cache for.
* @returns `flags` itself if it is a cache instance, and a new cache instance otherwise.
*
* @see {@link Cache}
* @see {@link createCache}
*/
export declare function toCache(flags: ReadonlyFlags): Cache;
/**
* All possible element types that are accepted by {@link toCharSet}.
*
* Note that character sets of type `StringsUnicodePropertyCharacterSet` are explicitly excluded.
*
* @see {@link toCharSet}
*/
export type ToCharSetElement =
| Character
| CharacterClassRange
| Exclude<CharacterSet, StringsUnicodePropertyCharacterSet>
| ClassRangesCharacterClass;
/**
* Converts the given element or array of elements into a refa `CharSet`.
*
* If an array is given, all the character sets of all elements will be unioned. This means that for any two elements
* `a` and `b`, the results of `toCharSet([a, b])` and `toCharSet(a).union(toCharSet(b))` will be the same.
*
* This is guaranteed to be equivalent to `toUnicodeSet(elements, flags).chars`.
*
* @param elements The element or array of elements to convert.
* @param flags The JS RegExp flags to assume for the conversion.
* @returns The character set of the given element(s).
*
* @see {@link toUnicodeSet}
*/
export declare function toCharSet(
elements: ToCharSetElement | readonly ToCharSetElement[],
flags: ReadonlyFlags
): CharSet;
/**
* All possible element types that are accepted by {@link toUnicodeSet}.
*
* This is a superset of {@link ToCharSetElement}.
*
* @see {@link toUnicodeSet}
*/
export type ToUnicodeSetElement =
| ToCharSetElement
| CharacterClass
| CharacterSet
| ClassSetOperand
| ExpressionCharacterClass["expression"]
| StringAlternative;
/**
* Converts the given element or array of elements into a refa `UnicodeSet`.
*
* If an array is given, all the character sets of all elements will be unioned. This means that for any two elements
* `a` and `b`, the results of `toUnicodeSet([a, b])` and `toUnicodeSet(a).union(toUnicodeSet(b))` will be the same.
*
* @param elements The element or array of elements to convert.
* @param flags The JS RegExp flags to assume for the conversion.
* @returns The Unicode set of the given element(s).
*/
export declare function toUnicodeSet(
elements: ToUnicodeSetElement | readonly ToUnicodeSetElement[],
flags: ReadonlyFlags
): JS.UnicodeSet;
/**
* Returns whether the given character class/set matches all characters.
*
* This is guaranteed to be equivalent to `toUnicodeSet(char).chars.isAll` but is implemented more efficiently.
*
* @param char The character class/set to check.
* @param flags The JS RegExp flags to assume for the check.
* @returns `true` if `char` matches all characters.
*/
export declare function matchesAllCharacters(char: ToUnicodeSetElement, flags: ReadonlyFlags): boolean;
/**
* Returns whether the given character class/set matches no characters.
*
* This is guaranteed to be equivalent to `toUnicodeSet(char).isEmpty` but is implemented more efficiently.
*
* @param char The character class/set to check.
* @param flags The JS RegExp flags to assume for the check.
* @returns `true` if `char` matches no characters.
*/
export declare function matchesNoCharacters(char: ToUnicodeSetElement, flags: ReadonlyFlags): boolean;
/**
* Returns whether the given character element contains strings.
*
* This is guaranteed to be equivalent to `!toUnicodeSet(char).accept.isEmpty` but is implemented more efficiently.
*
* @param char The character element to check.
* @param flags The JS RegExp flags to assume for the check.
* @returns `true` if `char` contains strings.
*/
export declare function hasStrings(char: ToUnicodeSetElement, flags: ReadonlyFlags): boolean;
/**
* A set of functions to get predefined character sets.
*
* All functions take the flags of the regex because the returned value depends on the given flags.
*/
export declare namespace Chars {
/**
* Returns the maximum character for the given flags.
*/
function maxChar(flags: ReadonlyFlags): Char;
/**
* Returns the empty character set for the given flags.
*/
function empty(flags: ReadonlyFlags): CharSet;
/**
* Returns the full character set for the given flags.
*/
function all(flags: ReadonlyFlags): CharSet;
/**
* Returns the character set that contains only line terminators.
*
* This character set accepts all characters that the JS RegExp `.` rejects. The returned character set accepts
* all characters that the regex `/^.$/` rejects.
*/
function lineTerminator(flags: ReadonlyFlags): CharSet;
/**
* Returns a character set that is equivalent to `\w` with the given flags.
*
* Note: `\w` is somewhat special because it has 3 values. All other predefined character sets only have two values -
* one for Unicode mode and one for non-Unicode mode. This is because Unicode-mode changes the semantics of ignore
* case as well. This causes some of the ASCII letters to be ignore-case-equal to higher Unicode characters
* (e.g. K (Latin Capital Letter K, U+004b) == k (Latin Small Letter K, U+006b) == `\u212A` (Kelvin Sign, U+212A)).
* As a result `\w` has 3 values: one for non-Unicode mode, one for case-sensitive Unicode-mode, and one for
* case-insensitive Unicode-mode.
*/
function word(flags: ReadonlyFlags): CharSet;
/**
* Returns a character set that is equivalent to `\d` with the given flags.
*/
function digit(flags: ReadonlyFlags): CharSet;
/**
* Returns a character set that is equivalent to `\s` with the given flags.
*/
function space(flags: ReadonlyFlags): CharSet;
}
/**
* Returns whether two nodes are structurally equivalent.
*
* If two elements are structurally equivalent, they must also be semantically equivalent. However, two semantically
* equivalent elements might not be structurally equivalent (e.g. `/[ab]/` !=<sub>struct</sub> `/[ba]/`).
*
* @param x The first node or `null`.
* @param y The second node or `null`.
* @returns `true` if `x` and `y` are structurally equivalent.
*/
export declare function structurallyEqual(x: Node | null, y: Node | null): boolean;
/**
* The reason a path ends.
*
* Paths generally end because:
*
* 1. the {@link FollowOperations} do not wish to continue or
* 2. because paths cannot be followed further because of the structure of the regex.
*
* This type describes the reasons for the second option: the path ended at the end of the pattern (`"pattern"`) or at
* the end of an assertion (`"assertion"`).
*
* @see {@link FollowOperations}
* @see {@link FollowOperations.endPath}
*/
export type FollowEndReason = "pattern" | "assertion";
/**
* A set of operations that determine how state is propagated and changed.
*
* Only {@link FollowOperations.join} is required. All other operations are optional and have the default
* implementation documented on them.
*
* @typeParam S The type of the state.
*
* @see {@link followPaths}
*/
export interface FollowOperations<S> {
/**
* Split off a new path from the given one.
*
* This function should not modify the given state.
*
* If the state is immutable, then `fork` may be implemented as the identity function in regard to `state`. If the
* function is omitted, it will default to the identity function.
*
* If the state is mutable, then `fork` must be implemented.
*
* @default x => x
*/
fork?: (state: S, direction: MatchingDirection) => S;
/**
* Joins any number of paths to create a combined path.
*/
join(states: S[], direction: MatchingDirection): S;
/**
* This function is called when dealing with lookarounds.
*
* It will __not__ be called for predefined assertions - `^`, `$`, `\b`, `\B`. Use {@link FollowOperations.enter} or
* {@link FollowOperations.leave} for predefined assertions instead.
*
* Judging by the pseudo code in the documentation of {@link followPaths}, `state` is the state of the path at the
* lookaround and `assertion` is the joined state of the alternatives of the lookaround.
*
* @default x => x
*/
assert?: (state: S, direction: MatchingDirection, assertion: S, assertionDirection: MatchingDirection) => S;
/**
* This function is called when entering an element.
*
* Operations for elements are called in the following order:
*
* 1. {@link FollowOperations.enter}
* 2. if {@link FollowOperations.continueInto} returns `true`
*    1. Element-specific operations (if any) that can change the current state.
* 3. {@link FollowOperations.leave}
* 4. {@link FollowOperations.continueAfter} (optional; might not be called for every element)
*
* @default (_, x) => x
*/
enter?: (element: Element, state: S, direction: MatchingDirection) => S;
/**
* This function is called when leaving an element.
*
* See the documentation on {@link FollowOperations.enter} for more details.
*
* @default (_, x) => x
*/
leave?: (element: Element, state: S, direction: MatchingDirection) => S;
/**
* This function is called when a path ends.
*
* Paths end at the end of patterns and assertions. It means that there is no element after the pattern/assertion
* in that direction.
*
* @default x => x
* @see {@link FollowEndReason}
*/
endPath?: (state: S, direction: MatchingDirection, reason: FollowEndReason) => S;
/**
* Whether the current path should go into the given element (return `true`) or whether it should be skipped
* (return `false`). If the element is skipped, the given state will not be changed and passed as-is to the `leave`
* function.
*
* You shouldn't modify state in this function. Modify state in {@link FollowOperations.enter} instead.
*
* See the documentation on {@link FollowOperations.enter} for more details.
*
* @default () => true
*/
continueInto?: (element: Element, state: S, direction: MatchingDirection) => boolean;
/**
* Whether the current path should continue after the given element (return `true`) or whether all elements that
* follow this element should be skipped (return `false`).
*
* If the current path is a fork path, then only the elements until the fork is joined will be skipped. A stopped
* fork path will be joined with all other forks like normal.
*
* You shouldn't modify state in this function. Modify state in {@link FollowOperations.leave} instead.
*
* See the documentation on {@link FollowOperations.enter} for more details.
*
* @default () => true
*/
continueAfter?: (element: Element, state: S, direction: MatchingDirection) => boolean;
/**
* Whether the current path should continue outside the given lookaround assertion.
*
* Paths that leave a lookaround assertion (= go outside of it) generally can't be followed. However, for some
* operations it makes sense to do it anyway.
*
* It usually makes sense to follow paths outside of assertions if
* `getMatchingDirectionFromAssertionKind(element.kind) !== direction`. This condition ensures that lookbehinds only
* follow paths going out to the right (e.g. `(?<=a)->b`) and lookaheads only follow paths going out to the left
* (e.g. `b<-(?=a)`).
*
* If this function returns `false`, {@link FollowOperations.endPath} is guaranteed to be called next.
* If this function returns `true`, {@link FollowOperations.continueAfter} is guaranteed to be called next for the
* lookaround assertion.
*
* You shouldn't modify state in this function. Modify state in {@link FollowOperations.endPath} or
* {@link FollowOperations.enter} instead.
*
* @default () => false
*/
continueOutside?: (element: LookaroundAssertion, state: S, direction: MatchingDirection) => boolean;
}
/**
* This function goes to all elements reachable from the given `start` element.
*
* ## Paths
*
* The function uses _paths_. A path is an [execution path](https://en.wikipedia.org/wiki/Symbolic_execution) that
* describes a sequence of regex elements.
*
* I.e. there are two paths to go from `a` to `b` in the pattern `/a(\w|dd)b/`. The first path is `a \w b` and the
* second path is `a d d b`.
*
* However, the problem with paths is that there can be exponentially many because of combinatorial explosion (e.g. the
* pattern `/(a|b)(a|b)(a|b)(a|b)(a|b)/` has 32 paths). To solve this problem, paths can be _joined_ together again.
*
* I.e. in the pattern `/a(\w|dd)b/`, the first element of all paths will be `a`. After `a`, the path splits into two.
* We call each of the split paths a _fork_. The two forks will be `a ( \w` and `a ( d d`. The `(` is used to indicate
* that a fork was made. Since both paths come together after the group ends, they will be _joined_. The joined path of
* `a ( \w` and `a ( d d` will be written as `a ( \w | d d )`. The `)` is used to indicate that forks have been joined.
* The final path will be `a ( \w | d d ) b`.
*
* This method of forking and joining works for alternations but it won't work for quantifiers. This is why quantifiers
* will be treated as single elements that can be entered. By default, a quantifier `q` will be interpreted as `( q | )`
* if its minimum is zero and as `( q )` otherwise.
*
* I.e. in the pattern `/ab*c/`, the path is `a ( b* | ) c`, and in `/ab+c/`, the path is `a b+ c`.
*
* ### State
*
* Paths are thought of as a sequence of elements and they are represented by state (type parameter `S`). All operations
* that fork, join, or assert paths will operate on state and not a sequence of elements.
*
* State allows operations to be implemented more efficiently and ensures that only necessary data is passed around.
* An analysis of paths usually tracks properties and analyses how these properties change, the current value of these
* properties is state.
*
* ## Operations
*
* Operations act upon state and are specific to the type of state. They define how state changes when
* entering/leaving/asserting elements and how paths fork, join, and continue.
*
* ### Operation sequence
*
* To follow all paths, two methods are necessary: one method that enters elements and one that determines the next
* element. These methods will be called `Enter` and `Next` respectively. These methods will call the given operations
* roughly like this:
*
* ```text
* function Enter(element, state):
*     operations.enter
*     if operations.continueInto:
*         if element.type == GROUP:
*             operations.join(
*                 element.alternatives.map(e => Enter(e, operations.fork(state)))
*             )
*         if element.type == QUANTIFIER:
*             if element.max == 0:
*                 // do nothing
*             else if element.min == 0:
*                 operations.join([
*                     state,
*                     Enter(quantifier, operations.fork(state))
*                 ])
*             else:
*                 Enter(quantifier, operations.fork(state))
*         if element.type == LOOKAROUND:
*             operations.assert(
*                 state,
*                 operations.join(
*                     element.alternatives.map(e => Enter(e, operations.fork(state)))
*                 )
*             )
*     operations.leave
*     Next(element, state)
*
* function Next(element, state):
*     if operations.continueAfter:
*         if noNextElement:
*             operations.endPath
*         else:
*             Enter(nextElement, state)
* ```
*
* (This is just simplified pseudo code but the general order of operations will be the same.)
*
* ## Runtime
*
* If `n` elements can be reached from the given starting element, then the average runtime will be `O(n)` and the
* worst-case runtime will be `O(n^2)`.
*
* @param start The element or alternative from which paths are followed.
* @param startMode If "enter", then the first element to be entered will be the starting element. If "next", then the
* first element to continue after will be the starting element.
* @param initialState The state that paths start with.
* @param operations The operations that determine how state is propagated and changed.
* @param direction The direction in which paths will be followed. If undefined, then the natural matching direction
* ({@link getMatchingDirection}) of the start element will be used.
*
* @typeParam S The type of the state.
*/
export declare function followPaths<S>(
start: Element | Alternative,
startMode: "enter" | "next",
initialState: S,
operations: FollowOperations<S>,
direction?: MatchingDirection
): S;
/**
* Options to control the behavior of {@link getLongestPrefix}.
*/
export interface GetLongestPrefixOptions {
/**
* Whether the returned sequence is to include the next character (if any)
* after the longest knowable sequence.
*
* The next character after the longest knowable sequence is either:
* - not consumed by the given alternative
* (e.g. `(ab)c` -> `[/a/, /b/, /c/]`),
* - only a superset of the actual next character
* (e.g. `ab(cd|ef)` -> `[/a/, /b/, /[ce]/]`), or
* - both.
*
* Note that enabling this option means that the returned sequence of
* character sets is no longer guaranteed to be a prefix of the given
* alternative.
*
* @default false
*/
includeAfter?: boolean;
/**
* Whether only characters inside the given alternative may be considered
* when creating the last character.
*
* This option controls the behavior of {@link includeAfter}. By default,
* {@link includeAfter} will also look at the characters after the
* alternative to create the last character. This may be undesirable in
* some cases.
*
* Enabling this option has the following effect: If the last character
* of the prefix is affected by characters outside the alternative, then
* the prefix with {@link includeAfter} set to `false` will be returned.
*
* @default false
*/
onlyInside?: boolean;
/**
* Whether groups will be combined more loosely.
*
* With this option disabled, groups will only be combined if they are of
* the same length and differ in at most one position. E.g. the longest
* prefix of `/(?:bitter|barber)/` is `[/b/, /[ia]/]`. This requirement is
* very strict and most groups do not fulfill it in practice.
*
* With this option enabled, groups will be combined if they are of the
* same length. Different characters at the same position are simply
* combined. E.g. the longest prefix of `/(?:bitter|barber)/` is
* `[/b/, /[ia]/, /[tr]/, /[tb]/, /e/, /r/]`. With this option enabled, the
* returned prefix is only guaranteed to be a superset of the actual strict
* longest prefix.
*
* The purpose of this option is to provide longer prefixes in use cases
* where an approximation of the actual prefix is good enough.
*
* @default false
*/
looseGroups?: boolean;
}
/**
* Returns the longest knowable prefix guaranteed to always be accepted by the
* given alternative (ignoring assertions).
*
* All character sets except the last one are guaranteed to be non-empty. The
* last character set is only guaranteed to be non-empty if `includeAfter: false`.
*
* @param alternative The alternative whose prefix is determined.
* @param direction The matching direction to use.
* @param flags The flags of the regex.
* @param options Optional settings; see {@link GetLongestPrefixOptions}. All options default to `false`.
* @returns The prefix as a read-only sequence of character sets.
*/
export declare function getLongestPrefix(
alternative: Alternative,
direction: MatchingDirection,
flags: ReadonlyFlags,
options?: Readonly<GetLongestPrefixOptions>
): readonly CharSet[];
/**
* The first character after some point.
*
* This is not constrained to some specific element. This is conceptually how a lookaround sees the input string.
*
* ## Example
*
* In the regex `/ab?/` the first look character after `a` is `{ char: all, edge: true, exact: true }`. It accepts all
* characters because the `b` is optional, so there may be any character after `a`. `exact` is `true` because we know
* that *exactly* all characters are allowed after `a`. `edge` is `true` because the input string is also allowed to
* just end after `a` (i.e. the string `"a"` is accepted).
*
* ## Equivalent regexes
*
* The regex an instance of this type is equivalent to depends only on the `char` and `edge` properties. The equivalent
* regex is:
*
* - `edge: true`: `(?=[char]|$)` or `(?<=[char]|^)`
* - `edge: false`: `(?=[char])` or `(?<=[char])`
*
* (`$` and `^` denote the end and start of the input string respectively.)
*
* Note that `FirstLookChar` doesn't distinguish between lookaheads and lookbehinds. It can express either.
*
* ### Important values
*
* There are a few important values:
*
* - Accept all: The instance `{ char: all, exact: true, edge: true }` is guaranteed to be equivalent to an
* assertion that accepts all input strings (`(?=[\s\S]|$)`).
* - Reject all: The instance `{ char: empty, edge: false }` (`exact` doesn't matter) is guaranteed to be equivalent to
* an assertion that rejects all input strings (`(?=[])`).
* - Edge assertion: The instance `{ char: empty, edge: true }` (`exact` doesn't matter) is guaranteed to be equivalent
* to an edge assertion (either `^` or `$`).
*
* @see {@link FirstLookChars}
*/
export interface FirstLookChar {
/**
* A super set of the first character.
*
* We can usually only guarantee a super set because lookarounds in the pattern may narrow down the actual character
* set.
*/
readonly char: CharSet;
/**
* If `true`, then the first character can be the start/end of the string.
*/
readonly edge: boolean;
/**
* If `true`, then `char` is guaranteed to be exactly the first character and not just a super set of it.
*/
readonly exact: boolean;
}
/**
* This namespace contains methods for working with {@link FirstLookChar}s.
*/
export declare namespace FirstLookChars {
/**
* Returns a {@link FirstLookChar} that is equivalent to a trivially accepting lookaround.
*
* The returned look is semantically equivalent to `(?=)` == `(?=[^]|$)` or `(?<=)` == `(?<=[^]|^)`.
*
* @param flags The flags of the regex. NOTE(review): presumably needed to determine the set of all characters for
* the returned `char` — confirm against the implementation.
*/
function all(flags: ReadonlyFlags): FirstLookChar;
/**
* Returns a {@link FirstLookChar} that is equivalent to an assertion that only accepts the start/end of the input
* string.
*
* The returned look is semantically equivalent to `$` == `(?=[]|$)` or `^` == `(?<=[]|^)`.
*
* @param flags The flags of the regex. NOTE(review): presumably needed to construct the empty `char` set with the
* right character range — confirm against the implementation.
*/
function edge(flags: ReadonlyFlags): FirstLookChar;
/**
* Converts the given {@link FirstLookChar} to a {@link FirstConsumedChar}.
*
* This is semantically equivalent to `(?=b|$)` -> `[]|(?=b|$)`.
*
* Note: This operation will typically return a {@link FirstPartiallyConsumedChar}. It will only return a
* {@link FirstFullyConsumedChar} if the given `char` is empty and `edge: false`. This is because
* `(?=[])` -> `[]|(?=[])` == `[]`.
*
* @param look The look character to convert.
*/
function toConsumed(look: FirstLookChar): FirstConsumedChar;
}
/**
* The first character consumed by some element.
*
* The first character can either be fully consumed or partially consumed. The two variants are distinguished by
* their `empty` property: it is `false` for {@link FirstFullyConsumedChar} and `true` for
* {@link FirstPartiallyConsumedChar}.
*
* @see {@link getFirstConsumedChar}
* @see {@link FirstConsumedChars}
*/
export type FirstConsumedChar = FirstFullyConsumedChar | FirstPartiallyConsumedChar;
/**
* This is equivalent to a regex fragment `[char]`.
*
* @see {@link FirstConsumedChar}
*/
export interface FirstFullyConsumedChar {
/**
* A super set of the first character.
*
* We can usually only guarantee a super set because lookarounds in the pattern may narrow down the actual character
* set.
*/
readonly char: CharSet;
/**
* Whether the first character also includes the empty word.
*
* This is always `false` for a fully consumed character and serves as the discriminant of
* {@link FirstConsumedChar}.
*/
readonly empty: false;
/**
* If `true`, then `char` is guaranteed to be exactly the first character and not just a super set of it.
*/
readonly exact: boolean;
}
/**
* This is equivalent to a regex fragment `[char]|(?=[look.char])` or `[char]|(?=[look.char]|$)` depending on
* {@link FirstLookChar.edge}.
*
* @see {@link FirstConsumedChar}
*/
export interface FirstPartiallyConsumedChar {
/**
* A super set of the first character.
*
* We can usually only guarantee a super set because lookarounds in the pattern may narrow down the actual character
* set.
*/
readonly char: CharSet;
/**
* Whether the first character also includes the empty word.
*
* This is always `true` for a partially consumed character and serves as the discriminant of
* {@link FirstConsumedChar}.
*/
readonly empty: true;
/**
* If `true`, then `char` is guaranteed to be exactly the first character and not just a super set of it.
*/
readonly exact: boolean;
/**
* A set of characters that may come after the consumed character.
*
* This is the lookaround part (`(?=[look.char])` or `(?=[look.char]|$)`) of the equivalent regex fragment given in
* the description of this interface.
*/
readonly look: FirstLookChar;
}
/**
* This namespace contains methods for working with {@link FirstConsumedChar}s.
*/
export declare namespace FirstConsumedChars {
/**
* Returns a {@link FirstConsumedChar} that is equivalent to the empty concatenation.
*
* The result is a {@link FirstPartiallyConsumedChar}, i.e. it has `empty: true`.
*
* @param flags The flags of the regex.
*/
function emptyConcat(flags: ReadonlyFlags): FirstPartiallyConsumedChar;
/**
* Returns a {@link FirstConsumedChar} that is equivalent to the empty union (or empty set).
*
* The result is a {@link FirstFullyConsumedChar}, i.e. it has `empty: false`.
*
* @param flags The flags of the regex.
*/
function emptyUnion(flags: ReadonlyFlags): FirstFullyConsumedChar;
/**
* Converts the given {@link FirstConsumedChar} to a {@link FirstLookChar}.
*
* This is conceptually equivalent to wrapping the given consumed character into a lookaround.
*
* This is semantically equivalent to `a|(?=b|$)` -> `(?=a|(?=b|$))` == `(?=[ab]|$)`.
*
* @param consumed The consumed character to convert.
*/
function toLook(consumed: FirstConsumedChar): FirstLookChar;
/**
* Creates the union of all the given {@link FirstConsumedChar}s.
*
* The result is independent of the order in which the characters are given.
*
* @param chars The consumed characters to unite.
* @param flags The flags of the regex.
*/
function union(chars: Iterable<FirstConsumedChar>, flags: ReadonlyFlags): FirstConsumedChar;
/**
* Creates the concatenation of all the given {@link FirstConsumedChar}s.
*
* The given char iterable is evaluated **lazily**. The implementation will try to iterate as few chars as possible.
*
* @param chars The consumed characters to concatenate, in the order given by the iterable.
* @param flags The flags of the regex.
*/
function concat(chars: Iterable<FirstConsumedChar>, flags: ReadonlyFlags): FirstConsumedChar;
/**
* Makes the given consumed character optional.
*
* This is semantically equivalent to `a|(?=b|$)` -> `a?`.
*
* The result is a {@link FirstPartiallyConsumedChar}, i.e. it has `empty: true`.
*
* @param consumed The consumed character to make optional.
*/
function makeOptional(consumed: FirstConsumedChar): FirstPartiallyConsumedChar;
}
/**
* If a character is returned, it is guaranteed to be a super set of the actual character. If the given element is
* always of zero length, then the empty character set will be returned.
*
* If `exact` is `true`, then the returned character is guaranteed to be the actual character at all times if this
* element is not influenced by lookarounds outside itself.
*
* ## Partially consumed
*
* Only the given element and its children are processed. This is important when considering partially consumed first
* characters. The lookaround is derived only from the assertions inside the given element.
*
* E.g. In `/b?a/`, the result for `b?` is `{ char: 'b', empty: true, look: { char: all, edge: true } }`. The
* lookaround accepts all characters because it doesn't take the `a` after `b?` into consideration.
*
* @param element The element, alternative, or list of alternatives to analyse.
* @param direction The matching direction to use.
* @param flags The flags of the regex.
*/
export declare function getFirstConsumedChar(
element: Element | Alternative | readonly Alternative[],
direction: MatchingDirection,
flags: ReadonlyFlags
): FirstConsumedChar;
/**
* NOTE(review): this function has no upstream documentation. Judging by its name and signature, it presumably
* returns the first character consumed after the given element, analogous to {@link getFirstCharAfter} but yielding
* a {@link FirstConsumedChar} instead of a {@link FirstLookChar} — confirm against the implementation.
*
* {@link getFirstConsumedCharAfterWithContributors} is documented to behave exactly like this function while also
* tracking the contributing elements.
*
* @param afterThis The element or alternative after which to look.
* @param direction The matching direction to use.
* @param flags The flags of the regex.
*/
export declare function getFirstConsumedCharAfter(
afterThis: Element | Alternative,
direction: MatchingDirection,
flags: ReadonlyFlags
): FirstConsumedChar;
/**
* Returns the first character after the given element.
*
* What "after" means depends on the given direction, which will be interpreted as the current matching
* direction. You can use this to get the previous character of an element as well.
*
* @param afterThis The element or alternative after which to look.
* @param direction The direction that is interpreted as the current matching direction.
* @param flags The flags of the regex.
*/
export declare function getFirstCharAfter(
afterThis: Element | Alternative,
direction: MatchingDirection,
flags: ReadonlyFlags
): FirstLookChar;
/**
* A wrapper around a character value that adds which elements contributed to the character value.
*
* @typeParam Char The type of the wrapped character value. Note that inside this interface the name shadows the
* `Char` type imported from `refa`.
*/
export interface WithContributors<Char> {
/**
* The wrapped character value.
*/
char: Char;
/**
* A list of elements that all contributed to the result. All sub-elements of the listed elements also contribute.
*/
contributors: Element[];
}
/**
* This function behaves exactly like {@link getFirstConsumedCharAfter} but it also tracks what elements contribute to
* the result.
*
* @param afterThis The element or alternative after which to look.
* @param direction The matching direction to use.
* @param flags The flags of the regex.
* @returns The consumed character in `char` and the contributing elements in `contributors`.
*/
export declare function getFirstConsumedCharAfterWithContributors(
afterThis: Element | Alternative,
direction: MatchingDirection,
flags: ReadonlyFlags
): WithContributors<FirstConsumedChar>;
/**
* This function behaves exactly like {@link getFirstCharAfter} but it also tracks what elements contribute to the
* result.
*
* @param afterThis The element or alternative after which to look.
* @param direction The direction that is interpreted as the current matching direction.
* @param flags The flags of the regex.
* @returns The look character in `char` and the contributing elements in `contributors`.
*/
export declare function getFirstCharAfterWithContributors(
afterThis: Element | Alternative,
direction: MatchingDirection,
flags: ReadonlyFlags
): WithContributors<FirstLookChar>;
/**
* Options to control the behavior of {@link canReorder}.
*/
export interface CanReorderOptions {
/**
* The matching direction of the alternatives.
*
* The correctness of {@link canReorder} depends on this direction being
* correct.
*
* If the matching direction cannot be known, supply `"unknown"`.
* `"unknown"` is guaranteed to always create a correct result regardless
* of matching direction. If {@link canReorder} returns `true` for
* `"unknown"`, then it will also return `true` for both `"ltr"` and
* `"rtl"` and vice versa.
*
* This value defaults to the result of {@link getMatchingDirection} for
* any of the given alternatives.
*/
matchingDirection?: OptionalMatchingDirection;
/**
* Capturing groups are typically referenced by their position, so they
* cannot be reordered without affecting the behavior of the regular
* expression.
*
* However, in some cases capturing groups and their order don't matter.
* Enabling this option will allow all permutations that change the order
* of capturing groups.
*
* @default false
*/
ignoreCapturingGroups?: boolean;
}
/**
* Returns whether the given alternatives can all be reordered.
*
* In other words, given a set of alternatives, this will return whether all
* permutations of those alternatives behave exactly the same as the current
* permutation of those alternatives.
*
* The function makes one more guarantee when some alternatives of the same
* parent are not given. Let `T` be the set of the given alternatives and let
* `U` be the set of alternatives that are **not** given and have the same
* parent as the given alternatives. Let `M` be all alternatives in `U` that
* are positioned between two alternatives in `T`. As long as the relative
* order of the alternatives in `M` is preserved, all permutations of `T M`
* are guaranteed to behave equivalently.
*
* Note that this function makes no guarantees about the alternatives in
* `U \ (T M)`. Permutations that change the position of those alternatives
* are **not** guaranteed to be valid.
*
* Example: `/0|1|2|💚|3|4|💯|👋|5|6/` with `T = 💚|💯|👋`, `U = 0|1|2|3|4|5|6`, and
* `M = 3|4`.
*
* This function will return `true` and the following are **guaranteed** to be
* valid permutations:
*
* - `/0|1|2|💚|3|4|💯|👋|5|6/` (unchanged)
* - `/0|1|2|3|💚|4|💯|👋|5|6/`
* - `/0|1|2|3|4|💚|💯|👋|5|6/`
* - `/0|1|2|💚|💯|3|4|👋|5|6/`
* - `/0|1|2|💚|💯|👋|3|4|5|6/`
* - `/0|1|2|👋|💯|💚|3|4|5|6/`
* - `/0|1|2|👋|3|4|💯|💚|5|6/`
*
* The following are **not guaranteed** to be valid permutations:
*
* - `/0|1|2|💚|4|3|💯|👋|5|6/` (`3` and `4` were swapped)
* - `/💚|0|1|2|3|4|💯|👋|5|6/` (the position of `0` was changed)
* - `/0|1|2|💚|3|4|👋|5|6|💯/` (the position of `6` was changed)
*
* @param alternatives The alternatives (`T` in the description above) to check.
* @param flags The flags of the regex.
* @param options Optional settings; see {@link CanReorderOptions}.
*/
export declare function canReorder(
alternatives: Iterable<Alternative>,
flags: ReadonlyFlags,
options?: CanReorderOptions
): boolean;
/**
* NOTE(review): this function has no upstream documentation. Judging by its name and signature only, it is
* presumably the variant of {@link canReorder} for one fixed matching direction, where `target` holds the
* alternatives to reorder and `slice` the surrounding run of sibling alternatives — confirm against the
* implementation before relying on it.
*
* @param target The set of alternatives in question (presumably the ones to be reordered; verify).
* @param slice An array of alternatives (presumably a contiguous slice of the parent's alternatives that contains
* `target`; verify).
* @param direction The matching direction to use.
* @param flags The flags of the regex.
* @param ignoreCapturingGroups Presumably the same meaning as {@link CanReorderOptions.ignoreCapturingGroups}, but
* required here; verify.
*/
export declare function canReorderDirectional(
target: ReadonlySet<Alternative>,
slice: Alternative[],
direction: MatchingDirection,
flags: ReadonlyFlags,
ignoreCapturingGroups: boolean
): boolean;
/**
* The result of {@link getConsumedChars}: a set of characters together with whether that set is exact.
*/
export interface ConsumedChars {
/**
* The set of characters. See {@link exact} for how precise this set is.
*/
chars: CharSet;
/**
* Whether `chars` is exact.
*
* If `false`, then `chars` is only guaranteed to be a superset of the
* actually possible characters.
*/
exact: boolean;
}
/**
* Returns the union of all characters that can possibly be consumed by the
* given element.
*
* @param element The element, pattern, or alternative to analyse.
* @param flags The flags of the regex.
* @returns The consumed characters together with whether that set is exact (see {@link ConsumedChars}).
*/
export declare function getConsumedChars(element: Element | Pattern | Alternative, flags: ReadonlyFlags): ConsumedChars;
export {};