elearning/Frontend-Learner/node_modules/regexp-ast-analysis/index.d.ts
2026-01-13 10:48:02 +07:00

1350 lines
50 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// Generated by dts-bundle-generator v5.9.0
import {
Alternative,
Backreference,
CapturingGroup,
Character,
CharacterClass,
CharacterClassElement,
CharacterClassRange,
CharacterSet,
ClassIntersection,
ClassRangesCharacterClass,
ClassRangesCharacterClassElement,
ClassSetOperand,
ClassStringDisjunction,
ClassSubtraction,
EdgeAssertion,
Element,
ExpressionCharacterClass,
Flags,
Group,
LookaroundAssertion,
Node,
Pattern,
Quantifier,
RegExpLiteral,
StringAlternative,
StringsUnicodePropertyCharacterSet,
UnicodeSetsCharacterClass,
UnicodeSetsCharacterClassElement,
WordBoundaryAssertion,
} from "@eslint-community/regexpp/ast";
import { Char, CharSet, JS } from "refa";
/**
* A simple interface to represent JS RegExp flags.
*
* Each property corresponds to one flag character of a JS RegExp (e.g. `ignoreCase` corresponds to `i`).
*
* All properties are optional and assumed to be `false` by default.
*/
export interface ReadonlyFlags {
/**
* The `s` flag.
*
* @default false
*/
readonly dotAll?: boolean;
/**
* The `g` flag.
*
* @default false
*/
readonly global?: boolean;
/**
* The `d` flag.
*
* @default false
*/
readonly hasIndices?: boolean;
/**
* The `i` flag.
*
* @default false
*/
readonly ignoreCase?: boolean;
/**
* The `m` flag.
*
* @default false
*/
readonly multiline?: boolean;
/**
* The `y` flag.
*
* @default false
*/
readonly sticky?: boolean;
/**
* The `u` flag.
*
* @default false
*/
readonly unicode?: boolean;
/**
* The `v` flag.
*
* @default false
*/
readonly unicodeSets?: boolean;
}
/**
* The union of the character-related AST node types: character sets, character classes and their elements, class
* intersections and subtractions, and string alternatives.
*
* The length and emptiness functions of this module (e.g. {@link isZeroLength} and {@link getLengthRange}) accept
* these node types in addition to regular elements and alternatives.
*/
export type CharacterElement =
| CharacterSet
| ClassIntersection
| ClassSubtraction
| CharacterClassElement
| CharacterClass
| StringAlternative;
/**
* Returns whether all (but at least one of the) paths of the given element do not consume characters.
*
* If this function returns `true`, then {@link isPotentiallyZeroLength} is guaranteed to return `true`.
*
* ## Backreferences
*
* This function uses the same condition for backreferences as {@link isEmpty}.
*
* ## Relations
*
* - `isZeroLength(e) -> isPotentiallyZeroLength(e)`
* - `isZeroLength(e) -> getLengthRange(e).max == 0`
*
* @param element The element, character element, alternative, or array of alternatives to check.
* @param flags The RegExp flags to use for the check.
*
* @see {@link isPotentiallyZeroLength}
* @see {@link isEmpty}
* @see {@link isPotentiallyEmpty}
* @see {@link getLengthRange}
*/
export declare function isZeroLength(
element: Element | CharacterElement | Alternative | readonly Alternative[],
flags: ReadonlyFlags
): boolean;
/**
* Returns whether at least one path of the given element does not consume characters.
*
* ## Backreferences
*
* This function uses the same condition for backreferences as {@link isPotentiallyEmpty}.
*
* ## Relations
*
* - `isPotentiallyZeroLength(e) -> getLengthRange(e).min == 0`
*
* @param element The element, character element, alternative, or array of alternatives to check.
* @param flags The RegExp flags to use for the check.
*
* @see {@link isZeroLength}
* @see {@link isEmpty}
* @see {@link isPotentiallyEmpty}
* @see {@link getLengthRange}
*/
export declare function isPotentiallyZeroLength(
element: Element | CharacterElement | Alternative | readonly Alternative[],
flags: ReadonlyFlags
): boolean;
/**
* Returns whether all (but at least one of the) paths of the given element neither consume characters nor assert
* characters.
*
* If this function returns `true`, then {@link isZeroLength} and {@link isPotentiallyEmpty} are guaranteed to return
* `true`.
*
* ## Backreferences
*
* A backreference will only be considered empty, iff it is empty by the definition of
* {@link isEmptyBackreference}.
*
* ## Relations
*
* - `isEmpty(e) -> isZeroLength(e)`
* - `isEmpty(e) -> isPotentiallyEmpty(e)`
*
* @param element The element, character element, alternative, or array of alternatives to check.
* @param flags The RegExp flags to use for the check.
*
* @see {@link isZeroLength}
* @see {@link isPotentiallyZeroLength}
* @see {@link isPotentiallyEmpty}
* @see {@link getLengthRange}
*/
export declare function isEmpty(
element: Element | CharacterElement | Alternative | readonly Alternative[],
flags: ReadonlyFlags
): boolean;
/**
* Returns whether at least one path of the given element neither consumes characters nor asserts characters.
*
* ## Backreferences
*
* A backreference will only be considered potentially empty, iff at least one of the following conditions is true:
*
* - The backreference is trivially always empty. (see {@link isEmptyBackreference})
* - The referenced capturing group is a descendant of the given element and at least one of the following conditions
*   is true:
*   * The referenced capturing group is potentially zero-length.
*   * The backreference is not always after its referenced capturing group.
*     (see {@link isStrictBackreference})
*
* ## Relations
*
* - `isPotentiallyEmpty(e) -> isPotentiallyZeroLength(e)`
*
* @param element The element, character element, alternative, or array of alternatives to check.
* @param flags The RegExp flags to use for the check.
*
* @see {@link isZeroLength}
* @see {@link isPotentiallyZeroLength}
* @see {@link isEmpty}
* @see {@link getLengthRange}
*/
export declare function isPotentiallyEmpty(
element: Element | CharacterElement | Alternative | readonly Alternative[],
flags: ReadonlyFlags
): boolean;
/**
* Returns the type of all possible ancestor nodes of the given node type.
*
* The helper types below compute this by following the `parent` property types for a fixed number of levels instead
* of recursing without bound.
*
* @typeParam T The node type whose possible ancestor types are computed.
*
* @see {@link hasSomeAncestor}
*/
export type Ancestor<T extends Node> = AncestorImpl<T>;
// The parent of `T` plus up to two further parent levels (`Anc2`), extended by `ExtendApproximation`.
type AncestorImpl<T extends Node> = ExtendApproximation<Anc2<GetParent<T>>>;
// Adds the deeper ancestor sets of `UnicodeSetsCharacterClass` and `Alternative` whenever one of them is part of `T`.
type ExtendApproximation<T extends Node> =
| T
| (T extends UnicodeSetsCharacterClass ? CharacterClassAnc : never)
| (T extends Alternative ? AlternativeAnc : never);
type AlternativeAnc = TrueAnc<Alternative>;
type CharacterClassAnc = TrueAnc<UnicodeSetsCharacterClass>;
// The parent of `T` plus up to six further parent levels (`Anc6`).
type TrueAnc<T extends Node> = Anc6<GetParent<T>>;
// The non-nullable type of the `parent` property of `T`.
type GetParent<T extends Node> = NonNullable<T["parent"]>;
// `AncN<T>` is `T` together with the types reached by following `parent` up to N times.
type Anc6<T extends Node> = T | Anc5<GetParent<T>>;
type Anc5<T extends Node> = T | Anc4<GetParent<T>>;
type Anc4<T extends Node> = T | Anc3<GetParent<T>>;
type Anc3<T extends Node> = T | Anc2<GetParent<T>>;
type Anc2<T extends Node> = T | Anc1<GetParent<T>>;
type Anc1<T extends Node> = T | GetParent<T>;
/**
* Returns whether any of the ancestors of the given node fulfills the given condition.
*
* If the given condition is an AST node instead of a function, `hasSomeAncestor` will behave as if the condition
* function was `a => a === conditionNode`.
*
* The ancestors will be iterated in the order from closest to farthest.
* The condition function will not be called on the given node.
*
* @param node The node whose ancestors are checked.
* @param condition A predicate that is called with an ancestor, or an AST node that is compared against each
* ancestor.
*/
export declare function hasSomeAncestor<T extends Node>(
node: T,
condition: ((ancestor: Ancestor<T>) => boolean) | Node
): boolean;
/**
* Returns the type of all possible descendant nodes of the given node type. This trivially includes the given type.
*
* The helper types below compute this by applying `GetChildren` for a fixed number of levels.
*
* @typeParam T The node type whose possible descendant types are computed.
*
* @see {@link hasSomeDescendant}
*/
export type Descendant<T extends Node> = T | DescendantsImpl<T>;
// The children of `T` plus up to two further child levels below them (`Dec1` -> `Dec2` -> `GetChildren`).
type DescendantsImpl<T extends Node> = Dec1<GetChildren<T>>;
type Dec1<T extends Node> = T | Dec2<GetChildren<T>>;
type Dec2<T extends Node> = T | GetChildren<T>;
// Maps a node type to the union of node types that the matching branches below list as its children.
// Branches that do not match `T` contribute `never`.
type GetChildren<T extends Node> =
| (T extends RegExpLiteral ? Flags | Pattern | Element : never)
| (T extends Alternative | CapturingGroup | Group | LookaroundAssertion | Quantifier | Pattern
? Alternative | Element
: never)
| (T extends Alternative ? Element : never)
| (T extends ClassRangesCharacterClass ? ClassRangesCharacterClassElement : never)
| (T extends CharacterClassRange ? Character : never)
| (T extends UnicodeSetsCharacterClass | ExpressionCharacterClass | ExpressionCharacterClass["expression"]
? UnicodeSetsDescendants
: never)
| (T extends ClassStringDisjunction ? StringAlternative : never)
| (T extends StringAlternative ? Character : never);
// The node types used as children of `v`-flag character classes and of their expressions (see `GetChildren`).
type UnicodeSetsDescendants =
| ClassSetOperand
| UnicodeSetsCharacterClassElement
| UnicodeSetsCharacterClass
| ExpressionCharacterClass
| ExpressionCharacterClass["expression"];
/**
* Returns whether any of the descendants of the given node fulfill the given condition.
*
* The descendants will be iterated in a DFS top-to-bottom manner from left to right with the first node being the
* given node.
*
* If the given condition is an AST node instead of a function, `hasSomeDescendant` will behave as if the condition
* function was `d => d === conditionNode`.
*
* This function is short-circuited, so as soon as any `condition` returns `true`, `true` will be returned.
*
* @param node The node at which the iteration starts. It is the first node to be checked.
* @param condition A predicate that is called with a descendant, or an AST node that is compared against each
* descendant.
* @param descentConditionFn An optional function to decide whether the descendants of the given node will be checked
* as well.
*
* This function will be called with some node only after `condition` has returned `false` for this node.
*/
export declare function hasSomeDescendant<T extends Node>(
node: T,
condition: ((descendant: Descendant<T>) => boolean) | Node,
descentConditionFn?: (descendant: Descendant<T>) => boolean
): boolean;
/**
* Returns the one-based number of the given capturing group.
*
* This is the number needed to refer to the capturing group via backreferences.
*
* @param group The capturing group whose number is returned.
*/
export declare function getCapturingGroupNumber(group: CapturingGroup): number;
/**
* Returns the pattern node of the JS RegExp of a given node.
*
* This operation is guaranteed to always succeed for all node types except for flags nodes. Flags nodes have an
* optional `parent` which, if not set, means that this function can't access the pattern node.
*
* @param node The node whose pattern node is returned.
*
* @throws If the pattern node cannot be accessed from a flags node.
*/
export declare function getPattern(node: Node): Pattern;
/**
* The matching direction of alternatives. This can be either `ltr` (left to right) or `rtl` (right to left).
*
* `ltr` is the matching direction of lookaheads and the default matching direction of JavaScript RegExps. `rtl` is the
* matching direction of lookbehinds.
*
* The current matching direction of an element is determined by the closest lookaround (lookahead or lookbehind)
* ancestor. If the closest lookaround ancestor is a lookahead, the matching direction is `ltr`. Likewise, if it's a
* lookbehind, it's `rtl`. If an element is not a descendant of a lookaround, the default matching direction `ltr` is
* assumed.
*
* @see {@link getMatchingDirection}
* @see {@link invertMatchingDirection}
* @see {@link getMatchingDirectionFromAssertionKind}
*/
export type MatchingDirection = "ltr" | "rtl";
/**
* This extends the {@link MatchingDirection} type with the value `"unknown"` to allow unknown matching
* directions.
*
* This is useful when the matching direction of an element/alternative cannot
* be known with 100% certainty.
*/
export type OptionalMatchingDirection = MatchingDirection | "unknown";
/**
* Returns the direction in which the given node will be matched relative to the closest parent alternative.
*
* If the given node is a lookaround, then the result of `getMatchingDirection(lookaround)` will be the same as
* `getMatchingDirection(lookaround.parent)`.
*
* @param node The node whose matching direction is returned.
*/
export declare function getMatchingDirection(node: Node): MatchingDirection;
/**
* Returns the opposite matching direction of the given matching direction.
*
* If `ltr` is given, `rtl` will be returned and vice versa.
*
* @param direction The matching direction to invert.
*/
export declare function invertMatchingDirection(direction: MatchingDirection): MatchingDirection;
/**
* Converts a given assertion kind into a matching direction.
*
* For lookaheads and lookbehinds, the returned matching direction will be the matching direction of their children.
* I.e. the result of `lookahead` is `ltr` and the result of `lookbehind` is `rtl`.
*
* For edge assertions (`^` and `$`), the returned value is the direction of the character the edge assertion asserts.
* I.e. the result of `^` is `rtl` (because it asserts the previous character) and the result of `$` is `ltr` (because
* it asserts the next character).
*
* @param kind The `kind` of a lookaround assertion or of an edge assertion.
*/
export declare function getMatchingDirectionFromAssertionKind(
kind: LookaroundAssertion["kind"] | EdgeAssertion["kind"]
): MatchingDirection;
/**
* Returns whether the given backreference will always be replaced with the empty string.
*
* There are two reasons why a backreference might always be replaced with the empty string:
*
* 1. The referenced capturing group does not consume characters.
*
*    This is the trivial case. If the referenced capturing group never consumes any characters, then a backreference
*    to that group must be replaced with the empty string.
*
*    E.g. `/(\b)a\1/`
*
* 2. The backreference is not after the referenced capturing group.
*
*    A backreference can only be replaced with a non-empty string if the referenced capturing group has captured text
*    before the backreference is matched. There are multiple reasons why the capturing group might be unable to
*    capture text before a backreference to it is reached.
*
*    - The capturing group might be in a different alternative. E.g. `/(a)b|\1/`.
*    - The backreference might be *inside* the capturing group. E.g. `/(a\1)/`.
*    - The backreference might be before the capturing group. E.g. `/\1(a)/`, `/(?:\1(a))+/`, `/(?<=(a)\1)b/`
*
* @param backreference The backreference to check.
* @param flags The RegExp flags to use for the check.
*/
export declare function isEmptyBackreference(backreference: Backreference, flags: ReadonlyFlags): boolean;
/**
* Returns whether the given backreference is a strict backreference.
*
* Strict backreferences are backreferences that are always matched __after__ the referenced group was matched. If there
* exists any path that goes through a backreference but not through the referenced capturing group, that backreference
* is not strict.
*
* ## Examples
*
* In the following examples, `\1` is a strict backreference:
*
* - `/(a)\1/`
* - `/(a)(?:b|\1)/`
* - `/(a)\1?/`
* - `/(?<=\1(a))b/`
*
* In the following examples, `\1` is not a strict backreference:
*
* - `/(a)|\1/`
* - `/(?:(a)|b)\1/`
* - `/(a)?\1/`
* - `/(?<=(a)\1)b/`
* - `/(?!(a)).\1/`
*
* @param backreference The backreference to check.
*/
export declare function isStrictBackreference(backreference: Backreference): boolean;
/**
* Given a node type `N`, this will map to whether a node of type `N` can contain a capturing group.
*
* The result is:
*
* - `false` for character class elements, character classes, character sets, backreferences, edge assertions, word
*   boundary assertions, and flags,
* - `true` for capturing groups, and
* - `boolean` for all other node types.
*
* @typeParam N The node type to map.
*
* @see {@link containsCapturingGroup}
*/
export type ContainsCapturingGroup<N extends Node> = N extends
| CharacterClassElement
| CharacterClass
| CharacterSet
| Backreference
| EdgeAssertion
| WordBoundaryAssertion
| Flags
? false
: N extends CapturingGroup
? true
: boolean;
/**
* Returns whether the given node contains or is a capturing group.
*
* This function is guaranteed to behave in the same way as:
*
* ```js
* hasSomeDescendant(node, d => d.type === "CapturingGroup")
* ```
*
* @param node The node to check.
*
* @see {@link ContainsCapturingGroup}
*/
export declare function containsCapturingGroup<N extends Node>(node: N): ContainsCapturingGroup<N>;
/**
* The range of lengths of the accepted strings. All accepted strings have a length of `min <= length <= max`.
*
* @see {@link getLengthRange}
*/
export interface LengthRange {
/** The inclusive lower bound of the length. */
readonly min: number;
/** The inclusive upper bound of the length. */
readonly max: number;
}
/**
* Returns how many characters the given element can consume at most and has to consume at least.
*
* Note that character classes are not parsed by this function and are assumed to be non-empty.
*
* ## Backreferences
*
* While {@link isPotentiallyZeroLength} generally assumes the worst-case for backreferences that reference capturing
* groups outside the given element, this function does not/cannot. The length range of a backreference only depends on
* the referenced capturing group and the relative positions of the backreference and the capturing group within the
* pattern. It does not depend on the given element.
*
* This is an important distinction because it means that `isPotentiallyZeroLength(e) -> getLengthRange(e).min == 0` is
* guaranteed but `getLengthRange(e).min == 0 -> isPotentiallyZeroLength(e)` is only guaranteed if `e` does not contain
* backreferences.
*
* @param element The element, character element, alternative, or array of alternatives to measure.
* @param flags The RegExp flags to use for the analysis.
*
* @throws {RangeError} if an empty array of alternatives is given.
*
* @see {@link isZeroLength}
* @see {@link isPotentiallyZeroLength}
* @see {@link isEmpty}
* @see {@link isPotentiallyEmpty}
*/
export declare function getLengthRange(
element: Element | CharacterElement | Alternative | readonly Alternative[],
flags: ReadonlyFlags
): LengthRange;
/**
* Returns whether `getLengthRange(e).min == 0`.
*
* This function is slightly different from {@link isPotentiallyZeroLength} in how it handles backreferences. See the
* notes on backreferences in the documentation of {@link isPotentiallyZeroLength} and {@link getLengthRange} for more
* information.
*
* ## Relations
*
* - `isLengthRangeMinZero(e) <-> getLengthRange(e).min == 0`
*
* @param element The element, character element, alternative, or array of alternatives to check.
* @param flags The RegExp flags to use for the check.
*
* @throws {RangeError} if an empty array of alternatives is given.
*
* @see {@link getLengthRange}
*/
export declare function isLengthRangeMinZero(
element: Element | CharacterElement | Alternative | readonly Alternative[],
flags: ReadonlyFlags
): boolean;
/**
* The type of the closest ancestor of two nodes with the given types.
*
* The result is the intersection of "`A` or an ancestor of `A`" and "`B` or an ancestor of `B`". If both `A` and `B`
* consist only of node types that are descendants of `Pattern` (see {@link Descendant}), `RegExpLiteral` is
* additionally excluded from the result.
*
* @typeParam A The type of the first node.
* @typeParam B The type of the second node.
*
* @see {@link getClosestAncestor}
*/
export type ClosestAncestor<A extends Node, B extends Node> = Exclude<A | B, Descendant<Pattern>> extends never
? Exclude<(A | Ancestor<A>) & (B | Ancestor<B>), RegExpLiteral>
: (A | Ancestor<A>) & (B | Ancestor<B>);
/**
* Returns the closest ancestor of the given nodes.
*
* Since only one node is given, the node will be returned as is.
*
* @param a The only node.
*/
export declare function getClosestAncestor<A extends Node>(a: A): A;
/**
* Returns the closest ancestor of the given nodes.
*
* If the nodes are all the same node, the given node will be returned.
*
* If the given nodes are not part of the same AST tree, an error will be thrown.
*
* @param a The first node.
* @param b The second node.
*/
export declare function getClosestAncestor<A extends Node, B extends Node>(a: A, b: B): ClosestAncestor<A, B>;
/**
* Returns the closest ancestor of the given nodes.
*
* If the nodes are all the same node, the given node will be returned.
*
* If the given nodes are not part of the same AST tree, an error will be thrown.
*
* @param a The first node.
* @param b Any number of further nodes.
*/
export declare function getClosestAncestor<A extends Node, B extends Node>(a: A, ...b: B[]): ClosestAncestor<A, B>;
/**
* Returns the closest ancestor of the given nodes.
*
* If the nodes are all the same node, the given node will be returned.
*
* If the given nodes are not part of the same AST tree, an error will be thrown.
*
* NOTE(review): the `undefined` in the return type presumably covers a call without any nodes — confirm against the
* implementation.
*
* @param args Any number of nodes.
*/
export declare function getClosestAncestor<T extends Node>(...args: T[]): ClosestAncestor<T, T> | undefined;
/**
* Returns how many times the regex engine can match the given element at most.
*
* This method will treat elements inside lookarounds differently. Elements inside lookarounds will ignore everything
* outside the lookaround.
*
* ## Examples
*
* - `/a?/`: This will return 1 for `a`.
* - `/a+/`: This will return infinity for `a` and 1 for the quantifier `a+`.
* - `/((a{0,8}){0,8}){0,8}/`: This will return 512 for `a`.
* - `/(ba{0})+/`: This will return 0 for `a` and infinity for the quantifier `a{0}`.
* - `/(\w(?!a{3}b))+/`: This will return 3 for `a` because `a` is inside a lookaround and therefore unaffected by the
*   `(\w(?!a{3}b))+` quantifier.
*
* @param element The node for which the maximum number of matches is returned.
*/
export declare function getEffectiveMaximumRepetition(element: Node): number;
/**
* A cache that functions may use to store results.
*
* A cache implements the {@link ReadonlyFlags} interface with all flag properties being required. All functions that
* take a {@link ReadonlyFlags} object can be given a cache instead to utilize the cache. Example:
*
* ```js
* const flags: ReadonlyFlags = getFlags();
* const cache = toCache(flags);
*
* toCharSet(element, flags); // uncached
* toCharSet(element, cache); // cached
* ```
*
* Whether the cache is actually utilized depends on the implementation of the function.
*
* To get a cache for some flags, use the {@link toCache} function.
*
* ### Assumption
*
* Caches assume that the regexpp AST of cached nodes is immutable. If this assumption is broken, then the cache may
* return old or incorrect results.
*
* The AST may be changed before the cache first sees a node of the AST and after the cache last sees a node of the
* AST. Changes are allowed as long as the AST appears to be immutable from the perspective of the cache.
*
* ### Memory
*
* The cache uses regexpp `Node` objects as keys in
* [`WeakMap`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/WeakMap)s internally.
* They will not cause memory leaks.
*
* This means that caches may out-live the nodes they cache information for.
*
* @see {@link toCache}
* @see {@link createCache}
*/
export interface Cache extends Required<ReadonlyFlags> {
/** @internal */
readonly __cache?: never;
}
/**
* This will create a new cache instance for the given flags.
*
* This operation will always create a new cache. If you want to transparently reuse cache instances, use
* {@link toCache} instead.
*
* See {@link Cache} for more information about using caches.
*
* @param flags The flags for which the cache is created.
*
* @see {@link Cache}
* @see {@link toCache}
*/
export declare function createCache(flags: ReadonlyFlags): Cache;
/**
* Returns a cache instance for the given flags.
*
* If the given flags are a cache instance, the cache instance will be returned. Otherwise a new cache instance will
* be created using {@link createCache}.
*
* See {@link Cache} for more information about using caches.
*
* @param flags The flags, or an existing cache instance.
*
* @see {@link Cache}
* @see {@link createCache}
*/
export declare function toCache(flags: ReadonlyFlags): Cache;
/**
* All possible element types that are accepted by {@link toCharSet}.
*
* Character sets of the type `StringsUnicodePropertyCharacterSet` are excluded.
*
* @see {@link toCharSet}
*/
export type ToCharSetElement =
| Character
| CharacterClassRange
| Exclude<CharacterSet, StringsUnicodePropertyCharacterSet>
| ClassRangesCharacterClass;
/**
* Converts the given element or array of elements into a refa `CharSet`.
*
* If an array is given, all the character sets of all elements will be unioned. This means that for any two elements
* `a` and `b`, the results of `toCharSet([a, b])` and `toCharSet(a).union(toCharSet(b))` will be the same.
*
* This is guaranteed to be equivalent to `toUnicodeSet(elements, flags).chars`.
*
* @param elements The element or array of elements to convert.
* @param flags The RegExp flags to use for the conversion.
*
* @see {@link toUnicodeSet}
*/
export declare function toCharSet(
elements: ToCharSetElement | readonly ToCharSetElement[],
flags: ReadonlyFlags
): CharSet;
/**
* All possible element types that are accepted by {@link toUnicodeSet}.
*
* This includes all element types accepted by {@link toCharSet}.
*
* @see {@link toUnicodeSet}
*/
export type ToUnicodeSetElement =
| ToCharSetElement
| CharacterClass
| CharacterSet
| ClassSetOperand
| ExpressionCharacterClass["expression"]
| StringAlternative;
/**
* Converts the given element or array of elements into a refa `UnicodeSet`.
*
* If an array is given, all the character sets of all elements will be unioned. This means that for any two elements
* `a` and `b`, the results of `toUnicodeSet([a, b])` and `toUnicodeSet(a).union(toUnicodeSet(b))` will be the same.
*
* @param elements The element or array of elements to convert.
* @param flags The RegExp flags to use for the conversion.
*/
export declare function toUnicodeSet(
elements: ToUnicodeSetElement | readonly ToUnicodeSetElement[],
flags: ReadonlyFlags
): JS.UnicodeSet;
/**
* Returns whether the given character class/set matches all characters.
*
* This is guaranteed to be equivalent to `toUnicodeSet(char, flags).chars.isAll` but is implemented more efficiently.
*
* @param char The character class/set to check.
* @param flags The RegExp flags to use for the check.
*/
export declare function matchesAllCharacters(char: ToUnicodeSetElement, flags: ReadonlyFlags): boolean;
/**
* Returns whether the given character class/set matches no characters.
*
* This is guaranteed to be equivalent to `toUnicodeSet(char, flags).isEmpty` but is implemented more efficiently.
*
* @param char The character class/set to check.
* @param flags The RegExp flags to use for the check.
*/
export declare function matchesNoCharacters(char: ToUnicodeSetElement, flags: ReadonlyFlags): boolean;
/**
* Returns whether the given character element contains strings.
*
* This is guaranteed to be equivalent to `!toUnicodeSet(char, flags).accept.isEmpty` but is implemented more
* efficiently.
*
* @param char The character element to check.
* @param flags The RegExp flags to use for the check.
*/
export declare function hasStrings(char: ToUnicodeSetElement, flags: ReadonlyFlags): boolean;
/**
* A set of functions to get predefined character sets.
*
* All functions take the RegExp flags for which the character set (or character) is returned.
*/
export declare namespace Chars {
/**
* Returns the maximum character for the given flags.
*/
function maxChar(flags: ReadonlyFlags): Char;
/**
* Returns the empty character set for the given flags.
*/
function empty(flags: ReadonlyFlags): CharSet;
/**
* Returns the full character set for the given flags.
*/
function all(flags: ReadonlyFlags): CharSet;
/**
* Returns the character set that contains only line terminators.
*
* This character set accepts all characters that the JS RegExp `.` rejects. The returned character set accepts
* all characters that the regex `/^.$/` rejects.
*/
function lineTerminator(flags: ReadonlyFlags): CharSet;
/**
* Returns a character set that is equivalent to `\w` with the given flags.
*
* Note: `\w` is somewhat special because it has 3 values. All other predefined character sets only have two values -
* one for Unicode mode and one for non-Unicode mode. This is because Unicode mode changes the semantics of ignore
* case as well. This causes some of the ASCII letters to be ignore-case-equal to higher Unicode characters
* (e.g. K (Latin Capital Letter K, U+004B) == k (Latin Small Letter K, U+006B) == K (Kelvin Sign, U+212A)). As a
* result `\w` has 3 values: one for non-Unicode mode, one for case-sensitive Unicode mode, and one for
* case-insensitive Unicode mode.
*/
function word(flags: ReadonlyFlags): CharSet;
/**
* Returns a character set that is equivalent to `\d` with the given flags.
*/
function digit(flags: ReadonlyFlags): CharSet;
/**
* Returns a character set that is equivalent to `\s` with the given flags.
*/
function space(flags: ReadonlyFlags): CharSet;
}
/**
* Returns whether two nodes are structurally equivalent.
*
* If two elements are structurally equivalent, they must also be semantically equivalent. However, two semantically
* equivalent elements might not be structurally equivalent (e.g. `/[ab]/` !=<sub>struct</sub> `/[ba]/`).
*
* @param x The first node, or `null`.
* @param y The second node, or `null`.
*/
export declare function structurallyEqual(x: Node | null, y: Node | null): boolean;
/**
* The reason a path ends.
*
* Paths generally end because:
*
* 1. the {@link FollowOperations} do not wish to continue or
* 2. the path cannot be followed further because of the structure of the regex.
*
* This type describes the reasons for the second option.
*
* @see {@link FollowOperations}
* @see {@link FollowOperations.endPath}
*/
export type FollowEndReason = "pattern" | "assertion";
/**
* A set of operations that determine how state is propagated and changed.
*
* Only {@link FollowOperations.join} is required. All other operations are optional and have documented defaults.
*
* @typeParam S The type of the state.
*
* @see {@link followPaths}
*/
export interface FollowOperations<S> {
/**
* Split off a new path from the given one.
*
* This function should not modify the given state.
*
* If the state is immutable, then `fork` may be implemented as the identity function in regard to `state`. If the
* function is omitted, it will default to the identity function.
*
* If the state is mutable, then `fork` must be implemented.
*
* @default x => x
*/
fork?: (state: S, direction: MatchingDirection) => S;
/**
* Joins any number of paths to create a combined path.
*/
join(states: S[], direction: MatchingDirection): S;
/**
* This function is called when dealing with lookarounds.
*
* It will __not__ be called for predefined assertions - `^`, `$`, `\b`, `\B`. Use {@link FollowOperations.enter} or
* {@link FollowOperations.leave} for predefined assertions instead.
*
* @default x => x
*/
assert?: (state: S, direction: MatchingDirection, assertion: S, assertionDirection: MatchingDirection) => S;
/**
* This function is called when entering an element.
*
* Operations for elements are called in the following order:
*
* 1. {@link FollowOperations.enter}
* 2. if {@link FollowOperations.continueInto} returns `true`
*    1. Element-specific operations (if any) that can change the current state.
* 3. {@link FollowOperations.leave}
* 4. {@link FollowOperations.continueAfter} (optional; might not be called for every element)
*
* @default (_, x) => x
*/
enter?: (element: Element, state: S, direction: MatchingDirection) => S;
/**
* This function is called when leaving an element.
*
* See the documentation on {@link FollowOperations.enter} for more details.
*
* @default (_, x) => x
*/
leave?: (element: Element, state: S, direction: MatchingDirection) => S;
/**
* This function is called when a path ends.
*
* Paths end at the end of patterns and assertions. It means that there is no element after the pattern/assertion
* in that direction.
*
* @default x => x
* @see {@link FollowEndReason}
*/
endPath?: (state: S, direction: MatchingDirection, reason: FollowEndReason) => S;
/**
* Whether the current path should go into the given element (return `true`) or whether it should be skipped
* (return `false`). If the element is skipped, the given state will not be changed and passed as-is to the `leave`
* function.
*
* You shouldn't modify state in this function. Modify state in {@link FollowOperations.enter} instead.
*
* See the documentation on {@link FollowOperations.enter} for more details.
*
* @default () => true
*/
continueInto?: (element: Element, state: S, direction: MatchingDirection) => boolean;
/**
* Whether the current path should continue after the given element (return `true`) or whether all elements that
* follow this element should be skipped (return `false`).
*
* If the current path is a fork path, then only the elements until the fork is joined will be skipped. A stopped
* fork path will be joined with all other forks like normal.
*
* You shouldn't modify state in this function. Modify state in {@link FollowOperations.leave} instead.
*
* See the documentation on {@link FollowOperations.enter} for more details.
*
* @default () => true
*/
continueAfter?: (element: Element, state: S, direction: MatchingDirection) => boolean;
/**
* Whether the current path should continue outside the given lookaround assertion.
*
* Paths that leave a lookaround assertion (= go outside of it) generally can't be followed. However, for some
* operations it makes sense to do it anyway.
*
* It usually makes sense to follow paths outside of assertions if
* `getMatchingDirectionFromAssertionKind(element.kind) !== direction`. This condition ensures that lookbehinds only
* follow paths going out to the right (e.g. `(?<=a)->b`) and lookaheads only follow paths going out to the left
* (e.g. `b<-(?=a)`).
*
* If this function returns `false`, {@link FollowOperations.endPath} is guaranteed to be called next.
* If this function returns `true`, {@link FollowOperations.continueAfter} is guaranteed to be called next for the
* lookaround assertion.
*
* You shouldn't modify state in this function. Modify state in {@link FollowOperations.endPath} or
* {@link FollowOperations.enter} instead.
*
* @default () => false
*/
continueOutside?: (element: LookaroundAssertion, state: S, direction: MatchingDirection) => boolean;
}
/**
* This function goes to all elements reachable from the given `start` element.
*
* ## Paths
*
* The function uses _paths_. A path is an [execution path](https://en.wikipedia.org/wiki/Symbolic_execution) that
* describes a sequence of regex elements.
*
* E.g. there are two paths to go from `a` to `b` in the pattern `/a(\w|dd)b/`. The first path is `a \w b` and the
* second path is `a d d b`.
*
* However, the problem with paths is that there can be exponentially many because of combinatorial explosion (e.g. the
* pattern `/(a|b)(a|b)(a|b)(a|b)(a|b)/` has 32 paths). To solve this problem, paths can be _joined_ together again.
*
* E.g. in the pattern `/a(\w|dd)b/`, the first element of all paths will be `a`. After `a`, the path splits into two.
* We call each of the split paths a _fork_. The two forks will be `a ( \w` and `a ( d d`. The `(` is used to indicate
* that a fork was made. Since both paths come together after the group ends, they will be _joined_. The joined path of
* `a ( \w` and `a ( d d` will be written as `a ( \w | d d )`. The `)` is used to indicate that forks have been joined.
* The final path will be `a ( \w | d d ) b`.
*
* This method of forking and joining works for alternations but it won't work for quantifiers. This is why quantifiers
* will be treated as single elements that can be entered. By default, a quantifier `q` will be interpreted as `( q | )`
* if its minimum is zero and as `( q )` otherwise.
*
* E.g. in the pattern `/ab*c/`, the path is `a ( b* | ) c`, and in `/ab+c/`, the path is `a b+ c`.
*
* ### State
*
* Paths are thought of as a sequence of elements and they are represented by state (type parameter `S`). All operations
* that fork, join, or assert paths will operate on state and not a sequence of elements.
*
* State allows operations to be implemented more efficiently and ensures that only necessary data is passed around.
* An analysis of paths usually tracks properties and analyses how these properties change; the current value of these
* properties is state.
*
* ## Operations
*
* Operations act upon state and are specific to the type of state. They define how state changes when
* entering/leaving/asserting elements and how paths fork, join, and continue.
*
* ### Operation sequence
*
* To follow all paths, two methods are necessary: one method that enters elements and one that determines the next
* element. These methods will be called `Enter` and `Next` respectively. These methods will call the given operations
* roughly like this:
*
* ```text
* function Enter(element, state):
*     operations.enter
*     if operations.continueInto:
*         if element.type == GROUP:
*             operations.join(
*                 element.alternatives.map(e => Enter(e, operations.fork(state)))
*             )
*         if element.type == QUANTIFIER:
*             if element.max == 0:
*                 // do nothing
*             else if element.min == 0:
*                 operations.join([
*                     state,
*                     Enter(quantifier, operations.fork(state))
*                 ])
*             else:
*                 Enter(quantifier, operations.fork(state))
*         if element.type == LOOKAROUND:
*             operations.assert(
*                 state,
*                 operations.join(
*                     element.alternatives.map(e => Enter(e, operations.fork(state)))
*                 )
*             )
*     operations.leave
*     Next(element, state)
*
* function Next(element, state):
*     if operations.continueAfter:
*         if noNextElement:
*             operations.endPath
*         else:
*             Enter(nextElement, state)
* ```
*
* (This is just simplified pseudo code but the general order of operations will be the same.)
*
* ## Runtime
*
* If `n` elements can be reached from the given starting element, then the average runtime will be `O(n)` and the
* worst-case runtime will be `O(n^2)`.
*
* @param start The element or alternative from which paths are followed.
* @param startMode If "enter", then the first element to be entered will be the starting element. If "next", then the
* first element to continue after will be the starting element.
* @param initialState The state with which the path at the starting element begins.
* @param operations The operations that determine how state is propagated and changed (see {@link FollowOperations}).
* @param direction The direction in which paths will be followed. If undefined, then the natural matching direction
* ({@link getMatchingDirection}) of the start element will be used.
*
* @returns A state of type `S`. NOTE(review): presumably the state after all reachable paths have been followed and
* joined — confirm against the implementation.
*
* @typeParam S The type of the state.
*/
export declare function followPaths<S>(
start: Element | Alternative,
startMode: "enter" | "next",
initialState: S,
operations: FollowOperations<S>,
direction?: MatchingDirection
): S;
/**
 * Options to control the behavior of {@link getLongestPrefix}.
 */
export interface GetLongestPrefixOptions {
/**
 * Whether the returned sequence is to include the next character (if any)
 * after the longest knowable sequence.
 *
 * The next character after the longest knowable sequence is either:
 * - not consumed by the given alternative
 * (e.g. `(ab)c` -> `[/a/, /b/, /c/]`),
 * - only a superset of the actual next character
 * (e.g. `ab(cd|ef)` -> `[/a/, /b/, /[ce]/]`), or
 * - both.
 *
 * Note that enabling this option means that the returned sequence of
 * character sets is no longer guaranteed to be a prefix of the given
 * alternative.
 *
 * @default false
 */
includeAfter?: boolean;
/**
 * Whether only characters inside the given alternative may be considered
 * when creating the last character.
 *
 * This option controls the behavior of {@link includeAfter}. By default,
 * {@link includeAfter} will also look at the characters after the
 * alternative to create the last character. This may be undesirable in
 * some cases.
 *
 * Enabling this option has the following effect: If the last character
 * of the prefix is affected by characters outside the alternative, then
 * the prefix with {@link includeAfter} set to `false` will be returned.
 *
 * @default false
 */
onlyInside?: boolean;
/**
 * Whether groups will be combined more loosely.
 *
 * With this option disabled, groups will only be combined if they are of
 * the same length and differ in at most one position. E.g. the longest
 * prefix of `/(?:bitter|barber)/` is `[/b/, /[ia]/]`. This requirement is
 * very strict and most groups do not fulfill it in practice.
 *
 * With this option enabled, groups will be combined if they are of the
 * same length. Different characters at the same position are simply
 * combined. E.g. the longest prefix of `/(?:bitter|barber)/` is
 * `[/b/, /[ia]/, /[tr]/, /[tb]/, /e/, /r/]`. With this option enabled, the
 * returned prefix is only guaranteed to be a superset of the actual strict
 * longest prefix.
 *
 * The purpose of this option is to provide longer prefixes in use cases
 * where an approximation of the actual prefix is good enough.
 *
 * @default false
 */
looseGroups?: boolean;
}
/**
 * Returns the longest knowable prefix guaranteed to always be accepted by the
 * given alternative (ignoring assertions).
 *
 * All character sets except the last one are guaranteed to be non-empty. The
 * last character set is only guaranteed to be non-empty if `includeAfter: false`.
 *
 * @param alternative The alternative whose prefix is determined.
 * @param direction The matching direction to use.
 * @param flags The JS RegExp flags (see {@link ReadonlyFlags}).
 * @param options Optional settings; every option defaults to `false` (see {@link GetLongestPrefixOptions}).
 * @returns The prefix as a read-only sequence of character sets.
 */
export declare function getLongestPrefix(
alternative: Alternative,
direction: MatchingDirection,
flags: ReadonlyFlags,
options?: Readonly<GetLongestPrefixOptions>
): readonly CharSet[];
/**
 * The first character after some point.
 *
 * This is not constrained to some specific element. This is conceptually how a lookaround sees the input string.
 *
 * ## Example
 *
 * In the regex `/ab?/` the first look character after `a` is `{ char: all, edge: true, exact: true }`. It accepts all
 * characters because the `b` is optional, so there may be any character after `a`. `exact` is `true` because we know
 * that *exactly* all characters are allowed after `a`. `edge` is `true` because the input string is also allowed to
 * just end after `a` (i.e. the string `"a"` is accepted).
 *
 * ## Equivalent regexes
 *
 * The regex an instance of this type is equivalent to depends only on the `char` and `edge` properties. The equivalent
 * regex is:
 *
 * - `edge: true`: `(?=[char]|$)` or `(?<=[char]|^)`
 * - `edge: false`: `(?=[char])` or `(?<=[char])`
 *
 * (`$` and `^` denote the end and start of the input string respectively.)
 *
 * Note that `FirstLookChar` doesn't distinguish between lookaheads and lookbehinds. It can express either.
 *
 * ### Important values
 *
 * There are a few important values:
 *
 * - Accept all: The instance `{ char: all, exact: true, edge: true }` is guaranteed to be equivalent to an
 * assertion that accepts all input strings (`(?=[\s\S]|$)`).
 * - Reject all: The instance `{ char: empty, edge: false }` (`exact` doesn't matter) is guaranteed to be equivalent to
 * an assertion that rejects all input strings (`(?=[])`).
 * - Edge assertion: The instance `{ char: empty, edge: true }` (`exact` doesn't matter) is guaranteed to be equivalent
 * to an edge assertion (either `^` or `$`).
 *
 * @see {@link FirstLookChars}
 */
export interface FirstLookChar {
/**
 * A super set of the first character.
 *
 * We can usually only guarantee a super set because lookarounds in the pattern may narrow down the actual character
 * set.
 */
readonly char: CharSet;
/**
 * If `true`, then the first character can be the start/end of the string.
 */
readonly edge: boolean;
/**
 * If `true`, then `char` is guaranteed to be exactly the first character and not just a super set of it.
 */
readonly exact: boolean;
}
/**
 * This namespace contains methods for working with {@link FirstLookChar}s.
 */
export declare namespace FirstLookChars {
/**
 * Returns a {@link FirstLookChar} that is equivalent to a trivially accepting lookaround.
 *
 * The returned look is semantically equivalent to `(?=)` == `(?=[^]|$)` or `(?<=)` == `(?<=[^]|^)`.
 *
 * @param flags The JS RegExp flags (see {@link ReadonlyFlags}).
 */
function all(flags: ReadonlyFlags): FirstLookChar;
/**
 * Returns a {@link FirstLookChar} that is equivalent to an assertion that only accepts the start/end of the input
 * string.
 *
 * The returned look is semantically equivalent to `$` == `(?=[]|$)` or `^` == `(?<=[]|^)`.
 *
 * @param flags The JS RegExp flags (see {@link ReadonlyFlags}).
 */
function edge(flags: ReadonlyFlags): FirstLookChar;
/**
 * Converts the given {@link FirstLookChar} to a {@link FirstConsumedChar}.
 *
 * This is semantically equivalent to `(?=b|$)` -> `[]|(?=b|$)`.
 *
 * Note: This operation will typically return a {@link FirstPartiallyConsumedChar}. It will only return a
 * {@link FirstFullyConsumedChar} if the `char` of the given look is empty and `edge: false`. This is because
 * `(?=[])` -> `[]|(?=[])` == `[]`.
 *
 * @param look The look character to convert.
 */
function toConsumed(look: FirstLookChar): FirstConsumedChar;
}
/**
 * The first character consumed by some element.
 *
 * The first character can either be fully consumed or partially consumed. The two variants are discriminated by
 * their `empty` property: `false` for {@link FirstFullyConsumedChar} and `true` for
 * {@link FirstPartiallyConsumedChar}.
 *
 * @see {@link getFirstConsumedChar}
 * @see {@link FirstConsumedChars}
 */
export type FirstConsumedChar = FirstFullyConsumedChar | FirstPartiallyConsumedChar;
/**
 * This is equivalent to a regex fragment `[char]`.
 *
 * @see {@link FirstConsumedChar}
 */
export interface FirstFullyConsumedChar {
/**
 * A super set of the first character.
 *
 * We can usually only guarantee a super set because lookarounds in the pattern may narrow down the actual character
 * set.
 */
readonly char: CharSet;
/**
 * Always `false` for a fully consumed character: the first character does not include the empty word.
 *
 * (If this were `true`, the first character would also include the empty word; see
 * {@link FirstPartiallyConsumedChar}.)
 */
readonly empty: false;
/**
 * If `true`, then `char` is guaranteed to be exactly the first character and not just a super set of it.
 */
readonly exact: boolean;
}
/**
 * This is equivalent to a regex fragment `[char]|(?=[look.char])` or `[char]|(?=[look.char]|$)` depending on
 * {@link FirstLookChar.edge}.
 *
 * @see {@link FirstConsumedChar}
 */
export interface FirstPartiallyConsumedChar {
/**
 * A super set of the first character.
 *
 * We can usually only guarantee a super set because lookarounds in the pattern may narrow down the actual character
 * set.
 */
readonly char: CharSet;
/**
 * Always `true` for a partially consumed character: the first character also includes the empty word.
 */
readonly empty: true;
/**
 * If `true`, then `char` is guaranteed to be exactly the first character and not just a super set of it.
 */
readonly exact: boolean;
/**
 * The look character of the non-consuming branch, i.e. the `(?=[look.char])` / `(?=[look.char]|$)` part of the
 * equivalent regex fragment.
 */
readonly look: FirstLookChar;
}
/**
 * This namespace contains methods for working with {@link FirstConsumedChar}s.
 */
export declare namespace FirstConsumedChars {
/**
 * Returns a {@link FirstConsumedChar} that is equivalent to the empty concatenation.
 *
 * @param flags The JS RegExp flags (see {@link ReadonlyFlags}).
 */
function emptyConcat(flags: ReadonlyFlags): FirstPartiallyConsumedChar;
/**
 * Returns a {@link FirstConsumedChar} that is equivalent to the empty union (or empty set).
 *
 * @param flags The JS RegExp flags (see {@link ReadonlyFlags}).
 */
function emptyUnion(flags: ReadonlyFlags): FirstFullyConsumedChar;
/**
 * Converts the given {@link FirstConsumedChar} to a {@link FirstLookChar}.
 *
 * This is conceptually equivalent to wrapping the given consumed character into a lookaround.
 *
 * This is semantically equivalent to `a|(?=b|$)` -> `(?=a|(?=b|$))` == `(?=[ab]|$)`.
 *
 * @param consumed The consumed character to convert.
 */
function toLook(consumed: FirstConsumedChar): FirstLookChar;
/**
 * Creates the union of all the given {@link FirstConsumedChar}s.
 *
 * The result is independent of the order in which the characters are given.
 *
 * @param chars The consumed characters to unite.
 * @param flags The JS RegExp flags (see {@link ReadonlyFlags}).
 */
function union(chars: Iterable<FirstConsumedChar>, flags: ReadonlyFlags): FirstConsumedChar;
/**
 * Creates the concatenation of all the given {@link FirstConsumedChar}s.
 *
 * The given char iterable is evaluated **lazily**. The implementation will try to iterate as few chars as possible.
 *
 * @param chars The consumed characters to concatenate, in order.
 * @param flags The JS RegExp flags (see {@link ReadonlyFlags}).
 */
function concat(chars: Iterable<FirstConsumedChar>, flags: ReadonlyFlags): FirstConsumedChar;
/**
 * Makes the given consumed character optional.
 *
 * This is semantically equivalent to `a|(?=b|$)` -> `a?`.
 *
 * @param consumed The consumed character to make optional.
 */
function makeOptional(consumed: FirstConsumedChar): FirstPartiallyConsumedChar;
}
/**
 * If a character is returned, it is guaranteed to be a super set of the actual character. If the given element is
 * always of zero length, then the empty character set will be returned.
 *
 * If `exact` is `true`, then the returned character is guaranteed to be the actual character at all times, provided
 * this element is not influenced by lookarounds outside itself.
 *
 * ## Partially consumed
 *
 * Only the given element and its children are processed. This is important when considering partially consumed first
 * characters. The lookaround is derived only from the assertions inside the given element.
 *
 * E.g. In `/b?a/`, the result for `b?` is `{ char: 'b', empty: true, look: { char: all, edge: true } }`. The
 * lookaround accepts all characters because it doesn't take the `a` after `b?` into consideration.
 *
 * @param element The element, alternative, or list of alternatives to analyse.
 * @param direction The matching direction to use.
 * @param flags The JS RegExp flags (see {@link ReadonlyFlags}).
 */
export declare function getFirstConsumedChar(
element: Element | Alternative | readonly Alternative[],
direction: MatchingDirection,
flags: ReadonlyFlags
): FirstConsumedChar;
/**
 * Returns a {@link FirstConsumedChar} for what comes after the given element.
 *
 * NOTE(review): this export has no upstream documentation. Judging by its name and signature it is presumably the
 * consumed-character counterpart of {@link getFirstCharAfter} — confirm against the implementation.
 *
 * @param afterThis The element or alternative after which to look.
 * @param direction The matching direction to use.
 * @param flags The JS RegExp flags (see {@link ReadonlyFlags}).
 */
export declare function getFirstConsumedCharAfter(
afterThis: Element | Alternative,
direction: MatchingDirection,
flags: ReadonlyFlags
): FirstConsumedChar;
/**
 * Returns the first character after the given element.
 *
 * What "after" means depends on the given direction, which will be interpreted as the current matching
 * direction. You can use this to get the previous character of an element as well.
 *
 * @param afterThis The element or alternative after which to look.
 * @param direction The matching direction that defines what "after" means.
 * @param flags The JS RegExp flags (see {@link ReadonlyFlags}).
 */
export declare function getFirstCharAfter(
afterThis: Element | Alternative,
direction: MatchingDirection,
flags: ReadonlyFlags
): FirstLookChar;
/**
 * A wrapper around a character value that adds which elements contributed to the character value.
 *
 * @typeParam Char The type of the wrapped character value (this file instantiates it with
 * {@link FirstConsumedChar} and {@link FirstLookChar}).
 */
export interface WithContributors<Char> {
/**
 * The wrapped character value.
 */
char: Char;
/**
 * A list of elements that all contributed to the result. All sub-elements of the listed elements also contribute.
 */
contributors: Element[];
}
/**
 * This function behaves exactly like {@link getFirstConsumedCharAfter} but it also tracks what elements contribute to
 * the result.
 *
 * @param afterThis The element or alternative after which to look.
 * @param direction The matching direction to use.
 * @param flags The JS RegExp flags (see {@link ReadonlyFlags}).
 * @returns The consumed character together with the elements that contributed to it.
 */
export declare function getFirstConsumedCharAfterWithContributors(
afterThis: Element | Alternative,
direction: MatchingDirection,
flags: ReadonlyFlags
): WithContributors<FirstConsumedChar>;
/**
 * This function behaves exactly like {@link getFirstCharAfter} but it also tracks what elements contribute to the
 * result.
 *
 * @param afterThis The element or alternative after which to look.
 * @param direction The matching direction that defines what "after" means.
 * @param flags The JS RegExp flags (see {@link ReadonlyFlags}).
 * @returns The look character together with the elements that contributed to it.
 */
export declare function getFirstCharAfterWithContributors(
afterThis: Element | Alternative,
direction: MatchingDirection,
flags: ReadonlyFlags
): WithContributors<FirstLookChar>;
/**
 * Options to control the behavior of {@link canReorder}.
 */
export interface CanReorderOptions {
/**
 * The matching direction of the alternatives.
 *
 * The correctness of {@link canReorder} depends on this direction being
 * correct.
 *
 * If the matching direction cannot be known, supply `"unknown"`.
 * `"unknown"` is guaranteed to always create a correct result regardless
 * of matching direction. If {@link canReorder} returns `true` for
 * `"unknown"`, then it will also return `true` for both `"ltr"` and
 * `"rtl"` and vice versa.
 *
 * This value defaults to the result of {@link getMatchingDirection} for
 * any of the given alternatives.
 */
matchingDirection?: OptionalMatchingDirection;
/**
 * Capturing groups are typically referenced by their position, so they
 * cannot be reordered without affecting the behavior of the regular
 * expression.
 *
 * However, in some cases capturing groups and their order don't matter.
 * Enabling this option will allow all permutations that change the order
 * of capturing groups.
 *
 * @default false
 */
ignoreCapturingGroups?: boolean;
}
/**
 * Returns whether the given alternatives can all be reordered.
 *
 * In other words, given a set of alternatives, this will return whether all
 * permutations of those alternatives behave exactly the same as the current
 * permutation of those alternatives.
 *
 * The function makes one more guarantee when some alternatives of the same
 * parent are not given. Let `T` be the set of the given alternatives and let
 * `U` be the set of alternatives that are **not** given and have the same
 * parent as the given alternatives. Let `M` be all alternatives in `U` that
 * are positioned between two alternatives in `T`. As long as the relative
 * order of the alternatives in `M` is preserved, all permutations of `T ∪ M`
 * are guaranteed to behave equivalently.
 *
 * Note that this function makes no guarantees about the alternatives in
 * `U \ (T ∪ M)`. Permutations that change the position of those alternatives
 * are **not** guaranteed to be valid.
 *
 * Example: `/0|1|2|💚|3|4|💯|👋|5|6/` with `T = 💚|💯|👋`, `U = 0|1|2|3|4|5|6`, and
 * `M = 3|4`.
 *
 * This function will return `true` and the following are **guaranteed** to be
 * valid permutations:
 *
 * - `/0|1|2|💚|3|4|💯|👋|5|6/` (unchanged)
 * - `/0|1|2|3|💚|4|💯|👋|5|6/`
 * - `/0|1|2|3|4|💚|💯|👋|5|6/`
 * - `/0|1|2|💚|💯|3|4|👋|5|6/`
 * - `/0|1|2|💚|💯|👋|3|4|5|6/`
 * - `/0|1|2|👋|💯|💚|3|4|5|6/`
 * - `/0|1|2|👋|3|4|💯|💚|5|6/`
 *
 * The following are **not guaranteed** to be valid permutations:
 *
 * - `/0|1|2|💚|4|3|💯|👋|5|6/` (`3` and `4` were swapped)
 * - `/💚|0|1|2|3|4|💯|👋|5|6/` (the position of `0` was changed)
 * - `/0|1|2|💚|3|4|👋|5|6|💯/` (the position of `6` was changed)
 *
 * @param alternatives The alternatives to reorder (the set `T` above).
 * @param flags The JS RegExp flags (see {@link ReadonlyFlags}).
 * @param options Optional settings (see {@link CanReorderOptions}).
 */
export declare function canReorder(
alternatives: Iterable<Alternative>,
flags: ReadonlyFlags,
options?: CanReorderOptions
): boolean;
/**
 * NOTE(review): this export has no upstream documentation. Judging by its name and signature it is presumably the
 * single-direction check that {@link canReorder} builds on — confirm against the implementation before relying on it.
 *
 * @param target A set of alternatives (presumably the alternatives to be reordered — confirm).
 * @param slice An array of alternatives (presumably the run of sibling alternatives that contains `target` — confirm).
 * @param direction The matching direction to check.
 * @param flags The JS RegExp flags (see {@link ReadonlyFlags}).
 * @param ignoreCapturingGroups Presumably the same meaning as {@link CanReorderOptions.ignoreCapturingGroups} —
 * confirm.
 */
export declare function canReorderDirectional(
target: ReadonlySet<Alternative>,
slice: Alternative[],
direction: MatchingDirection,
flags: ReadonlyFlags,
ignoreCapturingGroups: boolean
): boolean;
/**
 * The result type of {@link getConsumedChars}.
 */
export interface ConsumedChars {
/**
 * The set of characters (see {@link getConsumedChars}: the union of all characters that can possibly be consumed).
 */
chars: CharSet;
/**
 * Whether `chars` is exact.
 *
 * If `false`, then `chars` is only guaranteed to be a superset of the
 * actually possible characters.
 */
exact: boolean;
}
/**
 * Returns the union of all characters that can possibly be consumed by the
 * given element.
 *
 * @param element The element, pattern, or alternative to analyse.
 * @param flags The JS RegExp flags (see {@link ReadonlyFlags}).
 * @returns The character set together with a flag stating whether it is exact (see {@link ConsumedChars}).
 */
export declare function getConsumedChars(element: Element | Pattern | Alternative, flags: ReadonlyFlags): ConsumedChars;
export {};