Skip to content

Commit

Permalink
Support dotAll mode
Browse files Browse the repository at this point in the history
  • Loading branch information
tjenkinson committed Oct 29, 2023
1 parent 32f26e2 commit 0a9ff3b
Show file tree
Hide file tree
Showing 15 changed files with 181 additions and 30 deletions.
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,7 @@ The following options exist for both the library and CLI:

- `caseInsensitive`: Enable case insensitive mode. _(Default: `false`)_
- `unicode`: Enable unicode mode. _(Default: `false`)_
- `dotAll`: Enable dot-all mode, which allows `.` to also match line terminators (`\n`, `\r`, `\u2028` and `\u2029`). _(Default: `false`)_
- `maxBacktracks`: If worst case count of possible backtracks is above this number, the regex will be considered unsafe. _(Default: `200`)_
- `maxSteps`: The maximum number of steps to make. Every time a new node is read from the pattern this counts as one step. If this limit is hit `error` will be `hitMaxSteps`. _(Default: `20000`)_
- `timeout`: The maximum amount of time (ms) to spend processing. Once this time is passed the trails found so far will be returned, and the `error` will be `timeout`. _(Default: `Infinity`)_
Expand All @@ -169,7 +170,7 @@ _Note it's possible for there to be an infinite number of results, so you should
### CLI

```sh
$ npx redos-detector check "<regex pattern>" (--caseInsensitive) (--unicode) (--maxBacktracks <number>) (--maxSteps <number>) (--timeout <number>) (--alwaysIncludeTrails) (--disableDowngrade) (--resultsLimit <number>) (--json)
$ npx redos-detector check "<regex pattern>" (--caseInsensitive) (--unicode) (--dotAll) (--maxBacktracks <number>) (--maxSteps <number>) (--timeout <number>) (--alwaysIncludeTrails) (--disableDowngrade) (--resultsLimit <number>) (--json)
```

to run on the fly or
Expand All @@ -195,7 +196,7 @@ $ npm install redos-detector
The following functions are provided:

- `isSafe(regexp: RegExp, options?: { maxBacktracks?: number, maxSteps?: number, timeout?: number, downgradePattern?: boolean })`: This takes a [`RegExp`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp). The `i`, `u` and `s` flags are supported.
- `isSafePattern(pattern: string, options?: { maxBacktracks?: number, maxSteps?: number, timeout?: number, downgradePattern?: boolean, caseInsensitive?: boolean, unicode?: boolean })`: This takes just the pattern as a string. E.g. `a*`.
- `isSafePattern(pattern: string, options?: { maxBacktracks?: number, maxSteps?: number, timeout?: number, downgradePattern?: boolean, caseInsensitive?: boolean, unicode?: boolean, dotAll?: boolean })`: This takes just the pattern as a string. E.g. `a*`.
- `downgradePattern(input: { pattern: string, unicode: boolean })`: This downgrades the provided pattern to one which is supported. You won't need to use this unless you set the `downgradePattern` option to `false`.

## Useful Resources
Expand Down
82 changes: 82 additions & 0 deletions src/__snapshots__/redos-detector.test.ts.snap
Original file line number Diff line number Diff line change
Expand Up @@ -1187778,6 +1187778,88 @@ exports[`RedosDetector isSafe cases /.?[\\n\\r\\u2028-\\u2029]?$/ 2`] = `[]`;

exports[`RedosDetector isSafe cases /.?[\\n\\r\\u2028-\\u2029]?$/ 3`] = `"Regex is safe."`;

exports[`RedosDetector isSafe cases /.?[\\r\\n\\u2028-\\u2029].?$/ 1`] = `null`;

exports[`RedosDetector isSafe cases /.?[\\r\\n\\u2028-\\u2029].?$/ 2`] = `[]`;

exports[`RedosDetector isSafe cases /.?[\\r\\n\\u2028-\\u2029].?$/ 3`] = `"Regex is safe."`;

exports[`RedosDetector isSafe cases /.?[\\r\\n\\u2028-\\u2029].?$/s 1`] = `null`;

exports[`RedosDetector isSafe cases /.?[\\r\\n\\u2028-\\u2029].?$/s 2`] = `
{
"infinite": false,
"value": 1,
}
`;

exports[`RedosDetector isSafe cases /.?[\\r\\n\\u2028-\\u2029].?$/s 3`] = `
[
{
"trail": [
{
"a": {
"backreferenceStack": [],
"node": {
"end": {
"offset": 1,
},
"source": ".",
"start": {
"offset": 0,
},
},
"quantifierIterations": "removed",
},
"b": {
"backreferenceStack": [],
"node": {
"end": {
"offset": 21,
},
"source": "[\\r\\n\\u2028-\\u2029]",
"start": {
"offset": 2,
},
},
"quantifierIterations": "removed",
},
},
{
"a": {
"backreferenceStack": [],
"node": {
"end": {
"offset": 21,
},
"source": "[\\r\\n\\u2028-\\u2029]",
"start": {
"offset": 2,
},
},
"quantifierIterations": "removed",
},
"b": {
"backreferenceStack": [],
"node": {
"end": {
"offset": 22,
},
"source": ".",
"start": {
"offset": 21,
},
},
"quantifierIterations": "removed",
},
},
],
},
]
`;

exports[`RedosDetector isSafe cases /.?[\\r\\n\\u2028-\\u2029].?$/s 4`] = `"Regex is safe. There could be at most 1 backtrack."`;

exports[`RedosDetector isSafe cases /.?[^\\n\\r\\u2028-\\u2029]?$/ 1`] = `null`;

exports[`RedosDetector isSafe cases /.?[^\\n\\r\\u2028-\\u2029]?$/ 2`] = `
Expand Down
11 changes: 7 additions & 4 deletions src/character-reader/character-reader-level-0.ts
Original file line number Diff line number Diff line change
Expand Up @@ -77,9 +77,11 @@ export type CharacterReader = Reader<CharacterReaderValue>;

export function buildCharacterReader({
caseInsensitive,
dotAll,
node,
}: {
caseInsensitive: boolean;
dotAll: boolean;
node: MyRootNode;
}): CharacterReader {
switch (node.type) {
Expand All @@ -96,17 +98,18 @@ export function buildCharacterReader({
case 'value':
return buildValueCharacterReader({ caseInsensitive, node });
case 'dot':
return buildDotCharacterReader(node);
return buildDotCharacterReader({ dotAll, node });
case 'alternative':
return buildSequenceCharacterReader({
caseInsensitive,
dotAll,
nodes: node.body,
});
case 'disjunction':
return buildDisjunctionCharacterReader({ caseInsensitive, node });
return buildDisjunctionCharacterReader({ caseInsensitive, dotAll, node });
case 'group':
return buildGroupCharacterReader({ caseInsensitive, node });
return buildGroupCharacterReader({ caseInsensitive, dotAll, node });
case 'quantifier':
return buildQuantifierCharacterReader({ caseInsensitive, node });
return buildQuantifierCharacterReader({ caseInsensitive, dotAll, node });
}
}
7 changes: 6 additions & 1 deletion src/character-reader/character-reader-level-1.ts
Original file line number Diff line number Diff line change
Expand Up @@ -96,9 +96,11 @@ export type CharacterReaderLevel1 = Reader<
*/
export function buildCharacterReaderLevel1({
caseInsensitive,
dotAll,
node,
}: {
caseInsensitive: boolean;
dotAll: boolean;
node: MyRootNode;
}): CharacterReaderLevel1 {
const startThread = function* (
Expand Down Expand Up @@ -172,5 +174,8 @@ export function buildCharacterReaderLevel1({
return { bounded: false, preceedingZeroWidthEntries };
};

return startThread(buildCharacterReader({ caseInsensitive, node }), []);
return startThread(
buildCharacterReader({ caseInsensitive, dotAll, node }),
[],
);
}
4 changes: 3 additions & 1 deletion src/character-reader/character-reader-level-2.ts
Original file line number Diff line number Diff line change
Expand Up @@ -298,10 +298,12 @@ function* getGroupContentsReader({
*/
export function buildCharacterReaderLevel2({
caseInsensitive,
dotAll,
node,
nodeExtra,
}: {
caseInsensitive: boolean;
dotAll: boolean;
node: MyRootNode;
nodeExtra: NodeExtra;
}): CharacterReaderLevel2 {
Expand Down Expand Up @@ -573,7 +575,7 @@ export function buildCharacterReaderLevel2({
return startThread({
characterReader: buildForkableReader(
characterReaderLevel1ToInternalReader(
buildCharacterReaderLevel1({ caseInsensitive, node }),
buildCharacterReaderLevel1({ caseInsensitive, dotAll, node }),
),
),
groupContentsStore: new Map(),
Expand Down
4 changes: 3 additions & 1 deletion src/character-reader/character-reader-level-3.ts
Original file line number Diff line number Diff line change
Expand Up @@ -165,10 +165,12 @@ function* isReaderUnbounded(
*/
export function buildCharacterReaderLevel3({
caseInsensitive,
dotAll,
node,
nodeExtra,
}: {
caseInsensitive: boolean;
dotAll: boolean;
node: MyRootNode;
nodeExtra: NodeExtra;
}): CharacterReaderLevel3 {
Expand Down Expand Up @@ -227,7 +229,7 @@ export function buildCharacterReaderLevel3({

return startThread(
buildForkableReader(
buildCharacterReaderLevel2({ caseInsensitive, node, nodeExtra }),
buildCharacterReaderLevel2({ caseInsensitive, dotAll, node, nodeExtra }),
),
);
}
4 changes: 4 additions & 0 deletions src/cli.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ program
.command('check')
.argument('<regex pattern>', 'the regex pattern')
.option('--caseInsensitive', 'enable case insensitive mode', false)
.option('--dotAll', 'enable dot-all mode', false)
.option('--unicode', 'enable unicode mode', false)
.option(
'--maxBacktracks <number>',
Expand Down Expand Up @@ -74,11 +75,13 @@ program
resultsLimit,
timeout,
caseInsensitive,
dotAll,
unicode,
}: {
alwaysIncludeTrails: boolean;
caseInsensitive: boolean;
disableDowngrade: boolean;
dotAll: boolean;
json: boolean;
maxBacktracks: number;
maxSteps: number;
Expand All @@ -90,6 +93,7 @@ program
try {
const result = isSafePattern(pattern, {
caseInsensitive,
dotAll,
downgradePattern: !disableDowngrade,
maxBacktracks: coerceInfinity(maxBacktracks),
maxSteps: coerceInfinity(maxSteps),
Expand Down
14 changes: 7 additions & 7 deletions src/collect-results.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ export type WalkerResult = Readonly<{
export type CollectResultsInput = Readonly<{
atomicGroupOffsets: ReadonlySet<number>;
caseInsensitive: boolean;
dotAll: boolean;
maxBacktracks: number;
maxSteps: number;
node: MyRootNode;
Expand All @@ -35,18 +36,17 @@ export function collectResults({
maxSteps,
timeout,
caseInsensitive,
dotAll,
}: CollectResultsInput): WalkerResult {
const nodeExtra = buildNodeExtra(node);
const leftStreamReader = buildCharacterReaderLevel3({
const input = {
caseInsensitive,
dotAll,
node,
nodeExtra,
});
const rightStreamReader = buildCharacterReaderLevel3({
caseInsensitive,
node,
nodeExtra,
});
};
const leftStreamReader = buildCharacterReaderLevel3(input);
const rightStreamReader = buildCharacterReaderLevel3(input);
const reader = buildCheckerReader({
atomicGroupOffsets,
leftStreamReader,
Expand Down
5 changes: 4 additions & 1 deletion src/nodes/disjunction.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,24 +9,27 @@ import { MyFeatures } from '../parse';

export function buildDisjunctionCharacterReader({
caseInsensitive,
dotAll,
node,
}: {
caseInsensitive: boolean;
dotAll: boolean;
node: Disjunction<MyFeatures>;
}): CharacterReader {
return chainReaders([
buildArrayReader(
node.body.slice(0, -1).map((part) => {
return {
reader: (): CharacterReader =>
buildCharacterReader({ caseInsensitive, node: part }),
buildCharacterReader({ caseInsensitive, dotAll, node: part }),
subType: null,
type: characterReaderTypeSplit,
};
}),
),
buildCharacterReader({
caseInsensitive,
dotAll,
node: node.body[node.body.length - 1],
}),
]);
Expand Down
22 changes: 15 additions & 7 deletions src/nodes/dot.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,24 @@ import {
} from '../character-reader/character-reader-level-0';
import { Dot } from 'regjsparser';

export function* buildDotCharacterReader(node: Dot): CharacterReader {
export function* buildDotCharacterReader({
dotAll,
node,
}: {
dotAll: boolean;
node: Dot;
}): CharacterReader {
yield {
characterGroups: {
negated: true,
// [\n\r\u2028-\u2029]
ranges: [
[10, 10],
[13, 13],
[8232, 8233],
],
ranges: dotAll
? []
: // [\n\r\u2028-\u2029]
[
[10, 10],
[13, 13],
[8232, 8233],
],
unicodePropertyEscapes: new Set(),
},
node,
Expand Down
9 changes: 8 additions & 1 deletion src/nodes/group.ts
Original file line number Diff line number Diff line change
Expand Up @@ -58,9 +58,11 @@ export function getLookaheadStack(stack: Stack): LookaheadStack {

export function buildGroupCharacterReader({
caseInsensitive,
dotAll,
node,
}: {
caseInsensitive: boolean;
dotAll: boolean;
node: Group<MyFeatures>;
}): CharacterReader {
switch (node.behavior) {
Expand All @@ -76,6 +78,7 @@ export function buildGroupCharacterReader({
map(
buildSequenceCharacterReader({
caseInsensitive,
dotAll,
nodes: node.body,
}),
(value) => {
Expand All @@ -95,7 +98,11 @@ export function buildGroupCharacterReader({
case 'ignore':
case 'normal': {
return map(
buildSequenceCharacterReader({ caseInsensitive, nodes: node.body }),
buildSequenceCharacterReader({
caseInsensitive,
dotAll,
nodes: node.body,
}),
(value) => {
return {
...value,
Expand Down
8 changes: 7 additions & 1 deletion src/nodes/quantifier.ts
Original file line number Diff line number Diff line change
Expand Up @@ -63,9 +63,11 @@ export function buildQuantifierTrail(

export function buildQuantifierCharacterReader({
caseInsensitive,
dotAll,
node,
}: {
caseInsensitive: boolean;
dotAll: boolean;
node: Quantifier<MyFeatures>;
}): CharacterReader {
const { min, max = Infinity } = node;
Expand All @@ -92,7 +94,11 @@ export function buildQuantifierCharacterReader({
]
: []),
(): CharacterReader =>
buildCharacterReader({ caseInsensitive, node: node.body[0] }),
buildCharacterReader({
caseInsensitive,
dotAll,
node: node.body[0],
}),
]),
(value) => {
const inInfinitePortion = i >= min && i >= 1 && max === Infinity;
Expand Down
Loading

0 comments on commit 0a9ff3b

Please sign in to comment.