From e780b38605b2dae351d0f7daae6522477a047dd6 Mon Sep 17 00:00:00 2001 From: Ruslan Kurbanali Date: Fri, 18 Oct 2024 18:44:11 +0300 Subject: [PATCH] AG-36228 Optimized performance of parsing uBlock filter parameters Merge in ADGUARD-FILTERS/tsurlfilter from fix/AG-36228 to master Squashed commit of the following: commit 23c9ac9f44834ceee24d40abf835ec9e4dbb88b7 Author: Slava Leleka Date: Fri Oct 18 13:49:12 2024 +0300 Revert "fix names" This reverts commit ec3e324084c616e2762af8446c434bbef3222e2c. commit ec3e324084c616e2762af8446c434bbef3222e2c Author: Slava Leleka Date: Fri Oct 18 13:48:00 2024 +0300 fix names commit 11898ac3d35f09da93a0eb1c33e7408b3b8b6035 Author: Kurbanali Ruslan Date: Thu Oct 17 14:08:46 2024 +0500 added changelog commit dfb2481402255ee99ca992fd89169e2b87892d07 Author: Kurbanali Ruslan Date: Thu Oct 17 13:42:54 2024 +0500 added test cases commit ef598e066bc99531c93d04244756fe155e8b6459 Author: Kurbanali Ruslan Date: Thu Oct 17 13:42:48 2024 +0500 fixed slow code commit d0411977730cc98ada0c51dfd36500b6e2887a37 Author: Kurbanali Ruslan Date: Thu Oct 17 13:42:29 2024 +0500 added params to string utils --- packages/agtree/CHANGELOG.md | 8 + .../src/parser/misc/ubo-parameter-list.ts | 55 ++- packages/agtree/src/utils/string.ts | 8 +- .../parser/misc/ubo-parameter-list.test.ts | 335 +++++++++++++++++- 4 files changed, 391 insertions(+), 15 deletions(-) diff --git a/packages/agtree/CHANGELOG.md b/packages/agtree/CHANGELOG.md index 6fe3ee7e1..2fa84cd4d 100644 --- a/packages/agtree/CHANGELOG.md +++ b/packages/agtree/CHANGELOG.md @@ -8,6 +8,14 @@ The format is based on [Keep a Changelog][keepachangelog], and this project adhe [keepachangelog]: https://keepachangelog.com/en/1.0.0/ [semver]: https://semver.org/spec/v2.0.0.html +## Unreleased + +### Fixed + +- Optimized performance of parsing uBlock filter parameters [AdguardBrowserExtension#2962]. + +[AdguardBrowserExtension#2962]: https://github.com/AdguardTeam/AdguardBrowserExtension/issues/2962 + ## [2.1.2] - 2024-09-19 ### Fixed diff --git a/packages/agtree/src/parser/misc/ubo-parameter-list.ts b/packages/agtree/src/parser/misc/ubo-parameter-list.ts index 07ca70b29..4ff22065a 100644 --- a/packages/agtree/src/parser/misc/ubo-parameter-list.ts +++ b/packages/agtree/src/parser/misc/ubo-parameter-list.ts @@ -1,7 +1,7 @@ /* eslint-disable no-param-reassign */ import { StringUtils } from '../../utils/string'; import { type ParameterList } from '../common'; -import { COMMA } from '../../utils/constants'; +import { COMMA, ESCAPE_CHARACTER } from '../../utils/constants'; import { defaultParserOptions } from '../options'; import { ValueParser } from './value'; import { AdblockSyntaxError } from '../../errors/adblock-syntax-error'; @@ -61,13 +61,6 @@ export class UboParameterListParser extends ParameterListParser { const nextSeparatorIndex = StringUtils.skipWS(raw, possibleClosingQuoteIndex + 1); if (nextSeparatorIndex === length) { - if (requireQuotes) { - throw new AdblockSyntaxError( - 'Expected separator, got end of string', - baseOffset + nextSeparatorIndex, - baseOffset + length, - ); - } // If the separator is not found, the param end is the end of the string paramEnd = StringUtils.skipWSBack(raw, length - 1) + 1; offset = length; @@ -83,13 +76,51 @@ export class UboParameterListParser extends ParameterListParser { baseOffset + length, ); } - // Param end should be the last separator before the quote - offset = StringUtils.findNextUnescapedCharacterBackwards( + + /** + * At that point found `possibleClosingQuoteIndex` is wrong + * | is `offset` + * ~ is `possibleClosingQuoteIndex` + * ^ is `nextSeparatorIndex` + * + * Example 1: "abc, ').cba='1'" + * | ~^ + * Example 2: "abc, ').cba, '1'" + * | ~^ + * Example 3: "abc, ').cba='1', cba" + * | ~^ + * + * Search for separator before `possibleClosingQuoteIndex` + */ + + const separatorIndexBeforeQuote = StringUtils.findNextUnescapedCharacterBackwards( raw, separator, possibleClosingQuoteIndex, - ) + 1; - paramEnd = StringUtils.skipWSBack(raw, offset - 2) + 1; + ESCAPE_CHARACTER, + offset + 1, + ); + if (separatorIndexBeforeQuote !== -1) { + // Found separator before (Example 2) + paramEnd = StringUtils.skipWSBack(raw, separatorIndexBeforeQuote - 1) + 1; + offset = separatorIndexBeforeQuote + 1; + } else { + // Didn't found separator before, search after + const separatorIndexAfterQuote = StringUtils.findNextUnescapedCharacter( + raw, + separator, + possibleClosingQuoteIndex, + ); + if (separatorIndexAfterQuote !== -1) { + // We found separator after (Example 3) + paramEnd = StringUtils.skipWSBack(raw, separatorIndexAfterQuote - 1) + 1; + offset = separatorIndexAfterQuote + 1; + } else { + // If the separator is not found, the param end is the end of the string (Example 1) + paramEnd = StringUtils.skipWSBack(raw, length - 1) + 1; + offset = length; + } + } } } else { if (requireQuotes) { diff --git a/packages/agtree/src/utils/string.ts b/packages/agtree/src/utils/string.ts index 8ef18df0c..2a0bb49ac 100644 --- a/packages/agtree/src/utils/string.ts +++ b/packages/agtree/src/utils/string.ts @@ -35,6 +35,7 @@ export class StringUtils { * @param searchedCharacter - Searched character * @param start - Start index * @param escapeCharacter - Escape character, \ by default + * @param end - End index (excluded) * @returns Index or -1 if the character not found */ public static findNextUnescapedCharacter( @@ -42,8 +43,9 @@ export class StringUtils { searchedCharacter: string, start = 0, escapeCharacter: string = ESCAPE_CHARACTER, + end = pattern.length, ): number { - for (let i = start; i < pattern.length; i += 1) { + for (let i = start; i < end; i += 1) { // The searched character cannot be preceded by an escape if (pattern[i] === searchedCharacter && pattern[i - 1] !== escapeCharacter) { return i; @@ -59,6 +61,7 @@ export class StringUtils { * @param searchedCharacter - Searched character * @param start - Start index * @param escapeCharacter - Escape character, \ by default + * @param end - End index (Included) * @returns Index or -1 if the character not found */ public static findNextUnescapedCharacterBackwards( @@ -66,8 +69,9 @@ export class StringUtils { searchedCharacter: string, start = pattern.length - 1, escapeCharacter: string = ESCAPE_CHARACTER, + end = 0, ): number { - for (let i = start; i >= 0; i -= 1) { + for (let i = start; i >= end; i -= 1) { // The searched character cannot be preceded by an escape if (pattern[i] === searchedCharacter && pattern[i - 1] !== escapeCharacter) { return i; diff --git a/packages/agtree/test/parser/misc/ubo-parameter-list.test.ts b/packages/agtree/test/parser/misc/ubo-parameter-list.test.ts index 2615f5f53..75d6797a0 100644 --- a/packages/agtree/test/parser/misc/ubo-parameter-list.test.ts +++ b/packages/agtree/test/parser/misc/ubo-parameter-list.test.ts @@ -3,9 +3,121 @@ import { NodeExpectContext, type NodeExpectFn } from '../../helpers/node-utils'; import { AdblockSyntaxError } from '../../../src/errors/adblock-syntax-error'; import { UboParameterListParser } from '../../../src/parser/misc/ubo-parameter-list'; import { defaultParserOptions } from '../../../src/parser/options'; +import { type ParameterList } from '../../../src/parser/common'; describe('UboParameterListParser', () => { - // valid cases are tested in `../cosmetic/body/ubo-scriptlet.test.ts` + describe('UboParameterListParser.parse - valid cases when requireQuotes enabled', () => { + test.each<{ actual: string; expected: ParameterList }>([ + { + actual: String.raw`'abc'`, + expected: { + type: 'ParameterList', + start: 0, + end: 5, + children: [ + { + type: 'Value', + start: 0, + end: 5, + value: String.raw`'abc'`, + }, + ], + }, + }, + { + actual: String.raw` '' `, + expected: { + type: 'ParameterList', + start: 0, + end: 5, + children: [ + { + type: 'Value', + start: 1, + end: 3, + value: String.raw`''`, + }, + ], + }, + }, + { + actual: String.raw`'abc', 'cba'`, + expected: { + type: 'ParameterList', + start: 0, + end: 12, + children: [ + { + type: 'Value', + start: 0, + end: 5, + value: String.raw`'abc'`, + }, + { + type: 'Value', + start: 7, + end: 12, + value: String.raw`'cba'`, + }, + ], + }, + }, + { + actual: String.raw` '' , '' `, + expected: { + type: 'ParameterList', + start: 0, + end: 12, + children: [ + { + type: 'Value', + start: 1, + end: 3, + value: String.raw`''`, + }, + { + type: 'Value', + start: 8, + end: 10, + value: String.raw`''`, + }, + ], + }, + }, + { + actual: String.raw` "" , "", "" `, + expected: { + type: 'ParameterList', + start: 0, + end: 17, + children: [ + { + type: 'Value', + start: 1, + end: 3, + value: String.raw`""`, + }, + { + type: 'Value', + start: 8, + end: 10, + value: String.raw`""`, + }, + { + type: 'Value', + start: 13, + end: 15, + value: String.raw`""`, + }, + ], + }, + }, + ])("should parse correctly on input '$actual'", ({ actual, expected }) => { + const result = UboParameterListParser.parse(actual, defaultParserOptions, 0, COMMA, true); + + expect(result).toMatchObject(expected); + }); + }); describe('UboParameterListParser.parse - invalid cases when requireQuotes enabled', () => { test.each<{ actual: string; expected: NodeExpectFn }>([ @@ -59,6 +171,16 @@ describe('UboParameterListParser', () => { ); }, }, + { + actual: String.raw`'abc' 'bbb'`, + // ~~~~~ + expected: (context: NodeExpectContext): AdblockSyntaxError => { + return new AdblockSyntaxError( + "Expected separator, got: '''", + ...context.toTuple(context.getRangeFor(String.raw`'bbb'`)), + ); + }, + }, ])("should throw on input: '$actual'", ({ actual, expected: expectedFn }) => { const fn = jest.fn(() => UboParameterListParser.parse(actual, defaultParserOptions, 0, COMMA, true)); @@ -75,4 +197,215 @@ describe('UboParameterListParser', () => { expect(error).toHaveProperty('end', expected.end); }); }); + + describe('UboParameterListParser.parse - valid cases when requireQuotes disabled', () => { + test.each<{ actual: string; expected: ParameterList; }>([ + { + actual: String.raw`abc`, + expected: { + type: 'ParameterList', + start: 0, + end: 3, + children: [ + { + type: 'Value', + start: 0, + end: 3, + value: String.raw`abc`, + }, + ], + }, + }, + { + actual: String.raw`abc, cba`, + expected: { + type: 'ParameterList', + start: 0, + end: 8, + children: [ + { + type: 'Value', + start: 0, + end: 3, + value: String.raw`abc`, + }, + { + type: 'Value', + start: 5, + end: 8, + value: String.raw`cba`, + }, + ], + }, + }, + { + actual: String.raw`abc, cba,`, + expected: { + type: 'ParameterList', + start: 0, + end: 9, + children: [ + { + type: 'Value', + start: 0, + end: 3, + value: String.raw`abc`, + }, + { + type: 'Value', + start: 5, + end: 8, + value: String.raw`cba`, + }, + null, + ], + }, + }, + { + actual: String.raw`abc, cba, `, + expected: { + type: 'ParameterList', + start: 0, + end: 11, + children: [ + { + type: 'Value', + start: 0, + end: 3, + value: String.raw`abc`, + }, + { + type: 'Value', + start: 5, + end: 8, + value: String.raw`cba`, + }, + null, + ], + }, + }, + { + actual: String.raw`abc, , cba,`, + expected: { + type: 'ParameterList', + start: 0, + end: 11, + children: [ + { + type: 'Value', + start: 0, + end: 3, + value: String.raw`abc`, + }, + null, + { + type: 'Value', + start: 7, + end: 10, + value: String.raw`cba`, + }, + null, + ], + }, + }, + { + actual: String.raw`'abc `, + expected: { + type: 'ParameterList', + start: 0, + end: 7, + children: [ + { + type: 'Value', + start: 0, + end: 4, + value: String.raw`'abc`, + }, + ], + }, + }, + { + actual: String.raw`abc, ').cba='1'`, + expected: { + type: 'ParameterList', + start: 0, + end: 15, + children: [ + { + type: 'Value', + start: 0, + end: 3, + value: String.raw`abc`, + }, + { + type: 'Value', + start: 5, + end: 15, + value: String.raw`').cba='1'`, + }, + ], + }, + }, + { + actual: String.raw`abc, ').cba, '1'`, + expected: { + type: 'ParameterList', + start: 0, + end: 16, + children: [ + { + type: 'Value', + start: 0, + end: 3, + value: String.raw`abc`, + }, + { + type: 'Value', + start: 5, + end: 11, + value: String.raw`').cba`, + }, + { + type: 'Value', + start: 13, + end: 16, + value: String.raw`'1'`, + }, + ], + }, + }, + { + actual: String.raw`abc, ').cba='1', cba`, + expected: { + type: 'ParameterList', + start: 0, + end: 20, + children: [ + { + type: 'Value', + start: 0, + end: 3, + value: String.raw`abc`, + }, + { + type: 'Value', + start: 5, + end: 15, + value: String.raw`').cba='1'`, + }, + { + type: 'Value', + start: 17, + end: 20, + value: String.raw`cba`, + }, + ], + }, + }, + ])("should parse correctly on input '$actual'", ({ actual, expected }) => { + const result = UboParameterListParser.parse(actual, defaultParserOptions, 0, COMMA, false); + + expect(result).toMatchObject(expected); + }); + }); });