From 33bc1bba22e4838ffedd56dd6476cccbe90f2a2b Mon Sep 17 00:00:00 2001 From: Elizaveta Egorova Date: Wed, 30 Oct 2024 14:34:17 +0300 Subject: [PATCH] =?UTF-8?q?AG-36910=20Improve=20'href-sanitizer'=20?= =?UTF-8?q?=E2=80=94=20add=20'removeParam'=20and=20'removeHash'=20values?= =?UTF-8?q?=20in=20'transform'=20option.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Squashed commit of the following: commit 737dda8ddeba4543e73b655906eb60060ee4bb5d Author: jellizaveta Date: Mon Oct 28 20:26:25 2024 +0300 add comment commit da34a66a638020f70f757b91ba7dbab9c8c044b8 Author: jellizaveta Date: Mon Oct 28 20:04:41 2024 +0300 update script commit f3c3616065d6af3b319a1f847cf27d053c833d71 Author: jellizaveta Date: Mon Oct 28 19:49:42 2024 +0300 fix comments, update script commit 99439d67073afc4840d32d30533d2219e1422a66 Author: jellizaveta Date: Mon Oct 28 15:45:40 2024 +0300 refactor commit bafa8498627f7cebddb26c96665a086d85d54450 Author: jellizaveta Date: Mon Oct 28 15:16:15 2024 +0300 update compatibility table commit 83e1d96db2cde0df89ae26ab2b5503e2387ffede Merge: 7fb9cf06 e4cb5f3e Author: jellizaveta Date: Mon Oct 28 15:05:16 2024 +0300 Merge branch 'fix/AG-36910' of ssh://bit.int.agrd.dev:7999/adguard-filters/scriptlets into fix/AG-36910 commit 7fb9cf06168d072205190f3d1719e8b8918fe327 Author: jellizaveta Date: Mon Oct 28 15:01:33 2024 +0300 update var names, docs, conditions commit c601f8dedbeb96246eb1d2879980b1320261e90f Author: jellizaveta Date: Fri Oct 25 20:37:26 2024 +0300 update docs commit ff6f0477669c579b4b037326c4a0ee762b62e9e2 Author: jellizaveta Date: Fri Oct 25 20:31:22 2024 +0300 moved the calculations inside the function commit e4cb5f3e795b6e8a8c4a0999340c168942935908 Author: Slava Leleka Date: Fri Oct 25 20:10:51 2024 +0300 src/scriptlets/href-sanitizer.ts edited online with Bitbucket commit 9736333e41dc0c74710ce2fa771edaa162570b55 Author: jellizaveta Date: Fri Oct 25 19:59:04 2024 +0300 fix docs commit 
1ecfacb51f3051c73a18fa11ce893c42e50db8d2 Merge: 5ccadb6f a875fdfc Author: jellizaveta Date: Fri Oct 25 19:47:47 2024 +0300 merge master, resolve conflicts commit 5ccadb6f59612fb4e7a1593c6b94654441d1b214 Author: jellizaveta Date: Fri Oct 25 19:37:30 2024 +0300 AG-36910 Improve 'href-sanitizer' — add 'removeParam' and 'removeHash' values in 'transform' option. #460 --- CHANGELOG.md | 2 + scripts/compatibility-table.json | 2 +- src/scriptlets/href-sanitizer.ts | 132 ++++++++++++++++++++++-- tests/scriptlets/href-sanitizer.test.js | 58 ++++++++++- 4 files changed, 183 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 54ea01ff1..dcd19a423 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,7 @@ The format is based on [Keep a Changelog], and this project adheres to [Semantic - `prevent-canvas` scriptlet [#451] - `parentSelector` option to search for nodes for `remove-node-text` scriptlet [#397] - `transform` option with `base64decode` value for `href-sanitizer` scriptlet [#455] +- `removeParam` and `removeHash` values in `transform` option for `href-sanitizer` scriptlet [#460] - new values to `set-cookie` and `set-local-storage-item` scriptlets: `forbidden`, `forever` [#458] ### Changed @@ -35,6 +36,7 @@ The format is based on [Keep a Changelog], and this project adheres to [Semantic [#397]: https://github.com/AdguardTeam/Scriptlets/issues/397 [#458]: https://github.com/AdguardTeam/Scriptlets/issues/458 [#457]: https://github.com/AdguardTeam/Scriptlets/issues/457 +[#460]: https://github.com/AdguardTeam/Scriptlets/issues/460 ## [v1.12.1] - 2024-09-20 diff --git a/scripts/compatibility-table.json b/scripts/compatibility-table.json index ee586eb0b..8dd6caa87 100644 --- a/scripts/compatibility-table.json +++ b/scripts/compatibility-table.json @@ -212,7 +212,7 @@ }, { "adg": "set-attr", - "ubo": "set-attr.js" + "ubo": "set-attr.js (removed)" }, { "adg": "set-constant", diff --git a/src/scriptlets/href-sanitizer.ts 
b/src/scriptlets/href-sanitizer.ts index 06c8576b5..da6c479e3 100644 --- a/src/scriptlets/href-sanitizer.ts +++ b/src/scriptlets/href-sanitizer.ts @@ -31,8 +31,12 @@ import { * - `text` — use the text content of the matched element, * - `[<attribute-name>]` copy the value from attribute `attribute-name` on the same element, * - `?<parameter-name>` copy the value from URL parameter `parameter-name` of the same element's `href` attribute. - * - `transform` — optional, defaults to no transforming: + * - `transform` — optional, defaults to no transforming. Possible values: * - `base64decode` — decode the base64 string from specified attribute. + * - `removeHash` — remove the hash from the URL. + * - `removeParam[:<parameters>]` — remove the specified parameters from the URL, + * where `<parameters>` is a comma-separated list of parameter names; + * if no parameter is specified, remove all parameters. * * > Note that in the case where the discovered value does not correspond to a valid URL with the appropriate * > http or https protocols, the value will not be set. @@ -111,6 +115,60 @@ import { * * ``` * + * 5. Remove the hash from the URL: + * + * ```adblock + * example.org#%#//scriptlet('href-sanitizer', 'a[href*="foo.com"]', '[href]', 'removeHash') + * ``` + * + * ```html + * + *
+ * + *
+ * + * + *
+ * + *
+ * ``` + * + * 6. Remove all parameters from the URL: + * + * ```adblock + * example.org#%#//scriptlet('href-sanitizer', 'a[href*="foo.com"]', '[href]', 'removeParam') + * ``` + * + * ```html + * + *
+ * + *
+ * + * + *
+ * + *
+ * ``` + * + * 7. Remove the specified parameter(s) from the URL: + * + * ```adblock + * example.org#%#//scriptlet('href-sanitizer', 'a[href*="foo.com"]', '[href]', 'removeParam:utm_source,utm_medium') + * ``` + * + * ```html + * + *
+ * + *
+ * + * + *
+ * + *
+ * ``` + * * @added v1.10.25. */ @@ -125,7 +183,13 @@ export function hrefSanitizer( return; } - const BASE64_TRANSFORM_MARKER = 'base64decode'; + // transform markers + const BASE64_DECODE_TRANSFORM_MARKER = 'base64decode'; + const REMOVE_HASH_TRANSFORM_MARKER = 'removeHash'; + const REMOVE_PARAM_TRANSFORM_MARKER = 'removeParam'; + // separator markers + const MARKER_SEPARATOR = ':'; + const COMMA = ','; // Regular expression to find not valid characters at the beginning and at the end of the string, // \x21-\x7e is a range that includes the ASCII characters from ! (hex 21) to ~ (hex 7E). @@ -337,6 +401,56 @@ export function hrefSanitizer( return validEncodedHash ? decodeBase64SeveralTimes(validEncodedHash, DECODE_ATTEMPTS_NUMBER) : ''; }; + /** + * Removes the hash from the URL. + * @param url URL to remove the hash from + * @returns URL without the hash or empty string if no hash is found + */ + const removeHash = (url: string) => { + const urlObj = new URL(url, window.location.origin); + + if (!urlObj.hash) { + return ''; + } + + urlObj.hash = ''; + return urlObj.toString(); + }; + + /** + * Removes the specified parameter from the URL. 
+ * @param url URL to remove the parameter from + * @param transformValue parameter value(s) to remove with marker + * @returns URL without the parameter(s) or empty string if no parameter is found + */ + const removeParam = (url: string, transformValue: string) => { + const urlObj = new URL(url, window.location.origin); + + // get the parameter values to remove + const paramNamesToRemoveStr = transformValue.split(MARKER_SEPARATOR)[1]; + + if (!paramNamesToRemoveStr) { + urlObj.search = ''; + return urlObj.toString(); + } + + const initSearchParamsLength = urlObj.searchParams.toString().length; + + const removeParams = paramNamesToRemoveStr.split(COMMA); + removeParams.forEach((param) => { + if (urlObj.searchParams.has(param)) { + urlObj.searchParams.delete(param); + } + }); + + // if the parameter(s) is not found, return empty string + if (initSearchParamsLength === urlObj.searchParams.toString().length) { + return ''; + } + + return urlObj.toString(); + }; + /** * Extracts the base64 part from a string. * If no base64 string is found, `null` is returned. @@ -344,7 +458,7 @@ export function hrefSanitizer( * @returns The base64 part of the string, or `null` if none is found. 
*/ const decodeBase64URL = (url: string) => { - const { search, hash } = new URL(url); + const { search, hash } = new URL(url, document.location.href); if (search.length > 0) { return decodeSearchString(search); @@ -394,13 +508,19 @@ export function hrefSanitizer( return; } let newHref = extractNewHref(elem, attribute); - // apply transform if specified if (transform) { - switch (transform) { - case BASE64_TRANSFORM_MARKER: + switch (true) { + case transform === BASE64_DECODE_TRANSFORM_MARKER: newHref = base64Decode(newHref); break; + case transform === REMOVE_HASH_TRANSFORM_MARKER: + newHref = removeHash(newHref); + break; + case transform.startsWith(REMOVE_PARAM_TRANSFORM_MARKER): { + newHref = removeParam(newHref, transform); + break; + } default: logMessage(source, `Invalid transform option: "${transform}"`); return; diff --git a/tests/scriptlets/href-sanitizer.test.js b/tests/scriptlets/href-sanitizer.test.js index a6e491990..def85765e 100644 --- a/tests/scriptlets/href-sanitizer.test.js +++ b/tests/scriptlets/href-sanitizer.test.js @@ -27,10 +27,12 @@ const createElem = (href, text, attributeName, attributeValue) => { }; const removeElem = () => { - const elem = document.getElementById('testHref'); - if (elem) { - elem.remove(); - } + const elem = document.querySelectorAll('#testHref'); + elem.forEach((el) => { + if (el) { + el.remove(); + } + }); }; const beforeEach = () => { @@ -64,6 +66,54 @@ test('Checking if alias name works', (assert) => { assert.strictEqual(codeByAdgParams, codeByUboParams, 'ubo name - ok'); }); +test('Sanitize href - remove all parameters from href', (assert) => { + const expectedHref = 'https://foo.com/123123'; + const elem = createElem('https://foo.com/123123?utm_source=nova&utm_medium=tg&utm_campaign=main'); + const selector = 'a[href^="https://foo.com/123123"]'; + + const scriptletArgs = [selector, '[href]', 'removeParam']; + runScriptlet(name, scriptletArgs); + + assert.strictEqual(elem.getAttribute('href'), expectedHref, 'all 
params from href was removed'); + assert.strictEqual(window.hit, 'FIRED'); +}); + +test('Sanitize href - remove parameters from href', (assert) => { + const expectedHref = 'https://foo.com/watch?utm_campaign=main'; + const elem = createElem('https://foo.com/watch?v=dbjPnXaacAU&pp=ygUEdGVzdA%3D%3D&utm_campaign=main'); + const selector = 'a[href^="https://foo.com/watch"]'; + + const scriptletArgs = [selector, '[href]', 'removeParam:v,pp']; + runScriptlet(name, scriptletArgs); + + assert.strictEqual(elem.getAttribute('href'), expectedHref, 'v and pp params from href was removed'); + assert.strictEqual(window.hit, 'FIRED'); +}); + +test('Sanitize href - remove parameter from href', (assert) => { + const expectedHref = 'https://example.org/watch?v=dbjPnXaacAU'; + const elem = createElem('https://example.org/watch?v=dbjPnXaacAU&pp=ygUEdGVzdA%3D%3D'); + const selector = 'a[href^="https://example.org/watch"]'; + + const scriptletArgs = [selector, '[href]', 'removeParam:pp']; + runScriptlet(name, scriptletArgs); + + assert.strictEqual(elem.getAttribute('href'), expectedHref, 'pp param from href was removed'); + assert.strictEqual(window.hit, 'FIRED'); +}); + +test('Sanitize href - remove hash', (assert) => { + const expectedHref = 'https://example.org/?article'; + const elem = createElem('https://example.org/?article#utm_source=Facebook'); + const selector = 'a[href]'; + + const scriptletArgs = [selector, '[href]', 'removeHash']; + runScriptlet(name, scriptletArgs); + + assert.strictEqual(elem.getAttribute('href'), expectedHref, 'hash from href was removed'); + assert.strictEqual(window.hit, 'FIRED'); +}); + test('Sanitize href - no URL was found in base64', (assert) => { // encoded string is 'some text, no urls' const hrefWithBase64 = 'http://foo.com/#c29tZSB0ZXh0LCBubyB1cmxz';