From 1de3e88e4756414077825a041d83beec34fb4e4e Mon Sep 17 00:00:00 2001 From: Elizaveta Egorova Date: Wed, 16 Oct 2024 19:11:35 +0300 Subject: [PATCH] AG-36282 Add 'transform' option with 'base64decode' value for 'href-sanitizer' scriptlet Squashed commit of the following: commit 66df93fea4a37c6e4e999182e7ae6ff2cb4d3d86 Author: jellizaveta Date: Wed Oct 16 13:10:27 2024 +0300 fix description, update script commit 8d6f7f0a49a5fdde9c0f96cfe5fa10915b32ae5a Author: jellizaveta Date: Tue Oct 15 21:22:36 2024 +0300 simplify code, add constant commit 6f6d257bd8c428479cfa5e4a538cb0e82286e2c7 Author: jellizaveta Date: Tue Oct 15 20:21:22 2024 +0300 fix name, comment, simplify code commit cd29a52d62043da0204b0f1d92b580da011cf85d Author: Slava Leleka Date: Tue Oct 15 19:50:38 2024 +0300 src/scriptlets/href-sanitizer.ts edited online with Bitbucket commit 66569648cecf179ddc67a5f7a283f3eebdcbde9b Author: Slava Leleka Date: Tue Oct 15 19:50:31 2024 +0300 src/scriptlets/href-sanitizer.ts edited online with Bitbucket commit cc147696392529bf27b9a97937c2200854448665 Author: Slava Leleka Date: Tue Oct 15 19:50:26 2024 +0300 src/scriptlets/href-sanitizer.ts edited online with Bitbucket commit 82ee60f33ea70668b4617d3b5b628dc2ad0cb0f9 Merge: ffcfd670 901cb2e8 Author: jellizaveta Date: Tue Oct 15 16:13:54 2024 +0300 merge master commit ffcfd670d88186383d56ca2ea2ea023e445bf221 Merge: 83ee0aa6 dac16b70 Author: jellizaveta Date: Mon Oct 14 17:08:24 2024 +0300 resolve conflicts commit 83ee0aa6184fe1a44461a87a8e9aa2fa7eb7f344 Author: jellizaveta Date: Mon Oct 14 17:03:26 2024 +0300 add the ability to decode base64 string multiple times, fix typo, decode search params, add tests commit 436985f08beb54a90961d58ebea288fda6fdeac5 Author: jellizaveta Date: Mon Oct 14 13:19:25 2024 +0300 fix typo commit 5eb91b9f17656edf3711a7c7ac62a320a8888ab2 Author: jellizaveta Date: Fri Oct 11 21:40:00 2024 +0300 fix indent in docs commit f8384c64d2881e1a500e3ce5c27ec0b5465de1ee Merge: c81eeb64 0ca98f3f Author: 
jellizaveta Date: Fri Oct 11 20:18:16 2024 +0300 merge master commit c81eeb64597ccc3ebe1eb53a246338ef803ce25d Author: jellizaveta Date: Fri Oct 11 20:08:58 2024 +0300 AG-36282 Add 'transform' option with 'base64decode' value for 'href-sanitizer' scriptlet. #455 --- CHANGELOG.md | 2 + src/scriptlets/href-sanitizer.ts | 218 +++++++++++++++++++++++- tests/scriptlets/href-sanitizer.test.js | 128 ++++++++++++-- 3 files changed, 334 insertions(+), 14 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7280a86a5..5ce5afd5b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ The format is based on [Keep a Changelog], and this project adheres to [Semantic - `prevent-canvas` scriptlet [#451] - `parentSelector` option to search for nodes for `remove-node-text` scriptlet [#397] +- `transform` option with `base64decode` value for `href-sanitizer` scriptlet [#455] - new values to `set-cookie` and `set-local-storage-item` scriptlets: `forbidden`, `forever` [#458] ### Changed @@ -27,6 +28,7 @@ The format is based on [Keep a Changelog], and this project adheres to [Semantic [Unreleased]: https://github.com/AdguardTeam/Scriptlets/compare/v1.12.1...HEAD [#451]: https://github.com/AdguardTeam/Scriptlets/issues/451 [#415]: https://github.com/AdguardTeam/Scriptlets/issues/415 +[#455]: https://github.com/AdguardTeam/Scriptlets/issues/455 [#414]: https://github.com/AdguardTeam/Scriptlets/issues/414 [#441]: https://github.com/AdguardTeam/Scriptlets/issues/441 [#397]: https://github.com/AdguardTeam/Scriptlets/issues/397 diff --git a/src/scriptlets/href-sanitizer.ts b/src/scriptlets/href-sanitizer.ts index b49d3e65f..06c8576b5 100644 --- a/src/scriptlets/href-sanitizer.ts +++ b/src/scriptlets/href-sanitizer.ts @@ -22,15 +22,20 @@ import { * ### Syntax * * ```text - * example.org#%#//scriptlet('href-sanitizer', selector[, attribute]) + * example.org#%#//scriptlet('href-sanitizer', selector[, attribute, [ transform]]) * ``` * * - `selector` — required, a CSS selector to 
match the elements to be sanitized, * which should be anchor elements (`<a>`) with `href` attribute. * - `attribute` — optional, default to `text`: * - `text` — use the text content of the matched element, - * - `[attribute-name]` copy the value from attribute `attribute-name` on the same element, - * - `?parameter` copy the value from URL parameter `parameter` of the same element's `href` attribute. + * - `[<attribute-name>]` copy the value from attribute `attribute-name` on the same element, + * - `?<parameter-name>` copy the value from URL parameter `parameter-name` of the same element's `href` attribute. + * - `transform` — optional, defaults to no transforming: + * - `base64decode` — decode the base64 string from specified attribute. + * + * > Note that in the case where the discovered value does not correspond to a valid URL with the appropriate + * > http or https protocols, the value will not be set. * * ### Examples * @@ -88,6 +93,24 @@ import { * * ``` * + * 4. Decode the base64 string from specified attribute: + * + * ```adblock + * example.org#%#//scriptlet('href-sanitizer', 'a[href*="foo.com"]', '[href]', 'base64decode') + * ``` + * + * ```html + * <!-- before --> + * <div>
+ * <a href="http://www.foo.com/out/?aHR0cDovL2V4YW1wbGUuY29tLz92PTEyMw=="></a> + * </div>
+ * + * <!-- after --> + * <div>
+ * <a href="http://example.com/?v=123"></a> + * </div>
+ * ``` + * * @added v1.10.25. */ @@ -95,12 +118,15 @@ export function hrefSanitizer( source: Source, selector: string, attribute = 'text', + transform = '', ) { if (!selector) { logMessage(source, 'Selector is required.'); return; } + const BASE64_TRANSFORM_MARKER = 'base64decode'; + // Regular expression to find not valid characters at the beginning and at the end of the string, // \x21-\x7e is a range that includes the ASCII characters from ! (hex 21) to ~ (hex 7E). // This range covers numbers, English letters, and common symbols. @@ -144,6 +170,21 @@ export function hrefSanitizer( return ''; }; + /** + * Validates whether a given string is a URL. + * + * @param url The URL string to validate. + * @returns `true` if the string is a valid URL, otherwise `false`. + */ + const isValidURL = (url: string): boolean => { + try { + new URL(url); + return true; + } catch { + return false; + } + }; + /** * Validates a URL, if valid return URL, * otherwise return null. @@ -177,6 +218,161 @@ export function hrefSanitizer( return element.nodeName.toLowerCase() === 'a' && element.hasAttribute('href'); }; + /** + * Recursively searches for the first valid URL within a nested object. + * + * @param obj The object to search for URLs. + * @returns The first found URL as a string, or `null` if none are found. + */ + const extractURLFromObject = (obj: Record<string, unknown>): string | null => { + for (const key in obj) { + if (!Object.prototype.hasOwnProperty.call(obj, key)) { + continue; + } + + const value = obj[key]; + + if (typeof value === 'string' && isValidURL(value)) { + return value; + } + + if (typeof value === 'object' && value !== null) { + const result = extractURLFromObject(value as Record<string, unknown>); + if (result) { + return result; + } + } + } + + return null; + }; + + /** + * Checks if the given content has object format. + * @param content The content to check. + * @returns `true` if the content has object format, `false` otherwise. 
+ */ + const isStringifiedObject = (content: string) => content.startsWith('{') && content.endsWith('}'); + + /** + * Decodes a base64 string several times. If the result is a valid URL, it is returned. + * If the result is a JSON object, the first valid URL within the object is returned. + * @param text The base64 string to decode. + * @param times The number of times to decode the base64 string. + * @returns Decoded base64 string or empty string if no valid URL is found. + */ + const decodeBase64SeveralTimes = (text: string, times: number): string | null => { + let result = text; + for (let i = 0; i < times; i += 1) { + try { + result = atob(result); + } catch (e) { + // Not valid base64 string + if (result === text) { + return ''; + } + } + } + // if found valid URL, return it + if (isValidURL(result)) { + return result; + } + // if the result is an object, try to extract URL from it + if (isStringifiedObject(result)) { + try { + const parsedResult = JSON.parse(result); + return extractURLFromObject(parsedResult); + } catch (ex) { + return ''; + } + } + logMessage(source, `Failed to decode base64 string: ${text}`); + return ''; + }; + + // URL components markers + const SEARCH_QUERY_MARKER = '?'; + const SEARCH_PARAMS_MARKER = '&'; + const HASHBANG_MARKER = '#!'; + const ANCHOR_MARKER = '#'; + // decode attempts for base64 string + const DECODE_ATTEMPTS_NUMBER = 10; + + /** + * Decodes the search string by removing the search query marker and decoding the base64 string. 
+ * @param search Search string to decode + * @returns Decoded search string or empty string if no valid URL is found + */ + const decodeSearchString = (search: string) => { + const searchString = search.replace(SEARCH_QUERY_MARKER, ''); + let decodedParam; + let validEncodedParam; + if (searchString.includes(SEARCH_PARAMS_MARKER)) { + const searchParamsArray = searchString.split(SEARCH_PARAMS_MARKER); + searchParamsArray.forEach((param) => { + decodedParam = decodeBase64SeveralTimes(param, DECODE_ATTEMPTS_NUMBER); + if (decodedParam && decodedParam.length > 0) { + validEncodedParam = decodedParam; + } + }); + return validEncodedParam; + } + return decodeBase64SeveralTimes(searchString, DECODE_ATTEMPTS_NUMBER); + }; + + /** + * Decodes the hash string by removing the hashbang or anchor marker and decoding the base64 string. + * @param hash Hash string to decode + * @returns Decoded hash string or empty string if no valid URL is found + */ + const decodeHashString = (hash: string) => { + let validEncodedHash = ''; + + if (hash.includes(HASHBANG_MARKER)) { + validEncodedHash = hash.replace(HASHBANG_MARKER, ''); + } else if (hash.includes(ANCHOR_MARKER)) { + validEncodedHash = hash.replace(ANCHOR_MARKER, ''); + } + + return validEncodedHash ? decodeBase64SeveralTimes(validEncodedHash, DECODE_ATTEMPTS_NUMBER) : ''; + }; + + /** + * Extracts the base64 part from a string. + * If no base64 string is found, `null` is returned. + * @param url String to extract the base64 part from. + * @returns The base64 part of the string, or `null` if none is found. + */ + const decodeBase64URL = (url: string) => { + const { search, hash } = new URL(url); + + if (search.length > 0) { + return decodeSearchString(search); + } + + if (hash.length > 0) { + return decodeHashString(hash); + } + + logMessage(source, `Failed to execute base64 from URL: ${url}`); + return null; + }; + + /** + * Decodes a base64 string from the given href. 
+ * If the href is a valid URL, the base64 string is decoded. + * If the href is not a valid URL, the base64 string is decoded several times. + * @param href The href to decode. + * @returns The decoded base64 string. + */ + const base64Decode = (href: string): string => { + if (isValidURL(href)) { + return decodeBase64URL(href) || ''; + } + + return decodeBase64SeveralTimes(href, DECODE_ATTEMPTS_NUMBER) || ''; + }; + /** * Sanitizes the href attribute of elements matching the given selector. * @@ -194,9 +390,23 @@ export function hrefSanitizer( elements.forEach((elem) => { try { if (!isSanitizableAnchor(elem)) { + logMessage(source, `${elem} is not a valid element to sanitize`); return; } - const newHref = extractNewHref(elem, attribute); + let newHref = extractNewHref(elem, attribute); + + // apply transform if specified + if (transform) { + switch (transform) { + case BASE64_TRANSFORM_MARKER: + newHref = base64Decode(newHref); + break; + default: + logMessage(source, `Invalid transform option: "${transform}"`); + return; + } + } + const newValidHref = getValidURL(newHref); if (!newValidHref) { logMessage(source, `Invalid URL: ${newHref}`); diff --git a/tests/scriptlets/href-sanitizer.test.js b/tests/scriptlets/href-sanitizer.test.js index d3c2de510..a6e491990 100644 --- a/tests/scriptlets/href-sanitizer.test.js +++ b/tests/scriptlets/href-sanitizer.test.js @@ -4,6 +4,14 @@ import { runScriptlet, clearGlobalProps } from '../helpers'; const { test, module } = QUnit; const name = 'href-sanitizer'; +/** + * Create link with href attribute and optional text and additional attribute + * @param {string} href - link href + * @param {string} text - link text + * @param {string} attributeName - additional attribute name + * @param {string} attributeValue - additional attribute value + * @returns {HTMLAnchorElement} - created link element + */ const createElem = (href, text, attributeName, attributeValue) => { const a = document.createElement('a'); a.setAttribute('href', 
href); @@ -56,7 +64,107 @@ test('Checking if alias name works', (assert) => { assert.strictEqual(codeByAdgParams, codeByUboParams, 'ubo name - ok'); }); -test('Santize href - text content', (assert) => { +test('Sanitize href - no URL was found in base64', (assert) => { + // encoded string is 'some text, no urls' + const hrefWithBase64 = 'http://foo.com/#c29tZSB0ZXh0LCBubyB1cmxz'; + const elem = createElem(hrefWithBase64); + const selector = 'a[href]'; + + const scriptletArgs = [selector, '[href]', 'base64decode']; + runScriptlet(name, scriptletArgs); + + assert.strictEqual(elem.getAttribute('href'), hrefWithBase64, 'href has not been changed'); + assert.strictEqual(window.hit, 'FIRED'); +}); + +test('Sanitize href - no URL was found in base64 string in query parameter', (assert) => { + const hrefWithBase64 = 'http://www.foo.com/out/?aGVsbG9fZGFya25lc3M=&aGVsbG9fZGFya25lc3M='; + const elem = createElem(hrefWithBase64); + const selector = 'a[href]'; + + const scriptletArgs = [selector, '[href]', 'base64decode']; + runScriptlet(name, scriptletArgs); + + assert.strictEqual(elem.getAttribute('href'), hrefWithBase64, 'href has not been changed'); + assert.strictEqual(window.hit, 'FIRED'); +}); + +test('Sanitize href - decode base64 string in query parameter', (assert) => { + const hrefWithBase64 = 'http://www.foo.com/out/?aGVsbG9fZGFya25lc3M=&aHR0cDovL2V4YW1wbGUuY29tLz92PTEyMw=='; + const expectedHref = 'http://example.com/?v=123'; + const elem = createElem(hrefWithBase64); + const selector = 'a[href]'; + + const scriptletArgs = [selector, '[href]', 'base64decode']; + runScriptlet(name, scriptletArgs); + + assert.strictEqual(elem.getAttribute('href'), expectedHref, 'href has been sanitized'); + assert.strictEqual(window.hit, 'FIRED'); +}); + +test('Sanitize href - decode base64 string in anchor(#) of href attribute link', (assert) => { + const expectedHref = 'http://example.com/?v=123'; + const hrefWithBase64 = 'http://foo.com/#aHR0cDovL2V4YW1wbGUuY29tLz92PTEyMw=='; + 
const elem = createElem(hrefWithBase64); + const selector = 'a[href]'; + + const scriptletArgs = [selector, '[href]', 'base64decode']; + runScriptlet(name, scriptletArgs); + + assert.strictEqual(elem.getAttribute('href'), expectedHref, 'href has been sanitized'); + assert.strictEqual(window.hit, 'FIRED'); +}); + +test('Sanitize href - decode base64 string in hashbang(#!) of href attribute link few times', (assert) => { + const expectedHref = 'https://www.example.com/file/123/file.rar/file'; + const hrefWithBase64 = 'https://foo.com/#!WVVoU01HTklUVFpNZVRrelpETmpkVnBZYUdoaVdFSnpXbE0xYW1JeU1IWmFiV3h6V2xNNGVFMXFUWFphYld4eldsTTFlVmxZU1haYWJXeHpXbEU5UFE9PQ=='; + const elem = createElem(hrefWithBase64); + const selector = 'a[href]'; + + const scriptletArgs = [selector, '[href]', 'base64decode']; + runScriptlet(name, scriptletArgs); + + assert.strictEqual(elem.getAttribute('href'), expectedHref, 'href has been sanitized'); + assert.strictEqual(window.hit, 'FIRED'); +}); + +test('Sanitize href - decode base64 string in data-href attribute', (assert) => { + const expectedHref = 'https://example.org/'; + const elem = createElem('https://google.com/', expectedHref, 'data-href', 'aHR0cHM6Ly9leGFtcGxlLm9yZy8='); + const selector = 'a[href^="https://google.com/'; + + const scriptletArgs = [selector, '[data-href]', 'base64decode']; + runScriptlet(name, scriptletArgs); + + assert.strictEqual(elem.getAttribute('href'), expectedHref, 'href has been sanitized'); + assert.strictEqual(window.hit, 'FIRED'); +}); + +test('Sanitize href - decode base64 string in href attribute', (assert) => { + const expectedHref = 'http://example.com/?v=123'; + const hrefWithBase64 = 'http://www.foo.com/out/?aHR0cDovL2V4YW1wbGUuY29tLz92PTEyMw=='; + const elem = createElem(hrefWithBase64); + const selector = 'a[href*="out/?"]'; + const scriptletArgs = [selector, '[href]', 'base64decode']; + runScriptlet(name, scriptletArgs); + + assert.strictEqual(elem.getAttribute('href'), expectedHref, 'href has been 
sanitized and base64 was decoded'); + assert.strictEqual(window.hit, 'FIRED'); +}); + +test('Sanitize href - base64 where link decoded in object in search query ', (assert) => { + const expectedHref = 'http://example.com/?v=3468'; + const hrefWithBase64 = 'http://www.foo.com/out/?eyJsIjoiaHR0cDovL2V4YW1wbGUuY29tLz92PTM0NjgiLCJjIjoxfQ=='; + const elem = createElem(hrefWithBase64); + const selector = 'a[href*="out/?"]'; + const scriptletArgs = [selector, '[href]', 'base64decode']; + runScriptlet(name, scriptletArgs); + + assert.strictEqual(elem.getAttribute('href'), expectedHref, 'href has been sanitized and base64 was decoded'); + assert.strictEqual(window.hit, 'FIRED'); +}); + +test('Sanitize href - text content', (assert) => { const expectedHref = 'https://example.org/'; const elem = createElem('https://example.com/foo?redirect=https%3A%2F%2Fexample.org%2F', expectedHref); const selector = 'a[href*="?redirect="]'; @@ -68,7 +176,7 @@ test('Santize href - text content', (assert) => { assert.strictEqual(window.hit, 'FIRED'); }); -test('Santize href - text content, create element after running scriptlet', (assert) => { +test('Sanitize href - text content, create element after running scriptlet', (assert) => { const selector = 'a[href*="foo.com"]'; const scriptletArgs = [selector]; runScriptlet(name, scriptletArgs); @@ -84,7 +192,7 @@ test('Santize href - text content, create element after running scriptlet', (ass }, 10); }); -test('Santize href - text content special characters', (assert) => { +test('Sanitize href - text content special characters', (assert) => { const expectedHref = 'https://example.com/search?q=łódź'; const elem = createElem('https://example.org/foo', expectedHref); const selector = 'a[href*="//example.org"]'; @@ -96,7 +204,7 @@ test('Santize href - text content special characters', (assert) => { assert.strictEqual(window.hit, 'FIRED'); }); -test('Santize href - text content, Twitter like case', (assert) => { +test('Sanitize href - text content, 
Twitter like case', (assert) => { const elem = createElem('https://example.com/foo', 'https://agrd.io/promo_turk_83off…'); // Link from Twitter/X const expectedHref = 'https://agrd.io/promo_turk_83off'; const selector = 'a[href*="//example.com"]'; @@ -108,7 +216,7 @@ test('Santize href - text content, Twitter like case', (assert) => { assert.strictEqual(window.hit, 'FIRED'); }); -test('Santize href - query parameter 1', (assert) => { +test('Sanitize href - query parameter 1', (assert) => { const elem = createElem('https://example.com/foo?redirect=https://example.org/'); const expectedHref = 'https://example.org/'; const selector = 'a[href*="?redirect="]'; @@ -121,7 +229,7 @@ test('Santize href - query parameter 1', (assert) => { assert.strictEqual(window.hit, 'FIRED'); }); -test('Santize href - query parameter 2', (assert) => { +test('Sanitize href - query parameter 2', (assert) => { const elem = createElem('https://greenmangaming.sjv.io/c/3659980/1281797/15105?u=https://www.greenmangaming.com/games/grand-theft-auto-v-premium-edition-pc'); const expectedHref = 'https://www.greenmangaming.com/games/grand-theft-auto-v-premium-edition-pc'; const selector = 'a[href^="https://greenmangaming.sjv.io/c/"][href*="?u="]'; @@ -134,7 +242,7 @@ test('Santize href - query parameter 2', (assert) => { assert.strictEqual(window.hit, 'FIRED'); }); -test('Santize href - get href from attribute', (assert) => { +test('Sanitize href - get href from attribute', (assert) => { const expectedHref = 'https://example.org/'; const elem = createElem('https://foo.com/bar', '', 'data-href', expectedHref); const selector = 'a[href="https://foo.com/bar"]'; @@ -147,7 +255,7 @@ test('Santize href - get href from attribute', (assert) => { assert.strictEqual(window.hit, 'FIRED'); }); -test('Santize href - invalid URL', (assert) => { +test('Sanitize href - invalid URL', (assert) => { const expectedHref = 'https://foo.com/bar'; const elem = createElem(expectedHref, 'https://?'); const selector = 
'a[href="https://foo.com/bar"]'; @@ -159,7 +267,7 @@ test('Santize href - invalid URL', (assert) => { assert.strictEqual(window.hit, 'FIRED'); }); -test('Santize href - parameter, invalid URL', (assert) => { +test('Sanitize href - parameter, invalid URL', (assert) => { const expectedHref = 'https://?example.com/foo?redirect=https://example.org/'; const elem = createElem(expectedHref); const selector = 'a[href*="?redirect="]'; @@ -172,7 +280,7 @@ test('Santize href - parameter, invalid URL', (assert) => { assert.strictEqual(window.hit, 'FIRED'); }); -test('Santize href - not allowed protocol', (assert) => { +test('Sanitize href - not allowed protocol', (assert) => { const expectedHref = 'https://example.com/foo?redirect=javascript:alert(1)'; const elem = createElem(expectedHref); const selector = 'a[href*="?redirect="]';