@@ -22,15 +22,20 @@ import {
22
22
* ### Syntax
23
23
*
24
24
* ```text
25
- * example.org#%#//scriptlet('href-sanitizer', selector[, attribute])
25
+ * example.org#%#//scriptlet('href-sanitizer', selector[, attribute[, transform]])
26
26
* ```
27
27
*
28
28
* - `selector` — required, a CSS selector to match the elements to be sanitized,
29
29
* which should be anchor elements (`<a>`) with `href` attribute.
30
30
* - `attribute` — optional, defaults to `text`:
31
31
* - `text` — use the text content of the matched element,
32
- * - `[attribute-name]` copy the value from attribute `attribute-name` on the same element,
33
- * - `?parameter` copy the value from URL parameter `parameter` of the same element's `href` attribute.
32
+ * - `[<attribute-name>]` — copy the value from attribute `attribute-name` on the same element,
33
+ * - `?<parameter-name>` — copy the value from URL parameter `parameter-name` of the same element's `href` attribute.
34
+ * - `transform` — optional, defaults to no transforming:
35
+ * - `base64decode` — decode the base64 string from the specified attribute.
36
+ *
37
+ * > Note that in the case where the discovered value does not correspond to a valid URL with the appropriate
38
+ * > http or https protocols, the value will not be set.
34
39
*
35
40
* ### Examples
36
41
*
@@ -88,19 +93,40 @@ import {
88
93
* </div>
89
94
* ```
90
95
*
96
+ * 4. Decode the base64 string from the specified attribute:
97
+ *
98
+ * ```adblock
99
+ * example.org#%#//scriptlet('href-sanitizer', 'a[href*="foo.com"]', '[href]', 'base64decode')
100
+ * ```
101
+ *
102
+ * ```html
103
+ * <!-- before -->
104
+ * <div>
105
+ * <a href="http://www.foo.com/out/?aHR0cDovL2V4YW1wbGUuY29tLz92PTEyMw=="></a>
106
+ * </div>
107
+ *
108
+ * <!-- after -->
109
+ * <div>
110
+ * <a href="http://example.com/?v=123"></a>
111
+ * </div>
112
+ * ```
113
+ *
91
114
* @added v1.10.25.
92
115
*/
93
116
94
117
export function hrefSanitizer (
95
118
source : Source ,
96
119
selector : string ,
97
120
attribute = 'text' ,
121
+ transform = '' ,
98
122
) {
99
123
if ( ! selector ) {
100
124
logMessage ( source , 'Selector is required.' ) ;
101
125
return ;
102
126
}
103
127
128
+ const BASE64_TRANSFORM_MARKER = 'base64decode' ;
129
+
104
130
// Regular expression to find not valid characters at the beginning and at the end of the string,
105
131
// \x21-\x7e is a range that includes the ASCII characters from ! (hex 21) to ~ (hex 7E).
106
132
// This range covers numbers, English letters, and common symbols.
@@ -144,6 +170,21 @@ export function hrefSanitizer(
144
170
return '' ;
145
171
} ;
146
172
173
/**
 * Checks whether the given string can be parsed by the `URL` constructor.
 *
 * @param url String to check.
 * @returns `true` if `new URL(url)` succeeds, `false` if it throws.
 */
const isValidURL = (url: string): boolean => {
    let parsed: URL | null;
    try {
        parsed = new URL(url);
    } catch {
        // The constructor throws on anything it cannot parse
        parsed = null;
    }
    return parsed !== null;
};
187
+
147
188
/**
148
189
* Validates a URL, if valid return URL,
149
190
* otherwise return null.
@@ -177,6 +218,161 @@ export function hrefSanitizer(
177
218
return element . nodeName . toLowerCase ( ) === 'a' && element . hasAttribute ( 'href' ) ;
178
219
} ;
179
220
221
/**
 * Walks an object depth-first, visiting own enumerable properties in key order,
 * and returns the first string value that is a valid URL.
 *
 * @param obj Object (possibly nested) to look through.
 * @returns The first URL string found, or `null` when there is none.
 */
const extractURLFromObject = (obj: Record<string, unknown>): string | null => {
    for (const key of Object.keys(obj)) {
        const candidate = obj[key];

        if (typeof candidate === 'string') {
            if (isValidURL(candidate)) {
                return candidate;
            }
            // A string that is not a URL has nothing to descend into
            continue;
        }

        if (candidate === null || typeof candidate !== 'object') {
            continue;
        }

        // Nested object or array: search it before moving on to the next key
        const nestedURL = extractURLFromObject(candidate as Record<string, unknown>);
        if (nestedURL) {
            return nestedURL;
        }
    }

    return null;
};
249
+
250
/**
 * Tells whether a string is wrapped in curly braces, i.e. looks like a serialized object.
 * Only the first and the last characters are inspected; the content is not parsed.
 *
 * @param content String to inspect.
 * @returns `true` if `content` starts with `{` and ends with `}`, `false` otherwise.
 */
const isStringifiedObject = (content: string): boolean => {
    const firstChar = content.charAt(0);
    const lastChar = content.charAt(content.length - 1);
    return firstChar === '{' && lastChar === '}';
};
256
+
257
/**
 * Base64-decodes a string up to `times` times in a row (to unwrap nested encodings)
 * and tries to obtain a URL from the final result:
 * - if the result is a valid URL, it is returned;
 * - if the result looks like a stringified object, it is parsed as JSON
 *   and the first valid URL found inside is returned.
 *
 * @param text The base64 string to decode.
 * @param times Maximum number of decoding rounds.
 * @returns The URL that was found, or an empty string if `text` is not base64
 * or no valid URL could be obtained from it.
 */
const decodeBase64SeveralTimes = (text: string, times: number): string => {
    let result = text;
    for (let i = 0; i < times; i += 1) {
        try {
            result = atob(result);
        } catch {
            // Nothing has been decoded so far, so the input is not base64 at all
            if (result === text) {
                return '';
            }
            // The previous round produced the final value; decoding it again
            // would only fail the same way, so stop here
            break;
        }
    }
    // if found valid URL, return it
    if (isValidURL(result)) {
        return result;
    }
    // if the result is an object, try to extract URL from it
    if (isStringifiedObject(result)) {
        try {
            const parsedResult = JSON.parse(result) as Record<string, unknown>;
            // Normalize "no URL inside the object" to an empty string
            return extractURLFromObject(parsedResult) || '';
        } catch {
            // Looked like an object but is not valid JSON
            return '';
        }
    }
    logMessage(source, `Failed to decode base64 string: ${text}`);
    return '';
};
292
+
293
// Number of base64 decoding rounds passed to decodeBase64SeveralTimes
const DECODE_ATTEMPTS_NUMBER = 10;
// Markers of URL components: hash part
const ANCHOR_MARKER = '#';
const HASHBANG_MARKER = '#!';
// Markers of URL components: query part
const SEARCH_QUERY_MARKER = '?';
const SEARCH_PARAMS_MARKER = '&';
300
+
301
/**
 * Decodes a URL search string: removes the search query marker, splits the rest
 * by the search params marker and base64-decodes every part.
 * If several parts decode to a non-empty value, the last one is used.
 *
 * @param search Search string to decode.
 * @returns Decoded value, or an empty string if no part could be decoded.
 */
const decodeSearchString = (search: string): string => {
    const searchString = search.replace(SEARCH_QUERY_MARKER, '');
    let validDecodedParam = '';

    // A string without the params marker splits into a single part,
    // so the same loop covers both the single- and multi-parameter cases
    searchString.split(SEARCH_PARAMS_MARKER).forEach((param) => {
        const decodedParam = decodeBase64SeveralTimes(param, DECODE_ATTEMPTS_NUMBER);
        if (decodedParam) {
            validDecodedParam = decodedParam;
        }
    });

    return validDecodedParam;
};
322
+
323
/**
 * Decodes a URL hash: strips the hashbang marker (or, failing that, the anchor marker)
 * and base64-decodes what is left.
 *
 * @param hash Hash string to decode.
 * @returns Result of decoding, or an empty string if the hash contains neither marker
 * or nothing is left after removing it.
 */
const decodeHashString = (hash: string): string | null => {
    // The hashbang is checked first, as it also contains the anchor marker
    const marker = [HASHBANG_MARKER, ANCHOR_MARKER].find((candidate) => hash.includes(candidate));
    if (marker === undefined) {
        return '';
    }

    const encodedHash = hash.replace(marker, '');
    if (!encodedHash) {
        return '';
    }

    return decodeBase64SeveralTimes(encodedHash, DECODE_ATTEMPTS_NUMBER);
};
339
+
340
/**
 * Decodes the base64 payload carried by a URL.
 * The search string is used when it is not empty; otherwise the hash is used.
 * If the URL has neither, a message is logged and `null` is returned.
 *
 * @param url URL string to take the base64 payload from; the `URL` constructor
 * throws if it cannot be parsed.
 * @returns Result of decoding the search string or the hash,
 * or `null` if the URL has neither of them.
 */
const decodeBase64URL = (url: string) => {
    const parsedURL = new URL(url);

    if (parsedURL.search !== '') {
        return decodeSearchString(parsedURL.search);
    }

    if (parsedURL.hash !== '') {
        return decodeHashString(parsedURL.hash);
    }

    logMessage(source, `Failed to execute base64 from URL: ${url}`);
    return null;
};
360
+
361
/**
 * Applies the base64 decoding to the given href.
 * If the href is a valid URL, the payload is taken from the URL itself;
 * otherwise the whole href is treated as a base64 string.
 *
 * @param href The href to decode.
 * @returns The decoded value, or an empty string if nothing was decoded.
 */
const base64Decode = (href: string): string => {
    const decoded = isValidURL(href)
        ? decodeBase64URL(href)
        : decodeBase64SeveralTimes(href, DECODE_ATTEMPTS_NUMBER);

    return decoded || '';
};
375
+
180
376
/**
181
377
* Sanitizes the href attribute of elements matching the given selector.
182
378
*
@@ -194,9 +390,23 @@ export function hrefSanitizer(
194
390
elements . forEach ( ( elem ) => {
195
391
try {
196
392
if ( ! isSanitizableAnchor ( elem ) ) {
393
+ logMessage ( source , `${ elem } is not a valid element to sanitize` ) ;
197
394
return ;
198
395
}
199
- const newHref = extractNewHref ( elem , attribute ) ;
396
+ let newHref = extractNewHref ( elem , attribute ) ;
397
+
398
+ // apply transform if specified
399
+ if ( transform ) {
400
+ switch ( transform ) {
401
+ case BASE64_TRANSFORM_MARKER :
402
+ newHref = base64Decode ( newHref ) ;
403
+ break ;
404
+ default :
405
+ logMessage ( source , `Invalid transform option: "${ transform } "` ) ;
406
+ return ;
407
+ }
408
+ }
409
+
200
410
const newValidHref = getValidURL ( newHref ) ;
201
411
if ( ! newValidHref ) {
202
412
logMessage ( source , `Invalid URL: ${ newHref } ` ) ;
0 commit comments