Skip to content

Commit 416185b

Browse files
refactor(phishing-controller): enhance URL validation and caching in … (#5688)
### Explanation The bulk URL scanning functionality in PhishingController previously didn't leverage the URL scan cache that was already implemented for single URL scanning. This meant that even if a URL had been recently scanned, it would be scanned again when included in a bulk scan request, causing unnecessary API calls and increased response times. This PR extends the caching functionality to the `bulkScanUrls` method, allowing it to: 1. Check the cache for each URL before sending API requests 2. Only scan URLs that aren't already in the cache 3. Add newly scanned results to the cache for future use 4. Return a combined response of both cached and newly scanned results This optimization significantly reduces API calls and improves response times for bulk scan operations, especially when the same URLs are frequently scanned. ### References Related to #5625 (Add URL scan cache functionality) Extends functionality from #5682 (Add bulk scan functionality)
1 parent d91e7b0 commit 416185b

File tree

3 files changed

+219
-30
lines changed

3 files changed

+219
-30
lines changed

packages/phishing-controller/CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
2020

2121
### Changed
2222

23+
- Enhance `bulkScanUrls` method to leverage URL scan cache for improved performance ([#5688](https://github.com/MetaMask/core/pull/5688))
24+
- URLs are now checked against the cache before making API requests
25+
- Only uncached URLs are sent to the phishing detection API
26+
- API results are automatically stored in the cache for future use
2327
- Bump `@metamask/controller-utils` to `^11.7.0` ([#5583](https://github.com/MetaMask/core/pull/5583))
2428

2529
## [12.4.1]

packages/phishing-controller/src/PhishingController.test.ts

Lines changed: 154 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2870,6 +2870,160 @@ describe('PhishingController', () => {
28702870
expect(response.errors.api_error).toContain('404 Not Found');
28712871
expect(response.errors.api_error).toContain('500 Internal Server Error');
28722872
});
2873+
2874+
it('should use cached results for previously scanned URLs and only fetch uncached URLs', async () => {
2875+
const cachedUrl = 'https://cached-example.com';
2876+
const uncachedUrl = 'https://uncached-example.com';
2877+
const mixedUrls = [cachedUrl, uncachedUrl];
2878+
2879+
// Set up the cache with a pre-existing result
2880+
const cachedResult: PhishingDetectionScanResult = {
2881+
domainName: 'cached-example.com',
2882+
recommendedAction: RecommendedAction.None,
2883+
};
2884+
2885+
// First cache a result via scanUrl
2886+
nock(PHISHING_DETECTION_BASE_URL)
2887+
.get(
2888+
`/${PHISHING_DETECTION_SCAN_ENDPOINT}?url=${encodeURIComponent('cached-example.com')}`,
2889+
)
2890+
.reply(200, {
2891+
recommendedAction: RecommendedAction.None,
2892+
});
2893+
2894+
await controller.scanUrl(cachedUrl);
2895+
2896+
// Now set up the mock for the bulk API call with only the uncached URL
2897+
const expectedPostBody = {
2898+
urls: [uncachedUrl],
2899+
};
2900+
2901+
const bulkApiResponse: BulkPhishingDetectionScanResponse = {
2902+
results: {
2903+
[uncachedUrl]: {
2904+
domainName: 'uncached-example.com',
2905+
recommendedAction: RecommendedAction.Warn,
2906+
},
2907+
},
2908+
errors: {},
2909+
};
2910+
2911+
const scope = nock(PHISHING_DETECTION_BASE_URL)
2912+
.post(`/${PHISHING_DETECTION_BULK_SCAN_ENDPOINT}`, expectedPostBody)
2913+
.reply(200, bulkApiResponse);
2914+
2915+
// Call bulkScanUrls with both URLs
2916+
const response = await controller.bulkScanUrls(mixedUrls);
2917+
2918+
// Verify that only the uncached URL was requested from the API
2919+
expect(scope.isDone()).toBe(true);
2920+
2921+
// Verify the combined results include both the cached and newly fetched results
2922+
expect(response.results).toStrictEqual({
2923+
[cachedUrl]: cachedResult,
2924+
[uncachedUrl]: bulkApiResponse.results[uncachedUrl],
2925+
});
2926+
2927+
// Verify the newly fetched result is now in the cache
2928+
const newlyCachedResult = await controller.scanUrl(uncachedUrl);
2929+
expect(newlyCachedResult).toStrictEqual(
2930+
bulkApiResponse.results[uncachedUrl],
2931+
);
2932+
2933+
// Should not make a new API call for the second scanUrl call
2934+
// eslint-disable-next-line import-x/no-named-as-default-member
2935+
expect(nock.pendingMocks()).toHaveLength(0);
2936+
});
2937+
it('should handle invalid URLs properly when mixed with valid URLs and cache results correctly', async () => {
2938+
const validUrl = 'https://valid-example.com';
2939+
const invalidUrl = 'not-a-url';
2940+
const mixedUrls = [validUrl, invalidUrl];
2941+
2942+
const bulkApiResponse: BulkPhishingDetectionScanResponse = {
2943+
results: {
2944+
[validUrl]: {
2945+
domainName: 'valid-example.com',
2946+
recommendedAction: RecommendedAction.None,
2947+
},
2948+
},
2949+
errors: {},
2950+
};
2951+
2952+
const scope = nock(PHISHING_DETECTION_BASE_URL)
2953+
.post(`/${PHISHING_DETECTION_BULK_SCAN_ENDPOINT}`, {
2954+
urls: [validUrl],
2955+
})
2956+
.reply(200, bulkApiResponse);
2957+
2958+
// Call bulkScanUrls with both URLs
2959+
const response = await controller.bulkScanUrls(mixedUrls);
2960+
2961+
// Verify that only the valid URL was requested from the API
2962+
expect(scope.isDone()).toBe(true);
2963+
2964+
// Verify the results include the valid URL result and an error for the invalid URL
2965+
expect(response.results[validUrl]).toStrictEqual(
2966+
bulkApiResponse.results[validUrl],
2967+
);
2968+
expect(response.errors[invalidUrl]).toContain(
2969+
'url is not a valid web URL',
2970+
);
2971+
2972+
// Verify the valid result is now in the cache
2973+
const cachedResult = await controller.scanUrl(validUrl);
2974+
expect(cachedResult).toStrictEqual(bulkApiResponse.results[validUrl]);
2975+
2976+
// Should not make a new API call for the cached URL
2977+
// eslint-disable-next-line import-x/no-named-as-default-member
2978+
expect(nock.pendingMocks()).toHaveLength(0);
2979+
});
2980+
2981+
it('should use cache for all URLs if all are already cached', async () => {
2982+
// First cache the results individually
2983+
const cachedUrls = ['https://domain1.com', 'https://domain2.com'];
2984+
const cachedResults = [
2985+
{
2986+
domainName: 'domain1.com',
2987+
recommendedAction: RecommendedAction.None,
2988+
},
2989+
{
2990+
domainName: 'domain2.com',
2991+
recommendedAction: RecommendedAction.Block,
2992+
},
2993+
];
2994+
2995+
// Set up nock for individual caching
2996+
nock(PHISHING_DETECTION_BASE_URL)
2997+
.get(
2998+
`/${PHISHING_DETECTION_SCAN_ENDPOINT}?url=${encodeURIComponent('domain1.com')}`,
2999+
)
3000+
.reply(200, {
3001+
recommendedAction: RecommendedAction.None,
3002+
});
3003+
3004+
nock(PHISHING_DETECTION_BASE_URL)
3005+
.get(
3006+
`/${PHISHING_DETECTION_SCAN_ENDPOINT}?url=${encodeURIComponent('domain2.com')}`,
3007+
)
3008+
.reply(200, {
3009+
recommendedAction: RecommendedAction.Block,
3010+
});
3011+
3012+
// Cache the results
3013+
await controller.scanUrl(cachedUrls[0]);
3014+
await controller.scanUrl(cachedUrls[1]);
3015+
3016+
// No API call should be made for bulkScanUrls
3017+
const response = await controller.bulkScanUrls(cachedUrls);
3018+
3019+
// Verify we got the results from cache
3020+
expect(response.results[cachedUrls[0]]).toStrictEqual(cachedResults[0]);
3021+
expect(response.results[cachedUrls[1]]).toStrictEqual(cachedResults[1]);
3022+
3023+
// Verify no API calls were made
3024+
// eslint-disable-next-line import-x/no-named-as-default-member
3025+
expect(nock.pendingMocks()).toHaveLength(0);
3026+
});
28733027
});
28743028
});
28753029

packages/phishing-controller/src/PhishingController.ts

Lines changed: 61 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -746,44 +746,75 @@ export class PhishingController extends BaseController<
746746
}
747747

748748
const MAX_URL_LENGTH = 2048;
749+
const combinedResponse: BulkPhishingDetectionScanResponse = {
750+
results: {},
751+
errors: {},
752+
};
753+
754+
// Extract hostnames from URLs and check for validity and length constraints
755+
const urlsToHostnames: Record<string, string> = {};
756+
const urlsToFetch: string[] = [];
757+
749758
for (const url of urls) {
750759
if (url.length > MAX_URL_LENGTH) {
751-
return {
752-
results: {},
753-
errors: {
754-
[url]: [`URL length must not exceed ${MAX_URL_LENGTH} characters`],
755-
},
756-
};
760+
combinedResponse.errors[url] = [
761+
`URL length must not exceed ${MAX_URL_LENGTH} characters`,
762+
];
763+
continue;
764+
}
765+
766+
const [hostname, ok] = getHostnameFromWebUrl(url);
767+
if (!ok) {
768+
combinedResponse.errors[url] = ['url is not a valid web URL'];
769+
continue;
757770
}
758-
}
759771

760-
// The API has a limit of 50 URLs per request, so we batch the requests
761-
const MAX_URLS_PER_BATCH = 50;
762-
const batches: string[][] = [];
763-
for (let i = 0; i < urls.length; i += MAX_URLS_PER_BATCH) {
764-
batches.push(urls.slice(i, i + MAX_URLS_PER_BATCH));
772+
// Check if result is already in cache
773+
const cachedResult = this.#urlScanCache.get(hostname);
774+
if (cachedResult) {
775+
// Use cached result
776+
combinedResponse.results[url] = cachedResult;
777+
} else {
778+
// Add to list of URLs to fetch
779+
urlsToHostnames[url] = hostname;
780+
urlsToFetch.push(url);
781+
}
765782
}
766783

767-
// Process each batch in parallel
768-
const batchResults = await Promise.all(
769-
batches.map((batchUrls) => this.#processBatch(batchUrls)),
770-
);
784+
// If there are URLs to fetch, process them in batches
785+
if (urlsToFetch.length > 0) {
786+
// The API has a limit of 50 URLs per request, so we batch the requests
787+
const MAX_URLS_PER_BATCH = 50;
788+
const batches: string[][] = [];
789+
for (let i = 0; i < urlsToFetch.length; i += MAX_URLS_PER_BATCH) {
790+
batches.push(urlsToFetch.slice(i, i + MAX_URLS_PER_BATCH));
791+
}
771792

772-
// Combine all batch results
773-
const combinedResponse: BulkPhishingDetectionScanResponse = {
774-
results: {},
775-
errors: {},
776-
};
777-
// Merge results and errors from all batches
778-
batchResults.forEach((batchResponse) => {
779-
Object.assign(combinedResponse.results, batchResponse.results);
780-
Object.entries(batchResponse.errors).forEach(([key, messages]) => {
781-
combinedResponse.errors[key] = [
782-
...(combinedResponse.errors[key] || []),
783-
...messages,
784-
];
793+
// Process each batch in parallel
794+
const batchResults = await Promise.all(
795+
batches.map((batchUrls) => this.#processBatch(batchUrls)),
796+
);
797+
798+
// Merge results and errors from all batches
799+
batchResults.forEach((batchResponse) => {
800+
// Add results to cache and combine with response
801+
Object.entries(batchResponse.results).forEach(([url, result]) => {
802+
const hostname = urlsToHostnames[url];
803+
if (hostname) {
804+
this.#urlScanCache.add(hostname, result);
805+
}
806+
combinedResponse.results[url] = result;
807+
});
808+
809+
// Combine errors
810+
Object.entries(batchResponse.errors).forEach(([key, messages]) => {
811+
combinedResponse.errors[key] = [
812+
...(combinedResponse.errors[key] || []),
813+
...messages,
814+
];
815+
});
785816
});
786-
});
817+
}
787818

788819
return combinedResponse;
789820
};

0 commit comments

Comments
 (0)