diff --git a/packages/phishing-controller/CHANGELOG.md b/packages/phishing-controller/CHANGELOG.md index f9c66f6c409..b21680d9ead 100644 --- a/packages/phishing-controller/CHANGELOG.md +++ b/packages/phishing-controller/CHANGELOG.md @@ -20,6 +20,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed +- Enhance `bulkScanUrls` method to leverage URL scan cache for improved performance ([#5688](https://github.com/MetaMask/core/pull/5688)) + - URLs are now checked against the cache before making API requests + - Only uncached URLs are sent to the phishing detection API + - API results are automatically stored in the cache for future use - Bump `@metamask/controller-utils` to `^11.7.0` ([#5583](https://github.com/MetaMask/core/pull/5583)) ## [12.4.1] diff --git a/packages/phishing-controller/src/PhishingController.test.ts b/packages/phishing-controller/src/PhishingController.test.ts index c1347b6ada2..f3f9b61123f 100644 --- a/packages/phishing-controller/src/PhishingController.test.ts +++ b/packages/phishing-controller/src/PhishingController.test.ts @@ -2870,6 +2870,160 @@ describe('PhishingController', () => { expect(response.errors.api_error).toContain('404 Not Found'); expect(response.errors.api_error).toContain('500 Internal Server Error'); }); + + it('should use cached results for previously scanned URLs and only fetch uncached URLs', async () => { + const cachedUrl = 'https://cached-example.com'; + const uncachedUrl = 'https://uncached-example.com'; + const mixedUrls = [cachedUrl, uncachedUrl]; + + // Set up the cache with a pre-existing result + const cachedResult: PhishingDetectionScanResult = { + domainName: 'cached-example.com', + recommendedAction: RecommendedAction.None, + }; + + // First cache a result via scanUrl + nock(PHISHING_DETECTION_BASE_URL) + .get( + `/${PHISHING_DETECTION_SCAN_ENDPOINT}?url=${encodeURIComponent('cached-example.com')}`, + ) + .reply(200, { + recommendedAction: RecommendedAction.None, + }); + + await controller.scanUrl(cachedUrl); + + // Now set up the mock for the bulk API call with only the uncached URL + const expectedPostBody = { + urls: [uncachedUrl], + }; + + const bulkApiResponse: BulkPhishingDetectionScanResponse = { + results: { + [uncachedUrl]: { + domainName: 'uncached-example.com', + recommendedAction: RecommendedAction.Warn, + }, + }, + errors: {}, + }; + + const scope = nock(PHISHING_DETECTION_BASE_URL) + .post(`/${PHISHING_DETECTION_BULK_SCAN_ENDPOINT}`, expectedPostBody) + .reply(200, bulkApiResponse); + + // Call bulkScanUrls with both URLs + const response = await controller.bulkScanUrls(mixedUrls); + + // Verify that only the uncached URL was requested from the API + expect(scope.isDone()).toBe(true); + + // Verify the combined results include both the cached and newly fetched results + expect(response.results).toStrictEqual({ + [cachedUrl]: cachedResult, + [uncachedUrl]: bulkApiResponse.results[uncachedUrl], + }); + + // Verify the newly fetched result is now in the cache + const newlyCachedResult = await controller.scanUrl(uncachedUrl); + expect(newlyCachedResult).toStrictEqual( + bulkApiResponse.results[uncachedUrl], + ); + + // Should not make a new API call for the second scanUrl call + // eslint-disable-next-line import-x/no-named-as-default-member + expect(nock.pendingMocks()).toHaveLength(0); + }); + it('should handle invalid URLs properly when mixed with valid URLs and cache results correctly', async () => { + const validUrl = 'https://valid-example.com'; + const invalidUrl = 'not-a-url'; + const mixedUrls = [validUrl, invalidUrl]; + + const bulkApiResponse: BulkPhishingDetectionScanResponse = { + results: { + [validUrl]: { + domainName: 'valid-example.com', + recommendedAction: RecommendedAction.None, + }, + }, + errors: {}, + }; + + const scope = nock(PHISHING_DETECTION_BASE_URL) + .post(`/${PHISHING_DETECTION_BULK_SCAN_ENDPOINT}`, { + urls: [validUrl], + }) + .reply(200, bulkApiResponse); + + // Call bulkScanUrls with both URLs + const response = await controller.bulkScanUrls(mixedUrls); + + // Verify that only the valid URL was requested from the API + expect(scope.isDone()).toBe(true); + + // Verify the results include the valid URL result and an error for the invalid URL + expect(response.results[validUrl]).toStrictEqual( + bulkApiResponse.results[validUrl], + ); + expect(response.errors[invalidUrl]).toContain( + 'url is not a valid web URL', + ); + + // Verify the valid result is now in the cache + const cachedResult = await controller.scanUrl(validUrl); + expect(cachedResult).toStrictEqual(bulkApiResponse.results[validUrl]); + + // Should not make a new API call for the cached URL + // eslint-disable-next-line import-x/no-named-as-default-member + expect(nock.pendingMocks()).toHaveLength(0); + }); + + it('should use cache for all URLs if all are already cached', async () => { + // First cache the results individually + const cachedUrls = ['https://domain1.com', 'https://domain2.com']; + const cachedResults = [ + { + domainName: 'domain1.com', + recommendedAction: RecommendedAction.None, + }, + { + domainName: 'domain2.com', + recommendedAction: RecommendedAction.Block, + }, + ]; + + // Set up nock for individual caching + nock(PHISHING_DETECTION_BASE_URL) + .get( + `/${PHISHING_DETECTION_SCAN_ENDPOINT}?url=${encodeURIComponent('domain1.com')}`, + ) + .reply(200, { + recommendedAction: RecommendedAction.None, + }); + + nock(PHISHING_DETECTION_BASE_URL) + .get( + `/${PHISHING_DETECTION_SCAN_ENDPOINT}?url=${encodeURIComponent('domain2.com')}`, + ) + .reply(200, { + recommendedAction: RecommendedAction.Block, + }); + + // Cache the results + await controller.scanUrl(cachedUrls[0]); + await controller.scanUrl(cachedUrls[1]); + + // No API call should be made for bulkScanUrls + const response = await controller.bulkScanUrls(cachedUrls); + + // Verify we got the results from cache + expect(response.results[cachedUrls[0]]).toStrictEqual(cachedResults[0]); + expect(response.results[cachedUrls[1]]).toStrictEqual(cachedResults[1]); + + // Verify no API calls were made + // eslint-disable-next-line import-x/no-named-as-default-member + expect(nock.pendingMocks()).toHaveLength(0); + }); }); }); diff --git a/packages/phishing-controller/src/PhishingController.ts b/packages/phishing-controller/src/PhishingController.ts index aabeffe11ec..b36724d9997 100644 --- a/packages/phishing-controller/src/PhishingController.ts +++ b/packages/phishing-controller/src/PhishingController.ts @@ -746,44 +746,75 @@ export class PhishingController extends BaseController< } const MAX_URL_LENGTH = 2048; + const combinedResponse: BulkPhishingDetectionScanResponse = { + results: {}, + errors: {}, + }; + + // Extract hostnames from URLs and check for validity and length constraints + const urlsToHostnames: Record = {}; + const urlsToFetch: string[] = []; + for (const url of urls) { if (url.length > MAX_URL_LENGTH) { - return { - results: {}, - errors: { - [url]: [`URL length must not exceed ${MAX_URL_LENGTH} characters`], - }, - }; + combinedResponse.errors[url] = [ + `URL length must not exceed ${MAX_URL_LENGTH} characters`, + ]; + continue; + } + + const [hostname, ok] = getHostnameFromWebUrl(url); + if (!ok) { + combinedResponse.errors[url] = ['url is not a valid web URL']; + continue; } - } - // The API has a limit of 50 URLs per request, so we batch the requests - const MAX_URLS_PER_BATCH = 50; - const batches: string[][] = []; - for (let i = 0; i < urls.length; i += MAX_URLS_PER_BATCH) { - batches.push(urls.slice(i, i + MAX_URLS_PER_BATCH)); + // Check if result is already in cache + const cachedResult = this.#urlScanCache.get(hostname); + if (cachedResult) { + // Use cached result + combinedResponse.results[url] = cachedResult; + } else { + // Add to list of URLs to fetch + urlsToHostnames[url] = hostname; + urlsToFetch.push(url); + } } - // Process each batch in parallel - const batchResults = await Promise.all( - batches.map((batchUrls) => this.#processBatch(batchUrls)), - ); + // If there are URLs to fetch, process them in batches + if (urlsToFetch.length > 0) { + // The API has a limit of 50 URLs per request, so we batch the requests + const MAX_URLS_PER_BATCH = 50; + const batches: string[][] = []; + for (let i = 0; i < urlsToFetch.length; i += MAX_URLS_PER_BATCH) { + batches.push(urlsToFetch.slice(i, i + MAX_URLS_PER_BATCH)); + } - // Combine all batch results - const combinedResponse: BulkPhishingDetectionScanResponse = { - results: {}, - errors: {}, - }; - // Merge results and errors from all batches - batchResults.forEach((batchResponse) => { - Object.assign(combinedResponse.results, batchResponse.results); - Object.entries(batchResponse.errors).forEach(([key, messages]) => { - combinedResponse.errors[key] = [ - ...(combinedResponse.errors[key] || []), - ...messages, - ]; + // Process each batch in parallel + const batchResults = await Promise.all( + batches.map((batchUrls) => this.#processBatch(batchUrls)), + ); + + // Merge results and errors from all batches + batchResults.forEach((batchResponse) => { + // Add results to cache and combine with response + Object.entries(batchResponse.results).forEach(([url, result]) => { + const hostname = urlsToHostnames[url]; + if (hostname) { + this.#urlScanCache.add(hostname, result); + } + combinedResponse.results[url] = result; + }); + + // Combine errors + Object.entries(batchResponse.errors).forEach(([key, messages]) => { + combinedResponse.errors[key] = [ + ...(combinedResponse.errors[key] || []), + ...messages, + ]; + }); }); - }); + } return combinedResponse; };