Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions packages/phishing-controller/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Changed

- Enhance `bulkScanUrls` method to leverage URL scan cache for improved performance ([#5688](https://github.com/MetaMask/core/pull/5688))
- URLs are now checked against the cache before making API requests
- Only uncached URLs are sent to the phishing detection API
- API results are automatically stored in the cache for future use
- Bump `@metamask/controller-utils` to `^11.7.0` ([#5583](https://github.com/MetaMask/core/pull/5583))

## [12.4.1]
Expand Down
154 changes: 154 additions & 0 deletions packages/phishing-controller/src/PhishingController.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2870,6 +2870,160 @@ describe('PhishingController', () => {
expect(response.errors.api_error).toContain('404 Not Found');
expect(response.errors.api_error).toContain('500 Internal Server Error');
});

it('should use cached results for previously scanned URLs and only fetch uncached URLs', async () => {
const cachedUrl = 'https://cached-example.com';
const uncachedUrl = 'https://uncached-example.com';
const mixedUrls = [cachedUrl, uncachedUrl];

// Set up the cache with a pre-existing result
const cachedResult: PhishingDetectionScanResult = {
domainName: 'cached-example.com',
recommendedAction: RecommendedAction.None,
};

// First cache a result via scanUrl
nock(PHISHING_DETECTION_BASE_URL)
.get(
`/${PHISHING_DETECTION_SCAN_ENDPOINT}?url=${encodeURIComponent('cached-example.com')}`,
)
.reply(200, {
recommendedAction: RecommendedAction.None,
});

await controller.scanUrl(cachedUrl);

// Now set up the mock for the bulk API call with only the uncached URL
const expectedPostBody = {
urls: [uncachedUrl],
};

const bulkApiResponse: BulkPhishingDetectionScanResponse = {
results: {
[uncachedUrl]: {
domainName: 'uncached-example.com',
recommendedAction: RecommendedAction.Warn,
},
},
errors: {},
};

const scope = nock(PHISHING_DETECTION_BASE_URL)
.post(`/${PHISHING_DETECTION_BULK_SCAN_ENDPOINT}`, expectedPostBody)
.reply(200, bulkApiResponse);

// Call bulkScanUrls with both URLs
const response = await controller.bulkScanUrls(mixedUrls);

// Verify that only the uncached URL was requested from the API
expect(scope.isDone()).toBe(true);

// Verify the combined results include both the cached and newly fetched results
expect(response.results).toStrictEqual({
[cachedUrl]: cachedResult,
[uncachedUrl]: bulkApiResponse.results[uncachedUrl],
});

// Verify the newly fetched result is now in the cache
const newlyCachedResult = await controller.scanUrl(uncachedUrl);
expect(newlyCachedResult).toStrictEqual(
bulkApiResponse.results[uncachedUrl],
);

// Should not make a new API call for the second scanUrl call
// eslint-disable-next-line import-x/no-named-as-default-member
expect(nock.pendingMocks()).toHaveLength(0);
});
it('should handle invalid URLs properly when mixed with valid URLs and cache results correctly', async () => {
const validUrl = 'https://valid-example.com';
const invalidUrl = 'not-a-url';
const mixedUrls = [validUrl, invalidUrl];

const bulkApiResponse: BulkPhishingDetectionScanResponse = {
results: {
[validUrl]: {
domainName: 'valid-example.com',
recommendedAction: RecommendedAction.None,
},
},
errors: {},
};

const scope = nock(PHISHING_DETECTION_BASE_URL)
.post(`/${PHISHING_DETECTION_BULK_SCAN_ENDPOINT}`, {
urls: [validUrl],
})
.reply(200, bulkApiResponse);

// Call bulkScanUrls with both URLs
const response = await controller.bulkScanUrls(mixedUrls);

// Verify that only the valid URL was requested from the API
expect(scope.isDone()).toBe(true);

// Verify the results include the valid URL result and an error for the invalid URL
expect(response.results[validUrl]).toStrictEqual(
bulkApiResponse.results[validUrl],
);
expect(response.errors[invalidUrl]).toContain(
'url is not a valid web URL',
);

// Verify the valid result is now in the cache
const cachedResult = await controller.scanUrl(validUrl);
expect(cachedResult).toStrictEqual(bulkApiResponse.results[validUrl]);

// Should not make a new API call for the cached URL
// eslint-disable-next-line import-x/no-named-as-default-member
expect(nock.pendingMocks()).toHaveLength(0);
});

it('should use cache for all URLs if all are already cached', async () => {
// First cache the results individually
const cachedUrls = ['https://domain1.com', 'https://domain2.com'];
const cachedResults = [
{
domainName: 'domain1.com',
recommendedAction: RecommendedAction.None,
},
{
domainName: 'domain2.com',
recommendedAction: RecommendedAction.Block,
},
];

// Set up nock for individual caching
nock(PHISHING_DETECTION_BASE_URL)
.get(
`/${PHISHING_DETECTION_SCAN_ENDPOINT}?url=${encodeURIComponent('domain1.com')}`,
)
.reply(200, {
recommendedAction: RecommendedAction.None,
});

nock(PHISHING_DETECTION_BASE_URL)
.get(
`/${PHISHING_DETECTION_SCAN_ENDPOINT}?url=${encodeURIComponent('domain2.com')}`,
)
.reply(200, {
recommendedAction: RecommendedAction.Block,
});

// Cache the results
await controller.scanUrl(cachedUrls[0]);
await controller.scanUrl(cachedUrls[1]);

// No API call should be made for bulkScanUrls
const response = await controller.bulkScanUrls(cachedUrls);

// Verify we got the results from cache
expect(response.results[cachedUrls[0]]).toStrictEqual(cachedResults[0]);
expect(response.results[cachedUrls[1]]).toStrictEqual(cachedResults[1]);

// Verify no API calls were made
// eslint-disable-next-line import-x/no-named-as-default-member
expect(nock.pendingMocks()).toHaveLength(0);
});
});
});

Expand Down
91 changes: 61 additions & 30 deletions packages/phishing-controller/src/PhishingController.ts
Original file line number Diff line number Diff line change
Expand Up @@ -746,44 +746,75 @@ export class PhishingController extends BaseController<
}

const MAX_URL_LENGTH = 2048;
const combinedResponse: BulkPhishingDetectionScanResponse = {
results: {},
errors: {},
};

// Extract hostnames from URLs and check for validity and length constraints
const urlsToHostnames: Record<string, string> = {};
const urlsToFetch: string[] = [];

for (const url of urls) {
if (url.length > MAX_URL_LENGTH) {
return {
results: {},
errors: {
[url]: [`URL length must not exceed ${MAX_URL_LENGTH} characters`],
},
};
combinedResponse.errors[url] = [
`URL length must not exceed ${MAX_URL_LENGTH} characters`,
];
continue;
}

const [hostname, ok] = getHostnameFromWebUrl(url);
if (!ok) {
combinedResponse.errors[url] = ['url is not a valid web URL'];
continue;
}
}

// The API has a limit of 50 URLs per request, so we batch the requests
const MAX_URLS_PER_BATCH = 50;
const batches: string[][] = [];
for (let i = 0; i < urls.length; i += MAX_URLS_PER_BATCH) {
batches.push(urls.slice(i, i + MAX_URLS_PER_BATCH));
// Check if result is already in cache
const cachedResult = this.#urlScanCache.get(hostname);
if (cachedResult) {
// Use cached result
combinedResponse.results[url] = cachedResult;
} else {
// Add to list of URLs to fetch
urlsToHostnames[url] = hostname;
urlsToFetch.push(url);
}
}

// Process each batch in parallel
const batchResults = await Promise.all(
batches.map((batchUrls) => this.#processBatch(batchUrls)),
);
// If there are URLs to fetch, process them in batches
if (urlsToFetch.length > 0) {
// The API has a limit of 50 URLs per request, so we batch the requests
const MAX_URLS_PER_BATCH = 50;
const batches: string[][] = [];
for (let i = 0; i < urlsToFetch.length; i += MAX_URLS_PER_BATCH) {
batches.push(urlsToFetch.slice(i, i + MAX_URLS_PER_BATCH));
}

// Combine all batch results
const combinedResponse: BulkPhishingDetectionScanResponse = {
results: {},
errors: {},
};
// Merge results and errors from all batches
batchResults.forEach((batchResponse) => {
Object.assign(combinedResponse.results, batchResponse.results);
Object.entries(batchResponse.errors).forEach(([key, messages]) => {
combinedResponse.errors[key] = [
...(combinedResponse.errors[key] || []),
...messages,
];
// Process each batch in parallel
const batchResults = await Promise.all(
batches.map((batchUrls) => this.#processBatch(batchUrls)),
);

// Merge results and errors from all batches
batchResults.forEach((batchResponse) => {
// Add results to cache and combine with response
Object.entries(batchResponse.results).forEach(([url, result]) => {
const hostname = urlsToHostnames[url];
if (hostname) {
this.#urlScanCache.add(hostname, result);
}
combinedResponse.results[url] = result;
});

// Combine errors
Object.entries(batchResponse.errors).forEach(([key, messages]) => {
combinedResponse.errors[key] = [
...(combinedResponse.errors[key] || []),
...messages,
];
});
});
});
}

return combinedResponse;
};
Expand Down
Loading