Skip to content

Commit f92caba

Browse files
Feat/bulk scan phishing controller (#5682)
# Add bulk URL phishing scanning capability to PhishingController ## Explanation The PhishingController currently supports scanning single URLs for phishing detection, but there are cases where we need to check multiple URLs at once for efficiency. This PR adds a new `bulkScanUrls` method that allows scanning up to 250 URLs in a single operation, improving performance when multiple URLs need to be checked. The implementation includes: - A new `bulkScanUrls` method that can process up to 250 URLs (in batches of 50) - URL validation for length and quantity limits - Error handling for API errors, timeouts, and validation failures - Parallel processing of URL batches for improved performance ## References * Implements new bulk scan API endpoint functionality * Related to improved phishing detection performance ## Checklist - [x] I've updated the test suite for new or updated code as appropriate - [x] I've updated documentation (JSDoc, Markdown, etc.) for new or updated code as appropriate - [x] I've communicated my changes to consumers by [updating changelogs for packages I've changed](https://github.com/MetaMask/core/tree/main/docs/contributing.md#updating-changelogs), highlighting breaking changes as necessary - [ ] I've prepared draft pull requests for clients and consumer packages to resolve any breaking changes
1 parent 1adb5da commit f92caba

File tree

3 files changed

+441
-1
lines changed

3 files changed

+441
-1
lines changed

packages/phishing-controller/CHANGELOG.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
77

88
## [Unreleased]
99

10+
### Added
11+
12+
- Add `bulkScanUrls` method to `PhishingController` for scanning multiple URLs for phishing in bulk ([#5682](https://github.com/MetaMask/core/pull/5682))
13+
- Add `BulkPhishingDetectionScanResponse` type for bulk URL scan results ([#5682](https://github.com/MetaMask/core/pull/5682))
14+
- Add `PHISHING_DETECTION_BULK_SCAN_ENDPOINT` constant ([#5682](https://github.com/MetaMask/core/pull/5682))
15+
1016
### Changed
1117

1218
- Bump `@metamask/controller-utils` to `^11.7.0` ([#5583](https://github.com/MetaMask/core/pull/5583))

packages/phishing-controller/src/PhishingController.test.ts

Lines changed: 283 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ import {
1515
C2_DOMAIN_BLOCKLIST_ENDPOINT,
1616
PHISHING_DETECTION_BASE_URL,
1717
PHISHING_DETECTION_SCAN_ENDPOINT,
18+
PHISHING_DETECTION_BULK_SCAN_ENDPOINT,
19+
type BulkPhishingDetectionScanResponse,
1820
} from './PhishingController';
1921
import { formatHostnameToUrl } from './tests/utils';
2022
import type { PhishingDetectionScanResult } from './types';
@@ -2587,4 +2589,285 @@ describe('PhishingController', () => {
25872589
expect(scope.isDone()).toBe(true);
25882590
});
25892591
});
2592+
2593+
describe('bulkScanUrls', () => {
2594+
let controller: PhishingController;
2595+
let clock: sinon.SinonFakeTimers;
2596+
const testUrls: string[] = [
2597+
'https://example1.com',
2598+
'https://example2.com',
2599+
'https://example3.com',
2600+
];
2601+
const mockResponse: BulkPhishingDetectionScanResponse = {
2602+
results: {
2603+
'https://example1.com': {
2604+
domainName: 'example1.com',
2605+
recommendedAction: RecommendedAction.None,
2606+
},
2607+
'https://example2.com': {
2608+
domainName: 'example2.com',
2609+
recommendedAction: RecommendedAction.Block,
2610+
},
2611+
'https://example3.com': {
2612+
domainName: 'example3.com',
2613+
recommendedAction: RecommendedAction.None,
2614+
},
2615+
},
2616+
errors: {},
2617+
};
2618+
2619+
beforeEach(() => {
2620+
controller = getPhishingController();
2621+
clock = sinon.useFakeTimers();
2622+
});
2623+
2624+
afterEach(() => {
2625+
clock.restore();
2626+
});
2627+
2628+
it('should return the scan results for multiple URLs', async () => {
2629+
const scope = nock(PHISHING_DETECTION_BASE_URL)
2630+
.post(`/${PHISHING_DETECTION_BULK_SCAN_ENDPOINT}`, {
2631+
urls: testUrls,
2632+
})
2633+
.reply(200, mockResponse);
2634+
2635+
const response = await controller.bulkScanUrls(testUrls);
2636+
expect(response).toStrictEqual(mockResponse);
2637+
expect(scope.isDone()).toBe(true);
2638+
});
2639+
2640+
it('should handle empty URL arrays', async () => {
2641+
const response = await controller.bulkScanUrls([]);
2642+
expect(response).toStrictEqual({
2643+
results: {},
2644+
errors: {},
2645+
});
2646+
});
2647+
2648+
it('should enforce maximum URL limit', async () => {
2649+
const tooManyUrls = Array(251).fill('https://example.com');
2650+
const response = await controller.bulkScanUrls(tooManyUrls);
2651+
expect(response).toStrictEqual({
2652+
results: {},
2653+
errors: {
2654+
too_many_urls: ['Maximum of 250 URLs allowed per request'],
2655+
},
2656+
});
2657+
});
2658+
2659+
it('should validate URL length', async () => {
2660+
const longUrl = `https://example.com/${'a'.repeat(2048)}`;
2661+
const response = await controller.bulkScanUrls([longUrl]);
2662+
expect(response).toStrictEqual({
2663+
results: {},
2664+
errors: {
2665+
[longUrl]: ['URL length must not exceed 2048 characters'],
2666+
},
2667+
});
2668+
});
2669+
2670+
it.each([
2671+
[400, 'Bad Request'],
2672+
[401, 'Unauthorized'],
2673+
[403, 'Forbidden'],
2674+
[404, 'Not Found'],
2675+
[500, 'Internal Server Error'],
2676+
[502, 'Bad Gateway'],
2677+
[503, 'Service Unavailable'],
2678+
[504, 'Gateway Timeout'],
2679+
])(
2680+
'should return an error response on %i status code',
2681+
async (statusCode, statusText) => {
2682+
const scope = nock(PHISHING_DETECTION_BASE_URL)
2683+
.post(`/${PHISHING_DETECTION_BULK_SCAN_ENDPOINT}`, {
2684+
urls: testUrls,
2685+
})
2686+
.reply(statusCode);
2687+
2688+
const response = await controller.bulkScanUrls(testUrls);
2689+
expect(response).toStrictEqual({
2690+
results: {},
2691+
errors: {
2692+
api_error: [`${statusCode} ${statusText}`],
2693+
},
2694+
});
2695+
expect(scope.isDone()).toBe(true);
2696+
},
2697+
);
2698+
2699+
it('should handle timeouts correctly', async () => {
2700+
const scope = nock(PHISHING_DETECTION_BASE_URL)
2701+
.post(`/${PHISHING_DETECTION_BULK_SCAN_ENDPOINT}`, {
2702+
urls: testUrls,
2703+
})
2704+
.delayConnection(20000)
2705+
.reply(200, {});
2706+
2707+
const promise = controller.bulkScanUrls(testUrls);
2708+
clock.tick(15000);
2709+
const response = await promise;
2710+
expect(response).toStrictEqual({
2711+
results: {},
2712+
errors: {
2713+
network_error: ['timeout of 15000ms exceeded'],
2714+
},
2715+
});
2716+
expect(scope.isDone()).toBe(false);
2717+
});
2718+
2719+
it('should process URLs in batches when more than 50 URLs are provided', async () => {
2720+
const batchSize = 50;
2721+
const totalUrls = 120;
2722+
const manyUrls = Array(totalUrls)
2723+
.fill(0)
2724+
.map((_, i) => `https://example${i}.com`);
2725+
2726+
// Expected batches
2727+
const batch1 = manyUrls.slice(0, batchSize);
2728+
const batch2 = manyUrls.slice(batchSize, 2 * batchSize);
2729+
const batch3 = manyUrls.slice(2 * batchSize);
2730+
2731+
// Mock responses for each batch
2732+
const mockBatch1Response: BulkPhishingDetectionScanResponse = {
2733+
results: batch1.reduce<Record<string, PhishingDetectionScanResult>>(
2734+
(acc, url) => {
2735+
acc[url] = {
2736+
domainName: url.replace('https://', ''),
2737+
recommendedAction: RecommendedAction.None,
2738+
};
2739+
return acc;
2740+
},
2741+
{},
2742+
),
2743+
errors: {},
2744+
};
2745+
2746+
const mockBatch2Response: BulkPhishingDetectionScanResponse = {
2747+
results: batch2.reduce<Record<string, PhishingDetectionScanResult>>(
2748+
(acc, url) => {
2749+
acc[url] = {
2750+
domainName: url.replace('https://', ''),
2751+
recommendedAction: RecommendedAction.None,
2752+
};
2753+
return acc;
2754+
},
2755+
{},
2756+
),
2757+
errors: {},
2758+
};
2759+
2760+
const mockBatch3Response: BulkPhishingDetectionScanResponse = {
2761+
results: batch3.reduce<Record<string, PhishingDetectionScanResult>>(
2762+
(acc, url) => {
2763+
acc[url] = {
2764+
domainName: url.replace('https://', ''),
2765+
recommendedAction: RecommendedAction.None,
2766+
};
2767+
return acc;
2768+
},
2769+
{},
2770+
),
2771+
errors: {},
2772+
};
2773+
2774+
// Setup nock to handle all three batch requests
2775+
const scope1 = nock(PHISHING_DETECTION_BASE_URL)
2776+
.post(`/${PHISHING_DETECTION_BULK_SCAN_ENDPOINT}`, {
2777+
urls: batch1,
2778+
})
2779+
.reply(200, mockBatch1Response);
2780+
2781+
const scope2 = nock(PHISHING_DETECTION_BASE_URL)
2782+
.post(`/${PHISHING_DETECTION_BULK_SCAN_ENDPOINT}`, {
2783+
urls: batch2,
2784+
})
2785+
.reply(200, mockBatch2Response);
2786+
2787+
const scope3 = nock(PHISHING_DETECTION_BASE_URL)
2788+
.post(`/${PHISHING_DETECTION_BULK_SCAN_ENDPOINT}`, {
2789+
urls: batch3,
2790+
})
2791+
.reply(200, mockBatch3Response);
2792+
2793+
const response = await controller.bulkScanUrls(manyUrls);
2794+
2795+
// Verify all scopes were called
2796+
expect(scope1.isDone()).toBe(true);
2797+
expect(scope2.isDone()).toBe(true);
2798+
expect(scope3.isDone()).toBe(true);
2799+
2800+
// Check all results were merged correctly
2801+
const combinedResults = {
2802+
...mockBatch1Response.results,
2803+
...mockBatch2Response.results,
2804+
...mockBatch3Response.results,
2805+
};
2806+
2807+
expect(Object.keys(response.results)).toHaveLength(totalUrls);
2808+
expect(response.results).toStrictEqual(combinedResults);
2809+
});
2810+
2811+
it('should handle mixed results with both successful scans and errors', async () => {
2812+
const mixedResponse: BulkPhishingDetectionScanResponse = {
2813+
results: {
2814+
'https://example1.com': {
2815+
domainName: 'example1.com',
2816+
recommendedAction: RecommendedAction.None,
2817+
},
2818+
},
2819+
errors: {
2820+
'https://example2.com': ['Failed to process URL'],
2821+
'https://example3.com': ['Domain not found'],
2822+
},
2823+
};
2824+
2825+
const scope = nock(PHISHING_DETECTION_BASE_URL)
2826+
.post(`/${PHISHING_DETECTION_BULK_SCAN_ENDPOINT}`, {
2827+
urls: testUrls,
2828+
})
2829+
.reply(200, mixedResponse);
2830+
2831+
const response = await controller.bulkScanUrls(testUrls);
2832+
expect(response).toStrictEqual(mixedResponse);
2833+
expect(scope.isDone()).toBe(true);
2834+
});
2835+
2836+
it('should have error merging issues when multiple batches return errors with the same key', async () => {
2837+
// Create enough URLs to need two batches (over 50)
2838+
const batchSize = 50;
2839+
const totalUrls = 100;
2840+
const manyUrls = Array(totalUrls)
2841+
.fill(0)
2842+
.map((_, i) => `https://example${i}.com`);
2843+
2844+
// The URLs will be split into two batches
2845+
const batch1 = manyUrls.slice(0, batchSize);
2846+
const batch2 = manyUrls.slice(batchSize);
2847+
2848+
// Setup nock to handle both batch requests with different error responses
2849+
const scope1 = nock(PHISHING_DETECTION_BASE_URL)
2850+
.post(`/${PHISHING_DETECTION_BULK_SCAN_ENDPOINT}`, {
2851+
urls: batch1,
2852+
})
2853+
.reply(404, { error: 'Not Found' });
2854+
2855+
const scope2 = nock(PHISHING_DETECTION_BASE_URL)
2856+
.post(`/${PHISHING_DETECTION_BULK_SCAN_ENDPOINT}`, {
2857+
urls: batch2,
2858+
})
2859+
.reply(500, { error: 'Internal Server Error' });
2860+
2861+
const response = await controller.bulkScanUrls(manyUrls);
2862+
2863+
expect(scope1.isDone()).toBe(true);
2864+
expect(scope2.isDone()).toBe(true);
2865+
2866+
// With the fixed implementation, we should now preserve all errors
2867+
expect(response.errors).toHaveProperty('api_error');
2868+
expect(response.errors.api_error).toHaveLength(2);
2869+
expect(response.errors.api_error).toContain('404 Not Found');
2870+
expect(response.errors.api_error).toContain('500 Internal Server Error');
2871+
});
2872+
});
25902873
});

0 commit comments

Comments
 (0)