@@ -4,11 +4,30 @@ const getUrlToCheck = (currentUrl, deploymentUrl) => {
44 return url . toString ( ) ;
55} ;
66
/**
 * Resolve after `ms` milliseconds. Used to space out retries.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>} Promise that resolves once the timer fires.
 */
const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

/**
 * Issue a GET request for `url` and report the HTTP status together with the
 * URL's pathname, retrying when the request itself fails (i.e. `fetch` rejects).
 *
 * Non-200 statuses are NOT retried; they are returned to the caller as-is.
 * When every attempt rejects, the failure is logged and reported as status 500.
 *
 * @param {string} url - Absolute URL to request.
 * @param {number} [retries=3] - Maximum number of attempts. Values below 1
 *   (or non-numeric values) are treated as 1 so a result tuple is always returned.
 * @returns {Promise<[number, string]>} `[status, pathname]`.
 * @throws {TypeError} If `url` is not a valid absolute URL.
 */
const checkUrl = async (url, retries = 3) => {
  // Parse once, up front: an invalid URL fails immediately instead of after
  // burning every retry and back-off delay, and the pathname is not re-parsed
  // on each use.
  const { pathname } = new URL(url);

  // Always make at least one attempt; otherwise the loop would be skipped and
  // the function would resolve to `undefined`, breaking callers that
  // destructure the `[status, pathname]` tuple.
  const attempts = Math.max(1, Math.ceil(retries) || 1);

  for (let attempt = 1; attempt <= attempts; attempt++) {
    try {
      const { status } = await fetch(url, {
        method: "GET",
        headers: {
          "User-Agent": "refine-link-checker",
        },
      });
      return [status, pathname];
    } catch (error) {
      if (attempt === attempts) {
        console.error(
          `Failed to fetch ${pathname} after ${attempts} attempts:`,
          error.message,
        );
        return [500, pathname];
      }
      // Linear back-off before the next attempt: 1s, 2s, 3s, ...
      await sleep(1000 * attempt);
    }
  }
};
1332
1433const toChunks = ( array , chunkSize ) => {
@@ -19,23 +38,19 @@ const toChunks = (array, chunkSize) => {
1938 return chunks ;
2039} ;
2140
/**
 * Check the URLs of one chunk sequentially and record each resulting pathname
 * in `success` (status exactly 200) or `fail` (any other status).
 *
 * Both arrays are mutated in place; nothing is returned.
 *
 * @param {Iterable<string>} chunk - URLs to check, processed in order.
 * @param {string} deploymentUrl - Passed to `getUrlToCheck` together with each URL
 *   to build the address that is actually requested.
 * @param {string[]} success - Receives pathnames that answered with 200.
 * @param {string[]} fail - Receives pathnames that answered with anything else.
 * @returns {Promise<void>}
 */
const checkChunk = async (chunk, deploymentUrl, success, fail) => {
  for (const url of chunk) {
    const target = getUrlToCheck(url, deploymentUrl);
    const [status, pathname] = await checkUrl(target);

    const bucket = status === 200 ? success : fail;
    bucket.push(pathname);

    // Small delay between requests to avoid overwhelming the server
    await sleep(100);
  }
};
4055
4156const checkExistingLinks = async ( sitemapUrl , deploymentUrl ) => {
@@ -50,17 +65,24 @@ const checkExistingLinks = async (sitemapUrl, deploymentUrl) => {
5065
5166 console . log ( "Checking for existing urls in:" , sitemapUrl ) ;
5267 console . log ( "Deployment url:" , deploymentUrl ) ;
68+ console . log ( `Total URLs to check: ${ urls . length } ` ) ;
5369
54- const chunks = toChunks ( urls , 10 ) ;
70+ const chunks = toChunks ( urls , 50 ) ;
5571
5672 let done = 0 ;
5773
5874 for ( const chunk of chunks ) {
59- console . log ( `Checking chunk ${ done + 1 } /${ chunks . length } ` ) ;
6075 done ++ ;
76+ console . log (
77+ `Checking chunk ${ done } /${ chunks . length } (${
78+ success . length + fail . length
79+ } /${ urls . length } URLs processed)`,
80+ ) ;
6181 await checkChunk ( chunk , deploymentUrl , success , fail ) ;
6282 }
6383
84+ console . log ( `\nResults: ${ success . length } successful, ${ fail . length } failed` ) ;
85+
6486 if ( fail . length > 0 ) {
6587 console . log ( "Broken links:" ) ;
6688 fail . forEach ( ( link ) => {
0 commit comments