Skip to content

Commit ff571a3

Browse files
fix(docs): link checking with retries and user-agent header
1 parent f37d2f4 commit ff571a3

File tree

1 file changed

+46
-24
lines changed

1 file changed

+46
-24
lines changed

.github/workflows/scripts/check-existing-doc-links.js

Lines changed: 46 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,30 @@ const getUrlToCheck = (currentUrl, deploymentUrl) => {
44
return url.toString();
55
};
66

7-
const checkUrl = async (url) => {
8-
const { status } = await fetch(url, {
9-
method: "GET",
10-
});
11-
return [status, new URL(url).pathname];
7+
/**
 * Returns a promise that resolves (with no value) once `ms` milliseconds have
 * elapsed on a `setTimeout` timer.
 *
 * @param {number} ms - Delay handed to `setTimeout`, in milliseconds.
 * @returns {Promise<void>} Settles when the timer fires.
 */
const sleep = (ms) =>
  new Promise((wake) => {
    setTimeout(wake, ms);
  });
8+
9+
/**
 * Requests `url` and reports its HTTP status, retrying when the request
 * itself fails (i.e. `fetch` rejects). A response of any status counts as a
 * completed attempt and is returned without retrying.
 *
 * @param {string} url - Absolute URL to request.
 * @param {number} [retries=3] - Maximum number of attempts. Non-finite values
 *   and values below 1 are treated as a single attempt, so the function
 *   always resolves to a `[status, pathname]` tuple.
 * @returns {Promise<[number, string]>} The response status (or 500 when every
 *   attempt failed) paired with the pathname of `url`.
 * @throws {TypeError} If `url` cannot be parsed by `new URL`.
 */
const checkUrl = async (url, retries = 3) => {
  // Parse once up front: every exit path reports the pathname.
  const { pathname } = new URL(url);

  // Guarantee at least one attempt; otherwise the loop could fall through and
  // resolve to `undefined`, which callers destructure as a tuple.
  const attempts = Number.isFinite(retries)
    ? Math.max(1, Math.ceil(retries))
    : 1;

  for (let attempt = 1; attempt <= attempts; attempt++) {
    try {
      const response = await fetch(url, {
        method: "GET",
        headers: {
          "User-Agent": "refine-link-checker",
        },
      });

      // Only the status is needed. Cancel the unread body so the underlying
      // connection can be released; this is best-effort, so a failure to
      // cancel must not turn a completed request into a retry.
      await response.body?.cancel().catch(() => {});

      return [response.status, pathname];
    } catch (error) {
      if (attempt === attempts) {
        console.error(
          `Failed to fetch ${pathname} after ${attempts} attempts:`,
          error.message,
        );
        return [500, pathname];
      }

      // Wait before retrying, a little longer after each failed attempt.
      await sleep(1000 * attempt);
    }
  }
};
1332

1433
const toChunks = (array, chunkSize) => {
@@ -19,23 +38,19 @@ const toChunks = (array, chunkSize) => {
1938
return chunks;
2039
};
2140

22-
const checkChunk = (chunk, deploymentUrl, success, fail) => {
23-
return new Promise((resolve) => {
24-
const promises = chunk.map((url) => {
25-
return checkUrl(getUrlToCheck(url, deploymentUrl));
26-
});
27-
28-
Promise.all(promises).then((results) => {
29-
results.forEach(([status, url]) => {
30-
if (status === 200) {
31-
success.push(url);
32-
} else {
33-
fail.push(url);
34-
}
35-
});
36-
resolve();
37-
});
38-
});
41+
/**
 * Checks the URLs in `chunk` one at a time and records each resulting
 * pathname in one of the caller-supplied arrays.
 *
 * @param {Iterable<string>} chunk - URLs to check, in order.
 * @param {string} deploymentUrl - Passed to `getUrlToCheck` together with each URL.
 * @param {string[]} success - Receives the pathname of every URL whose status is 200.
 * @param {string[]} fail - Receives the pathname of every other URL.
 * @returns {Promise<void>} Resolves after the last URL has been processed.
 */
const checkChunk = async (chunk, deploymentUrl, success, fail) => {
  for (const url of chunk) {
    const target = getUrlToCheck(url, deploymentUrl);
    const [status, pathname] = await checkUrl(target);

    const bucket = status === 200 ? success : fail;
    bucket.push(pathname);

    // Brief pause after every request so the server is not flooded.
    await sleep(100);
  }
};
4055

4156
const checkExistingLinks = async (sitemapUrl, deploymentUrl) => {
@@ -50,17 +65,24 @@ const checkExistingLinks = async (sitemapUrl, deploymentUrl) => {
5065

5166
console.log("Checking for existing urls in:", sitemapUrl);
5267
console.log("Deployment url:", deploymentUrl);
68+
console.log(`Total URLs to check: ${urls.length}`);
5369

54-
const chunks = toChunks(urls, 10);
70+
const chunks = toChunks(urls, 50);
5571

5672
let done = 0;
5773

5874
for (const chunk of chunks) {
59-
console.log(`Checking chunk ${done + 1}/${chunks.length}`);
6075
done++;
76+
console.log(
77+
`Checking chunk ${done}/${chunks.length} (${
78+
success.length + fail.length
79+
}/${urls.length} URLs processed)`,
80+
);
6181
await checkChunk(chunk, deploymentUrl, success, fail);
6282
}
6383

84+
console.log(`\nResults: ${success.length} successful, ${fail.length} failed`);
85+
6486
if (fail.length > 0) {
6587
console.log("Broken links:");
6688
fail.forEach((link) => {

0 commit comments

Comments
 (0)