Skip to content

Commit 3451214

Browse files
committed
Check img src too, allow disabling external url checking
1 parent e1238d7 commit 3451214

File tree

6 files changed

+52
-27
lines changed

6 files changed

+52
-27
lines changed

README.md

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,21 @@
22

33
An Astro integration that checks for broken links in your website during static build. It logs any broken links to the console and writes them to a file, grouping them by the document in which they occur.
44

5-
## Features
5+
## Goals
66

77
- **Checks Internal and External Links**: Validates all `<a href="...">` links found in your HTML pages.
88
- **Logs Broken Links**: Outputs broken link information to both the console and a log file.
9-
- **Grouped by Document**: Broken links are grouped by the document in which they occur, making it easier to identify and fix issues.
10-
- **Caching Mechanism**: Avoids redundant checks by caching the results of previously checked links.
11-
- **Parallel Processing**: Checks links in parallel to improve performance.
12-
- **Development Mode Middleware**: Checks links on each page load during development.
13-
- **Post-Build Validation**: Scans all generated HTML files after building your site.
9+
- **Grouped by broken URL**: To allow for quick search and replacement, a list of all pages containing the broken URL is logged.
10+
- **Caching Mechanism**: Avoids redundant checks by caching the results of previously checked links, both internal and external, whether they are valid or not.
11+
- **Parallel Processing**: Checks links and does IO and network operations in parallel to improve performance. We first collect all links from all pages, then only check each once, first loading the tsv cache, then saving it again when we are done. All http requests happen in parallel.
12+
- **Local redirect awareness**: If a link is redirected in astro.config.mjs, it will be followed.
13+
- **Timeouts and retries**: To avoid false positives, links that fail to load with ECONNRESET are retried 3 times with exponential backoff. Timeouts are set to a maximum of 3 seconds, including retries.
14+
- **Link text preservation**: The contents of "href" are only normalized to a domain-relative path (like /foo/bar/) if they are "../relative" or "./relative" or "relative" etc. It is otherwise preserved for reporting purposes.
15+
- **Cross-platform compatibility**: The physical paths of the html files are normalized to domain relative paths.
16+
- **Disk caching of remote links**: To speed up subsequent builds, a tab-delimited text file is optionally written to disk containing the contents of all remote links checked and the status code returned by the server, in the form URL<tab>ok/failed<tab>status code<tab>ISO-8601-formatted timestamp.
17+
18+
19+
1420

1521
## Installation
1622

@@ -28,6 +34,10 @@ export default defineConfig({
2834
integrations: [
2935
astroBrokenLinksChecker({
3036
logFilePath: 'broken-links.log', // Optional: specify the log file path
37+
remoteLinksCacheFilePath: 'remote-links-cache.tsv', // Optional: specify the path to a tab-separated file to cache remote links
38+
maxConcurrency: 10, // Optional: specify the maximum number of concurrent link checks
39+
timeout: 3000, // Optional: specify the maximum time in milliseconds for a link check to complete
40+
cacheExpiryMinutes: 30, // Optional: specify the number of minutes after which a cached external link should be re-checked
3141
}),
3242
],
3343
});

check-links.js

Lines changed: 28 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -13,13 +13,18 @@ export async function checkLinksInHtml(
1313
checkedLinks = new Map(),
1414
distPath = '',
1515
astroConfigRedirects = {},
16-
logger
16+
logger,
17+
checkExternalLinks = true
1718
) {
1819
const root = parse(htmlContent);
1920
const linkElements = root.querySelectorAll('a[href]');
2021
const links = linkElements.map((el) => el.getAttribute('href'));
22+
// add img src
23+
const imgElements = root.querySelectorAll('img[src]');
24+
const imgLinks = imgElements.map((el) => el.getAttribute('src'));
25+
links.push(...imgLinks);
2126

22-
const limit = pLimit(10); // Limit to 10 concurrent link checks
27+
const limit = pLimit(50); // Limit to 50 concurrent link checks
2328

2429
const checkLinkPromises = links.map((link) =>
2530
limit(async () => {
@@ -89,27 +94,30 @@ export async function checkLinksInHtml(
8994
}
9095
} else {
9196
// External link, check via HTTP request. Retry 3 times if ECONNRESET
92-
let retries = 0;
93-
while (retries < 3) {
94-
try {
95-
const response = await fetch(fetchLink, { method: 'GET' });
96-
isBroken = !response.ok;
97-
if (isBroken) {
98-
logger.error(`${response.status} Error fetching ${fetchLink}`);
97+
if (checkExternalLinks) {
98+
let retries = 0;
99+
while (retries < 3) {
100+
try {
101+
const response = await fetch(fetchLink, { method: 'GET' });
102+
isBroken = !response.ok;
103+
if (isBroken) {
104+
logger.error(`${response.status} Error fetching ${fetchLink}`);
105+
}
106+
break;
107+
} catch (error) {
108+
isBroken = true;
109+
let statusCodeNumber = error.errno == 'ENOTFOUND' ? 404 : (error.errno);
110+
logger.error(`${statusCodeNumber} error fetching ${fetchLink}`);
111+
if (error.errno === 'ECONNRESET') {
112+
retries++;
113+
continue;
114+
}
115+
break;
116+
}
99117
}
100-
break;
101-
} catch (error) {
102-
isBroken = true;
103-
let statusCodeNumber = error.errno == 'ENOTFOUND' ? 404 : (error.errno);
104-
logger.error(`${statusCodeNumber} error fetching ${fetchLink}`);
105-
if (error.errno === 'ECONNRESET') {
106-
retries++;
107-
continue;
108-
}
109-
break;
110118
}
111119
}
112-
}
120+
113121

114122
// Cache the link's validity
115123
checkedLinks.set(fetchLink, !isBroken);

index.js

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,8 @@ export default function astroBrokenLinksChecker(options = {}) {
4040
checkedLinks,
4141
distPath,
4242
astroConfigRedirects,
43-
logger
43+
logger,
44+
options.checkExternalLinks
4445
);
4546
});
4647
await Promise.all(checkHtmlPromises);

tests/integration.test.js

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,8 @@ describe('Astro Broken Links Checker Integration', () => {
4343
expect(logContent).toContain('../path/changing/relative-broken-link');
4444
expect(logContent).toContain('https://non-existent-page.com/page');
4545
expect(logContent).toContain('https://non-existent-page.com/page?query=string#fragment');
46+
expect(logContent).toContain('https://non-existent-page.com/image.jpg');
47+
expect(logContent).toContain('/missing.jpg');
4648

4749
expect(logContent).toContain('Found in');
4850
expect(logContent).toContain('/');
@@ -56,5 +58,6 @@ describe('Astro Broken Links Checker Integration', () => {
5658
expect(logContent).not.toContain('Broken link: /\n'); // Expect '/about' to not be reported as broken
5759
expect(logContent).not.toContain('Broken link: https://microsoft.com'); // Expect 'https://microsoft.com' to not be reported as broken
5860
expect(logContent).not.toContain('Broken link: /redirected'); // Expect '/redirected' to not be reported as broken
61+
expect(logContent).not.toContain('Broken link: /exists.jpg'); // Expect '/exists.jpg' to not be reported as broken
5962
});
6063
});

tests/public/exists.jpg

Loading

tests/src/pages/about.astro

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,6 @@
88
<a href="/broken/with?query=string#fragment">Broken link with query and fragment</a>
99
<a href="https://non-existent-page.com/page">Non Existent Page</a>
1010
<a href="https://non-existent-page.com/page?query=string#fragment">Non Existent Page with query and fragment</a>
11+
<img src="https://non-existent-page.com/image.jpg" alt="Non Existent Image">
12+
<img src="/missing.jpg" alt="Non Existent Local Image">
13+
<img src="/exists.jpg" alt="Real Local Image">

0 commit comments

Comments
 (0)