
Commit fa91cb7

Merge pull request #849 from CorrelAid/new_build_method
re-add jitter, add file description
2 parents 2e304d3 + f8b16e2

2 files changed: +64 -32 lines changed

.github/workflows/cd.yaml

Lines changed: 1 addition & 1 deletion
@@ -69,4 +69,4 @@ jobs:
           vercel-org-id: ${{ secrets.ORG_ID }}
           vercel-project-id: ${{ secrets.PROJECT_ID }}
           working-directory: ./
-          vercel-args: '--prod'
+          vercel-args: '--prod --archive=split-tgz'

dl_external_imgs.js

Lines changed: 63 additions & 31 deletions
@@ -1,3 +1,20 @@
+/**
+ * Asset Localization Script for Static Website Build
+ *
+ * This Node.js script handles the localization of external assets:
+ * - Downloads and caches external assets (images and PDFs) from Directus
+ * - Processes HTML and JSON files in a build directory (defaults to '.svelte-kit/cloudflare')
+ * - Replaces remote URLs with local file paths in the processed files
+ *
+ * Features:
+ * - Concurrent download management
+ * - Error handling with retries
+ * - Rate limiting protection
+ * - Fallback asset replacement for failed downloads (403 errors)
+ * - Progress tracking and logging
+ *
+ * Only runs when PUBLIC_ADAPTER='STATIC'
+ */
 import glob from 'tiny-glob';
 import {createWriteStream} from 'fs';
 import {existsSync} from 'fs';
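
The header above summarizes behavior that is spread across the rest of the file. As a toy, self-contained illustration of the core idea only (download one remote asset and rewrite the reference in a built file; localizeOne and its arguments are made-up names, not part of the script):

import {readFile, writeFile} from 'fs/promises';
import fetch from 'node-fetch';

// Fetch one remote asset, save it locally, and point the built file at the copy.
async function localizeOne(filePath, remoteUrl, localPath) {
	const res = await fetch(remoteUrl);
	await writeFile(localPath, Buffer.from(await res.arrayBuffer()));
	const html = await readFile(filePath, 'utf8');
	await writeFile(filePath, html.replaceAll(remoteUrl, localPath));
}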
@@ -9,6 +26,24 @@ import path from 'node:path';
 import dotenv from 'dotenv';
 import fetch from 'node-fetch';
 
+// Configuration constants
+const CONFIG = {
+	FILE_CONCURRENCY: 7, // Number of concurrent file processes
+	DOWNLOAD_CONCURRENCY: 2, // Number of concurrent downloads
+	MAX_RETRIES: 3, // Maximum number of retry attempts
+	BASE_RETRY_DELAY: 5000, // Base delay for retries in ms
+	DOWNLOAD_TIMEOUT: 300000, // Download timeout in ms (5 minutes)
+	JITTER_MAX: 3000, // Maximum jitter in ms
+	JITTER_MIN: 500, // Minimum jitter in ms
+};
+
+// Utility function to add jitter to delays
+const addJitter = (baseDelay) => {
+	const jitter =
+		Math.random() * (CONFIG.JITTER_MAX - CONFIG.JITTER_MIN) + CONFIG.JITTER_MIN;
+	return baseDelay + jitter;
+};
+
 dotenv.config({
 	path: path.resolve(process.cwd(), `.env.${process.env.NODE_ENV}.local`),
 });
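
A quick sanity check on the jitter bounds (this assumes the CONFIG and addJitter definitions added above; the loop itself is illustrative): with JITTER_MIN = 500 and JITTER_MAX = 3000, addJitter(d) returns a value uniformly distributed in roughly [d + 500, d + 3000), so addJitter(100) lands between about 600 and 3100 ms.

for (const base of [100, 1000, 5000]) {
	// Each call returns base + a uniform random value in [500, 3000) ms.
	console.log(`addJitter(${base}) ->`, Math.round(addJitter(base)), 'ms');
}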
@@ -20,15 +55,11 @@ dotenv.config({path: path.resolve(process.cwd(), '.env')});
 
 const downloadCache = new Map();
 const urlToPathCache = new Map();
-
 const URL = `${process.env.PUBLIC_API_URL}/assets`;
-
 const buildDirectory = process.env.BUILD_DIR || '.svelte-kit/cloudflare';
 const newAssetsDirectory = buildDirectory + '/assets';
-
 const asset403Counts = new Map();
 const REPLACEMENT_ASSET_ID = 'a525ce03-7e70-446f-9eff-1edd222aa002';
-
 const startTime = Date.now();
 
 async function postbuild() {
@@ -49,13 +80,12 @@ async function postbuild() {
 	);
 	console.log(`Processing ${targetFiles.length} files...`);
 
-	const concurrencyLimit = 7;
 	const queue = [];
 	let activePromises = 0;
 	let completedFiles = 0;
 
 	const processQueue = async () => {
-		while (queue.length > 0 && activePromises < concurrencyLimit) {
+		while (queue.length > 0 && activePromises < CONFIG.FILE_CONCURRENCY) {
 			const task = queue.shift();
 			activePromises++;
 
@@ -86,7 +116,7 @@ async function postbuild() {
 	processQueue();
 
 	while (queue.length > 0 || activePromises > 0) {
-		await new Promise((resolve) => setTimeout(resolve, 100));
+		await new Promise((resolve) => setTimeout(resolve, addJitter(100)));
 	}
 
 	const duration = Date.now() - startTime;
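
Taken together, these two hunks form a polled worker pool: drain the queue up to CONFIG.FILE_CONCURRENCY, then poll (now with jitter, so concurrent sleepers desynchronize) until the queue empties. A self-contained sketch of that pattern, with illustrative names (runWithLimit, tasks) that are not in the file:

// Run async tasks with at most `limit` in flight, polling until all settle.
async function runWithLimit(tasks, limit) {
	const queue = [...tasks];
	let active = 0;
	const drain = () => {
		while (queue.length > 0 && active < limit) {
			const task = queue.shift();
			active++;
			task().finally(() => {
				active--;
				drain(); // a finished task frees a slot, so try to start more
			});
		}
	};
	drain();
	while (queue.length > 0 || active > 0) {
		await new Promise((resolve) => setTimeout(resolve, 100));
	}
}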
@@ -111,14 +141,12 @@ async function processFile(filePath, fileContent) {
 		downloadPath += type === 'image' ? '.webp' : '.pdf';
 
 		try {
-			// Check cache first
 			if (urlToPathCache.has(url)) {
 				const cachedPath = urlToPathCache.get(url);
 				await replaceURL(url, cachedPath, filePath);
 				continue;
 			}
 
-			// Check if file already exists
 			if (existsSync(downloadPath)) {
 				let relativePath = downloadPath.replace(
 					`${process.cwd()}/${buildDirectory}`,
@@ -164,7 +192,6 @@ async function findUrls(fileContent, filePath) {
 			urls.push({url: url, type: 'image'});
 		} else {
 			try {
-				// Just do a HEAD request to check if it's accessible
 				const response = await fetch(url, {
 					method: 'HEAD',
 					headers: {
@@ -199,9 +226,11 @@ const getRandomUserAgent = () => {
 
 async function downloadFile(url, downloadPath) {
 	try {
-		// 1. Increase fetch timeout and add keep-alive
 		const controller = new AbortController();
-		const timeout = setTimeout(() => controller.abort(), 300000); // 5 minutes for large files
+		const timeout = setTimeout(
+			() => controller.abort(),
+			CONFIG.DOWNLOAD_TIMEOUT,
+		);
 
 		const response = await fetch(url, {
 			signal: controller.signal,
@@ -210,11 +239,11 @@ async function downloadFile(url, downloadPath) {
 				'Accept-Encoding': 'gzip,deflate',
 				'Cache-Control': 'no-cache',
 				Connection: 'keep-alive',
-				'Keep-Alive': 'timeout=300', // 5 minutes keep-alive
+				'Keep-Alive': 'timeout=300',
 				Accept: '*/*',
 			},
 			compress: true,
-			timeout: 300000,
+			timeout: CONFIG.DOWNLOAD_TIMEOUT,
 		});
 
 		clearTimeout(timeout);
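
Note that CONFIG.DOWNLOAD_TIMEOUT now drives both the AbortController timer and node-fetch's own timeout option, so a stalled transfer is cancelled by whichever fires first. A stripped-down sketch of the pairing (the URL is a placeholder):

const controller = new AbortController();
const timer = setTimeout(() => controller.abort(), CONFIG.DOWNLOAD_TIMEOUT);
try {
	// node-fetch v2: `signal` aborts the request; `timeout` is its own ms limit.
	const res = await fetch('https://example.com/large-file', {
		signal: controller.signal,
		timeout: CONFIG.DOWNLOAD_TIMEOUT,
	});
	// ...stream res.body to disk here...
} finally {
	clearTimeout(timer);
}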
@@ -246,7 +275,6 @@ async function downloadFile(url, downloadPath) {
 
 		stream.on('data', (chunk) => {
 			downloadedSize += chunk.length;
-
 			if (!fileStream.write(chunk)) {
 				stream.pause();
 			}
273301

274302
const downloadTimeout = setTimeout(() => {
275303
cleanup(new Error(`Download timeout after 5 minutes: ${url}`));
276-
}, 300000);
304+
}, CONFIG.DOWNLOAD_TIMEOUT);
277305

278306
fileStream.on('finish', () => {
279307
clearTimeout(downloadTimeout);
@@ -287,12 +315,22 @@ async function downloadFile(url, downloadPath) {
287315
}
288316
}
289317

290-
async function retryOnTimeout(url, downloadPath, maxRetries = 3) {
318+
async function retryOnTimeout(
319+
url,
320+
downloadPath,
321+
maxRetries = CONFIG.MAX_RETRIES,
322+
) {
291323
let retryCount = 0;
292324
const assetId = url.split('/assets/')[1]?.split('?')[0];
293325

294326
while (retryCount < maxRetries) {
295327
try {
328+
if (retryCount > 0) {
329+
await new Promise((resolve) =>
330+
setTimeout(resolve, addJitter(Math.random() * 1000)),
331+
);
332+
}
333+
296334
const headResponse = await fetch(url, {
297335
method: 'HEAD',
298336
headers: {
@@ -301,7 +339,6 @@ async function retryOnTimeout(url, downloadPath, maxRetries = 3) {
 				},
 			});
 
-			// Handle 403 errors
 			if (headResponse.status === 403) {
 				if (assetId) {
 					const currentCount = asset403Counts.get(assetId) || 0;
@@ -320,10 +357,11 @@ async function retryOnTimeout(url, downloadPath, maxRetries = 3) {
 				throw new Error(`HTTP error! status: 403`);
 			}
 
-			// Handle rate limiting
 			if (headResponse.status === 429) {
 				const retryAfter = headResponse.headers.get('Retry-After');
-				const waitTime = retryAfter ? parseInt(retryAfter) * 1000 : 5000;
+				const waitTime = addJitter(
+					retryAfter ? parseInt(retryAfter) * 1000 : CONFIG.BASE_RETRY_DELAY,
+				);
 				await new Promise((resolve) => setTimeout(resolve, waitTime));
 				continue;
 			}
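
Concretely, with the constants above: a 429 carrying Retry-After: 2 now waits 2000 ms plus 500-3000 ms of jitter (2.5-5 s total), and a 429 without the header waits 5.5-8 s instead of a flat 5 s. (This assumes a numeric Retry-After value in seconds, which is the form parseInt handles.)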
@@ -335,19 +373,15 @@ async function retryOnTimeout(url, downloadPath, maxRetries = 3) {
 			const fileSize = parseInt(
 				headResponse.headers.get('content-length') || '0',
 			);
-
-			// Clean up URL before download
 			const cleanedUrl = cleanupUrl(url);
 
-			// Adjust strategy based on file size
 			if (fileSize > 5 * 1024 * 1024) {
 				console.log(
 					`Large file detected (${fileSize} bytes), using chunked download`,
 				);
 				return downloadFile(cleanedUrl, downloadPath);
 			}
 
-			// Regular download for smaller files
 			return await downloadFile(cleanedUrl, downloadPath);
 		} catch (error) {
 			retryCount++;
@@ -356,7 +390,9 @@ async function retryOnTimeout(url, downloadPath, maxRetries = 3) {
 				throw error;
 			}
 
-			const backoffTime = 5000 * Math.pow(2, retryCount);
+			const backoffTime = addJitter(
+				CONFIG.BASE_RETRY_DELAY * Math.pow(2, retryCount),
+			);
 			console.warn(
 				`Retry ${retryCount}/${maxRetries} for ${url} in ${backoffTime}ms`,
 			);
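
For reference, the schedule this produces with BASE_RETRY_DELAY = 5000: after the first failure the function waits ~10 s plus 0.5-3 s of jitter, after the second ~20 s plus jitter, and the third failure exhausts MAX_RETRIES and rethrows. A one-off way to print the schedule (illustrative, reusing the definitions above):

for (let retry = 1; retry < CONFIG.MAX_RETRIES; retry++) {
	const wait = addJitter(CONFIG.BASE_RETRY_DELAY * Math.pow(2, retry));
	console.log(`retry ${retry}: ~${Math.round(wait)} ms`);
}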
@@ -365,19 +401,16 @@ async function retryOnTimeout(url, downloadPath, maxRetries = 3) {
 	}
 }
 
-// 7. Add queue management for concurrent downloads
 const downloadQueue = new Map();
-const maxConcurrentDownloads = 2; // Reduce concurrent downloads
 let activeDownloads = 0;
 
 async function queueDownload(url, downloadPath) {
-	// Check download cache
 	if (downloadCache.has(url)) {
 		return downloadCache.get(url);
 	}
 
-	while (activeDownloads >= maxConcurrentDownloads) {
-		await new Promise((resolve) => setTimeout(resolve, 1000));
+	while (activeDownloads >= CONFIG.DOWNLOAD_CONCURRENCY) {
+		await new Promise((resolve) => setTimeout(resolve, addJitter(1000)));
 	}
 
 	activeDownloads++;
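
One effect of the jittered poll here: each caller waiting for a download slot re-checks every ~1.5-4 s rather than exactly every second, so waiters stop waking in lockstep. The gate itself, isolated (the acquire name is illustrative, not from the file):

async function acquireDownloadSlot() {
	while (activeDownloads >= CONFIG.DOWNLOAD_CONCURRENCY) {
		// Sleep 1000 ms plus 500-3000 ms of jitter so waiters desynchronize.
		await new Promise((resolve) => setTimeout(resolve, addJitter(1000)));
	}
	activeDownloads++; // caller must decrement once its download settles
}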
@@ -397,7 +430,6 @@ async function replaceURL(url, relativePath, filePath) {
 	await writeFile(filePath, newFileContent);
 }
 
-// Update the main execution
 if (process.env.PUBLIC_ADAPTER === 'STATIC') {
 	postbuild().catch((error) => {
 		console.error('\nScript failed:', error);
