diff --git a/package.json b/package.json
index 3c7afcab..f1483649 100644
--- a/package.json
+++ b/package.json
@@ -12,30 +12,33 @@
   "license": "AGPL-3.0-or-later",
   "dependencies": {
     "@fortawesome/fontawesome-free": "^5.13.0",
-    "@ipld/car": "^5.3.1",
-    "@webrecorder/awp-sw": "^0.5.3",
+    "@ipld/car": "^5.3.2",
+    "@ipld/unixfs": "^3.0.0",
     "@webrecorder/wabac": "^2.22.16",
     "auto-js-ipfs": "^2.3.0",
     "browsertrix-behaviors": "^0.8.5",
     "btoa": "^1.2.1",
     "bulma": "^0.9.3",
-    "client-zip": "^2.2.2",
+    "client-zip": "^2.3.0",
+    "idb": "^7.1.1",
     "hash-wasm": "^4.9.0",
     "http-status-codes": "^2.1.4",
     "keyword-mark-element": "^0.1.2",
     "node-fetch": "2.6.7",
+    "p-queue": "^8.0.1",
     "pdfjs-dist": "2.2.228",
     "pretty-bytes": "^5.6.0",
     "replaywebpage": "^2.3.7",
     "stream-browserify": "^3.0.0",
     "tsconfig-paths-webpack-plugin": "^4.1.0",
     "unused-filename": "^4.0.1",
-    "uuid": "^8.3.2",
+    "uuid": "^9.0.0",
     "warcio": "^2.4.4"
   },
   "devDependencies": {
     "@typescript-eslint/eslint-plugin": "^6.15.0",
     "@typescript-eslint/parser": "^6.15.0",
+    "@types/uuid": "^10.0.0",
     "copy-webpack-plugin": "^9.0.1",
     "css-loader": "^6.2.0",
     "electron": "^32.2.0",
diff --git a/src/ext/browser-recorder.ts b/src/ext/browser-recorder.ts
index 91d3b52e..01f0d442 100644
--- a/src/ext/browser-recorder.ts
+++ b/src/ext/browser-recorder.ts
@@ -254,8 +254,8 @@ class BrowserRecorder extends Recorder {
     // @ts-expect-error - TS2339 - Property 'behaviorState' does not exist on type 'BrowserRecorder'.
     if (this.behaviorState === BEHAVIOR_RUNNING) {
       title = "Archiving: Autopilot Running!";
-      color = "#0891b2";
-      text = "";
+      color = "#0096ff";
+      text = " ";
       // @ts-expect-error - TS2339 - Property 'numPending' does not exist on type 'BrowserRecorder'.
     } else if (this.numPending === 0) {
       title = "Archiving: No URLs pending, can continue";
diff --git a/src/sw/api.ts b/src/sw/api.ts
new file mode 100644
index 00000000..5745934f
--- /dev/null
+++ b/src/sw/api.ts
@@ -0,0 +1,483 @@
+import { API, type SWCollections, tsToDate } from "@webrecorder/wabac/swlib";
+
+import { Downloader, type Metadata } from "./downloader";
+import { Signer } from "./keystore";
+import { ipfsAdd, ipfsRemove, setAutoIPFSUrl } from "./ipfsutils";
+import { RecProxy } from "./recproxy";
+import { type Collection } from "@webrecorder/wabac/swlib";
+
+// eslint-disable-next-line @typescript-eslint/no-explicit-any
+type RouteMatch = Record<string, any>;
+
+declare let self: ServiceWorkerGlobalScope;
+
+const DEFAULT_SOFTWARE_STRING = `Webrecorder ArchiveWeb.page ${__AWP_VERSION__}, using warcio.js ${__WARCIO_VERSION__}`;
+
+// ===========================================================================
+class ExtAPI extends API {
+  softwareString = "";
+  uploading: Map<string, CountingStream> = new Map<string, CountingStream>();
+
+  constructor(
+    collections: SWCollections,
+    { softwareString = "", replaceSoftwareString = false } = {},
+  ) {
+    super(collections);
+    this.softwareString = replaceSoftwareString
+      ?
softwareString + : softwareString + DEFAULT_SOFTWARE_STRING; + } + + override get routes(): Record<string, string | [string, string]> { + return { + ...super.routes, + downloadPages: "c/:coll/dl", + upload: ["c/:coll/upload", "POST"], + uploadStatus: "c/:coll/upload", + uploadDelete: ["c/:coll/upload", "DELETE"], + recPending: "c/:coll/recPending", + pageTitle: ["c/:coll/pageTitle", "POST"], + ipfsAdd: ["c/:coll/ipfs", "POST"], + ipfsRemove: ["c/:coll/ipfs", "DELETE"], + ipfsDaemonUrl: ["ipfs/daemonUrl", "POST"], + publicKey: "publicKey", + }; + } + + downloaderOpts() { + const softwareString = this.softwareString; + + const signer = new Signer(softwareString, { cacheSig: true }); + + return { softwareString, signer }; + } + + // eslint-disable-next-line @typescript-eslint/no-explicit-any + override async handleApi(request: Request, params: any, event: FetchEvent) { + switch (params._route) { + case "downloadPages": + return await this.handleDownload(params); + + case "upload": + return await this.handleUpload(params, request, event); + + case "uploadStatus": + return await this.getUploadStatus(params); + + case "uploadDelete": + return await this.deleteUpload(params); + + case "recPending": + return await this.recordingPending(params); + + case "pageTitle": + return await this.updatePageTitle(params.coll, request); + + case "publicKey": + return await this.getPublicKey(); + + case "ipfsAdd": + //return await this.startIpfsAdd(event, request, params.coll); + return {}; + + case "ipfsRemove": + //return await this.ipfsRemove(request, params.coll); + return {}; + + case "ipfsDaemonUrl": + return await this.setIPFSDaemonUrlFromBody(request); + + default: + return await super.handleApi(request, params, event); + } + } + + async handleDownload(params: RouteMatch) { + const { dl, error } = await this.getDownloader(params); + if (error) { + return error; + } + return dl.download(); + } + + async getDownloader(params: RouteMatch) { + const coll = await this.collections.loadColl(params.coll); + if (!coll) { + return { error: { error: "collection_not_found" } }; + } + + const pageQ = params["_query"].get("pages"); + const pageList = pageQ === "all" ? 
null : pageQ.split(","); + + const format = params["_query"].get("format") || "wacz"; + const filename = params["_query"].get("filename"); + + return { + dl: new Downloader({ + ...this.downloaderOpts(), + coll, + format, + filename, + pageList, + }), + }; + } + + async handleUpload(params: RouteMatch, request: Request, event: FetchEvent) { + const uploading = this.uploading; + + const prevUpload = uploading.get(params.coll); + + const { url, headers, abortUpload } = await request.json(); + + if (prevUpload && prevUpload.status === "uploading") { + if (abortUpload && prevUpload.abort) { + prevUpload.abort(); + return { aborted: true }; + } + return { error: "already_uploading" }; + } else if (abortUpload) { + return { error: "not_uploading" }; + } + + const { dl, error } = await this.getDownloader(params); + if (error) { + return error; + } + const dlResp = await dl.download(); + if (!(dlResp instanceof Response)) { + return dlResp; + } + const filename = dlResp.filename || ""; + + const abort = new AbortController(); + const signal = abort.signal; + + const counter = new CountingStream(dl.metadata.size, abort); + + const body = dlResp.body!.pipeThrough(counter.transformStream()); + + try { + const urlObj = new URL(url); + urlObj.searchParams.set("filename", filename || ""); + urlObj.searchParams.set("name", dl.metadata["title"] || filename || ""); + // eslint-disable-next-line @typescript-eslint/no-unsafe-argument + const fetchPromise = fetch(urlObj.href, { + method: "PUT", + headers, + duplex: "half", + body, + signal, + // eslint-disable-next-line @typescript-eslint/no-explicit-any + } as any); + uploading.set(params.coll, counter); + if (event.waitUntil) { + event.waitUntil( + this.uploadFinished( + fetchPromise, + params.coll, + dl.metadata, + filename, + counter, + ), + ); + } + return { uploading: true }; + } catch (e: unknown) { + uploading.delete(params.coll); + // eslint-disable-next-line @typescript-eslint/no-explicit-any + return { error: "upload_failed", details: (e as any).toString() }; + } + } + + async uploadFinished( + fetchPromise: Promise<Response>, + collId: string, + metadata: Metadata, + filename: string, + counter: CountingStream, + ) { + try { + const resp = await fetchPromise; + const json = await resp.json(); + + console.log(`Upload finished for ${filename} ${collId}`); + + metadata.uploadTime = new Date().getTime(); + metadata.uploadId = json.id; + if (!metadata.mtime) { + metadata.mtime = metadata.uploadTime; + } + if (!metadata.ctime) { + metadata.ctime = metadata.uploadTime; + } + await this.collections.updateMetadata( + collId, + metadata as Record<string, string>, + ); + counter.status = "done"; + } catch (e) { + console.log(`Upload failed for ${filename} ${collId}`); + console.log(e); + counter.status = counter.aborted ? 
"aborted" : "failed"; + } + } + + async deleteUpload(params: RouteMatch) { + const collId = params.coll; + + this.uploading.delete(collId); + + const coll = await this.collections.loadColl(collId); + + if (coll?.metadata) { + coll.metadata.uploadTime = null; + coll.metadata.uploadId = null; + await this.collections.updateMetadata(collId, coll.metadata); + return { deleted: true }; + } + + return { deleted: false }; + } + + async getUploadStatus(params: RouteMatch) { + let result: Metadata = {}; + const counter = this.uploading.get(params.coll); + + if (!counter) { + result = { status: "idle" }; + } else { + const { size, totalSize, status } = counter; + result = { status, size, totalSize }; + + if (status !== "uploading") { + this.uploading.delete(params.coll); + } + } + + const coll = await this.collections.loadColl(params.coll); + + if (coll?.metadata) { + result.uploadTime = coll.metadata.uploadTime; + result.uploadId = coll.metadata.uploadId; + result.ctime = coll.metadata.ctime; + result.mtime = coll.metadata.mtime; + } + + return result; + } + + async recordingPending(params: RouteMatch) { + const coll = await this.collections.loadColl(params.coll); + if (!coll) { + return { error: "collection_not_found" }; + } + + if (!(coll.store instanceof RecProxy)) { + return { error: "invalid_collection" }; + } + + const numPending = await coll.store.getCounter(); + + return { numPending }; + } + + async prepareColl(collId: string, request: Request) { + const coll = await this.collections.loadColl(collId); + if (!coll) { + return { error: "collection_not_found" }; + } + + const body = await this.setIPFSDaemonUrlFromBody(request); + + return { coll, body }; + } + + async setIPFSDaemonUrlFromBody(request: Request) { + let body; + + try { + body = await request.json(); + if (body.ipfsDaemonUrl) { + setAutoIPFSUrl(body.ipfsDaemonUrl); + } + } catch (_e: unknown) { + body = {}; + } + + return body; + } + + async startIpfsAdd(event: FetchEvent, request: Request, collId: string) { + const { coll, body } = await this.prepareColl(collId, request); + + const client = await self.clients.get(event.clientId); + + const p = runIPFSAdd( + collId, + coll, + client, + this.downloaderOpts(), + this.collections, + body, + ); + + if (event.waitUntil) { + event.waitUntil(p); + } + + try { + await p; + } catch (_e) { + return { error: "ipfs_not_available" }; + } + + return { collId }; + } + + async ipfsRemove(request: Request, collId: string) { + const { coll } = await this.prepareColl(collId, request); + + if (await ipfsRemove(coll)) { + await this.collections.updateMetadata(coll.name, coll.config.metadata); + return { removed: true }; + } + + return { removed: false }; + } + + async updatePageTitle(collId: string, request: Request) { + const json = await request.json(); + const { url, title } = json; + let { ts } = json; + + ts = tsToDate(ts).getTime(); + + const coll = await this.collections.loadColl(collId); + if (!coll) { + return { error: "collection_not_found" }; + } + + //await coll.store.db.init(); + + const result = await coll.store.lookupUrl(url, ts); + + if (!result) { + return { error: "page_not_found" }; + } + + // drop to second precision for comparison + const roundedTs = Math.floor(result.ts / 1000) * 1000; + if (url !== result.url || ts !== roundedTs) { + return { error: "no_exact_match" }; + } + + const page = await coll.store.db.getFromIndex("pages", "url", url); + if (!page) { + return { error: "page_not_found" }; + } + page.title = title; + await coll.store.db.put("pages", page); + + return { 
added: true }; + } + + async getPublicKey() { + const { signer } = this.downloaderOpts(); + const keys = await signer.loadKeys(); + if (!keys?.public) { + return {}; + } else { + return { publicKey: keys.public }; + } + } +} + +// =========================================================================== +async function runIPFSAdd( + collId: string, + coll: Collection, + client: Client | undefined, + // eslint-disable-next-line @typescript-eslint/no-explicit-any + opts: any, + collections: SWCollections, + // eslint-disable-next-line @typescript-eslint/no-explicit-any + replayOpts: any, +) { + let size = 0; + let totalSize = 0; + + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const sendMessage = (type: string, result: any = null) => { + if (client) { + client.postMessage({ + type, + collId, + size, + result, + totalSize, + }); + } + }; + + const { url, cid } = await ipfsAdd( + coll, + opts, + replayOpts, + (incSize: number, _totalSize: number) => { + size += incSize; + totalSize = _totalSize; + sendMessage("ipfsProgress"); + }, + ); + + const result = { cid, ipfsURL: url }; + + sendMessage("ipfsAdd", result); + + if (coll.config.metadata) { + await collections.updateMetadata(coll.name, coll.config.metadata); + } +} + +// =========================================================================== +class CountingStream { + totalSize: number; + status: string; + size = 0; + _abort?: AbortController; + aborted: boolean; + + constructor(totalSize?: number, abort?: AbortController) { + this.totalSize = totalSize || 0; + this.status = "uploading"; + this.size = 0; + this._abort = abort; + this.aborted = false; + } + + abort() { + if (this._abort) { + this._abort.abort(); + this.aborted = true; + } + } + + transformStream() { + const counterStream = this; + + return new TransformStream({ + start() { + counterStream.size = 0; + }, + + transform(chunk, controller) { + counterStream.size += chunk.length; + //console.log(`Uploaded: ${counterStream.size}`); + controller.enqueue(chunk); + }, + }); + } +} + +export { ExtAPI }; diff --git a/src/sw/downloader.ts b/src/sw/downloader.ts new file mode 100644 index 00000000..d7e1ae32 --- /dev/null +++ b/src/sw/downloader.ts @@ -0,0 +1,1111 @@ +import { makeZip } from "client-zip"; + +import { Deflate } from "pako"; + +import { v5 as uuidv5 } from "uuid"; + +import { createSHA256 } from "hash-wasm"; +import { type IHasher } from "hash-wasm/dist/lib/WASMInterface.js"; + +import { getSurt, WARCRecord, WARCSerializer } from "warcio"; + +import { + getTSMillis, + getStatusText, + digestMessage, + type Collection, + type ArchiveDB, + type ResourceEntry, +} from "@webrecorder/wabac/swlib"; +import { type DataSignature, type Signer } from "./keystore"; +import { type ExtPageEntry } from "./recproxy"; + +export type SizeCallback = (size: number) => void; + +export type ResponseWithFilename = Response & { + filename?: string; +}; + +type ClientZipEntry = { + name: string; + lastModified: Date; + input: AsyncGenerator<Uint8Array>; +}; + +type FileStats = { + filename: string; + size: number; + hash?: string; +}; + +export type DownloaderOpts = { + coll: Collection; + format: string; + filename?: string; + pageList?: string[]; + signer?: Signer; + softwareString?: string; + gzip?: boolean; + uuidNamespace?: string; + markers?: Markers; +}; + +export type Markers = { + ZIP?: Uint8Array; + WARC_PAYLOAD?: Uint8Array; + WARC_GROUP?: Uint8Array; +}; + +type DLResourceEntry = ResourceEntry & { + offset?: number; + length?: number; + timestamp?: string; + 
skipped?: boolean; + text?: string; + + pageId: string; + digest: string; +}; + +type CDXJEntry = { + url: string; + digest: string; + mime: string; + offset: number; + length: number; + recordDigest: string; + status: number; + + method?: string; + filename?: string; + requestBody?: string; +}; + +type DLPageData = { + title: string; + url: string; + id: string; + size: number; + ts: string; + + favIconUrl?: string; + text?: string; +}; + +type Gen = + | AsyncGenerator<Uint8Array> + | AsyncGenerator<string> + | Generator<Uint8Array> + | Generator<string>; + +type WARCVersion = "WARC/1.0" | "WARC/1.1"; + +type DigestCache = { + url: string; + date: string; + payloadDigest?: string; +}; + +type DataPackageJSON = { + profile: string; + resources: { + name: string; + path: string; + hash: string; + bytes: number; + }[]; + + wacz_version: string; + software: string; + created: string; + + title?: string; + description?: string; + modified?: string; +}; + +export type Metadata = { + uploadId?: string; + uploadTime?: number; + ctime?: number; + mtime?: number; + size?: number; + title?: string; + desc?: string; + status?: string; + totalSize?: number; +}; + +// =========================================================================== +const WACZ_VERSION = "1.1.1"; + +const SPLIT_REQUEST_Q_RX = /(.*?)[?&](?:__wb_method=|__wb_post=)[^&]+&(.*)/; + +const LINES_PER_BLOCK = 1024; +const RESOURCE_BATCH_SIZE = LINES_PER_BLOCK * 8; + +const DEFAULT_UUID_NAMESPACE = "f9ec3936-7f66-4461-bec4-34f4495ea242"; + +const DATAPACKAGE_FILENAME = "datapackage.json"; +const DIGEST_FILENAME = "datapackage-digest.json"; + +const encoder = new TextEncoder(); + +const EMPTY = new Uint8Array([]); + +async function* getPayload(payload: Uint8Array) { + yield payload; +} + +async function* hashingGen( + gen: Gen, + stats: FileStats, + hasher: IHasher, + sizeCallback: SizeCallback | null, + zipMarker?: Uint8Array, +) { + stats.size = 0; + + hasher.init(); + + if (zipMarker) { + yield zipMarker; + } + + for await (let chunk of gen) { + if (typeof chunk === "string") { + chunk = encoder.encode(chunk); + } + + yield chunk; + stats.size += chunk.byteLength; + if (sizeCallback) { + sizeCallback(chunk.byteLength); + } + hasher.update(chunk); + } + + if (zipMarker) { + yield zipMarker; + } + + stats.hash = hasher.digest("hex"); +} + +// =========================================================================== +class Downloader { + db: ArchiveDB; + pageList: string[] | null; + collId: string; + metadata: Metadata; + gzip: boolean; + + markers: Markers; + warcName: string; + alreadyDecoded: boolean; + + softwareString: string; + uuidNamespace: string; + + createdDateDt: Date; + createdDate: string; + modifiedDate: string | null; + + format: string; + warcVersion: WARCVersion; + + digestOpts: { + algo: string; + prefix: string; + base32?: boolean; + }; + + filename: string; + + signer: Signer | null; + + offset = 0; + firstResources: ResourceEntry[] = []; + textResources: DLResourceEntry[] = []; + cdxjLines: string[] = []; + + // compressed index (idx) entries + indexLines: string[] = []; + + digestsVisted: Record<string, DigestCache> = {}; + fileHasher: IHasher | null = null; + recordHasher: IHasher | null = null; + + datapackageDigest = ""; + + fileStats: FileStats[] = []; + hashType = ""; + + lastUrl?: string; + lastPageId?: string; + + constructor({ + coll, + format = "wacz", + filename, + pageList, + signer, + softwareString, + gzip = true, + uuidNamespace, + markers, + }: DownloaderOpts) { + this.db = coll.store; + 
this.pageList = pageList || null; + this.collId = coll.name; + this.metadata = coll.config.metadata || {}; + this.gzip = gzip; + + this.markers = markers || {}; + + this.warcName = this.gzip ? "data.warc.gz" : "data.warc"; + + this.alreadyDecoded = !coll.config["decode"] && !coll.config["loadUrl"]; + + this.softwareString = softwareString || "ArchiveWeb.page"; + + this.uuidNamespace = uuidNamespace || DEFAULT_UUID_NAMESPACE; + + this.createdDateDt = new Date(coll.config.ctime!); + this.createdDate = this.createdDateDt.toISOString(); + this.modifiedDate = coll.config.metadata!.mtime + ? new Date(coll.config.metadata!.mtime).toISOString() + : null; + + this.format = format; + this.warcVersion = format === "warc1.0" ? "WARC/1.0" : "WARC/1.1"; + + if (format === "warc1.0") { + this.digestOpts = { algo: "sha-1", prefix: "sha1:", base32: true }; + } else { + this.digestOpts = { algo: "sha-256", prefix: "sha256:" }; + } + + // determine filename from title, if it exists + if (!filename && coll.config.metadata!.title) { + filename = encodeURIComponent( + coll.config.metadata!.title.toLowerCase().replace(/\s/g, "-"), + ); + } + + if (!filename) { + filename = "webarchive"; + } + this.filename = filename; + + this.signer = signer || null; + } + + async download(sizeCallback: SizeCallback | null = null) { + switch (this.format) { + case "wacz": + return this.downloadWACZ(this.filename, sizeCallback); + + case "warc": + case "warc1.0": + return this.downloadWARC(this.filename, sizeCallback); + + default: + return { error: "invalid 'format': must be wacz or warc" }; + } + } + + downloadWARC(filename: string, sizeCallback: SizeCallback | null = null) { + filename = (filename || "webarchive").split(".")[0] + ".warc"; + + // eslint-disable-next-line @typescript-eslint/no-this-alias + const dl = this; + + const rs = new ReadableStream({ + async start(controller) { + await dl.queueWARC(controller, filename, sizeCallback); + }, + }); + + const headers = { + "Content-Disposition": `attachment; filename="${filename}"`, + "Content-Type": "application/octet-stream", + }; + + const resp: ResponseWithFilename = new Response(rs, { headers }); + resp.filename = filename; + return resp; + } + + async loadResourcesBlock( + start: [string, number] | [] = [], + ): Promise<ResourceEntry[]> { + return await this.db.db!.getAll( + "resources", + IDBKeyRange.lowerBound(start, true), + RESOURCE_BATCH_SIZE, + ); + } + + async *iterResources(resources: ResourceEntry[]) { + let start: [string, number] | [] = []; + //let count = 0; + + while (resources.length) { + const last: ResourceEntry = resources[resources.length - 1]!; + + if (this.pageList) { + resources = resources.filter((res) => + this.pageList!.includes(res.pageId || ""), + ); + } + //count += resources.length; + yield* resources; + + start = [last.url, last.ts]; + resources = await this.loadResourcesBlock(start); + } + // if (count !== this.numResources) { + // console.warn(`Iterated ${count}, but expected ${this.numResources}`); + // } + } + + async queueWARC( + controller: ReadableStreamDefaultController, + filename: string, + sizeCallback: SizeCallback | null, + ) { + this.firstResources = await this.loadResourcesBlock(); + + for await (const chunk of this.generateWARC(filename)) { + controller.enqueue(chunk); + if (sizeCallback) { + sizeCallback(chunk.length); + } + } + + for await (const chunk of this.generateTextWARC(filename)) { + controller.enqueue(chunk); + if (sizeCallback) { + sizeCallback(chunk.length); + } + } + + controller.close(); + } + + addFile( + 
zip: ClientZipEntry[], + filename: string, + generator: Gen, + sizeCallback: SizeCallback | null, + ) { + const stats: FileStats = { filename, size: 0 }; + + if (filename !== DATAPACKAGE_FILENAME && filename !== DIGEST_FILENAME) { + this.fileStats.push(stats); + } + + zip.push({ + name: filename, + lastModified: this.createdDateDt, + input: hashingGen( + generator, + stats, + this.fileHasher!, + sizeCallback, + this.markers.ZIP, + ), + }); + } + + recordDigest(data: Uint8Array | string) { + this.recordHasher!.init(); + this.recordHasher!.update(data); + return this.hashType + ":" + this.recordHasher!.digest("hex"); + } + + getWARCRecordUUID(name: string) { + return `<urn:uuid:${uuidv5(name, this.uuidNamespace)}>`; + } + + async downloadWACZ(filename: string, sizeCallback: SizeCallback | null) { + filename = (filename || "webarchive").split(".")[0] + ".wacz"; + + this.fileHasher = await createSHA256(); + this.recordHasher = await createSHA256(); + this.hashType = "sha256"; + + const zip: ClientZipEntry[] = []; + + this.firstResources = await this.loadResourcesBlock(); + + this.addFile(zip, "pages/pages.jsonl", this.generatePages(), sizeCallback); + this.addFile( + zip, + `archive/${this.warcName}`, + this.generateWARC(filename + `#/archive/${this.warcName}`, true), + sizeCallback, + ); + //this.addFile(zip, "archive/text.warc", this.generateTextWARC(filename + "#/archive/text.warc"), false); + + // don't use compressed index if we'll have a single block, need to have at least enough for 2 blocks + if (this.firstResources.length < 2 * LINES_PER_BLOCK) { + this.addFile(zip, "indexes/index.cdx", this.generateCDX(), sizeCallback); + } else { + this.addFile( + zip, + "indexes/index.cdx.gz", + this.generateCompressedCDX("index.cdx.gz"), + sizeCallback, + ); + this.addFile(zip, "indexes/index.idx", this.generateIDX(), sizeCallback); + } + + this.addFile( + zip, + DATAPACKAGE_FILENAME, + this.generateDataPackage(), + sizeCallback, + ); + + this.addFile( + zip, + DIGEST_FILENAME, + this.generateDataManifest(), + sizeCallback, + ); + + const headers = { + "Content-Disposition": `attachment; filename="${filename}"`, + "Content-Type": "application/zip", + }; + + const rs = makeZip(zip); + const response: ResponseWithFilename = new Response(rs, { headers }); + response.filename = filename; + return response; + } + + async *generateWARC( + filename: string, + digestRecordAndCDX = false, + ): AsyncGenerator<Uint8Array> { + try { + let offset = 0; + + // if filename provided, add warcinfo + if (filename) { + const warcinfo = await this.createWARCInfo(filename); + yield warcinfo; + offset += warcinfo.length; + } + + if (this.markers.WARC_GROUP) { + yield this.markers.WARC_GROUP; + } + + for await (const res of this.iterResources(this.firstResources)) { + const resource: DLResourceEntry = res as DLResourceEntry; + resource.offset = offset; + const records = await this.createWARCRecord(resource); + if (!records) { + resource.skipped = true; + continue; + } + + // response record + const responseData: { length: number; digest?: string } = { length: 0 }; + yield* this.emitRecord(records[0]!, digestRecordAndCDX, responseData); + offset += responseData.length; + resource.length = responseData.length; + if (digestRecordAndCDX && !resource.recordDigest) { + //resource.recordDigest = this.recordDigest(records[0]); + resource.recordDigest = responseData.digest; + } + + // request record, if any + if (records.length > 1) { + const requestData = { length: 0 }; + yield* this.emitRecord(records[1]!, false, requestData); 
+ offset += requestData.length; + } + + if (digestRecordAndCDX) { + this.cdxjLines.push(this.getCDXJ(resource, this.warcName)); + } + + if (this.markers.WARC_GROUP) { + yield this.markers.WARC_GROUP; + } + } + } catch (e) { + console.warn(e); + } + } + + async *emitRecord( + record: WARCRecord, + doDigest: boolean, + output: { length: number; digest?: string }, + ) { + const opts = { gzip: this.gzip, digest: this.digestOpts }; + const s = new WARCSerializer(record, opts); + + const chunks = []; + if (doDigest) { + this.recordHasher!.init(); + } + + for await (const chunk of s) { + if (doDigest) { + this.recordHasher!.update(chunk as Uint8Array); + } + chunks.push(chunk); + output.length += chunk.length; + } + + if (doDigest) { + output.digest = this.hashType + ":" + this.recordHasher!.digest("hex"); + } + + if ( + !this.gzip && + this.markers.WARC_PAYLOAD && + record.warcType !== "request" && + (chunks.length === 5 || chunks.length === 4) + ) { + if (chunks.length === 5) { + yield chunks[0]; + yield chunks[1]; + yield chunks[2]; + yield this.markers.WARC_PAYLOAD; + if (chunks[3].length) { + yield chunks[3]; + yield this.markers.WARC_PAYLOAD; + } + yield chunks[4]; + } else { + yield chunks[0]; + yield chunks[1]; + yield this.markers.WARC_PAYLOAD; + if (chunks[2].length) { + yield chunks[2]; + yield this.markers.WARC_PAYLOAD; + } + yield chunks[3]; + } + } else { + for (const chunk of chunks) { + yield chunk; + } + } + } + + async *generateTextWARC(filename: string) { + try { + let offset = 0; + + // if filename provided, add warcinfo + if (filename) { + const warcinfo = await this.createWARCInfo(filename); + yield warcinfo; + offset += warcinfo.length; + } + + for (const resource of this.textResources) { + resource.offset = offset; + const chunk = await this.createTextWARCRecord(resource); + yield chunk; + offset += chunk.length; + resource.length = chunk.length; + } + } catch (e) { + console.warn(e); + } + } + + getCDXJ(resource: DLResourceEntry, filename: string): string { + const data: CDXJEntry = { + url: resource.url, + digest: resource.digest, + mime: resource.mime!, + offset: resource.offset!, + length: resource.length!, + recordDigest: resource.recordDigest!, + status: resource.status!, + }; + + if (filename) { + data.filename = filename; + } + + if (resource.method && resource.method !== "GET") { + const m = resource.url.match(SPLIT_REQUEST_Q_RX); + if (m) { + data.url = m[1]!; + // resource.requestBody is the raw payload, use the converted one from the url for the cdx + data.requestBody = m[2]; + } + data.method = resource.method; + } + + return `${getSurt(resource.url)} ${resource.timestamp} ${JSON.stringify(data)}\n`; + } + + *generateCDX() { + this.cdxjLines.sort(); + + yield* this.cdxjLines; + } + + *generateCompressedCDX(filename: string) { + let offset = 0; + + let chunkDeflater: Deflate | null = null; + let count = 0; + let key = ""; + + // eslint-disable-next-line @typescript-eslint/no-this-alias + const dl = this; + + const finishChunk = () => { + const data = chunkDeflater!.result as Uint8Array; + const length = data.length; + const digest = dl.recordDigest(data); + + const idx = + key + " " + JSON.stringify({ offset, length, digest, filename }); + + dl.indexLines.push(idx); + + offset += length; + + chunkDeflater = null; + count = 0; + key = ""; + + return data; + }; + + for (const cdx of this.generateCDX()) { + if (!chunkDeflater) { + chunkDeflater = new Deflate({ gzip: true }); + } + + if (!key) { + key = cdx.split(" {", 1)[0] || ""; + } + + if (++count === 
LINES_PER_BLOCK) { + chunkDeflater.push(cdx, true); + yield finishChunk(); + } else { + chunkDeflater.push(cdx); + } + } + + if (chunkDeflater) { + chunkDeflater.push(EMPTY, true); + yield finishChunk(); + } + } + + async *generateDataManifest() { + const hash = this.datapackageDigest; + + const path = DATAPACKAGE_FILENAME; + + const data: { path: string; hash: string; signedData?: DataSignature } = { + path, + hash, + }; + + if (this.signer) { + try { + data.signedData = await this.signer.sign(hash, this.createdDate); + + this.signer.close(); + this.signer = null; + } catch (e) { + // failed to sign + console.log(e); + } + } + + const res = JSON.stringify(data, null, 2); + + yield res; + } + + async *generateDataPackage() { + const root: DataPackageJSON = { + profile: "data-package", + + resources: this.fileStats.map((stats) => { + const path = stats.filename; + return { + name: path.slice(path.lastIndexOf("/") + 1), + path, + hash: this.hashType + ":" + stats.hash, + bytes: stats.size, + }; + }), + + wacz_version: WACZ_VERSION, + software: this.softwareString, + created: this.createdDate, + }; + + if (this.metadata.title) { + root.title = this.metadata.title; + } + if (this.metadata.desc) { + root.description = this.metadata.desc; + } + + if (this.modifiedDate) { + root.modified = this.modifiedDate; + } + + const datapackageText = JSON.stringify(root, null, 2); + this.datapackageDigest = this.recordDigest(datapackageText); + yield datapackageText; + } + + async *generatePages() { + const pageIter: ExtPageEntry[] = ( + this.pageList + ? await this.db.getPages(this.pageList) + : await this.db.getAllPages() + ) as ExtPageEntry[]; + + yield JSON.stringify({ + format: "json-pages-1.0", + id: "pages", + title: "All Pages", + hasText: true, + }); + + for (const page of pageIter) { + const ts = new Date(page.ts).toISOString(); + + const pageData: DLPageData = { + title: page.title, + url: page.url, + id: page.id, + size: page.size, + ts, + }; + + if (page.favIconUrl) { + pageData.favIconUrl = page.favIconUrl; + } + if (page.text) { + pageData.text = page.text; + } + + yield "\n" + JSON.stringify(pageData); + + if (page.text) { + this.textResources.push({ + url: page.url, + ts: page.ts, + text: page.text, + pageId: page.id, + digest: "", + }); + } + } + } + + /* + async getLists() { + try { + const lists = await this.db.getAllCuratedByList(); + console.log(lists); + return yaml.safeDump(lists, {skipInvalid: true}); + } catch (e) { + console.log(e); + } + } +*/ + async *generateIDX() { + yield this.indexLines.join("\n"); + } + + async createWARCInfo(filename: string) { + const warcVersion = this.warcVersion; + const type = "warcinfo"; + + const info = { + software: this.softwareString, + format: + warcVersion === "WARC/1.0" + ? "WARC File Format 1.0" + : "WARC File Format 1.1", + isPartOf: this.metadata["title"] || this.collId, + }; + + //info["json-metadata"] = JSON.stringify(metadata); + + const warcHeaders = { + "WARC-Record-ID": this.getWARCRecordUUID(JSON.stringify(info)), + }; + + const date = this.createdDate; + + const record = WARCRecord.createWARCInfo( + { filename, type, date, warcHeaders, warcVersion }, + info, + ); + const buffer = await WARCSerializer.serialize(record, { + gzip: this.gzip, + digest: this.digestOpts, + }); + return buffer; + } + + fixupHttpHeaders(headersMap: Record<string, string>, length: number) { + // how many headers are we parsing here + const numHeaders = this.alreadyDecoded ? 
3 : 1; + + let count = 0; + for (const [name] of Object.entries(headersMap)) { + const lowerName = name.toLowerCase(); + switch (lowerName) { + case "content-encoding": + case "transfer-encoding": + if (this.alreadyDecoded) { + headersMap["x-orig-" + name] = headersMap[name]!; + delete headersMap[name]; + ++count; + } + break; + + case "content-length": + headersMap[name] = "" + length; + ++count; + break; + } + if (count === numHeaders) { + break; + } + } + } + + async createWARCRecord(resource: DLResourceEntry) { + let url = resource.url; + const date = new Date(resource.ts).toISOString(); + resource.timestamp = getTSMillis(date); + const httpHeaders = resource.respHeaders || {}; + const warcVersion = this.warcVersion; + + const pageId = resource.pageId; + + let payload: Uint8Array | null | undefined = resource.payload; + let type: "response" | "request" | "resource" | "revisit"; + + let refersToUrl, refersToDate; + let refersToDigest; + let storeDigest: DigestCache | null = null; + + let method = "GET"; + let requestBody; + + // non-GET request/response: + // if original request body + original requestURL is preserved, write that with original method + // otherwise, just serialize the converted-to-GET form + if ( + resource.method && + resource.method !== "GET" && + resource.requestBody && + resource.requestUrl + ) { + // ensure payload is an arraybuffer + requestBody = + typeof resource.requestBody === "string" + ? encoder.encode(resource.requestBody) + : resource.requestBody; + method = resource.method; + url = resource.requestUrl; + } else { + requestBody = new Uint8Array([]); + } + + if (!resource.digest && resource.payload) { + resource.digest = await digestMessage(resource.payload, "sha-256"); + } + + const digestOriginal = this.digestsVisted[resource.digest]; + + if (resource.digest && digestOriginal) { + // if exact resource in a row, and same page, then just skip instead of writing revisit + if ( + url === this.lastUrl && + method === "GET" && + pageId === this.lastPageId + ) { + //console.log("Skip Dupe: " + url); + return null; + } + + type = "revisit"; + resource.mime = "warc/revisit"; + payload = EMPTY; + + refersToUrl = digestOriginal.url; + refersToDate = digestOriginal.date; + refersToDigest = digestOriginal.payloadDigest || resource.digest; + } else if (resource.origURL && resource.origTS) { + if (!resource.digest || !digestOriginal) { + //console.log("Skip fuzzy resource with no digest"); + return null; + } + + type = "revisit"; + resource.mime = "warc/revisit"; + payload = EMPTY; + + refersToUrl = resource.origURL; + refersToDate = new Date(resource.origTS).toISOString(); + refersToDigest = digestOriginal.payloadDigest || resource.digest; + } else { + type = "response"; + if (!payload) { + payload = (await this.db.loadPayload( + resource, + {}, + )) as Uint8Array | null; + } + + if (!payload) { + //console.log("Skipping No Payload For: " + url, resource); + return null; + } + + if (method === "GET") { + storeDigest = { url, date }; + this.digestsVisted[resource.digest] = storeDigest; + } + } + + const status = resource.status || 200; + const statusText = resource.statusText || getStatusText(status); + + const statusline = `HTTP/1.1 ${status} ${statusText}`; + + const responseRecordId = this.getWARCRecordUUID( + type + ":" + resource.timestamp + "/" + resource.url, + ); + + const warcHeaders: Record<string, string> = { + "WARC-Record-ID": responseRecordId, + }; + + if (pageId) { + warcHeaders["WARC-Page-ID"] = pageId; + } + + if (resource.extraOpts && 
Object.keys(resource.extraOpts).length) { + warcHeaders["WARC-JSON-Metadata"] = JSON.stringify(resource.extraOpts); + } + + if (refersToDigest) { + warcHeaders["WARC-Payload-Digest"] = refersToDigest; + } + + // remove encoding, set content-length as encoding never preserved in browser-based capture + this.fixupHttpHeaders(httpHeaders, payload.length); + + const record = WARCRecord.create( + { + url, + date, + type, + warcVersion, + warcHeaders, + statusline, + httpHeaders, + refersToUrl, + refersToDate, + }, + getPayload(payload), + ); + + //const buffer = await WARCSerializer.serialize(record, {gzip: this.gzip, digest: this.digestOpts}); + if (!resource.digest && record.warcPayloadDigest) { + resource.digest = record.warcPayloadDigest; + } + if (storeDigest && record.warcPayloadDigest) { + storeDigest.payloadDigest = record.warcPayloadDigest; + } + + this.lastPageId = pageId; + this.lastUrl = url; + + const records = [record]; + + if (resource.reqHeaders) { + const type = "request"; + const reqWarcHeaders: Record<string, string> = { + "WARC-Record-ID": this.getWARCRecordUUID( + type + ":" + resource.timestamp + "/" + resource.url, + ), + "WARC-Page-ID": pageId, + "WARC-Concurrent-To": responseRecordId, + }; + + const urlParsed = new URL(url); + const statusline = `${method} ${url.slice(urlParsed.origin.length)} HTTP/1.1`; + + const reqRecord = WARCRecord.create( + { + url, + date, + warcVersion, + type, + warcHeaders: reqWarcHeaders, + httpHeaders: resource.reqHeaders, + statusline, + }, + getPayload(requestBody), + ); + + //records.push(await WARCSerializer.serialize(reqRecord, {gzip: this.gzip, digest: this.digestOpts})); + records.push(reqRecord); + } + + return records; + } + + async createTextWARCRecord(resource: DLResourceEntry) { + const date = new Date(resource.ts).toISOString(); + const timestamp = getTSMillis(date); + resource.timestamp = timestamp; + const url = `urn:text:${timestamp}/${resource.url}`; + resource.url = url; + + const type = "resource"; + const warcHeaders = { "Content-Type": 'text/plain; charset="UTF-8"' }; + const warcVersion = this.warcVersion; + + const payload = getPayload(encoder.encode(resource.text)); + + const record = WARCRecord.create( + { url, date, warcHeaders, warcVersion, type }, + payload, + ); + + const buffer = await WARCSerializer.serialize(record, { + gzip: this.gzip, + digest: this.digestOpts, + }); + if (!resource.digest && record.warcPayloadDigest) { + resource.digest = record.warcPayloadDigest; + } + return buffer; + } +} + +export { Downloader }; diff --git a/src/sw/globals.d.ts b/src/sw/globals.d.ts new file mode 100644 index 00000000..c4ff7dfc --- /dev/null +++ b/src/sw/globals.d.ts @@ -0,0 +1,8 @@ +declare const __SW_NAME__: string; +declare const __WARCIO_VERSION__: string; +declare const __AWP_VERSION__: string; +declare const __VERSION__: string; +declare const __WEB3_STORAGE_TOKEN__: string; + +// eslint-disable-next-line @typescript-eslint/no-explicit-any +declare type TODOFixMe = any; diff --git a/src/sw/ipfsutils.ts b/src/sw/ipfsutils.ts new file mode 100644 index 00000000..3dc8f8fa --- /dev/null +++ b/src/sw/ipfsutils.ts @@ -0,0 +1,634 @@ +import { type CollMetadata, type Collection } from "@webrecorder/wabac/swlib"; +import { Downloader, type DownloaderOpts, type Markers } from "./downloader"; + +// @ts-expect-error no types +import { create as createAutoIPFS } from "auto-js-ipfs"; + +import * as UnixFS from "@ipld/unixfs"; +import { CarWriter } from "@ipld/car/writer"; +import Queue from "p-queue"; + +import { type Link } 
from "@ipld/unixfs/file/layout/queue"; +import { type FileLink } from "@ipld/unixfs/directory"; + +const autoipfsOpts = { + web3StorageToken: __WEB3_STORAGE_TOKEN__, + daemonURL: "", +}; + +// eslint-disable-next-line @typescript-eslint/no-explicit-any +let autoipfs: any = null; + +type ReplayOpts = { + filename?: string; + customSplits?: boolean; + gzip?: boolean; + replayBaseUrl?: string; + showEmbed?: boolean; + pageUrl?: string; + pageTitle?: string; + deepLink?: boolean; + loading?: boolean; +}; + +type MetadataWithIPFS = CollMetadata & { + ipfsPins?: { url: string; cid: string }[] | null; +}; + +export async function setAutoIPFSUrl(url: string) { + if (autoipfsOpts.daemonURL !== url) { + autoipfs = null; + } + autoipfsOpts.daemonURL = url; +} + +export async function ipfsAdd( + coll: Collection, + downloaderOpts: DownloaderOpts, + replayOpts: ReplayOpts = {}, + progress: (incSize: number, totalSize: number) => void, +) { + if (!autoipfs) { + autoipfs = await createAutoIPFS(autoipfsOpts); + } + + const filename = replayOpts.filename || "webarchive.wacz"; + + if (replayOpts.customSplits) { + const ZIP = new Uint8Array([]); + const WARC_PAYLOAD = new Uint8Array([]); + const WARC_GROUP = new Uint8Array([]); + downloaderOpts.markers = { ZIP, WARC_PAYLOAD, WARC_GROUP }; + } + + const gzip = replayOpts.gzip !== undefined ? replayOpts.gzip : true; + + const dl = new Downloader({ ...downloaderOpts, coll, filename, gzip }); + const dlResponse = await dl.download(); + + if (!(dlResponse instanceof Response)) { + throw new Error(dlResponse.error); + } + + const metadata: MetadataWithIPFS = coll.config.metadata || {}; + + if (!metadata.ipfsPins) { + metadata.ipfsPins = []; + } + + let concur; + let shardSize; + let capacity; + + if (autoipfs.type === "web3.storage") { + // for now, web3storage only allows a single-shard uploads, so set this high. + concur = 1; + shardSize = 1024 * 1024 * 10000; + capacity = 1048576 * 200; + } else { + concur = 3; + shardSize = 1024 * 1024 * 5; + // use default capacity + // capacity = undefined; + capacity = 1048576 * 200; + } + + const { readable, writable } = new TransformStream( + {}, + UnixFS.withCapacity(capacity), + ); + + const baseUrl = replayOpts.replayBaseUrl || self.location.href; + + const swContent = await fetchBuffer("sw.js", baseUrl); + const uiContent = await fetchBuffer("ui.js", baseUrl); + + let favicon = null; + + try { + favicon = await fetchBuffer("icon.png", baseUrl); + } catch (_e) { + console.warn("Couldn't load favicon"); + } + + const htmlContent = getReplayHtml(dlResponse.filename!, replayOpts); + + let totalSize = 0; + + if (coll.config.metadata?.size) { + totalSize = + coll.config.metadata.size + + swContent.length + + uiContent.length + + (favicon ? 
favicon.length : 0) + + htmlContent.length; + } + + progress(0, totalSize); + + let url = ""; + let cid = ""; + + let reject: ((reason?: string) => void) | null = null; + + const p2 = new Promise((res, rej) => (reject = rej)); + + const p = readable + .pipeThrough(new ShardingStream(shardSize)) + .pipeThrough(new ShardStoringStream(autoipfs, concur, reject!)) + .pipeTo( + new WritableStream({ + write: (res: { url: string; cid: string; size: number }) => { + if (res.url && res.cid) { + url = res.url; + cid = res.cid; + } + if (res.size) { + progress(res.size, totalSize); + } + }, + }), + ); + + ipfsGenerateCar( + writable, + dlResponse.filename || "", + dlResponse.body!, + swContent, + uiContent, + htmlContent, + replayOpts, + downloaderOpts.markers!, + favicon, + ).catch((e: unknown) => console.log("generate car failed", e)); + + await Promise.race([p, p2]); + + const res = { cid: cid.toString(), url }; + + metadata.ipfsPins.push(res); + + console.log("ipfs cid added " + url); + + return res; +} + +export async function ipfsRemove(coll: Collection) { + if (!autoipfs) { + autoipfs = await createAutoIPFS(autoipfsOpts); + } + + const metadata: MetadataWithIPFS = coll.config.metadata || {}; + + if (metadata.ipfsPins) { + for (const { url } of metadata.ipfsPins) { + try { + await autoipfs.clear(url); + } catch (_e) { + console.log("Failed to unpin"); + autoipfsOpts.daemonURL = ""; + return false; + } + } + + metadata.ipfsPins = null; + return true; + } + + return false; +} + +async function fetchBuffer(filename: string, replayBaseUrl: string) { + const resp = await fetch(new URL(filename, replayBaseUrl).href); + + return new Uint8Array(await resp.arrayBuffer()); +} + +async function ipfsWriteBuff( + writer: UnixFS.View<Uint8Array>, + name: string, + content: Uint8Array | AsyncIterable<Uint8Array>, + dir: UnixFS.DirectoryWriterView<Uint8Array>, +) { + const file = UnixFS.createFileWriter(writer); + if (content instanceof Uint8Array) { + await file.write(content); + // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition + } else if (content[Symbol.asyncIterator]) { + for await (const chunk of content) { + await file.write(chunk); + } + } + const link = await file.close(); + dir.set(name, link); +} + +// =========================================================================== +export async function ipfsGenerateCar( + writable: WritableStream<UnixFS.Block>, + waczPath: string, + waczContent: ReadableStream<Uint8Array>, + swContent: Uint8Array, + uiContent: Uint8Array, + htmlContent: string, + replayOpts: ReplayOpts, + markers: Markers | null, + favicon: Uint8Array | null, + // eslint-disable-next-line @typescript-eslint/no-explicit-any +): Promise<any> { + const writer = UnixFS.createWriter<Uint8Array>({ writable }); + + const rootDir = UnixFS.createDirectoryWriter<Uint8Array>(writer); + + const encoder = new TextEncoder(); + + await ipfsWriteBuff(writer, "ui.js", uiContent, rootDir); + + if (replayOpts.showEmbed) { + const replayDir = UnixFS.createDirectoryWriter(writer); + await ipfsWriteBuff(writer, "sw.js", swContent, replayDir); + rootDir.set("replay", await replayDir.close()); + } else { + await ipfsWriteBuff(writer, "sw.js", swContent, rootDir); + } + + if (favicon) { + await ipfsWriteBuff(writer, "favicon.ico", favicon, rootDir); + } + + await ipfsWriteBuff( + writer, + "index.html", + encoder.encode(htmlContent), + rootDir, + ); + + if (!markers) { + await ipfsWriteBuff(writer, waczPath, iterate(waczContent), rootDir); + } else { + await splitByWarcRecordGroup( + 
writer, + waczPath, + iterate(waczContent), + rootDir, + markers, + ); + } + + const { cid } = await rootDir.close(); + + await writer.close(); + + return cid; +} + +async function splitByWarcRecordGroup( + writer: UnixFS.View<Uint8Array>, + waczPath: string, + warcIter: AsyncGenerator<Uint8Array>, + rootDir: UnixFS.DirectoryWriterView<Uint8Array>, + markers: Markers, +) { + let links: FileLink[] = []; + const fileLinks: FileLink[] = []; + let secondaryLinks: FileLink[] = []; + + let inZipFile = false; + let lastChunk = null; + let currName = ""; + + const decoder = new TextDecoder(); + + const dirs: Record<string, UnixFS.DirectoryWriterView<Uint8Array>> = {}; + + const { ZIP, WARC_PAYLOAD, WARC_GROUP } = markers; + + let file = UnixFS.createFileWriter(writer); + + function getDirAndName(fullpath: string): [string, string] { + const parts = fullpath.split("/"); + const filename = parts.pop() || ""; + return [parts.join("/"), filename]; + } + + const waczDir = UnixFS.createDirectoryWriter(writer); + + let count = 0; + + for await (const chunk of warcIter) { + if (chunk === ZIP && !inZipFile) { + if (lastChunk) { + currName = decoder.decode(lastChunk); + } + inZipFile = true; + + if (count) { + fileLinks.push(await file.close()); + count = 0; + file = UnixFS.createFileWriter(writer); + } + } else if (chunk === ZIP && inZipFile) { + if (count) { + links.push(await file.close()); + count = 0; + file = UnixFS.createFileWriter(writer); + } + + let link; + + if (secondaryLinks.length) { + if (links.length) { + throw new Error("invalid state, secondaryLinks + links?"); + } + link = await concat(writer, secondaryLinks); + secondaryLinks = []; + } else { + link = await concat(writer, links); + links = []; + } + + // eslint-disable-next-line @typescript-eslint/no-unsafe-argument + fileLinks.push(link); + + const [dirName, filename] = getDirAndName(currName); + currName = ""; + + let dir; + + if (!dirName) { + dir = waczDir; + } else { + if (!dirs[dirName]) { + dirs[dirName] = UnixFS.createDirectoryWriter(writer); + } + dir = dirs[dirName]; + } + + // eslint-disable-next-line @typescript-eslint/no-unsafe-argument + dir.set(filename, link); + + inZipFile = false; + } else if (chunk === WARC_PAYLOAD || chunk === WARC_GROUP) { + if (!inZipFile) { + throw new Error("invalid state"); + } + + if (count) { + links.push(await file.close()); + count = 0; + file = UnixFS.createFileWriter(writer); + + if (chunk === WARC_GROUP) { + // eslint-disable-next-line @typescript-eslint/no-unsafe-argument + secondaryLinks.push(await concat(writer, links)); + links = []; + } + } + } else if (chunk.length > 0) { + if (!inZipFile) { + lastChunk = chunk; + } + await file.write(chunk); + count++; + } + } + + fileLinks.push(await file.close()); + + for (const [name, dir] of Object.entries(dirs)) { + waczDir.set(name, await dir.close()); + } + + // for await (const chunk of iterate(waczContent)) { + // if (chunk === splitMarker) { + // links.push(await file.close()); + // file = UnixFS.createFileWriter(writer); + // } else { + // file.write(chunk); + // } + // } + + // const rootDir = UnixFS.createDirectoryWriter(writer); + + // await ipfsWriteBuff(writer, "ui.js", uiContent, rootDir); + // await ipfsWriteBuff(writer, "sw.js", swContent, rootDir); + // await ipfsWriteBuff(writer, "index.html", encoder.encode(htmlContent), rootDir); + + rootDir.set("webarchive", await waczDir.close()); + + // eslint-disable-next-line @typescript-eslint/no-unsafe-argument + rootDir.set(waczPath, await concat(writer, fileLinks)); +} + +async 
function concat( + writer: UnixFS.View<Uint8Array>, + links: Link[], + // eslint-disable-next-line @typescript-eslint/no-explicit-any +): Promise<any> { + //TODO: is this the right way to do this? + const { fileEncoder, hasher, linker } = writer.settings; + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const advanced = (fileEncoder as any).createAdvancedFile(links); + // eslint-disable-next-line @typescript-eslint/no-unsafe-argument + const bytes = fileEncoder.encode(advanced); + const hash = await hasher.digest(bytes); + const cid = linker.createLink(fileEncoder.code, hash); + const block = { bytes, cid }; + writer.writer.write(block); + + const link = { + cid, + // eslint-disable-next-line @typescript-eslint/no-explicit-any + contentByteLength: (fileEncoder as any).cumulativeContentByteLength(links), + // eslint-disable-next-line @typescript-eslint/no-explicit-any + dagByteLength: (fileEncoder as any).cumulativeDagByteLength(bytes, links), + }; + + return link; +} + +export const iterate = async function* (stream: ReadableStream<Uint8Array>) { + const reader = stream.getReader(); + // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition + while (true) { + const next = await reader.read(); + if (next.done) { + return; + } else { + yield next.value; + } + } +}; + +// eslint-disable-next-line @typescript-eslint/no-explicit-any +export async function encodeBlocks(blocks: UnixFS.Block[], root?: any) { + // eslint-disable-next-line @typescript-eslint/no-unsafe-argument + const { writer, out } = CarWriter.create(root); + /** @type {Error?} */ + let error; + void (async () => { + try { + for await (const block of blocks) { + // @ts-expect-error + await writer.put(block); + } + } catch (err: unknown) { + error = err; + } finally { + await writer.close(); + } + })(); + const chunks = []; + for await (const chunk of out) chunks.push(chunk); + // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition + if (error != null) throw error; + const roots = root != null ? [root] : []; + console.log("chunks", chunks.length); + return Object.assign(new Blob(chunks), { version: 1, roots }); +} + +function getReplayHtml(waczPath: string, replayOpts: ReplayOpts = {}) { + const { showEmbed, pageUrl, pageTitle, deepLink, loading } = replayOpts; + + return ` +<!doctype html> + <html class="no-overflow"> + <head> + <title>${pageTitle || "ReplayWeb.page"}</title> + <meta charset="utf-8"> + <meta name="viewport" content="width=device-width, initial-scale=1"> + <script src="./ui.js"></script> + <style> + html, body, replay-web-page, replay-app-main { + width: 100%; + height: 100%; + overflow: hidden; + margin: 0px; + padding: 0px; + } + </style> + </head> + <body>${ + showEmbed + ? ` + <replay-web-page ${deepLink ? 'deepLink="true" ' : ""} ${pageUrl ? `url="${pageUrl}"` : ""} loading="${loading || ""}" embed="replay-with-info" src="${waczPath}"></replay-web-page>` + : ` + <replay-app-main skipRuffle source="${waczPath}"></replay-app-main>` + } + </body> +</html>`; +} + +// Copied from https://github.com/web3-storage/w3protocol/blob/main/packages/upload-client/src/sharding.js + +/** + * Shard a set of blocks into a set of CAR files. The last block is assumed to + * be the DAG root and becomes the CAR root CID for the last CAR output. 
+ * + * @extends {TransformStream<import('@ipld/unixfs').Block, import('./types').CARFile>} + */ +export class ShardingStream extends TransformStream { + /** + * @param {import('./types').ShardingOptions} [options] + */ + constructor(shardSize: number) { + /** @type {import('@ipld/unixfs').Block[]} */ + let shard: UnixFS.Block[] = []; + /** @type {import('@ipld/unixfs').Block[] | null} */ + let readyShard: UnixFS.Block[] | null = null; + let readySize = 0; + + let currSize = 0; + + super({ + async transform(block, controller) { + if (readyShard != null) { + const blocks = await encodeBlocks(readyShard); + const size = readySize; + controller.enqueue({ blocks, size }); + readyShard = null; + } + if (shard.length && currSize + block.bytes.length > shardSize) { + readyShard = shard; + readySize = currSize; + shard = []; + currSize = 0; + } + // eslint-disable-next-line @typescript-eslint/no-unsafe-argument + shard.push(block); + currSize += block.bytes.length; + }, + + async flush(controller) { + if (readyShard != null) { + const blocks = await encodeBlocks(readyShard); + const size = readySize; + controller.enqueue({ blocks, size }); + } + + const rootBlock = shard.at(-1); + if (rootBlock != null) { + const blocks = await encodeBlocks(shard, rootBlock.cid); + const size = currSize; + controller.enqueue({ blocks, size }); + } + }, + }); + } +} + +/** + * Upload multiple DAG shards (encoded as CAR files) to the service. + * + * Note: an "upload" must be registered in order to link multiple shards + * together as a complete upload. + * + * The writeable side of this transform stream accepts CAR files and the + * readable side yields `CARMetadata`. + * + * @extends {TransformStream<import('./types').CARFile, import('./types').CARMetadata>} + */ +export class ShardStoringStream extends TransformStream { + constructor( + // eslint-disable-next-line @typescript-eslint/no-explicit-any + autoipfs: any, + concurrency: number, + // eslint-disable-next-line @typescript-eslint/no-explicit-any + reject: (reason?: any) => void, + ) { + const queue = new Queue({ concurrency }); + const abortController = new AbortController(); + super({ + async transform({ blocks, size }, controller) { + void queue.add( + async () => { + try { + const cid = blocks.roots[0]; + + const resUrls = await autoipfs.uploadCAR(blocks); + const url = resUrls[0]; + + controller.enqueue({ cid, url, size }); + + //const { version, roots, size } = car + //controller.enqueue({ version, roots, cid, size }) + } catch (err) { + controller.error(err); + abortController.abort(err); + autoipfsOpts.daemonURL = ""; + reject(err); + } + }, + { signal: abortController.signal }, + ); + + // retain backpressure by not returning until no items queued to be run + await queue.onSizeLessThan(1); + }, + async flush() { + // wait for queue empty AND pending items complete + await queue.onIdle(); + }, + }); + } +} diff --git a/src/sw/keystore.ts b/src/sw/keystore.ts new file mode 100644 index 00000000..a02206ff --- /dev/null +++ b/src/sw/keystore.ts @@ -0,0 +1,225 @@ +import { openDB } from "idb/with-async-ittr"; +import { + fromByteArray as encodeBase64, + toByteArray as decodeBase64, +} from "base64-js"; +import { type IDBPDatabase } from "idb"; + +type KeyPair = { + public: string; + private: string; +}; + +type IdSig = { + id: string; + sig?: string; + keys?: KeyPair; +}; + +export type DataSignature = { + hash: string; + signature: string; + publicKey: string; + created: string; + software: string; +}; + +// 
==================================================================== +export class KeyStore { + dbname: string; + mainStore: string; + key: string; + version: number; + _ready: Promise<void>; + db: IDBPDatabase | null = null; + + constructor({ + dbname = "_keystore", + mainStore = "store", + key = "id", + version = 1, + } = {}) { + this.dbname = dbname; + this.mainStore = mainStore; + this.key = key; + this.version = version; + this._ready = this.init(); + } + + async init() { + //let oldVersion = 0; + + this.db = await openDB(this.dbname, this.version, { + upgrade: (db, oldV, _newV, _tx) => { + //oldVersion = oldV; + this._initDB(db, oldV); + }, + // eslint-disable-next-line @typescript-eslint/no-explicit-any + blocking: (e: any) => { + if (!e || e.newVersion === null) { + this.close(); + } + }, + }); + } + + _initDB(db: IDBPDatabase, oldV: number /*, newV, tx*/) { + if (!oldV) { + db.createObjectStore(this.mainStore, { keyPath: this.key }); + } + } + + async listAll() { + await this._ready; + // eslint-disable-next-line @typescript-eslint/no-unsafe-return + return await this.db!.getAll(this.mainStore); + } + + async get(name: string) { + await this._ready; + // eslint-disable-next-line @typescript-eslint/no-unsafe-return + return await this.db!.get(this.mainStore, name); + } + + async delete(name: string) { + await this._ready; + return this.db!.delete(this.mainStore, name); + } + + async put(value: IdSig) { + await this._ready; + return await this.db!.put(this.mainStore, value); + } + + close() { + if (this.db) { + this.db.close(); + this.db = null; + } + } +} + +// ==================================================================== +export class Signer { + softwareString: string; + _store: KeyStore | null; + cacheSig: boolean; + + constructor(softwareString: string, opts: { cacheSig?: boolean } = {}) { + this._store = new KeyStore(); + this.softwareString = softwareString || "ArchiveWeb.page"; + this.cacheSig = opts.cacheSig || false; + } + + close() { + if (this._store) { + this._store.close(); + this._store = null; + } + } + + async sign(string: string, created: string): Promise<DataSignature> { + let keyPair: CryptoKeyPair; + let keys = await this.loadKeys(); + + const ecdsaImportParams = { + name: "ECDSA", + namedCurve: "P-384", + }; + + const extractable = true; + const usage = ["sign", "verify"] as KeyUsage[]; + + const ecdsaSignParams = { + name: "ECDSA", + hash: "SHA-256", + }; + + if (!keys) { + keyPair = await crypto.subtle.generateKey( + ecdsaImportParams, + extractable, + usage, + ); + + const privateKey = await crypto.subtle.exportKey( + "pkcs8", + keyPair.privateKey, + ); + const publicKey = await crypto.subtle.exportKey( + "spki", + keyPair.publicKey, + ); + keys = { + private: encodeBase64(new Uint8Array(privateKey)), + public: encodeBase64(new Uint8Array(publicKey)), + }; + + await this.saveKeys(keys); + } else { + const privateDecoded = decodeBase64(keys.private); + const publicDecoded = decodeBase64(keys.public); + + const privateKey = await crypto.subtle.importKey( + "pkcs8", + privateDecoded, + ecdsaImportParams, + true, + ["sign"], + ); + const publicKey = await crypto.subtle.importKey( + "spki", + publicDecoded, + ecdsaImportParams, + true, + ["verify"], + ); + keyPair = { privateKey, publicKey }; + } + + let signature: string | null = this.cacheSig + ? 
+// ====================================================================
+export class Signer {
+  softwareString: string;
+  _store: KeyStore | null;
+  cacheSig: boolean;
+
+  constructor(softwareString: string, opts: { cacheSig?: boolean } = {}) {
+    this._store = new KeyStore();
+    this.softwareString = softwareString || "ArchiveWeb.page";
+    this.cacheSig = opts.cacheSig || false;
+  }
+
+  close() {
+    if (this._store) {
+      this._store.close();
+      this._store = null;
+    }
+  }
+
+  async sign(string: string, created: string): Promise<DataSignature> {
+    let keyPair: CryptoKeyPair;
+    let keys = await this.loadKeys();
+
+    const ecdsaImportParams = {
+      name: "ECDSA",
+      namedCurve: "P-384",
+    };
+
+    const extractable = true;
+    const usage = ["sign", "verify"] as KeyUsage[];
+
+    const ecdsaSignParams = {
+      name: "ECDSA",
+      hash: "SHA-256",
+    };
+
+    if (!keys) {
+      keyPair = await crypto.subtle.generateKey(
+        ecdsaImportParams,
+        extractable,
+        usage,
+      );
+
+      const privateKey = await crypto.subtle.exportKey(
+        "pkcs8",
+        keyPair.privateKey,
+      );
+      const publicKey = await crypto.subtle.exportKey(
+        "spki",
+        keyPair.publicKey,
+      );
+      keys = {
+        private: encodeBase64(new Uint8Array(privateKey)),
+        public: encodeBase64(new Uint8Array(publicKey)),
+      };
+
+      await this.saveKeys(keys);
+    } else {
+      const privateDecoded = decodeBase64(keys.private);
+      const publicDecoded = decodeBase64(keys.public);
+
+      const privateKey = await crypto.subtle.importKey(
+        "pkcs8",
+        privateDecoded,
+        ecdsaImportParams,
+        true,
+        ["sign"],
+      );
+      const publicKey = await crypto.subtle.importKey(
+        "spki",
+        publicDecoded,
+        ecdsaImportParams,
+        true,
+        ["verify"],
+      );
+      keyPair = { privateKey, publicKey };
+    }
+
+    let signature: string | null = this.cacheSig
+      ? await this.loadSig(string)
+      : null;
+
+    if (!signature) {
+      const data = new TextEncoder().encode(string);
+      const signatureBuff = await crypto.subtle.sign(
+        ecdsaSignParams,
+        keyPair.privateKey,
+        data,
+      );
+      signature = encodeBase64(new Uint8Array(signatureBuff));
+      await this.saveSig(string, signature);
+    }
+
+    //console.log("verify", await crypto.subtle.verify(ecdsaSignParams, keyPair.publicKey, signature, data));
+
+    return {
+      hash: string,
+      signature,
+      publicKey: keys.public,
+      created,
+      software: this.softwareString,
+    };
+  }
+
+  async saveSig(id: string, sig: string) {
+    return await this._store!.put({ id, sig });
+  }
+
+  async loadSig(id: string): Promise<string> {
+    const res = await this._store!.get(id);
+    // eslint-disable-next-line @typescript-eslint/no-unsafe-return
+    return res?.sig;
+  }
+
+  async saveKeys(keys: KeyPair, id = "_userkey") {
+    return await this._store!.put({ id, keys });
+  }
+
+  async loadKeys(id = "_userkey"): Promise<KeyPair | null> {
+    const res = await this._store!.get(id);
+    // eslint-disable-next-line @typescript-eslint/no-unsafe-return
+    return res?.keys;
+  }
+}
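
A usage sketch (illustrative, not part of the diff): Signer lazily generates an ECDSA P-384 key pair, persists it through KeyStore, and returns a DataSignature for the given hash string; with cacheSig enabled, a repeated input reuses the stored signature. The software string and digest value below are placeholders.

    // Sketch of driving the Signer above; the digest string is a placeholder.
    async function demoSign() {
      const signer = new Signer("Example software string", { cacheSig: true });
      const created = new Date().toISOString();

      const sig: DataSignature = await signer.sign("sha256:0123abcd", created);
      console.log(sig.publicKey, sig.signature, sig.software);

      signer.close();
      return sig;
    }
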
"versionchange" + >, + ) { + super._initDB(db, oldV, newV, tx); + //TODO: fix + (db as unknown as IDBPDatabase<RecDBType>).createObjectStore("rec"); + } + + async decCounter() { + this.counter--; + //console.log("rec counter", this.counter); + //TODO: fix + // eslint-disable-next-line @typescript-eslint/no-explicit-any + await (this.db! as any).put("rec", this.counter, "numPending"); + } + + async getCounter(): Promise<number | undefined> { + //TODO: fix + // eslint-disable-next-line @typescript-eslint/no-explicit-any, @typescript-eslint/no-unsafe-return + return await (this.db! as any).get("rec", "numPending"); + } + + override async getResource(request: ArchiveRequest, prefix: string, event: FetchEvent) { + if (!this.isRecording) { + return await super.getResource(request, prefix, event); + } + + let req; + + if (request.method === "POST" || request.method === "PUT") { + req = request.request.clone(); + } else { + req = request.request; + } + + let response: ArchiveResponse | null = null; + + try { + this.counter++; + response = await this.liveProxy.getResource(request, prefix); + } catch (_e) { + await this.decCounter(); + return null; + } + + // error response, don't record + if (response?.noRW && response.status >= 400) { + await this.decCounter(); + return response; + } + + // don't record content proxied from specified hosts + // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition + if (!this.recordProxied && this.liveProxy.hostProxy) { + const parsedUrl = new URL(response!.url); + if (this.liveProxy.hostProxy[parsedUrl.host]) { + await this.decCounter(); + return response; + } + } + + this.doRecord(response!, req, request.mod) + .catch(() => {}) + .finally(async () => this.decCounter()); + + return response; + } + + async doRecord(response: ArchiveResponse, request: Request, mod: string) { + let url = response.url; + const ts = response.date.getTime(); + + const mime = (response.headers.get("content-type") || "").split(";")[0]; + + const range = response.headers.get("content-range"); + + if (range && !range.startsWith("bytes 0-")) { + console.log("skip range request: " + range); + return; + } + + const status = response.status; + const statusText = response.statusText; + + const respHeaders = Object.fromEntries(response.headers.entries()); + const reqHeaders = Object.fromEntries(request.headers.entries()); + + const payload = new Uint8Array( + await response.clonedResponse!.arrayBuffer(), + ); + + if (range) { + const expectedRange = `bytes 0-${payload.length - 1}/${payload.length}`; + if (range !== expectedRange) { + console.log("skip range request: " + range); + return; + } + } + + if (request.mode === "navigate" && mod === "mp_") { + this.pageId = randomId(); + if (!this.firstPageOnly) { + this.isNew = true; + } + } + + const pageId = this.pageId; + const referrer = request.referrer; + + if (request.method === "POST" || request.method === "PUT") { + const data = { + method: request.method, + postData: await request.text(), + headers: request.headers, + url, + }; + + if (postToGetUrl(data)) { + url = new URL(data.url).href; + } + } + + const data = { + url, + ts, + status, + statusText, + pageId, + payload, + mime, + respHeaders, + reqHeaders, + referrer, + }; + + await this.addResource(data); + + await this.collLoader.updateSize(this.name, payload.length, payload.length); + + // don't add page for redirects + if (this.isPage(url, request, status, referrer, mod)) { + await this.addPages([{ id: pageId, url, ts }]); + this.allPages.set(url, pageId); + this.isNew = 
+    } else {
+      console.log("not page", url);
+    }
+  }
+
+  isPage(url: string, request: Request, status: number, referrer: string, mod: string) {
+    if (!this.isNew) {
+      return false;
+    }
+
+    if ((status >= 301 && status < 400) || status === 204) {
+      return false;
+    }
+
+    if (request.mode !== "navigate" || mod !== "mp_") {
+      return false;
+    }
+
+    if (!referrer) {
+      return true;
+    }
+
+    const inx = referrer.indexOf("mp_/");
+    if (inx > 0) {
+      const refUrl = referrer.slice(inx + 4);
+      return url === refUrl || this.allPages.has(refUrl);
+    } else if (referrer.indexOf("if_/") > 0) {
+      return false;
+    } else if (referrer.indexOf("?source=")) {
+      return true;
+    } else {
+      return false;
+    }
+  }
+
+  async updateFavIcon(url: string, favIconUrl: string) {
+    const pageId = this.allPages.get(url);
+    if (!pageId) {
+      return;
+    }
+    const page = await this.db!.get("pages", pageId) as ExtPageEntry | undefined;
+    if (!page) {
+      return;
+    }
+    page.favIconUrl = favIconUrl;
+    try {
+      await this.db!.put("pages", page);
+    } catch (_e: unknown) {
+      // ignore
+    }
+  }
+}
+
+// ===========================================================================
+export class RecordingCollections extends SWCollections {
+  // eslint-disable-next-line @typescript-eslint/no-explicit-any
+  override async _initStore(type: string, config: any): Promise<any> {
+    let store;
+
+    switch (type) {
+      case "recordingproxy":
+        store = new RecProxy(config, this);
+        await store.initing;
+        return store;
+    }
+
+    // eslint-disable-next-line @typescript-eslint/no-unsafe-argument
+    return await super._initStore(type, config);
+  }
+
+  override async _handleMessage(event: MessageEvent) {
+    let coll;
+
+    switch (event.data.msg_type) {
+      case "toggle-record":
+        // eslint-disable-next-line @typescript-eslint/no-unsafe-argument
+        coll = await this.getColl(event.data.id);
+        if (coll && coll.store instanceof RecProxy) {
+          console.log("Recording Toggled!", event.data.isRecording);
+          coll.store.isRecording = event.data.isRecording;
+        }
+        break;
+
+      case "update-favicon":
+        // eslint-disable-next-line @typescript-eslint/no-unsafe-argument
+        coll = await this.getColl(event.data.id);
+        if (coll && coll.store instanceof RecProxy) {
+          // eslint-disable-next-line @typescript-eslint/no-unsafe-argument
+          await coll.store.updateFavIcon(event.data.url, event.data.favIconUrl);
+        }
+        break;
+
+      default:
+        return await super._handleMessage(event);
+    }
+  }
+}
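
To sketch how this fits together (illustrative only; the collection config shape is an assumption based on the fields RecProxy reads in its constructor): a collection of type "recordingproxy" gets a RecProxy store, and a client can flip recording on or off through the "toggle-record" message handled by RecordingCollections above.

    // Hypothetical collection config, limited to the fields RecProxy reads above.
    const recordingCollConfig = {
      dbname: "db:example-coll",  // RecProxy derives this.name via dbname.slice(3)
      extraConfig: {
        recordProxied: false,     // skip recording content proxied from hostProxy hosts
        firstPageOnly: false,     // when true, only the first navigation adds a page
        // ...remaining LiveProxy options also live in extraConfig
      },
    };

    // From a controlled page or the extension: toggle recording for a collection,
    // matching the message shape handled by RecordingCollections._handleMessage.
    async function setRecording(collId: string, isRecording: boolean) {
      const reg = await navigator.serviceWorker.ready;
      reg.active?.postMessage({ msg_type: "toggle-record", id: collId, isRecording });
    }
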
diff --git a/yarn.lock b/yarn.lock
index 71020a5e..7d7d701d 100644
--- a/yarn.lock
+++ b/yarn.lock
@@ -1252,20 +1252,10 @@
   resolved "https://registry.yarnpkg.com/@humanwhocodes/object-schema/-/object-schema-2.0.3.tgz#4a2868d75d6d6963e423bcf90b7fd1be343409d3"
   integrity sha512-93zYdMES/c1D69yZiKDBj0V24vqNzB/koF26KPaagAfd3P/4gUlh3Dys5ogAK+Exi9QyzlD8x/08Zt7wIKcDcA==
 
-"@ipld/car@^5.3.1":
-  version "5.3.1"
-  resolved "https://registry.yarnpkg.com/@ipld/car/-/car-5.3.1.tgz#6a967b2f929cab007466edab3171c18f489036d4"
-  integrity sha512-8fNkYAZvL9yX2zesF32k7tYqUDGG41felmmBnwjCZJto06QXCb0NOMPJc/mhNgnVa5gkKqxPO1ZdSoHuaYcVSw==
-  dependencies:
-    "@ipld/dag-cbor" "^9.0.7"
-    cborg "^4.0.5"
-    multiformats "^13.0.0"
-    varint "^6.0.0"
-
 "@ipld/car@^5.3.2":
-  version "5.3.2"
-  resolved "https://registry.yarnpkg.com/@ipld/car/-/car-5.3.2.tgz#b6f9b5e30e0de5d45aff4494e8c3e2667ce9e0a4"
-  integrity sha512-Bb4XrCFlnsCb9tTzZ1I8zo9O61D9qm7HfvuYrQ9gzdE8YhjyVIjrjmHmnoSWV/uCmyc2/bcqiDPIg+9WljXNzg==
+  version "5.4.0"
+  resolved "https://registry.yarnpkg.com/@ipld/car/-/car-5.4.0.tgz#c81087fdfe02039e72b0da558b7505f56e4997e4"
+  integrity sha512-FiGxOhTUh3fn/kkA+YvNYQjA/T8T5DcKG0NZwAi3aXrizN1qm99HzdYTccEwcX/rUCtI8wTUCKDNPBLUb7pBIQ==
   dependencies:
     "@ipld/dag-cbor" "^9.0.7"
     cborg "^4.0.5"
@@ -1281,9 +1271,9 @@
     multiformats "^13.1.0"
 
 "@ipld/dag-pb@^4.0.0":
-  version "4.1.1"
-  resolved "https://registry.yarnpkg.com/@ipld/dag-pb/-/dag-pb-4.1.1.tgz#fb5c253ad0f2ced00832e19b7c58985861a7fa34"
-  integrity sha512-wsSNjIvcABXuH9MKXpvRGMXsS20+Kf2Q0Hq2+2dxN6Wpw/K0kDF3nDmCnO6wlpninQ0vzx1zq54O3ttn5pTH9A==
+  version "4.1.3"
+  resolved "https://registry.yarnpkg.com/@ipld/dag-pb/-/dag-pb-4.1.3.tgz#b572d7978fa548a3a9219f566a80884189261858"
+  integrity sha512-ueULCaaSCcD+dQga6nKiRr+RSeVgdiYiEPKVUu5iQMNYDN+9osd0KpR3UDd9uQQ+6RWuv9L34SchfEwj7YIbOA==
   dependencies:
     multiformats "^13.1.0"
 
@@ -1891,13 +1881,20 @@
   dependencies:
     "@types/node" "*"
 
-"@types/node@*", "@types/node@>=13.7.0", "@types/node@^20.9.0":
+"@types/node@*", "@types/node@^20.9.0":
   version "20.14.0"
   resolved "https://registry.yarnpkg.com/@types/node/-/node-20.14.0.tgz#49ceec7b34f8621470cff44677fa9d461a477f17"
   integrity sha512-5cHBxFGJx6L4s56Bubp4fglrEpmyJypsqI6RgzMfBHWUJQGWAAi8cWcgetEbZXHYXo9C2Fa4EEds/uSyS4cxmA==
   dependencies:
     undici-types "~5.26.4"
 
+"@types/node@>=13.7.0":
+  version "22.15.3"
+  resolved "https://registry.yarnpkg.com/@types/node/-/node-22.15.3.tgz#b7fb9396a8ec5b5dfb1345d8ac2502060e9af68b"
+  integrity sha512-lX7HFZeHf4QG/J7tBZqrCAXwz9J5RD56Y6MpP0eJkka8p+K0RY/yBTW7CYFJ4VGCclxqOLKmiGP5juQc6MKgcw==
+  dependencies:
+    undici-types "~6.21.0"
+
 "@types/pako@^1.0.7":
   version "1.0.7"
   resolved "https://registry.yarnpkg.com/@types/pako/-/pako-1.0.7.tgz#aa0e4af9855d81153a29ff84cc44cce25298eda9"
@@ -1998,6 +1995,11 @@
   dependencies:
     source-map "^0.6.1"
 
+"@types/uuid@^10.0.0":
+  version "10.0.0"
+  resolved "https://registry.yarnpkg.com/@types/uuid/-/uuid-10.0.0.tgz#e9c07fe50da0f53dc24970cca94d619ff03f6f6d"
+  integrity sha512-7gqG38EyHgyP1S+7+xomFtL+ZNHcKv6DwNaCZmJmo1vgMugyF3TCnXVg4t1uk89mLNwnLtnY3TpOpCOyp1/xHQ==
+
 "@types/verror@^1.10.3":
   version "1.10.10"
   resolved "https://registry.yarnpkg.com/@types/verror/-/verror-1.10.10.tgz#d5a4b56abac169bfbc8b23d291363a682e6fa087"
@@ -2308,23 +2310,7 @@
   resolved "https://registry.yarnpkg.com/@webpack-cli/serve/-/serve-2.0.5.tgz#325db42395cd49fe6c14057f9a900e427df8810e"
   integrity sha512-lqaoKnRYBdo1UgDX8uF24AfGMifWK19TxPmM5FHc2vAGxrJ/qtyUyFBWoY1tISZdelsQ5fBcOusifo5o5wSJxQ==
 
-"@webrecorder/awp-sw@^0.5.3":
-  version "0.5.3"
-  resolved "https://registry.yarnpkg.com/@webrecorder/awp-sw/-/awp-sw-0.5.3.tgz#881699895e09517ac44fda2f37edf40118a7fa66"
-  integrity sha512-g8RuI6bF4ixmrurJI+V9hFBZHy1OtIFf6sMqSmJ+JfCQzCg+ZBNTqm8qdFNmiG7cy8Tf2CxXz9LduVKKldin5g==
-  dependencies:
-    "@ipld/car" "^5.3.2"
-    "@ipld/unixfs" "^3.0.0"
-    "@webrecorder/wabac" "^2.20.6"
-    auto-js-ipfs "^2.3.0"
-    client-zip "^2.3.0"
-    hash-wasm "^4.9.0"
-    idb "^7.1.1"
-    p-queue "^8.0.1"
-    uuid "^9.0.0"
-    warcio "^2.3.1"
-
-"@webrecorder/wabac@^2.20.6", "@webrecorder/wabac@^2.22.16":
+"@webrecorder/wabac@^2.22.16":
   version "2.22.16"
   resolved "https://registry.yarnpkg.com/@webrecorder/wabac/-/wabac-2.22.16.tgz#8b9684569b373b8e930852bce4512e2bd2810d65"
   integrity sha512-n39kwNOD/bKpAFwQ8AXImFqOUhfqUYoz41E0baGfoXydnJc2LKiS7SMqg3wDHazZH3y2DVlUpPknrD7UM75g0A==
@@ -3153,15 +3139,10 @@ cli-truncate@^2.1.0:
     slice-ansi "^3.0.0"
     string-width "^4.2.0"
 
-client-zip@^2.2.2:
-  version "2.2.2"
-  resolved "https://registry.yarnpkg.com/client-zip/-/client-zip-2.2.2.tgz#58dcc66f2534954a50b506d2bd864b51ba98370d"
-  integrity sha512-Jh1Sz/iBTfYuLQv6yKzrrXofJy17Q7YRhBfh5HH/6i4Z+ACjTfmRJllWQKoe4eFWwXeUoOjEathqXfuGGX4jhg==
-
 client-zip@^2.3.0:
-  version "2.3.0"
-  resolved "https://registry.yarnpkg.com/client-zip/-/client-zip-2.3.0.tgz#c7ae74bc277e466e27134ebcdce4aaade4c3ecd5"
-  integrity sha512-5hJpXzaNx7SEy35QqtdgNh9T3/w2Rjup7n31If5DO+kSgXsTPANmdwO5gRBgAbBn16GVDaGRc7vS2kUMazyXOw==
+  version "2.5.0"
+  resolved "https://registry.yarnpkg.com/client-zip/-/client-zip-2.5.0.tgz#72674390955390e327833f15fe5ad0f9b65b76fb"
+  integrity sha512-ydG4nDZesbFurnNq0VVCp/yyomIBh+X/1fZPI/P24zbnG4dtC4tQAfI5uQsomigsUMeiRO2wiTPizLWQh+IAyQ==
 
 cliui@^6.0.0:
   version "6.0.0"
@@ -5673,9 +5654,9 @@ log-update@^3.3.0:
     wrap-ansi "^5.0.0"
 
 long@^5.0.0:
-  version "5.2.3"
-  resolved "https://registry.yarnpkg.com/long/-/long-5.2.3.tgz#a3ba97f3877cf1d778eccbcb048525ebb77499e1"
-  integrity sha512-lcHwpNoggQTObv5apGNCTdJrO69eHOZMi4BNC+rTLER8iHAqGrUVeLh/irVIM7zTw2bOXA8T6uNPeujwOLg/2Q==
+  version "5.3.2"
+  resolved "https://registry.yarnpkg.com/long/-/long-5.3.2.tgz#1d84463095999262d7d7b7f8bfd4a8cc55167f83"
+  integrity sha512-mNAgZ1GmyNhD7AuqnTG3/VQ26o760+ZYBPKjPvugO8+nLbYfX6TVpJPseBvopbdY+qpZ/lKUnmEc1LeZYS3QAA==
 
 lowercase-keys@^2.0.0:
   version "2.0.0"
@@ -6195,11 +6176,16 @@ multicast-dns@^7.2.5:
     dns-packet "^5.2.2"
     thunky "^1.0.2"
 
-multiformats@^13.0.0, multiformats@^13.0.1, multiformats@^13.1.0:
+multiformats@^13.0.0, multiformats@^13.1.0:
   version "13.1.1"
   resolved "https://registry.yarnpkg.com/multiformats/-/multiformats-13.1.1.tgz#b22ce4df26330d2cf0d69f5bdcbc9a787095a6e5"
   integrity sha512-JiptvwMmlxlzIlLLwhCi/srf/nk409UL0eUBr0kioRJq15hqqKyg68iftrBvhCRjR6Rw4fkNnSc4ZJXJDuta/Q==
 
+multiformats@^13.0.1:
+  version "13.3.2"
+  resolved "https://registry.yarnpkg.com/multiformats/-/multiformats-13.3.2.tgz#a77c2a09c490e90c73b3049551604e6e1a4854e0"
+  integrity sha512-qbB0CQDt3QKfiAzZ5ZYjLFOs+zW43vA4uyM8g27PeEuXZybUOFyjrVdP93HPBHMoglibwfkdVwbzfUq8qGcH6g==
+
 murmurhash3js-revisited@^3.0.0:
   version "3.0.0"
   resolved "https://registry.yarnpkg.com/murmurhash3js-revisited/-/murmurhash3js-revisited-3.0.0.tgz#6bd36e25de8f73394222adc6e41fa3fac08a5869"
@@ -6485,9 +6471,9 @@ p-locate@^5.0.0:
    p-limit "^3.0.2"
 
 p-queue@^8.0.1:
-  version "8.0.1"
-  resolved "https://registry.yarnpkg.com/p-queue/-/p-queue-8.0.1.tgz#718b7f83836922ef213ddec263ff4223ce70bef8"
-  integrity sha512-NXzu9aQJTAzbBqOt2hwsR63ea7yvxJc0PwN/zobNAudYfb1B7R08SzB4TsLeSbUCuG467NhnoT0oO6w1qRO+BA==
+  version "8.1.0"
+  resolved "https://registry.yarnpkg.com/p-queue/-/p-queue-8.1.0.tgz#d71929249868b10b16f885d8a82beeaf35d32279"
+  integrity sha512-mxLDbbGIBEXTJL0zEx8JIylaj3xQ7Z/7eEVjcF9fJX4DBiH9oqe+oahYnlKKxm0Ci9TlWTyhSHgygxMxjIB2jw==
   dependencies:
     eventemitter3 "^5.0.1"
     p-timeout "^6.1.2"
@@ -6502,9 +6488,9 @@ p-retry@^6.2.0:
     retry "^0.13.1"
 
 p-timeout@^6.1.2:
-  version "6.1.2"
-  resolved "https://registry.yarnpkg.com/p-timeout/-/p-timeout-6.1.2.tgz#22b8d8a78abf5e103030211c5fc6dee1166a6aa5"
-  integrity sha512-UbD77BuZ9Bc9aABo74gfXhNvzC9Tx7SxtHSh1fxvx3jTLLYvmVhiQZZrJzqqU0jKbN32kb5VOKiLEQI/3bIjgQ==
+  version "6.1.4"
+  resolved "https://registry.yarnpkg.com/p-timeout/-/p-timeout-6.1.4.tgz#418e1f4dd833fa96a2e3f532547dd2abdb08dbc2"
+  integrity sha512-MyIV3ZA/PmyBN/ud8vV9XzwTrNtR4jFrObymZYnZqMmW0zA8Z17vnT0rBgFE/TlohB+YCHqXMgZzb3Csp49vqg==
 
 p-try@^2.0.0:
   version "2.2.0"
@@ -6821,9 +6807,9 @@ promise-retry@^2.0.1:
     retry "^0.12.0"
 
 protobufjs@^7.1.2:
-  version "7.3.0"
-  resolved "https://registry.yarnpkg.com/protobufjs/-/protobufjs-7.3.0.tgz#a32ec0422c039798c41a0700306a6e305b9cb32c"
-  integrity sha512-YWD03n3shzV9ImZRX3ccbjqLxj7NokGN0V/ESiBV5xWqrommYHYiihuIyavq03pWSGqlyvYUFmfoMKd+1rPA/g==
+  version "7.5.0"
+  resolved "https://registry.yarnpkg.com/protobufjs/-/protobufjs-7.5.0.tgz#a317ad80713e9db43c8e55afa8636a9aa76bb630"
+  integrity sha512-Z2E/kOY1QjoMlCytmexzYfDm/w5fKAiRwpSzGtdnXW1zC88Z2yXazHHrOtwCzn+7wSxyE8PYM4rvVcMphF9sOA==
   dependencies:
     "@protobufjs/aspromise" "^1.1.2"
     "@protobufjs/base64" "^1.1.2"
@@ -8290,6 +8276,11 @@ undici-types@~5.26.4:
   resolved "https://registry.yarnpkg.com/undici-types/-/undici-types-5.26.5.tgz#bcd539893d00b56e964fd2657a4866b221a65617"
   integrity sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==
 
+undici-types@~6.21.0:
+  version "6.21.0"
+  resolved "https://registry.yarnpkg.com/undici-types/-/undici-types-6.21.0.tgz#691d00af3909be93a7faa13be61b3a5b50ef12cb"
+  integrity sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ==
+
 unicode-canonical-property-names-ecmascript@^2.0.0:
   version "2.0.0"
   resolved "https://registry.yarnpkg.com/unicode-canonical-property-names-ecmascript/-/unicode-canonical-property-names-ecmascript-2.0.0.tgz#301acdc525631670d39f6146e0e77ff6bbdebddc"
@@ -8442,9 +8433,9 @@ uuid@^8.3.2:
   integrity sha512-+NYs2QeMWy+GWFOEm9xnn6HCDp0l7QBD7ml8zLUmJ+93Q5NF0NocErnwkTkXVFNiX3/fpC6afS8Dhb/gz7R7eg==
 
 uuid@^9.0.0:
-  version "9.0.0"
-  resolved "https://registry.yarnpkg.com/uuid/-/uuid-9.0.0.tgz#592f550650024a38ceb0c562f2f6aa435761efb5"
-  integrity sha512-MXcSTerfPa4uqyzStbRoTgt5XIe3x5+42+q1sDuy3R5MDk66URdLMOZe5aPX/SQd+kuYAh0FdP/pO28IkQyTeg==
+  version "9.0.1"
+  resolved "https://registry.yarnpkg.com/uuid/-/uuid-9.0.1.tgz#e188d4c8853cc722220392c424cd637f32293f30"
+  integrity sha512-b+1eJOlsR9K8HJpow9Ok3fiWOWSIcIzXodvv0rQjVoOVNpWMpxf1wZNpt4y9h10odCNrqnYp1OBzRktckBe3sA==
 
 v8-compile-cache@^2.0.3:
   version "2.4.0"
@@ -8470,20 +8461,6 @@ verror@^1.10.0:
     core-util-is "1.0.2"
     extsprintf "^1.2.0"
 
-warcio@^2.3.1:
-  version "2.3.1"
-  resolved "https://registry.yarnpkg.com/warcio/-/warcio-2.3.1.tgz#8ac9de897de1a556161168f2a3938b60929908ca"
-  integrity sha512-PjcWqzXfs6HdWfHi1V/i8MoMmV5M0Csg3rOa2mqCJ1dmCJXswVfQ0VXbEVumwavNIW2oFFj6LJoCHHeL4Ls/zw==
-  dependencies:
-    "@types/pako" "^1.0.7"
-    "@types/stream-buffers" "^3.0.7"
-    base32-encode "^2.0.0"
-    hash-wasm "^4.9.0"
-    pako "^1.0.11"
-    tempy "^3.1.0"
-    uuid-random "^1.3.2"
-    yargs "^17.6.2"
-
 warcio@^2.4.0:
   version "2.4.0"
   resolved "https://registry.yarnpkg.com/warcio/-/warcio-2.4.0.tgz#13bae2837f1bbf5cf7585f75857e6311d30557bd"