diff --git a/src/hooks/custom/utils/pdf.ts b/src/hooks/custom/utils/pdf.ts index c95f508..7c28e95 100644 --- a/src/hooks/custom/utils/pdf.ts +++ b/src/hooks/custom/utils/pdf.ts @@ -98,8 +98,7 @@ export async function splitPdf( } /** - * Checks if the given file is a PDF. First it checks the `.pdf` file extension, then - * it tries to load the file as a PDF using the `PDFDocument.load` method. + * Checks if the given file is a PDF by loading the file as a PDF using the `PDFDocument.load` method. * @param file - The file to check. * @returns A promise that resolves to three values, first is a boolean representing * whether there was an error during PDF load, second is a PDFDocument object or null @@ -109,8 +108,8 @@ export async function splitPdf( export async function loadPdf( file: File | null ): Promise<[boolean, PDFDocument | null, number]> { - if (!file?.name.endsWith(".pdf")) { - console.info("Given file is not a PDF, so splitting is not enabled."); + if (!file) { + console.info("Given file is null, so splitting is not enabled."); return [true, null, 0]; } @@ -120,10 +119,6 @@ export async function loadPdf( const pagesCount = pdf.getPages().length; return [false, pdf, pagesCount]; } catch (e) { - console.error(e); - console.warn( - "Attempted to interpret file as pdf, but error arose when splitting by pages. Reverting to non-split pdf handling path." - ); return [true, null, 0]; } } diff --git a/test/unit/utils/pdf.test.ts b/test/unit/utils/pdf.test.ts index 7e9fdf5..caae7ac 100644 --- a/test/unit/utils/pdf.test.ts +++ b/test/unit/utils/pdf.test.ts @@ -97,7 +97,7 @@ describe("Pdf utility functions", () => { }); describe("loadPdf", () => { - it("should return true, null, and 0 if the file is not a PDF", async () => { + it("should return true, null, and 0 if the file is null", async () => { const result = await loadPdf(null); expect(result).toEqual([true, null, 0]); @@ -115,6 +115,19 @@ describe("Pdf utility functions", () => { expect(file.content).not.toHaveBeenCalled(); }); + it("should return true, null, and 0 if the file is not a PDF without basing on file extension", async () => { + const file = { + name: "uuid1234", + content: jest.fn().mockResolvedValue(new ArrayBuffer(0)), + }; + + const result = await loadPdf(file as any); + + expect(result).toEqual([true, null, 0]); + expect(file.content).not.toHaveBeenCalled(); + }); + + it("should return true, null, and 0 if there is an error while loading the PDF", async () => { const file = { name: "document.pdf", @@ -143,5 +156,24 @@ describe("Pdf utility functions", () => { expect(loadMock).toHaveBeenCalledTimes(1); expect(loadMock).toHaveBeenCalledWith(f.arrayBuffer()); }); + + it("should return false, PDFDocument object, and the number of pages if the PDF is loaded successfully without basing on file extension", async () => { + const file = readFileSync("test/data/layout-parser-paper-fast.pdf"); + const f = { + name: "uuid1234", + arrayBuffer: () => file.buffer, + }; + + jest.clearAllMocks(); // Reset Mocks Between Tests + const loadMock = jest.spyOn(PDFDocument, "load"); + + const [error, _, pages] = await loadPdf(f as any); + + expect(error).toBeFalsy(); + expect(pages).toEqual(2); + expect(loadMock).toHaveBeenCalledTimes(1); + expect(loadMock).toHaveBeenCalledWith(f.arrayBuffer()); + }); + }); });