Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore Use pdf library to check file without extension #115

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 3 additions & 8 deletions src/hooks/custom/utils/pdf.ts
Original file line number Diff line number Diff line change
Expand Up @@ -98,8 +98,7 @@ export async function splitPdf(
}

/**
* Checks if the given file is a PDF. First it checks the `.pdf` file extension, then
* it tries to load the file as a PDF using the `PDFDocument.load` method.
* Checks if the given file is a PDF by loading the file as a PDF using the `PDFDocument.load` method.
* @param file - The file to check.
* @returns A promise that resolves to three values: the first is a boolean indicating
* whether there was an error during PDF load, the second is a PDFDocument object or null
Expand All @@ -109,8 +108,8 @@ export async function splitPdf(
export async function loadPdf(
file: File | null
): Promise<[boolean, PDFDocument | null, number]> {
if (!file?.name.endsWith(".pdf")) {
console.info("Given file is not a PDF, so splitting is not enabled.");
if (!file) {
console.info("Given file is null, so splitting is not enabled.");
return [true, null, 0];
}

Expand All @@ -120,10 +119,6 @@ export async function loadPdf(
const pagesCount = pdf.getPages().length;
return [false, pdf, pagesCount];
} catch (e) {
console.error(e);
console.warn(
"Attempted to interpret file as pdf, but error arose when splitting by pages. Reverting to non-split pdf handling path."
);
return [true, null, 0];
}
}
34 changes: 33 additions & 1 deletion test/unit/utils/pdf.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ describe("Pdf utility functions", () => {
});

describe("loadPdf", () => {
it("should return true, null, and 0 if the file is not a PDF", async () => {
it("should return true, null, and 0 if the file is null", async () => {
const result = await loadPdf(null);

expect(result).toEqual([true, null, 0]);
Expand All @@ -115,6 +115,19 @@ describe("Pdf utility functions", () => {
expect(file.content).not.toHaveBeenCalled();
});

// Extension-less, non-PDF input: loadPdf is expected to report failure as [true, null, 0].
it("should return true, null, and 0 if the file is not a PDF without basing on file extension", async () => {
  // Minimal stand-in for a File: a name with no extension and a mocked `content` resolver.
  const contentMock = jest.fn().mockResolvedValue(new ArrayBuffer(0));
  const extensionlessFile = { name: "uuid1234", content: contentMock };

  const outcome = await loadPdf(extensionlessFile as any);

  // NOTE(review): this stub defines no `arrayBuffer` method, so the failure result
  // presumably comes from loadPdf's error path rather than from actual PDF parsing,
  // and the `content` check below may be vacuous — confirm against loadPdf.
  expect(outcome).toEqual([true, null, 0]);
  expect(contentMock).not.toHaveBeenCalled();
});


it("should return true, null, and 0 if there is an error while loading the PDF", async () => {
const file = {
name: "document.pdf",
Expand Down Expand Up @@ -143,5 +156,24 @@ describe("Pdf utility functions", () => {
expect(loadMock).toHaveBeenCalledTimes(1);
expect(loadMock).toHaveBeenCalledWith(f.arrayBuffer());
});

// A real PDF exposed under an extension-less name must still load successfully.
it("should return false, PDFDocument object, and the number of pages if the PDF is loaded successfully without basing on file extension", async () => {
  // Read the fixture from disk and expose it through a File-like stub whose name has no ".pdf".
  const pdfBytes = readFileSync("test/data/layout-parser-paper-fast.pdf");
  const extensionlessPdf = {
    name: "uuid1234",
    arrayBuffer: () => pdfBytes.buffer,
  };

  jest.clearAllMocks(); // Reset Mocks Between Tests
  // Spy (without replacing the implementation) so the call into PDFDocument.load can be inspected.
  const loadSpy = jest.spyOn(PDFDocument, "load");

  const outcome = await loadPdf(extensionlessPdf as any);
  const hadError = outcome[0];
  const pageCount = outcome[2];

  expect(hadError).toBeFalsy();
  expect(pageCount).toEqual(2);
  expect(loadSpy).toHaveBeenCalledTimes(1);
  expect(loadSpy).toHaveBeenCalledWith(extensionlessPdf.arrayBuffer());
});

});
});
Loading