Skip to content

Commit

Permalink
remove unnecessary log
Browse files Browse the repository at this point in the history
  • Loading branch information
yuming-long committed Sep 26, 2024
1 parent a487104 commit 0a7370a
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 16 deletions.
21 changes: 8 additions & 13 deletions _test_unstructured_client/unit/test_split_pdf_hook.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,7 @@ def test_unit_is_pdf_valid_pdf():
assert result is True


-def test_unit_is_pdf_valid_pdf_without_file_extension(caplog):
+def test_unit_is_pdf_valid_pdf_without_file_extension():
"""Test is pdf method returns True for file with valid pdf content without basing on file extension."""
filename = "_sample_docs/layout-parser-paper-fast.pdf"

Expand All @@ -252,36 +252,31 @@ def test_unit_is_pdf_valid_pdf_without_file_extension(caplog):
assert result is True


-def test_unit_is_pdf_invalid_extension(caplog):
+def test_unit_is_pdf_invalid_extension():
     """Test is pdf method returns False for file with invalid extension."""
     file = shared.Files(content=b"txt_content", file_name="test_file.txt")

-    with caplog.at_level(logging.WARNING):
-        result = pdf_utils.is_pdf(file)
+    result = pdf_utils.is_pdf(file)

     assert result is False
-    assert "The file does not appear to be a valid PDF." in caplog.text


-def test_unit_is_pdf_invalid_pdf(caplog):
+def test_unit_is_pdf_invalid_pdf():
     """Test is pdf method returns False for file with invalid pdf content."""
     file = shared.Files(content=b"invalid_pdf_content", file_name="test_file.pdf")

-    with caplog.at_level(logging.WARNING):
-        result = pdf_utils.is_pdf(file)
+    result = pdf_utils.is_pdf(file)

     assert result is False
-    assert "The file does not appear to be a valid PDF." in caplog.text

-def test_unit_is_pdf_invalid_pdf_without_file_extension(caplog):
+
+def test_unit_is_pdf_invalid_pdf_without_file_extension():
     """Test is pdf method returns False for file with invalid pdf content without basing on file extension."""
     file = shared.Files(content=b"invalid_pdf_content", file_name="uuid1234")

-    with caplog.at_level(logging.WARNING):
-        result = pdf_utils.is_pdf(file)
+    result = pdf_utils.is_pdf(file)

     assert result is False
-    assert "The file does not appear to be a valid PDF." in caplog.text


def test_unit_get_starting_page_number_missing_key():
Expand Down
4 changes: 1 addition & 3 deletions src/unstructured_client/_hooks/custom/pdf_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,9 +68,7 @@ def is_pdf(file: shared.Files) -> bool:
     try:
         content = cast(bytes, file.content)
         PdfReader(io.BytesIO(content), strict=True)
-    except (PdfReadError, UnicodeDecodeError) as exc:
-        logger.error(exc)
-        logger.warning("The file does not appear to be a valid PDF.")
+    except (PdfReadError, UnicodeDecodeError):
         return False

     return True

0 comments on commit 0a7370a

Please sign in to comment.