@@ -223,7 +223,7 @@ def test_unit_parse_form_data_none_filename_error():
223
223
224
224
225
225
def test_unit_is_pdf_valid_pdf ():
226
- """Test is pdf method returns True for valid pdf file (has .pdf extension and can be read) ."""
226
+ """Test is pdf method returns True for valid pdf file with filename ."""
227
227
filename = "_sample_docs/layout-parser-paper-fast.pdf"
228
228
229
229
with open (filename , "rb" ) as f :
@@ -237,15 +237,30 @@ def test_unit_is_pdf_valid_pdf():
237
237
assert result is True
238
238
239
239
240
def test_unit_is_pdf_valid_pdf_without_file_extension(caplog):
    """Check that is_pdf returns True for PDF content whose file name has no extension."""
    sample_path = "_sample_docs/layout-parser-paper-fast.pdf"

    # Read the sample document up front so the upload object is built from plain bytes.
    with open(sample_path, "rb") as handle:
        pdf_bytes = handle.read()

    # The file name carries no ".pdf" suffix; only the content comes from the sample PDF.
    upload = shared.Files(content=pdf_bytes, file_name="uuid1234")

    outcome = pdf_utils.is_pdf(upload)

    assert outcome is True
253
+
254
+
240
255
def test_unit_is_pdf_invalid_extension(caplog):
    """Test is_pdf returns False for a non-PDF file that has a .txt extension.

    The expected log text is the generic invalid-PDF message rather than an
    extension-specific one.
    """
    file = shared.Files(content=b"txt_content", file_name="test_file.txt")

    # Capture log records at WARNING level and above while is_pdf runs.
    with caplog.at_level(logging.WARNING):
        result = pdf_utils.is_pdf(file)

    assert result is False
    # No trailing space: matches the message asserted by the other invalid-PDF tests.
    assert "The file does not appear to be a valid PDF." in caplog.text
249
264
250
265
251
266
def test_unit_is_pdf_invalid_pdf (caplog ):
@@ -258,6 +273,16 @@ def test_unit_is_pdf_invalid_pdf(caplog):
258
273
assert result is False
259
274
assert "The file does not appear to be a valid PDF." in caplog .text
260
275
276
def test_unit_is_pdf_invalid_pdf_without_file_extension(caplog):
    """Check that is_pdf returns False for non-PDF content whose file name has no extension."""
    bogus_bytes = b"invalid_pdf_content"
    upload = shared.Files(content=bogus_bytes, file_name="uuid1234")

    # Capture log records at WARNING level and above while is_pdf runs.
    with caplog.at_level(logging.WARNING):
        outcome = pdf_utils.is_pdf(upload)

    expected_message = "The file does not appear to be a valid PDF."
    assert outcome is False
    assert expected_message in caplog.text
285
+
261
286
262
287
def test_unit_get_starting_page_number_missing_key ():
263
288
"""Test _get_starting_page_number method with missing key."""
0 commit comments