diff --git a/docling_core/cli/view.py b/docling_core/cli/view.py index fd307ae..4ff3b5d 100644 --- a/docling_core/cli/view.py +++ b/docling_core/cli/view.py @@ -57,7 +57,7 @@ def view( doc = DoclingDocument.load_from_json(filename=path) target_path = Path(tempfile.mkdtemp()) / "out.html" html_output = doc.export_to_html(image_mode=ImageRefMode.EMBEDDED) - with open(target_path, "w") as f: + with open(target_path, "w", encoding="utf-8") as f: f.write(html_output) webbrowser.open(url=f"file://{target_path.absolute().resolve()}") diff --git a/docling_core/types/doc/document.py b/docling_core/types/doc/document.py index a590782..496c937 100644 --- a/docling_core/types/doc/document.py +++ b/docling_core/types/doc/document.py @@ -1884,7 +1884,7 @@ def save_as_json( ) out = new_doc.export_to_dict() - with open(filename, "w") as fw: + with open(filename, "w", encoding="utf-8") as fw: json.dump(out, fw, indent=indent) @classmethod @@ -1898,7 +1898,7 @@ def load_from_json(cls, filename: Path) -> "DoclingDocument": :rtype: DoclingDocument """ - with open(filename, "r") as f: + with open(filename, "r", encoding="utf-8") as f: return cls.model_validate_json(f.read()) def save_as_yaml( @@ -1919,7 +1919,7 @@ def save_as_yaml( ) out = new_doc.export_to_dict() - with open(filename, "w") as fw: + with open(filename, "w", encoding="utf-8") as fw: yaml.dump(out, fw, default_flow_style=default_flow_style) def export_to_dict( @@ -1971,7 +1971,7 @@ def save_as_markdown( page_no=page_no, ) - with open(filename, "w") as fw: + with open(filename, "w", encoding="utf-8") as fw: fw.write(md_out) def export_to_markdown( # noqa: C901 @@ -2224,7 +2224,7 @@ def save_as_html( html_head=html_head, ) - with open(filename, "w") as fw: + with open(filename, "w", encoding="utf-8") as fw: fw.write(html_out) def _get_output_paths( @@ -2462,7 +2462,7 @@ def save_as_document_tokens( with_groups=with_groups, ) - with open(filename, "w") as fw: + with open(filename, "w", encoding="utf-8") as fw: fw.write(out) def export_to_document_tokens( diff --git a/docling_core/utils/validate.py b/docling_core/utils/validate.py index d8bf9a3..a5f85d3 100644 --- a/docling_core/utils/validate.py +++ b/docling_core/utils/validate.py @@ -38,7 +38,7 @@ def run(): """Run the validation of a file containing a Document.""" file_format, input_file = parse_arguments() - with open(input_file, "r") as fd: + with open(input_file, "r", encoding="utf-8") as fd: file_ = json.load(fd) result = (False, "Empty result") diff --git a/test/test_base.py b/test/test_base.py index 5dfe537..0fe09ab 100644 --- a/test/test_base.py +++ b/test/test_base.py @@ -36,7 +36,7 @@ def test_identifier(): ) # schema_json(): no need to set by_alias since it is True by the default - tf = open("test/data/json_schemas/base_identifier.json") + tf = open("test/data/json_schemas/base_identifier.json", encoding="utf-8") gold_json = json.load(tf) assert Identifier.model_json_schema() == gold_json @@ -104,7 +104,7 @@ def test_log(): == gold_dict ) - with open("test/data/json_schemas/base_log.json") as tf: + with open("test/data/json_schemas/base_log.json", encoding="utf-8") as tf: gold_json_schema = json.load(tf) assert Log.model_json_schema() == gold_json_schema diff --git a/test/test_collection.py b/test/test_collection.py index 34ff40f..6fc18ae 100644 --- a/test/test_collection.py +++ b/test/test_collection.py @@ -45,7 +45,7 @@ def test_generic(): def test_document(): """Test the Document model.""" for filename in glob.glob("test/data/legacy_doc/doc-*.json"): - with open(filename) as file_obj: + with open(filename, encoding="utf-8") as file_obj: file_json = file_obj.read() Document.model_validate_json(file_json) @@ -54,7 +54,7 @@ def test_table_export_to_tokens(): """Test the Table Tokens export.""" for filename in glob.glob("test/data/legacy_doc/doc-*.json"): - with open(filename) as file_obj: + with open(filename, encoding="utf-8") as file_obj: file_json = file_obj.read() doc = Document.model_validate_json(file_json) @@ -73,10 +73,10 @@ def test_table_export_to_tokens(): fname = f"{filename}_table_{i}.doctags.txt" if GENERATE: print(f"writing {fname}") - with open(fname, "w") as gold_obj: + with open(fname, "w", encoding="utf-8") as gold_obj: gold_obj.write(out) - with open(fname, "r") as gold_obj: + with open(fname, "r", encoding="utf-8") as gold_obj: gold_data = gold_obj.read() assert out == gold_data @@ -96,10 +96,10 @@ def test_table_export_to_tokens(): fname = f"{filename}_table_{i}.doctags.txt" if GENERATE: print(f"writing {fname}") - with open(fname, "w") as gold_obj: + with open(fname, "w", encoding="utf-8") as gold_obj: gold_obj.write(out) - with open(fname, "r") as gold_obj: + with open(fname, "r", encoding="utf-8") as gold_obj: gold_data = gold_obj.read() assert out == gold_data @@ -110,17 +110,19 @@ def test_table_export_to_tokens(): def test_document_export_to_md(): """Test the Document Markdown export.""" - with open("test/data/legacy_doc/doc-export.json") as src_obj: + with open("test/data/legacy_doc/doc-export.json", encoding="utf-8") as src_obj: src_data = src_obj.read() doc = Document.model_validate_json(src_data) md = doc.export_to_markdown() if GENERATE: - with open("test/data/legacy_doc/doc-export.md", "w") as gold_obj: + with open( + "test/data/legacy_doc/doc-export.md", "w", encoding="utf-8" + ) as gold_obj: gold_obj.write(md) - with open("test/data/legacy_doc/doc-export.md") as gold_obj: + with open("test/data/legacy_doc/doc-export.md", encoding="utf-8") as gold_obj: gold_data = gold_obj.read().strip() assert md == gold_data @@ -128,17 +130,21 @@ def test_document_export_to_md(): def test_document_export_to_tokens(): """Test the Document Tokens export.""" - with open("test/data/legacy_doc/doc-export.json") as src_obj: + with open("test/data/legacy_doc/doc-export.json", encoding="utf-8") as src_obj: src_data = src_obj.read() doc = Document.model_validate_json(src_data) xml = doc.export_to_document_tokens(delim=True) if GENERATE: - with open("test/data/legacy_doc/doc-export.doctags.txt", "w") as gold_obj: + with open( + "test/data/legacy_doc/doc-export.doctags.txt", "w", encoding="utf-8" + ) as gold_obj: gold_obj.write(xml) - with open("test/data/legacy_doc/doc-export.doctags.txt", "r") as gold_obj: + with open( + "test/data/legacy_doc/doc-export.doctags.txt", "r", encoding="utf-8" + ) as gold_obj: gold_data = gold_obj.read().strip() assert xml == gold_data @@ -147,6 +153,6 @@ def test_document_export_to_tokens(): def test_record(): """Test the Document model.""" for filename in glob.glob("test/data/rec/record-*.json"): - with open(filename) as file_obj: + with open(filename, encoding="utf-8") as file_obj: file_json = file_obj.read() Record.model_validate_json(file_json) diff --git a/test/test_doc_legacy_convert.py b/test/test_doc_legacy_convert.py index ab2549d..aad7fae 100644 --- a/test/test_doc_legacy_convert.py +++ b/test/test_doc_legacy_convert.py @@ -15,7 +15,7 @@ def test_new_to_old(): filename = "test/data/doc/2206.01062.yaml" - with open(filename, "r") as fp: + with open(filename, "r", encoding="utf-8") as fp: dict_from_yaml = yaml.safe_load(fp) doc = DoclingDocument.model_validate(dict_from_yaml) diff --git a/test/test_doc_schema.py b/test/test_doc_schema.py index a5c3eee..109e7c8 100644 --- a/test/test_doc_schema.py +++ b/test/test_doc_schema.py @@ -27,7 +27,7 @@ def test_ccs_document(): """Validate data with CCSDocument schema.""" for filename in glob.glob("test/data/legacy_doc/doc-*.json"): - with open(filename) as file_obj: + with open(filename, encoding="utf-8") as file_obj: file_json = file_obj.read() try: # do not pass strict=True, since date input values are not an instance of datetime. @@ -41,7 +41,7 @@ def test_ccs_document(): # check doc-error-1 is invalid in logs try: - with open("test/data/legacy_doc/error-1.json") as file_obj: + with open("test/data/legacy_doc/error-1.json", encoding="utf-8") as file_obj: file_json = file_obj.read() CCSDocument.model_validate_json(file_json) assert False, f"Data in file {filename} should be invalid for CCSDocument model" @@ -55,7 +55,7 @@ def test_ccs_document(): # check doc-error-2 is invalid for missing page-hashes with ( pytest.raises(ValidationError, match="page-hashes"), - open("test/data/legacy_doc/error-2.json") as file_obj, + open("test/data/legacy_doc/error-2.json", encoding="utf-8") as file_obj, ): file_json = file_obj.read() CCSDocument.model_validate_json(file_json) @@ -63,7 +63,7 @@ def test_ccs_document(): # check doc-error-3 is invalid for wrong types in citation_count and reference_count with ( pytest.raises(ValidationError, match="count"), - open("test/data/legacy_doc/error-3.json") as file_obj, + open("test/data/legacy_doc/error-3.json", encoding="utf-8") as file_obj, ): file_json = file_obj.read() CCSDocument.model_validate_json(file_json) @@ -72,7 +72,7 @@ def test_ccs_document(): def test_publication_journal(): """ "Validate data with Publication model.""" for filename in glob.glob("test/data/legacy_doc/intermediates/publication_*.json"): - with open(filename) as file_obj: + with open(filename, encoding="utf-8") as file_obj: file_json = file_obj.read() file_dict = json.loads(file_json) try: @@ -85,7 +85,7 @@ def test_publication_journal(): def test_description_advanced_t(): """Validate data with different DescriptionAdvancedT instances.""" # without description.advanced - with open("test/data/legacy_doc/doc-5.json") as file_obj: + with open("test/data/legacy_doc/doc-5.json", encoding="utf-8") as file_obj: desc = json.load(file_obj)["description"] # without advanced diff --git a/test/test_doc_schema_extractor.py b/test/test_doc_schema_extractor.py index 5537a5d..dd07c62 100644 --- a/test/test_doc_schema_extractor.py +++ b/test/test_doc_schema_extractor.py @@ -15,7 +15,7 @@ def test_ccs_document_update(): """Validate data with CCSDocument extract.""" filename = "test/data/legacy_doc/ext-1.json" try: - with open(filename) as f: + with open(filename, encoding="utf-8") as f: raw_doc = json.load(f) for item in raw_doc["main-text"]: if "$ref" in item: diff --git a/test/test_docling_doc.py b/test/test_docling_doc.py index 51fde0f..d9faa80 100644 --- a/test/test_docling_doc.py +++ b/test/test_docling_doc.py @@ -64,11 +64,15 @@ def serialise(obj): return yaml.safe_dump(obj.model_dump(mode="json", by_alias=True)) def write(name: str, serialisation: str): - with open(f"./test/data/docling_document/unit/{name}.yaml", "w") as fw: + with open( + f"./test/data/docling_document/unit/{name}.yaml", "w", encoding="utf-8" + ) as fw: fw.write(serialisation) def read(name: str): - with open(f"./test/data/docling_document/unit/{name}.yaml", "r") as fr: + with open( + f"./test/data/docling_document/unit/{name}.yaml", "r", encoding="utf-8" + ) as fr: gold = fr.read() return gold @@ -146,7 +150,7 @@ def test_reference_doc(): filename = "test/data/doc/dummy_doc.yaml" # Read YAML file of manual reference doc - with open(filename, "r") as fp: + with open(filename, "r", encoding="utf-8") as fp: dict_from_yaml = yaml.safe_load(fp) doc = DoclingDocument.model_validate(dict_from_yaml) @@ -186,7 +190,7 @@ def test_parse_doc(): filename = "test/data/doc/2206.01062.yaml" - with open(filename, "r") as fp: + with open(filename, "r", encoding="utf-8") as fp: dict_from_yaml = yaml.safe_load(fp) doc = DoclingDocument.model_validate(dict_from_yaml) @@ -244,12 +248,12 @@ def _test_serialize_and_reload(doc): def _verify_regression_test(pred: str, filename: str, ext: str): if os.path.exists(filename + f".{ext}") and not GENERATE: - with open(filename + f".{ext}", "r") as fr: + with open(filename + f".{ext}", "r", encoding="utf-8") as fr: gt_true = fr.read() assert gt_true == pred, f"Does not pass regression-test for {filename}.{ext}" else: - with open(filename + f".{ext}", "w") as fw: + with open(filename + f".{ext}", "w", encoding="utf-8") as fw: fw.write(pred) @@ -499,7 +503,7 @@ def test_version_doc(): doc = DoclingDocument(name="Untitled 1") assert doc.version == CURRENT_VERSION - with open("test/data/doc/dummy_doc.yaml") as fp: + with open("test/data/doc/dummy_doc.yaml", encoding="utf-8") as fp: dict_from_yaml = yaml.safe_load(fp) doc = DoclingDocument.model_validate(dict_from_yaml) assert doc.version == CURRENT_VERSION @@ -674,17 +678,17 @@ def _normalise_string_wrt_filepaths(instr: str, paths: List[Path]): def _verify_saved_output(filename: str, paths: List[Path]): pred = "" - with open(filename, "r") as fr: + with open(filename, "r", encoding="utf-8") as fr: pred = fr.read() pred = _normalise_string_wrt_filepaths(pred, paths=paths) if GENERATE: - with open(str(filename) + ".gt", "w") as fw: + with open(str(filename) + ".gt", "w", encoding="utf-8") as fw: fw.write(pred) else: gt = "" - with open(str(filename) + ".gt", "r") as fr: + with open(str(filename) + ".gt", "r", encoding="utf-8") as fr: gt = fr.read() assert pred == gt, f"pred!=gt for {filename}" diff --git a/test/test_hierarchical_chunker.py b/test/test_hierarchical_chunker.py index 38504e7..ec98dc7 100644 --- a/test/test_hierarchical_chunker.py +++ b/test/test_hierarchical_chunker.py @@ -11,7 +11,7 @@ def test_chunk_merge_list_items(): - with open("test/data/chunker/0_inp_dl_doc.json") as f: + with open("test/data/chunker/0_inp_dl_doc.json", encoding="utf-8") as f: data_json = f.read() dl_doc = DLDocument.model_validate_json(data_json) chunker = HierarchicalChunker( @@ -21,13 +21,13 @@ def test_chunk_merge_list_items(): act_data = dict( root=[DocChunk.model_validate(n).export_json_dict() for n in chunks] ) - with open("test/data/chunker/0_out_chunks.json") as f: + with open("test/data/chunker/0_out_chunks.json", encoding="utf-8") as f: exp_data = json.load(fp=f) assert exp_data == act_data def test_chunk_no_merge_list_items(): - with open("test/data/chunker/0_inp_dl_doc.json") as f: + with open("test/data/chunker/0_inp_dl_doc.json", encoding="utf-8") as f: data_json = f.read() dl_doc = DLDocument.model_validate_json(data_json) chunker = HierarchicalChunker( @@ -37,6 +37,6 @@ def test_chunk_no_merge_list_items(): act_data = dict( root=[DocChunk.model_validate(n).export_json_dict() for n in chunks] ) - with open("test/data/chunker/1_out_chunks.json") as f: + with open("test/data/chunker/1_out_chunks.json", encoding="utf-8") as f: exp_data = json.load(fp=f) assert exp_data == act_data diff --git a/test/test_hybrid_chunker.py b/test/test_hybrid_chunker.py index d13fc05..c73c3b5 100644 --- a/test/test_hybrid_chunker.py +++ b/test/test_hybrid_chunker.py @@ -21,7 +21,7 @@ def test_chunk_merge_peers(): EXPECTED_OUT_FILE = "test/data/chunker/2a_out_chunks.json" - with open(INPUT_FILE) as f: + with open(INPUT_FILE, encoding="utf-8") as f: data_json = f.read() dl_doc = DLDocument.model_validate_json(data_json) @@ -36,7 +36,7 @@ def test_chunk_merge_peers(): act_data = dict( root=[DocChunk.model_validate(n).export_json_dict() for n in chunks] ) - with open(EXPECTED_OUT_FILE) as f: + with open(EXPECTED_OUT_FILE, encoding="utf-8") as f: exp_data = json.load(fp=f) assert exp_data == act_data @@ -44,7 +44,7 @@ def test_chunk_merge_peers(): def test_chunk_no_merge_peers(): EXPECTED_OUT_FILE = "test/data/chunker/2b_out_chunks.json" - with open(INPUT_FILE) as f: + with open(INPUT_FILE, encoding="utf-8") as f: data_json = f.read() dl_doc = DLDocument.model_validate_json(data_json) @@ -58,7 +58,7 @@ def test_chunk_no_merge_peers(): act_data = dict( root=[DocChunk.model_validate(n).export_json_dict() for n in chunks] ) - with open(EXPECTED_OUT_FILE) as f: + with open(EXPECTED_OUT_FILE, encoding="utf-8") as f: exp_data = json.load(fp=f) assert exp_data == act_data @@ -66,7 +66,7 @@ def test_chunk_no_merge_peers(): def test_serialize(): EXPECTED_OUT_FILE = "test/data/chunker/2a_out_ser_chunks.json" - with open(INPUT_FILE) as f: + with open(INPUT_FILE, encoding="utf-8") as f: data_json = f.read() dl_doc = DLDocument.model_validate_json(data_json) @@ -88,7 +88,7 @@ def test_serialize(): for chunk in chunks ] ) - with open(EXPECTED_OUT_FILE) as f: + with open(EXPECTED_OUT_FILE, encoding="utf-8") as f: exp_data = json.load(fp=f) assert exp_data == act_data @@ -96,7 +96,7 @@ def test_serialize(): def test_chunk_with_model_name(): EXPECTED_OUT_FILE = "test/data/chunker/2a_out_chunks.json" - with open(INPUT_FILE) as f: + with open(INPUT_FILE, encoding="utf-8") as f: data_json = f.read() dl_doc = DLDocument.model_validate_json(data_json) @@ -111,7 +111,7 @@ def test_chunk_with_model_name(): act_data = dict( root=[DocChunk.model_validate(n).export_json_dict() for n in chunks] ) - with open(EXPECTED_OUT_FILE) as f: + with open(EXPECTED_OUT_FILE, encoding="utf-8") as f: exp_data = json.load(fp=f) assert exp_data == act_data @@ -119,7 +119,7 @@ def test_chunk_with_model_name(): def test_chunk_default(): EXPECTED_OUT_FILE = "test/data/chunker/2c_out_chunks.json" - with open(INPUT_FILE) as f: + with open(INPUT_FILE, encoding="utf-8") as f: data_json = f.read() dl_doc = DLDocument.model_validate_json(data_json) @@ -130,6 +130,6 @@ def test_chunk_default(): act_data = dict( root=[DocChunk.model_validate(n).export_json_dict() for n in chunks] ) - with open(EXPECTED_OUT_FILE) as f: + with open(EXPECTED_OUT_FILE, encoding="utf-8") as f: exp_data = json.load(fp=f) assert exp_data == act_data diff --git a/test/test_json_schema_to_search_mapper.py b/test/test_json_schema_to_search_mapper.py index 583ce30..e52984d 100644 --- a/test/test_json_schema_to_search_mapper.py +++ b/test/test_json_schema_to_search_mapper.py @@ -16,7 +16,7 @@ def _load(filename): doc = {} - with open(filename, "r") as fid: + with open(filename, "r", encoding="utf-8") as fid: doc = json.load(fid) return doc diff --git a/test/test_nlp_qa.py b/test/test_nlp_qa.py index 27c671d..77877e3 100644 --- a/test/test_nlp_qa.py +++ b/test/test_nlp_qa.py @@ -20,7 +20,7 @@ def test_qapair_read(self): """Validate data read from files.""" for filename in glob.glob("test/data/nlp/qa-*.json"): try: - with open(filename) as file_obj: + with open(filename, encoding="utf-8") as file_obj: file_json = file_obj.read() QAPair.model_validate_json(file_json) except ValidationError as e: @@ -32,7 +32,7 @@ def test_qapair_wrong(self): filename = "test/data/nlp/error-qa-1.json" with ( pytest.raises(ValidationError, match="Input should be a valid string"), - open(filename) as file_obj, + open(filename, encoding="utf-8") as file_obj, ): file_json = file_obj.read() QAPair.model_validate_json(file_json) @@ -40,7 +40,7 @@ def test_qapair_wrong(self): filename = "test/data/nlp/error-qa-3.json" with ( pytest.raises(ValidationError, match="List must be unique"), - open(filename) as file_obj, + open(filename, encoding="utf-8") as file_obj, ): file_json = file_obj.read() QAPair.model_validate_json(file_json) diff --git a/test/test_rec_schema.py b/test/test_rec_schema.py index ae7bcd0..a898006 100644 --- a/test/test_rec_schema.py +++ b/test/test_rec_schema.py @@ -23,7 +23,7 @@ def test_predicates(self): """Validate data with Predicate schema.""" for filename in glob.glob("test/data/rec/predicate-*.json"): try: - with open(filename) as file_obj: + with open(filename, encoding="utf-8") as file_obj: file_json = file_obj.read() Predicate.model_validate_json(file_json) except ValidationError as e: @@ -34,7 +34,7 @@ def test_predicates_wrong(self): filename = "test/data/rec/error-predicate-01.json" with ( pytest.raises(ValidationError, match="invalid latitude"), - open(filename) as file_obj, + open(filename, encoding="utf-8") as file_obj, ): file_json = file_obj.read() Predicate.model_validate_json(file_json) @@ -42,7 +42,7 @@ def test_predicates_wrong(self): filename = "test/data/rec/error-predicate-02.json" with ( pytest.raises(ValidationError, match="geopoint_value.conf"), - open(filename) as file_obj, + open(filename, encoding="utf-8") as file_obj, ): file_json = file_obj.read() Predicate.model_validate_json(file_json) @@ -51,7 +51,7 @@ def test_attributes(self): """Validate data with Attribute schema.""" for filename in glob.glob("test/data/rec/attribute-*.json"): try: - with open(filename) as file_obj: + with open(filename, encoding="utf-8") as file_obj: file_json = file_obj.read() Attribute.model_validate_json(file_json) except ValidationError as e: @@ -61,7 +61,10 @@ def test_attributes(self): def test_attributes_wrong(self): """Validate data with Attribute schema.""" for filename in glob.glob("test/data/rec/error-attribute-*.json"): - with pytest.raises(ValidationError), open(filename) as file_obj: + with ( + pytest.raises(ValidationError), + open(filename, encoding="utf-8") as file_obj, + ): file_json = file_obj.read() Attribute.model_validate_json(file_json) @@ -69,7 +72,7 @@ def test_subjects(self): """Validate data with Subject schema.""" for filename in glob.glob("test/data/rec/subject-*.json"): try: - with open(filename) as file_obj: + with open(filename, encoding="utf-8") as file_obj: file_json = file_obj.read() Subject.model_validate_json(file_json) except ValidationError as e: @@ -84,7 +87,7 @@ def test_subjects2(self): ] for filename in glob.glob("test/data/rec/subject-*.json"): try: - with open(filename) as file_obj: + with open(filename, encoding="utf-8") as file_obj: file_json = file_obj.read() subject.model_validate_json(file_json) except ValidationError as e: @@ -98,14 +101,20 @@ def test_subjects_wrong(self): Literal["db_"], Literal["material"], Literal["chemical_name", "sum_formula"] ] for filename in glob.glob("test/data/rec/subject-*.json"): - with self.assertRaises(ValidationError), open(filename) as file_obj: + with ( + self.assertRaises(ValidationError), + open(filename, encoding="utf-8") as file_obj, + ): file_json = file_obj.read() subject.model_validate_json(file_json) subject = Subject[ Literal["db"], Literal["material_"], Literal["chemical_name", "sum_formula"] ] for filename in glob.glob("test/data/rec/subject-*.json"): - with self.assertRaises(ValidationError), open(filename) as file_obj: + with ( + self.assertRaises(ValidationError), + open(filename, encoding="utf-8") as file_obj, + ): file_json = file_obj.read() subject.model_validate_json(file_json) subject = Subject[ @@ -114,7 +123,10 @@ def test_subjects_wrong(self): Literal["chemical_name_", "sum_formula_"], ] for filename in glob.glob("test/data/rec/subject-*.json"): - with self.assertRaises(ValidationError), open(filename) as file_obj: + with ( + self.assertRaises(ValidationError), + open(filename, encoding="utf-8") as file_obj, + ): file_json = file_obj.read() subject.model_validate_json(file_json) @@ -122,7 +134,7 @@ def test_statements(self): """Validate data with Statement schema.""" for filename in glob.glob("test/data/rec/statement-*.json"): try: - with open(filename) as file_obj: + with open(filename, encoding="utf-8") as file_obj: file_json = file_obj.read() Statement.model_validate_json(file_json) except ValidationError as e: @@ -133,7 +145,7 @@ def test_records(self): """Validate data with Record schema.""" for filename in glob.glob("test/data/rec/record-*.json"): try: - with open(filename) as file_obj: + with open(filename, encoding="utf-8") as file_obj: file_json = file_obj.read() Record.model_validate_json(file_json) except ValidationError as e: @@ -154,7 +166,7 @@ def test_records_2(self): ] for filename in glob.glob("test/data/rec/record-01.json"): try: - with open(filename) as file_obj: + with open(filename, encoding="utf-8") as file_obj: file_json = file_obj.read() record.model_validate_json(file_json) except ValidationError as e: @@ -211,7 +223,7 @@ def test_records_3(self): ] try: filename = "test/data/rec/record-04.json" - with open(filename) as file_obj: + with open(filename, encoding="utf-8") as file_obj: file_json = file_obj.read() record.model_validate_json(file_json) except ValidationError as e: @@ -231,7 +243,10 @@ def test_records_wrong(self): Literal["DB", "Chemicals", "ChemDatabase"], # CollectionNameTypeT ] for filename in glob.glob("test/data/rec/record-01.json"): - with self.assertRaises(ValidationError), open(filename) as file_obj: + with ( + self.assertRaises(ValidationError), + open(filename, encoding="utf-8") as file_obj, + ): file_json = file_obj.read() record.model_validate_json(file_json) record = Record[ @@ -245,6 +260,9 @@ def test_records_wrong(self): Literal["DB", "Chemicals", "ChemDatabase"], # CollectionNameTypeT ] for filename in glob.glob("test/data/rec/record-01.json"): - with self.assertRaises(ValidationError), open(filename) as file_obj: + with ( + self.assertRaises(ValidationError), + open(filename, encoding="utf-8") as file_obj, + ): file_json = file_obj.read() record.model_validate_json(file_json) diff --git a/test/test_search_meta.py b/test/test_search_meta.py index 7f72bac..e222267 100644 --- a/test/test_search_meta.py +++ b/test/test_search_meta.py @@ -22,7 +22,7 @@ def test_meta(): for filename in glob.glob("test/data/search/meta-*.json"): try: - with open(filename) as file_obj: + with open(filename, encoding="utf-8") as file_obj: file_json = file_obj.read() Meta[taxonomy, domain].model_validate_json(file_json) except ValidationError as e: @@ -39,7 +39,7 @@ def test_meta(): for filename in glob.glob("test/data/search/error-meta-*.json"): gold = gold_errors[os.path.basename(filename)] try: - with open(filename) as file_obj: + with open(filename, encoding="utf-8") as file_obj: file_json = file_obj.read() Meta[taxonomy, domain].model_validate_json(file_json) assert False, f"File {filename} should be an invalid metadata" diff --git a/test/test_utils.py b/test/test_utils.py index e87ad97..e233b91 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -67,7 +67,7 @@ def get_dummy_response(*args, **kwargs): lambda *args, **kwargs: [expected_bytes], ) path = resolve_source_to_path("https://pypi.org") - with open(path) as f: + with open(path, encoding="utf-8") as f: text = f.read() assert text == expected_str