From 2b728c1574928c3d37e44ba2e73b3b7959b68b31 Mon Sep 17 00:00:00 2001 From: Marta Banon Date: Fri, 9 Feb 2024 08:14:59 +0000 Subject: [PATCH] Bump hardrules requirement, fix compat issues --- CHANGELOG.md | 3 +++ pyproject.toml | 4 ++-- src/bicleaner/classify.py | 16 ++++++++-------- 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index caefa1a..4553146 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,6 @@ +Bicleaner 0.17.4: +* Bump bicleaner-hardrules requirement to 2.10.3 and fix compatibility issues. + Bicleaner 0.17.3: * Bump bicleaner-hardrules requirement to 2.8.1 to avoid hunspell installation issues. diff --git a/pyproject.toml b/pyproject.toml index 64592d3..72ef7d9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "bicleaner" -version = "0.17.3" +version = "0.17.4" description = "Parallel corpus classifier, indicating the likelihood of a pair of sentences being mutual translations or not" readme = "README.md" maintainers = [ @@ -19,7 +19,7 @@ dependencies = [ "regex", "toolwrapper>=0.4.1,<=2.1.0", "joblib", "sacremoses==0.0.53", - "bicleaner-hardrules==2.8.1", + "bicleaner-hardrules==2.10.3", ] classifiers = [ "Environment :: Console", "Intended Audience :: Science/Research", diff --git a/src/bicleaner/classify.py b/src/bicleaner/classify.py index 6994d6f..c3d7896 100644 --- a/src/bicleaner/classify.py +++ b/src/bicleaner/classify.py @@ -73,8 +73,8 @@ def load_metadata(args, parser): try: # Load YAML metadata_yaml = yaml.safe_load(args.metadata) - yamlpath = os.path.dirname(os.path.abspath(args.metadata.name)) - metadata_yaml["yamlpath"] = yamlpath + yamldir = os.path.dirname(os.path.abspath(args.metadata.name)) + metadata_yaml["yamldir"] = yamldir # Read language pair and tokenizers args.source_lang=metadata_yaml["source_lang"] @@ -86,7 +86,7 @@ def load_metadata(args, parser): # Load classifier try: - args.clf=joblib.load( os.path.join( yamlpath , metadata_yaml["classifier"])) + args.clf=joblib.load( os.path.join( yamldir , metadata_yaml["classifier"])) except: args.clf=joblib.load(metadata_yaml["classifier"]) args.clf.n_jobs = 1 @@ -94,24 +94,24 @@ def load_metadata(args, parser): # Load probabilistic dictionaries try: - args.dict_sl_tl = ProbabilisticDictionary( os.path.join(yamlpath , metadata_yaml["source_dictionary"])) + args.dict_sl_tl = ProbabilisticDictionary( os.path.join(yamldir , metadata_yaml["source_dictionary"])) except: args.dict_sl_tl = ProbabilisticDictionary(metadata_yaml["source_dictionary"]) try: - args.dict_tl_sl = ProbabilisticDictionary( os.path.join(yamlpath , metadata_yaml["target_dictionary"])) + args.dict_tl_sl = ProbabilisticDictionary( os.path.join(yamldir , metadata_yaml["target_dictionary"])) except: args.dict_tl_sl = ProbabilisticDictionary(metadata_yaml["target_dictionary"]) # Load wordfreqs try: - args.sl_word_freqs = WordZipfFreqDist( os.path.join( yamlpath, metadata_yaml["source_word_freqs"])) + args.sl_word_freqs = WordZipfFreqDist( os.path.join( yamldir, metadata_yaml["source_word_freqs"])) except: try: args.sl_word_freqs = WordZipfFreqDist(metadata_yaml["source_word_freqs"]) except: args.sl_word_freqs = None try: - args.tl_word_freqs = WordZipfFreqDist( os.path.join( yamlpath , metadata_yaml["target_word_freqs"])) + args.tl_word_freqs = WordZipfFreqDist( os.path.join( yamldir , metadata_yaml["target_word_freqs"])) except: try: args.tl_word_freqs = WordZipfFreqDist(metadata_yaml["target_word_freqs"]) @@ -153,7 +153,7 @@ def load_metadata(args, parser): logging.warning("Porn removal not present in metadata, disabling.") else: try: - args.porn_removal = fasttext.load_model(os.path.join(yamlpath, metadata_yaml['porn_removal_file'])) + args.porn_removal = fasttext.load_model(os.path.join(yamldir, metadata_yaml['porn_removal_file'])) except: args.porn_removal = fasttext.load_model(args.metadata_yaml['porn_removal_file']) else: