feature: upgrade to python3

unsektor · unsektor · commit 1a5f507cdf82 · 2025-02-15T21:50:59.000+03:00
diff --git a/changelog.md b/changelog.md
@@ -5,6 +5,11 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## 0.1.4 — 2025-02-15
+### Changed
+
+- [x] upgrade to Python 3 (Python 2 is no longer supported)
+
 ## 0.1.3 — 2025-02-15
 ### Removed
 
diff --git a/setup.py b/setup.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 import setuptools
 
 
@@ -7,7 +6,7 @@
 
 setuptools.setup(
     name='yoficator',
-    version='0.1.3',
+    version='0.1.4',
     description='A Russian text yoficator (ёфикатор)',
     long_description=long_description,
     long_description_content_type='text/markdown',
@@ -22,5 +21,5 @@
         'License :: OSI Approved :: MIT License',
         'Operating System :: OS Independent',
     ],
-    python_requires='>=2.7, <3',
+    python_requires='>=3',
 )
diff --git a/yoficator/__main__.py b/yoficator/__main__.py
@@ -1,44 +1,42 @@
-#!/usr/bin/python
-# -*- coding: utf-8 -*-
-from __future__ import print_function, unicode_literals
-import codecs
-import os, sys
+#!/usr/bin/env python3
+import os
+import sys
 import re
 
 #-------------------------------------------------------------------------#
 #
-#                         ▗▀▖▗       ▐                
+#                         ▗▀▖▗       ▐
 #                   ▌ ▌▞▀▖▐  ▄ ▞▀▖▝▀▖▜▀ ▞▀▖▙▀▖  ▛▀▖▌ ▌
 #                   ▚▄▌▌ ▌▜▀ ▐ ▌ ▖▞▀▌▐ ▖▌ ▌▌  ▗▖▙▄▘▚▄▌
 #                   ▗▄▘▝▀ ▐  ▀▘▝▀ ▝▀▘ ▀ ▝▀ ▘  ▝▘▌  ▗▄▘
 #
 # Description:
 #    This is a Russian text yoficator (ёфикатор).
 #
-#    It conservatively replaces every "е" to "ё" when it's unambiguously 
+#    It conservatively replaces every "е" to "ё" when it's unambiguously
 #    a case of the latter. No context is used; it relies entirely on a lack
-#    of dictionary entries for a correspondent "truly е" homograph. 
+#    of dictionary entries for a correspondent "truly е" homograph.
 #
 #    Yoficating Russian texts remove some unnecessary ambiguity.
 #    https://en.wikipedia.org/wiki/Yoficator
 #    https://ru.wikipedia.org/wiki/Ёфикатор
 #
 #    Syntax: yoficator.py [text-file-in-Russian | string-in-Russian]
-# 
+#
 #    Depends on yoficator.dic, which is used for the lookup.
 #
-#    Limitations: 
-#    * The code being conservative and not looking for context, it won't correct 
-#      when a "truly е" homograph exists. Thus a "все" will never be corrected, 
+#    Limitations:
+#    * The code being conservative and not looking for context, it won't correct
+#      when a "truly е" homograph exists. Thus a "все" will never be corrected,
 #      because both все and всё exist as different words.
-#    * Prone to wrongly yoficate other Cyrillic-based languages, such as 
+#    * Prone to wrongly yoficate other Cyrillic-based languages, such as
 #      Bulgarian, Ukrainian, Belarussian.
 #    * It's not the fastest thing in the world, mind you. But does the job.
 #
 #-------------------------------------------------------------------------
 #
 # Found this useful? Appalling? Appealing? Please let me know.
-# The Unabashed welcomes your impressions. 
+# The Unabashed welcomes your impressions.
 #
 # You will find the
 #   unabashed
@@ -63,45 +61,35 @@
 #
 #--------------------------------------------------------------------------#
 
-# TODO Better handle lowercase, uppercase
-
-workingDir = os.path.abspath(os.path.dirname(__file__)) + '/_data'
-dictionaryFile = workingDir + "/yoficator.dic"
-
-if len(sys.argv) > 1:
-    # Is the input a filename?
-    if os.path.isfile(sys.argv[1]):
-        text = codecs.open(sys.argv[1].decode("utf-8"), "r", "utf-8").read()
-    # Else we will assume it's a string
-    else:
-        text = sys.argv[1].decode("utf-8")
-else:
-    print('Error: No file specified', file=sys.stderr)
-    sys.exit(1)
-
-dictionary = {}
-
 
-# Splitter / tokenizer
-splitter = re.compile(r'(\s+|\w+|\W+|\S+)', re.UNICODE)
-tokens = splitter.findall(text)
+if __name__ == '__main__':  # run only when executed as a script, not on import
+    # TODO Better handle lowercase, uppercase
+    dictionary_file_path = os.path.abspath(os.path.dirname(__file__)) + '/_data/yoficator.dic'  # lookup dictionary under _data/ beside this module
 
-with codecs.open(dictionaryFile, "r", "utf-8") as f:
-    for line in f:
-        if ":" in line:
-            key,value = line.split(":")
-            dictionary[key] = value.rstrip('\n')
+    if len(sys.argv) > 1:
+        # A filename is read as UTF-8 text; anything else is taken as the text itself.
+        if os.path.isfile(sys.argv[1]):
+            with open(sys.argv[1], encoding='utf-8') as stream:
+                text = stream.read()
         else:
-            pass
-
-for token in tokens:
-    if token in dictionary:
-        print(dictionary[token], end='')
+            text = sys.argv[1]
     else:
-        print(token, end='')
+        print('Error: No file specified', file=sys.stderr)
+        sys.exit(1)  # the bare exit() is a site-module helper meant for interactive use
 
+    dictionary = {}  # maps a token to its replacement; filled from the dictionary file
 
-sys.exit(0)
+    # Tokenizer: splits the text into runs of whitespace, word characters, or other characters
+    splitter = re.compile(r'(\s+|\w+|\W+|\S+)')
 
-# -------------------- END -----------------------
+    with open(dictionary_file_path, encoding='utf-8') as stream:  # the dictionary file is UTF-8
+        for line in stream:
+            key, separator, value = line.partition(':')  # split on the first colon only
+            if separator:  # lines without a colon are not "key:value" entries
+                dictionary[key] = value.rstrip('\n')
 
+    # findall() yields the token strings themselves; finditer() would yield
+    # Match objects, which never equal a dictionary key and print as reprs.
+    for token in splitter.findall(text):
+        # Tokens without a dictionary entry are echoed unchanged.
+        print(dictionary.get(token, token), end='')