Skip to content

Commit 1a5f507

Browse files
committed
feature: upgrade to python3
1 parent 5ea7941 commit 1a5f507

File tree

3 files changed

+43
-51
lines changed

3 files changed

+43
-51
lines changed

changelog.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,11 @@ All notable changes to this project will be documented in this file.
55
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
66
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
77

8+
## 0.1.4 — 2025-02-15
9+
### Changed
10+
11+
- [x] upgrade to python3
12+
813
## 0.1.3 — 2025-02-15
914
### Removed
1015

setup.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
# -*- coding: utf-8 -*-
21
import setuptools
32

43

@@ -7,7 +6,7 @@
76

87
setuptools.setup(
98
name='yoficator',
10-
version='0.1.3',
9+
version='0.1.4',
1110
description='A Russian text yoficator (ёфикатор)',
1211
long_description=long_description,
1312
long_description_content_type='text/markdown',
@@ -22,5 +21,5 @@
2221
'License :: OSI Approved :: MIT License',
2322
'Operating System :: OS Independent',
2423
],
25-
python_requires='>=2.7, <3',
24+
python_requires='>=3',
2625
)

yoficator/__main__.py

Lines changed: 36 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -1,44 +1,42 @@
1-
#!/usr/bin/python
2-
# -*- coding: utf-8 -*-
3-
from __future__ import print_function, unicode_literals
4-
import codecs
5-
import os, sys
1+
#!/usr/bin/env python3
2+
import os
3+
import sys
64
import re
75

86
#-------------------------------------------------------------------------#
97
#
10-
# ▗▀▖▗ ▐
8+
# ▗▀▖▗ ▐
119
# ▌ ▌▞▀▖▐ ▄ ▞▀▖▝▀▖▜▀ ▞▀▖▙▀▖ ▛▀▖▌ ▌
1210
# ▚▄▌▌ ▌▜▀ ▐ ▌ ▖▞▀▌▐ ▖▌ ▌▌ ▗▖▙▄▘▚▄▌
1311
# ▗▄▘▝▀ ▐ ▀▘▝▀ ▝▀▘ ▀ ▝▀ ▘ ▝▘▌ ▗▄▘
1412
#
1513
# Description:
1614
# This is a Russian text yoficator (ёфикатор).
1715
#
18-
# It conservatively replaces every "е" with "ё" when it's unambiguously
16+
# It conservatively replaces every "е" with "ё" when it's unambiguously
1917
# a case of the latter. No context is used; it relies entirely on a lack
20-
# of dictionary entries for a correspondent "truly е" homograph.
18+
# of dictionary entries for a correspondent "truly е" homograph.
2119
#
2220
# Yoficating Russian texts removes some unnecessary ambiguity.
2321
# https://en.wikipedia.org/wiki/Yoficator
2422
# https://ru.wikipedia.org/wiki/Ёфикатор
2523
#
2624
# Syntax: yoficator.py [text-file-in-Russian | string-in-Russian]
27-
#
25+
#
2826
# Depends on yoficator.dic, which is used for the lookup.
2927
#
30-
# Limitations:
31-
# * The code being conservative and not looking for context, it won't correct
32-
# when a "truly е" homograph exists. Thus a "все" will never be corrected,
28+
# Limitations:
29+
# * The code being conservative and not looking for context, it won't correct
30+
# when a "truly е" homograph exists. Thus a "все" will never be corrected,
3331
# because both все and всё exist as different words.
34-
# * Prone to wrongly yoficate other Cyrillic-based languages, such as
32+
# * Prone to wrongly yoficate other Cyrillic-based languages, such as
3533
# Bulgarian, Ukrainian, Belarusian.
3634
# * It's not the fastest thing in the world, mind you. But does the job.
3735
#
3836
#-------------------------------------------------------------------------
3937
#
4038
# Found this useful? Appalling? Appealing? Please let me know.
41-
# The Unabashed welcomes your impressions.
39+
# The Unabashed welcomes your impressions.
4240
#
4341
# You will find the
4442
# unabashed
@@ -63,45 +61,35 @@
6361
#
6462
#--------------------------------------------------------------------------#
6563

66-
# TODO Better handle lowercase, uppercase
67-
68-
workingDir = os.path.abspath(os.path.dirname(__file__)) + '/_data'
69-
dictionaryFile = workingDir + "/yoficator.dic"
70-
71-
if len(sys.argv) > 1:
72-
# Is the input a filename?
73-
if os.path.isfile(sys.argv[1]):
74-
text = codecs.open(sys.argv[1].decode("utf-8"), "r", "utf-8").read()
75-
# Else we will assume it's a string
76-
else:
77-
text = sys.argv[1].decode("utf-8")
78-
else:
79-
print('Error: No file specified', file=sys.stderr)
80-
sys.exit(1)
81-
82-
dictionary = {}
83-
8464

85-
# Splitter / tokenizer
86-
splitter = re.compile(r'(\s+|\w+|\W+|\S+)', re.UNICODE)
87-
tokens = splitter.findall(text)
65+
if __name__ == '__main__':
66+
# TODO Better handle lowercase, uppercase
67+
dictionary_file_path = os.path.abspath(os.path.dirname(__file__)) + '/_data/yoficator.dic'
8868

89-
with codecs.open(dictionaryFile, "r", "utf-8") as f:
90-
for line in f:
91-
if ":" in line:
92-
key,value = line.split(":")
93-
dictionary[key] = value.rstrip('\n')
69+
if len(sys.argv) > 1:
70+
# Is the input a filename?
71+
if os.path.isfile(sys.argv[1]):
72+
text = open(sys.argv[1]).read()
73+
# Else we will assume it's a string
9474
else:
95-
pass
96-
97-
for token in tokens:
98-
if token in dictionary:
99-
print(dictionary[token], end='')
75+
text = sys.argv[1]
10076
else:
101-
print(token, end='')
77+
print('Error: No file specified', file=sys.stderr)
78+
exit(1)
10279

80+
dictionary = {}
10381

104-
sys.exit(0)
82+
# Splitter / tokenizer
83+
splitter = re.compile(r'(\s+|\w+|\W+|\S+)')
10584

106-
# -------------------- END -----------------------
85+
with open(dictionary_file_path) as stream:
86+
for line in iter(stream):
87+
if ':' in line:
88+
key, value = line.split(':')
89+
dictionary[key] = value.rstrip('\n')
10790

91+
for token in splitter.finditer(text):
92+
if token in dictionary:
93+
print(dictionary[token], end='')
94+
else:
95+
print(token, end='')

0 commit comments

Comments
 (0)