File tree Expand file tree Collapse file tree 3 files changed +13
-6
lines changed
Expand file tree Collapse file tree 3 files changed +13
-6
lines changed Original file line number Diff line number Diff line change @@ -5,6 +5,11 @@ All notable changes to this project will be documented in this file.
55The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
66and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
77
8+ ## 0.1.6 — 2025-02-15
9+ ### Changed
10+
11+ - [x] optimize regexp pattern
12+
813## 0.1.5 — 2025-02-15
914### Changed
1015
Original file line number Diff line number Diff line change 66
77setuptools .setup (
88 name = 'yoficator' ,
9- version = '0.1.5 ' ,
9+ version = '0.1.6 ' ,
1010 description = 'A Russian text yoficator (ёфикатор)' ,
1111 long_description = long_description ,
1212 long_description_content_type = 'text/markdown' ,
Original file line number Diff line number Diff line change 8080 dictionary = {}
8181
8282 # Splitter / tokenizer
83- splitter = re .compile (r'(\s+|\w+|\W+|\S+)' )
83+ splitter = re .compile (r'(?P<word>[а-я]*е[а-я]*)|(?P<unknown>[^е]+\b)' , re . IGNORECASE )
8484
8585 with open (dictionary_file_path ) as stream :
8686 for line in iter (stream ):
8989 dictionary [key ] = value .rstrip ('\n ' )
9090
9191 for token in splitter .finditer (text ):
92- if token in dictionary :
93- print (dictionary [token ], end = '' )
94- else :
95- print (token , end = '' )
92+ word = token .group (0 )
93+ if token .lastgroup == 'word' :
94+ print (dictionary .get (word , word ), end = '' )
95+ continue
96+
97+ print (word , end = '' )
You can’t perform that action at this time.
0 commit comments