Skip to content

Commit ddf875e

Browse files
authored
ENH: Make lookups case insensitive (#4)
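The regexes are case-insensitive, but the dictionary lookups into `consts` inside the replacement (`_repl_*`) functions were not, so a mixed-case match such as "Forty" raised a KeyError. Matched groups are now lowercased before the lookup.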
1 parent 3a19399 commit ddf875e

File tree

3 files changed

+19
-11
lines changed

3 files changed

+19
-11
lines changed

numerizer/numerizer.py

+11-10
Original file line numberDiff line numberDiff line change
@@ -10,31 +10,31 @@
1010
# Replacement regular expressions - to be used only in `re.sub`
1111
def _repl_single_digit(m):
1212
m1 = m.group(1)
13-
m2 = consts.DIRECT_SINGLE_NUMS[m.group(2)]
13+
m2 = consts.DIRECT_SINGLE_NUMS[m.group(2).lower()]
1414
return f'{m1}<num>{m2}'
1515

1616

1717
def _repl_ten_prefixes(m):
1818
m1 = m.group(1)
19-
m2 = consts.TEN_PREFIXES[m.group(2)]
20-
m3 = consts.SINGLE_NUMS[m.group(3)]
19+
m2 = consts.TEN_PREFIXES[m.group(2).lower()]
20+
m3 = consts.SINGLE_NUMS[m.group(3).lower()]
2121
return f'{m1}<num>{m2 + m3}'
2222

2323

2424
def _repl_ten_prefs_single_ords(m):
2525
m2, m4 = m.group(2), m.group(4)
2626
repl = f'{m.group(1)}<num>' \
27-
+ str(consts.TEN_PREFIXES[m2] + consts.ORDINAL_SINGLE[m4]) \
27+
+ str(consts.TEN_PREFIXES[m2.lower()] + consts.ORDINAL_SINGLE[m4.lower()]) \
2828
+ m4[-2:]
2929
return repl
3030

3131

3232
def _repl_ten_prefs(m):
33-
return f'{m.group(1)}<num>' + str(consts.TEN_PREFIXES[m.group(2)])
33+
return f'{m.group(1)}<num>' + str(consts.TEN_PREFIXES[m.group(2).lower()])
3434

3535

3636
def _repl_all_fractions(m):
37-
return f'<num>{m.group(1)}' + str(consts.ALL_FRACTIONS[m.group(1)])
37+
return f'<num>{m.group(1)}' + str(consts.ALL_FRACTIONS[m.group(1).lower()])
3838

3939

4040
# Public
@@ -151,11 +151,12 @@ def numerize_fractions(s, ignore=None, bias=None):
151151
if not(m and m2):
152152
m = None
153153
if m is not None:
154-
s = re.sub(pat, lambda m: '/' + str(consts.ALL_FRACTIONS[m.group(2)]), s, count=1)
154+
s = re.sub(pat, lambda m: '/' + str(consts.ALL_FRACTIONS[m.group(2).lower()]),
155+
s, count=1)
155156
pat = re.compile(r'(^|\W)({})(?=$|\W)'.format(quarters), flags=re.IGNORECASE)
156157
m = re.search(pat, s)
157158
if m is not None:
158-
s = re.sub(pat, lambda m: '/' + str(consts.ALL_FRACTIONS[m.group(2)]), s, count=1)
159+
s = re.sub(pat, lambda m: '/' + str(consts.ALL_FRACTIONS[m.group(2).lower()]), s, count=1)
159160
s = cleanup_fractions(s)
160161
return s
161162

@@ -174,15 +175,15 @@ def numerize_ordinals(s, ignore=None, bias=None):
174175
if m is not None:
175176
def _repl_ordinal(m):
176177
m1 = m.group(1)
177-
m2 = str(consts.ALL_ORDINALS['second'])
178+
m2 = str(consts.ALL_ORDINALS['second'.lower()])
178179
return f'{m1}<num>{m2}nd'
179180
s = re.sub(pat, _repl_ordinal, s, count=1)
180181
pat = re.compile(r'(^|\W)({})(?=$|\W)'.format(all_ords), flags=re.IGNORECASE)
181182
m = re.search(pat, s)
182183
if m is not None:
183184
def _repl_ordinal(m):
184185
m1 = m.group(1)
185-
m2 = str(consts.ALL_ORDINALS[m.group(2)])
186+
m2 = str(consts.ALL_ORDINALS[m.group(2).lower()])
186187
return f'{m1}<num>{m2}' + m.group(2)[-2:]
187188
s = re.sub(pat, _repl_ordinal, s, count=1)
188189
return s

setup.cfg

+1-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ max-line-length=99
1111
; N802 ignores "function name should be in lowercase". Required for
1212
; tearDownModule(), extendMarkdown, etc where function name is pre-defined
1313
; W504 ignores "line break after binary operator". We use (x and\ y)
14-
ignore=E911,N802,W504
14+
ignore=E911,E912,N802,W504
1515

1616
[nosetests]
1717
verbosity=2

test_numerize.py

+7
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,13 @@ def test_init():
55
assert numerize('forty two') == '42'
66

77

8+
def test_case_insensitive():
9+
assert numerize('Forty two') == '42'
10+
assert numerize('FORTY TWO') == '42'
11+
assert numerize('FORTY Second') == '42nd'
12+
assert numerize('Ninety Nine') == '99'
13+
14+
815
def test_hyenated():
916
assert numerize('forty-two') == '42'
1017

0 commit comments

Comments
 (0)