Skip to content

Commit 5267ce1

Browse files
Add support for Hindi language (#607)
Copied from stale PR here: #442
1 parent beca211 commit 5267ce1

File tree

4 files changed

+509
-6
lines changed

4 files changed

+509
-6
lines changed

README.rst

+2-1
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,7 @@ Besides the numerical argument, there are two main optional arguments, ``to:`` a
103103
* ``fr_CH`` (French - Switzerland)
104104
* ``fr_DZ`` (French - Algeria)
105105
* ``he`` (Hebrew)
106+
* ``hi`` (Hindi)
106107
* ``hu`` (Hungarian)
107108
* ``id`` (Indonesian)
108109
* ``is`` (Icelandic)
@@ -163,4 +164,4 @@ added Lithuanian support, but didn't take over maintenance of the project.
163164
I am thus basing myself on Marius Grigaitis' improvements and re-publishing
164165
``pynum2word`` as ``num2words``.
165166

166-
Virgil Dupras, Savoir-faire Linux
167+
Virgil Dupras, Savoir-faire Linux

num2words/__init__.py

+7-5
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,12 @@
2121
lang_CS, lang_CY, lang_DA, lang_DE, lang_EN, lang_EN_IN,
2222
lang_EN_NG, lang_EO, lang_ES, lang_ES_CO, lang_ES_CR,
2323
lang_ES_GT, lang_ES_NI, lang_ES_VE, lang_FA, lang_FI, lang_FR,
24-
lang_FR_BE, lang_FR_CH, lang_FR_DZ, lang_HE, lang_HU, lang_ID,
25-
lang_IS, lang_IT, lang_JA, lang_KN, lang_KO, lang_KZ, lang_LT,
26-
lang_LV, lang_NL, lang_NO, lang_PL, lang_PT, lang_PT_BR,
27-
lang_RO, lang_RU, lang_SK, lang_SL, lang_SR, lang_SV, lang_TE,
28-
lang_TET, lang_TG, lang_TH, lang_TR, lang_UK, lang_VI)
24+
lang_FR_BE, lang_FR_CH, lang_FR_DZ, lang_HE, lang_HI, lang_HU,
25+
lang_ID, lang_IS, lang_IT, lang_JA, lang_KN, lang_KO, lang_KZ,
26+
lang_LT, lang_LV, lang_NL, lang_NO, lang_PL, lang_PT,
27+
lang_PT_BR, lang_RO, lang_RU, lang_SK, lang_SL, lang_SR,
28+
lang_SV, lang_TE, lang_TET, lang_TG, lang_TH, lang_TR, lang_UK,
29+
lang_VI)
2930

3031
CONVERTER_CLASSES = {
3132
'am': lang_AM.Num2Word_AM(),
@@ -84,6 +85,7 @@
8485
'tet': lang_TET.Num2Word_TET(),
8586
'hu': lang_HU.Num2Word_HU(),
8687
'is': lang_IS.Num2Word_IS(),
88+
'hi': lang_HI.Num2Word_HI(),
8789
}
8890

8991
# Names of the supported conversion kinds.
# NOTE(review): presumably these are the accepted values of the ``to``
# argument -- confirm against the num2words() entry point.
# The identifier is misspelled ("CONVERTES"), but it is a module-level name
# that external code may import, so it must not be renamed here.
CONVERTES_TYPES = ['cardinal', 'ordinal', 'ordinal_num', 'year', 'currency']

num2words/lang_HI.py

+199
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,199 @@
1+
# -*- encoding: utf-8 -*-
2+
# Copyright (c) 2003, Taro Ogawa. All Rights Reserved.
3+
# Copyright (c) 2013, Savoir-faire Linux inc. All Rights Reserved.
4+
5+
# This library is free software; you can redistribute it and/or
6+
# modify it under the terms of the GNU Lesser General Public
7+
# License as published by the Free Software Foundation; either
8+
# version 2.1 of the License, or (at your option) any later version.
9+
# This library is distributed in the hope that it will be useful,
10+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
11+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12+
# Lesser General Public License for more details.
13+
# You should have received a copy of the GNU Lesser General Public
14+
# License along with this library; if not, write to the Free Software
15+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
16+
# MA 02110-1301 USA
17+
18+
from __future__ import unicode_literals
19+
20+
import string
21+
22+
from num2words.base import Num2Word_Base
23+
24+
25+
class Num2Word_HI(Num2Word_Base):
    """
    Hindi (HI) Num2Word class.

    Supplies the Hindi word tables and the merge rule used to assemble
    cardinals, plus word-form and numeral-form ordinals.  High powers
    follow the Indian grouping visible in ``high_numwords``
    (10**3, 10**5, 10**7, 10**9, 10**11).
    """

    # Ordinals (word form) that are not "cardinal + suffix".
    _irregular_ordinals = {
        0: "शून्य",
        1: "पहला",
        2: "दूसरा",
        3: "तीसरा",
        4: "चौथा",
        6: "छठा",
    }
    # Ordinals (numeral form) that are not "digits + suffix".
    _irregular_ordinals_nums = {
        0: "०",
        1: "१ला",
        2: "२रा",
        3: "३रा",
        4: "४था",
        6: "६ठा",
    }
    _hindi_digits = "०१२३४५६७८९"  # 0-9
    # Maps each ASCII digit character to its Devanagari counterpart.
    _digits_to_hindi_digits = dict(zip(string.digits, _hindi_digits))
    # Suffix appended to a cardinal / numeral to form a regular ordinal.
    _regular_ordinal_suffix = "वाँ"

    def setup(self):
        """
        Define the Hindi word tables, the decimal-point word and the
        negative prefix.

        ``low_numwords`` lists the words for 99 down to 0 in descending
        order (Hindi has a distinct word for every number below 100).
        NOTE(review): presumably Num2Word_Base consumes this list in
        descending order -- confirm against the base class.
        """
        # Note: alternative forms are informal
        self.low_numwords = [
            "निन्यानवे",
            "अट्ठानवे",
            "सत्तानवे",  # alternative "सतानवे"
            "छियानवे",
            "पचानवे",
            "चौरानवे",
            "तिरानवे",
            "बानवे",
            "इक्यानवे",
            "नब्बे",
            "नवासी",
            "अट्ठासी",
            "सतासी",
            "छियासी",
            "पचासी",
            "चौरासी",
            "तिरासी",
            "बयासी",
            "इक्यासी",
            "अस्सी",
            "उनासी",  # alternative "उन्नासी"
            "अठहत्तर",  # alternative "अठहतर"
            "सतहत्तर",  # alternative "सतहतर"
            "छिहत्तर",  # alternative "छिहतर"
            "पचहत्तर",  # alternative "पचहतर"
            "चौहत्तर",  # alternative "चौहतर"
            "तिहत्तर",  # alternative "तिहतर"
            "बहत्तर",  # alternative "बहतर"
            "इकहत्तर",  # alternative "इकहतर"
            "सत्तर",
            "उनहत्तर",  # alternative "उनहतर"
            "अड़सठ",  # alternative "अड़सठ"
            "सड़सठ",  # alternative "सड़सठ"
            "छियासठ",
            "पैंसठ",
            "चौंसठ",
            "तिरसठ",
            "बासठ",
            "इकसठ",
            "साठ",
            "उनसठ",
            "अट्ठावन",  # alternative "अठावन"
            "सत्तावन",  # alternative "सतावन"
            "छप्पन",
            "पचपन",
            "चौवन",
            "तिरेपन",  # alternative "तिरपन"
            "बावन",
            "इक्यावन",
            "पचास",
            "उनचास",
            "अड़तालीस",  # alternative "अड़तालीस"
            "सैंतालीस",
            "छियालीस",  # alternative "छयालिस"
            "पैंतालीस",
            "चौवालीस",  # alternative "चवालीस"
            "तैंतालीस",  # alternative "तैतालीस"
            "बयालीस",
            "इकतालीस",
            "चालीस",
            "उनतालीस",
            "अड़तीस",  # alternative "अड़तीस"
            "सैंतीस",
            "छत्तीस",  # alternative "छतीस"
            "पैंतीस",
            "चौंतीस",
            "तैंतीस",
            "बत्तीस",  # alternative "बतीस"
            "इकत्तीस",  # alternative "इकतीस"
            "तीस",
            "उनतीस",
            "अट्ठाईस",  # alternative "अट्ठाइस"
            "सत्ताईस",  # alternative "सताइस"
            "छब्बीस",
            "पच्चीस",
            "चौबीस",
            "तेईस",  # alternative "तेइस"
            "बाईस",
            "इक्कीस",  # alternative "इकीस"
            "बीस",
            "उन्नीस",
            "अट्ठारह",  # alternative "अठारह"
            "सत्रह",
            "सोलह",
            "पंद्रह",
            "चौदह",
            "तेरह",
            "बारह",
            "ग्यारह",
            "दस",
            "नौ",
            "आठ",
            "सात",
            "छः",  # alternative "छह"
            "पाँच",  # alternative "पांच"
            "चार",
            "तीन",
            "दो",
            "एक",
            "शून्य",
        ]

        self.mid_numwords = [(100, "सौ")]
        # (exponent, word) pairs: each word names 10**exponent
        # (see set_high_numwords).
        self.high_numwords = [
            (11, "ख़रब"),
            (9, "अरब"),
            (7, "करोड़"),  # alternative "करोड़"
            (5, "लाख"),
            (3, "हज़ार"),  # alternative "हज़ार"
        ]
        self.pointword = "दशमलव"
        self.negword = "माइनस "

    def set_high_numwords(self, high):
        """
        Register every high-power word under its value ``10**exponent``
        in ``self.cards``.

        The ``high`` argument is not used: the entries are read from
        ``self.high_numwords``, whose items are ``(exponent, word)``
        pairs.
        """
        for n, word in self.high_numwords:
            self.cards[10**n] = word

    def merge(self, lpair, rpair):
        """
        Combine two adjacent ``(text, number)`` pairs into one.

        * A left operand of 1 in front of a number below 100 is dropped
          and the right pair is returned unchanged.
        * Otherwise the texts are joined with a single space.  The value
          is the product when the right number is the larger one (a
          multiplier in front of a unit word) and the sum in every other
          case.
        """
        ltext, lnum = lpair
        rtext, rnum = rpair
        if lnum == 1 and rnum < 100:
            return rtext, rnum

        merged = "%s %s" % (ltext, rtext)
        # ``lnum >= 100 > rnum`` implies ``rnum < lnum``, so that case is
        # covered by the additive fall-through and needs no branch of its
        # own.
        if rnum > lnum:
            return merged, lnum * rnum
        return merged, lnum + rnum

    def to_ordinal(self, value):
        """
        Return the ordinal of ``value`` in words.

        Values listed in ``_irregular_ordinals`` are returned from that
        table; every other value is the cardinal (``self.to_cardinal``)
        with the regular ordinal suffix appended.
        """
        if value in self._irregular_ordinals:
            return self._irregular_ordinals[value]

        # regular Hindi ordinals are derived from cardinals
        # by modifying the last member of the expression.
        cardinal = self.to_cardinal(value)
        return cardinal + self._regular_ordinal_suffix

    def _convert_to_hindi_numerals(self, value):
        """
        Return ``str(value)`` with every ASCII digit replaced by the
        corresponding Devanagari digit.

        Characters that have no entry in the digit table (for example a
        minus sign or a decimal point) are kept unchanged instead of
        raising ``KeyError``.
        """
        to_hindi = self._digits_to_hindi_digits
        return "".join(to_hindi.get(char, char) for char in str(value))

    def to_ordinal_num(self, value):
        """
        Return the ordinal of ``value`` written with Devanagari digits.

        Values listed in ``_irregular_ordinals_nums`` are returned from
        that table; every other value is its Devanagari numeral with the
        regular ordinal suffix appended.
        """
        if value in self._irregular_ordinals_nums:
            return self._irregular_ordinals_nums[value]

        return (self._convert_to_hindi_numerals(value)
                + self._regular_ordinal_suffix)

0 commit comments

Comments
 (0)