Skip to content

Commit 0194c81

Browse files
author
Chaoser
committed
Committed .json data files. Fixed test with trigram rank
1 parent 1d53832 commit 0194c81

File tree

4 files changed

+147
-145
lines changed

4 files changed

+147
-145
lines changed

.gitignore

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
phptest
22
node_modules
3-
.cupboard
3+
.cupboard
4+
.idea

data/lang.json

+1
Large diffs are not rendered by default.

data/unicode_blocks.json

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
[["0x0000","0x007F","Basic Latin"],["0x0080","0x00FF","Latin-1 Supplement"],["0x0100","0x017F","Latin Extended-A"],["0x0180","0x024F","Latin Extended-B"],["0x0250","0x02AF","IPA Extensions"],["0x02B0","0x02FF","Spacing Modifier Letters"],["0x0300","0x036F","Combining Diacritical Marks"],["0x0370","0x03FF","Greek and Coptic"],["0x0400","0x04FF","Cyrillic"],["0x0500","0x052F","Cyrillic Supplement"],["0x0530","0x058F","Armenian"],["0x0590","0x05FF","Hebrew"],["0x0600","0x06FF","Arabic"],["0x0700","0x074F","Syriac"],["0x0750","0x077F","Arabic Supplement"],["0x0780","0x07BF","Thaana"],["0x0900","0x097F","Devanagari"],["0x0980","0x09FF","Bengali"],["0x0A00","0x0A7F","Gurmukhi"],["0x0A80","0x0AFF","Gujarati"],["0x0B00","0x0B7F","Oriya"],["0x0B80","0x0BFF","Tamil"],["0x0C00","0x0C7F","Telugu"],["0x0C80","0x0CFF","Kannada"],["0x0D00","0x0D7F","Malayalam"],["0x0D80","0x0DFF","Sinhala"],["0x0E00","0x0E7F","Thai"],["0x0E80","0x0EFF","Lao"],["0x0F00","0x0FFF","Tibetan"],["0x1000","0x109F","Myanmar"],["0x10A0","0x10FF","Georgian"],["0x1100","0x11FF","Hangul Jamo"],["0x1200","0x137F","Ethiopic"],["0x1380","0x139F","Ethiopic Supplement"],["0x13A0","0x13FF","Cherokee"],["0x1400","0x167F","Unified Canadian Aboriginal Syllabics"],["0x1680","0x169F","Ogham"],["0x16A0","0x16FF","Runic"],["0x1700","0x171F","Tagalog"],["0x1720","0x173F","Hanunoo"],["0x1740","0x175F","Buhid"],["0x1760","0x177F","Tagbanwa"],["0x1780","0x17FF","Khmer"],["0x1800","0x18AF","Mongolian"],["0x1900","0x194F","Limbu"],["0x1950","0x197F","Tai Le"],["0x1980","0x19DF","New Tai Lue"],["0x19E0","0x19FF","Khmer Symbols"],["0x1A00","0x1A1F","Buginese"],["0x1D00","0x1D7F","Phonetic Extensions"],["0x1D80","0x1DBF","Phonetic Extensions Supplement"],["0x1DC0","0x1DFF","Combining Diacritical Marks Supplement"],["0x1E00","0x1EFF","Latin Extended Additional"],["0x1F00","0x1FFF","Greek Extended"],["0x2000","0x206F","General Punctuation"],["0x2070","0x209F","Superscripts and Subscripts"],["0x20A0","0x20CF","Currency Symbols"],["0x20D0","0x20FF","Combining Diacritical Marks for Symbols"],["0x2100","0x214F","Letterlike Symbols"],["0x2150","0x218F","Number Forms"],["0x2190","0x21FF","Arrows"],["0x2200","0x22FF","Mathematical Operators"],["0x2300","0x23FF","Miscellaneous Technical"],["0x2400","0x243F","Control Pictures"],["0x2440","0x245F","Optical Character Recognition"],["0x2460","0x24FF","Enclosed Alphanumerics"],["0x2500","0x257F","Box Drawing"],["0x2580","0x259F","Block Elements"],["0x25A0","0x25FF","Geometric Shapes"],["0x2600","0x26FF","Miscellaneous Symbols"],["0x2700","0x27BF","Dingbats"],["0x27C0","0x27EF","Miscellaneous Mathematical Symbols-A"],["0x27F0","0x27FF","Supplemental Arrows-A"],["0x2800","0x28FF","Braille Patterns"],["0x2900","0x297F","Supplemental Arrows-B"],["0x2980","0x29FF","Miscellaneous Mathematical Symbols-B"],["0x2A00","0x2AFF","Supplemental Mathematical Operators"],["0x2B00","0x2BFF","Miscellaneous Symbols and Arrows"],["0x2C00","0x2C5F","Glagolitic"],["0x2C80","0x2CFF","Coptic"],["0x2D00","0x2D2F","Georgian Supplement"],["0x2D30","0x2D7F","Tifinagh"],["0x2D80","0x2DDF","Ethiopic Extended"],["0x2E00","0x2E7F","Supplemental Punctuation"],["0x2E80","0x2EFF","CJK Radicals Supplement"],["0x2F00","0x2FDF","Kangxi Radicals"],["0x2FF0","0x2FFF","Ideographic Description Characters"],["0x3000","0x303F","CJK Symbols and Punctuation"],["0x3040","0x309F","Hiragana"],["0x30A0","0x30FF","Katakana"],["0x3100","0x312F","Bopomofo"],["0x3130","0x318F","Hangul Compatibility Jamo"],["0x3190","0x319F","Kanbun"],["0x31A0","0x31BF","Bopomofo Extended"],["0x31C0","0x31EF","CJK Strokes"],["0x31F0","0x31FF","Katakana Phonetic Extensions"],["0x3200","0x32FF","Enclosed CJK Letters and Months"],["0x3300","0x33FF","CJK Compatibility"],["0x3400","0x4DBF","CJK Unified Ideographs Extension A"],["0x4DC0","0x4DFF","Yijing Hexagram Symbols"],["0x4E00","0x9FFF","CJK Unified Ideographs"],["0xA000","0xA48F","Yi Syllables"],["0xA490","0xA4CF","Yi Radicals"],["0xA700","0xA71F","Modifier Tone Letters"],["0xA800","0xA82F","Syloti Nagri"],["0xAC00","0xD7AF","Hangul Syllables"],["0xD800","0xDB7F","High Surrogates"],["0xDB80","0xDBFF","High Private Use Surrogates"],["0xDC00","0xDFFF","Low Surrogates"],["0xE000","0xF8FF","Private Use Area"],["0xF900","0xFAFF","CJK Compatibility Ideographs"],["0xFB00","0xFB4F","Alphabetic Presentation Forms"],["0xFB50","0xFDFF","Arabic Presentation Forms-A"],["0xFE00","0xFE0F","Variation Selectors"],["0xFE10","0xFE1F","Vertical Forms"],["0xFE20","0xFE2F","Combining Half Marks"],["0xFE30","0xFE4F","CJK Compatibility Forms"],["0xFE50","0xFE6F","Small Form Variants"],["0xFE70","0xFEFF","Arabic Presentation Forms-B"],["0xFF00","0xFFEF","Halfwidth and Fullwidth Forms"],["0xFFF0","0xFFFF","Specials"],["0x10000","0x1007F","Linear B Syllabary"],["0x10080","0x100FF","Linear B Ideograms"],["0x10100","0x1013F","Aegean Numbers"],["0x10140","0x1018F","Ancient Greek Numbers"],["0x10300","0x1032F","Old Italic"],["0x10330","0x1034F","Gothic"],["0x10380","0x1039F","Ugaritic"],["0x103A0","0x103DF","Old Persian"],["0x10400","0x1044F","Deseret"],["0x10450","0x1047F","Shavian"],["0x10480","0x104AF","Osmanya"],["0x10800","0x1083F","Cypriot Syllabary"],["0x10A00","0x10A5F","Kharoshthi"],["0x1D000","0x1D0FF","Byzantine Musical Symbols"],["0x1D100","0x1D1FF","Musical Symbols"],["0x1D200","0x1D24F","Ancient Greek Musical Notation"],["0x1D300","0x1D35F","Tai Xuan Jing Symbols"],["0x1D400","0x1D7FF","Mathematical Alphanumeric Symbols"],["0x20000","0x2A6DF","CJK Unified Ideographs Extension B"],["0x2F800","0x2FA1F","CJK Compatibility Ideographs Supplement"],["0xE0000","0xE007F","Tags"],["0xE0100","0xE01EF","Variation Selectors Supplement"],["0xF0000","0xFFFFF","Supplementary Private Use Area-A"],["0x100000","0x10FFFF","Supplementary Private Use Area-B"]]

test/Parser.test.js

+143-144
Original file line numberDiff line numberDiff line change
@@ -690,150 +690,149 @@
690690
l = new Parser(str);
691691
l.setPadStart(true);
692692
l.analyze();
693-
t.deepEqual(l.getTrigramRanks(), {
694-
"ion": 0,
695-
"on ": 1,
696-
" so": 2,
697-
"ess": 3,
698-
"hou": 4,
699-
"n s": 5,
700-
"oul": 6,
701-
"re ": 7,
702-
"tio": 8,
703-
"ust": 9,
704-
" a ": 10,
705-
" al": 11,
706-
" ar": 12,
707-
" b ": 13,
708-
" bo": 14,
709-
" do": 15,
710-
" ex": 16,
711-
" fo": 17,
712-
" fr": 18,
713-
" go": 19,
714-
" ho": 20,
715-
" if": 21,
716-
" in": 22,
717-
" ju": 23,
718-
" li": 24,
719-
" me": 25,
720-
" pv": 26,
721-
" se": 27,
722-
" sh": 28,
723-
" st": 29,
724-
" sw": 30,
725-
" th": 31,
726-
" to": 32,
727-
" u ": 33,
728-
" wh": 34,
729-
"a s": 35,
730-
"alw": 36,
731-
"are": 37,
732-
"at ": 38,
733-
"ati": 39,
734-
"atu": 40,
735-
"ays": 41,
736-
"b l": 42,
737-
"bot": 43,
738-
"d a": 44,
739-
"d s": 45,
740-
"don": 46,
741-
"e g": 47,
742-
"e i": 48,
743-
"e s": 49,
744-
"elf": 50,
745-
"ent": 51,
746-
"ers": 52,
747-
"exp": 53,
748-
"f e": 54,
749-
"f t": 55,
750-
"fol": 56,
751-
"fro": 57,
752-
"goo": 58,
753-
"hat": 59,
754-
"her": 60,
755-
"hom": 61,
756-
"hos": 62,
757-
"if ": 63,
758-
"imi": 64,
759-
"int": 65,
760-
"itl": 66,
761-
"jus": 67,
762-
"l w": 68,
763-
"ld ": 69,
764-
"les": 70,
765-
"lf ": 71,
766-
"lim": 72,
767-
"llo": 73,
768-
"low": 74,
769-
"lwa": 75,
770-
"m s": 76,
771-
"me ": 77,
772-
"mit": 78,
773-
"mor": 79,
774-
"n t": 80,
775-
"nat": 81,
776-
"ns ": 82,
777-
"nt ": 83,
778-
"nte": 84,
779-
"nti": 85,
780-
"od ": 86,
781-
"oll": 87,
782-
"om ": 88,
783-
"omo": 89,
784-
"ons": 90,
785-
"ont": 91,
786-
"ood": 92,
787-
"oph": 93,
788-
"ore": 94,
789-
"ose": 95,
790-
"oth": 96,
791-
"ous": 97,
792-
"ow ": 98,
793-
"pho": 99,
794-
"pre": 100,
795-
"pvn": 101,
796-
"res": 102,
797-
"rom": 103,
798-
"rs ": 104,
799-
"s a": 105,
800-
"s b": 106,
801-
"s i": 107,
802-
"s j": 108,
803-
"s u": 109,
804-
"se ": 110,
805-
"sel": 111,
806-
"sho": 112,
807-
"sio": 113,
808-
"sop": 114,
809-
"sou": 115,
810-
"ss ": 116,
811-
"ssi": 117,
812-
"st ": 118,
813-
"sta": 119,
814-
"sto": 120,
815-
"sw ": 121,
816-
"t a": 122,
817-
"t b": 123,
818-
"t f": 124,
819-
"tat": 125,
820-
"ten": 126,
821-
"tha": 127,
822-
"the": 128,
823-
"tle": 129,
824-
"to ": 130,
825-
"ton": 131,
826-
"tus": 132,
827-
"ul ": 133,
828-
"uld": 134,
829-
"us ": 135,
830-
"vna": 136,
831-
"w h": 137,
832-
"w m": 138,
833-
"way": 139,
834-
"who": 140,
835-
"xpr": 141,
836-
"ys ": 142
693+
t.deepEqual(l.getTrigramRanks(), { ion: 0,
694+
'on ': 1,
695+
' so': 2,
696+
ess: 3,
697+
hou: 4,
698+
'n s': 5,
699+
oul: 6,
700+
're ': 7,
701+
tio: 8,
702+
ust: 9,
703+
' a ': 10,
704+
' al': 11,
705+
' ar': 12,
706+
' b ': 13,
707+
' bo': 14,
708+
' fo': 15,
709+
' fr': 16,
710+
' go': 17,
711+
' ho': 18,
712+
' if': 19,
713+
' in': 20,
714+
' ju': 21,
715+
' li': 22,
716+
' me': 23,
717+
' pv': 24,
718+
' se': 25,
719+
' sh': 26,
720+
' st': 27,
721+
' sw': 28,
722+
' th': 29,
723+
' to': 30,
724+
' ud': 31,
725+
' wh': 32,
726+
'a s': 33,
727+
alw: 34,
728+
are: 35,
729+
'at ': 36,
730+
ati: 37,
731+
atu: 38,
732+
ays: 39,
733+
'b l': 40,
734+
bot: 41,
735+
'd a': 42,
736+
'd s': 43,
737+
don: 44,
738+
'e g': 45,
739+
'e i': 46,
740+
'e s': 47,
741+
elf: 48,
742+
ent: 49,
743+
ers: 50,
744+
exp: 51,
745+
'f t': 52,
746+
fex: 53,
747+
fol: 54,
748+
fro: 55,
749+
goo: 56,
750+
hat: 57,
751+
her: 58,
752+
hom: 59,
753+
hos: 60,
754+
'if ': 61,
755+
imi: 62,
756+
int: 63,
757+
itl: 64,
758+
jus: 65,
759+
'l w': 66,
760+
'ld ': 67,
761+
les: 68,
762+
lfe: 69,
763+
lim: 70,
764+
llo: 71,
765+
low: 72,
766+
lwa: 73,
767+
'm s': 74,
768+
'me ': 75,
769+
mit: 76,
770+
mor: 77,
771+
'n t': 78,
772+
nat: 79,
773+
'ns ': 80,
774+
'nt ': 81,
775+
nte: 82,
776+
nti: 83,
777+
'o p': 84,
778+
'od ': 85,
779+
oll: 86,
780+
'om ': 87,
781+
omo: 88,
782+
ons: 89,
783+
ont: 90,
784+
ood: 91,
785+
oph: 92,
786+
ore: 93,
787+
ose: 94,
788+
oth: 95,
789+
ous: 96,
790+
'ow ': 97,
791+
pho: 98,
792+
pre: 99,
793+
pvn: 100,
794+
res: 101,
795+
rom: 102,
796+
'rs ': 103,
797+
's a': 104,
798+
's b': 105,
799+
's i': 106,
800+
's j': 107,
801+
's u': 108,
802+
'se ': 109,
803+
sel: 110,
804+
sho: 111,
805+
sio: 112,
806+
sop: 113,
807+
sou: 114,
808+
'ss ': 115,
809+
ssi: 116,
810+
'st ': 117,
811+
sta: 118,
812+
sto: 119,
813+
'sw ': 120,
814+
't a': 121,
815+
't b': 122,
816+
't f': 123,
817+
tat: 124,
818+
ten: 125,
819+
tha: 126,
820+
the: 127,
821+
tle: 128,
822+
'to ': 129,
823+
ton: 130,
824+
tus: 131,
825+
udo: 132,
826+
'ul ': 133,
827+
uld: 134,
828+
'us ': 135,
829+
vna: 136,
830+
'w h': 137,
831+
'w m': 138,
832+
way: 139,
833+
who: 140,
834+
xpr: 141,
835+
'ys ': 142
837836
});
838837
return t.done();
839838
};

0 commit comments

Comments
 (0)