forked from ruby/prism
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathencodings
executable file
·116 lines (105 loc) · 5.05 KB
/
encodings
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
#!/usr/bin/env ruby
# frozen_string_literal: true
# Prints a 16x16 table of character-class flags covering every value in
# 0x00..0xFF, where each value is turned into a character of +encoding+.
#
# The first line printed is a "//" comment listing the column digits 0-F.
# Each following line holds sixteen comma-terminated cells and ends with a
# "// Nx" comment giving the row's high hex digit.
#
# Every cell is a bitmask:
#   bit 0 - the character matches [[:alpha:]]
#   bit 1 - the character matches [[:alnum:]]
#   bit 2 - the character matches [[:upper:]]
def table(encoding)
  column_labels = (0...16).map { |column| column.to_s(16).upcase }
  puts "//#{column_labels.join(" ")}"

  # The position of each pattern in this list is the bit it controls.
  flag_patterns = [/[[:alpha:]]/, /[[:alnum:]]/, /[[:upper:]]/]

  (0...256).each_slice(16).with_index do |codepoints, high_digit|
    cells =
      codepoints.map do |codepoint|
        char = codepoint.chr(encoding)
        flags = 0
        flag_patterns.each_with_index do |pattern, bit|
          flags |= (1 << bit) if char.match?(pattern)
        end
        "#{flags},"
      end

    puts "#{cells.join(" ")} // #{high_digit.to_s(16).upcase}x"
  end
end
# Prints C source describing which codepoints of +encoding+ belong to the
# alpha, alnum and isupper character classes.
#
# For each class the output is a "#define <NAME>_<KIND>_CODEPOINTS_LENGTH n"
# line followed by an array named "<name>_<kind>_codepoints". The array holds
# flattened (first, last) pairs of hex codepoints, one pair per run of
# consecutive matching codepoints, so n is twice the number of runs.
#
# name     - String used to build the C identifiers. Hyphens are replaced by
#            underscores because names such as "euc-jp" would otherwise
#            produce identifiers that are not valid C.
# range    - Enumerable of Integer codepoints to examine, in ascending order
#            (run detection compares each codepoint with the one before it).
# encoding - Encoding used to turn each codepoint into a character.
#
# Codepoints for which Integer#chr raises RangeError are skipped. The return
# value is not meaningful; the method is called for its output.
def lists(name, range, encoding)
  identifier = name.tr("-", "_")
  macro_prefix = identifier.upcase

  classes = { alpha: /[[:alpha:]]/, alnum: /[[:alnum:]]/, isupper: /[[:upper:]]/ }
  members = classes.transform_values { [] }

  # Encode every codepoint once and record it under each class it matches.
  range.each do |codepoint|
    character =
      begin
        codepoint.chr(encoding)
      rescue RangeError
        next
      end

    classes.each { |kind, regex| members[kind] << codepoint if character.match?(regex) }
  end

  members.each do |kind, codepoints|
    # Collapse runs of consecutive codepoints into their two end points.
    bounds =
      codepoints
        .slice_when { |previous, current| current - previous != 1 }
        .flat_map { |run| ["0x#{run.first.to_s(16).upcase}", "0x#{run.last.to_s(16).upcase}"] }

    length_macro = "#{macro_prefix}_#{kind.upcase}_CODEPOINTS_LENGTH"
    puts "\n#define #{length_macro} #{bounds.length}"
    puts "#{identifier}_codepoint_t #{identifier}_#{kind}_codepoints[#{length_macro}] = {"
    bounds.each_slice(2) { |pair| puts " #{pair.join(", ")}," }
    puts "};"
  end
end
# Command-line entry point. The first argument names the encoding to generate
# output for (matched case-insensitively). Encodings handled by `table` get a
# 256-entry flag table; encodings handled by `lists` get codepoint range
# arrays; the UTF-8 variants get both.
encoding_name = ARGV[0]
abort "Usage: #{File.basename($PROGRAM_NAME)} ENCODING" if encoding_name.nil?

case encoding_name.downcase
when "ascii" then table(Encoding::ASCII_8BIT)
when "ascii-8bit" then table(Encoding::ASCII_8BIT)
when "big5" then lists("big5", 0...0x10000, Encoding::Big5)
when "cp850" then table(Encoding::CP850)
when "cp852" then table(Encoding::CP852)
when "cp855" then table(Encoding::CP855)
when "euc-jp" then lists("euc-jp", 0...0x10000, Encoding::EUC_JP)
when "gb1988" then table(Encoding::GB1988)
when "gbk" then lists("gbk", 0...0x10000, Encoding::GBK)
when "ibm437" then table(Encoding::IBM437)
when "ibm720" then table(Encoding::IBM720)
when "ibm737" then table(Encoding::IBM737)
when "ibm775" then table(Encoding::IBM775)
when "ibm852" then table(Encoding::IBM852)
when "ibm855" then table(Encoding::IBM855)
when "ibm857" then table(Encoding::IBM857)
when "ibm860" then table(Encoding::IBM860)
when "ibm863" then table(Encoding::IBM863)
when "ibm861" then table(Encoding::IBM861)
when "ibm862" then table(Encoding::IBM862)
when "ibm864" then table(Encoding::IBM864)
when "ibm865" then table(Encoding::IBM865)
when "ibm866" then table(Encoding::IBM866)
when "ibm869" then table(Encoding::IBM869)
when "iso-8859-1" then table(Encoding::ISO8859_1)
when "iso-8859-2" then table(Encoding::ISO8859_2)
when "iso-8859-3" then table(Encoding::ISO8859_3)
when "iso-8859-4" then table(Encoding::ISO8859_4)
when "iso-8859-5" then table(Encoding::ISO8859_5)
when "iso-8859-6" then table(Encoding::ISO8859_6)
when "iso-8859-7" then table(Encoding::ISO8859_7)
when "iso-8859-8" then table(Encoding::ISO8859_8)
when "iso-8859-9" then table(Encoding::ISO8859_9)
when "iso-8859-10" then table(Encoding::ISO8859_10)
when "iso-8859-11" then table(Encoding::ISO8859_11)
when "iso-8859-13" then table(Encoding::ISO8859_13)
when "iso-8859-14" then table(Encoding::ISO8859_14)
when "iso-8859-15" then table(Encoding::ISO8859_15)
when "iso-8859-16" then table(Encoding::ISO8859_16)
when "koi8-r" then table(Encoding::KOI8_R)
when "koi8-u" then table(Encoding::KOI8_U)
when "maccenteuro" then table(Encoding::MACCENTEURO)
when "maccroatian" then table(Encoding::MACCROATIAN)
when "maccyrillic" then table(Encoding::MACCYRILLIC)
when "macgreek" then table(Encoding::MACGREEK)
when "maciceland" then table(Encoding::MACICELAND)
when "macroman" then table(Encoding::MACROMAN)
when "macromania" then table(Encoding::MACROMANIA)
when "macthai" then table(Encoding::MACTHAI)
when "macturkish" then table(Encoding::MACTURKISH)
when "macukraine" then table(Encoding::MACUKRAINE)
when "shift_jis" then lists("shift_jis", 0...0x10000, Encoding::Shift_JIS)
when "tis-620" then table(Encoding::TIS_620)
when "utf8"
  # Run both generators unconditionally: chaining them with `and` made the
  # second call depend on the return value of `table`, which is nil on Rubies
  # where each_slice does not return its receiver.
  table(Encoding::UTF_8)
  lists("utf8", 0x100...0x110000, Encoding::UTF_8)
when "utf8-mac"
  table(Encoding::UTF8_MAC)
  lists("utf8-mac", 0x100...0x110000, Encoding::UTF8_MAC)
when "windows-1250" then table(Encoding::Windows_1250)
when "windows-1251" then table(Encoding::Windows_1251)
when "windows-1252" then table(Encoding::Windows_1252)
when "windows-1253" then table(Encoding::Windows_1253)
when "windows-1254" then table(Encoding::Windows_1254)
when "windows-1255" then table(Encoding::Windows_1255)
when "windows-1256" then table(Encoding::Windows_1256)
when "windows-1257" then table(Encoding::Windows_1257)
when "windows-1258" then table(Encoding::Windows_1258)
when "windows-31j" then lists("windows-31j", 0...0x10000, Encoding::Windows_31J)
when "windows-874" then table(Encoding::Windows_874)
else raise "Unknown encoding `#{encoding_name}'"
end