Skip to content

Commit 3b6484b

Browse files
committed
Fix support for calling fmt and cfmt functions
Add Python style Unicode names
1 parent be97d20 commit 3b6484b

File tree

4 files changed

+230
-19
lines changed

4 files changed

+230
-19
lines changed

src/StringUtils.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ export @u_str, @sinterpolate
99
export s_unescape_string, s_escape_string, s_print_unescaped, s_print_escaped
1010

1111
include("literals.jl")
12+
include("unicodenames.jl")
1213

1314
# From Formatting.jl
1415
import Base.show

src/fmt.jl

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -161,10 +161,10 @@ function fmt(x; kwargs...)
161161
end
162162

163163
# some helper method calls, which just convert to kwargs
164-
fmt(x, prec::Int, args...; kwargs...) = fmt(x, args...; prec=prec, kwargs...)
164+
# Positional shorthand: a single leading `Int` is taken as the field width and
# forwarded as the `width` keyword; remaining positional args pass through.
function fmt(x, width::Int, args...; kwargs...)
    return fmt(x, args...; width=width, kwargs...)
end
165165

166-
fmt(x, prec::Int, width::Int, args...; kwargs...) =
167-
fmt(x, args...; prec=prec, width=width, kwargs...)
166+
# Positional shorthand: two leading `Int`s are taken as width, then precision,
# and forwarded as the `width` and `prec` keywords respectively.
function fmt(x, width::Int, prec::Int, args...; kwargs...)
    return fmt(x, args...; width=width, prec=prec, kwargs...)
end
168168

169169
# integrate some symbol shorthands into the keyword args
170170
# note: as above, this will generate relevant kwargs, so to format in a tight loop, you should probably update the fmt_default

src/literals.jl

Lines changed: 41 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,26 @@ function s_parse_latex(io, s, i)
7272
i
7373
end
7474

75+
"""
76+
Handle a Unicode character name of the form \\N{<name>}, as in Python
77+
"""
78+
function s_parse_uniname(io, s, i)
    # Parses the `{<name>}` part of a `\N{<name>}` escape in `s`, beginning at
    # index `i` (the caller passes the index following the `N`). The name is
    # upper-cased and looked up in `UnicodeNames`; the matching character is
    # printed to `io` and the index just past the closing `}` is returned.
    # Throws ArgumentError when the braces are missing or unterminated, or
    # when the name is not a key of `UnicodeNames`.
    done(s, i) && throw(ArgumentError("\\N incomplete in $(repr(s))"))
    ch, pos = next(s, i)
    ch == '{' || throw(ArgumentError("\\N missing initial { in $(repr(s))"))
    done(s, pos) && throw(ArgumentError("\\N{ incomplete in $(repr(s))"))

    namestart = pos   # index of the first character after '{'
    while true
        ch, pos = next(s, pos)
        ch == '}' && break
        done(s, pos) && throw(ArgumentError("\\N{ missing closing } in $(repr(s))"))
    end

    # `pos` now points just past '}', so the name spans namestart:pos-2.
    # typemax(UInt32) serves as the "not found" sentinel for the lookup.
    notfound = typemax(UInt32)
    codepoint = get(UnicodeNames, uppercase(s[namestart:pos-2]), notfound)
    if codepoint == notfound
        throw(ArgumentError("Invalid Unicode name in $(repr(s))"))
    end
    print(io, Char(codepoint))
    return pos
end
94+
7595
"""
7696
String interpolation parsing
7797
Based on code resurrected from Julia base:
@@ -89,6 +109,8 @@ function s_print_unescaped(io, s::AbstractString)
89109
i = s_parse_emoji(io, s, i)
90110
elseif c == '{' # LaTex
91111
i = s_parse_latex(io, s, i)
112+
elseif c == 'N' # Unicode name
113+
i = s_parse_uniname(io, s, i)
92114
else
93115
c = (c == '0' ? '\0' :
94116
c == '"' ? '"' :
@@ -153,25 +175,28 @@ function s_interp_parse(s::AbstractString, unescape::Function, p::Function)
153175
if !isempty(s[i:j-1])
154176
push!(sx, unescape(s[i:j-1]))
155177
end
156-
c = s[k]
157-
if c != '('
178+
if s[k] == '('
179+
# Need to find end to parse to
180+
_, j = parse(s, k, greedy=false)
181+
# This is a bit hacky, and probably doesn't perform as well as it could,
182+
# but it works! Same below.
183+
str = "(fmt" * s[k:j-1] * ")"
184+
else
158185
# Move past %, c should point to letter
159-
c, k = next(s, k)
160-
s[k] == '(' || throw(ParseError("Missing ( in % format"))
161-
end
162-
# c is now either ( or C format letter to be used
163-
ex, j = parse(s, k, greedy=false)
164-
if isa(ex, Expr)
165-
is(ex.head, :continue) && throw(ParseError("Incomplete expression"))
166-
# Need to wrap call to fmt around expression
167-
if ex.head == :tuple
168-
push!(sx, esc(:(fmt($(ex.args...)))))
169-
else
170-
push!(sx, esc(:(fmt($(ex.args[1])))))
186+
beg = k
187+
while true
188+
c, k = next(s, k)
189+
done(s, k) && throw(ParseError("Incomplete % expression"))
190+
s[k] == '(' && break
171191
end
172-
else
173-
push!(sx, esc(:(fmt($ex))))
192+
_, j = parse(s, k, greedy=false)
193+
str = string("(cfmt(\"", s[beg:k-1], "\",", s[k+1:j-1], ")")
174194
end
195+
ex, _ = parse(str, 1, greedy=false)
196+
if isa(ex, Expr) && is(ex.head, :continue)
197+
throw(ParseError("Incomplete expression"))
198+
end
199+
push!(sx, esc(ex))
175200
i = j
176201
else
177202
j = k

src/unicodenames.jl

Lines changed: 185 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,185 @@
1+
# Lookup table from upper-case Unicode character names to their code points.
# `s_parse_uniname` upper-cases the name found in a `\N{<name>}` escape and
# looks it up here. Only code points U+2000 through U+20BE are listed.
const global UnicodeNames = Dict{ASCIIString, UInt32}(
    # U+2000 .. U+206F (punctuation, spaces and formatting characters)
    "EN QUAD" => 0x2000,
    "EM QUAD" => 0x2001,
    "EN SPACE" => 0x2002,
    "EM SPACE" => 0x2003,
    "THREE-PER-EM SPACE" => 0x2004,
    "FOUR-PER-EM SPACE" => 0x2005,
    "SIX-PER-EM SPACE" => 0x2006,
    "FIGURE SPACE" => 0x2007,
    "PUNCTUATION SPACE" => 0x2008,
    "THIN SPACE" => 0x2009,
    "HAIR SPACE" => 0x200A,
    "ZERO WIDTH SPACE" => 0x200B,
    "ZERO WIDTH NON-JOINER" => 0x200C,
    "ZERO WIDTH JOINER" => 0x200D,
    "LEFT-TO-RIGHT MARK" => 0x200E,
    "RIGHT-TO-LEFT MARK" => 0x200F,
    "HYPHEN" => 0x2010,
    "NON-BREAKING HYPHEN" => 0x2011,
    "FIGURE DASH" => 0x2012,
    "EN DASH" => 0x2013,
    "EM DASH" => 0x2014,
    "HORIZONTAL BAR" => 0x2015,
    "DOUBLE VERTICAL LINE" => 0x2016,
    "DOUBLE LOW LINE" => 0x2017,
    "LEFT SINGLE QUOTATION MARK" => 0x2018,
    "RIGHT SINGLE QUOTATION MARK" => 0x2019,
    "SINGLE LOW-9 QUOTATION MARK" => 0x201A,
    "SINGLE HIGH-REVERSED-9 QUOTATION MARK" => 0x201B,
    "LEFT DOUBLE QUOTATION MARK" => 0x201C,
    "RIGHT DOUBLE QUOTATION MARK" => 0x201D,
    "DOUBLE LOW-9 QUOTATION MARK" => 0x201E,
    "DOUBLE HIGH-REVERSED-9 QUOTATION MARK" => 0x201F,
    "DAGGER" => 0x2020,
    "DOUBLE DAGGER" => 0x2021,
    "BULLET" => 0x2022,
    "TRIANGULAR BULLET" => 0x2023,
    "ONE DOT LEADER" => 0x2024,
    "TWO DOT LEADER" => 0x2025,
    "HORIZONTAL ELLIPSIS" => 0x2026,
    "HYPHENATION POINT" => 0x2027,
    "LINE SEPARATOR" => 0x2028,
    "PARAGRAPH SEPARATOR" => 0x2029,
    "LEFT-TO-RIGHT EMBEDDING" => 0x202A,
    "RIGHT-TO-LEFT EMBEDDING" => 0x202B,
    "POP DIRECTIONAL FORMATTING" => 0x202C,
    "LEFT-TO-RIGHT OVERRIDE" => 0x202D,
    "RIGHT-TO-LEFT OVERRIDE" => 0x202E,
    "NARROW NO-BREAK SPACE" => 0x202F,
    "PER MILLE SIGN" => 0x2030,
    "PER TEN THOUSAND SIGN" => 0x2031,
    "PRIME" => 0x2032,
    "DOUBLE PRIME" => 0x2033,
    "TRIPLE PRIME" => 0x2034,
    "REVERSED PRIME" => 0x2035,
    "REVERSED DOUBLE PRIME" => 0x2036,
    "REVERSED TRIPLE PRIME" => 0x2037,
    "CARET" => 0x2038,
    "SINGLE LEFT-POINTING ANGLE QUOTATION MARK" => 0x2039,
    "SINGLE RIGHT-POINTING ANGLE QUOTATION MARK" => 0x203A,
    "REFERENCE MARK" => 0x203B,
    "DOUBLE EXCLAMATION MARK" => 0x203C,
    "INTERROBANG" => 0x203D,
    "OVERLINE" => 0x203E,
    "UNDERTIE" => 0x203F,
    "CHARACTER TIE" => 0x2040,
    "CARET INSERTION POINT" => 0x2041,
    "ASTERISM" => 0x2042,
    "HYPHEN BULLET" => 0x2043,
    "FRACTION SLASH" => 0x2044,
    "LEFT SQUARE BRACKET WITH QUILL" => 0x2045,
    "RIGHT SQUARE BRACKET WITH QUILL" => 0x2046,
    "DOUBLE QUESTION MARK" => 0x2047,
    "QUESTION EXCLAMATION MARK" => 0x2048,
    "EXCLAMATION QUESTION MARK" => 0x2049,
    "TIRONIAN SIGN ET" => 0x204A,
    "REVERSED PILCROW SIGN" => 0x204B,
    "BLACK LEFTWARDS BULLET" => 0x204C,
    "BLACK RIGHTWARDS BULLET" => 0x204D,
    "LOW ASTERISK" => 0x204E,
    "REVERSED SEMICOLON" => 0x204F,
    "CLOSE UP" => 0x2050,
    "TWO ASTERISKS ALIGNED VERTICALLY" => 0x2051,
    "COMMERCIAL MINUS SIGN" => 0x2052,
    "SWUNG DASH" => 0x2053,
    "INVERTED UNDERTIE" => 0x2054,
    "FLOWER PUNCTUATION MARK" => 0x2055,
    "THREE DOT PUNCTUATION" => 0x2056,
    "QUADRUPLE PRIME" => 0x2057,
    "FOUR DOT PUNCTUATION" => 0x2058,
    "FIVE DOT PUNCTUATION" => 0x2059,
    "TWO DOT PUNCTUATION" => 0x205A,
    "FOUR DOT MARK" => 0x205B,
    "DOTTED CROSS" => 0x205C,
    "TRICOLON" => 0x205D,
    "VERTICAL FOUR DOTS" => 0x205E,
    "MEDIUM MATHEMATICAL SPACE" => 0x205F,
    "WORD JOINER" => 0x2060,
    "FUNCTION APPLICATION" => 0x2061,
    "INVISIBLE TIMES" => 0x2062,
    "INVISIBLE SEPARATOR" => 0x2063,
    "INVISIBLE PLUS" => 0x2064,
    "LEFT-TO-RIGHT ISOLATE" => 0x2066,
    "RIGHT-TO-LEFT ISOLATE" => 0x2067,
    "FIRST STRONG ISOLATE" => 0x2068,
    "POP DIRECTIONAL ISOLATE" => 0x2069,
    "INHIBIT SYMMETRIC SWAPPING" => 0x206A,
    "ACTIVATE SYMMETRIC SWAPPING" => 0x206B,
    "INHIBIT ARABIC FORM SHAPING" => 0x206C,
    "ACTIVATE ARABIC FORM SHAPING" => 0x206D,
    "NATIONAL DIGIT SHAPES" => 0x206E,
    "NOMINAL DIGIT SHAPES" => 0x206F,

    # U+2070 .. U+209C (superscripts and subscripts)
    "SUPERSCRIPT ZERO" => 0x2070,
    "SUPERSCRIPT LATIN SMALL LETTER I" => 0x2071,
    "SUPERSCRIPT FOUR" => 0x2074,
    "SUPERSCRIPT FIVE" => 0x2075,
    "SUPERSCRIPT SIX" => 0x2076,
    "SUPERSCRIPT SEVEN" => 0x2077,
    "SUPERSCRIPT EIGHT" => 0x2078,
    "SUPERSCRIPT NINE" => 0x2079,
    "SUPERSCRIPT PLUS SIGN" => 0x207A,
    "SUPERSCRIPT MINUS" => 0x207B,
    "SUPERSCRIPT EQUALS SIGN" => 0x207C,
    "SUPERSCRIPT LEFT PARENTHESIS" => 0x207D,
    "SUPERSCRIPT RIGHT PARENTHESIS" => 0x207E,
    "SUPERSCRIPT LATIN SMALL LETTER N" => 0x207F,
    "SUBSCRIPT ZERO" => 0x2080,
    "SUBSCRIPT ONE" => 0x2081,
    "SUBSCRIPT TWO" => 0x2082,
    "SUBSCRIPT THREE" => 0x2083,
    "SUBSCRIPT FOUR" => 0x2084,
    "SUBSCRIPT FIVE" => 0x2085,
    "SUBSCRIPT SIX" => 0x2086,
    "SUBSCRIPT SEVEN" => 0x2087,
    "SUBSCRIPT EIGHT" => 0x2088,
    "SUBSCRIPT NINE" => 0x2089,
    "SUBSCRIPT PLUS SIGN" => 0x208A,
    "SUBSCRIPT MINUS" => 0x208B,
    "SUBSCRIPT EQUALS SIGN" => 0x208C,
    "SUBSCRIPT LEFT PARENTHESIS" => 0x208D,
    "SUBSCRIPT RIGHT PARENTHESIS" => 0x208E,
    "LATIN SUBSCRIPT SMALL LETTER A" => 0x2090,
    "LATIN SUBSCRIPT SMALL LETTER E" => 0x2091,
    "LATIN SUBSCRIPT SMALL LETTER O" => 0x2092,
    "LATIN SUBSCRIPT SMALL LETTER X" => 0x2093,
    "LATIN SUBSCRIPT SMALL LETTER SCHWA" => 0x2094,
    "LATIN SUBSCRIPT SMALL LETTER H" => 0x2095,
    "LATIN SUBSCRIPT SMALL LETTER K" => 0x2096,
    "LATIN SUBSCRIPT SMALL LETTER L" => 0x2097,
    "LATIN SUBSCRIPT SMALL LETTER M" => 0x2098,
    "LATIN SUBSCRIPT SMALL LETTER N" => 0x2099,
    "LATIN SUBSCRIPT SMALL LETTER P" => 0x209A,
    "LATIN SUBSCRIPT SMALL LETTER S" => 0x209B,
    "LATIN SUBSCRIPT SMALL LETTER T" => 0x209C,

    # U+20A0 .. U+20BE (currency signs)
    "EURO-CURRENCY SIGN" => 0x20A0,
    "COLON SIGN" => 0x20A1,
    "CRUZEIRO SIGN" => 0x20A2,
    "FRENCH FRANC SIGN" => 0x20A3,
    "LIRA SIGN" => 0x20A4,
    "MILL SIGN" => 0x20A5,
    "NAIRA SIGN" => 0x20A6,
    "PESETA SIGN" => 0x20A7,
    "RUPEE SIGN" => 0x20A8,
    "WON SIGN" => 0x20A9,
    "NEW SHEQEL SIGN" => 0x20AA,
    "DONG SIGN" => 0x20AB,
    "EURO SIGN" => 0x20AC,
    "KIP SIGN" => 0x20AD,
    "TUGRIK SIGN" => 0x20AE,
    "DRACHMA SIGN" => 0x20AF,
    "GERMAN PENNY SIGN" => 0x20B0,
    "PESO SIGN" => 0x20B1,
    "GUARANI SIGN" => 0x20B2,
    "AUSTRAL SIGN" => 0x20B3,
    "HRYVNIA SIGN" => 0x20B4,
    "CEDI SIGN" => 0x20B5,
    "LIVRE TOURNOIS SIGN" => 0x20B6,
    "SPESMILO SIGN" => 0x20B7,
    "TENGE SIGN" => 0x20B8,
    "INDIAN RUPEE SIGN" => 0x20B9,
    "TURKISH LIRA SIGN" => 0x20BA,
    "NORDIC MARK SIGN" => 0x20BB,
    "MANAT SIGN" => 0x20BC,
    "RUBLE SIGN" => 0x20BD,
    "LARI SIGN" => 0x20BE)

0 commit comments

Comments
 (0)