diff --git a/LICENSE.md b/LICENSE.md index ae707c5..b71e0be 100644 --- a/LICENSE.md +++ b/LICENSE.md @@ -5,6 +5,12 @@ Copyright (c) 2016 Gandalf Software, Inc. (Scott P. Jones) Portions based on code that is part of Julia, licensed under the MIT license, and also Eric Forgy's StringInterpolations.jl package. +Portions are based on code submitted as a PR to the Formatting.jl package, +also under MIT "Expat" license: Copyright (c) 2015 Tom Breloff + +Portions are based on the Formatting.jl package, also under MIT "Expat" license: +Copyright (c) 2014: Dahua Lin and contributors. + > Permission is hereby granted, free of charge, to any person obtaining a copy > of this software and associated documentation files (the "Software"), to deal > in the Software without restriction, including without limitation the rights @@ -22,4 +28,3 @@ and also Eric Forgy's StringInterpolations.jl package. > LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, > OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE > SOFTWARE. -> diff --git a/README.md b/README.md index 00d8569..a68b203 100644 --- a/README.md +++ b/README.md @@ -16,3 +16,12 @@ Finally, I have added two new ways of representing characters in the literal str This makes life a lot easier when you want to keep the text of a program in ASCII, and also to be able to write programs using those characters that might not even display correctly in their editor. + +This now has some initial formatting capability, based on Tom Breloff's wonderful PR #10 to the +JuliaLang/Formatting.jl package (by Dahua Lin and other contributors). +`\%(arguments)` is interpolated as a call to fmt(arguments). + +I also plan on adding support for `\%c(arguments)`, where c is a C style formatting character. +I'm debating if I should make it take the full C style syntax, with leading 0, width/precision, +etc, before the single character. 
+ diff --git a/src/StringUtils.jl b/src/StringUtils.jl index 8492bc5..a02aada 100644 --- a/src/StringUtils.jl +++ b/src/StringUtils.jl @@ -9,4 +9,17 @@ export @u_str, @sinterpolate export s_unescape_string, s_escape_string, s_print_unescaped, s_print_escaped include("literals.jl") +include("unicodenames.jl") + +# From Formatting.jl +import Base.show + +export cfmt, fmt, fmt_default, fmt_default! + +include("cformat.jl" ) +include("fmtspec.jl") +include("fmtcore.jl") +include("formatexpr.jl") +include("fmt.jl") + end diff --git a/src/cformat.jl b/src/cformat.jl new file mode 100644 index 0000000..9e28f1a --- /dev/null +++ b/src/cformat.jl @@ -0,0 +1,364 @@ +formatters = Dict{ ASCIIString, Function }() + +function sprintf1( fmt::ASCIIString, x ) + global formatters + f = generate_formatter( fmt ) + f( x ) +end + +function generate_formatter( fmt::ASCIIString ) + global formatters + if haskey( formatters, fmt ) + return formatters[fmt] + end + func = symbol( "sprintf_" * replace( base64encode( fmt ), "=", "!" ) ) + + if !contains( fmt, "'" ) + test = Base.Printf.parse( fmt ) + if length( test ) != 1 || !( typeof( test[1] ) <: Tuple ) + error( "Only one AND undecorated format string is allowed") + end + + code = quote + function $func( x ) + @sprintf( $fmt, x ) + end + end + else + conversion = fmt[end] + if !in( conversion, "sduifF" ) + error( "thousand separator not defined for " * string( conversion ) * " conversion") + end + fmtactual = replace( fmt, "'", "", 1 ) + test = Base.Printf.parse( fmtactual ) + if length( test ) != 1 || !( typeof( test[1] ) <: Tuple ) + error( "Only one AND undecorated format string is allowed") + end + if in( conversion, "sfF" ) + code = quote + function $func{T<:Real}( x::T ) + s = @sprintf( $fmtactual, x ) + # commas are added to only the numerator + if T <: Rational && endswith( $fmtactual, "s" ) + spos = findfirst( s, '/' ) + s = addcommas( s[1:spos-1] ) * s[spos:end] + else + dpos = findfirst( s, '.' 
) + if dpos != 0 + s = addcommas( s[1:dpos-1] ) * s[ dpos:end ] + else # find the rightmost digit + for i in length( s ):-1:1 + if isdigit( s[i] ) + s = addcommas( s[1:i] ) * s[i+1:end] + break + end + end + end + end + s + end + end + else + code = quote + function $func( x ) + s = @sprintf( $fmtactual, x ) + for i in length( s ):-1:1 + if isdigit( s[i] ) + s = addcommas( s[1:i] ) * s[i+1:end] + break + end + end + s + end + end + end + end + f = eval( code ) + formatters[ fmt ] = f + f +end + +function addcommas( s::ASCIIString ) + len = length(s) + t = "" + for i in 1:3:len + subs = s[max(1,len-i-1):len-i+1] + if i == 1 + t = subs + else + if match( r"[0-9]", subs ) != nothing + t = subs * "," * t + else + t = subs * t + end + end + end + return t +end + +function generate_format_string(; + width::Int=-1, + precision::Int= -1, + leftjustified::Bool=false, + zeropadding::Bool=false, + commas::Bool=false, + signed::Bool=false, + positivespace::Bool=false, + alternative::Bool=false, + conversion::ASCIIString="f" #aAdecEfFiosxX + ) + s = "%" + if commas + s *= "'" + end + if alternative && in( conversion[1], "aAeEfFoxX" ) + s *= "#" + end + if zeropadding && !leftjustified && width != -1 + s *= "0" + end + + if signed + s *= "+" + elseif positivespace + s *= " " + end + + if width != -1 + if leftjustified + s *= "-" * string( width ) + else + s *= string( width ) + end + end + if precision != -1 + s *= "." * string( precision ) + end + s * conversion +end + +function format{T<:Real}( x::T; + width::Int=-1, + precision::Int= -1, + leftjustified::Bool=false, + zeropadding::Bool=false, # when right-justified, use 0 instead of space to fill + commas::Bool=false, + signed::Bool=false, # +/- prefix + positivespace::Bool=false, + stripzeros::Bool=(precision== -1), + parens::Bool=false, # use (1.00) instead of -1.00. 
Used in finance + alternative::Bool=false, # usually for hex + mixedfraction::Bool=false, + mixedfractionsep::UTF8String="_", + fractionsep::UTF8String="/", # num / den + fractionwidth::Int = 0, + tryden::Int = 0, # if 2 or higher, try to use this denominator, without losing precision + suffix::UTF8String="", # useful for units/% + autoscale::Symbol=:none, # :metric, :binary or :finance + conversion::ASCIIString="" + ) + checkwidth = commas + if conversion == "" + if T <: FloatingPoint || T <: Rational && precision != -1 + actualconv = "f" + elseif T <: Unsigned + actualconv = "x" + elseif T <: Integer + actualconv = "d" + else + conversion = "s" + actualconv = "s" + end + else + actualconv = conversion + end + if signed && commas + error( "You cannot use signed (+/-) AND commas at the same time") + end + if T <: Rational && conversion == "s" + stripzeros = false + end + if ( T <: FloatingPoint && actualconv == "f" || T <: Integer ) && autoscale != :none + actualconv = "f" + if autoscale == :metric + scales = [ + (1e24, "Y" ), + (1e21, "Z" ), + (1e18, "E" ), + (1e15, "P" ), + (1e12, "T" ), + (1e9, "G"), + (1e6, "M"), + (1e3, "k") ] + if abs(x) > 1 + for (mag, sym) in scales + if abs(x) >= mag + x /= mag + suffix = sym * suffix + break + end + end + elseif T <: FloatingPoint + smallscales = [ + ( 1e-12, "p" ), + ( 1e-9, "n" ), + ( 1e-6, "μ" ), + ( 1e-3, "m" ) ] + for (mag,sym) in smallscales + if abs(x) < mag*10 + x /= mag + suffix = sym * suffix + break + end + end + end + else + if autoscale == :binary + scales = [ + (1024.0 ^8, "Yi" ), + (1024.0 ^7, "Zi" ), + (1024.0 ^6, "Ei" ), + (1024^5, "Pi" ), + (1024^4, "Ti" ), + (1024^3, "Gi"), + (1024^2, "Mi"), + (1024, "Ki") + ] + else # :finance + scales = [ + (1e12, "t" ), + (1e9, "b"), + (1e6, "m"), + (1e3, "k") ] + end + for (mag, sym) in scales + if abs(x) >= mag + x /= mag + suffix = sym * suffix + break + end + end + end + end + + nonneg = x >= 0 + fractional = 0 + if T <: Rational && mixedfraction + actualconv = 
"d" + actualx = trunc( Int, x ) + fractional = abs(x) - abs(actualx) + else + if parens && !in( actualconv[1], "xX" ) + actualx = abs(x) + else + actualx = x + end + end + s = sprintf1( generate_format_string( width=width, + precision=precision, + leftjustified=leftjustified, + zeropadding=zeropadding, + commas=commas, + signed=signed, + positivespace=positivespace, + alternative=alternative, + conversion=actualconv + ),actualx) + + if T <:Rational && conversion == "s" + if mixedfraction && fractional != 0 + num = fractional.num + den = fractional.den + if tryden >= 2 && mod( tryden, den ) == 0 + num *= div(tryden,den) + den = tryden + end + fs = string( num ) * fractionsep * string(den) + if length(fs) < fractionwidth + fs = repeat( "0", fractionwidth - length(fs) ) * fs + end + s = rstrip(s) + if actualx != 0 + s = rstrip(s) * mixedfractionsep * fs + else + if !nonneg + s = "-" * fs + else + s = fs + end + end + checkwidth = true + elseif !mixedfraction + s = replace( s, "//", fractionsep ) + checkwidth = true + end + elseif stripzeros && in( actualconv[1], "fFeEs" ) + dpos = findfirst( s, '.') + if in( actualconv[1], "eEs" ) + if in( actualconv[1], "es" ) + epos = findfirst( s, 'e' ) + else + epos = findfirst( s, 'E' ) + end + if epos == 0 + rpos = length( s ) + else + rpos = epos-1 + end + else + rpos = length(s) + end + # rpos at this point is the rightmost possible char to start + # stripping + stripfrom = rpos+1 + for i = rpos:-1:dpos+1 + if s[i] == '0' + stripfrom = i + elseif s[i] ==' ' + continue + else + break + end + end + if stripfrom <= rpos + if stripfrom == dpos+1 # everything after decimal is 0, so strip the decimal too + stripfrom = dpos + end + s = s[1:stripfrom-1] * s[rpos+1:end] + checkwidth = true + end + end + + s *= suffix + + if parens && !in( actualconv[1], "xX" ) + # if zero or positive, we still need 1 white space on the right + if nonneg + s = " " * strip(s) * " " + else + s = "(" * strip(s) * ")" + end + + checkwidth = true + end + + 
if checkwidth && width != -1 + if length(s) > width + s = replace( s, " ", "", length(s)-width ) + if length(s) > width && endswith( s, " " ) + s = reverse( replace( reverse(s), " ", "", length(s)-width ) ) + end + if length(s) > width + s = replace( s, ",", "", length(s)-width ) + end + elseif length(s) < width + if leftjustified + s = s * repeat( " ", width - length(s) ) + else + s = repeat( " ", width - length(s) ) * s + end + end + end + + s +end diff --git a/src/fmt.jl b/src/fmt.jl new file mode 100644 index 0000000..2a64e49 --- /dev/null +++ b/src/fmt.jl @@ -0,0 +1,174 @@ + +# interface proposal by Tom Breloff (@tbreloff)... comments welcome +# This uses the more basic formatting based on FormatSpec and the cfmt method (formerly called fmt, which I repurposed) + +# TODO: swap out FormatSpec for something that is able to use the "format" method, which has more options for units, prefixes, etc +# TODO: support rational numbers, autoscale, etc as in "format" + +# -------------------------------------------------------------------------------------------------- + +# the DefaultSpec object is just something to hold onto the current FormatSpec. +# we keep the typechar around specically for the reset! 
function, to go back to the starting state + +type DefaultSpec + typechar::Char + fspec::FormatSpec + DefaultSpec(c::Char) = new(c, FormatSpec(c)) +end + +const DEFAULT_FORMATTERS = Dict{DataType, DefaultSpec}() + +# adds a new default formatter for this type +defaultSpec!{T}(::Type{T}, c::Char) = (DEFAULT_FORMATTERS[T] = DefaultSpec(c); nothing) + +# note: types T and K will now both share K's default +defaultSpec!{T,K}(::Type{T}, ::Type{K}) = (DEFAULT_FORMATTERS[T] = DEFAULT_FORMATTERS[K]; nothing) + +# seed it with some basic default formatters +for (t, c) in [(Integer,'d'), (AbstractFloat,'f'), (Char,'c'), (AbstractString,'s')] + defaultSpec!(t, c) +end + +reset!{T}(::Type{T}) = (dspec = defaultSpec(T); dspec.fspec = FormatSpec(dspec.typechar); nothing) + + +# -------------------------------------------------------------------------------------------------- + + +function addKWArgsFromSymbols(kwargs, syms::Symbol...) + d = Dict(kwargs) + for s in syms + if s == :ljust || s == :left + d[:align] = '<' + elseif s == :rjust || s == :right + d[:align] = '>' + elseif s == :commas + d[:tsep] = true + elseif s == :zpad || s == :zeropad + d[:zpad] = true + elseif s == :ipre || s == :prefix + d[:ipre] = true + end + end + d +end + +# -------------------------------------------------------------------------------------------------- + +# methods to get the current default objects +# note: if you want to set a default for an abstract type (i.e. AbstractFloat) you'll need to extend this method like here: +defaultSpec{T<:Integer}(::Type{T}) = DEFAULT_FORMATTERS[Integer] +defaultSpec{T<:AbstractFloat}(::Type{T}) = DEFAULT_FORMATTERS[AbstractFloat] +defaultSpec{T<:AbstractString}(::Type{T}) = DEFAULT_FORMATTERS[AbstractString] +function defaultSpec{T}(::Type{T}) + get(DEFAULT_FORMATTERS, T) do + error("Missing default spec for type $T... 
call fmt_default!(T, c): $DEFAULT_FORMATTERS") + end +end +defaultSpec(x) = defaultSpec(typeof(x)) + +fmt_default{T}(::Type{T}) = defaultSpec(T).fspec +fmt_default(x) = defaultSpec(x).fspec + + + +# first resets the fmt_default spec to the given arg, then continue by updating with args and kwargs +fmt_default!{T}(::Type{T}, c::Char, args...; kwargs...) = (defaultSpec!(T,c); fmt_default!(T, args...; kwargs...)) +fmt_default!{T,K}(::Type{T}, ::Type{K}, args...; kwargs...) = (defaultSpec!(T,K); fmt_default!(T, args...; kwargs...)) + +# update the fmt_default for a specific type +function fmt_default!{T}(::Type{T}, syms::Symbol...; kwargs...) + if isempty(syms) + + # if there are no arguments, reset to initial defaults + if isempty(kwargs) + reset!(T) + return + end + + # otherwise update the spec + dspec = defaultSpec(T) + dspec.fspec = FormatSpec(dspec.fspec; kwargs...) + + else + d = addKWArgsFromSymbols(kwargs, syms...) + fmt_default!(T; d...) + end + nothing +end + +# update the fmt_default for all types +function fmt_default!(syms::Symbol...; kwargs...) + if isempty(syms) + for k in keys(DEFAULT_FORMATTERS) + fmt_default!(k; kwargs...) + end + else + d = addKWArgsFromSymbols(kwargs, syms...) + fmt_default!(; d...) + end + nothing +end + +# -------------------------------------------------------------------------------------------------- + +# TODO: get rid of this entire hack by moving commas into cfmt + +function optionalCommas(x::Real, s::AbstractString, fspec::FormatSpec) + dpos = findfirst(s, '.') + prevwidth = length(s) + + if dpos == 0 + s = addcommas(s) + else + s = string(addcommas(s[1:dpos-1]), '.', s[dpos+1:end]) + end + + # check for excess width from commas + w = length(s) + if fspec.width > 0 && w > fspec.width && w > prevwidth + # we may have made the string too wide with those commas... 
gotta fix it + s = strip(s) + n = max(0, fspec.width - length(s)) # stripped text may still exceed the width; never pad by a negative count + if fspec.align == '<' # left alignment + s = string(s, " "^n) + else + s = string(" "^n, s) + end + end + + s +end +optionalCommas(x, s::AbstractString, fspec::FormatSpec) = s + +# -------------------------------------------------------------------------------------------------- + + +# TODO: do more caching to optimize repeated calls + +# creates a new FormatSpec by overriding the defaults and passes it to cfmt +# note: adding kwargs is only appropriate for one-off formatting. +# normally it will be much faster to change the fmt_default formatting as needed +function fmt(x; kwargs...) + fspec = isempty(kwargs) ? fmt_default(x) : FormatSpec(fmt_default(x); kwargs...) + s = cfmt(fspec, x) + + # add the commas now... I was confused as to when this is done currently + if fspec.tsep + return optionalCommas(x, s, fspec) + end + s +end + +# some helper method calls, which just convert to kwargs +fmt(x, width::Int, args...; kwargs...) = fmt(x, args...; width=width, kwargs...) + +fmt(x, width::Int, prec::Int, args...; kwargs...) = + fmt(x, args...; width=width, prec=prec, kwargs...) + +# integrate some symbol shorthands into the keyword args +# note: as above, this will generate relevant kwargs, so to format in a tight loop, you should probably update the fmt_default +function fmt(x, syms::Symbol...; kwargs...) + d = addKWArgsFromSymbols(kwargs, syms...) + fmt(x; d...) 
+end diff --git a/src/fmtcore.jl b/src/fmtcore.jl new file mode 100644 index 0000000..cfad805 --- /dev/null +++ b/src/fmtcore.jl @@ -0,0 +1,292 @@ +# core formatting functions + +### auxiliary functions + +function _repwrite(out::IO, c::Char, n::Int) + while n > 0 + write(out, c) + n -= 1 + end +end + + +### print string or char + +function _pfmt_s(out::IO, fs::FormatSpec, s::Union{AbstractString,Char}) + wid = fs.width + slen = length(s) + if wid <= slen + write(out, s) + else + a = fs.align + if a == '<' + write(out, s) + _repwrite(out, fs.fill, wid-slen) + else + _repwrite(out, fs.fill, wid-slen) + write(out, s) + end + end +end + + +### print integers + +_mul(x::Integer, ::_Dec) = x * 10 +_mul(x::Integer, ::_Bin) = x << 1 +_mul(x::Integer, ::_Oct) = x << 3 +_mul(x::Integer, ::Union{_Hex, _HEX}) = x << 4 + +_div(x::Integer, ::_Dec) = div(x, 10) +_div(x::Integer, ::_Bin) = x >> 1 +_div(x::Integer, ::_Oct) = x >> 3 +_div(x::Integer, ::Union{_Hex, _HEX}) = x >> 4 + +function _ndigits(x::Integer, op) # suppose x is non-negative + m = 1 + q = _div(x, op) + while q > 0 + m += 1 + q = _div(q, op) + end + return m +end + +_ipre(op) = "" +_ipre(::Union{_Hex, _HEX}) = "0x" +_ipre(::_Oct) = "0o" +_ipre(::_Bin) = "0b" + +_digitchar(x::Integer, ::_Bin) = Char(x == 0 ? '0' : '1') +_digitchar(x::Integer, ::_Dec) = Char('0' + x) +_digitchar(x::Integer, ::_Oct) = Char('0' + x) +_digitchar(x::Integer, ::_Hex) = Char(x < 10 ? '0' + x : 'a' + (x - 10)) +_digitchar(x::Integer, ::_HEX) = Char(x < 10 ? '0' + x : 'A' + (x - 10)) + +_signchar(x::Number, s::Char) = x < 0 ? '-' : + s == '+' ? '+' : + s == ' ' ? 
' ' : '\0' + +function _pfmt_int{Op}(out::IO, sch::Char, ip::ASCIIString, zs::Integer, ax::Integer, op::Op) + # print sign + if sch != '\0' + write(out, sch) + end + # print prefix + if !isempty(ip) + write(out, ip) + end + # print padding zeros + if zs > 0 + _repwrite(out, '0', zs) + end + # print actual digits + if ax == 0 + write(out, '0') + else + _pfmt_intdigits(out, ax, op) + end +end + +function _pfmt_intdigits{Op,T<:Integer}(out::IO, ax::T, op::Op) + b_lb = _div(ax, op) + b = one(T) + while b <= b_lb + b = _mul(b, op) + end + r = ax + while b > 0 + (q, r) = divrem(r, b) + write(out, _digitchar(q, op)) + b = _div(b, op) + end +end + +function _pfmt_i{Op}(out::IO, fs::FormatSpec, x::Integer, op::Op) + # calculate actual length + ax = abs(x) + xlen = _ndigits(abs(x), op) + # sign char + sch = _signchar(x, fs.sign) + if sch != '\0' + xlen += 1 + end + # prefix (e.g. 0x, 0b, 0o) + ip = "" + if fs.ipre + ip = _ipre(op) + xlen += length(ip) + end + + # printing + wid = fs.width + if wid <= xlen + _pfmt_int(out, sch, ip, 0, ax, op) + elseif fs.zpad + _pfmt_int(out, sch, ip, wid-xlen, ax, op) + else + a = fs.align + if a == '<' + _pfmt_int(out, sch, ip, 0, ax, op) + _repwrite(out, fs.fill, wid-xlen) + else + _repwrite(out, fs.fill, wid-xlen) + _pfmt_int(out, sch, ip, 0, ax, op) + end + end +end + + +### print floating point numbers + +function _pfmt_float(out::IO, sch::Char, zs::Integer, intv::Real, decv::Real, prec::Int) + # print sign + if sch != '\0' + write(out, sch) + end + # print padding zeros + if zs > 0 + _repwrite(out, '0', zs) + end + idecv = round(Integer, decv * exp10(prec)) + if idecv == exp10(prec) + intv += 1 + idecv = 0 + end + # print integer part + if intv == 0 + write(out, '0') + else + _pfmt_intdigits(out, intv, _Dec()) + end + # print decimal point + write(out, '.') + # print decimal part + if prec > 0 + nd = _ndigits(idecv, _Dec()) + if nd < prec + _repwrite(out, '0', prec - nd) + end + _pfmt_intdigits(out, idecv, _Dec()) + end +end + 
+function _pfmt_f(out::IO, fs::FormatSpec, x::AbstractFloat) + # separate sign, integer, and decimal part + ax = abs(x) + sch = _signchar(x, fs.sign) + intv = trunc(Integer, ax) + decv = ax - intv + + # calculate length + xlen = _ndigits(intv, _Dec()) + 1 + fs.prec + if sch != '\0' + xlen += 1 + end + + # print + wid = fs.width + if wid <= xlen + _pfmt_float(out, sch, 0, intv, decv, fs.prec) + elseif fs.zpad + _pfmt_float(out, sch, wid-xlen, intv, decv, fs.prec) + else + a = fs.align + if a == '<' + _pfmt_float(out, sch, 0, intv, decv, fs.prec) + _repwrite(out, fs.fill, wid-xlen) + else + _repwrite(out, fs.fill, wid-xlen) + _pfmt_float(out, sch, 0, intv, decv, fs.prec) + end + end +end + +function _pfmt_floate(out::IO, sch::Char, zs::Integer, u::Real, prec::Int, e::Int, ec::Char) + intv = trunc(Integer,u) + decv = u - intv + if round(Integer, decv * exp10(prec)) == exp10(prec) + intv += 1 + if intv == 10 + intv = 1 + e += 1 + end + decv = 0. + end + _pfmt_float(out, sch, zs, intv, decv, prec) + write(out, ec) + if e >= 0 + write(out, '+') + else + write(out, '-') + e = -e + end + # zero-pad the exponent to two digits; three-digit exponents (e.g. 1e100) are printed in full + e < 10 && write(out, '0') + print(out, e) +end + + +function _pfmt_e(out::IO, fs::FormatSpec, x::AbstractFloat) + # extract sign, significand, and exponent + ax = abs(x) + sch = _signchar(x, fs.sign) + if ax == 0.0 + e = 0 + u = zero(x) + else + e = floor(Integer,log10(ax)) # exponent + u = ax / exp10(e) # significand + end + + # calculate length (a three-digit exponent takes one extra character) + xlen = 6 + fs.prec + (abs(e) >= 100 ? 1 : 0) + if sch != '\0' + xlen += 1 + end + + # print + ec = isupper(fs.typ) ? 
'E' : 'e' + wid = fs.width + if wid <= xlen + _pfmt_floate(out, sch, 0, u, fs.prec, e, ec) + elseif fs.zpad + _pfmt_floate(out, sch, wid-xlen, u, fs.prec, e, ec) + else + a = fs.align + if a == '<' + _pfmt_floate(out, sch, 0, u, fs.prec, e, ec) + _repwrite(out, fs.fill, wid-xlen) + else + _repwrite(out, fs.fill, wid-xlen) + _pfmt_floate(out, sch, 0, u, fs.prec, e, ec) + end + end +end + + +function _pfmt_g(out::IO, fs::FormatSpec, x::AbstractFloat) + # number decomposition + ax = abs(x) + if 1.0e-4 <= ax < 1.0e6 + _pfmt_f(out, fs, x) + else + _pfmt_e(out, fs, x) + end +end + +function _pfmt_specialf(out::IO, fs::FormatSpec, x::AbstractFloat) + if isinf(x) + if x > 0 + _pfmt_s(out, fs, "Inf") + else + _pfmt_s(out, fs, "-Inf") + end + else + @assert isnan(x) + _pfmt_s(out, fs, "NaN") + end +end + + + diff --git a/src/fmtspec.jl b/src/fmtspec.jl new file mode 100644 index 0000000..d6ef878 --- /dev/null +++ b/src/fmtspec.jl @@ -0,0 +1,213 @@ +# formatting specification + +# formatting specification language +# +# spec ::= [[fill]align][sign][#][0][width][,][.prec][type] +# fill ::= +# align ::= '<' | '>' +# sign ::= '+' | '-' | ' ' +# width ::= +# prec ::= +# type ::= 'b' | 'c' | 'd' | 'e' | 'E' | 'f' | 'F' | 'g' | 'G' | +# 'n' | 'o' | 'x' | 'X' | 's' +# +# Please refer to http://docs.python.org/2/library/string.html#formatspec +# for more details +# + +## FormatSpec type + +const _numtypchars = Set(['b', 'd', 'e', 'E', 'f', 'F', 'g', 'G', 'n', 'o', 'x', 'X']) + +_tycls(c::Char) = + (c == 'd' || c == 'n' || c == 'b' || c == 'o' || c == 'x') ? 'i' : + (c == 'e' || c == 'f' || c == 'g') ? 'f' : + (c == 'c') ? 'c' : + (c == 's') ? 
's' : + error("Invalid type char $(c)") + +immutable FormatSpec + cls::Char # category: 'i' | 'f' | 'c' | 's' + typ::Char + fill::Char + align::Char + sign::Char + width::Int + prec::Int + ipre::Bool # whether to prefix 0b, 0o, or 0x + zpad::Bool # whether to do zero-padding + tsep::Bool # whether to use thousand-separator + + function FormatSpec(typ::Char; + fill::Char=' ', + align::Char='\0', + sign::Char='-', + width::Int=-1, + prec::Int=-1, + ipre::Bool=false, + zpad::Bool=false, + tsep::Bool=false) + + if align=='\0' + align = (typ in _numtypchars) ? '>' : '<' + end + cls = _tycls(lowercase(typ)) + if cls == 'f' && prec < 0 + prec = 6 + end + new(cls, typ, fill, align, sign, width, prec, ipre, zpad, tsep) + end + + # copy constructor with overrides + function FormatSpec(spec::FormatSpec; + fill::Char=spec.fill, + align::Char=spec.align, + sign::Char=spec.sign, + width::Int=spec.width, + prec::Int=spec.prec, + ipre::Bool=spec.ipre, + zpad::Bool=spec.zpad, + tsep::Bool=spec.tsep) + new(spec.cls, spec.typ, fill, align, sign, width, prec, ipre, zpad, tsep) + end +end + +function show(io::IO, fs::FormatSpec) + println(io, "$(typeof(fs))") + println(io, " cls = $(fs.cls)") + println(io, " typ = $(fs.typ)") + println(io, " fill = $(fs.fill)") + println(io, " align = $(fs.align)") + println(io, " sign = $(fs.sign)") + println(io, " width = $(fs.width)") + println(io, " prec = $(fs.prec)") + println(io, " ipre = $(fs.ipre)") + println(io, " zpad = $(fs.zpad)") + println(io, " tsep = $(fs.tsep)") +end + +## parse FormatSpec from a string (the precision group must start with a literal '.') + +const _spec_regex = r"^(.?[<>])?([ +-])?(#)?(\d+)?(,)?(\.\d+)?([bcdeEfFgGnosxX])?$" + +function FormatSpec(s::AbstractString) + # default spec + _fill = ' ' + _align = '\0' + _sign = '-' + _width = -1 + _prec = -1 + _ipre = false + _zpad = false + _tsep = false + _typ = 's' + + if !isempty(s) + m = match(_spec_regex, s) + if m == nothing + error("Invalid formatting spec: $(s)") + end + (a1, a2, a3, a4, a5, a6, a7) = m.captures + + # 
a1: [[fill]align] + if a1 != nothing + if length(a1) == 1 + _align = a1[1] + else + _fill = a1[1] + _align = a1[2] + end + end + + # a2: [sign] + if a2 != nothing + _sign = a2[1] + end + + # a3: [#] + if a3 != nothing + _ipre = true + end + + # a4: [0][width] + if a4 != nothing + if a4[1] == '0' + _zpad = true + if length(a4) > 1 + _width = parse(Int,a4[2:end]) + end + else + _width = parse(Int,a4) + end + end + + # a5: [,] + if a5 != nothing + _tsep = true + end + + # a6 [.prec] + if a6 != nothing + _prec = parse(Int,a6[2:end]) + end + + # a7: [type] + if a7 != nothing + _typ = a7[1] + end + end + + return FormatSpec(_typ; + fill=_fill, + align=_align, + sign=_sign, + width=_width, + prec=_prec, + ipre=_ipre, + zpad=_zpad, + tsep=_tsep) +end + + +## formatted printing using a format spec + +type _Dec end +type _Oct end +type _Hex end +type _HEX end +type _Bin end + +_srepr(x) = repr(x) +_srepr(x::AbstractString) = x +_srepr(x::Char) = string(x) + +function printfmt(io::IO, fs::FormatSpec, x) + cls = fs.cls + ty = fs.typ + if cls == 'i' + ix = Integer(x) + ty == 'd' || ty == 'n' ? _pfmt_i(io, fs, ix, _Dec()) : + ty == 'x' ? _pfmt_i(io, fs, ix, _Hex()) : + ty == 'X' ? _pfmt_i(io, fs, ix, _HEX()) : + ty == 'o' ? _pfmt_i(io, fs, ix, _Oct()) : + _pfmt_i(io, fs, ix, _Bin()) + elseif cls == 'f' + fx = float(x) + if isfinite(fx) + ty == 'f' || ty == 'F' ? _pfmt_f(io, fs, fx) : + ty == 'e' || ty == 'E' ? 
_pfmt_e(io, fs, fx) : + error("format for type g or G is not supported yet (use f or e instead).") + else + _pfmt_specialf(io, fs, fx) + end + elseif cls == 's' + _pfmt_s(io, fs, _srepr(x)) + else # cls == 'c' + _pfmt_s(io, fs, Char(x)) + end +end + +printfmt(fs::FormatSpec, x) = printfmt(STDOUT, fs, x) + +cfmt(fs::FormatSpec, x) = (buf = IOBuffer(); printfmt(buf, fs, x); bytestring(buf)) +cfmt(spec::AbstractString, x) = cfmt(FormatSpec(spec), x) diff --git a/src/formatexpr.jl b/src/formatexpr.jl new file mode 100644 index 0000000..ecbf0ef --- /dev/null +++ b/src/formatexpr.jl @@ -0,0 +1,171 @@ +# formatting expression + +### Argument specification + +immutable ArgSpec + argidx::Int + hasfilter::Bool + filter::Function + + function ArgSpec(idx::Int, hasfil::Bool, filter::Function) + idx != 0 || error("Argument index cannot be zero.") + new(idx, hasfil, filter) + end +end + +getarg(args, sp::ArgSpec) = + (a = args[sp.argidx]; sp.hasfilter ? sp.filter(a) : a) + +# pos > 0: must not have iarg in expression (use pos+1), return (entry, pos + 1) +# pos < 0: must have iarg in expression, return (entry, -1) +# pos = 0: no positional argument before, can be either, return (entry, 1) or (entry, -1) +function make_argspec(s::AbstractString, pos::Int) + # for argument position + iarg::Int = -1 + hasfil::Bool = false + ff::Function = Base.identity + + if !isempty(s) + ifil = searchindex(s, "|>") + if ifil == 0 + iarg = parse(Int,s) + else + iarg = ifil > 1 ? 
parse(Int,s[1:ifil-1]) : -1 + hasfil = true + ff = eval(symbol(s[ifil+2:end])) + end + end + + if pos > 0 + iarg < 0 || error("entry with and without argument index must not coexist.") + iarg = (pos += 1) + elseif pos < 0 + iarg > 0 || error("entry with and without argument index must not coexist.") + else # pos == 0 + if iarg < 0 + iarg = pos = 1 + else + pos = -1 + end + end + + return (ArgSpec(iarg, hasfil, ff), pos) +end + + +### Format entry + +immutable FormatEntry + argspec::ArgSpec + spec::FormatSpec +end + +function make_formatentry(s::AbstractString, pos::Int) + @assert s[1] == '{' && s[end] == '}' + sc = s[2:end-1] + icolon = search(sc, ':') + if icolon == 0 # no colon + (argspec, pos) = make_argspec(sc, pos) + spec = FormatSpec('s') + else + (argspec, pos) = make_argspec(sc[1:icolon-1], pos) + spec = FormatSpec(sc[icolon+1:end]) + end + return (FormatEntry(argspec, spec), pos) +end + + +### Format expression + +type FormatExpr + prefix::UTF8String + suffix::UTF8String + entries::Vector{FormatEntry} + inter::Vector{UTF8String} +end + +_raise_unmatched_lbrace() = error("Unmatched { in format expression.") + +function find_next_entry_open(s::AbstractString, si::Int) + slen = length(s) + p = search(s, '{', si) + p < slen || _raise_unmatched_lbrace() + while p > 0 && s[p+1] == '{' # escape `{{` + p = search(s, '{', p+2) + p < slen || _raise_unmatched_lbrace() + end + # println("open at $p") + pre = p > 0 ? 
s[si:p-1] : s[si:end] + if !isempty(pre) + pre = replace(pre, "{{", '{') + pre = replace(pre, "}}", '}') + end + return (p, utf8(pre)) +end + +function find_next_entry_close(s::AbstractString, si::Int) + slen = length(s) + p = search(s, '}', si) + p > 0 || _raise_unmatched_lbrace() + # println("close at $p") + return p +end + +function FormatExpr(s::AbstractString) + slen = length(s) + + # init + prefix = utf8("") + suffix = utf8("") + entries = FormatEntry[] + inter = UTF8String[] + + # scan + (p, prefix) = find_next_entry_open(s, 1) + if p > 0 + q = find_next_entry_close(s, p+1) + (e, pos) = make_formatentry(s[p:q], 0) + push!(entries, e) + (p, pre) = find_next_entry_open(s, q+1) + while p > 0 + push!(inter, pre) + q = find_next_entry_close(s, p+1) + (e, pos) = make_formatentry(s[p:q], pos) + push!(entries, e) + (p, pre) = find_next_entry_open(s, q+1) + end + suffix = pre + end + FormatExpr(prefix, suffix, entries, inter) +end + +function printfmt(io::IO, fe::FormatExpr, args...) + if !isempty(fe.prefix) + write(io, fe.prefix) + end + ents = fe.entries + ne = length(ents) + if ne > 0 + e = ents[1] + printfmt(io, e.spec, getarg(args, e.argspec)) + for i = 2:ne + write(io, fe.inter[i-1]) + e = ents[i] + printfmt(io, e.spec, getarg(args, e.argspec)) + end + end + if !isempty(fe.suffix) + write(io, fe.suffix) + end +end + +typealias StringOrFE Union{AbstractString,FormatExpr} +printfmt(io::IO, fe::AbstractString, args...) = printfmt(io, FormatExpr(fe), args...) +printfmt(fe::StringOrFE, args...) = printfmt(STDOUT, fe, args...) + +printfmtln(io::IO, fe::StringOrFE, args...) = (printfmt(io, fe, args...); println(io)) +printfmtln(fe::StringOrFE, args...) = printfmtln(STDOUT, fe, args...) + +format(fe::StringOrFE, args...) 
= + (buf = IOBuffer(); printfmt(buf, fe, args...); bytestring(buf)) + diff --git a/src/literals.jl b/src/literals.jl index b006492..60b8cbe 100644 --- a/src/literals.jl +++ b/src/literals.jl @@ -63,7 +63,7 @@ function s_parse_latex(io, s, i) beg = i # start location c, i = next(s, i) while c != '}' - done(s, i) && throw(ArgumentError("\\: missing closing : in $(repr(s))")) + done(s, i) && throw(ArgumentError("\\{ missing closing } in $(repr(s))")) c, i = next(s, i) end latexstr = get(Base.REPLCompletions.latex_symbols, string("\\", s[beg:i-2]), "") @@ -72,6 +72,26 @@ function s_parse_latex(io, s, i) i end +""" +Handle Unicode name, of form \\N{}, from Python +""" +function s_parse_uniname(io, s, i) + done(s, i) && throw(ArgumentError("\\N incomplete in $(repr(s))")) + c, i = next(s, i) + c != '{' && throw(ArgumentError("\\N missing initial { in $(repr(s))")) + done(s, i) && throw(ArgumentError("\\N{ incomplete in $(repr(s))")) + beg = i # start location + c, i = next(s, i) + while c != '}' + done(s, i) && throw(ArgumentError("\\N{ missing closing } in $(repr(s))")) + c, i = next(s, i) + end + unichar = get(UnicodeNames, uppercase(s[beg:i-2]), typemax(UInt32)) + unichar == typemax(UInt32) && throw(ArgumentError("Invalid Unicode name in $(repr(s))")) + print(io, Char(unichar)) + i +end + """ String interpolation parsing Based on code resurrected from Julia base: @@ -89,6 +109,8 @@ function s_print_unescaped(io, s::AbstractString) i = s_parse_emoji(io, s, i) elseif c == '{' # LaTex i = s_parse_latex(io, s, i) + elseif c == 'N' # Unicode name + i = s_parse_uniname(io, s, i) else c = (c == '0' ? '\0' : c == '"' ? 
'"' : @@ -133,17 +155,52 @@ function s_interp_parse(s::AbstractString, unescape::Function, p::Function) i = j = start(s) while !done(s, j) c, k = next(s, j) - if c == '\\' && !done(s, k) && s[k] == '(' - # Handle interpolation - if !isempty(s[i:j-1]) - push!(sx, unescape(s[i:j-1])) - end - ex, j = parse(s, k, greedy=false) - if isa(ex, Expr) && is(ex.head, :continue) - throw(ParseError("Incomplete expression")) + if c == '\\' && !done(s, k) + if s[k] == '(' + # Handle interpolation + if !isempty(s[i:j-1]) + push!(sx, unescape(s[i:j-1])) + end + ex, j = parse(s, k, greedy=false) + if isa(ex, Expr) && is(ex.head, :continue) + throw(ParseError("Incomplete expression")) + end + push!(sx, esc(ex)) + i = j + elseif s[k] == '%' + # Move past \\, c should point to '%' + c, k = next(s, k) + done(s, k) && throw(ParseError("Incomplete % expression")) + # Handle interpolation + if !isempty(s[i:j-1]) + push!(sx, unescape(s[i:j-1])) + end + if s[k] == '(' + # Need to find end to parse to + _, j = parse(s, k, greedy=false) + # This is a bit hacky, and probably doesn't perform as well as it could, + # but it works! Same below. 
+ str = "(fmt" * s[k:j-1] * ")" + else + # Move past %, c should point to letter + beg = k + while true + c, k = next(s, k) + done(s, k) && throw(ParseError("Incomplete % expression")) + s[k] == '(' && break + end + _, j = parse(s, k, greedy=false) + str = string("(cfmt(\"", s[beg:k-1], "\",", s[k+1:j-1], ")") + end + ex, _ = parse(str, 1, greedy=false) + if isa(ex, Expr) && is(ex.head, :continue) + throw(ParseError("Incomplete expression")) + end + push!(sx, esc(ex)) + i = j + else + j = k end - push!(sx, esc(ex)) - i = j else j = k end diff --git a/src/unicodenames.jl b/src/unicodenames.jl new file mode 100644 index 0000000..d725c57 --- /dev/null +++ b/src/unicodenames.jl @@ -0,0 +1,185 @@ +const global UnicodeNames = Dict{ASCIIString, UInt32}( + ("EN QUAD" => 0x2000), + ("EM QUAD" => 0x2001), + ("EN SPACE" => 0x2002), + ("EM SPACE" => 0x2003), + ("THREE-PER-EM SPACE" => 0x2004), + ("FOUR-PER-EM SPACE" => 0x2005), + ("SIX-PER-EM SPACE" => 0x2006), + ("FIGURE SPACE" => 0x2007), + ("PUNCTUATION SPACE" => 0x2008), + ("THIN SPACE" => 0x2009), + ("HAIR SPACE" => 0x200A), + ("ZERO WIDTH SPACE" => 0x200B), + ("ZERO WIDTH NON-JOINER" => 0x200C), + ("ZERO WIDTH JOINER" => 0x200D), + ("LEFT-TO-RIGHT MARK" => 0x200E), + ("RIGHT-TO-LEFT MARK" => 0x200F), + ("HYPHEN" => 0x2010), + ("NON-BREAKING HYPHEN" => 0x2011), + ("FIGURE DASH" => 0x2012), + ("EN DASH" => 0x2013), + ("EM DASH" => 0x2014), + ("HORIZONTAL BAR" => 0x2015), + ("DOUBLE VERTICAL LINE" => 0x2016), + ("DOUBLE LOW LINE" => 0x2017), + ("LEFT SINGLE QUOTATION MARK" => 0x2018), + ("RIGHT SINGLE QUOTATION MARK" => 0x2019), + ("SINGLE LOW-9 QUOTATION MARK" => 0x201A), + ("SINGLE HIGH-REVERSED-9 QUOTATION MARK" => 0x201B), + ("LEFT DOUBLE QUOTATION MARK" => 0x201C), + ("RIGHT DOUBLE QUOTATION MARK" => 0x201D), + ("DOUBLE LOW-9 QUOTATION MARK" => 0x201E), + ("DOUBLE HIGH-REVERSED-9 QUOTATION MARK" => 0x201F), + ("DAGGER" => 0x2020), + ("DOUBLE DAGGER" => 0x2021), + ("BULLET" => 0x2022), + ("TRIANGULAR BULLET" => 0x2023), 
+ ("ONE DOT LEADER" => 0x2024), + ("TWO DOT LEADER" => 0x2025), + ("HORIZONTAL ELLIPSIS" => 0x2026), + ("HYPHENATION POINT" => 0x2027), + ("LINE SEPARATOR" => 0x2028), + ("PARAGRAPH SEPARATOR" => 0x2029), + ("LEFT-TO-RIGHT EMBEDDING" => 0x202A), + ("RIGHT-TO-LEFT EMBEDDING" => 0x202B), + ("POP DIRECTIONAL FORMATTING" => 0x202C), + ("LEFT-TO-RIGHT OVERRIDE" => 0x202D), + ("RIGHT-TO-LEFT OVERRIDE" => 0x202E), + ("NARROW NO-BREAK SPACE" => 0x202F), + ("PER MILLE SIGN" => 0x2030), + ("PER TEN THOUSAND SIGN" => 0x2031), + ("PRIME" => 0x2032), + ("DOUBLE PRIME" => 0x2033), + ("TRIPLE PRIME" => 0x2034), + ("REVERSED PRIME" => 0x2035), + ("REVERSED DOUBLE PRIME" => 0x2036), + ("REVERSED TRIPLE PRIME" => 0x2037), + ("CARET" => 0x2038), + ("SINGLE LEFT-POINTING ANGLE QUOTATION MARK" => 0x2039), + ("SINGLE RIGHT-POINTING ANGLE QUOTATION MARK" => 0x203A), + ("REFERENCE MARK" => 0x203B), + ("DOUBLE EXCLAMATION MARK" => 0x203C), + ("INTERROBANG" => 0x203D), + ("OVERLINE" => 0x203E), + ("UNDERTIE" => 0x203F), + ("CHARACTER TIE" => 0x2040), + ("CARET INSERTION POINT" => 0x2041), + ("ASTERISM" => 0x2042), + ("HYPHEN BULLET" => 0x2043), + ("FRACTION SLASH" => 0x2044), + ("LEFT SQUARE BRACKET WITH QUILL" => 0x2045), + ("RIGHT SQUARE BRACKET WITH QUILL" => 0x2046), + ("DOUBLE QUESTION MARK" => 0x2047), + ("QUESTION EXCLAMATION MARK" => 0x2048), + ("EXCLAMATION QUESTION MARK" => 0x2049), + ("TIRONIAN SIGN ET" => 0x204A), + ("REVERSED PILCROW SIGN" => 0x204B), + ("BLACK LEFTWARDS BULLET" => 0x204C), + ("BLACK RIGHTWARDS BULLET" => 0x204D), + ("LOW ASTERISK" => 0x204E), + ("REVERSED SEMICOLON" => 0x204F), + ("CLOSE UP" => 0x2050), + ("TWO ASTERISKS ALIGNED VERTICALLY" => 0x2051), + ("COMMERCIAL MINUS SIGN" => 0x2052), + ("SWUNG DASH" => 0x2053), + ("INVERTED UNDERTIE" => 0x2054), + ("FLOWER PUNCTUATION MARK" => 0x2055), + ("THREE DOT PUNCTUATION" => 0x2056), + ("QUADRUPLE PRIME" => 0x2057), + ("FOUR DOT PUNCTUATION" => 0x2058), + ("FIVE DOT PUNCTUATION" => 0x2059), + ("TWO DOT 
PUNCTUATION" => 0x205A), + ("FOUR DOT MARK" => 0x205B), + ("DOTTED CROSS" => 0x205C), + ("TRICOLON" => 0x205D), + ("VERTICAL FOUR DOTS" => 0x205E), + ("MEDIUM MATHEMATICAL SPACE" => 0x205F), + ("WORD JOINER" => 0x2060), + ("FUNCTION APPLICATION" => 0x2061), + ("INVISIBLE TIMES" => 0x2062), + ("INVISIBLE SEPARATOR" => 0x2063), + ("INVISIBLE PLUS" => 0x2064), + ("LEFT-TO-RIGHT ISOLATE" => 0x2066), + ("RIGHT-TO-LEFT ISOLATE" => 0x2067), + ("FIRST STRONG ISOLATE" => 0x2068), + ("POP DIRECTIONAL ISOLATE" => 0x2069), + ("INHIBIT SYMMETRIC SWAPPING" => 0x206A), + ("ACTIVATE SYMMETRIC SWAPPING" => 0x206B), + ("INHIBIT ARABIC FORM SHAPING" => 0x206C), + ("ACTIVATE ARABIC FORM SHAPING" => 0x206D), + ("NATIONAL DIGIT SHAPES" => 0x206E), + ("NOMINAL DIGIT SHAPES" => 0x206F), + ("SUPERSCRIPT ZERO" => 0x2070), + ("SUPERSCRIPT LATIN SMALL LETTER I" => 0x2071), + ("SUPERSCRIPT FOUR" => 0x2074), + ("SUPERSCRIPT FIVE" => 0x2075), + ("SUPERSCRIPT SIX" => 0x2076), + ("SUPERSCRIPT SEVEN" => 0x2077), + ("SUPERSCRIPT EIGHT" => 0x2078), + ("SUPERSCRIPT NINE" => 0x2079), + ("SUPERSCRIPT PLUS SIGN" => 0x207A), + ("SUPERSCRIPT MINUS" => 0x207B), + ("SUPERSCRIPT EQUALS SIGN" => 0x207C), + ("SUPERSCRIPT LEFT PARENTHESIS" => 0x207D), + ("SUPERSCRIPT RIGHT PARENTHESIS" => 0x207E), + ("SUPERSCRIPT LATIN SMALL LETTER N" => 0x207F), + ("SUBSCRIPT ZERO" => 0x2080), + ("SUBSCRIPT ONE" => 0x2081), + ("SUBSCRIPT TWO" => 0x2082), + ("SUBSCRIPT THREE" => 0x2083), + ("SUBSCRIPT FOUR" => 0x2084), + ("SUBSCRIPT FIVE" => 0x2085), + ("SUBSCRIPT SIX" => 0x2086), + ("SUBSCRIPT SEVEN" => 0x2087), + ("SUBSCRIPT EIGHT" => 0x2088), + ("SUBSCRIPT NINE" => 0x2089), + ("SUBSCRIPT PLUS SIGN" => 0x208A), + ("SUBSCRIPT MINUS" => 0x208B), + ("SUBSCRIPT EQUALS SIGN" => 0x208C), + ("SUBSCRIPT LEFT PARENTHESIS" => 0x208D), + ("SUBSCRIPT RIGHT PARENTHESIS" => 0x208E), + ("LATIN SUBSCRIPT SMALL LETTER A" => 0x2090), + ("LATIN SUBSCRIPT SMALL LETTER E" => 0x2091), + ("LATIN SUBSCRIPT SMALL LETTER O" => 0x2092), + ("LATIN 
SUBSCRIPT SMALL LETTER X" => 0x2093), + ("LATIN SUBSCRIPT SMALL LETTER SCHWA" => 0x2094), + ("LATIN SUBSCRIPT SMALL LETTER H" => 0x2095), + ("LATIN SUBSCRIPT SMALL LETTER K" => 0x2096), + ("LATIN SUBSCRIPT SMALL LETTER L" => 0x2097), + ("LATIN SUBSCRIPT SMALL LETTER M" => 0x2098), + ("LATIN SUBSCRIPT SMALL LETTER N" => 0x2099), + ("LATIN SUBSCRIPT SMALL LETTER P" => 0x209A), + ("LATIN SUBSCRIPT SMALL LETTER S" => 0x209B), + ("LATIN SUBSCRIPT SMALL LETTER T" => 0x209C), + ("EURO-CURRENCY SIGN" => 0x20A0), + ("COLON SIGN" => 0x20A1), + ("CRUZEIRO SIGN" => 0x20A2), + ("FRENCH FRANC SIGN" => 0x20A3), + ("LIRA SIGN" => 0x20A4), + ("MILL SIGN" => 0x20A5), + ("NAIRA SIGN" => 0x20A6), + ("PESETA SIGN" => 0x20A7), + ("RUPEE SIGN" => 0x20A8), + ("WON SIGN" => 0x20A9), + ("NEW SHEQEL SIGN" => 0x20AA), + ("DONG SIGN" => 0x20AB), + ("EURO SIGN" => 0x20AC), + ("KIP SIGN" => 0x20AD), + ("TUGRIK SIGN" => 0x20AE), + ("DRACHMA SIGN" => 0x20AF), + ("GERMAN PENNY SIGN" => 0x20B0), + ("PESO SIGN" => 0x20B1), + ("GUARANI SIGN" => 0x20B2), + ("AUSTRAL SIGN" => 0x20B3), + ("HRYVNIA SIGN" => 0x20B4), + ("CEDI SIGN" => 0x20B5), + ("LIVRE TOURNOIS SIGN" => 0x20B6), + ("SPESMILO SIGN" => 0x20B7), + ("TENGE SIGN" => 0x20B8), + ("INDIAN RUPEE SIGN" => 0x20B9), + ("TURKISH LIRA SIGN" => 0x20BA), + ("NORDIC MARK SIGN" => 0x20BB), + ("MANAT SIGN" => 0x20BC), + ("RUBLE SIGN" => 0x20BD), + ("LARI SIGN" => 0x20BE))