From 4c9587b3df1d99ea92e880f36816db1a15e6a197 Mon Sep 17 00:00:00 2001
From: ScottPJones <scottjones@alum.mit.edu>
Date: Thu, 18 Feb 2016 19:46:48 -0500
Subject: [PATCH 1/4] Add support for formatting with \% syntax

---
 src/literals.jl | 54 +++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 43 insertions(+), 11 deletions(-)

diff --git a/src/literals.jl b/src/literals.jl
index b006492..7178e4c 100644
--- a/src/literals.jl
+++ b/src/literals.jl
@@ -63,7 +63,7 @@ function s_parse_latex(io, s,  i)
     beg = i # start location
     c, i = next(s, i)
     while c != '}'
-        done(s, i) && throw(ArgumentError("\\: missing closing : in $(repr(s))"))
+        done(s, i) && throw(ArgumentError("\\{ missing closing } in $(repr(s))"))
         c, i = next(s, i)
     end
     latexstr = get(Base.REPLCompletions.latex_symbols, string("\\", s[beg:i-2]), "")
@@ -133,17 +133,49 @@ function s_interp_parse(s::AbstractString, unescape::Function, p::Function)
     i = j = start(s)
     while !done(s, j)
         c, k = next(s, j)
-        if c == '\\' && !done(s, k) && s[k] == '('
-            # Handle interpolation
-            if !isempty(s[i:j-1])
-                push!(sx, unescape(s[i:j-1]))
-            end
-            ex, j = parse(s, k, greedy=false)
-            if isa(ex, Expr) && is(ex.head, :continue)
-                throw(ParseError("Incomplete expression"))
+        if c == '\\' && !done(s, k)
+            if s[k] == '('
+                # Handle interpolation
+                if !isempty(s[i:j-1])
+                    push!(sx, unescape(s[i:j-1]))
+                end
+                ex, j = parse(s, k, greedy=false)
+                if isa(ex, Expr) && is(ex.head, :continue)
+                    throw(ParseError("Incomplete expression"))
+                end
+                push!(sx, esc(ex))
+                i = j
+            elseif s[k] == '%'
+                # Move past \\, c should point to '%'
+                c, k = next(s, k)
+                done(s, k) && throw(ParseError("Incomplete % expression"))
+                # Handle interpolation
+                if !isempty(s[i:j-1])
+                    push!(sx, unescape(s[i:j-1]))
+                end
+                c = s[k]
+                if c != '('
+                    # Move past %, c should point to letter
+                    c, k = next(s, k)
+                    s[k] == '(' || throw(ParseError("Missing ( in % format"))
+                end
+                # c is now either ( or C format letter to be used
+                ex, j = parse(s, k, greedy=false)
+                if isa(ex, Expr)
+                    is(ex.head, :continue) && throw(ParseError("Incomplete expression"))
+                    # Need to wrap call to fmt around expression
+                    if ex.head == :tuple
+                        push!(sx, esc(:(fmt(ex.args...))))
+                    else
+                        push!(sx, esc(:(fmt(ex.args[1]))))
+                    end
+                else
+                    push!(sx, esc(:(fmt($ex))))
+                end
+                i = j
+            else
+                j = k
             end
-            push!(sx, esc(ex))
-            i = j
         else
             j = k
         end

From 7125f3eb4411ad0b5971f898c7baf4055d1d3636 Mon Sep 17 00:00:00 2001
From: ScottPJones <scottjones@alum.mit.edu>
Date: Thu, 18 Feb 2016 20:12:21 -0500
Subject: [PATCH 2/4] Add fmt code from Tom Breloff's PR #10 to Formatting.jl

---
 LICENSE.md         |   7 +-
 README.md          |   9 ++
 src/StringUtils.jl |  12 ++
 src/cformat.jl     | 368 +++++++++++++++++++++++++++++++++++++++++++++
 src/fmt.jl         | 173 +++++++++++++++++++++
 src/fmtcore.jl     | 292 +++++++++++++++++++++++++++++++++++
 src/fmtspec.jl     | 213 ++++++++++++++++++++++++++
 src/formatexpr.jl  | 170 +++++++++++++++++++++
 src/literals.jl    |   4 +-
 9 files changed, 1245 insertions(+), 3 deletions(-)
 create mode 100644 src/cformat.jl
 create mode 100644 src/fmt.jl
 create mode 100644 src/fmtcore.jl
 create mode 100644 src/fmtspec.jl
 create mode 100644 src/formatexpr.jl

diff --git a/LICENSE.md b/LICENSE.md
index ae707c5..b71e0be 100644
--- a/LICENSE.md
+++ b/LICENSE.md
@@ -5,6 +5,12 @@ Copyright (c) 2016 Gandalf Software, Inc. (Scott P. Jones)
 Portions based on code that is part of Julia, licensed under the MIT license,
 and also Eric Forgy's StringInterpolations.jl package.
 
+Portions are based on code submitted as a PR to the Formatting.jl package,
+also under MIT "Expat" license: Copyright (c) 2015 Tom Breloff
+
+Portions are based on the Formatting.jl package, also under MIT "Expat" license:
+Copyright (c) 2014: Dahua Lin and contributors.
+
 > Permission is hereby granted, free of charge, to any person obtaining a copy
 > of this software and associated documentation files (the "Software"), to deal
 > in the Software without restriction, including without limitation the rights
@@ -22,4 +28,3 @@ and also Eric Forgy's StringInterpolations.jl package.
 > LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 > OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 > SOFTWARE.
-> 
diff --git a/README.md b/README.md
index 00d8569..a68b203 100644
--- a/README.md
+++ b/README.md
@@ -16,3 +16,12 @@ Finally, I have added two new ways of representing characters in the literal str
 This makes life a lot easier when you want to keep the text of a program in ASCII, and
 also to be able to write programs using those characters that might not even display
 correctly in their editor.
+
+This package now has some initial formatting capability, based on Tom Breloff's wonderful PR #10
+to the JuliaLang/Formatting.jl package (by Dahua Lin and other contributors).
+`\%(arguments)` is interpolated as a call to `fmt(arguments)`.
+
+I also plan on adding support for `\%c(arguments)`, where `c` is a C-style formatting character.
+I'm debating whether it should accept the full C-style syntax (leading 0, width/precision,
+etc.) before the single character.
+
diff --git a/src/StringUtils.jl b/src/StringUtils.jl
index 8492bc5..fe99945 100644
--- a/src/StringUtils.jl
+++ b/src/StringUtils.jl
@@ -9,4 +9,16 @@ export @u_str, @sinterpolate
 export s_unescape_string, s_escape_string, s_print_unescaped, s_print_escaped
 
 include("literals.jl")
+
+# From Formatting.jl
+import Base.show
+
+export cfmt, fmt, fmt_default, fmt_default!
+
+include("cformat.jl" )
+include("fmtspec.jl")
+include("fmtcore.jl")
+include("formatexpr.jl")
+include("fmt.jl")
+
 end
diff --git a/src/cformat.jl b/src/cformat.jl
new file mode 100644
index 0000000..1d9479a
--- /dev/null
+++ b/src/cformat.jl
@@ -0,0 +1,368 @@
+formatters = Dict{ ASCIIString, Function }()  # cache filled by generate_formatter: format string => formatter
+
+function sprintf1( fmt::ASCIIString, x )
+    # Format the single value `x` with the printf-style spec `fmt`.
+    formatter = generate_formatter( fmt )  # fetched from, or added to, the formatter cache
+    return formatter( x )
+end
+
+if VERSION < v"0.4-"
+    const base64encode = base64  # alias so generate_formatter can call base64encode on pre-0.4 Julia
+end
+
+function generate_formatter( fmt::ASCIIString )  # build (or fetch) a one-argument formatter for `fmt`
+    global formatters
+    if haskey( formatters, fmt )  # already generated: reuse it
+        return formatters[fmt]
+    end
+    func = symbol( "sprintf_" * replace( base64encode( fmt ), "=", "!" ) )  # function name derived from fmt
+
+    if !contains( fmt, "'" )  # no thousands-separator flag: plain @sprintf wrapper
+        test = Base.Printf.parse( fmt )
+        if length( test ) != 1 || !( typeof( test[1] ) <: Tuple )  # NOTE(review): presumably rejects literal text or several conversions - confirm
+            error( "Only one AND undecorated format string is allowed")
+        end
+
+        code = quote
+            function $func( x )
+                @sprintf( $fmt, x )
+            end
+        end
+    else
+        conversion = fmt[end]  # the conversion letter is the last character of the spec
+        if !in( conversion, "sduifF" )
+            error( "thousand separator not defined for " * string( conversion ) * " conversion")
+        end
+        fmtactual = replace( fmt, "'", "", 1 )  # spec handed to @sprintf, with the first "'" removed
+        test = Base.Printf.parse( fmtactual )
+        if length( test ) != 1 || !( typeof( test[1] ) <: Tuple )
+            error( "Only one AND undecorated format string is allowed")
+        end
+        if in( conversion, "sfF" )  # output may contain a '.' or, for Rationals, a '/'
+            code = quote
+                function $func{T<:Real}( x::T )
+                    s = @sprintf( $fmtactual, x )
+                    # commas are added to only the numerator
+                    if T <: Rational && endswith( $fmtactual, "s" )
+                        spos = findfirst( s, '/' )
+                        s = addcommas( s[1:spos-1] ) * s[spos:end]
+                    else
+                        dpos = findfirst( s, '.' )
+                        if dpos != 0  # group only the part before the decimal point
+                            s = addcommas( s[1:dpos-1] ) * s[ dpos:end ]
+                        else # find the rightmost digit
+                            for i in length( s ):-1:1
+                                if isdigit( s[i] )
+                                    s = addcommas( s[1:i] ) * s[i+1:end]
+                                    break
+                                end
+                            end
+                        end
+                    end
+                    s
+                end
+            end
+        else  # d, u, i: group everything up to the rightmost digit
+            code = quote
+                function $func( x )
+                    s = @sprintf( $fmtactual, x )
+                    for i in length( s ):-1:1
+                        if isdigit( s[i] )
+                            s = addcommas( s[1:i] ) * s[i+1:end]
+                            break
+                        end
+                    end
+                    s
+                end
+            end
+        end
+    end
+    f = eval( code )  # define the generated function
+    formatters[ fmt ] = f  # cache under the original spec, including any "'" flag
+    f
+end
+
+function addcommas( s::ASCIIString )  # insert ',' between 3-character groups of `s`, counted from the right
+    len = length(s)
+    t = ""  # result, built from the right end towards the left
+    for i in 1:3:len
+        subs = s[max(1,len-i-1):len-i+1]  # next group of up to three characters, moving leftwards
+        if i == 1
+            t = subs  # rightmost group: nothing to separate yet
+        else
+            if match( r"[0-9]", subs ) != nothing  # comma only when the group holds a digit (not just a sign or padding)
+                t = subs * "," * t
+            else
+                t = subs * t
+            end
+        end
+    end
+    return t
+end
+
+function generate_format_string(;  # assemble a printf-style spec string from keyword options
+        width::Int=-1,  # -1 means: no width field
+        precision::Int= -1,  # -1 means: no precision field
+        leftjustified::Bool=false,
+        zeropadding::Bool=false,
+        commas::Bool=false,
+        signed::Bool=false,
+        positivespace::Bool=false,
+        alternative::Bool=false,
+        conversion::ASCIIString="f" #aAdecEfFiosxX
+        )
+    s = "%"
+    if commas  # thousands-separator flag understood by generate_formatter
+        s *= "'"
+    end
+    if alternative && in( conversion[1], "aAeEfFoxX" )
+        s *= "#"
+    end
+    if zeropadding && !leftjustified && width != -1  # zero padding only applies to right-justified output with a width
+        s *= "0"
+    end
+
+    if signed  # `signed` takes precedence over `positivespace`
+        s *= "+"
+    elseif positivespace
+        s *= " "
+    end
+
+    if width != -1
+        if leftjustified
+            s *= "-" * string( width )
+        else
+            s *= string( width )
+        end
+    end
+    if precision != -1
+        s *= "." * string( precision )
+    end
+    s * conversion  # the conversion letter(s) close the spec
+end
+
+function format{T<:Real}( x::T;  # format the real number `x` as a string according to the keyword options
+        width::Int=-1,  # -1 means: no width requested
+        precision::Int= -1,  # -1 means: no precision requested
+        leftjustified::Bool=false,
+        zeropadding::Bool=false, # when right-justified, use 0 instead of space to fill
+        commas::Bool=false,
+        signed::Bool=false, # +/- prefix
+        positivespace::Bool=false,
+        stripzeros::Bool=(precision== -1),
+        parens::Bool=false, # use (1.00) instead of -1.00. Used in finance
+        alternative::Bool=false, # usually for hex
+        mixedfraction::Bool=false,
+        mixedfractionsep::String="_",
+        fractionsep::String="/", # num / den
+        fractionwidth::Int = 0,
+        tryden::Int = 0, # if 2 or higher, try to use this denominator, without losing precision
+        suffix::String="", # useful for units/%
+        autoscale::Symbol=:none, # :metric, :binary or :finance
+        conversion::ASCIIString=""
+        )
+    checkwidth = commas  # set whenever later edits to s can change its length
+    if conversion == ""  # no conversion given: choose one from the type of x
+        if T <: FloatingPoint || T <: Rational && precision != -1
+            actualconv = "f"
+        elseif T <: Unsigned
+            actualconv = "x"
+        elseif T <: Integer
+            actualconv = "d"
+        else
+            conversion = "s"
+            actualconv = "s"
+        end
+    else
+        actualconv = conversion
+    end
+    if signed && commas
+        error( "You cannot use signed (+/-) AND commas at the same time")
+    end
+    if T <: Rational && conversion == "s"
+        stripzeros = false
+    end
+    if ( T <: FloatingPoint && actualconv == "f" || T <: Integer ) && autoscale != :none
+        actualconv = "f"  # a scaled value is always printed with the "f" conversion
+        if autoscale == :metric
+            scales = [
+                (1e24, "Y" ),
+                (1e21, "Z" ),
+                (1e18, "E" ),
+                (1e15, "P" ),
+                (1e12, "T" ),
+                (1e9,  "G"),
+                (1e6,  "M"),
+                (1e3,  "k") ]
+            if abs(x) > 1
+                for (mag, sym) in scales  # largest magnitude first; the first match wins
+                    if abs(x) >= mag
+                        x /= mag
+                        suffix = sym * suffix
+                        break
+                    end
+                end
+            elseif T <: FloatingPoint  # NOTE(review): x == 0.0 also lands here and picks up the "p" prefix - confirm intended
+                smallscales = [
+                    ( 1e-12, "p" ),
+                    ( 1e-9,  "n" ),
+                    ( 1e-6,  "μ" ),
+                    ( 1e-3,  "m" ) ]
+                for (mag,sym) in smallscales
+                    if abs(x) < mag*10
+                        x /= mag
+                        suffix = sym * suffix
+                        break
+                    end
+                end
+            end
+        else
+            if autoscale == :binary
+                scales = [
+                    (1024.0 ^8,  "Yi" ),
+                    (1024.0 ^7,  "Zi" ),
+                    (1024.0 ^6,  "Ei" ),
+                    (1024^5,  "Pi" ),
+                    (1024^4,  "Ti" ),
+                    (1024^3,  "Gi"),
+                    (1024^2,  "Mi"),
+                    (1024,    "Ki")
+                ]
+            else # :finance
+                scales = [
+                    (1e12, "t" ),
+                    (1e9,  "b"),
+                    (1e6,  "m"),
+                    (1e3,  "k") ]
+            end
+            for (mag, sym) in scales
+                if abs(x) >= mag
+                    x /= mag
+                    suffix = sym * suffix
+                    break
+                end
+            end
+        end
+    end
+
+    nonneg = x >= 0  # remembered because the sign may be dropped below (parens / mixed fraction)
+    fractional = 0
+    if T <: Rational && mixedfraction
+        actualconv = "d"
+        actualx = trunc( Int, x )  # whole part; the remainder is appended further down
+        fractional = abs(x) - abs(actualx)
+    else
+        if parens && !in( actualconv[1], "xX" )
+            actualx = abs(x)  # the sign is expressed with parentheses instead
+        else
+            actualx = x
+        end
+    end
+    s = sprintf1( generate_format_string( width=width,
+        precision=precision,
+        leftjustified=leftjustified,
+        zeropadding=zeropadding,
+        commas=commas,
+        signed=signed,
+        positivespace=positivespace,
+        alternative=alternative,
+        conversion=actualconv
+    ),actualx)
+
+    if T <:Rational && conversion == "s"
+        if mixedfraction && fractional != 0
+            num = fractional.num
+            den = fractional.den
+            if tryden >= 2 && mod( tryden, den ) == 0  # rescale to the requested denominator when it divides evenly
+                num *= div(tryden,den)
+                den = tryden
+            end
+            fs = string( num ) * fractionsep * string(den)
+            if length(fs) < fractionwidth
+                fs = repeat( "0", fractionwidth - length(fs) ) * fs
+            end
+            s = rstrip(s)
+            if actualx != 0
+                s = rstrip(s) * mixedfractionsep * fs
+            else  # no whole part: print only the fraction, with its sign
+                if !nonneg
+                    s = "-" * fs
+                else
+                    s = fs
+                end
+            end
+            checkwidth = true
+        elseif !mixedfraction
+            s = replace( s, "//", fractionsep )
+            checkwidth = true
+        end
+    elseif stripzeros && in( actualconv[1], "fFeEs" )
+        dpos = findfirst( s, '.')
+        if in( actualconv[1], "eEs" )
+            if in( actualconv[1], "es" )
+                epos = findfirst( s, 'e' )
+            else
+                epos = findfirst( s, 'E' )
+            end
+            if epos == 0
+                rpos = length( s )
+            else
+                rpos = epos-1
+            end
+        else
+            rpos = length(s)
+        end
+        # rpos at this point is the rightmost possible char to start
+        # stripping
+        stripfrom = rpos+1
+        for i = rpos:-1:dpos+1  # NOTE(review): with no '.' in s, dpos is 0 and trailing zeros of an integer string get stripped - confirm
+            if s[i] == '0'
+                stripfrom = i
+            elseif s[i] ==' '
+                continue
+            else
+                break
+            end
+        end
+        if stripfrom <= rpos
+            if stripfrom == dpos+1 # everything after decimal is 0, so strip the decimal too
+                stripfrom = dpos
+            end
+            s = s[1:stripfrom-1] * s[rpos+1:end]
+            checkwidth = true
+        end
+    end
+
+    s *= suffix
+
+    if parens && !in( actualconv[1], "xX" )
+        # if zero or positive, we still need 1 white space on the right
+        if nonneg
+            s = " " * strip(s) * " "
+        else
+            s = "(" * strip(s) * ")"
+        end
+
+        checkwidth = true
+    end
+
+    if checkwidth && width != -1  # restore the requested width after the edits above
+        if length(s) > width  # too wide: drop spaces first, then commas
+            s = replace( s, " ", "", length(s)-width )
+            if length(s) > width && endswith( s, " " )
+                s = reverse( replace( reverse(s), " ", "", length(s)-width ) )
+            end
+            if length(s) > width
+                s = replace( s, ",", "", length(s)-width )
+            end
+        elseif length(s) < width  # too narrow: pad with spaces on the side given by leftjustified
+            if leftjustified
+                s = s * repeat( " ", width - length(s) )
+            else
+                s = repeat( " ", width - length(s) ) * s
+            end
+        end
+    end
+
+    s
+end
diff --git a/src/fmt.jl b/src/fmt.jl
new file mode 100644
index 0000000..41928fc
--- /dev/null
+++ b/src/fmt.jl
@@ -0,0 +1,173 @@
+
+# interface proposal by Tom Breloff (@tbreloff)... comments welcome
+# This uses the more basic formatting based on FormatSpec and the cfmt method (formerly called fmt, which I repurposed)
+
+# TODO: swap out FormatSpec for something that is able to use the "format" method, which has more options for units, prefixes, etc
+# TODO: support rational numbers, autoscale, etc as in "format"
+
+# --------------------------------------------------------------------------------------------------
+
+# the DefaultSpec object is just something to hold onto the current FormatSpec.
+# we keep the typechar around specifically for the reset! function, to go back to the starting state
+
+type DefaultSpec  # mutable holder, so the current FormatSpec can be swapped in place
+  typechar::Char  # type char given at construction; reset! rebuilds the spec from it
+  fspec::FormatSpec  # the spec currently in effect
+  DefaultSpec(c::Char) = new(c, FormatSpec(c))
+end
+
+const DEFAULT_FORMATTERS = Dict{DataType, DefaultSpec}()  # registry: type => its DefaultSpec
+
+# adds (or replaces) the default formatter for type T, built from type char c
+defaultSpec!{T}(::Type{T}, c::Char) = (DEFAULT_FORMATTERS[T] = DefaultSpec(c); nothing)
+
+# note: types T and K will now both share K's default (the same DefaultSpec object); K must already be a key
+defaultSpec!{T,K}(::Type{T}, ::Type{K}) = (DEFAULT_FORMATTERS[T] = DEFAULT_FORMATTERS[K]; nothing)
+
+# seed the registry with some basic default formatters (runs when this file is loaded)
+for (t, c) in [(Integer,'d'), (FloatingPoint,'f'), (Char,'c'), (String,'s')]
+  defaultSpec!(t, c)
+end
+# reset!(T): rebuild T's spec from the type char stored in its DefaultSpec, dropping later overrides
+reset!{T}(::Type{T}) = (dspec = defaultSpec(T); dspec.fspec = FormatSpec(dspec.typechar); nothing)
+
+
+# --------------------------------------------------------------------------------------------------
+
+
+function addKWArgsFromSymbols(kwargs, syms::Symbol...)
+  opts = Dict(kwargs)  # start from the explicit keyword arguments
+  for sym in syms      # each shorthand symbol sets one option; unrecognized symbols are ignored
+    if sym in (:ljust, :left)
+      opts[:align] = '<'
+    elseif sym in (:rjust, :right)
+      opts[:align] = '>'
+    elseif sym == :commas
+      opts[:tsep] = true
+    elseif sym in (:zpad, :zeropad)
+      opts[:zpad] = true
+    elseif sym in (:ipre, :prefix)
+      opts[:ipre] = true
+    end
+  end
+  opts
+end
+
+# --------------------------------------------------------------------------------------------------
+
+# methods to get the current default objects
+# note: if you want to set a default for an abstract type (e.g. FloatingPoint) you'll need to extend this method like here:
+defaultSpec{T<:Integer}(::Type{T}) = DEFAULT_FORMATTERS[Integer]  # every Integer subtype shares one entry
+defaultSpec{T<:FloatingPoint}(::Type{T}) = DEFAULT_FORMATTERS[FloatingPoint]
+defaultSpec{T<:String}(::Type{T}) = DEFAULT_FORMATTERS[String]
+function defaultSpec{T}(::Type{T})  # fallback: exact-type lookup, with an error naming the call that registers one
+  get(DEFAULT_FORMATTERS, T) do
+    error("Missing default spec for type $T... call fmt_default!(T, c): $DEFAULT_FORMATTERS")
+  end
+end
+defaultSpec(x) = defaultSpec(typeof(x))  # convenience: look up by value
+
+fmt_default{T}(::Type{T}) = defaultSpec(T).fspec  # current FormatSpec for type T
+fmt_default(x) = defaultSpec(x).fspec  # current FormatSpec for the type of value x
+
+
+
+# first resets the fmt_default spec to the given arg (a type char, or another type's default), then continue by updating with args and kwargs
+fmt_default!{T}(::Type{T}, c::Char, args...; kwargs...) = (defaultSpec!(T,c); fmt_default!(T, args...; kwargs...))
+fmt_default!{T,K}(::Type{T}, ::Type{K}, args...; kwargs...) = (defaultSpec!(T,K); fmt_default!(T, args...; kwargs...))
+
+# update the fmt_default for a specific type; always returns nothing
+function fmt_default!{T}(::Type{T}, syms::Symbol...; kwargs...)
+  if isempty(syms)
+
+    # if there are no arguments, reset to initial defaults
+    if isempty(kwargs)
+      reset!(T)
+      return
+    end
+
+    # otherwise update the spec: copy the current one with the keyword overrides applied
+    dspec = defaultSpec(T)
+    dspec.fspec = FormatSpec(dspec.fspec; kwargs...)
+
+  else
+    d = addKWArgsFromSymbols(kwargs, syms...)  # turn shorthand symbols into keyword arguments
+    fmt_default!(T; d...)  # re-enter through the keyword-only path above
+  end
+  nothing
+end
+
+# update the fmt_default for all types; always returns nothing
+function fmt_default!(syms::Symbol...; kwargs...)
+  if isempty(syms)
+    for k in keys(DEFAULT_FORMATTERS)  # apply the same keyword overrides to every registered type
+      fmt_default!(k; kwargs...)
+    end
+  else
+    d = addKWArgsFromSymbols(kwargs, syms...)  # turn shorthand symbols into keyword arguments
+    fmt_default!(; d...)  # re-enter through the keyword-only path above
+  end
+  nothing
+end
+
+
+# --------------------------------------------------------------------------------------------------
+
+# TODO: get rid of this entire hack by moving commas into cfmt
+
+function optionalCommas(x::Real, s::String, fspec::FormatSpec)  # add thousands separators to the formatted number s
+  dpos = findfirst(s, '.')
+  prevwidth = length(s)
+
+  if dpos == 0  # no decimal point: group the whole string
+    s = addcommas(s)
+  else          # group only the part before the decimal point
+    s = string(addcommas(s[1:dpos-1]), '.', s[dpos+1:end])
+  end
+
+  # check for excess width from commas
+  w = length(s)
+  if fspec.width > 0 && w > fspec.width && w > prevwidth
+    # we may have made the string too wide with those commas... gotta fix it
+    s = strip(s)
+    n = max(0, fspec.width - length(s))  # clamp: the number alone may exceed the width, and " "^n rejects n < 0
+    if fspec.align == '<' # left alignment
+      s = string(s, " "^n)
+    else
+      s = string(" "^n, s)
+    end 
+  end
+
+  s
+end
+optionalCommas(x, s::String, fspec::FormatSpec) = s  # values that are not Real pass through unchanged
+
+# --------------------------------------------------------------------------------------------------
+
+
+# TODO: do more caching to optimize repeated calls
+
+# creates a new FormatSpec by overriding the defaults and passes it to cfmt
+# note: adding kwargs is only appropriate for one-off formatting.  
+#       normally it will be much faster to change the fmt_default formatting as needed
+function fmt(x; kwargs...)  # format x with its type's default spec, optionally overridden by kwargs
+  fspec = isempty(kwargs) ? fmt_default(x) : FormatSpec(fmt_default(x); kwargs...)  # a new spec is built only when overrides are given
+  s = cfmt(fspec, x)
+
+  # add the commas now... I was confused as to when this is done currently
+  if fspec.tsep
+    return optionalCommas(x, s, fspec)
+  end
+  s
+end
+
+# some helper method calls, which just convert leading Int arguments to kwargs: first prec, then (optionally) width
+fmt(x, prec::Int, args...; kwargs...) = fmt(x, args...; prec=prec, kwargs...)
+fmt(x, prec::Int, width::Int, args...; kwargs...) = fmt(x, args...; prec=prec, width=width, kwargs...)
+
+# integrate some symbol shorthands into the keyword args
+# note: as above, this will generate relevant kwargs, so to format in a tight loop, you should probably update the fmt_default
+function fmt(x, syms::Symbol...; kwargs...)
+  merged = addKWArgsFromSymbols(kwargs, syms...)  # shorthand symbols folded into the keyword arguments
+  return fmt(x; merged...)                        # hand over to the keyword-only method
+end
diff --git a/src/fmtcore.jl b/src/fmtcore.jl
new file mode 100644
index 0000000..a0fb5ee
--- /dev/null
+++ b/src/fmtcore.jl
@@ -0,0 +1,292 @@
+# core formatting functions
+
+### auxiliary functions
+
+function _repwrite(out::IO, c::Char, n::Int)
+    # Write the character `c` to `out` exactly `n` times; nothing is written when n <= 0.
+    for _ = 1:n
+        write(out, c)
+    end
+end
+
+
+### print string or char
+
+function _pfmt_s(out::IO, fs::FormatSpec, s::Union(String,Char))
+    # Write `s` to `out`, padded with fs.fill up to fs.width.
+    # fs.align == '<' puts the padding on the right; any other alignment
+    # puts it on the left. Text at least as long as the width is written
+    # as is, without truncation.
+    npad = fs.width - length(s)
+    if npad <= 0
+        write(out, s)
+    elseif fs.align == '<'
+        write(out, s)
+        _repwrite(out, fs.fill, npad)
+    else
+        _repwrite(out, fs.fill, npad)
+        write(out, s)
+    end
+end
+
+
+### print integers
+
+_mul(x::Integer, ::_Dec) = x * 10  # _mul: multiply by the base picked by the second argument (marker types defined outside this view)
+_mul(x::Integer, ::_Bin) = x << 1
+_mul(x::Integer, ::_Oct) = x << 3
+_mul(x::Integer, ::Union(_Hex, _HEX)) = x << 4
+
+_div(x::Integer, ::_Dec) = div(x, 10)  # _div: integer-divide by that same base, i.e. drop the last digit
+_div(x::Integer, ::_Bin) = x >> 1
+_div(x::Integer, ::_Oct) = x >> 3
+_div(x::Integer, ::Union(_Hex, _HEX)) = x >> 4
+
+function _ndigits(x::Integer, op)  # x is assumed to be non-negative
+    ndig = 1                        # even zero occupies one digit
+    rest = _div(x, op)
+    while rest > 0                  # drop one digit (in the base of `op`) per pass
+        rest = _div(rest, op)
+        ndig += 1
+    end
+    ndig
+end
+
+_ipre(op) = ""  # _ipre: literal prefix for a base; empty unless one of the methods below applies
+_ipre(::Union(_Hex, _HEX)) = "0x"
+_ipre(::_Oct) = "0o"
+_ipre(::_Bin) = "0b"
+
+_digitchar(x::Integer, ::_Bin) = @compat Char(x == 0 ? '0' : '1')  # _digitchar: character for one digit value x
+_digitchar(x::Integer, ::_Dec) = @compat Char('0' + x)
+_digitchar(x::Integer, ::_Oct) = @compat Char('0' + x)
+_digitchar(x::Integer, ::_Hex) = @compat Char(x < 10 ? '0' + x : 'a' + (x - 10))  # lowercase letters for 10 and above
+_digitchar(x::Integer, ::_HEX) = @compat Char(x < 10 ? '0' + x : 'A' + (x - 10))  # uppercase letters for 10 and above
+
+function _signchar(x::Number, s::Char)  # sign character to print for x under sign option s; '\0' means none
+    x < 0 ? '-' : (s == '+' || s == ' ') ? s : '\0'
+end
+
+function _pfmt_int{Op}(out::IO, sch::Char, ip::ASCIIString, zs::Integer, ax::Integer, op::Op)  # write sign, prefix, zs zeros, then the digits of ax
+    # print sign
+    if sch != '\0'  # '\0' stands for "no sign character"
+        write(out, sch)
+    end
+    # print prefix
+    if !isempty(ip)
+        write(out, ip)
+    end
+    # print padding zeros
+    if zs > 0
+        _repwrite(out, '0', zs)
+    end
+    # print actual digits
+    if ax == 0
+        write(out, '0')
+    else
+        _pfmt_intdigits(out, ax, op)
+    end
+end
+
+function _pfmt_intdigits{Op,T<:Integer}(out::IO, ax::T, op::Op)  # write the digits of ax in the base of op, most significant first
+    b_lb = _div(ax, op)   
+    b = one(T)
+    while b <= b_lb  # grow b to the largest power of the base that does not exceed ax
+        b = _mul(b, op)
+    end
+    r = ax
+    while b > 0  # peel off one digit per pass, starting from the highest place value
+        (q, r) = divrem(r, b)
+        write(out, _digitchar(q, op))
+        b = _div(b, op)  # _div(1, op) is 0, which ends the loop after the last digit
+    end
+end
+
+function _pfmt_i{Op}(out::IO, fs::FormatSpec, x::Integer, op::Op)  # write integer x per spec fs, in the base of op
+    # calculate actual length
+    ax = abs(x)
+    xlen = _ndigits(abs(x), op)
+    # sign char
+    sch = _signchar(x, fs.sign)
+    if sch != '\0'
+        xlen += 1
+    end
+    # prefix (e.g. 0x, 0b, 0o)
+    ip = ""
+    if fs.ipre
+        ip = _ipre(op)
+        xlen += length(ip)
+    end
+
+    # printing
+    wid = fs.width
+    if wid <= xlen  # no room for padding
+        _pfmt_int(out, sch, ip, 0, ax, op)
+    elseif fs.zpad  # zeros go between the sign/prefix and the digits
+        _pfmt_int(out, sch, ip, wid-xlen, ax, op)
+    else
+        a = fs.align
+        if a == '<'  # left-aligned: fill characters follow the number
+            _pfmt_int(out, sch, ip, 0, ax, op)
+            _repwrite(out, fs.fill, wid-xlen)
+        else  # otherwise the fill characters come first
+            _repwrite(out, fs.fill, wid-xlen)
+            _pfmt_int(out, sch, ip, 0, ax, op)
+        end
+    end
+end
+
+
+### print floating point numbers
+
+function _pfmt_float(out::IO, sch::Char, zs::Integer, intv::Real, decv::Real, prec::Int)  # write sign, zs zeros, integer part, '.', and prec decimals
+    # print sign
+    if sch != '\0'
+        write(out, sch)
+    end
+    # print padding zeros
+    if zs > 0
+        _repwrite(out, '0', zs)
+    end
+    idecv = round(Integer, decv * exp10(prec))  # decimal part scaled to an integer of prec digits
+    if idecv == exp10(prec)  # rounding overflowed the decimals: carry into the integer part
+        intv += 1
+        idecv = 0
+    end
+    # print integer part
+    if intv == 0
+        write(out, '0')
+    else
+        _pfmt_intdigits(out, intv, _Dec())
+    end
+    # print decimal point
+    write(out, '.')  # written even when prec == 0
+    # print decimal part
+    if prec > 0
+        nd = _ndigits(idecv, _Dec())
+        if nd < prec  # leading zeros of the decimal part
+            _repwrite(out, '0', prec - nd)
+        end
+        _pfmt_intdigits(out, idecv, _Dec())
+    end
+end
+
+function _pfmt_f(out::IO, fs::FormatSpec, x::FloatingPoint)  # write x in fixed-point notation per spec fs
+    # separate sign, integer, and decimal part
+    ax = abs(x)
+    sch = _signchar(x, fs.sign)
+    intv = trunc(Integer, ax)
+    decv = ax - intv
+
+    # calculate length
+    xlen = _ndigits(intv, _Dec()) + 1 + fs.prec  # integer digits + '.' + decimals
+    if sch != '\0'
+        xlen += 1
+    end
+
+    # print
+    wid = fs.width
+    if wid <= xlen  # no room for padding
+        _pfmt_float(out, sch, 0, intv, decv, fs.prec)
+    elseif fs.zpad  # zeros go between the sign and the digits
+        _pfmt_float(out, sch, wid-xlen, intv, decv, fs.prec)
+    else
+        a = fs.align
+        if a == '<'  # left-aligned: fill characters follow the number
+            _pfmt_float(out, sch, 0, intv, decv, fs.prec)
+            _repwrite(out, fs.fill, wid-xlen)
+        else  # otherwise the fill characters come first
+            _repwrite(out, fs.fill, wid-xlen)
+            _pfmt_float(out, sch, 0, intv, decv, fs.prec)
+        end
+    end
+end
+
+function _pfmt_floate(out::IO, sch::Char, zs::Integer, u::Real, prec::Int, e::Int, ec::Char)
+    intv = trunc(Integer,u)  # leading digit of the significand u
+    decv = u - intv          # its decimal part
+    if round(Integer, decv * exp10(prec)) == exp10(prec)  # rounding the decimals carries into the leading digit
+        intv += 1
+        if intv == 10  # 9.99... rounded up to 10: renormalize to 1 and bump the exponent
+            intv = 1
+            e += 1
+        end
+        decv = 0.
+    end
+    _pfmt_float(out, sch, zs, intv, decv, prec)
+    write(out, ec)  # exponent marker, 'e' or 'E'
+    if e >= 0
+        write(out, '+')
+    else
+        write(out, '-')
+        e = -e
+    end
+    # exponent: at least two digits, all of them when it is 100 or more (e.g. 1e100)
+    e < 10 && write(out, '0')
+    write(out, string(e))
+end
+
+# Write x in exponent notation (d.ddde+XX) according to spec fs.
+function _pfmt_e(out::IO, fs::FormatSpec, x::FloatingPoint)
+    # extract sign, significand, and exponent
+    ax = abs(x)
+    sch = _signchar(x, fs.sign)
+    if ax == 0.0
+        e = 0
+        u = zero(x)
+    else
+        e = floor(Integer,log10(ax))  # exponent
+        u = ax / exp10(e)  # significand
+    end
+
+    # calculate length: digit, '.', decimals, marker, exponent sign, 2 exponent digits (3 when |e| >= 100)
+    xlen = 6 + fs.prec + (abs(e) >= 100 ? 1 : 0)
+    if sch != '\0'
+        xlen += 1
+    end 
+
+    # print
+    ec = isupper(fs.typ) ? 'E' : 'e'
+    wid = fs.width
+    if wid <= xlen  # no room for padding
+        _pfmt_floate(out, sch, 0, u, fs.prec, e, ec)
+    elseif fs.zpad  # zeros go between the sign and the digits
+        _pfmt_floate(out, sch, wid-xlen, u, fs.prec, e, ec)
+    else
+        a = fs.align
+        if a == '<'  # left-aligned: fill characters follow the number
+            _pfmt_floate(out, sch, 0, u, fs.prec, e, ec)
+            _repwrite(out, fs.fill, wid-xlen)
+        else  # otherwise the fill characters come first
+            _repwrite(out, fs.fill, wid-xlen)
+            _pfmt_floate(out, sch, 0, u, fs.prec, e, ec)
+        end
+    end
+end
+
+
+function _pfmt_g(out::IO, fs::FormatSpec, x::FloatingPoint)
+    # General format: magnitudes in [1e-4, 1e6) are written in fixed
+    # notation by _pfmt_f; every other value, zero included, goes to
+    # _pfmt_e for exponent notation.
+    mag = abs(x)
+    # the chained comparison is kept as is so that NaN still selects _pfmt_e
+    fixed = 1.0e-4 <= mag < 1.0e6
+    fixed ? _pfmt_f(out, fs, x) : _pfmt_e(out, fs, x)
+end
+
+function _pfmt_specialf(out::IO, fs::FormatSpec, x::FloatingPoint)
+    # Print a non-finite value as text ("Inf", "-Inf" or "NaN") through
+    # _pfmt_s, so the width, fill and alignment of `fs` still apply.
+    if isinf(x)
+        label = x > 0 ? "Inf" : "-Inf"
+    else
+        # callers are expected to pass only Inf or NaN values
+        @assert isnan(x)
+        label = "NaN"
+    end
+    _pfmt_s(out, fs, label)
+end
+
+
+
diff --git a/src/fmtspec.jl b/src/fmtspec.jl
new file mode 100644
index 0000000..a372167
--- /dev/null
+++ b/src/fmtspec.jl
@@ -0,0 +1,213 @@
+# formatting specification
+
+# formatting specification language
+#
+#  spec  ::= [[fill]align][sign][#][0][width][,][.prec][type]
+#  fill  ::= <any character>
+#  align ::= '<' | '>'
+#  sign  ::= '+' | '-' | ' '
+#  width ::= <integer>
+#  prec  ::= <integer>
+#  type  ::= 'b' | 'c' | 'd' | 'e' | 'E' | 'f' | 'F' | 'g' | 'G' |
+#            'n' | 'o' | 'x' | 'X' | 's'
+#
+# Please refer to http://docs.python.org/2/library/string.html#formatspec
+# for more details
+#
+
+## FormatSpec type
+
+const _numtypchars = Set(['b', 'd', 'e', 'E', 'f', 'F', 'g', 'G', 'n', 'o', 'x', 'X'])
+
+function _tycls(c::Char)  # category of a lowercase type char
+    c in ('d', 'n', 'b', 'o', 'x') && return 'i'   # integer formats
+    c in ('e', 'f', 'g') && return 'f'             # floating-point formats
+    (c == 'c' || c == 's') && return c             # char and string are their own category
+    error("Invalid type char $(c)")
+end
+
+immutable FormatSpec
+    cls::Char    # category: 'i' | 'f' | 'c' | 's'
+    typ::Char    # type char exactly as given (case preserved)
+    fill::Char   # padding character
+    align::Char  # alignment char; '\0' passed to the constructor is replaced by '<' or '>'
+    sign::Char   # sign option char
+    width::Int   # field width; defaults to -1
+    prec::Int    # precision; defaults to -1, which becomes 6 for the 'f' category
+    ipre::Bool   # whether to prefix 0b, 0o, or 0x
+    zpad::Bool   # whether to do zero-padding
+    tsep::Bool   # whether to use thousand-separator
+
+    function FormatSpec(typ::Char;  # build a spec from a type char plus keyword options
+               fill::Char=' ', 
+               align::Char='\0',
+               sign::Char='-',
+               width::Int=-1,
+               prec::Int=-1,
+               ipre::Bool=false,
+               zpad::Bool=false,
+               tsep::Bool=false)
+
+        if align=='\0'  # no explicit alignment: numeric type chars go right, the rest left
+            align = (typ in _numtypchars) ? '>' : '<'
+        end
+        cls = _tycls(lowercase(typ))  # raises an error for an unknown type char
+        if cls == 'f' && prec < 0  # float formats without a precision get 6
+            prec = 6
+        end
+        new(cls, typ, fill, align, sign, width, prec, ipre, zpad, tsep)
+    end
+
+    # copy constructor with overrides
+    function FormatSpec(spec::FormatSpec;
+               fill::Char=spec.fill,
+               align::Char=spec.align,
+               sign::Char=spec.sign,
+               width::Int=spec.width,
+               prec::Int=spec.prec,
+               ipre::Bool=spec.ipre,
+               zpad::Bool=spec.zpad,
+               tsep::Bool=spec.tsep)
+        new(spec.cls, spec.typ, fill, align, sign, width, prec, ipre, zpad, tsep)  # cls/typ carried over as-is
+    end
+end
+
+function show(io::IO, fs::FormatSpec)
+    # One header line with the type name, then one "  name = value" line per field.
+    println(io, "$(typeof(fs))")
+    fields = (("cls  ", fs.cls), ("typ  ", fs.typ), ("fill ", fs.fill),
+              ("align", fs.align), ("sign ", fs.sign), ("width", fs.width),
+              ("prec ", fs.prec), ("ipre ", fs.ipre), ("zpad ", fs.zpad),
+              ("tsep ", fs.tsep))
+    for (label, value) in fields
+        # values go through print, exactly as string interpolation would render them
+        println(io, "  ", label, " = ", value)
+    end
+    return nothing
+end
+
+## parse FormatSpec from a string
+
+const _spec_regex = r"^(.?[<>])?([ +-])?(#)?(\d+)?(,)?(.\d+)?([bcdeEfFgGnosxX])?$"
+
+function FormatSpec(s::String)
+    # default spec
+    _fill = ' '
+    _align = '\0'
+    _sign = '-'
+    _width = -1      # -1 means "not given"
+    _prec = -1       # -1 means "not given"
+    _ipre = false
+    _zpad = false
+    _tsep = false
+    _typ = 's'       # an empty or type-less spec formats as a string
+
+    if !isempty(s)
+        m = match(_spec_regex, s)
+        if m === nothing
+            error("Invalid formatting spec: $(s)")
+        end
+        (a1, a2, a3, a4, a5, a6, a7) = m.captures  # unmatched optional groups are `nothing`
+
+        # a1: [[fill]align]
+        if a1 !== nothing
+            if length(a1) == 1
+                _align = a1[1]
+            else
+                _fill = a1[1]
+                _align = a1[2]
+            end
+        end
+
+        # a2: [sign]
+        if a2 !== nothing
+            _sign = a2[1]
+        end
+
+        # a3: [#]
+        if a3 !== nothing
+            _ipre = true
+        end
+
+        # a4: [0][width] -- a leading '0' requests zero padding and is not part of the width
+        if a4 !== nothing
+            if a4[1] == '0'
+                _zpad = true
+                if length(a4) > 1
+                    _width = parse(Int,a4[2:end])
+                end
+            else
+                _width = parse(Int,a4)
+            end
+        end
+
+        # a5: [,]
+        if a5 !== nothing
+            _tsep = true
+        end
+
+        # a6: [.prec]; the pattern's '.' is an unescaped wildcard, so the dot is checked here
+        if a6 !== nothing
+            _prec = a6[1] == '.' ? parse(Int,a6[2:end]) : error("Invalid formatting spec: $(s)")
+        end
+
+        # a7: [type]
+        if a7 !== nothing
+            _typ = a7[1]
+        end
+    end
+
+    return FormatSpec(_typ;
+                      fill=_fill,
+                      align=_align,
+                      sign=_sign,
+                      width=_width,
+                      prec=_prec,
+                      ipre=_ipre,
+                      zpad=_zpad,
+                      tsep=_tsep)
+end
+
+
+## formatted printing using a format spec
+
+type _Dec end
+type _Oct end
+type _Hex end
+type _HEX end
+type _Bin end
+
+_srepr(x) = repr(x)
+_srepr(x::String) = x
+_srepr(x::Char) = string(x)
+
+function printfmt(io::IO, fs::FormatSpec, x)
+    cls = fs.cls
+    ty = fs.typ
+    if cls == 'i'
+        ix = @compat Integer(x)
+        ty == 'd' || ty == 'n' ? _pfmt_i(io, fs, ix, _Dec()) :
+        ty == 'x' ? _pfmt_i(io, fs, ix, _Hex()) :
+        ty == 'X' ? _pfmt_i(io, fs, ix, _HEX()) :
+        ty == 'o' ? _pfmt_i(io, fs, ix, _Oct()) :
+        _pfmt_i(io, fs, ix, _Bin())  # any other integer-class type char is printed as binary
+    elseif cls == 'f'
+        fx = float(x)  # the float formatters below work on the converted value
+        if isfinite(fx)
+            ty == 'f' || ty == 'F' ? _pfmt_f(io, fs, fx) :
+            ty == 'e' || ty == 'E' ? _pfmt_e(io, fs, fx) :
+            error("format for type g or G is not supported yet (use f or e instead).")
+        else  # Inf, -Inf or NaN
+            _pfmt_specialf(io, fs, fx)
+        end
+    elseif cls == 's'
+        _pfmt_s(io, fs, _srepr(x))
+    else # cls == 'c'
+        _pfmt_s(io, fs, @compat Char(x))
+    end
+end
+
+printfmt(fs::FormatSpec, x) = printfmt(STDOUT, fs, x)
+
+cfmt(fs::FormatSpec, x) = (buf = IOBuffer(); printfmt(buf, fs, x); bytestring(buf))
+cfmt(spec::String, x) = cfmt(FormatSpec(spec), x)
diff --git a/src/formatexpr.jl b/src/formatexpr.jl
new file mode 100644
index 0000000..7dee0c4
--- /dev/null
+++ b/src/formatexpr.jl
@@ -0,0 +1,170 @@
+# formatting expression
+
+### Argument specification
+
+immutable ArgSpec
+    argidx::Int       # index into the argument list of the value to format (never zero)
+    hasfilter::Bool   # whether `filter` is applied to the argument before formatting
+    filter::Function  # function applied to the argument when hasfilter is true
+
+    function ArgSpec(idx::Int, hasfil::Bool, filter::Function)
+        idx != 0 || error("Argument index cannot be zero.")  # zero is rejected outright
+        new(idx, hasfil, filter)
+    end
+end
+
+getarg(args, sp::ArgSpec) =  # selected argument, passed through sp.filter when hasfilter is set
+    sp.hasfilter ? sp.filter(args[sp.argidx]) : args[sp.argidx]
+
+# pos > 0: must not have iarg in expression (use pos+1), return (entry, pos + 1)
+# pos < 0: must have iarg in expression, return (entry, -1)
+# pos = 0: no positional argument before, can be either, return (entry, 1) or (entry, -1)
+function make_argspec(s::String, pos::Int)
+    # for argument position
+    iarg::Int = -1
+    hasfil::Bool = false
+    ff::Function = Base.identity  # placeholder used when the entry names no filter
+
+    if !isempty(s)
+        ifil = searchindex(s, "|>")  # 0 when the entry has no filter part
+        if ifil == 0
+            iarg = parse(Int,s)  # the entry is just an argument index
+        else
+            iarg = ifil > 1 ? parse(Int,s[1:ifil-1]) : -1  # the index before "|>" is optional
+            hasfil = true
+            ff = eval(symbol(s[ifil+2:end]))  # NOTE(review): evals a name taken from the format string; only safe for trusted input
+        end
+    end
+
+    if pos > 0  # earlier entries were auto-numbered
+        iarg < 0 || error("entry with and without argument index must not coexist.")
+        iarg = (pos += 1)  # take the next sequential argument
+    elseif pos < 0  # earlier entries carried explicit indices
+        iarg > 0 || error("entry with and without argument index must not coexist.")
+    else # pos == 0
+        if iarg < 0  # first entry has no index: start auto-numbering at 1
+            iarg = pos = 1
+        else  # first entry has an index: switch to explicit mode
+            pos = -1
+        end
+    end 
+
+    return (ArgSpec(iarg, hasfil, ff), pos)
+end
+
+
+### Format entry
+
+immutable FormatEntry
+    argspec::ArgSpec
+    spec::FormatSpec
+end
+
+function make_formatentry(s::String, pos::Int)
+    @assert s[1] == '{' && s[end] == '}'
+    sc = s[2:end-1]
+    icolon = search(sc, ':')
+    if icolon == 0  # no colon: the whole body is the argument part
+        (argspec, pos) = make_argspec(sc, pos)
+        spec = FormatSpec('s')  # default to plain string formatting
+    else  # "arg:spec" form, split at the first colon
+        (argspec, pos) = make_argspec(sc[1:icolon-1], pos)
+        spec = FormatSpec(sc[icolon+1:end])
+    end
+    return (FormatEntry(argspec, spec), pos)  # pos is the updated state returned by make_argspec
+end
+
+
+### Format expression
+
+type FormatExpr
+    prefix::UTF8String            # literal text before the first entry
+    suffix::UTF8String            # literal text after the last entry
+    entries::Vector{FormatEntry}  # parsed {...} entries, in order of appearance
+    inter::Vector{UTF8String}     # literal text between consecutive entries
+end
+
+_raise_unmatched_lbrace() = error("Unmatched { in format expression.")
+
+function find_next_entry_open(s::String, si::Int)
+    slen = length(s)
+    p = search(s, '{', si)
+    p < slen || _raise_unmatched_lbrace()
+    while p > 0 && s[p+1] == '{'  # escape `{{`
+        p = search(s, '{', p+2)  # resume the search after the escaped pair
+        p < slen || _raise_unmatched_lbrace()
+    end
+    # NOTE(review): slen is a character count while p is a string index; these may disagree for non-ASCII text -- TODO confirm
+    pre = p > 0 ? s[si:p-1] : s[si:end]  # literal text before the entry, or the rest of s when none was found
+    if !isempty(pre)  # collapse doubled braces in the literal text
+        pre = replace(pre, "{{", '{')
+        pre = replace(pre, "}}", '}')
+    end
+    return (p, utf8(pre))
+end
+
+function find_next_entry_close(s::String, si::Int)
+    slen = length(s)
+    p = search(s, '}', si)
+    p > 0 || _raise_unmatched_lbrace()
+    # p is the position of the first '}' found searching from si; a missing '}' has already raised
+    return p
+end
+
+function FormatExpr(s::String)
+    slen = length(s)
+    
+    # init
+    entries = FormatEntry[]   # parsed {...} entries
+    inter = UTF8String[]      # literal text between consecutive entries
+    suffix = utf8("")         # stays empty when the string has no entries
+    pos = 0                   # argument-numbering state threaded through make_formatentry
+
+    # scan: everything before the first `{` is the prefix
+    (p, prefix) = find_next_entry_open(s, 1)
+    while p > 0
+        # s[p:q] spans one entry, braces included
+        q = find_next_entry_close(s, p+1)
+        (e, pos) = make_formatentry(s[p:q], pos)
+        push!(entries, e)
+
+        # The literal text after this entry separates two entries when another
+        # `{` follows; otherwise it is the trailing suffix of the whole expression.
+        (p, suffix) = find_next_entry_open(s, q+1)
+        if p > 0
+            push!(inter, suffix)
+        end
+    end
+
+    FormatExpr(prefix, suffix, entries, inter)
+end
+
+function printfmt(io::IO, fe::FormatExpr, args...)
+    # Emission order: prefix, entry 1, inter[1], entry 2, ..., last entry, suffix.
+    # Empty prefix/suffix strings are skipped rather than written.
+    if !isempty(fe.prefix)
+        write(io, fe.prefix)
+    end
+
+    for (k, entry) in enumerate(fe.entries)
+        # separator text precedes every entry except the first
+        if k > 1
+            write(io, fe.inter[k-1])
+        end
+        printfmt(io, entry.spec, getarg(args, entry.argspec))
+    end
+
+    if !isempty(fe.suffix)
+        write(io, fe.suffix)
+    end
+end
+
+printfmt(io::IO, fe::String, args...) = printfmt(io, FormatExpr(fe), args...)
+printfmt(fe::Union(String,FormatExpr), args...) = printfmt(STDOUT, fe, args...)
+
+printfmtln(io::IO, fe::Union(String,FormatExpr), args...) = (printfmt(io, fe, args...); println(io))
+printfmtln(fe::Union(String,FormatExpr), args...) = printfmtln(STDOUT, fe, args...)
+
+format(fe::Union(String,FormatExpr), args...) = 
+    (buf = IOBuffer(); printfmt(buf, fe, args...); bytestring(buf))
+
diff --git a/src/literals.jl b/src/literals.jl
index 7178e4c..5497129 100644
--- a/src/literals.jl
+++ b/src/literals.jl
@@ -165,9 +165,9 @@ function s_interp_parse(s::AbstractString, unescape::Function, p::Function)
                     is(ex.head, :continue) && throw(ParseError("Incomplete expression"))
                     # Need to wrap call to fmt around expression
                     if ex.head == :tuple
-                        push!(sx, esc(:(fmt(ex.args...))))
+                        push!(sx, esc(:(fmt($(ex.args...)))))
                     else
-                        push!(sx, esc(:(fmt(ex.args[1]))))
+                        push!(sx, esc(:(fmt($(ex.args[1])))))
                     end
                 else
                     push!(sx, esc(:(fmt($ex))))

From be97d208ce2bfe3ea4895ab8d4368ff4a4db0867 Mon Sep 17 00:00:00 2001
From: ScottPJones <scottjones@alum.mit.edu>
Date: Thu, 18 Feb 2016 20:15:07 -0500
Subject: [PATCH 3/4] Fix deprecations in code from Formatting.jl & Tom's PR

---
 src/cformat.jl    |  10 +--
 src/fmt.jl        | 165 +++++++++++++++++++++++-----------------------
 src/fmtcore.jl    |  30 ++++-----
 src/fmtspec.jl    |  10 +--
 src/formatexpr.jl |  21 +++---
 5 files changed, 117 insertions(+), 119 deletions(-)

diff --git a/src/cformat.jl b/src/cformat.jl
index 1d9479a..9e28f1a 100644
--- a/src/cformat.jl
+++ b/src/cformat.jl
@@ -6,10 +6,6 @@ function sprintf1( fmt::ASCIIString, x )
     f( x )
 end
 
-if VERSION < v"0.4-"
-    const base64encode = base64
-end
-
 function generate_formatter( fmt::ASCIIString )
     global formatters
     if haskey( formatters, fmt )
@@ -153,11 +149,11 @@ function format{T<:Real}( x::T;
         parens::Bool=false, # use (1.00) instead of -1.00. Used in finance
         alternative::Bool=false, # usually for hex
         mixedfraction::Bool=false,
-        mixedfractionsep::String="_",
-        fractionsep::String="/", # num / den
+        mixedfractionsep::UTF8String="_",
+        fractionsep::UTF8String="/", # num / den
         fractionwidth::Int = 0,
         tryden::Int = 0, # if 2 or higher, try to use this denominator, without losing precision
-        suffix::String="", # useful for units/%
+        suffix::UTF8String="", # useful for units/%
         autoscale::Symbol=:none, # :metric, :binary or :finance
         conversion::ASCIIString=""
         )
diff --git a/src/fmt.jl b/src/fmt.jl
index 41928fc..b9d0c6e 100644
--- a/src/fmt.jl
+++ b/src/fmt.jl
@@ -11,9 +11,9 @@
 # we keep the typechar around specically for the reset! function, to go back to the starting state
 
 type DefaultSpec
-  typechar::Char
-  fspec::FormatSpec
-  DefaultSpec(c::Char) = new(c, FormatSpec(c))
+    typechar::Char
+    fspec::FormatSpec
+    DefaultSpec(c::Char) = new(c, FormatSpec(c))
 end
 
 const DEFAULT_FORMATTERS = Dict{DataType, DefaultSpec}()
@@ -25,8 +25,8 @@ defaultSpec!{T}(::Type{T}, c::Char) = (DEFAULT_FORMATTERS[T] = DefaultSpec(c); n
 defaultSpec!{T,K}(::Type{T}, ::Type{K}) = (DEFAULT_FORMATTERS[T] = DEFAULT_FORMATTERS[K]; nothing)
 
 # seed it with some basic default formatters
-for (t, c) in [(Integer,'d'), (FloatingPoint,'f'), (Char,'c'), (String,'s')]
-  defaultSpec!(t, c)
+for (t, c) in [(Integer,'d'), (AbstractFloat,'f'), (Char,'c'), (AbstractString,'s')]
+    defaultSpec!(t, c)
 end
 
 reset!{T}(::Type{T}) = (dspec = defaultSpec(T); dspec.fspec = FormatSpec(dspec.typechar); nothing)
@@ -36,34 +36,34 @@ reset!{T}(::Type{T}) = (dspec = defaultSpec(T); dspec.fspec = FormatSpec(dspec.t
 
 
 function addKWArgsFromSymbols(kwargs, syms::Symbol...)
-  d = Dict(kwargs)
-  for s in syms
-    if s == :ljust || s == :left
-      d[:align] = '<'
-    elseif s == :rjust || s == :right
-      d[:align] = '>'
-    elseif s == :commas
-      d[:tsep] = true
-    elseif s == :zpad || s == :zeropad
-      d[:zpad] = true
-    elseif s == :ipre || s == :prefix
-      d[:ipre] = true
+    d = Dict(kwargs)
+    for s in syms
+        if s == :ljust || s == :left
+            d[:align] = '<'
+        elseif s == :rjust || s == :right
+            d[:align] = '>'
+        elseif s == :commas
+            d[:tsep] = true
+        elseif s == :zpad || s == :zeropad
+            d[:zpad] = true
+        elseif s == :ipre || s == :prefix
+            d[:ipre] = true
+        end
     end
-  end
-  d
+    d
 end
 
 # --------------------------------------------------------------------------------------------------
 
 # methods to get the current default objects
-# note: if you want to set a default for an abstract type (i.e. FloatingPoint) you'll need to extend this method like here:
+# note: if you want to set a default for an abstract type (i.e. AbstractFloat) you'll need to extend this method like here:
 defaultSpec{T<:Integer}(::Type{T}) = DEFAULT_FORMATTERS[Integer]
-defaultSpec{T<:FloatingPoint}(::Type{T}) = DEFAULT_FORMATTERS[FloatingPoint]
-defaultSpec{T<:String}(::Type{T}) = DEFAULT_FORMATTERS[String]
+defaultSpec{T<:AbstractFloat}(::Type{T}) = DEFAULT_FORMATTERS[AbstractFloat]
+defaultSpec{T<:AbstractString}(::Type{T}) = DEFAULT_FORMATTERS[AbstractString]
 function defaultSpec{T}(::Type{T})
-  get(DEFAULT_FORMATTERS, T) do
-    error("Missing default spec for type $T... call default!(T, c): $DEFAULT_FORMATTERS")
-  end
+    get(DEFAULT_FORMATTERS, T) do
+        error("Missing default spec for type $T... call default!(T, c): $DEFAULT_FORMATTERS")
+    end
 end
 defaultSpec(x) = defaultSpec(typeof(x))
 
@@ -78,69 +78,68 @@ fmt_default!{T,K}(::Type{T}, ::Type{K}, args...; kwargs...) = (defaultSpec!(T,K)
 
 # update the fmt_default for a specific type
 function fmt_default!{T}(::Type{T}, syms::Symbol...; kwargs...)
-  if isempty(syms)
+    if isempty(syms)
 
-    # if there are no arguments, reset to initial defaults
-    if isempty(kwargs)
-      reset!(T)
-      return
-    end
+        # if there are no arguments, reset to initial defaults
+        if isempty(kwargs)
+            reset!(T)
+            return
+        end
 
-    # otherwise update the spec
-    dspec = defaultSpec(T)
-    dspec.fspec = FormatSpec(dspec.fspec; kwargs...)
+        # otherwise update the spec
+        dspec = defaultSpec(T)
+        dspec.fspec = FormatSpec(dspec.fspec; kwargs...)
 
-  else
-    d = addKWArgsFromSymbols(kwargs, syms...)
-    fmt_default!(T; d...)
-  end
-  nothing
+    else
+        d = addKWArgsFromSymbols(kwargs, syms...)
+        fmt_default!(T; d...)
+    end
+    nothing
 end
 
 # update the fmt_default for all types
 function fmt_default!(syms::Symbol...; kwargs...)
-  if isempty(syms)
-    for k in keys(DEFAULT_FORMATTERS)
-      fmt_default!(k; kwargs...)
+    if isempty(syms)
+        for k in keys(DEFAULT_FORMATTERS)
+            fmt_default!(k; kwargs...)
+        end
+    else
+        d = addKWArgsFromSymbols(kwargs, syms...)
+        fmt_default!(; d...)
     end
-  else
-    d = addKWArgsFromSymbols(kwargs, syms...)
-    fmt_default!(; d...)
-  end
-  nothing
+    nothing
 end
 
-
 # --------------------------------------------------------------------------------------------------
 
 # TODO: get rid of this entire hack by moving commas into cfmt
 
-function optionalCommas(x::Real, s::String, fspec::FormatSpec)
-  dpos = findfirst(s, '.')
-  prevwidth = length(s)
-
-  if dpos == 0
-    s = addcommas(s)
-  else
-    s = string(addcommas(s[1:dpos-1]), '.', s[dpos+1:end])
-  end
-
-  # check for excess width from commas
-  w = length(s)
-  if fspec.width > 0 && w > fspec.width && w > prevwidth
-    # we may have made the string too wide with those commas... gotta fix it
-    s = strip(s)
-    n = fspec.width - length(s)
-    if fspec.align == '<' # left alignment
-      s = string(s, " "^n)
+function optionalCommas(x::Real, s::AbstractString, fspec::FormatSpec)
+    dpos = findfirst(s, '.')
+    prevwidth = length(s)
+
+    if dpos == 0
+        s = addcommas(s)
     else
-      s = string(" "^n, s)
-    end 
-  end
+        s = string(addcommas(s[1:dpos-1]), '.', s[dpos+1:end])
+    end
+
+    # check for excess width from commas
+    w = length(s)
+    if fspec.width > 0 && w > fspec.width && w > prevwidth
+        # we may have made the string too wide with those commas... gotta fix it
+        s = strip(s)
+        n = fspec.width - length(s)
+        if fspec.align == '<' # left alignment
+            s = string(s, " "^n)
+        else
+            s = string(" "^n, s)
+        end 
+    end
 
-  s
+    s
 end
-optionalCommas(x, s::String, fspec::FormatSpec) = s
+optionalCommas(x, s::AbstractString, fspec::FormatSpec) = s
 
 # --------------------------------------------------------------------------------------------------
 
@@ -151,23 +150,25 @@ optionalCommas(x, s::String, fspec::FormatSpec) = s
 # note: adding kwargs is only appropriate for one-off formatting.  
 #       normally it will be much faster to change the fmt_default formatting as needed
 function fmt(x; kwargs...)
-  fspec = isempty(kwargs) ? fmt_default(x) : FormatSpec(fmt_default(x); kwargs...)
-  s = cfmt(fspec, x)
-
-  # add the commas now... I was confused as to when this is done currently
-  if fspec.tsep
-    return optionalCommas(x, s, fspec)
-  end
-  s
+    fspec = isempty(kwargs) ? fmt_default(x) : FormatSpec(fmt_default(x); kwargs...)
+    s = cfmt(fspec, x)
+
+    # add the commas now... I was confused as to when this is done currently
+    if fspec.tsep
+        return optionalCommas(x, s, fspec)
+    end
+    s
 end
 
 # some helper method calls, which just convert to kwargs
 fmt(x, prec::Int, args...; kwargs...) = fmt(x, args...; prec=prec, kwargs...)
-fmt(x, prec::Int, width::Int, args...; kwargs...) = fmt(x, args...; prec=prec, width=width, kwargs...)
+
+fmt(x, prec::Int, width::Int, args...; kwargs...) =
+    fmt(x, args...; prec=prec, width=width, kwargs...)
 
 # integrate some symbol shorthands into the keyword args
 # note: as above, this will generate relavent kwargs, so to format in a tight loop, you should probably update the fmt_default
 function fmt(x, syms::Symbol...; kwargs...)
-  d = addKWArgsFromSymbols(kwargs, syms...)
-  fmt(x; d...)
+    d = addKWArgsFromSymbols(kwargs, syms...)
+    fmt(x; d...)
 end
diff --git a/src/fmtcore.jl b/src/fmtcore.jl
index a0fb5ee..cfad805 100644
--- a/src/fmtcore.jl
+++ b/src/fmtcore.jl
@@ -12,7 +12,7 @@ end
 
 ### print string or char
 
-function _pfmt_s(out::IO, fs::FormatSpec, s::Union(String,Char))
+function _pfmt_s(out::IO, fs::FormatSpec, s::Union{AbstractString,Char})
     wid = fs.width
     slen = length(s)
     if wid <= slen
@@ -35,12 +35,12 @@ end
 _mul(x::Integer, ::_Dec) = x * 10
 _mul(x::Integer, ::_Bin) = x << 1
 _mul(x::Integer, ::_Oct) = x << 3
-_mul(x::Integer, ::Union(_Hex, _HEX)) = x << 4
+_mul(x::Integer, ::Union{_Hex, _HEX}) = x << 4
 
 _div(x::Integer, ::_Dec) = div(x, 10)
 _div(x::Integer, ::_Bin) = x >> 1
 _div(x::Integer, ::_Oct) = x >> 3
-_div(x::Integer, ::Union(_Hex, _HEX)) = x >> 4
+_div(x::Integer, ::Union{_Hex, _HEX}) = x >> 4
 
 function _ndigits(x::Integer, op)  # suppose x is non-negative
     m = 1
@@ -53,15 +53,15 @@ function _ndigits(x::Integer, op)  # suppose x is non-negative
 end
 
 _ipre(op) = ""
-_ipre(::Union(_Hex, _HEX)) = "0x"
+_ipre(::Union{_Hex, _HEX}) = "0x"
 _ipre(::_Oct) = "0o"
 _ipre(::_Bin) = "0b"
 
-_digitchar(x::Integer, ::_Bin) = @compat Char(x == 0 ? '0' : '1')
-_digitchar(x::Integer, ::_Dec) = @compat Char('0' + x)
-_digitchar(x::Integer, ::_Oct) = @compat Char('0' + x)
-_digitchar(x::Integer, ::_Hex) = @compat Char(x < 10 ? '0' + x : 'a' + (x - 10))
-_digitchar(x::Integer, ::_HEX) = @compat Char(x < 10 ? '0' + x : 'A' + (x - 10))
+_digitchar(x::Integer, ::_Bin) = Char(x == 0 ? '0' : '1')
+_digitchar(x::Integer, ::_Dec) = Char('0' + x)
+_digitchar(x::Integer, ::_Oct) = Char('0' + x)
+_digitchar(x::Integer, ::_Hex) = Char(x < 10 ? '0' + x : 'a' + (x - 10))
+_digitchar(x::Integer, ::_HEX) = Char(x < 10 ? '0' + x : 'A' + (x - 10))
 
 _signchar(x::Number, s::Char) = x < 0 ? '-' :
                                 s == '+' ? '+' :
@@ -171,7 +171,7 @@ function _pfmt_float(out::IO, sch::Char, zs::Integer, intv::Real, decv::Real, pr
     end
 end
 
-function _pfmt_f(out::IO, fs::FormatSpec, x::FloatingPoint)
+function _pfmt_f(out::IO, fs::FormatSpec, x::AbstractFloat)
     # separate sign, integer, and decimal part
     ax = abs(x)
     sch = _signchar(x, fs.sign)
@@ -222,12 +222,12 @@ function _pfmt_floate(out::IO, sch::Char, zs::Integer, u::Real, prec::Int, e::In
         e = -e
     end
     (e1, e2) = divrem(e, 10)
-    write(out, @compat Char('0' + e1))
-    write(out, @compat Char('0' + e2))
+    write(out, Char('0' + e1))
+    write(out, Char('0' + e2))
 end
 
 
-function _pfmt_e(out::IO, fs::FormatSpec, x::FloatingPoint)
+function _pfmt_e(out::IO, fs::FormatSpec, x::AbstractFloat)
     # extract sign, significand, and exponent
     ax = abs(x)
     sch = _signchar(x, fs.sign)
@@ -265,7 +265,7 @@ function _pfmt_e(out::IO, fs::FormatSpec, x::FloatingPoint)
 end
 
 
-function _pfmt_g(out::IO, fs::FormatSpec, x::FloatingPoint)
+function _pfmt_g(out::IO, fs::FormatSpec, x::AbstractFloat)
     # number decomposition
     ax = abs(x)
     if 1.0e-4 <= ax < 1.0e6
@@ -275,7 +275,7 @@ function _pfmt_g(out::IO, fs::FormatSpec, x::FloatingPoint)
     end
 end
 
-function _pfmt_specialf(out::IO, fs::FormatSpec, x::FloatingPoint)
+function _pfmt_specialf(out::IO, fs::FormatSpec, x::AbstractFloat)
     if isinf(x) 
         if x > 0
             _pfmt_s(out, fs, "Inf")
diff --git a/src/fmtspec.jl b/src/fmtspec.jl
index a372167..d6ef878 100644
--- a/src/fmtspec.jl
+++ b/src/fmtspec.jl
@@ -90,7 +90,7 @@ end
 
 const _spec_regex = r"^(.?[<>])?([ +-])?(#)?(\d+)?(,)?(.\d+)?([bcdeEfFgGnosxX])?$"
 
-function FormatSpec(s::String)
+function FormatSpec(s::AbstractString)
     # default spec
     _fill = ' '
     _align = '\0'
@@ -178,14 +178,14 @@ type _HEX end
 type _Bin end
 
 _srepr(x) = repr(x)
-_srepr(x::String) = x
+_srepr(x::AbstractString) = x
 _srepr(x::Char) = string(x)
 
 function printfmt(io::IO, fs::FormatSpec, x)
     cls = fs.cls
     ty = fs.typ
     if cls == 'i'
-        ix = @compat Integer(x)
+        ix = Integer(x)
         ty == 'd' || ty == 'n' ? _pfmt_i(io, fs, ix, _Dec()) :
         ty == 'x' ? _pfmt_i(io, fs, ix, _Hex()) :
         ty == 'X' ? _pfmt_i(io, fs, ix, _HEX()) :
@@ -203,11 +203,11 @@ function printfmt(io::IO, fs::FormatSpec, x)
     elseif cls == 's'
         _pfmt_s(io, fs, _srepr(x))
     else # cls == 'c'
-        _pfmt_s(io, fs, @compat Char(x))
+        _pfmt_s(io, fs, Char(x))
     end
 end
 
 printfmt(fs::FormatSpec, x) = printfmt(STDOUT, fs, x)
 
 cfmt(fs::FormatSpec, x) = (buf = IOBuffer(); printfmt(buf, fs, x); bytestring(buf))
-cfmt(spec::String, x) = cfmt(FormatSpec(spec), x)
+cfmt(spec::AbstractString, x) = cfmt(FormatSpec(spec), x)
diff --git a/src/formatexpr.jl b/src/formatexpr.jl
index 7dee0c4..ecbf0ef 100644
--- a/src/formatexpr.jl
+++ b/src/formatexpr.jl
@@ -19,7 +19,7 @@ getarg(args, sp::ArgSpec) =
 # pos > 0: must not have iarg in expression (use pos+1), return (entry, pos + 1)
 # pos < 0: must have iarg in expression, return (entry, -1)
 # pos = 0: no positional argument before, can be either, return (entry, 1) or (entry, -1)
-function make_argspec(s::String, pos::Int)
+function make_argspec(s::AbstractString, pos::Int)
     # for argument position
     iarg::Int = -1
     hasfil::Bool = false
@@ -60,7 +60,7 @@ immutable FormatEntry
     spec::FormatSpec
 end
 
-function make_formatentry(s::String, pos::Int)
+function make_formatentry(s::AbstractString, pos::Int)
     @assert s[1] == '{' && s[end] == '}'
     sc = s[2:end-1]
     icolon = search(sc, ':')
@@ -86,7 +86,7 @@ end
 
 _raise_unmatched_lbrace() = error("Unmatched { in format expression.")
 
-function find_next_entry_open(s::String, si::Int)
+function find_next_entry_open(s::AbstractString, si::Int)
     slen = length(s)
     p = search(s, '{', si)
     p < slen || _raise_unmatched_lbrace()
@@ -103,7 +103,7 @@ function find_next_entry_open(s::String, si::Int)
     return (p, utf8(pre))
 end
 
-function find_next_entry_close(s::String, si::Int)
+function find_next_entry_close(s::AbstractString, si::Int)
     slen = length(s)
     p = search(s, '}', si)
     p > 0 || _raise_unmatched_lbrace()
@@ -111,7 +111,7 @@ function find_next_entry_close(s::String, si::Int)
     return p
 end
 
-function FormatExpr(s::String)
+function FormatExpr(s::AbstractString)
     slen = length(s)
     
     # init
@@ -159,12 +159,13 @@ function printfmt(io::IO, fe::FormatExpr, args...)
     end
 end
 
-printfmt(io::IO, fe::String, args...) = printfmt(io, FormatExpr(fe), args...)
-printfmt(fe::Union(String,FormatExpr), args...) = printfmt(STDOUT, fe, args...)
+typealias StringOrFE Union{AbstractString,FormatExpr}
+printfmt(io::IO, fe::AbstractString, args...) = printfmt(io, FormatExpr(fe), args...)
+printfmt(fe::StringOrFE, args...) = printfmt(STDOUT, fe, args...)
 
-printfmtln(io::IO, fe::Union(String,FormatExpr), args...) = (printfmt(io, fe, args...); println(io))
-printfmtln(fe::Union(String,FormatExpr), args...) = printfmtln(STDOUT, fe, args...)
+printfmtln(io::IO, fe::StringOrFE, args...) = (printfmt(io, fe, args...); println(io))
+printfmtln(fe::StringOrFE, args...) = printfmtln(STDOUT, fe, args...)
 
-format(fe::Union(String,FormatExpr), args...) = 
+format(fe::StringOrFE, args...) = 
     (buf = IOBuffer(); printfmt(buf, fe, args...); bytestring(buf))
 

From 3b6484b6d0b719458fdd27aa584eb66d8ee80686 Mon Sep 17 00:00:00 2001
From: ScottPJones <scottjones@alum.mit.edu>
Date: Thu, 18 Feb 2016 20:50:44 -0500
Subject: [PATCH 4/4] Fix support for calling fmt and cfmt functions

Add Python style Unicode names
---
 src/StringUtils.jl  |   1 +
 src/fmt.jl          |   6 +-
 src/literals.jl     |  57 ++++++++++----
 src/unicodenames.jl | 185 ++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 230 insertions(+), 19 deletions(-)
 create mode 100644 src/unicodenames.jl

diff --git a/src/StringUtils.jl b/src/StringUtils.jl
index fe99945..a02aada 100644
--- a/src/StringUtils.jl
+++ b/src/StringUtils.jl
@@ -9,6 +9,7 @@ export @u_str, @sinterpolate
 export s_unescape_string, s_escape_string, s_print_unescaped, s_print_escaped
 
 include("literals.jl")
+include("unicodenames.jl")
 
 # From Formatting.jl
 import Base.show
diff --git a/src/fmt.jl b/src/fmt.jl
index b9d0c6e..2a64e49 100644
--- a/src/fmt.jl
+++ b/src/fmt.jl
@@ -161,10 +161,10 @@ function fmt(x; kwargs...)
 end
 
 # some helper method calls, which just convert to kwargs
-fmt(x, prec::Int, args...; kwargs...) = fmt(x, args...; prec=prec, kwargs...)
+fmt(x, width::Int, args...; kwargs...) = fmt(x, args...; width=width, kwargs...)
 
-fmt(x, prec::Int, width::Int, args...; kwargs...) =
-    fmt(x, args...; prec=prec, width=width, kwargs...)
+fmt(x, width::Int, prec::Int, args...; kwargs...) =
+    fmt(x, args...; width=width, prec=prec, kwargs...)
 
 # integrate some symbol shorthands into the keyword args
 # note: as above, this will generate relavent kwargs, so to format in a tight loop, you should probably update the fmt_default
diff --git a/src/literals.jl b/src/literals.jl
index 5497129..60b8cbe 100644
--- a/src/literals.jl
+++ b/src/literals.jl
@@ -72,6 +72,26 @@ function s_parse_latex(io, s,  i)
     i
 end
 
+"""
+Handle a Python-style Unicode name escape of the form \\N{<name>}: print the named character to `io` and return the index just past the closing brace
+"""
+function s_parse_uniname(io, s,  i)
+    done(s, i) && throw(ArgumentError("\\N incomplete in $(repr(s))"))
+    c, i = next(s, i)
+    c != '{' && throw(ArgumentError("\\N missing initial { in $(repr(s))"))
+    done(s, i) && throw(ArgumentError("\\N{ incomplete in $(repr(s))"))
+    beg = i # start location
+    c, i = next(s, i)
+    while c != '}'  # scan forward to the closing brace
+        done(s, i) && throw(ArgumentError("\\N{ missing closing } in $(repr(s))"))
+        c, i = next(s, i)
+    end
+    unichar = get(UnicodeNames, uppercase(s[beg:i-2]), typemax(UInt32))  # name is upper-cased before lookup; typemax(UInt32) marks "not found"
+    unichar == typemax(UInt32) && throw(ArgumentError("Invalid Unicode name in $(repr(s))"))
+    print(io, Char(unichar))
+    i  # index of the character following the closing brace
+end
+
 """
 String interpolation parsing
 Based on code resurrected from Julia base:
@@ -89,6 +109,8 @@ function s_print_unescaped(io, s::AbstractString)
                 i = s_parse_emoji(io, s, i)
             elseif c == '{'	# LaTex
                 i = s_parse_latex(io, s, i)
+            elseif c == 'N'	# Unicode name
+                i = s_parse_uniname(io, s, i)
             else
                 c = (c == '0' ? '\0' :
                      c == '"' ? '"'  :
@@ -153,25 +175,28 @@ function s_interp_parse(s::AbstractString, unescape::Function, p::Function)
                 if !isempty(s[i:j-1])
                     push!(sx, unescape(s[i:j-1]))
                 end
-                c = s[k]
-                if c != '('
+                if s[k] == '('
+                    # Need to find end to parse to
+                    _, j = parse(s, k, greedy=false)
+                    # This is a bit hacky, and probably doesn't perform as well as it could,
+                    # but it works! Same below.
+                    str = "(fmt" * s[k:j-1] * ")"
+                else
                     # Move past %, c should point to letter
-                    c, k = next(s, k)
-                    s[k] == '(' || throw(ParseError("Missing ( in % format"))
-                end
-                # c is now either ( or C format letter to be used
-                ex, j = parse(s, k, greedy=false)
-                if isa(ex, Expr)
-                    is(ex.head, :continue) && throw(ParseError("Incomplete expression"))
-                    # Need to wrap call to fmt around expression
-                    if ex.head == :tuple
-                        push!(sx, esc(:(fmt($(ex.args...)))))
-                    else
-                        push!(sx, esc(:(fmt($(ex.args[1])))))
+                    beg = k
+                    while true
+                        c, k = next(s, k)
+                        done(s, k) && throw(ParseError("Incomplete % expression"))
+                        s[k] == '(' && break
                     end
-                else
-                    push!(sx, esc(:(fmt($ex))))
+                    _, j = parse(s, k, greedy=false)
+                    str = string("(cfmt(\"", s[beg:k-1], "\",", s[k+1:j-1], ")")
                 end
+                ex, _ = parse(str, 1, greedy=false)
+                if isa(ex, Expr) && is(ex.head, :continue)
+                    throw(ParseError("Incomplete expression"))
+                end
+                push!(sx, esc(ex))
                 i = j
             else
                 j = k
diff --git a/src/unicodenames.jl b/src/unicodenames.jl
new file mode 100644
index 0000000..d725c57
--- /dev/null
+++ b/src/unicodenames.jl
@@ -0,0 +1,185 @@
+const global UnicodeNames = Dict{ASCIIString, UInt32}(  # uppercase Unicode character name => code point
+    "EN QUAD" => 0x2000,
+    "EM QUAD" => 0x2001,
+    "EN SPACE" => 0x2002,
+    "EM SPACE" => 0x2003,
+    "THREE-PER-EM SPACE" => 0x2004,
+    "FOUR-PER-EM SPACE" => 0x2005,
+    "SIX-PER-EM SPACE" => 0x2006,
+    "FIGURE SPACE" => 0x2007,
+    "PUNCTUATION SPACE" => 0x2008,
+    "THIN SPACE" => 0x2009,
+    "HAIR SPACE" => 0x200A,
+    "ZERO WIDTH SPACE" => 0x200B,
+    "ZERO WIDTH NON-JOINER" => 0x200C,
+    "ZERO WIDTH JOINER" => 0x200D,
+    "LEFT-TO-RIGHT MARK" => 0x200E,
+    "RIGHT-TO-LEFT MARK" => 0x200F,
+    "HYPHEN" => 0x2010,
+    "NON-BREAKING HYPHEN" => 0x2011,
+    "FIGURE DASH" => 0x2012,
+    "EN DASH" => 0x2013,
+    "EM DASH" => 0x2014,
+    "HORIZONTAL BAR" => 0x2015,
+    "DOUBLE VERTICAL LINE" => 0x2016,
+    "DOUBLE LOW LINE" => 0x2017,
+    "LEFT SINGLE QUOTATION MARK" => 0x2018,
+    "RIGHT SINGLE QUOTATION MARK" => 0x2019,
+    "SINGLE LOW-9 QUOTATION MARK" => 0x201A,
+    "SINGLE HIGH-REVERSED-9 QUOTATION MARK" => 0x201B,
+    "LEFT DOUBLE QUOTATION MARK" => 0x201C,
+    "RIGHT DOUBLE QUOTATION MARK" => 0x201D,
+    "DOUBLE LOW-9 QUOTATION MARK" => 0x201E,
+    "DOUBLE HIGH-REVERSED-9 QUOTATION MARK" => 0x201F,
+    "DAGGER" => 0x2020,
+    "DOUBLE DAGGER" => 0x2021,
+    "BULLET" => 0x2022,
+    "TRIANGULAR BULLET" => 0x2023,
+    "ONE DOT LEADER" => 0x2024,
+    "TWO DOT LEADER" => 0x2025,
+    "HORIZONTAL ELLIPSIS" => 0x2026,
+    "HYPHENATION POINT" => 0x2027,
+    "LINE SEPARATOR" => 0x2028,
+    "PARAGRAPH SEPARATOR" => 0x2029,
+    "LEFT-TO-RIGHT EMBEDDING" => 0x202A,
+    "RIGHT-TO-LEFT EMBEDDING" => 0x202B,
+    "POP DIRECTIONAL FORMATTING" => 0x202C,
+    "LEFT-TO-RIGHT OVERRIDE" => 0x202D,
+    "RIGHT-TO-LEFT OVERRIDE" => 0x202E,
+    "NARROW NO-BREAK SPACE" => 0x202F,
+    "PER MILLE SIGN" => 0x2030,
+    "PER TEN THOUSAND SIGN" => 0x2031,
+    "PRIME" => 0x2032,
+    "DOUBLE PRIME" => 0x2033,
+    "TRIPLE PRIME" => 0x2034,
+    "REVERSED PRIME" => 0x2035,
+    "REVERSED DOUBLE PRIME" => 0x2036,
+    "REVERSED TRIPLE PRIME" => 0x2037,
+    "CARET" => 0x2038,
+    "SINGLE LEFT-POINTING ANGLE QUOTATION MARK" => 0x2039,
+    "SINGLE RIGHT-POINTING ANGLE QUOTATION MARK" => 0x203A,
+    "REFERENCE MARK" => 0x203B,
+    "DOUBLE EXCLAMATION MARK" => 0x203C,
+    "INTERROBANG" => 0x203D,
+    "OVERLINE" => 0x203E,
+    "UNDERTIE" => 0x203F,
+    "CHARACTER TIE" => 0x2040,
+    "CARET INSERTION POINT" => 0x2041,
+    "ASTERISM" => 0x2042,
+    "HYPHEN BULLET" => 0x2043,
+    "FRACTION SLASH" => 0x2044,
+    "LEFT SQUARE BRACKET WITH QUILL" => 0x2045,
+    "RIGHT SQUARE BRACKET WITH QUILL" => 0x2046,
+    "DOUBLE QUESTION MARK" => 0x2047,
+    "QUESTION EXCLAMATION MARK" => 0x2048,
+    "EXCLAMATION QUESTION MARK" => 0x2049,
+    "TIRONIAN SIGN ET" => 0x204A,
+    "REVERSED PILCROW SIGN" => 0x204B,
+    "BLACK LEFTWARDS BULLET" => 0x204C,
+    "BLACK RIGHTWARDS BULLET" => 0x204D,
+    "LOW ASTERISK" => 0x204E,
+    "REVERSED SEMICOLON" => 0x204F,
+    "CLOSE UP" => 0x2050,
+    "TWO ASTERISKS ALIGNED VERTICALLY" => 0x2051,
+    "COMMERCIAL MINUS SIGN" => 0x2052,
+    "SWUNG DASH" => 0x2053,
+    "INVERTED UNDERTIE" => 0x2054,
+    "FLOWER PUNCTUATION MARK" => 0x2055,
+    "THREE DOT PUNCTUATION" => 0x2056,
+    "QUADRUPLE PRIME" => 0x2057,
+    "FOUR DOT PUNCTUATION" => 0x2058,
+    "FIVE DOT PUNCTUATION" => 0x2059,
+    "TWO DOT PUNCTUATION" => 0x205A,
+    "FOUR DOT MARK" => 0x205B,
+    "DOTTED CROSS" => 0x205C,
+    "TRICOLON" => 0x205D,
+    "VERTICAL FOUR DOTS" => 0x205E,
+    "MEDIUM MATHEMATICAL SPACE" => 0x205F,
+    "WORD JOINER" => 0x2060,
+    "FUNCTION APPLICATION" => 0x2061,
+    "INVISIBLE TIMES" => 0x2062,
+    "INVISIBLE SEPARATOR" => 0x2063,
+    "INVISIBLE PLUS" => 0x2064,
+    "LEFT-TO-RIGHT ISOLATE" => 0x2066,
+    "RIGHT-TO-LEFT ISOLATE" => 0x2067,
+    "FIRST STRONG ISOLATE" => 0x2068,
+    "POP DIRECTIONAL ISOLATE" => 0x2069,
+    "INHIBIT SYMMETRIC SWAPPING" => 0x206A,
+    "ACTIVATE SYMMETRIC SWAPPING" => 0x206B,
+    "INHIBIT ARABIC FORM SHAPING" => 0x206C,
+    "ACTIVATE ARABIC FORM SHAPING" => 0x206D,
+    "NATIONAL DIGIT SHAPES" => 0x206E,
+    "NOMINAL DIGIT SHAPES" => 0x206F,
+    "SUPERSCRIPT ZERO" => 0x2070,
+    "SUPERSCRIPT LATIN SMALL LETTER I" => 0x2071,
+    "SUPERSCRIPT FOUR" => 0x2074,
+    "SUPERSCRIPT FIVE" => 0x2075,
+    "SUPERSCRIPT SIX" => 0x2076,
+    "SUPERSCRIPT SEVEN" => 0x2077,
+    "SUPERSCRIPT EIGHT" => 0x2078,
+    "SUPERSCRIPT NINE" => 0x2079,
+    "SUPERSCRIPT PLUS SIGN" => 0x207A,
+    "SUPERSCRIPT MINUS" => 0x207B,
+    "SUPERSCRIPT EQUALS SIGN" => 0x207C,
+    "SUPERSCRIPT LEFT PARENTHESIS" => 0x207D,
+    "SUPERSCRIPT RIGHT PARENTHESIS" => 0x207E,
+    "SUPERSCRIPT LATIN SMALL LETTER N" => 0x207F,
+    "SUBSCRIPT ZERO" => 0x2080,
+    "SUBSCRIPT ONE" => 0x2081,
+    "SUBSCRIPT TWO" => 0x2082,
+    "SUBSCRIPT THREE" => 0x2083,
+    "SUBSCRIPT FOUR" => 0x2084,
+    "SUBSCRIPT FIVE" => 0x2085,
+    "SUBSCRIPT SIX" => 0x2086,
+    "SUBSCRIPT SEVEN" => 0x2087,
+    "SUBSCRIPT EIGHT" => 0x2088,
+    "SUBSCRIPT NINE" => 0x2089,
+    "SUBSCRIPT PLUS SIGN" => 0x208A,
+    "SUBSCRIPT MINUS" => 0x208B,
+    "SUBSCRIPT EQUALS SIGN" => 0x208C,
+    "SUBSCRIPT LEFT PARENTHESIS" => 0x208D,
+    "SUBSCRIPT RIGHT PARENTHESIS" => 0x208E,
+    "LATIN SUBSCRIPT SMALL LETTER A" => 0x2090,
+    "LATIN SUBSCRIPT SMALL LETTER E" => 0x2091,
+    "LATIN SUBSCRIPT SMALL LETTER O" => 0x2092,
+    "LATIN SUBSCRIPT SMALL LETTER X" => 0x2093,
+    "LATIN SUBSCRIPT SMALL LETTER SCHWA" => 0x2094,
+    "LATIN SUBSCRIPT SMALL LETTER H" => 0x2095,
+    "LATIN SUBSCRIPT SMALL LETTER K" => 0x2096,
+    "LATIN SUBSCRIPT SMALL LETTER L" => 0x2097,
+    "LATIN SUBSCRIPT SMALL LETTER M" => 0x2098,
+    "LATIN SUBSCRIPT SMALL LETTER N" => 0x2099,
+    "LATIN SUBSCRIPT SMALL LETTER P" => 0x209A,
+    "LATIN SUBSCRIPT SMALL LETTER S" => 0x209B,
+    "LATIN SUBSCRIPT SMALL LETTER T" => 0x209C,
+    "EURO-CURRENCY SIGN" => 0x20A0,
+    "COLON SIGN" => 0x20A1,
+    "CRUZEIRO SIGN" => 0x20A2,
+    "FRENCH FRANC SIGN" => 0x20A3,
+    "LIRA SIGN" => 0x20A4,
+    "MILL SIGN" => 0x20A5,
+    "NAIRA SIGN" => 0x20A6,
+    "PESETA SIGN" => 0x20A7,
+    "RUPEE SIGN" => 0x20A8,
+    "WON SIGN" => 0x20A9,
+    "NEW SHEQEL SIGN" => 0x20AA,
+    "DONG SIGN" => 0x20AB,
+    "EURO SIGN" => 0x20AC,
+    "KIP SIGN" => 0x20AD,
+    "TUGRIK SIGN" => 0x20AE,
+    "DRACHMA SIGN" => 0x20AF,
+    "GERMAN PENNY SIGN" => 0x20B0,
+    "PESO SIGN" => 0x20B1,
+    "GUARANI SIGN" => 0x20B2,
+    "AUSTRAL SIGN" => 0x20B3,
+    "HRYVNIA SIGN" => 0x20B4,
+    "CEDI SIGN" => 0x20B5,
+    "LIVRE TOURNOIS SIGN" => 0x20B6,
+    "SPESMILO SIGN" => 0x20B7,
+    "TENGE SIGN" => 0x20B8,
+    "INDIAN RUPEE SIGN" => 0x20B9,
+    "TURKISH LIRA SIGN" => 0x20BA,
+    "NORDIC MARK SIGN" => 0x20BB,
+    "MANAT SIGN" => 0x20BC,
+    "RUBLE SIGN" => 0x20BD,
+    "LARI SIGN" => 0x20BE)