JuliaIO · kmsquire · May 18, 2019 · May 13, 2019 · May 16, 2019 · May 17, 2019
diff --git a/src/Parser.jl b/src/Parser.jl
@@ -37,7 +37,7 @@ mutable struct StreamingParserState{T <: IO} <: ParserState
 end
 StreamingParserState(io::IO) = StreamingParserState(io, 0x00, true, PushVector{UInt8}())
 
-struct ParserContext{DictType, IntType} end
+struct ParserContext{DictType, IntType, AllowNanInf} end
 
 """
 Return the byte at the current position of the `ParserState`. If there is no
@@ -169,11 +169,11 @@ function parse_value(pc::ParserContext, ps::ParserState)
     elseif byte == ARRAY_BEGIN
         parse_array(pc, ps)
     else
-        parse_jsconstant(ps::ParserState)
+        parse_jsconstant(pc, ps)
     end
 end
 
-function parse_jsconstant(ps::ParserState)
+function parse_jsconstant(::ParserContext{<:Any,<:Any,AllowNanInf}, ps::ParserState) where AllowNanInf
     c = advance!(ps)
     if c == LATIN_T      # true
         skip!(ps, LATIN_R, LATIN_U, LATIN_E)
@@ -184,6 +184,12 @@ function parse_jsconstant(ps::ParserState)
     elseif c == LATIN_N  # null
         skip!(ps, LATIN_U, LATIN_L, LATIN_L)
         nothing
+    elseif AllowNanInf && c == LATIN_UPPER_N
+        skip!(ps, LATIN_A, LATIN_UPPER_N)
+        NaN
+    elseif AllowNanInf && c == LATIN_UPPER_I
+        skip!(ps, LATIN_N, LATIN_F, LATIN_I, LATIN_N, LATIN_I, LATIN_T, LATIN_Y)
+        Inf
     else
         _error(E_UNEXPECTED_CHAR, ps)
     end
@@ -207,7 +213,7 @@ function parse_array(pc::ParserContext, ps::ParserState)
 end
 
 
-function parse_object(pc::ParserContext{DictType, <:Real}, ps::ParserState) where DictType
+function parse_object(pc::ParserContext{DictType,<:Real,<:Any}, ps::ParserState) where DictType
     obj = DictType()
     keyT = keytype(typeof(obj))
 
@@ -323,7 +329,7 @@ end
 Parse an integer from the given bytes vector, starting at `from` and ending at
 the byte before `to`. Bytes enclosed should all be ASCII characters.
 """
-function int_from_bytes(pc::ParserContext{<:Any,IntType},
+function int_from_bytes(pc::ParserContext{<:Any,IntType,<:Any},
                         ps::ParserState,
                         bytes,
                         from::Int,
@@ -364,11 +370,21 @@ function number_from_bytes(pc::ParserContext,
 end
 
 
-function parse_number(pc::ParserContext, ps::ParserState)
+function parse_number(pc::ParserContext{<:Any,<:Any,AllowNanInf}, ps::ParserState) where AllowNanInf
     # Determine the end of the floating point by skipping past ASCII values
     # 0-9, +, -, e, E, and .
     number = ps.utf8array
     isint = true
+    negative = false
+
+    c = current(ps)
+
+    # Parse and keep track of initial minus sign (for parsing -Infinity)
+    if AllowNanInf && c == MINUS_SIGN
+        push!(number, UInt8(c)) # save in case the next character is a number
+        negative = true
+        incr!(ps)
+    end
 
     @inbounds while hasmore(ps)
         c = current(ps)
@@ -378,6 +394,10 @@ function parse_number(pc::ParserContext, ps::ParserState)
         elseif c in (PLUS_SIGN, LATIN_E, LATIN_UPPER_E, DECIMAL_POINT)
             push!(number, UInt8(c))
             isint = false
+        elseif AllowNanInf && c == LATIN_UPPER_I
+            infinity = parse_jsconstant(pc, ps)
+            resize!(number, 0)
+            return (negative ? -infinity : infinity)
         else
             break
         end
@@ -390,26 +410,28 @@ function parse_number(pc::ParserContext, ps::ParserState)
     return v
 end
 
+
 unparameterize_type(x) = x # Fallback for nontypes -- functions etc
 function unparameterize_type(T::Type)
     candidate = typeintersect(T, AbstractDict{String, Any})
     candidate <: Union{} ? T : candidate
 end
 
 # Workaround for slow dynamic dispatch for creating objects
-const DEFAULT_PARSERCONTEXT = ParserContext{Dict{String, Any}, Int64}()
-function _get_parsercontext(dicttype, inttype)
-    if dicttype == Dict{String, Any} && inttype == Int64
+const DEFAULT_PARSERCONTEXT = ParserContext{Dict{String, Any}, Int64, true}()
+function _get_parsercontext(dicttype, inttype, allownan)
+    if dicttype == Dict{String, Any} && inttype == Int64 && allownan  # parse() defaults
         DEFAULT_PARSERCONTEXT
     else
-        ParserContext{unparameterize_type(dicttype), inttype}.instance
+        ParserContext{unparameterize_type(dicttype), inttype, allownan}.instance
     end
 end
 
 function parse(str::AbstractString;
                dicttype=Dict{String,Any},
-               inttype::Type{<:Real}=Int64)
-    pc = _get_parsercontext(dicttype, inttype)
+               inttype::Type{<:Real}=Int64,
+               allownan=true)
+    pc = _get_parsercontext(dicttype, inttype, allownan)
     ps = MemoryParserState(str, 1)
     v = parse_value(pc, ps)
     chomp_space!(ps)
@@ -421,20 +443,22 @@ end
 
 function parse(io::IO;
                dicttype=Dict{String,Any},
-               inttype::Type{<:Real}=Int64)
-    pc = _get_parsercontext(dicttype, inttype)
+               inttype::Type{<:Real}=Int64,
+               allownan::Bool=true)  # Bool-typed: a bad flag fails at the call site
+    pc = _get_parsercontext(dicttype, inttype, allownan)
     ps = StreamingParserState(io)
     parse_value(pc, ps)
 end
 
 function parsefile(filename::AbstractString;
                    dicttype=Dict{String, Any},
                    inttype::Type{<:Real}=Int64,
+                   allownan::Bool=true,  # Bool-typed: a bad flag fails at the call site
                    use_mmap=true)
     sz = filesize(filename)
     open(filename) do io
         s = use_mmap ? String(Mmap.mmap(io, Vector{UInt8}, sz)) : read(io, String)
-        parse(s; dicttype=dicttype, inttype=inttype)
+        parse(s; dicttype=dicttype, inttype=inttype, allownan=allownan)
     end
 end
 

diff --git a/src/bytes.jl b/src/bytes.jl
@@ -17,19 +17,23 @@ const SEPARATOR      = UInt8(':')
 const LATIN_UPPER_A  = UInt8('A')
 const LATIN_UPPER_E  = UInt8('E')
 const LATIN_UPPER_F  = UInt8('F')
+const LATIN_UPPER_I  = UInt8('I')
+const LATIN_UPPER_N  = UInt8('N')
 const ARRAY_BEGIN    = UInt8('[')
 const BACKSLASH      = UInt8('\\')
 const ARRAY_END      = UInt8(']')
 const LATIN_A        = UInt8('a')
 const LATIN_B        = UInt8('b')
 const LATIN_E        = UInt8('e')
 const LATIN_F        = UInt8('f')
+const LATIN_I        = UInt8('i')
 const LATIN_L        = UInt8('l')
 const LATIN_N        = UInt8('n')
 const LATIN_R        = UInt8('r')
 const LATIN_S        = UInt8('s')
 const LATIN_T        = UInt8('t')
 const LATIN_U        = UInt8('u')
+const LATIN_Y        = UInt8('y')
 const OBJECT_BEGIN   = UInt8('{')
 const OBJECT_END     = UInt8('}')
 
@@ -62,6 +66,7 @@ end
 export BACKSPACE, TAB, NEWLINE, FORM_FEED, RETURN, SPACE, STRING_DELIM,
        PLUS_SIGN, DELIMITER, MINUS_SIGN, DECIMAL_POINT, SOLIDUS, DIGIT_ZERO,
        DIGIT_NINE, SEPARATOR, LATIN_UPPER_A, LATIN_UPPER_E, LATIN_UPPER_F,
-       ARRAY_BEGIN, BACKSLASH, ARRAY_END, LATIN_A, LATIN_B, LATIN_E, LATIN_F,
-       LATIN_L, LATIN_N, LATIN_R, LATIN_S, LATIN_T, LATIN_U, OBJECT_BEGIN,
-       OBJECT_END, ESCAPES, REVERSE_ESCAPES, ESCAPED_ARRAY
+       LATIN_UPPER_I, LATIN_UPPER_N, ARRAY_BEGIN, BACKSLASH, ARRAY_END,
+       LATIN_A, LATIN_B, LATIN_E, LATIN_F, LATIN_I, LATIN_L, LATIN_N, LATIN_R,
+       LATIN_S, LATIN_T, LATIN_U, LATIN_Y, OBJECT_BEGIN, OBJECT_END, ESCAPES,
+       REVERSE_ESCAPES, ESCAPED_ARRAY
diff --git a/src/specialized.jl b/src/specialized.jl
@@ -120,10 +120,19 @@ function parse_string(ps::MemoryParserState, b::IOBuffer)
     b
 end
 
-function parse_number(pc::ParserContext, ps::MemoryParserState)
+function parse_number(pc::ParserContext{<:Any,<:Any,AllowNanInf}, ps::MemoryParserState) where AllowNanInf
     s = p = ps.s
     e = length(ps)
     isint = true
+    negative = false
+
+    @inbounds c = ps[p]
+
+    # Parse and keep track of initial minus sign (for parsing -Infinity)
+    if AllowNanInf && c == MINUS_SIGN
+        negative = true
+        p += 1
+    end
 
     # Determine the end of the floating point by skipping past ASCII values
     # 0-9, +, -, e, E, and .
@@ -133,6 +142,10 @@ function parse_number(pc::ParserContext, ps::MemoryParserState)
         elseif PLUS_SIGN == c || LATIN_E == c || LATIN_UPPER_E == c ||
                 DECIMAL_POINT == c
             isint = false
+        elseif AllowNanInf && LATIN_UPPER_I == c
+            ps.s = p
+            infinity = parse_jsconstant(pc, ps)
+            return (negative ? -infinity : infinity)
         else
             break
         end

diff --git a/test/parser/nan-inf.jl b/test/parser/nan-inf.jl
@@ -0,0 +1,46 @@
+@testset begin
+    test_str = """
+        {
+            "x": NaN,
+            "y": Infinity,
+            "z": -Infinity,
+            "q": [true, null, "hello", 1, -1, 1.5, -1.5, [true]]
+        }"""
+
+    test_dict = Dict(
+        "x" => NaN,
+        "y" => Inf,
+        "z" => -Inf,
+        "q" => [true, nothing, "hello", 1, -1, 1.5, -1.5, [true]]
+    )
+
+    @test_throws ErrorException JSON.parse(test_str, allownan=false)
+    val = JSON.parse(test_str)
+    @test isequal(val, test_dict)
+
+    @test_throws ErrorException JSON.parse(IOBuffer(test_str), allownan=false)
+    val2 = JSON.parse(IOBuffer(test_str))
+    @test isequal(val2, test_dict)
+
+    # Test that the number following -Infinity parses correctly
+    @test isequal(JSON.parse("[-Infinity, 1]"), [-Inf, 1])
+    @test isequal(JSON.parse("[-Infinity, -1]"), [-Inf, -1])
+    @test isequal(JSON.parse("""{"a": -Infinity, "b": 1.0}"""), Dict("a" => -Inf, "b"=> 1.0))
+    @test isequal(JSON.parse("""{"a": -Infinity, "b": -1.0}"""), Dict("a" => -Inf, "b"=> -1.0))
+
+    @test isequal(JSON.parse(IOBuffer("[-Infinity, 1]")), [-Inf, 1])
+    @test isequal(JSON.parse(IOBuffer("[-Infinity, -1]")), [-Inf, -1])
+    @test isequal(JSON.parse(IOBuffer("""{"a": -Infinity, "b": 1.0}""")), Dict("a" => -Inf, "b"=> 1.0))
+    @test isequal(JSON.parse(IOBuffer("""{"a": -Infinity, "b": -1.0}""")), Dict("a" => -Inf, "b"=> -1.0))
+
+    # Bare top-level constants, with no enclosing array or object
+    @test isnan(JSON.parse("NaN"))
+    @test JSON.parse("Infinity") == Inf
+    @test JSON.parse("-Infinity") == -Inf
+
+    # With allownan=false every non-standard token must be rejected on its own
+    for bad in ("[NaN]", "[Infinity]", "[-Infinity]")
+        @test_throws ErrorException JSON.parse(bad, allownan=false)
+        @test_throws ErrorException JSON.parse(IOBuffer(bad), allownan=false)
+    end
+end
diff --git a/test/runtests.jl b/test/runtests.jl
@@ -25,6 +25,10 @@ include("json-samples.jl")
         include("parser/inttype.jl")
     end
 
+    @testset "nan_inf" begin
+        include("parser/nan-inf.jl")
+    end
+
     @testset "Miscellaneous" begin
         # test for single values
         @test JSON.parse("true") == true