Skip to content

Commit

Permalink
Merge pull request #280 from JuliaIO/kms/allownan
Browse files Browse the repository at this point in the history
RFC: Add `allownan` keyword argument to parse() (Fixes #168)
  • Loading branch information
kmsquire authored May 18, 2019
2 parents 63afbcf + 657e107 commit 9581e13
Show file tree
Hide file tree
Showing 5 changed files with 100 additions and 19 deletions.
54 changes: 39 additions & 15 deletions src/Parser.jl
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ mutable struct StreamingParserState{T <: IO} <: ParserState
end
StreamingParserState(io::IO) = StreamingParserState(io, 0x00, true, PushVector{UInt8}())

struct ParserContext{DictType, IntType} end
struct ParserContext{DictType, IntType, AllowNanInf} end

"""
Return the byte at the current position of the `ParserState`. If there is no
Expand Down Expand Up @@ -169,11 +169,11 @@ function parse_value(pc::ParserContext, ps::ParserState)
elseif byte == ARRAY_BEGIN
parse_array(pc, ps)
else
parse_jsconstant(ps::ParserState)
parse_jsconstant(pc, ps)
end
end

function parse_jsconstant(ps::ParserState)
function parse_jsconstant(::ParserContext{<:Any,<:Any,AllowNanInf}, ps::ParserState) where AllowNanInf
c = advance!(ps)
if c == LATIN_T # true
skip!(ps, LATIN_R, LATIN_U, LATIN_E)
Expand All @@ -184,6 +184,12 @@ function parse_jsconstant(ps::ParserState)
elseif c == LATIN_N # null
skip!(ps, LATIN_U, LATIN_L, LATIN_L)
nothing
elseif AllowNanInf && c == LATIN_UPPER_N
skip!(ps, LATIN_A, LATIN_UPPER_N)
NaN
elseif AllowNanInf && c == LATIN_UPPER_I
skip!(ps, LATIN_N, LATIN_F, LATIN_I, LATIN_N, LATIN_I, LATIN_T, LATIN_Y)
Inf
else
_error(E_UNEXPECTED_CHAR, ps)
end
Expand All @@ -207,7 +213,7 @@ function parse_array(pc::ParserContext, ps::ParserState)
end


function parse_object(pc::ParserContext{DictType, <:Real}, ps::ParserState) where DictType
function parse_object(pc::ParserContext{DictType,<:Real,<:Any}, ps::ParserState) where DictType
obj = DictType()
keyT = keytype(typeof(obj))

Expand Down Expand Up @@ -323,7 +329,7 @@ end
Parse an integer from the given bytes vector, starting at `from` and ending at
the byte before `to`. Bytes enclosed should all be ASCII characters.
"""
function int_from_bytes(pc::ParserContext{<:Any,IntType},
function int_from_bytes(pc::ParserContext{<:Any,IntType,<:Any},
ps::ParserState,
bytes,
from::Int,
Expand Down Expand Up @@ -364,11 +370,21 @@ function number_from_bytes(pc::ParserContext,
end


function parse_number(pc::ParserContext, ps::ParserState)
function parse_number(pc::ParserContext{<:Any,<:Any,AllowNanInf}, ps::ParserState) where AllowNanInf
# Determine the end of the floating point by skipping past ASCII values
# 0-9, +, -, e, E, and .
number = ps.utf8array
isint = true
negative = false

c = current(ps)

# Parse and keep track of initial minus sign (for parsing -Infinity)
if AllowNanInf && c == MINUS_SIGN
push!(number, UInt8(c)) # save in case the next character is a number
negative = true
incr!(ps)
end

@inbounds while hasmore(ps)
c = current(ps)
Expand All @@ -378,6 +394,10 @@ function parse_number(pc::ParserContext, ps::ParserState)
elseif c in (PLUS_SIGN, LATIN_E, LATIN_UPPER_E, DECIMAL_POINT)
push!(number, UInt8(c))
isint = false
elseif AllowNanInf && c == LATIN_UPPER_I
infinity = parse_jsconstant(pc, ps)
resize!(number, 0)
return (negative ? -infinity : infinity)
else
break
end
Expand All @@ -390,26 +410,28 @@ function parse_number(pc::ParserContext, ps::ParserState)
return v
end


"""
    unparameterize_type(x)

Narrow `x` to a `String`-keyed, `Any`-valued dictionary type where possible.

For a `Type`, the result is its intersection with `AbstractDict{String, Any}`
(so `Dict` becomes `Dict{String, Any}`). When that intersection is empty the
type is returned unchanged. Anything that is not a `Type` (a function, for
example) is returned as is.
"""
unparameterize_type(x) = x

function unparameterize_type(T::Type)
    narrowed = typeintersect(T, AbstractDict{String, Any})
    # An empty intersection means `T` cannot be narrowed; keep what the caller gave us.
    if narrowed <: Union{}
        return T
    end
    return narrowed
end

# Workaround for slow dynamic dispatch for creating objects
const DEFAULT_PARSERCONTEXT = ParserContext{Dict{String, Any}, Int64}()
function _get_parsercontext(dicttype, inttype)
if dicttype == Dict{String, Any} && inttype == Int64
const DEFAULT_PARSERCONTEXT = ParserContext{Dict{String, Any}, Int64, false}()
function _get_parsercontext(dicttype, inttype, allownan)
if dicttype == Dict{String, Any} && inttype == Int64 && !allownan
DEFAULT_PARSERCONTEXT
else
ParserContext{unparameterize_type(dicttype), inttype}.instance
ParserContext{unparameterize_type(dicttype), inttype, allownan}.instance
end
end

function parse(str::AbstractString;
dicttype=Dict{String,Any},
inttype::Type{<:Real}=Int64)
pc = _get_parsercontext(dicttype, inttype)
inttype::Type{<:Real}=Int64,
allownan=true)
pc = _get_parsercontext(dicttype, inttype, allownan)
ps = MemoryParserState(str, 1)
v = parse_value(pc, ps)
chomp_space!(ps)
Expand All @@ -421,20 +443,22 @@ end

function parse(io::IO;
dicttype=Dict{String,Any},
inttype::Type{<:Real}=Int64)
pc = _get_parsercontext(dicttype, inttype)
inttype::Type{<:Real}=Int64,
allownan=true)
pc = _get_parsercontext(dicttype, inttype, allownan)
ps = StreamingParserState(io)
parse_value(pc, ps)
end

function parsefile(filename::AbstractString;
dicttype=Dict{String, Any},
inttype::Type{<:Real}=Int64,
allownan=true,
use_mmap=true)
sz = filesize(filename)
open(filename) do io
s = use_mmap ? String(Mmap.mmap(io, Vector{UInt8}, sz)) : read(io, String)
parse(s; dicttype=dicttype, inttype=inttype)
parse(s; dicttype=dicttype, inttype=inttype, allownan=allownan)
end
end

Expand Down
11 changes: 8 additions & 3 deletions src/bytes.jl
Original file line number Diff line number Diff line change
Expand Up @@ -17,19 +17,23 @@ const SEPARATOR = UInt8(':')
const LATIN_UPPER_A = UInt8('A')
const LATIN_UPPER_E = UInt8('E')
const LATIN_UPPER_F = UInt8('F')
const LATIN_UPPER_I = UInt8('I')
const LATIN_UPPER_N = UInt8('N')
const ARRAY_BEGIN = UInt8('[')
const BACKSLASH = UInt8('\\')
const ARRAY_END = UInt8(']')
const LATIN_A = UInt8('a')
const LATIN_B = UInt8('b')
const LATIN_E = UInt8('e')
const LATIN_F = UInt8('f')
const LATIN_I = UInt8('i')
const LATIN_L = UInt8('l')
const LATIN_N = UInt8('n')
const LATIN_R = UInt8('r')
const LATIN_S = UInt8('s')
const LATIN_T = UInt8('t')
const LATIN_U = UInt8('u')
const LATIN_Y = UInt8('y')
const OBJECT_BEGIN = UInt8('{')
const OBJECT_END = UInt8('}')

Expand Down Expand Up @@ -62,6 +66,7 @@ end
export BACKSPACE, TAB, NEWLINE, FORM_FEED, RETURN, SPACE, STRING_DELIM,
PLUS_SIGN, DELIMITER, MINUS_SIGN, DECIMAL_POINT, SOLIDUS, DIGIT_ZERO,
DIGIT_NINE, SEPARATOR, LATIN_UPPER_A, LATIN_UPPER_E, LATIN_UPPER_F,
ARRAY_BEGIN, BACKSLASH, ARRAY_END, LATIN_A, LATIN_B, LATIN_E, LATIN_F,
LATIN_L, LATIN_N, LATIN_R, LATIN_S, LATIN_T, LATIN_U, OBJECT_BEGIN,
OBJECT_END, ESCAPES, REVERSE_ESCAPES, ESCAPED_ARRAY
LATIN_UPPER_I, LATIN_UPPER_N, ARRAY_BEGIN, BACKSLASH, ARRAY_END,
LATIN_A, LATIN_B, LATIN_E, LATIN_F, LATIN_I, LATIN_L, LATIN_N, LATIN_R,
LATIN_S, LATIN_T, LATIN_U, LATIN_Y, OBJECT_BEGIN, OBJECT_END, ESCAPES,
REVERSE_ESCAPES, ESCAPED_ARRAY
15 changes: 14 additions & 1 deletion src/specialized.jl
Original file line number Diff line number Diff line change
Expand Up @@ -120,10 +120,19 @@ function parse_string(ps::MemoryParserState, b::IOBuffer)
b
end

function parse_number(pc::ParserContext, ps::MemoryParserState)
function parse_number(pc::ParserContext{<:Any,<:Any,AllowNanInf}, ps::MemoryParserState) where AllowNanInf
s = p = ps.s
e = length(ps)
isint = true
negative = false

@inbounds c = ps[p]

# Parse and keep track of initial minus sign (for parsing -Infinity)
if AllowNanInf && c == MINUS_SIGN
negative = true
p += 1
end

# Determine the end of the floating point by skipping past ASCII values
# 0-9, +, -, e, E, and .
Expand All @@ -133,6 +142,10 @@ function parse_number(pc::ParserContext, ps::MemoryParserState)
elseif PLUS_SIGN == c || LATIN_E == c || LATIN_UPPER_E == c ||
DECIMAL_POINT == c
isint = false
elseif AllowNanInf && LATIN_UPPER_I == c
ps.s = p
infinity = parse_jsconstant(pc, ps)
return (negative ? -infinity : infinity)
else
break
end
Expand Down
35 changes: 35 additions & 0 deletions test/parser/nan-inf.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Tests for the `allownan` keyword of `JSON.parse`: the non-standard literals
# `NaN`, `Infinity` and `-Infinity` are accepted when the keyword is left at its
# default and must raise an `ErrorException` when `allownan=false`.
@testset begin
    test_str = """
    {
    "x": NaN,
    "y": Infinity,
    "z": -Infinity,
    "q": [true, null, "hello", 1, -1, 1.5, -1.5, [true]]
    }"""

    test_dict = Dict(
        "x" => NaN,
        "y" => Inf,
        "z" => -Inf,
        "q" => [true, nothing, "hello", 1, -1, 1.5, -1.5, [true]]
    )

    # `isequal` rather than `==`: NaN == NaN is false, so `==` could never
    # succeed on a container that holds a NaN.
    @test_throws ErrorException JSON.parse(test_str, allownan=false)
    val = JSON.parse(test_str)
    @test isequal(val, test_dict)

    @test_throws ErrorException JSON.parse(IOBuffer(test_str), allownan=false)
    val2 = JSON.parse(IOBuffer(test_str))
    @test isequal(val2, test_dict)

    # With allownan=false, `test_str` already fails on its first non-finite
    # literal (NaN), so the checks above never reach `Infinity` or `-Infinity`.
    # Reject each literal on its own, through both the String and IO entry points.
    for literal in ("NaN", "Infinity", "-Infinity")
        doc = "[" * literal * "]"
        @test_throws ErrorException JSON.parse(doc, allownan=false)
        @test_throws ErrorException JSON.parse(IOBuffer(doc), allownan=false)
    end

    # Test that the number following -Infinity parses correctly
    @test isequal(JSON.parse("[-Infinity, 1]"), [-Inf, 1])
    @test isequal(JSON.parse("[-Infinity, -1]"), [-Inf, -1])
    @test isequal(JSON.parse("""{"a": -Infinity, "b": 1.0}"""), Dict("a" => -Inf, "b"=> 1.0))
    @test isequal(JSON.parse("""{"a": -Infinity, "b": -1.0}"""), Dict("a" => -Inf, "b"=> -1.0))

    @test isequal(JSON.parse(IOBuffer("[-Infinity, 1]")), [-Inf, 1])
    @test isequal(JSON.parse(IOBuffer("[-Infinity, -1]")), [-Inf, -1])
    @test isequal(JSON.parse(IOBuffer("""{"a": -Infinity, "b": 1.0}""")), Dict("a" => -Inf, "b"=> 1.0))
    @test isequal(JSON.parse(IOBuffer("""{"a": -Infinity, "b": -1.0}""")), Dict("a" => -Inf, "b"=> -1.0))
end
4 changes: 4 additions & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ include("json-samples.jl")
include("parser/inttype.jl")
end

# Parsing of the NaN / Infinity / -Infinity literals and the `allownan` keyword;
# the test cases themselves live in parser/nan-inf.jl.
@testset "nan_inf" begin
include("parser/nan-inf.jl")
end

@testset "Miscellaneous" begin
# test for single values
@test JSON.parse("true") == true
Expand Down

0 comments on commit 9581e13

Please sign in to comment.