Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

RFC: Add allownan keyword argument to parse() (Fixes #168) #280

Merged
merged 3 commits into from
May 18, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 39 additions & 15 deletions src/Parser.jl
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ mutable struct StreamingParserState{T <: IO} <: ParserState
end
StreamingParserState(io::IO) = StreamingParserState(io, 0x00, true, PushVector{UInt8}())

struct ParserContext{DictType, IntType} end
struct ParserContext{DictType, IntType, AllowNanInf} end

"""
Return the byte at the current position of the `ParserState`. If there is no
Expand Down Expand Up @@ -169,11 +169,11 @@ function parse_value(pc::ParserContext, ps::ParserState)
elseif byte == ARRAY_BEGIN
parse_array(pc, ps)
else
parse_jsconstant(ps::ParserState)
parse_jsconstant(pc, ps)
end
end

function parse_jsconstant(ps::ParserState)
function parse_jsconstant(::ParserContext{<:Any,<:Any,AllowNanInf}, ps::ParserState) where AllowNanInf
c = advance!(ps)
if c == LATIN_T # true
skip!(ps, LATIN_R, LATIN_U, LATIN_E)
Expand All @@ -184,6 +184,12 @@ function parse_jsconstant(ps::ParserState)
elseif c == LATIN_N # null
skip!(ps, LATIN_U, LATIN_L, LATIN_L)
nothing
elseif AllowNanInf && c == LATIN_UPPER_N
skip!(ps, LATIN_A, LATIN_UPPER_N)
NaN
elseif AllowNanInf && c == LATIN_UPPER_I
skip!(ps, LATIN_N, LATIN_F, LATIN_I, LATIN_N, LATIN_I, LATIN_T, LATIN_Y)
Inf
else
_error(E_UNEXPECTED_CHAR, ps)
end
Expand All @@ -207,7 +213,7 @@ function parse_array(pc::ParserContext, ps::ParserState)
end


function parse_object(pc::ParserContext{DictType, <:Real}, ps::ParserState) where DictType
function parse_object(pc::ParserContext{DictType,<:Real,<:Any}, ps::ParserState) where DictType
obj = DictType()
keyT = keytype(typeof(obj))

Expand Down Expand Up @@ -323,7 +329,7 @@ end
Parse an integer from the given bytes vector, starting at `from` and ending at
the byte before `to`. Bytes enclosed should all be ASCII characters.
"""
function int_from_bytes(pc::ParserContext{<:Any,IntType},
function int_from_bytes(pc::ParserContext{<:Any,IntType,<:Any},
kmsquire marked this conversation as resolved.
Show resolved Hide resolved
ps::ParserState,
bytes,
from::Int,
Expand Down Expand Up @@ -364,11 +370,21 @@ function number_from_bytes(pc::ParserContext,
end


function parse_number(pc::ParserContext, ps::ParserState)
function parse_number(pc::ParserContext{<:Any,<:Any,AllowNanInf}, ps::ParserState) where AllowNanInf
# Determine the end of the floating point by skipping past ASCII values
# 0-9, +, -, e, E, and .
number = ps.utf8array
isint = true
negative = false

c = current(ps)

# Parse and keep track of initial minus sign (for parsing -Infinity)
if AllowNanInf && c == MINUS_SIGN
push!(number, UInt8(c)) # save in case the next character is a number
negative = true
incr!(ps)
kmsquire marked this conversation as resolved.
Show resolved Hide resolved
end

@inbounds while hasmore(ps)
c = current(ps)
Expand All @@ -378,6 +394,10 @@ function parse_number(pc::ParserContext, ps::ParserState)
elseif c in (PLUS_SIGN, LATIN_E, LATIN_UPPER_E, DECIMAL_POINT)
push!(number, UInt8(c))
isint = false
elseif AllowNanInf && c == LATIN_UPPER_I
infinity = parse_jsconstant(pc, ps)
resize!(number, 0)
return (negative ? -infinity : infinity)
else
break
end
Expand All @@ -390,26 +410,28 @@ function parse_number(pc::ParserContext, ps::ParserState)
return v
end


# Anything that is not a type (a function, a value, ...) is handed back as-is.
unparameterize_type(x) = x

"""
    unparameterize_type(T::Type)

Narrow `T` to the part of it that is also an `AbstractDict{String, Any}`.

If the intersection of `T` with `AbstractDict{String, Any}` is empty, `T` is
returned unchanged; otherwise the intersection is returned.
"""
function unparameterize_type(T::Type)
    narrowed = typeintersect(T, AbstractDict{String, Any})
    if narrowed <: Union{}
        # Nothing in common with a String => Any dictionary: keep the input.
        return T
    end
    return narrowed
end

# Workaround for slow dynamic dispatch for creating objects
const DEFAULT_PARSERCONTEXT = ParserContext{Dict{String, Any}, Int64}()
function _get_parsercontext(dicttype, inttype)
if dicttype == Dict{String, Any} && inttype == Int64
const DEFAULT_PARSERCONTEXT = ParserContext{Dict{String, Any}, Int64, false}()
function _get_parsercontext(dicttype, inttype, allownan)
if dicttype == Dict{String, Any} && inttype == Int64 && !allownan
DEFAULT_PARSERCONTEXT
else
ParserContext{unparameterize_type(dicttype), inttype}.instance
ParserContext{unparameterize_type(dicttype), inttype, allownan}.instance
end
end

function parse(str::AbstractString;
dicttype=Dict{String,Any},
inttype::Type{<:Real}=Int64)
pc = _get_parsercontext(dicttype, inttype)
inttype::Type{<:Real}=Int64,
allownan=true)
pc = _get_parsercontext(dicttype, inttype, allownan)
ps = MemoryParserState(str, 1)
v = parse_value(pc, ps)
chomp_space!(ps)
Expand All @@ -421,20 +443,22 @@ end

function parse(io::IO;
dicttype=Dict{String,Any},
inttype::Type{<:Real}=Int64)
pc = _get_parsercontext(dicttype, inttype)
inttype::Type{<:Real}=Int64,
allownan=true)
pc = _get_parsercontext(dicttype, inttype, allownan)
ps = StreamingParserState(io)
parse_value(pc, ps)
end

function parsefile(filename::AbstractString;
dicttype=Dict{String, Any},
inttype::Type{<:Real}=Int64,
allownan=true,
use_mmap=true)
sz = filesize(filename)
open(filename) do io
s = use_mmap ? String(Mmap.mmap(io, Vector{UInt8}, sz)) : read(io, String)
parse(s; dicttype=dicttype, inttype=inttype)
parse(s; dicttype=dicttype, inttype=inttype, allownan=allownan)
end
end

Expand Down
11 changes: 8 additions & 3 deletions src/bytes.jl
Original file line number Diff line number Diff line change
Expand Up @@ -17,19 +17,23 @@ const SEPARATOR = UInt8(':')
const LATIN_UPPER_A = UInt8('A')
const LATIN_UPPER_E = UInt8('E')
const LATIN_UPPER_F = UInt8('F')
const LATIN_UPPER_I = UInt8('I')
const LATIN_UPPER_N = UInt8('N')
const ARRAY_BEGIN = UInt8('[')
const BACKSLASH = UInt8('\\')
const ARRAY_END = UInt8(']')
const LATIN_A = UInt8('a')
const LATIN_B = UInt8('b')
const LATIN_E = UInt8('e')
const LATIN_F = UInt8('f')
const LATIN_I = UInt8('i')
const LATIN_L = UInt8('l')
const LATIN_N = UInt8('n')
const LATIN_R = UInt8('r')
const LATIN_S = UInt8('s')
const LATIN_T = UInt8('t')
const LATIN_U = UInt8('u')
const LATIN_Y = UInt8('y')
const OBJECT_BEGIN = UInt8('{')
const OBJECT_END = UInt8('}')

Expand Down Expand Up @@ -62,6 +66,7 @@ end
export BACKSPACE, TAB, NEWLINE, FORM_FEED, RETURN, SPACE, STRING_DELIM,
PLUS_SIGN, DELIMITER, MINUS_SIGN, DECIMAL_POINT, SOLIDUS, DIGIT_ZERO,
DIGIT_NINE, SEPARATOR, LATIN_UPPER_A, LATIN_UPPER_E, LATIN_UPPER_F,
ARRAY_BEGIN, BACKSLASH, ARRAY_END, LATIN_A, LATIN_B, LATIN_E, LATIN_F,
LATIN_L, LATIN_N, LATIN_R, LATIN_S, LATIN_T, LATIN_U, OBJECT_BEGIN,
OBJECT_END, ESCAPES, REVERSE_ESCAPES, ESCAPED_ARRAY
LATIN_UPPER_I, LATIN_UPPER_N, ARRAY_BEGIN, BACKSLASH, ARRAY_END,
LATIN_A, LATIN_B, LATIN_E, LATIN_F, LATIN_I, LATIN_L, LATIN_N, LATIN_R,
LATIN_S, LATIN_T, LATIN_U, LATIN_Y, OBJECT_BEGIN, OBJECT_END, ESCAPES,
REVERSE_ESCAPES, ESCAPED_ARRAY
15 changes: 14 additions & 1 deletion src/specialized.jl
Original file line number Diff line number Diff line change
Expand Up @@ -120,10 +120,19 @@ function parse_string(ps::MemoryParserState, b::IOBuffer)
b
end

function parse_number(pc::ParserContext, ps::MemoryParserState)
function parse_number(pc::ParserContext{<:Any,<:Any,AllowNanInf}, ps::MemoryParserState) where AllowNanInf
s = p = ps.s
e = length(ps)
isint = true
negative = false

@inbounds c = ps[p]

# Parse and keep track of initial minus sign (for parsing -Infinity)
if AllowNanInf && c == MINUS_SIGN
negative = true
p += 1
kmsquire marked this conversation as resolved.
Show resolved Hide resolved
end

# Determine the end of the floating point by skipping past ASCII values
# 0-9, +, -, e, E, and .
Expand All @@ -133,6 +142,10 @@ function parse_number(pc::ParserContext, ps::MemoryParserState)
elseif PLUS_SIGN == c || LATIN_E == c || LATIN_UPPER_E == c ||
DECIMAL_POINT == c
isint = false
elseif AllowNanInf && LATIN_UPPER_I == c
ps.s = p
infinity = parse_jsconstant(pc, ps)
return (negative ? -infinity : infinity)
else
break
end
Expand Down
35 changes: 35 additions & 0 deletions test/parser/nan-inf.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
@testset begin
    # Document mixing the non-standard NaN / Infinity / -Infinity literals with
    # ordinary JSON values. The literal is kept flush-left so that its contents
    # are reproduced exactly.
    nonfinite_json = """
{
"x": NaN,
"y": Infinity,
"z": -Infinity,
"q": [true, null, "hello", 1, -1, 1.5, -1.5, [true]]
}"""

    expected = Dict(
        "x" => NaN,
        "y" => Inf,
        "z" => -Inf,
        "q" => [true, nothing, "hello", 1, -1, 1.5, -1.5, [true]]
    )

    # `identity` feeds the string method of `JSON.parse`, `IOBuffer` feeds the
    # IO method; both are checked with and without `allownan=false`.
    for wrap in (identity, IOBuffer)
        @test_throws ErrorException JSON.parse(wrap(nonfinite_json), allownan=false)
        @test isequal(JSON.parse(wrap(nonfinite_json)), expected)
    end

    # Test that the value following -Infinity parses correctly
    followups = (
        ("[-Infinity, 1]", [-Inf, 1]),
        ("[-Infinity, -1]", [-Inf, -1]),
        ("""{"a": -Infinity, "b": 1.0}""", Dict("a" => -Inf, "b"=> 1.0)),
        ("""{"a": -Infinity, "b": -1.0}""", Dict("a" => -Inf, "b"=> -1.0)),
    )

    # All string inputs run first, then all IOBuffer inputs.
    for wrap in (identity, IOBuffer), (text, want) in followups
        @test isequal(JSON.parse(wrap(text)), want)
    end
end
4 changes: 4 additions & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ include("json-samples.jl")
include("parser/inttype.jl")
end

# Run the tests kept in parser/nan-inf.jl under their own named test set.
@testset "nan_inf" begin
include("parser/nan-inf.jl")
end

@testset "Miscellaneous" begin
# test for single values
@test JSON.parse("true") == true
Expand Down