Skip to content

Commit

Permalink
add optional preceding whitespace, explain format
Browse files Browse the repository at this point in the history
  • Loading branch information
fangq committed Feb 15, 2022
1 parent 3dfa904 commit 540f95c
Show file tree
Hide file tree
Showing 4 changed files with 87 additions and 32 deletions.
8 changes: 7 additions & 1 deletion loadbj.m
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
% including uint16(u), uint32(m), uint64(M) and half-precision float (h).
% Starting from BJD Draft-2 (JSONLab 3.0 beta or later), all integer and
% floating-point numbers are parsed in Little-Endian as opposed to
% Big-Endian form as in BJD Draft-1/UBJSON Draft-12 (JSONLab 2.1 or older)
% Big-Endian form as in BJD Draft-1/UBJSON Draft-12 (JSONLab 2.0 or older)
%
% authors:Qianqian Fang (q.fang <at> neu.edu)
% initially created on 2013/08/01
Expand Down Expand Up @@ -71,6 +71,12 @@
% output:
% dat: a cell array, where {...} blocks are converted into cell arrays,
% and [...] are converted to arrays
% mmap: (optional) a cell array in the form of
% {{jsonpath1,[start,length]}, {jsonpath2,[start,length]}, ...}
% where jsonpath_i is a string in the form of JSONPath, and
% start is an integer referring to the offset from the beginning
% of the stream, and length is the JSON object string length.
% For more details, please see the help section of loadjson.m
%
% examples:
% obj=struct('string','value','array',[1 2 3]);
Expand Down
95 changes: 72 additions & 23 deletions loadjson.m
Original file line number Diff line number Diff line change
Expand Up @@ -80,11 +80,60 @@
% output:
% dat: a cell array, where {...} blocks are converted into cell arrays,
% and [...] are converted to arrays
% mmap: (optional) a cell array in the form of
% {{jsonpath1,[start,length]}, {jsonpath2,[start,length]}, ...}
% where jsonpath_i is a string in the form of JSONPath [1], and
% start is an integer referring to the offset from the beginning
% of the stream, and length is the JSON object string length.
% mmap: (optional) a cell array as memory-mapping table in the form of
% {{jsonpath1,[start,length,<whitespace>]},
% {jsonpath2,[start,length,<whitespace>]}, ...}
% where jsonpath_i is a string in the JSONPath [1,2] format, and
% "start" is an integer referring to the offset from the beginning
% of the stream, and "length" is the JSON object string length.
% An optional 3rd integer "whitespace" may appear to record the
% preceding whitespace length in case expansion of the data
% record is needed when using the mmap.
%
% The memory-mapping table (mmap) is useful for quickly reading/writing
% specific data records inside a large JSON file without needing
% to load/parse/overwrite the entire file.
%
% The JSONPath keys used in mmap are largely compatible with the
% upstream specification defined in [1], with a slight extension
% to handle concatenated JSON files.
%
% In the mmap jsonpath key, a '$' denotes the root object, a '.'
% denotes a child of the preceding element; '.key' points to the
% value segment of the child named "key" of the preceding
% object; '.[i]' denotes the (i+1)th member of the preceding
% element, which must be an array. For example, a key
%
% $.obj1.obj2.[0].obj3
%
% defines the memory-map of the "value" section in the below
% hierarchy:
% {
% "obj1":{
% "obj2":[
% {"obj3":value},
% ...
% ],
% ...
% }
% }
% Please note that "value" can be any valid JSON value, including
% an array, an object, a string or numerical value.
%
% To handle concatenated JSON objects (including ndjson,
% http://ndjson.org/), such as
%
% {"root1": {"obj1": ...}}
% ["root2", value1, value2, {"obj2": ...}]
% {"root3": ...}
%
% we use '$' or '$0' for the first root-object, and '$1' refers
% to the 2nd root object (["root2",...]) and '$2' refers to the
% 3rd root object, and so on. Please note that this syntax is an
% extension to the JSONPath documentation [1,2].
%
% [1] https://goessner.net/articles/JsonPath/
% [2] http://jsonpath.herokuapp.com/
%
% examples:
% dat=loadjson('{"obj":{"string":"value","array":[1,2,3]}}')
Expand Down Expand Up @@ -151,7 +200,7 @@
opt.parsestringarray=jsonopt('ParseStringArray',0,opt);
opt.usemap=jsonopt('UseMap',0,opt);
opt.arraydepth_=1;
mmaponly=jsonopt('MmapOnly',0,opt);
opt.mmaponly=jsonopt('MmapOnly',0,opt);

if(jsonopt('ShowProgress',0,opt)==1)
opt.progressbar_=waitbar(0,'loading ...');
Expand All @@ -163,27 +212,27 @@
maxobjid=inf;
end
opt.jsonpath_='$';
if(nargout>1 || mmaponly)
if(nargout>1 || opt.mmaponly)
mmap={};
end
jsoncount=1;
while pos <= inputlen
[cc,pos,w1]=next_char(inputstr, pos);
switch(cc)
case '{'
if(nargout>1 || mmaponly)
mmap{end+1}={opt.jsonpath_,pos-w1};
if(nargout>1 || opt.mmaponly)
mmap{end+1}={opt.jsonpath_,[pos, 0, w1]};
[data{jsoncount},pos,index_esc,newmmap] = parse_object(inputstr, pos, esc, index_esc,opt);
mmap{end}{2}=[mmap{end}{2},pos-mmap{end}{2}];
mmap{end}{2}(2)=pos-mmap{end}{2}(1);
mmap=[mmap(:);newmmap(:)];
else
[data{jsoncount},pos,index_esc] = parse_object(inputstr, pos, esc, index_esc,opt);
end
case '['
if(nargout>1 || mmaponly)
mmap{end+1}={opt.jsonpath_,pos-w1};
if(nargout>1 || opt.mmaponly)
mmap{end+1}={opt.jsonpath_,[pos,0,w1]};
[data{jsoncount},pos,index_esc,newmmap] = parse_array(inputstr, pos, esc, index_esc,opt);
mmap{end}{2}=[mmap{end}{2},pos-mmap{end}{2}];
mmap{end}{2}(2)=pos-mmap{end}{2}(1);
mmap=[mmap(:);newmmap(:)];
else
[data{jsoncount},pos,index_esc] = parse_array(inputstr, pos, esc, index_esc,opt);
Expand All @@ -206,10 +255,11 @@
if(jsoncount==1 && iscell(data))
data=data{1};
end
if(nargout>1 || mmaponly)
if(nargout>1 || opt.mmaponly)
mmap=mmap';
mmap=filterjsonmmap(mmap, jsonopt('MMapExclude',{},opt), 0);
mmap=filterjsonmmap(mmap, jsonopt('MMapInclude',{},opt), 1);
mmap=cellfun(@(x) {x{1},x{2}(1:(2+int8(length(x{2})>=3 && (x{2}(3)>0))))}, mmap, 'UniformOutput', false);
end
if(jsonopt('JDataDecode',1,varargin{:})==1)
try
Expand All @@ -220,7 +270,7 @@
ME.identifier, ME.message, savejson('',ME.stack));
end
end
if(mmaponly)
if(opt.mmaponly)
data=mmap;
end
if(isfield(opt,'progressbar_'))
Expand Down Expand Up @@ -307,13 +357,14 @@
catch
end
if(isempty(endpos) || pos~=endpos)
w2=0;
while 1
varargin{1}.arraydepth_=arraydepth+1;
if(nargout>3)
varargin{1}.jsonpath_=[origpath '.' sprintf('[%d]',length(object))];
mmap{end+1}={varargin{1}.jsonpath_, pos};
mmap{end+1}={varargin{1}.jsonpath_, [pos, 0, w2]};
[val, pos, index_esc, newmmap] = parse_value(inputstr, pos, esc, index_esc,varargin{:});
mmap{end}{2}=[mmap{end}{2}, pos-mmap{end}{2}];
mmap{end}{2}(2)=pos-mmap{end}{2}(1);
mmap=[mmap(:);newmmap(:)];
else
[val, pos,index_esc] = parse_value(inputstr, pos, esc, index_esc,varargin{:});
Expand All @@ -323,7 +374,7 @@
if cc == ']'
break;
end
pos=parse_char(inputstr, pos, ',');
[pos, w1, w2]=parse_char(inputstr, pos, ',');
end
end
end
Expand Down Expand Up @@ -528,14 +579,12 @@
if isempty(str)
pos=error_pos('Name of value at position %d cannot be empty',inputstr,pos);
end
pos=parse_char(inputstr, pos, ':');
[pos, w1, w2]=parse_char(inputstr, pos, ':');
if(nargout>3)
varargin{1}.jsonpath_=[origpath,'.',str];
mmap{end+1}={varargin{1}.jsonpath_,pos};
end
if(nargout>3)
mmap{end+1}={varargin{1}.jsonpath_,[pos,0,w2]};
[val, pos,index_esc, newmmap] = parse_value(inputstr, pos, esc, index_esc, varargin{:});
mmap{end}{2}=[mmap{end}{2}, pos-mmap{end}{2}];
mmap{end}{2}(2)=pos-mmap{end}{2}(1);
mmap=[mmap(:);newmmap(:)];
else
[val, pos,index_esc] = parse_value(inputstr, pos, esc, index_esc, varargin{:});
Expand Down
10 changes: 5 additions & 5 deletions savejson.m
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@
% wrapped inside a function call as 'foo(...);'
% UnpackHex [1|0]: convert the 0x[hex code] output by loadjson
% back to the string form
% SaveBinary [0|1]: 1 - save the JSON file in binary mode; 0 - text mode.
% SaveBinary [1|0]: 1 - save the JSON file in binary mode; 0 - text mode.
% Compact [0|1]: 1 - output compact JSON format (remove all newlines and tabs)
% Compression 'zlib', 'gzip', 'lzma', 'lzip', 'lz4' or 'lz4hc': specify array
% compression method; currently only supports 6 methods. The
Expand Down Expand Up @@ -254,11 +254,11 @@
if(jsonopt('Append',0,opt))
mode='a';
end
if(jsonopt('SaveBinary',0,opt)==1)
if(~isempty(encoding))
fid = fopen(filename, [mode 'b'],endian,encoding);
else
if(jsonopt('SaveBinary',1,opt)==1)
if(isempty(encoding))
fid = fopen(filename, [mode 'b'],endian);
else
fid = fopen(filename, [mode 'b'],endian,encoding);
end
fwrite(fid,json);
else
Expand Down
6 changes: 3 additions & 3 deletions test/run_jsonlab_test.m
Original file line number Diff line number Diff line change
Expand Up @@ -302,7 +302,7 @@ function run_jsonlab_test(tests)
test_jsonlab('mmap of an object',@savejson,loadjson('{"a":1,"b":[2,3]}','mmaponly',1),...
'[["$",[1,17]],["$.a",[6,1]],["$.b",[12,5]]]','compact',1);
test_jsonlab('mmap of object with white-space',@savejson,loadjson('{"a":1 , "b" : [2,3]}','mmaponly',1),...
'[["$",[1,23]],["$.a",[6,1]],["$.b",[18,5]]]','compact',1);
'[["$",[1,23]],["$.a",[6,1]],["$.b",[18,5,2]]]','compact',1);
test_jsonlab('mmapinclude option',@savejson,loadjson('[[1,2,3],{"a":[4,5]}]','mmaponly',1,'mmapinclude','.a'),...
'[["$.[1].a",[15,5]]]','compact',1);
test_jsonlab('mmapexclude option',@savejson,loadjson('[[1,2,3],{"a":[4,5]}]','mmaponly',1,'mmapexclude',{'[0]','[1]','[2]'}),...
Expand All @@ -328,6 +328,6 @@ function run_jsonlab_test(tests)
'[["$.[1].a",[15,8]]]','compact',1);
test_jsonlab('mmapexclude option',@savejson,loadbj(savebj({[1,2,3],struct('a',[4,5])}),'mmaponly',1,'mmapexclude',{'[0]','[1]','[2]'}),...
'[["$",[1,24]]]','compact',1);
test_jsonlab('json with indentation',@savejson,loadbj(savebj({[1,2,3],struct('a',[4,5])}),'mmaponly',1,'mmapinclude','.a'),...
'[["$.[1].a",[15,8]]]','compact',1);
test_jsonlab('test multiple root objects with N padding',@savejson,loadbj([savebj({[1,2,3],struct('a',[4,5])}) 'NNN' savebj(struct('b',[4,5]))],'mmaponly',1,'mmapinclude','.b'),...
'[["$1.b",[32,8]]]','compact',1);
end

0 comments on commit 540f95c

Please sign in to comment.