Skip to content

Commit

Permalink
debugged and tested mmap, add mmapinclude and mmapexclude options
Browse files Browse the repository at this point in the history
  • Loading branch information
fangq committed Feb 14, 2022
1 parent 6150ae1 commit 3dfa904
Show file tree
Hide file tree
Showing 7 changed files with 249 additions and 89 deletions.
53 changes: 53 additions & 0 deletions filterjsonmmap.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
function mmap=filterjsonmmap(mmap, patterns, isinclude)
%
% mmap=filterjsonmmap(mmap, patterns, isinclude)
%
% filter JSON mmap keys based on inclusive or exclusive string patterns
%
% authors:Qianqian Fang (q.fang <at> neu.edu)
% initially created on 2022/02/13
%
% input:
%      mmap: memory-map returned by loadjson/loadbj of the same data
%            important: mmap must be produced from the same file/string,
%            otherwise calling this function may cause data corruption
%      patterns: a string or a cell array of strings, each string will
%            be tested to match the JSONPath keys in mmap; the match is
%            a plain substring search (strfind), not a regular expression
%      isinclude: 1 (default) to include all mmap entries that match at
%            least one of the patterns, and 0 - exclude those that match
%
% output:
%      mmap: a filtered JSON mmap; the input is returned unchanged when
%            patterns is missing/empty or when mmap itself is empty
%
% examples:
%      str='{"arr":[[1,2],"a",{"c":2}],"obj":{"k":"test"}}';
%      [dat, mmap]=loadjson(str);
%      savejson('',mmap)
%      newmmap=filterjsonmmap(mmap,{'arr.[1]', 'obj.k'});
%      savejson('',newmmap)
%
% license:
%     BSD or GPL version 3, see LICENSE_{BSD,GPLv3}.txt files for details
%
% -- this function is part of JSONLab toolbox (http://iso2mesh.sf.net/cgi-bin/index.cgi?jsonlab)
%

if(nargin<3)
    isinclude=1;
end

% nothing to do without patterns; an empty mmap must also be returned
% as-is because [mmap{:}] would collapse to [] and break strfind/cellfun
% (this happens when a previous exclusion pass removed every entry)
if(nargin<2 || isempty(patterns) || isempty(mmap))
    return;
end

% every mmap entry is a {key, [start, length]} pair; flatten the pairs
% and keep the odd-indexed elements to obtain the list of keys
keylist=[mmap{:}];
keylist=keylist(1:2:end);

if(~iscell(patterns))
    patterns={patterns};
end

% mask(k) is true when key k contains at least one of the patterns
mask=false(1,length(keylist));
for i=1:length(patterns)
    mask=mask | ~cellfun('isempty', strfind(keylist,patterns{i}));
end

if(isinclude)
    mmap=mmap(mask);
else
    mmap(mask)=[];
end
end
53 changes: 31 additions & 22 deletions jsonget.m
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@
% important: mmap must be produced from the same file/string,
% otherwise calling this function may cause data corruption
% '$.jsonpath1,2,3,...': a series of strings in the form of JSONPath
% as the key to each of the record to be retrieved
% as the key to each of the records to be retrieved; if no paths
% are given, all items in mmap are retrieved
%
% output:
% json: a cell array, made of elements {'$.jsonpath_i',json_string_i}
Expand All @@ -23,8 +24,8 @@
% str='[[1,2],"a",{"c":2}]{"k":"test"}';
% [dat, mmap]=loadjson(str);
% savejson('',dat,'filename','mydata.json','compact',1);
% json=jsonget(str,mmap,'$.[0].[*]','$.[2].c')
% json=jsonget('mydata.json',mmap,'$.[0].[*]','$.[2].c')
% json=jsonget(str,mmap,'$.[0]','$.[2].c')
% json=jsonget('mydata.json',mmap,'$.[0]','$.[2].c')
%
% license:
% BSD or GPL version 3, see LICENSE_{BSD,GPLv3}.txt files for details
Expand All @@ -34,37 +35,45 @@

if(regexp(fname,'^\s*(?:\[.*\])|(?:\{.*\})\s*$','once'))
inputstr=fname;
elseif(isoctavemesh)
elseif(~exist('memmapfile','file'))
if(exist(fname,'file'))
try
fid = fopen(fname,'rb');
inputstr = fread(fid,'char',inf)';
fclose(fid);
catch
try
inputstr = urlread(['file://',fname]);
catch
inputstr = urlread(['file://',fullfile(pwd,fname)]);
end
end
end
end

mmap=[mmap{:}];
keylist=mmap(1:2:end);

loc=1:length(keylist);
if(length(varargin)>=1)
[tf,loc]=ismember(varargin,keylist);
if(any(tf))
keylist=keylist(loc);
else
keylist={};
end
end

json={};
for i=1:length(varargin)
if(regexp(varargin{i},'^\$'))
[tf,loc]=ismember(varargin{i},keylist);
if(tf)
rec={'uint8',[1,mmap{loc*2}(2)], 'x'};
if(exist('inputstr','var'))
json{end+1}={varargin{i}, inputstr(mmap{loc*2}(1):mmap{loc*2}(1)+mmap{loc*2}(2)-1)};
else
fmap=memmapfile(fname,'writable',false, 'offset',mmap{loc*2}(1),'format', rec);
json{end+1}={varargin{i}, char(fmap.Data(1).x)};
end
for i=1:length(keylist)
bmap=mmap{loc(i)*2};
rec={'uint8',[1,bmap(2)], 'x'};
if(exist('inputstr','var'))
json{end+1}={keylist{i}, inputstr(bmap(1):bmap(1)+bmap(2)-1)};
else
if(exist('fid','var') && fid>=0)
fseek(fid, bmap(1), -1);
json{end+1}={keylist{i}, fread(fid,bmap(1),'uint8=>char')};
else
fmap=memmapfile(fname,'writable',false, 'offset',bmap(1),'format', rec);
json{end+1}={keylist{i}, char(fmap.Data(1).x)};
end
end
end

if(exist('fid','var') && fid>0)
fclose(fid);
end
40 changes: 26 additions & 14 deletions jsonset.m
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
% json=jsonset(fname,mmap,'$.jsonpath1',newval1,'$.jsonpath2','newval2',...)
%
% Fast writing of JSON data records to stream or disk using memory-map
% (mmap) returned by loadjson and JSONPath-like keys
% (mmap) returned by loadjson/loadbj and JSONPath-like keys
%
% authors:Qianqian Fang (q.fang <at> neu.edu)
% initially created on 2022/02/02
Expand All @@ -22,11 +22,20 @@
% written
%
% examples:
% str='[[1,2],"a",{"c":2}]{"k":"test"}';
% [dat, mmap]=loadjson(str);
% savejson('',dat,'filename','mydata.json','compact',1);
% json=jsonset(str,mmap,'$.[2].c','5')
% json=jsonset('mydata.json',mmap,'$.[2].c','"c":5')
% % create test data
% d.arr={[1,2],'a',struct('c',2)}; d.obj=struct('k','test')
% % convert to json string
% str=savejson('',d,'compact',1)
% % parse and return mmap
% [dat, mmap]=loadjson(str);
% % display mmap entries
% savejson('',mmap)
% % replace value using mmap
% json=jsonset(str,mmap,'$.arr.[2].c','5')
% % save same json string to file (must set savebinary 1)
% savejson('',d,'filename','file.json','compact',1,'savebinary',1);
% % fast write to file
% json=jsonset('file.json',mmap,'$.arr.[2].c','5')
%
% license:
% BSD or GPL version 3, see LICENSE_{BSD,GPLv3}.txt files for details
Expand All @@ -37,7 +46,9 @@
if(regexp(fname,'^\s*(?:\[.*\])|(?:\{.*\})\s*$','once'))
inputstr=fname;
else
fid=fopen(fname,'wb');
if(~exist('memmapfile','file'))
fid=fopen(fname,'r+b');
end
end

mmap=[mmap{:}];
Expand All @@ -55,22 +66,23 @@
if(regexp(varargin{i},'^\$'))
[tf,loc]=ismember(varargin{i},keylist);
if(tf)
rec={'uint8',[1,mmap{loc*2}(2)], 'x'};
bmap=mmap{loc*2};
if(ischar(varargin{i+1}))
val=varargin{i+1};
else
val=savejson('',varargin{i+1},'compact',1);
end
if(length(val)<=rec{1,2}(2))
val=[val repmat(' ',[1,rec{1,2}(2)-length(val)])];
if(length(val)<=bmap(2))
val=[val repmat(' ',[1,bmap(2)-length(val)])];
if(exist('inputstr','var'))
inputstr(mmap{loc*2}(1):mmap{loc*2}(1)+mmap{loc*2}(2)-1)=val;
inputstr(bmap(1):bmap(1)+bmap(2)-1)=val;
else
if(exist('memmapfile','file'))
fmap=memmapfile(fname,'writable',true,'offset',mmap{loc*2}(1),'format', rec);
fmap.x=val;
rec={'uint8', [1 bmap(2)], 'x'};
fmap=memmapfile(fname,'writable',true,'offset', bmap(1)-1, 'format', rec, 'repeat',1);
fmap.Data.x=uint8(val);
else
fseek(fid,mmap{loc*2}(1)-1,'bof');
fseek(fid,bmap(1)-1,'bof');
fwrite(fid,val);
end
json{end+1}={varargin{i},val};
Expand Down
62 changes: 37 additions & 25 deletions loadbj.m
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,15 @@
% for output format, it is incompatible with all
% previous releases; if old output is desired,
% please set FormatVersion to 1.9 or earlier.
% MmapOnly [0|1]: if set to 1, this function only returns mmap
% MMapInclude 'str1' or {'str1','str2',..}: if provided, the
% returned mmap will be filtered by only keeping
% entries containing any one of the string patterns
% provided in a cell
% MMapExclude 'str1' or {'str1','str2',..}: if provided, the
% returned mmap will be filtered by removing
% entries containing any one of the string patterns
% provided in a cell
%
% output:
% dat: a cell array, where {...} blocks are converted into cell arrays,
Expand Down Expand Up @@ -98,6 +107,7 @@
opt.simplifycellarray=jsonopt('SimplifyCellArray',0,opt);
opt.usemap=jsonopt('UseMap',0,opt);
opt.nameisstring=jsonopt('NameIsString',0,opt);
mmaponly=jsonopt('MmapOnly',0,opt);

[os,maxelem,systemendian]=computer;
opt.flipendian_=(systemendian ~= upper(jsonopt('Endian','L',opt)));
Expand All @@ -108,22 +118,28 @@
maxobjid=inf;
end

mmap={};
opt.jsonpath_='$';
if(nargout>1 || mmaponly)
mmap={};
end
jsoncount=1;
while pos <= inputlen
[cc, pos]=next_char(inputstr, pos);
switch(cc)
case '{'
if(nargout>1)
if(nargout>1 || mmaponly)
mmap{end+1}={opt.jsonpath_,pos};
[data{jsoncount}, pos, newmmap] = parse_object(inputstr, pos, opt);
mmap{end}{2}=[mmap{end}{2},pos-mmap{end}{2}];
mmap=[mmap(:);newmmap(:)];
else
[data{jsoncount}, pos] = parse_object(inputstr, pos, opt);
end
case '['
if(nargout>1)
if(nargout>1 || mmaponly)
mmap{end+1}={opt.jsonpath_,pos};
[data{jsoncount}, pos, newmmap] = parse_array(inputstr, pos, opt);
mmap{end}{2}=[mmap{end}{2},pos-mmap{end}{2}];
mmap=[mmap(:);newmmap(:)];
else
[data{jsoncount}, pos] = parse_array(inputstr, pos, opt);
Expand All @@ -133,9 +149,9 @@
otherwise
error_pos('Outer level structure must be an object or an array', inputstr, pos);
end
if(jsoncount>=maxobjid)
break;
end
if(jsoncount>=maxobjid)
break;
end
opt.jsonpath_=sprintf('$%d',jsoncount);
jsoncount=jsoncount+1;
end % while
Expand All @@ -148,7 +164,11 @@
if(jsoncount==1 && iscell(data))
data=data{1};
end

if(nargout>1 || mmaponly)
mmap=mmap';
mmap=filterjsonmmap(mmap, jsonopt('MMapExclude',{},opt), 0);
mmap=filterjsonmmap(mmap, jsonopt('MMapInclude',{},opt), 1);
end
if(jsonopt('JDataDecode',1,varargin{:})==1)
try
data=jdatadecode(data,'Base64',0,'Recursive',1,varargin{:});
Expand All @@ -158,6 +178,9 @@
ME.identifier, ME.message, savejson('',ME.stack));
end
end
if(mmaponly)
data=mmap;
end
end

%%-------------------------------------------------------------------------
Expand All @@ -179,7 +202,7 @@

function [object, pos, mmap] = parse_array(inputstr, pos, varargin) % JSON array is written in row-major order
if(nargout>2)
mmap={{[varargin{1}.jsonpath_ '.[*]'],pos}};
mmap={};
origpath=varargin{1}.jsonpath_;
end
pos=parse_char(inputstr, pos, '[');
Expand Down Expand Up @@ -216,9 +239,6 @@
object=permute(reshape(object,fliplr(dim(:)')),length(dim):-1:1);
end
pos=pos+adv;
if(nargout>2)
mmap{1}{2}=[mmap{1}{2},pos-mmap{1}{2}];
end
return;
else
endpos=match_bracket(inputstr,pos);
Expand All @@ -227,9 +247,6 @@
[object, adv]=parse_block(inputstr, pos, type,count,varargin{:});
pos=pos+adv;
pos=parse_char(inputstr, pos, ']');
if(nargout>2)
mmap{1}{2}=[mmap{1}{2},pos-mmap{1}{2}+1];
end
return;
end
end
Expand All @@ -238,7 +255,9 @@
while 1
if(nargout>2)
varargin{1}.jsonpath_=[origpath '.' sprintf('[%d]',length(object))];
mmap{end+1}={varargin{1}.jsonpath_, pos};
[val, pos, newmmap] = parse_value(inputstr, pos, varargin{:});
mmap{end}{2}=[mmap{end}{2}, pos-mmap{end}{2}];
mmap=[mmap(:);newmmap(:)];
else
[val, pos] = parse_value(inputstr, pos, varargin{:});
Expand Down Expand Up @@ -277,9 +296,6 @@
if(count==-1)
pos=parse_char(inputstr, pos, ']');
end
if(nargout>2)
mmap{1}{2}=[mmap{1}{2},pos-mmap{1}{2}+1];
end
end

%%-------------------------------------------------------------------------
Expand Down Expand Up @@ -413,7 +429,8 @@
%%-------------------------------------------------------------------------
function [object, pos, mmap] = parse_object(inputstr, pos, varargin)
if(nargout>2)
mmap={{varargin{1}.jsonpath_,pos}};
mmap={};
origpath=varargin{1}.jsonpath_;
end
pos=parse_char(inputstr,pos,'{');
usemap=varargin{1}.usemap;
Expand Down Expand Up @@ -446,10 +463,8 @@
error_pos('Name of value at position %d cannot be empty', inputstr, pos);
end
if(nargout>2)
varargin{1}.jsonpath_=[mmap{1}{1},'.',str];
mmap{end+1}={varargin{1}.jsonpath_,pos-length(str)-2};
end
if(nargout>2)
varargin{1}.jsonpath_=[origpath,'.',str];
mmap{end+1}={varargin{1}.jsonpath_,pos};
[val, pos,newmmap] = parse_value(inputstr, pos, varargin{:});
mmap{end}{2}=[mmap{end}{2}, pos-mmap{end}{2}];
mmap=[mmap(:);newmmap(:)];
Expand All @@ -471,9 +486,6 @@
if(count==-1)
pos=parse_char(inputstr, pos, '}');
end
if(nargout>2)
mmap{1}={[mmap{1}{1} '.*'],[mmap{1}{2}, pos-mmap{1}{2}]};
end
end

%%-------------------------------------------------------------------------
Expand Down
Loading

0 comments on commit 3dfa904

Please sign in to comment.