Skip to content

Commit

Permalink
[feat] support caching data from any URL using hash, add NO_ZMAT flag
Browse files Browse the repository at this point in the history
  • Loading branch information
fangq committed Mar 26, 2024
1 parent 22d297e commit dfc744b
Show file tree
Hide file tree
Showing 8 changed files with 81 additions and 32 deletions.
6 changes: 3 additions & 3 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ included in the official distributions of Debian Bullseye and Ubuntu 21.04 or ne
- 2024-03-22 [22435e4] [bug] fix jsonpath handling of recursive deep scans
- 2024-03-21 [c9f8a20] [bug] support deep scan in cell and struct, merge struct/containers.Map
- 2024-03-21 [394394a] [bug] improve jsonpath cell with deep scan
- 2024-03-20 [a599e71] [feat] add jsoncache to handle _DataLink_ download cache, rename jsonpath
- 2024-03-20 [a599e71] [feat] add jsoncache to handle ``_DataLink_`` download cache, rename jsonpath
- 2024-02-19*[4f2edeb] [feat] support .. jsonpath operator for deep scan
- 2024-01-11 [c43a758] [bug] fix missing index_esc reset, add test for automap
- 2024-01-11*[ef5b472] [feat] automatically switch to map object when key length > 63
Expand Down Expand Up @@ -140,7 +140,7 @@ included in the official distributions of Debian Bullseye and Ubuntu 21.04 or ne
- 2022-03-30 [4433a21] improve datalink uri handling to consider : inside uri
- 2022-03-30 [6368409] make datalink URL query more robust
- 2022-03-29 [dd9e9c6] when file suffix is missing, assume JSON feed
- 2022-03-29*[07c58f3] initial support for _DataLink_ of online/local file with JSONPath ref
- 2022-03-29*[07c58f3] initial support for ``_DataLink_`` of online/local file with JSONPath ref
- 2022-03-29 [897b7ba] fix test for older octave
- 2022-03-20 [bf03eff] force msgpack to use big-endian
- 2022-03-13 [46bbfa9] support empty name key, which is valid in JSON, fix #79
Expand Down Expand Up @@ -398,7 +398,7 @@ for reading and writing below files types:

- JSON based files: ``.json``, ``.jdt`` (text JData file), ``.jmsh`` (text JMesh file),
``.jnii`` (text JNIfTI file), ``.jnirs`` (text JSNIRF file)
- BJData based files: ``.bjd`, ``.jdb` (binary JData file), ``.bmsh`` (binary JMesh file),
- BJData based files: ``.bjd``, ``.jdb`` (binary JData file), ``.bmsh`` (binary JMesh file),
``.bnii`` (binary JNIfTI file), ``.bnirs`` (binary JSNIRF file), ``.pmat`` (MATLAB session file)
- UBJSON based files: ``.ubj``
- MessagePack based files: ``.msgpack``
Expand Down
5 changes: 4 additions & 1 deletion gzipdecode.m
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,10 @@
if (nargin == 0)
error('you must provide at least 1 input');
end
if (exist('zmat', 'file') == 2 || exist('zmat', 'file') == 3)

nozmat = getvarfrom({'caller', 'base'}, 'NO_ZMAT');

if ((exist('zmat', 'file') == 2 || exist('zmat', 'file') == 3) && (isempty(nozmat) || nozmat == 0))
if (nargin > 1)
[varargout{1:nargout}] = zmat(varargin{1}, varargin{2:end});
else
Expand Down
4 changes: 3 additions & 1 deletion gzipencode.m
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,9 @@
error('you must provide at least 1 input');
end

if (exist('zmat', 'file') == 2 || exist('zmat', 'file') == 3)
nozmat = getvarfrom({'caller', 'base'}, 'NO_ZMAT');

if ((exist('zmat', 'file') == 2 || exist('zmat', 'file') == 3) && (isempty(nozmat) || nozmat == 0))
[varargout{1:nargout}] = zmat(varargin{1}, 1, 'gzip');
return
elseif (isoctavemesh)
Expand Down
2 changes: 1 addition & 1 deletion jdlink.m
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@
if (opt.showlink)
fprintf(1, 'downloading from URL: %s\n', uripath);
end
rawdata = webread(uripath);
rawdata = webread(uripath, weboptions('ContentType', 'binary'));
fname = [cachepath{1} filesep filename];
fpath = fileparts(fname);
if (~exist(fpath, 'dir'))
Expand Down
83 changes: 59 additions & 24 deletions jsoncache.m
Original file line number Diff line number Diff line change
Expand Up @@ -11,18 +11,25 @@
%
% input:
% hyperlink: if a single input is provided, the function checks whether it is
% a hyperlink starting with http:// or https://, if so, it
% trys to extract the database name, document name and file
% name using NeuroJSON's standard link format as
% a hyperlink starting with http://, https:// or ftp://, if
% so, it tries to extract the database name, document name and
% file name using NeuroJSON's standard link format as
%
% https://neurojson.org/io/stat.cgi?action=get&db=..&doc=..&file=..&size=..
%
% if the string does not contain a link, it is treated as a
% local file path
% if the URL does not follow the above format, a SHA-256 hash
% will be computed based on the full URL to produce filename;
% dbname is set as the first 2 letters of the hash and
% docname is set to the 3rd/4th letters of the hash; the
% domain name is also extracted from the URL; if the URL
% contains the file's suffix, it is appended to the filename.
%
% if the string does not contain a link, or the link starts
% with file://, it is treated as a local file path
% dbname: the name of the NeuroJSON database (must exist)
% docname: the name of the NeuroJSON dataset document (must exist)
% filename: the name of the data file - may contain a relative folder
% domain: optional, if not given, 'io' is used; otherwise, user can
% domain: optional, if not given, 'default' is used; otherwise, user can
% specify customized domain name
%
% output:
Expand All @@ -46,6 +53,13 @@
% if a global variable NEUROJSON_CACHE is set in 'base', it will be
% used instead of the above search paths
%
%
% example:
% [cachepath, filename] = jsoncache('https://neurojson.org/io/stat.cgi?action=get&db=openneuro&doc=ds000001&file=sub-01/anat/sub-01_inplaneT2.nii.gz&size=669578')
% [cachepath, filename] = jsoncache('https://raw.githubusercontent.com/fangq/jsonlab/master/examples/example1.json')
% [cachepath, filename] = jsoncache('https://neurojson.io:7777/adhd200/Brown')
% [cachepath, filename] = jsoncache('https://neurojson.io:7777/openneuro/ds003805')
%
% -- this function is part of iso2mesh toolbox (http://iso2mesh.sf.net)
%

Expand All @@ -66,13 +80,13 @@
end

if (nargin < 4)
domain = 'io';
domain = 'default';
end

if (nargin == 1)
link = dbname;
if (isempty(regexp(link, '://', 'once')))
filename = link;
if (~isempty(regexp(link, '^file://', 'once')) || isempty(regexp(link, '://', 'once')))
filename = regexprep(link, '^file://', '');
if (exist(filename, 'file'))
cachepath = filename;
filename = true;
Expand All @@ -81,24 +95,45 @@
else
if (~isempty(regexp(link, '^https*://neurojson.org/io/', 'once')))
domain = 'io';
end
dbname = regexp(link, '(?<=db=)[^&]+', 'match');
if (~isempty(dbname))
dbname = dbname{1};
else
dbname = '';
newdomain = regexprep(regexp(link, '^(https*|ftp)://[^\/?#:]+', 'match', 'once'), '^(https*|ftp)://', '');
if (~isempty(newdomain))
domain = newdomain;
end
end
docname = regexp(link, '(?<=doc=)[^&]+', 'match');
if (~isempty(docname))
docname = docname{1};
else
docname = '';
dbname = regexp(link, '(?<=db=)[^&]+', 'match', 'once');
docname = regexp(link, '(?<=doc=)[^&]+', 'match', 'once');
filename = regexp(link, '(?<=file=)[^&]+', 'match', 'once');
if (isempty(filename) && strcmp(domain, 'neurojson.io'))
ref = regexp(link, '^(https*|ftp)://neurojson.io(:\d+)*(?<dbname>/[^\/]+)(?<docname>/[^\/]+)(?<filename>/[^\/?]+)*', 'names', 'once');
if (~isempty(ref))
if (~isempty(ref.dbname))
dbname = ref.dbname(2:end);
end
if (~isempty(ref.docname))
docname = ref.docname(2:end);
end
if (~isempty(ref.filename))
filename = ref.filename(2:end);
elseif (~isempty(dbname))
if (~isempty(docname))
filename = [docname '.json'];
else
filename = [dbname '.json'];
end
end
end
end
filename = regexp(link, '(?<=file=)[^&]+', 'match');
if (~isempty(filename))
filename = filename{1};
else
filename = '';
if (isempty(filename))
filename = jdatahash(link);
suffix = regexp(link, '\.\w{1,5}(?=([#&].*)*$)', 'match', 'once');
filename = [filename suffix];
if (isempty(dbname))
dbname = filename(1:2);
end
if (isempty(docname))
docname = filename(3:4);
end
end
end
end
Expand Down
4 changes: 4 additions & 0 deletions loadjson.m
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,10 @@
if (nargout > 1 || opt.mmaponly)
mmap = {};
end
if (regexp(inputstr, '^\s*$'))
data = [];
inputlen = 0;
end
jsoncount = 1;
while pos <= inputlen
[cc, pos, w1] = next_char(inputstr, pos);
Expand Down
5 changes: 4 additions & 1 deletion zlibdecode.m
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,10 @@
if (nargin == 0)
error('you must provide at least 1 input');
end
if (exist('zmat', 'file') == 2 || exist('zmat', 'file') == 3)

nozmat = getvarfrom({'caller', 'base'}, 'NO_ZMAT');

if ((exist('zmat', 'file') == 2 || exist('zmat', 'file') == 3) && (isempty(nozmat) || nozmat == 0))
if (nargin > 1)
[varargout{1:nargout}] = zmat(varargin{1}, varargin{2:end});
else
Expand Down
4 changes: 3 additions & 1 deletion zlibencode.m
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,9 @@
error('you must provide at least 1 input');
end

if (exist('zmat', 'file') == 2 || exist('zmat', 'file') == 3)
nozmat = getvarfrom({'caller', 'base'}, 'NO_ZMAT');

if ((exist('zmat', 'file') == 2 || exist('zmat', 'file') == 3) && (isempty(nozmat) || nozmat == 0))
[varargout{1:nargout}] = zmat(varargin{1}, 1, 'zlib');
return
elseif (isoctavemesh)
Expand Down

0 comments on commit dfc744b

Please sign in to comment.