From 68fd51911e2cd022be274b966c117b3cc00ceb1e Mon Sep 17 00:00:00 2001
From: daltonkell
Date: Mon, 20 Apr 2020 13:04:54 +0000
Subject: [PATCH] Enable ncCF format requests to TableDAP

---
Review note: line structure was restored from a whitespace-collapsed copy,
so blank context lines and indentation are inferred. The blob ids on the
"index" lines are carried over and predate the review edits below.

 compliance_checker/protocols/erddap.py  | 45 +++++++++++++++++++++++++
 compliance_checker/protocols/opendap.py | 38 +++++++++++++++++++++
 compliance_checker/suite.py             | 10 ++++--
 3 files changed, 91 insertions(+), 2 deletions(-)
 create mode 100644 compliance_checker/protocols/erddap.py

diff --git a/compliance_checker/protocols/erddap.py b/compliance_checker/protocols/erddap.py
new file mode 100644
index 00000000..f31ea648
--- /dev/null
+++ b/compliance_checker/protocols/erddap.py
@@ -0,0 +1,45 @@
+import io
+import urllib.request
+import compliance_checker.protocols.opendap as opendap
+
+
+def is_tabledap(url):
+    """
+    Identify a dataset as an ERDDAP TableDAP dataset.
+
+    The check is a plain substring test: any URL containing "tabledap"
+    is treated as a TableDAP endpoint.
+
+    Parameters
+    ----------
+    url (str) : URL to dataset
+
+    Returns
+    -------
+    bool
+    """
+
+    return "tabledap" in url
+
+
+def get_tabledap_bytes(url, ftype):
+    """
+    ERDDAP TableDAP returns an OPeNDAP "sequence" response by default
+    when no file extensions are provided. If a user wishes to get a dataset
+    from an ERDDAP TableDAP URL, append the desired file extension and return
+    a byte buffer object containing the data.
+
+    Parameters
+    ----------
+    url (str) : URL to TableDAP dataset
+    ftype (str) : file format extension, without the leading dot (e.g. "ncCF")
+
+    Returns
+    -------
+    io.BytesIO buffer object
+    """
+
+    vstr = opendap.create_DAP_variable_str(url)  # variable str from DDS
+    _url = f"{url}.{ftype}?{vstr}"
+    with urllib.request.urlopen(_url) as resp:
+        return io.BytesIO(resp.read())
diff --git a/compliance_checker/protocols/opendap.py b/compliance_checker/protocols/opendap.py
index fb4fcd5a..1006f106 100644
--- a/compliance_checker/protocols/opendap.py
+++ b/compliance_checker/protocols/opendap.py
@@ -4,8 +4,46 @@
 
 Functions to assist in determining if the URL is an OPeNDAP endpoint
 '''
+import io
 import requests
+import urllib.parse
+import urllib.request
 
+
+def create_DAP_variable_str(url):
+    """
+    Create a URL-encoded string of variables for a given DAP dataset.
+    Works on OPeNDAP datasets.
+
+    The variable names are read from the dataset's DDS response
+    (``<url>.dds``), joined with commas and percent-encoded.
+
+    Parameters
+    ----------
+    url (str): endpoint to *DAP dataset
+
+    Returns
+    -------
+    str
+    """
+
+    # get dds
+    with urllib.request.urlopen(f"{url}.dds") as resp:
+        dds = resp.read().decode()
+
+    names = []
+    for line in dds.splitlines():
+        # strip() also drops "\r" and tabs, not just the indenting spaces
+        line = line.strip()
+        # skip blank lines and lines that open/close a structure ("Dataset {")
+        if not line or "{" in line or "}" in line:
+            continue
+        # what is left reads "<type> <name>;" -- keep only the name
+        names.append(line.split()[-1].strip(";"))
+
+    # encode as proper URL characters
+    return urllib.parse.quote(",".join(names))
+
 
 def is_opendap(url):
     '''
diff --git a/compliance_checker/suite.py b/compliance_checker/suite.py
index 23ecb73a..4ad02e6a 100644
--- a/compliance_checker/suite.py
+++ b/compliance_checker/suite.py
@@ -15,7 +15,7 @@
 from compliance_checker.cf.cf import CFBaseCheck
 from owslib.sos import SensorObservationService
 from owslib.swe.sensor.sml import SensorML
-from compliance_checker.protocols import opendap, netcdf, cdl
+from compliance_checker.protocols import opendap, netcdf, cdl, erddap
 from compliance_checker.base import BaseCheck
 from compliance_checker import MemoizedDataset
 from collections import defaultdict
@@ -723,7 +723,13 @@ def load_remote_dataset(self, ds_str):
         :param str ds_str: URL to the remote resource
         '''
 
-        if opendap.is_opendap(ds_str):
+        if erddap.is_tabledap(ds_str):
+            return Dataset(
+                ds_str,
+                mode="r",
+                memory=erddap.get_tabledap_bytes(ds_str, "ncCF").getbuffer()
+            )
+        elif opendap.is_opendap(ds_str):
             return Dataset(ds_str)
         else:
             # Check if the HTTP response is XML, if it is, it's likely SOS so