Skip to content

Commit

Permalink
[GeoJSON] OGR_SCHEMA open option implementation (#11464)
Browse files Browse the repository at this point in the history
  • Loading branch information
elpaso authored Dec 10, 2024
1 parent 1ee024f commit f6b2cd5
Show file tree
Hide file tree
Showing 5 changed files with 428 additions and 12 deletions.
272 changes: 272 additions & 0 deletions autotest/ogr/ogr_geojson.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@

pytestmark = pytest.mark.require_driver("GeoJSON")


###############################################################################
# Test utilities

Expand Down Expand Up @@ -5502,3 +5503,274 @@ def test_ogr_geojson_force_opening_stacta():

ds = gdal.OpenEx("../gdrivers/data/stacta/test.json", allowed_drivers=["GeoJSON"])
assert ds.GetDriver().GetDescription() == "GeoJSON"


######################################################################
# Test schema override open option with GeoJSON driver
#
@pytest.mark.parametrize(
    "open_options, expected_field_types, expected_field_names, expected_warning",
    [
        (
            [],
            [
                ogr.OFTString,
                ogr.OFTInteger,
                ogr.OFTReal,
                ogr.OFTInteger,  # bool subType
                ogr.OFTString,  # int string
                ogr.OFTString,  # real string
                (ogr.OFTString, ogr.OFSTNone),  # json subType
                ogr.OFTString,  # uuid subType
            ],
            [],
            None,
        ),
        # Override string field with integer
        (
            [
                r'OGR_SCHEMA={"layers": [{"name": "test_point", "fields": [{ "name": "str", "type": "Integer" }]}]}'
            ],
            [
                ogr.OFTInteger,  # <-- overridden
                ogr.OFTInteger,
                ogr.OFTReal,
                ogr.OFTInteger,  # bool subType
                ogr.OFTString,  # int string
                ogr.OFTString,  # real string
                ogr.OFTString,  # json subType
                ogr.OFTString,  # uuid subType
            ],
            [],
            None,
        ),
        # Override full schema and JSON/UUID subtype
        (
            [
                r'OGR_SCHEMA={ "layers": [{"name": "test_point", "schemaType": "Full", "fields": [{ "name": "json_str", "subType": "JSON", "new_name": "json_str" }, {"name": "uuid_str", "subType": "UUID" }]}]}'
            ],
            [
                (ogr.OFTString, ogr.OFSTJSON),  # json subType
                (ogr.OFTString, ogr.OFSTUUID),  # uuid subType
            ],
            ["json_str"],
            None,
        ),
        # Test width and precision override
        (
            [
                r'OGR_SCHEMA={ "layers": [{"name": "test_point", "fields": [{ "name": "real", "width": 7, "precision": 3 }]}]}'
            ],
            [
                ogr.OFTString,
                ogr.OFTInteger,
                ogr.OFTReal,
                ogr.OFTInteger,  # bool subType
                ogr.OFTString,  # int string
                ogr.OFTString,  # real string
                (ogr.OFTString, ogr.OFSTNone),  # json subType
                ogr.OFTString,  # uuid subType
            ],
            [],
            None,
        ),
        # Test boolean and short integer subtype
        (
            [
                r'OGR_SCHEMA={ "layers": [{"name": "test_point", "fields": [{ "name": "int", "subType": "Boolean" }, { "name": "real", "type": "Integer", "subType": "Int16" }]}]}'
            ],
            [
                ogr.OFTString,
                (ogr.OFTInteger, ogr.OFSTBoolean),  # bool overridden subType
                (ogr.OFTInteger, ogr.OFSTInt16),  # int16 overridden subType
                ogr.OFTInteger,  # bool subType
                ogr.OFTString,  # int string
                ogr.OFTString,  # real string
                ogr.OFTString,  # json subType
                ogr.OFTString,  # uuid subType
            ],
            [],
            None,
        ),
        # Test real and int str override
        (
            [
                r'OGR_SCHEMA={ "layers": [{"name": "test_point", "fields": [{ "name": "int_str", "type": "Integer" }, { "name": "real_str", "type": "Real" }]}]}'
            ],
            [
                ogr.OFTString,
                ogr.OFTInteger,
                ogr.OFTReal,
                ogr.OFTInteger,  # bool subType
                ogr.OFTInteger,  # int string
                ogr.OFTReal,  # real string
                ogr.OFTString,  # json subType
                ogr.OFTString,  # uuid subType
            ],
            [],
            None,
        ),
        # Test invalid schema
        (
            [
                r'OGR_SCHEMA={ "layers": [{"name": "test_point", "fields": [{ "name": "str", "type": "xxxxx" }]}]}'
            ],
            [],
            [],
            "Unsupported field type: xxxxx for field str",
        ),
        # Test invalid field name
        (
            [
                r'OGR_SCHEMA={ "layers": [{"name": "test_point", "fields": [{ "name": "xxxxx", "type": "String", "new_name": "new_str" }]}]}'
            ],
            [],
            [],
            "Field xxxxx not found",
        ),
        # Test invalid layer name
        (
            [
                r'OGR_SCHEMA={ "layers": [{"name": "xxxxx", "fields": [{ "name": "str", "type": "String" }]}]}'
            ],
            [],
            [],
            "Layer xxxxx not found",
        ),
    ],
)
def test_ogr_geojson_schema_override(
    tmp_path, open_options, expected_field_types, expected_field_names, expected_warning
):
    """Check the OGR_SCHEMA open option of the GeoJSON driver.

    Parameters (supplied by the parametrization above):

    open_options -- open options; when non-empty, the first entry is the
        ``OGR_SCHEMA=<json>`` option under test.
    expected_field_types -- one entry per expected field, either an OGR field
        type or a ``(type, subtype)`` tuple.  An empty list means that opening
        the dataset is expected to fail.
    expected_field_names -- expected field names, in order.  An empty list
        stands for the eight fields of the generated test file.
    expected_warning -- text expected in the last GDAL error message, or
        None when no error is expected.
    """

    json_data = {
        "type": "FeatureCollection",
        "features": [
            {
                "type": "Feature",
                "properties": {
                    "str": "1",
                    "int": 2,
                    "real": 3.4,
                    "bool": 1,
                    "int_str": "2",
                    "real_str": "3.4",
                    "json_str": '{"foo": 1}',
                    "uuid_str": "123e4567-e89b-12d3-a456-426614174000",
                },
                "geometry": {"type": "Point", "coordinates": [1, 2]},
            }
        ],
    }

    json_file = tmp_path / "test_point.json"
    with open(json_file, "w") as f:
        json.dump(json_data, f)

    gdal.ErrorReset()

    # Split on the first "=" only, so that an "=" inside the JSON payload
    # cannot truncate the schema.
    schema = None
    if open_options:
        schema = open_options[0].partition("=")[2] or None

    # Build a new list rather than mutating the parametrized one.
    if schema:
        open_options = open_options[1:] + ["OGR_SCHEMA=" + schema]
    else:
        open_options = []

    def open_dataset():
        return gdal.OpenEx(
            json_file,
            gdal.OF_VECTOR | gdal.OF_READONLY,
            open_options=open_options,
            allowed_drivers=["GeoJSON"],
        )

    with gdal.quiet_errors():

        # Validate the JSON schema (only for the cases expected to succeed)
        if not expected_warning and schema:
            gdaltest.validate_json(
                json.loads(schema), "ogr_fields_override.schema.json"
            )

        # Check error if expected_field_types is empty
        if not expected_field_types:
            with gdaltest.disable_exceptions():
                ds = open_dataset()
                last_error = gdal.GetLastErrorMsg()
                assert (
                    expected_warning in last_error
                ), f"Warning {expected_warning} not found, got {last_error} instead"
                assert ds is None
            return

        ds = open_dataset()
        assert ds is not None

        lyr = ds.GetLayer(0)
        assert lyr.GetFeatureCount() == 1

        lyr_defn = lyr.GetLayerDefn()
        assert lyr_defn.GetFieldCount() == len(expected_field_types)

        if not expected_field_names:
            expected_field_names = [
                "str",
                "int",
                "real",
                "bool",
                "int_str",
                "real_str",
                "json_str",
                "uuid_str",
            ]

        feat = lyr.GetNextFeature()

        # Check field types
        for i, expected_name in enumerate(expected_field_names):
            field_defn = feat.GetFieldDefnRef(i)
            expected = expected_field_types[i]
            if isinstance(expected, tuple):
                expected_type, expected_subtype = expected
                assert field_defn.GetType() == expected_type
                assert field_defn.GetSubType() == expected_subtype
            else:
                assert field_defn.GetType() == expected
            assert field_defn.GetName() == expected_name

        # Test width and precision override
        if open_options and "precision" in open_options[0]:
            assert feat.GetFieldDefnRef(2).GetWidth() == 7
            assert feat.GetFieldDefnRef(2).GetPrecision() == 3

        # Check feature content
        if "int" in expected_field_names:
            int_sub_type = feat.GetFieldDefnRef("int").GetSubType()
            # Parenthesize the conditional: without it the assertion reads
            # "(value == 1) if boolean else 2", which is always true for
            # non-boolean fields.
            expected_int = 1 if int_sub_type == ogr.OFSTBoolean else 2
            assert feat.GetFieldAsInteger("int") == expected_int
        if "str" in expected_field_names:
            assert feat.GetFieldAsString("str") == "1"
        if "new_str" in expected_field_names:
            assert feat.GetFieldAsString("new_str") == "1"
        if "real_str" in expected_field_names:
            assert feat.GetFieldAsDouble("real_str") == 3.4
        if "int_str" in expected_field_names:
            assert feat.GetFieldAsInteger("int_str") == 2

        if expected_warning:
            last_error = gdal.GetLastErrorMsg()
            assert (
                expected_warning in last_error
            ), f"Warning {expected_warning} not found, got {last_error} instead"
33 changes: 21 additions & 12 deletions doc/source/drivers/vector/geojson.rst
Original file line number Diff line number Diff line change
Expand Up @@ -78,13 +78,13 @@ logic:
#. Otherwise OGRGeoJSON is used.

When accessing a Web Service as a datasource (i.e. FeatureServer), each request
will produce new layer. This behavior conforms to stateless nature of
will produce a new layer. This behavior conforms to the stateless nature of
HTTP transaction and is similar to how Web browsers operate: single
request == single page.

If a top-level member of GeoJSON data is of any other type than
*FeatureCollection*, the driver will produce a layer with only one
feature. Otherwise, a layer will consists of a set of features.
feature. Otherwise, a layer will consist of a set of features.

If the :oo:`NATIVE_DATA` open option is set to YES, members at the level of
the FeatureCollection will be stored as a serialized JSON object in the
Expand All @@ -111,19 +111,19 @@ properties, then resulting schema of fields in OGRFeatureDefn is
generated as `union <http://en.wikipedia.org/wiki/Union_(set_theory)>`__
of all *Feature* properties.

Schema detection will recognized fields of type String, Integer, Real,
Schema detection will recognize fields of type String, Integer, Real,
StringList, IntegerList and RealList, Integer(Boolean), Date, Time and DateTime.

It is possible to tell the driver to not to process attributes by
setting configuration option :config:`ATTRIBUTES_SKIP=YES`.
Default behavior is to preserve all attributes (as an union, see
previous paragraph), what is equal to setting
It is possible to tell the driver not to process attributes by
setting the configuration option :config:`ATTRIBUTES_SKIP=YES`.
The default behavior is to preserve all attributes (as a union, see
previous paragraph), which is equal to setting
:config:`ATTRIBUTES_SKIP=NO`.

If the :oo:`NATIVE_DATA` open option is set to YES, the Feature JSON object
will be stored as a serialized JSON object in the NativeData property of
the OGRFeature object (and "application/vnd.geo+json" in the
NativeMediaType property). On write, if a OGRFeature to be written has
NativeMediaType property). On write, if the OGRFeature to be written has
its NativeMediaType property set to "application/vnd.geo+json" and its
NativeData property set to a string that is a serialized JSON object,
then extra members of this object (i.e. not the "property" dictionary,
Expand All @@ -135,14 +135,14 @@ Geometry
--------

Similarly to the issue with mixed-properties features, the *GeoJSON
Specification* draft does not require all *Feature* objects in a
collection must have geometry of the same type. Fortunately, OGR objects
Specification* draft does not require that all *Feature* objects in a
collection have the same type of geometry. Fortunately, OGR objects
model does allow geometries of different types in a single layer -
a heterogeneous layer. By default, the GeoJSON driver preserves the type of
geometries.

However, sometimes there is a need to generate a homogeneous layer from
a set of heterogeneous features. For this purpose, it is possible to
However, sometimes the need arises to have a homogeneous layer from a
set of heterogeneous features. For this purpose, it is possible to
tell the driver to wrap all geometries with OGRGeometryCollection type
as a common denominator. This behavior may be controlled by setting
the :config:`GEOMETRY_AS_COLLECTION` configuration option to YES.
Expand Down Expand Up @@ -239,6 +239,15 @@ This driver supports the following open options:
Can also be set with the :config:`OGR_GEOJSON_DATE_AS_STRING`
configuration option.

- .. oo:: OGR_SCHEMA
:choices: <filename>|<json string>
:since: 3.11.0

Partially or totally overrides the auto-detected schema to use for creating the layer.
The overrides are defined as a JSON list of field definitions.
This can be a filename, a URL or a JSON string conformant with the `ogr_fields_override.schema.json schema <https://raw.githubusercontent.com/OSGeo/gdal/refs/heads/master/ogr/data/ogr_fields_override.schema.json>`_.


To explain :oo:`FLATTEN_NESTED_ATTRIBUTES`, consider the following GeoJSON
fragment:

Expand Down
3 changes: 3 additions & 0 deletions ogr/ogrsf_frmts/geojson/ogr_geojson.h
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,9 @@ class OGRGeoJSONDataSource final : public GDALDataset

CPLErr Close() override;

// Analyze the OGR_SCHEMA open options and apply changes to the feature definition, return false in case of a critical error
bool DealWithOgrSchemaOpenOption(const GDALOpenInfo *poOpenInfo);

static const size_t SPACE_FOR_BBOX = 130;

private:
Expand Down
Loading

0 comments on commit f6b2cd5

Please sign in to comment.