diff --git a/autotest/ogr/ogr_geojson.py b/autotest/ogr/ogr_geojson.py
index 2850abb0be6d..e89af09b9d8e 100755
--- a/autotest/ogr/ogr_geojson.py
+++ b/autotest/ogr/ogr_geojson.py
@@ -40,6 +40,7 @@
 
 pytestmark = pytest.mark.require_driver("GeoJSON")
 
+
 ###############################################################################
 # Test utilities
 
@@ -5502,3 +5503,274 @@ def test_ogr_geojson_force_opening_stacta():
 
     ds = gdal.OpenEx("../gdrivers/data/stacta/test.json", allowed_drivers=["GeoJSON"])
     assert ds.GetDriver().GetDescription() == "GeoJSON"
+
+
+######################################################################
+# Test schema override open option with GeoJSON driver
+#
+@pytest.mark.parametrize(
+    "open_options, expected_field_types, expected_field_names, expected_warning",
+    [
+        (
+            [],
+            [
+                ogr.OFTString,
+                ogr.OFTInteger,
+                ogr.OFTReal,
+                ogr.OFTInteger,  # bool subType
+                ogr.OFTString,  # int string
+                ogr.OFTString,  # real string
+                (ogr.OFTString, ogr.OFSTNone),  # json subType
+                ogr.OFTString,  # uuid subType
+            ],
+            [],
+            None,
+        ),
+        # Override string field with integer
+        (
+            [
+                r'OGR_SCHEMA={"layers": [{"name": "test_point", "fields": [{ "name": "str", "type": "Integer" }]}]}'
+            ],
+            [
+                ogr.OFTInteger,  # <-- overridden
+                ogr.OFTInteger,
+                ogr.OFTReal,
+                ogr.OFTInteger,  # bool subType
+                ogr.OFTString,  # int string
+                ogr.OFTString,  # real string
+                ogr.OFTString,  # json subType
+                ogr.OFTString,  # uuid subType
+            ],
+            [],
+            None,
+        ),
+        # Override full schema and JSON/UUID subtype
+        (
+            [
+                r'OGR_SCHEMA={ "layers": [{"name": "test_point", "schemaType": "Full", "fields": [{ "name": "json_str", "subType": "JSON", "new_name": "json_str" }, {"name": "uuid_str", "subType": "UUID" }]}]}'
+            ],
+            [
+                (ogr.OFTString, ogr.OFSTJSON),  # json subType
+                (ogr.OFTString, ogr.OFSTUUID),  # uuid subType
+            ],
+            ["json_str", "uuid_str"],
+            None,
+        ),
+        # Test width and precision override
+        (
+            [
+                r'OGR_SCHEMA={ "layers": [{"name": "test_point", "fields": [{ "name": "real", "width": 7, "precision": 3 }]}]}'
+            ],
+            [
+                ogr.OFTString,
+                ogr.OFTInteger,
+                ogr.OFTReal,
+                ogr.OFTInteger,  # bool subType
+                ogr.OFTString,  # int string
+                ogr.OFTString,  # real string
+                (ogr.OFTString, ogr.OFSTNone),  # json subType
+                ogr.OFTString,  # uuid subType
+            ],
+            [],
+            None,
+        ),
+        # Test boolean and short integer subtype
+        (
+            [
+                r'OGR_SCHEMA={ "layers": [{"name": "test_point", "fields": [{ "name": "int", "subType": "Boolean" }, { "name": "real", "type": "Integer", "subType": "Int16" }]}]}'
+            ],
+            [
+                ogr.OFTString,
+                (ogr.OFTInteger, ogr.OFSTBoolean),  # bool overridden subType
+                (ogr.OFTInteger, ogr.OFSTInt16),  # int16 overridden subType
+                ogr.OFTInteger,  # bool subType
+                ogr.OFTString,  # int string
+                ogr.OFTString,  # real string
+                ogr.OFTString,  # json subType
+                ogr.OFTString,  # uuid subType
+            ],
+            [],
+            None,
+        ),
+        # Test real and int str override
+        (
+            [
+                r'OGR_SCHEMA={ "layers": [{"name": "test_point", "fields": [{ "name": "int_str", "type": "Integer" }, { "name": "real_str", "type": "Real" }]}]}'
+            ],
+            [
+                ogr.OFTString,
+                ogr.OFTInteger,
+                ogr.OFTReal,
+                ogr.OFTInteger,  # bool subType
+                ogr.OFTInteger,  # int string
+                ogr.OFTReal,  # real string
+                ogr.OFTString,  # json subType
+                ogr.OFTString,  # uuid subType
+            ],
+            [],
+            None,
+        ),
+        # Test invalid schema
+        (
+            [
+                r'OGR_SCHEMA={ "layers": [{"name": "test_point", "fields": [{ "name": "str", "type": "xxxxx" }]}]}'
+            ],
+            [],
+            [],
+            "Unsupported field type: xxxxx for field str",
+        ),
+        # Test invalid field name
+        (
+            [
+                r'OGR_SCHEMA={ "layers": [{"name": "test_point", "fields": [{ "name": "xxxxx", "type": "String", "new_name": "new_str" }]}]}'
+            ],
+            [],
+            [],
+            "Field xxxxx not found",
+        ),
+        # Test invalid layer name
+        (
+            [
+                r'OGR_SCHEMA={ "layers": [{"name": "xxxxx", "fields": [{ "name": "str", "type": "String" }]}]}'
+            ],
+            [],
+            [],
+            "Layer xxxxx not found",
+        ),
+    ],
+)
+def test_ogr_geojson_schema_override(
+    tmp_path, open_options, expected_field_types, expected_field_names, expected_warning
+):
+    """Check schema overrides requested through the OGR_SCHEMA open option.
+
+    An empty expected_field_types means that opening is expected to fail.
+    """
+
+    json_data = {
+        "type": "FeatureCollection",
+        "features": [
+            {
+                "type": "Feature",
+                "properties": {
+                    "str": "1",
+                    "int": 2,
+                    "real": 3.4,
+                    "bool": 1,
+                    "int_str": "2",
+                    "real_str": "3.4",
+                    "json_str": '{"foo": 1}',
+                    "uuid_str": "123e4567-e89b-12d3-a456-426614174000",
+                },
+                "geometry": {"type": "Point", "coordinates": [1, 2]},
+            }
+        ],
+    }
+
+    json_file = tmp_path / "test_point.json"
+    with open(json_file, "w") as f:
+        json.dump(json_data, f)
+
+    gdal.ErrorReset()
+
+    try:
+        schema = open_options[0].split("=", 1)[1]
+        open_options = open_options[1:]
+    except IndexError:
+        schema = None
+
+    with gdal.quiet_errors():
+
+        if schema:
+            open_options.append("OGR_SCHEMA=" + schema)
+        else:
+            open_options = []
+
+        # Validate the JSON schema
+        if not expected_warning and schema:
+            schema = json.loads(schema)
+            gdaltest.validate_json(schema, "ogr_fields_override.schema.json")
+
+        # Check error if expected_field_types is empty
+        if not expected_field_types:
+            with gdaltest.disable_exceptions():
+                ds = gdal.OpenEx(
+                    tmp_path / "test_point.json",
+                    gdal.OF_VECTOR | gdal.OF_READONLY,
+                    open_options=open_options,
+                    allowed_drivers=["GeoJSON"],
+                )
+                assert (
+                    gdal.GetLastErrorMsg().find(expected_warning) != -1
+                ), f"Warning {expected_warning} not found, got {gdal.GetLastErrorMsg()} instead"
+                assert ds is None
+        else:
+
+            ds = gdal.OpenEx(
+                tmp_path / "test_point.json",
+                gdal.OF_VECTOR | gdal.OF_READONLY,
+                open_options=open_options,
+                allowed_drivers=["GeoJSON"],
+            )
+
+            assert ds is not None
+
+            lyr = ds.GetLayer(0)
+
+            assert lyr.GetFeatureCount() == 1
+
+            lyr_defn = lyr.GetLayerDefn()
+
+            assert lyr_defn.GetFieldCount() == len(expected_field_types)
+
+            if len(expected_field_names) == 0:
+                expected_field_names = [
+                    "str",
+                    "int",
+                    "real",
+                    "bool",
+                    "int_str",
+                    "real_str",
+                    "json_str",
+                    "uuid_str",
+                ]
+
+            feat = lyr.GetNextFeature()
+
+            # Check field types
+            for i in range(len(expected_field_names)):
+                try:
+                    expected_type, expected_subtype = expected_field_types[i]
+                    assert feat.GetFieldDefnRef(i).GetType() == expected_type
+                    assert feat.GetFieldDefnRef(i).GetSubType() == expected_subtype
+                except TypeError:
+                    expected_type = expected_field_types[i]
+                    assert feat.GetFieldDefnRef(i).GetType() == expected_type
+                assert feat.GetFieldDefnRef(i).GetName() == expected_field_names[i]
+
+            # Test width and precision override
+            if len(open_options) > 0 and "precision" in open_options[0]:
+                assert feat.GetFieldDefnRef(2).GetWidth() == 7
+                assert feat.GetFieldDefnRef(2).GetPrecision() == 3
+
+            # Check feature content
+            if "int" in expected_field_names:
+                int_sub_type = feat.GetFieldDefnRef("int").GetSubType()
+                # The conditional must be parenthesized: otherwise the
+                # non-Boolean case asserts the constant 2, always true.
+                assert feat.GetFieldAsInteger("int") == (
+                    1 if int_sub_type == ogr.OFSTBoolean else 2
+                )
+            if "str" in expected_field_names:
+                assert feat.GetFieldAsString("str") == "1"
+            if "new_str" in expected_field_names:
+                assert feat.GetFieldAsString("new_str") == "1"
+            if "real_str" in expected_field_names:
+                assert feat.GetFieldAsDouble("real_str") == 3.4
+            if "int_str" in expected_field_names:
+                assert feat.GetFieldAsInteger("int_str") == 2
+
+        if expected_warning:
+            assert (
+                gdal.GetLastErrorMsg().find(expected_warning) != -1
+            ), f"Warning {expected_warning} not found, got {gdal.GetLastErrorMsg()} instead"
diff --git a/doc/source/drivers/vector/geojson.rst b/doc/source/drivers/vector/geojson.rst
index 0621e70791a9..4bc519abb7d6 100644
--- a/doc/source/drivers/vector/geojson.rst
+++ b/doc/source/drivers/vector/geojson.rst
@@ -78,13 +78,13 @@ logic:
 #. Otherwise OGRGeoJSON is used.
 
 Accessing Web Service as a datasource (i.e. FeatureServer), each request
-will produce new layer. This behavior conforms to stateless nature of
+will produce a new layer.
This behavior conforms to the stateless nature of HTTP transaction and is similar to how Web browsers operate: single request == single page. If a top-level member of GeoJSON data is of any other type than *FeatureCollection*, the driver will produce a layer with only one -feature. Otherwise, a layer will consists of a set of features. +feature. Otherwise, a layer will consist of a set of features. If the :oo:`NATIVE_DATA` open option is set to YES, members at the level of the FeatureCollection will be stored as a serialized JSON object in the @@ -111,19 +111,19 @@ properties, then resulting schema of fields in OGRFeatureDefn is generated as `union <http://en.wikipedia.org/wiki/Union_(set_theory)>`__ of all *Feature* properties. -Schema detection will recognized fields of type String, Integer, Real, +Schema detection will recognize fields of type String, Integer, Real, StringList, IntegerList and RealList, Integer(Boolean), Date, Time and DateTime. -It is possible to tell the driver to not to process attributes by -setting configuration option :config:`ATTRIBUTES_SKIP=YES`. -Default behavior is to preserve all attributes (as an union, see -previous paragraph), what is equal to setting +It is possible to tell the driver not to process attributes by +setting the configuration option :config:`ATTRIBUTES_SKIP=YES`. +The default behavior is to preserve all attributes (as a union, see +previous paragraph), which is equal to setting :config:`ATTRIBUTES_SKIP=NO`. If the :oo:`NATIVE_DATA` open option is set to YES, the Feature JSON object will be stored as a serialized JSON object in the NativeData property of the OGRFeature object (and "application/vnd.geo+json" in the -NativeMediaType property). On write, if a OGRFeature to be written has +NativeMediaType property). On write, if the OGRFeature to be written has its NativeMediaType property set to "application/vnd.geo+json" and its NativeData property set to a string that is a serialized JSON object, then extra members of this object (i.e.
not the "property" dictionary, @@ -135,14 +135,14 @@ Geometry -------- Similarly to the issue with mixed-properties features, the *GeoJSON -Specification* draft does not require all *Feature* objects in a -collection must have geometry of the same type. Fortunately, OGR objects +Specification* draft does not require that all *Feature* objects in a +collection have the same type of geometry. Fortunately, OGR objects model does allow to have geometries of different types in single layer - a heterogeneous layer. By default, the GeoJSON driver preserves type of geometries. -However, sometimes there is a need to generate a homogeneous layer from -a set of heterogeneous features. For this purpose, it is possible to +However, sometimes the need arises to have a homogeneous layer from a +set of heterogeneous features. For this purpose, it is possible to tell the driver to wrap all geometries with OGRGeometryCollection type as a common denominator. This behavior may be controlled by setting the :config:`GEOMETRY_AS_COLLECTION` configuration option to YES. @@ -239,6 +239,15 @@ This driver supports the following open options: Can also be set with the :config:`OGR_GEOJSON_DATE_AS_STRING` configuration option. +- .. oo:: OGR_SCHEMA + :choices: <filename>|<json string> + :since: 3.11.0 + + Partially or totally overrides the auto-detected schema to use for creating the layer. + The overrides are defined as a JSON list of field definitions.
+      This can be a filename, a URL or JSON string conformant with the `ogr_fields_override.schema.json schema <https://raw.githubusercontent.com/OSGeo/gdal/refs/heads/master/ogr/data/ogr_fields_override.schema.json>`_
+
+
 To explain :oo:`FLATTEN_NESTED_ATTRIBUTES`, consider the following GeoJSON
 fragment:
 
diff --git a/ogr/ogrsf_frmts/geojson/ogr_geojson.h b/ogr/ogrsf_frmts/geojson/ogr_geojson.h
index 6a18a332d944..61a1b46e2913 100644
--- a/ogr/ogrsf_frmts/geojson/ogr_geojson.h
+++ b/ogr/ogrsf_frmts/geojson/ogr_geojson.h
@@ -291,6 +291,9 @@ class OGRGeoJSONDataSource final : public GDALDataset
 
     CPLErr Close() override;
 
+    // Analyze the OGR_SCHEMA open options and apply changes to the feature definition, return false in case of a critical error
+    bool DealWithOgrSchemaOpenOption(const GDALOpenInfo *poOpenInfo);
+
     static const size_t SPACE_FOR_BBOX = 130;
 
   private:
diff --git a/ogr/ogrsf_frmts/geojson/ogrgeojsondatasource.cpp b/ogr/ogrsf_frmts/geojson/ogrgeojsondatasource.cpp
index 17f38a92e756..123b83b7bef2 100644
--- a/ogr/ogrsf_frmts/geojson/ogrgeojsondatasource.cpp
+++ b/ogr/ogrsf_frmts/geojson/ogrgeojsondatasource.cpp
@@ -40,6 +40,7 @@
 #include "ogrgeojsonutils.h"
 #include "ogrgeojsonwriter.h"
 #include "ogrsf_frmts.h"
+#include "ogr_schema_override.h"
 
 // #include "symbol_renames.h"
 
@@ -87,6 +88,132 @@ CPLErr OGRGeoJSONDataSource::Close()
 
     return eErr;
 }
 
+/************************************************************************/
+/*                    DealWithOgrSchemaOpenOption()                     */
+/************************************************************************/
+
+// Apply the OGR_SCHEMA open option, if present, to the field definitions
+// of the layers of this datasource. Returns true when the option is not
+// set or has been applied, false on any failure.
+bool OGRGeoJSONDataSource::DealWithOgrSchemaOpenOption(
+    const GDALOpenInfo *poOpenInfo)
+{
+
+    std::string osFieldsSchemaOverrideParam =
+        CSLFetchNameValueDef(poOpenInfo->papszOpenOptions, "OGR_SCHEMA", "");
+
+    if (!osFieldsSchemaOverrideParam.empty())
+    {
+
+        if (poOpenInfo->eAccess == GA_Update)
+        {
+            CPLError(CE_Failure, CPLE_NotSupported,
+                     "OGR_SCHEMA open option is not supported in update mode.");
+            return false;
+        }
+
+        OGRSchemaOverride osSchemaOverride;
+        if (!osSchemaOverride.LoadFromJSON(osFieldsSchemaOverrideParam) ||
+            !osSchemaOverride.IsValid())
+        {
+            return false;
+        }
+
+        const auto &oLayerOverrides = osSchemaOverride.GetLayerOverrides();
+        for (const auto &oLayer : oLayerOverrides)
+        {
+            const auto &oLayerName = oLayer.first;
+            const auto &oLayerFieldOverride = oLayer.second;
+            const bool bIsFullOverride{oLayerFieldOverride.IsFullOverride()};
+            // Taken by value: entries are erased as they are matched, so
+            // whatever is left afterwards names a field that does not exist.
+            auto oFieldOverrides = oLayerFieldOverride.GetFieldOverrides();
+            std::vector<OGRFieldDefn *> aoFields;
+
+            CPLDebug("GeoJSON", "Applying schema override for layer %s",
+                     oLayerName.c_str());
+
+            // Fail if the layer name does not exist
+            auto poLayer = GetLayerByName(oLayerName.c_str());
+            if (poLayer == nullptr)
+            {
+                CPLError(CE_Failure, CPLE_AppDefined,
+                         "Layer %s not found in GeoJSON file",
+                         oLayerName.c_str());
+                return false;
+            }
+
+            // Patch field definitions
+            auto poLayerDefn = poLayer->GetLayerDefn();
+            for (int i = 0; i < poLayerDefn->GetFieldCount(); i++)
+            {
+                auto poFieldDefn = poLayerDefn->GetFieldDefn(i);
+                auto oFieldOverride =
+                    oFieldOverrides.find(poFieldDefn->GetNameRef());
+                if (oFieldOverride != oFieldOverrides.cend())
+                {
+                    if (oFieldOverride->second.GetFieldType().has_value())
+                        whileUnsealing(poFieldDefn)
+                            ->SetType(
+                                oFieldOverride->second.GetFieldType().value());
+                    if (oFieldOverride->second.GetFieldWidth().has_value())
+                        whileUnsealing(poFieldDefn)
+                            ->SetWidth(
+                                oFieldOverride->second.GetFieldWidth().value());
+                    if (oFieldOverride->second.GetFieldPrecision().has_value())
+                        whileUnsealing(poFieldDefn)
+                            ->SetPrecision(
+                                oFieldOverride->second.GetFieldPrecision()
+                                    .value());
+                    if (oFieldOverride->second.GetFieldSubType().has_value())
+                        whileUnsealing(poFieldDefn)
+                            ->SetSubType(
+                                oFieldOverride->second.GetFieldSubType()
+                                    .value());
+                    if (oFieldOverride->second.GetFieldName().has_value())
+                        whileUnsealing(poFieldDefn)
+                            ->SetName(oFieldOverride->second.GetFieldName()
+                                          .value()
+                                          .c_str());
+
+                    if (bIsFullOverride)
+                    {
+                        aoFields.push_back(poFieldDefn);
+                    }
+                    oFieldOverrides.erase(oFieldOverride);
+                }
+            }
+
+            // Error if any field override is not found
+            if (!oFieldOverrides.empty())
+            {
+                CPLError(CE_Failure, CPLE_AppDefined,
+                         "Field %s not found in layer %s",
+                         oFieldOverrides.cbegin()->first.c_str(),
+                         oLayerName.c_str());
+                return false;
+            }
+
+            // Remove fields not in the override
+            if (bIsFullOverride)
+            {
+                // Walk the fields from last to first so that deleting one
+                // does not change the index of those still to be visited.
+                for (int i = poLayerDefn->GetFieldCount() - 1; i >= 0; i--)
+                {
+                    auto poFieldDefn = poLayerDefn->GetFieldDefn(i);
+                    if (std::find(aoFields.begin(), aoFields.end(),
+                                  poFieldDefn) == aoFields.end())
+                    {
+                        whileUnsealing(poLayerDefn)->DeleteFieldDefn(i);
+                    }
+                }
+            }
+        }
+    }
+    return true;
+}
+
 /************************************************************************/
 /*                                Open()                                */
 /************************************************************************/
@@ -175,6 +302,13 @@ int OGRGeoJSONDataSource::Open(GDALOpenInfo *poOpenInfo,
 
     SetDescription(poOpenInfo->pszFilename);
     LoadLayers(poOpenInfo, nSrcType, pszUnprefixed, pszJSonFlavor);
+
+    if (!DealWithOgrSchemaOpenOption(poOpenInfo))
+    {
+        Clear();
+        return FALSE;
+    }
+
     if (nLayers_ == 0)
     {
         bool bEmitError = true;
diff --git a/ogr/ogrsf_frmts/geojson/ogrgeojsondriver.cpp b/ogr/ogrsf_frmts/geojson/ogrgeojsondriver.cpp
index 748cb66ebf3a..93755c74f981 100644
--- a/ogr/ogrsf_frmts/geojson/ogrgeojsondriver.cpp
+++ b/ogr/ogrsf_frmts/geojson/ogrgeojsondriver.cpp
@@ -709,6 +709,11 @@ void RegisterOGRGeoJSON()
 "