diff --git a/.github/workflows/linux_build.yml b/.github/workflows/linux_build.yml index 678873756a39..8f6e304f5c2e 100644 --- a/.github/workflows/linux_build.yml +++ b/.github/workflows/linux_build.yml @@ -342,6 +342,7 @@ jobs: # For cache mkdir -p .gdal + mkdir -p .duckdb docker run \ -e CI \ @@ -355,6 +356,7 @@ jobs: --add-host=host.docker.internal:host-gateway \ --rm \ -v $(pwd)/.gdal:/.gdal \ + -v $(pwd)/.duckdb:/.duckdb \ -v $(pwd):$(pwd) \ --workdir $(pwd)/build-${{ matrix.id }} \ ${CONTAINER_NAME_FULL} \ diff --git a/apps/test_ogrsf.cpp b/apps/test_ogrsf.cpp index 0b4e72b6e9d4..adbb5b717d52 100644 --- a/apps/test_ogrsf.cpp +++ b/apps/test_ogrsf.cpp @@ -4057,14 +4057,16 @@ static int64_t CountFeaturesUsingArrowStream(OGRLayer *poLayer, if (nExpectedFID >= 0 && !bExpectedFIDFound) { bOK = false; - printf("ERROR: expected to find feature of id %" PRId64 + printf("ERROR: CountFeaturesUsingArrowStream() :" + "expected to find feature of id %" PRId64 ", but did not get it\n", nExpectedFID); } if (nUnexpectedFID >= 0 && bUnexpectedFIDFound) { bOK = false; - printf("ERROR: expected *not* to find feature of id %" PRId64 + printf("ERROR: CountFeaturesUsingArrowStream(): " + "expected *not* to find feature of id %" PRId64 ", but did get it\n", nUnexpectedFID); } diff --git a/autotest/ogr/data/duckdb/poly_with_spatial_index.duckdb b/autotest/ogr/data/duckdb/poly_with_spatial_index.duckdb new file mode 100644 index 000000000000..f5c7a6246f90 Binary files /dev/null and b/autotest/ogr/data/duckdb/poly_with_spatial_index.duckdb differ diff --git a/autotest/ogr/ogr_adbc.py b/autotest/ogr/ogr_adbc.py index 1429d4a1785f..e1c5dc118441 100755 --- a/autotest/ogr/ogr_adbc.py +++ b/autotest/ogr/ogr_adbc.py @@ -231,29 +231,97 @@ def test_ogr_adbc_duckdb_parquet_with_sql_open_option(): ############################################################################### -def test_ogr_adbc_duckdb_parquet_with_spatial(): +@pytest.mark.parametrize("OGR_ADBC_AUTO_LOAD_DUCKDB_SPATIAL", ["ON", "OFF"]) +def test_ogr_adbc_duckdb_parquet_with_spatial(OGR_ADBC_AUTO_LOAD_DUCKDB_SPATIAL): if not _has_libduckdb(): pytest.skip("libduckdb.so missing") - if gdaltest.is_travis_branch("ubuntu_2404"): - # Works locally for me when replicating the Dockerfile ... - pytest.skip("fails on ubuntu_2404 for unknown reason") + with gdal.config_option( + "OGR_ADBC_AUTO_LOAD_DUCKDB_SPATIAL", OGR_ADBC_AUTO_LOAD_DUCKDB_SPATIAL + ): + with gdal.OpenEx( + "data/parquet/poly.parquet", + gdal.OF_VECTOR, + allowed_drivers=["ADBC"], + open_options=[ + "PRELUDE_STATEMENTS=INSTALL spatial", + ] + if OGR_ADBC_AUTO_LOAD_DUCKDB_SPATIAL == "ON" + else [], + ) as ds: + lyr = ds.GetLayer(0) + assert lyr.GetGeomType() == ogr.wkbPolygon + assert lyr.TestCapability(ogr.OLCFastGetExtent) + assert lyr.TestCapability(ogr.OLCFastSpatialFilter) + minx, maxx, miny, maxy = lyr.GetExtent() + assert (minx, maxx, miny, maxy) == ( + 478315.53125, + 481645.3125, + 4762880.5, + 4765610.5, + ) + assert lyr.GetExtent3D() == ( + 478315.53125, + 481645.3125, + 4762880.5, + 4765610.5, + float("inf"), + float("-inf"), + ) + assert lyr.GetSpatialRef().GetAuthorityCode(None) == "27700" + f = lyr.GetNextFeature() + assert f.GetGeometryRef().ExportToWkt().startswith("POLYGON ((") - with gdal.OpenEx( - "data/parquet/poly.parquet", - gdal.OF_VECTOR, - allowed_drivers=["ADBC"], - open_options=[ - "PRELUDE_STATEMENTS=INSTALL spatial", - "PRELUDE_STATEMENTS=LOAD spatial", - ], - ) as ds: + assert lyr.GetFeatureCount() == 10 + lyr.SetAttributeFilter("false") + + assert lyr.GetFeatureCount() == 0 + lyr.SetAttributeFilter("true") + + lyr.SetAttributeFilter(None) + assert lyr.GetFeatureCount() == 10 + lyr.SetSpatialFilterRect(minx, miny, maxx, maxy) + assert lyr.GetFeatureCount() == 10 + lyr.SetSpatialFilterRect(minx, miny, minx, maxy) + assert lyr.GetFeatureCount() < 10 + lyr.SetSpatialFilterRect(maxx, miny, maxx, maxy) + assert lyr.GetFeatureCount() < 10 + lyr.SetSpatialFilterRect(minx, miny, maxx, miny) + assert lyr.GetFeatureCount() < 10 + lyr.SetSpatialFilterRect(minx, maxy, maxx, maxy) + assert lyr.GetFeatureCount() < 10 + + lyr.SetAttributeFilter("true") + lyr.SetSpatialFilter(None) + assert lyr.GetFeatureCount() == 10 + lyr.SetSpatialFilterRect(minx, miny, maxx, maxy) + assert lyr.GetFeatureCount() == 10 + + lyr.SetAttributeFilter("false") + lyr.SetSpatialFilterRect(minx, miny, maxx, maxy) + assert lyr.GetFeatureCount() == 0 + + +############################################################################### + + +@pytest.mark.parametrize("OGR_ADBC_AUTO_LOAD_DUCKDB_SPATIAL", ["ON", "OFF"]) +def test_ogr_adbc_duckdb_with_spatial_index(OGR_ADBC_AUTO_LOAD_DUCKDB_SPATIAL): + + if not _has_libduckdb(): + pytest.skip("libduckdb.so missing") + + with gdal.config_option( + "OGR_ADBC_AUTO_LOAD_DUCKDB_SPATIAL", OGR_ADBC_AUTO_LOAD_DUCKDB_SPATIAL + ): + ds = ogr.Open("data/duckdb/poly_with_spatial_index.duckdb") + lyr = ds.GetLayer(0) with ds.ExecuteSQL( - "SELECT ST_AsText(geometry) FROM read_parquet('data/parquet/poly.parquet')" + "SELECT 1 FROM duckdb_extensions() WHERE extension_name='spatial' AND loaded = true" ) as sql_lyr: - f = sql_lyr.GetNextFeature() - assert f.GetField(0).startswith("POLYGON") + spatial_loaded = sql_lyr.GetNextFeature() is not None + assert lyr.TestCapability(ogr.OLCFastSpatialFilter) == spatial_loaded ############################################################################### @@ -325,6 +393,30 @@ def test_ogr_adbc_test_ogrsf_parquet_filename_with_glob(): assert "ERROR" not in ret +############################################################################### +# Run test_ogrsf on a GeoParquet file + + +@pytest.mark.parametrize("OGR_ADBC_AUTO_LOAD_DUCKDB_SPATIAL", ["ON", "OFF"]) +def test_ogr_adbc_test_ogrsf_geoparquet(OGR_ADBC_AUTO_LOAD_DUCKDB_SPATIAL): + + if not _has_libduckdb(): + pytest.skip("libduckdb.so missing") + + import test_cli_utilities + + if test_cli_utilities.get_test_ogrsf_path() is None: + pytest.skip() + + ret = gdaltest.runexternal( + test_cli_utilities.get_test_ogrsf_path() + + f" -ro ADBC:data/parquet/poly.parquet --config OGR_ADBC_AUTO_LOAD_DUCKDB_SPATIAL={OGR_ADBC_AUTO_LOAD_DUCKDB_SPATIAL}" + ) + + assert "INFO" in ret + assert "ERROR" not in ret + + ############################################################################### # Test DATETIME_AS_STRING=YES GetArrowStream() option @@ -359,7 +451,34 @@ def test_ogr_adbc_arrow_stream_numpy_datetime_as_string(tmp_vsimem): # Run test_ogrsf on a DuckDB dataset -def test_ogr_adbc_test_ogrsf_duckdb(): +@pytest.mark.parametrize("OGR_ADBC_AUTO_LOAD_DUCKDB_SPATIAL", ["ON", "OFF"]) +def test_ogr_adbc_test_ogrsf_duckdb(OGR_ADBC_AUTO_LOAD_DUCKDB_SPATIAL): + + if not _has_libduckdb(): + pytest.skip("libduckdb.so missing") + + import test_cli_utilities + + if test_cli_utilities.get_test_ogrsf_path() is None: + pytest.skip() + + ret = gdaltest.runexternal( + test_cli_utilities.get_test_ogrsf_path() + + f" -ro ADBC:data/duckdb/poly.duckdb --config OGR_ADBC_AUTO_LOAD_DUCKDB_SPATIAL={OGR_ADBC_AUTO_LOAD_DUCKDB_SPATIAL}" + ) + + assert "INFO" in ret + assert "ERROR" not in ret + + +############################################################################### +# Run test_ogrsf on a DuckDB dataset + + +@pytest.mark.parametrize("OGR_ADBC_AUTO_LOAD_DUCKDB_SPATIAL", ["ON", "OFF"]) +def test_ogr_adbc_test_ogrsf_duckdb_with_spatial_index( + OGR_ADBC_AUTO_LOAD_DUCKDB_SPATIAL, +): if not _has_libduckdb(): pytest.skip("libduckdb.so missing") @@ -370,7 +489,8 @@ def test_ogr_adbc_test_ogrsf_duckdb(): pytest.skip() ret = gdaltest.runexternal( - test_cli_utilities.get_test_ogrsf_path() + " -ro ADBC:data/duckdb/poly.duckdb" + test_cli_utilities.get_test_ogrsf_path() + + f" -ro ADBC:data/duckdb/poly_with_spatial_index.duckdb --config OGR_ADBC_AUTO_LOAD_DUCKDB_SPATIAL={OGR_ADBC_AUTO_LOAD_DUCKDB_SPATIAL}" ) assert "INFO" in ret diff --git a/doc/source/drivers/vector/adbc.rst b/doc/source/drivers/vector/adbc.rst index 40a312323367..abc50b2bb989 100644 --- a/doc/source/drivers/vector/adbc.rst +++ b/doc/source/drivers/vector/adbc.rst @@ -25,7 +25,11 @@ Consult the `installation instruction { + public: + //! Describe the bbox column of a geometry column + struct GeomColBBOX + { + std::string osXMin{}; // empty if no bbox column + std::string osYMin{}; + std::string osXMax{}; + std::string osYMax{}; + }; + + private: friend class OGRADBCDataset; OGRADBCDataset *m_poDS = nullptr; + const std::string m_osBaseStatement{}; // as provided by user + std::string m_osModifiedBaseStatement{}; // above tuned to use ST_AsWKB() + std::string m_osModifiedSelect{}; // SELECT part of above + std::string m_osAttributeFilter{}; std::unique_ptr m_statement{}; std::unique_ptr m_poAdapterLayer{}; std::unique_ptr m_stream{}; @@ -103,17 +118,27 @@ class OGRADBCLayer final : public OGRLayer, GIntBig m_nFeatureID = 0; bool m_bIsParquetLayer = false; + std::vector + m_geomColBBOX{}; // same size as GetGeomFieldCount() + std::vector m_extents{}; // same size as GetGeomFieldCount() + OGRFeature *GetNextRawFeature(); bool GetArrowStreamInternal(struct ArrowArrayStream *out_stream); GIntBig GetFeatureCountParquet(); + void BuildLayerDefn(bool bInternalUse); + bool ReplaceStatement(const char *pszNewStatement); + bool UpdateStatement(); + std::string GetCurrentStatement() const; + CPL_DISALLOW_COPY_ASSIGN(OGRADBCLayer) public: OGRADBCLayer(OGRADBCDataset *poDS, const char *pszName, + const char *pszStatement, std::unique_ptr poStatement, std::unique_ptr poStream, - ArrowSchema *schema); + ArrowSchema *schema, bool bInternalUse); ~OGRADBCLayer() override; OGRFeatureDefn *GetLayerDefn() override @@ -128,6 +153,20 @@ class OGRADBCLayer final : public OGRLayer, bool GetArrowStream(struct ArrowArrayStream *out_stream, CSLConstList papszOptions = nullptr) override; GIntBig GetFeatureCount(int bForce) override; + + void SetSpatialFilter(OGRGeometry *poGeom) override + { + SetSpatialFilter(0, poGeom); + } + + OGRErr SetAttributeFilter(const char *pszFilter) override; + void SetSpatialFilter(int iGeomField, OGRGeometry *poGeom) override; + + OGRErr GetExtent(OGREnvelope *psExtent, int bForce = TRUE) override; + OGRErr GetExtent(int iGeomField, OGREnvelope *psExtent, + int bForce = TRUE) override; + OGRErr GetExtent3D(int iGeomField, OGREnvelope3D *psExtent, + int bForce = TRUE) override; }; /************************************************************************/ @@ -143,6 +182,8 @@ class OGRADBCDataset final : public GDALDataset std::unique_ptr m_connection{}; std::vector> m_apoLayers{}; std::string m_osParquetFilename{}; + bool m_bIsDuckDB = false; + bool m_bSpatialLoaded = false; public: OGRADBCDataset() = default; @@ -164,7 +205,13 @@ class OGRADBCDataset final : public GDALDataset OGRLayer *GetLayerByName(const char *pszName) override; std::unique_ptr CreateLayer(const char *pszStatement, - const char *pszLayerName); + const char *pszLayerName, + bool bInternalUse); + + std::unique_ptr CreateInternalLayer(const char *pszStatement) + { + return CreateLayer(pszStatement, "temp", true); + } OGRLayer *ExecuteSQL(const char *pszStatement, OGRGeometry *poSpatialFilter, const char *pszDialect) override; diff --git a/ogr/ogrsf_frmts/adbc/ogradbcdataset.cpp b/ogr/ogrsf_frmts/adbc/ogradbcdataset.cpp index 66d822167c1a..1256147033ca 100644 --- a/ogr/ogrsf_frmts/adbc/ogradbcdataset.cpp +++ b/ogr/ogrsf_frmts/adbc/ogradbcdataset.cpp @@ -15,6 +15,7 @@ #include "ogradbcdrivercore.h" #include "ogr_mem.h" #include "ogr_p.h" +#include "cpl_error.h" #include "cpl_json.h" #include "gdal_adbc.h" @@ -79,7 +80,8 @@ OGRADBCDataset::~OGRADBCDataset() /************************************************************************/ std::unique_ptr -OGRADBCDataset::CreateLayer(const char *pszStatement, const char *pszLayerName) +OGRADBCDataset::CreateLayer(const char *pszStatement, const char *pszLayerName, + bool bInternalUse) { OGRADBCError error; @@ -164,7 +166,8 @@ OGRADBCDataset::CreateLayer(const char *pszStatement, const char *pszLayerName) } return std::make_unique( - this, pszLayerName, std::move(statement), std::move(stream), &schema); + this, pszLayerName, osStatement.c_str(), std::move(statement), + std::move(stream), &schema, bInternalUse); } /************************************************************************/ @@ -181,7 +184,7 @@ OGRLayer *OGRADBCDataset::ExecuteSQL(const char *pszStatement, pszDialect); } - auto poLayer = CreateLayer(pszStatement, "RESULTSET"); + auto poLayer = CreateLayer(pszStatement, "RESULTSET", false); if (poLayer && poSpatialFilter) { if (poLayer->GetGeomType() == wkbNone) @@ -211,7 +214,7 @@ bool OGRADBCDataset::Open(const GDALOpenInfo *poOpenInfo) } const char *pszADBCDriverName = CSLFetchNameValue(poOpenInfo->papszOpenOptions, "ADBC_DRIVER"); - const bool bIsDuckDB = OGRADBCDriverIsDuckDB(poOpenInfo); + m_bIsDuckDB = OGRADBCDriverIsDuckDB(poOpenInfo); const bool bIsSQLite3 = (pszADBCDriverName && EQUAL(pszADBCDriverName, "adbc_driver_sqlite")) || OGRADBCDriverIsSQLite3(poOpenInfo); @@ -221,7 +224,7 @@ bool OGRADBCDataset::Open(const GDALOpenInfo *poOpenInfo) if (!pszADBCDriverName) { - if (bIsDuckDB || bIsParquet) + if (m_bIsDuckDB || bIsParquet) { pszADBCDriverName = #ifdef _WIN32 @@ -249,7 +252,7 @@ bool OGRADBCDataset::Open(const GDALOpenInfo *poOpenInfo) // Load the driver if (pszADBCDriverName && - (bIsDuckDB || bIsParquet || strstr(pszADBCDriverName, "duckdb"))) + (m_bIsDuckDB || bIsParquet || strstr(pszADBCDriverName, "duckdb"))) { if (OGRADBCLoadDriver(pszADBCDriverName, "duckdb_adbc_init", &m_driver, error) != ADBC_STATUS_OK) @@ -280,7 +283,7 @@ bool OGRADBCDataset::Open(const GDALOpenInfo *poOpenInfo) // Set options if (pszADBCDriverName && - (bIsDuckDB || bIsParquet || strstr(pszADBCDriverName, "duckdb"))) + (m_bIsDuckDB || bIsParquet || strstr(pszADBCDriverName, "duckdb"))) { if (ADBC_CALL(DatabaseSetOption, &m_database, "path", bIsParquet ? ":memory:" : pszFilename, @@ -346,9 +349,30 @@ bool OGRADBCDataset::Open(const GDALOpenInfo *poOpenInfo) for (const char *pszStatement : cpl::Iterate(CSLConstList(papszPreludeStatements))) { - CreateLayer(pszStatement, "temp"); + CreateInternalLayer(pszStatement); } CSLDestroy(papszPreludeStatements); + if ((bIsParquet || m_bIsDuckDB) && + CPLTestBool( + CPLGetConfigOption("OGR_ADBC_AUTO_LOAD_DUCKDB_SPATIAL", "ON"))) + { + auto poTmpLayer = + CreateInternalLayer("SELECT 1 FROM duckdb_extensions() WHERE " + "extension_name='spatial' AND loaded = false"); + if (poTmpLayer && std::unique_ptr( + poTmpLayer->GetNextFeature()) != nullptr) + { + CPLErrorStateBackuper oBackuper(CPLQuietErrorHandler); + CreateInternalLayer("LOAD spatial"); + } + + poTmpLayer = + CreateInternalLayer("SELECT 1 FROM duckdb_extensions() WHERE " + "extension_name='spatial' AND loaded = true"); + m_bSpatialLoaded = + poTmpLayer && std::unique_ptr( + poTmpLayer->GetNextFeature()) != nullptr; + } std::string osLayerName = "RESULTSET"; std::string osSQL; @@ -374,18 +398,89 @@ bool OGRADBCDataset::Open(const GDALOpenInfo *poOpenInfo) { if (pszSQL[0]) { - auto poLayer = CreateLayer(pszSQL, osLayerName.c_str()); + std::unique_ptr poLayer; + if ((bIsParquet || m_bIsDuckDB) && m_bSpatialLoaded) + { + std::string osErrorMsg; + { + CPLErrorStateBackuper oBackuper(CPLQuietErrorHandler); + poLayer = CreateLayer(pszSQL, osLayerName.c_str(), false); + if (!poLayer) + osErrorMsg = CPLGetLastErrorMsg(); + } + if (!poLayer) + { + CPLDebug("ADBC", + "Connecting with 'LOAD spatial' did not work " + "(%s). Retrying without it", + osErrorMsg.c_str()); + ADBC_CALL(ConnectionRelease, m_connection.get(), error); + m_connection.reset(); + + ADBC_CALL(DatabaseRelease, &m_database, error); + memset(&m_database, 0, sizeof(m_database)); + + if (ADBC_CALL(DatabaseNew, &m_database, error) != + ADBC_STATUS_OK) + { + CPLError(CE_Failure, CPLE_AppDefined, + "AdbcDatabaseNew() failed: %s", + error.message()); + return false; + } + if (ADBC_CALL(DatabaseSetOption, &m_database, "path", + ":memory:", error) != ADBC_STATUS_OK) + { + CPLError(CE_Failure, CPLE_AppDefined, + "AdbcDatabaseSetOption() failed: %s", + error.message()); + return false; + } + + if (ADBC_CALL(DatabaseInit, &m_database, error) != + ADBC_STATUS_OK) + { + CPLError(CE_Failure, CPLE_AppDefined, + "AdbcDatabaseInit() failed: %s", + error.message()); + return false; + } + + m_connection = std::make_unique(); + if (ADBC_CALL(ConnectionNew, m_connection.get(), error) != + ADBC_STATUS_OK) + { + CPLError(CE_Failure, CPLE_AppDefined, + "AdbcConnectionNew() failed: %s", + error.message()); + return false; + } + + if (ADBC_CALL(ConnectionInit, m_connection.get(), + &m_database, error) != ADBC_STATUS_OK) + { + CPLError(CE_Failure, CPLE_AppDefined, + "AdbcConnectionInit() failed: %s", + error.message()); + return false; + } + } + } if (!poLayer) - return false; + { + poLayer = CreateLayer(pszSQL, osLayerName.c_str(), false); + if (!poLayer) + return false; + } + poLayer->m_bIsParquetLayer = bIsParquetLayer; m_apoLayers.emplace_back(std::move(poLayer)); } } - else if (bIsDuckDB || bIsSQLite3) + else if (m_bIsDuckDB || bIsSQLite3) { - auto poLayerList = CreateLayer( - "SELECT name FROM sqlite_master WHERE type IN ('table', 'view')", - "LAYERLIST"); + auto poLayerList = CreateInternalLayer( + "SELECT name FROM sqlite_master WHERE type IN ('table', 'view')"); if (!poLayerList || poLayerList->GetLayerDefn()->GetFieldCount() != 1) { return false; @@ -400,7 +495,8 @@ bool OGRADBCDataset::Open(const GDALOpenInfo *poOpenInfo) CPLSPrintf("SELECT * FROM \"%s\"", OGRDuplicateCharacter(pszLayerName, '"').c_str()); CPLTurnFailureIntoWarning(true); - auto poLayer = CreateLayer(osStatement.c_str(), pszLayerName); + auto poLayer = + CreateLayer(osStatement.c_str(), pszLayerName, false); CPLTurnFailureIntoWarning(false); if (poLayer) { @@ -410,13 +506,12 @@ bool OGRADBCDataset::Open(const GDALOpenInfo *poOpenInfo) } else if (bIsPostgreSQL) { - auto poLayerList = CreateLayer( + auto poLayerList = CreateInternalLayer( "SELECT n.nspname, c.relname FROM pg_class c " "JOIN pg_namespace n ON c.relnamespace = n.oid " "AND c.relkind in ('r','v','m','f') " "AND n.nspname NOT IN ('pg_catalog', 'information_schema') " - "ORDER BY c.oid", - "LAYERLIST"); + "ORDER BY c.oid"); if (!poLayerList || poLayerList->GetLayerDefn()->GetFieldCount() != 2) { return false; @@ -432,9 +527,9 @@ bool OGRADBCDataset::Open(const GDALOpenInfo *poOpenInfo) OGRDuplicateCharacter(pszTableName, '"').c_str()); CPLTurnFailureIntoWarning(true); - auto poLayer = - CreateLayer(osStatement.c_str(), - CPLSPrintf("%s.%s", pszNamespace, pszTableName)); + auto poLayer = CreateLayer( + osStatement.c_str(), + CPLSPrintf("%s.%s", pszNamespace, pszTableName), false); CPLTurnFailureIntoWarning(false); if (poLayer) { @@ -474,8 +569,9 @@ OGRLayer *OGRADBCDataset::GetLayerByName(const char *pszName) } auto statement = std::make_unique(); - OGRADBCLayer tmpLayer(this, "", std::move(statement), - std::move(objectsStream), &schema); + OGRADBCLayer tmpLayer(this, "", "", std::move(statement), + std::move(objectsStream), &schema, + /* bInternalUse = */ true); const auto tmpLayerDefn = tmpLayer.GetLayerDefn(); if (tmpLayerDefn->GetFieldIndex("catalog_name") < 0 || tmpLayerDefn->GetFieldIndex("catalog_db_schemas") < 0) diff --git a/ogr/ogrsf_frmts/adbc/ogradbclayer.cpp b/ogr/ogrsf_frmts/adbc/ogradbclayer.cpp index 66169b486dca..015abf34f567 100644 --- a/ogr/ogrsf_frmts/adbc/ogradbclayer.cpp +++ b/ogr/ogrsf_frmts/adbc/ogradbclayer.cpp @@ -12,31 +12,434 @@ ****************************************************************************/ #include "ogr_adbc.h" +#include "ogr_spatialref.h" #include "ogr_p.h" +#include "cpl_json.h" + +#include +#include +#include +#include #define ADBC_CALL(func, ...) m_poDS->m_driver.func(__VA_ARGS__) +/************************************************************************/ +/* GetGeometryTypeFromString() */ +/************************************************************************/ + +static OGRwkbGeometryType GetGeometryTypeFromString(const std::string &osType) +{ + OGRwkbGeometryType eGeomType = wkbUnknown; + OGRReadWKTGeometryType(osType.c_str(), &eGeomType); + if (eGeomType == wkbUnknown && !osType.empty()) + { + CPLDebug("ADBC", "Unknown geometry type: %s", osType.c_str()); + } + return eGeomType; +} + /************************************************************************/ /* OGRADBCLayer() */ /************************************************************************/ OGRADBCLayer::OGRADBCLayer(OGRADBCDataset *poDS, const char *pszName, + const char *pszStatement, std::unique_ptr poStatement, std::unique_ptr poStream, - ArrowSchema *schema) - : m_poDS(poDS), m_statement(std::move(poStatement)), - m_stream(std::move(poStream)) + ArrowSchema *schema, bool bInternalUse) + : m_poDS(poDS), m_osBaseStatement(pszStatement), + m_osModifiedBaseStatement(m_osBaseStatement), + m_statement(std::move(poStatement)), m_stream(std::move(poStream)) { SetDescription(pszName); memcpy(&m_schema, schema, sizeof(m_schema)); schema->release = nullptr; - m_poAdapterLayer = - std::make_unique(pszName); + BuildLayerDefn(bInternalUse); +} + +/************************************************************************/ +/* ParseGeometryColumnCovering() */ +/************************************************************************/ + +//! Parse bounding box column definition +static bool ParseGeometryColumnCovering(const CPLJSONObject &oJSONDef, + std::string &osBBOXColumn, + std::string &osXMin, + std::string &osYMin, + std::string &osXMax, + std::string &osYMax) +{ + const auto oCovering = oJSONDef["covering"]; + if (oCovering.IsValid() && + oCovering.GetType() == CPLJSONObject::Type::Object) + { + const auto oBBOX = oCovering["bbox"]; + if (oBBOX.IsValid() && oBBOX.GetType() == CPLJSONObject::Type::Object) + { + const auto oXMin = oBBOX["xmin"]; + const auto oYMin = oBBOX["ymin"]; + const auto oXMax = oBBOX["xmax"]; + const auto oYMax = oBBOX["ymax"]; + if (oXMin.IsValid() && oYMin.IsValid() && oXMax.IsValid() && + oYMax.IsValid() && + oXMin.GetType() == CPLJSONObject::Type::Array && + oYMin.GetType() == CPLJSONObject::Type::Array && + oXMax.GetType() == CPLJSONObject::Type::Array && + oYMax.GetType() == CPLJSONObject::Type::Array) + { + const auto osXMinArray = oXMin.ToArray(); + const auto osYMinArray = oYMin.ToArray(); + const auto osXMaxArray = oXMax.ToArray(); + const auto osYMaxArray = oYMax.ToArray(); + if (osXMinArray.Size() == 2 && osYMinArray.Size() == 2 && + osXMaxArray.Size() == 2 && osYMaxArray.Size() == 2 && + osXMinArray[0].GetType() == CPLJSONObject::Type::String && + osXMinArray[1].GetType() == CPLJSONObject::Type::String && + osYMinArray[0].GetType() == CPLJSONObject::Type::String && + osYMinArray[1].GetType() == CPLJSONObject::Type::String && + osXMaxArray[0].GetType() == CPLJSONObject::Type::String && + osXMaxArray[1].GetType() == CPLJSONObject::Type::String && + osYMaxArray[0].GetType() == CPLJSONObject::Type::String && + osYMaxArray[1].GetType() == CPLJSONObject::Type::String && + osXMinArray[0].ToString() == osYMinArray[0].ToString() && + osXMinArray[0].ToString() == osXMaxArray[0].ToString() && + osXMinArray[0].ToString() == osYMaxArray[0].ToString()) + { + osBBOXColumn = osXMinArray[0].ToString(); + osXMin = osXMinArray[1].ToString(); + osYMin = osYMinArray[1].ToString(); + osXMax = osXMaxArray[1].ToString(); + osYMax = osYMaxArray[1].ToString(); + return true; + } + } + } + } + return false; +} + +/************************************************************************/ +/* ParseGeoParquetColumn() */ +/************************************************************************/ + +static void ParseGeoParquetColumn( + const CPLJSONObject &oColumn, + std::map &oMapType, + std::map &oMapExtent, + std::map + &oMapGeomColumnToCoveringBBOXColumn, + std::map> + &oMapGeomColumnsFromGeoParquet, + std::set &oSetCoveringBBoxColumn) +{ + auto oCrs = oColumn.GetObj("crs"); + if (!oCrs.IsValid()) + { + // WGS 84 is implied if no crs member is found. + auto poSRS = std::make_unique(); + poSRS->SetAxisMappingStrategy(OAMS_TRADITIONAL_GIS_ORDER); + poSRS->importFromEPSG(4326); + oMapGeomColumnsFromGeoParquet[oColumn.GetName()] = std::move(poSRS); + } + else if (oCrs.GetType() == CPLJSONObject::Type::Object) + { + // CRS encoded as PROJJSON (extension) + const auto oType = oCrs["type"]; + if (oType.IsValid() && oType.GetType() == CPLJSONObject::Type::String) + { + const auto osType = oType.ToString(); + if (osType.find("CRS") != std::string::npos) + { + auto poSRS = std::make_unique(); + poSRS->SetAxisMappingStrategy(OAMS_TRADITIONAL_GIS_ORDER); + + if (poSRS->SetFromUserInput(oCrs.ToString().c_str()) == + OGRERR_NONE) + { + oMapGeomColumnsFromGeoParquet[oColumn.GetName()] = + std::move(poSRS); + } + } + } + } + else + { + oMapGeomColumnsFromGeoParquet[oColumn.GetName()] = nullptr; + } + + OGRwkbGeometryType eGeomType = wkbUnknown; + auto oType = oColumn.GetObj("geometry_types"); + if (oType.GetType() == CPLJSONObject::Type::Array) + { + const auto oTypeArray = oType.ToArray(); + if (oTypeArray.Size() == 1) + { + eGeomType = GetGeometryTypeFromString(oTypeArray[0].ToString()); + } + else if (oTypeArray.Size() > 1) + { + const auto PromoteToCollection = [](OGRwkbGeometryType eType) + { + if (eType == wkbPoint) + return wkbMultiPoint; + if (eType == wkbLineString) + return wkbMultiLineString; + if (eType == wkbPolygon) + return wkbMultiPolygon; + return eType; + }; + bool bMixed = false; + bool bHasMulti = false; + bool bHasZ = false; + bool bHasM = false; + const auto eFirstType = OGR_GT_Flatten( + GetGeometryTypeFromString(oTypeArray[0].ToString())); + const auto eFirstTypeCollection = PromoteToCollection(eFirstType); + for (int i = 0; i < oTypeArray.Size(); ++i) + { + const auto eThisGeom = + GetGeometryTypeFromString(oTypeArray[i].ToString()); + if (PromoteToCollection(OGR_GT_Flatten(eThisGeom)) != + eFirstTypeCollection) + { + bMixed = true; + break; + } + bHasZ |= OGR_GT_HasZ(eThisGeom) != FALSE; + bHasM |= OGR_GT_HasM(eThisGeom) != FALSE; + bHasMulti |= (PromoteToCollection(OGR_GT_Flatten(eThisGeom)) == + OGR_GT_Flatten(eThisGeom)); + } + if (!bMixed) + { + if (eFirstTypeCollection == wkbMultiPolygon || + eFirstTypeCollection == wkbMultiLineString) + { + if (bHasMulti) + eGeomType = OGR_GT_SetModifier(eFirstTypeCollection, + bHasZ, bHasM); + else + eGeomType = + OGR_GT_SetModifier(eFirstType, bHasZ, bHasM); + } + } + } + } + + oMapType[oColumn.GetName()] = eGeomType; + + OGREnvelope3D sExtent; + const auto oBBox = oColumn.GetArray("bbox"); + if (oBBox.IsValid() && oBBox.Size() == 4) + { + sExtent.MinX = oBBox[0].ToDouble(); + sExtent.MinY = oBBox[1].ToDouble(); + sExtent.MinZ = std::numeric_limits::infinity(); + sExtent.MaxX = oBBox[2].ToDouble(); + sExtent.MaxY = oBBox[3].ToDouble(); + sExtent.MaxZ = -std::numeric_limits::infinity(); + if (sExtent.MinX <= sExtent.MaxX) + { + oMapExtent[oColumn.GetName()] = sExtent; + } + } + else if (oBBox.IsValid() && oBBox.Size() == 6) + { + sExtent.MinX = oBBox[0].ToDouble(); + sExtent.MinY = oBBox[1].ToDouble(); + sExtent.MinZ = oBBox[2].ToDouble(); + sExtent.MaxX = oBBox[3].ToDouble(); + sExtent.MaxY = oBBox[4].ToDouble(); + sExtent.MaxZ = oBBox[5].ToDouble(); + if (sExtent.MinX <= sExtent.MaxX) + { + oMapExtent[oColumn.GetName()] = sExtent; + } + } + + std::string osBBOXColumn; + std::string osXMin, osYMin, osXMax, osYMax; + if (ParseGeometryColumnCovering(oColumn, osBBOXColumn, osXMin, osYMin, + osXMax, osYMax)) + { + OGRADBCLayer::GeomColBBOX geomColBBOX; + const std::string osPrefix = + std::string("\"") + .append(OGRDuplicateCharacter(osBBOXColumn, '"')) + .append("\".\""); + geomColBBOX.osXMin = std::string(osPrefix) + .append(OGRDuplicateCharacter(osXMin, '"')) + .append("\""); + geomColBBOX.osYMin = std::string(osPrefix) + .append(OGRDuplicateCharacter(osYMin, '"')) + .append("\""); + geomColBBOX.osXMax = std::string(osPrefix) + .append(OGRDuplicateCharacter(osXMax, '"')) + .append("\""); + geomColBBOX.osYMax = std::string(osPrefix) + .append(OGRDuplicateCharacter(osYMax, '"')) + .append("\""); + oMapGeomColumnToCoveringBBOXColumn[oColumn.GetName()] = geomColBBOX; + oSetCoveringBBoxColumn.insert(osBBOXColumn); + } +} + +/************************************************************************/ +/* BuildLayerDefn() */ +/************************************************************************/ + +void OGRADBCLayer::BuildLayerDefn(bool bInternalUse) +{ + // Identify geometry columns for Parquet files, and query them with + // ST_AsWKB() to avoid getting duckdb_spatial own geometry encoding + // (https://github.com/duckdb/duckdb_spatial/blob/a60aa3733741a99c49baaf33390c0f7c8a9598a3/spatial/src/spatial/core/geometry/geometry_serialization.cpp#L11) + std::map> oMapGeomColumns; + std::map oMapType; + std::map oMapExtent; + std::map oMapGeomColumnToCoveringBBOXColumn; + if (!bInternalUse && STARTS_WITH_CI(m_osBaseStatement.c_str(), "SELECT ") && + (m_poDS->m_bIsDuckDB || + (!m_poDS->m_osParquetFilename.empty() && + CPLString(m_osBaseStatement) + .ifind(std::string(" FROM '").append(OGRDuplicateCharacter( + m_poDS->m_osParquetFilename, '\''))) != + std::string::npos))) + { + // Try to read GeoParquet 'geo' metadata + std::map> + oMapGeomColumnsFromGeoParquet; + std::set oSetCoveringBBoxColumn; + + std::string osGeoParquetMD; + if (!m_poDS->m_osParquetFilename.empty()) + { + auto poMetadataLayer = m_poDS->CreateInternalLayer( + std::string("SELECT value FROM parquet_kv_metadata('") + .append(OGRDuplicateCharacter(m_poDS->m_osParquetFilename, + '\'')) + .append("') WHERE key = 'geo'") + .c_str()); + if (poMetadataLayer) + { + auto f = std::unique_ptr( + poMetadataLayer->GetNextFeature()); + if (f) + { + int nBytes = 0; + const GByte *pabyData = f->GetFieldAsBinary(0, &nBytes); + osGeoParquetMD.assign( + reinterpret_cast(pabyData), nBytes); + // CPLDebug("ADBC", "%s", osGeoParquetMD.c_str()); + } + } + } + CPLJSONDocument oDoc; + if (!osGeoParquetMD.empty() && oDoc.LoadMemory(osGeoParquetMD)) + { + const auto oColums = oDoc.GetRoot().GetObj("columns"); + for (const auto &oColumn : oColums.GetChildren()) + { + if (oColumn.GetString("encoding") == "WKB") + { + ParseGeoParquetColumn(oColumn, oMapType, oMapExtent, + oMapGeomColumnToCoveringBBOXColumn, + oMapGeomColumnsFromGeoParquet, + oSetCoveringBBoxColumn); + } + } + } + + auto poDescribeLayer = m_poDS->CreateInternalLayer( + std::string("DESCRIBE ").append(m_osBaseStatement).c_str()); + std::string osNewStatement; + bool bNewStatement = false; + if (poDescribeLayer && + // cppcheck-suppress knownConditionTrueFalse + (m_poDS->m_bIsDuckDB || !oMapGeomColumnsFromGeoParquet.empty())) + { + for (auto &&f : *poDescribeLayer) + { + const char *pszColName = f->GetFieldAsString("column_name"); + if (cpl::contains(oSetCoveringBBoxColumn, pszColName)) + { + bNewStatement = true; + continue; + } + + // f->DumpReadable(stdout); + if (osNewStatement.empty()) + osNewStatement = "SELECT "; + else + osNewStatement += ", "; + + auto oIter = oMapGeomColumnsFromGeoParquet.find(pszColName); + if (oIter != oMapGeomColumnsFromGeoParquet.end()) + { + oMapGeomColumns[pszColName] = std::move(oIter->second); + } + if (EQUAL(f->GetFieldAsString("column_type"), "GEOMETRY") && + m_poDS->m_bSpatialLoaded) + { + bNewStatement = true; + osNewStatement += "ST_AsWKB(\""; + osNewStatement += OGRDuplicateCharacter(pszColName, '"'); + osNewStatement += "\") AS "; + if (oIter == oMapGeomColumnsFromGeoParquet.end()) + oMapGeomColumns[pszColName] = nullptr; + } + osNewStatement += '"'; + osNewStatement += OGRDuplicateCharacter(pszColName, '"'); + osNewStatement += '"'; + } + m_osModifiedSelect = osNewStatement; + osNewStatement += " FROM ("; + osNewStatement += m_osBaseStatement; + osNewStatement += " )"; + } + + if (bNewStatement) + { + // CPLDebug("ADBC", "%s -> %s", m_osBaseStatement.c_str(), osNewStatement.c_str()); + if (ReplaceStatement(osNewStatement.c_str())) + { + m_osModifiedBaseStatement = osNewStatement; + } + else + { + m_osModifiedSelect.clear(); + oMapGeomColumns.clear(); + } + } + } + + m_poAdapterLayer = std::make_unique( + GetDescription()); + for (int i = 0; i < m_schema.n_children; ++i) { - m_poAdapterLayer->CreateFieldFromArrowSchema(m_schema.children[i]); + const char *pszColName = m_schema.children[i]->name; + auto oIter = oMapGeomColumns.find(pszColName); + if (oIter != oMapGeomColumns.end()) + { + OGRGeomFieldDefn oGeomFieldDefn(pszColName, oMapType[pszColName]); + auto poSRS = std::move(oIter->second).release(); + if (poSRS) + { + oGeomFieldDefn.SetSpatialRef(poSRS); + poSRS->Release(); + } + m_poAdapterLayer->m_poLayerDefn->AddGeomFieldDefn(&oGeomFieldDefn); + + m_extents.push_back(oMapExtent[pszColName]); + m_geomColBBOX.push_back( + oMapGeomColumnToCoveringBBOXColumn[pszColName]); + } + else + { + m_poAdapterLayer->CreateFieldFromArrowSchema(m_schema.children[i]); + } } } @@ -53,6 +456,67 @@ OGRADBCLayer::~OGRADBCLayer() m_schema.release(&m_schema); } +/************************************************************************/ +/* ReplaceStatement() */ +/************************************************************************/ + +bool OGRADBCLayer::ReplaceStatement(const char *pszNewStatement) +{ + // CPLDebug("ADBC", "%s", pszNewStatement); + OGRADBCError error; + auto statement = std::make_unique(); + if (ADBC_CALL(StatementNew, m_poDS->m_connection.get(), statement.get(), + error) != ADBC_STATUS_OK) + { + CPLError(CE_Failure, CPLE_AppDefined, "AdbcStatementNew() failed: %s", + error.message()); + ADBC_CALL(StatementRelease, statement.get(), error); + } + else if (ADBC_CALL(StatementSetSqlQuery, statement.get(), pszNewStatement, + error) != ADBC_STATUS_OK) + { + CPLError(CE_Failure, CPLE_AppDefined, + "AdbcStatementSetSqlQuery() failed: %s", error.message()); + error.clear(); + ADBC_CALL(StatementRelease, statement.get(), error); + } + else + { + auto stream = std::make_unique(); + int64_t rows_affected = -1; + ArrowSchema newSchema; + memset(&newSchema, 0, sizeof(newSchema)); + if (ADBC_CALL(StatementExecuteQuery, statement.get(), stream->get(), + &rows_affected, error) != ADBC_STATUS_OK) + { + CPLError(CE_Failure, CPLE_AppDefined, + "AdbcStatementExecuteQuery() failed: %s", error.message()); + error.clear(); + ADBC_CALL(StatementRelease, statement.get(), error); + } + else if (stream->get_schema(&newSchema) != 0) + { + CPLError(CE_Failure, CPLE_AppDefined, "get_schema() failed"); + ADBC_CALL(StatementRelease, statement.get(), error); + } + else + { + if (m_schema.release) + m_schema.release(&m_schema); + memcpy(&m_schema, &newSchema, sizeof(newSchema)); + + if (m_statement) + ADBC_CALL(StatementRelease, m_statement.get(), error); + m_statement = std::move(statement); + + m_stream = std::move(stream); + + return true; + } + } + return false; +} + /************************************************************************/ /* GetNextRawFeature() */ /************************************************************************/ @@ -99,6 +563,16 @@ OGRFeature *OGRADBCLayer::GetNextRawFeature() } auto poFeature = m_poAdapterLayer->m_apoFeatures[m_nIdx++].release(); + const int nGeomFieldCount = + m_poAdapterLayer->m_poLayerDefn->GetFieldCount(); + for (int i = 0; i < nGeomFieldCount; ++i) + { + auto poGeom = poFeature->GetGeomFieldRef(i); + if (poGeom) + poGeom->assignSpatialReference( + m_poAdapterLayer->m_poLayerDefn->GetGeomFieldDefn(i) + ->GetSpatialRef()); + } poFeature->SetFID(m_nFeatureID++); return poFeature; } @@ -119,6 +593,194 @@ void OGRADBCLayer::ResetReading() } } +/************************************************************************/ +/* GetExtent() */ +/************************************************************************/ + +OGRErr OGRADBCLayer::GetExtent(OGREnvelope *psExtent, int bForce) +{ + return GetExtent(0, psExtent, bForce); +} + +/************************************************************************/ +/* GetExtent() */ +/************************************************************************/ + +OGRErr OGRADBCLayer::GetExtent(int iGeomField, OGREnvelope *psExtent, + int bForce) +{ + if (iGeomField < 0 || iGeomField >= GetLayerDefn()->GetGeomFieldCount()) + { + if (iGeomField != 0) + { + CPLError(CE_Failure, CPLE_AppDefined, + "Invalid geometry field index : %d", iGeomField); + } + return OGRERR_FAILURE; + } + + *psExtent = m_extents[iGeomField]; + if (psExtent->IsInit()) + return OGRERR_NONE; + + return GetExtentInternal(iGeomField, psExtent, bForce); +} + +/************************************************************************/ +/* GetExtent3D() */ +/************************************************************************/ + +OGRErr OGRADBCLayer::GetExtent3D(int iGeomField, OGREnvelope3D *psExtent, + int bForce) +{ + if (iGeomField < 0 || iGeomField >= GetLayerDefn()->GetGeomFieldCount()) + { + if (iGeomField != 0) + { + CPLError(CE_Failure, CPLE_AppDefined, + "Invalid geometry field index : %d", iGeomField); + } + return OGRERR_FAILURE; + } + + *psExtent = m_extents[iGeomField]; + if (psExtent->IsInit()) + return OGRERR_NONE; + + return GetExtentInternal(iGeomField, psExtent, bForce); +} + +/************************************************************************/ +/* GetCurrentStatement() */ +/************************************************************************/ + +std::string OGRADBCLayer::GetCurrentStatement() const +{ + if (!m_osModifiedSelect.empty() && + STARTS_WITH_CI(m_osBaseStatement.c_str(), "SELECT * FROM ") && + (!m_osAttributeFilter.empty() || + (m_poFilterGeom && + (!m_geomColBBOX[m_iGeomFieldFilter].osXMin.empty() || + m_poDS->m_bSpatialLoaded)))) + { + std::string osStatement(m_osModifiedSelect); + osStatement.append(" FROM (") + .append(m_osBaseStatement) + .append(") WHERE "); + + bool bAddAnd = false; + if (m_poFilterGeom) + { + const double dfMinX = std::isinf(m_sFilterEnvelope.MinX) + ? -std::numeric_limits::max() + : m_sFilterEnvelope.MinX; + const double dfMinY = std::isinf(m_sFilterEnvelope.MinY) + ? -std::numeric_limits::max() + : m_sFilterEnvelope.MinY; + const double dfMaxX = std::isinf(m_sFilterEnvelope.MaxX) + ? std::numeric_limits::max() + : m_sFilterEnvelope.MaxX; + const double dfMaxY = std::isinf(m_sFilterEnvelope.MaxY) + ? std::numeric_limits::max() + : m_sFilterEnvelope.MaxY; + if (!m_geomColBBOX[m_iGeomFieldFilter].osXMin.empty()) + { + bAddAnd = true; + osStatement.append(m_geomColBBOX[m_iGeomFieldFilter].osXMin) + .append(" <= ") + .append(CPLSPrintf("%.17g", dfMaxX)) + .append(" AND ") + .append(m_geomColBBOX[m_iGeomFieldFilter].osXMax) + .append(" >= ") + .append(CPLSPrintf("%.17g", dfMinX)) + .append(" AND ") + .append(m_geomColBBOX[m_iGeomFieldFilter].osYMin) + .append(" <= ") + .append(CPLSPrintf("%.17g", dfMaxY)) + .append(" AND ") + .append(m_geomColBBOX[m_iGeomFieldFilter].osYMax) + .append(" >= ") + .append(CPLSPrintf("%.17g", dfMinY)); + } + if (m_poDS->m_bSpatialLoaded) + { + if (bAddAnd) + osStatement.append(" AND "); + bAddAnd = true; + osStatement.append("ST_Intersects(\"") + .append(OGRDuplicateCharacter( + m_poAdapterLayer->m_poLayerDefn + ->GetGeomFieldDefn(m_iGeomFieldFilter) + ->GetNameRef(), + '"')) + .append(CPLSPrintf( + "\", ST_MakeEnvelope(%.17g,%.17g,%.17g,%.17g))", dfMinX, + dfMinY, dfMaxX, dfMaxY)); + } + } + if (!m_osAttributeFilter.empty()) + { + if (bAddAnd) + osStatement.append(" AND "); + osStatement.append("("); + osStatement.append(m_osAttributeFilter); + osStatement.append(")"); + } + + return osStatement; + } + else + { + return m_osModifiedBaseStatement; + } +} + +/************************************************************************/ +/* UpdateStatement() */ +/************************************************************************/ + +bool OGRADBCLayer::UpdateStatement() +{ + return ReplaceStatement(GetCurrentStatement().c_str()); +} + +/***********************************************************************/ +/* SetAttributeFilter() */ +/***********************************************************************/ + +OGRErr OGRADBCLayer::SetAttributeFilter(const char *pszFilter) +{ + if (!m_osModifiedSelect.empty() && + STARTS_WITH_CI(m_osBaseStatement.c_str(), "SELECT * FROM ")) + { + m_osAttributeFilter = pszFilter ? pszFilter : ""; + return UpdateStatement() ? OGRERR_NONE : OGRERR_FAILURE; + } + else + { + return OGRLayer::SetAttributeFilter(pszFilter); + } +} + +/************************************************************************/ +/* SetSpatialFilter() */ +/************************************************************************/ + +void OGRADBCLayer::SetSpatialFilter(int iGeomField, OGRGeometry *poGeomIn) + +{ + if (!ValidateGeometryFieldIndexForSetSpatialFilter(iGeomField, poGeomIn)) + return; + + if (iGeomField < GetLayerDefn()->GetGeomFieldCount()) + { + m_iGeomFieldFilter = iGeomField; + if (InstallFilter(poGeomIn)) + ResetReading(); + UpdateStatement(); + } +} + /************************************************************************/ /* TestCapability() */ /************************************************************************/ @@ -127,16 +789,46 @@ int OGRADBCLayer::TestCapability(const char *pszCap) { if (EQUAL(pszCap, OLCFastGetArrowStream)) { - return !m_poFilterGeom && !m_poAttrQuery; + return !m_poFilterGeom && !m_poAttrQuery && m_osAttributeFilter.empty(); } else if (EQUAL(pszCap, OLCFastFeatureCount)) { - return !m_poFilterGeom && !m_poAttrQuery && m_bIsParquetLayer; + return !m_poFilterGeom && !m_poAttrQuery && + m_osAttributeFilter.empty() && m_bIsParquetLayer; } - else + else if (EQUAL(pszCap, OLCFastGetExtent)) { - return false; + return !m_extents.empty() && m_extents[0].IsInit(); + } + else if (EQUAL(pszCap, OLCFastSpatialFilter) && m_iGeomFieldFilter >= 0 && + m_iGeomFieldFilter < GetLayerDefn()->GetGeomFieldCount()) + { + if (m_poDS->m_bSpatialLoaded && m_poDS->m_bIsDuckDB) + { + const char *pszGeomColName = + m_poAdapterLayer->m_poLayerDefn + ->GetGeomFieldDefn(m_iGeomFieldFilter) + ->GetNameRef(); + auto poTmpLayer = m_poDS->CreateInternalLayer(CPLSPrintf( + "SELECT 1 FROM sqlite_master WHERE tbl_name = '%s' AND type = " + "'index' AND (sql LIKE '%%USING RTREE (%s)%%' OR sql LIKE " + "'%%USING RTREE (\"%s\")%%')", + OGRDuplicateCharacter(GetDescription(), '\'').c_str(), + pszGeomColName, + OGRDuplicateCharacter(pszGeomColName, '"').c_str())); + return poTmpLayer && + std::unique_ptr(poTmpLayer->GetNextFeature()); + } + else if (!m_geomColBBOX[m_iGeomFieldFilter].osXMin.empty()) + { + // Let's assume that the presence of a geometry bounding box + // column is sufficient enough to pretend to have fast spatial + // filter capabilities + return true; + } } + + return false; } /************************************************************************/ @@ -196,8 +888,30 @@ bool OGRADBCLayer::GetArrowStreamInternal(struct ArrowArrayStream *out_stream) GIntBig OGRADBCLayer::GetFeatureCount(int bForce) { - if (m_poFilterGeom || m_poAttrQuery) + if (m_poFilterGeom || m_poAttrQuery || !m_osAttributeFilter.empty()) { + if (!m_osModifiedSelect.empty() && + STARTS_WITH_CI(m_osBaseStatement.c_str(), "SELECT * FROM ") && + (!m_poFilterGeom || + !m_geomColBBOX[m_iGeomFieldFilter].osXMin.empty() || + m_poDS->m_bSpatialLoaded)) + { + const std::string osCurStatement = GetCurrentStatement(); + auto poCountLayer = m_poDS->CreateInternalLayer( + std::string("SELECT COUNT(*) FROM (") + .append(osCurStatement) + .append(")") + .c_str()); + if (poCountLayer && + poCountLayer->GetLayerDefn()->GetFieldCount() == 1) + { + auto poFeature = + std::unique_ptr(poCountLayer->GetNextFeature()); + if (poFeature) + return poFeature->GetFieldAsInteger64(0); + } + } + return OGRLayer::GetFeatureCount(bForce); } @@ -249,7 +963,7 @@ GIntBig OGRADBCLayer::GetFeatureCountParquet() const std::string osSQL(CPLSPrintf( "SELECT CAST(SUM(num_rows) AS BIGINT) FROM parquet_file_metadata('%s')", OGRDuplicateCharacter(m_poDS->m_osParquetFilename, '\'').c_str())); - auto poCountLayer = m_poDS->CreateLayer(osSQL.c_str(), "numrows"); + auto poCountLayer = m_poDS->CreateInternalLayer(osSQL.c_str()); if (poCountLayer && poCountLayer->GetLayerDefn()->GetFieldCount() == 1) { auto poFeature = diff --git a/port/cpl_known_config_options.h b/port/cpl_known_config_options.h index 88a949d1048b..ddb077899615 100644 --- a/port/cpl_known_config_options.h +++ b/port/cpl_known_config_options.h @@ -651,6 +651,7 @@ constexpr static const char* const apszKnownConfigOptions[] = "ODS_RESOLVE_FORMULAS", // from ogrodsdatasource.cpp "OGR2OGR_MIN_FEATURES_FOR_THREADED_REPROJ", // from ogr2ogr_lib.cpp "OGR2OGR_USE_ARROW_API", // from ogr2ogr_lib.cpp + "OGR_ADBC_AUTO_LOAD_DUCKDB_SPATIAL", // from ogradbcdataset.cpp "OGR_API_SPY_FILE", // from ograpispy.cpp "OGR_API_SPY_SNAPSHOT_PATH", // from ograpispy.cpp "OGR_APPLY_GEOM_SET_PRECISION", // from ogr2ogr_lib.cpp, ogrlayer.cpp