Skip to content

Commit

Permalink
Enhance/Fix filter support
Browse files Browse the repository at this point in the history
re: Discussion #2214

The primary change is to support so-called "standard filters".
A standard filter is one that is defined by the following
netcdf-c API:
````
int nc_def_var_XXX(int ncid, int varid, size_t nparams, unsigned* params);
int nc_inq_var_XXX(int ncid, int varid, int* usefilterp, unsigned* params);
````
So for example, zstandard would be a standard filter by defining
the functions *nc_def_var_zstandard* and *nc_inq_var_zstandard*.

In order to define these functions, we need a new dispatch function:
````
int nc_inq_filter_avail(int ncid, unsigned filterid);
````
This function, combined with the existing filter API, can be used
to implement arbitrary standard filters using a simple code pattern.
Note that I would have preferred that this function return a list
of all available filters, but HDF5 does not support that functionality.

So this PR implements the dispatch function and implements
the following standard functions:
    + bzip2
    + zstandard
    + blosc
Specific test cases are also provided for HDF5 and NCZarr.
Over time, other specific standard filters will be defined.

## Primary Changes
* Add nc_inq_filter_avail() to netcdf-c API.
* Add standard filter implementations to test use of *nc_inq_filter_avail*.
* Bump the dispatch table version number and add to all the relevant
   dispatch tables (libsrc, libsrcp, etc).
* Create a program to invoke nc_inq_filter_avail so that it is accessible
  to shell scripts.
* Clean up szip support to properly support szip
  when HDF5 is disabled. This involves detecting
  libsz separately from testing if HDF5 supports szip.
* Integrate shuffle and fletcher32 into the existing
  filter API. This means that, for example, nc_def_var_fletcher32
  is now a wrapper around nc_def_var_filter.
* Extend the Codec defaulting to allow multiple default shared libraries.

## Misc. Changes
* Modify configure.ac/CMakeLists.txt to look for the relevant
  libraries implementing standard filters.
* Modify libnetcdf.settings to list available standard filters
  (including deflate and szip).
* Add CMake test modules to locate libbz2 and libzstd.
* Clean up the HDF5 memory manager function use in the plugins.
* Remove unused file include/ncfilter.h
* remove tests for the HDF5 memory operations e.g. H5allocate_memory.
* Add flag to ncdump to force use of _Filter instead of _Deflate
  or _Shuffle or _Fletcher32. Used for testing.
  • Loading branch information
DennisHeimbigner committed Mar 14, 2022
1 parent f121e0b commit 3ffe7be
Show file tree
Hide file tree
Showing 96 changed files with 2,419 additions and 1,324 deletions.
29 changes: 0 additions & 29 deletions .github/workflows/mingw.yml

This file was deleted.

2 changes: 1 addition & 1 deletion .github/workflows/run_tests_win_mingw.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
name: Run MSYS2, MinGW64-based Tests


on: [ pull_request ]
on: [pull_request]

jobs:

Expand Down
138 changes: 85 additions & 53 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ SET(PACKAGE_VERSION ${VERSION})

# Version of the dispatch table. This must match the value in
# configure.ac.
SET(NC_DISPATCH_VERSION 4)
SET(NC_DISPATCH_VERSION 5)

# Get system configuration, Use it to determine osname, os release, cpu. These
# will be used when committing to CDash.
Expand Down Expand Up @@ -588,6 +588,33 @@ ENDIF(ENABLE_STRICT_NULL_BYTE_HEADER_PADDING)
# SET(BUILD_RPC ON CACHE BOOL "")
#ENDIF()

# Note that szip management is tricky.
# This is because we have three things to consider:
# 1. is libsz available?
# 2. is szip enabled in HDF5?
# 3. is nczarr enabled?
# We need separate flags for cases 1 and 2

# We need to determine if libsz is available both for HDF5 and NCZarr.
# If the user has specified `SZIP_LIBRARY`, use it as-is; otherwise try to
# locate the library under any of its known names.
IF(NOT SZIP_LIBRARY)
  FIND_LIBRARY(SZIP NAMES szip sz sz2)
  IF(SZIP)
    SET(SZIP_LIBRARY ${SZIP})
  ELSE()
    # Nothing found: make sure neither variable is left holding a
    # "-NOTFOUND" value in this scope.
    UNSET(SZIP_LIBRARY)
    UNSET(SZIP)
  ENDIF()
ENDIF()

# Flag for case 1 (libsz available), independent of HDF5:
#   SZIP_FOUND - yes/no
#   HAVE_SZ    - yes/no
IF(SZIP_LIBRARY)
  SET(SZIP_FOUND yes)
  SET(HAVE_SZ yes)
ELSE()
  SET(SZIP_FOUND no)
  SET(HAVE_SZ no)
ENDIF()

##
# Option to Enable HDF5
#
Expand Down Expand Up @@ -847,19 +874,18 @@ IF(USE_HDF5)
#error
#endif
int main() {
int x = 1;}" USE_SZIP)
IF(USE_SZIP)
int x = 1;}" USE_HDF5_SZIP)
IF(USE_HDF5_SZIP)
SET(HAVE_H5Z_SZIP yes)
# If user has specified the `SZIP_LIBRARY`, use it; otherwise try to find...
IF(NOT SZIP_LIBRARY)
FIND_LIBRARY(SZIP PATH NAMES szip sz)
SET(SZIP_LIBRARY ${SZIP})
IF(NOT SZIP)
IF(SZIP_FOUND)
SET(CMAKE_REQUIRED_LIBRARIES ${SZIP_LIBRARY} ${CMAKE_REQUIRED_LIBRARIES})
MESSAGE(STATUS "HDF5 has szip.")
ELSE()
MESSAGE(FATAL_ERROR "HDF5 Requires SZIP, but cannot find libszip or libsz.")
ENDIF()
ENDIF()
SET(HAVE_H5Z_SZIP 1)
SET(CMAKE_REQUIRED_LIBRARIES ${SZIP_LIBRARY} ${CMAKE_REQUIRED_LIBRARIES})
MESSAGE(STATUS "HDF5 has szip.")
ELSE()
SET(HAVE_H5Z_SZIP no)
ENDIF()

####
Expand Down Expand Up @@ -891,9 +917,6 @@ IF(USE_HDF5)

#Check to see if HDF5 library has collective metadata APIs, (HDF5 >= 1.10.0)
CHECK_LIBRARY_EXISTS(${HDF5_C_LIBRARY_hdf5} H5Pset_all_coll_metadata_ops "" HDF5_HAS_COLL_METADATA_OPS)
CHECK_LIBRARY_EXISTS(${HDF5_C_LIBRARY_hdf5} H5free_memory "" HAVE_H5FREE_MEMORY)
CHECK_LIBRARY_EXISTS(${HDF5_C_LIBRARY_hdf5} H5allocate_memory "" HAVE_H5ALLOCATE_MEMORY)
CHECK_LIBRARY_EXISTS(${HDF5_C_LIBRARY_hdf5} H5resize_memory "" HAVE_H5RESIZE_MEMORY)

IF(HDF5_PARALLEL)
SET(HDF5_CC h5pcc)
Expand Down Expand Up @@ -1055,30 +1078,33 @@ ELSE()
SET(ENABLE_ZLIB FALSE)
ENDIF()

# See if we have libblosc
IF(!MSVC)
FIND_PACKAGE(Blosc)
ENDIF()
# Define a test flag for have blosc library
IF(Blosc_FOUND)
INCLUDE_DIRECTORIES(${Blosc_INCLUDE_DIRS})
SET(ENABLE_BLOSC TRUE)
# set_std_filter(<filter>)
#
# Record whether the standard-filter library <filter> is available.
# <filter> is the package name exactly as used in the `<filter>_FOUND` /
# `<filter>_INCLUDE_DIRS` variables (e.g. Blosc, Zstd, Bz2, SZIP).
#
# Effects in the caller's scope (this is a macro, so no new scope is created):
#   ENABLE_<FILTER>  set to TRUE when `<filter>_FOUND` is true, FALSE otherwise
#                    (<FILTER> is the upper-cased filter name)
#   STD_FILTERS      ",<filter-lower-cased>" is appended when the filter is found
# When found, the filter's include directories are added to the directory
# include path.
macro(set_std_filter filter)
  # Upper- and lower-case spellings of the filter name
  string(TOUPPER "${filter}" upfilter)
  string(TOLOWER "${filter}" downfilter)
  # Define a test flag for filter
  IF(${filter}_FOUND)
    # Dereference the *value* of <filter>_INCLUDE_DIRS; it may be empty
    # for libraries located without a header search.
    IF(${filter}_INCLUDE_DIRS)
      INCLUDE_DIRECTORIES(${${filter}_INCLUDE_DIRS})
    ENDIF()
    SET(ENABLE_${upfilter} TRUE)
    SET(STD_FILTERS "${STD_FILTERS},${downfilter}")
  ELSE()
    SET(ENABLE_${upfilter} FALSE)
  ENDIF()
endmacro(set_std_filter)

# See if we have libszip
IF(!MSVC)
#FIND_PACKAGE(SZIP)
#FIND_LIBRARY(SZIP PATH NAMES szip sz)
SET(SZIP_LIBRARY ${SZIP})
ENDIF()
# Define a test flag for have szip library
IF(SZIP_FOUND)
INCLUDE_DIRECTORIES(${SZIP_INCLUDE_DIRS})
SET(ENABLE_SZIP TRUE)
ELSE()
SET(ENABLE_SZIP FALSE)
# Probe for the optional compression libraries. None of them is REQUIRED,
# so a missing package simply leaves its <pkg>_FOUND flag false.
foreach(compressor_pkg Bz2 Blosc Zstd)
  find_package(${compressor_pkg})
endforeach()

# Build the comma-separated list of standard filters.
# deflate is listed unconditionally; every other entry is appended by
# set_std_filter() only when the corresponding library was found.
set(STD_FILTERS "deflate")
foreach(std_filter_pkg SZIP Blosc Zstd Bz2)
  set_std_filter(${std_filter_pkg})
endforeach()

# bzip2 is listed even when the Bz2 package was not located.
# NOTE(review): presumably a built-in fallback implementation exists - confirm.
if(NOT Bz2_FOUND)
  set(STD_FILTERS "${STD_FILTERS},bzip2")
endif()

# See if we have libzip
Expand Down Expand Up @@ -1160,21 +1186,22 @@ IF(ENABLE_NCZARR_S3_TESTS AND NOT ENABLE_NCZARR_S3)
SET(ENABLE_NCZARR_S3_TESTS OFF CACHE BOOL "NCARR S3 TESTS" FORCE)
ENDIF()

# See if aws-s3-sdk is available
# But only if enabled
# Note we check for the library after checking for enable_nczarr_s3
# because for some reason this screws up if we unconditionally test for sdk
# and it is not available. Fix someday
IF(ENABLE_NCZARR_S3)
find_package(AWSSDK REQUIRED COMPONENTS s3;core)
IF(AWSSDK_FOUND)
SET(service s3;core)
AWSSDK_DETERMINE_LIBS_TO_LINK(service AWS_LINK_LIBRARIES)
SET(ENABLE_S3_SDK ON CACHE BOOL "S3 SDK" FORCE)
# See if aws-s3-sdk is available
find_package(AWSSDK REQUIRED COMPONENTS s3;core)
IF(AWSSDK_FOUND)
SET(service s3;core)
AWSSDK_DETERMINE_LIBS_TO_LINK(service AWS_LINK_LIBRARIES)
SET(ENABLE_S3_SDK ON CACHE BOOL "S3 SDK" FORCE)
ELSE()
SET(ENABLE_S3_SDK OFF CACHE BOOL "S3 SDK" FORCE)
ENDIF()
ELSE()
SET(ENABLE_S3_SDK OFF CACHE BOOL "S3 SDK" FORCE)
ENDIF()
ELSE(ENABLE_NCZARR_S3)
# Unconditionally disable
SET(ENABLE_S3_SDK OFF CACHE BOOL "S3 SDK" FORCE)
ENDIF(ENABLE_NCZARR_S3)

IF(NOT ENABLE_S3_SDK)
IF(ENABLE_NCZARR_S3 OR ENABLE_NCZARR_S3_TESTS)
Expand Down Expand Up @@ -1493,11 +1520,19 @@ IF(NOT BUILD_SHARED_LIBS)
ENDIF()

OPTION(ENABLE_NCZARR_FILTERS "Enable NCZarr filters" yes)
OPTION(ENABLE_NCZARR_FILTERS_TESTING "Enable NCZarr filter testing." yes)

# Constraints
IF (NOT ENABLE_PLUGINS)
SET(ENABLE_NCZARR_FILTERS OFF CACHE BOOL "Enable NCZarr Filters." FORCE)
MESSAGE(WARNING "ENABLE_FILTER_TESTING requires shared libraries. Disabling.")
SET(ENABLE_NCZARR_FILTERS OFF CACHE BOOL "Enable NCZarr Filters." FORCE)
ENDIF()

IF (NOT ENABLE_NCZARR)
MESSAGE(WARNING "ENABLE_NCZARR==NO => ENABLE_NCZARR_FILTERS==NO AND ENABLE_NCZARR_FILTER_TESTING==NO")
SET(ENABLE_NCZARR_FILTERS OFF CACHE BOOL "Disable NCZARR_FILTERS" FORCE)
ENDIF()

OPTION(ENABLE_NCZARR_FILTERS_TESTING "Enable NCZarr filter testing." yes)
IF (NOT ENABLE_NCZARR_FILTERS)
SET(ENABLE_NCZARR_FILTER_TESTING OFF CACHE BOOL "Enable NCZarr Filter Testing" FORCE)
ENDIF()
Expand Down Expand Up @@ -2357,9 +2392,6 @@ is_enabled(ENABLE_V2_API HAS_NC2)
is_enabled(ENABLE_NETCDF_4 HAS_NC4)
is_enabled(ENABLE_HDF4 HAS_HDF4)
is_enabled(USE_HDF5 HAS_HDF5)
is_enabled(USE_SZIP HAS_SZIP)
is_enabled(USE_SZIP HAS_SZIP_WRITE)
is_enabled(USE_SZIP HAS_SZLIB_WRITE)
is_enabled(STATUS_PNETCDF HAS_PNETCDF)
is_enabled(STATUS_PARALLEL HAS_PARALLEL)
is_enabled(ENABLE_PARALLEL4 HAS_PARALLEL4)
Expand All @@ -2373,7 +2405,6 @@ is_enabled(JNA HAS_JNA)
is_enabled(ENABLE_ZERO_LENGTH_COORD_BOUND RELAX_COORD_BOUND)
is_enabled(USE_CDF5 HAS_CDF5)
is_enabled(ENABLE_ERANGE_FILL HAS_ERANGE_FILL)
is_enabled(HAVE_H5Z_SZIP HAS_SZLIB)
is_enabled(HDF5_HAS_PAR_FILTERS HAS_PAR_FILTERS)
is_enabled(ENABLE_NCZARR HAS_NCZARR)
is_enabled(ENABLE_NCZARR_S3_TESTS DO_NCZARR_S3_TESTS)
Expand All @@ -2382,7 +2413,8 @@ is_enabled(ENABLE_NCZARR_ZIP DO_NCZARR_ZIP_TESTS)
is_enabled(ENABLE_QUANTIZE HAS_QUANTIZE)
is_enabled(ENABLE_LOGGING HAS_LOGGING)
is_enabled(ENABLE_FILTER_TESTING DO_FILTER_TESTS)
is_enabled(ENABLE_BLOSC HAS_BLOSC)
is_enabled(HAVE_SZ HAS_SZIP)
is_enabled(HAVE_SZ HAS_SZLIB_WRITE)

# Generate file from template.
CONFIGURE_FILE("${CMAKE_CURRENT_SOURCE_DIR}/libnetcdf.settings.in"
Expand Down
1 change: 1 addition & 0 deletions RELEASE_NOTES.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ This file contains a high-level description of this package's evolution. Release

## 4.8.2 - TBD

* [Enhancement] Improve filter support. More specifically (1) add nc_inq_filter_avail to check if a filter is available, (2) add the notion of standard filters, (3) cleanup szip support to fix interaction with NCZarr. See [Github #????](https://github.com/Unidata/netcdf-c/pull/????).
* [Bug Fix] Require that the type of the variable in nc_def_var_filter is not variable length. See [Github #/2231](https://github.com/Unidata/netcdf-c/pull/2231).
* [File Change] Apply HDF5 v1.8 format compatibility when writing to previous files, as well as when creating new files. The superblock version remains at 2 for newly created files. Full backward read/write compatibility for netCDF-4 is maintained in all cases. See [Github #2176](https://github.com/Unidata/netcdf-c/issues/2176).
* [Enhancement] Add ability to set dataset alignment for netcdf-4/HDF5 files. See [Github #2206](https://github.com/Unidata/netcdf-c/pull/2206).
Expand Down
64 changes: 64 additions & 0 deletions cmake/modules/FindBz2.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# Searches for an installation of the bz2 (bzip2) library. On success, it sets the following variables:
#
#   Bz2_FOUND         Set to true to indicate the bz2 library was found
#   Bz2_INCLUDE_DIRS  The directory containing the header file bzlib.h
#   Bz2_LIBRARIES     The libraries needed to use the bz2 library
#
# To specify an additional directory to search, set Bz2_ROOT.
#
# Author: Siddhartha Chaudhuri, 2009
#

# Look for the header, first in the user-specified location and then in the system locations.
# The header searched for is bzlib.h; the legacy names bz2.h and bz2/bz2.h are
# still accepted under Bz2_ROOT so existing setups keep working.
set(Bz2_INCLUDE_DOC "The directory containing the header file bzlib.h")
if(Bz2_ROOT)
  # Only search the user-specified tree when one was actually given; an empty
  # Bz2_ROOT would otherwise turn into a bogus "/include" search path.
  find_path(Bz2_INCLUDE_DIRS
    NAMES bzlib.h bz2.h bz2/bz2.h
    PATHS ${Bz2_ROOT} ${Bz2_ROOT}/include
    DOC "${Bz2_INCLUDE_DOC}"
    NO_DEFAULT_PATH)
endif()
if(NOT Bz2_INCLUDE_DIRS) # now look in system locations
  find_path(Bz2_INCLUDE_DIRS NAMES bzlib.h DOC "${Bz2_INCLUDE_DOC}")
endif()

set(Bz2_FOUND FALSE)

if(Bz2_INCLUDE_DIRS)
  # Derive the library directory from the header location:
  # <prefix>/include -> <prefix> -> <prefix>/lib (when that directory exists).
  set(Bz2_LIBRARY_DIRS ${Bz2_INCLUDE_DIRS})

  if("${Bz2_LIBRARY_DIRS}" MATCHES "/include$")
    # Strip off the trailing "/include" in the path.
    get_filename_component(Bz2_LIBRARY_DIRS ${Bz2_LIBRARY_DIRS} PATH)
  endif()

  if(EXISTS "${Bz2_LIBRARY_DIRS}/lib")
    set(Bz2_LIBRARY_DIRS ${Bz2_LIBRARY_DIRS}/lib)
  endif()

  # Find Bz2 libraries. Only the directory derived above (plus its
  # architecture / configuration sub-directories) is searched.
  find_library(Bz2_DEBUG_LIBRARY
    NAMES bz2d bz2_d libbz2d libbz2_d libbz2
    PATH_SUFFIXES Debug ${CMAKE_LIBRARY_ARCHITECTURE} ${CMAKE_LIBRARY_ARCHITECTURE}/Debug
    PATHS ${Bz2_LIBRARY_DIRS}
    NO_DEFAULT_PATH)
  find_library(Bz2_RELEASE_LIBRARY
    NAMES bz2 libbz2
    PATH_SUFFIXES Release ${CMAKE_LIBRARY_ARCHITECTURE} ${CMAKE_LIBRARY_ARCHITECTURE}/Release
    PATHS ${Bz2_LIBRARY_DIRS}
    NO_DEFAULT_PATH)

  # Prefer a debug/optimized pair; otherwise use whichever variant exists.
  set(Bz2_LIBRARIES)
  if(Bz2_DEBUG_LIBRARY AND Bz2_RELEASE_LIBRARY)
    set(Bz2_LIBRARIES debug ${Bz2_DEBUG_LIBRARY} optimized ${Bz2_RELEASE_LIBRARY})
  elseif(Bz2_DEBUG_LIBRARY)
    set(Bz2_LIBRARIES ${Bz2_DEBUG_LIBRARY})
  elseif(Bz2_RELEASE_LIBRARY)
    set(Bz2_LIBRARIES ${Bz2_RELEASE_LIBRARY})
  endif()

  if(Bz2_LIBRARIES)
    set(Bz2_FOUND TRUE)
  endif()
endif()

if(Bz2_FOUND)
  # Report the result unless find_package(Bz2 QUIET) was requested.
  if(NOT Bz2_FIND_QUIETLY)
    message(STATUS "Found Bz2: headers at ${Bz2_INCLUDE_DIRS}, libraries at ${Bz2_LIBRARY_DIRS}")
    message(STATUS " library is ${Bz2_LIBRARIES}")
  endif()
elseif(Bz2_FIND_REQUIRED)
  # find_package(Bz2 REQUIRED) must abort configuration when nothing was found.
  message(FATAL_ERROR "Bz2 library not found")
endif()
64 changes: 64 additions & 0 deletions cmake/modules/FindZstd.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# Find-module for the zstd compression library.
#
# Result variables:
#   Zstd_FOUND         TRUE when at least one zstd library file was located
#   Zstd_INCLUDE_DIRS  Directory in which zstd.h (or zstd/zstd.h) was found
#   Zstd_LIBRARIES     Library (or debug/optimized pair) to link against
#
# Hint variable:
#   Zstd_ROOT          Extra installation prefix that is searched first
#
# Author: Siddhartha Chaudhuri, 2009
#

# --- Header -----------------------------------------------------------------
# The user-supplied prefix is tried first (without the default search paths);
# the regular system search is used only if that yields nothing.
set(Zstd_INCLUDE_DOC "The directory containing the header file zstd.h")
find_path(Zstd_INCLUDE_DIRS
  NAMES zstd.h zstd/zstd.h
  PATHS ${Zstd_ROOT} ${Zstd_ROOT}/include
  DOC ${Zstd_INCLUDE_DOC}
  NO_DEFAULT_PATH)
if(NOT Zstd_INCLUDE_DIRS)
  find_path(Zstd_INCLUDE_DIRS
    NAMES zstd.h zstd/zstd.h
    DOC ${Zstd_INCLUDE_DOC})
endif()

# --- Libraries --------------------------------------------------------------
set(Zstd_FOUND FALSE)

if(Zstd_INCLUDE_DIRS)
  # Guess the library directory from the header directory:
  # drop a trailing "/include", then descend into "lib" if it exists.
  set(Zstd_LIBRARY_DIRS ${Zstd_INCLUDE_DIRS})
  if("${Zstd_LIBRARY_DIRS}" MATCHES "/include$")
    get_filename_component(Zstd_LIBRARY_DIRS ${Zstd_LIBRARY_DIRS} PATH)
  endif()
  if(EXISTS "${Zstd_LIBRARY_DIRS}/lib")
    set(Zstd_LIBRARY_DIRS ${Zstd_LIBRARY_DIRS}/lib)
  endif()

  # Debug and release variants are looked up separately, and only below the
  # directory guessed above.
  find_library(Zstd_DEBUG_LIBRARY
    NAMES zstdd zstd_d libzstdd libzstd_d libzstd
    PATH_SUFFIXES Debug ${CMAKE_LIBRARY_ARCHITECTURE} ${CMAKE_LIBRARY_ARCHITECTURE}/Debug
    PATHS ${Zstd_LIBRARY_DIRS}
    NO_DEFAULT_PATH)
  find_library(Zstd_RELEASE_LIBRARY
    NAMES zstd libzstd
    PATH_SUFFIXES Release ${CMAKE_LIBRARY_ARCHITECTURE} ${CMAKE_LIBRARY_ARCHITECTURE}/Release
    PATHS ${Zstd_LIBRARY_DIRS}
    NO_DEFAULT_PATH)

  # Both variants -> debug/optimized pair; otherwise whichever one exists.
  set(Zstd_LIBRARIES)
  if(Zstd_RELEASE_LIBRARY)
    if(Zstd_DEBUG_LIBRARY)
      set(Zstd_LIBRARIES debug ${Zstd_DEBUG_LIBRARY} optimized ${Zstd_RELEASE_LIBRARY})
    else()
      set(Zstd_LIBRARIES ${Zstd_RELEASE_LIBRARY})
    endif()
  elseif(Zstd_DEBUG_LIBRARY)
    set(Zstd_LIBRARIES ${Zstd_DEBUG_LIBRARY})
  endif()

  if(Zstd_LIBRARIES)
    set(Zstd_FOUND TRUE)
  endif()
endif()

# --- Reporting --------------------------------------------------------------
# The status lines are printed unconditionally (Zstd_FIND_QUIETLY is not
# consulted); a missing library is fatal only for find_package(Zstd REQUIRED).
if(Zstd_FOUND)
  message(STATUS "Found Zstd: headers at ${Zstd_INCLUDE_DIRS}, libraries at ${Zstd_LIBRARY_DIRS}")
  message(STATUS " library is ${Zstd_LIBRARIES}")
elseif(Zstd_FIND_REQUIRED)
  message(FATAL_ERROR "Zstd library not found")
endif()
Loading

0 comments on commit 3ffe7be

Please sign in to comment.