From 1ca9cb5049e39c1165b1a63404d466380303f099 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Thu, 6 Jul 2023 22:55:49 -0700 Subject: [PATCH 01/44] add equality, null, and range filter changes: * new filters that preserve match value typing to better handle filtering different column types * sql planner uses new filters by default in sql compatible null handling mode * remove isFilterable from column capabilities * proper handling of array filtering, add array processor to column processors --- ...ryEncodedStringIndexSupplierBenchmark.java | 6 +- .../sql/TDigestSketchSqlAggregatorTest.java | 9 +- .../HllSketchBuildColumnProcessorFactory.java | 8 + .../HllSketchBuildVectorProcessorFactory.java | 9 + .../KllDoublesSketchAggregatorFactory.java | 17 +- .../kll/KllFloatsSketchAggregatorFactory.java | 18 +- .../DoublesSketchAggregatorFactory.java | 13 + .../ToObjectVectorColumnProcessorFactory.java | 6 + .../hll/sql/HllSketchSqlAggregatorTest.java | 23 +- .../sql/DoublesSketchSqlAggregatorTest.java | 20 +- .../sql/ThetaSketchSqlAggregatorTest.java | 24 +- .../sql/BloomFilterSqlAggregatorTest.java | 9 +- ...etsHistogramQuantileSqlAggregatorTest.java | 6 +- .../sql/QuantileSqlAggregatorTest.java | 6 +- .../apache/druid/msq/exec/MSQSelectTest.java | 8 +- .../sql/VarianceSqlAggregatorTest.java | 18 +- .../indexing/input/DruidSegmentReader.java | 6 + .../CardinalityVectorProcessorFactory.java | 10 + .../druid/query/filter/BoundDimFilter.java | 10 +- .../apache/druid/query/filter/DimFilter.java | 5 +- .../druid/query/filter/DimFilterUtils.java | 4 + .../query/filter/DruidFloatPredicate.java | 2 + .../query/filter/DruidPredicateFactory.java | 6 + .../druid/query/filter/EqualityFilter.java | 537 ++++++++++ .../org/apache/druid/query/filter/Filter.java | 2 +- .../druid/query/filter/InDimFilter.java | 6 +- .../apache/druid/query/filter/NullFilter.java | 310 ++++++ .../druid/query/filter/RangeFilter.java | 771 +++++++++++++++ .../vector/ArrayVectorValueMatcher.java | 94 ++ 
.../vector/DoubleVectorValueMatcher.java | 20 +- .../vector/FloatVectorValueMatcher.java | 19 + .../filter/vector/LongVectorValueMatcher.java | 19 + .../MultiValueStringVectorValueMatcher.java | 11 + .../vector/ObjectVectorValueMatcher.java | 8 + .../SingleValueStringVectorValueMatcher.java | 11 + .../StringObjectVectorValueMatcher.java | 11 + ...torValueMatcherColumnProcessorFactory.java | 6 + .../vector/VectorValueMatcherFactory.java | 3 + .../GroupByVectorColumnProcessorFactory.java | 13 + .../druid/query/metadata/SegmentAnalyzer.java | 2 +- .../DefaultFramedOnHeapAggregatable.java | 1 - .../druid/query/search/AutoStrategy.java | 2 +- .../query/search/UseIndexesStrategy.java | 4 +- .../druid/segment/ColumnProcessorFactory.java | 2 + .../druid/segment/ColumnProcessors.java | 4 + .../ColumnSelectorColumnIndexSelector.java | 5 +- .../apache/druid/segment/FilterAnalysis.java | 2 +- .../apache/druid/segment/FilteredOffset.java | 2 +- .../QueryableIndexIndexableAdapter.java | 2 +- .../segment/QueryableIndexStorageAdapter.java | 2 +- .../segment/VectorColumnProcessorFactory.java | 2 + .../column/CapabilitiesBasedFormat.java | 1 - .../druid/segment/column/ColumnBuilder.java | 7 - .../segment/column/ColumnCapabilities.java | 6 - .../column/ColumnCapabilitiesImpl.java | 15 - .../druid/segment/column/ColumnConfig.java | 9 +- .../druid/segment/filter/AndFilter.java | 2 +- .../druid/segment/filter/BoundFilter.java | 8 +- .../filter/ColumnComparisonFilter.java | 20 +- .../filter/DimensionPredicateFilter.java | 2 +- .../segment/filter/ExpressionFilter.java | 29 +- .../druid/segment/filter/FalseFilter.java | 4 +- .../apache/druid/segment/filter/Filters.java | 33 +- .../segment/filter/JavaScriptFilter.java | 2 +- .../druid/segment/filter/LikeFilter.java | 10 +- .../druid/segment/filter/NotFilter.java | 2 +- .../apache/druid/segment/filter/OrFilter.java | 2 +- .../filter/PredicateValueMatcherFactory.java | 50 +- .../druid/segment/filter/SelectorFilter.java | 12 +- 
.../druid/segment/filter/SpatialFilter.java | 6 +- ...=> StringConstantValueMatcherFactory.java} | 10 +- .../druid/segment/filter/TrueFilter.java | 4 +- .../druid/segment/filter/ValueMatchers.java | 37 +- .../AllFalseBitmapColumnIndex.java | 4 +- .../AllTrueBitmapColumnIndex.java | 4 +- .../{column => index}/BitmapColumnIndex.java | 3 +- .../DictionaryEncodedStringValueIndex.java | 3 +- .../DictionaryEncodedValueIndex.java | 3 +- .../DruidPredicateIndex.java | 2 +- ...ringDictionaryEncodedStringValueIndex.java | 2 +- .../IndexedStringDruidPredicateIndex.java | 4 +- .../IndexedUtf8LexicographicalRangeIndex.java | 4 +- .../IndexedUtf8ValueSetIndex.java | 2 +- .../LexicographicalRangeIndex.java | 2 +- .../{column => index}/NullValueIndex.java | 2 +- .../{column => index}/NumericRangeIndex.java | 2 +- .../SimpleBitmapColumnIndex.java | 5 +- .../SimpleImmutableBitmapIndex.java | 2 +- .../SimpleImmutableBitmapIterableIndex.java | 2 +- .../{column => index}/SpatialIndex.java | 2 +- .../StringValueSetIndex.java | 2 +- .../{column => index}/Utf8ValueSetIndex.java | 2 +- .../join/lookup/LookupJoinMatcher.java | 6 + .../join/table/IndexedTableJoinMatcher.java | 8 + .../nested/NestedCommonFormatColumn.java | 3 +- .../nested/NestedDataComplexTypeSerde.java | 3 +- .../NestedFieldColumnIndexSupplier.java | 22 +- .../ScalarDoubleColumnAndIndexSupplier.java | 20 +- .../ScalarLongColumnAndIndexSupplier.java | 20 +- .../druid/segment/nested/VariantColumn.java | 34 +- .../nested/VariantColumnAndIndexSupplier.java | 12 +- .../NestedCommonFormatColumnPartSerde.java | 26 +- .../segment/serde/NullColumnPartSerde.java | 1 - .../segment/serde/NullValueIndexSupplier.java | 6 +- .../serde/StringUtf8ColumnIndexSupplier.java | 26 +- .../segment/virtual/ExpressionSelectors.java | 2 +- .../virtual/ListFilteredVirtualColumn.java | 18 +- .../virtual/NestedFieldVirtualColumn.java | 8 +- ...owReadingVectorColumnProcessorFactory.java | 6 + .../druid/query/filter/InDimFilterTest.java | 6 +- 
.../druid/query/filter/LikeDimFilterTest.java | 6 +- .../query/scan/NestedDataScanQueryTest.java | 4 +- ...ColumnSelectorColumnIndexSelectorTest.java | 7 +- .../segment/IndexMergerNullHandlingTest.java | 4 +- .../druid/segment/IndexMergerTestBase.java | 2 +- .../column/ColumnCapabilitiesImplTest.java | 5 +- .../druid/segment/filter/BaseFilterTest.java | 117 ++- .../druid/segment/filter/BoundFilterTest.java | 44 +- .../filter/ColumnComparisonFilterTest.java | 4 +- .../segment/filter/EqualityFilterTest.java | 437 ++++++++ .../segment/filter/ExpressionFilterTest.java | 8 +- .../segment/filter/FilterPartitionTest.java | 20 +- .../druid/segment/filter/InFilterTest.java | 30 +- .../segment/filter/JavaScriptFilterTest.java | 21 +- .../druid/segment/filter/NullFilterTest.java | 240 +++++ .../druid/segment/filter/RangeFilterTest.java | 857 ++++++++++++++++ .../druid/segment/filter/RegexFilterTest.java | 12 +- .../segment/filter/SearchQueryFilterTest.java | 22 +- .../segment/filter/SelectorFilterTest.java | 23 +- .../IncrementalIndexStorageAdapterTest.java | 4 +- .../druid/segment/join/JoinTestHelper.java | 6 + .../nested/NestedDataColumnSupplierTest.java | 8 +- .../NestedDataColumnSupplierV4Test.java | 9 +- .../NestedFieldColumnIndexSupplierTest.java | 18 +- .../ScalarDoubleColumnSupplierTest.java | 6 +- .../nested/ScalarLongColumnSupplierTest.java | 6 +- .../ScalarStringColumnSupplierTest.java | 6 +- .../nested/VariantColumnSupplierTest.java | 6 +- ...tionaryEncodedStringIndexSupplierTest.java | 4 +- .../serde/NullColumnPartSerdeTest.java | 1 - ...ListFilteredVirtualColumnSelectorTest.java | 2 +- .../resources/nested-all-types-test-data.json | 2 +- .../org/apache/druid/cli/DumpSegment.java | 2 +- .../org/apache/druid/cli/DumpSegmentTest.java | 2 +- .../builtin/BitwiseSqlAggregator.java | 8 +- .../builtin/StringSqlAggregator.java | 8 +- .../sql/calcite/expression/Expressions.java | 245 +++-- .../builtin/CaseOperatorConversion.java | 20 +- 
.../filtration/CombineAndSimplifyBounds.java | 80 +- .../filtration/ConvertBoundsToSelectors.java | 17 + .../filtration/ConvertSelectorsToIns.java | 42 + .../MoveTimeFiltersToIntervals.java | 32 + .../sql/calcite/filtration/RangeRefKey.java | 107 ++ .../sql/calcite/filtration/RangeValue.java | 93 ++ .../druid/sql/calcite/filtration/Ranges.java | 225 +++++ .../druid/sql/calcite/planner/Calcites.java | 9 +- .../sql/calcite/planner/DruidRexExecutor.java | 5 +- .../sql/calcite/planner/PlannerContext.java | 22 + .../sql/calcite/run/NativeQueryMaker.java | 25 +- .../sql/calcite/BaseCalciteQueryTest.java | 101 +- .../sql/calcite/CalciteArraysQueryTest.java | 77 +- .../calcite/CalciteCorrelatedQueryTest.java | 32 +- .../sql/calcite/CalciteExplainQueryTest.java | 108 +- .../sql/calcite/CalciteInsertDmlTest.java | 4 +- .../sql/calcite/CalciteJoinQueryTest.java | 483 +++++---- .../CalciteMultiValueStringQueryTest.java | 28 +- .../calcite/CalciteNestedDataQueryTest.java | 934 ++++++++++++++++-- .../calcite/CalciteParameterQueryTest.java | 25 +- .../druid/sql/calcite/CalciteQueryTest.java | 753 ++++++++------ .../sql/calcite/CalciteReplaceDmlTest.java | 2 +- .../sql/calcite/CalciteSelectQueryTest.java | 95 +- .../sql/calcite/CalciteSubqueryTest.java | 20 +- 172 files changed, 6898 insertions(+), 1311 deletions(-) create mode 100644 processing/src/main/java/org/apache/druid/query/filter/EqualityFilter.java create mode 100644 processing/src/main/java/org/apache/druid/query/filter/NullFilter.java create mode 100644 processing/src/main/java/org/apache/druid/query/filter/RangeFilter.java create mode 100644 processing/src/main/java/org/apache/druid/query/filter/vector/ArrayVectorValueMatcher.java rename processing/src/main/java/org/apache/druid/segment/filter/{ConstantValueMatcherFactory.java => StringConstantValueMatcherFactory.java} (87%) rename processing/src/main/java/org/apache/druid/segment/{column => index}/AllFalseBitmapColumnIndex.java (90%) rename 
processing/src/main/java/org/apache/druid/segment/{column => index}/AllTrueBitmapColumnIndex.java (90%) rename processing/src/main/java/org/apache/druid/segment/{column => index}/BitmapColumnIndex.java (91%) rename processing/src/main/java/org/apache/druid/segment/{column => index}/DictionaryEncodedStringValueIndex.java (94%) rename processing/src/main/java/org/apache/druid/segment/{column => index}/DictionaryEncodedValueIndex.java (94%) rename processing/src/main/java/org/apache/druid/segment/{column => index}/DruidPredicateIndex.java (97%) rename processing/src/main/java/org/apache/druid/segment/{column => index}/IndexedStringDictionaryEncodedStringValueIndex.java (98%) rename processing/src/main/java/org/apache/druid/segment/{column => index}/IndexedStringDruidPredicateIndex.java (96%) rename processing/src/main/java/org/apache/druid/segment/{column => index}/IndexedUtf8LexicographicalRangeIndex.java (97%) rename processing/src/main/java/org/apache/druid/segment/{column => index}/IndexedUtf8ValueSetIndex.java (99%) rename processing/src/main/java/org/apache/druid/segment/{column => index}/LexicographicalRangeIndex.java (98%) rename processing/src/main/java/org/apache/druid/segment/{column => index}/NullValueIndex.java (95%) rename processing/src/main/java/org/apache/druid/segment/{column => index}/NumericRangeIndex.java (97%) rename processing/src/main/java/org/apache/druid/segment/{column => index}/SimpleBitmapColumnIndex.java (87%) rename processing/src/main/java/org/apache/druid/segment/{column => index}/SimpleImmutableBitmapIndex.java (97%) rename processing/src/main/java/org/apache/druid/segment/{column => index}/SimpleImmutableBitmapIterableIndex.java (97%) rename processing/src/main/java/org/apache/druid/segment/{column => index}/SpatialIndex.java (95%) rename processing/src/main/java/org/apache/druid/segment/{column => index}/StringValueSetIndex.java (97%) rename processing/src/main/java/org/apache/druid/segment/{column => index}/Utf8ValueSetIndex.java 
(96%) create mode 100644 processing/src/test/java/org/apache/druid/segment/filter/EqualityFilterTest.java create mode 100644 processing/src/test/java/org/apache/druid/segment/filter/NullFilterTest.java create mode 100644 processing/src/test/java/org/apache/druid/segment/filter/RangeFilterTest.java create mode 100644 sql/src/main/java/org/apache/druid/sql/calcite/filtration/RangeRefKey.java create mode 100644 sql/src/main/java/org/apache/druid/sql/calcite/filtration/RangeValue.java create mode 100644 sql/src/main/java/org/apache/druid/sql/calcite/filtration/Ranges.java diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/DictionaryEncodedStringIndexSupplierBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/DictionaryEncodedStringIndexSupplierBenchmark.java index 73cba8c5d977..739a9d01d82f 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/DictionaryEncodedStringIndexSupplierBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/DictionaryEncodedStringIndexSupplierBenchmark.java @@ -26,12 +26,12 @@ import org.apache.druid.collections.bitmap.RoaringBitmapFactory; import org.apache.druid.common.config.NullHandling; import org.apache.druid.java.util.common.StringUtils; -import org.apache.druid.segment.column.BitmapColumnIndex; -import org.apache.druid.segment.column.IndexedUtf8ValueSetIndex; -import org.apache.druid.segment.column.StringValueSetIndex; import org.apache.druid.segment.data.BitmapSerdeFactory; import org.apache.druid.segment.data.GenericIndexed; import org.apache.druid.segment.data.RoaringBitmapSerdeFactory; +import org.apache.druid.segment.index.BitmapColumnIndex; +import org.apache.druid.segment.index.IndexedUtf8ValueSetIndex; +import org.apache.druid.segment.index.StringValueSetIndex; import org.apache.druid.segment.serde.StringUtf8ColumnIndexSupplier; import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.annotations.BenchmarkMode; diff --git 
a/extensions-contrib/tdigestsketch/src/test/java/org/apache/druid/query/aggregation/tdigestsketch/sql/TDigestSketchSqlAggregatorTest.java b/extensions-contrib/tdigestsketch/src/test/java/org/apache/druid/query/aggregation/tdigestsketch/sql/TDigestSketchSqlAggregatorTest.java index b304dba196a3..9359bee75e43 100644 --- a/extensions-contrib/tdigestsketch/src/test/java/org/apache/druid/query/aggregation/tdigestsketch/sql/TDigestSketchSqlAggregatorTest.java +++ b/extensions-contrib/tdigestsketch/src/test/java/org/apache/druid/query/aggregation/tdigestsketch/sql/TDigestSketchSqlAggregatorTest.java @@ -38,7 +38,6 @@ import org.apache.druid.query.aggregation.tdigestsketch.TDigestSketchToQuantilePostAggregator; import org.apache.druid.query.dimension.DefaultDimensionSpec; import org.apache.druid.query.groupby.GroupByQuery; -import org.apache.druid.query.ordering.StringComparators; import org.apache.druid.query.spec.MultipleIntervalSegmentSpec; import org.apache.druid.segment.IndexBuilder; import org.apache.druid.segment.QueryableIndex; @@ -444,7 +443,7 @@ public void testEmptyTimeseriesResults() Druids.newTimeseriesQueryBuilder() .dataSource(CalciteTests.DATASOURCE1) .intervals(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity()))) - .filters(bound("dim2", "0", "0", false, false, null, StringComparators.NUMERIC)) + .filters(equality("dim2", 0L, ColumnType.LONG)) .granularity(Granularities.ALL) .aggregators(ImmutableList.of( new TDigestSketchAggregatorFactory("a0:agg", "m1", TDigestSketchAggregatorFactory.DEFAULT_COMPRESSION), @@ -476,7 +475,7 @@ public void testGroupByAggregatorDefaultValues() GroupByQuery.builder() .setDataSource(CalciteTests.DATASOURCE1) .setInterval(querySegmentSpec(Filtration.eternity())) - .setDimFilter(selector("dim2", "a", null)) + .setDimFilter(equality("dim2", "a", ColumnType.STRING)) .setGranularity(Granularities.ALL) .setVirtualColumns(expressionVirtualColumn("v0", "'a'", ColumnType.STRING)) .setDimensions(new 
DefaultDimensionSpec("v0", "d0", ColumnType.STRING)) @@ -484,11 +483,11 @@ public void testGroupByAggregatorDefaultValues() aggregators( new FilteredAggregatorFactory( new TDigestSketchAggregatorFactory("a0:agg", "m1", TDigestSketchAggregatorFactory.DEFAULT_COMPRESSION), - selector("dim1", "nonexistent", null) + equality("dim1", "nonexistent", ColumnType.STRING) ), new FilteredAggregatorFactory( new TDigestSketchAggregatorFactory("a1:agg", "qsketch_m1", 100), - selector("dim1", "nonexistent", null) + equality("dim1", "nonexistent", ColumnType.STRING) ) ) ) diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildColumnProcessorFactory.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildColumnProcessorFactory.java index d0823889578c..4a8b15a85a00 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildColumnProcessorFactory.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildColumnProcessorFactory.java @@ -21,6 +21,7 @@ import org.apache.datasketches.hll.HllSketch; import org.apache.druid.java.util.common.StringEncoding; +import org.apache.druid.java.util.common.UOE; import org.apache.druid.segment.BaseDoubleColumnValueSelector; import org.apache.druid.segment.BaseFloatColumnValueSelector; import org.apache.druid.segment.BaseLongColumnValueSelector; @@ -96,6 +97,13 @@ public Consumer> makeLongProcessor(BaseLongColumnValueSelect }; } + @Override + public Consumer> makeArrayProcessor(BaseObjectColumnValueSelector selector) + { + // todo (clint): pass in type info so we can convert these arrays to byte arrays + throw new UOE("HLL sketch does not support ARRAY inputs"); + } + @Override public Consumer> makeComplexProcessor(BaseObjectColumnValueSelector selector) { diff --git 
a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/vector/HllSketchBuildVectorProcessorFactory.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/vector/HllSketchBuildVectorProcessorFactory.java index aac55a2e0b72..5c430c7ce4a9 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/vector/HllSketchBuildVectorProcessorFactory.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/vector/HllSketchBuildVectorProcessorFactory.java @@ -83,6 +83,15 @@ public HllSketchBuildVectorProcessor makeLongProcessor(ColumnCapabilities capabi return new LongHllSketchBuildVectorProcessor(helper, selector); } + @Override + public HllSketchBuildVectorProcessor makeArrayProcessor( + ColumnCapabilities capabilities, + VectorObjectSelector selector + ) + { + return null; + } + @Override public HllSketchBuildVectorProcessor makeObjectProcessor( ColumnCapabilities capabilities, diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchAggregatorFactory.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchAggregatorFactory.java index 23207a596779..bea0bc610e09 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchAggregatorFactory.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchAggregatorFactory.java @@ -179,7 +179,7 @@ public VectorAggregator makeSingleValueDimensionProcessor( SingleValueDimensionVectorSelector selector ) { - return new KllSketchNoOpBufferAggregator(getEmptySketch()); + return new KllSketchNoOpBufferAggregator<>(getEmptySketch()); } @Override @@ -188,7 +188,7 @@ public VectorAggregator 
makeMultiValueDimensionProcessor( MultiValueDimensionVectorSelector selector ) { - return new KllSketchNoOpBufferAggregator(getEmptySketch()); + return new KllSketchNoOpBufferAggregator<>(getEmptySketch()); } @Override @@ -209,6 +209,19 @@ public VectorAggregator makeLongProcessor(ColumnCapabilities capabilities, Vecto return new KllDoublesSketchBuildVectorAggregator(selector, getK(), getMaxIntermediateSizeWithNulls()); } + @Override + public VectorAggregator makeArrayProcessor(ColumnCapabilities capabilities, VectorObjectSelector selector) + { + // todo (clint): y tho? shouldn't this (and string inputs) be an error? + /* + throw new UOE( + "KLL Doubles sketch does not support[%s] inputs", + capabilities.toColumnType() + ); + */ + return new KllSketchNoOpBufferAggregator<>(getEmptySketch()); + } + @Override public VectorAggregator makeObjectProcessor(ColumnCapabilities capabilities, VectorObjectSelector selector) { diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchAggregatorFactory.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchAggregatorFactory.java index 6b7f563c6750..e05e627a9702 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchAggregatorFactory.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchAggregatorFactory.java @@ -179,7 +179,7 @@ public VectorAggregator makeSingleValueDimensionProcessor( SingleValueDimensionVectorSelector selector ) { - return new KllSketchNoOpBufferAggregator(getEmptySketch()); + return new KllSketchNoOpBufferAggregator<>(getEmptySketch()); } @Override @@ -188,7 +188,7 @@ public VectorAggregator makeMultiValueDimensionProcessor( MultiValueDimensionVectorSelector selector ) { - return new KllSketchNoOpBufferAggregator(getEmptySketch()); + return new 
KllSketchNoOpBufferAggregator<>(getEmptySketch()); } @Override @@ -209,6 +209,20 @@ public VectorAggregator makeLongProcessor(ColumnCapabilities capabilities, Vecto return new KllFloatsSketchBuildVectorAggregator(selector, getK(), getMaxIntermediateSizeWithNulls()); } + @Override + public VectorAggregator makeArrayProcessor(ColumnCapabilities capabilities, VectorObjectSelector selector) + { + + // todo (clint): y tho? shouldn't this (and string inputs) be an error? + /* + throw new UOE( + "KLL Floats sketch does not support[%s] inputs", + capabilities.toColumnType() + ); + */ + return new KllSketchNoOpBufferAggregator<>(getEmptySketch()); + } + @Override public VectorAggregator makeObjectProcessor(ColumnCapabilities capabilities, VectorObjectSelector selector) { diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/quantiles/DoublesSketchAggregatorFactory.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/quantiles/DoublesSketchAggregatorFactory.java index 3b12a73163f7..2c54a3c3db5b 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/quantiles/DoublesSketchAggregatorFactory.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/quantiles/DoublesSketchAggregatorFactory.java @@ -208,6 +208,19 @@ public VectorAggregator makeLongProcessor(ColumnCapabilities capabilities, Vecto return new DoublesSketchBuildVectorAggregator(selector, k, getMaxIntermediateSizeWithNulls()); } + @Override + public VectorAggregator makeArrayProcessor(ColumnCapabilities capabilities, VectorObjectSelector selector) + { + /* + throw new UOE( + "Doubles sketch does not support[%s] inputs", + capabilities.toColumnType() + ); + */ + // todo (clint): y tho? shouldn't this (and string inputs) be an error? 
+ return new NoopDoublesSketchBufferAggregator(); + } + @Override public VectorAggregator makeObjectProcessor(ColumnCapabilities capabilities, VectorObjectSelector selector) { diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/util/ToObjectVectorColumnProcessorFactory.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/util/ToObjectVectorColumnProcessorFactory.java index 2915f34a7930..37df64f0df2e 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/util/ToObjectVectorColumnProcessorFactory.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/util/ToObjectVectorColumnProcessorFactory.java @@ -133,6 +133,12 @@ public Supplier makeLongProcessor(ColumnCapabilities capabilities, Vec }; } + @Override + public Supplier makeArrayProcessor(ColumnCapabilities capabilities, VectorObjectSelector selector) + { + return selector::getObjectVector; + } + @Override public Supplier makeObjectProcessor(ColumnCapabilities capabilities, VectorObjectSelector selector) { diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java index 498bb06d9afd..0819d58854ae 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java @@ -175,7 +175,10 @@ public class HllSketchSqlAggregatorTest extends BaseCalciteQueryTest private static final List EXPECTED_FILTERED_AGGREGATORS = EXPECTED_PA_AGGREGATORS.stream() .limit(5) - .map(factory -> new 
FilteredAggregatorFactory(factory, selector("dim2", "a", null))) + .map(factory -> new FilteredAggregatorFactory( + factory, + equality("dim2", "a", ColumnType.STRING) + )) .collect(Collectors.toList()); /** @@ -344,7 +347,7 @@ public void testApproxCountDistinctHllSketch() new HllSketchBuildAggregatorFactory("a1", "dim2", null, null, null, null, ROUND), new FilteredAggregatorFactory( new HllSketchBuildAggregatorFactory("a2", "dim2", null, null, null, null, ROUND), - BaseCalciteQueryTest.not(BaseCalciteQueryTest.selector("dim2", "", null)) + not(equality("dim2", "", null)) ), new HllSketchBuildAggregatorFactory("a3", "v0", null, null, null, null, ROUND), new HllSketchBuildAggregatorFactory("a4", "v1", null, null, null, null, ROUND), @@ -436,7 +439,7 @@ public void testAvgDailyCountDistinctHllSketch() new LongSumAggregatorFactory("_a0:sum", "a0"), new FilteredAggregatorFactory( new CountAggregatorFactory("_a0:count"), - BaseCalciteQueryTest.not(BaseCalciteQueryTest.selector("a0", null, null)) + notNull("a0") ) ) ) @@ -480,7 +483,7 @@ public void testApproxCountDistinctHllSketchIsRounded() new HllSketchBuildAggregatorFactory("a0", "m1", null, null, null, true, true) ) ) - .setHavingSpec(having(selector("a0", "2", null))) + .setHavingSpec(having(equality("a0", 2L, ColumnType.LONG))) .setContext(QUERY_CONTEXT_DEFAULT) .build() ), @@ -895,7 +898,7 @@ public void testGroupByAggregatorDefaultValues() GroupByQuery.builder() .setDataSource(CalciteTests.DATASOURCE1) .setInterval(querySegmentSpec(Filtration.eternity())) - .setDimFilter(selector("dim2", "a", null)) + .setDimFilter(equality("dim2", "a", ColumnType.STRING)) .setGranularity(Granularities.ALL) .setVirtualColumns(expressionVirtualColumn("v0", "'a'", ColumnType.STRING)) .setDimensions(new DefaultDimensionSpec("v0", "d0", ColumnType.STRING)) @@ -911,7 +914,7 @@ public void testGroupByAggregatorDefaultValues() null, true ), - selector("dim1", "nonexistent", null) + equality("dim1", "nonexistent", ColumnType.STRING) ), 
new FilteredAggregatorFactory( new HllSketchBuildAggregatorFactory( @@ -923,7 +926,7 @@ public void testGroupByAggregatorDefaultValues() false, true ), - selector("dim1", "nonexistent", null) + equality("dim1", "nonexistent", ColumnType.STRING) ) ) ) @@ -954,7 +957,7 @@ public void testGroupByAggregatorDefaultValuesFinalizeOuterSketches() GroupByQuery.builder() .setDataSource(CalciteTests.DATASOURCE1) .setInterval(querySegmentSpec(Filtration.eternity())) - .setDimFilter(selector("dim2", "a", null)) + .setDimFilter(equality("dim2", "a", ColumnType.STRING)) .setGranularity(Granularities.ALL) .setVirtualColumns(expressionVirtualColumn("v0", "'a'", ColumnType.STRING)) .setDimensions(new DefaultDimensionSpec("v0", "d0", ColumnType.STRING)) @@ -962,11 +965,11 @@ public void testGroupByAggregatorDefaultValuesFinalizeOuterSketches() aggregators( new FilteredAggregatorFactory( new HllSketchBuildAggregatorFactory("a0", "v0", null, null, null, null, true), - selector("dim1", "nonexistent", null) + equality("dim1", "nonexistent", ColumnType.STRING) ), new FilteredAggregatorFactory( new HllSketchBuildAggregatorFactory("a1", "v0", null, null, null, null, true), - selector("dim1", "nonexistent", null) + equality("dim1", "nonexistent", ColumnType.STRING) ) ) ) diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/quantiles/sql/DoublesSketchSqlAggregatorTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/quantiles/sql/DoublesSketchSqlAggregatorTest.java index 184aba375a02..27a14bd17ef9 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/quantiles/sql/DoublesSketchSqlAggregatorTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/quantiles/sql/DoublesSketchSqlAggregatorTest.java @@ -815,7 +815,7 @@ public void testGroupByAggregatorDefaultValues() GroupByQuery.builder() 
.setDataSource(CalciteTests.DATASOURCE1) .setInterval(querySegmentSpec(Filtration.eternity())) - .setDimFilter(selector("dim2", "a", null)) + .setDimFilter(equality("dim2", "a", ColumnType.STRING)) .setGranularity(Granularities.ALL) .setVirtualColumns(expressionVirtualColumn("v0", "'a'", ColumnType.STRING)) .setDimensions(new DefaultDimensionSpec("v0", "d0", ColumnType.STRING)) @@ -823,19 +823,19 @@ public void testGroupByAggregatorDefaultValues() aggregators( new FilteredAggregatorFactory( new DoublesSketchAggregatorFactory("a0:agg", "m1", null), - selector("dim1", "nonexistent", null) + equality("dim1", "nonexistent", ColumnType.STRING) ), new FilteredAggregatorFactory( new DoublesSketchAggregatorFactory("a1:agg", "qsketch_m1", null), - selector("dim1", "nonexistent", null) + equality("dim1", "nonexistent", ColumnType.STRING) ), new FilteredAggregatorFactory( new DoublesSketchAggregatorFactory("a2:agg", "m1", null, null, false), - selector("dim1", "nonexistent", null) + equality("dim1", "nonexistent", ColumnType.STRING) ), new FilteredAggregatorFactory( new DoublesSketchAggregatorFactory("a3:agg", "qsketch_m1", null, null, false), - selector("dim1", "nonexistent", null) + equality("dim1", "nonexistent", ColumnType.STRING) ) ) ) @@ -886,7 +886,7 @@ public void testGroupByAggregatorDefaultValuesWithFinalizeSketches() GroupByQuery.builder() .setDataSource(CalciteTests.DATASOURCE1) .setInterval(querySegmentSpec(Filtration.eternity())) - .setDimFilter(selector("dim2", "a", null)) + .setDimFilter(equality("dim2", "a", ColumnType.STRING)) .setGranularity(Granularities.ALL) .setVirtualColumns(expressionVirtualColumn("v0", "'a'", ColumnType.STRING)) .setDimensions(new DefaultDimensionSpec("v0", "d0", ColumnType.STRING)) @@ -894,19 +894,19 @@ public void testGroupByAggregatorDefaultValuesWithFinalizeSketches() aggregators( new FilteredAggregatorFactory( new DoublesSketchAggregatorFactory("a0:agg", "m1", null), - selector("dim1", "nonexistent", null) + equality("dim1", 
"nonexistent", ColumnType.STRING) ), new FilteredAggregatorFactory( new DoublesSketchAggregatorFactory("a1:agg", "qsketch_m1", null), - selector("dim1", "nonexistent", null) + equality("dim1", "nonexistent", ColumnType.STRING) ), new FilteredAggregatorFactory( new DoublesSketchAggregatorFactory("a2:agg", "m1", null, null, true), - selector("dim1", "nonexistent", null) + equality("dim1", "nonexistent", ColumnType.STRING) ), new FilteredAggregatorFactory( new DoublesSketchAggregatorFactory("a3:agg", "qsketch_m1", null, null, true), - selector("dim1", "nonexistent", null) + equality("dim1", "nonexistent", ColumnType.STRING) ) ) ) diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchSqlAggregatorTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchSqlAggregatorTest.java index c1ddfa279d21..8086898f3c3b 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchSqlAggregatorTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchSqlAggregatorTest.java @@ -241,7 +241,7 @@ public void testApproxCountDistinctThetaSketch() null, null ), - BaseCalciteQueryTest.not(BaseCalciteQueryTest.selector("dim2", "", null)) + not(equality("dim2", "", ColumnType.STRING)) ), new SketchMergeAggregatorFactory( "a3", @@ -341,7 +341,7 @@ public void testAvgDailyCountDistinctThetaSketch() new LongSumAggregatorFactory("_a0:sum", "a0"), new FilteredAggregatorFactory( new CountAggregatorFactory("_a0:count"), - BaseCalciteQueryTest.not(BaseCalciteQueryTest.selector("a0", null, null)) + notNull("a0") ) ) ) @@ -890,7 +890,7 @@ public void testGroupByAggregatorDefaultValues() GroupByQuery.builder() .setDataSource(CalciteTests.DATASOURCE1) .setInterval(querySegmentSpec(Filtration.eternity())) - .setDimFilter(selector("dim2", 
"a", null)) + .setDimFilter(equality("dim2", "a", ColumnType.STRING)) .setGranularity(Granularities.ALL) .setVirtualColumns(expressionVirtualColumn("v0", "'a'", ColumnType.STRING)) .setDimensions(new DefaultDimensionSpec("v0", "d0", ColumnType.STRING)) @@ -905,7 +905,7 @@ public void testGroupByAggregatorDefaultValues() null, null ), - selector("dim1", "nonexistent", null) + equality("dim1", "nonexistent", ColumnType.STRING) ), new FilteredAggregatorFactory( new SketchMergeAggregatorFactory( @@ -916,7 +916,7 @@ public void testGroupByAggregatorDefaultValues() null, null ), - selector("dim1", "nonexistent", null) + equality("dim1", "nonexistent", ColumnType.STRING) ), new FilteredAggregatorFactory( new SketchMergeAggregatorFactory( @@ -927,7 +927,7 @@ public void testGroupByAggregatorDefaultValues() null, null ), - selector("dim1", "nonexistent", null) + equality("dim1", "nonexistent", ColumnType.STRING) ), new FilteredAggregatorFactory( new SketchMergeAggregatorFactory( @@ -938,7 +938,7 @@ public void testGroupByAggregatorDefaultValues() null, null ), - selector("dim1", "nonexistent", null) + equality("dim1", "nonexistent", ColumnType.STRING) ) ) ) @@ -971,7 +971,7 @@ public void testGroupByAggregatorDefaultValuesFinalizeOuterSketches() GroupByQuery.builder() .setDataSource(CalciteTests.DATASOURCE1) .setInterval(querySegmentSpec(Filtration.eternity())) - .setDimFilter(selector("dim2", "a", null)) + .setDimFilter(equality("dim2", "a", ColumnType.STRING)) .setGranularity(Granularities.ALL) .setVirtualColumns(expressionVirtualColumn("v0", "'a'", ColumnType.STRING)) .setDimensions(new DefaultDimensionSpec("v0", "d0", ColumnType.STRING)) @@ -986,7 +986,7 @@ public void testGroupByAggregatorDefaultValuesFinalizeOuterSketches() null, null ), - selector("dim1", "nonexistent", null) + equality("dim1", "nonexistent", ColumnType.STRING) ), new FilteredAggregatorFactory( new SketchMergeAggregatorFactory( @@ -997,7 +997,7 @@ public void 
testGroupByAggregatorDefaultValuesFinalizeOuterSketches() null, null ), - selector("dim1", "nonexistent", null) + equality("dim1", "nonexistent", ColumnType.STRING) ), new FilteredAggregatorFactory( new SketchMergeAggregatorFactory( @@ -1008,7 +1008,7 @@ public void testGroupByAggregatorDefaultValuesFinalizeOuterSketches() null, null ), - selector("dim1", "nonexistent", null) + equality("dim1", "nonexistent", ColumnType.STRING) ), new FilteredAggregatorFactory( new SketchMergeAggregatorFactory( @@ -1019,7 +1019,7 @@ public void testGroupByAggregatorDefaultValuesFinalizeOuterSketches() null, null ), - selector("dim1", "nonexistent", null) + equality("dim1", "nonexistent", ColumnType.STRING) ) ) ) diff --git a/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/aggregation/bloom/sql/BloomFilterSqlAggregatorTest.java b/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/aggregation/bloom/sql/BloomFilterSqlAggregatorTest.java index fcf7099f28b0..90beea5bd87e 100644 --- a/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/aggregation/bloom/sql/BloomFilterSqlAggregatorTest.java +++ b/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/aggregation/bloom/sql/BloomFilterSqlAggregatorTest.java @@ -39,7 +39,6 @@ import org.apache.druid.query.extraction.SubstringDimExtractionFn; import org.apache.druid.query.filter.BloomKFilter; import org.apache.druid.query.groupby.GroupByQuery; -import org.apache.druid.query.ordering.StringComparators; import org.apache.druid.query.spec.MultipleIntervalSegmentSpec; import org.apache.druid.segment.IndexBuilder; import org.apache.druid.segment.QueryableIndex; @@ -489,7 +488,7 @@ public void testEmptyTimeseriesResults() throws Exception .dataSource(CalciteTests.DATASOURCE3) .intervals(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity()))) .granularity(Granularities.ALL) - .filters(BaseCalciteQueryTest.bound("dim2", "0", "0", false, false, null, 
StringComparators.NUMERIC)) + .filters(equality("dim2", 0L, ColumnType.LONG)) .aggregators( ImmutableList.of( new BloomFilterAggregatorFactory( @@ -536,7 +535,7 @@ public void testGroupByAggregatorDefaultValues() throws Exception GroupByQuery.builder() .setDataSource(CalciteTests.DATASOURCE3) .setInterval(querySegmentSpec(Filtration.eternity())) - .setDimFilter(selector("dim2", "a", null)) + .setDimFilter(equality("dim2", "a", ColumnType.STRING)) .setGranularity(Granularities.ALL) .setVirtualColumns(expressionVirtualColumn("v0", "'a'", ColumnType.STRING)) .setDimensions(new DefaultDimensionSpec("v0", "_d0", ColumnType.STRING)) @@ -548,7 +547,7 @@ public void testGroupByAggregatorDefaultValues() throws Exception new DefaultDimensionSpec("dim1", "a0:dim1"), TEST_NUM_ENTRIES ), - selector("dim1", "nonexistent", null) + equality("dim1", "nonexistent", ColumnType.STRING) ), new FilteredAggregatorFactory( new BloomFilterAggregatorFactory( @@ -556,7 +555,7 @@ public void testGroupByAggregatorDefaultValues() throws Exception new DefaultDimensionSpec("l1", "a1:l1", ColumnType.LONG), TEST_NUM_ENTRIES ), - selector("dim1", "nonexistent", null) + equality("dim1", "nonexistent", ColumnType.STRING) ) ) ) diff --git a/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/sql/FixedBucketsHistogramQuantileSqlAggregatorTest.java b/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/sql/FixedBucketsHistogramQuantileSqlAggregatorTest.java index a4eae57756ce..ab3c0fda4a0c 100644 --- a/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/sql/FixedBucketsHistogramQuantileSqlAggregatorTest.java +++ b/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/sql/FixedBucketsHistogramQuantileSqlAggregatorTest.java @@ -570,7 +570,7 @@ public void testGroupByAggregatorDefaultValues() GroupByQuery.builder() .setDataSource(CalciteTests.DATASOURCE1) 
.setInterval(querySegmentSpec(Filtration.eternity())) - .setDimFilter(selector("dim2", "a", null)) + .setDimFilter(equality("dim2", "a", ColumnType.STRING)) .setGranularity(Granularities.ALL) .setVirtualColumns(expressionVirtualColumn("v0", "'a'", ColumnType.STRING)) .setDimensions(new DefaultDimensionSpec("v0", "d0", ColumnType.STRING)) @@ -586,7 +586,7 @@ public void testGroupByAggregatorDefaultValues() FixedBucketsHistogram.OutlierHandlingMode.IGNORE, false ), - selector("dim1", "nonexistent", null) + equality("dim1", "nonexistent", ColumnType.STRING) ), new FilteredAggregatorFactory( new FixedBucketsHistogramAggregatorFactory( @@ -598,7 +598,7 @@ public void testGroupByAggregatorDefaultValues() FixedBucketsHistogram.OutlierHandlingMode.IGNORE, false ), - selector("dim1", "nonexistent", null) + equality("dim1", "nonexistent", ColumnType.STRING) ) ) ) diff --git a/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/sql/QuantileSqlAggregatorTest.java b/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/sql/QuantileSqlAggregatorTest.java index 4a67833b5249..292f41b5be4b 100644 --- a/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/sql/QuantileSqlAggregatorTest.java +++ b/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/sql/QuantileSqlAggregatorTest.java @@ -411,7 +411,7 @@ public void testGroupByAggregatorDefaultValues() GroupByQuery.builder() .setDataSource(CalciteTests.DATASOURCE1) .setInterval(querySegmentSpec(Filtration.eternity())) - .setDimFilter(selector("dim2", "a", null)) + .setDimFilter(equality("dim2", "a", ColumnType.STRING)) .setGranularity(Granularities.ALL) .setVirtualColumns(expressionVirtualColumn("v0", "'a'", ColumnType.STRING)) .setDimensions(new DefaultDimensionSpec("v0", "d0", ColumnType.STRING)) @@ -419,11 +419,11 @@ public void testGroupByAggregatorDefaultValues() aggregators( new 
FilteredAggregatorFactory( new ApproximateHistogramFoldingAggregatorFactory("a0:agg", "hist_m1", null, null, null, null, false), - selector("dim1", "nonexistent", null) + equality("dim1", "nonexistent", ColumnType.STRING) ), new FilteredAggregatorFactory( new ApproximateHistogramAggregatorFactory("a1:agg", "m1", null, null, null, null, false), - selector("dim1", "nonexistent", null) + equality("dim1", "nonexistent", ColumnType.STRING) ) ) ) diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQSelectTest.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQSelectTest.java index 0f4210e7f59a..3e9a15c47934 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQSelectTest.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQSelectTest.java @@ -375,7 +375,7 @@ public void testSelectOnFooWhereMatchesNoData() .dataSource(CalciteTests.DATASOURCE1) .intervals(querySegmentSpec(Intervals.ETERNITY)) .columns("cnt", "dim1") - .filters(selector("dim2", "nonexistent", null)) + .filters(equality("dim2", "nonexistent", ColumnType.STRING)) .context(defaultScanQueryContext(context, resultSignature)) .build() ) @@ -406,7 +406,7 @@ public void testSelectAndOrderByOnFooWhereMatchesNoData() .dataSource(CalciteTests.DATASOURCE1) .intervals(querySegmentSpec(Intervals.ETERNITY)) .columns("cnt", "dim1") - .filters(selector("dim2", "nonexistent", null)) + .filters(equality("dim2", "nonexistent", ColumnType.STRING)) .context(defaultScanQueryContext(context, resultSignature)) .orderBy(ImmutableList.of(new ScanQuery.OrderBy("dim1", ScanQuery.Order.ASCENDING))) .build() @@ -704,7 +704,7 @@ public void testJoinWithLookup() ) ) .setInterval(querySegmentSpec(Filtration.eternity())) - .setDimFilter(not(selector("j0.v", "xa", null))) + .setDimFilter(not(equality("j0.v", "xa", ColumnType.STRING))) .setGranularity(Granularities.ALL) .setDimensions(dimensions(new 
DefaultDimensionSpec("j0.v", "d0"))) .setAggregatorSpecs(aggregators(new CountAggregatorFactory("a0"))) @@ -885,7 +885,7 @@ private void testJoin(final JoinAlgorithm joinAlgorithm) new DoubleSumAggregatorFactory("a0:sum", "m2"), new FilteredAggregatorFactory( new CountAggregatorFactory("a0:count"), - not(selector("m2", null, null)), + notNull("m2"), // Not sure why the name is only set in SQL-compatible null mode. Seems strange. // May be due to JSON serialization: name is set on the serialized aggregator even diff --git a/extensions-core/stats/src/test/java/org/apache/druid/query/aggregation/variance/sql/VarianceSqlAggregatorTest.java b/extensions-core/stats/src/test/java/org/apache/druid/query/aggregation/variance/sql/VarianceSqlAggregatorTest.java index 5c496c466351..f8e7fcbe3771 100644 --- a/extensions-core/stats/src/test/java/org/apache/druid/query/aggregation/variance/sql/VarianceSqlAggregatorTest.java +++ b/extensions-core/stats/src/test/java/org/apache/druid/query/aggregation/variance/sql/VarianceSqlAggregatorTest.java @@ -568,7 +568,7 @@ public void testGroupByAggregatorDefaultValues() GroupByQuery.builder() .setDataSource(CalciteTests.DATASOURCE3) .setInterval(querySegmentSpec(Filtration.eternity())) - .setDimFilter(selector("dim2", "a", null)) + .setDimFilter(equality("dim2", "a", ColumnType.STRING)) .setGranularity(Granularities.ALL) .setVirtualColumns(expressionVirtualColumn("v0", "'a'", ColumnType.STRING)) .setDimensions(new DefaultDimensionSpec("v0", "_d0", ColumnType.STRING)) @@ -576,35 +576,35 @@ public void testGroupByAggregatorDefaultValues() aggregators( new FilteredAggregatorFactory( new VarianceAggregatorFactory("a0:agg", "d1", "population", "double"), - selector("dim1", "nonexistent", null) + equality("dim1", "nonexistent", ColumnType.STRING) ), new FilteredAggregatorFactory( new VarianceAggregatorFactory("a1:agg", "d1", "sample", "double"), - selector("dim1", "nonexistent", null) + equality("dim1", "nonexistent", ColumnType.STRING) ), new 
FilteredAggregatorFactory( new VarianceAggregatorFactory("a2:agg", "d1", "sample", "double"), - selector("dim1", "nonexistent", null) + equality("dim1", "nonexistent", ColumnType.STRING) ), new FilteredAggregatorFactory( new VarianceAggregatorFactory("a3:agg", "d1", "sample", "double"), - selector("dim1", "nonexistent", null) + equality("dim1", "nonexistent", ColumnType.STRING) ), new FilteredAggregatorFactory( new VarianceAggregatorFactory("a4:agg", "l1", "population", "long"), - selector("dim1", "nonexistent", null) + equality("dim1", "nonexistent", ColumnType.STRING) ), new FilteredAggregatorFactory( new VarianceAggregatorFactory("a5:agg", "l1", "sample", "long"), - selector("dim1", "nonexistent", null) + equality("dim1", "nonexistent", ColumnType.STRING) ), new FilteredAggregatorFactory( new VarianceAggregatorFactory("a6:agg", "l1", "sample", "long"), - selector("dim1", "nonexistent", null) + equality("dim1", "nonexistent", ColumnType.STRING) ), new FilteredAggregatorFactory( new VarianceAggregatorFactory("a7:agg", "l1", "sample", "long"), - selector("dim1", "nonexistent", null) + equality("dim1", "nonexistent", ColumnType.STRING) ) ) ) diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/input/DruidSegmentReader.java b/indexing-service/src/main/java/org/apache/druid/indexing/input/DruidSegmentReader.java index decbf3b4038a..301d718aab99 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/input/DruidSegmentReader.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/input/DruidSegmentReader.java @@ -276,6 +276,12 @@ public Supplier makeLongProcessor(BaseLongColumnValueSelector selector) return () -> selector.isNull() ? 
null : selector.getLong(); } + @Override + public Supplier makeArrayProcessor(BaseObjectColumnValueSelector selector) + { + return selector::getObject; + } + @Override public Supplier makeComplexProcessor(BaseObjectColumnValueSelector selector) { diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/cardinality/vector/CardinalityVectorProcessorFactory.java b/processing/src/main/java/org/apache/druid/query/aggregation/cardinality/vector/CardinalityVectorProcessorFactory.java index c69a06520986..21b1d493fbab 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/cardinality/vector/CardinalityVectorProcessorFactory.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/cardinality/vector/CardinalityVectorProcessorFactory.java @@ -19,6 +19,7 @@ package org.apache.druid.query.aggregation.cardinality.vector; +import org.apache.druid.java.util.common.UOE; import org.apache.druid.segment.VectorColumnProcessorFactory; import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.vector.MultiValueDimensionVectorSelector; @@ -66,6 +67,15 @@ public CardinalityVectorProcessor makeLongProcessor(ColumnCapabilities capabilit return new LongCardinalityVectorProcessor(selector); } + @Override + public CardinalityVectorProcessor makeArrayProcessor(ColumnCapabilities capabilities, VectorObjectSelector selector) + { + throw new UOE( + "Cardinality aggregator does not support[%s] inputs", + capabilities.toColumnType() + ); + } + @Override public CardinalityVectorProcessor makeObjectProcessor(ColumnCapabilities capabilities, VectorObjectSelector selector) { diff --git a/processing/src/main/java/org/apache/druid/query/filter/BoundDimFilter.java b/processing/src/main/java/org/apache/druid/query/filter/BoundDimFilter.java index 1e317c5fbcc2..958e4306e073 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/BoundDimFilter.java +++ 
b/processing/src/main/java/org/apache/druid/query/filter/BoundDimFilter.java @@ -610,13 +610,14 @@ private Supplier makeDoublePredicateSupplier() return Suppliers.memoize(doublePredicate); } - private static DruidLongPredicate makeLongPredicateFromBounds( + static DruidLongPredicate makeLongPredicateFromBounds( final boolean hasLowerLongBound, final boolean hasUpperLongBound, final boolean lowerStrict, final boolean upperStrict, final long lowerLongBound, - final long upperLongBound) + final long upperLongBound + ) { if (hasLowerLongBound && hasUpperLongBound) { if (upperStrict && lowerStrict) { @@ -645,13 +646,14 @@ private static DruidLongPredicate makeLongPredicateFromBounds( } } - private static DruidDoublePredicate makeDoublePredicateFromBounds( + static DruidDoublePredicate makeDoublePredicateFromBounds( final boolean hasLowerDoubleBound, final boolean hasUpperDoubleBound, final boolean lowerStrict, final boolean upperStrict, final double lowerDoubleBound, - final double upperDoubleBound) + final double upperDoubleBound + ) { if (hasLowerDoubleBound && hasUpperDoubleBound) { if (upperStrict && lowerStrict) { diff --git a/processing/src/main/java/org/apache/druid/query/filter/DimFilter.java b/processing/src/main/java/org/apache/druid/query/filter/DimFilter.java index 60580881c392..4e4a3b10ec94 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/DimFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/DimFilter.java @@ -48,7 +48,10 @@ @JsonSubTypes.Type(name = "like", value = LikeDimFilter.class), @JsonSubTypes.Type(name = "expression", value = ExpressionDimFilter.class), @JsonSubTypes.Type(name = "true", value = TrueDimFilter.class), - @JsonSubTypes.Type(name = "false", value = FalseDimFilter.class) + @JsonSubTypes.Type(name = "false", value = FalseDimFilter.class), + @JsonSubTypes.Type(name = "null", value = NullFilter.class), + @JsonSubTypes.Type(name = "equals", value = EqualityFilter.class), + @JsonSubTypes.Type(name = 
"range", value = RangeFilter.class) }) public interface DimFilter extends Cacheable { diff --git a/processing/src/main/java/org/apache/druid/query/filter/DimFilterUtils.java b/processing/src/main/java/org/apache/druid/query/filter/DimFilterUtils.java index 3fcd719e25f5..27a0581d4752 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/DimFilterUtils.java +++ b/processing/src/main/java/org/apache/druid/query/filter/DimFilterUtils.java @@ -54,6 +54,10 @@ public class DimFilterUtils static final byte TRUE_CACHE_ID = 0xF; static final byte FALSE_CACHE_ID = 0x11; public static final byte BLOOM_DIM_FILTER_CACHE_ID = 0x10; + static final byte NULL_CACHE_ID = 0x12; + static final byte EQUALS_CACHE_ID = 0x13; + static final byte RANGE_CACHE_ID = 0x14; + public static final byte STRING_SEPARATOR = (byte) 0xFF; diff --git a/processing/src/main/java/org/apache/druid/query/filter/DruidFloatPredicate.java b/processing/src/main/java/org/apache/druid/query/filter/DruidFloatPredicate.java index f805d2b0d29f..6559ca79f6a2 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/DruidFloatPredicate.java +++ b/processing/src/main/java/org/apache/druid/query/filter/DruidFloatPredicate.java @@ -30,6 +30,8 @@ public interface DruidFloatPredicate { DruidFloatPredicate ALWAYS_FALSE = input -> false; + DruidFloatPredicate ALWAYS_TRUE = input -> true; + DruidFloatPredicate MATCH_NULL_ONLY = new DruidFloatPredicate() { @Override diff --git a/processing/src/main/java/org/apache/druid/query/filter/DruidPredicateFactory.java b/processing/src/main/java/org/apache/druid/query/filter/DruidPredicateFactory.java index aff4346c4350..9433973e8d2c 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/DruidPredicateFactory.java +++ b/processing/src/main/java/org/apache/druid/query/filter/DruidPredicateFactory.java @@ -21,6 +21,7 @@ import com.google.common.base.Predicate; import org.apache.druid.annotations.SubclassesMustOverrideEqualsAndHashCode; +import 
org.apache.druid.java.util.common.UOE; @SubclassesMustOverrideEqualsAndHashCode public interface DruidPredicateFactory @@ -33,6 +34,11 @@ public interface DruidPredicateFactory DruidDoublePredicate makeDoublePredicate(); + default Predicate makeArrayPredicate() + { + throw new UOE("Predicate does not support ARRAY types"); + } + /** * Object predicate is currently only used by vectorized matchers for non-string object selectors. This currently * means it will be used only if we encounter COMPLEX types, but will also include array types once they are more diff --git a/processing/src/main/java/org/apache/druid/query/filter/EqualityFilter.java b/processing/src/main/java/org/apache/druid/query/filter/EqualityFilter.java new file mode 100644 index 000000000000..debc00078190 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/filter/EqualityFilter.java @@ -0,0 +1,537 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.query.filter; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.base.Preconditions; +import com.google.common.base.Predicate; +import com.google.common.base.Predicates; +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Range; +import com.google.common.collect.RangeSet; +import com.google.common.collect.TreeRangeSet; +import org.apache.druid.java.util.common.IAE; +import org.apache.druid.math.expr.ExprEval; +import org.apache.druid.math.expr.ExpressionType; +import org.apache.druid.query.cache.CacheKeyBuilder; +import org.apache.druid.query.extraction.ExtractionFn; +import org.apache.druid.query.filter.vector.VectorValueMatcher; +import org.apache.druid.query.filter.vector.VectorValueMatcherColumnProcessorFactory; +import org.apache.druid.segment.BaseDoubleColumnValueSelector; +import org.apache.druid.segment.BaseFloatColumnValueSelector; +import org.apache.druid.segment.BaseLongColumnValueSelector; +import org.apache.druid.segment.BaseObjectColumnValueSelector; +import org.apache.druid.segment.ColumnInspector; +import org.apache.druid.segment.ColumnProcessorFactory; +import org.apache.druid.segment.ColumnProcessors; +import org.apache.druid.segment.ColumnSelector; +import org.apache.druid.segment.ColumnSelectorFactory; +import org.apache.druid.segment.DimensionSelector; +import org.apache.druid.segment.column.ColumnCapabilities; +import org.apache.druid.segment.column.ColumnIndexSupplier; +import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.column.TypeStrategy; +import org.apache.druid.segment.filter.DimensionPredicateFilter; +import org.apache.druid.segment.filter.Filters; +import org.apache.druid.segment.filter.PredicateValueMatcherFactory; +import org.apache.druid.segment.filter.ValueMatchers; +import 
org.apache.druid.segment.index.BitmapColumnIndex; +import org.apache.druid.segment.index.StringValueSetIndex; +import org.apache.druid.segment.vector.VectorColumnSelectorFactory; + +import javax.annotation.Nullable; +import java.nio.ByteBuffer; +import java.util.Arrays; +import java.util.Map; +import java.util.Objects; +import java.util.Set; + +public class EqualityFilter extends AbstractOptimizableDimFilter implements Filter +{ + private final String column; + private final ColumnType matchValueType; + private final Object matchValue; + @Nullable + private final ExtractionFn extractionFn; + @Nullable + private final FilterTuning filterTuning; + private final DruidPredicateFactory predicateFactory; + + @JsonCreator + public EqualityFilter( + @JsonProperty("column") String column, + @JsonProperty("matchValueType") ColumnType matchValueType, + @JsonProperty("matchValue") Object matchValue, + @JsonProperty("extractionFn") @Nullable ExtractionFn extractionFn, + @JsonProperty("filterTuning") @Nullable FilterTuning filterTuning + ) + { + Preconditions.checkArgument(column != null, "column must not be null"); + Preconditions.checkArgument(matchValue != null, "value must not be null"); + + this.column = column; + this.matchValueType = matchValueType; + this.matchValue = matchValue; + this.extractionFn = extractionFn; + this.filterTuning = filterTuning; + this.predicateFactory = new EqualityPredicateFactory(matchValue, matchValueType); + } + + @Override + public byte[] getCacheKey() + { + final TypeStrategy typeStrategy = matchValueType.getStrategy(); + final int size = typeStrategy.estimateSizeBytes(matchValue); + final ByteBuffer valueBuffer = ByteBuffer.allocate(size); + typeStrategy.write(valueBuffer, matchValue, size); + return new CacheKeyBuilder(DimFilterUtils.EQUALS_CACHE_ID) + .appendByte(DimFilterUtils.STRING_SEPARATOR) + .appendString(column) + .appendByte(DimFilterUtils.STRING_SEPARATOR) + .appendString(matchValueType.asTypeString()) + 
.appendByte(DimFilterUtils.STRING_SEPARATOR) + .appendByteArray(valueBuffer.array()) + .appendByte(DimFilterUtils.STRING_SEPARATOR) + .appendByteArray(extractionFn == null ? new byte[0] : extractionFn.getCacheKey()) + .build(); + } + + @Override + public DimFilter optimize() + { + return this; + } + + @Override + public Filter toFilter() + { + if (extractionFn == null) { + return this; + } else { + return new DimensionPredicateFilter(column, predicateFactory, extractionFn, filterTuning); + } + } + + @JsonProperty + public String getColumn() + { + return column; + } + + @JsonProperty + public ColumnType getMatchValueType() + { + return matchValueType; + } + + @JsonProperty + public Object getMatchValue() + { + return matchValue; + } + + @Nullable + @JsonProperty + @JsonInclude(JsonInclude.Include.NON_NULL) + public ExtractionFn getExtractionFn() + { + return extractionFn; + } + + @Nullable + @JsonProperty + @JsonInclude(JsonInclude.Include.NON_NULL) + public FilterTuning getFilterTuning() + { + return filterTuning; + } + + @Override + public String toString() + { + DimFilter.DimFilterToStringBuilder bob = new DimFilter.DimFilterToStringBuilder().appendDimension( + column, + extractionFn + ) + .append(" = ") + .append(matchValue); + + if (!ColumnType.STRING.equals(matchValueType)) { + bob.append(" (" + matchValueType.asTypeString() + ")"); + } + return bob.appendFilterTuning(filterTuning).build(); + } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + boolean valuesMatch = false; + EqualityFilter that = (EqualityFilter) o; + if (matchValue instanceof Object[] && that.matchValue instanceof Object[]) { + valuesMatch = Arrays.deepEquals((Object[]) matchValue, (Object[]) that.matchValue); + } else { + valuesMatch = Objects.equals(matchValue, that.matchValue); + } + return column.equals(that.column) && + Objects.equals(matchValueType, that.matchValueType) && + 
valuesMatch && + Objects.equals(extractionFn, that.extractionFn) && + Objects.equals(filterTuning, that.filterTuning); + } + + @Override + public int hashCode() + { + return Objects.hash(column, matchValueType, matchValue, extractionFn, filterTuning); + } + + @Override + public RangeSet getDimensionRangeSet(String dimension) + { + if (!Objects.equals(getColumn(), dimension) || getExtractionFn() != null) { + return null; + } + RangeSet retSet = TreeRangeSet.create(); + // todo (clint): this is lame.. but matches how range partitioning currently works i think + retSet.add(Range.singleton(String.valueOf(matchValue))); + return retSet; + } + + @Nullable + @Override + public BitmapColumnIndex getBitmapColumnIndex(ColumnIndexSelector selector) + { + if (!Filters.checkFilterTuningUseIndex(column, selector, filterTuning)) { + return null; + } + + final ColumnIndexSupplier indexSupplier = selector.getIndexSupplier(column); + if (indexSupplier == null) { + return Filters.makeNullIndex(false, selector); + } + + // todo (clint): do it for reals, i think we can do better than string value set ... 
+ + final StringValueSetIndex valueSetIndex = indexSupplier.as(StringValueSetIndex.class); + if (valueSetIndex == null) { + // column exists, but has no index + return null; + } + return valueSetIndex.forValue(String.valueOf(matchValue)); + } + + @Override + public ValueMatcher makeMatcher(ColumnSelectorFactory factory) + { + return ColumnProcessors.makeProcessor( + column, + new TypedConstantValueMatcherFactory(matchValue, matchValueType), + factory + ); + } + + @Override + public VectorValueMatcher makeVectorMatcher(VectorColumnSelectorFactory factory) + { + final ColumnCapabilities capabilities = factory.getColumnCapabilities(column); + + if (matchValueType.isPrimitive() && (capabilities == null || capabilities.isPrimitive())) { + return ColumnProcessors.makeVectorProcessor( + column, + VectorValueMatcherColumnProcessorFactory.instance(), + factory + ).makeMatcher(matchValue, matchValueType); + } + return ColumnProcessors.makeVectorProcessor( + column, + VectorValueMatcherColumnProcessorFactory.instance(), + factory + ).makeMatcher(new EqualityPredicateFactory(matchValue, matchValueType)); + } + + @Override + public boolean supportsSelectivityEstimation(ColumnSelector columnSelector, ColumnIndexSelector indexSelector) + { + return Filters.supportsSelectivityEstimation(this, column, columnSelector, indexSelector); + } + + @Override + public boolean canVectorizeMatcher(ColumnInspector inspector) + { + return true; + } + + @Override + public Set getRequiredColumns() + { + return ImmutableSet.of(column); + } + + @Override + public boolean supportsRequiredColumnRewrite() + { + return true; + } + + @Override + public Filter rewriteRequiredColumns(Map columnRewrites) + { + String rewriteDimensionTo = columnRewrites.get(column); + + if (rewriteDimensionTo == null) { + throw new IAE( + "Received a non-applicable rewrite: %s, filter's dimension: %s", + columnRewrites, + columnRewrites + ); + } + + return new EqualityFilter( + rewriteDimensionTo, + matchValueType, + 
matchValue, + extractionFn, + filterTuning + ); + } + + private static class EqualityPredicateFactory implements DruidPredicateFactory + { + private final ExprEval matchValue; + private final ColumnType matchValueType; + + private final Object initLock = new Object(); + + private volatile DruidLongPredicate longPredicate; + private volatile DruidFloatPredicate floatPredicate; + private volatile DruidDoublePredicate doublePredicate; + + public EqualityPredicateFactory(Object matchValue, ColumnType matchValueType) + { + this.matchValue = ExprEval.ofType(ExpressionType.fromColumnType(matchValueType), matchValue); + this.matchValueType = matchValueType; + } + + @Override + public Predicate makeStringPredicate() + { + return Predicates.equalTo(matchValue.castTo(ExpressionType.STRING).asString()); + } + + @Override + public DruidLongPredicate makeLongPredicate() + { + initLongPredicate(); + return longPredicate; + } + + @Override + public DruidFloatPredicate makeFloatPredicate() + { + initFloatPredicate(); + return floatPredicate; + } + + @Override + public DruidDoublePredicate makeDoublePredicate() + { + initDoublePredicate(); + return doublePredicate; + } + + @Override + public Predicate makeArrayPredicate() + { + final Object[] arrayValue = matchValue.asArray(); + return input -> Arrays.deepEquals(input, arrayValue); + } + + @Override + public Predicate makeObjectPredicate() + { + return Predicates.equalTo(matchValue.valueOrDefault()); + } + + private void initLongPredicate() + { + if (longPredicate != null) { + return; + } + synchronized (initLock) { + if (longPredicate != null) { + return; + } + if (matchValue == null) { + longPredicate = DruidLongPredicate.MATCH_NULL_ONLY; + return; + } + final Long valueAsLong = (Long) matchValue.castTo(ExpressionType.LONG).valueOrDefault(); + + if (valueAsLong == null) { + longPredicate = DruidLongPredicate.ALWAYS_FALSE; + } else { + // store the primitive, so we don't unbox for every comparison + final long unboxedLong = 
valueAsLong;
+          longPredicate = input -> input == unboxedLong;
+        }
+      }
+    }
+
+    private void initFloatPredicate()
+    {
+      if (floatPredicate != null) {
+        return;
+      }
+      synchronized (initLock) {
+        if (floatPredicate != null) {
+          return;
+        }
+
+        if (matchValue == null) {
+          floatPredicate = DruidFloatPredicate.MATCH_NULL_ONLY;
+          return;
+        }
+        // Null-check the boxed Number BEFORE calling floatValue(); calling floatValue() on a null
+        // cast result would NPE, and the unboxed result could never be null afterwards anyway.
+        final Number valueAsNumber = (Number) matchValue.castTo(ExpressionType.DOUBLE).valueOrDefault();
+
+        if (valueAsNumber == null) {
+          floatPredicate = DruidFloatPredicate.ALWAYS_FALSE;
+        } else {
+          // Compare with floatToIntBits instead of == to canonicalize NaNs.
+          final int floatBits = Float.floatToIntBits(valueAsNumber.floatValue());
+          floatPredicate = input -> Float.floatToIntBits(input) == floatBits;
+        }
+      }
+    }
+
+    private void initDoublePredicate()
+    {
+      if (doublePredicate != null) {
+        return;
+      }
+      synchronized (initLock) {
+        if (doublePredicate != null) {
+          return;
+        }
+        if (matchValue == null) {
+          doublePredicate = DruidDoublePredicate.MATCH_NULL_ONLY;
+          return;
+        }
+        final Double aDouble = (Double) matchValue.castTo(ExpressionType.DOUBLE).valueOrDefault();
+
+        if (aDouble == null) {
+          doublePredicate = DruidDoublePredicate.ALWAYS_FALSE;
+        } else {
+          // Compare with doubleToLongBits instead of == to canonicalize NaNs.
+ final long bits = Double.doubleToLongBits(aDouble); + doublePredicate = input -> Double.doubleToLongBits(input) == bits; + } + } + } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + EqualityPredicateFactory that = (EqualityPredicateFactory) o; + return Objects.equals(matchValue, that.matchValue) && Objects.equals(matchValueType, that.matchValueType); + } + + @Override + public int hashCode() + { + return Objects.hash(matchValue, matchValueType); + } + } + + private static class TypedConstantValueMatcherFactory implements ColumnProcessorFactory + { + private final ExprEval matchValue; + private final ColumnType matchValueType; + + public TypedConstantValueMatcherFactory(Object matchValue, ColumnType matchValueType) + { + this.matchValue = ExprEval.ofType(ExpressionType.fromColumnType(matchValueType), matchValue); + this.matchValueType = matchValueType; + } + + @Override + public ColumnType defaultType() + { + return ColumnType.UNKNOWN_COMPLEX; + } + + @Override + public ValueMatcher makeDimensionProcessor(DimensionSelector selector, boolean multiValue) + { + return ValueMatchers.makeStringValueMatcher( + selector, + matchValue.castTo(ExpressionType.STRING).asString(), + multiValue + ); + } + + @Override + public ValueMatcher makeFloatProcessor(BaseFloatColumnValueSelector selector) + { + return ValueMatchers.makeFloatValueMatcher(selector, (float) matchValue.castTo(ExpressionType.DOUBLE).asDouble()); + } + + @Override + public ValueMatcher makeDoubleProcessor(BaseDoubleColumnValueSelector selector) + { + return ValueMatchers.makeDoubleValueMatcher(selector, matchValue.castTo(ExpressionType.DOUBLE).asDouble()); + } + + @Override + public ValueMatcher makeLongProcessor(BaseLongColumnValueSelector selector) + { + return ValueMatchers.makeLongValueMatcher(selector, matchValue.castTo(ExpressionType.LONG).asLong()); + } + + @Override + public ValueMatcher 
makeArrayProcessor(BaseObjectColumnValueSelector selector) + { + return new PredicateValueMatcherFactory( + new EqualityPredicateFactory(matchValue.valueOrDefault(), matchValueType) + ).makeArrayProcessor(selector); + } + + @Override + public ValueMatcher makeComplexProcessor(BaseObjectColumnValueSelector selector) + { + return new PredicateValueMatcherFactory( + new EqualityPredicateFactory(matchValue.valueOrDefault(), matchValueType) + ).makeComplexProcessor(selector); + } + } +} diff --git a/processing/src/main/java/org/apache/druid/query/filter/Filter.java b/processing/src/main/java/org/apache/druid/query/filter/Filter.java index 725db374ffb2..87d34e2e26b9 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/Filter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/Filter.java @@ -26,7 +26,7 @@ import org.apache.druid.segment.ColumnInspector; import org.apache.druid.segment.ColumnSelector; import org.apache.druid.segment.ColumnSelectorFactory; -import org.apache.druid.segment.column.BitmapColumnIndex; +import org.apache.druid.segment.index.BitmapColumnIndex; import org.apache.druid.segment.vector.VectorColumnSelectorFactory; import javax.annotation.Nullable; diff --git a/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java b/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java index afddb0e42af4..0759ebe58f3e 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java @@ -57,11 +57,11 @@ import org.apache.druid.segment.ColumnSelector; import org.apache.druid.segment.ColumnSelectorFactory; import org.apache.druid.segment.DimensionHandlerUtils; -import org.apache.druid.segment.column.BitmapColumnIndex; import org.apache.druid.segment.column.ColumnIndexSupplier; -import org.apache.druid.segment.column.StringValueSetIndex; -import org.apache.druid.segment.column.Utf8ValueSetIndex; import 
org.apache.druid.segment.filter.Filters; +import org.apache.druid.segment.index.BitmapColumnIndex; +import org.apache.druid.segment.index.StringValueSetIndex; +import org.apache.druid.segment.index.Utf8ValueSetIndex; import org.apache.druid.segment.vector.VectorColumnSelectorFactory; import javax.annotation.Nullable; diff --git a/processing/src/main/java/org/apache/druid/query/filter/NullFilter.java b/processing/src/main/java/org/apache/druid/query/filter/NullFilter.java new file mode 100644 index 000000000000..7adad991c7e8 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/filter/NullFilter.java @@ -0,0 +1,310 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.query.filter; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.base.Preconditions; +import com.google.common.base.Predicate; +import com.google.common.base.Predicates; +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Range; +import com.google.common.collect.RangeSet; +import com.google.common.collect.TreeRangeSet; +import org.apache.druid.java.util.common.IAE; +import org.apache.druid.query.cache.CacheKeyBuilder; +import org.apache.druid.query.extraction.ExtractionFn; +import org.apache.druid.query.filter.vector.VectorValueMatcher; +import org.apache.druid.query.filter.vector.VectorValueMatcherColumnProcessorFactory; +import org.apache.druid.segment.ColumnInspector; +import org.apache.druid.segment.ColumnProcessors; +import org.apache.druid.segment.ColumnSelector; +import org.apache.druid.segment.ColumnSelectorFactory; +import org.apache.druid.segment.column.ColumnIndexSupplier; +import org.apache.druid.segment.filter.DimensionPredicateFilter; +import org.apache.druid.segment.filter.Filters; +import org.apache.druid.segment.index.BitmapColumnIndex; +import org.apache.druid.segment.index.NullValueIndex; +import org.apache.druid.segment.vector.VectorColumnSelectorFactory; + +import javax.annotation.Nullable; +import java.util.Map; +import java.util.Objects; +import java.util.Set; + +public class NullFilter extends AbstractOptimizableDimFilter implements Filter +{ + public static NullFilter forColumn(String column) + { + return new NullFilter(column, null, null); + } + + private final String column; + @Nullable + private final ExtractionFn extractionFn; + @Nullable + private final FilterTuning filterTuning; + + @JsonCreator + public NullFilter( + @JsonProperty("column") String column, + @JsonProperty("extractionFn") @Nullable ExtractionFn extractionFn, + 
@JsonProperty("filterTuning") @Nullable FilterTuning filterTuning + ) + { + Preconditions.checkArgument(column != null, "column must not be null"); + this.column = column; + this.extractionFn = extractionFn; + this.filterTuning = filterTuning; + } + + @JsonProperty + public String getColumn() + { + return column; + } + + @Nullable + @JsonProperty + @JsonInclude(JsonInclude.Include.NON_NULL) + public ExtractionFn getExtractionFn() + { + return extractionFn; + } + + @Nullable + @JsonProperty + @JsonInclude(JsonInclude.Include.NON_NULL) + public FilterTuning getFilterTuning() + { + return filterTuning; + } + + @Override + public byte[] getCacheKey() + { + return new CacheKeyBuilder(DimFilterUtils.NULL_CACHE_ID) + .appendByte(DimFilterUtils.STRING_SEPARATOR) + .appendString(column) + .appendByte(DimFilterUtils.STRING_SEPARATOR) + .appendByteArray(extractionFn == null ? new byte[0] : extractionFn.getCacheKey()) + .build(); + } + + @Override + public DimFilter optimize() + { + return this; + } + + @Override + public Filter toFilter() + { + if (extractionFn == null) { + return this; + } else { + return new DimensionPredicateFilter(column, NullPredicateFactory.INSTANCE, extractionFn, filterTuning); + } + } + + @Nullable + @Override + public RangeSet getDimensionRangeSet(String dimension) + { + RangeSet retSet = TreeRangeSet.create(); + // Nulls are less than empty String in segments + retSet.add(Range.lessThan("")); + return retSet; + } + + @Nullable + @Override + public BitmapColumnIndex getBitmapColumnIndex(ColumnIndexSelector selector) + { + if (!Filters.checkFilterTuningUseIndex(column, selector, filterTuning)) { + return null; + } + final ColumnIndexSupplier indexSupplier = selector.getIndexSupplier(column); + if (indexSupplier == null) { + return Filters.makeNullIndex(true, selector); + } + final NullValueIndex nullValueIndex = indexSupplier.as(NullValueIndex.class); + if (nullValueIndex == null) { + return null; + } + return nullValueIndex.forNull(); + } + + 
@Override
+  public ValueMatcher makeMatcher(ColumnSelectorFactory factory)
+  {
+    return Filters.makeValueMatcher(factory, column, NullPredicateFactory.INSTANCE);
+  }
+
+  @Override
+  public VectorValueMatcher makeVectorMatcher(VectorColumnSelectorFactory factory)
+  {
+    return ColumnProcessors.makeVectorProcessor(
+        column,
+        VectorValueMatcherColumnProcessorFactory.instance(),
+        factory
+    ).makeMatcher(NullPredicateFactory.INSTANCE);
+  }
+
+  @Override
+  public boolean supportsSelectivityEstimation(ColumnSelector columnSelector, ColumnIndexSelector indexSelector)
+  {
+    return Filters.supportsSelectivityEstimation(this, column, columnSelector, indexSelector);
+  }
+
+  @Override
+  public boolean canVectorizeMatcher(ColumnInspector inspector)
+  {
+    return true;
+  }
+
+  @Override
+  public Set<String> getRequiredColumns()
+  {
+    return ImmutableSet.of(column);
+  }
+
+  @Override
+  public boolean supportsRequiredColumnRewrite()
+  {
+    return true;
+  }
+
+  @Override
+  public Filter rewriteRequiredColumns(Map<String, String> columnRewrites)
+  {
+    String rewriteDimensionTo = columnRewrites.get(column);
+
+    if (rewriteDimensionTo == null) {
+      // second format arg is the filter's column, not the rewrite map again
+      throw new IAE(
+          "Received a non-applicable rewrite: %s, filter's dimension: %s",
+          columnRewrites,
+          column
+      );
+    }
+    return new NullFilter(rewriteDimensionTo, extractionFn, filterTuning);
+  }
+
+  @Override
+  public boolean equals(Object o)
+  {
+    if (this == o) {
+      return true;
+    }
+    if (o == null || getClass() != o.getClass()) {
+      return false;
+    }
+    NullFilter that = (NullFilter) o;
+    return Objects.equals(column, that.column) &&
+           Objects.equals(extractionFn, that.extractionFn) &&
+           Objects.equals(filterTuning, that.filterTuning);
+  }
+
+  @Override
+  public int hashCode()
+  {
+    return Objects.hash(column, extractionFn, filterTuning);
+  }
+
+  @Override
+  public String toString()
+  {
+    return new DimFilterToStringBuilder().appendDimension(column, extractionFn)
+                                         .append(" IS NULL")
+                                         .appendFilterTuning(filterTuning)
+                                         .build();
+  }
+
+  private static 
class NullPredicateFactory implements DruidPredicateFactory + { + public static final NullPredicateFactory INSTANCE = new NullPredicateFactory(); + + private NullPredicateFactory() + { + // no instantiation + } + + @Override + public Predicate makeStringPredicate() + { + return Predicates.isNull(); + } + + @Override + public DruidLongPredicate makeLongPredicate() + { + return DruidLongPredicate.MATCH_NULL_ONLY; + } + + @Override + public DruidFloatPredicate makeFloatPredicate() + { + return DruidFloatPredicate.MATCH_NULL_ONLY; + } + + @Override + public DruidDoublePredicate makeDoublePredicate() + { + return DruidDoublePredicate.MATCH_NULL_ONLY; + } + + @Override + public Predicate makeArrayPredicate() + { + return Predicates.isNull(); + } + + @Override + public Predicate makeObjectPredicate() + { + return Predicates.isNull(); + } + + @Override + public int hashCode() + { + return super.hashCode(); + } + + @Override + public boolean equals(Object obj) + { + if (this == obj) { + return true; + } + if (obj == null || getClass() != obj.getClass()) { + return false; + } + return true; + } + + @Override + public String toString() + { + return "NullPredicateFactory{}"; + } + } +} diff --git a/processing/src/main/java/org/apache/druid/query/filter/RangeFilter.java b/processing/src/main/java/org/apache/druid/query/filter/RangeFilter.java new file mode 100644 index 000000000000..1d1ab9ab6bcf --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/filter/RangeFilter.java @@ -0,0 +1,771 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.filter; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.base.Preconditions; +import com.google.common.base.Predicate; +import com.google.common.base.Predicates; +import com.google.common.base.Supplier; +import com.google.common.base.Suppliers; +import com.google.common.collect.BoundType; +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Range; +import com.google.common.collect.RangeSet; +import com.google.common.collect.TreeRangeSet; +import org.apache.druid.common.config.NullHandling; +import org.apache.druid.java.util.common.IAE; +import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.math.expr.ExprEval; +import org.apache.druid.math.expr.ExpressionType; +import org.apache.druid.query.cache.CacheKeyBuilder; +import org.apache.druid.query.extraction.ExtractionFn; +import org.apache.druid.query.filter.vector.VectorValueMatcher; +import org.apache.druid.query.filter.vector.VectorValueMatcherColumnProcessorFactory; +import org.apache.druid.query.ordering.StringComparators; +import org.apache.druid.segment.ColumnInspector; +import org.apache.druid.segment.ColumnProcessors; +import org.apache.druid.segment.ColumnSelector; +import org.apache.druid.segment.ColumnSelectorFactory; +import org.apache.druid.segment.column.ColumnIndexSupplier; +import org.apache.druid.segment.column.ColumnType; +import 
org.apache.druid.segment.column.TypeStrategy; +import org.apache.druid.segment.column.ValueType; +import org.apache.druid.segment.filter.DimensionPredicateFilter; +import org.apache.druid.segment.filter.Filters; +import org.apache.druid.segment.index.BitmapColumnIndex; +import org.apache.druid.segment.index.LexicographicalRangeIndex; +import org.apache.druid.segment.index.NumericRangeIndex; +import org.apache.druid.segment.vector.VectorColumnSelectorFactory; + +import javax.annotation.Nullable; +import java.nio.ByteBuffer; +import java.util.Arrays; +import java.util.Comparator; +import java.util.Map; +import java.util.Objects; +import java.util.Set; + +public class RangeFilter extends AbstractOptimizableDimFilter implements Filter +{ + private final String column; + private final ColumnType matchValueType; + + @Nullable + private final Object upper; + + @Nullable + private final Object lower; + + private final ExprEval upperEval; + private final ExprEval lowerEval; + private final boolean lowerStrict; + private final boolean upperStrict; + @Nullable + private final ExtractionFn extractionFn; + + private final Supplier> stringPredicateSupplier; + private final Supplier longPredicateSupplier; + private final Supplier floatPredicateSupplier; + private final Supplier doublePredicateSupplier; + @Nullable + private final FilterTuning filterTuning; + + @JsonCreator + public RangeFilter( + @JsonProperty("column") String column, + @JsonProperty("matchValueType") ColumnType matchValueType, + @JsonProperty("lower") @Nullable Object lower, + @JsonProperty("upper") @Nullable Object upper, + @JsonProperty("lowerStrict") @Nullable Boolean lowerStrict, + @JsonProperty("upperStrict") @Nullable Boolean upperStrict, + @JsonProperty("extractionFn") @Nullable ExtractionFn extractionFn, + @JsonProperty("filterTuning") @Nullable FilterTuning filterTuning + ) + { + this.column = Preconditions.checkNotNull(column, "column can not be null"); + this.matchValueType = 
Preconditions.checkNotNull(matchValueType, "matchValueType can not be null");
+    // checkArgument (not checkState): this validates a constructor parameter, so callers should
+    // see IllegalArgumentException rather than IllegalStateException
+    Preconditions.checkArgument((lower != null) || (upper != null), "lower and upper can not be null at the same time");
+    final ExpressionType expressionType = ExpressionType.fromColumnType(matchValueType);
+    this.upper = upper;
+    this.lower = lower;
+    this.upperEval = ExprEval.ofType(expressionType, upper);
+    this.lowerEval = ExprEval.ofType(expressionType, lower);
+    if (expressionType.isNumeric()) {
+      if (upper != null && upperEval.isNumericNull()) {
+        throw new IAE("Match value is specified as [%s] but [%s] cannot be parsed", expressionType, upper);
+      }
+      if (lower != null && lowerEval.isNumericNull()) {
+        throw new IAE("Match value is specified as [%s] but [%s] cannot be parsed", expressionType, lower);
+      }
+    }
+    this.lowerStrict = lowerStrict != null && lowerStrict;
+    this.upperStrict = upperStrict != null && upperStrict;
+    this.extractionFn = extractionFn;
+    this.stringPredicateSupplier = makeStringPredicateSupplier();
+    this.longPredicateSupplier = makeLongPredicateSupplier();
+    this.floatPredicateSupplier = makeFloatPredicateSupplier();
+    this.doublePredicateSupplier = makeDoublePredicateSupplier();
+    this.filterTuning = filterTuning;
+  }
+
+  @JsonProperty
+  public String getColumn()
+  {
+    return column;
+  }
+
+  @JsonProperty
+  public ColumnType getMatchValueType()
+  {
+    return matchValueType;
+  }
+
+  @Nullable
+  @JsonProperty
+  @JsonInclude(JsonInclude.Include.NON_NULL)
+  public Object getUpper()
+  {
+    return upper;
+  }
+
+  @Nullable
+  @JsonProperty
+  @JsonInclude(JsonInclude.Include.NON_NULL)
+  public Object getLower()
+  {
+    return lower;
+  }
+
+  @JsonProperty
+  @JsonInclude(JsonInclude.Include.NON_DEFAULT)
+  public boolean isLowerStrict()
+  {
+    return lowerStrict;
+  }
+
+  @JsonProperty
+  @JsonInclude(JsonInclude.Include.NON_DEFAULT)
+  public boolean isUpperStrict()
+  {
+    return upperStrict;
+  }
+
+  public boolean hasLowerBound()
+  {
+    return lower != null;
+  }
+
+  public boolean 
hasUpperBound() + { + return upper != null; + } + + @Nullable + @JsonProperty + @JsonInclude(JsonInclude.Include.NON_NULL) + public ExtractionFn getExtractionFn() + { + return extractionFn; + } + + @Nullable + @JsonInclude(JsonInclude.Include.NON_NULL) + @JsonProperty + public FilterTuning getFilterTuning() + { + return filterTuning; + } + + @Override + public byte[] getCacheKey() + { + final byte[] lowerBytes; + final byte[] upperBytes; + if (hasLowerBound()) { + final TypeStrategy typeStrategy = matchValueType.getStrategy(); + final int size = typeStrategy.estimateSizeBytes(lower); + final ByteBuffer valueBuffer = ByteBuffer.allocate(size); + typeStrategy.write(valueBuffer, lower, size); + lowerBytes = valueBuffer.array(); + } else { + lowerBytes = new byte[0]; + } + if (hasUpperBound()) { + final TypeStrategy typeStrategy = matchValueType.getStrategy(); + final int size = typeStrategy.estimateSizeBytes(upper); + final ByteBuffer valueBuffer = ByteBuffer.allocate(size); + typeStrategy.write(valueBuffer, upper, size); + upperBytes = valueBuffer.array(); + } else { + upperBytes = new byte[0]; + } + byte boundType = 0x1; + if (this.getLower() == null) { + boundType = 0x2; + } else if (this.getUpper() == null) { + boundType = 0x3; + } + + final byte lowerStrictByte = this.isLowerStrict() ? (byte) 1 : 0x0; + final byte upperStrictByte = this.isUpperStrict() ? 
(byte) 1 : 0x0; + + return new CacheKeyBuilder(DimFilterUtils.RANGE_CACHE_ID) + .appendByte(boundType) + .appendByte(DimFilterUtils.STRING_SEPARATOR) + .appendString(column) + .appendByte(DimFilterUtils.STRING_SEPARATOR) + .appendString(matchValueType.asTypeString()) + .appendByte(DimFilterUtils.STRING_SEPARATOR) + .appendByteArray(upperBytes) + .appendByte(DimFilterUtils.STRING_SEPARATOR) + .appendByteArray(lowerBytes) + .appendByte(DimFilterUtils.STRING_SEPARATOR) + .appendByte(lowerStrictByte) + .appendByte(DimFilterUtils.STRING_SEPARATOR) + .appendByte(upperStrictByte) + .appendByte(DimFilterUtils.STRING_SEPARATOR) + .appendByteArray(extractionFn == null ? new byte[0] : extractionFn.getCacheKey()) + .build(); + } + + @Override + public DimFilter optimize() + { + return this; + } + + @Override + public Filter toFilter() + { + if (extractionFn != null) { + return new DimensionPredicateFilter(column, getPredicateFactory(), extractionFn, filterTuning); + } + return this; + } + + @Override + public RangeSet getDimensionRangeSet(String dimension) + { + // range partitioning converts stuff to strings.. so do that i guess + + String lowerString = lowerEval.asString(); + String upperString = upperEval.asString(); + RangeSet retSet = TreeRangeSet.create(); + Range range; + if (getLower() == null) { + range = isUpperStrict() ? Range.lessThan(upperString) : Range.atMost(upperString); + } else if (getUpper() == null) { + range = isLowerStrict() ? Range.greaterThan(lowerString) : Range.atLeast(lowerString); + } else { + range = Range.range( + lowerString, + isLowerStrict() ? BoundType.OPEN : BoundType.CLOSED, + upperString, + isUpperStrict() ? 
BoundType.OPEN : BoundType.CLOSED + ); + } + retSet.add(range); + return retSet; + } + + @Nullable + @Override + public BitmapColumnIndex getBitmapColumnIndex(ColumnIndexSelector selector) + { + if (!Filters.checkFilterTuningUseIndex(column, selector, filterTuning)) { + return null; + } + if (matchValueType.is(ValueType.STRING) && extractionFn == null) { + final ColumnIndexSupplier indexSupplier = selector.getIndexSupplier(column); + if (indexSupplier == null) { + return Filters.makeNullIndex(false, selector); + } + final LexicographicalRangeIndex rangeIndex = indexSupplier.as(LexicographicalRangeIndex.class); + if (rangeIndex != null) { + final BitmapColumnIndex rangeBitmaps = rangeIndex.forRange( + hasLowerBound() ? lowerEval.asString() : null, + lowerStrict, + hasUpperBound() ? upperEval.asString() : null, + upperStrict + ); + if (rangeBitmaps != null) { + return rangeBitmaps; + } + } + } + if (matchValueType.isNumeric() && extractionFn == null) { + final ColumnIndexSupplier indexSupplier = selector.getIndexSupplier(column); + if (indexSupplier == null) { + return Filters.makeNullIndex(false, selector); + } + final NumericRangeIndex rangeIndex = indexSupplier.as(NumericRangeIndex.class); + if (rangeIndex != null) { + final Number lower = (Number) lowerEval.valueOrDefault(); + final Number upper = (Number) upperEval.valueOrDefault(); + final BitmapColumnIndex rangeBitmaps = rangeIndex.forRange( + lower, + isLowerStrict(), + upper, + isUpperStrict() + ); + if (rangeBitmaps != null) { + return rangeBitmaps; + } + } + } + + // fall back to predicate based index if it is available + return Filters.makePredicateIndex(column, selector, getPredicateFactory()); + } + + @Override + public ValueMatcher makeMatcher(ColumnSelectorFactory factory) + { + return Filters.makeValueMatcher(factory, column, getPredicateFactory()); + } + + @Override + public VectorValueMatcher makeVectorMatcher(VectorColumnSelectorFactory factory) + { + return ColumnProcessors.makeVectorProcessor( + 
column, + VectorValueMatcherColumnProcessorFactory.instance(), + factory + ).makeMatcher(getPredicateFactory()); + } + + @Override + public boolean supportsSelectivityEstimation(ColumnSelector columnSelector, ColumnIndexSelector indexSelector) + { + return Filters.supportsSelectivityEstimation(this, column, columnSelector, indexSelector); + } + + @Override + public boolean canVectorizeMatcher(ColumnInspector inspector) + { + return true; + } + + @Override + public Set getRequiredColumns() + { + return ImmutableSet.of(column); + } + + @Override + public boolean supportsRequiredColumnRewrite() + { + return true; + } + + @Override + public Filter rewriteRequiredColumns(Map columnRewrites) + { + String rewriteDimensionTo = columnRewrites.get(column); + + if (rewriteDimensionTo == null) { + throw new IAE( + "Received a non-applicable rewrite: %s, filter's dimension: %s", + columnRewrites, + column + ); + } + return new RangeFilter( + rewriteDimensionTo, + matchValueType, + lower, + upper, + lowerStrict, + upperStrict, + extractionFn, + filterTuning + ); + } + + public boolean isEquality() + { + if (!hasUpperBound() || !hasLowerBound() || lowerStrict || upperStrict) { + return false; + } + if (matchValueType.isArray()) { + ExpressionType matchArrayType = ExpressionType.fromColumnType(matchValueType); + return Arrays.deepEquals( + ExprEval.ofType(matchArrayType, upper).asArray(), + ExprEval.ofType(matchArrayType, lower).asArray() + ); + } else { + return Objects.equals(upper, lower); + } + } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + + RangeFilter that = (RangeFilter) o; + boolean upperSame; + boolean lowerSame; + if (matchValueType.isArray()) { + ExpressionType matchArrayType = ExpressionType.fromColumnType(matchValueType); + upperSame = Arrays.deepEquals( + ExprEval.ofType(matchArrayType, upper).asArray(), + ExprEval.ofType(matchArrayType, 
that.upper).asArray() + ); + lowerSame = Arrays.deepEquals( + ExprEval.ofType(matchArrayType, lower).asArray(), + ExprEval.ofType(matchArrayType, that.lower).asArray() + ); + } else { + upperSame = Objects.equals(upper, that.upper); + lowerSame = Objects.equals(lower, that.lower); + } + + return lowerStrict == that.lowerStrict && + upperStrict == that.upperStrict && + column.equals(that.column) && + Objects.equals(matchValueType, that.matchValueType) && + upperSame && + lowerSame && + Objects.equals(extractionFn, that.extractionFn) && + Objects.equals(filterTuning, that.filterTuning); + } + + @Override + public int hashCode() + { + return Objects.hash( + column, + matchValueType, + upper, + lower, + lowerStrict, + upperStrict, + extractionFn, + filterTuning + ); + } + + @Override + public String toString() + { + final DimFilterToStringBuilder builder = new DimFilterToStringBuilder(); + + if (lower != null) { + builder.append(lower); + if (lowerStrict) { + builder.append(" < "); + } else { + builder.append(" <= "); + } + } + + builder.appendDimension(column, extractionFn); + + builder.append(StringUtils.format(" as %s", matchValueType.toString())); + + if (upper != null) { + if (upperStrict) { + builder.append(" < "); + } else { + builder.append(" <= "); + } + builder.append(upper); + } + + return builder.appendFilterTuning(filterTuning).build(); + } + + private DruidPredicateFactory getPredicateFactory() + { + return new RangePredicateFactory(this); + } + + private Supplier makeLongPredicateSupplier() + { + return Suppliers.memoize(() -> { + boolean hasLowerBound; + boolean hasUpperBound; + long lowerBound; + long upperBound; + + if (hasLowerBound()) { + ExprEval lowerCast = lowerEval.castTo(ExpressionType.LONG); + if (lowerCast.isNumericNull()) { + hasLowerBound = false; + lowerBound = Long.MIN_VALUE; + } else { + lowerBound = lowerCast.asLong(); + hasLowerBound = true; + } + } else { + hasLowerBound = false; + lowerBound = Long.MIN_VALUE; + } + + if 
(hasUpperBound()) {
+        ExprEval upperCast = upperEval.castTo(ExpressionType.LONG);
+        if (upperCast.isNumericNull()) {
+          // upper value is not null, but isn't convertible to a long so is effectively null, nothing matches
+          return DruidLongPredicate.ALWAYS_FALSE;
+        } else {
+          hasUpperBound = true;
+          upperBound = upperCast.asLong();
+        }
+      } else {
+        hasUpperBound = false;
+        upperBound = Long.MAX_VALUE;
+      }
+      return BoundDimFilter.makeLongPredicateFromBounds(
+          hasLowerBound,
+          hasUpperBound,
+          lowerStrict,
+          upperStrict,
+          lowerBound,
+          upperBound
+      );
+    });
+  }
+
+  private Supplier<DruidFloatPredicate> makeFloatPredicateSupplier()
+  {
+    // float matching delegates to the double predicate; applyDouble handles the widened value
+    return Suppliers.memoize(() -> {
+      DruidDoublePredicate doublePredicate = makeDoublePredicateSupplier().get();
+      return doublePredicate::applyDouble;
+    });
+  }
+
+  private Supplier<DruidDoublePredicate> makeDoublePredicateSupplier()
+  {
+    return Suppliers.memoize(() -> {
+      boolean hasLowerBound;
+      boolean hasUpperBound;
+      double lowerBound;
+      double upperBound;
+
+      if (hasLowerBound()) {
+        ExprEval lowerCast = lowerEval.castTo(ExpressionType.DOUBLE);
+        if (lowerCast.isNumericNull()) {
+          hasLowerBound = false;
+          lowerBound = Double.NEGATIVE_INFINITY;
+        } else {
+          lowerBound = lowerCast.asDouble();
+          hasLowerBound = true;
+        }
+      } else {
+        hasLowerBound = false;
+        lowerBound = Double.NEGATIVE_INFINITY;
+      }
+
+      if (hasUpperBound()) {
+        ExprEval upperCast = upperEval.castTo(ExpressionType.DOUBLE);
+        if (upperCast.isNumericNull()) {
+          // upper value is not null, but isn't convertible to a double so is effectively null, nothing matches
+          return DruidDoublePredicate.ALWAYS_FALSE;
+        } else {
+          hasUpperBound = true;
+          upperBound = upperCast.asDouble();
+        }
+      } else {
+        hasUpperBound = false;
+        upperBound = Double.POSITIVE_INFINITY;
+      }
+
+      return BoundDimFilter.makeDoublePredicateFromBounds(
+          hasLowerBound,
+          hasUpperBound,
+          lowerStrict,
+          upperStrict,
+          lowerBound,
+          upperBound
+      );
+    });
+  }
+
+  private Supplier<Predicate<String>> makeStringPredicateSupplier()
+  {
+    return Suppliers.memoize(() -> {
+      
Comparator stringComparator = matchValueType.isNumeric() + ? StringComparators.NUMERIC + : StringComparators.LEXICOGRAPHIC; + String lowerBound = lowerEval.castTo(ExpressionType.STRING).asString(); + String upperBound = upperEval.castTo(ExpressionType.STRING).asString(); + + if (hasLowerBound() && hasUpperBound()) { + if (upperStrict && lowerStrict) { + return input -> { + if (NullHandling.isNullOrEquivalent(input)) { + return false; + } + final int lowerComparing = stringComparator.compare(input, lowerBound); + final int upperComparing = stringComparator.compare(upperBound, input); + return ((lowerComparing > 0)) && (upperComparing > 0); + }; + } else if (lowerStrict) { + return input -> { + if (NullHandling.isNullOrEquivalent(input)) { + return false; + } + final int lowerComparing = stringComparator.compare(input, lowerBound); + final int upperComparing = stringComparator.compare(upperBound, input); + return (lowerComparing > 0) && (upperComparing >= 0); + }; + } else if (upperStrict) { + return input -> { + if (NullHandling.isNullOrEquivalent(input)) { + return false; + } + final int lowerComparing = stringComparator.compare(input, lowerBound); + final int upperComparing = stringComparator.compare(upperBound, input); + return (lowerComparing >= 0) && (upperComparing > 0); + }; + } else { + return input -> { + if (NullHandling.isNullOrEquivalent(input)) { + return false; + } + final int lowerComparing = stringComparator.compare(input, lowerBound); + final int upperComparing = stringComparator.compare(upperBound, input); + return (lowerComparing >= 0) && (upperComparing >= 0); + }; + } + } else if (hasUpperBound()) { + if (upperStrict) { + return input -> { + if (NullHandling.isNullOrEquivalent(input)) { + return false; + } + final int upperComparing = stringComparator.compare(upperBound, input); + return upperComparing > 0; + }; + } else { + return input -> { + if (NullHandling.isNullOrEquivalent(input)) { + return false; + } + final int upperComparing = 
stringComparator.compare(upperBound, input); + return upperComparing >= 0; + }; + } + } else if (hasLowerBound()) { + if (lowerStrict) { + return input -> { + if (NullHandling.isNullOrEquivalent(input)) { + return false; + } + final int lowerComparing = stringComparator.compare(input, lowerBound); + return lowerComparing > 0; + }; + } else { + return input -> { + if (NullHandling.isNullOrEquivalent(input)) { + return false; + } + final int lowerComparing = stringComparator.compare(input, lowerBound); + return lowerComparing >= 0; + }; + } + } else { + return Predicates.notNull(); + } + }); + } + + private class RangePredicateFactory implements DruidPredicateFactory + { + private final RangeFilter rangeFilter; + + private RangePredicateFactory(RangeFilter rangeFilter) + { + this.rangeFilter = rangeFilter; + } + + @Override + public Predicate makeStringPredicate() + { + return stringPredicateSupplier.get(); + } + + @Override + public DruidLongPredicate makeLongPredicate() + { + if (matchValueType.isNumeric()) { + return longPredicateSupplier.get(); + } + Predicate stringPredicate = stringPredicateSupplier.get(); + return input -> stringPredicate.apply(String.valueOf(input)); + } + + @Override + public DruidFloatPredicate makeFloatPredicate() + { + if (matchValueType.isNumeric()) { + return floatPredicateSupplier.get(); + } + Predicate stringPredicate = stringPredicateSupplier.get(); + return input -> stringPredicate.apply(String.valueOf(input)); + } + + @Override + public DruidDoublePredicate makeDoublePredicate() + { + if (matchValueType.isNumeric()) { + return doublePredicateSupplier.get(); + } + Predicate stringPredicate = stringPredicateSupplier.get(); + return input -> stringPredicate.apply(String.valueOf(input)); + } + + @Override + public int hashCode() + { + return rangeFilter.hashCode(); + } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + 
RangePredicateFactory that = (RangePredicateFactory) o; + return Objects.equals(rangeFilter, that.rangeFilter); + } + + @Override + public String toString() + { + return "RangePredicateFactory{" + + "rangeFilter=" + rangeFilter + + '}'; + } + } +} diff --git a/processing/src/main/java/org/apache/druid/query/filter/vector/ArrayVectorValueMatcher.java b/processing/src/main/java/org/apache/druid/query/filter/vector/ArrayVectorValueMatcher.java new file mode 100644 index 000000000000..5a559abbdcbd --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/filter/vector/ArrayVectorValueMatcher.java @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.query.filter.vector; + +import com.google.common.base.Predicate; +import org.apache.druid.query.filter.DruidPredicateFactory; +import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.column.TypeSignature; +import org.apache.druid.segment.column.ValueType; +import org.apache.druid.segment.vector.VectorObjectSelector; + +import javax.annotation.Nullable; + +public class ArrayVectorValueMatcher implements VectorValueMatcherFactory +{ + protected final TypeSignature columnType; + protected final VectorObjectSelector selector; + + public ArrayVectorValueMatcher( + TypeSignature columnType, + VectorObjectSelector selector + ) + { + this.columnType = columnType; + this.selector = selector; + } + + @Override + public VectorValueMatcher makeMatcher(@Nullable String value) + { + throw new UnsupportedOperationException( + "Vectorized matcher cannot make string matcher for ARRAY types" + ); + } + + @Override + public VectorValueMatcher makeMatcher(Object value, ColumnType type) + { + throw new UnsupportedOperationException( + "Vectorized matcher cannot make object matcher for ARRAY types" + ); + } + + @Override + public VectorValueMatcher makeMatcher(DruidPredicateFactory predicateFactory) + { + final Predicate predicate = predicateFactory.makeArrayPredicate(); + + return new BaseVectorValueMatcher(selector) + { + final VectorMatch match = VectorMatch.wrap(new int[selector.getMaxVectorSize()]); + + @Override + public ReadableVectorMatch match(final ReadableVectorMatch mask) + { + final Object[] vector = selector.getObjectVector(); + final int[] selection = match.getSelection(); + + int numRows = 0; + + for (int i = 0; i < mask.getSelectionSize(); i++) { + final int rowNum = mask.getSelection()[i]; + Object o = vector[rowNum]; + if ((o == null || o instanceof Object[]) && predicate.apply((Object[]) o)) { + selection[numRows++] = rowNum; + } else if (predicate.apply(new Object[]{o})) { + selection[numRows++] = rowNum; 
+ } + } + + match.setSelectionSize(numRows); + assert match.isValid(mask); + return match; + } + }; + } +} diff --git a/processing/src/main/java/org/apache/druid/query/filter/vector/DoubleVectorValueMatcher.java b/processing/src/main/java/org/apache/druid/query/filter/vector/DoubleVectorValueMatcher.java index 1b95cb977bbb..44e448e86d29 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/vector/DoubleVectorValueMatcher.java +++ b/processing/src/main/java/org/apache/druid/query/filter/vector/DoubleVectorValueMatcher.java @@ -19,9 +19,12 @@ package org.apache.druid.query.filter.vector; +import org.apache.druid.math.expr.ExprEval; +import org.apache.druid.math.expr.ExpressionType; import org.apache.druid.query.filter.DruidDoublePredicate; import org.apache.druid.query.filter.DruidPredicateFactory; import org.apache.druid.segment.DimensionHandlerUtils; +import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.vector.VectorValueSelector; import javax.annotation.Nullable; @@ -48,8 +51,22 @@ public VectorValueMatcher makeMatcher(@Nullable final String value) return BooleanVectorValueMatcher.of(selector, false); } - final double matchValDouble = matchVal; + return makeDoubleMatcher(matchVal); + } + @Override + public VectorValueMatcher makeMatcher(Object value, ColumnType type) + { + ExprEval eval = ExprEval.ofType(ExpressionType.fromColumnType(type), value); + ExprEval cast = eval.castTo(ExpressionType.DOUBLE); + if (cast.isNumericNull()) { + return makeNullValueMatcher(selector); + } + return makeDoubleMatcher(cast.asDouble()); + } + + private BaseVectorValueMatcher makeDoubleMatcher(double matchValDouble) + { return new BaseVectorValueMatcher(selector) { final VectorMatch match = VectorMatch.wrap(new int[selector.getMaxVectorSize()]); @@ -80,6 +97,7 @@ public ReadableVectorMatch match(final ReadableVectorMatch mask) }; } + @Override public VectorValueMatcher makeMatcher(final DruidPredicateFactory predicateFactory) { diff --git 
a/processing/src/main/java/org/apache/druid/query/filter/vector/FloatVectorValueMatcher.java b/processing/src/main/java/org/apache/druid/query/filter/vector/FloatVectorValueMatcher.java index cd62e406ab3b..46c0ea961f43 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/vector/FloatVectorValueMatcher.java +++ b/processing/src/main/java/org/apache/druid/query/filter/vector/FloatVectorValueMatcher.java @@ -19,9 +19,12 @@ package org.apache.druid.query.filter.vector; +import org.apache.druid.math.expr.ExprEval; +import org.apache.druid.math.expr.ExpressionType; import org.apache.druid.query.filter.DruidFloatPredicate; import org.apache.druid.query.filter.DruidPredicateFactory; import org.apache.druid.segment.DimensionHandlerUtils; +import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.vector.VectorValueSelector; import javax.annotation.Nullable; @@ -50,6 +53,22 @@ public VectorValueMatcher makeMatcher(@Nullable final String value) final float matchValFloat = matchVal; + return makeFloatMatcher(matchValFloat); + } + + @Override + public VectorValueMatcher makeMatcher(Object value, ColumnType type) + { + ExprEval eval = ExprEval.ofType(ExpressionType.fromColumnType(type), value); + ExprEval cast = eval.castTo(ExpressionType.DOUBLE); + if (cast.isNumericNull()) { + return makeNullValueMatcher(selector); + } + return makeFloatMatcher((float) cast.asDouble()); + } + + private BaseVectorValueMatcher makeFloatMatcher(float matchValFloat) + { return new BaseVectorValueMatcher(selector) { final VectorMatch match = VectorMatch.wrap(new int[selector.getMaxVectorSize()]); diff --git a/processing/src/main/java/org/apache/druid/query/filter/vector/LongVectorValueMatcher.java b/processing/src/main/java/org/apache/druid/query/filter/vector/LongVectorValueMatcher.java index 55084a67c275..13fe7f07cbe1 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/vector/LongVectorValueMatcher.java +++ 
b/processing/src/main/java/org/apache/druid/query/filter/vector/LongVectorValueMatcher.java @@ -19,9 +19,12 @@ package org.apache.druid.query.filter.vector; +import org.apache.druid.math.expr.ExprEval; +import org.apache.druid.math.expr.ExpressionType; import org.apache.druid.query.filter.DruidLongPredicate; import org.apache.druid.query.filter.DruidPredicateFactory; import org.apache.druid.segment.DimensionHandlerUtils; +import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.vector.VectorValueSelector; import javax.annotation.Nullable; @@ -50,6 +53,22 @@ public VectorValueMatcher makeMatcher(@Nullable final String value) final long matchValLong = matchVal; + return makeLongMatcher(matchValLong); + } + + @Override + public VectorValueMatcher makeMatcher(Object value, ColumnType type) + { + ExprEval eval = ExprEval.ofType(ExpressionType.fromColumnType(type), value); + ExprEval cast = eval.castTo(ExpressionType.LONG); + if (cast.isNumericNull()) { + return makeNullValueMatcher(selector); + } + return makeLongMatcher(cast.asLong()); + } + + private BaseVectorValueMatcher makeLongMatcher(long matchValLong) + { return new BaseVectorValueMatcher(selector) { final VectorMatch match = VectorMatch.wrap(new int[selector.getMaxVectorSize()]); diff --git a/processing/src/main/java/org/apache/druid/query/filter/vector/MultiValueStringVectorValueMatcher.java b/processing/src/main/java/org/apache/druid/query/filter/vector/MultiValueStringVectorValueMatcher.java index d192673784dc..e1fd144aaabd 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/vector/MultiValueStringVectorValueMatcher.java +++ b/processing/src/main/java/org/apache/druid/query/filter/vector/MultiValueStringVectorValueMatcher.java @@ -21,8 +21,11 @@ import com.google.common.base.Predicate; import org.apache.druid.common.config.NullHandling; +import org.apache.druid.math.expr.ExprEval; +import org.apache.druid.math.expr.ExpressionType; import 
org.apache.druid.query.filter.DruidPredicateFactory; import org.apache.druid.segment.IdLookup; +import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.data.IndexedInts; import org.apache.druid.segment.vector.MultiValueDimensionVectorSelector; @@ -98,6 +101,14 @@ public ReadableVectorMatch match(final ReadableVectorMatch mask) } } + @Override + public VectorValueMatcher makeMatcher(Object value, ColumnType type) + { + ExprEval eval = ExprEval.ofType(ExpressionType.fromColumnType(type), value); + ExprEval cast = eval.castTo(ExpressionType.STRING); + return makeMatcher(cast.asString()); + } + @Override public VectorValueMatcher makeMatcher(final DruidPredicateFactory predicateFactory) { diff --git a/processing/src/main/java/org/apache/druid/query/filter/vector/ObjectVectorValueMatcher.java b/processing/src/main/java/org/apache/druid/query/filter/vector/ObjectVectorValueMatcher.java index 05f30ce8fbef..f62039012046 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/vector/ObjectVectorValueMatcher.java +++ b/processing/src/main/java/org/apache/druid/query/filter/vector/ObjectVectorValueMatcher.java @@ -21,6 +21,7 @@ import com.google.common.base.Predicate; import org.apache.druid.query.filter.DruidPredicateFactory; +import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.vector.VectorObjectSelector; import javax.annotation.Nullable; @@ -52,6 +53,13 @@ public VectorValueMatcher makeMatcher(@Nullable String value) return BooleanVectorValueMatcher.of(selector, value == null); } + @Override + public VectorValueMatcher makeMatcher(Object value, ColumnType type) + { + // todo (clint): something cooler... 
+ return BooleanVectorValueMatcher.of(selector, value == null); + } + @Override public VectorValueMatcher makeMatcher(DruidPredicateFactory predicateFactory) { diff --git a/processing/src/main/java/org/apache/druid/query/filter/vector/SingleValueStringVectorValueMatcher.java b/processing/src/main/java/org/apache/druid/query/filter/vector/SingleValueStringVectorValueMatcher.java index 49646fb97567..c73f868abccb 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/vector/SingleValueStringVectorValueMatcher.java +++ b/processing/src/main/java/org/apache/druid/query/filter/vector/SingleValueStringVectorValueMatcher.java @@ -21,8 +21,11 @@ import com.google.common.base.Predicate; import org.apache.druid.common.config.NullHandling; +import org.apache.druid.math.expr.ExprEval; +import org.apache.druid.math.expr.ExpressionType; import org.apache.druid.query.filter.DruidPredicateFactory; import org.apache.druid.segment.IdLookup; +import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.filter.ValueMatchers; import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector; @@ -106,6 +109,14 @@ public ReadableVectorMatch match(final ReadableVectorMatch mask) } } + @Override + public VectorValueMatcher makeMatcher(Object value, ColumnType type) + { + ExprEval eval = ExprEval.ofType(ExpressionType.fromColumnType(type), value); + ExprEval cast = eval.castTo(ExpressionType.STRING); + return makeMatcher(cast.asString()); + } + @Override public VectorValueMatcher makeMatcher(final DruidPredicateFactory predicateFactory) { diff --git a/processing/src/main/java/org/apache/druid/query/filter/vector/StringObjectVectorValueMatcher.java b/processing/src/main/java/org/apache/druid/query/filter/vector/StringObjectVectorValueMatcher.java index 6e5e09dbc253..26982013cf56 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/vector/StringObjectVectorValueMatcher.java +++ 
b/processing/src/main/java/org/apache/druid/query/filter/vector/StringObjectVectorValueMatcher.java @@ -20,7 +20,10 @@ package org.apache.druid.query.filter.vector; import com.google.common.base.Predicate; +import org.apache.druid.math.expr.ExprEval; +import org.apache.druid.math.expr.ExpressionType; import org.apache.druid.query.filter.DruidPredicateFactory; +import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.vector.VectorObjectSelector; import javax.annotation.Nullable; @@ -67,6 +70,14 @@ public ReadableVectorMatch match(final ReadableVectorMatch mask) }; } + @Override + public VectorValueMatcher makeMatcher(Object value, ColumnType type) + { + ExprEval eval = ExprEval.ofType(ExpressionType.fromColumnType(type), value); + ExprEval cast = eval.castTo(ExpressionType.STRING); + return makeMatcher(cast.asString()); + } + @Override public VectorValueMatcher makeMatcher(DruidPredicateFactory predicateFactory) { diff --git a/processing/src/main/java/org/apache/druid/query/filter/vector/VectorValueMatcherColumnProcessorFactory.java b/processing/src/main/java/org/apache/druid/query/filter/vector/VectorValueMatcherColumnProcessorFactory.java index 93826c729174..0d16ee24230b 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/vector/VectorValueMatcherColumnProcessorFactory.java +++ b/processing/src/main/java/org/apache/druid/query/filter/vector/VectorValueMatcherColumnProcessorFactory.java @@ -86,6 +86,12 @@ public VectorValueMatcherFactory makeLongProcessor( return new LongVectorValueMatcher(selector); } + @Override + public VectorValueMatcherFactory makeArrayProcessor(ColumnCapabilities capabilities, VectorObjectSelector selector) + { + return new ArrayVectorValueMatcher(capabilities, selector); + } + @Override public VectorValueMatcherFactory makeObjectProcessor( final ColumnCapabilities capabilities, diff --git a/processing/src/main/java/org/apache/druid/query/filter/vector/VectorValueMatcherFactory.java 
b/processing/src/main/java/org/apache/druid/query/filter/vector/VectorValueMatcherFactory.java index e61373cc80fb..666e85f4f238 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/vector/VectorValueMatcherFactory.java +++ b/processing/src/main/java/org/apache/druid/query/filter/vector/VectorValueMatcherFactory.java @@ -20,6 +20,7 @@ package org.apache.druid.query.filter.vector; import org.apache.druid.query.filter.DruidPredicateFactory; +import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.vector.VectorValueSelector; import javax.annotation.Nullable; @@ -28,6 +29,8 @@ public interface VectorValueMatcherFactory { VectorValueMatcher makeMatcher(@Nullable String value); + VectorValueMatcher makeMatcher(Object value, ColumnType type); + VectorValueMatcher makeMatcher(DruidPredicateFactory predicateFactory); default VectorValueMatcher makeNullValueMatcher(VectorValueSelector selector) diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/GroupByVectorColumnProcessorFactory.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/GroupByVectorColumnProcessorFactory.java index 3af16ea3d0dc..9c73d0714087 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/GroupByVectorColumnProcessorFactory.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/GroupByVectorColumnProcessorFactory.java @@ -106,6 +106,14 @@ public GroupByVectorColumnSelector makeLongProcessor( return new NullableLongGroupByVectorColumnSelector(selector); } + @Override + public GroupByVectorColumnSelector makeArrayProcessor(ColumnCapabilities capabilities, VectorObjectSelector selector) + { + throw new UnsupportedOperationException( + "Vectorized groupBys on ARRAY columns are not yet implemented" + ); + } + @Override public GroupByVectorColumnSelector makeObjectProcessor( final ColumnCapabilities capabilities, @@ -113,6 +121,11 @@ 
public GroupByVectorColumnSelector makeObjectProcessor( ) { if (capabilities.is(ValueType.STRING)) { + if (capabilities.hasMultipleValues().isTrue()) { + throw new UnsupportedOperationException( + "Vectorized groupBys on multi-value dictionary-encoded dimensions are not yet implemented" + ); + } return new DictionaryBuildingSingleValueStringGroupByVectorColumnSelector(selector); } return NilGroupByVectorColumnSelector.INSTANCE; diff --git a/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java b/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java index 7511cedae217..3f45f5c82992 100644 --- a/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java +++ b/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java @@ -44,12 +44,12 @@ import org.apache.druid.segment.column.ColumnTypeFactory; import org.apache.druid.segment.column.ComplexColumn; import org.apache.druid.segment.column.DictionaryEncodedColumn; -import org.apache.druid.segment.column.DictionaryEncodedStringValueIndex; import org.apache.druid.segment.column.RowSignature; import org.apache.druid.segment.column.TypeSignature; import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.data.IndexedInts; import org.apache.druid.segment.incremental.IncrementalIndexStorageAdapter; +import org.apache.druid.segment.index.DictionaryEncodedStringValueIndex; import org.apache.druid.segment.serde.ComplexMetricSerde; import org.apache.druid.segment.serde.ComplexMetrics; import org.joda.time.DateTime; diff --git a/processing/src/main/java/org/apache/druid/query/rowsandcols/semantic/DefaultFramedOnHeapAggregatable.java b/processing/src/main/java/org/apache/druid/query/rowsandcols/semantic/DefaultFramedOnHeapAggregatable.java index a6c9642c3bac..bb011ff79312 100644 --- a/processing/src/main/java/org/apache/druid/query/rowsandcols/semantic/DefaultFramedOnHeapAggregatable.java +++ 
b/processing/src/main/java/org/apache/druid/query/rowsandcols/semantic/DefaultFramedOnHeapAggregatable.java @@ -526,7 +526,6 @@ public CumulativeColumnSelectorFactory(AggregatorFactory factory, Object[] resul .setDictionaryEncoded(false) .setHasMultipleValues(false) .setDictionaryValuesUnique(false) - .setFilterable(false) .setType(factory.getIntermediateType()); } diff --git a/processing/src/main/java/org/apache/druid/query/search/AutoStrategy.java b/processing/src/main/java/org/apache/druid/query/search/AutoStrategy.java index 36ddcc3ad9f7..8e22aec5d6a1 100644 --- a/processing/src/main/java/org/apache/druid/query/search/AutoStrategy.java +++ b/processing/src/main/java/org/apache/druid/query/search/AutoStrategy.java @@ -29,7 +29,7 @@ import org.apache.druid.segment.Segment; import org.apache.druid.segment.column.ColumnHolder; import org.apache.druid.segment.column.ColumnIndexSupplier; -import org.apache.druid.segment.column.DictionaryEncodedStringValueIndex; +import org.apache.druid.segment.index.DictionaryEncodedStringValueIndex; import java.util.List; diff --git a/processing/src/main/java/org/apache/druid/query/search/UseIndexesStrategy.java b/processing/src/main/java/org/apache/druid/query/search/UseIndexesStrategy.java index 60d3b17ba6ce..1f4751e56079 100644 --- a/processing/src/main/java/org/apache/druid/query/search/UseIndexesStrategy.java +++ b/processing/src/main/java/org/apache/druid/query/search/UseIndexesStrategy.java @@ -39,12 +39,12 @@ import org.apache.druid.segment.Segment; import org.apache.druid.segment.StorageAdapter; import org.apache.druid.segment.VirtualColumns; -import org.apache.druid.segment.column.BitmapColumnIndex; import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnHolder; import org.apache.druid.segment.column.ColumnIndexSupplier; -import org.apache.druid.segment.column.DictionaryEncodedStringValueIndex; import org.apache.druid.segment.column.NumericColumn; +import 
org.apache.druid.segment.index.BitmapColumnIndex; +import org.apache.druid.segment.index.DictionaryEncodedStringValueIndex; import org.apache.druid.segment.virtual.VirtualizedColumnInspector; import org.joda.time.Interval; diff --git a/processing/src/main/java/org/apache/druid/segment/ColumnProcessorFactory.java b/processing/src/main/java/org/apache/druid/segment/ColumnProcessorFactory.java index 1c4ca2f6f723..360ba5abf69c 100644 --- a/processing/src/main/java/org/apache/druid/segment/ColumnProcessorFactory.java +++ b/processing/src/main/java/org/apache/druid/segment/ColumnProcessorFactory.java @@ -76,6 +76,8 @@ public interface ColumnProcessorFactory */ T makeLongProcessor(BaseLongColumnValueSelector selector); + T makeArrayProcessor(BaseObjectColumnValueSelector selector); + /** * Create a processor for a complex column. * diff --git a/processing/src/main/java/org/apache/druid/segment/ColumnProcessors.java b/processing/src/main/java/org/apache/druid/segment/ColumnProcessors.java index 28eaef4c2697..3cbfb7e05f65 100644 --- a/processing/src/main/java/org/apache/druid/segment/ColumnProcessors.java +++ b/processing/src/main/java/org/apache/druid/segment/ColumnProcessors.java @@ -286,6 +286,8 @@ private static T makeProcessorInternal( return processorFactory.makeFloatProcessor(valueSelectorFunction.apply(selectorFactory)); case DOUBLE: return processorFactory.makeDoubleProcessor(valueSelectorFunction.apply(selectorFactory)); + case ARRAY: + return processorFactory.makeArrayProcessor(valueSelectorFunction.apply(selectorFactory)); case COMPLEX: return processorFactory.makeComplexProcessor(valueSelectorFunction.apply(selectorFactory)); default: @@ -359,6 +361,8 @@ private static T makeVectorProcessorInternal( return processorFactory.makeFloatProcessor(capabilities, valueSelectorFn.apply(selectorFactory)); case DOUBLE: return processorFactory.makeDoubleProcessor(capabilities, valueSelectorFn.apply(selectorFactory)); + case ARRAY: + return 
processorFactory.makeArrayProcessor(capabilities, objectSelectorFn.apply(selectorFactory)); case COMPLEX: return processorFactory.makeObjectProcessor(capabilities, objectSelectorFn.apply(selectorFactory)); default: diff --git a/processing/src/main/java/org/apache/druid/segment/ColumnSelectorColumnIndexSelector.java b/processing/src/main/java/org/apache/druid/segment/ColumnSelectorColumnIndexSelector.java index 6b7e0b666b65..62d2870371b9 100644 --- a/processing/src/main/java/org/apache/druid/segment/ColumnSelectorColumnIndexSelector.java +++ b/processing/src/main/java/org/apache/druid/segment/ColumnSelectorColumnIndexSelector.java @@ -80,10 +80,9 @@ public ColumnIndexSupplier getIndexSupplier(String column) indexSupplier = virtualColumns.getIndexSupplier(column, columnSelector); } else { final ColumnHolder columnHolder = columnSelector.getColumnHolder(column); - // for missing columns and columns with types that do not support filtering, - // treat the column as if it were full of nulls. This allows callers to fabricate an 'all true' or 'all false' + // for missing columns we return null here. This allows callers to fabricate an 'all true' or 'all false' // index so that filters which match the values can still use "indexes". 
- if (columnHolder == null || !columnHolder.getCapabilities().isFilterable()) { + if (columnHolder == null) { return null; } indexSupplier = columnHolder.getIndexSupplier(); diff --git a/processing/src/main/java/org/apache/druid/segment/FilterAnalysis.java b/processing/src/main/java/org/apache/druid/segment/FilterAnalysis.java index fc9529f7d29d..c700c5b5e86b 100644 --- a/processing/src/main/java/org/apache/druid/segment/FilterAnalysis.java +++ b/processing/src/main/java/org/apache/druid/segment/FilterAnalysis.java @@ -24,9 +24,9 @@ import org.apache.druid.query.DefaultBitmapResultFactory; import org.apache.druid.query.QueryMetrics; import org.apache.druid.query.filter.Filter; -import org.apache.druid.segment.column.BitmapColumnIndex; import org.apache.druid.segment.filter.AndFilter; import org.apache.druid.segment.filter.Filters; +import org.apache.druid.segment.index.BitmapColumnIndex; import javax.annotation.Nullable; import java.util.ArrayList; diff --git a/processing/src/main/java/org/apache/druid/segment/FilteredOffset.java b/processing/src/main/java/org/apache/druid/segment/FilteredOffset.java index 64fd12d374a6..f6103c3ce2fa 100644 --- a/processing/src/main/java/org/apache/druid/segment/FilteredOffset.java +++ b/processing/src/main/java/org/apache/druid/segment/FilteredOffset.java @@ -27,10 +27,10 @@ import org.apache.druid.query.filter.RowOffsetMatcherFactory; import org.apache.druid.query.filter.ValueMatcher; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; -import org.apache.druid.segment.column.BitmapColumnIndex; import org.apache.druid.segment.data.Offset; import org.apache.druid.segment.data.ReadableOffset; import org.apache.druid.segment.filter.BooleanValueMatcher; +import org.apache.druid.segment.index.BitmapColumnIndex; import org.roaringbitmap.IntIterator; public final class FilteredOffset extends Offset diff --git a/processing/src/main/java/org/apache/druid/segment/QueryableIndexIndexableAdapter.java 
b/processing/src/main/java/org/apache/druid/segment/QueryableIndexIndexableAdapter.java index 9ad2e734dc7d..17e49cd7c38c 100644 --- a/processing/src/main/java/org/apache/druid/segment/QueryableIndexIndexableAdapter.java +++ b/processing/src/main/java/org/apache/druid/segment/QueryableIndexIndexableAdapter.java @@ -30,11 +30,11 @@ import org.apache.druid.segment.column.ColumnIndexSupplier; import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.DictionaryEncodedColumn; -import org.apache.druid.segment.column.DictionaryEncodedValueIndex; import org.apache.druid.segment.data.BitmapValues; import org.apache.druid.segment.data.CloseableIndexed; import org.apache.druid.segment.data.ImmutableBitmapValues; import org.apache.druid.segment.data.IndexedIterable; +import org.apache.druid.segment.index.DictionaryEncodedValueIndex; import org.apache.druid.segment.nested.NestedCommonFormatColumn; import org.apache.druid.segment.nested.NestedDataComplexTypeSerde; import org.apache.druid.segment.nested.SortedValueDictionary; diff --git a/processing/src/main/java/org/apache/druid/segment/QueryableIndexStorageAdapter.java b/processing/src/main/java/org/apache/druid/segment/QueryableIndexStorageAdapter.java index c0c117b8c8a6..a6f52e1fe5a0 100644 --- a/processing/src/main/java/org/apache/druid/segment/QueryableIndexStorageAdapter.java +++ b/processing/src/main/java/org/apache/druid/segment/QueryableIndexStorageAdapter.java @@ -33,9 +33,9 @@ import org.apache.druid.segment.column.ColumnHolder; import org.apache.druid.segment.column.ColumnIndexSupplier; import org.apache.druid.segment.column.DictionaryEncodedColumn; -import org.apache.druid.segment.column.DictionaryEncodedStringValueIndex; import org.apache.druid.segment.column.NumericColumn; import org.apache.druid.segment.data.Indexed; +import org.apache.druid.segment.index.DictionaryEncodedStringValueIndex; import org.apache.druid.segment.vector.VectorCursor; import org.joda.time.DateTime; import 
org.joda.time.Interval; diff --git a/processing/src/main/java/org/apache/druid/segment/VectorColumnProcessorFactory.java b/processing/src/main/java/org/apache/druid/segment/VectorColumnProcessorFactory.java index d9f4bbce456d..4df6a81ff96b 100644 --- a/processing/src/main/java/org/apache/druid/segment/VectorColumnProcessorFactory.java +++ b/processing/src/main/java/org/apache/druid/segment/VectorColumnProcessorFactory.java @@ -83,6 +83,8 @@ T makeMultiValueDimensionProcessor( */ T makeLongProcessor(ColumnCapabilities capabilities, VectorValueSelector selector); + T makeArrayProcessor(ColumnCapabilities capabilities, VectorObjectSelector selector); + /** * Called when {@link ColumnCapabilities#getType()} is COMPLEX. May also be called for STRING typed columns in * cases where the dictionary does not exist or is not expected to be useful. diff --git a/processing/src/main/java/org/apache/druid/segment/column/CapabilitiesBasedFormat.java b/processing/src/main/java/org/apache/druid/segment/column/CapabilitiesBasedFormat.java index d04123d46536..81da30cceede 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/CapabilitiesBasedFormat.java +++ b/processing/src/main/java/org/apache/druid/segment/column/CapabilitiesBasedFormat.java @@ -136,7 +136,6 @@ public ColumnFormat merge(@Nullable ColumnFormat otherFormat) if (merged.hasSpatialIndexes() != otherSnapshot.hasSpatialIndexes()) { merged.setHasSpatialIndexes(merged.hasSpatialIndexes() || otherSnapshot.hasSpatialIndexes()); } - merged.setFilterable(merged.isFilterable() && otherSnapshot.isFilterable()); return new CapabilitiesBasedFormat(merged); } diff --git a/processing/src/main/java/org/apache/druid/segment/column/ColumnBuilder.java b/processing/src/main/java/org/apache/druid/segment/column/ColumnBuilder.java index e6d5b418786b..f84873700943 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/ColumnBuilder.java +++ 
b/processing/src/main/java/org/apache/druid/segment/column/ColumnBuilder.java @@ -96,13 +96,6 @@ public ColumnBuilder setDictionaryEncodedColumnSupplier(Supplier columnSupplier) { checkColumnSupplierNotSet(); diff --git a/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilities.java b/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilities.java index 4ea27c495a32..93769376b245 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilities.java +++ b/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilities.java @@ -74,12 +74,6 @@ public interface ColumnCapabilities extends TypeSignature */ boolean hasSpatialIndexes(); - /** - * All Druid primitive columns support filtering, maybe with or without indexes, but by default complex columns - * do not support direct filtering, unless provided by through a custom implementation. - */ - boolean isFilterable(); - /** * Does this column contain null values? If so, callers, especially for primitive numeric columns, will need to check * for null value rows and act accordingly diff --git a/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java b/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java index 74d090be8c4b..f8464a0cf716 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java +++ b/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java @@ -79,7 +79,6 @@ public static ColumnCapabilitiesImpl copyOf(@Nullable final ColumnCapabilities o capabilities.dictionaryValuesSorted = other.areDictionaryValuesSorted(); capabilities.dictionaryValuesUnique = other.areDictionaryValuesUnique(); capabilities.hasNulls = other.hasNulls(); - capabilities.filterable = other.isFilterable(); } return capabilities; } @@ -179,8 +178,6 @@ public static ColumnCapabilitiesImpl createSimpleArrayColumnCapabilities(TypeSig @JsonIgnore 
private Capable dictionaryValuesUnique = Capable.UNKNOWN; @JsonIgnore - private boolean filterable; - @JsonIgnore private Capable hasNulls = Capable.UNKNOWN; @Nullable @@ -314,16 +311,4 @@ public ColumnCapabilitiesImpl setHasNulls(Capable hasNulls) this.hasNulls = hasNulls; return this; } - - @Override - public boolean isFilterable() - { - return (type != null && (isPrimitive() || isArray())) || filterable; - } - - public ColumnCapabilitiesImpl setFilterable(boolean filterable) - { - this.filterable = filterable; - return this; - } } diff --git a/processing/src/main/java/org/apache/druid/segment/column/ColumnConfig.java b/processing/src/main/java/org/apache/druid/segment/column/ColumnConfig.java index bad9dc6a6f38..924bc576cbb2 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/ColumnConfig.java +++ b/processing/src/main/java/org/apache/druid/segment/column/ColumnConfig.java @@ -19,9 +19,16 @@ package org.apache.druid.segment.column; +import org.apache.druid.segment.index.BitmapColumnIndex; +import org.apache.druid.segment.index.DruidPredicateIndex; +import org.apache.druid.segment.index.LexicographicalRangeIndex; +import org.apache.druid.segment.index.NumericRangeIndex; + public interface ColumnConfig { - ColumnConfig DEFAULT = new ColumnConfig() {}; + ColumnConfig DEFAULT = new ColumnConfig() + { + }; ColumnConfig ALWAYS_USE_INDEXES = new ColumnConfig() { diff --git a/processing/src/main/java/org/apache/druid/segment/filter/AndFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/AndFilter.java index 270e3b0ff677..d411589d99ed 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/AndFilter.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/AndFilter.java @@ -39,9 +39,9 @@ import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; import org.apache.druid.segment.ColumnInspector; import org.apache.druid.segment.ColumnSelectorFactory; -import 
org.apache.druid.segment.column.BitmapColumnIndex; import org.apache.druid.segment.column.ColumnIndexCapabilities; import org.apache.druid.segment.column.SimpleColumnIndexCapabilities; +import org.apache.druid.segment.index.BitmapColumnIndex; import org.apache.druid.segment.vector.VectorColumnSelectorFactory; import javax.annotation.Nullable; diff --git a/processing/src/main/java/org/apache/druid/segment/filter/BoundFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/BoundFilter.java index 00cc5898989b..f08b19813990 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/BoundFilter.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/BoundFilter.java @@ -44,12 +44,12 @@ import org.apache.druid.segment.ColumnProcessors; import org.apache.druid.segment.ColumnSelector; import org.apache.druid.segment.ColumnSelectorFactory; -import org.apache.druid.segment.column.BitmapColumnIndex; import org.apache.druid.segment.column.ColumnIndexCapabilities; import org.apache.druid.segment.column.ColumnIndexSupplier; -import org.apache.druid.segment.column.LexicographicalRangeIndex; -import org.apache.druid.segment.column.NullValueIndex; -import org.apache.druid.segment.column.NumericRangeIndex; +import org.apache.druid.segment.index.BitmapColumnIndex; +import org.apache.druid.segment.index.LexicographicalRangeIndex; +import org.apache.druid.segment.index.NullValueIndex; +import org.apache.druid.segment.index.NumericRangeIndex; import org.apache.druid.segment.vector.VectorColumnSelectorFactory; import javax.annotation.Nullable; diff --git a/processing/src/main/java/org/apache/druid/segment/filter/ColumnComparisonFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/ColumnComparisonFilter.java index b306f6bfa3cc..b280be27a9f2 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/ColumnComparisonFilter.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/ColumnComparisonFilter.java @@ 
-20,6 +20,7 @@ package org.apache.druid.segment.filter; import com.google.common.base.Preconditions; +import org.apache.druid.math.expr.Evals; import org.apache.druid.query.dimension.DimensionSpec; import org.apache.druid.query.filter.ColumnIndexSelector; import org.apache.druid.query.filter.Filter; @@ -34,9 +35,9 @@ import org.apache.druid.segment.ColumnSelector; import org.apache.druid.segment.ColumnSelectorFactory; import org.apache.druid.segment.DimensionSelector; -import org.apache.druid.segment.column.BitmapColumnIndex; import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.data.IndexedInts; +import org.apache.druid.segment.index.BitmapColumnIndex; import javax.annotation.Nullable; import java.util.ArrayList; @@ -233,6 +234,23 @@ public Supplier makeLongProcessor(BaseLongColumnValueSelector selector }; } + @Override + public Supplier makeArrayProcessor(BaseObjectColumnValueSelector selector) + { + return () -> { + final Object o = selector.getObject(); + if (o instanceof Object[]) { + final Object[] arr = (Object[]) o; + final String[] s = new String[arr.length]; + for (int i = 0; i < arr.length; i++) { + s[i] = Evals.asString(arr[i]); + } + return s; + } + return NULL_VALUE; + }; + } + @Override public Supplier makeComplexProcessor(BaseObjectColumnValueSelector selector) { diff --git a/processing/src/main/java/org/apache/druid/segment/filter/DimensionPredicateFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/DimensionPredicateFilter.java index c11ae08eb3de..e5e6e9563a7f 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/DimensionPredicateFilter.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/DimensionPredicateFilter.java @@ -39,7 +39,7 @@ import org.apache.druid.segment.ColumnProcessors; import org.apache.druid.segment.ColumnSelector; import org.apache.druid.segment.ColumnSelectorFactory; -import org.apache.druid.segment.column.BitmapColumnIndex; +import 
org.apache.druid.segment.index.BitmapColumnIndex; import org.apache.druid.segment.vector.VectorColumnSelectorFactory; import javax.annotation.Nullable; diff --git a/processing/src/main/java/org/apache/druid/segment/filter/ExpressionFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/ExpressionFilter.java index a8f16e47f95c..c4d3fd3db0fa 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/ExpressionFilter.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/ExpressionFilter.java @@ -46,10 +46,10 @@ import org.apache.druid.segment.ColumnSelector; import org.apache.druid.segment.ColumnSelectorFactory; import org.apache.druid.segment.ColumnValueSelector; -import org.apache.druid.segment.column.BitmapColumnIndex; import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnCapabilitiesImpl; import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.index.BitmapColumnIndex; import org.apache.druid.segment.vector.VectorColumnSelectorFactory; import org.apache.druid.segment.virtual.ExpressionSelectors; import org.apache.druid.segment.virtual.ExpressionVectorSelectors; @@ -122,6 +122,11 @@ public VectorValueMatcher makeVectorMatcher(VectorColumnSelectorFactory factory) ColumnCapabilitiesImpl.createSimpleSingleValueStringColumnCapabilities(), ExpressionVectorSelectors.makeVectorObjectSelector(factory, theExpr) ).makeMatcher(predicateFactory); + case ARRAY: + return VectorValueMatcherColumnProcessorFactory.instance().makeObjectProcessor( + ColumnCapabilitiesImpl.createDefault().setType(ExpressionType.toColumnType(outputType)).setHasNulls(true), + ExpressionVectorSelectors.makeVectorObjectSelector(factory, theExpr) + ).makeMatcher(predicateFactory); default: if (ExpressionType.NESTED_DATA.equals(outputType)) { return VectorValueMatcherColumnProcessorFactory.instance().makeObjectProcessor( @@ -199,10 +204,7 @@ public BitmapColumnIndex 
getBitmapColumnIndex(ColumnIndexSelector selector) // we use a default 'all false' capabilities here because if the column has a bitmap index, but the capabilities // are null, it means that the column is missing and should take the single valued path, while truly unknown // things will not have a bitmap index available - final ColumnCapabilities capabilities = selector.getColumnCapabilitiesWithDefault( - column, - ColumnCapabilitiesImpl.createDefault() - ); + final ColumnCapabilities capabilities = selector.getColumnCapabilities(column); if (ExpressionSelectors.canMapOverDictionary(details, capabilities)) { if (!Filters.checkFilterTuningUseIndex(column, selector, filterTuning)) { return null; @@ -210,7 +212,7 @@ public BitmapColumnIndex getBitmapColumnIndex(ColumnIndexSelector selector) return Filters.makePredicateIndex( column, selector, - getBitmapPredicateFactory() + getBitmapPredicateFactory(capabilities) ); } } @@ -338,7 +340,7 @@ public boolean equals(Object obj) * {@link DruidPredicateFactory} which evaluates the expression using the value as input, used for building predicate * indexes where the raw column values will be checked against this predicate */ - private DruidPredicateFactory getBitmapPredicateFactory() + private DruidPredicateFactory getBitmapPredicateFactory(@Nullable ColumnCapabilities inputCapabilities) { return new DruidPredicateFactory() { @@ -414,6 +416,19 @@ public boolean applyNull() }; } + @Override + public Predicate makeArrayPredicate() + { + if (inputCapabilities == null) { + return input -> expr.get() + .eval(InputBindings.forInputSupplier(ExpressionType.STRING_ARRAY, () -> input)) + .asBoolean(); + } + return input -> expr.get().eval( + InputBindings.forInputSupplier(ExpressionType.fromColumnType(inputCapabilities), () -> input) + ).asBoolean(); + } + // The hashcode and equals are to make SubclassesMustOverrideEqualsAndHashCodeTest stop complaining..
// DruidPredicateFactory currently doesn't really need equals or hashcode since 'toString' method that is actually // called when testing equality of DimensionPredicateFilter, so it's the truly required method, but that seems diff --git a/processing/src/main/java/org/apache/druid/segment/filter/FalseFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/FalseFilter.java index 295f7c744263..cbc2b21b2875 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/FalseFilter.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/FalseFilter.java @@ -27,8 +27,8 @@ import org.apache.druid.segment.ColumnInspector; import org.apache.druid.segment.ColumnSelector; import org.apache.druid.segment.ColumnSelectorFactory; -import org.apache.druid.segment.column.AllFalseBitmapColumnIndex; -import org.apache.druid.segment.column.BitmapColumnIndex; +import org.apache.druid.segment.index.AllFalseBitmapColumnIndex; +import org.apache.druid.segment.index.BitmapColumnIndex; import org.apache.druid.segment.vector.VectorColumnSelectorFactory; import javax.annotation.Nullable; diff --git a/processing/src/main/java/org/apache/druid/segment/filter/Filters.java b/processing/src/main/java/org/apache/druid/segment/filter/Filters.java index 7f9abfae2fa0..69fef386952e 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/Filters.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/Filters.java @@ -35,16 +35,16 @@ import org.apache.druid.segment.ColumnProcessors; import org.apache.druid.segment.ColumnSelector; import org.apache.druid.segment.ColumnSelectorFactory; -import org.apache.druid.segment.column.AllFalseBitmapColumnIndex; -import org.apache.druid.segment.column.AllTrueBitmapColumnIndex; -import org.apache.druid.segment.column.BitmapColumnIndex; import org.apache.druid.segment.column.ColumnHolder; import org.apache.druid.segment.column.ColumnIndexSupplier; -import 
org.apache.druid.segment.column.DictionaryEncodedStringValueIndex; -import org.apache.druid.segment.column.DruidPredicateIndex; import org.apache.druid.segment.filter.cnf.CNFFilterExplosionException; import org.apache.druid.segment.filter.cnf.CalciteCnfHelper; import org.apache.druid.segment.filter.cnf.HiveCnfHelper; +import org.apache.druid.segment.index.AllFalseBitmapColumnIndex; +import org.apache.druid.segment.index.AllTrueBitmapColumnIndex; +import org.apache.druid.segment.index.BitmapColumnIndex; +import org.apache.druid.segment.index.DictionaryEncodedStringValueIndex; +import org.apache.druid.segment.index.DruidPredicateIndex; import org.apache.druid.segment.join.filter.AllNullColumnSelectorFactory; import javax.annotation.Nullable; @@ -90,29 +90,6 @@ public static Filter toFilter(@Nullable DimFilter dimFilter) return dimFilter == null ? null : dimFilter.toOptimizedFilter(); } - /** - * Create a ValueMatcher that compares row values to the provided string. - *

- * An implementation of this method should be able to handle dimensions of various types. - * - * @param columnSelectorFactory Selector for columns. - * @param columnName The column to filter. - * @param value The value to match against, represented as a String. - * - * @return An object that matches row values on the provided value. - */ - public static ValueMatcher makeValueMatcher( - final ColumnSelectorFactory columnSelectorFactory, - final String columnName, - final String value - ) - { - return ColumnProcessors.makeProcessor( - columnName, - new ConstantValueMatcherFactory(value), - columnSelectorFactory - ); - } /** * Create a ValueMatcher that applies a predicate to row values. diff --git a/processing/src/main/java/org/apache/druid/segment/filter/JavaScriptFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/JavaScriptFilter.java index 5ae1679d0458..05e357970cde 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/JavaScriptFilter.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/JavaScriptFilter.java @@ -27,7 +27,7 @@ import org.apache.druid.query.filter.ValueMatcher; import org.apache.druid.segment.ColumnSelector; import org.apache.druid.segment.ColumnSelectorFactory; -import org.apache.druid.segment.column.BitmapColumnIndex; +import org.apache.druid.segment.index.BitmapColumnIndex; import javax.annotation.Nullable; import java.util.Objects; diff --git a/processing/src/main/java/org/apache/druid/segment/filter/LikeFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/LikeFilter.java index e4ffb7f428f5..e3e8fe85ffb5 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/LikeFilter.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/LikeFilter.java @@ -34,12 +34,12 @@ import org.apache.druid.segment.ColumnProcessors; import org.apache.druid.segment.ColumnSelector; import org.apache.druid.segment.ColumnSelectorFactory; -import 
org.apache.druid.segment.column.AllFalseBitmapColumnIndex; -import org.apache.druid.segment.column.AllTrueBitmapColumnIndex; -import org.apache.druid.segment.column.BitmapColumnIndex; import org.apache.druid.segment.column.ColumnIndexSupplier; -import org.apache.druid.segment.column.LexicographicalRangeIndex; -import org.apache.druid.segment.column.StringValueSetIndex; +import org.apache.druid.segment.index.AllFalseBitmapColumnIndex; +import org.apache.druid.segment.index.AllTrueBitmapColumnIndex; +import org.apache.druid.segment.index.BitmapColumnIndex; +import org.apache.druid.segment.index.LexicographicalRangeIndex; +import org.apache.druid.segment.index.StringValueSetIndex; import org.apache.druid.segment.vector.VectorColumnSelectorFactory; import javax.annotation.Nullable; diff --git a/processing/src/main/java/org/apache/druid/segment/filter/NotFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/NotFilter.java index b508dd45a1ad..2201c06c4410 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/NotFilter.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/NotFilter.java @@ -32,8 +32,8 @@ import org.apache.druid.segment.ColumnInspector; import org.apache.druid.segment.ColumnSelector; import org.apache.druid.segment.ColumnSelectorFactory; -import org.apache.druid.segment.column.BitmapColumnIndex; import org.apache.druid.segment.column.ColumnIndexCapabilities; +import org.apache.druid.segment.index.BitmapColumnIndex; import org.apache.druid.segment.vector.VectorColumnSelectorFactory; import javax.annotation.Nullable; diff --git a/processing/src/main/java/org/apache/druid/segment/filter/OrFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/OrFilter.java index 77110f741a6e..f5840d3d0c8c 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/OrFilter.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/OrFilter.java @@ -39,9 +39,9 @@ import 
org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; import org.apache.druid.segment.ColumnInspector; import org.apache.druid.segment.ColumnSelectorFactory; -import org.apache.druid.segment.column.BitmapColumnIndex; import org.apache.druid.segment.column.ColumnIndexCapabilities; import org.apache.druid.segment.column.SimpleColumnIndexCapabilities; +import org.apache.druid.segment.index.BitmapColumnIndex; import org.apache.druid.segment.vector.VectorColumnSelectorFactory; import javax.annotation.Nullable; diff --git a/processing/src/main/java/org/apache/druid/segment/filter/PredicateValueMatcherFactory.java b/processing/src/main/java/org/apache/druid/segment/filter/PredicateValueMatcherFactory.java index 6ccd46648f7f..7a536c7a2094 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/PredicateValueMatcherFactory.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/PredicateValueMatcherFactory.java @@ -22,6 +22,7 @@ import com.google.common.base.Predicate; import org.apache.druid.common.config.NullHandling; import org.apache.druid.data.input.Rows; +import org.apache.druid.math.expr.ExprEval; import org.apache.druid.query.filter.DruidDoublePredicate; import org.apache.druid.query.filter.DruidFloatPredicate; import org.apache.druid.query.filter.DruidLongPredicate; @@ -46,7 +47,7 @@ public class PredicateValueMatcherFactory implements ColumnProcessorFactory selector) + { + if (selector instanceof NilColumnValueSelector) { + // Column does not exist, or is unfilterable. Treat it as all nulls. 
+ return BooleanValueMatcher.of(predicateFactory.makeArrayPredicate().apply(null)); + } else { + // use the object predicate + final Predicate predicate = predicateFactory.makeArrayPredicate(); + return new ValueMatcher() + { + @Override + public boolean matches() + { + Object o = selector.getObject(); + if (o == null || o instanceof Object[]) { + return predicate.apply((Object[]) o); + } + if (o instanceof List) { + ExprEval oEval = ExprEval.bestEffortArray((List) o); + return predicate.apply(oEval.asArray()); + } + // upcast a non-array value to a single-element array to behave consistently with expression evaluation + // TODO(review): confirm this matches expression array coercion semantics + return predicate.apply(new Object[]{o}); + } + + @Override + public void inspectRuntimeShape(RuntimeShapeInspector inspector) + { + inspector.visit("selector", selector); + inspector.visit("predicate", predicate); + } + }; + } + } + @Override public ValueMatcher makeComplexProcessor(BaseObjectColumnValueSelector selector) { @@ -117,6 +154,7 @@ public void inspectRuntimeShape(RuntimeShapeInspector inspector) private DruidLongPredicate longPredicate; private DruidFloatPredicate floatPredicate; private DruidDoublePredicate doublePredicate; + private Predicate arrayPredicate; @Override public boolean matches() @@ -134,6 +172,8 @@ public boolean matches() } else if (rowValue instanceof Number) { // Double or some other non-int, non-long, non-float number. return getDoublePredicate().applyDouble((double) rowValue); + } else if (rowValue instanceof Object[]) { + return getArrayPredicate().apply((Object[]) rowValue); } else { // Other types. Cast to list of strings and evaluate them as strings. // Boolean values are handled here as well since it is not a known type in Druid.
@@ -196,6 +236,14 @@ private DruidDoublePredicate getDoublePredicate() return doublePredicate; } + + private Predicate getArrayPredicate() + { + if (arrayPredicate == null) { + arrayPredicate = predicateFactory.makeArrayPredicate(); + } + return arrayPredicate; + } }; } } diff --git a/processing/src/main/java/org/apache/druid/segment/filter/SelectorFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/SelectorFilter.java index 036059d95c75..a0a648b91351 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/SelectorFilter.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/SelectorFilter.java @@ -33,10 +33,10 @@ import org.apache.druid.segment.ColumnProcessors; import org.apache.druid.segment.ColumnSelector; import org.apache.druid.segment.ColumnSelectorFactory; -import org.apache.druid.segment.column.BitmapColumnIndex; import org.apache.druid.segment.column.ColumnIndexSupplier; -import org.apache.druid.segment.column.NullValueIndex; -import org.apache.druid.segment.column.StringValueSetIndex; +import org.apache.druid.segment.index.BitmapColumnIndex; +import org.apache.druid.segment.index.NullValueIndex; +import org.apache.druid.segment.index.StringValueSetIndex; import org.apache.druid.segment.vector.VectorColumnSelectorFactory; import javax.annotation.Nullable; @@ -108,7 +108,11 @@ public BitmapColumnIndex getBitmapColumnIndex(ColumnIndexSelector selector) @Override public ValueMatcher makeMatcher(ColumnSelectorFactory factory) { - return Filters.makeValueMatcher(factory, dimension, value); + return ColumnProcessors.makeProcessor( + dimension, + new StringConstantValueMatcherFactory(value), + factory + ); } @Override diff --git a/processing/src/main/java/org/apache/druid/segment/filter/SpatialFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/SpatialFilter.java index 1efa22f1f0ae..0b5c7da80171 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/SpatialFilter.java +++ 
b/processing/src/main/java/org/apache/druid/segment/filter/SpatialFilter.java @@ -36,13 +36,13 @@ import org.apache.druid.query.filter.ValueMatcher; import org.apache.druid.segment.ColumnSelector; import org.apache.druid.segment.ColumnSelectorFactory; -import org.apache.druid.segment.column.AllFalseBitmapColumnIndex; -import org.apache.druid.segment.column.BitmapColumnIndex; import org.apache.druid.segment.column.ColumnIndexCapabilities; import org.apache.druid.segment.column.ColumnIndexSupplier; import org.apache.druid.segment.column.SimpleColumnIndexCapabilities; -import org.apache.druid.segment.column.SpatialIndex; import org.apache.druid.segment.incremental.SpatialDimensionRowTransformer; +import org.apache.druid.segment.index.AllFalseBitmapColumnIndex; +import org.apache.druid.segment.index.BitmapColumnIndex; +import org.apache.druid.segment.index.SpatialIndex; import javax.annotation.Nullable; import java.util.Objects; diff --git a/processing/src/main/java/org/apache/druid/segment/filter/ConstantValueMatcherFactory.java b/processing/src/main/java/org/apache/druid/segment/filter/StringConstantValueMatcherFactory.java similarity index 87% rename from processing/src/main/java/org/apache/druid/segment/filter/ConstantValueMatcherFactory.java rename to processing/src/main/java/org/apache/druid/segment/filter/StringConstantValueMatcherFactory.java index 7dae76cf5422..017235c5694d 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/ConstantValueMatcherFactory.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/StringConstantValueMatcherFactory.java @@ -35,12 +35,12 @@ /** * Creates {@link ValueMatcher} that match constants. 
*/ -public class ConstantValueMatcherFactory implements ColumnProcessorFactory +public class StringConstantValueMatcherFactory implements ColumnProcessorFactory { @Nullable private final String matchValue; - ConstantValueMatcherFactory(@Nullable String matchValue) + StringConstantValueMatcherFactory(@Nullable String matchValue) { this.matchValue = NullHandling.emptyToNullIfNeeded(matchValue); } @@ -76,6 +76,12 @@ public ValueMatcher makeLongProcessor(BaseLongColumnValueSelector selector) return ValueMatchers.makeLongValueMatcher(selector, matchValue); } + @Override + public ValueMatcher makeArrayProcessor(BaseObjectColumnValueSelector selector) + { + return new PredicateValueMatcherFactory(new SelectorPredicateFactory(matchValue)).makeArrayProcessor(selector); + } + @Override public ValueMatcher makeComplexProcessor(BaseObjectColumnValueSelector selector) { diff --git a/processing/src/main/java/org/apache/druid/segment/filter/TrueFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/TrueFilter.java index 079d2f5d103a..b363b384331c 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/TrueFilter.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/TrueFilter.java @@ -27,8 +27,8 @@ import org.apache.druid.segment.ColumnInspector; import org.apache.druid.segment.ColumnSelector; import org.apache.druid.segment.ColumnSelectorFactory; -import org.apache.druid.segment.column.AllTrueBitmapColumnIndex; -import org.apache.druid.segment.column.BitmapColumnIndex; +import org.apache.druid.segment.index.AllTrueBitmapColumnIndex; +import org.apache.druid.segment.index.BitmapColumnIndex; import org.apache.druid.segment.vector.VectorColumnSelectorFactory; import javax.annotation.Nullable; diff --git a/processing/src/main/java/org/apache/druid/segment/filter/ValueMatchers.java b/processing/src/main/java/org/apache/druid/segment/filter/ValueMatchers.java index 2d09680a4510..75b4dcc77f45 100644 --- 
a/processing/src/main/java/org/apache/druid/segment/filter/ValueMatchers.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/ValueMatchers.java @@ -39,7 +39,7 @@ import java.util.Objects; /** - * Utility methods for creating {@link ValueMatcher} instances. Mainly used by {@link ConstantValueMatcherFactory} + * Utility methods for creating {@link ValueMatcher} instances. Mainly used by {@link StringConstantValueMatcherFactory} * and {@link PredicateValueMatcherFactory}. */ public class ValueMatchers @@ -114,8 +114,22 @@ public static ValueMatcher makeFloatValueMatcher( return makeNumericNullValueMatcher(selector); } + return makeFloatValueMatcher(selector, matchVal); + } + + /** + * Creates a constant-based {@link ValueMatcher} for a float-typed selector. + * + * @param selector column selector + * @param value value to match + */ + public static ValueMatcher makeFloatValueMatcher( + final BaseFloatColumnValueSelector selector, + final float value + ) + { // Use "floatToIntBits" to canonicalize NaN values. - final int matchValIntBits = Float.floatToIntBits(matchVal); + final int matchValIntBits = Float.floatToIntBits(value); return new ValueMatcher() { @Override @@ -141,7 +155,11 @@ public static ValueMatcher makeLongValueMatcher(final BaseLongColumnValueSelecto if (matchVal == null) { return makeNumericNullValueMatcher(selector); } - final long matchValLong = matchVal; + return makeLongValueMatcher(selector, matchVal); + } + + public static ValueMatcher makeLongValueMatcher(final BaseLongColumnValueSelector selector, long value) + { return new ValueMatcher() { @Override @@ -150,7 +168,7 @@ public boolean matches() if (selector.isNull()) { return false; } - return selector.getLong() == matchValLong; + return selector.getLong() == value; } @Override @@ -187,6 +205,7 @@ public void inspectRuntimeShape(RuntimeShapeInspector inspector) }; } + /** * Creates a predicate-based {@link ValueMatcher} for a float-typed selector. 
* @@ -235,8 +254,16 @@ public static ValueMatcher makeDoubleValueMatcher( return makeNumericNullValueMatcher(selector); } + return makeDoubleValueMatcher(selector, matchVal); + } + + public static ValueMatcher makeDoubleValueMatcher( + final BaseDoubleColumnValueSelector selector, + final double value + ) + { // Use "doubleToLongBits" to canonicalize NaN values. - final long matchValLongBits = Double.doubleToLongBits(matchVal); + final long matchValLongBits = Double.doubleToLongBits(value); return new ValueMatcher() { @Override diff --git a/processing/src/main/java/org/apache/druid/segment/column/AllFalseBitmapColumnIndex.java b/processing/src/main/java/org/apache/druid/segment/index/AllFalseBitmapColumnIndex.java similarity index 90% rename from processing/src/main/java/org/apache/druid/segment/column/AllFalseBitmapColumnIndex.java rename to processing/src/main/java/org/apache/druid/segment/index/AllFalseBitmapColumnIndex.java index 4c6b0f301cc2..fb986c51d97b 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/AllFalseBitmapColumnIndex.java +++ b/processing/src/main/java/org/apache/druid/segment/index/AllFalseBitmapColumnIndex.java @@ -17,10 +17,12 @@ * under the License. 
*/ -package org.apache.druid.segment.column; +package org.apache.druid.segment.index; import org.apache.druid.query.BitmapResultFactory; import org.apache.druid.query.filter.ColumnIndexSelector; +import org.apache.druid.segment.column.ColumnIndexCapabilities; +import org.apache.druid.segment.column.SimpleColumnIndexCapabilities; public class AllFalseBitmapColumnIndex implements BitmapColumnIndex { diff --git a/processing/src/main/java/org/apache/druid/segment/column/AllTrueBitmapColumnIndex.java b/processing/src/main/java/org/apache/druid/segment/index/AllTrueBitmapColumnIndex.java similarity index 90% rename from processing/src/main/java/org/apache/druid/segment/column/AllTrueBitmapColumnIndex.java rename to processing/src/main/java/org/apache/druid/segment/index/AllTrueBitmapColumnIndex.java index d129f814afa7..686add0693ce 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/AllTrueBitmapColumnIndex.java +++ b/processing/src/main/java/org/apache/druid/segment/index/AllTrueBitmapColumnIndex.java @@ -17,10 +17,12 @@ * under the License. 
*/ -package org.apache.druid.segment.column; +package org.apache.druid.segment.index; import org.apache.druid.query.BitmapResultFactory; import org.apache.druid.query.filter.ColumnIndexSelector; +import org.apache.druid.segment.column.ColumnIndexCapabilities; +import org.apache.druid.segment.column.SimpleColumnIndexCapabilities; public class AllTrueBitmapColumnIndex implements BitmapColumnIndex { diff --git a/processing/src/main/java/org/apache/druid/segment/column/BitmapColumnIndex.java b/processing/src/main/java/org/apache/druid/segment/index/BitmapColumnIndex.java similarity index 91% rename from processing/src/main/java/org/apache/druid/segment/column/BitmapColumnIndex.java rename to processing/src/main/java/org/apache/druid/segment/index/BitmapColumnIndex.java index 255e19de4cac..77b78ac0698e 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/BitmapColumnIndex.java +++ b/processing/src/main/java/org/apache/druid/segment/index/BitmapColumnIndex.java @@ -17,9 +17,10 @@ * under the License. */ -package org.apache.druid.segment.column; +package org.apache.druid.segment.index; import org.apache.druid.query.BitmapResultFactory; +import org.apache.druid.segment.column.ColumnIndexCapabilities; public interface BitmapColumnIndex { diff --git a/processing/src/main/java/org/apache/druid/segment/column/DictionaryEncodedStringValueIndex.java b/processing/src/main/java/org/apache/druid/segment/index/DictionaryEncodedStringValueIndex.java similarity index 94% rename from processing/src/main/java/org/apache/druid/segment/column/DictionaryEncodedStringValueIndex.java rename to processing/src/main/java/org/apache/druid/segment/index/DictionaryEncodedStringValueIndex.java index 6913ac998948..5579b54a7262 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/DictionaryEncodedStringValueIndex.java +++ b/processing/src/main/java/org/apache/druid/segment/index/DictionaryEncodedStringValueIndex.java @@ -17,9 +17,10 @@ * under the License. 
*/ -package org.apache.druid.segment.column; +package org.apache.druid.segment.index; import org.apache.druid.collections.bitmap.BitmapFactory; +import org.apache.druid.segment.column.DictionaryEncodedColumn; import javax.annotation.Nullable; diff --git a/processing/src/main/java/org/apache/druid/segment/column/DictionaryEncodedValueIndex.java b/processing/src/main/java/org/apache/druid/segment/index/DictionaryEncodedValueIndex.java similarity index 94% rename from processing/src/main/java/org/apache/druid/segment/column/DictionaryEncodedValueIndex.java rename to processing/src/main/java/org/apache/druid/segment/index/DictionaryEncodedValueIndex.java index b1f0115062fd..35f6d381d0a1 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/DictionaryEncodedValueIndex.java +++ b/processing/src/main/java/org/apache/druid/segment/index/DictionaryEncodedValueIndex.java @@ -17,9 +17,10 @@ * under the License. */ -package org.apache.druid.segment.column; +package org.apache.druid.segment.index; import org.apache.druid.collections.bitmap.ImmutableBitmap; +import org.apache.druid.segment.column.DictionaryEncodedColumn; /** * This exposes a 'raw' view into bitmap value indexes for {@link DictionaryEncodedColumn}. This allows callers diff --git a/processing/src/main/java/org/apache/druid/segment/column/DruidPredicateIndex.java b/processing/src/main/java/org/apache/druid/segment/index/DruidPredicateIndex.java similarity index 97% rename from processing/src/main/java/org/apache/druid/segment/column/DruidPredicateIndex.java rename to processing/src/main/java/org/apache/druid/segment/index/DruidPredicateIndex.java index 44f69279aa9e..a14ca5f1d7e9 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/DruidPredicateIndex.java +++ b/processing/src/main/java/org/apache/druid/segment/index/DruidPredicateIndex.java @@ -17,7 +17,7 @@ * under the License. 
*/ -package org.apache.druid.segment.column; +package org.apache.druid.segment.index; import org.apache.druid.query.filter.DruidPredicateFactory; diff --git a/processing/src/main/java/org/apache/druid/segment/column/IndexedStringDictionaryEncodedStringValueIndex.java b/processing/src/main/java/org/apache/druid/segment/index/IndexedStringDictionaryEncodedStringValueIndex.java similarity index 98% rename from processing/src/main/java/org/apache/druid/segment/column/IndexedStringDictionaryEncodedStringValueIndex.java rename to processing/src/main/java/org/apache/druid/segment/index/IndexedStringDictionaryEncodedStringValueIndex.java index 668694bb4a13..0c84bba8749d 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/IndexedStringDictionaryEncodedStringValueIndex.java +++ b/processing/src/main/java/org/apache/druid/segment/index/IndexedStringDictionaryEncodedStringValueIndex.java @@ -17,7 +17,7 @@ * under the License. */ -package org.apache.druid.segment.column; +package org.apache.druid.segment.index; import org.apache.druid.collections.bitmap.BitmapFactory; import org.apache.druid.collections.bitmap.ImmutableBitmap; diff --git a/processing/src/main/java/org/apache/druid/segment/column/IndexedStringDruidPredicateIndex.java b/processing/src/main/java/org/apache/druid/segment/index/IndexedStringDruidPredicateIndex.java similarity index 96% rename from processing/src/main/java/org/apache/druid/segment/column/IndexedStringDruidPredicateIndex.java rename to processing/src/main/java/org/apache/druid/segment/index/IndexedStringDruidPredicateIndex.java index 9c5aa9af1212..3ed85acd52c5 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/IndexedStringDruidPredicateIndex.java +++ b/processing/src/main/java/org/apache/druid/segment/index/IndexedStringDruidPredicateIndex.java @@ -17,12 +17,14 @@ * under the License. 
*/ -package org.apache.druid.segment.column; +package org.apache.druid.segment.index; import com.google.common.base.Predicate; import org.apache.druid.collections.bitmap.BitmapFactory; import org.apache.druid.collections.bitmap.ImmutableBitmap; import org.apache.druid.query.filter.DruidPredicateFactory; +import org.apache.druid.segment.column.ColumnConfig; +import org.apache.druid.segment.column.ColumnIndexSupplier; import org.apache.druid.segment.data.Indexed; import javax.annotation.Nullable; diff --git a/processing/src/main/java/org/apache/druid/segment/column/IndexedUtf8LexicographicalRangeIndex.java b/processing/src/main/java/org/apache/druid/segment/index/IndexedUtf8LexicographicalRangeIndex.java similarity index 97% rename from processing/src/main/java/org/apache/druid/segment/column/IndexedUtf8LexicographicalRangeIndex.java rename to processing/src/main/java/org/apache/druid/segment/index/IndexedUtf8LexicographicalRangeIndex.java index 5ddb01ba02c6..0eea6974341e 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/IndexedUtf8LexicographicalRangeIndex.java +++ b/processing/src/main/java/org/apache/druid/segment/index/IndexedUtf8LexicographicalRangeIndex.java @@ -17,7 +17,7 @@ * under the License. 
*/ -package org.apache.druid.segment.column; +package org.apache.druid.segment.index; import com.google.common.base.Preconditions; import com.google.common.base.Predicate; @@ -29,6 +29,8 @@ import org.apache.druid.common.config.NullHandling; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.segment.IntListUtils; +import org.apache.druid.segment.column.ColumnConfig; +import org.apache.druid.segment.column.ColumnIndexSupplier; import org.apache.druid.segment.data.Indexed; import javax.annotation.Nullable; diff --git a/processing/src/main/java/org/apache/druid/segment/column/IndexedUtf8ValueSetIndex.java b/processing/src/main/java/org/apache/druid/segment/index/IndexedUtf8ValueSetIndex.java similarity index 99% rename from processing/src/main/java/org/apache/druid/segment/column/IndexedUtf8ValueSetIndex.java rename to processing/src/main/java/org/apache/druid/segment/index/IndexedUtf8ValueSetIndex.java index c568e78d9b07..5e3146cefbbb 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/IndexedUtf8ValueSetIndex.java +++ b/processing/src/main/java/org/apache/druid/segment/index/IndexedUtf8ValueSetIndex.java @@ -17,7 +17,7 @@ * under the License. 
*/ -package org.apache.druid.segment.column; +package org.apache.druid.segment.index; import com.google.common.base.Preconditions; import com.google.common.collect.Iterables; diff --git a/processing/src/main/java/org/apache/druid/segment/column/LexicographicalRangeIndex.java b/processing/src/main/java/org/apache/druid/segment/index/LexicographicalRangeIndex.java similarity index 98% rename from processing/src/main/java/org/apache/druid/segment/column/LexicographicalRangeIndex.java rename to processing/src/main/java/org/apache/druid/segment/index/LexicographicalRangeIndex.java index 83c8fcfabd54..701d377ced9d 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/LexicographicalRangeIndex.java +++ b/processing/src/main/java/org/apache/druid/segment/index/LexicographicalRangeIndex.java @@ -17,7 +17,7 @@ * under the License. */ -package org.apache.druid.segment.column; +package org.apache.druid.segment.index; import com.google.common.base.Predicate; diff --git a/processing/src/main/java/org/apache/druid/segment/column/NullValueIndex.java b/processing/src/main/java/org/apache/druid/segment/index/NullValueIndex.java similarity index 95% rename from processing/src/main/java/org/apache/druid/segment/column/NullValueIndex.java rename to processing/src/main/java/org/apache/druid/segment/index/NullValueIndex.java index 65ab439b56df..7fccecb36380 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/NullValueIndex.java +++ b/processing/src/main/java/org/apache/druid/segment/index/NullValueIndex.java @@ -17,7 +17,7 @@ * under the License. 
*/ -package org.apache.druid.segment.column; +package org.apache.druid.segment.index; /** * Provides index for all null rows in a column, to use with IS/IS NOT NULL filters diff --git a/processing/src/main/java/org/apache/druid/segment/column/NumericRangeIndex.java b/processing/src/main/java/org/apache/druid/segment/index/NumericRangeIndex.java similarity index 97% rename from processing/src/main/java/org/apache/druid/segment/column/NumericRangeIndex.java rename to processing/src/main/java/org/apache/druid/segment/index/NumericRangeIndex.java index cebece48b2cc..ca1b32e347f1 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/NumericRangeIndex.java +++ b/processing/src/main/java/org/apache/druid/segment/index/NumericRangeIndex.java @@ -17,7 +17,7 @@ * under the License. */ -package org.apache.druid.segment.column; +package org.apache.druid.segment.index; import javax.annotation.Nullable; diff --git a/processing/src/main/java/org/apache/druid/segment/column/SimpleBitmapColumnIndex.java b/processing/src/main/java/org/apache/druid/segment/index/SimpleBitmapColumnIndex.java similarity index 87% rename from processing/src/main/java/org/apache/druid/segment/column/SimpleBitmapColumnIndex.java rename to processing/src/main/java/org/apache/druid/segment/index/SimpleBitmapColumnIndex.java index 9f494d0cd21a..29f04007a74a 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/SimpleBitmapColumnIndex.java +++ b/processing/src/main/java/org/apache/druid/segment/index/SimpleBitmapColumnIndex.java @@ -17,7 +17,10 @@ * under the License. */ -package org.apache.druid.segment.column; +package org.apache.druid.segment.index; + +import org.apache.druid.segment.column.ColumnIndexCapabilities; +import org.apache.druid.segment.column.SimpleColumnIndexCapabilities; /** * {@link BitmapColumnIndex} with Druids "default" {@link ColumnIndexCapabilities}. 
diff --git a/processing/src/main/java/org/apache/druid/segment/column/SimpleImmutableBitmapIndex.java b/processing/src/main/java/org/apache/druid/segment/index/SimpleImmutableBitmapIndex.java similarity index 97% rename from processing/src/main/java/org/apache/druid/segment/column/SimpleImmutableBitmapIndex.java rename to processing/src/main/java/org/apache/druid/segment/index/SimpleImmutableBitmapIndex.java index 411e79e5761c..c314d3c329af 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/SimpleImmutableBitmapIndex.java +++ b/processing/src/main/java/org/apache/druid/segment/index/SimpleImmutableBitmapIndex.java @@ -17,7 +17,7 @@ * under the License. */ -package org.apache.druid.segment.column; +package org.apache.druid.segment.index; import org.apache.druid.collections.bitmap.ImmutableBitmap; import org.apache.druid.query.BitmapResultFactory; diff --git a/processing/src/main/java/org/apache/druid/segment/column/SimpleImmutableBitmapIterableIndex.java b/processing/src/main/java/org/apache/druid/segment/index/SimpleImmutableBitmapIterableIndex.java similarity index 97% rename from processing/src/main/java/org/apache/druid/segment/column/SimpleImmutableBitmapIterableIndex.java rename to processing/src/main/java/org/apache/druid/segment/index/SimpleImmutableBitmapIterableIndex.java index 67587725f763..2a6d47c1b537 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/SimpleImmutableBitmapIterableIndex.java +++ b/processing/src/main/java/org/apache/druid/segment/index/SimpleImmutableBitmapIterableIndex.java @@ -17,7 +17,7 @@ * under the License. 
*/ -package org.apache.druid.segment.column; +package org.apache.druid.segment.index; import org.apache.druid.collections.bitmap.ImmutableBitmap; import org.apache.druid.query.BitmapResultFactory; diff --git a/processing/src/main/java/org/apache/druid/segment/column/SpatialIndex.java b/processing/src/main/java/org/apache/druid/segment/index/SpatialIndex.java similarity index 95% rename from processing/src/main/java/org/apache/druid/segment/column/SpatialIndex.java rename to processing/src/main/java/org/apache/druid/segment/index/SpatialIndex.java index 462a15a079b3..7b7705dd55e7 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/SpatialIndex.java +++ b/processing/src/main/java/org/apache/druid/segment/index/SpatialIndex.java @@ -17,7 +17,7 @@ * under the License. */ -package org.apache.druid.segment.column; +package org.apache.druid.segment.index; import org.apache.druid.collections.spatial.ImmutableRTree; diff --git a/processing/src/main/java/org/apache/druid/segment/column/StringValueSetIndex.java b/processing/src/main/java/org/apache/druid/segment/index/StringValueSetIndex.java similarity index 97% rename from processing/src/main/java/org/apache/druid/segment/column/StringValueSetIndex.java rename to processing/src/main/java/org/apache/druid/segment/index/StringValueSetIndex.java index 8ceca2d86add..3845e8ca752a 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/StringValueSetIndex.java +++ b/processing/src/main/java/org/apache/druid/segment/index/StringValueSetIndex.java @@ -17,7 +17,7 @@ * under the License. 
*/ -package org.apache.druid.segment.column; +package org.apache.druid.segment.index; import org.apache.druid.collections.bitmap.ImmutableBitmap; diff --git a/processing/src/main/java/org/apache/druid/segment/column/Utf8ValueSetIndex.java b/processing/src/main/java/org/apache/druid/segment/index/Utf8ValueSetIndex.java similarity index 96% rename from processing/src/main/java/org/apache/druid/segment/column/Utf8ValueSetIndex.java rename to processing/src/main/java/org/apache/druid/segment/index/Utf8ValueSetIndex.java index 6598e36f2069..50ef48724c03 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/Utf8ValueSetIndex.java +++ b/processing/src/main/java/org/apache/druid/segment/index/Utf8ValueSetIndex.java @@ -17,7 +17,7 @@ * under the License. */ -package org.apache.druid.segment.column; +package org.apache.druid.segment.index; import org.apache.druid.collections.bitmap.ImmutableBitmap; diff --git a/processing/src/main/java/org/apache/druid/segment/join/lookup/LookupJoinMatcher.java b/processing/src/main/java/org/apache/druid/segment/join/lookup/LookupJoinMatcher.java index c1703640330f..1e58646fcfef 100644 --- a/processing/src/main/java/org/apache/druid/segment/join/lookup/LookupJoinMatcher.java +++ b/processing/src/main/java/org/apache/druid/segment/join/lookup/LookupJoinMatcher.java @@ -112,6 +112,12 @@ public Supplier makeLongProcessor(BaseLongColumnValueSelector selector) } } + @Override + public Supplier makeArrayProcessor(BaseObjectColumnValueSelector selector) + { + throw new QueryUnsupportedException("Joining against a ARRAY columns is not supported."); + } + @Override public Supplier makeComplexProcessor(BaseObjectColumnValueSelector selector) { diff --git a/processing/src/main/java/org/apache/druid/segment/join/table/IndexedTableJoinMatcher.java b/processing/src/main/java/org/apache/druid/segment/join/table/IndexedTableJoinMatcher.java index 5004c897a466..3935184e52b2 100644 --- 
a/processing/src/main/java/org/apache/druid/segment/join/table/IndexedTableJoinMatcher.java +++ b/processing/src/main/java/org/apache/druid/segment/join/table/IndexedTableJoinMatcher.java @@ -491,6 +491,14 @@ public ConditionMatcher makeLongProcessor(BaseLongColumnValueSelector selector) } } + @Override + public ConditionMatcher makeArrayProcessor(BaseObjectColumnValueSelector selector) + { + return () -> { + throw new QueryUnsupportedException("Joining against ARRAY columns is not supported."); + }; + } + @Override public ConditionMatcher makeComplexProcessor(BaseObjectColumnValueSelector selector) { diff --git a/processing/src/main/java/org/apache/druid/segment/nested/NestedCommonFormatColumn.java b/processing/src/main/java/org/apache/druid/segment/nested/NestedCommonFormatColumn.java index 59c0070d2430..abb91fc1483f 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/NestedCommonFormatColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/NestedCommonFormatColumn.java @@ -147,10 +147,9 @@ public ColumnCapabilities toColumnCapabilities() .setDictionaryValuesSorted(true) .setDictionaryValuesUnique(true) .setHasBitmapIndexes(true) - .setFilterable(true) .setHasNulls(hasNulls); } - return ColumnCapabilitiesImpl.createDefault().setType(logicalType).setHasNulls(hasNulls).setFilterable(true); + return ColumnCapabilitiesImpl.createDefault().setType(logicalType).setHasNulls(hasNulls); } } } diff --git a/processing/src/main/java/org/apache/druid/segment/nested/NestedDataComplexTypeSerde.java b/processing/src/main/java/org/apache/druid/segment/nested/NestedDataComplexTypeSerde.java index f1f298e08897..a35aa93c25b7 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/NestedDataComplexTypeSerde.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/NestedDataComplexTypeSerde.java @@ -106,7 +106,6 @@ public void deserializeColumn( } builder.setComplexColumnSupplier(supplier); builder.setColumnFormat(new 
NestedColumnFormatV4()); - builder.setFilterable(true); } @Override @@ -188,7 +187,7 @@ public ColumnFormat merge(@Nullable ColumnFormat otherFormat) @Override public ColumnCapabilities toColumnCapabilities() { - return ColumnCapabilitiesImpl.createDefault().setType(ColumnType.NESTED_DATA).setHasNulls(true).setFilterable(true); + return ColumnCapabilitiesImpl.createDefault().setType(ColumnType.NESTED_DATA).setHasNulls(true); } } } diff --git a/processing/src/main/java/org/apache/druid/segment/nested/NestedFieldColumnIndexSupplier.java b/processing/src/main/java/org/apache/druid/segment/nested/NestedFieldColumnIndexSupplier.java index 465d8cb30c55..4f050fd5e430 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/NestedFieldColumnIndexSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/NestedFieldColumnIndexSupplier.java @@ -44,23 +44,23 @@ import org.apache.druid.query.filter.DruidLongPredicate; import org.apache.druid.query.filter.DruidPredicateFactory; import org.apache.druid.segment.IntListUtils; -import org.apache.druid.segment.column.BitmapColumnIndex; import org.apache.druid.segment.column.ColumnConfig; import org.apache.druid.segment.column.ColumnIndexSupplier; import org.apache.druid.segment.column.ColumnType; -import org.apache.druid.segment.column.DictionaryEncodedStringValueIndex; -import org.apache.druid.segment.column.DictionaryEncodedValueIndex; -import org.apache.druid.segment.column.DruidPredicateIndex; -import org.apache.druid.segment.column.LexicographicalRangeIndex; -import org.apache.druid.segment.column.NullValueIndex; -import org.apache.druid.segment.column.NumericRangeIndex; -import org.apache.druid.segment.column.SimpleBitmapColumnIndex; -import org.apache.druid.segment.column.SimpleImmutableBitmapIndex; -import org.apache.druid.segment.column.SimpleImmutableBitmapIterableIndex; -import org.apache.druid.segment.column.StringValueSetIndex; import org.apache.druid.segment.data.FixedIndexed; import 
org.apache.druid.segment.data.GenericIndexed; import org.apache.druid.segment.data.Indexed; +import org.apache.druid.segment.index.BitmapColumnIndex; +import org.apache.druid.segment.index.DictionaryEncodedStringValueIndex; +import org.apache.druid.segment.index.DictionaryEncodedValueIndex; +import org.apache.druid.segment.index.DruidPredicateIndex; +import org.apache.druid.segment.index.LexicographicalRangeIndex; +import org.apache.druid.segment.index.NullValueIndex; +import org.apache.druid.segment.index.NumericRangeIndex; +import org.apache.druid.segment.index.SimpleBitmapColumnIndex; +import org.apache.druid.segment.index.SimpleImmutableBitmapIndex; +import org.apache.druid.segment.index.SimpleImmutableBitmapIterableIndex; +import org.apache.druid.segment.index.StringValueSetIndex; import javax.annotation.Nullable; import java.nio.ByteBuffer; diff --git a/processing/src/main/java/org/apache/druid/segment/nested/ScalarDoubleColumnAndIndexSupplier.java b/processing/src/main/java/org/apache/druid/segment/nested/ScalarDoubleColumnAndIndexSupplier.java index 23c21b876bcf..04165fd2d568 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/ScalarDoubleColumnAndIndexSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/ScalarDoubleColumnAndIndexSupplier.java @@ -38,26 +38,26 @@ import org.apache.druid.query.filter.DruidDoublePredicate; import org.apache.druid.query.filter.DruidPredicateFactory; import org.apache.druid.segment.IntListUtils; -import org.apache.druid.segment.column.BitmapColumnIndex; import org.apache.druid.segment.column.ColumnBuilder; import org.apache.druid.segment.column.ColumnConfig; import org.apache.druid.segment.column.ColumnIndexSupplier; import org.apache.druid.segment.column.ColumnType; -import org.apache.druid.segment.column.DictionaryEncodedStringValueIndex; -import org.apache.druid.segment.column.DictionaryEncodedValueIndex; -import org.apache.druid.segment.column.DruidPredicateIndex; -import 
org.apache.druid.segment.column.NullValueIndex; -import org.apache.druid.segment.column.NumericRangeIndex; -import org.apache.druid.segment.column.SimpleBitmapColumnIndex; -import org.apache.druid.segment.column.SimpleImmutableBitmapIndex; -import org.apache.druid.segment.column.SimpleImmutableBitmapIterableIndex; -import org.apache.druid.segment.column.StringValueSetIndex; import org.apache.druid.segment.data.BitmapSerdeFactory; import org.apache.druid.segment.data.ColumnarDoubles; import org.apache.druid.segment.data.CompressedColumnarDoublesSuppliers; import org.apache.druid.segment.data.FixedIndexed; import org.apache.druid.segment.data.GenericIndexed; import org.apache.druid.segment.data.VByte; +import org.apache.druid.segment.index.BitmapColumnIndex; +import org.apache.druid.segment.index.DictionaryEncodedStringValueIndex; +import org.apache.druid.segment.index.DictionaryEncodedValueIndex; +import org.apache.druid.segment.index.DruidPredicateIndex; +import org.apache.druid.segment.index.NullValueIndex; +import org.apache.druid.segment.index.NumericRangeIndex; +import org.apache.druid.segment.index.SimpleBitmapColumnIndex; +import org.apache.druid.segment.index.SimpleImmutableBitmapIndex; +import org.apache.druid.segment.index.SimpleImmutableBitmapIterableIndex; +import org.apache.druid.segment.index.StringValueSetIndex; import org.apache.druid.segment.serde.NestedCommonFormatColumnPartSerde; import javax.annotation.Nullable; diff --git a/processing/src/main/java/org/apache/druid/segment/nested/ScalarLongColumnAndIndexSupplier.java b/processing/src/main/java/org/apache/druid/segment/nested/ScalarLongColumnAndIndexSupplier.java index 39d6057aefcc..47c9d1d78ac2 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/ScalarLongColumnAndIndexSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/ScalarLongColumnAndIndexSupplier.java @@ -37,26 +37,26 @@ import org.apache.druid.query.filter.DruidLongPredicate; import 
org.apache.druid.query.filter.DruidPredicateFactory; import org.apache.druid.segment.IntListUtils; -import org.apache.druid.segment.column.BitmapColumnIndex; import org.apache.druid.segment.column.ColumnBuilder; import org.apache.druid.segment.column.ColumnConfig; import org.apache.druid.segment.column.ColumnIndexSupplier; import org.apache.druid.segment.column.ColumnType; -import org.apache.druid.segment.column.DictionaryEncodedStringValueIndex; -import org.apache.druid.segment.column.DictionaryEncodedValueIndex; -import org.apache.druid.segment.column.DruidPredicateIndex; -import org.apache.druid.segment.column.NullValueIndex; -import org.apache.druid.segment.column.NumericRangeIndex; -import org.apache.druid.segment.column.SimpleBitmapColumnIndex; -import org.apache.druid.segment.column.SimpleImmutableBitmapIndex; -import org.apache.druid.segment.column.SimpleImmutableBitmapIterableIndex; -import org.apache.druid.segment.column.StringValueSetIndex; import org.apache.druid.segment.data.BitmapSerdeFactory; import org.apache.druid.segment.data.ColumnarLongs; import org.apache.druid.segment.data.CompressedColumnarLongsSupplier; import org.apache.druid.segment.data.FixedIndexed; import org.apache.druid.segment.data.GenericIndexed; import org.apache.druid.segment.data.VByte; +import org.apache.druid.segment.index.BitmapColumnIndex; +import org.apache.druid.segment.index.DictionaryEncodedStringValueIndex; +import org.apache.druid.segment.index.DictionaryEncodedValueIndex; +import org.apache.druid.segment.index.DruidPredicateIndex; +import org.apache.druid.segment.index.NullValueIndex; +import org.apache.druid.segment.index.NumericRangeIndex; +import org.apache.druid.segment.index.SimpleBitmapColumnIndex; +import org.apache.druid.segment.index.SimpleImmutableBitmapIndex; +import org.apache.druid.segment.index.SimpleImmutableBitmapIterableIndex; +import org.apache.druid.segment.index.StringValueSetIndex; import 
org.apache.druid.segment.serde.NestedCommonFormatColumnPartSerde; import javax.annotation.Nullable; diff --git a/processing/src/main/java/org/apache/druid/segment/nested/VariantColumn.java b/processing/src/main/java/org/apache/druid/segment/nested/VariantColumn.java index bf09e9fe1574..a1c9cd06e1a3 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/VariantColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/VariantColumn.java @@ -23,6 +23,8 @@ import com.google.common.base.Predicates; import com.google.common.primitives.Doubles; import com.google.common.primitives.Floats; +import it.unimi.dsi.fastutil.ints.IntArraySet; +import it.unimi.dsi.fastutil.ints.IntSet; import org.apache.druid.collections.bitmap.ImmutableBitmap; import org.apache.druid.common.guava.GuavaUtils; import org.apache.druid.java.util.common.IAE; @@ -310,6 +312,32 @@ public int lookupId(String val) return -1; } + + public IntSet lookupIds(String val) + { + IntSet intList = new IntArraySet(3); + if (val == null) { + intList.add(0); + return intList; + } + int candidate = stringDictionary.indexOf(StringUtils.toUtf8ByteBuffer(val)); + if (candidate >= 0) { + intList.add(candidate); + } + candidate = longDictionary.indexOf(GuavaUtils.tryParseLong(val)); + if (candidate >= 0) { + candidate += adjustLongId; + intList.add(candidate); + } + candidate = doubleDictionary.indexOf(Doubles.tryParse(val)); + if (candidate >= 0) { + candidate += adjustDoubleId; + intList.add(candidate); + } + + return intList; + } + @Override public int getCardinality() { @@ -428,14 +456,14 @@ public int getRowValue(int offset) public ValueMatcher makeValueMatcher(final @Nullable String value) { if (extractionFn == null) { - final int valueId = lookupId(value); - if (valueId >= 0) { + final IntSet valueIds = VariantColumn.this.lookupIds(value); + if (valueIds.size() > 0) { return new ValueMatcher() { @Override public boolean matches() { - return getRowValue() == valueId; + return 
valueIds.contains(getRowValue()); } @Override diff --git a/processing/src/main/java/org/apache/druid/segment/nested/VariantColumnAndIndexSupplier.java b/processing/src/main/java/org/apache/druid/segment/nested/VariantColumnAndIndexSupplier.java index 69246edf6677..f18f756ec5ef 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/VariantColumnAndIndexSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/VariantColumnAndIndexSupplier.java @@ -26,13 +26,10 @@ import org.apache.druid.java.util.common.RE; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.java.util.common.io.smoosh.SmooshedFileMapper; -import org.apache.druid.segment.column.BitmapColumnIndex; import org.apache.druid.segment.column.ColumnBuilder; import org.apache.druid.segment.column.ColumnConfig; import org.apache.druid.segment.column.ColumnIndexSupplier; import org.apache.druid.segment.column.ColumnType; -import org.apache.druid.segment.column.NullValueIndex; -import org.apache.druid.segment.column.SimpleImmutableBitmapIndex; import org.apache.druid.segment.column.StringEncodingStrategy; import org.apache.druid.segment.data.BitmapSerdeFactory; import org.apache.druid.segment.data.ColumnarInts; @@ -43,6 +40,9 @@ import org.apache.druid.segment.data.FrontCodedIntArrayIndexed; import org.apache.druid.segment.data.GenericIndexed; import org.apache.druid.segment.data.VByte; +import org.apache.druid.segment.index.BitmapColumnIndex; +import org.apache.druid.segment.index.NullValueIndex; +import org.apache.druid.segment.index.SimpleImmutableBitmapIndex; import org.apache.druid.segment.serde.NestedCommonFormatColumnPartSerde; import javax.annotation.Nullable; @@ -239,6 +239,12 @@ public VariantColumnAndIndexSupplier( this.nullValueBitmap = valueIndexes.get(0) == null ? 
bitmapFactory.makeEmptyImmutableBitmap() : valueIndexes.get(0); } + @Nullable + public Byte getVariantTypeSetByte() + { + return variantTypeSetByte; + } + @Override public NestedCommonFormatColumn get() { diff --git a/processing/src/main/java/org/apache/druid/segment/serde/NestedCommonFormatColumnPartSerde.java b/processing/src/main/java/org/apache/druid/segment/serde/NestedCommonFormatColumnPartSerde.java index 9b107b1d78bd..001ca4dbfa06 100644 --- a/processing/src/main/java/org/apache/druid/segment/serde/NestedCommonFormatColumnPartSerde.java +++ b/processing/src/main/java/org/apache/druid/segment/serde/NestedCommonFormatColumnPartSerde.java @@ -29,6 +29,7 @@ import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.data.BitmapSerdeFactory; +import org.apache.druid.segment.data.RoaringBitmapSerdeFactory; import org.apache.druid.segment.nested.NestedCommonFormatColumn; import org.apache.druid.segment.nested.NestedCommonFormatColumnSerializer; import org.apache.druid.segment.nested.NestedDataColumnSupplier; @@ -194,7 +195,6 @@ public void read(ByteBuffer buffer, ColumnBuilder builder, ColumnConfig columnCo builder.setNestedCommonFormatColumnSupplier(supplier); builder.setIndexSupplier(supplier, true, false); builder.setColumnFormat(new NestedCommonFormatColumn.Format(logicalType, capabilitiesBuilder.hasNulls().isTrue())); - builder.setFilterable(true); } } @@ -218,7 +218,6 @@ public void read(ByteBuffer buffer, ColumnBuilder builder, ColumnConfig columnCo builder.setNestedCommonFormatColumnSupplier(supplier); builder.setIndexSupplier(supplier, true, false); builder.setColumnFormat(new NestedCommonFormatColumn.Format(logicalType, capabilitiesBuilder.hasNulls().isTrue())); - builder.setFilterable(true); } } @@ -242,7 +241,6 @@ public void read(ByteBuffer buffer, ColumnBuilder builder, ColumnConfig columnCo builder.setNestedCommonFormatColumnSupplier(supplier); 
builder.setIndexSupplier(supplier, true, false); builder.setColumnFormat(new NestedCommonFormatColumn.Format(logicalType, capabilitiesBuilder.hasNulls().isTrue())); - builder.setFilterable(true); } } @@ -260,13 +258,20 @@ public void read(ByteBuffer buffer, ColumnBuilder builder, ColumnConfig columnCo columnConfig ); ColumnCapabilitiesImpl capabilitiesBuilder = builder.getCapabilitiesBuilder(); - capabilitiesBuilder.setDictionaryEncoded(true); - capabilitiesBuilder.setDictionaryValuesSorted(true); - capabilitiesBuilder.setDictionaryValuesUnique(true); + // if we are a mixed type, don't call ourself dictionary encoded for now so we don't end up doing the wrong thing + // in places. technically we could probably get by by indicating that our dictionary ids are not unique/sorted + // but just in case that still causes problems, skip it all... + if (supplier.getVariantTypeSetByte() == null) { + capabilitiesBuilder.setDictionaryEncoded(true); + capabilitiesBuilder.setDictionaryValuesSorted(true); + capabilitiesBuilder.setDictionaryValuesUnique(true); + } builder.setType(logicalType); builder.setNestedCommonFormatColumnSupplier(supplier); - builder.setColumnFormat(new NestedCommonFormatColumn.Format(logicalType, capabilitiesBuilder.hasNulls().isTrue())); - builder.setFilterable(true); + builder.setColumnFormat(new NestedCommonFormatColumn.Format( + logicalType, + capabilitiesBuilder.hasNulls().isTrue() + )); } } @@ -293,17 +298,16 @@ public void read(ByteBuffer buffer, ColumnBuilder builder, ColumnConfig columnCo builder.setType(logicalType); builder.setNestedCommonFormatColumnSupplier(supplier); builder.setColumnFormat(new NestedCommonFormatColumn.Format(logicalType, hasNulls)); - builder.setFilterable(true); } } public static class SerializerBuilder { - private ColumnType logicalType; + private ColumnType logicalType = ColumnType.NESTED_DATA; private boolean hasNulls; private boolean isVariantType; private ByteOrder byteOrder = ByteOrder.nativeOrder(); - 
BitmapSerdeFactory bitmapSerdeFactory; + BitmapSerdeFactory bitmapSerdeFactory = RoaringBitmapSerdeFactory.getInstance(); @Nullable private Serializer serializer = null; diff --git a/processing/src/main/java/org/apache/druid/segment/serde/NullColumnPartSerde.java b/processing/src/main/java/org/apache/druid/segment/serde/NullColumnPartSerde.java index 95df716b2ce5..ea8af5e85416 100644 --- a/processing/src/main/java/org/apache/druid/segment/serde/NullColumnPartSerde.java +++ b/processing/src/main/java/org/apache/druid/segment/serde/NullColumnPartSerde.java @@ -108,7 +108,6 @@ public Deserializer getDeserializer() return (buffer, builder, columnConfig) -> { builder.setHasMultipleValues(false) .setHasNulls(true) - .setFilterable(true) // this is a bit sneaky, we set supplier to null here to act like a null column instead of a column // without any indexes, which is the default state .setIndexSupplier(null, true, false) diff --git a/processing/src/main/java/org/apache/druid/segment/serde/NullValueIndexSupplier.java b/processing/src/main/java/org/apache/druid/segment/serde/NullValueIndexSupplier.java index d66d15ca4b8e..81390c685b9c 100644 --- a/processing/src/main/java/org/apache/druid/segment/serde/NullValueIndexSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/serde/NullValueIndexSupplier.java @@ -20,10 +20,10 @@ package org.apache.druid.segment.serde; import org.apache.druid.collections.bitmap.ImmutableBitmap; -import org.apache.druid.segment.column.BitmapColumnIndex; import org.apache.druid.segment.column.ColumnIndexSupplier; -import org.apache.druid.segment.column.NullValueIndex; -import org.apache.druid.segment.column.SimpleImmutableBitmapIndex; +import org.apache.druid.segment.index.BitmapColumnIndex; +import org.apache.druid.segment.index.NullValueIndex; +import org.apache.druid.segment.index.SimpleImmutableBitmapIndex; import javax.annotation.Nullable; diff --git 
a/processing/src/main/java/org/apache/druid/segment/serde/StringUtf8ColumnIndexSupplier.java b/processing/src/main/java/org/apache/druid/segment/serde/StringUtf8ColumnIndexSupplier.java index de9c791c7eb7..90695317e582 100644 --- a/processing/src/main/java/org/apache/druid/segment/serde/StringUtf8ColumnIndexSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/serde/StringUtf8ColumnIndexSupplier.java @@ -24,24 +24,24 @@ import org.apache.druid.collections.bitmap.ImmutableBitmap; import org.apache.druid.collections.spatial.ImmutableRTree; import org.apache.druid.common.config.NullHandling; -import org.apache.druid.segment.column.BitmapColumnIndex; import org.apache.druid.segment.column.ColumnConfig; import org.apache.druid.segment.column.ColumnIndexSupplier; -import org.apache.druid.segment.column.DictionaryEncodedStringValueIndex; -import org.apache.druid.segment.column.DictionaryEncodedValueIndex; -import org.apache.druid.segment.column.DruidPredicateIndex; -import org.apache.druid.segment.column.IndexedStringDictionaryEncodedStringValueIndex; -import org.apache.druid.segment.column.IndexedStringDruidPredicateIndex; -import org.apache.druid.segment.column.IndexedUtf8LexicographicalRangeIndex; -import org.apache.druid.segment.column.IndexedUtf8ValueSetIndex; -import org.apache.druid.segment.column.LexicographicalRangeIndex; -import org.apache.druid.segment.column.NullValueIndex; -import org.apache.druid.segment.column.SimpleImmutableBitmapIndex; -import org.apache.druid.segment.column.SpatialIndex; import org.apache.druid.segment.column.StringEncodingStrategies; -import org.apache.druid.segment.column.StringValueSetIndex; import org.apache.druid.segment.data.GenericIndexed; import org.apache.druid.segment.data.Indexed; +import org.apache.druid.segment.index.BitmapColumnIndex; +import org.apache.druid.segment.index.DictionaryEncodedStringValueIndex; +import org.apache.druid.segment.index.DictionaryEncodedValueIndex; +import 
org.apache.druid.segment.index.DruidPredicateIndex; +import org.apache.druid.segment.index.IndexedStringDictionaryEncodedStringValueIndex; +import org.apache.druid.segment.index.IndexedStringDruidPredicateIndex; +import org.apache.druid.segment.index.IndexedUtf8LexicographicalRangeIndex; +import org.apache.druid.segment.index.IndexedUtf8ValueSetIndex; +import org.apache.druid.segment.index.LexicographicalRangeIndex; +import org.apache.druid.segment.index.NullValueIndex; +import org.apache.druid.segment.index.SimpleImmutableBitmapIndex; +import org.apache.druid.segment.index.SpatialIndex; +import org.apache.druid.segment.index.StringValueSetIndex; import javax.annotation.Nullable; import java.nio.ByteBuffer; diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java index 345ecb93ac01..41704fadb3da 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java @@ -300,7 +300,7 @@ public static DimensionSelector makeDimensionSelector( */ public static boolean canMapOverDictionary( final Expr.BindingAnalysis bindingAnalysis, - final ColumnCapabilities columnCapabilities + @Nullable final ColumnCapabilities columnCapabilities ) { Preconditions.checkState(bindingAnalysis.getRequiredBindings().size() == 1, "requiredBindings.size == 1"); diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/ListFilteredVirtualColumn.java b/processing/src/main/java/org/apache/druid/segment/virtual/ListFilteredVirtualColumn.java index db72ec56d7f6..f55512403d0f 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/ListFilteredVirtualColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/ListFilteredVirtualColumn.java @@ -41,20 +41,20 @@ import org.apache.druid.segment.DimensionSelector; import 
org.apache.druid.segment.IdMapping; import org.apache.druid.segment.VirtualColumn; -import org.apache.druid.segment.column.BitmapColumnIndex; import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnCapabilitiesImpl; import org.apache.druid.segment.column.ColumnHolder; import org.apache.druid.segment.column.ColumnIndexSupplier; -import org.apache.druid.segment.column.DictionaryEncodedStringValueIndex; -import org.apache.druid.segment.column.DictionaryEncodedValueIndex; -import org.apache.druid.segment.column.DruidPredicateIndex; -import org.apache.druid.segment.column.LexicographicalRangeIndex; -import org.apache.druid.segment.column.NullValueIndex; -import org.apache.druid.segment.column.SimpleBitmapColumnIndex; -import org.apache.druid.segment.column.SimpleImmutableBitmapIterableIndex; -import org.apache.druid.segment.column.StringValueSetIndex; import org.apache.druid.segment.filter.Filters; +import org.apache.druid.segment.index.BitmapColumnIndex; +import org.apache.druid.segment.index.DictionaryEncodedStringValueIndex; +import org.apache.druid.segment.index.DictionaryEncodedValueIndex; +import org.apache.druid.segment.index.DruidPredicateIndex; +import org.apache.druid.segment.index.LexicographicalRangeIndex; +import org.apache.druid.segment.index.NullValueIndex; +import org.apache.druid.segment.index.SimpleBitmapColumnIndex; +import org.apache.druid.segment.index.SimpleImmutableBitmapIterableIndex; +import org.apache.druid.segment.index.StringValueSetIndex; import javax.annotation.Nullable; import java.util.Collections; diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumn.java b/processing/src/main/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumn.java index 02c3d879e837..f024169d2b79 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumn.java +++ 
b/processing/src/main/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumn.java @@ -1017,7 +1017,13 @@ private void computeVectorsIfNeeded() if (v instanceof Number) { l = ((Number) v).longValue(); } else { - l = GuavaUtils.tryParseLong(String.valueOf(v)); + final String s = String.valueOf(v); + final Double d = Doubles.tryParse(s); + if (d != null) { + l = d.longValue(); + } else { + l = GuavaUtils.tryParseLong(s); + } } if (l != null) { longVector[i] = l; diff --git a/processing/src/test/java/org/apache/druid/frame/testutil/RowReadingVectorColumnProcessorFactory.java b/processing/src/test/java/org/apache/druid/frame/testutil/RowReadingVectorColumnProcessorFactory.java index fcaec310e0ac..ad89346f2d68 100644 --- a/processing/src/test/java/org/apache/druid/frame/testutil/RowReadingVectorColumnProcessorFactory.java +++ b/processing/src/test/java/org/apache/druid/frame/testutil/RowReadingVectorColumnProcessorFactory.java @@ -127,6 +127,12 @@ public Supplier makeLongProcessor(ColumnCapabilities capabilities, Vec }; } + @Override + public Supplier makeArrayProcessor(ColumnCapabilities capabilities, VectorObjectSelector selector) + { + return selector::getObjectVector; + } + @Override public Supplier makeObjectProcessor(ColumnCapabilities capabilities, VectorObjectSelector selector) { diff --git a/processing/src/test/java/org/apache/druid/query/filter/InDimFilterTest.java b/processing/src/test/java/org/apache/druid/query/filter/InDimFilterTest.java index cf47b2243a13..3d1f171ac4c0 100644 --- a/processing/src/test/java/org/apache/druid/query/filter/InDimFilterTest.java +++ b/processing/src/test/java/org/apache/druid/query/filter/InDimFilterTest.java @@ -31,12 +31,12 @@ import org.apache.druid.query.extraction.RegexDimExtractionFn; import org.apache.druid.segment.RowAdapters; import org.apache.druid.segment.RowBasedColumnSelectorFactory; -import org.apache.druid.segment.column.BitmapColumnIndex; import org.apache.druid.segment.column.ColumnIndexSupplier; import 
org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.RowSignature; -import org.apache.druid.segment.column.StringValueSetIndex; -import org.apache.druid.segment.column.Utf8ValueSetIndex; +import org.apache.druid.segment.index.BitmapColumnIndex; +import org.apache.druid.segment.index.StringValueSetIndex; +import org.apache.druid.segment.index.Utf8ValueSetIndex; import org.apache.druid.testing.InitializedNullHandlingTest; import org.junit.Assert; import org.junit.Rule; diff --git a/processing/src/test/java/org/apache/druid/query/filter/LikeDimFilterTest.java b/processing/src/test/java/org/apache/druid/query/filter/LikeDimFilterTest.java index c51ec817b739..8b35ee32c942 100644 --- a/processing/src/test/java/org/apache/druid/query/filter/LikeDimFilterTest.java +++ b/processing/src/test/java/org/apache/druid/query/filter/LikeDimFilterTest.java @@ -24,10 +24,10 @@ import nl.jqno.equalsverifier.EqualsVerifier; import org.apache.druid.jackson.DefaultObjectMapper; import org.apache.druid.query.extraction.SubstringDimExtractionFn; -import org.apache.druid.segment.column.BitmapColumnIndex; import org.apache.druid.segment.column.ColumnIndexSupplier; -import org.apache.druid.segment.column.LexicographicalRangeIndex; -import org.apache.druid.segment.column.StringValueSetIndex; +import org.apache.druid.segment.index.BitmapColumnIndex; +import org.apache.druid.segment.index.LexicographicalRangeIndex; +import org.apache.druid.segment.index.StringValueSetIndex; import org.apache.druid.testing.InitializedNullHandlingTest; import org.junit.Assert; import org.junit.Rule; diff --git a/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java b/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java index 2f510cae56b8..6133a16b1eaa 100644 --- a/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java +++ 
b/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java @@ -789,7 +789,7 @@ public void testIngestAndScanSegmentsRealtimeSchemaDiscoveryTypeGauntlet() throw + "[1672531200000, null, 0, 0.0, true, 51, 1, [], {a=700, b={x=g, y=1.1, z=[9, null, 9, 9]}}, {x=400, y=[{l=[null], m=100, n=5}, {l=[a, b, c], m=a, n=1}], z={}}, null, [a, b], null, [2, 3], null, [null], null, [true, false, true], null, [{x=1}, {x=2}], null, hello, 1234, 1.234, {x=1, y=hello, z={a=1.1, b=1234, c=[a, b, c]}}, [a, b, c], [1, 2, 3], [1.1, 2.2, 3.3], [], {}, [null, null], [{}, {}, {}], [{a=b, x=1, y=1.3}], 1], " + "[1672531200000, null, 2, 0.0, false, b, b, 2, {a=200, b={x=b, y=1.1, z=[2, 4, 6]}}, {x=10, y=[{l=[b, b, c], m=b, n=2}, [1, 2, 3]], z={a=[5.5], b=false}}, [a, b, c], [null, b], [2, 3], null, [3.3, 4.4, 5.5], [999.0, null, 5.5], [null, null, 2.2], [true, true], [null, [null], []], [{x=3}, {x=4}], null, hello, 1234, 1.234, {x=1, y=hello, z={a=1.1, b=1234, c=[a, b, c]}}, [a, b, c], [1, 2, 3], [1.1, 2.2, 3.3], [], {}, [null, null], [{}, {}, {}], [{a=b, x=1, y=1.3}], 1], " + "[1672531200000, a, 1, 1.0, true, 1, 1, 1, {a=100, b={x=a, y=1.1, z=[1, 2, 3, 4]}}, {x=1234, y=[{l=[a, b, c], m=a, n=1}, {l=[a, b, c], m=a, n=1}], z={a=[1.1, 2.2, 3.3], b=true}}, [a, b], [a, b], [1, 2, 3], [1, null, 3], [1.1, 2.2, 3.3], [1.1, 2.2, null], [a, 1, 2.2], [true, false, true], [[1, 2, null], [3, 4]], [{x=1}, {x=2}], null, hello, 1234, 1.234, {x=1, y=hello, z={a=1.1, b=1234, c=[a, b, c]}}, [a, b, c], [1, 2, 3], [1.1, 2.2, 3.3], [], {}, [null, null], [{}, {}, {}], [{a=b, x=1, y=1.3}], 1], " - + "[1672531200000, b, 4, 3.3, true, 4, {}, 4, {a=400, b={x=d, y=1.1, z=[3, 4]}}, {x=1234, z={a=[1.1, 2.2, 3.3], b=true}}, [d, e], [b, b], [1, 4], [1], [2.2, 3.3, 4.0], null, [a, b, c], [null, false, true], [[1, 2], [3, 4], [5, 6, 7]], [{x=null}, {x=2}], null, hello, 1234, 1.234, {x=1, y=hello, z={a=1.1, b=1234, c=[a, b, c]}}, [a, b, c], [1, 2, 3], [1.1, 2.2, 3.3], [], {}, [null, null], [{}, {}, 
{}], [{a=b, x=1, y=1.3}], 1], " + + "[1672531200000, b, 4, 3.3, true, 1, {}, 4, {a=400, b={x=d, y=1.1, z=[3, 4]}}, {x=1234, z={a=[1.1, 2.2, 3.3], b=true}}, [d, e], [b, b], [1, 4], [1], [2.2, 3.3, 4.0], null, [a, b, c], [null, false, true], [[1, 2], [3, 4], [5, 6, 7]], [{x=null}, {x=2}], null, hello, 1234, 1.234, {x=1, y=hello, z={a=1.1, b=1234, c=[a, b, c]}}, [a, b, c], [1, 2, 3], [1.1, 2.2, 3.3], [], {}, [null, null], [{}, {}, {}], [{a=b, x=1, y=1.3}], 1], " + "[1672531200000, c, 0, 4.4, true, hello, {}, [], {a=500, b={x=e, z=[1, 2, 3, 4]}}, {x=11, y=[], z={a=[null], b=false}}, null, null, [1, 2, 3], [], [1.1, 2.2, 3.3], null, null, [false], null, [{x=1000}, {y=2000}], null, hello, 1234, 1.234, {x=1, y=hello, z={a=1.1, b=1234, c=[a, b, c]}}, [a, b, c], [1, 2, 3], [1.1, 2.2, 3.3], [], {}, [null, null], [{}, {}, {}], [{a=b, x=1, y=1.3}], 1], " + "[1672531200000, d, 5, 5.9, false, null, a, 6, {a=600, b={x=f, y=1.1, z=[6, 7, 8, 9]}}, null, [a, b], null, null, [null, 2, 9], null, [999.0, 5.5, null], [a, 1, 2.2], [], [[1], [1, 2, null]], [{a=1}, {b=2}], null, hello, 1234, 1.234, {x=1, y=hello, z={a=1.1, b=1234, c=[a, b, c]}}, [a, b, c], [1, 2, 3], [1.1, 2.2, 3.3], [], {}, [null, null], [{}, {}, {}], [{a=b, x=1, y=1.3}], 1], " + "[1672531200000, null, 3, 2.0, null, 3.0, 3.3, 3, {a=300}, {x=4, y=[{l=[], m=100, n=3}, {l=[a]}, {l=[b], n=[]}], z={a=[], b=true}}, [b, c], [d, null, b], [1, 2, 3, 4], [1, 2, 3], [1.1, 3.3], [null, 2.2, null], [1, null, 1], [true, null, true], [[1], null, [1, 2, 3]], [null, {x=2}], null, hello, 1234, 1.234, {x=1, y=hello, z={a=1.1, b=1234, c=[a, b, c]}}, [a, b, c], [1, 2, 3], [1.1, 2.2, 3.3], [], {}, [null, null], [{}, {}, {}], [{a=b, x=1, y=1.3}], 1]" @@ -802,7 +802,7 @@ public void testIngestAndScanSegmentsRealtimeSchemaDiscoveryTypeGauntlet() throw + "[1672531200000, null, null, null, true, 51, 1, [], {a=700, b={x=g, y=1.1, z=[9, null, 9, 9]}}, {x=400, y=[{l=[null], m=100, n=5}, {l=[a, b, c], m=a, n=1}], z={}}, null, [a, b], null, [2, 3], 
null, [null], null, [true, false, true], null, [{x=1}, {x=2}], null, hello, 1234, 1.234, {x=1, y=hello, z={a=1.1, b=1234, c=[a, b, c]}}, [a, b, c], [1, 2, 3], [1.1, 2.2, 3.3], [], {}, [null, null], [{}, {}, {}], [{a=b, x=1, y=1.3}], 1], " + "[1672531200000, , 2, null, false, b, b, 2, {a=200, b={x=b, y=1.1, z=[2, 4, 6]}}, {x=10, y=[{l=[b, b, c], m=b, n=2}, [1, 2, 3]], z={a=[5.5], b=false}}, [a, b, c], [null, b], [2, 3], null, [3.3, 4.4, 5.5], [999.0, null, 5.5], [null, null, 2.2], [true, true], [null, [null], []], [{x=3}, {x=4}], null, hello, 1234, 1.234, {x=1, y=hello, z={a=1.1, b=1234, c=[a, b, c]}}, [a, b, c], [1, 2, 3], [1.1, 2.2, 3.3], [], {}, [null, null], [{}, {}, {}], [{a=b, x=1, y=1.3}], 1], " + "[1672531200000, a, 1, 1.0, true, 1, 1, 1, {a=100, b={x=a, y=1.1, z=[1, 2, 3, 4]}}, {x=1234, y=[{l=[a, b, c], m=a, n=1}, {l=[a, b, c], m=a, n=1}], z={a=[1.1, 2.2, 3.3], b=true}}, [a, b], [a, b], [1, 2, 3], [1, null, 3], [1.1, 2.2, 3.3], [1.1, 2.2, null], [a, 1, 2.2], [true, false, true], [[1, 2, null], [3, 4]], [{x=1}, {x=2}], null, hello, 1234, 1.234, {x=1, y=hello, z={a=1.1, b=1234, c=[a, b, c]}}, [a, b, c], [1, 2, 3], [1.1, 2.2, 3.3], [], {}, [null, null], [{}, {}, {}], [{a=b, x=1, y=1.3}], 1], " - + "[1672531200000, b, 4, 3.3, true, 4, {}, 4, {a=400, b={x=d, y=1.1, z=[3, 4]}}, {x=1234, z={a=[1.1, 2.2, 3.3], b=true}}, [d, e], [b, b], [1, 4], [1], [2.2, 3.3, 4.0], null, [a, b, c], [null, false, true], [[1, 2], [3, 4], [5, 6, 7]], [{x=null}, {x=2}], null, hello, 1234, 1.234, {x=1, y=hello, z={a=1.1, b=1234, c=[a, b, c]}}, [a, b, c], [1, 2, 3], [1.1, 2.2, 3.3], [], {}, [null, null], [{}, {}, {}], [{a=b, x=1, y=1.3}], 1], " + + "[1672531200000, b, 4, 3.3, true, 1, {}, 4, {a=400, b={x=d, y=1.1, z=[3, 4]}}, {x=1234, z={a=[1.1, 2.2, 3.3], b=true}}, [d, e], [b, b], [1, 4], [1], [2.2, 3.3, 4.0], null, [a, b, c], [null, false, true], [[1, 2], [3, 4], [5, 6, 7]], [{x=null}, {x=2}], null, hello, 1234, 1.234, {x=1, y=hello, z={a=1.1, b=1234, c=[a, b, c]}}, [a, b, c], [1, 2, 
3], [1.1, 2.2, 3.3], [], {}, [null, null], [{}, {}, {}], [{a=b, x=1, y=1.3}], 1], " + "[1672531200000, c, null, 4.4, true, hello, {}, [], {a=500, b={x=e, z=[1, 2, 3, 4]}}, {x=11, y=[], z={a=[null], b=false}}, null, null, [1, 2, 3], [], [1.1, 2.2, 3.3], null, null, [false], null, [{x=1000}, {y=2000}], null, hello, 1234, 1.234, {x=1, y=hello, z={a=1.1, b=1234, c=[a, b, c]}}, [a, b, c], [1, 2, 3], [1.1, 2.2, 3.3], [], {}, [null, null], [{}, {}, {}], [{a=b, x=1, y=1.3}], 1], " + "[1672531200000, d, 5, 5.9, false, null, a, 6, {a=600, b={x=f, y=1.1, z=[6, 7, 8, 9]}}, null, [a, b], null, null, [null, 2, 9], null, [999.0, 5.5, null], [a, 1, 2.2], [], [[1], [1, 2, null]], [{a=1}, {b=2}], null, hello, 1234, 1.234, {x=1, y=hello, z={a=1.1, b=1234, c=[a, b, c]}}, [a, b, c], [1, 2, 3], [1.1, 2.2, 3.3], [], {}, [null, null], [{}, {}, {}], [{a=b, x=1, y=1.3}], 1], " + "[1672531200000, null, 3, 2.0, null, 3.0, 3.3, 3, {a=300}, {x=4, y=[{l=[], m=100, n=3}, {l=[a]}, {l=[b], n=[]}], z={a=[], b=true}}, [b, c], [d, null, b], [1, 2, 3, 4], [1, 2, 3], [1.1, 3.3], [null, 2.2, null], [1, null, 1], [true, null, true], [[1], null, [1, 2, 3]], [null, {x=2}], null, hello, 1234, 1.234, {x=1, y=hello, z={a=1.1, b=1234, c=[a, b, c]}}, [a, b, c], [1, 2, 3], [1.1, 2.2, 3.3], [], {}, [null, null], [{}, {}, {}], [{a=b, x=1, y=1.3}], 1]" diff --git a/processing/src/test/java/org/apache/druid/segment/ColumnSelectorColumnIndexSelectorTest.java b/processing/src/test/java/org/apache/druid/segment/ColumnSelectorColumnIndexSelectorTest.java index 9d71c0c1e371..391d0d705ea5 100644 --- a/processing/src/test/java/org/apache/druid/segment/ColumnSelectorColumnIndexSelectorTest.java +++ b/processing/src/test/java/org/apache/druid/segment/ColumnSelectorColumnIndexSelectorTest.java @@ -22,14 +22,14 @@ import org.apache.druid.collections.bitmap.BitmapFactory; import org.apache.druid.collections.bitmap.ImmutableBitmap; import org.apache.druid.query.DefaultBitmapResultFactory; -import 
org.apache.druid.segment.column.BitmapColumnIndex; import org.apache.druid.segment.column.ColumnCapabilitiesImpl; import org.apache.druid.segment.column.ColumnHolder; import org.apache.druid.segment.column.ColumnIndexSupplier; import org.apache.druid.segment.column.ColumnType; -import org.apache.druid.segment.column.DictionaryEncodedStringValueIndex; import org.apache.druid.segment.column.StringUtf8DictionaryEncodedColumn; -import org.apache.druid.segment.column.StringValueSetIndex; +import org.apache.druid.segment.index.BitmapColumnIndex; +import org.apache.druid.segment.index.DictionaryEncodedStringValueIndex; +import org.apache.druid.segment.index.StringValueSetIndex; import org.apache.druid.segment.serde.NoIndexesColumnIndexSupplier; import org.easymock.EasyMock; import org.junit.Assert; @@ -94,7 +94,6 @@ public void setup() .setDictionaryValuesUnique(true) .setDictionaryValuesSorted(true) .setHasBitmapIndexes(true) - .setFilterable(true) ).anyTimes(); EasyMock.replay(bitmapFactory, virtualColumns, index, indexSupplier, holder, stringColumn, nonStringHolder, someIndex, columnIndex, valueIndex, someBitmap); diff --git a/processing/src/test/java/org/apache/druid/segment/IndexMergerNullHandlingTest.java b/processing/src/test/java/org/apache/druid/segment/IndexMergerNullHandlingTest.java index 9f0ee07d0928..b84e209e37ff 100644 --- a/processing/src/test/java/org/apache/druid/segment/IndexMergerNullHandlingTest.java +++ b/processing/src/test/java/org/apache/druid/segment/IndexMergerNullHandlingTest.java @@ -32,11 +32,11 @@ import org.apache.druid.query.aggregation.AggregatorFactory; import org.apache.druid.segment.column.ColumnHolder; import org.apache.druid.segment.column.DictionaryEncodedColumn; -import org.apache.druid.segment.column.DictionaryEncodedStringValueIndex; -import org.apache.druid.segment.column.StringValueSetIndex; import org.apache.druid.segment.data.IncrementalIndexTest; import org.apache.druid.segment.data.IndexedInts; import 
org.apache.druid.segment.incremental.IncrementalIndex; +import org.apache.druid.segment.index.DictionaryEncodedStringValueIndex; +import org.apache.druid.segment.index.StringValueSetIndex; import org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMediumFactory; import org.junit.Assert; import org.junit.Before; diff --git a/processing/src/test/java/org/apache/druid/segment/IndexMergerTestBase.java b/processing/src/test/java/org/apache/druid/segment/IndexMergerTestBase.java index 30c41bea70f5..4f4471f9e85b 100644 --- a/processing/src/test/java/org/apache/druid/segment/IndexMergerTestBase.java +++ b/processing/src/test/java/org/apache/druid/segment/IndexMergerTestBase.java @@ -49,7 +49,6 @@ import org.apache.druid.segment.column.ColumnIndexSupplier; import org.apache.druid.segment.column.DictionaryEncodedColumn; import org.apache.druid.segment.column.StringUtf8DictionaryEncodedColumn; -import org.apache.druid.segment.column.StringValueSetIndex; import org.apache.druid.segment.data.BitmapSerdeFactory; import org.apache.druid.segment.data.BitmapValues; import org.apache.druid.segment.data.CompressionFactory; @@ -61,6 +60,7 @@ import org.apache.druid.segment.incremental.IncrementalIndexAdapter; import org.apache.druid.segment.incremental.IncrementalIndexSchema; import org.apache.druid.segment.incremental.OnheapIncrementalIndex; +import org.apache.druid.segment.index.StringValueSetIndex; import org.apache.druid.segment.writeout.SegmentWriteOutMediumFactory; import org.apache.druid.testing.InitializedNullHandlingTest; import org.joda.time.Interval; diff --git a/processing/src/test/java/org/apache/druid/segment/column/ColumnCapabilitiesImplTest.java b/processing/src/test/java/org/apache/druid/segment/column/ColumnCapabilitiesImplTest.java index cb2cb95d5df4..17ee7d6e60e5 100644 --- a/processing/src/test/java/org/apache/druid/segment/column/ColumnCapabilitiesImplTest.java +++ 
b/processing/src/test/java/org/apache/druid/segment/column/ColumnCapabilitiesImplTest.java @@ -37,8 +37,7 @@ public void testSerde() throws Exception .setHasMultipleValues(true) .setHasSpatialIndexes(true) .setType(ColumnType.UNKNOWN_COMPLEX) - .setHasNulls(true) - .setFilterable(true)); + .setHasNulls(true)); Assert.assertFalse(json.contains("filterable")); @@ -51,7 +50,6 @@ public void testSerde() throws Exception Assert.assertTrue(cc.hasBitmapIndexes()); // hasNulls and isFilterable are computed, these should not be set Assert.assertFalse(cc.hasNulls().isTrue()); - Assert.assertFalse(cc.isFilterable()); } @Test @@ -78,6 +76,5 @@ public void testDeserialization() throws Exception Assert.assertTrue(cc.hasBitmapIndexes()); // hasNulls and isFilterable are computed, these should not be set Assert.assertFalse(cc.hasNulls().isTrue()); - Assert.assertFalse(cc.isFilterable()); } } diff --git a/processing/src/test/java/org/apache/druid/segment/filter/BaseFilterTest.java b/processing/src/test/java/org/apache/druid/segment/filter/BaseFilterTest.java index 3495d1cb6d9c..e1b49bb7c1df 100644 --- a/processing/src/test/java/org/apache/druid/segment/filter/BaseFilterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/filter/BaseFilterTest.java @@ -79,7 +79,6 @@ import org.apache.druid.segment.RowBasedStorageAdapter; import org.apache.druid.segment.StorageAdapter; import org.apache.druid.segment.VirtualColumns; -import org.apache.druid.segment.column.BitmapColumnIndex; import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.RowSignature; import org.apache.druid.segment.column.StringEncodingStrategy; @@ -92,6 +91,7 @@ import org.apache.druid.segment.incremental.IncrementalIndex; import org.apache.druid.segment.incremental.IncrementalIndexSchema; import org.apache.druid.segment.incremental.IncrementalIndexStorageAdapter; +import org.apache.druid.segment.index.BitmapColumnIndex; import 
org.apache.druid.segment.vector.SingleValueDimensionVectorSelector; import org.apache.druid.segment.vector.VectorColumnSelectorFactory; import org.apache.druid.segment.vector.VectorCursor; @@ -491,6 +491,14 @@ public static Collection makeConstructors() return constructors; } + protected boolean isAutoSchema() + { + if (testName.contains("AutoTypes")) { + return true; + } + return false; + } + private Filter makeFilter(final DimFilter dimFilter) { if (dimFilter == null) { @@ -900,6 +908,31 @@ protected void assertFilterMatches( assertFilterMatches(filter, expectedRows, testVectorized); } + protected void assertFilterMatchesSkipArrays( + final DimFilter filter, + final List expectedRows + ) + { + // IncrementalIndex, RowBasedSegment cannot vectorize. + // Columnar FrameStorageAdapter *can* vectorize, but the tests won't pass, because the vectorizable cases + // differ from QueryableIndexStorageAdapter due to frames not having indexes. So, skip these too. + final boolean testVectorized = + !(adapter instanceof IncrementalIndexStorageAdapter) + && !(adapter instanceof RowBasedStorageAdapter) + && !(adapter instanceof FrameStorageAdapter); + + if (isAutoSchema()) { + Throwable t = Assert.assertThrows( + Throwable.class, + () -> assertFilterMatches(filter, expectedRows, testVectorized) + ); + // todo (clint): maybe better? 
+ Assert.assertTrue(t.getMessage().contains("ARRAY")); + } else { + assertFilterMatches(filter, expectedRows, testVectorized); + } + } + protected void assertFilterMatchesSkipVectorize( final DimFilter filter, final List expectedRows @@ -914,66 +947,58 @@ private void assertFilterMatches( final boolean testVectorized ) { - try { + Assert.assertEquals( + "Cursor: " + filter, + expectedRows, + selectColumnValuesMatchingFilter(filter, "dim0") + ); + + if (testVectorized) { Assert.assertEquals( - "Cursor: " + filter, + "Cursor (vectorized): " + filter, expectedRows, - selectColumnValuesMatchingFilter(filter, "dim0") + selectColumnValuesMatchingFilterUsingVectorCursor(filter, "dim0") ); - if (testVectorized) { - Assert.assertEquals( - "Cursor (vectorized): " + filter, - expectedRows, - selectColumnValuesMatchingFilterUsingVectorCursor(filter, "dim0") - ); - - Assert.assertEquals( - "Cursor Virtual Column (vectorized): " + filter, - expectedRows, - selectColumnValuesMatchingFilterUsingVectorVirtualColumnCursor(filter, "vdim0", "dim0") - ); - } - Assert.assertEquals( - "Cursor with postFiltering: " + filter, + "Cursor Virtual Column (vectorized): " + filter, expectedRows, - selectColumnValuesMatchingFilterUsingPostFiltering(filter, "dim0") + selectColumnValuesMatchingFilterUsingVectorVirtualColumnCursor(filter, "vdim0", "dim0") ); + } - if (testVectorized) { - Assert.assertEquals( - "Cursor with postFiltering (vectorized): " + filter, - expectedRows, - selectColumnValuesMatchingFilterUsingVectorizedPostFiltering(filter, "dim0") - ); - } + Assert.assertEquals( + "Cursor with postFiltering: " + filter, + expectedRows, + selectColumnValuesMatchingFilterUsingPostFiltering(filter, "dim0") + ); + if (testVectorized) { Assert.assertEquals( - "Filtered aggregator: " + filter, - expectedRows.size(), - selectCountUsingFilteredAggregator(filter) + "Cursor with postFiltering (vectorized): " + filter, + expectedRows, + 
selectColumnValuesMatchingFilterUsingVectorizedPostFiltering(filter, "dim0") ); + } - if (testVectorized) { - Assert.assertEquals( - "Filtered aggregator (vectorized): " + filter, - expectedRows.size(), - selectCountUsingVectorizedFilteredAggregator(filter) - ); - } + Assert.assertEquals( + "Filtered aggregator: " + filter, + expectedRows.size(), + selectCountUsingFilteredAggregator(filter) + ); + if (testVectorized) { Assert.assertEquals( - "RowBasedColumnSelectorFactory: " + filter, - expectedRows, - selectColumnValuesMatchingFilterUsingRowBasedColumnSelectorFactory(filter, "dim0") + "Filtered aggregator (vectorized): " + filter, + expectedRows.size(), + selectCountUsingVectorizedFilteredAggregator(filter) ); } - catch (ISE ise) { - // ignore failures resulting from 'auto' - if (!(testName.contains("AutoTypes") && "Unsupported type[ARRAY]".equals(ise.getMessage()))) { - throw ise; - } - } + + Assert.assertEquals( + "RowBasedColumnSelectorFactory: " + filter, + expectedRows, + selectColumnValuesMatchingFilterUsingRowBasedColumnSelectorFactory(filter, "dim0") + ); } } diff --git a/processing/src/test/java/org/apache/druid/segment/filter/BoundFilterTest.java b/processing/src/test/java/org/apache/druid/segment/filter/BoundFilterTest.java index ec952c31d100..933843abb257 100644 --- a/processing/src/test/java/org/apache/druid/segment/filter/BoundFilterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/filter/BoundFilterTest.java @@ -90,7 +90,11 @@ public void testLexicographicMatchEverything() ); for (BoundDimFilter filter : filters) { - assertFilterMatches(filter, ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7")); + if (filter.getDimension().equals("dim2")) { + assertFilterMatchesSkipArrays(filter, ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7")); + } else { + assertFilterMatches(filter, ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7")); + } } } @@ -105,12 +109,16 @@ public void testLexicographicMatchWithEmptyString() ); if 
(NullHandling.replaceWithDefault()) { for (BoundDimFilter filter : filters) { - assertFilterMatches(filter, ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7")); + if (filter.getDimension().equals("dim2")) { + assertFilterMatchesSkipArrays(filter, ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7")); + } else { + assertFilterMatches(filter, ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7")); + } } } else { assertFilterMatches(filters.get(0), ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7")); assertFilterMatches(filters.get(1), ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7")); - assertFilterMatches(filters.get(2), ImmutableList.of("0", "2", "3", "4", "6", "7")); + assertFilterMatchesSkipArrays(filters.get(2), ImmutableList.of("0", "2", "3", "4", "6", "7")); assertFilterMatches(filters.get(3), ImmutableList.of()); } } @@ -127,12 +135,12 @@ public void testLexicographicMatchNull() ImmutableList.of("0") ); if (NullHandling.replaceWithDefault()) { - assertFilterMatches( + assertFilterMatchesSkipArrays( new BoundDimFilter("dim2", "", "", false, false, false, null, StringComparators.LEXICOGRAPHIC), ImmutableList.of("1", "2", "5") ); } else { - assertFilterMatches( + assertFilterMatchesSkipArrays( new BoundDimFilter("dim2", "", "", false, false, false, null, StringComparators.LEXICOGRAPHIC), ImmutableList.of("2") ); @@ -278,7 +286,7 @@ public void testAlphaNumericMatchNull() ImmutableList.of("0") ); if (NullHandling.replaceWithDefault()) { - assertFilterMatches( + assertFilterMatchesSkipArrays( new BoundDimFilter("dim2", "", "", false, false, true, null, StringComparators.ALPHANUMERIC), ImmutableList.of("1", "2", "5") ); @@ -287,7 +295,7 @@ public void testAlphaNumericMatchNull() ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7") ); } else { - assertFilterMatches( + assertFilterMatchesSkipArrays( new BoundDimFilter("dim2", "", "", false, false, true, null, StringComparators.ALPHANUMERIC), ImmutableList.of("2") ); @@ -387,7 +395,7 @@ 
public void testNumericMatchNull() ImmutableList.of("0") ); if (NullHandling.replaceWithDefault()) { - assertFilterMatches( + assertFilterMatchesSkipArrays( new BoundDimFilter("dim2", "", "", false, false, false, null, StringComparators.NUMERIC), ImmutableList.of("1", "2", "5") ); @@ -396,7 +404,7 @@ public void testNumericMatchNull() ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7") ); } else { - assertFilterMatches( + assertFilterMatchesSkipArrays( new BoundDimFilter("dim2", "", "", false, false, false, null, StringComparators.NUMERIC), ImmutableList.of("2") ); @@ -470,6 +478,10 @@ public void testListFilteredVirtualColumn() ImmutableList.of("0", "1", "2", "3", "4", "5", "6") ); + if (isAutoSchema()) { + // bail out, auto ingests arrays instead of mvds and this virtual column is for mvd stuff + return; + } assertFilterMatchesSkipVectorize( new BoundDimFilter("allow-dim2", "a", "c", false, false, false, null, StringComparators.LEXICOGRAPHIC), ImmutableList.of("0", "3", "6") @@ -594,7 +606,7 @@ public void testMatchWithExtractionFn() ImmutableList.of("1", "2", "3") ); - assertFilterMatches( + assertFilterMatchesSkipArrays( new BoundDimFilter( "dim2", "super-", @@ -609,7 +621,7 @@ public void testMatchWithExtractionFn() ); if (NullHandling.replaceWithDefault()) { - assertFilterMatches( + assertFilterMatchesSkipArrays( new BoundDimFilter( "dim2", "super-null", @@ -622,7 +634,7 @@ public void testMatchWithExtractionFn() ), ImmutableList.of("1", "2", "5") ); - assertFilterMatches( + assertFilterMatchesSkipArrays( new BoundDimFilter( "dim2", "super-null", @@ -636,7 +648,7 @@ public void testMatchWithExtractionFn() ImmutableList.of("1", "2", "5") ); } else { - assertFilterMatches( + assertFilterMatchesSkipArrays( new BoundDimFilter( "dim2", "super-null", @@ -649,11 +661,11 @@ public void testMatchWithExtractionFn() ), ImmutableList.of("1", "5") ); - assertFilterMatches( + assertFilterMatchesSkipArrays( new BoundDimFilter("dim2", "super-", "super-", false, false, 
false, superFn, StringComparators.NUMERIC), ImmutableList.of("2") ); - assertFilterMatches( + assertFilterMatchesSkipArrays( new BoundDimFilter( "dim2", "super-null", @@ -666,7 +678,7 @@ public void testMatchWithExtractionFn() ), ImmutableList.of("1", "5") ); - assertFilterMatches( + assertFilterMatchesSkipArrays( new BoundDimFilter("dim2", "super-", "super-", false, false, false, superFn, StringComparators.NUMERIC), ImmutableList.of("2") ); diff --git a/processing/src/test/java/org/apache/druid/segment/filter/ColumnComparisonFilterTest.java b/processing/src/test/java/org/apache/druid/segment/filter/ColumnComparisonFilterTest.java index 2bc1c2c5c4f3..8a46d313d2d0 100644 --- a/processing/src/test/java/org/apache/druid/segment/filter/ColumnComparisonFilterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/filter/ColumnComparisonFilterTest.java @@ -97,7 +97,7 @@ public void testColumnsWithoutNulls() { // columns have mixed type input and so are ingested as COMPLEX // however the comparison filter currently nulls out complex types instead of comparing the values - if (testName.contains("AutoTypes")) { + if (isAutoSchema()) { return; } assertFilterMatchesSkipVectorize(new ColumnComparisonDimFilter(ImmutableList.of( @@ -124,7 +124,7 @@ public void testMissingColumnNotSpecifiedInDimensionList() { // columns have mixed type input and so are ingested as COMPLEX // however the comparison filter currently nulls out complex types instead of comparing the values - if (testName.contains("AutoTypes")) { + if (isAutoSchema()) { return; } assertFilterMatchesSkipVectorize(new ColumnComparisonDimFilter(ImmutableList.of( diff --git a/processing/src/test/java/org/apache/druid/segment/filter/EqualityFilterTest.java b/processing/src/test/java/org/apache/druid/segment/filter/EqualityFilterTest.java new file mode 100644 index 000000000000..95f856c0c4a7 --- /dev/null +++ b/processing/src/test/java/org/apache/druid/segment/filter/EqualityFilterTest.java @@ -0,0 +1,437 @@ +/* + 
* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.filter; + +import com.google.common.base.Function; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import nl.jqno.equalsverifier.EqualsVerifier; +import org.apache.druid.common.config.NullHandling; +import org.apache.druid.java.util.common.Pair; +import org.apache.druid.query.extraction.MapLookupExtractor; +import org.apache.druid.query.extraction.TimeDimExtractionFn; +import org.apache.druid.query.filter.EqualityFilter; +import org.apache.druid.query.lookup.LookupExtractionFn; +import org.apache.druid.query.lookup.LookupExtractor; +import org.apache.druid.segment.IndexBuilder; +import org.apache.druid.segment.StorageAdapter; +import org.apache.druid.segment.column.ColumnType; +import org.junit.AfterClass; +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import java.io.Closeable; +import java.util.Map; + +@RunWith(Parameterized.class) +public class EqualityFilterTest extends BaseFilterTest +{ + public EqualityFilterTest( + String testName, + IndexBuilder indexBuilder, + Function> finisher, + boolean cnf, 
+ boolean optimize + ) + { + super(testName, DEFAULT_ROWS, indexBuilder, finisher, cnf, optimize); + } + + @AfterClass + public static void tearDown() throws Exception + { + BaseFilterTest.tearDown(EqualityFilterTest.class.getName()); + } + + @Test + public void testMatchNullThrowsError() + { + Throwable t = Assert.assertThrows( + IllegalArgumentException.class, + () -> assertFilterMatches( + new EqualityFilter("dim0", ColumnType.STRING, null, null, null), ImmutableList.of() + ) + ); + Assert.assertEquals("value must not be null", t.getMessage()); + } + + @Test + public void testSingleValueStringColumnWithoutNulls() + { + if (NullHandling.sqlCompatible()) { + assertFilterMatches(new EqualityFilter("dim0", ColumnType.STRING, "", null, null), ImmutableList.of()); + } + assertFilterMatches(new EqualityFilter("dim0", ColumnType.STRING, "0", null, null), ImmutableList.of("0")); + assertFilterMatches(new EqualityFilter("dim0", ColumnType.STRING, "1", null, null), ImmutableList.of("1")); + + assertFilterMatches(new EqualityFilter("dim0", ColumnType.LONG, 0L, null, null), ImmutableList.of("0")); + assertFilterMatches(new EqualityFilter("dim0", ColumnType.LONG, 1L, null, null), ImmutableList.of("1")); + } + + @Test + public void testSingleValueVirtualStringColumnWithoutNulls() + { + if (NullHandling.sqlCompatible()) { + assertFilterMatches(new EqualityFilter("vdim0", ColumnType.STRING, "", null, null), ImmutableList.of()); + } + assertFilterMatches(new EqualityFilter("vdim0", ColumnType.STRING, "0", null, null), ImmutableList.of("0")); + assertFilterMatches(new EqualityFilter("vdim0", ColumnType.STRING, "1", null, null), ImmutableList.of("1")); + assertFilterMatches(new EqualityFilter("vdim0", ColumnType.LONG, 0L, null, null), ImmutableList.of("0")); + assertFilterMatches(new EqualityFilter("vdim0", ColumnType.LONG, 1L, null, null), ImmutableList.of("1")); + } + + @Test + public void testListFilteredVirtualColumn() + { + assertFilterMatchesSkipVectorize( + new 
EqualityFilter("allow-dim0", ColumnType.STRING, "1", null, null), + ImmutableList.of() + ); + assertFilterMatchesSkipVectorize( + new EqualityFilter("allow-dim0", ColumnType.STRING, "4", null, null), + ImmutableList.of("4") + ); + assertFilterMatchesSkipVectorize( + new EqualityFilter("deny-dim0", ColumnType.STRING, "0", null, null), + ImmutableList.of("0") + ); + assertFilterMatchesSkipVectorize( + new EqualityFilter("deny-dim0", ColumnType.STRING, "4", null, null), + ImmutableList.of() + ); + + // auto ingests arrays instead of MVDs which dont work with list filtered virtual column + if (!isAutoSchema()) { + assertFilterMatchesSkipVectorize( + new EqualityFilter("allow-dim2", ColumnType.STRING, "b", null, null), + ImmutableList.of() + ); + assertFilterMatchesSkipVectorize( + new EqualityFilter("allow-dim2", ColumnType.STRING, "a", null, null), + ImmutableList.of("0", "3") + ); + assertFilterMatchesSkipVectorize( + new EqualityFilter("deny-dim2", ColumnType.STRING, "b", null, null), + ImmutableList.of("0") + ); + assertFilterMatchesSkipVectorize( + new EqualityFilter("deny-dim2", ColumnType.STRING, "a", null, null), + ImmutableList.of() + ); + } + } + + @Test + public void testSingleValueStringColumnWithNulls() + { + if (NullHandling.sqlCompatible()) { + assertFilterMatches(new EqualityFilter("dim1", ColumnType.STRING, "", null, null), ImmutableList.of("0")); + } + assertFilterMatches(new EqualityFilter("dim1", ColumnType.STRING, "10", null, null), ImmutableList.of("1")); + assertFilterMatches(new EqualityFilter("dim1", ColumnType.STRING, "2", null, null), ImmutableList.of("2")); + assertFilterMatches(new EqualityFilter("dim1", ColumnType.STRING, "1", null, null), ImmutableList.of("3")); + assertFilterMatches(new EqualityFilter("dim1", ColumnType.STRING, "abdef", null, null), ImmutableList.of("4")); + assertFilterMatches(new EqualityFilter("dim1", ColumnType.STRING, "abc", null, null), ImmutableList.of("5")); + assertFilterMatches(new EqualityFilter("dim1", 
ColumnType.STRING, "ab", null, null), ImmutableList.of()); + } + + @Test + public void testSingleValueVirtualStringColumnWithNulls() + { + // testSingleValueStringColumnWithNulls but with virtual column selector + if (NullHandling.sqlCompatible()) { + assertFilterMatches(new EqualityFilter("vdim1", ColumnType.STRING, "", null, null), ImmutableList.of("0")); + } + assertFilterMatches(new EqualityFilter("vdim1", ColumnType.STRING, "10", null, null), ImmutableList.of("1")); + assertFilterMatches(new EqualityFilter("vdim1", ColumnType.STRING, "2", null, null), ImmutableList.of("2")); + assertFilterMatches(new EqualityFilter("vdim1", ColumnType.STRING, "1", null, null), ImmutableList.of("3")); + assertFilterMatches(new EqualityFilter("vdim1", ColumnType.STRING, "abdef", null, null), ImmutableList.of("4")); + assertFilterMatches(new EqualityFilter("vdim1", ColumnType.STRING, "abc", null, null), ImmutableList.of("5")); + assertFilterMatches(new EqualityFilter("vdim1", ColumnType.STRING, "ab", null, null), ImmutableList.of()); + } + + @Test + public void testMultiValueStringColumn() + { + if (isAutoSchema()) { + // auto ingests arrays instead of strings + // single values are implicitly upcast to single element arrays, so we get some matches here... 
+ if (NullHandling.sqlCompatible()) { + assertFilterMatches(new EqualityFilter("dim2", ColumnType.STRING, "", null, null), ImmutableList.of("2")); + } + assertFilterMatches(new EqualityFilter("dim2", ColumnType.STRING, "a", null, null), ImmutableList.of("3")); + assertFilterMatches(new EqualityFilter("dim2", ColumnType.STRING, "b", null, null), ImmutableList.of()); + assertFilterMatches(new EqualityFilter("dim2", ColumnType.STRING, "c", null, null), ImmutableList.of("4")); + assertFilterMatches(new EqualityFilter("dim2", ColumnType.STRING, "d", null, null), ImmutableList.of()); + + // array matchers can match the whole array + if (NullHandling.sqlCompatible()) { + assertFilterMatches( + new EqualityFilter("dim2", ColumnType.STRING, ImmutableList.of(""), null, null), + ImmutableList.of("2") + ); + } + assertFilterMatches( + new EqualityFilter("dim2", ColumnType.STRING_ARRAY, new Object[]{"a", "b"}, null, null), + ImmutableList.of("0") + ); + assertFilterMatches( + new EqualityFilter("dim2", ColumnType.STRING_ARRAY, ImmutableList.of("a", "b"), null, null), + ImmutableList.of("0") + ); + assertFilterMatches( + new EqualityFilter("dim2", ColumnType.STRING_ARRAY, new Object[]{"a"}, null, null), + ImmutableList.of("3") + ); + assertFilterMatches( + new EqualityFilter("dim2", ColumnType.STRING_ARRAY, new Object[]{"b"}, null, null), + ImmutableList.of() + ); + assertFilterMatches( + new EqualityFilter("dim2", ColumnType.STRING_ARRAY, new Object[]{"c"}, null, null), + ImmutableList.of("4") + ); + assertFilterMatches( + new EqualityFilter("dim2", ColumnType.STRING_ARRAY, new Object[]{"d"}, null, null), + ImmutableList.of() + ); + } else { + if (NullHandling.sqlCompatible()) { + assertFilterMatches(new EqualityFilter("dim2", ColumnType.STRING, "", null, null), ImmutableList.of("2")); + } + assertFilterMatches( + new EqualityFilter("dim2", ColumnType.STRING, "a", null, null), + ImmutableList.of("0", "3") + ); + assertFilterMatches(new EqualityFilter("dim2", ColumnType.STRING, 
"b", null, null), ImmutableList.of("0")); + assertFilterMatches(new EqualityFilter("dim2", ColumnType.STRING, "c", null, null), ImmutableList.of("4")); + assertFilterMatches(new EqualityFilter("dim2", ColumnType.STRING, "d", null, null), ImmutableList.of()); + } + } + + @Test + public void testMissingColumnSpecifiedInDimensionList() + { + if (NullHandling.sqlCompatible()) { + assertFilterMatches(new EqualityFilter("dim3", ColumnType.STRING, "", null, null), ImmutableList.of()); + } + assertFilterMatches(new EqualityFilter("dim3", ColumnType.STRING, "a", null, null), ImmutableList.of()); + assertFilterMatches(new EqualityFilter("dim3", ColumnType.STRING, "b", null, null), ImmutableList.of()); + assertFilterMatches(new EqualityFilter("dim3", ColumnType.STRING, "c", null, null), ImmutableList.of()); + } + + @Test + public void testMissingColumnNotSpecifiedInDimensionList() + { + if (NullHandling.sqlCompatible()) { + assertFilterMatches(new EqualityFilter("dim4", ColumnType.STRING, "", null, null), ImmutableList.of()); + } + assertFilterMatches(new EqualityFilter("dim4", ColumnType.STRING, "a", null, null), ImmutableList.of()); + assertFilterMatches(new EqualityFilter("dim4", ColumnType.STRING, "b", null, null), ImmutableList.of()); + assertFilterMatches(new EqualityFilter("dim4", ColumnType.STRING, "c", null, null), ImmutableList.of()); + } + + @Test + public void testExpressionVirtualColumn() + { + assertFilterMatches( + new EqualityFilter("expr", ColumnType.STRING, "1.1", null, null), + ImmutableList.of("0", "1", "2", "3", "4", "5") + ); + assertFilterMatches(new EqualityFilter("expr", ColumnType.STRING, "1.2", null, null), ImmutableList.of()); + + assertFilterMatches( + new EqualityFilter("expr", ColumnType.FLOAT, 1.1f, null, null), + ImmutableList.of("0", "1", "2", "3", "4", "5") + ); + assertFilterMatches(new EqualityFilter("expr", ColumnType.FLOAT, 1.2f, null, null), ImmutableList.of()); + + assertFilterMatches( + new EqualityFilter("expr", ColumnType.DOUBLE, 
1.1, null, null), + ImmutableList.of("0", "1", "2", "3", "4", "5") + ); + assertFilterMatches(new EqualityFilter("expr", ColumnType.DOUBLE, 1.2, null, null), ImmutableList.of()); + + // if we accidentally specify it as a string, it works too... + assertFilterMatches( + new EqualityFilter("expr", ColumnType.DOUBLE, "1.1", null, null), + ImmutableList.of("0", "1", "2", "3", "4", "5") + ); + assertFilterMatches(new EqualityFilter("expr", ColumnType.DOUBLE, "1.2", null, null), ImmutableList.of()); + } + + @Test + public void testWithTimeExtractionFnNull() + { + assertFilterMatches(new EqualityFilter( + "timeDim", + ColumnType.STRING, + "2017-07", + new TimeDimExtractionFn("yyyy-MM-dd", "yyyy-MM", true), + null + ), ImmutableList.of("0", "1")); + assertFilterMatches(new EqualityFilter( + "timeDim", + ColumnType.STRING, + "2017-05", + new TimeDimExtractionFn("yyyy-MM-dd", "yyyy-MM", true), + null + ), ImmutableList.of("2")); + + assertFilterMatches(new EqualityFilter( + "timeDim", + ColumnType.STRING, + "2020-01", + new TimeDimExtractionFn("yyyy-MM-dd", "yyyy-MM", true), + null + ), ImmutableList.of("3", "5")); + } + + @Test + public void testSelectorWithLookupExtractionFn() + { + final Map stringMap = ImmutableMap.of( + "1", "HELLO", + "a", "HELLO", + "abdef", "HELLO", + "abc", "UNKNOWN" + ); + LookupExtractor mapExtractor = new MapLookupExtractor(stringMap, false); + LookupExtractionFn lookupFn = new LookupExtractionFn(mapExtractor, false, "UNKNOWN", false, true); + + assertFilterMatches(new EqualityFilter("dim0", ColumnType.STRING, "HELLO", lookupFn, null), ImmutableList.of("1")); + assertFilterMatches( + new EqualityFilter("dim0", ColumnType.STRING, "UNKNOWN", lookupFn, null), + ImmutableList.of("0", "2", "3", "4", "5") + ); + + assertFilterMatches( + new EqualityFilter("dim1", ColumnType.STRING, "HELLO", lookupFn, null), + ImmutableList.of("3", "4") + ); + assertFilterMatches( + new EqualityFilter("dim1", ColumnType.STRING, "UNKNOWN", lookupFn, null), + 
ImmutableList.of("0", "1", "2", "5") + ); + + if (!isAutoSchema()) { + // auto ingests arrays instead of mvds, arrays do not support extractionFn + assertFilterMatches( + new EqualityFilter("dim2", ColumnType.STRING, "HELLO", lookupFn, null), + ImmutableList.of("0", "3") + ); + assertFilterMatches( + new EqualityFilter("dim2", ColumnType.STRING, "UNKNOWN", lookupFn, null), + ImmutableList.of("0", "1", "2", "4", "5") + ); + + assertFilterMatches( + new EqualityFilter("dim3", ColumnType.STRING, "HELLO", lookupFn, null), + ImmutableList.of() + ); + assertFilterMatches( + new EqualityFilter("dim3", ColumnType.STRING, "UNKNOWN", lookupFn, null), + ImmutableList.of("0", "1", "2", "3", "4", "5") + ); + + assertFilterMatches( + new EqualityFilter("dim4", ColumnType.STRING, "HELLO", lookupFn, null), + ImmutableList.of() + ); + assertFilterMatches( + new EqualityFilter("dim4", ColumnType.STRING, "UNKNOWN", lookupFn, null), + ImmutableList.of("0", "1", "2", "3", "4", "5") + ); + } + + final Map stringMap2 = ImmutableMap.of( + "2", "5" + ); + LookupExtractor mapExtractor2 = new MapLookupExtractor(stringMap2, false); + LookupExtractionFn lookupFn2 = new LookupExtractionFn(mapExtractor2, true, null, false, true); + assertFilterMatches( + new EqualityFilter("dim0", ColumnType.STRING, "5", lookupFn2, null), + ImmutableList.of("2", "5") + ); + + if (NullHandling.sqlCompatible()) { + + final Map stringMap3 = ImmutableMap.of( + "1", "" + ); + LookupExtractor mapExtractor3 = new MapLookupExtractor(stringMap3, false); + LookupExtractionFn lookupFn3 = new LookupExtractionFn(mapExtractor3, false, null, false, true); + assertFilterMatches( + new EqualityFilter("dim0", ColumnType.STRING, "", lookupFn3, null), + ImmutableList.of("1") + ); + } + } + + @Test + public void testNumericColumnNullsAndDefaults() + { + if (canTestNumericNullsAsDefaultValues) { + assertFilterMatches(new EqualityFilter("f0", ColumnType.FLOAT, 0f, null, null), ImmutableList.of("0", "4")); + assertFilterMatches(new 
EqualityFilter("d0", ColumnType.DOUBLE, 0.0, null, null), ImmutableList.of("0", "2")); + assertFilterMatches(new EqualityFilter("l0", ColumnType.LONG, 0L, null, null), ImmutableList.of("0", "3")); + assertFilterMatches(new EqualityFilter("f0", ColumnType.STRING, "0", null, null), ImmutableList.of("0", "4")); + assertFilterMatches(new EqualityFilter("d0", ColumnType.STRING, "0", null, null), ImmutableList.of("0", "2")); + assertFilterMatches(new EqualityFilter("l0", ColumnType.STRING, "0", null, null), ImmutableList.of("0", "3")); + } else { + assertFilterMatches(new EqualityFilter("f0", ColumnType.FLOAT, 0f, null, null), ImmutableList.of("0")); + assertFilterMatches(new EqualityFilter("d0", ColumnType.DOUBLE, 0.0, null, null), ImmutableList.of("0")); + assertFilterMatches(new EqualityFilter("l0", ColumnType.LONG, 0L, null, null), ImmutableList.of("0")); + assertFilterMatches(new EqualityFilter("f0", ColumnType.STRING, "0", null, null), ImmutableList.of("0")); + assertFilterMatches(new EqualityFilter("d0", ColumnType.STRING, "0", null, null), ImmutableList.of("0")); + assertFilterMatches(new EqualityFilter("l0", ColumnType.STRING, "0", null, null), ImmutableList.of("0")); + } + } + + @Test + public void testVirtualNumericColumnNullsAndDefaults() + { + if (canTestNumericNullsAsDefaultValues) { + assertFilterMatches(new EqualityFilter("vf0", ColumnType.FLOAT, 0f, null, null), ImmutableList.of("0", "4")); + assertFilterMatches(new EqualityFilter("vd0", ColumnType.DOUBLE, 0.0, null, null), ImmutableList.of("0", "2")); + assertFilterMatches(new EqualityFilter("vl0", ColumnType.LONG, 0L, null, null), ImmutableList.of("0", "3")); + assertFilterMatches(new EqualityFilter("vf0", ColumnType.STRING, "0", null, null), ImmutableList.of("0", "4")); + assertFilterMatches(new EqualityFilter("vd0", ColumnType.STRING, "0", null, null), ImmutableList.of("0", "2")); + assertFilterMatches(new EqualityFilter("vl0", ColumnType.STRING, "0", null, null), ImmutableList.of("0", "3")); + } 
else { + assertFilterMatches(new EqualityFilter("vf0", ColumnType.FLOAT, 0f, null, null), ImmutableList.of("0")); + assertFilterMatches(new EqualityFilter("vd0", ColumnType.DOUBLE, 0.0, null, null), ImmutableList.of("0")); + assertFilterMatches(new EqualityFilter("vl0", ColumnType.LONG, 0L, null, null), ImmutableList.of("0")); + assertFilterMatches(new EqualityFilter("vf0", ColumnType.STRING, "0", null, null), ImmutableList.of("0")); + assertFilterMatches(new EqualityFilter("vd0", ColumnType.STRING, "0", null, null), ImmutableList.of("0")); + assertFilterMatches(new EqualityFilter("vl0", ColumnType.STRING, "0", null, null), ImmutableList.of("0")); + } + } + + @Test + public void test_equals() + { + EqualsVerifier.forClass(EqualityFilter.class).usingGetClass() + .withNonnullFields("column", "matchValueType", "matchValue") + .withIgnoredFields("predicateFactory", "cachedOptimizedFilter").verify(); + } +} diff --git a/processing/src/test/java/org/apache/druid/segment/filter/ExpressionFilterTest.java b/processing/src/test/java/org/apache/druid/segment/filter/ExpressionFilterTest.java index e46e736f9fa9..0952dc7df10d 100644 --- a/processing/src/test/java/org/apache/druid/segment/filter/ExpressionFilterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/filter/ExpressionFilterTest.java @@ -162,8 +162,8 @@ public void testOneSingleValuedStringColumn() @Test public void testOneMultiValuedStringColumn() { - // auto type columns don't support mvds, bail out - if (testName.contains("AutoTypes")) { + // auto type columns ingest arrays instead of mvds, bail out + if (isAutoSchema()) { return; } if (NullHandling.replaceWithDefault()) { @@ -252,8 +252,8 @@ public void testCompareColumns() assertFilterMatches(edf("dim2 == dim3"), ImmutableList.of("2", "5", "8")); } - // auto type columns don't support mvds, bail out - if (testName.contains("AutoTypes")) { + // auto type columns ingest arrays instead of mvds + if (isAutoSchema()) { return; } // String vs. 
multi-value string diff --git a/processing/src/test/java/org/apache/druid/segment/filter/FilterPartitionTest.java b/processing/src/test/java/org/apache/druid/segment/filter/FilterPartitionTest.java index 0c6733e648be..3e19b9a113df 100644 --- a/processing/src/test/java/org/apache/druid/segment/filter/FilterPartitionTest.java +++ b/processing/src/test/java/org/apache/druid/segment/filter/FilterPartitionTest.java @@ -44,8 +44,8 @@ import org.apache.druid.segment.IndexBuilder; import org.apache.druid.segment.QueryableIndexStorageAdapter; import org.apache.druid.segment.StorageAdapter; -import org.apache.druid.segment.column.BitmapColumnIndex; import org.apache.druid.segment.filter.cnf.CNFFilterExplosionException; +import org.apache.druid.segment.index.BitmapColumnIndex; import org.junit.AfterClass; import org.junit.Assert; import org.junit.Test; @@ -235,6 +235,9 @@ public void testSinglePostFilterWithNulls() @Test public void testBasicPreAndPostFilterWithNulls() { + if (isAutoSchema()) { + return; + } if (NullHandling.replaceWithDefault()) { assertFilterMatches( new AndDimFilter(Arrays.asList( @@ -359,6 +362,9 @@ public void testBasicPreAndPostFilterWithNulls() @Test public void testOrPostFilterWithNulls() { + if (isAutoSchema()) { + return; + } assertFilterMatches( new OrDimFilter(Arrays.asList( new SelectorDimFilter("dim2", "a", null), @@ -617,13 +623,17 @@ public void testMissingColumnNotSpecifiedInDimensionList() @Test public void testDistributeOrCNF() throws CNFFilterExplosionException { + if (isAutoSchema()) { + return; + } DimFilter dimFilter1 = new OrDimFilter(Arrays.asList( new SelectorDimFilter("dim0", "6", null), new AndDimFilter(Arrays.asList( new NoBitmapSelectorDimFilter("dim1", "abdef", null), new SelectorDimFilter("dim2", "c", null) ) - )) + ) + ) ); Filter filter1 = dimFilter1.toFilter(); @@ -671,13 +681,17 @@ public void testDistributeOrCNF() throws CNFFilterExplosionException @Test public void testDistributeOrCNFExtractionFn() throws 
CNFFilterExplosionException { + if (isAutoSchema()) { + return; + } DimFilter dimFilter1 = new OrDimFilter(Arrays.asList( new SelectorDimFilter("dim0", "super-6", JS_EXTRACTION_FN), new AndDimFilter(Arrays.asList( new NoBitmapSelectorDimFilter("dim1", "super-abdef", JS_EXTRACTION_FN), new SelectorDimFilter("dim2", "super-c", JS_EXTRACTION_FN) ) - )) + ) + ) ); Filter filter1 = dimFilter1.toFilter(); diff --git a/processing/src/test/java/org/apache/druid/segment/filter/InFilterTest.java b/processing/src/test/java/org/apache/druid/segment/filter/InFilterTest.java index 23ced8e5208a..535a134b0354 100644 --- a/processing/src/test/java/org/apache/druid/segment/filter/InFilterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/filter/InFilterTest.java @@ -149,6 +149,9 @@ public void testSingleValueStringColumnWithNulls() @Test public void testMultiValueStringColumn() { + if (isAutoSchema()) { + return; + } if (NullHandling.replaceWithDefault()) { assertFilterMatches( toInFilter("dim2", "b", "d"), @@ -256,35 +259,35 @@ public void testMatchWithExtractionFn() ExtractionFn yesNullFn = new JavaScriptExtractionFn(nullJsFn, false, JavaScriptConfig.getEnabledInstance()); if (NullHandling.replaceWithDefault()) { - assertFilterMatches( + assertFilterMatchesSkipArrays( toInFilterWithFn("dim2", superFn, "super-null", "super-a", "super-b"), ImmutableList.of("a", "b", "c", "d", "f") ); + assertFilterMatchesSkipArrays( + toInFilterWithFn("dim2", yesNullFn, "YES"), + ImmutableList.of("b", "c", "f") + ); assertFilterMatches( toInFilterWithFn("dim1", superFn, "super-null", "super-10", "super-def"), ImmutableList.of("a", "b", "e") ); - assertFilterMatches( - toInFilterWithFn("dim2", yesNullFn, "YES"), - ImmutableList.of("b", "c", "f") - ); assertFilterMatches( toInFilterWithFn("dim1", yesNullFn, "NO"), ImmutableList.of("b", "c", "d", "e", "f") ); } else { - assertFilterMatches( + assertFilterMatchesSkipArrays( toInFilterWithFn("dim2", superFn, "super-null", "super-a", 
"super-b"), ImmutableList.of("a", "b", "d", "f") ); + assertFilterMatchesSkipArrays( + toInFilterWithFn("dim2", yesNullFn, "YES"), + ImmutableList.of("b", "f") + ); assertFilterMatches( toInFilterWithFn("dim1", superFn, "super-null", "super-10", "super-def"), ImmutableList.of("b", "e") ); - assertFilterMatches( - toInFilterWithFn("dim2", yesNullFn, "YES"), - ImmutableList.of("b", "f") - ); assertFilterMatches( toInFilterWithFn("dim1", yesNullFn, "NO"), @@ -322,9 +325,10 @@ public void testMatchWithLookupExtractionFn() assertFilterMatches(toInFilterWithFn("dim0", lookupFn, "UNKNOWN"), ImmutableList.of("b", "d", "e", "f")); assertFilterMatches(toInFilterWithFn("dim1", lookupFn, "HELLO"), ImmutableList.of("b", "e")); assertFilterMatches(toInFilterWithFn("dim1", lookupFn, "N/A"), ImmutableList.of()); - assertFilterMatches(toInFilterWithFn("dim2", lookupFn, "a"), ImmutableList.of()); - assertFilterMatches(toInFilterWithFn("dim2", lookupFn, "HELLO"), ImmutableList.of("a", "d")); - assertFilterMatches( + + assertFilterMatchesSkipArrays(toInFilterWithFn("dim2", lookupFn, "a"), ImmutableList.of()); + assertFilterMatchesSkipArrays(toInFilterWithFn("dim2", lookupFn, "HELLO"), ImmutableList.of("a", "d")); + assertFilterMatchesSkipArrays( toInFilterWithFn("dim2", lookupFn, "HELLO", "BYE", "UNKNOWN"), ImmutableList.of("a", "b", "c", "d", "e", "f") ); diff --git a/processing/src/test/java/org/apache/druid/segment/filter/JavaScriptFilterTest.java b/processing/src/test/java/org/apache/druid/segment/filter/JavaScriptFilterTest.java index 40ccaa64dbb4..98b9579a9079 100644 --- a/processing/src/test/java/org/apache/druid/segment/filter/JavaScriptFilterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/filter/JavaScriptFilterTest.java @@ -111,6 +111,9 @@ public void testSingleValueStringColumnWithNulls() @Test public void testMultiValueStringColumn() { + if (isAutoSchema()) { + return; + } // multi-val null...... 
if (NullHandling.replaceWithDefault()) { assertFilterMatchesSkipVectorize( @@ -184,14 +187,16 @@ public void testJavascriptFilterWithLookupExtractionFn() ImmutableList.of("0", "1", "2", "5") ); - assertFilterMatchesSkipVectorize( - newJavaScriptDimFilter("dim2", jsValueFilter("HELLO"), lookupFn), - ImmutableList.of("0", "3") - ); - assertFilterMatchesSkipVectorize( - newJavaScriptDimFilter("dim2", jsValueFilter("UNKNOWN"), lookupFn), - ImmutableList.of("0", "1", "2", "4", "5") - ); + if (!isAutoSchema()) { + assertFilterMatchesSkipVectorize( + newJavaScriptDimFilter("dim2", jsValueFilter("HELLO"), lookupFn), + ImmutableList.of("0", "3") + ); + assertFilterMatchesSkipVectorize( + newJavaScriptDimFilter("dim2", jsValueFilter("UNKNOWN"), lookupFn), + ImmutableList.of("0", "1", "2", "4", "5") + ); + } assertFilterMatchesSkipVectorize( newJavaScriptDimFilter("dim3", jsValueFilter("HELLO"), lookupFn), diff --git a/processing/src/test/java/org/apache/druid/segment/filter/NullFilterTest.java b/processing/src/test/java/org/apache/druid/segment/filter/NullFilterTest.java new file mode 100644 index 000000000000..22d928d11874 --- /dev/null +++ b/processing/src/test/java/org/apache/druid/segment/filter/NullFilterTest.java @@ -0,0 +1,240 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.filter; + +import com.google.common.base.Function; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import nl.jqno.equalsverifier.EqualsVerifier; +import org.apache.druid.common.config.NullHandling; +import org.apache.druid.java.util.common.Pair; +import org.apache.druid.query.extraction.MapLookupExtractor; +import org.apache.druid.query.filter.NullFilter; +import org.apache.druid.query.lookup.LookupExtractionFn; +import org.apache.druid.query.lookup.LookupExtractor; +import org.apache.druid.segment.IndexBuilder; +import org.apache.druid.segment.StorageAdapter; +import org.junit.AfterClass; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import java.io.Closeable; +import java.util.Map; + +@RunWith(Parameterized.class) +public class NullFilterTest extends BaseFilterTest +{ + public NullFilterTest( + String testName, + IndexBuilder indexBuilder, + Function> finisher, + boolean cnf, + boolean optimize + ) + { + super(testName, DEFAULT_ROWS, indexBuilder, finisher, cnf, optimize); + } + + @AfterClass + public static void tearDown() throws Exception + { + BaseFilterTest.tearDown(NullFilterTest.class.getName()); + } + + @Test + public void testSingleValueStringColumnWithoutNulls() + { + assertFilterMatches(NullFilter.forColumn("dim0"), ImmutableList.of()); + } + + @Test + public void testSingleValueVirtualStringColumnWithoutNulls() + { + assertFilterMatches(NullFilter.forColumn("vdim0"), ImmutableList.of()); + } + + @Test + public void testListFilteredVirtualColumn() + { + assertFilterMatchesSkipVectorize(NullFilter.forColumn("allow-dim0"), ImmutableList.of("0", "1", "2", "5")); + assertFilterMatchesSkipVectorize(NullFilter.forColumn("deny-dim0"), ImmutableList.of("3", "4")); + if (isAutoSchema()) { + return; + } + 
assertFilterMatchesSkipVectorize(NullFilter.forColumn("allow-dim2"), ImmutableList.of("1", "2", "4", "5")); + if (NullHandling.replaceWithDefault()) { + assertFilterMatchesSkipVectorize( + NullFilter.forColumn("deny-dim2"), + ImmutableList.of("1", "2", "3", "5") + ); + } else { + assertFilterMatchesSkipVectorize( + NullFilter.forColumn("deny-dim2"), + ImmutableList.of("1", "3", "5") + ); + } + } + + @Test + public void testSingleValueStringColumnWithNulls() + { + // testSingleValueStringColumnWithoutNulls but with virtual column selector + if (NullHandling.replaceWithDefault()) { + assertFilterMatches(NullFilter.forColumn("dim1"), ImmutableList.of("0")); + } else { + assertFilterMatches(NullFilter.forColumn("dim1"), ImmutableList.of()); + } + } + + @Test + public void testSingleValueVirtualStringColumnWithNulls() + { + // testSingleValueStringColumnWithNulls but with virtual column selector + if (NullHandling.replaceWithDefault()) { + assertFilterMatches(NullFilter.forColumn("vdim1"), ImmutableList.of("0")); + } else { + assertFilterMatches(NullFilter.forColumn("vdim1"), ImmutableList.of()); + } + } + + @Test + public void testMultiValueStringColumn() + { + if (NullHandling.replaceWithDefault()) { + if (isAutoSchema()) { + assertFilterMatches(NullFilter.forColumn("dim2"), ImmutableList.of("5")); + } else { + assertFilterMatches(NullFilter.forColumn("dim2"), ImmutableList.of("1", "2", "5")); + } + } else { + // only one array row is totally null + if (isAutoSchema()) { + assertFilterMatches(NullFilter.forColumn("dim2"), ImmutableList.of("5")); + } else { + assertFilterMatches(NullFilter.forColumn("dim2"), ImmutableList.of("1", "5")); + } + } + } + + @Test + public void testMissingColumnSpecifiedInDimensionList() + { + assertFilterMatches(NullFilter.forColumn("dim3"), ImmutableList.of("0", "1", "2", "3", "4", "5")); + } + + @Test + public void testMissingColumnNotSpecifiedInDimensionList() + { + assertFilterMatches(NullFilter.forColumn("dim4"), ImmutableList.of("0", 
"1", "2", "3", "4", "5")); + } + + + @Test + public void testVirtualNumericColumnNullsAndDefaults() + { + if (canTestNumericNullsAsDefaultValues) { + assertFilterMatches(NullFilter.forColumn("vf0"), ImmutableList.of()); + assertFilterMatches(NullFilter.forColumn("vd0"), ImmutableList.of()); + assertFilterMatches(NullFilter.forColumn("vl0"), ImmutableList.of()); + } else { + assertFilterMatches(NullFilter.forColumn("vf0"), ImmutableList.of("4")); + assertFilterMatches(NullFilter.forColumn("vd0"), ImmutableList.of("2")); + assertFilterMatches(NullFilter.forColumn("vl0"), ImmutableList.of("3")); + } + } + + @Test + public void testNumericColumnNullsAndDefaults() + { + if (canTestNumericNullsAsDefaultValues) { + assertFilterMatches(NullFilter.forColumn("f0"), ImmutableList.of()); + assertFilterMatches(NullFilter.forColumn("d0"), ImmutableList.of()); + assertFilterMatches(NullFilter.forColumn("l0"), ImmutableList.of()); + } else { + assertFilterMatches(NullFilter.forColumn("f0"), ImmutableList.of("4")); + assertFilterMatches(NullFilter.forColumn("d0"), ImmutableList.of("2")); + assertFilterMatches(NullFilter.forColumn("l0"), ImmutableList.of("3")); + } + } + + @Test + public void testSelectorWithLookupExtractionFn() + { + /* + static final List DEFAULT_ROWS = ImmutableList.of( + makeDefaultSchemaRow("0", "", ImmutableList.of("a", "b"), "2017-07-25", 0.0, 0.0f, 0L), + makeDefaultSchemaRow("1", "10", ImmutableList.of(), "2017-07-25", 10.1, 10.1f, 100L), + makeDefaultSchemaRow("2", "2", ImmutableList.of(""), "2017-05-25", null, 5.5f, 40L), + makeDefaultSchemaRow("3", "1", ImmutableList.of("a"), "2020-01-25", 120.0245, 110.0f, null), + makeDefaultSchemaRow("4", "abdef", ImmutableList.of("c"), null, 60.0, null, 9001L), + makeDefaultSchemaRow("5", "abc", null, "2020-01-25", 765.432, 123.45f, 12345L) + ); + */ + final Map stringMap = ImmutableMap.of( + "1", "HELLO", + "a", "HELLO", + "abdef", "HELLO", + "abc", "UNKNOWN" + ); + LookupExtractor mapExtractor = new 
MapLookupExtractor(stringMap, false); + LookupExtractionFn lookupFn = new LookupExtractionFn(mapExtractor, false, null, false, true); + LookupExtractionFn lookupFnRetain = new LookupExtractionFn(mapExtractor, true, null, false, true); + LookupExtractionFn lookupFnReplace = new LookupExtractionFn(mapExtractor, false, "UNKNOWN", false, true); + + if (NullHandling.replaceWithDefault()) { + assertFilterMatches(new NullFilter("dim0", lookupFn, null), ImmutableList.of("0", "2", "3", "4", "5")); + assertFilterMatches(new NullFilter("dim0", lookupFnRetain, null), ImmutableList.of()); + } else { + assertFilterMatches(new NullFilter("dim0", lookupFn, null), ImmutableList.of("0", "2", "3", "4", "5")); + assertFilterMatches(new NullFilter("dim0", lookupFnRetain, null), ImmutableList.of()); + } + + assertFilterMatches(new NullFilter("dim0", lookupFnReplace, null), ImmutableList.of()); + + + final Map stringMapEmpty = ImmutableMap.of( + "1", "" + ); + LookupExtractor mapExtractoryEmpty = new MapLookupExtractor(stringMapEmpty, false); + LookupExtractionFn lookupFnEmpty = new LookupExtractionFn(mapExtractoryEmpty, false, null, false, true); + if (NullHandling.replaceWithDefault()) { + // Nulls and empty strings are considered equivalent + assertFilterMatches( + new NullFilter("dim0", lookupFnEmpty, null), + ImmutableList.of("0", "1", "2", "3", "4", "5") + ); + } else { + assertFilterMatches( + new NullFilter("dim0", lookupFnEmpty, null), + ImmutableList.of("0", "2", "3", "4", "5") + ); + } + } + + @Test + public void test_equals() + { + EqualsVerifier.forClass(NullFilter.class).usingGetClass() + .withNonnullFields("column") + .withIgnoredFields("cachedOptimizedFilter") + .verify(); + } +} diff --git a/processing/src/test/java/org/apache/druid/segment/filter/RangeFilterTest.java b/processing/src/test/java/org/apache/druid/segment/filter/RangeFilterTest.java new file mode 100644 index 000000000000..d324a510f398 --- /dev/null +++ 
b/processing/src/test/java/org/apache/druid/segment/filter/RangeFilterTest.java @@ -0,0 +1,857 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.filter; + +import com.google.common.base.Function; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import nl.jqno.equalsverifier.EqualsVerifier; +import org.apache.druid.common.config.NullHandling; +import org.apache.druid.data.input.InputRow; +import org.apache.druid.java.util.common.IAE; +import org.apache.druid.java.util.common.Pair; +import org.apache.druid.js.JavaScriptConfig; +import org.apache.druid.query.extraction.ExtractionFn; +import org.apache.druid.query.extraction.JavaScriptExtractionFn; +import org.apache.druid.query.filter.BoundDimFilter; +import org.apache.druid.query.filter.Filter; +import org.apache.druid.query.filter.RangeFilter; +import org.apache.druid.query.ordering.StringComparators; +import org.apache.druid.segment.IndexBuilder; +import org.apache.druid.segment.StorageAdapter; +import org.apache.druid.segment.column.ColumnType; +import org.junit.AfterClass; +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import 
org.junit.runners.Parameterized; + +import java.io.Closeable; +import java.util.List; + +@RunWith(Parameterized.class) +public class RangeFilterTest extends BaseFilterTest +{ + private static final List ROWS = ImmutableList.builder() + .addAll(DEFAULT_ROWS) + .add(makeDefaultSchemaRow( + "6", + "-1000", + ImmutableList.of("a"), + null, + 6.6, + null, + 10L + )) + .add(makeDefaultSchemaRow( + "7", + "-10.012", + ImmutableList.of("d"), + null, + null, + 3.0f, + null + )) + .build(); + + public RangeFilterTest( + String testName, + IndexBuilder indexBuilder, + Function> finisher, + boolean cnf, + boolean optimize + ) + { + super(testName, ROWS, indexBuilder, finisher, cnf, optimize); + } + + @AfterClass + public static void tearDown() throws Exception + { + BaseFilterTest.tearDown(RangeFilterTest.class.getName()); + } + + @Test + public void testLexicographicalMatch() + { + + assertFilterMatches( + new RangeFilter("dim0", ColumnType.STRING, null, "z", false, false, null, null), + ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7") + ); + assertFilterMatches( + new RangeFilter("vdim0", ColumnType.STRING, null, "z", false, false, null, null), + ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7") + ); + + if (NullHandling.sqlCompatible()) { + assertFilterMatches( + new RangeFilter("dim1", ColumnType.STRING, null, "z", false, false, null, null), + ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7") + ); + assertFilterMatches( + new RangeFilter("vdim0", ColumnType.STRING, null, "z", false, false, null, null), + ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7") + ); + } else { + assertFilterMatches( + new RangeFilter("dim1", ColumnType.STRING, null, "z", false, false, null, null), + ImmutableList.of("1", "2", "3", "4", "5", "6", "7") + ); + assertFilterMatches( + new RangeFilter("vdim0", ColumnType.STRING, null, "z", false, false, null, null), + ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7") + ); + } + + if (isAutoSchema()) { + // auto schema 
ingests arrays instead of mvds.. this filter doesn't currently support arrays + } else { + assertFilterMatches( + new RangeFilter("dim2", ColumnType.STRING, null, "z", false, false, null, null), + NullHandling.sqlCompatible() + ? ImmutableList.of("0", "2", "3", "4", "6", "7") + : ImmutableList.of("0", "3", "4", "6", "7") + ); + // vdim2 does not exist... + assertFilterMatches( + new RangeFilter("dim3", ColumnType.STRING, null, "z", false, false, null, null), + ImmutableList.of() + ); + } + } + + @Test + public void testLexicographicMatchWithEmptyString() + { + if (NullHandling.sqlCompatible()) { + assertFilterMatches( + new RangeFilter("dim0", ColumnType.STRING, "", "z", false, false, null, null), + ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7") + ); + assertFilterMatches( + new RangeFilter("dim1", ColumnType.STRING, "", "z", false, false, null, null), + ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7") + ); + if (!isAutoSchema()) { + // auto schema ingests arrays which are currently incompatible with the range filter + assertFilterMatches( + new RangeFilter("dim2", ColumnType.STRING, "", "z", false, false, null, null), + ImmutableList.of("0", "2", "3", "4", "6", "7") + ); + } + assertFilterMatches( + new RangeFilter("dim3", ColumnType.STRING, "", "z", false, false, null, null), + ImmutableList.of() + ); + } else { + assertFilterMatches( + new RangeFilter("dim0", ColumnType.STRING, "", "z", false, false, null, null), + ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7") + ); + assertFilterMatches( + new RangeFilter("dim1", ColumnType.STRING, "", "z", false, false, null, null), + ImmutableList.of("1", "2", "3", "4", "5", "6", "7") + ); + if (!isAutoSchema()) { + // auto schema ingests arrays which are currently incompatible with the range filter + assertFilterMatches( + new RangeFilter("dim2", ColumnType.STRING, "", "z", false, false, null, null), + ImmutableList.of("0", "3", "4", "6", "7") + ); + } + assertFilterMatches( + new 
RangeFilter("dim3", ColumnType.STRING, "", "z", false, false, null, null), + ImmutableList.of() + ); + } + } + + @Test + public void testLexicographicMatchNull() + { + assertFilterMatches( + new BoundDimFilter("dim0", "", "", false, false, false, null, StringComparators.LEXICOGRAPHIC), + ImmutableList.of() + ); + assertFilterMatches( + new BoundDimFilter("dim1", "", "", false, false, false, null, StringComparators.LEXICOGRAPHIC), + ImmutableList.of("0") + ); + if (NullHandling.replaceWithDefault()) { + assertFilterMatchesSkipArrays( + new BoundDimFilter("dim2", "", "", false, false, false, null, StringComparators.LEXICOGRAPHIC), + isAutoSchema() ? ImmutableList.of() : ImmutableList.of("1", "2", "5") + ); + } else { + assertFilterMatchesSkipArrays( + new BoundDimFilter("dim2", "", "", false, false, false, null, StringComparators.LEXICOGRAPHIC), + isAutoSchema() ? ImmutableList.of() : ImmutableList.of("2") + ); + } + } + + @Test + public void testLexicographicMatchMissingColumn() + { + if (NullHandling.replaceWithDefault()) { + assertFilterMatches( + new BoundDimFilter("dim3", "", "", false, false, false, null, StringComparators.LEXICOGRAPHIC), + ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7") + ); + assertFilterMatches( + new BoundDimFilter("dim3", "", null, false, true, false, null, StringComparators.LEXICOGRAPHIC), + ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7") + ); + assertFilterMatches( + new BoundDimFilter("dim3", null, "", false, true, false, null, StringComparators.LEXICOGRAPHIC), + ImmutableList.of() + ); + } else { + assertFilterMatches( + new BoundDimFilter("dim3", "", "", false, false, false, null, StringComparators.LEXICOGRAPHIC), + ImmutableList.of() + ); + assertFilterMatches( + new BoundDimFilter("dim3", "", null, false, true, false, null, StringComparators.LEXICOGRAPHIC), + ImmutableList.of() + ); + assertFilterMatches( + new BoundDimFilter("dim3", null, "", false, true, false, null, StringComparators.LEXICOGRAPHIC), + 
ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7") + ); + } + assertFilterMatches( + new BoundDimFilter("dim3", "", "", true, false, false, null, StringComparators.LEXICOGRAPHIC), + ImmutableList.of() + ); + assertFilterMatches( + new BoundDimFilter("dim3", "", "", false, true, false, null, StringComparators.LEXICOGRAPHIC), + ImmutableList.of() + ); + + assertFilterMatches( + new BoundDimFilter("dim3", null, "", false, false, false, null, StringComparators.LEXICOGRAPHIC), + ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7") + ); + if (NullHandling.sqlCompatible()) { + assertFilterMatches( + new BoundDimFilter("dim3", null, "", false, true, false, null, StringComparators.LEXICOGRAPHIC), + ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7") + ); + } else { + assertFilterMatches( + new BoundDimFilter("dim3", null, "", false, true, false, null, StringComparators.LEXICOGRAPHIC), + ImmutableList.of() + ); + } + } + + + @Test + public void testLexicographicMatchTooStrict() + { + assertFilterMatches( + new BoundDimFilter("dim1", "abc", "abc", true, false, false, null, StringComparators.LEXICOGRAPHIC), + ImmutableList.of() + ); + assertFilterMatches( + new BoundDimFilter("dim1", "abc", "abc", true, true, false, null, StringComparators.LEXICOGRAPHIC), + ImmutableList.of() + ); + assertFilterMatches( + new BoundDimFilter("dim1", "abc", "abc", false, true, false, null, StringComparators.LEXICOGRAPHIC), + ImmutableList.of() + ); + } + + @Test + public void testLexicographicMatchExactlySingleValue() + { + assertFilterMatches( + new BoundDimFilter("dim1", "abc", "abc", false, false, false, null, StringComparators.LEXICOGRAPHIC), + ImmutableList.of("5") + ); + } + + @Test + public void testLexicographicMatchSurroundingSingleValue() + { + assertFilterMatches( + new BoundDimFilter("dim1", "ab", "abd", true, true, false, null, StringComparators.LEXICOGRAPHIC), + ImmutableList.of("5") + ); + } + + @Test + public void testLexicographicMatchNoUpperLimit() + { + 
assertFilterMatches( + new BoundDimFilter("dim1", "ab", null, true, true, false, null, StringComparators.LEXICOGRAPHIC), + ImmutableList.of("4", "5") + ); + } + + @Test + public void testLexicographicMatchNoLowerLimit() + { + assertFilterMatches( + new BoundDimFilter("dim1", null, "abd", true, true, false, null, StringComparators.LEXICOGRAPHIC), + ImmutableList.of("0", "1", "2", "3", "5", "6", "7") + ); + } + + @Test + public void testLexicographicMatchNumbers() + { + assertFilterMatches( + new BoundDimFilter("dim1", "1", "3", false, false, false, null, StringComparators.LEXICOGRAPHIC), + ImmutableList.of("1", "2", "3") + ); + assertFilterMatches( + new BoundDimFilter("dim1", "1", "3", true, true, false, null, StringComparators.LEXICOGRAPHIC), + ImmutableList.of("1", "2") + ); + assertFilterMatches( + new BoundDimFilter("dim1", "-1", "3", true, true, false, null, StringComparators.LEXICOGRAPHIC), + ImmutableList.of("1", "2", "3", "6", "7") + ); + } + + + @Test + public void testNumericMatchNull() + { + assertFilterMatches( + new BoundDimFilter("dim0", "", "", false, false, false, null, StringComparators.NUMERIC), + ImmutableList.of() + ); + assertFilterMatches( + new BoundDimFilter("dim1", "", "", false, false, false, null, StringComparators.NUMERIC), + ImmutableList.of("0") + ); + if (NullHandling.replaceWithDefault()) { + assertFilterMatchesSkipArrays( + new BoundDimFilter("dim2", "", "", false, false, false, null, StringComparators.NUMERIC), + ImmutableList.of("1", "2", "5") + ); + assertFilterMatches( + new BoundDimFilter("dim3", "", "", false, false, false, null, StringComparators.NUMERIC), + ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7") + ); + } else { + assertFilterMatchesSkipArrays( + new BoundDimFilter("dim2", "", "", false, false, false, null, StringComparators.NUMERIC), + ImmutableList.of("2") + ); + assertFilterMatches( + new BoundDimFilter("dim3", "", "", false, false, false, null, StringComparators.NUMERIC), + ImmutableList.of() + ); + } + 
+ } + + @Test + public void testNumericMatchTooStrict() + { + assertFilterMatches( + new BoundDimFilter("dim1", "2", "2", true, false, false, null, StringComparators.NUMERIC), + ImmutableList.of() + ); + assertFilterMatches( + new BoundDimFilter("dim1", "2", "2", true, true, false, null, StringComparators.NUMERIC), + ImmutableList.of() + ); + assertFilterMatches( + new BoundDimFilter("dim1", "2", "2", false, true, false, null, StringComparators.NUMERIC), + ImmutableList.of() + ); + } + + @Test + public void testNumericMatchVirtualColumn() + { + assertFilterMatches( + new BoundDimFilter("expr", "1", "2", false, false, false, null, StringComparators.NUMERIC), + ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7") + ); + + assertFilterMatches( + new BoundDimFilter("expr", "2", "3", false, false, false, null, StringComparators.NUMERIC), + ImmutableList.of() + ); + } + + @Test + public void testNumericMatchExactlySingleValue() + { + assertFilterMatches( + new BoundDimFilter("dim1", "2", "2", false, false, false, null, StringComparators.NUMERIC), + ImmutableList.of("2") + ); + + assertFilterMatches( + new BoundDimFilter("dim1", "-10.012", "-10.012", false, false, false, null, StringComparators.NUMERIC), + ImmutableList.of("7") + ); + } + + @Test + public void testNumericMatchSurroundingSingleValue() + { + assertFilterMatches( + new BoundDimFilter("dim1", "1", "3", true, true, false, null, StringComparators.NUMERIC), + ImmutableList.of("2") + ); + + assertFilterMatches( + new BoundDimFilter("dim1", "-11", "-10", false, false, false, null, StringComparators.NUMERIC), + ImmutableList.of("7") + ); + } + + @Test + public void testNumericMatchNoUpperLimit() + { + assertFilterMatches( + new BoundDimFilter("dim1", "1", null, true, true, false, null, StringComparators.NUMERIC), + ImmutableList.of("1", "2") + ); + } + + @Test + public void testNumericMatchNoLowerLimit() + { + assertFilterMatches( + new BoundDimFilter("dim1", null, "2", true, true, false, null, 
StringComparators.NUMERIC), + ImmutableList.of("0", "3", "4", "5", "6", "7") + ); + } + + @Test + public void testNumericMatchWithNegatives() + { + assertFilterMatches( + new BoundDimFilter("dim1", "-2000", "3", true, true, false, null, StringComparators.NUMERIC), + ImmutableList.of("2", "3", "6", "7") + ); + } + + @Test + public void testNumericNullsAndZeros() + { + assertFilterMatches( + new BoundDimFilter( + "d0", + "0.0", + "1.0", + false, + false, + false, + null, + StringComparators.NUMERIC + ), + canTestNumericNullsAsDefaultValues ? ImmutableList.of("0", "2", "7") : ImmutableList.of("0") + ); + + assertFilterMatches( + new BoundDimFilter( + "f0", + "0.0", + "1.0", + false, + false, + false, + null, + StringComparators.NUMERIC + ), + canTestNumericNullsAsDefaultValues ? ImmutableList.of("0", "4", "6") : ImmutableList.of("0") + ); + + assertFilterMatches( + new BoundDimFilter( + "l0", + "0.0", + "1.0", + false, + false, + false, + null, + StringComparators.NUMERIC + ), + NullHandling.replaceWithDefault() && canTestNumericNullsAsDefaultValues + ? ImmutableList.of("0", "3", "7") + : ImmutableList.of("0") + ); + } + + @Test + public void testVirtualNumericNullsAndZeros() + { + assertFilterMatches( + new BoundDimFilter( + "vd0", + "0.0", + "1.0", + false, + false, + false, + null, + StringComparators.NUMERIC + ), + canTestNumericNullsAsDefaultValues ? ImmutableList.of("0", "2", "7") : ImmutableList.of("0") + ); + + assertFilterMatches( + new BoundDimFilter( + "vf0", + "0.0", + "1.0", + false, + false, + false, + null, + StringComparators.NUMERIC + ), + canTestNumericNullsAsDefaultValues ? ImmutableList.of("0", "4", "6") : ImmutableList.of("0") + ); + + assertFilterMatches( + new BoundDimFilter( + "vl0", + "0.0", + "1.0", + false, + false, + false, + null, + StringComparators.NUMERIC + ), + NullHandling.replaceWithDefault() && canTestNumericNullsAsDefaultValues + ? 
ImmutableList.of("0", "3", "7") + : ImmutableList.of("0") + ); + } + + @Test + public void testNumericNulls() + { + assertFilterMatches( + new BoundDimFilter( + "f0", + "1.0", + null, + false, + false, + false, + null, + StringComparators.NUMERIC + ), + ImmutableList.of("1", "2", "3", "5", "7") + ); + assertFilterMatches( + new BoundDimFilter( + "d0", + "1", + null, + false, + false, + false, + null, + StringComparators.NUMERIC + ), + ImmutableList.of("1", "3", "4", "5", "6") + ); + assertFilterMatches( + new BoundDimFilter( + "l0", + "1", + null, + false, + false, + false, + null, + StringComparators.NUMERIC + ), + ImmutableList.of("1", "2", "4", "5", "6") + ); + } + + @Test + public void testMatchWithExtractionFn() + { + String extractionJsFn = "function(str) { return 'super-' + str; }"; + ExtractionFn superFn = new JavaScriptExtractionFn(extractionJsFn, false, JavaScriptConfig.getEnabledInstance()); + + String nullJsFn = "function(str) { return null; }"; + ExtractionFn makeNullFn = new JavaScriptExtractionFn(nullJsFn, false, JavaScriptConfig.getEnabledInstance()); + + if (NullHandling.replaceWithDefault()) { + assertFilterMatches( + new BoundDimFilter("dim0", "", "", false, false, false, makeNullFn, StringComparators.LEXICOGRAPHIC), + ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7") + ); + } else { + assertFilterMatches( + new BoundDimFilter("dim0", "", "", false, false, false, makeNullFn, StringComparators.LEXICOGRAPHIC), + ImmutableList.of() + ); + } + + assertFilterMatches( + new BoundDimFilter( + "dim1", + "super-ab", + "super-abd", + true, + true, + false, + superFn, + StringComparators.LEXICOGRAPHIC + ), + ImmutableList.of("5") + ); + + assertFilterMatches( + new BoundDimFilter("dim1", "super-0", "super-10", false, false, true, superFn, StringComparators.ALPHANUMERIC), + ImmutableList.of("1", "2", "3") + ); + + assertFilterMatchesSkipArrays( + new BoundDimFilter( + "dim2", + "super-", + "super-zzzzzz", + false, + false, + false, + superFn, + 
StringComparators.LEXICOGRAPHIC + ), + ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7") + ); + + if (NullHandling.replaceWithDefault()) { + assertFilterMatchesSkipArrays( + new BoundDimFilter( + "dim2", + "super-null", + "super-null", + false, + false, + false, + superFn, + StringComparators.LEXICOGRAPHIC + ), + ImmutableList.of("1", "2", "5") + ); + assertFilterMatchesSkipArrays( + new BoundDimFilter( + "dim2", + "super-null", + "super-null", + false, + false, + false, + superFn, + StringComparators.NUMERIC + ), + ImmutableList.of("1", "2", "5") + ); + } else { + assertFilterMatchesSkipArrays( + new BoundDimFilter( + "dim2", + "super-null", + "super-null", + false, + false, + false, + superFn, + StringComparators.LEXICOGRAPHIC + ), + ImmutableList.of("1", "5") + ); + assertFilterMatchesSkipArrays( + new BoundDimFilter("dim2", "super-", "super-", false, false, false, superFn, StringComparators.NUMERIC), + ImmutableList.of("2") + ); + assertFilterMatchesSkipArrays( + new BoundDimFilter( + "dim2", + "super-null", + "super-null", + false, + false, + false, + superFn, + StringComparators.LEXICOGRAPHIC + ), + ImmutableList.of("1", "5") + ); + assertFilterMatchesSkipArrays( + new BoundDimFilter("dim2", "super-", "super-", false, false, false, superFn, StringComparators.NUMERIC), + ImmutableList.of("2") + ); + } + + assertFilterMatches( + new BoundDimFilter( + "dim3", + "super-null", + "super-null", + false, + false, + false, + superFn, + StringComparators.LEXICOGRAPHIC + ), + ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7") + ); + + assertFilterMatches( + new BoundDimFilter( + "dim4", + "super-null", + "super-null", + false, + false, + false, + superFn, + StringComparators.LEXICOGRAPHIC + ), + ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7") + ); + + assertFilterMatches( + new BoundDimFilter("dim4", "super-null", "super-null", false, false, false, superFn, StringComparators.NUMERIC), + ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7") + ); 
+ } + + @Test + public void testListFilteredVirtualColumn() + { + assertFilterMatchesSkipVectorize( + new BoundDimFilter("allow-dim0", "0", "2", false, false, false, null, StringComparators.LEXICOGRAPHIC), + ImmutableList.of() + ); + assertFilterMatchesSkipVectorize( + new BoundDimFilter("allow-dim0", "0", "6", false, false, false, null, StringComparators.LEXICOGRAPHIC), + ImmutableList.of("3", "4") + ); + // the bound filter matches null, so it is what it is... + assertFilterMatchesSkipVectorize( + new BoundDimFilter("allow-dim0", null, "6", false, false, false, null, StringComparators.LEXICOGRAPHIC), + ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7") + ); + + assertFilterMatchesSkipVectorize( + new BoundDimFilter("deny-dim0", "0", "6", false, false, false, null, StringComparators.LEXICOGRAPHIC), + ImmutableList.of("0", "1", "2", "5", "6") + ); + assertFilterMatchesSkipVectorize( + new BoundDimFilter("deny-dim0", "3", "4", false, false, false, null, StringComparators.LEXICOGRAPHIC), + ImmutableList.of() + ); + // the bound filter matches null, so it is what it is... + assertFilterMatchesSkipVectorize( + new BoundDimFilter("deny-dim0", null, "6", false, false, false, null, StringComparators.LEXICOGRAPHIC), + ImmutableList.of("0", "1", "2", "3", "4", "5", "6") + ); + + if (isAutoSchema()) { + // bail out, auto ingests arrays instead of mvds and this virtual column is for mvd stuff + return; + } + assertFilterMatchesSkipVectorize( + new BoundDimFilter("allow-dim2", "a", "c", false, false, false, null, StringComparators.LEXICOGRAPHIC), + ImmutableList.of("0", "3", "6") + ); + assertFilterMatchesSkipVectorize( + new BoundDimFilter("allow-dim2", "c", "z", false, false, false, null, StringComparators.LEXICOGRAPHIC), + ImmutableList.of() + ); + // the bound filter matches null, so it is what it is... 
+ assertFilterMatchesSkipVectorize( + new BoundDimFilter("allow-dim2", null, "z", false, false, false, null, StringComparators.LEXICOGRAPHIC), + ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7") + ); + + assertFilterMatchesSkipVectorize( + new BoundDimFilter("deny-dim2", "a", "b", false, true, false, null, StringComparators.LEXICOGRAPHIC), + ImmutableList.of() + ); + assertFilterMatchesSkipVectorize( + new BoundDimFilter("deny-dim2", "c", "z", false, false, false, null, StringComparators.LEXICOGRAPHIC), + ImmutableList.of("4", "7") + ); + // the bound filter matches null, so it is what it is... + assertFilterMatchesSkipVectorize( + new BoundDimFilter("deny-dim2", null, "z", false, false, false, null, StringComparators.LEXICOGRAPHIC), + ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7") + ); + } + + @Test + public void testRequiredColumnRewrite() + { + BoundFilter filter = new BoundFilter( + new BoundDimFilter("dim0", "", "", false, false, true, null, StringComparators.ALPHANUMERIC) + ); + BoundFilter filter2 = new BoundFilter( + new BoundDimFilter("dim1", "", "", false, false, true, null, StringComparators.ALPHANUMERIC) + ); + Assert.assertTrue(filter.supportsRequiredColumnRewrite()); + Assert.assertTrue(filter2.supportsRequiredColumnRewrite()); + + Filter rewrittenFilter = filter.rewriteRequiredColumns(ImmutableMap.of("dim0", "dim1")); + Assert.assertEquals(filter2, rewrittenFilter); + + Throwable t = Assert.assertThrows( + IAE.class, + () -> filter.rewriteRequiredColumns(ImmutableMap.of("invalidName", "dim1")) + ); + + Assert.assertEquals( + "Received a non-applicable rewrite: {invalidName=dim1}, filter's dimension: dim0", + t.getMessage() + ); + } + + @Test + public void test_equals() + { + EqualsVerifier.forClass(BoundFilter.class) + .usingGetClass() + .withNonnullFields("boundDimFilter") + .verify(); + } + + @Test + public void test_equals_boundDimFilterDruidPredicateFactory() + { + 
EqualsVerifier.forClass(BoundFilter.BoundDimFilterDruidPredicateFactory.class) + .usingGetClass() + .withIgnoredFields("longPredicateSupplier", "floatPredicateSupplier", "doublePredicateSupplier") + .verify(); + } +} diff --git a/processing/src/test/java/org/apache/druid/segment/filter/RegexFilterTest.java b/processing/src/test/java/org/apache/druid/segment/filter/RegexFilterTest.java index 30f0c6b7170e..a6ec01681771 100644 --- a/processing/src/test/java/org/apache/druid/segment/filter/RegexFilterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/filter/RegexFilterTest.java @@ -94,6 +94,9 @@ public void testSingleValueStringColumnWithNulls() @Test public void testMultiValueStringColumn() { + if (isAutoSchema()) { + return; + } if (NullHandling.replaceWithDefault()) { assertFilterMatches(new RegexDimFilter("dim2", ".*", null), ImmutableList.of("0", "3", "4")); } else { @@ -130,14 +133,17 @@ public void testRegexWithExtractionFn() ExtractionFn changeNullFn = new JavaScriptExtractionFn(nullJsFn, false, JavaScriptConfig.getEnabledInstance()); if (NullHandling.replaceWithDefault()) { assertFilterMatches(new RegexDimFilter("dim1", ".*ANYMORE", changeNullFn), ImmutableList.of("0")); - assertFilterMatches(new RegexDimFilter("dim2", ".*ANYMORE", changeNullFn), ImmutableList.of("1", "2", "5")); + assertFilterMatchesSkipArrays( + new RegexDimFilter("dim2", ".*ANYMORE", changeNullFn), + ImmutableList.of("1", "2", "5") + ); } else { assertFilterMatches(new RegexDimFilter("dim1", ".*ANYMORE", changeNullFn), ImmutableList.of()); - assertFilterMatches(new RegexDimFilter("dim2", ".*ANYMORE", changeNullFn), ImmutableList.of("1", "5")); + assertFilterMatchesSkipArrays(new RegexDimFilter("dim2", ".*ANYMORE", changeNullFn), ImmutableList.of("1", "5")); } assertFilterMatches(new RegexDimFilter("dim1", "ab.*", changeNullFn), ImmutableList.of("4", "5")); - assertFilterMatches(new RegexDimFilter("dim2", "a.*", changeNullFn), ImmutableList.of("0", "3")); + 
assertFilterMatchesSkipArrays(new RegexDimFilter("dim2", "a.*", changeNullFn), ImmutableList.of("0", "3")); assertFilterMatches(new RegexDimFilter("dim3", ".*ANYMORE", changeNullFn), ImmutableList.of("0", "1", "2", "3", "4", "5")); assertFilterMatches(new RegexDimFilter("dim3", "a.*", changeNullFn), ImmutableList.of()); diff --git a/processing/src/test/java/org/apache/druid/segment/filter/SearchQueryFilterTest.java b/processing/src/test/java/org/apache/druid/segment/filter/SearchQueryFilterTest.java index a7246642925f..8aef0ef27ab9 100644 --- a/processing/src/test/java/org/apache/druid/segment/filter/SearchQueryFilterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/filter/SearchQueryFilterTest.java @@ -107,6 +107,9 @@ public void testSingleValueStringColumnWithNulls() @Test public void testMultiValueStringColumn() { + if (isAutoSchema()) { + return; + } if (NullHandling.replaceWithDefault()) { assertFilterMatches(new SearchQueryDimFilter("dim2", specForValue(""), null), ImmutableList.of("0", "3", "4")); } else { @@ -152,7 +155,7 @@ public void testSearchQueryWithExtractionFn() new SearchQueryDimFilter("dim1", specForValue("ANYMORE"), changeNullFn), ImmutableList.of("0") ); - assertFilterMatches( + assertFilterMatchesSkipArrays( new SearchQueryDimFilter("dim2", specForValue("ANYMORE"), changeNullFn), ImmutableList.of("1", "2", "5") ); @@ -162,7 +165,7 @@ public void testSearchQueryWithExtractionFn() new SearchQueryDimFilter("dim1", specForValue("ANYMORE"), changeNullFn), ImmutableList.of() ); - assertFilterMatches( + assertFilterMatchesSkipArrays( new SearchQueryDimFilter("dim2", specForValue("ANYMORE"), changeNullFn), ImmutableList.of("1", "5") ); @@ -174,12 +177,21 @@ public void testSearchQueryWithExtractionFn() ); assertFilterMatches(new SearchQueryDimFilter("dim1", specForValue("ab"), changeNullFn), ImmutableList.of("4", "5")); - assertFilterMatches(new SearchQueryDimFilter("dim2", specForValue("a"), changeNullFn), ImmutableList.of("0", "3")); 
+ assertFilterMatchesSkipArrays( + new SearchQueryDimFilter("dim2", specForValue("a"), changeNullFn), + ImmutableList.of("0", "3") + ); - assertFilterMatches(new SearchQueryDimFilter("dim3", specForValue("ANYMORE"), changeNullFn), ImmutableList.of("0", "1", "2", "3", "4", "5")); + assertFilterMatches( + new SearchQueryDimFilter("dim3", specForValue("ANYMORE"), changeNullFn), + ImmutableList.of("0", "1", "2", "3", "4", "5") + ); assertFilterMatches(new SearchQueryDimFilter("dim3", specForValue("a"), changeNullFn), ImmutableList.of()); - assertFilterMatches(new SearchQueryDimFilter("dim4", specForValue("ANYMORE"), changeNullFn), ImmutableList.of("0", "1", "2", "3", "4", "5")); + assertFilterMatches( + new SearchQueryDimFilter("dim4", specForValue("ANYMORE"), changeNullFn), + ImmutableList.of("0", "1", "2", "3", "4", "5") + ); assertFilterMatches(new SearchQueryDimFilter("dim4", specForValue("a"), changeNullFn), ImmutableList.of()); } diff --git a/processing/src/test/java/org/apache/druid/segment/filter/SelectorFilterTest.java b/processing/src/test/java/org/apache/druid/segment/filter/SelectorFilterTest.java index 95765949ac0f..80b1bb1a88d1 100644 --- a/processing/src/test/java/org/apache/druid/segment/filter/SelectorFilterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/filter/SelectorFilterTest.java @@ -120,13 +120,22 @@ public void testListFilteredVirtualColumn() { assertFilterMatchesSkipVectorize(new SelectorDimFilter("allow-dim0", "1", null), ImmutableList.of()); assertFilterMatchesSkipVectorize(new SelectorDimFilter("allow-dim0", "4", null), ImmutableList.of("4")); - assertFilterMatchesSkipVectorize(new SelectorDimFilter("allow-dim0", null, null), ImmutableList.of("0", "1", "2", "5")); + assertFilterMatchesSkipVectorize( + new SelectorDimFilter("allow-dim0", null, null), + ImmutableList.of("0", "1", "2", "5") + ); assertFilterMatchesSkipVectorize(new SelectorDimFilter("deny-dim0", "0", null), ImmutableList.of("0")); 
assertFilterMatchesSkipVectorize(new SelectorDimFilter("deny-dim0", "4", null), ImmutableList.of()); assertFilterMatchesSkipVectorize(new SelectorDimFilter("deny-dim0", null, null), ImmutableList.of("3", "4")); + if (isAutoSchema()) { + return; + } assertFilterMatchesSkipVectorize(new SelectorDimFilter("allow-dim2", "b", null), ImmutableList.of()); assertFilterMatchesSkipVectorize(new SelectorDimFilter("allow-dim2", "a", null), ImmutableList.of("0", "3")); - assertFilterMatchesSkipVectorize(new SelectorDimFilter("allow-dim2", null, null), ImmutableList.of("1", "2", "4", "5")); + assertFilterMatchesSkipVectorize( + new SelectorDimFilter("allow-dim2", null, null), + ImmutableList.of("1", "2", "4", "5") + ); assertFilterMatchesSkipVectorize(new SelectorDimFilter("deny-dim2", "b", null), ImmutableList.of("0")); assertFilterMatchesSkipVectorize(new SelectorDimFilter("deny-dim2", "a", null), ImmutableList.of()); if (NullHandling.replaceWithDefault()) { @@ -181,6 +190,9 @@ public void testSingleValueVirtualStringColumnWithNulls() @Test public void testMultiValueStringColumn() { + if (isAutoSchema()) { + return; + } if (NullHandling.replaceWithDefault()) { assertFilterMatches(new SelectorDimFilter("dim2", null, null), ImmutableList.of("1", "2", "5")); assertFilterMatches(new SelectorDimFilter("dim2", "", null), ImmutableList.of("1", "2", "5")); @@ -250,8 +262,11 @@ public void testSelectorWithLookupExtractionFn() assertFilterMatches(new SelectorDimFilter("dim1", "HELLO", lookupFn), ImmutableList.of("3", "4")); assertFilterMatches(new SelectorDimFilter("dim1", "UNKNOWN", lookupFn), ImmutableList.of("0", "1", "2", "5")); - assertFilterMatches(new SelectorDimFilter("dim2", "HELLO", lookupFn), ImmutableList.of("0", "3")); - assertFilterMatches(new SelectorDimFilter("dim2", "UNKNOWN", lookupFn), ImmutableList.of("0", "1", "2", "4", "5")); + assertFilterMatchesSkipArrays(new SelectorDimFilter("dim2", "HELLO", lookupFn), ImmutableList.of("0", "3")); + 
assertFilterMatchesSkipArrays( + new SelectorDimFilter("dim2", "UNKNOWN", lookupFn), + ImmutableList.of("0", "1", "2", "4", "5") + ); assertFilterMatches(new SelectorDimFilter("dim3", "HELLO", lookupFn), ImmutableList.of()); assertFilterMatches( diff --git a/processing/src/test/java/org/apache/druid/segment/incremental/IncrementalIndexStorageAdapterTest.java b/processing/src/test/java/org/apache/druid/segment/incremental/IncrementalIndexStorageAdapterTest.java index ae6622d1d77e..6cec0392dd3c 100644 --- a/processing/src/test/java/org/apache/druid/segment/incremental/IncrementalIndexStorageAdapterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/incremental/IncrementalIndexStorageAdapterTest.java @@ -63,11 +63,11 @@ import org.apache.druid.segment.DimensionSelector; import org.apache.druid.segment.StorageAdapter; import org.apache.druid.segment.VirtualColumns; -import org.apache.druid.segment.column.AllTrueBitmapColumnIndex; -import org.apache.druid.segment.column.BitmapColumnIndex; import org.apache.druid.segment.data.IndexedInts; import org.apache.druid.segment.filter.Filters; import org.apache.druid.segment.filter.SelectorFilter; +import org.apache.druid.segment.index.AllTrueBitmapColumnIndex; +import org.apache.druid.segment.index.BitmapColumnIndex; import org.apache.druid.testing.InitializedNullHandlingTest; import org.joda.time.DateTime; import org.joda.time.Interval; diff --git a/processing/src/test/java/org/apache/druid/segment/join/JoinTestHelper.java b/processing/src/test/java/org/apache/druid/segment/join/JoinTestHelper.java index a5202260efee..efc90593c3a4 100644 --- a/processing/src/test/java/org/apache/druid/segment/join/JoinTestHelper.java +++ b/processing/src/test/java/org/apache/druid/segment/join/JoinTestHelper.java @@ -152,6 +152,12 @@ public Supplier makeLongProcessor(BaseLongColumnValueSelector selector) return () -> NullHandling.sqlCompatible() && selector.isNull() ? 
null : selector.getLong(); } + @Override + public Supplier makeArrayProcessor(BaseObjectColumnValueSelector selector) + { + return selector::getObject; + } + @Override public Supplier makeComplexProcessor(BaseObjectColumnValueSelector selector) { diff --git a/processing/src/test/java/org/apache/druid/segment/nested/NestedDataColumnSupplierTest.java b/processing/src/test/java/org/apache/druid/segment/nested/NestedDataColumnSupplierTest.java index d6131b2d820a..32b3eaa871bd 100644 --- a/processing/src/test/java/org/apache/druid/segment/nested/NestedDataColumnSupplierTest.java +++ b/processing/src/test/java/org/apache/druid/segment/nested/NestedDataColumnSupplierTest.java @@ -53,11 +53,11 @@ import org.apache.druid.segment.column.ColumnHolder; import org.apache.druid.segment.column.ColumnIndexSupplier; import org.apache.druid.segment.column.ColumnType; -import org.apache.druid.segment.column.DruidPredicateIndex; -import org.apache.druid.segment.column.NullValueIndex; -import org.apache.druid.segment.column.StringValueSetIndex; import org.apache.druid.segment.data.BitmapSerdeFactory; import org.apache.druid.segment.data.RoaringBitmapSerdeFactory; +import org.apache.druid.segment.index.DruidPredicateIndex; +import org.apache.druid.segment.index.NullValueIndex; +import org.apache.druid.segment.index.StringValueSetIndex; import org.apache.druid.segment.serde.ColumnPartSerde; import org.apache.druid.segment.serde.NestedCommonFormatColumnPartSerde; import org.apache.druid.segment.vector.BitmapVectorOffset; @@ -263,7 +263,6 @@ public void testBasicFunctionality() throws IOException final ColumnHolder holder = bob.build(); final ColumnCapabilities capabilities = holder.getCapabilities(); Assert.assertEquals(ColumnType.NESTED_DATA, capabilities.toColumnType()); - Assert.assertTrue(capabilities.isFilterable()); Assert.assertTrue(holder.getColumnFormat() instanceof NestedCommonFormatColumn.Format); try (NestedDataComplexColumn column = (NestedDataComplexColumn) 
holder.getColumn()) { smokeTest(column); @@ -287,7 +286,6 @@ public void testArrayFunctionality() throws IOException final ColumnHolder holder = bob.build(); final ColumnCapabilities capabilities = holder.getCapabilities(); Assert.assertEquals(ColumnType.NESTED_DATA, capabilities.toColumnType()); - Assert.assertTrue(capabilities.isFilterable()); Assert.assertTrue(holder.getColumnFormat() instanceof NestedCommonFormatColumn.Format); try (NestedDataComplexColumn column = (NestedDataComplexColumn) holder.getColumn()) { smokeTestArrays(column); diff --git a/processing/src/test/java/org/apache/druid/segment/nested/NestedDataColumnSupplierV4Test.java b/processing/src/test/java/org/apache/druid/segment/nested/NestedDataColumnSupplierV4Test.java index 154be02dfe77..824f4f80bd77 100644 --- a/processing/src/test/java/org/apache/druid/segment/nested/NestedDataColumnSupplierV4Test.java +++ b/processing/src/test/java/org/apache/druid/segment/nested/NestedDataColumnSupplierV4Test.java @@ -46,16 +46,16 @@ import org.apache.druid.segment.QueryableIndex; import org.apache.druid.segment.SimpleAscendingOffset; import org.apache.druid.segment.TestHelper; -import org.apache.druid.segment.column.BitmapColumnIndex; import org.apache.druid.segment.column.ColumnBuilder; import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnHolder; import org.apache.druid.segment.column.ColumnIndexSupplier; import org.apache.druid.segment.column.ColumnType; -import org.apache.druid.segment.column.DruidPredicateIndex; -import org.apache.druid.segment.column.NullValueIndex; -import org.apache.druid.segment.column.StringValueSetIndex; import org.apache.druid.segment.column.TypeStrategy; +import org.apache.druid.segment.index.BitmapColumnIndex; +import org.apache.druid.segment.index.DruidPredicateIndex; +import org.apache.druid.segment.index.NullValueIndex; +import org.apache.druid.segment.index.StringValueSetIndex; import 
org.apache.druid.segment.serde.ColumnPartSerde; import org.apache.druid.segment.serde.ComplexColumnPartSerde; import org.apache.druid.segment.writeout.SegmentWriteOutMediumFactory; @@ -229,7 +229,6 @@ public void testBasicFunctionality() throws IOException final ColumnHolder holder = bob.build(); final ColumnCapabilities capabilities = holder.getCapabilities(); Assert.assertEquals(ColumnType.NESTED_DATA, capabilities.toColumnType()); - Assert.assertTrue(capabilities.isFilterable()); Assert.assertTrue(holder.getColumnFormat() instanceof NestedDataComplexTypeSerde.NestedColumnFormatV4); try (NestedDataComplexColumn column = (NestedDataComplexColumn) holder.getColumn()) { smokeTest(column); diff --git a/processing/src/test/java/org/apache/druid/segment/nested/NestedFieldColumnIndexSupplierTest.java b/processing/src/test/java/org/apache/druid/segment/nested/NestedFieldColumnIndexSupplierTest.java index 9503e55cc574..3164cc251c41 100644 --- a/processing/src/test/java/org/apache/druid/segment/nested/NestedFieldColumnIndexSupplierTest.java +++ b/processing/src/test/java/org/apache/druid/segment/nested/NestedFieldColumnIndexSupplierTest.java @@ -28,17 +28,8 @@ import org.apache.druid.query.DefaultBitmapResultFactory; import org.apache.druid.query.filter.DruidPredicateFactory; import org.apache.druid.query.filter.InDimFilter; -import org.apache.druid.segment.column.BitmapColumnIndex; import org.apache.druid.segment.column.ColumnConfig; import org.apache.druid.segment.column.ColumnType; -import org.apache.druid.segment.column.DictionaryEncodedStringValueIndex; -import org.apache.druid.segment.column.DictionaryEncodedValueIndex; -import org.apache.druid.segment.column.DruidPredicateIndex; -import org.apache.druid.segment.column.LexicographicalRangeIndex; -import org.apache.druid.segment.column.NullValueIndex; -import org.apache.druid.segment.column.NumericRangeIndex; -import org.apache.druid.segment.column.SpatialIndex; -import 
org.apache.druid.segment.column.StringValueSetIndex; import org.apache.druid.segment.column.TypeStrategies; import org.apache.druid.segment.data.BitmapSerdeFactory; import org.apache.druid.segment.data.FixedIndexed; @@ -47,6 +38,15 @@ import org.apache.druid.segment.data.GenericIndexedWriter; import org.apache.druid.segment.data.Indexed; import org.apache.druid.segment.data.RoaringBitmapSerdeFactory; +import org.apache.druid.segment.index.BitmapColumnIndex; +import org.apache.druid.segment.index.DictionaryEncodedStringValueIndex; +import org.apache.druid.segment.index.DictionaryEncodedValueIndex; +import org.apache.druid.segment.index.DruidPredicateIndex; +import org.apache.druid.segment.index.LexicographicalRangeIndex; +import org.apache.druid.segment.index.NullValueIndex; +import org.apache.druid.segment.index.NumericRangeIndex; +import org.apache.druid.segment.index.SpatialIndex; +import org.apache.druid.segment.index.StringValueSetIndex; import org.apache.druid.segment.serde.Serializer; import org.apache.druid.segment.writeout.OnHeapMemorySegmentWriteOutMedium; import org.apache.druid.testing.InitializedNullHandlingTest; diff --git a/processing/src/test/java/org/apache/druid/segment/nested/ScalarDoubleColumnSupplierTest.java b/processing/src/test/java/org/apache/druid/segment/nested/ScalarDoubleColumnSupplierTest.java index 8d1e535d182d..b65b56e220aa 100644 --- a/processing/src/test/java/org/apache/druid/segment/nested/ScalarDoubleColumnSupplierTest.java +++ b/processing/src/test/java/org/apache/druid/segment/nested/ScalarDoubleColumnSupplierTest.java @@ -42,11 +42,11 @@ import org.apache.druid.segment.SimpleAscendingOffset; import org.apache.druid.segment.column.ColumnBuilder; import org.apache.druid.segment.column.ColumnType; -import org.apache.druid.segment.column.DruidPredicateIndex; -import org.apache.druid.segment.column.NullValueIndex; -import org.apache.druid.segment.column.StringValueSetIndex; import org.apache.druid.segment.data.BitmapSerdeFactory; 
import org.apache.druid.segment.data.RoaringBitmapSerdeFactory; +import org.apache.druid.segment.index.DruidPredicateIndex; +import org.apache.druid.segment.index.NullValueIndex; +import org.apache.druid.segment.index.StringValueSetIndex; import org.apache.druid.segment.vector.NoFilterVectorOffset; import org.apache.druid.segment.vector.VectorValueSelector; import org.apache.druid.segment.writeout.SegmentWriteOutMediumFactory; diff --git a/processing/src/test/java/org/apache/druid/segment/nested/ScalarLongColumnSupplierTest.java b/processing/src/test/java/org/apache/druid/segment/nested/ScalarLongColumnSupplierTest.java index 415bcf0d4020..c3cdff147772 100644 --- a/processing/src/test/java/org/apache/druid/segment/nested/ScalarLongColumnSupplierTest.java +++ b/processing/src/test/java/org/apache/druid/segment/nested/ScalarLongColumnSupplierTest.java @@ -42,11 +42,11 @@ import org.apache.druid.segment.SimpleAscendingOffset; import org.apache.druid.segment.column.ColumnBuilder; import org.apache.druid.segment.column.ColumnType; -import org.apache.druid.segment.column.DruidPredicateIndex; -import org.apache.druid.segment.column.NullValueIndex; -import org.apache.druid.segment.column.StringValueSetIndex; import org.apache.druid.segment.data.BitmapSerdeFactory; import org.apache.druid.segment.data.RoaringBitmapSerdeFactory; +import org.apache.druid.segment.index.DruidPredicateIndex; +import org.apache.druid.segment.index.NullValueIndex; +import org.apache.druid.segment.index.StringValueSetIndex; import org.apache.druid.segment.vector.NoFilterVectorOffset; import org.apache.druid.segment.vector.VectorValueSelector; import org.apache.druid.segment.writeout.SegmentWriteOutMediumFactory; diff --git a/processing/src/test/java/org/apache/druid/segment/nested/ScalarStringColumnSupplierTest.java b/processing/src/test/java/org/apache/druid/segment/nested/ScalarStringColumnSupplierTest.java index 5b86747aa2ae..edecb52f7bf6 100644 --- 
a/processing/src/test/java/org/apache/druid/segment/nested/ScalarStringColumnSupplierTest.java +++ b/processing/src/test/java/org/apache/druid/segment/nested/ScalarStringColumnSupplierTest.java @@ -43,12 +43,12 @@ import org.apache.druid.segment.SimpleAscendingOffset; import org.apache.druid.segment.column.ColumnBuilder; import org.apache.druid.segment.column.ColumnType; -import org.apache.druid.segment.column.DruidPredicateIndex; -import org.apache.druid.segment.column.NullValueIndex; import org.apache.druid.segment.column.StringUtf8DictionaryEncodedColumn; -import org.apache.druid.segment.column.StringValueSetIndex; import org.apache.druid.segment.data.BitmapSerdeFactory; import org.apache.druid.segment.data.RoaringBitmapSerdeFactory; +import org.apache.druid.segment.index.DruidPredicateIndex; +import org.apache.druid.segment.index.NullValueIndex; +import org.apache.druid.segment.index.StringValueSetIndex; import org.apache.druid.segment.writeout.SegmentWriteOutMediumFactory; import org.apache.druid.segment.writeout.TmpFileSegmentWriteOutMediumFactory; import org.apache.druid.testing.InitializedNullHandlingTest; diff --git a/processing/src/test/java/org/apache/druid/segment/nested/VariantColumnSupplierTest.java b/processing/src/test/java/org/apache/druid/segment/nested/VariantColumnSupplierTest.java index 9c7375ca4fd5..6b689eacf950 100644 --- a/processing/src/test/java/org/apache/druid/segment/nested/VariantColumnSupplierTest.java +++ b/processing/src/test/java/org/apache/druid/segment/nested/VariantColumnSupplierTest.java @@ -42,14 +42,14 @@ import org.apache.druid.segment.SimpleAscendingOffset; import org.apache.druid.segment.column.ColumnBuilder; import org.apache.druid.segment.column.ColumnType; -import org.apache.druid.segment.column.DruidPredicateIndex; -import org.apache.druid.segment.column.NullValueIndex; import org.apache.druid.segment.column.StringEncodingStrategy; -import org.apache.druid.segment.column.StringValueSetIndex; import 
org.apache.druid.segment.data.BitmapSerdeFactory; import org.apache.druid.segment.data.CompressionFactory; import org.apache.druid.segment.data.FrontCodedIndexed; import org.apache.druid.segment.data.RoaringBitmapSerdeFactory; +import org.apache.druid.segment.index.DruidPredicateIndex; +import org.apache.druid.segment.index.NullValueIndex; +import org.apache.druid.segment.index.StringValueSetIndex; import org.apache.druid.segment.vector.NoFilterVectorOffset; import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector; import org.apache.druid.segment.vector.VectorObjectSelector; diff --git a/processing/src/test/java/org/apache/druid/segment/serde/DictionaryEncodedStringIndexSupplierTest.java b/processing/src/test/java/org/apache/druid/segment/serde/DictionaryEncodedStringIndexSupplierTest.java index 7537dc53c92f..ad18650c9a38 100644 --- a/processing/src/test/java/org/apache/druid/segment/serde/DictionaryEncodedStringIndexSupplierTest.java +++ b/processing/src/test/java/org/apache/druid/segment/serde/DictionaryEncodedStringIndexSupplierTest.java @@ -24,12 +24,12 @@ import org.apache.druid.collections.bitmap.MutableBitmap; import org.apache.druid.query.BitmapResultFactory; import org.apache.druid.query.DefaultBitmapResultFactory; -import org.apache.druid.segment.column.BitmapColumnIndex; -import org.apache.druid.segment.column.StringValueSetIndex; import org.apache.druid.segment.data.BitmapSerdeFactory; import org.apache.druid.segment.data.GenericIndexed; import org.apache.druid.segment.data.GenericIndexedWriter; import org.apache.druid.segment.data.RoaringBitmapSerdeFactory; +import org.apache.druid.segment.index.BitmapColumnIndex; +import org.apache.druid.segment.index.StringValueSetIndex; import org.apache.druid.segment.writeout.OnHeapMemorySegmentWriteOutMedium; import org.apache.druid.testing.InitializedNullHandlingTest; import org.junit.Assert; diff --git a/processing/src/test/java/org/apache/druid/segment/serde/NullColumnPartSerdeTest.java 
b/processing/src/test/java/org/apache/druid/segment/serde/NullColumnPartSerdeTest.java index 2c3455478d0f..c6e6e9359b4a 100644 --- a/processing/src/test/java/org/apache/druid/segment/serde/NullColumnPartSerdeTest.java +++ b/processing/src/test/java/org/apache/druid/segment/serde/NullColumnPartSerdeTest.java @@ -71,7 +71,6 @@ public void testDeserializer() Assert.assertTrue(Types.is(columnCapabilities, ValueType.DOUBLE)); Assert.assertTrue(columnCapabilities.hasNulls().isTrue()); Assert.assertTrue(columnCapabilities.hasMultipleValues().isFalse()); - Assert.assertTrue(columnCapabilities.isFilterable()); Assert.assertTrue(columnCapabilities.hasBitmapIndexes()); Assert.assertTrue(columnCapabilities.isDictionaryEncoded().isTrue()); Assert.assertTrue(columnCapabilities.areDictionaryValuesSorted().isTrue()); diff --git a/processing/src/test/java/org/apache/druid/segment/virtual/ListFilteredVirtualColumnSelectorTest.java b/processing/src/test/java/org/apache/druid/segment/virtual/ListFilteredVirtualColumnSelectorTest.java index 4b7e1f640b20..6ba06956e2f0 100644 --- a/processing/src/test/java/org/apache/druid/segment/virtual/ListFilteredVirtualColumnSelectorTest.java +++ b/processing/src/test/java/org/apache/druid/segment/virtual/ListFilteredVirtualColumnSelectorTest.java @@ -39,10 +39,10 @@ import org.apache.druid.segment.column.ColumnHolder; import org.apache.druid.segment.column.ColumnIndexSupplier; import org.apache.druid.segment.column.ColumnType; -import org.apache.druid.segment.column.DictionaryEncodedStringValueIndex; import org.apache.druid.segment.column.RowSignature; import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.filter.SelectorFilter; +import org.apache.druid.segment.index.DictionaryEncodedStringValueIndex; import org.apache.druid.testing.InitializedNullHandlingTest; import org.easymock.EasyMock; import org.junit.Assert; diff --git a/processing/src/test/resources/nested-all-types-test-data.json 
b/processing/src/test/resources/nested-all-types-test-data.json index 832c4de7cf00..05984a7d4a23 100644 --- a/processing/src/test/resources/nested-all-types-test-data.json +++ b/processing/src/test/resources/nested-all-types-test-data.json @@ -1,7 +1,7 @@ {"timestamp": "2023-01-01T00:00:00", "str":"a", "long":1, "double":1.0, "bool": true, "variant": 1, "variantEmptyObj":1, "variantEmtpyArray":1, "obj":{"a": 100, "b": {"x": "a", "y": 1.1, "z": [1, 2, 3, 4]}}, "complexObj":{"x": 1234, "y": [{"l": ["a", "b", "c"], "m": "a", "n": 1},{"l": ["a", "b", "c"], "m": "a", "n": 1}], "z": {"a": [1.1, 2.2, 3.3], "b": true}}, "arrayString": ["a", "b"], "arrayStringNulls": ["a", "b"], "arrayLong":[1, 2, 3], "arrayLongNulls":[1, null,3], "arrayDouble":[1.1, 2.2, 3.3], "arrayDoubleNulls":[1.1, 2.2, null], "arrayVariant":["a", 1, 2.2], "arrayBool":[true, false, true], "arrayNestedLong":[[1, 2, null], [3, 4]], "arrayObject":[{"x": 1},{"x":2}], "null": null, "cstr": "hello", "clong": 1234, "cdouble": 1.234, "cObj":{"x": 1, "y": "hello", "z": {"a": 1.1, "b": 1234, "c": ["a", "b", "c"]}}, "cstringArray": ["a", "b", "c"], "cLongArray": [1, 2, 3], "cDoubleArray": [1.1, 2.2, 3.3], "cEmptyArray":[], "cEmptyObj":{}, "cNullArray": [null, null], "cEmptyObjectArray": [{},{},{}], "cObjectArray": [{"a":"b", "x":1, "y":1.3}]} {"timestamp": "2023-01-01T00:00:00", "str":"", "long":2, "bool": false, "variant": "b", "variantEmptyObj":"b", "variantEmtpyArray":2, "obj":{"a": 200, "b": {"x": "b", "y": 1.1, "z": [2, 4, 6]}}, "complexObj":{"x": 10, "y": [{"l": ["b", "b", "c"], "m": "b", "n": 2}, [1, 2, 3]], "z": {"a": [5.5], "b": false}}, "arrayString": ["a", "b", "c"], "arrayStringNulls": [null, "b"], "arrayLong":[2, 3], "arrayDouble":[3.3, 4.4, 5.5], "arrayDoubleNulls":[999, null, 5.5], "arrayVariant":[null, null, 2.2], "arrayBool":[true, true], "arrayNestedLong":[null, [null], []], "arrayObject":[{"x": 3},{"x":4}], "null": null, "cstr": "hello", "clong": 1234, "cdouble": 1.234, "cObj":{"x": 1, "y": 
"hello", "z": {"a": 1.1, "b": 1234, "c": ["a", "b", "c"]}}, "cstringArray": ["a", "b", "c"], "cLongArray": [1, 2, 3], "cDoubleArray": [1.1, 2.2, 3.3], "cEmptyArray":[], "cEmptyObj":{}, "cNullArray": [null, null], "cEmptyObjectArray": [{},{},{}], "cObjectArray": [{"a":"b", "x":1, "y":1.3}]} {"timestamp": "2023-01-01T00:00:00", "str":"null", "long":3, "double":2.0, "variant": 3.0, "variantEmptyObj":3.3, "variantEmtpyArray":3, "obj":{"a": 300}, "complexObj":{"x": 4, "y": [{"l": [], "m": 100, "n": 3},{"l": ["a"]}, {"l": ["b"], "n": []}], "z": {"a": [], "b": true}}, "arrayString": ["b", "c"], "arrayStringNulls": ["d", null, "b"], "arrayLong":[1, 2, 3, 4], "arrayLongNulls":[1, 2, 3], "arrayDouble":[1.1, 3.3], "arrayDoubleNulls":[null, 2.2, null], "arrayVariant":[1, null, 1], "arrayBool":[true, null, true], "arrayNestedLong":[[1], null, [1, 2, 3]], "arrayObject":[null,{"x":2}], "cstr": "hello", "clong": 1234, "cdouble": 1.234, "cObj":{"x": 1, "y": "hello", "z": {"a": 1.1, "b": 1234, "c": ["a", "b", "c"]}}, "cstringArray": ["a", "b", "c"], "cLongArray": [1, 2, 3], "cDoubleArray": [1.1, 2.2, 3.3], "cEmptyArray":[], "cEmptyObj":{}, "cNullArray": [null, null], "cEmptyObjectArray": [{},{},{}], "cObjectArray": [{"a":"b", "x":1, "y":1.3}]} -{"timestamp": "2023-01-01T00:00:00", "str":"b", "long":4, "double":3.3, "bool": true, "variant": "4", "variantEmptyObj":{}, "variantEmtpyArray":4, "obj":{"a": 400, "b": {"x": "d", "y": 1.1, "z": [3, 4]}}, "complexObj":{"x": 1234, "z": {"a": [1.1, 2.2, 3.3], "b": true}}, "arrayString": ["d", "e"], "arrayStringNulls": ["b", "b"], "arrayLong":[1, 4], "arrayLongNulls":[1], "arrayDouble":[2.2, 3.3, 4.0], "arrayVariant":["a", "b", "c"], "arrayBool":[null, false, true], "arrayNestedLong":[[1, 2], [3, 4], [5, 6, 7]], "arrayObject":[{"x": null},{"x":2}], "null": null, "cstr": "hello", "clong": 1234, "cdouble": 1.234, "cObj":{"x": 1, "y": "hello", "z": {"a": 1.1, "b": 1234, "c": ["a", "b", "c"]}}, "cstringArray": ["a", "b", "c"], "cLongArray": [1, 2, 
3], "cDoubleArray": [1.1, 2.2, 3.3], "cEmptyArray":[], "cEmptyObj":{}, "cNullArray": [null, null], "cEmptyObjectArray": [{},{},{}], "cObjectArray": [{"a":"b", "x":1, "y":1.3}]} +{"timestamp": "2023-01-01T00:00:00", "str":"b", "long":4, "double":3.3, "bool": true, "variant": "1", "variantEmptyObj":{}, "variantEmtpyArray":4, "obj":{"a": 400, "b": {"x": "d", "y": 1.1, "z": [3, 4]}}, "complexObj":{"x": 1234, "z": {"a": [1.1, 2.2, 3.3], "b": true}}, "arrayString": ["d", "e"], "arrayStringNulls": ["b", "b"], "arrayLong":[1, 4], "arrayLongNulls":[1], "arrayDouble":[2.2, 3.3, 4.0], "arrayVariant":["a", "b", "c"], "arrayBool":[null, false, true], "arrayNestedLong":[[1, 2], [3, 4], [5, 6, 7]], "arrayObject":[{"x": null},{"x":2}], "null": null, "cstr": "hello", "clong": 1234, "cdouble": 1.234, "cObj":{"x": 1, "y": "hello", "z": {"a": 1.1, "b": 1234, "c": ["a", "b", "c"]}}, "cstringArray": ["a", "b", "c"], "cLongArray": [1, 2, 3], "cDoubleArray": [1.1, 2.2, 3.3], "cEmptyArray":[], "cEmptyObj":{}, "cNullArray": [null, null], "cEmptyObjectArray": [{},{},{}], "cObjectArray": [{"a":"b", "x":1, "y":1.3}]} {"timestamp": "2023-01-01T00:00:00", "str":"c", "long": null, "double":4.4, "bool": true, "variant": "hello", "variantEmptyObj":{}, "variantEmtpyArray":[], "obj":{"a": 500, "b": {"x": "e", "z": [1, 2, 3, 4]}}, "complexObj":{"x": 11, "y": [], "z": {"a": [null], "b": false}}, "arrayString": null, "arrayLong":[1, 2, 3], "arrayLongNulls":[], "arrayDouble":[1.1, 2.2, 3.3], "arrayDoubleNulls":null, "arrayBool":[false], "arrayObject":[{"x": 1000},{"y":2000}], "null": null, "cstr": "hello", "clong": 1234, "cdouble": 1.234, "cObj":{"x": 1, "y": "hello", "z": {"a": 1.1, "b": 1234, "c": ["a", "b", "c"]}}, "cstringArray": ["a", "b", "c"], "cLongArray": [1, 2, 3], "cDoubleArray": [1.1, 2.2, 3.3], "cEmptyArray":[], "cEmptyObj":{}, "cNullArray": [null, null], "cEmptyObjectArray": [{},{},{}], "cObjectArray": [{"a":"b", "x":1, "y":1.3}]} {"timestamp": "2023-01-01T00:00:00", "str":"d", "long":5, 
"double":5.9, "bool": false, "variantEmptyObj":"a", "variantEmtpyArray":6, "obj":{"a": 600, "b": {"x": "f", "y": 1.1, "z": [6, 7, 8, 9]}}, "arrayString": ["a", "b"], "arrayStringNulls": null, "arrayLongNulls":[null, 2, 9], "arrayDouble":null, "arrayDoubleNulls":[999, 5.5, null], "arrayVariant":["a", 1, 2.2], "arrayBool":[], "arrayNestedLong":[[1], [1, 2, null]], "arrayObject":[{"a": 1},{"b":2}], "cstr": "hello", "clong": 1234, "cdouble": 1.234, "cObj":{"x": 1, "y": "hello", "z": {"a": 1.1, "b": 1234, "c": ["a", "b", "c"]}}, "cstringArray": ["a", "b", "c"], "cLongArray": [1, 2, 3], "cDoubleArray": [1.1, 2.2, 3.3], "cEmptyArray":[], "cEmptyObj":{}, "cNullArray": [null, null], "cEmptyObjectArray": [{},{},{}], "cObjectArray": [{"a":"b", "x":1, "y":1.3}]} {"timestamp": "2023-01-01T00:00:00", "str":null, "double":null, "bool": true, "variant": 51, "variantEmptyObj":1, "variantEmtpyArray":[], "obj":{"a": 700, "b": {"x": "g", "y": 1.1, "z": [9, null, 9, 9]}}, "complexObj":{"x": 400, "y": [{"l": [null], "m": 100, "n": 5},{"l": ["a", "b", "c"], "m": "a", "n": 1}], "z": {}}, "arrayStringNulls": ["a", "b"], "arrayLong":null, "arrayLongNulls":[2, 3], "arrayDoubleNulls":[null], "arrayVariant":null, "arrayBool":[true, false, true], "arrayNestedLong":null, "arrayObject":[{"x": 1},{"x":2}], "null": null, "cstr": "hello", "clong": 1234, "cdouble": 1.234, "cObj":{"x": 1, "y": "hello", "z": {"a": 1.1, "b": 1234, "c": ["a", "b", "c"]}}, "cstringArray": ["a", "b", "c"], "cLongArray": [1, 2, 3], "cDoubleArray": [1.1, 2.2, 3.3], "cEmptyArray":[], "cEmptyObj":{}, "cNullArray": [null, null], "cEmptyObjectArray": [{},{},{}], "cObjectArray": [{"a":"b", "x":1, "y":1.3}]} diff --git a/services/src/main/java/org/apache/druid/cli/DumpSegment.java b/services/src/main/java/org/apache/druid/cli/DumpSegment.java index 8c865af1664a..b80e3ccb145d 100644 --- a/services/src/main/java/org/apache/druid/cli/DumpSegment.java +++ b/services/src/main/java/org/apache/druid/cli/DumpSegment.java @@ -85,13 +85,13 
@@ import org.apache.druid.segment.column.ColumnHolder; import org.apache.druid.segment.column.ColumnIndexSupplier; import org.apache.druid.segment.column.ColumnType; -import org.apache.druid.segment.column.DictionaryEncodedStringValueIndex; import org.apache.druid.segment.data.BitmapSerdeFactory; import org.apache.druid.segment.data.ConciseBitmapSerdeFactory; import org.apache.druid.segment.data.FixedIndexed; import org.apache.druid.segment.data.Indexed; import org.apache.druid.segment.data.RoaringBitmapSerdeFactory; import org.apache.druid.segment.filter.Filters; +import org.apache.druid.segment.index.DictionaryEncodedStringValueIndex; import org.apache.druid.segment.nested.CompressedNestedDataComplexColumn; import org.apache.druid.segment.nested.NestedFieldDictionaryEncodedColumn; import org.apache.druid.segment.nested.NestedPathFinder; diff --git a/services/src/test/java/org/apache/druid/cli/DumpSegmentTest.java b/services/src/test/java/org/apache/druid/cli/DumpSegmentTest.java index 6613dc212fd4..9206f38ed0d4 100644 --- a/services/src/test/java/org/apache/druid/cli/DumpSegmentTest.java +++ b/services/src/test/java/org/apache/druid/cli/DumpSegmentTest.java @@ -50,7 +50,7 @@ import org.apache.druid.segment.TestHelper; import org.apache.druid.segment.column.ColumnHolder; import org.apache.druid.segment.column.ColumnIndexSupplier; -import org.apache.druid.segment.column.DictionaryEncodedStringValueIndex; +import org.apache.druid.segment.index.DictionaryEncodedStringValueIndex; import org.apache.druid.testing.InitializedNullHandlingTest; import org.apache.druid.timeline.SegmentId; import org.junit.After; diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/builtin/BitwiseSqlAggregator.java b/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/builtin/BitwiseSqlAggregator.java index 9d5023e8baf0..d8758141dfba 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/builtin/BitwiseSqlAggregator.java +++ 
b/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/builtin/BitwiseSqlAggregator.java @@ -32,11 +32,13 @@ import org.apache.calcite.sql.type.ReturnTypes; import org.apache.calcite.sql.type.SqlTypeName; import org.apache.calcite.util.Optionality; +import org.apache.druid.common.config.NullHandling; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.math.expr.ExprMacroTable; import org.apache.druid.query.aggregation.ExpressionLambdaAggregatorFactory; import org.apache.druid.query.aggregation.FilteredAggregatorFactory; import org.apache.druid.query.filter.NotDimFilter; +import org.apache.druid.query.filter.NullFilter; import org.apache.druid.query.filter.SelectorDimFilter; import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.RowSignature; @@ -169,7 +171,11 @@ public Aggregation toDruidAggregation( null, macroTable ), - new NotDimFilter(new SelectorDimFilter(fieldName, null, null)) + new NotDimFilter( + plannerContext.isUseBoundsAndSelectors() + ? 
new SelectorDimFilter(fieldName, NullHandling.defaultStringValue(), null) + : NullFilter.forColumn(fieldName) + ) ) ); } diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/builtin/StringSqlAggregator.java b/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/builtin/StringSqlAggregator.java index a6044d53e364..e235cfc06cb8 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/builtin/StringSqlAggregator.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/builtin/StringSqlAggregator.java @@ -37,12 +37,14 @@ import org.apache.calcite.sql.type.SqlTypeFamily; import org.apache.calcite.sql.type.SqlTypeName; import org.apache.calcite.util.Optionality; +import org.apache.druid.common.config.NullHandling; import org.apache.druid.java.util.common.HumanReadableBytes; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.math.expr.ExprMacroTable; import org.apache.druid.query.aggregation.ExpressionLambdaAggregatorFactory; import org.apache.druid.query.aggregation.FilteredAggregatorFactory; import org.apache.druid.query.filter.NotDimFilter; +import org.apache.druid.query.filter.NullFilter; import org.apache.druid.query.filter.SelectorDimFilter; import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.RowSignature; @@ -140,7 +142,11 @@ public Aggregation toDruidAggregation( } final String finalizer = StringUtils.format("if(array_length(o) == 0, null, array_to_string(o, '%s'))", separator); - final NotDimFilter dimFilter = new NotDimFilter(new SelectorDimFilter(fieldName, null, null)); + final NotDimFilter dimFilter = new NotDimFilter( + plannerContext.isUseBoundsAndSelectors() + ? 
new SelectorDimFilter(fieldName, NullHandling.defaultStringValue(), null) + : NullFilter.forColumn(fieldName) + ); if (aggregateCall.isDistinct()) { return Aggregation.create( // string_agg ignores nulls diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/expression/Expressions.java b/sql/src/main/java/org/apache/druid/sql/calcite/expression/Expressions.java index d429f1fd8a9d..6618be846665 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/expression/Expressions.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/expression/Expressions.java @@ -45,6 +45,7 @@ import org.apache.druid.query.filter.DimFilter; import org.apache.druid.query.filter.ExpressionDimFilter; import org.apache.druid.query.filter.NotDimFilter; +import org.apache.druid.query.filter.NullFilter; import org.apache.druid.query.filter.OrDimFilter; import org.apache.druid.query.filter.SelectorDimFilter; import org.apache.druid.query.ordering.StringComparator; @@ -55,6 +56,8 @@ import org.apache.druid.sql.calcite.filtration.BoundRefKey; import org.apache.druid.sql.calcite.filtration.Bounds; import org.apache.druid.sql.calcite.filtration.Filtration; +import org.apache.druid.sql.calcite.filtration.RangeRefKey; +import org.apache.druid.sql.calcite.filtration.Ranges; import org.apache.druid.sql.calcite.planner.Calcites; import org.apache.druid.sql.calcite.planner.ExpressionParser; import org.apache.druid.sql.calcite.planner.PlannerContext; @@ -516,22 +519,34 @@ private static DimFilter toSimpleLeafFilter( // column instead for filtering to ensure that results are correct if (druidExpression.isSimpleExtraction() && !(isOutputNumeric && !rowSignature.isNumeric(druidExpression.getDirectColumn()))) { - equalFilter = new SelectorDimFilter( - druidExpression.getSimpleExtraction().getColumn(), - NullHandling.defaultStringValue(), - druidExpression.getSimpleExtraction().getExtractionFn() - ); + if (NullHandling.sqlCompatible() && !plannerContext.isUseBoundsAndSelectors()) { + equalFilter = 
new NullFilter( + druidExpression.getSimpleExtraction().getColumn(), + druidExpression.getSimpleExtraction().getExtractionFn(), + null + ); + } else { + equalFilter = new SelectorDimFilter( + druidExpression.getSimpleExtraction().getColumn(), + NullHandling.defaultStringValue(), + druidExpression.getSimpleExtraction().getExtractionFn() + ); + } } else if (virtualColumnRegistry != null) { final String virtualColumn = virtualColumnRegistry.getOrCreateVirtualColumnForExpression( druidExpression, operand.getType() ); - equalFilter = new SelectorDimFilter( - virtualColumn, - NullHandling.defaultStringValue(), - null - ); + if (NullHandling.sqlCompatible() && !plannerContext.isUseBoundsAndSelectors()) { + equalFilter = new NullFilter(virtualColumn, null, null); + } else { + equalFilter = new SelectorDimFilter( + virtualColumn, + NullHandling.defaultStringValue(), + null + ); + } } else { return null; } @@ -585,8 +600,12 @@ private static DimFilter toSimpleLeafFilter( flippedKind = kind; } + final DruidExpression rhsExpression = toDruidExpression(plannerContext, rowSignature, rhs); + final Expr parsedRhsExpression = rhsExpression != null + ? 
plannerContext.parseExpression(rhsExpression.getExpression()) + : null; // rhs must be a literal - if (rhs.getKind() != SqlKind.LITERAL) { + if (rhs.getKind() != SqlKind.LITERAL && (parsedRhsExpression == null || !parsedRhsExpression.isLiteral())) { return null; } @@ -602,7 +621,13 @@ private static DimFilter toSimpleLeafFilter( if (queryGranularity != null) { // lhs is FLOOR(__time TO granularity); rhs must be a timestamp final long rhsMillis = Calcites.calciteDateTimeLiteralToJoda(rhs, plannerContext.getTimeZone()).getMillis(); - return buildTimeFloorFilter(ColumnHolder.TIME_COLUMN_NAME, queryGranularity, flippedKind, rhsMillis); + return buildTimeFloorFilter( + ColumnHolder.TIME_COLUMN_NAME, + queryGranularity, + flippedKind, + rhsMillis, + plannerContext + ); } final String column; @@ -633,61 +658,105 @@ private static DimFilter toSimpleLeafFilter( // Is rhs aligned on granularity boundaries? final boolean rhsAligned = rhsInterval.getStartMillis() == rhsMillis; - // Create a BoundRefKey that strips the extractionFn and compares __time as a number. - final BoundRefKey boundRefKey = new BoundRefKey(column, null, StringComparators.NUMERIC); + if (plannerContext.isUseBoundsAndSelectors()) { + // Create a BoundRefKey that strips the extractionFn and compares __time as a number. 
+ final BoundRefKey boundRefKey = new BoundRefKey(column, null, StringComparators.NUMERIC); - return getBoundTimeDimFilter(flippedKind, boundRefKey, rhsInterval, rhsAligned); + return getBoundTimeDimFilter(flippedKind, boundRefKey, rhsInterval, rhsAligned); + } else { + final RangeRefKey rangeRefKey = new RangeRefKey(column, ColumnType.LONG, null); + return getRangeTimeDimFilter(flippedKind, rangeRefKey, rhsInterval, rhsAligned); + } } } - final String val; - final RexLiteral rhsLiteral = (RexLiteral) rhs; - if (SqlTypeName.NUMERIC_TYPES.contains(rhsLiteral.getTypeName())) { - val = String.valueOf(RexLiteral.value(rhsLiteral)); - } else if (SqlTypeName.CHAR_TYPES.contains(rhsLiteral.getTypeName())) { - val = String.valueOf(RexLiteral.stringValue(rhsLiteral)); - } else if (SqlTypeName.TIMESTAMP == rhsLiteral.getTypeName() || SqlTypeName.DATE == rhsLiteral.getTypeName()) { - val = String.valueOf( - Calcites.calciteDateTimeLiteralToJoda( - rhsLiteral, - plannerContext.getTimeZone() - ).getMillis() - ); + if (plannerContext.isUseBoundsAndSelectors()) { + final String val; + final RexLiteral rhsLiteral = (RexLiteral) rhs; + if (SqlTypeName.NUMERIC_TYPES.contains(rhsLiteral.getTypeName())) { + val = String.valueOf(RexLiteral.value(rhsLiteral)); + } else if (SqlTypeName.CHAR_TYPES.contains(rhsLiteral.getTypeName())) { + val = String.valueOf(RexLiteral.stringValue(rhsLiteral)); + } else if (SqlTypeName.TIMESTAMP == rhsLiteral.getTypeName() || SqlTypeName.DATE == rhsLiteral.getTypeName()) { + val = String.valueOf( + Calcites.calciteDateTimeLiteralToJoda( + rhsLiteral, + plannerContext.getTimeZone() + ).getMillis() + ); + } else { + // Don't know how to filter on this kind of literal. + return null; + } + // Numeric lhs needs a numeric comparison. 
+ final StringComparator comparator = Calcites.getStringComparatorForRelDataType(lhs.getType()); + final BoundRefKey boundRefKey = new BoundRefKey(column, extractionFn, comparator); + final DimFilter filter; + + // Always use BoundDimFilters, to simplify filter optimization later (it helps to remember the comparator). + switch (flippedKind) { + case EQUALS: + filter = Bounds.equalTo(boundRefKey, val); + break; + case NOT_EQUALS: + filter = new NotDimFilter(Bounds.equalTo(boundRefKey, val)); + break; + case GREATER_THAN: + filter = Bounds.greaterThan(boundRefKey, val); + break; + case GREATER_THAN_OR_EQUAL: + filter = Bounds.greaterThanOrEqualTo(boundRefKey, val); + break; + case LESS_THAN: + filter = Bounds.lessThan(boundRefKey, val); + break; + case LESS_THAN_OR_EQUAL: + filter = Bounds.lessThanOrEqualTo(boundRefKey, val); + break; + default: + throw new IllegalStateException("Shouldn't have got here"); + } + + return filter; } else { - // Don't know how to filter on this kind of literal. - return null; - } + final Object val; + if (parsedRhsExpression != null && parsedRhsExpression.isLiteral()) { + val = parsedRhsExpression.getLiteralValue(); + } else { + // Don't know how to filter on this kind of literal. + return null; + } - // Numeric lhs needs a numeric comparison. - final StringComparator comparator = Calcites.getStringComparatorForRelDataType(lhs.getType()); - final BoundRefKey boundRefKey = new BoundRefKey(column, extractionFn, comparator); - final DimFilter filter; - - // Always use BoundDimFilters, to simplify filter optimization later (it helps to remember the comparator). 
- switch (flippedKind) { - case EQUALS: - filter = Bounds.equalTo(boundRefKey, val); - break; - case NOT_EQUALS: - filter = new NotDimFilter(Bounds.equalTo(boundRefKey, val)); - break; - case GREATER_THAN: - filter = Bounds.greaterThan(boundRefKey, val); - break; - case GREATER_THAN_OR_EQUAL: - filter = Bounds.greaterThanOrEqualTo(boundRefKey, val); - break; - case LESS_THAN: - filter = Bounds.lessThan(boundRefKey, val); - break; - case LESS_THAN_OR_EQUAL: - filter = Bounds.lessThanOrEqualTo(boundRefKey, val); - break; - default: - throw new IllegalStateException("Shouldn't have got here"); - } + final ColumnType matchValueType = Calcites.getColumnTypeForRelDataType(rhs.getType()); + final RangeRefKey rangeRefKey = new RangeRefKey(column, matchValueType, extractionFn); + final DimFilter filter; + + // Always use RangeFilter, to simplify filter optimization later + switch (flippedKind) { + case EQUALS: + filter = Ranges.equalTo(rangeRefKey, val); + break; + case NOT_EQUALS: + filter = new NotDimFilter(Ranges.equalTo(rangeRefKey, val)); + break; + case GREATER_THAN: + filter = Ranges.greaterThan(rangeRefKey, val); + break; + case GREATER_THAN_OR_EQUAL: + filter = Ranges.greaterThanOrEqualTo(rangeRefKey, val); + break; + case LESS_THAN: + filter = Ranges.lessThan(rangeRefKey, val); + break; + case LESS_THAN_OR_EQUAL: + filter = Ranges.lessThanOrEqualTo(rangeRefKey, val); + break; + default: + throw new IllegalStateException("Shouldn't have got here"); + } - return filter; + return filter; + } } else if (rexNode instanceof RexCall) { final SqlOperator operator = ((RexCall) rexNode).getOperator(); final SqlOperatorConversion conversion = plannerContext.getPlannerToolbox().operatorTable().lookupOperatorConversion(operator); @@ -772,16 +841,27 @@ private static DimFilter buildTimeFloorFilter( final String column, final Granularity granularity, final SqlKind operatorKind, - final long rhsMillis + final long rhsMillis, + final PlannerContext plannerContext ) { - final 
BoundRefKey boundRefKey = new BoundRefKey(column, null, StringComparators.NUMERIC); - final Interval rhsInterval = granularity.bucket(DateTimes.utc(rhsMillis)); + if (plannerContext.isUseBoundsAndSelectors()) { + final BoundRefKey boundRefKey = new BoundRefKey(column, null, StringComparators.NUMERIC); + final Interval rhsInterval = granularity.bucket(DateTimes.utc(rhsMillis)); - // Is rhs aligned on granularity boundaries? - final boolean rhsAligned = rhsInterval.getStartMillis() == rhsMillis; + // Is rhs aligned on granularity boundaries? + final boolean rhsAligned = rhsInterval.getStartMillis() == rhsMillis; - return getBoundTimeDimFilter(operatorKind, boundRefKey, rhsInterval, rhsAligned); + return getBoundTimeDimFilter(operatorKind, boundRefKey, rhsInterval, rhsAligned); + } else { + final RangeRefKey rangeRefKey = new RangeRefKey(column, ColumnType.LONG, null); + final Interval rhsInterval = granularity.bucket(DateTimes.utc(rhsMillis)); + + // Is rhs aligned on granularity boundaries? + final boolean rhsAligned = rhsInterval.getStartMillis() == rhsMillis; + + return getRangeTimeDimFilter(operatorKind, rangeRefKey, rhsInterval, rhsAligned); + } } @@ -817,4 +897,37 @@ private static DimFilter getBoundTimeDimFilter( throw new IllegalStateException("Shouldn't have got here"); } } + + private static DimFilter getRangeTimeDimFilter( + SqlKind operatorKind, + RangeRefKey rangeRefKey, + Interval interval, + boolean isAligned + ) + { + switch (operatorKind) { + case EQUALS: + return isAligned + ? Ranges.interval(rangeRefKey, interval) + : Filtration.matchNothing(); + case NOT_EQUALS: + return isAligned + ? new NotDimFilter(Ranges.interval(rangeRefKey, interval)) + : Filtration.matchEverything(); + case GREATER_THAN: + return Ranges.greaterThanOrEqualTo(rangeRefKey, String.valueOf(interval.getEndMillis())); + case GREATER_THAN_OR_EQUAL: + return isAligned + ? 
Ranges.greaterThanOrEqualTo(rangeRefKey, String.valueOf(interval.getStartMillis())) + : Ranges.greaterThanOrEqualTo(rangeRefKey, String.valueOf(interval.getEndMillis())); + case LESS_THAN: + return isAligned + ? Ranges.lessThan(rangeRefKey, String.valueOf(interval.getStartMillis())) + : Ranges.lessThan(rangeRefKey, String.valueOf(interval.getEndMillis())); + case LESS_THAN_OR_EQUAL: + return Ranges.lessThan(rangeRefKey, String.valueOf(interval.getEndMillis())); + default: + throw new IllegalStateException("Shouldn't have got here"); + } + } } diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/CaseOperatorConversion.java b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/CaseOperatorConversion.java index 39d5c952d924..beee936b1947 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/CaseOperatorConversion.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/CaseOperatorConversion.java @@ -29,6 +29,7 @@ import org.apache.druid.query.filter.AndDimFilter; import org.apache.druid.query.filter.DimFilter; import org.apache.druid.query.filter.ExpressionDimFilter; +import org.apache.druid.query.filter.NullFilter; import org.apache.druid.query.filter.OrDimFilter; import org.apache.druid.query.filter.SelectorDimFilter; import org.apache.druid.segment.column.RowSignature; @@ -116,11 +117,20 @@ public DimFilter toDruidFilter( if (condition.getExpression().startsWith("notnull") && condition.getArguments().get(0).isDirectColumnAccess()) { DimFilter thenFilter = null, elseFilter = null; - final DimFilter isNull = new SelectorDimFilter( - condition.getArguments().get(0).getDirectColumn(), - null, - null - ); + final DimFilter isNull; + if (plannerContext.isUseBoundsAndSelectors()) { + isNull = new SelectorDimFilter( + condition.getArguments().get(0).getDirectColumn(), + null, + null + ); + } else { + isNull = new NullFilter( + condition.getArguments().get(0).getDirectColumn(), + null, + 
null + ); + } if (call.getOperands().get(1) instanceof RexCall) { final RexCall thenCall = (RexCall) call.getOperands().get(1); diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/filtration/CombineAndSimplifyBounds.java b/sql/src/main/java/org/apache/druid/sql/calcite/filtration/CombineAndSimplifyBounds.java index 7b4f4b669ec9..a11ffd64ed8a 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/filtration/CombineAndSimplifyBounds.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/filtration/CombineAndSimplifyBounds.java @@ -21,6 +21,7 @@ import com.google.common.base.Preconditions; import com.google.common.collect.Lists; +import com.google.common.collect.Maps; import com.google.common.collect.Range; import com.google.common.collect.RangeSet; import org.apache.druid.java.util.common.ISE; @@ -30,6 +31,8 @@ import org.apache.druid.query.filter.FalseDimFilter; import org.apache.druid.query.filter.NotDimFilter; import org.apache.druid.query.filter.OrDimFilter; +import org.apache.druid.query.filter.RangeFilter; +import org.apache.druid.segment.column.ColumnType; import java.util.ArrayList; import java.util.HashMap; @@ -133,6 +136,9 @@ private static DimFilter doSimplify(final List children, boolean disj // Group Bound filters by dimension, extractionFn, and comparator and compute a RangeSet for each one. final Map> bounds = new HashMap<>(); + // Group range filters by dimension, extractionFn, and matchValueType and compute a RangeSet for each one. 
+ final Map> ranges = new HashMap<>(); + final Map leastRestrictiveNumericTypes = new HashMap<>(); // all and/or filters have at least 1 child boolean allFalse = true; @@ -143,8 +149,20 @@ private static DimFilter doSimplify(final List children, boolean disj final List filterList = bounds.computeIfAbsent(boundRefKey, k -> new ArrayList<>()); filterList.add(bound); allFalse = false; + } else if (child instanceof RangeFilter) { + final RangeFilter range = (RangeFilter) child; + final RangeRefKey rangeRefKey = RangeRefKey.from(range); + if (rangeRefKey.getMatchValueType().isNumeric()) { + leastRestrictiveNumericTypes.compute( + range.getColumn(), + (c, existingType) -> ColumnType.leastRestrictiveType(existingType, range.getMatchValueType()) + ); + } + final List filterList = ranges.computeIfAbsent(rangeRefKey, k -> new ArrayList<>()); + filterList.add(range); + allFalse = false; } else { - allFalse &= child instanceof FalseDimFilter; + allFalse = allFalse && (child instanceof FalseDimFilter); } } @@ -188,6 +206,63 @@ private static DimFilter doSimplify(final List children, boolean disj } } + // Try to consolidate numeric groups + final Map> consolidatedNumericRanges = Maps.newHashMapWithExpectedSize(ranges.size()); + for (Map.Entry> entry : ranges.entrySet()) { + RangeRefKey refKey = entry.getKey(); + if (entry.getKey().getMatchValueType().isNumeric()) { + ColumnType numericTypeToUse = leastRestrictiveNumericTypes.get(refKey.getColumn()); + refKey = new RangeRefKey(refKey.getColumn(), numericTypeToUse, refKey.getExtractionFn()); + } + final List filterList = consolidatedNumericRanges.computeIfAbsent(refKey, k -> new ArrayList<>()); + for (RangeFilter filter : entry.getValue()) { + + int pos = newChildren.indexOf(filter); + if (!newChildren.remove(filter)) { + // Don't expect this to happen, but include it as a sanity check. 
+ throw new ISE("Tried to remove range, but couldn't"); + } + final RangeFilter rewrite = Ranges.toFilter(refKey, Ranges.toRange(filter, refKey.getMatchValueType())); + newChildren.add(pos, rewrite); + filterList.add(rewrite); + } + } + + // Try to simplify filters within each group. + for (Map.Entry> entry : consolidatedNumericRanges.entrySet()) { + final RangeRefKey rangeRefKey = entry.getKey(); + final List filterList = entry.getValue(); + + // Create a RangeSet for this group. + final RangeSet rangeSet = disjunction + ? RangeSets.unionRanges(Ranges.toRanges(filterList)) + : RangeSets.intersectRanges(Ranges.toRanges(filterList)); + + if (rangeSet.asRanges().size() < filterList.size()) { + // We found a simplification. Remove the old filters and add new ones. + for (final RangeFilter range : filterList) { + if (!newChildren.remove(range)) { + // Don't expect this to happen, but include it as a sanity check. + throw new ISE("Tried to remove range, but couldn't"); + } + } + + if (rangeSet.asRanges().isEmpty()) { + // range set matches nothing, equivalent to FALSE + newChildren.add(Filtration.matchNothing()); + } + + for (final Range range : rangeSet.asRanges()) { + if (!range.hasLowerBound() && !range.hasUpperBound()) { + // range matches all, equivalent to TRUE + newChildren.add(Filtration.matchEverything()); + } else { + newChildren.add(Ranges.toFilter(rangeRefKey, range)); + } + } + } + } + // Finally: Go through newChildren, removing or potentially exiting early based on TRUE / FALSE marker filters. Preconditions.checkState(newChildren.size() > 0, "newChildren.size > 0"); @@ -243,6 +318,9 @@ private static DimFilter negate(final DimFilter filter) } else if (filter instanceof BoundDimFilter) { final BoundDimFilter negated = Bounds.not((BoundDimFilter) filter); return negated != null ? negated : new NotDimFilter(filter); + } else if (filter instanceof RangeFilter) { + final RangeFilter negated = Ranges.not((RangeFilter) filter); + return negated != null ? 
negated : new NotDimFilter(filter); } else { return new NotDimFilter(filter); } diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/filtration/ConvertBoundsToSelectors.java b/sql/src/main/java/org/apache/druid/sql/calcite/filtration/ConvertBoundsToSelectors.java index 155e7a632ccf..6c5109693beb 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/filtration/ConvertBoundsToSelectors.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/filtration/ConvertBoundsToSelectors.java @@ -21,6 +21,8 @@ import org.apache.druid.query.filter.BoundDimFilter; import org.apache.druid.query.filter.DimFilter; +import org.apache.druid.query.filter.EqualityFilter; +import org.apache.druid.query.filter.RangeFilter; import org.apache.druid.query.filter.SelectorDimFilter; import org.apache.druid.query.ordering.StringComparator; import org.apache.druid.segment.column.RowSignature; @@ -65,6 +67,21 @@ public DimFilter process(DimFilter filter) } else { return filter; } + } else if (filter instanceof RangeFilter) { + final RangeFilter bound = (RangeFilter) filter; + // since the range filter retains the match value type, we don't need to restrict to ranges + // that match the underlying column type + if (bound.isEquality()) { + return new EqualityFilter( + bound.getColumn(), + bound.getMatchValueType(), + bound.getUpper(), + bound.getExtractionFn(), + null + ); + } else { + return filter; + } } else { return filter; } diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/filtration/ConvertSelectorsToIns.java b/sql/src/main/java/org/apache/druid/sql/calcite/filtration/ConvertSelectorsToIns.java index 29ea37129897..16769892443e 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/filtration/ConvertSelectorsToIns.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/filtration/ConvertSelectorsToIns.java @@ -21,11 +21,15 @@ import com.google.common.collect.Lists; import org.apache.druid.java.util.common.ISE; +import org.apache.druid.math.expr.ExprEval; +import 
org.apache.druid.math.expr.ExpressionType; import org.apache.druid.query.filter.DimFilter; +import org.apache.druid.query.filter.EqualityFilter; import org.apache.druid.query.filter.InDimFilter; import org.apache.druid.query.filter.OrDimFilter; import org.apache.druid.query.filter.SelectorDimFilter; import org.apache.druid.segment.column.RowSignature; +import org.apache.druid.segment.column.ValueType; import org.apache.druid.sql.calcite.expression.SimpleExtraction; import org.apache.druid.sql.calcite.table.RowSignatures; @@ -57,6 +61,7 @@ public DimFilter process(DimFilter filter) // Group filters by dimension and extractionFn. final Map> selectors = new HashMap<>(); + final Map> equality = new HashMap<>(); for (DimFilter child : children) { if (child instanceof SelectorDimFilter) { @@ -70,6 +75,16 @@ public DimFilter process(DimFilter filter) ); List filterList = selectors.computeIfAbsent(boundRefKey, k -> new ArrayList<>()); filterList.add(selector); + } else if (child instanceof EqualityFilter) { + final EqualityFilter equals = (EqualityFilter) child; + if (!equals.getMatchValueType().is(ValueType.STRING)) { + // skip non-string equality filters since InDimFilter uses a sorted string set, which is a different sort + // than numbers or other types might use + continue; + } + final RangeRefKey rangeRefKey = RangeRefKey.from(equals); + List filterList = equality.computeIfAbsent(rangeRefKey, k -> new ArrayList<>()); + filterList.add(equals); } } @@ -92,6 +107,33 @@ public DimFilter process(DimFilter filter) } } + // Emit IN filters for each group of size > 1 + // right now we only do this for string types, since the value set is sorted in string order + // someday we might want to either allow numbers after ensuring that all value set indexes can handle value + // sets which are not in the correct sorted order, or make a cooler in filter that retains the match value type + // and can sort the values in match value native order + for (Map.Entry> entry : 
equality.entrySet()) { + final List filterList = entry.getValue(); + if (filterList.size() > 1) { + // We found a simplification. Remove the old filters and add new ones. + final InDimFilter.ValuesSet values = new InDimFilter.ValuesSet(); + + for (final EqualityFilter equals : filterList) { + values.add( + ExprEval.ofType(ExpressionType.fromColumnType(equals.getMatchValueType()), equals.getMatchValue()) + .castTo(ExpressionType.STRING) + .asString() + ); + if (!children.remove(equals)) { + // Don't expect this to happen, but include it as a sanity check. + throw new ISE("Tried to remove equals but couldn't"); + } + } + + children.add(new InDimFilter(entry.getKey().getColumn(), values, entry.getKey().getExtractionFn(), null)); + } + } + if (!children.equals(((OrDimFilter) filter).getFields())) { return children.size() == 1 ? children.get(0) : new OrDimFilter(children); } else { diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/filtration/MoveTimeFiltersToIntervals.java b/sql/src/main/java/org/apache/druid/sql/calcite/filtration/MoveTimeFiltersToIntervals.java index b374d2523744..dc93dc1bb20c 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/filtration/MoveTimeFiltersToIntervals.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/filtration/MoveTimeFiltersToIntervals.java @@ -23,14 +23,17 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.Range; import com.google.common.collect.RangeSet; +import org.apache.druid.java.util.common.Numbers; import org.apache.druid.java.util.common.Pair; import org.apache.druid.query.filter.AndDimFilter; import org.apache.druid.query.filter.BoundDimFilter; import org.apache.druid.query.filter.DimFilter; import org.apache.druid.query.filter.NotDimFilter; import org.apache.druid.query.filter.OrDimFilter; +import org.apache.druid.query.filter.RangeFilter; import org.apache.druid.query.ordering.StringComparators; import org.apache.druid.segment.column.ColumnHolder; +import 
org.apache.druid.segment.column.ColumnType; import java.util.ArrayList; import java.util.List; @@ -44,6 +47,12 @@ public class MoveTimeFiltersToIntervals implements Function> extractConvertibleTimeBounds(fina } else { return Pair.of(filter, null); } + } else if (filter instanceof RangeFilter) { + final RangeFilter bound = (RangeFilter) filter; + if (RangeRefKey.from(bound).equals(TIME_RANGE_REF_KEY)) { + return Pair.of(null, RangeSets.of(toLongRangeFromRange(Ranges.toRange(bound)))); + } else { + return Pair.of(filter, null); + } } else { return Pair.of(filter, null); } @@ -169,4 +185,20 @@ private static Range toLongRange(final Range range) ); } } + + private static Range toLongRangeFromRange(final Range range) + { + if (!range.hasUpperBound() && !range.hasLowerBound()) { + return Range.all(); + } else if (range.hasUpperBound() && !range.hasLowerBound()) { + return Range.upTo(Numbers.parseLong(range.upperEndpoint().getValue()), range.upperBoundType()); + } else if (!range.hasUpperBound() && range.hasLowerBound()) { + return Range.downTo(Numbers.parseLong(range.lowerEndpoint().getValue()), range.lowerBoundType()); + } else { + return Range.range( + Numbers.parseLong(range.lowerEndpoint().getValue()), range.lowerBoundType(), + Numbers.parseLong(range.upperEndpoint().getValue()), range.upperBoundType() + ); + } + } } diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/filtration/RangeRefKey.java b/sql/src/main/java/org/apache/druid/sql/calcite/filtration/RangeRefKey.java new file mode 100644 index 000000000000..0547e87865f9 --- /dev/null +++ b/sql/src/main/java/org/apache/druid/sql/calcite/filtration/RangeRefKey.java @@ -0,0 +1,107 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.sql.calcite.filtration; + +import org.apache.druid.query.extraction.ExtractionFn; +import org.apache.druid.query.filter.EqualityFilter; +import org.apache.druid.query.filter.RangeFilter; +import org.apache.druid.segment.column.ColumnType; + +import javax.annotation.Nullable; +import java.util.Objects; + +public class RangeRefKey +{ + private final String column; + private final ColumnType matchValueType; + @Nullable + private final ExtractionFn extractionFn; + + public RangeRefKey(String column, ColumnType matchValueType, ExtractionFn extractionFn) + { + this.column = column; + this.matchValueType = matchValueType; + this.extractionFn = extractionFn; + } + + public static RangeRefKey from(RangeFilter filter) + { + return new RangeRefKey( + filter.getColumn(), + filter.getMatchValueType(), + filter.getExtractionFn() + ); + } + + public static RangeRefKey from(EqualityFilter filter) + { + return new RangeRefKey( + filter.getColumn(), + filter.getMatchValueType(), + filter.getExtractionFn() + ); + } + + public String getColumn() + { + return column; + } + + public ColumnType getMatchValueType() + { + return matchValueType; + } + + public ExtractionFn getExtractionFn() + { + return extractionFn; + } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + 
RangeRefKey that = (RangeRefKey) o; + return Objects.equals(column, that.column) + && Objects.equals(matchValueType, that.matchValueType) + && Objects.equals(extractionFn, that.extractionFn); + } + + @Override + public int hashCode() + { + return Objects.hash(column, matchValueType, extractionFn); + } + + @Override + public String toString() + { + return "RangeRefKey{" + + "column='" + column + '\'' + + ", matchValueType=" + matchValueType + + ", extractionFn=" + extractionFn + + '}'; + } +} diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/filtration/RangeValue.java b/sql/src/main/java/org/apache/druid/sql/calcite/filtration/RangeValue.java new file mode 100644 index 000000000000..2cccdfe9ca5a --- /dev/null +++ b/sql/src/main/java/org/apache/druid/sql/calcite/filtration/RangeValue.java @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.sql.calcite.filtration; + +import org.apache.druid.java.util.common.ISE; +import org.apache.druid.segment.column.ColumnType; + +import javax.annotation.Nullable; +import java.util.Comparator; +import java.util.Objects; + +public class RangeValue implements Comparable +{ + @Nullable + private final Object value; + private final ColumnType matchValueType; + private final Comparator matchValueTypeComparator; + + public RangeValue( + @Nullable Object value, + ColumnType matchValueType + ) + { + this.value = value; + this.matchValueType = matchValueType; + this.matchValueTypeComparator = matchValueType.getNullableStrategy(); + } + + @Nullable + public Object getValue() + { + return value; + } + + public ColumnType getMatchValueType() + { + return matchValueType; + } + + @Override + public int compareTo(RangeValue o) + { + if (!matchValueType.equals(o.matchValueType)) { + throw new ISE("Comparator mismatch: [%s] and [%s]", matchValueType, o.matchValueType); + } + return matchValueTypeComparator.compare(value, o.value); + } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + RangeValue that = (RangeValue) o; + return Objects.equals(value, that.value) && Objects.equals(matchValueType, that.matchValueType); + } + + @Override + public int hashCode() + { + return Objects.hash(value, matchValueType); + } + + @Override + public String toString() + { + return "RangeValue{" + + "value=" + value + + ", matchValueType=" + matchValueType + + '}'; + } +} diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/filtration/Ranges.java b/sql/src/main/java/org/apache/druid/sql/calcite/filtration/Ranges.java new file mode 100644 index 000000000000..03f3366bcdf5 --- /dev/null +++ b/sql/src/main/java/org/apache/druid/sql/calcite/filtration/Ranges.java @@ -0,0 +1,225 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more 
contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.sql.calcite.filtration; + +import com.google.common.collect.BoundType; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Lists; +import com.google.common.collect.Range; +import org.apache.druid.java.util.common.ISE; +import org.apache.druid.math.expr.ExprEval; +import org.apache.druid.math.expr.ExpressionType; +import org.apache.druid.query.filter.RangeFilter; +import org.apache.druid.segment.column.ColumnType; +import org.joda.time.Interval; + +import javax.annotation.Nullable; +import java.util.List; + +public class Ranges +{ + /** + * Negates single-ended Bound filters. + * + * @param range filter + * @return negated filter, or null if this range is double-ended. 
+ */ + @Nullable + public static RangeFilter not(final RangeFilter range) + { + if (range.getUpper() != null && range.getLower() != null) { + return null; + } else if (range.getUpper() != null) { + return new RangeFilter( + range.getColumn(), + range.getMatchValueType(), + range.getUpper(), + null, + !range.isUpperStrict(), + false, + range.getExtractionFn(), + range.getFilterTuning() + ); + } else { + // range.getLower() != null + return new RangeFilter( + range.getColumn(), + range.getMatchValueType(), + null, + range.getLower(), + false, + !range.isLowerStrict(), + range.getExtractionFn(), + range.getFilterTuning() + ); + } + } + + public static Range toRange(final RangeFilter range) + { + final RangeValue upper = range.getUpper() != null + ? new RangeValue(range.getUpper(), range.getMatchValueType()) + : null; + final RangeValue lower = range.getLower() != null + ? new RangeValue(range.getLower(), range.getMatchValueType()) + : null; + + if (lower == null) { + return range.isUpperStrict() ? Range.lessThan(upper) : Range.atMost(upper); + } else if (upper == null) { + return range.isLowerStrict() ? Range.greaterThan(lower) : Range.atLeast(lower); + } else { + BoundType lowerBoundType = range.isLowerStrict() ? BoundType.OPEN : BoundType.CLOSED; + BoundType upperBoundType = range.isUpperStrict() ? BoundType.OPEN : BoundType.CLOSED; + return Range.range(lower, lowerBoundType, upper, upperBoundType); + } + } + + public static Range toRange(final RangeFilter range, final ColumnType newMatchValueType) + { + final ExpressionType exprType = ExpressionType.fromColumnType(newMatchValueType); + final RangeValue upper = range.getUpper() != null + ? new RangeValue(ExprEval.ofType(exprType, range.getUpper()) + .valueOrDefault(), newMatchValueType) + : null; + final RangeValue lower = range.getLower() != null + ? new RangeValue(ExprEval.ofType(exprType, range.getLower()) + .valueOrDefault(), newMatchValueType) + : null; + + if (lower == null) { + return range.isUpperStrict() ? 
Range.lessThan(upper) : Range.atMost(upper); + } else if (upper == null) { + return range.isLowerStrict() ? Range.greaterThan(lower) : Range.atLeast(lower); + } else { + BoundType lowerBoundType = range.isLowerStrict() ? BoundType.OPEN : BoundType.CLOSED; + BoundType upperBoundType = range.isUpperStrict() ? BoundType.OPEN : BoundType.CLOSED; + return Range.range(lower, lowerBoundType, upper, upperBoundType); + } + } + + public static List> toRanges(final List ranges) + { + return ImmutableList.copyOf(Lists.transform(ranges, Ranges::toRange)); + } + + public static RangeFilter toFilter(final RangeRefKey rangeRefKey, final Range range) + { + return new RangeFilter( + rangeRefKey.getColumn(), + rangeRefKey.getMatchValueType(), + range.hasLowerBound() ? range.lowerEndpoint().getValue() : null, + range.hasUpperBound() ? range.upperEndpoint().getValue() : null, + range.hasLowerBound() && range.lowerBoundType() == BoundType.OPEN, + range.hasUpperBound() && range.upperBoundType() == BoundType.OPEN, + rangeRefKey.getExtractionFn(), + null + ); + } + + public static RangeFilter equalTo(final RangeRefKey rangeRefKey, final Object value) + { + return new RangeFilter( + rangeRefKey.getColumn(), + rangeRefKey.getMatchValueType(), + value, + value, + false, + false, + rangeRefKey.getExtractionFn(), + null + ); + } + + public static RangeFilter greaterThan(final RangeRefKey rangeRefKey, final Object value) + { + return new RangeFilter( + rangeRefKey.getColumn(), + rangeRefKey.getMatchValueType(), + value, + null, + true, + false, + rangeRefKey.getExtractionFn(), + null + ); + } + + public static RangeFilter greaterThanOrEqualTo(final RangeRefKey rangeRefKey, final Object value) + { + return new RangeFilter( + rangeRefKey.getColumn(), + rangeRefKey.getMatchValueType(), + value, + null, + false, + false, + rangeRefKey.getExtractionFn(), + null + ); + } + + public static RangeFilter lessThan(final RangeRefKey rangeRefKey, final Object value) + { + return new RangeFilter( + 
rangeRefKey.getColumn(), + rangeRefKey.getMatchValueType(), + null, + value, + false, + true, + rangeRefKey.getExtractionFn(), + null + ); + } + + public static RangeFilter lessThanOrEqualTo(final RangeRefKey rangeRefKey, final Object value) + { + return new RangeFilter( + rangeRefKey.getColumn(), + rangeRefKey.getMatchValueType(), + null, + value, + false, + false, + rangeRefKey.getExtractionFn(), + null + ); + } + + public static RangeFilter interval(final RangeRefKey rangeRefKey, final Interval interval) + { + if (!rangeRefKey.getMatchValueType().equals(ColumnType.LONG)) { + // Interval comparison only works with LONG comparator. + throw new ISE("Comparator must be LONG but was[%s]", rangeRefKey.getMatchValueType()); + } + + return new RangeFilter( + rangeRefKey.getColumn(), + rangeRefKey.getMatchValueType(), + interval.getStartMillis(), + interval.getEndMillis(), + false, + true, + rangeRefKey.getExtractionFn(), + null + ); + } +} diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/planner/Calcites.java b/sql/src/main/java/org/apache/druid/sql/calcite/planner/Calcites.java index 331a61a1f50b..7c066e663c44 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/planner/Calcites.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/planner/Calcites.java @@ -263,9 +263,12 @@ public static RelDataType createSqlArrayTypeWithNullability( final boolean nullable ) { - final RelDataType dataType = typeFactory.createArrayType( - createSqlTypeWithNullability(typeFactory, elementTypeName, nullable), - -1 + final RelDataType dataType = typeFactory.createTypeWithNullability( + typeFactory.createArrayType( + createSqlTypeWithNullability(typeFactory, elementTypeName, nullable), + -1 + ), + true ); return dataType; diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/planner/DruidRexExecutor.java b/sql/src/main/java/org/apache/druid/sql/calcite/planner/DruidRexExecutor.java index 844d9896ae8d..c8b5c2b60dd2 100644 --- 
a/sql/src/main/java/org/apache/druid/sql/calcite/planner/DruidRexExecutor.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/planner/DruidRexExecutor.java @@ -28,6 +28,7 @@ import org.apache.druid.math.expr.Expr; import org.apache.druid.math.expr.ExprEval; import org.apache.druid.math.expr.ExprType; +import org.apache.druid.math.expr.ExpressionType; import org.apache.druid.math.expr.InputBindings; import org.apache.druid.math.expr.Parser; import org.apache.druid.segment.column.RowSignature; @@ -145,7 +146,7 @@ public void reduce( } else if (SqlTypeName.NUMERIC_TYPES.contains(constExp.getType().getComponentType().getSqlTypeName())) { if (exprResult.type().getElementType().is(ExprType.LONG)) { List resultAsBigDecimalList = new ArrayList<>(array.length); - for (Object val : array) { + for (Object val : exprResult.castTo(ExpressionType.LONG_ARRAY).asArray()) { final Number longVal = (Number) val; if (longVal == null) { resultAsBigDecimalList.add(null); @@ -156,7 +157,7 @@ public void reduce( literal = rexBuilder.makeLiteral(resultAsBigDecimalList, constExp.getType(), true); } else { List resultAsBigDecimalList = new ArrayList<>(array.length); - for (Object val : array) { + for (Object val : exprResult.castTo(ExpressionType.fromColumnType(druidExpression.getDruidType())).asArray()) { final Number doubleVal = (Number) val; if (doubleVal == null) { resultAsBigDecimalList.add(null); diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/planner/PlannerContext.java b/sql/src/main/java/org/apache/druid/sql/calcite/planner/PlannerContext.java index abf837bb9c57..d0230da06b17 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/planner/PlannerContext.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/planner/PlannerContext.java @@ -28,6 +28,7 @@ import org.apache.calcite.avatica.remote.TypedValue; import org.apache.calcite.linq4j.QueryProvider; import org.apache.calcite.schema.SchemaPlus; +import org.apache.druid.common.config.NullHandling; import 
org.apache.druid.java.util.common.DateTimes; import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.common.Numbers; @@ -85,6 +86,10 @@ public class PlannerContext */ public static final String CTX_ENABLE_UNNEST = "enableUnnest"; + public static final String CTX_SQL_USE_BOUNDS_AND_SELECTORS = "sqlUseBoundAndSelectors"; + public static final boolean DEFAULT_SQL_USE_BOUNDS_AND_SELECTORS = NullHandling.replaceWithDefault(); + + // DataContext keys public static final String DATA_CTX_AUTHENTICATION_RESULT = "authenticationResult"; @@ -97,6 +102,7 @@ public class PlannerContext private final Map queryContext; private final String sqlQueryId; private final boolean stringifyArrays; + private final boolean useBoundsAndSelectors; private final CopyOnWriteArrayList nativeQueryIds = new CopyOnWriteArrayList<>(); private final PlannerHook hook; // bindings for dynamic parameters to bind during planning @@ -121,6 +127,7 @@ private PlannerContext( final PlannerConfig plannerConfig, final DateTime localNow, final boolean stringifyArrays, + final boolean useBoundsAndSelectors, final SqlEngine engine, final Map queryContext, final PlannerHook hook @@ -134,6 +141,7 @@ private PlannerContext( this.queryContext = queryContext; this.localNow = Preconditions.checkNotNull(localNow, "localNow"); this.stringifyArrays = stringifyArrays; + this.useBoundsAndSelectors = useBoundsAndSelectors; this.hook = hook == null ? 
NoOpPlannerHook.INSTANCE : hook; String sqlQueryId = (String) this.queryContext.get(QueryContexts.CTX_SQL_QUERY_ID); @@ -155,10 +163,12 @@ public static PlannerContext create( final DateTime utcNow; final DateTimeZone timeZone; final boolean stringifyArrays; + final boolean useBoundsAndSelectors; final Object stringifyParam = queryContext.get(QueryContexts.CTX_SQL_STRINGIFY_ARRAYS); final Object tsParam = queryContext.get(CTX_SQL_CURRENT_TIMESTAMP); final Object tzParam = queryContext.get(CTX_SQL_TIME_ZONE); + final Object useBoundsAndSelectorsParam = queryContext.get(CTX_SQL_USE_BOUNDS_AND_SELECTORS); if (tsParam != null) { utcNow = new DateTime(tsParam, DateTimeZone.UTC); @@ -178,12 +188,19 @@ public static PlannerContext create( stringifyArrays = true; } + if (useBoundsAndSelectorsParam != null) { + useBoundsAndSelectors = Numbers.parseBoolean(useBoundsAndSelectorsParam); + } else { + useBoundsAndSelectors = DEFAULT_SQL_USE_BOUNDS_AND_SELECTORS; + } + return new PlannerContext( plannerToolbox, sql, plannerToolbox.plannerConfig().withOverrides(queryContext), utcNow.withZone(timeZone), stringifyArrays, + useBoundsAndSelectors, engine, queryContext, hook @@ -299,6 +316,11 @@ public boolean isStringifyArrays() return stringifyArrays; } + public boolean isUseBoundsAndSelectors() + { + return useBoundsAndSelectors; + } + public List getParameters() { return parameters; diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/run/NativeQueryMaker.java b/sql/src/main/java/org/apache/druid/sql/calcite/run/NativeQueryMaker.java index e2b305919010..a739b0d3f72a 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/run/NativeQueryMaker.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/run/NativeQueryMaker.java @@ -39,6 +39,7 @@ import org.apache.druid.query.filter.BoundDimFilter; import org.apache.druid.query.filter.DimFilter; import org.apache.druid.query.filter.OrDimFilter; +import org.apache.druid.query.ordering.StringComparators; import 
org.apache.druid.query.spec.QuerySegmentSpec; import org.apache.druid.query.timeseries.TimeseriesQuery; import org.apache.druid.segment.column.ColumnHolder; @@ -107,16 +108,20 @@ public QueryResponse runQuery(final DruidQuery druidQuery) OrDimFilter orDimFilter = (OrDimFilter) query.getFilter(); int numBoundFilters = 0; for (DimFilter filter : orDimFilter.getFields()) { - numBoundFilters += filter instanceof BoundDimFilter ? 1 : 0; - } - if (numBoundFilters > numFilters) { - String dimension = ((BoundDimFilter) (orDimFilter.getFields().get(0))).getDimension(); - throw new UOE(StringUtils.format( - "The number of values in the IN clause for [%s] in query exceeds configured maxNumericFilter limit of [%s] for INs. Cast [%s] values of IN clause to String", - dimension, - numFilters, - orDimFilter.getFields().size() - )); + if (filter instanceof BoundDimFilter) { + final BoundDimFilter bound = (BoundDimFilter) filter; + if (StringComparators.NUMERIC.equals(bound.getOrdering())) { + numBoundFilters++; + if (numBoundFilters > numFilters) { + throw new UOE(StringUtils.format( + "The number of values in the IN clause for [%s] in query exceeds configured maxNumericFilter limit of [%s] for INs. 
Cast [%s] values of IN clause to String", + bound.getDimension(), + numFilters, + orDimFilter.getFields().size() + )); + } + } + } } } } diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/BaseCalciteQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/BaseCalciteQueryTest.java index 2ba9ce6dc730..b65a904756f3 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/BaseCalciteQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/BaseCalciteQueryTest.java @@ -40,6 +40,7 @@ import org.apache.druid.java.util.common.granularity.Granularity; import org.apache.druid.java.util.common.io.Closer; import org.apache.druid.java.util.common.logger.Logger; +import org.apache.druid.math.expr.Evals; import org.apache.druid.query.DataSource; import org.apache.druid.query.Druids; import org.apache.druid.query.JoinDataSource; @@ -55,10 +56,13 @@ import org.apache.druid.query.filter.AndDimFilter; import org.apache.druid.query.filter.BoundDimFilter; import org.apache.druid.query.filter.DimFilter; +import org.apache.druid.query.filter.EqualityFilter; import org.apache.druid.query.filter.ExpressionDimFilter; import org.apache.druid.query.filter.InDimFilter; import org.apache.druid.query.filter.NotDimFilter; +import org.apache.druid.query.filter.NullFilter; import org.apache.druid.query.filter.OrDimFilter; +import org.apache.druid.query.filter.RangeFilter; import org.apache.druid.query.filter.SelectorDimFilter; import org.apache.druid.query.groupby.GroupByQuery; import org.apache.druid.query.groupby.having.DimFilterHavingSpec; @@ -119,6 +123,7 @@ import java.io.IOException; import java.io.PrintStream; import java.util.Arrays; +import java.util.Collection; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -343,11 +348,52 @@ public static NotDimFilter not(DimFilter filter) return new NotDimFilter(filter); } - public static InDimFilter in(String dimension, List values, ExtractionFn extractionFn) + public static InDimFilter 
in(String dimension, Collection values, ExtractionFn extractionFn) { return new InDimFilter(dimension, values, extractionFn); } + public static DimFilter isNull(final String fieldName) + { + return isNull(fieldName, null); + } + + public static DimFilter isNull(final String fieldName, final ExtractionFn extractionFn) + { + if (NullHandling.sqlCompatible()) { + return new NullFilter(fieldName, extractionFn, null); + } + return selector(fieldName, NullHandling.defaultStringValue(), extractionFn); + } + + public static DimFilter notNull(final String fieldName) + { + return not(isNull(fieldName)); + } + + public static DimFilter equality(final String fieldName, final Object matchValue, final ColumnType matchValueType) + { + return equality(fieldName, matchValue, null, matchValueType); + } + + public static DimFilter equality( + final String fieldName, + final Object matchValue, + final ExtractionFn extractionFn, + final ColumnType matchValueType + ) + { + if (NullHandling.sqlCompatible()) { + return new EqualityFilter(fieldName, matchValueType, matchValue, extractionFn, null); + } + return selector(fieldName, Evals.asString(matchValue), extractionFn); + } + + public static SelectorDimFilter selector(final String fieldName, final String value) + { + return selector(fieldName, value, null); + } + public static SelectorDimFilter selector(final String fieldName, final String value, final ExtractionFn extractionFn) { return new SelectorDimFilter(fieldName, value, extractionFn); @@ -396,6 +442,59 @@ public static BoundDimFilter timeBound(final Object intervalObj) ); } + public static DimFilter range( + final String fieldName, + final ColumnType matchValueType, + final Object lower, + final Object upper, + final boolean lowerStrict, + final boolean upperStrict + ) + { + return range(fieldName, matchValueType, lower, upper, lowerStrict, upperStrict, null); + } + + public static DimFilter range( + final String fieldName, + final ColumnType matchValueType, + final Object lower, 
+ final Object upper, + final boolean lowerStrict, + final boolean upperStrict, + final ExtractionFn extractionFn + ) + { + if (NullHandling.sqlCompatible()) { + return new RangeFilter(fieldName, matchValueType, lower, upper, lowerStrict, upperStrict, extractionFn, null); + } + return new BoundDimFilter( + fieldName, + Evals.asString(lower), + Evals.asString(upper), + lowerStrict, + upperStrict, + false, + extractionFn, + matchValueType.isNumeric() ? StringComparators.NUMERIC : StringComparators.LEXICOGRAPHIC + ); + } + + public static DimFilter timeRange(final Object intervalObj) + { + final Interval interval = new Interval(intervalObj, ISOChronology.getInstanceUTC()); + if (NullHandling.sqlCompatible()) { + return range( + ColumnHolder.TIME_COLUMN_NAME, + ColumnType.LONG, + interval.getStartMillis(), + interval.getEndMillis(), + false, + true + ); + } + return timeBound(intervalObj); + } + public static CascadeExtractionFn cascade(final ExtractionFn... fns) { return new CascadeExtractionFn(fns); diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java index da9eb754c433..addd48965a93 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java @@ -45,7 +45,6 @@ import org.apache.druid.query.filter.ExpressionDimFilter; import org.apache.druid.query.filter.InDimFilter; import org.apache.druid.query.filter.LikeDimFilter; -import org.apache.druid.query.filter.NotDimFilter; import org.apache.druid.query.filter.SelectorDimFilter; import org.apache.druid.query.groupby.GroupByQuery; import org.apache.druid.query.groupby.having.DimFilterHavingSpec; @@ -1385,7 +1384,7 @@ public void testArrayToStringToMultiValueString() ColumnType.STRING_ARRAY ) ) - .setDimFilter(bound("v0", "0", null, true, false, null, StringComparators.NUMERIC)) + .setDimFilter(range("v0", 
ColumnType.LONG, 0L, null, true, false)) .setDimensions( dimensions( new DefaultDimensionSpec("v1", "_d0", ColumnType.STRING_ARRAY) @@ -1418,7 +1417,7 @@ public void testArrayAgg() .dataSource(CalciteTests.DATASOURCE1) .intervals(querySegmentSpec(Filtration.eternity())) .granularity(Granularities.ALL) - .filters(not(selector("dim1", null, null))) + .filters(notNull("dim1")) .aggregators( aggregators( new ExpressionLambdaAggregatorFactory( @@ -1470,7 +1469,7 @@ public void testArrayAgg() ExpressionLambdaAggregatorFactory.DEFAULT_MAX_SIZE_BYTES, TestExprMacroTable.INSTANCE ), - selector("dim1", "shazbot", null) + equality("dim1", "shazbot", ColumnType.STRING) ) ) ) @@ -1894,7 +1893,7 @@ public void testArrayAggToString() .dataSource(CalciteTests.DATASOURCE1) .intervals(querySegmentSpec(Filtration.eternity())) .granularity(Granularities.ALL) - .filters(not(selector("dim1", null, null))) + .filters(notNull("dim1")) .aggregators( aggregators( new ExpressionLambdaAggregatorFactory( @@ -2064,7 +2063,7 @@ public void testArrayAggAsArrayFromJoin() .setDataSource(CalciteTests.DATASOURCE3) .setInterval(querySegmentSpec(Filtration.eternity())) .setGranularity(Granularities.ALL) - .setDimFilter(not(selector("dim1", null, null))) + .setDimFilter(notNull("dim1")) .setDimensions(new DefaultDimensionSpec("dim4", "_d0")) .setAggregatorSpecs( aggregators( @@ -2133,7 +2132,7 @@ public void testArrayAggGroupByArrayAggFromSubquery() StringComparators.LEXICOGRAPHIC ), 5, querySegmentSpec(Filtration.eternity()), - new NotDimFilter(new SelectorDimFilter("dim1", null, null)), + notNull("dim1"), Granularities.ALL, aggregators(new ExpressionLambdaAggregatorFactory( "a0", @@ -2420,7 +2419,7 @@ public void testArrayAggArrayContainsSubquery() .dataSource(CalciteTests.DATASOURCE1) .intervals(querySegmentSpec(Filtration.eternity())) .granularity(Granularities.ALL) - .filters(not(selector("dim1", null, null))) + .filters(notNull("dim1")) .aggregators( aggregators( new 
ExpressionLambdaAggregatorFactory( @@ -2500,7 +2499,7 @@ public void testArrayAggGroupByArrayContainsSubquery() .dataSource(CalciteTests.DATASOURCE1) .intervals(querySegmentSpec(Filtration.eternity())) .granularity(Granularities.ALL) - .filters(not(selector("dim1", null, null))) + .filters(notNull("dim1")) .aggregators( aggregators( new ExpressionLambdaAggregatorFactory( @@ -2998,7 +2997,7 @@ public void testUnnestWithGroupByHaving() .setDimensions(new DefaultDimensionSpec("j0.unnest", "_d0", ColumnType.STRING)) .setGranularity(Granularities.ALL) .setAggregatorSpecs(new CountAggregatorFactory("a0")) - .setHavingSpec(new DimFilterHavingSpec(selector("a0", "1", null), true)) + .setHavingSpec(new DimFilterHavingSpec(equality("a0", 1L, ColumnType.LONG), true)) .setContext(QUERY_CONTEXT_UNNEST) .build() ), @@ -3125,7 +3124,7 @@ public void testUnnestWithFilters() .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .legacy(false) - .filters(new SelectorDimFilter("dim2", "a", null)) + .filters(equality("dim2", "a", ColumnType.STRING)) .columns("dim3") .context(QUERY_CONTEXT_UNNEST) .build() @@ -3177,8 +3176,8 @@ public void testUnnestWithFiltersInsideAndOutside() .legacy(false) .filters( and( - selector("dim2", "a", null), - not(selector("dim1", "foo", null)) + equality("dim2", "a", ColumnType.STRING), + not(equality("dim1", "foo", ColumnType.STRING)) ) ) .columns("dim3") @@ -3186,7 +3185,7 @@ public void testUnnestWithFiltersInsideAndOutside() .build() ), expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), - not(selector("j0.unnest", "b", null)) + not(equality("j0.unnest", "b", ColumnType.STRING)) )) .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) @@ -3232,8 +3231,8 @@ public void testUnnestWithFiltersOutside() .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .filters( and( - selector("dim2", "a", 
null), - not(selector("dim1", "foo", null)) + equality("dim2", "a", ColumnType.STRING), + not(equality("dim1", "foo", ColumnType.STRING)) ) ) .legacy(false) @@ -3569,7 +3568,7 @@ public void testUnnestWithINFiltersWithLeftRewrite() )) .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) - .filters(bound("m1", null, "10", false, true, null, StringComparators.NUMERIC)) + .filters(range("m1", ColumnType.LONG, null, 10L, false, true)) .legacy(false) .context(QUERY_CONTEXT_UNNEST) .columns(ImmutableList.of("j0.unnest")) @@ -3655,7 +3654,7 @@ public void testUnnestWithNotFiltersOnUnnestedColumn() .dataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), - not(selector("j0.unnest", "d", null)) + not(equality("j0.unnest", "d", ColumnType.STRING)) )) .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) @@ -3699,7 +3698,7 @@ public void testUnnestWithSelectorFiltersOnSelectedColumn() .dataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), - selector("j0.unnest", "b", null) + equality("j0.unnest", "b", ColumnType.STRING) )) .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) @@ -3728,7 +3727,9 @@ public void testUnnestWithSelectorFiltersOnVirtualColumn() .dataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), expressionVirtualColumn("j0.unnest", "array(\"m1\",\"m2\")", ColumnType.FLOAT_ARRAY), - selector("j0.unnest", "1", null) + NullHandling.replaceWithDefault() + ? 
selector("j0.unnest", "1") + : equality("j0.unnest", 1.0, ColumnType.FLOAT) )) .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) @@ -3789,7 +3790,7 @@ public void testUnnestWithMultipleAndFiltersOnSelectedColumns() .dataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), - selector("j0.unnest", "b", null) + equality("j0.unnest", "b", ColumnType.STRING) )) .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) @@ -3797,8 +3798,8 @@ public void testUnnestWithMultipleAndFiltersOnSelectedColumns() .context(QUERY_CONTEXT_UNNEST) .filters( and( - bound("m1", null, "10", false, true, null, StringComparators.NUMERIC), - bound("m2", null, "10", false, true, null, StringComparators.NUMERIC) + range("m1", ColumnType.LONG, null, 10L, false, true), + range("m2", ColumnType.LONG, null, 10L, false, true) ) ) .columns(ImmutableList.of("j0.unnest")) @@ -3832,8 +3833,8 @@ public void testUnnestWithMultipleOrFiltersOnSelectedColumns() .context(QUERY_CONTEXT_UNNEST) .filters( or( - selector("j0.unnest", "b", null), - bound("m1", null, "2", false, true, null, StringComparators.NUMERIC) + equality("j0.unnest", "b", ColumnType.STRING), + range("m1", ColumnType.LONG, null, 2L, false, true) ) ) .columns(ImmutableList.of("j0.unnest")) @@ -3862,7 +3863,7 @@ public void testUnnestWithMultipleAndFiltersOnSelectedUnnestedColumns() expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), and( new InDimFilter("j0.unnest", ImmutableSet.of("a", "b"), null), - bound("j0.unnest", null, "e", false, true, null, StringComparators.LEXICOGRAPHIC) + range("j0.unnest", ColumnType.STRING, null, "e", false, true) ) )) .intervals(querySegmentSpec(Filtration.eternity())) @@ -3924,8 +3925,8 @@ public void testUnnestWithMultipleOrFiltersOnVariationsOfUnnestedColumns() new 
TableDataSource(CalciteTests.DATASOURCE3), expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), or( - new ExpressionDimFilter("(strlen(\"j0.unnest\") < 2)", TestExprMacroTable.INSTANCE), - selector("j0.unnest", "d", null) + expressionFilter("(strlen(\"j0.unnest\") < 2)"), + equality("j0.unnest", "d", ColumnType.STRING) ) )) .intervals(querySegmentSpec(Filtration.eternity())) @@ -3978,8 +3979,8 @@ public void testUnnestWithMultipleOrFiltersOnSelectedNonUnnestedColumns() .context(QUERY_CONTEXT_UNNEST) .filters( or( - bound("m1", null, "2", false, true, null, StringComparators.NUMERIC), - bound("m2", null, "2", false, true, null, StringComparators.NUMERIC) + range("m1", ColumnType.LONG, null, 2L, false, true), + range("m2", ColumnType.LONG, null, 2L, false, true) ) ) .columns(ImmutableList.of("j0.unnest")) @@ -4013,7 +4014,7 @@ public void testUnnestWithMultipleOrFiltersOnSelectedVirtualColumns() .context(QUERY_CONTEXT_UNNEST) .filters( or( - bound("m1", null, "2", false, true, null, StringComparators.NUMERIC), + range("m1", ColumnType.LONG, null, 2L, false, true), new InDimFilter("j0.unnest", ImmutableSet.of("a", "aa"), null) ) ) @@ -4049,8 +4050,8 @@ public void testUnnestWithMultipleOrFiltersOnUnnestedColumnsAndOnOriginalColumn( .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .filters( or( - selector("j0.unnest", "b", null), - selector("dim3", "d", null) + equality("j0.unnest", "b", ColumnType.STRING), + equality("dim3", "d", ColumnType.STRING) ) ) .legacy(false) @@ -4085,8 +4086,8 @@ public void testUnnestWithMultipleOrFiltersOnUnnestedColumnsAndOnOriginalColumnD .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .filters( or( - selector("dim3", "b", null), - selector("j0.unnest", "a", null) + equality("dim3", "b", ColumnType.STRING), + equality("j0.unnest", "a", ColumnType.STRING) ) ) .legacy(false) @@ -4148,7 +4149,7 @@ public void testUnnestWithGroupByHavingSelector() .setContext(QUERY_CONTEXT_UNNEST) 
.setDimensions(new DefaultDimensionSpec("j0.unnest", "_d0", ColumnType.STRING)) .setGranularity(Granularities.ALL) - .setDimFilter(selector("j0.unnest", "b", null)) + .setDimFilter(equality("j0.unnest", "b", ColumnType.STRING)) .setAggregatorSpecs(new CountAggregatorFactory("a0")) .setContext(QUERY_CONTEXT_UNNEST) .build() @@ -4236,7 +4237,7 @@ public void testUnnestWithGroupByHavingWithWhereOnAggCol() .setDimensions(new DefaultDimensionSpec("j0.unnest", "_d0", ColumnType.STRING)) .setGranularity(Granularities.ALL) .setAggregatorSpecs(new CountAggregatorFactory("a0")) - .setHavingSpec(new DimFilterHavingSpec(selector("a0", "1", null), true)) + .setHavingSpec(new DimFilterHavingSpec(equality("a0", 1L, ColumnType.LONG), true)) .setContext(QUERY_CONTEXT_UNNEST) .build() ), @@ -4267,7 +4268,7 @@ public void testUnnestWithGroupByHavingWithWhereOnUnnestCol() .setDimensions(new DefaultDimensionSpec("j0.unnest", "_d0", ColumnType.STRING)) .setGranularity(Granularities.ALL) .setAggregatorSpecs(new CountAggregatorFactory("a0")) - .setDimFilter(selector("j0.unnest", "a", null)) + .setDimFilter(equality("j0.unnest", "a", ColumnType.STRING)) .setContext(QUERY_CONTEXT_UNNEST) .build() ), diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteCorrelatedQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteCorrelatedQueryTest.java index 577f46a2e993..89b09872d402 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteCorrelatedQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteCorrelatedQueryTest.java @@ -87,7 +87,7 @@ public void testCorrelatedSubquery(Map queryContext) ColumnType.LONG, TestExprMacroTable.INSTANCE )) - .setDimFilter(not(selector("country", null, null))) + .setDimFilter(notNull("country")) .setDimensions( new DefaultDimensionSpec( "v0", @@ -132,7 +132,7 @@ public void testCorrelatedSubquery(Map queryContext) ? 
new CountAggregatorFactory("_a0:count") : new FilteredAggregatorFactory( new CountAggregatorFactory("_a0:count"), - not(selector("a0", null, null)) + notNull("a0") ) ) .setPostAggregatorSpecs(Collections.singletonList(new ArithmeticPostAggregator( @@ -205,7 +205,7 @@ public void testCorrelatedSubqueryWithLeftFilter(Map queryContex ColumnType.LONG, TestExprMacroTable.INSTANCE )) - .setDimFilter(not(selector("country", null, null))) + .setDimFilter(notNull("country")) .setDimensions( new DefaultDimensionSpec( "v0", @@ -243,7 +243,7 @@ public void testCorrelatedSubqueryWithLeftFilter(Map queryContex makeColumnExpression("j0._d0") ), JoinType.LEFT, - selector("city", "B", null) + equality("city", "B", ColumnType.STRING) ) ) .setQuerySegmentSpec(querySegmentSpec(Intervals.of( @@ -283,7 +283,11 @@ public void testCorrelatedSubqueryWithLeftFilter_leftDirectAccessDisabled(Map ) .setAggregatorSpecs(new FilteredAggregatorFactory( new CountAggregatorFactory("a0"), - not(selector("user", null, null)) + notNull("user") )) .setDimFilter(and( - selector("city", "A", null), - not(selector("country", null, null)) + equality("city", "A", ColumnType.STRING), + notNull("country") )) .setContext( withTimestampResultContext( @@ -429,7 +433,7 @@ public void testCorrelatedSubqueryWithCorrelatedQueryFilter(Map makeColumnExpression("j0._d0") ), JoinType.LEFT, - selector("city", "B", null) + equality("city", "B", ColumnType.STRING) ) ) .setQuerySegmentSpec(querySegmentSpec(Intervals.ETERNITY)) @@ -491,11 +495,11 @@ public void testCorrelatedSubqueryWithCorrelatedQueryFilter_Scan(Map}])\n" - + " 
DruidQueryRel(query=[{\"queryType\":\"groupBy\",\"dataSource\":{\"type\":\"table\",\"name\":\"foo\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"filter\":{\"type\":\"not\",\"field\":{\"type\":\"selector\",\"dimension\":\"dim1\",\"value\":null}},\"granularity\":{\"type\":\"all\"},\"dimensions\":[{\"type\":\"extraction\",\"dimension\":\"dim1\",\"outputName\":\"d0\",\"outputType\":\"STRING\",\"extractionFn\":{\"type\":\"substring\",\"index\":0,\"length\":1}}],\"limitSpec\":{\"type\":\"NoopLimitSpec\"},\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"sqlQueryId\":\"dummy\",\"vectorize\":\"false\",\"vectorizeVirtualColumns\":\"false\"}}], signature=[{d0:STRING}])\n"; - final String explanation = "[" + final String legacyExplanation = NullHandling.replaceWithDefault() + ? + "DruidOuterQueryRel(query=[{\"queryType\":\"timeseries\",\"dataSource\":{\"type\":\"table\",\"name\":\"__subquery__\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"granularity\":{\"type\":\"all\"},\"aggregations\":[{\"type\":\"count\",\"name\":\"a0\"}],\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"sqlQueryId\":\"dummy\",\"vectorize\":\"false\",\"vectorizeVirtualColumns\":\"false\"}}], signature=[{a0:LONG}])\n" + + " DruidJoinQueryRel(condition=[=(SUBSTRING($2, 1, 1), $8)], joinType=[inner], 
query=[{\"queryType\":\"groupBy\",\"dataSource\":{\"type\":\"table\",\"name\":\"__join__\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"granularity\":{\"type\":\"all\"},\"dimensions\":[{\"type\":\"default\",\"dimension\":\"dim2\",\"outputName\":\"d0\",\"outputType\":\"STRING\"}],\"limitSpec\":{\"type\":\"NoopLimitSpec\"},\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"sqlQueryId\":\"dummy\",\"vectorize\":\"false\",\"vectorizeVirtualColumns\":\"false\"}}], signature=[{d0:STRING}])\n" + + " DruidQueryRel(query=[{\"queryType\":\"scan\",\"dataSource\":{\"type\":\"table\",\"name\":\"foo\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"resultFormat\":\"compactedList\",\"columns\":[\"__time\",\"cnt\",\"dim1\",\"dim2\",\"dim3\",\"m1\",\"m2\",\"unique_dim1\"],\"legacy\":false,\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"sqlQueryId\":\"dummy\",\"vectorize\":\"false\",\"vectorizeVirtualColumns\":\"false\"},\"granularity\":{\"type\":\"all\"}}], signature=[{__time:LONG, dim1:STRING, dim2:STRING, dim3:STRING, cnt:LONG, m1:FLOAT, m2:DOUBLE, unique_dim1:COMPLEX}])\n" + + " 
DruidQueryRel(query=[{\"queryType\":\"groupBy\",\"dataSource\":{\"type\":\"table\",\"name\":\"foo\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"filter\":{\"type\":\"not\",\"field\":{\"type\":\"selector\",\"dimension\":\"dim1\",\"value\":null}},\"granularity\":{\"type\":\"all\"},\"dimensions\":[{\"type\":\"extraction\",\"dimension\":\"dim1\",\"outputName\":\"d0\",\"outputType\":\"STRING\",\"extractionFn\":{\"type\":\"substring\",\"index\":0,\"length\":1}}],\"limitSpec\":{\"type\":\"NoopLimitSpec\"},\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"sqlQueryId\":\"dummy\",\"vectorize\":\"false\",\"vectorizeVirtualColumns\":\"false\"}}], signature=[{d0:STRING}])\n" + : + "DruidOuterQueryRel(query=[{\"queryType\":\"timeseries\",\"dataSource\":{\"type\":\"table\",\"name\":\"__subquery__\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"granularity\":{\"type\":\"all\"},\"aggregations\":[{\"type\":\"count\",\"name\":\"a0\"}],\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"sqlQueryId\":\"dummy\",\"vectorize\":\"false\",\"vectorizeVirtualColumns\":\"false\"}}], signature=[{a0:LONG}])\n" + + " DruidJoinQueryRel(condition=[=(SUBSTRING($2, 1, 1), $8)], joinType=[inner], 
query=[{\"queryType\":\"groupBy\",\"dataSource\":{\"type\":\"table\",\"name\":\"__join__\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"granularity\":{\"type\":\"all\"},\"dimensions\":[{\"type\":\"default\",\"dimension\":\"dim2\",\"outputName\":\"d0\",\"outputType\":\"STRING\"}],\"limitSpec\":{\"type\":\"NoopLimitSpec\"},\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"sqlQueryId\":\"dummy\",\"vectorize\":\"false\",\"vectorizeVirtualColumns\":\"false\"}}], signature=[{d0:STRING}])\n" + + " DruidQueryRel(query=[{\"queryType\":\"scan\",\"dataSource\":{\"type\":\"table\",\"name\":\"foo\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"resultFormat\":\"compactedList\",\"columns\":[\"__time\",\"cnt\",\"dim1\",\"dim2\",\"dim3\",\"m1\",\"m2\",\"unique_dim1\"],\"legacy\":false,\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"sqlQueryId\":\"dummy\",\"vectorize\":\"false\",\"vectorizeVirtualColumns\":\"false\"},\"granularity\":{\"type\":\"all\"}}], signature=[{__time:LONG, dim1:STRING, dim2:STRING, dim3:STRING, cnt:LONG, m1:FLOAT, m2:DOUBLE, unique_dim1:COMPLEX}])\n" + + " 
DruidQueryRel(query=[{\"queryType\":\"groupBy\",\"dataSource\":{\"type\":\"table\",\"name\":\"foo\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"filter\":{\"type\":\"not\",\"field\":{\"type\":\"null\",\"column\":\"dim1\"}},\"granularity\":{\"type\":\"all\"},\"dimensions\":[{\"type\":\"extraction\",\"dimension\":\"dim1\",\"outputName\":\"d0\",\"outputType\":\"STRING\",\"extractionFn\":{\"type\":\"substring\",\"index\":0,\"length\":1}}],\"limitSpec\":{\"type\":\"NoopLimitSpec\"},\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"sqlQueryId\":\"dummy\",\"vectorize\":\"false\",\"vectorizeVirtualColumns\":\"false\"}}], signature=[{d0:STRING}])\n"; + final String explanation = NullHandling.replaceWithDefault() ? + "[" + "{\"query\":{\"queryType\":\"groupBy\"," + "\"dataSource\":{\"type\":\"query\",\"query\":{\"queryType\":\"groupBy\",\"dataSource\":{\"type\":\"join\",\"left\":{\"type\":\"table\",\"name\":\"foo\"},\"right\":{\"type\":\"query\",\"query\":{\"queryType\":\"groupBy\",\"dataSource\":{\"type\":\"table\",\"name\":\"foo\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"filter\":{\"type\":\"not\",\"field\":{\"type\":\"selector\",\"dimension\":\"dim1\",\"value\":null}},\"granularity\":{\"type\":\"all\"},\"dimensions\":[{\"type\":\"extraction\",\"dimension\":\"dim1\",\"outputName\":\"d0\",\"outputType\":\"STRING\",\"extractionFn\":{\"type\":\"substring\",\"index\":0,\"length\":1}}],\"limitSpec\":{\"type\":\"NoopLimitSpec\"},\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"sqlQueryId\":\"dummy\",\"vectorize\":\"false\",\"vectorizeVirtualColumns\":\"false\"}}},\"rightPrefix\":\"j0.\",\"condition\":\"(substring(\\\"dim2\\\", 0, 1) == 
\\\"j0.d0\\\")\",\"joinType\":\"INNER\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"granularity\":{\"type\":\"all\"},\"dimensions\":[{\"type\":\"default\",\"dimension\":\"dim2\",\"outputName\":\"d0\",\"outputType\":\"STRING\"}],\"limitSpec\":{\"type\":\"NoopLimitSpec\"},\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"sqlQueryId\":\"dummy\",\"vectorize\":\"false\",\"vectorizeVirtualColumns\":\"false\"}}}," + "\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]}," @@ -127,7 +146,8 @@ public void testExplainExactCountDistinctOfSemiJoinResult() + "\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"sqlQueryId\":\"dummy\",\"vectorize\":\"false\",\"vectorizeVirtualColumns\":\"false\"}}," + "\"signature\":[{\"name\":\"a0\",\"type\":\"LONG\"}]," + "\"columnMappings\":[{\"queryColumn\":\"a0\",\"outputColumn\":\"EXPR$0\"}]" - + "}]"; + + "}]" + : 
"[{\"query\":{\"queryType\":\"groupBy\",\"dataSource\":{\"type\":\"query\",\"query\":{\"queryType\":\"groupBy\",\"dataSource\":{\"type\":\"join\",\"left\":{\"type\":\"table\",\"name\":\"foo\"},\"right\":{\"type\":\"query\",\"query\":{\"queryType\":\"groupBy\",\"dataSource\":{\"type\":\"table\",\"name\":\"foo\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"filter\":{\"type\":\"not\",\"field\":{\"type\":\"null\",\"column\":\"dim1\"}},\"granularity\":{\"type\":\"all\"},\"dimensions\":[{\"type\":\"extraction\",\"dimension\":\"dim1\",\"outputName\":\"d0\",\"outputType\":\"STRING\",\"extractionFn\":{\"type\":\"substring\",\"index\":0,\"length\":1}}],\"limitSpec\":{\"type\":\"NoopLimitSpec\"},\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"sqlQueryId\":\"dummy\",\"vectorize\":\"false\",\"vectorizeVirtualColumns\":\"false\"}}},\"rightPrefix\":\"j0.\",\"condition\":\"(substring(\\\"dim2\\\", 0, 1) == 
\\\"j0.d0\\\")\",\"joinType\":\"INNER\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"granularity\":{\"type\":\"all\"},\"dimensions\":[{\"type\":\"default\",\"dimension\":\"dim2\",\"outputName\":\"d0\",\"outputType\":\"STRING\"}],\"limitSpec\":{\"type\":\"NoopLimitSpec\"},\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"sqlQueryId\":\"dummy\",\"vectorize\":\"false\",\"vectorizeVirtualColumns\":\"false\"}}},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"granularity\":{\"type\":\"all\"},\"dimensions\":[],\"aggregations\":[{\"type\":\"count\",\"name\":\"a0\"}],\"limitSpec\":{\"type\":\"NoopLimitSpec\"},\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"sqlQueryId\":\"dummy\",\"vectorize\":\"false\",\"vectorizeVirtualColumns\":\"false\"}},\"signature\":[{\"name\":\"a0\",\"type\":\"LONG\"}],\"columnMappings\":[{\"queryColumn\":\"a0\",\"outputColumn\":\"EXPR$0\"}]}]"; final String resources = "[{\"name\":\"foo\",\"type\":\"DATASOURCE\"}]"; final String attributes = "{\"statementType\":\"SELECT\"}"; @@ -230,27 +250,35 @@ public void testExplainMultipleTopLevelUnionAllQueries() final String query = "EXPLAIN PLAN FOR SELECT dim1 FROM druid.foo\n" + "UNION ALL (SELECT dim1 FROM druid.foo WHERE dim1 = '42'\n" + "UNION ALL SELECT dim1 FROM druid.foo WHERE dim1 = '44')"; - final String legacyExplanation = "DruidUnionRel(limit=[-1])\n" - + " 
DruidQueryRel(query=[{\"queryType\":\"scan\",\"dataSource\":{\"type\":\"table\",\"name\":\"foo\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"resultFormat\":\"compactedList\",\"columns\":[\"dim1\"],\"legacy\":false,\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"sqlQueryId\":\"dummy\",\"vectorize\":\"false\",\"vectorizeVirtualColumns\":\"false\"},\"granularity\":{\"type\":\"all\"}}], signature=[{dim1:STRING}])\n" - + " DruidUnionRel(limit=[-1])\n" - + " DruidQueryRel(query=[{\"queryType\":\"scan\",\"dataSource\":{\"type\":\"table\",\"name\":\"foo\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"resultFormat\":\"compactedList\",\"filter\":{\"type\":\"selector\",\"dimension\":\"dim1\",\"value\":\"42\"},\"columns\":[\"dim1\"],\"legacy\":false,\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"sqlQueryId\":\"dummy\",\"vectorize\":\"false\",\"vectorizeVirtualColumns\":\"false\"},\"granularity\":{\"type\":\"all\"}}], signature=[{dim1:STRING}])\n" - + " DruidQueryRel(query=[{\"queryType\":\"scan\",\"dataSource\":{\"type\":\"table\",\"name\":\"foo\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"resultFormat\":\"compactedList\",\"filter\":{\"type\":\"selector\",\"dimension\":\"dim1\",\"value\":\"44\"},\"columns\":[\"dim1\"],\"legacy\":false,\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"sqlQueryId\":\"dummy\",\"vectorize\":\"false\",\"vectorizeVirtualColumns\":\"false\"},\"granularity\":{\"type\":\"all\"}}], signature=[{dim1:STRING}])\n"; - final String explanation = "[" - + "{" - + 
"\"query\":{\"queryType\":\"scan\",\"dataSource\":{\"type\":\"table\",\"name\":\"foo\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"resultFormat\":\"compactedList\",\"columns\":[\"dim1\"],\"legacy\":false,\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"sqlQueryId\":\"dummy\",\"vectorize\":\"false\",\"vectorizeVirtualColumns\":\"false\"},\"granularity\":{\"type\":\"all\"}}," - + "\"signature\":[{\"name\":\"dim1\",\"type\":\"STRING\"}]," - + "\"columnMappings\":[{\"queryColumn\":\"dim1\",\"outputColumn\":\"dim1\"}]" - + "}," - + "{" - + "\"query\":{\"queryType\":\"scan\",\"dataSource\":{\"type\":\"table\",\"name\":\"foo\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"resultFormat\":\"compactedList\",\"filter\":{\"type\":\"selector\",\"dimension\":\"dim1\",\"value\":\"42\"},\"columns\":[\"dim1\"],\"legacy\":false,\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"sqlQueryId\":\"dummy\",\"vectorize\":\"false\",\"vectorizeVirtualColumns\":\"false\"},\"granularity\":{\"type\":\"all\"}}," - + "\"signature\":[{\"name\":\"dim1\",\"type\":\"STRING\"}]," - + "\"columnMappings\":[{\"queryColumn\":\"dim1\",\"outputColumn\":\"dim1\"}]" - + "}," - + "{" - + 
"\"query\":{\"queryType\":\"scan\",\"dataSource\":{\"type\":\"table\",\"name\":\"foo\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"resultFormat\":\"compactedList\",\"filter\":{\"type\":\"selector\",\"dimension\":\"dim1\",\"value\":\"44\"},\"columns\":[\"dim1\"],\"legacy\":false,\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"sqlQueryId\":\"dummy\",\"vectorize\":\"false\",\"vectorizeVirtualColumns\":\"false\"},\"granularity\":{\"type\":\"all\"}}," - + "\"signature\":[{\"name\":\"dim1\",\"type\":\"STRING\"}]," - + "\"columnMappings\":[{\"queryColumn\":\"dim1\",\"outputColumn\":\"dim1\"}]" - + "}]"; + final String legacyExplanation = NullHandling.replaceWithDefault() + ? "DruidUnionRel(limit=[-1])\n" + + " DruidQueryRel(query=[{\"queryType\":\"scan\",\"dataSource\":{\"type\":\"table\",\"name\":\"foo\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"resultFormat\":\"compactedList\",\"columns\":[\"dim1\"],\"legacy\":false,\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"sqlQueryId\":\"dummy\",\"vectorize\":\"false\",\"vectorizeVirtualColumns\":\"false\"},\"granularity\":{\"type\":\"all\"}}], signature=[{dim1:STRING}])\n" + + " DruidUnionRel(limit=[-1])\n" + + " 
DruidQueryRel(query=[{\"queryType\":\"scan\",\"dataSource\":{\"type\":\"table\",\"name\":\"foo\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"resultFormat\":\"compactedList\",\"filter\":{\"type\":\"selector\",\"dimension\":\"dim1\",\"value\":\"42\"},\"columns\":[\"dim1\"],\"legacy\":false,\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"sqlQueryId\":\"dummy\",\"vectorize\":\"false\",\"vectorizeVirtualColumns\":\"false\"},\"granularity\":{\"type\":\"all\"}}], signature=[{dim1:STRING}])\n" + + " DruidQueryRel(query=[{\"queryType\":\"scan\",\"dataSource\":{\"type\":\"table\",\"name\":\"foo\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"resultFormat\":\"compactedList\",\"filter\":{\"type\":\"selector\",\"dimension\":\"dim1\",\"value\":\"44\"},\"columns\":[\"dim1\"],\"legacy\":false,\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"sqlQueryId\":\"dummy\",\"vectorize\":\"false\",\"vectorizeVirtualColumns\":\"false\"},\"granularity\":{\"type\":\"all\"}}], signature=[{dim1:STRING}])\n" + : "DruidUnionRel(limit=[-1])\n" + + " DruidQueryRel(query=[{\"queryType\":\"scan\",\"dataSource\":{\"type\":\"table\",\"name\":\"foo\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"resultFormat\":\"compactedList\",\"columns\":[\"dim1\"],\"legacy\":false,\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"sqlQueryId\":\"dummy\",\"vectorize\":\"false\",\"vectorizeVirtualColumns\":\"false\"},\"granularity\":{\"type\":\"all\"}}], signature=[{dim1:STRING}])\n" + + " DruidUnionRel(limit=[-1])\n" + + " 
DruidQueryRel(query=[{\"queryType\":\"scan\",\"dataSource\":{\"type\":\"table\",\"name\":\"foo\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"resultFormat\":\"compactedList\",\"filter\":{\"type\":\"equals\",\"column\":\"dim1\",\"matchValueType\":\"STRING\",\"matchValue\":\"42\"},\"columns\":[\"dim1\"],\"legacy\":false,\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"sqlQueryId\":\"dummy\",\"vectorize\":\"false\",\"vectorizeVirtualColumns\":\"false\"},\"granularity\":{\"type\":\"all\"}}], signature=[{dim1:STRING}])\n" + + " DruidQueryRel(query=[{\"queryType\":\"scan\",\"dataSource\":{\"type\":\"table\",\"name\":\"foo\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"resultFormat\":\"compactedList\",\"filter\":{\"type\":\"equals\",\"column\":\"dim1\",\"matchValueType\":\"STRING\",\"matchValue\":\"44\"},\"columns\":[\"dim1\"],\"legacy\":false,\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"sqlQueryId\":\"dummy\",\"vectorize\":\"false\",\"vectorizeVirtualColumns\":\"false\"},\"granularity\":{\"type\":\"all\"}}], signature=[{dim1:STRING}])\n"; + final String explanation = NullHandling.replaceWithDefault() + ? 
"[" + + "{" + + "\"query\":{\"queryType\":\"scan\",\"dataSource\":{\"type\":\"table\",\"name\":\"foo\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"resultFormat\":\"compactedList\",\"columns\":[\"dim1\"],\"legacy\":false,\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"sqlQueryId\":\"dummy\",\"vectorize\":\"false\",\"vectorizeVirtualColumns\":\"false\"},\"granularity\":{\"type\":\"all\"}}," + + "\"signature\":[{\"name\":\"dim1\",\"type\":\"STRING\"}]," + + "\"columnMappings\":[{\"queryColumn\":\"dim1\",\"outputColumn\":\"dim1\"}]" + + "}," + + "{" + + "\"query\":{\"queryType\":\"scan\",\"dataSource\":{\"type\":\"table\",\"name\":\"foo\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"resultFormat\":\"compactedList\",\"filter\":{\"type\":\"selector\",\"dimension\":\"dim1\",\"value\":\"42\"},\"columns\":[\"dim1\"],\"legacy\":false,\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"sqlQueryId\":\"dummy\",\"vectorize\":\"false\",\"vectorizeVirtualColumns\":\"false\"},\"granularity\":{\"type\":\"all\"}}," + + "\"signature\":[{\"name\":\"dim1\",\"type\":\"STRING\"}]," + + "\"columnMappings\":[{\"queryColumn\":\"dim1\",\"outputColumn\":\"dim1\"}]" + + "}," + + "{" + + 
"\"query\":{\"queryType\":\"scan\",\"dataSource\":{\"type\":\"table\",\"name\":\"foo\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"resultFormat\":\"compactedList\",\"filter\":{\"type\":\"selector\",\"dimension\":\"dim1\",\"value\":\"44\"},\"columns\":[\"dim1\"],\"legacy\":false,\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"sqlQueryId\":\"dummy\",\"vectorize\":\"false\",\"vectorizeVirtualColumns\":\"false\"},\"granularity\":{\"type\":\"all\"}}," + + "\"signature\":[{\"name\":\"dim1\",\"type\":\"STRING\"}]," + + "\"columnMappings\":[{\"queryColumn\":\"dim1\",\"outputColumn\":\"dim1\"}]" + + "}]" + : "[{\"query\":{\"queryType\":\"scan\",\"dataSource\":{\"type\":\"table\",\"name\":\"foo\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"resultFormat\":\"compactedList\",\"columns\":[\"dim1\"],\"legacy\":false,\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"sqlQueryId\":\"dummy\",\"vectorize\":\"false\",\"vectorizeVirtualColumns\":\"false\"},\"granularity\":{\"type\":\"all\"}},\"signature\":[{\"name\":\"dim1\",\"type\":\"STRING\"}],\"columnMappings\":[{\"queryColumn\":\"dim1\",\"outputColumn\":\"dim1\"}]},{\"query\":{\"queryType\":\"scan\",\"dataSource\":{\"type\":\"table\",\"name\":\"foo\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"resultFormat\":\"compactedList\",\"filter\":{\"type\":\"equals\",\"column\":\"dim1\",\"matchValueType\":\"STRING\",\"matchValue\":\"42\"},\"columns\":[\"dim1\"],\"legacy\":false,\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"sqlQueryId\":\"dummy\",\"vec
torize\":\"false\",\"vectorizeVirtualColumns\":\"false\"},\"granularity\":{\"type\":\"all\"}},\"signature\":[{\"name\":\"dim1\",\"type\":\"STRING\"}],\"columnMappings\":[{\"queryColumn\":\"dim1\",\"outputColumn\":\"dim1\"}]},{\"query\":{\"queryType\":\"scan\",\"dataSource\":{\"type\":\"table\",\"name\":\"foo\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"resultFormat\":\"compactedList\",\"filter\":{\"type\":\"equals\",\"column\":\"dim1\",\"matchValueType\":\"STRING\",\"matchValue\":\"44\"},\"columns\":[\"dim1\"],\"legacy\":false,\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"sqlQueryId\":\"dummy\",\"vectorize\":\"false\",\"vectorizeVirtualColumns\":\"false\"},\"granularity\":{\"type\":\"all\"}},\"signature\":[{\"name\":\"dim1\",\"type\":\"STRING\"}],\"columnMappings\":[{\"queryColumn\":\"dim1\",\"outputColumn\":\"dim1\"}]}]"; final String resources = "[{\"name\":\"foo\",\"type\":\"DATASOURCE\"}]"; final String attributes = "{\"statementType\":\"SELECT\"}"; testQuery( diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteInsertDmlTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteInsertDmlTest.java index 1679b86fb069..61c3267491d5 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteInsertDmlTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteInsertDmlTest.java @@ -99,7 +99,7 @@ public void testInsertFromViewA() .dataSource("foo") .intervals(querySegmentSpec(Filtration.eternity())) .virtualColumns(expressionVirtualColumn("v0", "substring(\"dim1\", 0, 1)", ColumnType.STRING)) - .filters(selector("dim2", "a", null)) + .filters(equality("dim2", "a", ColumnType.STRING)) .columns("v0") .context(PARTITIONED_BY_ALL_TIME_QUERY_CONTEXT) .build() @@ -129,7 +129,7 @@ public void testInsertFromViewC() newScanQueryBuilder() .dataSource("foo") 
.intervals(querySegmentSpec(Filtration.eternity())) - .filters(selector("dim2", "a", null)) + .filters(equality("dim2", "a", ColumnType.STRING)) .columns("dim1", "dim2") .context(PARTITIONED_BY_ALL_TIME_QUERY_CONTEXT) .build() diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteJoinQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteJoinQueryTest.java index 337926d462ae..08079edeebc3 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteJoinQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteJoinQueryTest.java @@ -63,13 +63,7 @@ import org.apache.druid.query.dimension.DefaultDimensionSpec; import org.apache.druid.query.dimension.ExtractionDimensionSpec; import org.apache.druid.query.extraction.SubstringDimExtractionFn; -import org.apache.druid.query.filter.AndDimFilter; -import org.apache.druid.query.filter.BoundDimFilter; -import org.apache.druid.query.filter.InDimFilter; import org.apache.druid.query.filter.LikeDimFilter; -import org.apache.druid.query.filter.NotDimFilter; -import org.apache.druid.query.filter.OrDimFilter; -import org.apache.druid.query.filter.SelectorDimFilter; import org.apache.druid.query.groupby.GroupByQuery; import org.apache.druid.query.groupby.ResultRow; import org.apache.druid.query.groupby.orderby.DefaultLimitSpec; @@ -102,10 +96,8 @@ import org.junit.runner.RunWith; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collections; import java.util.HashMap; -import java.util.HashSet; import java.util.List; import java.util.Map; @@ -216,7 +208,7 @@ public void testExactTopNOnInnerJoinWithLimit() GroupByQuery.builder() .setInterval(querySegmentSpec(Filtration.eternity())) .setGranularity(Granularities.ALL) - .setDimFilter(new NotDimFilter(new SelectorDimFilter("dim4", "a", null))) + .setDimFilter(not(equality("dim4", "a", ColumnType.STRING))) .setDataSource(new TableDataSource("numfoo")) .setDimensions(new DefaultDimensionSpec("dim4", "_d0")) 
.setContext(context) @@ -290,7 +282,7 @@ public void testJoinOuterGroupByAndSubqueryHasLimit() new DoubleSumAggregatorFactory("a0:sum", "m2"), new FilteredAggregatorFactory( new CountAggregatorFactory("a0:count"), - not(selector("m2", null, null)) + notNull("m2") ) ) ) @@ -373,7 +365,7 @@ public void testJoinOuterGroupByAndSubqueryNoLimit(Map queryCont new DoubleSumAggregatorFactory("a0:sum", "m2"), new FilteredAggregatorFactory( new CountAggregatorFactory("a0:count"), - not(selector("m2", null, null)) + notNull("m2") ) ) ) @@ -460,7 +452,7 @@ public void testJoinWithLimitBeforeJoining() new DoubleSumAggregatorFactory("a0:sum", "m2"), new FilteredAggregatorFactory( new CountAggregatorFactory("a0:count"), - not(selector("m2", null, null)) + notNull("m2") ) ) ) @@ -517,7 +509,7 @@ public void testJoinOnTimeseriesWithFloorOnTime() Druids.newTimeseriesQueryBuilder() .dataSource(CalciteTests.DATASOURCE1) .intervals(querySegmentSpec(Intervals.of("1994-04-29/2020-01-11T00:00:00.001Z"))) - .filters(selector("dim3", "b", null)) + .filters(equality("dim3", "b", ColumnType.STRING)) .granularity(new PeriodGranularity(Period.hours(1), null, DateTimeZone.UTC)) .aggregators(aggregators( new FloatMinAggregatorFactory("a0", "m1") @@ -579,7 +571,7 @@ public void testJoinOnGroupByInsteadOfTimeseriesWithFloorOnTime() ColumnType.LONG ) ) - .setDimFilter(selector("dim3", "b", null)) + .setDimFilter(equality("dim3", "b", ColumnType.STRING)) .setGranularity(Granularities.ALL) .setDimensions(dimensions(new DefaultDimensionSpec( "v0", @@ -642,7 +634,7 @@ public void testFilterAndGroupByLookupUsingJoinOperatorWithValueFilterPushdownMa ) ) .setInterval(querySegmentSpec(Filtration.eternity())) - .setDimFilter(selector("j0.v", "123", null)) + .setDimFilter(equality("j0.v", "123", ColumnType.STRING)) .setGranularity(Granularities.ALL) .setDimensions(dimensions(new DefaultDimensionSpec("j0.k", "d0"))) .setAggregatorSpecs(aggregators(new CountAggregatorFactory("a0"))) @@ -679,7 +671,12 @@ public 
void testFilterAndGroupByLookupUsingJoinOperatorAllowNulls(Map queryContext) ) ) .setInterval(querySegmentSpec(Filtration.eternity())) - .setDimFilter(not(selector("j0.v", "xa", null))) + .setDimFilter(not(equality("j0.v", "xa", ColumnType.STRING))) .setGranularity(Granularities.ALL) .setDimensions(dimensions(new DefaultDimensionSpec("j0.v", "d0"))) .setAggregatorSpecs(aggregators(new CountAggregatorFactory("a0"))) @@ -856,7 +853,7 @@ public void testFilterAndGroupByLookupUsingJoinOperator(Map quer ) ) .setInterval(querySegmentSpec(Filtration.eternity())) - .setDimFilter(selector("j0.v", "xa", null)) + .setDimFilter(equality("j0.v", "xa", ColumnType.STRING)) .setGranularity(Granularities.ALL) .setDimensions(dimensions(new DefaultDimensionSpec("j0.k", "d0"))) .setAggregatorSpecs(aggregators(new CountAggregatorFactory("a0"))) @@ -903,7 +900,12 @@ public void testFilterAndGroupByLookupUsingPostAggregationJoinOperator(Map queryConte ) ) .intervals(querySegmentSpec(Filtration.eternity())) - .filters(selector("j0.v", "xa", null)) + .filters(equality("j0.v", "xa", ColumnType.STRING)) .columns("dim1") .context(queryContext) .build() @@ -2365,7 +2367,12 @@ public void testSelectOnLookupUsingLeftJoinOperator(Map queryCon ) ) .intervals(querySegmentSpec(Filtration.eternity())) - .filters(or(not(selector("j0.v", "xxx", null)), selector("j0.v", null, null))) + .filters( + or( + not(equality("j0.v", "xxx", ColumnType.STRING)), + isNull("j0.v") + ) + ) .columns("dim1", "j0.k", "j0.v") .context(queryContext) .build() @@ -2408,7 +2415,12 @@ public void testSelectOnLookupUsingRightJoinOperator(Map queryCo ) ) .intervals(querySegmentSpec(Filtration.eternity())) - .filters(or(not(selector("j0.v", "xxx", null)), selector("j0.v", null, null))) + .filters( + or( + not(equality("j0.v", "xxx", ColumnType.STRING)), + isNull("j0.v") + ) + ) .columns("dim1", "j0.k", "j0.v") .context(queryContext) .build() @@ -2446,7 +2458,12 @@ public void testSelectOnLookupUsingFullJoinOperator(Map queryCon 
) ) .intervals(querySegmentSpec(Filtration.eternity())) - .filters(or(not(selector("j0.v", "xxx", null)), selector("j0.v", null, null))) + .filters( + or( + not(equality("j0.v", "xxx", ColumnType.STRING)), + isNull("j0.v") + ) + ) .columns("cnt", "dim1", "j0.k", "j0.v", "m1") .context(queryContext) .build() @@ -2553,7 +2570,7 @@ public void testNotInAggregationSubquery(Map queryContext) NullHandling.sqlCompatible() ? new FilteredAggregatorFactory( new CountAggregatorFactory("_a1"), - not(selector("a0", null, null)) + not(isNull("a0")) ) : new CountAggregatorFactory("_a1") ) @@ -2583,8 +2600,8 @@ public void testNotInAggregationSubquery(Map queryContext) .setGranularity(Granularities.ALL) .setDimFilter( or( - selector("j0._a0", "0", null), - and(selector("_j0.p0", null, null), expressionFilter("(\"j0._a1\" >= \"j0._a0\")")) + equality("j0._a0", 0L, ColumnType.LONG), + and(isNull("_j0.p0"), expressionFilter("(\"j0._a1\" >= \"j0._a0\")")) ) ) .setDimensions(dimensions(new DefaultDimensionSpec("__time", "d0", ColumnType.LONG))) @@ -2623,7 +2640,9 @@ public void testUsingSubqueryWithExtractionFns(Map queryContext) .setDataSource(CalciteTests.DATASOURCE1) .setInterval(querySegmentSpec(Filtration.eternity())) .setGranularity(Granularities.ALL) - .setDimFilter(not(selector("dim1", "", null))) + .setDimFilter( + not(equality("dim1", "", ColumnType.STRING)) + ) .setDimensions( dimensions(new ExtractionDimensionSpec( "dim1", @@ -2681,7 +2700,7 @@ public void testInnerJoinWithIsNullFilter(Map queryContext) ) ) .intervals(querySegmentSpec(Filtration.eternity())) - .filters(selector("dim2", null, null)) + .filters(isNull("dim2")) .columns("dim1", "j0.v") .build() ), @@ -2758,7 +2777,7 @@ public void testLeftJoinOnTwoInlineDataSourcesWithTimeFilter(Map ) ) ) - .filters(new SelectorDimFilter("dim1", "10.1", null)) + .filters(equality("dim1", "10.1", ColumnType.STRING)) .virtualColumns(expressionVirtualColumn("v0", "\'10.1\'", ColumnType.STRING)) .columns(ImmutableList.of("__time", 
"v0")) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) @@ -2776,7 +2795,7 @@ public void testLeftJoinOnTwoInlineDataSourcesWithTimeFilter(Map ) ) ) - .filters(new SelectorDimFilter("dim1", "10.1", null)) + .filters(equality("dim1", "10.1", ColumnType.STRING)) .virtualColumns(expressionVirtualColumn("v0", "\'10.1\'", ColumnType.STRING)) .columns(ImmutableList.of("v0")) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) @@ -2791,7 +2810,7 @@ public void testLeftJoinOnTwoInlineDataSourcesWithTimeFilter(Map .virtualColumns(expressionVirtualColumn("_v0", "'10.1'", ColumnType.STRING)) .intervals(querySegmentSpec(Filtration.eternity())) .columns("__time", "_v0") - .filters(new SelectorDimFilter("v0", "10.1", null)) + .filters(equality("v0", "10.1", ColumnType.STRING)) .context(queryContext) .build() ), @@ -2829,7 +2848,7 @@ public void testLeftJoinOnTwoInlineDataSourcesWithTimeFilter_withLeftDirectAcces ) ) ) - .filters(new SelectorDimFilter("dim1", "10.1", null)) + .filters(equality("dim1", "10.1", ColumnType.STRING)) .virtualColumns(expressionVirtualColumn("v0", "\'10.1\'", ColumnType.STRING)) .columns(ImmutableList.of("v0")) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) @@ -2839,7 +2858,7 @@ public void testLeftJoinOnTwoInlineDataSourcesWithTimeFilter_withLeftDirectAcces "j0.", equalsCondition(makeExpression("'10.1'"), makeColumnExpression("j0.v0")), JoinType.LEFT, - selector("dim1", "10.1", null) + equality("dim1", "10.1", ColumnType.STRING) ) ) .intervals(querySegmentSpec( @@ -2878,7 +2897,7 @@ public void testLeftJoinOnTwoInlineDataSourcesWithOuterWhere(Map newScanQueryBuilder() .dataSource(CalciteTests.DATASOURCE1) .intervals(querySegmentSpec(Filtration.eternity())) - .filters(new SelectorDimFilter("dim1", "10.1", null)) + .filters(equality("dim1", "10.1", ColumnType.STRING)) .virtualColumns(expressionVirtualColumn("v0", "\'10.1\'", ColumnType.STRING)) .columns(ImmutableList.of("__time", "v0")) 
.resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) @@ -2889,7 +2908,7 @@ public void testLeftJoinOnTwoInlineDataSourcesWithOuterWhere(Map newScanQueryBuilder() .dataSource(CalciteTests.DATASOURCE1) .intervals(querySegmentSpec(Filtration.eternity())) - .filters(new SelectorDimFilter("dim1", "10.1", null)) + .filters(equality("dim1", "10.1", ColumnType.STRING)) .columns(ImmutableList.of("dim1")) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .context(queryContext) @@ -2902,7 +2921,7 @@ public void testLeftJoinOnTwoInlineDataSourcesWithOuterWhere(Map ) .virtualColumns(expressionVirtualColumn("_v0", "'10.1'", ColumnType.STRING)) .intervals(querySegmentSpec(Filtration.eternity())) - .filters(selector("v0", "10.1", null)) + .filters(equality("v0", "10.1", ColumnType.STRING)) .columns("__time", "_v0") .context(queryContext) .build() @@ -2934,7 +2953,7 @@ public void testLeftJoinOnTwoInlineDataSourcesWithOuterWhere_withLeftDirectAcces newScanQueryBuilder() .dataSource(CalciteTests.DATASOURCE1) .intervals(querySegmentSpec(Filtration.eternity())) - .filters(new SelectorDimFilter("dim1", "10.1", null)) + .filters(equality("dim1", "10.1", ColumnType.STRING)) .columns(ImmutableList.of("dim1")) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .context(queryContext) @@ -2946,7 +2965,7 @@ public void testLeftJoinOnTwoInlineDataSourcesWithOuterWhere_withLeftDirectAcces makeColumnExpression("j0.dim1") ), JoinType.LEFT, - selector("dim1", "10.1", null) + equality("dim1", "10.1", ColumnType.STRING) ) ) .intervals(querySegmentSpec(Filtration.eternity())) @@ -2980,7 +2999,7 @@ public void testLeftJoinOnTwoInlineDataSources(Map queryContext) newScanQueryBuilder() .dataSource(CalciteTests.DATASOURCE1) .intervals(querySegmentSpec(Filtration.eternity())) - .filters(new SelectorDimFilter("dim1", "10.1", null)) + .filters(equality("dim1", "10.1", ColumnType.STRING)) .virtualColumns(expressionVirtualColumn("v0", "\'10.1\'", 
ColumnType.STRING)) .columns(ImmutableList.of("__time", "v0")) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) @@ -2991,7 +3010,7 @@ public void testLeftJoinOnTwoInlineDataSources(Map queryContext) newScanQueryBuilder() .dataSource(CalciteTests.DATASOURCE1) .intervals(querySegmentSpec(Filtration.eternity())) - .filters(new SelectorDimFilter("dim1", "10.1", null)) + .filters(equality("dim1", "10.1", ColumnType.STRING)) .columns(ImmutableList.of("dim1")) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .context(queryContext) @@ -3035,7 +3054,7 @@ public void testLeftJoinOnTwoInlineDataSources_withLeftDirectAccess(Map queryContext newScanQueryBuilder() .dataSource(CalciteTests.DATASOURCE1) .intervals(querySegmentSpec(Filtration.eternity())) - .filters(new SelectorDimFilter("dim1", "10.1", null)) + .filters(equality("dim1", "10.1", ColumnType.STRING)) .virtualColumns(expressionVirtualColumn("v0", "\'10.1\'", ColumnType.STRING)) .columns(ImmutableList.of("__time", "v0")) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) @@ -3194,7 +3213,7 @@ public void testInnerJoinOnTwoInlineDataSources(Map queryContext newScanQueryBuilder() .dataSource(CalciteTests.DATASOURCE1) .intervals(querySegmentSpec(Filtration.eternity())) - .filters(new SelectorDimFilter("dim1", "10.1", null)) + .filters(equality("dim1", "10.1", ColumnType.STRING)) .columns(ImmutableList.of("dim1")) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .context(queryContext) @@ -3241,7 +3260,7 @@ public void testGroupByOverGroupByOverInnerJoinOnTwoInlineDataSources(Map queryConte ) .intervals(querySegmentSpec(Filtration.eternity())) .columns("dim1", "j0.d0") - .filters(new NotDimFilter(new SelectorDimFilter("j0.d0", null, null))) + .filters(notNull("j0.d0")) .context(queryContext) .build(); @@ -3580,7 +3597,7 @@ public void testLeftJoinSubqueryWithSelectorFilter(Map queryCont ) .intervals(querySegmentSpec(Filtration.eternity())) 
.columns("dim1", "j0.d0") - .filters(selector("j0.d0", "abc", null)) + .filters(equality("j0.d0", "abc", ColumnType.STRING)) .context(queryContext) .build() ), @@ -3718,7 +3735,7 @@ public void testSemiJoinWithOuterTimeExtractScan() .setInterval(querySegmentSpec(Filtration.eternity())) .setGranularity(Granularities.ALL) .setDimensions(dimensions(new DefaultDimensionSpec("dim2", "d0"))) - .setDimFilter(selector("dim1", "def", null)) + .setDimFilter(equality("dim1", "def", ColumnType.STRING)) .setContext(QUERY_CONTEXT_DEFAULT) .build() ), @@ -3731,7 +3748,9 @@ public void testSemiJoinWithOuterTimeExtractScan() .virtualColumns( expressionVirtualColumn("v0", "timestamp_extract(\"__time\",'MONTH','UTC')", ColumnType.LONG) ) - .filters(not(selector("dim1", "", null))) + .filters( + not(equality("dim1", "", ColumnType.STRING)) + ) .columns("dim1", "v0") .context(QUERY_CONTEXT_DEFAULT) .build() @@ -3773,7 +3792,7 @@ public void testTwoSemiJoinsSimultaneously(Map queryContext) .dataSource(CalciteTests.DATASOURCE1) .intervals(querySegmentSpec(Filtration.eternity())) .bound(TimeBoundaryQuery.MAX_TIME) - .filters(selector("cnt", "1", null)) + .filters(equality("cnt", 1L, ColumnType.LONG)) .context(maxTimeQueryContext) .build() ), @@ -3786,7 +3805,7 @@ public void testTwoSemiJoinsSimultaneously(Map queryContext) .dataSource(CalciteTests.DATASOURCE1) .intervals(querySegmentSpec(Filtration.eternity())) .bound(TimeBoundaryQuery.MAX_TIME) - .filters(not(selector("cnt", "2", null))) + .filters(not(equality("cnt", 2L, ColumnType.LONG))) .context(maxTimeQueryContext) .build() ), @@ -3864,7 +3883,7 @@ public void testSemiAndAntiJoinSimultaneouslyUsingWhereInSubquery(Map= \"_j0._a0\")") ) ) @@ -3969,7 +3988,7 @@ public void testSemiAndAntiJoinSimultaneouslyUsingExplicitJoins(Map queryContext) .setDataSource(CalciteTests.DATASOURCE1) .setInterval(querySegmentSpec(Filtration.eternity())) .setGranularity(Granularities.ALL) - .setDimFilter(not(selector("dim1", "", null))) + .setDimFilter( + 
not( + NullHandling.replaceWithDefault() + ? isNull("dim1") + : equality("dim1", "", ColumnType.STRING) + ) + ) .setDimensions(dimensions(new DefaultDimensionSpec("dim1", "d0"))) .setContext(QUERY_CONTEXT_DEFAULT) .build() @@ -4466,7 +4497,7 @@ public void testUsingSubqueryAsPartOfAndFilter(Map queryContext) ) .setInterval(querySegmentSpec(Filtration.eternity())) .setGranularity(Granularities.ALL) - .setDimFilter(not(selector("dim1", "xxx", null))) + .setDimFilter(not(equality("dim1", "xxx", ColumnType.STRING))) .setDimensions( dimensions( new DefaultDimensionSpec("dim1", "d0"), @@ -4549,11 +4580,11 @@ public void testUsingSubqueryAsPartOfOrFilter(Map queryContext) .setGranularity(Granularities.ALL) .setDimFilter( or( - selector("dim1", "xxx", null), + equality("dim1", "xxx", ColumnType.STRING), and( - not(selector("j0.a0", "0", null)), - not(selector("_j0.d1", null, null)), - not(selector("dim2", null, null)) + not(equality("j0.a0", 0L, ColumnType.LONG)), + notNull("_j0.d1"), + notNull("dim2") ) ) ) @@ -4638,7 +4669,7 @@ public void testNestedGroupByOnInlineDataSourceWithFilter(Map qu ) .setGranularity(Granularities.ALL) .setInterval(querySegmentSpec(Filtration.eternity())) - .setDimFilter(selector("dim1", "def", null)) + .setDimFilter(equality("dim1", "def", ColumnType.STRING)) .setDimensions( dimensions( new DefaultDimensionSpec("v0", "d0") @@ -4747,7 +4778,7 @@ public void testCountOnSemiJoinSingleColumn(Map queryContext) .setDataSource(CalciteTests.DATASOURCE1) .setInterval(querySegmentSpec(Filtration.eternity())) .setDimFilter( - selector("dim1", "10.1", null) + equality("dim1", "10.1", ColumnType.STRING) ) .setGranularity(Granularities.ALL) .setDimensions(dimensions(new DefaultDimensionSpec("dim1", "d0"))) @@ -5014,82 +5045,82 @@ public void testInnerJoinWithFilterPushdownAndManyFiltersEmptyResults(Map q .setDataSource(CalciteTests.DATASOURCE1) .setInterval(querySegmentSpec(Filtration.eternity())) .setVirtualColumns(expressionVirtualColumn("v0", "1", 
ColumnType.LONG)) - .setDimFilter(selector("m2", "A", null)) + .setDimFilter(equality("m2", "A", ColumnType.STRING)) .setDimensions( new DefaultDimensionSpec("v0", "d0", ColumnType.LONG) ) @@ -5366,14 +5397,14 @@ public void testRegressionFilteredAggregatorsSubqueryJoins(Map q new FilteredAggregatorFactory( new CountAggregatorFactory("a0"), and( - not(selector("_j0.d1", null, null)), - not(selector("dim1", null, null)) + notNull("_j0.d1"), + notNull("dim1") ), "a0" ), new FilteredAggregatorFactory( new FloatMinAggregatorFactory("a1", "m1"), - selector("__j0.d0", null, null), + isNull("__j0.d0"), "a1" ) ) @@ -5422,10 +5453,12 @@ public void testRegressionFilteredAggregatorsSubqueryJoins(Map q new QueryDataSource( new TopNQueryBuilder().dataSource(CalciteTests.DATASOURCE1) .intervals(querySegmentSpec(Filtration.eternity())) - .filters(new InDimFilter( - "m2", - new HashSet<>(Arrays.asList(null, "A")) - )) + .filters( + or( + equality("m2", "A", ColumnType.STRING), + isNull("m2") + ) + ) .virtualColumns(expressionVirtualColumn( "v0", "notnull(\"m2\")", @@ -5450,19 +5483,19 @@ public void testRegressionFilteredAggregatorsSubqueryJoins(Map q new FilteredAggregatorFactory( new CountAggregatorFactory("a0"), and( - not(selector("_j0.d1", null, null)), - not(selector("dim1", null, null)) + notNull("_j0.d1"), + notNull("dim1") ), "a0" ), new FilteredAggregatorFactory( new FloatMinAggregatorFactory("a1", "m1"), or( - selector("__j0.a0", null, null), + isNull("__j0.a0"), not( or( not(expressionFilter("\"__j0.d0\"")), - not(selector("__j0.d0", null, null)) + notNull("__j0.d0") ) ) ), @@ -5579,7 +5612,14 @@ public void testJoinsWithTwoConditions() .setInterval(querySegmentSpec(Filtration.eternity())) .setGranularity(Granularities.ALL) .setDataSource(new TableDataSource(CalciteTests.DATASOURCE1)) - .setDimFilter(in("m1", ImmutableList.of("1", "2"), null)) + .setDimFilter( + NullHandling.replaceWithDefault() + ? 
in("m1", ImmutableList.of("1", "2"), null) + : or( + equality("m1", 1.0, ColumnType.FLOAT), + equality("m1", 2.0, ColumnType.FLOAT) + ) + ) .setDimensions(new DefaultDimensionSpec("m1", "d0", ColumnType.FLOAT)) .setAggregatorSpecs(aggregators(new LongMaxAggregatorFactory("a0", "__time"))) .setContext(context) @@ -5623,10 +5663,21 @@ public void testJoinsWithThreeConditions() .setGranularity(Granularities.ALL) .setDataSource(new TableDataSource(CalciteTests.DATASOURCE1)) .setDimFilter( - and( + NullHandling.replaceWithDefault() + ? and( in("m1", ImmutableList.of("1", "2"), null), in("m2", ImmutableList.of("1", "2"), null) + ) + : and( + or( + equality("m1", 1.0, ColumnType.FLOAT), + equality("m1", 2.0, ColumnType.FLOAT) + ), + or( + equality("m2", 1.0, ColumnType.DOUBLE), + equality("m2", 2.0, ColumnType.DOUBLE) ) + ) ) .setDimensions( new DefaultDimensionSpec("m1", "d0", ColumnType.FLOAT), diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteMultiValueStringQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteMultiValueStringQueryTest.java index f17ef78c1752..3520db7a4ac1 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteMultiValueStringQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteMultiValueStringQueryTest.java @@ -161,7 +161,7 @@ public void testMultiValueStringWorksLikeStringGroupByWithFilter() new DefaultDimensionSpec("v0", "_d0", ColumnType.STRING) ) ) - .setDimFilter(selector("v0", "bfoo", null)) + .setDimFilter(equality("v0", "bfoo", ColumnType.STRING)) .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) .setLimitSpec(new DefaultLimitSpec( ImmutableList.of(new OrderByColumnSpec( @@ -248,7 +248,7 @@ public void testMultiValueStringWorksLikeStringScanWithFilter() .dataSource(CalciteTests.DATASOURCE3) .eternityInterval() .virtualColumns(expressionVirtualColumn("v0", "concat(\"dim3\",'foo')", ColumnType.STRING)) - .filters(selector("v0", "bfoo", null)) + 
.filters(equality("v0", "bfoo", ColumnType.STRING)) .columns(ImmutableList.of("v0")) .context(QUERY_CONTEXT_DEFAULT) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) @@ -1028,7 +1028,7 @@ public void testMultiValueStringToStringToMultiValueString() ColumnType.STRING ) ) - .setDimFilter(bound("v0", "0", null, true, false, null, StringComparators.NUMERIC)) + .setDimFilter(range("v0", ColumnType.LONG, 0L, null, true, false)) .setDimensions(dimensions(new DefaultDimensionSpec("v1", "_d0", ColumnType.STRING))) .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) .setLimitSpec(new DefaultLimitSpec( @@ -1209,7 +1209,7 @@ public void testStringToMVOfStringAgg() ExpressionLambdaAggregatorFactory.DEFAULT_MAX_SIZE_BYTES, TestExprMacroTable.INSTANCE ), - not(selector("dim1", null, null)) + notNull("dim1") ), new CountAggregatorFactory("a1") ) @@ -1691,7 +1691,7 @@ public void testFilterOnMultiValueListFilterNoMatch() true ) ) - .setDimFilter(selector("v0", "a", null)) + .setDimFilter(equality("v0", "a", ColumnType.STRING)) .setDimensions( dimensions( new DefaultDimensionSpec("dim3", "_d0", ColumnType.STRING) @@ -1734,7 +1734,7 @@ public void testFilterOnMultiValueListFilterMatch() true ) ) - .setDimFilter(selector("v0", "b", null)) + .setDimFilter(equality("v0", "b", ColumnType.STRING)) .setDimensions( dimensions( new DefaultDimensionSpec("dim3", "_d0", ColumnType.STRING) @@ -2105,9 +2105,9 @@ public void testMultiValueStringOverlapFilterCoalesceSingleValue() ) ) .filters( - new OrDimFilter( - new InDimFilter("dim3", ImmutableSet.of("a", "b", "other")), - new SelectorDimFilter("dim3", null, null) + or( + in("dim3", ImmutableSet.of("a", "b", "other"), null), + isNull("dim3") ) ) .columns("v0") @@ -2152,11 +2152,11 @@ public void testMultiValueStringOverlapFilterCoalesceSingleValueOtherColumn() ) ) .filters( - new OrDimFilter( - new InDimFilter("dim3", ImmutableSet.of("a", "b", "other")), - new AndDimFilter( - new InDimFilter("dim2", 
ImmutableSet.of("a", "b", "other")), - new SelectorDimFilter("dim3", null, null) + or( + in("dim3", ImmutableSet.of("a", "b", "other"), null), + and( + in("dim2", ImmutableSet.of("a", "b", "other"), null), + isNull("dim3") ) ) ) diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java index 628d7dc3246f..b5d9440faad6 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java @@ -52,7 +52,6 @@ import org.apache.druid.query.filter.ExpressionDimFilter; import org.apache.druid.query.filter.InDimFilter; import org.apache.druid.query.filter.LikeDimFilter; -import org.apache.druid.query.filter.SelectorDimFilter; import org.apache.druid.query.groupby.GroupByQuery; import org.apache.druid.query.ordering.StringComparators; import org.apache.druid.query.scan.ScanQuery; @@ -1183,6 +1182,53 @@ public void testGroupByRootSingleTypeArrayLong() .run(); } + @Test + public void testGroupByRootSingleTypeArrayLongFilteredArrayEquality() + { + if (NullHandling.replaceWithDefault()) { + // this fails in default value mode because it relies on equality filter + return; + } + cannotVectorize(); + testBuilder() + .sql( + "SELECT " + + "arrayLong, " + + "SUM(cnt) " + + "FROM druid.arrays WHERE arrayLong = ARRAY[1, 2, 3] GROUP BY 1" + ) + .queryContext(QUERY_CONTEXT_NO_STRINGIFY_ARRAY) + .expectedQueries( + ImmutableList.of( + GroupByQuery.builder() + .setDataSource(DATA_SOURCE_ARRAYS) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setDimFilter(equality("arrayLong", new Object[]{1L, 2L, 3L}, ColumnType.LONG_ARRAY)) + .setDimensions( + dimensions( + new DefaultDimensionSpec("arrayLong", "d0", ColumnType.LONG_ARRAY) + ) + ) + .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) + 
.setContext(QUERY_CONTEXT_NO_STRINGIFY_ARRAY) + .build() + ) + ) + .expectedResults( + ImmutableList.of( + new Object[]{Arrays.asList(1L, 2L, 3L), 4L} + ) + ) + .expectedSignature( + RowSignature.builder() + .add("arrayLong", ColumnType.LONG_ARRAY) + .add("EXPR$1", ColumnType.LONG) + .build() + ) + .run(); + } + @Test public void testGroupByRootSingleTypeArrayLongNulls() { @@ -1232,6 +1278,55 @@ public void testGroupByRootSingleTypeArrayLongNulls() .run(); } + @Test + public void testGroupByRootSingleTypeArrayLongNullsFilteredArrayEquality() + { + cannotVectorize(); + testBuilder() + .sql( + "SELECT " + + "arrayLongNulls, " + + "SUM(cnt) " + + "FROM druid.arrays WHERE arrayLongNulls = ARRAY[null, 2, 9] OR arrayLongNulls IS NULL GROUP BY 1" + ) + .queryContext(QUERY_CONTEXT_NO_STRINGIFY_ARRAY) + .expectedQueries( + ImmutableList.of( + GroupByQuery.builder() + .setDataSource(DATA_SOURCE_ARRAYS) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setDimFilter( + or( + equality("arrayLongNulls", new Object[]{null, 2L, 9L}, ColumnType.LONG_ARRAY), + isNull("arrayLongNulls") + ) + ) + .setDimensions( + dimensions( + new DefaultDimensionSpec("arrayLongNulls", "d0", ColumnType.LONG_ARRAY) + ) + ) + .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) + .setContext(QUERY_CONTEXT_NO_STRINGIFY_ARRAY) + .build() + ) + ) + .expectedResults( + ImmutableList.of( + new Object[]{null, 3L}, + new Object[]{Arrays.asList(null, 2L, 9L), 2L} + ) + ) + .expectedSignature( + RowSignature.builder() + .add("arrayLongNulls", ColumnType.LONG_ARRAY) + .add("EXPR$1", ColumnType.LONG) + .build() + ) + .run(); + } + @Test public void testGroupByRootSingleTypeArrayLongNullsUnnest() { @@ -1882,7 +1977,7 @@ public void testGroupByRootSingleTypeArrayLongElementFiltered() .setVirtualColumns( new NestedFieldVirtualColumn("arrayLong", "$[1]", "v0", ColumnType.LONG) ) - .setDimFilter(new SelectorDimFilter("v0", "2", null)) + 
.setDimFilter(equality("v0", 2L, ColumnType.LONG)) .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) .setContext(QUERY_CONTEXT_NO_STRINGIFY_ARRAY) .build() @@ -2022,7 +2117,7 @@ public void testGroupByRootSingleTypeArrayStringElementFiltered() .setVirtualColumns( new NestedFieldVirtualColumn("arrayStringNulls", "$[1]", "v0", ColumnType.STRING) ) - .setDimFilter(new SelectorDimFilter("v0", "b", null)) + .setDimFilter(equality("v0", "b", ColumnType.STRING)) .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) .setContext(QUERY_CONTEXT_NO_STRINGIFY_ARRAY) .build() @@ -2111,7 +2206,7 @@ public void testGroupByRootSingleTypeArrayDoubleElementFiltered() new DefaultDimensionSpec("v0", "d0", ColumnType.DOUBLE) ) ) - .setDimFilter(new SelectorDimFilter("v0", "5.5", null)) + .setDimFilter(equality("v0", 5.5, ColumnType.DOUBLE)) .setVirtualColumns( new NestedFieldVirtualColumn("arrayDoubleNulls", "$[2]", "v0", ColumnType.DOUBLE) ) @@ -2196,7 +2291,7 @@ public void testGroupByPathSelectorFilter() new DefaultDimensionSpec("v0", "d0") ) ) - .setDimFilter(selector("v0", "100", null)) + .setDimFilter(equality("v0", "100", ColumnType.STRING)) .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) .setContext(QUERY_CONTEXT_DEFAULT) .build() @@ -2359,7 +2454,7 @@ public void testGroupByPathSelectorFilterLong() new DefaultDimensionSpec("v1", "d0") ) ) - .setDimFilter(selector("v0", "100", null)) + .setDimFilter(equality("v0", 100L, ColumnType.LONG)) .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) .setContext(QUERY_CONTEXT_DEFAULT) .build() @@ -2399,7 +2494,7 @@ public void testGroupByPathSelectorFilterDouble() new DefaultDimensionSpec("v1", "d0") ) ) - .setDimFilter(selector("v0", "2.02", null)) + .setDimFilter(equality("v0", 2.02, ColumnType.DOUBLE)) .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) .setContext(QUERY_CONTEXT_DEFAULT) .build() @@ -2439,7 +2534,7 
@@ public void testGroupByPathSelectorFilterString() new DefaultDimensionSpec("v1", "d0") ) ) - .setDimFilter(selector("v0", "400", null)) + .setDimFilter(equality("v0", "400", ColumnType.STRING)) .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) .setContext(QUERY_CONTEXT_DEFAULT) .build() @@ -2479,7 +2574,7 @@ public void testGroupByPathSelectorFilterVariant() new DefaultDimensionSpec("v1", "d0") ) ) - .setDimFilter(selector("v0", "1", null)) + .setDimFilter(equality("v0", 1L, ColumnType.LONG)) .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) .setContext(QUERY_CONTEXT_DEFAULT) .build() @@ -2514,7 +2609,7 @@ public void testGroupByPathSelectorFilterVariant2() new DefaultDimensionSpec("v1", "d0") ) ) - .setDimFilter(selector("v0", "1", null)) + .setDimFilter(equality("v0", "1", ColumnType.STRING)) .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) .setContext(QUERY_CONTEXT_DEFAULT) .build() @@ -2530,6 +2625,92 @@ public void testGroupByPathSelectorFilterVariant2() ); } + @Test + public void testGroupByPathSelectorFilterVariant2Int() + { + testQuery( + "SELECT " + + "JSON_VALUE(nest, '$.x'), " + + "SUM(cnt) " + + "FROM druid.nested WHERE JSON_VALUE(nest, '$.mixed2') = 1 GROUP BY 1", + ImmutableList.of( + GroupByQuery.builder() + .setDataSource(DATA_SOURCE) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setVirtualColumns( + new NestedFieldVirtualColumn("nest", "$.mixed2", "v0", ColumnType.LONG), + new NestedFieldVirtualColumn("nest", "$.x", "v1", ColumnType.STRING) + ) + .setDimensions( + dimensions( + new DefaultDimensionSpec("v1", "d0") + ) + ) + .setDimFilter(equality("v0", 1L, ColumnType.LONG)) + .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + // todo (clint): this is a bit wonky, we get extra matches for numeric 1 matcher because the 
virtual column + // is defined as long typed, which makes a long processor which will convert the 1.1 to a 1L + new Object[]{"100", 2L}, + new Object[]{"200", 1L} + ), + RowSignature.builder() + .add("EXPR$0", ColumnType.STRING) + .add("EXPR$1", ColumnType.LONG) + .build() + ); + } + + @Test + public void testGroupByPathSelectorFilterVariant2BothTypesMatcher() + { + testQuery( + "SELECT " + + "JSON_VALUE(nest, '$.x'), " + + "SUM(cnt) " + + "FROM druid.nested WHERE JSON_VALUE(nest, '$.mixed2') = '1' OR JSON_VALUE(nest, '$.mixed2') = 1 GROUP BY 1", + ImmutableList.of( + GroupByQuery.builder() + .setDataSource(DATA_SOURCE) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setVirtualColumns( + new NestedFieldVirtualColumn("nest", "$.mixed2", "v0", ColumnType.STRING), + new NestedFieldVirtualColumn("nest", "$.mixed2", "v1", ColumnType.LONG), + new NestedFieldVirtualColumn("nest", "$.x", "v2", ColumnType.STRING) + ) + .setDimensions( + dimensions( + new DefaultDimensionSpec("v2", "d0") + ) + ) + .setDimFilter( + or( + equality("v0", "1", ColumnType.STRING), + equality("v1", 1L, ColumnType.LONG) + ) + ) + .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + // todo (clint): this is a bit wonky, we get 2 matches for numeric 1 matcher because the virtual column + // is defined as long typed, which makes a long processor which will convert the 1.1 to a 1L + new Object[]{"100", 2L}, + new Object[]{"200", 1L} + ), + RowSignature.builder() + .add("EXPR$0", ColumnType.STRING) + .add("EXPR$1", ColumnType.LONG) + .build() + ); + } + @Test public void testGroupByPathSelectorFilterVariant3() { @@ -2590,7 +2771,7 @@ public void testGroupByPathSelectorFilterNonExistent() new DefaultDimensionSpec("v1", "d0") ) ) - .setDimFilter(selector("v0", "no way", null)) + .setDimFilter(equality("v0", "no way", ColumnType.STRING)) 
.setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) .setContext(QUERY_CONTEXT_DEFAULT) .build() @@ -2625,7 +2806,7 @@ public void testGroupByPathSelectorFilterNull() new DefaultDimensionSpec("v1", "d0") ) ) - .setDimFilter(not(selector("v0", null, null))) + .setDimFilter(notNull("v0")) .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) .setContext(QUERY_CONTEXT_DEFAULT) .build() @@ -2662,7 +2843,7 @@ public void testGroupByPathBoundFilterLong() new DefaultDimensionSpec("v0", "d0") ) ) - .setDimFilter(bound("v0", "100", "300", false, false, null, StringComparators.LEXICOGRAPHIC)) + .setDimFilter(range("v0", ColumnType.STRING, "100", "300", false, false)) .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) .setContext(QUERY_CONTEXT_DEFAULT) .build() @@ -2699,7 +2880,7 @@ public void testGroupByPathBoundFilterLongNoUpper() new DefaultDimensionSpec("v0", "d0") ) ) - .setDimFilter(bound("v0", "100", null, false, false, null, StringComparators.LEXICOGRAPHIC)) + .setDimFilter(range("v0", ColumnType.STRING, "100", null, false, false)) .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) .setContext(QUERY_CONTEXT_DEFAULT) .build() @@ -2736,7 +2917,7 @@ public void testGroupByPathBoundFilterLongNoLower() new DefaultDimensionSpec("v0", "d0") ) ) - .setDimFilter(bound("v0", null, "100", false, false, null, StringComparators.LEXICOGRAPHIC)) + .setDimFilter(range("v0", ColumnType.STRING, null, "100", false, false)) .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) .setContext(QUERY_CONTEXT_DEFAULT) .build() @@ -2773,7 +2954,7 @@ public void testGroupByPathBoundFilterLongNumeric() new DefaultDimensionSpec("v1", "d0") ) ) - .setDimFilter(bound("v0", "100", "300", false, false, null, StringComparators.NUMERIC)) + .setDimFilter(range("v0", ColumnType.LONG, 100L, 300L, false, false)) .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) 
.setContext(QUERY_CONTEXT_DEFAULT) .build() @@ -2811,7 +2992,7 @@ public void testGroupByPathBoundFilterLongNoUpperNumeric() new DefaultDimensionSpec("v1", "d0") ) ) - .setDimFilter(bound("v0", "100", null, false, false, null, StringComparators.NUMERIC)) + .setDimFilter(range("v0", ColumnType.LONG, 100L, null, false, false)) .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) .setContext(QUERY_CONTEXT_DEFAULT) .build() @@ -2848,7 +3029,7 @@ public void testGroupByPathNumericBoundFilterLongNoUpperNumeric() new DefaultDimensionSpec("v0", "d0", ColumnType.LONG) ) ) - .setDimFilter(bound("v0", "100", null, false, false, null, StringComparators.NUMERIC)) + .setDimFilter(range("v0", ColumnType.LONG, 100L, null, false, false)) .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) .setContext(QUERY_CONTEXT_DEFAULT) .build() @@ -2886,13 +3067,17 @@ public void testGroupByPathBoundFilterLongNoLowerNumeric() new DefaultDimensionSpec("v1", "d0") ) ) - .setDimFilter(bound("v0", null, "100", false, false, null, StringComparators.NUMERIC)) + .setDimFilter(range("v0", ColumnType.LONG, null, 100L, false, false)) .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) .setContext(QUERY_CONTEXT_DEFAULT) .build() ), - ImmutableList.of( - new Object[]{NullHandling.defaultStringValue(), 4L}, + NullHandling.replaceWithDefault() + ? 
ImmutableList.of( + new Object[]{"", 4L}, + new Object[]{"100", 2L} + ) + : ImmutableList.of( new Object[]{"100", 2L} ), RowSignature.builder() @@ -2923,7 +3108,7 @@ public void testGroupByPathBoundFilterDouble() new DefaultDimensionSpec("v0", "d0") ) ) - .setDimFilter(bound("v0", "1.01", "3.03", false, false, null, StringComparators.LEXICOGRAPHIC)) + .setDimFilter(range("v0", ColumnType.STRING, "1.01", "3.03", false, false)) .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) .setContext(QUERY_CONTEXT_DEFAULT) .build() @@ -2960,7 +3145,7 @@ public void testGroupByPathBoundFilterDoubleNoUpper() new DefaultDimensionSpec("v0", "d0") ) ) - .setDimFilter(bound("v0", "1.01", null, false, false, null, StringComparators.LEXICOGRAPHIC)) + .setDimFilter(range("v0", ColumnType.STRING, "1.01", null, false, false)) .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) .setContext(QUERY_CONTEXT_DEFAULT) .build() @@ -2997,7 +3182,7 @@ public void testGroupByPathBoundFilterDoubleNoLower() new DefaultDimensionSpec("v0", "d0") ) ) - .setDimFilter(bound("v0", null, "2.02", false, false, null, StringComparators.LEXICOGRAPHIC)) + .setDimFilter(range("v0", ColumnType.STRING, null, "2.02", false, false)) .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) .setContext(QUERY_CONTEXT_DEFAULT) .build() @@ -3034,7 +3219,7 @@ public void testGroupByPathBoundDoubleFilterNumeric() new DefaultDimensionSpec("v1", "d0") ) ) - .setDimFilter(bound("v0", "2.0", "3.5", false, false, null, StringComparators.NUMERIC)) + .setDimFilter(range("v0", ColumnType.DOUBLE, 2.0, 3.5, false, false)) .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) .setContext(QUERY_CONTEXT_DEFAULT) .build() @@ -3072,7 +3257,7 @@ public void testGroupByPathBoundFilterDoubleNoUpperNumeric() new DefaultDimensionSpec("v1", "d0") ) ) - .setDimFilter(bound("v0", "1.0", null, false, false, null, StringComparators.NUMERIC)) + 
.setDimFilter(range("v0", ColumnType.DOUBLE, 1.0, null, false, false)) .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) .setContext(QUERY_CONTEXT_DEFAULT) .build() @@ -3110,13 +3295,17 @@ public void testGroupByPathBoundFilterDoubleNoLowerNumeric() new DefaultDimensionSpec("v1", "d0") ) ) - .setDimFilter(bound("v0", null, "2.02", false, false, null, StringComparators.NUMERIC)) + .setDimFilter(range("v0", ColumnType.DOUBLE, null, 2.02, false, false)) .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) .setContext(QUERY_CONTEXT_DEFAULT) .build() ), - ImmutableList.of( - new Object[]{NullHandling.defaultStringValue(), 4L}, + NullHandling.replaceWithDefault() + ? ImmutableList.of( + new Object[]{"", 4L}, + new Object[]{"2.02", 2L} + ) + : ImmutableList.of( new Object[]{"2.02", 2L} ), RowSignature.builder() @@ -3147,7 +3336,7 @@ public void testGroupByPathBoundFilterString() new DefaultDimensionSpec("v0", "d0") ) ) - .setDimFilter(bound("v0", "100", "300", false, false, null, StringComparators.LEXICOGRAPHIC)) + .setDimFilter(range("v0", ColumnType.STRING, "100", "300", false, false)) .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) .setContext(QUERY_CONTEXT_DEFAULT) .build() @@ -3185,7 +3374,7 @@ public void testGroupByPathBoundFilterStringNoUpper() new DefaultDimensionSpec("v1", "d0") ) ) - .setDimFilter(bound("v0", "400", null, false, false, null, StringComparators.LEXICOGRAPHIC)) + .setDimFilter(range("v0", ColumnType.STRING, "400", null, false, false)) .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) .setContext(QUERY_CONTEXT_DEFAULT) .build() @@ -3223,13 +3412,17 @@ public void testGroupByPathBoundFilterStringNoLower() new DefaultDimensionSpec("v1", "d0") ) ) - .setDimFilter(bound("v0", null, "400", false, false, null, StringComparators.LEXICOGRAPHIC)) + .setDimFilter(range("v0", ColumnType.STRING, null, "400", false, false)) .setAggregatorSpecs(aggregators(new 
LongSumAggregatorFactory("a0", "cnt"))) .setContext(QUERY_CONTEXT_DEFAULT) .build() ), - ImmutableList.of( - new Object[]{NullHandling.defaultStringValue(), 4L}, + NullHandling.replaceWithDefault() + ? ImmutableList.of( + new Object[]{"", 4L}, + new Object[]{"100", 2L} + ) + : ImmutableList.of( new Object[]{"100", 2L} ), RowSignature.builder() @@ -3408,7 +3601,12 @@ public void testGroupByPathInFilter() new DefaultDimensionSpec("v1", "d0") ) ) - .setDimFilter(new InDimFilter("v0", ImmutableSet.of("100", "200"))) + .setDimFilter( + NullHandling.replaceWithDefault() + ? in("v0", ImmutableSet.of("100", "200"), null) + : or(equality("v0", 100L, ColumnType.LONG), equality("v0", 200L, ColumnType.LONG) + ) + ) .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) .setContext(QUERY_CONTEXT_DEFAULT) .build() @@ -3446,7 +3644,14 @@ public void testGroupByPathInFilterDouble() new DefaultDimensionSpec("v1", "d0") ) ) - .setDimFilter(new InDimFilter("v0", ImmutableSet.of("2.02", "3.03"))) + .setDimFilter( + NullHandling.replaceWithDefault() + ? 
in("v0", ImmutableSet.of("2.02", "3.03"), null) + : or( + equality("v0", 2.02, ColumnType.DOUBLE), + equality("v0", 3.03, ColumnType.DOUBLE) + ) + ) .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) .setContext(QUERY_CONTEXT_DEFAULT) .build() @@ -3586,7 +3791,7 @@ public void testSumPathFilteredAggDouble() aggregators( new FilteredAggregatorFactory( new DoubleSumAggregatorFactory("a0", "v1"), - selector("v0", "2.02", null) + equality("v0", 2.02, ColumnType.DOUBLE) ) ) ) @@ -3623,7 +3828,7 @@ public void testSumPathFilteredAggString() aggregators( new FilteredAggregatorFactory( new DoubleSumAggregatorFactory("a0", "v1"), - selector("v0", "300", null) + equality("v0", "300", ColumnType.STRING) ) ) ) @@ -3686,12 +3891,11 @@ public void testSumPathMixedFilteredAggLong() new NestedFieldVirtualColumn("nest", "$.mixed", "v0", ColumnType.LONG), new NestedFieldVirtualColumn("nest", "$.mixed", "v1", ColumnType.DOUBLE) ) - .aggregators( aggregators( new FilteredAggregatorFactory( new DoubleSumAggregatorFactory("a0", "v1"), - selector("v0", "1", null) + equality("v0", 1L, ColumnType.LONG) ) ) ) @@ -3727,7 +3931,7 @@ public void testSumPathMixedFilteredAggDouble() aggregators( new FilteredAggregatorFactory( new DoubleSumAggregatorFactory("a0", "v0"), - selector("v0", "1.1", null) + equality("v0", 1.1, ColumnType.DOUBLE) ) ) ) @@ -4623,7 +4827,7 @@ public void testGroupByPathSelectorFilterVariantNull() new DefaultDimensionSpec("v0", "d1", ColumnType.LONG) ) ) - .setDimFilter(selector("v0", null, null)) + .setDimFilter(isNull("v0")) .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) .setContext(QUERY_CONTEXT_DEFAULT) .build() @@ -4660,7 +4864,7 @@ public void testSelectPathSelectorFilterVariantNull() .columns( "v0", "v1" ) - .filters(selector("v0", null, null)) + .filters(isNull("v0")) .context(QUERY_CONTEXT_DEFAULT) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .legacy(false) @@ -4705,7 +4909,7 @@ public 
void testGroupByPathSelectorFilterVariantNotNull() new DefaultDimensionSpec("v0", "d1", ColumnType.LONG) ) ) - .setDimFilter(not(selector("v0", null, null))) + .setDimFilter(notNull("v0")) .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) .setContext(QUERY_CONTEXT_DEFAULT) .build() @@ -4740,7 +4944,7 @@ public void testGroupByRegularLongLongMixed1FilterNotNull() ) ) .setVirtualColumns(new NestedFieldVirtualColumn("long", "$", "v0", ColumnType.LONG)) - .setDimFilter(not(selector("v0", null, null))) + .setDimFilter(notNull("v0")) .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) .setContext(QUERY_CONTEXT_DEFAULT) .build() @@ -4778,7 +4982,7 @@ public void testGroupByRootSingleTypeStringMixed1SparseNotNull() ) ) .setVirtualColumns(new NestedFieldVirtualColumn("string_sparse", "$", "v0", ColumnType.LONG)) - .setDimFilter(not(selector("v0", null, null))) + .setDimFilter(notNull("v0")) .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) .setContext(QUERY_CONTEXT_DEFAULT) .build() @@ -4808,7 +5012,7 @@ public void testScanStringNotNullCast() .virtualColumns( expressionVirtualColumn("v0", "CAST(\"string_sparse\", 'LONG')", ColumnType.LONG) ) - .filters(not(selector("v0", null, null))) + .filters(notNull("v0")) .columns("v0") .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .legacy(false) @@ -4859,8 +5063,12 @@ public void testGroupByRootSingleTypeStringMixed1SparseNotNullCast2() new DefaultDimensionSpec("string_sparse", "d0", ColumnType.LONG) ) ) - .setVirtualColumns(expressionVirtualColumn("v0", "CAST(\"string_sparse\", 'LONG')", ColumnType.LONG)) - .setDimFilter(not(selector("v0", null, null))) + .setVirtualColumns(expressionVirtualColumn( + "v0", + "CAST(\"string_sparse\", 'LONG')", + ColumnType.LONG + )) + .setDimFilter(notNull("v0")) .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) .setContext(QUERY_CONTEXT_DEFAULT) .build() @@ -5178,6 +5386,39 @@ 
public void testGroupByRootSingleTypeArrayStringNullsFilteredAsMvd() .run(); } + @Test + public void testGroupByAndFilterVariant() + { + testQuery( + "SELECT " + + "variant, " + + "SUM(cnt) " + + "FROM druid.all_auto WHERE variant = '1' GROUP BY 1", + ImmutableList.of( + GroupByQuery.builder() + .setDataSource(DATA_SOURCE_ALL) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setDimensions( + dimensions( + new DefaultDimensionSpec("variant", "d0") + ) + ) + .setDimFilter(equality("variant", "1", ColumnType.STRING)) + .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{"1", 2L} + ), + RowSignature.builder() + .add("variant", ColumnType.STRING) + .add("EXPR$1", ColumnType.LONG) + .build() + ); + } + @Test public void testScanAllTypesAuto() { @@ -5230,22 +5471,512 @@ public void testScanAllTypesAuto() ), useDefault ? ImmutableList.of( - new Object[]{1672531200000L, "", 0L, 0.0D, "true", "51", "1", "[]", "{\"a\":700,\"b\":{\"x\":\"g\",\"y\":1.1,\"z\":[9,null,9,9]}}", "{\"x\":400,\"y\":[{\"l\":[null],\"m\":100,\"n\":5},{\"l\":[\"a\",\"b\",\"c\"],\"m\":\"a\",\"n\":1}],\"z\":{}}", null, "[\"a\",\"b\"]", null, "[2,3]", null, "[null]", null, "[\"true\",\"false\",\"true\"]", null, "[{\"x\":1},{\"x\":2}]", "", "hello", 1234L, 1.234D, "{\"x\":1,\"y\":\"hello\",\"z\":{\"a\":1.1,\"b\":1234,\"c\":[\"a\",\"b\",\"c\"]}}", "[\"a\",\"b\",\"c\"]", "[1,2,3]", "[1.1,2.2,3.3]", "[]", "{}", "[null,null]", "[{},{},{}]", "[{\"a\":\"b\",\"x\":1,\"y\":1.3}]", 1L}, - new Object[]{1672531200000L, "", 2L, 0.0D, "false", "b", "\"b\"", "2", "{\"a\":200,\"b\":{\"x\":\"b\",\"y\":1.1,\"z\":[2,4,6]}}", "{\"x\":10,\"y\":[{\"l\":[\"b\",\"b\",\"c\"],\"m\":\"b\",\"n\":2},[1,2,3]],\"z\":{\"a\":[5.5],\"b\":false}}", "[\"a\",\"b\",\"c\"]", "[null,\"b\"]", "[2,3]", null, "[3.3,4.4,5.5]", "[999.0,null,5.5]", "[null,null,2.2]", "[\"true\",\"true\"]", 
"[null,[null],[]]", "[{\"x\":3},{\"x\":4}]", "", "hello", 1234L, 1.234D, "{\"x\":1,\"y\":\"hello\",\"z\":{\"a\":1.1,\"b\":1234,\"c\":[\"a\",\"b\",\"c\"]}}", "[\"a\",\"b\",\"c\"]", "[1,2,3]", "[1.1,2.2,3.3]", "[]", "{}", "[null,null]", "[{},{},{}]", "[{\"a\":\"b\",\"x\":1,\"y\":1.3}]", 1L}, - new Object[]{1672531200000L, "a", 1L, 1.0D, "true", "1", "1", "1", "{\"a\":100,\"b\":{\"x\":\"a\",\"y\":1.1,\"z\":[1,2,3,4]}}", "{\"x\":1234,\"y\":[{\"l\":[\"a\",\"b\",\"c\"],\"m\":\"a\",\"n\":1},{\"l\":[\"a\",\"b\",\"c\"],\"m\":\"a\",\"n\":1}],\"z\":{\"a\":[1.1,2.2,3.3],\"b\":true}}", "[\"a\",\"b\"]", "[\"a\",\"b\"]", "[1,2,3]", "[1,null,3]", "[1.1,2.2,3.3]", "[1.1,2.2,null]", "[\"a\",\"1\",\"2.2\"]", "[\"true\",\"false\",\"true\"]", "[[1,2,null],[3,4]]", "[{\"x\":1},{\"x\":2}]", "", "hello", 1234L, 1.234D, "{\"x\":1,\"y\":\"hello\",\"z\":{\"a\":1.1,\"b\":1234,\"c\":[\"a\",\"b\",\"c\"]}}", "[\"a\",\"b\",\"c\"]", "[1,2,3]", "[1.1,2.2,3.3]", "[]", "{}", "[null,null]", "[{},{},{}]", "[{\"a\":\"b\",\"x\":1,\"y\":1.3}]", 1L}, - new Object[]{1672531200000L, "b", 4L, 3.3D, "true", "4", "{}", "4", "{\"a\":400,\"b\":{\"x\":\"d\",\"y\":1.1,\"z\":[3,4]}}", "{\"x\":1234,\"z\":{\"a\":[1.1,2.2,3.3],\"b\":true}}", "[\"d\",\"e\"]", "[\"b\",\"b\"]", "[1,4]", "[1]", "[2.2,3.3,4.0]", null, "[\"a\",\"b\",\"c\"]", "[null,\"false\",\"true\"]", "[[1,2],[3,4],[5,6,7]]", "[{\"x\":null},{\"x\":2}]", "", "hello", 1234L, 1.234D, "{\"x\":1,\"y\":\"hello\",\"z\":{\"a\":1.1,\"b\":1234,\"c\":[\"a\",\"b\",\"c\"]}}", "[\"a\",\"b\",\"c\"]", "[1,2,3]", "[1.1,2.2,3.3]", "[]", "{}", "[null,null]", "[{},{},{}]", "[{\"a\":\"b\",\"x\":1,\"y\":1.3}]", 1L}, - new Object[]{1672531200000L, "c", 0L, 4.4D, "true", "hello", "{}", "[]", "{\"a\":500,\"b\":{\"x\":\"e\",\"z\":[1,2,3,4]}}", "{\"x\":11,\"y\":[],\"z\":{\"a\":[null],\"b\":false}}", null, null, "[1,2,3]", "[]", "[1.1,2.2,3.3]", null, null, "[\"false\"]", null, "[{\"x\":1000},{\"y\":2000}]", "", "hello", 1234L, 1.234D, 
"{\"x\":1,\"y\":\"hello\",\"z\":{\"a\":1.1,\"b\":1234,\"c\":[\"a\",\"b\",\"c\"]}}", "[\"a\",\"b\",\"c\"]", "[1,2,3]", "[1.1,2.2,3.3]", "[]", "{}", "[null,null]", "[{},{},{}]", "[{\"a\":\"b\",\"x\":1,\"y\":1.3}]", 1L}, - new Object[]{1672531200000L, "d", 5L, 5.9D, "false", "", "\"a\"", "6", "{\"a\":600,\"b\":{\"x\":\"f\",\"y\":1.1,\"z\":[6,7,8,9]}}", null, "[\"a\",\"b\"]", null, null, "[null,2,9]", null, "[999.0,5.5,null]", "[\"a\",\"1\",\"2.2\"]", "[]", "[[1],[1,2,null]]", "[{\"a\":1},{\"b\":2}]", "", "hello", 1234L, 1.234D, "{\"x\":1,\"y\":\"hello\",\"z\":{\"a\":1.1,\"b\":1234,\"c\":[\"a\",\"b\",\"c\"]}}", "[\"a\",\"b\",\"c\"]", "[1,2,3]", "[1.1,2.2,3.3]", "[]", "{}", "[null,null]", "[{},{},{}]", "[{\"a\":\"b\",\"x\":1,\"y\":1.3}]", 1L}, - new Object[]{1672531200000L, "null", 3L, 2.0D, "", "3.0", "3.3", "3", "{\"a\":300}", "{\"x\":4,\"y\":[{\"l\":[],\"m\":100,\"n\":3},{\"l\":[\"a\"]},{\"l\":[\"b\"],\"n\":[]}],\"z\":{\"a\":[],\"b\":true}}", "[\"b\",\"c\"]", "[\"d\",null,\"b\"]", "[1,2,3,4]", "[1,2,3]", "[1.1,3.3]", "[null,2.2,null]", "[1,null,1]", "[\"true\",null,\"true\"]", "[[1],null,[1,2,3]]", "[null,{\"x\":2}]", "", "hello", 1234L, 1.234D, "{\"x\":1,\"y\":\"hello\",\"z\":{\"a\":1.1,\"b\":1234,\"c\":[\"a\",\"b\",\"c\"]}}", "[\"a\",\"b\",\"c\"]", "[1,2,3]", "[1.1,2.2,3.3]", "[]", "{}", "[null,null]", "[{},{},{}]", "[{\"a\":\"b\",\"x\":1,\"y\":1.3}]", 1L} + new Object[]{ + 1672531200000L, + "", + 0L, + 0.0D, + "true", + "51", + "1", + "[]", + "{\"a\":700,\"b\":{\"x\":\"g\",\"y\":1.1,\"z\":[9,null,9,9]}}", + "{\"x\":400,\"y\":[{\"l\":[null],\"m\":100,\"n\":5},{\"l\":[\"a\",\"b\",\"c\"],\"m\":\"a\",\"n\":1}],\"z\":{}}", + null, + "[\"a\",\"b\"]", + null, + "[2,3]", + null, + "[null]", + null, + "[\"true\",\"false\",\"true\"]", + null, + "[{\"x\":1},{\"x\":2}]", + "", + "hello", + 1234L, + 1.234D, + "{\"x\":1,\"y\":\"hello\",\"z\":{\"a\":1.1,\"b\":1234,\"c\":[\"a\",\"b\",\"c\"]}}", + "[\"a\",\"b\",\"c\"]", + "[1,2,3]", + "[1.1,2.2,3.3]", + "[]", + "{}", + 
"[null,null]", + "[{},{},{}]", + "[{\"a\":\"b\",\"x\":1,\"y\":1.3}]", + 1L + }, + new Object[]{ + 1672531200000L, + "", + 2L, + 0.0D, + "false", + "b", + "\"b\"", + "2", + "{\"a\":200,\"b\":{\"x\":\"b\",\"y\":1.1,\"z\":[2,4,6]}}", + "{\"x\":10,\"y\":[{\"l\":[\"b\",\"b\",\"c\"],\"m\":\"b\",\"n\":2},[1,2,3]],\"z\":{\"a\":[5.5],\"b\":false}}", + "[\"a\",\"b\",\"c\"]", + "[null,\"b\"]", + "[2,3]", + null, + "[3.3,4.4,5.5]", + "[999.0,null,5.5]", + "[null,null,2.2]", + "[\"true\",\"true\"]", + "[null,[null],[]]", + "[{\"x\":3},{\"x\":4}]", + "", + "hello", + 1234L, + 1.234D, + "{\"x\":1,\"y\":\"hello\",\"z\":{\"a\":1.1,\"b\":1234,\"c\":[\"a\",\"b\",\"c\"]}}", + "[\"a\",\"b\",\"c\"]", + "[1,2,3]", + "[1.1,2.2,3.3]", + "[]", + "{}", + "[null,null]", + "[{},{},{}]", + "[{\"a\":\"b\",\"x\":1,\"y\":1.3}]", + 1L + }, + new Object[]{ + 1672531200000L, + "a", + 1L, + 1.0D, + "true", + "1", + "1", + "1", + "{\"a\":100,\"b\":{\"x\":\"a\",\"y\":1.1,\"z\":[1,2,3,4]}}", + "{\"x\":1234,\"y\":[{\"l\":[\"a\",\"b\",\"c\"],\"m\":\"a\",\"n\":1},{\"l\":[\"a\",\"b\",\"c\"],\"m\":\"a\",\"n\":1}],\"z\":{\"a\":[1.1,2.2,3.3],\"b\":true}}", + "[\"a\",\"b\"]", + "[\"a\",\"b\"]", + "[1,2,3]", + "[1,null,3]", + "[1.1,2.2,3.3]", + "[1.1,2.2,null]", + "[\"a\",\"1\",\"2.2\"]", + "[\"true\",\"false\",\"true\"]", + "[[1,2,null],[3,4]]", + "[{\"x\":1},{\"x\":2}]", + "", + "hello", + 1234L, + 1.234D, + "{\"x\":1,\"y\":\"hello\",\"z\":{\"a\":1.1,\"b\":1234,\"c\":[\"a\",\"b\",\"c\"]}}", + "[\"a\",\"b\",\"c\"]", + "[1,2,3]", + "[1.1,2.2,3.3]", + "[]", + "{}", + "[null,null]", + "[{},{},{}]", + "[{\"a\":\"b\",\"x\":1,\"y\":1.3}]", + 1L + }, + new Object[]{ + 1672531200000L, + "b", + 4L, + 3.3D, + "true", + "1", + "{}", + "1", + "{\"a\":400,\"b\":{\"x\":\"d\",\"y\":1.1,\"z\":[3,4]}}", + "{\"x\":1234,\"z\":{\"a\":[1.1,2.2,3.3],\"b\":true}}", + "[\"d\",\"e\"]", + "[\"b\",\"b\"]", + "[1,4]", + "[1]", + "[2.2,3.3,4.0]", + null, + "[\"a\",\"b\",\"c\"]", + "[null,\"false\",\"true\"]", + "[[1,2],[3,4],[5,6,7]]", + 
"[{\"x\":null},{\"x\":2}]", + "", + "hello", + 1234L, + 1.234D, + "{\"x\":1,\"y\":\"hello\",\"z\":{\"a\":1.1,\"b\":1234,\"c\":[\"a\",\"b\",\"c\"]}}", + "[\"a\",\"b\",\"c\"]", + "[1,2,3]", + "[1.1,2.2,3.3]", + "[]", + "{}", + "[null,null]", + "[{},{},{}]", + "[{\"a\":\"b\",\"x\":1,\"y\":1.3}]", + 1L + }, + new Object[]{ + 1672531200000L, + "c", + 0L, + 4.4D, + "true", + "hello", + "{}", + "[]", + "{\"a\":500,\"b\":{\"x\":\"e\",\"z\":[1,2,3,4]}}", + "{\"x\":11,\"y\":[],\"z\":{\"a\":[null],\"b\":false}}", + null, + null, + "[1,2,3]", + "[]", + "[1.1,2.2,3.3]", + null, + null, + "[\"false\"]", + null, + "[{\"x\":1000},{\"y\":2000}]", + "", + "hello", + 1234L, + 1.234D, + "{\"x\":1,\"y\":\"hello\",\"z\":{\"a\":1.1,\"b\":1234,\"c\":[\"a\",\"b\",\"c\"]}}", + "[\"a\",\"b\",\"c\"]", + "[1,2,3]", + "[1.1,2.2,3.3]", + "[]", + "{}", + "[null,null]", + "[{},{},{}]", + "[{\"a\":\"b\",\"x\":1,\"y\":1.3}]", + 1L + }, + new Object[]{ + 1672531200000L, + "d", + 5L, + 5.9D, + "false", + "", + "\"a\"", + "6", + "{\"a\":600,\"b\":{\"x\":\"f\",\"y\":1.1,\"z\":[6,7,8,9]}}", + null, + "[\"a\",\"b\"]", + null, + null, + "[null,2,9]", + null, + "[999.0,5.5,null]", + "[\"a\",\"1\",\"2.2\"]", + "[]", + "[[1],[1,2,null]]", + "[{\"a\":1},{\"b\":2}]", + "", + "hello", + 1234L, + 1.234D, + "{\"x\":1,\"y\":\"hello\",\"z\":{\"a\":1.1,\"b\":1234,\"c\":[\"a\",\"b\",\"c\"]}}", + "[\"a\",\"b\",\"c\"]", + "[1,2,3]", + "[1.1,2.2,3.3]", + "[]", + "{}", + "[null,null]", + "[{},{},{}]", + "[{\"a\":\"b\",\"x\":1,\"y\":1.3}]", + 1L + }, + new Object[]{ + 1672531200000L, + "null", + 3L, + 2.0D, + "", + "3.0", + "3.3", + "3", + "{\"a\":300}", + "{\"x\":4,\"y\":[{\"l\":[],\"m\":100,\"n\":3},{\"l\":[\"a\"]},{\"l\":[\"b\"],\"n\":[]}],\"z\":{\"a\":[],\"b\":true}}", + "[\"b\",\"c\"]", + "[\"d\",null,\"b\"]", + "[1,2,3,4]", + "[1,2,3]", + "[1.1,3.3]", + "[null,2.2,null]", + "[1,null,1]", + "[\"true\",null,\"true\"]", + "[[1],null,[1,2,3]]", + "[null,{\"x\":2}]", + "", + "hello", + 1234L, + 1.234D, + 
"{\"x\":1,\"y\":\"hello\",\"z\":{\"a\":1.1,\"b\":1234,\"c\":[\"a\",\"b\",\"c\"]}}", + "[\"a\",\"b\",\"c\"]", + "[1,2,3]", + "[1.1,2.2,3.3]", + "[]", + "{}", + "[null,null]", + "[{},{},{}]", + "[{\"a\":\"b\",\"x\":1,\"y\":1.3}]", + 1L + } ) : ImmutableList.of( - new Object[]{1672531200000L, null, null, null, "true", "51", "1", "[]", "{\"a\":700,\"b\":{\"x\":\"g\",\"y\":1.1,\"z\":[9,null,9,9]}}", "{\"x\":400,\"y\":[{\"l\":[null],\"m\":100,\"n\":5},{\"l\":[\"a\",\"b\",\"c\"],\"m\":\"a\",\"n\":1}],\"z\":{}}", null, "[\"a\",\"b\"]", null, "[2,3]", null, "[null]", null, "[\"true\",\"false\",\"true\"]", null, "[{\"x\":1},{\"x\":2}]", null, "hello", 1234L, 1.234D, "{\"x\":1,\"y\":\"hello\",\"z\":{\"a\":1.1,\"b\":1234,\"c\":[\"a\",\"b\",\"c\"]}}", "[\"a\",\"b\",\"c\"]", "[1,2,3]", "[1.1,2.2,3.3]", "[]", "{}", "[null,null]", "[{},{},{}]", "[{\"a\":\"b\",\"x\":1,\"y\":1.3}]", 1L}, - new Object[]{1672531200000L, "", 2L, null, "false", "b", "\"b\"", "2", "{\"a\":200,\"b\":{\"x\":\"b\",\"y\":1.1,\"z\":[2,4,6]}}", "{\"x\":10,\"y\":[{\"l\":[\"b\",\"b\",\"c\"],\"m\":\"b\",\"n\":2},[1,2,3]],\"z\":{\"a\":[5.5],\"b\":false}}", "[\"a\",\"b\",\"c\"]", "[null,\"b\"]", "[2,3]", null, "[3.3,4.4,5.5]", "[999.0,null,5.5]", "[null,null,2.2]", "[\"true\",\"true\"]", "[null,[null],[]]", "[{\"x\":3},{\"x\":4}]", null, "hello", 1234L, 1.234D, "{\"x\":1,\"y\":\"hello\",\"z\":{\"a\":1.1,\"b\":1234,\"c\":[\"a\",\"b\",\"c\"]}}", "[\"a\",\"b\",\"c\"]", "[1,2,3]", "[1.1,2.2,3.3]", "[]", "{}", "[null,null]", "[{},{},{}]", "[{\"a\":\"b\",\"x\":1,\"y\":1.3}]", 1L}, - new Object[]{1672531200000L, "a", 1L, 1.0D, "true", "1", "1", "1", "{\"a\":100,\"b\":{\"x\":\"a\",\"y\":1.1,\"z\":[1,2,3,4]}}", "{\"x\":1234,\"y\":[{\"l\":[\"a\",\"b\",\"c\"],\"m\":\"a\",\"n\":1},{\"l\":[\"a\",\"b\",\"c\"],\"m\":\"a\",\"n\":1}],\"z\":{\"a\":[1.1,2.2,3.3],\"b\":true}}", "[\"a\",\"b\"]", "[\"a\",\"b\"]", "[1,2,3]", "[1,null,3]", "[1.1,2.2,3.3]", "[1.1,2.2,null]", "[\"a\",\"1\",\"2.2\"]", "[\"true\",\"false\",\"true\"]", 
"[[1,2,null],[3,4]]", "[{\"x\":1},{\"x\":2}]", null, "hello", 1234L, 1.234D, "{\"x\":1,\"y\":\"hello\",\"z\":{\"a\":1.1,\"b\":1234,\"c\":[\"a\",\"b\",\"c\"]}}", "[\"a\",\"b\",\"c\"]", "[1,2,3]", "[1.1,2.2,3.3]", "[]", "{}", "[null,null]", "[{},{},{}]", "[{\"a\":\"b\",\"x\":1,\"y\":1.3}]", 1L}, - new Object[]{1672531200000L, "b", 4L, 3.3D, "true", "4", "{}", "4", "{\"a\":400,\"b\":{\"x\":\"d\",\"y\":1.1,\"z\":[3,4]}}", "{\"x\":1234,\"z\":{\"a\":[1.1,2.2,3.3],\"b\":true}}", "[\"d\",\"e\"]", "[\"b\",\"b\"]", "[1,4]", "[1]", "[2.2,3.3,4.0]", null, "[\"a\",\"b\",\"c\"]", "[null,\"false\",\"true\"]", "[[1,2],[3,4],[5,6,7]]", "[{\"x\":null},{\"x\":2}]", null, "hello", 1234L, 1.234D, "{\"x\":1,\"y\":\"hello\",\"z\":{\"a\":1.1,\"b\":1234,\"c\":[\"a\",\"b\",\"c\"]}}", "[\"a\",\"b\",\"c\"]", "[1,2,3]", "[1.1,2.2,3.3]", "[]", "{}", "[null,null]", "[{},{},{}]", "[{\"a\":\"b\",\"x\":1,\"y\":1.3}]", 1L}, - new Object[]{1672531200000L, "c", null, 4.4D, "true", "hello", "{}", "[]", "{\"a\":500,\"b\":{\"x\":\"e\",\"z\":[1,2,3,4]}}", "{\"x\":11,\"y\":[],\"z\":{\"a\":[null],\"b\":false}}", null, null, "[1,2,3]", "[]", "[1.1,2.2,3.3]", null, null, "[\"false\"]", null, "[{\"x\":1000},{\"y\":2000}]", null, "hello", 1234L, 1.234D, "{\"x\":1,\"y\":\"hello\",\"z\":{\"a\":1.1,\"b\":1234,\"c\":[\"a\",\"b\",\"c\"]}}", "[\"a\",\"b\",\"c\"]", "[1,2,3]", "[1.1,2.2,3.3]", "[]", "{}", "[null,null]", "[{},{},{}]", "[{\"a\":\"b\",\"x\":1,\"y\":1.3}]", 1L}, - new Object[]{1672531200000L, "d", 5L, 5.9D, "false", null, "\"a\"", "6", "{\"a\":600,\"b\":{\"x\":\"f\",\"y\":1.1,\"z\":[6,7,8,9]}}", null, "[\"a\",\"b\"]", null, null, "[null,2,9]", null, "[999.0,5.5,null]", "[\"a\",\"1\",\"2.2\"]", "[]", "[[1],[1,2,null]]", "[{\"a\":1},{\"b\":2}]", null, "hello", 1234L, 1.234D, "{\"x\":1,\"y\":\"hello\",\"z\":{\"a\":1.1,\"b\":1234,\"c\":[\"a\",\"b\",\"c\"]}}", "[\"a\",\"b\",\"c\"]", "[1,2,3]", "[1.1,2.2,3.3]", "[]", "{}", "[null,null]", "[{},{},{}]", "[{\"a\":\"b\",\"x\":1,\"y\":1.3}]", 1L}, - new 
Object[]{1672531200000L, "null", 3L, 2.0D, null, "3.0", "3.3", "3", "{\"a\":300}", "{\"x\":4,\"y\":[{\"l\":[],\"m\":100,\"n\":3},{\"l\":[\"a\"]},{\"l\":[\"b\"],\"n\":[]}],\"z\":{\"a\":[],\"b\":true}}", "[\"b\",\"c\"]", "[\"d\",null,\"b\"]", "[1,2,3,4]", "[1,2,3]", "[1.1,3.3]", "[null,2.2,null]", "[1,null,1]", "[\"true\",null,\"true\"]", "[[1],null,[1,2,3]]", "[null,{\"x\":2}]", null, "hello", 1234L, 1.234D, "{\"x\":1,\"y\":\"hello\",\"z\":{\"a\":1.1,\"b\":1234,\"c\":[\"a\",\"b\",\"c\"]}}", "[\"a\",\"b\",\"c\"]", "[1,2,3]", "[1.1,2.2,3.3]", "[]", "{}", "[null,null]", "[{},{},{}]", "[{\"a\":\"b\",\"x\":1,\"y\":1.3}]", 1L} + new Object[]{ + 1672531200000L, + null, + null, + null, + "true", + "51", + "1", + "[]", + "{\"a\":700,\"b\":{\"x\":\"g\",\"y\":1.1,\"z\":[9,null,9,9]}}", + "{\"x\":400,\"y\":[{\"l\":[null],\"m\":100,\"n\":5},{\"l\":[\"a\",\"b\",\"c\"],\"m\":\"a\",\"n\":1}],\"z\":{}}", + null, + "[\"a\",\"b\"]", + null, + "[2,3]", + null, + "[null]", + null, + "[\"true\",\"false\",\"true\"]", + null, + "[{\"x\":1},{\"x\":2}]", + null, + "hello", + 1234L, + 1.234D, + "{\"x\":1,\"y\":\"hello\",\"z\":{\"a\":1.1,\"b\":1234,\"c\":[\"a\",\"b\",\"c\"]}}", + "[\"a\",\"b\",\"c\"]", + "[1,2,3]", + "[1.1,2.2,3.3]", + "[]", + "{}", + "[null,null]", + "[{},{},{}]", + "[{\"a\":\"b\",\"x\":1,\"y\":1.3}]", + 1L + }, + new Object[]{ + 1672531200000L, + "", + 2L, + null, + "false", + "b", + "\"b\"", + "2", + "{\"a\":200,\"b\":{\"x\":\"b\",\"y\":1.1,\"z\":[2,4,6]}}", + "{\"x\":10,\"y\":[{\"l\":[\"b\",\"b\",\"c\"],\"m\":\"b\",\"n\":2},[1,2,3]],\"z\":{\"a\":[5.5],\"b\":false}}", + "[\"a\",\"b\",\"c\"]", + "[null,\"b\"]", + "[2,3]", + null, + "[3.3,4.4,5.5]", + "[999.0,null,5.5]", + "[null,null,2.2]", + "[\"true\",\"true\"]", + "[null,[null],[]]", + "[{\"x\":3},{\"x\":4}]", + null, + "hello", + 1234L, + 1.234D, + "{\"x\":1,\"y\":\"hello\",\"z\":{\"a\":1.1,\"b\":1234,\"c\":[\"a\",\"b\",\"c\"]}}", + "[\"a\",\"b\",\"c\"]", + "[1,2,3]", + "[1.1,2.2,3.3]", + "[]", + "{}", + "[null,null]", + 
"[{},{},{}]", + "[{\"a\":\"b\",\"x\":1,\"y\":1.3}]", + 1L + }, + new Object[]{ + 1672531200000L, + "a", + 1L, + 1.0D, + "true", + "1", + "1", + "1", + "{\"a\":100,\"b\":{\"x\":\"a\",\"y\":1.1,\"z\":[1,2,3,4]}}", + "{\"x\":1234,\"y\":[{\"l\":[\"a\",\"b\",\"c\"],\"m\":\"a\",\"n\":1},{\"l\":[\"a\",\"b\",\"c\"],\"m\":\"a\",\"n\":1}],\"z\":{\"a\":[1.1,2.2,3.3],\"b\":true}}", + "[\"a\",\"b\"]", + "[\"a\",\"b\"]", + "[1,2,3]", + "[1,null,3]", + "[1.1,2.2,3.3]", + "[1.1,2.2,null]", + "[\"a\",\"1\",\"2.2\"]", + "[\"true\",\"false\",\"true\"]", + "[[1,2,null],[3,4]]", + "[{\"x\":1},{\"x\":2}]", + null, + "hello", + 1234L, + 1.234D, + "{\"x\":1,\"y\":\"hello\",\"z\":{\"a\":1.1,\"b\":1234,\"c\":[\"a\",\"b\",\"c\"]}}", + "[\"a\",\"b\",\"c\"]", + "[1,2,3]", + "[1.1,2.2,3.3]", + "[]", + "{}", + "[null,null]", + "[{},{},{}]", + "[{\"a\":\"b\",\"x\":1,\"y\":1.3}]", + 1L + }, + new Object[]{ + 1672531200000L, + "b", + 4L, + 3.3D, + "true", + "1", + "{}", + "4", + "{\"a\":400,\"b\":{\"x\":\"d\",\"y\":1.1,\"z\":[3,4]}}", + "{\"x\":1234,\"z\":{\"a\":[1.1,2.2,3.3],\"b\":true}}", + "[\"d\",\"e\"]", + "[\"b\",\"b\"]", + "[1,4]", + "[1]", + "[2.2,3.3,4.0]", + null, + "[\"a\",\"b\",\"c\"]", + "[null,\"false\",\"true\"]", + "[[1,2],[3,4],[5,6,7]]", + "[{\"x\":null},{\"x\":2}]", + null, + "hello", + 1234L, + 1.234D, + "{\"x\":1,\"y\":\"hello\",\"z\":{\"a\":1.1,\"b\":1234,\"c\":[\"a\",\"b\",\"c\"]}}", + "[\"a\",\"b\",\"c\"]", + "[1,2,3]", + "[1.1,2.2,3.3]", + "[]", + "{}", + "[null,null]", + "[{},{},{}]", + "[{\"a\":\"b\",\"x\":1,\"y\":1.3}]", + 1L + }, + new Object[]{ + 1672531200000L, + "c", + null, + 4.4D, + "true", + "hello", + "{}", + "[]", + "{\"a\":500,\"b\":{\"x\":\"e\",\"z\":[1,2,3,4]}}", + "{\"x\":11,\"y\":[],\"z\":{\"a\":[null],\"b\":false}}", + null, + null, + "[1,2,3]", + "[]", + "[1.1,2.2,3.3]", + null, + null, + "[\"false\"]", + null, + "[{\"x\":1000},{\"y\":2000}]", + null, + "hello", + 1234L, + 1.234D, + 
"{\"x\":1,\"y\":\"hello\",\"z\":{\"a\":1.1,\"b\":1234,\"c\":[\"a\",\"b\",\"c\"]}}", + "[\"a\",\"b\",\"c\"]", + "[1,2,3]", + "[1.1,2.2,3.3]", + "[]", + "{}", + "[null,null]", + "[{},{},{}]", + "[{\"a\":\"b\",\"x\":1,\"y\":1.3}]", + 1L + }, + new Object[]{ + 1672531200000L, + "d", + 5L, + 5.9D, + "false", + null, + "\"a\"", + "6", + "{\"a\":600,\"b\":{\"x\":\"f\",\"y\":1.1,\"z\":[6,7,8,9]}}", + null, + "[\"a\",\"b\"]", + null, + null, + "[null,2,9]", + null, + "[999.0,5.5,null]", + "[\"a\",\"1\",\"2.2\"]", + "[]", + "[[1],[1,2,null]]", + "[{\"a\":1},{\"b\":2}]", + null, + "hello", + 1234L, + 1.234D, + "{\"x\":1,\"y\":\"hello\",\"z\":{\"a\":1.1,\"b\":1234,\"c\":[\"a\",\"b\",\"c\"]}}", + "[\"a\",\"b\",\"c\"]", + "[1,2,3]", + "[1.1,2.2,3.3]", + "[]", + "{}", + "[null,null]", + "[{},{},{}]", + "[{\"a\":\"b\",\"x\":1,\"y\":1.3}]", + 1L + }, + new Object[]{ + 1672531200000L, + "null", + 3L, + 2.0D, + null, + "3.0", + "3.3", + "3", + "{\"a\":300}", + "{\"x\":4,\"y\":[{\"l\":[],\"m\":100,\"n\":3},{\"l\":[\"a\"]},{\"l\":[\"b\"],\"n\":[]}],\"z\":{\"a\":[],\"b\":true}}", + "[\"b\",\"c\"]", + "[\"d\",null,\"b\"]", + "[1,2,3,4]", + "[1,2,3]", + "[1.1,3.3]", + "[null,2.2,null]", + "[1,null,1]", + "[\"true\",null,\"true\"]", + "[[1],null,[1,2,3]]", + "[null,{\"x\":2}]", + null, + "hello", + 1234L, + 1.234D, + "{\"x\":1,\"y\":\"hello\",\"z\":{\"a\":1.1,\"b\":1234,\"c\":[\"a\",\"b\",\"c\"]}}", + "[\"a\",\"b\",\"c\"]", + "[1,2,3]", + "[1.1,2.2,3.3]", + "[]", + "{}", + "[null,null]", + "[{},{},{}]", + "[{\"a\":\"b\",\"x\":1,\"y\":1.3}]", + 1L + } ), RowSignature.builder() .add("__time", ColumnType.LONG) @@ -5285,4 +6016,81 @@ public void testScanAllTypesAuto() .build() ); } + + @Test + public void testFilterJsonIsNotNull() + { + testQuery( + "SELECT nest\n" + + "FROM druid.nested WHERE nest IS NOT NULL", + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource(DATA_SOURCE) + .intervals(querySegmentSpec(Filtration.eternity())) + .columns("nest") + .filters(notNull("nest")) + 
.resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .legacy(false) + .build() + ), + NullHandling.replaceWithDefault() + ? ImmutableList.of() + : ImmutableList.of( + new Object[]{"{\"x\":100,\"y\":2.02,\"z\":\"300\",\"mixed\":1,\"mixed2\":\"1\"}"}, + new Object[]{"{\"x\":200,\"y\":3.03,\"z\":\"abcdef\",\"mixed\":1.1,\"mixed2\":1}"}, + new Object[]{"{\"x\":100,\"y\":2.02,\"z\":\"400\",\"mixed2\":1.1}"} + ), + RowSignature.builder() + .add("nest", ColumnType.NESTED_DATA) + .build() + + ); + } + + @Test + public void testFilterJsonIsNull() + { + testQuery( + "SELECT nest, nester\n" + + "FROM druid.nested WHERE nest IS NULL", + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource(DATA_SOURCE) + .intervals(querySegmentSpec(Filtration.eternity())) + .columns("nest", "nester") + .filters(isNull("nest")) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .legacy(false) + .build() + ), + // selector filter is wrong + NullHandling.replaceWithDefault() + ? 
ImmutableList.of( + new Object[]{ + "{\"x\":100,\"y\":2.02,\"z\":\"300\",\"mixed\":1,\"mixed2\":\"1\"}", + "{\"array\":[\"a\",\"b\"],\"n\":{\"x\":\"hello\"}}" + }, + new Object[]{null, "\"hello\""}, + new Object[]{"{\"x\":200,\"y\":3.03,\"z\":\"abcdef\",\"mixed\":1.1,\"mixed2\":1}", null}, + new Object[]{null, null}, + new Object[]{null, null}, + new Object[]{ + "{\"x\":100,\"y\":2.02,\"z\":\"400\",\"mixed2\":1.1}", + "{\"array\":[\"a\",\"b\"],\"n\":{\"x\":1}}" + }, + new Object[]{null, "2"} + ) + : ImmutableList.of( + new Object[]{null, "\"hello\""}, + new Object[]{null, null}, + new Object[]{null, null}, + new Object[]{null, "2"} + ), + RowSignature.builder() + .add("nest", ColumnType.NESTED_DATA) + .add("nester", ColumnType.NESTED_DATA) + .build() + + ); + } } diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteParameterQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteParameterQueryTest.java index 2266b8d6b825..9d3fecd075cd 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteParameterQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteParameterQueryTest.java @@ -122,7 +122,11 @@ public void testParametersInSelectAndFilter() ImmutableList.of(Druids.newTimeseriesQueryBuilder() .dataSource(CalciteTests.DATASOURCE1) .intervals(querySegmentSpec(Filtration.eternity())) - .filters(numericSelector("dim2", "0", null)) + .filters( + NullHandling.replaceWithDefault() + ? 
numericSelector("dim2", "0", null) + : equality("dim2", 0L, ColumnType.LONG) + ) .granularity(Granularities.ALL) .aggregators(aggregators( new CountAggregatorFactory("a0"), @@ -312,7 +316,7 @@ public void testParametersInStrangePlaces() .setAggregatorSpecs(aggregators( new FilteredAggregatorFactory( new CountAggregatorFactory("a0"), - not(selector("dim2", "a", null)) + not(equality("dim2", "a", ColumnType.STRING)) ), new CountAggregatorFactory("a1") )) @@ -492,7 +496,9 @@ public void testDoubles() .intervals(querySegmentSpec(Filtration.eternity())) .granularity(Granularities.ALL) .filters( - bound("cnt", "1.1", "100000001", true, true, null, StringComparators.NUMERIC) + NullHandling.replaceWithDefault() + ? bound("cnt", "1.1", "100000001", true, true, null, StringComparators.NUMERIC) + : range("cnt", ColumnType.DOUBLE, 1.1, 100000001.0, true, true) ) .aggregators(aggregators(new CountAggregatorFactory("a0"))) .context(QUERY_CONTEXT_DEFAULT) @@ -514,7 +520,12 @@ public void testDoubles() .intervals(querySegmentSpec(Filtration.eternity())) .granularity(Granularities.ALL) .filters( - in("cnt", ImmutableList.of("1.0", "100000001"), null) + NullHandling.replaceWithDefault() + ? 
in("cnt", ImmutableList.of("1.0", "100000001"), null) + : or( + equality("cnt", 1.0, ColumnType.DOUBLE), + equality("cnt", 100000001.0, ColumnType.DOUBLE) + ) ) .aggregators(aggregators(new CountAggregatorFactory("a0"))) .context(QUERY_CONTEXT_DEFAULT) @@ -541,7 +552,7 @@ public void testFloats() .intervals(querySegmentSpec(Filtration.eternity())) .granularity(Granularities.ALL) .filters( - selector("cnt", "1.0", null) + equality("cnt", 1.0, ColumnType.DOUBLE) ) .aggregators(aggregators(new CountAggregatorFactory("a0"))) .context(QUERY_CONTEXT_DEFAULT) @@ -564,7 +575,7 @@ public void testLongs() .dataSource(CalciteTests.DATASOURCE3) .intervals(querySegmentSpec(Filtration.eternity())) .granularity(Granularities.ALL) - .filters(bound("l1", "3", null, true, false, null, StringComparators.NUMERIC)) + .filters(range("l1", ColumnType.LONG, 3L, null, true, false)) .aggregators(aggregators(new CountAggregatorFactory("a0"))) .context(QUERY_CONTEXT_DEFAULT) .build() @@ -643,7 +654,7 @@ public void testWrongTypeParameter() .filters( and( bound("l1", "3", null, true, false, null, StringComparators.NUMERIC), - selector("f1", useDefault ? 
"0.0" : null, null) + selector("f1", "0.0", null) ) ) .aggregators(aggregators(new CountAggregatorFactory("a0"))) diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java index de6697388fc8..ba75fa2f0f27 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java @@ -80,14 +80,10 @@ import org.apache.druid.query.expression.TestExprMacroTable; import org.apache.druid.query.extraction.RegexDimExtractionFn; import org.apache.druid.query.extraction.SubstringDimExtractionFn; -import org.apache.druid.query.filter.AndDimFilter; -import org.apache.druid.query.filter.BoundDimFilter; import org.apache.druid.query.filter.DimFilter; import org.apache.druid.query.filter.InDimFilter; import org.apache.druid.query.filter.LikeDimFilter; -import org.apache.druid.query.filter.OrDimFilter; import org.apache.druid.query.filter.RegexDimFilter; -import org.apache.druid.query.filter.SelectorDimFilter; import org.apache.druid.query.groupby.GroupByQuery; import org.apache.druid.query.groupby.GroupByQueryConfig; import org.apache.druid.query.groupby.orderby.DefaultLimitSpec; @@ -1391,13 +1387,13 @@ public void testFirstLatestAggregatorsSkipNulls() final DimFilter filter; if (useDefault) { - filter = not(selector("dim1", null, null)); + filter = notNull("dim1"); } else { filter = and( - not(selector("dim1", null, null)), - not(selector("l1", null, null)), - not(selector("d1", null, null)), - not(selector("f1", null, null)) + notNull("dim1"), + notNull("l1"), + notNull("d1"), + notNull("f1") ); } testQuery( @@ -1461,13 +1457,13 @@ public void testAnyAggregatorsSkipNullsWithFilter() { final DimFilter filter; if (useDefault) { - filter = not(selector("dim1", null, null)); + filter = notNull("dim1"); } else { filter = and( - not(selector("dim1", null, null)), - not(selector("l2", null, null)), - 
not(selector("d2", null, null)), - not(selector("f2", null, null)) + notNull("dim1"), + notNull("l2"), + notNull("d2"), + notNull("f2") ); } testQuery( @@ -2119,7 +2115,7 @@ public void testFilterOnFloat() .intervals(querySegmentSpec(Filtration.eternity())) .granularity(Granularities.ALL) .aggregators(aggregators(new CountAggregatorFactory("a0"))) - .filters(selector("m1", "1.0", null)) + .filters(equality("m1", 1.0, ColumnType.DOUBLE)) .context(QUERY_CONTEXT_DEFAULT) .build() ), @@ -2140,7 +2136,7 @@ public void testFilterOnDouble() .intervals(querySegmentSpec(Filtration.eternity())) .granularity(Granularities.ALL) .aggregators(aggregators(new CountAggregatorFactory("a0"))) - .filters(selector("m2", "1.0", null)) + .filters(equality("m2", 1.0, ColumnType.DOUBLE)) .context(QUERY_CONTEXT_DEFAULT) .build() ), @@ -2161,7 +2157,13 @@ public void testHavingOnGrandTotal() .setInterval(querySegmentSpec(Filtration.eternity())) .setGranularity(Granularities.ALL) .setAggregatorSpecs(aggregators(new DoubleSumAggregatorFactory("a0", "m1"))) - .setHavingSpec(having(selector("a0", "21", null))) + .setHavingSpec( + having( + NullHandling.replaceWithDefault() + ? selector("a0", "21") + : equality("a0", 21.0, ColumnType.DOUBLE) + ) + ) .setContext(QUERY_CONTEXT_DEFAULT) .build() ), @@ -2185,15 +2187,13 @@ public void testHavingOnDoubleSum() .setAggregatorSpecs(aggregators(new DoubleSumAggregatorFactory("a0", "m1"))) .setHavingSpec( having( - new BoundDimFilter( + range( "a0", - "1", + ColumnType.LONG, + 1L, null, true, - false, - false, - null, - StringComparators.NUMERIC + false ) ) ) @@ -2236,14 +2236,13 @@ public void testHavingOnApproximateCountDistinct() ) .setHavingSpec( having( - bound( + range( "a0", - "1", + ColumnType.LONG, + 1L, null, true, - false, - null, - StringComparators.NUMERIC + false ) ) ) @@ -2296,20 +2295,19 @@ public void testHavingOnExactCountDistinct() ? 
new CountAggregatorFactory("a0") : new FilteredAggregatorFactory( new CountAggregatorFactory("a0"), - not(selector("d1", null, null)) + notNull("d1") ) ) ) .setHavingSpec( having( - bound( + range( "a0", - "1", + ColumnType.LONG, + 1L, null, true, - false, - null, - StringComparators.NUMERIC + false ) ) ) @@ -2407,13 +2405,13 @@ public void testExactCountDistinctWithFilter() new FilteredAggregatorFactory( new CountAggregatorFactory("_a0"), and( - not(selector("d0", null, null)), - selector("a1", "0", null) + notNull("d0"), + equality("a1", 0L, ColumnType.LONG) ) ), new FilteredAggregatorFactory( new LongMinAggregatorFactory("_a1", "a0"), - selector("a1", "3", null) + equality("a1", 3L, ColumnType.LONG) ) )) .setContext(QUERY_CONTEXT_DEFAULT) @@ -2439,15 +2437,13 @@ public void testHavingOnFloatSum() .setAggregatorSpecs(aggregators(new DoubleSumAggregatorFactory("a0", "m1"))) .setHavingSpec( having( - new BoundDimFilter( + range( "a0", - "1", + ColumnType.LONG, + 1L, null, true, - false, - false, - null, - StringComparators.NUMERIC + false ) ) ) @@ -2515,7 +2511,7 @@ public void testHavingOnRatio() .setAggregatorSpecs(aggregators( new FilteredAggregatorFactory( new CountAggregatorFactory("a0"), - not(selector("dim2", "a", null)) + not(equality("dim2", "a", ColumnType.STRING)) ), new CountAggregatorFactory("a1") )) @@ -3278,7 +3274,13 @@ public void testPruneDeadAggregatorsThroughHaving() .setInterval(querySegmentSpec(Filtration.eternity())) .setGranularity(Granularities.ALL) .setAggregatorSpecs(aggregators(new DoubleSumAggregatorFactory("a0", "m1"))) - .setHavingSpec(having(selector("a0", "21", null))) + .setHavingSpec( + having( + NullHandling.replaceWithDefault() + ? selector("a0", "21") + : equality("a0", 21.0, ColumnType.DOUBLE) + ) + ) .setContext(QUERY_CONTEXT_DEFAULT) .build() ), @@ -3391,10 +3393,10 @@ public void testNullEmptyStringEquality() // (dim2 != 'a') component is unnecessary. 
.filters( or( - selector("dim2", "a", null), + equality("dim2", "a", ColumnType.STRING), and( - selector("dim2", null, null), - not(selector("dim2", "a", null)) + isNull("dim2"), + not(equality("dim2", "a", ColumnType.STRING)) ) ) ) @@ -3439,7 +3441,7 @@ public void testNullLongFilter() .dataSource(CalciteTests.DATASOURCE3) .intervals(querySegmentSpec(Filtration.eternity())) .granularity(Granularities.ALL) - .filters(selector("l1", null, null)) + .filters(isNull("l1")) .aggregators(aggregators(new CountAggregatorFactory("a0"))) .context(QUERY_CONTEXT_DEFAULT) .build() @@ -3477,7 +3479,7 @@ public void testNullDoubleFilter() .dataSource(CalciteTests.DATASOURCE3) .intervals(querySegmentSpec(Filtration.eternity())) .granularity(Granularities.ALL) - .filters(selector("d1", null, null)) + .filters(isNull("d1")) .aggregators(aggregators(new CountAggregatorFactory("a0"))) .context(QUERY_CONTEXT_DEFAULT) .build() @@ -3515,7 +3517,7 @@ public void testNullFloatFilter() .dataSource(CalciteTests.DATASOURCE3) .intervals(querySegmentSpec(Filtration.eternity())) .granularity(Granularities.ALL) - .filters(selector("f1", null, null)) + .filters(isNull("f1")) .aggregators(aggregators(new CountAggregatorFactory("a0"))) .context(QUERY_CONTEXT_DEFAULT) .build() @@ -3710,7 +3712,7 @@ public void testLongPredicateFilterNulls() .dataSource(CalciteTests.DATASOURCE3) .intervals(querySegmentSpec(Filtration.eternity())) .granularity(Granularities.ALL) - .filters(bound("l1", "3", null, true, false, null, StringComparators.NUMERIC)) + .filters(range("l1", ColumnType.LONG, 3L, null, true, false)) .aggregators(aggregators(new CountAggregatorFactory("a0"))) .context(QUERY_CONTEXT_DEFAULT) .build() @@ -3731,7 +3733,7 @@ public void testDoublePredicateFilterNulls() .dataSource(CalciteTests.DATASOURCE3) .intervals(querySegmentSpec(Filtration.eternity())) .granularity(Granularities.ALL) - .filters(bound("d1", "0", null, true, false, null, StringComparators.NUMERIC)) + .filters(range("d1", 
ColumnType.LONG, 0L, null, true, false)) .aggregators(aggregators(new CountAggregatorFactory("a0"))) .context(QUERY_CONTEXT_DEFAULT) .build() @@ -3752,7 +3754,7 @@ public void testFloatPredicateFilterNulls() .dataSource(CalciteTests.DATASOURCE3) .intervals(querySegmentSpec(Filtration.eternity())) .granularity(Granularities.ALL) - .filters(bound("f1", "0", null, true, false, null, StringComparators.NUMERIC)) + .filters(range("f1", ColumnType.LONG, 0L, null, true, false)) .aggregators(aggregators(new CountAggregatorFactory("a0"))) .context(QUERY_CONTEXT_DEFAULT) .build() @@ -3794,7 +3796,7 @@ public void testEmptyStringEquality() .dataSource(CalciteTests.DATASOURCE1) .intervals(querySegmentSpec(Filtration.eternity())) .granularity(Granularities.ALL) - .filters(selector("dim2", "", null)) + .filters(equality("dim2", "", ColumnType.STRING)) .aggregators(aggregators(new CountAggregatorFactory("a0"))) .context(QUERY_CONTEXT_DEFAULT) .build() @@ -3897,18 +3899,19 @@ public void testCoalesceColumnsFilter() ) ) .setDimFilter( - new OrDimFilter( - new AndDimFilter( - selector("dim1", "a", null), - selector("dim2", null, null) + or( + and( + equality("dim1", "a", ColumnType.STRING), + isNull("dim2") ), - new AndDimFilter( - selector("dim1", "abc", null), - selector("dim2", null, null) + and( + equality("dim1", "abc", ColumnType.STRING), + isNull("dim2") ), - new InDimFilter( + in( "dim2", - ImmutableSet.of("a", "abc") + ImmutableSet.of("a", "abc"), + null ) ) ) @@ -3989,7 +3992,7 @@ public void testColumnIsNull() .dataSource(CalciteTests.DATASOURCE1) .intervals(querySegmentSpec(Filtration.eternity())) .granularity(Granularities.ALL) - .filters(selector("dim2", null, null)) + .filters(isNull("dim2")) .aggregators(aggregators(new CountAggregatorFactory("a0"))) .context(QUERY_CONTEXT_DEFAULT) .build() @@ -4140,7 +4143,7 @@ public void testGroupByWithFilterMatchingNothing() Druids.newTimeseriesQueryBuilder() .dataSource(CalciteTests.DATASOURCE1) 
.intervals(querySegmentSpec(Filtration.eternity())) - .filters(selector("dim1", "foobar", null)) + .filters(equality("dim1", "foobar", ColumnType.STRING)) .granularity(Granularities.ALL) .aggregators(aggregators( new CountAggregatorFactory("a0"), @@ -4186,7 +4189,7 @@ public void testGroupByWithFilterMatchingNothingWithGroupByLiteral() Druids.newTimeseriesQueryBuilder() .dataSource(CalciteTests.DATASOURCE1) .intervals(querySegmentSpec(Filtration.eternity())) - .filters(selector("dim1", "foobar", null)) + .filters(equality("dim1", "foobar", ColumnType.STRING)) .granularity(Granularities.ALL) .aggregators(aggregators( new CountAggregatorFactory("a0"), @@ -4215,7 +4218,7 @@ public void testCountNonNullColumn() ? new CountAggregatorFactory("a0") : new FilteredAggregatorFactory( new CountAggregatorFactory("a0"), - not(selector("cnt", null, null)) + notNull("cnt") ) ) ) @@ -4241,7 +4244,7 @@ public void testCountNullableColumn() .aggregators(aggregators( new FilteredAggregatorFactory( new CountAggregatorFactory("a0"), - not(selector("dim2", null, null)) + notNull("dim2") ) )) .context(QUERY_CONTEXT_DEFAULT) @@ -4313,8 +4316,8 @@ public void testCountStarOnCommonTableExpression() .dataSource(CalciteTests.DATASOURCE1) .intervals(querySegmentSpec(Filtration.eternity())) .filters(and( - selector("dim2", "a", null), - not(selector("dim1", "z", new SubstringDimExtractionFn(0, 1))) + equality("dim2", "a", ColumnType.STRING), + not(equality("dim1", "z", new SubstringDimExtractionFn(0, 1), ColumnType.STRING)) )) .granularity(Granularities.ALL) .aggregators(aggregators(new CountAggregatorFactory("a0"))) @@ -4337,8 +4340,8 @@ public void testCountStarOnView() .dataSource(CalciteTests.DATASOURCE1) .intervals(querySegmentSpec(Filtration.eternity())) .filters(and( - selector("dim2", "a", null), - not(selector("dim1", "z", new SubstringDimExtractionFn(0, 1))) + equality("dim2", "a", ColumnType.STRING), + not(equality("dim1", "z", new SubstringDimExtractionFn(0, 1), ColumnType.STRING)) 
)) .granularity(Granularities.ALL) .aggregators(aggregators(new CountAggregatorFactory("a0"))) @@ -4361,8 +4364,8 @@ public void testConfusedView() .dataSource(CalciteTests.DATASOURCE1) .intervals(querySegmentSpec(Filtration.eternity())) .filters(and( - selector("dim2", "a", null), - not(selector("dim1", "z", new SubstringDimExtractionFn(0, 1))) + equality("dim2", "a", ColumnType.STRING), + not(equality("dim1", "z", new SubstringDimExtractionFn(0, 1), ColumnType.STRING)) )) .granularity(Granularities.ALL) .aggregators(aggregators(new CountAggregatorFactory("a0"))) @@ -4413,8 +4416,8 @@ public void testCountStarWithLongColumnFilters() .granularity(Granularities.ALL) .filters( or( - bound("cnt", "3", null, false, false, null, StringComparators.NUMERIC), - selector("cnt", "1", null) + range("cnt", ColumnType.LONG, 3L, null, false, false), + equality("cnt", 1L, ColumnType.LONG) ) ) .aggregators(aggregators(new CountAggregatorFactory("a0"))) @@ -4439,7 +4442,9 @@ public void testCountStarWithLongColumnFiltersOnFloatLiterals() .intervals(querySegmentSpec(Filtration.eternity())) .granularity(Granularities.ALL) .filters( - bound("cnt", "1.1", "100000001.0", true, true, null, StringComparators.NUMERIC) + NullHandling.replaceWithDefault() + ? 
bound("cnt", "1.1", "100000001.0", true, true, null, StringComparators.NUMERIC) + : range("cnt", ColumnType.DOUBLE, 1.1, 100000001.0, true, true) ) .aggregators(aggregators(new CountAggregatorFactory("a0"))) .context(QUERY_CONTEXT_DEFAULT) @@ -4458,7 +4463,7 @@ public void testCountStarWithLongColumnFiltersOnFloatLiterals() .intervals(querySegmentSpec(Filtration.eternity())) .granularity(Granularities.ALL) .filters( - selector("cnt", "1.0", null) + equality("cnt", 1.0, ColumnType.DOUBLE) ) .aggregators(aggregators(new CountAggregatorFactory("a0"))) .context(QUERY_CONTEXT_DEFAULT) @@ -4477,7 +4482,9 @@ public void testCountStarWithLongColumnFiltersOnFloatLiterals() .intervals(querySegmentSpec(Filtration.eternity())) .granularity(Granularities.ALL) .filters( - selector("cnt", "100000001.0", null) + NullHandling.replaceWithDefault() + ? selector("cnt", "100000001.0") + : equality("cnt", 100000001.0, ColumnType.DOUBLE) ) .aggregators(aggregators(new CountAggregatorFactory("a0"))) .context(QUERY_CONTEXT_DEFAULT) @@ -4496,7 +4503,9 @@ public void testCountStarWithLongColumnFiltersOnFloatLiterals() .intervals(querySegmentSpec(Filtration.eternity())) .granularity(Granularities.ALL) .filters( - in("cnt", ImmutableList.of("1.0", "100000001.0"), null) + NullHandling.replaceWithDefault() + ? in("cnt", ImmutableList.of("1.0", "100000001.0"), null) + : or(equality("cnt", 1.0, ColumnType.DOUBLE), equality("cnt", 1.00000001E8, ColumnType.DOUBLE)) ) .aggregators(aggregators(new CountAggregatorFactory("a0"))) .context(QUERY_CONTEXT_DEFAULT) @@ -4518,7 +4527,14 @@ public void testCountStarWithLongColumnFiltersOnTwoPoints() .dataSource(CalciteTests.DATASOURCE1) .intervals(querySegmentSpec(Filtration.eternity())) .granularity(Granularities.ALL) - .filters(in("cnt", ImmutableList.of("1", "2"), null)) + .filters( + NullHandling.replaceWithDefault() + ? 
in("cnt", ImmutableList.of("1", "2"), null) + : or( + equality("cnt", 1L, ColumnType.LONG), + equality("cnt", 2L, ColumnType.LONG) + ) + ) .aggregators(aggregators(new CountAggregatorFactory("a0"))) .context(QUERY_CONTEXT_DEFAULT) .build() @@ -4550,13 +4566,46 @@ public void testFilterOnStringAsNumber() ) ) .setDimFilter( - or( - bound("dim1", "10", "10", false, false, null, StringComparators.NUMERIC), + NullHandling.replaceWithDefault() + ? or( + numericSelector("dim1", "10", null), and( - selector("v0", "10.00", null), + selector("v0", "10.00"), bound("dim1", "9", "10.5", true, false, null, StringComparators.NUMERIC) ) ) + : or( + equality("dim1", 10L, ColumnType.LONG), + and( + equality("v0", 10.0, ColumnType.DOUBLE), + range("dim1", ColumnType.DOUBLE, 9.0, 10.5, true, false) + ) + ) + ) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{"10.1"} + ) + ); + } + + @Test + public void testFilterOnStringAsNumber2() + { + testQuery( + "SELECT distinct dim1 FROM druid.foo WHERE CAST(dim1 AS float) > 9 and CAST(dim1 AS float) <= 10.5", + ImmutableList.of( + GroupByQuery.builder() + .setDataSource(CalciteTests.DATASOURCE1) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setDimensions(dimensions(new DefaultDimensionSpec("dim1", "d0"))) + .setDimFilter( + NullHandling.replaceWithDefault() + ? 
bound("dim1", "9", "10.5", true, false, null, StringComparators.NUMERIC) + : range("dim1", ColumnType.DOUBLE, 9.0, 10.5, true, false) ) .setContext(QUERY_CONTEXT_DEFAULT) .build() @@ -4654,7 +4703,7 @@ public void testSimpleAggregations() new CountAggregatorFactory("a0"), new FilteredAggregatorFactory( new CountAggregatorFactory("a1"), - not(selector("dim1", null, null)) + notNull("dim1") ), new LongSumAggregatorFactory("a2:sum", "cnt"), new CountAggregatorFactory("a2:count"), @@ -4663,7 +4712,7 @@ public void testSimpleAggregations() new LongMaxAggregatorFactory("a5", "cnt"), new FilteredAggregatorFactory( new CountAggregatorFactory("a6"), - not(selector("dim2", null, null)) + notNull("dim2") ), new DoubleSumAggregatorFactory("a7:sum", "d1"), new CountAggregatorFactory("a7:count") @@ -4672,32 +4721,32 @@ public void testSimpleAggregations() new CountAggregatorFactory("a0"), new FilteredAggregatorFactory( new CountAggregatorFactory("a1"), - not(selector("cnt", null, null)) + notNull("cnt") ), new FilteredAggregatorFactory( new CountAggregatorFactory("a2"), - not(selector("dim1", null, null)) + notNull("dim1") ), new LongSumAggregatorFactory("a3:sum", "cnt"), new FilteredAggregatorFactory( new CountAggregatorFactory("a3:count"), - not(selector("cnt", null, null)) + notNull("cnt") ), new LongSumAggregatorFactory("a4", "cnt"), new LongMinAggregatorFactory("a5", "cnt"), new LongMaxAggregatorFactory("a6", "cnt"), new FilteredAggregatorFactory( new CountAggregatorFactory("a7"), - not(selector("dim2", null, null)) + notNull("dim2") ), new FilteredAggregatorFactory( new CountAggregatorFactory("a8"), - not(selector("d1", null, null)) + notNull("d1") ), new DoubleSumAggregatorFactory("a9:sum", "d1"), new FilteredAggregatorFactory( new CountAggregatorFactory("a9:count"), - not(selector("d1", null, null)) + notNull("d1") ) ) ) @@ -4885,49 +4934,49 @@ public void testFilteredAggregations() .aggregators(aggregators( new FilteredAggregatorFactory( new 
LongSumAggregatorFactory("a0", "cnt"), - selector("dim1", "abc", null) + equality("dim1", "abc", ColumnType.STRING) ), new FilteredAggregatorFactory( new LongSumAggregatorFactory("a1", "cnt"), - not(selector("dim1", "abc", null)) + not(equality("dim1", "abc", ColumnType.STRING)) ), new FilteredAggregatorFactory( new LongSumAggregatorFactory("a2", "cnt"), - selector("dim1", "a", new SubstringDimExtractionFn(0, 1)) + equality("dim1", "a", new SubstringDimExtractionFn(0, 1), ColumnType.STRING) ), new FilteredAggregatorFactory( new CountAggregatorFactory("a3"), and( - not(selector("dim2", null, null)), - not(selector("dim1", "1", null)) + notNull("dim2"), + not(equality("dim1", "1", ColumnType.STRING)) ) ), new FilteredAggregatorFactory( new CountAggregatorFactory("a4"), - not(selector("dim1", "1", null)) + not(equality("dim1", "1", ColumnType.STRING)) ), new FilteredAggregatorFactory( new CountAggregatorFactory("a5"), - not(selector("dim1", "1", null)) + not(equality("dim1", "1", ColumnType.STRING)) ), new FilteredAggregatorFactory( new LongSumAggregatorFactory("a6", "cnt"), - selector("dim2", "a", null) + equality("dim2", "a", ColumnType.STRING) ), new FilteredAggregatorFactory( new LongSumAggregatorFactory("a7", "cnt"), and( - selector("dim2", "a", null), - not(selector("dim1", "1", null)) + equality("dim2", "a", ColumnType.STRING), + not(equality("dim1", "1", ColumnType.STRING)) ) ), new FilteredAggregatorFactory( new LongSumAggregatorFactory("a8", "cnt"), - not(selector("dim1", "1", null)) + not(equality("dim1", "1", ColumnType.STRING)) ), new FilteredAggregatorFactory( new LongMaxAggregatorFactory("a9", "cnt"), - not(selector("dim1", "1", null)) + not(equality("dim1", "1", ColumnType.STRING)) ), new FilteredAggregatorFactory( new CardinalityAggregatorFactory( @@ -4937,11 +4986,14 @@ public void testFilteredAggregations() false, true ), - not(selector("dim1", "1", null)) + not(equality("dim1", "1", ColumnType.STRING)) ), new FilteredAggregatorFactory( new 
LongSumAggregatorFactory("a11", "cnt"), - and(selector("dim2", "a", null), selector("dim1", "b", null)) + and( + equality("dim2", "a", ColumnType.STRING), + equality("dim1", "b", ColumnType.STRING) + ) ) )) .context(QUERY_CONTEXT_DEFAULT) @@ -4975,7 +5027,7 @@ public void testCaseFilteredAggregationWithGroupBy() .setAggregatorSpecs(aggregators( new FilteredAggregatorFactory( new CountAggregatorFactory("a0"), - not(selector("dim1", "1", null)) + not(equality("dim1", "1", ColumnType.STRING)) ), new LongSumAggregatorFactory("a1", "cnt") )) @@ -5006,13 +5058,13 @@ public void testFilteredAggregationWithNotIn() aggregators( new FilteredAggregatorFactory( new CountAggregatorFactory("a0"), - not(selector("dim1", "1", null)) + not(equality("dim1", "1", ColumnType.STRING)) ), new FilteredAggregatorFactory( new CountAggregatorFactory("a1"), and( - not(selector("dim2", null, null)), - not(selector("dim1", "1", null)) + notNull("dim2"), + not(equality("dim1", "1", ColumnType.STRING)) ) ) ) @@ -5097,7 +5149,7 @@ public void testExpressionFilteringAndGrouping() .setVirtualColumns( expressionVirtualColumn("v0", "(floor((\"m1\" / 2)) * 2)", ColumnType.FLOAT) ) - .setDimFilter(bound("v0", "-1", null, true, false, null, StringComparators.NUMERIC)) + .setDimFilter(range("v0", ColumnType.LONG, -1L, null, true, false)) .setDimensions(dimensions(new DefaultDimensionSpec("v0", "d0", ColumnType.FLOAT))) .setAggregatorSpecs(aggregators(new CountAggregatorFactory("a0"))) .setLimitSpec( @@ -5144,7 +5196,7 @@ public void testExpressionFilteringAndGroupingUsingCastToLong() expressionVirtualColumn("v0", "((CAST(\"m1\", 'LONG') / 2) * 2)", ColumnType.LONG) ) .setDimFilter( - bound("v0", "-1", null, true, false, null, StringComparators.NUMERIC) + range("v0", ColumnType.LONG, -1L, null, true, false) ) .setDimensions(dimensions(new DefaultDimensionSpec("v0", "d0", ColumnType.LONG))) .setAggregatorSpecs(aggregators(new CountAggregatorFactory("a0"))) @@ -5196,7 +5248,7 @@ public void 
testExpressionFilteringAndGroupingOnStringCastToNumber() ) ) .setDimFilter( - bound("v0", "-1", null, true, false, null, StringComparators.NUMERIC) + range("v0", ColumnType.LONG, -1L, null, true, false) ) .setDimensions(dimensions(new DefaultDimensionSpec("v0", "d0", ColumnType.FLOAT))) .setAggregatorSpecs(aggregators(new CountAggregatorFactory("a0"))) @@ -5268,7 +5320,14 @@ public void testSqlIsNullToInFilter() .setInterval(querySegmentSpec(Filtration.eternity())) .setGranularity(Granularities.ALL) .setDimensions(dimensions(new DefaultDimensionSpec("dim1", "d0"))) - .setDimFilter(new InDimFilter("dim1", Arrays.asList("abc", "def", "ghi", null), null)) + .setDimFilter( + NullHandling.replaceWithDefault() + ? in("dim1", Arrays.asList("abc", "def", "ghi", ""), null) + : or( + isNull("dim1"), + in("dim1", Arrays.asList("abc", "def", "ghi"), null) + ) + ) .setAggregatorSpecs( aggregators( new CountAggregatorFactory("a0") @@ -5337,7 +5396,7 @@ public void testCountStarWithDegenerateFilter() .intervals(querySegmentSpec(Filtration.eternity())) .granularity(Granularities.ALL) .filters( - selector("dim2", "a", null) + equality("dim2", "a", ColumnType.STRING) ) .aggregators(aggregators(new CountAggregatorFactory("a0"))) .context(QUERY_CONTEXT_DEFAULT) @@ -5417,7 +5476,7 @@ public void testCountStarWithBoundFilterSimplifyOnMetric() .dataSource(CalciteTests.DATASOURCE1) .intervals(querySegmentSpec(Filtration.eternity())) .granularity(Granularities.ALL) - .filters(bound("m1", "2.5", "3.5", true, true, null, StringComparators.NUMERIC)) + .filters(range("m1", ColumnType.DOUBLE, 2.5, 3.5, true, true)) .aggregators(aggregators(new CountAggregatorFactory("a0"))) .context(QUERY_CONTEXT_DEFAULT) .build() @@ -5438,7 +5497,7 @@ public void testCountStarWithBoundFilterSimplifyOr() .dataSource(CalciteTests.DATASOURCE1) .intervals(querySegmentSpec(Filtration.eternity())) .granularity(Granularities.ALL) - .filters(bound("dim1", "a", "b", false, true, null, StringComparators.LEXICOGRAPHIC)) 
+ .filters(range("dim1", ColumnType.STRING, "a", "b", false, true)) .aggregators(aggregators(new CountAggregatorFactory("a0"))) .context(QUERY_CONTEXT_DEFAULT) .build() @@ -5543,7 +5602,7 @@ public void testCountStarWithBoundFilterSimplifyAnd() .dataSource(CalciteTests.DATASOURCE1) .intervals(querySegmentSpec(Filtration.eternity())) .granularity(Granularities.ALL) - .filters(selector("dim1", "abc", null)) + .filters(equality("dim1", "abc", ColumnType.STRING)) .aggregators(aggregators(new CountAggregatorFactory("a0"))) .context(QUERY_CONTEXT_DEFAULT) .build() @@ -5564,7 +5623,11 @@ public void testCountStarWithFilterOnCastedString() .dataSource(CalciteTests.DATASOURCE1) .intervals(querySegmentSpec(Filtration.eternity())) .granularity(Granularities.ALL) - .filters(numericSelector("dim1", "2", null)) + .filters( + NullHandling.replaceWithDefault() + ? numericSelector("dim1", "2", null) + : equality("dim1", 2L, ColumnType.LONG) + ) .aggregators(aggregators(new CountAggregatorFactory("a0"))) .context(QUERY_CONTEXT_DEFAULT) .build() @@ -5891,12 +5954,12 @@ public void testCountStarWithComplexDisjointTimeFilter() .granularity(Granularities.ALL) .filters( and( - selector("dim2", "a", null), + equality("dim2", "a", ColumnType.STRING), or( - timeBound("2000/2001"), + timeRange("2000/2001"), and( - selector("dim1", "abc", null), - timeBound("2002-05-01/2003-05-01") + equality("dim1", "abc", ColumnType.STRING), + timeRange("2002-05-01/2003-05-01") ) ) ) @@ -5931,12 +5994,12 @@ public void testCountStarWithNotOfComplexDisjointTimeFilter() .intervals(querySegmentSpec(Filtration.eternity())) .filters( or( - not(selector("dim2", "a", null)), + not(equality("dim2", "a", ColumnType.STRING)), and( - not(timeBound("2000/2001")), + not(timeRange("2000/2001")), not(and( - selector("dim1", "abc", null), - timeBound("2002-05-01/2003-05-01") + equality("dim1", "abc", ColumnType.STRING), + timeRange("2002-05-01/2003-05-01") )) ) ) @@ -5970,7 +6033,7 @@ public void 
testCountStarWithNotTimeFilter() new Interval(DateTimes.of("2004"), DateTimes.MAX) ) ) - .filters(not(selector("dim1", "xxx", null))) + .filters(not(equality("dim1", "xxx", ColumnType.STRING))) .granularity(Granularities.ALL) .aggregators(aggregators(new CountAggregatorFactory("a0"))) .context(QUERY_CONTEXT_DEFAULT) @@ -5993,7 +6056,7 @@ public void testCountStarWithTimeAndDimFilter() Druids.newTimeseriesQueryBuilder() .dataSource(CalciteTests.DATASOURCE1) .intervals(querySegmentSpec(Intervals.of("2000-01-01/2001-01-01"))) - .filters(not(selector("dim2", "a", null))) + .filters(not(equality("dim2", "a", ColumnType.STRING))) .granularity(Granularities.ALL) .aggregators(aggregators(new CountAggregatorFactory("a0"))) .context(QUERY_CONTEXT_DEFAULT) @@ -6018,15 +6081,14 @@ public void testCountStarWithTimeOrDimFilter() .intervals(querySegmentSpec(Filtration.eternity())) .filters( or( - not(selector("dim2", "a", null)), - bound( + not(equality("dim2", "a", ColumnType.STRING)), + range( "__time", - String.valueOf(timestamp("2000-01-01")), - String.valueOf(timestamp("2000-12-31T23:59:59.999")), + ColumnType.LONG, + timestamp("2000-01-01"), + timestamp("2000-12-31T23:59:59.999"), false, - false, - null, - StringComparators.NUMERIC + false ) ) ) @@ -6054,14 +6116,13 @@ public void testCountStarWithTimeFilterOnLongColumnUsingExtractEpoch() .intervals(querySegmentSpec(Filtration.eternity())) .granularity(Granularities.ALL) .filters( - bound( + range( "cnt", - String.valueOf(DateTimes.of("1970-01-01").getMillis()), - String.valueOf(DateTimes.of("1970-01-02").getMillis()), + ColumnType.LONG, + DateTimes.of("1970-01-01").getMillis(), + DateTimes.of("1970-01-02").getMillis(), false, - true, - null, - StringComparators.NUMERIC + true ) ) .aggregators(aggregators(new CountAggregatorFactory("a0"))) @@ -6087,14 +6148,13 @@ public void testCountStarWithTimeFilterOnLongColumnUsingExtractEpochFromDate() .intervals(querySegmentSpec(Filtration.eternity())) .granularity(Granularities.ALL) 
.filters( - bound( + range( "cnt", - String.valueOf(DateTimes.of("1970-01-01").getMillis()), - String.valueOf(DateTimes.of("1970-01-02").getMillis()), + ColumnType.LONG, + DateTimes.of("1970-01-01").getMillis(), + DateTimes.of("1970-01-02").getMillis(), false, - true, - null, - StringComparators.NUMERIC + true ) ) .aggregators(aggregators(new CountAggregatorFactory("a0"))) @@ -6120,14 +6180,13 @@ public void testCountStarWithTimeFilterOnLongColumnUsingTimestampToMillis() .intervals(querySegmentSpec(Filtration.eternity())) .granularity(Granularities.ALL) .filters( - bound( + range( "cnt", - String.valueOf(DateTimes.of("1970-01-01").getMillis()), - String.valueOf(DateTimes.of("1970-01-02").getMillis()), + ColumnType.LONG, + DateTimes.of("1970-01-01").getMillis(), + DateTimes.of("1970-01-02").getMillis(), false, - true, - null, - StringComparators.NUMERIC + true ) ) .aggregators(aggregators(new CountAggregatorFactory("a0"))) @@ -6226,14 +6285,13 @@ public void testTimeseriesWithTimeFilterOnLongColumnUsingMillisToTimestamp() expressionVirtualColumn("v0", "timestamp_floor(\"cnt\",'P1Y',null,'UTC')", ColumnType.LONG) ) .setDimFilter( - bound( + range( "cnt", - String.valueOf(DateTimes.of("1970-01-01").getMillis()), - String.valueOf(DateTimes.of("1970-01-02").getMillis()), + ColumnType.LONG, + DateTimes.of("1970-01-01").getMillis(), + DateTimes.of("1970-01-02").getMillis(), false, - true, - null, - StringComparators.NUMERIC + true ) ) .setDimensions(dimensions(new DefaultDimensionSpec("v0", "d0", ColumnType.LONG))) @@ -6304,7 +6362,7 @@ public void testCountDistinctOfCaseWhen() false, true ), - bound("m1", "4", null, false, false, null, StringComparators.NUMERIC) + range("m1", ColumnType.LONG, 4L, null, false, false) ), new FilteredAggregatorFactory( new CardinalityAggregatorFactory( @@ -6314,11 +6372,11 @@ public void testCountDistinctOfCaseWhen() false, true ), - bound("m1", "4", null, false, false, null, StringComparators.NUMERIC) + range("m1", ColumnType.LONG, 4L, 
null, false, false) ), new FilteredAggregatorFactory( new HyperUniquesAggregatorFactory("a2", "unique_dim1", false, true), - bound("m1", "4", null, false, false, null, StringComparators.NUMERIC) + range("m1", ColumnType.LONG, 4L, null, false, false) ) ) ) @@ -6358,7 +6416,7 @@ public void testExactCountDistinct() .setAggregatorSpecs(aggregators( new FilteredAggregatorFactory( new CountAggregatorFactory("a0"), - not(selector("d0", null, null)) + notNull("d0") ) )) .setContext(QUERY_CONTEXT_DEFAULT) @@ -6476,7 +6534,7 @@ public void testExactCountDistinctWithGroupingAndOtherAggregators() new LongSumAggregatorFactory("_a0", "a0"), new FilteredAggregatorFactory( new CountAggregatorFactory("_a1"), - not(selector("d0", null, null)) + notNull("d0") ) )) .setContext(QUERY_CONTEXT_DEFAULT) @@ -6556,11 +6614,17 @@ public void testMultipleExactCountDistinctWithGroupingAndOtherAggregators() .setAggregatorSpecs(aggregators( new FilteredAggregatorFactory( new CountAggregatorFactory("_a0"), - and(not(selector("d1", null, null)), selector("a0", "1", null)) + and( + notNull("d1"), + equality("a0", 1L, ColumnType.LONG) + ) ), new FilteredAggregatorFactory( new CountAggregatorFactory("_a1"), - and(not(selector("d2", null, null)), selector("a0", "2", null)) + and( + notNull("d2"), + equality("a0", 2L, ColumnType.LONG) + ) ) )) .setContext(QUERY_CONTEXT_DEFAULT) @@ -6616,7 +6680,7 @@ public void testApproxCountDistinct() false, true ), - not(selector("dim2", "", null)) + not(equality("dim2", "", ColumnType.STRING)) ), new CardinalityAggregatorFactory( "a3", @@ -6718,7 +6782,7 @@ public void testNestedGroupBy() new DefaultDimensionSpec("dim1", "d1") ) ) - .setDimFilter(new SelectorDimFilter("m1", "5.0", null)) + .setDimFilter(equality("m1", 5.0, ColumnType.FLOAT)) .setAggregatorSpecs(aggregators(new LongMaxAggregatorFactory("a0", "__time"))) .setContext(QUERY_CONTEXT_DEFAULT) .build() @@ -6742,7 +6806,7 @@ public void testNestedGroupBy() ? 
new CountAggregatorFactory("_a0") : new FilteredAggregatorFactory( new CountAggregatorFactory("_a0"), - not(selector("d0", null, null)) + notNull("d0") ) ) ) @@ -7007,7 +7071,7 @@ public void testAvgDailyCountDistinct() new LongSumAggregatorFactory("_a0:sum", "a0"), new FilteredAggregatorFactory( new CountAggregatorFactory("_a0:count"), - not(selector("a0", null, null)) + notNull("a0") ) ) ) @@ -7053,6 +7117,10 @@ public void testHighestMaxNumericInFilter() @Test public void testQueryWithMoreThanMaxNumericInFilter() { + if (NullHandling.sqlCompatible()) { + // skip in sql compatible mode, this plans to an OR filter with equality filter children... + return; + } notMsqCompatible(); expectedException.expect(UOE.class); expectedException.expectMessage( @@ -7088,7 +7156,11 @@ public void testExactCountDistinctUsingSubqueryWithWherePushDown() GroupByQuery.builder() .setDataSource(CalciteTests.DATASOURCE1) .setInterval(querySegmentSpec(Filtration.eternity())) - .setDimFilter(not(selector("dim2", "", null))) + .setDimFilter( + not( + equality("dim2", "", ColumnType.STRING) + ) + ) .setGranularity(Granularities.ALL) .setDimensions(dimensions(new DefaultDimensionSpec("dim2", "d0"))) .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) @@ -7127,7 +7199,7 @@ public void testExactCountDistinctUsingSubqueryWithWherePushDown() GroupByQuery.builder() .setDataSource(CalciteTests.DATASOURCE1) .setInterval(querySegmentSpec(Filtration.eternity())) - .setDimFilter(not(selector("dim2", null, null))) + .setDimFilter(notNull("dim2")) .setGranularity(Granularities.ALL) .setDimensions(dimensions(new DefaultDimensionSpec("dim2", "d0"))) .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) @@ -7171,7 +7243,9 @@ public void testCompareExactAndApproximateCountDistinctUsingSubquery() .setDataSource(CalciteTests.DATASOURCE1) .setInterval(querySegmentSpec(Filtration.eternity())) .setGranularity(Granularities.ALL) - .setDimFilter(not(selector("dim1", 
"", null))) + .setDimFilter( + not(equality("dim1", "", ColumnType.STRING)) + ) .setDimensions(dimensions(new DefaultDimensionSpec("dim1", "d0"))) .setContext(QUERY_CONTEXT_DEFAULT) .build() @@ -7363,7 +7437,7 @@ public void testCountDistinctOfSubstring() Druids.newTimeseriesQueryBuilder() .dataSource(CalciteTests.DATASOURCE1) .intervals(querySegmentSpec(Filtration.eternity())) - .filters(not(selector("dim1", "", null))) + .filters(not(equality("dim1", "", ColumnType.STRING))) .granularity(Granularities.ALL) .aggregators( aggregators( @@ -7406,7 +7480,7 @@ public void testCountDistinctOfTrim() .intervals(querySegmentSpec(Filtration.eternity())) .granularity(Granularities.ALL) .virtualColumns(expressionVirtualColumn("v0", "trim(\"dim1\",' ')", ColumnType.STRING)) - .filters(not(selector("v0", NullHandling.emptyToNullIfNeeded(""), null))) + .filters(not(equality("v0", "", ColumnType.STRING))) .aggregators( aggregators( new CardinalityAggregatorFactory( @@ -7478,10 +7552,9 @@ public void testRegexpExtract() .setInterval(querySegmentSpec(Filtration.eternity())) .setGranularity(Granularities.ALL) .setDimFilter( - not(selector( + not(equality( "dim1", - "x", - new RegexDimExtractionFn("^(.)", 1, true, null) + "x", new RegexDimExtractionFn("^(.)", 1, true, null), ColumnType.STRING )) ) .setDimensions( @@ -7531,8 +7604,8 @@ public void testRegexpExtractFilterViaNotNullCheck() ) .filters( or( - not(selector("dim1", null, new RegexDimExtractionFn("^1", 0, true, null))), - not(selector("v0", null, null)) + not(isNull("dim1", new RegexDimExtractionFn("^1", 0, true, null))), + notNull("v0") ) ) .aggregators(new CountAggregatorFactory("a0")) @@ -7651,7 +7724,7 @@ public void testGroupByLimitPushDownWithHavingOnLong() 4 ) ) - .setHavingSpec(having(selector("a0", "1", null))) + .setHavingSpec(having(equality("a0", 1L, ColumnType.LONG))) .setContext(QUERY_CONTEXT_DEFAULT) .build() ), @@ -7690,7 +7763,7 @@ public void testGroupByLimitPushdownExtraction() ) ) 
.setVirtualColumns(expressionVirtualColumn("v0", "'a'", ColumnType.STRING)) - .setDimFilter(selector("dim4", "a", null)) + .setDimFilter(equality("dim4", "a", ColumnType.STRING)) .setAggregatorSpecs( aggregators( new CountAggregatorFactory("a0") @@ -7951,8 +8024,8 @@ public void testFilterOnTimeExtract() .aggregators(aggregators(new CountAggregatorFactory("a0"))) .filters( and( - selector("v0", "2000", null), - selector("v1", "1", null) + equality("v0", 2000L, ColumnType.LONG), + equality("v1", 1L, ColumnType.LONG) ) ) .context(QUERY_CONTEXT_DEFAULT) @@ -7994,8 +8067,14 @@ public void testFilterOnTimeExtractWithMultipleDays() .aggregators(aggregators(new CountAggregatorFactory("a0"))) .filters( and( - selector("v0", "2000", null), - in("v1", ImmutableList.of("2", "3", "5"), null) + equality("v0", 2000L, ColumnType.LONG), + NullHandling.replaceWithDefault() + ? in("v1", ImmutableList.of("2", "3", "5"), null) + : or( + equality("v1", 2L, ColumnType.LONG), + equality("v1", 3L, ColumnType.LONG), + equality("v1", 5L, ColumnType.LONG) + ) ) ) .context(QUERY_CONTEXT_DEFAULT) @@ -8051,14 +8130,14 @@ public void testFilterOnTimeExtractWithVariousTimeUnits() .aggregators(aggregators(new CountAggregatorFactory("a0"))) .filters( and( - selector("v0", "2000", null), - selector("v1", "946723", null), - selector("v2", "695", null), - selector("v3", "6", null), - selector("v4", "2000", null), - selector("v5", "200", null), - selector("v6", "20", null), - selector("v7", "2", null) + equality("v0", 2000L, ColumnType.LONG), + equality("v1", 946723L, ColumnType.LONG), + equality("v2", 695L, ColumnType.LONG), + equality("v3", 6L, ColumnType.LONG), + equality("v4", 2000L, ColumnType.LONG), + equality("v5", 200L, ColumnType.LONG), + equality("v6", 20L, ColumnType.LONG), + equality("v7", 2L, ColumnType.LONG) ) ) .context(QUERY_CONTEXT_DEFAULT) @@ -8194,11 +8273,14 @@ public void testQueryWithSelectProjectAndIdentityProjectDoesNotRename() aggregators( new FilteredAggregatorFactory( new 
LongMinAggregatorFactory("_a0", "a0"), - selector("a1", "1", null) + equality("a1", 1L, ColumnType.LONG) ), new FilteredAggregatorFactory( new CountAggregatorFactory("_a1"), - and(not(selector("d0", null, null)), selector("a1", "0", null)) + and( + notNull("d0"), + equality("a1", 0L, ColumnType.LONG) + ) ) ) ) @@ -8451,10 +8533,9 @@ public void testFilterAndGroupByLookup() .setInterval(querySegmentSpec(Filtration.eternity())) .setGranularity(Granularities.ALL) .setDimFilter( - not(selector( + not(equality( "dim1", - "xxx", - extractionFn + "xxx", extractionFn, ColumnType.STRING )) ) .setDimensions( @@ -8604,38 +8685,35 @@ public void testFilteredTimeAggregators() .aggregators(aggregators( new FilteredAggregatorFactory( new LongSumAggregatorFactory("a0", "cnt"), - bound( + range( "__time", + ColumnType.LONG, null, - String.valueOf(timestamp("2000-02-01")), + timestamp("2000-02-01"), false, - true, - null, - StringComparators.NUMERIC + true ) ), new FilteredAggregatorFactory( new LongSumAggregatorFactory("a1", "cnt"), - bound( + range( "__time", - String.valueOf(timestamp("2000-01-01T00:00:01")), - String.valueOf(timestamp("2000-02-01")), + ColumnType.LONG, + timestamp("2000-01-01T00:00:01"), + timestamp("2000-02-01"), false, - true, - null, - StringComparators.NUMERIC + true ) ), new FilteredAggregatorFactory( new LongSumAggregatorFactory("a2", "cnt"), - bound( + range( "__time", - String.valueOf(timestamp("2001-01-01")), - String.valueOf(timestamp("2001-02-01")), + ColumnType.LONG, + timestamp("2001-01-01"), + timestamp("2001-02-01"), false, - true, - null, - StringComparators.NUMERIC + true ) ) )) @@ -9072,7 +9150,11 @@ public void testTimeseriesEmptyResultsAggregatorDefaultValues() Druids.newTimeseriesQueryBuilder() .dataSource(CalciteTests.DATASOURCE3) .intervals(querySegmentSpec(Filtration.eternity())) - .filters(bound("dim2", "0", "0", false, false, null, StringComparators.NUMERIC)) + .filters( + NullHandling.replaceWithDefault() + ? 
numericSelector("dim2", "0", null) + : equality("dim2", 0L, ColumnType.LONG) + ) .granularity(Granularities.ALL) .aggregators( aggregators( @@ -9103,14 +9185,14 @@ public void testTimeseriesEmptyResultsAggregatorDefaultValues() ? new CountAggregatorFactory("a9:count") : new FilteredAggregatorFactory( new CountAggregatorFactory("a9:count"), - not(selector("l1", null, null)) + notNull("l1") ), new DoubleSumAggregatorFactory("a10:sum", "d1"), useDefault ? new CountAggregatorFactory("a10:count") : new FilteredAggregatorFactory( new CountAggregatorFactory("a10:count"), - not(selector("d1", null, null)) + notNull("d1") ) ) ) @@ -9179,7 +9261,11 @@ public void testTimeseriesEmptyResultsAggregatorDefaultValuesNonVectorized() Druids.newTimeseriesQueryBuilder() .dataSource(CalciteTests.DATASOURCE3) .intervals(querySegmentSpec(Filtration.eternity())) - .filters(bound("dim2", "0", "0", false, false, null, StringComparators.NUMERIC)) + .filters( + NullHandling.replaceWithDefault() + ? numericSelector("dim2", "0", null) + : equality("dim2", 0L, ColumnType.LONG) + ) .granularity(Granularities.ALL) .aggregators( aggregators( @@ -9222,7 +9308,7 @@ public void testTimeseriesEmptyResultsAggregatorDefaultValuesNonVectorized() ExpressionLambdaAggregatorFactory.DEFAULT_MAX_SIZE_BYTES, TestExprMacroTable.INSTANCE ), - not(selector("dim3", null, null)) + notNull("dim3") ), new FilteredAggregatorFactory( new ExpressionLambdaAggregatorFactory( @@ -9241,7 +9327,7 @@ public void testTimeseriesEmptyResultsAggregatorDefaultValuesNonVectorized() ExpressionLambdaAggregatorFactory.DEFAULT_MAX_SIZE_BYTES, TestExprMacroTable.INSTANCE ), - not(selector("l1", null, null)) + notNull("l1") ), new FilteredAggregatorFactory( new ExpressionLambdaAggregatorFactory( @@ -9260,7 +9346,7 @@ public void testTimeseriesEmptyResultsAggregatorDefaultValuesNonVectorized() ExpressionLambdaAggregatorFactory.DEFAULT_MAX_SIZE_BYTES, TestExprMacroTable.INSTANCE ), - not(selector("l1", null, null)) + notNull("l1") ), new 
FilteredAggregatorFactory( new ExpressionLambdaAggregatorFactory( @@ -9279,7 +9365,7 @@ public void testTimeseriesEmptyResultsAggregatorDefaultValuesNonVectorized() ExpressionLambdaAggregatorFactory.DEFAULT_MAX_SIZE_BYTES, TestExprMacroTable.INSTANCE ), - not(selector("l1", null, null)) + notNull("l1") ) ) ) @@ -9316,7 +9402,7 @@ public void testGroupByAggregatorDefaultValues() GroupByQuery.builder() .setDataSource(CalciteTests.DATASOURCE3) .setInterval(querySegmentSpec(Filtration.eternity())) - .setDimFilter(selector("dim2", "a", null)) + .setDimFilter(equality("dim2", "a", ColumnType.STRING)) .setGranularity(Granularities.ALL) .setVirtualColumns(expressionVirtualColumn("v0", "'a'", ColumnType.STRING)) .setDimensions(new DefaultDimensionSpec("v0", "_d0", ColumnType.STRING)) @@ -9324,7 +9410,7 @@ public void testGroupByAggregatorDefaultValues() aggregators( new FilteredAggregatorFactory( new CountAggregatorFactory("a0"), - selector("dim1", "nonexistent", null) + equality("dim1", "nonexistent", ColumnType.STRING) ), new FilteredAggregatorFactory( new CardinalityAggregatorFactory( @@ -9334,7 +9420,7 @@ public void testGroupByAggregatorDefaultValues() false, true ), - selector("dim1", "nonexistent", null) + equality("dim1", "nonexistent", ColumnType.STRING) ), new FilteredAggregatorFactory( new CardinalityAggregatorFactory( @@ -9344,57 +9430,63 @@ public void testGroupByAggregatorDefaultValues() false, true ), - selector("dim1", "nonexistent", null) + equality("dim1", "nonexistent", ColumnType.STRING) ), new FilteredAggregatorFactory( new DoubleSumAggregatorFactory("a3", "d1"), - selector("dim1", "nonexistent", null) + equality("dim1", "nonexistent", ColumnType.STRING) ), new FilteredAggregatorFactory( new DoubleMaxAggregatorFactory("a4", "d1"), - selector("dim1", "nonexistent", null) + equality("dim1", "nonexistent", ColumnType.STRING) ), new FilteredAggregatorFactory( new DoubleMinAggregatorFactory("a5", "d1"), - selector("dim1", "nonexistent", null) + 
equality("dim1", "nonexistent", ColumnType.STRING) ), new FilteredAggregatorFactory( new LongSumAggregatorFactory("a6", "l1"), - selector("dim1", "nonexistent", null) + equality("dim1", "nonexistent", ColumnType.STRING) ), new FilteredAggregatorFactory( new LongMaxAggregatorFactory("a7", "l1"), - selector("dim1", "nonexistent", null) + equality("dim1", "nonexistent", ColumnType.STRING) ), new FilteredAggregatorFactory( new LongMinAggregatorFactory("a8", "l1"), - selector("dim1", "nonexistent", null) + equality("dim1", "nonexistent", ColumnType.STRING) ), new FilteredAggregatorFactory( new LongSumAggregatorFactory("a9:sum", "l1"), - selector("dim1", "nonexistent", null) + equality("dim1", "nonexistent", ColumnType.STRING) ), useDefault ? new FilteredAggregatorFactory( new CountAggregatorFactory("a9:count"), - selector("dim1", "nonexistent", null) + equality("dim1", "nonexistent", ColumnType.STRING) ) : new FilteredAggregatorFactory( new CountAggregatorFactory("a9:count"), - and(not(selector("l1", null, null)), selector("dim1", "nonexistent", null)) + and( + notNull("l1"), + equality("dim1", "nonexistent", ColumnType.STRING) + ) ), new FilteredAggregatorFactory( new DoubleSumAggregatorFactory("a10:sum", "d1"), - selector("dim1", "nonexistent", null) + equality("dim1", "nonexistent", ColumnType.STRING) ), useDefault ? 
new FilteredAggregatorFactory( new CountAggregatorFactory("a10:count"), - selector("dim1", "nonexistent", null) + equality("dim1", "nonexistent", ColumnType.STRING) ) : new FilteredAggregatorFactory( new CountAggregatorFactory("a10:count"), - and(not(selector("d1", null, null)), selector("dim1", "nonexistent", null)) + and( + notNull("d1"), + equality("dim1", "nonexistent", ColumnType.STRING) + ) ) ) ) @@ -9466,7 +9558,7 @@ public void testGroupByAggregatorDefaultValuesNonVectorized() GroupByQuery.builder() .setDataSource(CalciteTests.DATASOURCE3) .setInterval(querySegmentSpec(Filtration.eternity())) - .setDimFilter(selector("dim2", "a", null)) + .setDimFilter(equality("dim2", "a", ColumnType.STRING)) .setGranularity(Granularities.ALL) .setVirtualColumns(expressionVirtualColumn("v0", "'a'", ColumnType.STRING)) .setDimensions(new DefaultDimensionSpec("v0", "_d0", ColumnType.STRING)) @@ -9474,27 +9566,27 @@ public void testGroupByAggregatorDefaultValuesNonVectorized() aggregators( new FilteredAggregatorFactory( new StringAnyAggregatorFactory("a0", "dim1", 1024), - selector("dim1", "nonexistent", null) + equality("dim1", "nonexistent", ColumnType.STRING) ), new FilteredAggregatorFactory( new LongAnyAggregatorFactory("a1", "l1"), - selector("dim1", "nonexistent", null) + equality("dim1", "nonexistent", ColumnType.STRING) ), new FilteredAggregatorFactory( new StringFirstAggregatorFactory("a2", "dim1", null, 1024), - selector("dim1", "nonexistent", null) + equality("dim1", "nonexistent", ColumnType.STRING) ), new FilteredAggregatorFactory( new LongFirstAggregatorFactory("a3", "l1", null), - selector("dim1", "nonexistent", null) + equality("dim1", "nonexistent", ColumnType.STRING) ), new FilteredAggregatorFactory( new StringLastAggregatorFactory("a4", "dim1", null, 1024), - selector("dim1", "nonexistent", null) + equality("dim1", "nonexistent", ColumnType.STRING) ), new FilteredAggregatorFactory( new LongLastAggregatorFactory("a5", "l1", null), - selector("dim1", 
"nonexistent", null) + equality("dim1", "nonexistent", ColumnType.STRING) ), new FilteredAggregatorFactory( new ExpressionLambdaAggregatorFactory( @@ -9513,7 +9605,7 @@ public void testGroupByAggregatorDefaultValuesNonVectorized() ExpressionLambdaAggregatorFactory.DEFAULT_MAX_SIZE_BYTES, TestExprMacroTable.INSTANCE ), - selector("dim1", "nonexistent", null) + equality("dim1", "nonexistent", ColumnType.STRING) ), new FilteredAggregatorFactory( new ExpressionLambdaAggregatorFactory( @@ -9533,8 +9625,8 @@ public void testGroupByAggregatorDefaultValuesNonVectorized() TestExprMacroTable.INSTANCE ), and( - not(selector("dim3", null, null)), - selector("dim1", "nonexistent", null) + notNull("dim3"), + equality("dim1", "nonexistent", ColumnType.STRING) ) ), new FilteredAggregatorFactory( @@ -9554,7 +9646,10 @@ public void testGroupByAggregatorDefaultValuesNonVectorized() ExpressionLambdaAggregatorFactory.DEFAULT_MAX_SIZE_BYTES, TestExprMacroTable.INSTANCE ), - and(not(selector("l1", null, null)), selector("dim1", "nonexistent", null)) + and( + notNull("l1"), + equality("dim1", "nonexistent", ColumnType.STRING) + ) ), new FilteredAggregatorFactory( new ExpressionLambdaAggregatorFactory( @@ -9573,7 +9668,10 @@ public void testGroupByAggregatorDefaultValuesNonVectorized() ExpressionLambdaAggregatorFactory.DEFAULT_MAX_SIZE_BYTES, TestExprMacroTable.INSTANCE ), - and(not(selector("l1", null, null)), selector("dim1", "nonexistent", null)) + and( + notNull("l1"), + equality("dim1", "nonexistent", ColumnType.STRING) + ) ), new FilteredAggregatorFactory( new ExpressionLambdaAggregatorFactory( @@ -9592,7 +9690,10 @@ public void testGroupByAggregatorDefaultValuesNonVectorized() ExpressionLambdaAggregatorFactory.DEFAULT_MAX_SIZE_BYTES, TestExprMacroTable.INSTANCE ), - and(not(selector("l1", null, null)), selector("dim1", "nonexistent", null)) + and( + notNull("l1"), + equality("dim1", "nonexistent", ColumnType.STRING) + ) ) ) ) @@ -9969,14 +10070,13 @@ public void 
testGroupByTimeFloorAndDimOnGroupByTimeFloorAndDim() ) .setHavingSpec( having( - bound( + range( "a0", - "1", + ColumnType.LONG, + 1L, null, true, - false, - null, - StringComparators.NUMERIC + false ) ) ) @@ -10811,7 +10911,7 @@ public void testUnicodeFilterAndGroupBy() .setGranularity(Granularities.ALL) .setDimFilter(or( new LikeDimFilter("dim1", "דר%", null, null), - new SelectorDimFilter("dim1", "друид", null) + equality("dim1", "друид", ColumnType.STRING) )) .setDimensions(dimensions( new DefaultDimensionSpec("dim1", "d0"), @@ -11078,7 +11178,7 @@ public void testSortProjectAfterNestedGroupBy() ? new CountAggregatorFactory("a0") : new FilteredAggregatorFactory( new CountAggregatorFactory("a0"), - not(selector("d1", null, null)) + notNull("d1") ) ) ) @@ -11117,7 +11217,7 @@ public void testPostAggWithTimeseries() Druids.newTimeseriesQueryBuilder() .dataSource(CalciteTests.DATASOURCE1) .intervals(querySegmentSpec(Filtration.eternity())) - .filters(selector("dim2", "a", null)) + .filters(equality("dim2", "a", ColumnType.STRING)) .granularity(Granularities.YEAR) .aggregators( aggregators( @@ -11159,7 +11259,7 @@ public void testPostAggWithTopN() .intervals(querySegmentSpec(Filtration.eternity())) .granularity(Granularities.ALL) .dimension(new DefaultDimensionSpec("m1", "d0", ColumnType.FLOAT)) - .filters("dim2", "a") + .filters(equality("dim2", "a", ColumnType.STRING)) .aggregators( useDefault ? aggregators( @@ -11172,7 +11272,7 @@ public void testPostAggWithTopN() new DoubleSumAggregatorFactory("a0:sum", "m2"), new FilteredAggregatorFactory( new CountAggregatorFactory("a0:count"), - not(selector("m2", null, null)) + notNull("m2") ), new DoubleSumAggregatorFactory("a1", "m1"), new DoubleSumAggregatorFactory("a2", "m2") @@ -11478,9 +11578,7 @@ public void testRequireTimeConditionPositive() ) ) ) - .setDimFilter( - not(selector("dim1", NullHandling.sqlCompatible() ? 
"" : null, null)) - ) + .setDimFilter(not(equality("dim1", "", ColumnType.STRING))) .setGranularity(Granularities.ALL) .setDimensions( new ExtractionDimensionSpec( @@ -11613,7 +11711,7 @@ public void testFilterFloatDimension() .dataSource(CalciteTests.DATASOURCE3) .intervals(querySegmentSpec(Filtration.eternity())) .columns("dim1") - .filters(selector("f1", "0.1", null)) + .filters(equality("f1", 0.1, ColumnType.DOUBLE)) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .limit(1) .context(QUERY_CONTEXT_DEFAULT) @@ -11635,7 +11733,7 @@ public void testFilterDoubleDimension() .dataSource(CalciteTests.DATASOURCE3) .intervals(querySegmentSpec(Filtration.eternity())) .columns("dim1") - .filters(selector("d1", "1.7", null)) + .filters(equality("d1", 1.7, ColumnType.DOUBLE)) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .limit(1) .context(QUERY_CONTEXT_DEFAULT) @@ -11657,7 +11755,7 @@ public void testFilterLongDimension() .dataSource(CalciteTests.DATASOURCE3) .intervals(querySegmentSpec(Filtration.eternity())) .columns("dim1") - .filters(selector("l1", "7", null)) + .filters(equality("l1", 7L, ColumnType.LONG)) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .limit(1) .context(QUERY_CONTEXT_DEFAULT) @@ -11683,7 +11781,11 @@ public void testTrigonometricFunction() ImmutableList.of(Druids.newTimeseriesQueryBuilder() .dataSource(CalciteTests.DATASOURCE1) .intervals(querySegmentSpec(Filtration.eternity())) - .filters(bound("dim2", "0", "0", false, false, null, StringComparators.NUMERIC)) + .filters( + NullHandling.replaceWithDefault() + ? 
numericSelector("dim2", "0", null) + : equality("dim2", 0L, ColumnType.LONG) + ) .granularity(Granularities.ALL) .aggregators(aggregators( new CountAggregatorFactory("a0") @@ -11732,7 +11834,7 @@ public void testRadiansAndDegrees() expressionVirtualColumn("v0", "(toRadians((\"m1\" * 15)) / toDegrees(\"m2\"))", ColumnType.DOUBLE) ) .columns("v0") - .filters(selector("dim1", "1", null)) + .filters(equality("dim1", "1", ColumnType.STRING)) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .context(QUERY_CONTEXT_DEFAULT) .build() @@ -12447,7 +12549,7 @@ public void testLookupWithNull() ) .columns("v0") .legacy(false) - .filters(new SelectorDimFilter("dim2", NULL_STRING, null)) + .filters(isNull("dim2")) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .context(QUERY_CONTEXT_DEFAULT) .build() @@ -12503,7 +12605,7 @@ public void testCountAndAverageByConstantVirtualColumn() aggs = ImmutableList.of( new FilteredAggregatorFactory( new CountAggregatorFactory("a0"), - not(selector("v0", null, null)) + notNull("v0") ), new LongSumAggregatorFactory("a1:sum", "v1", null, TestExprMacroTable.INSTANCE), new CountAggregatorFactory("a1:count") @@ -12516,12 +12618,12 @@ public void testCountAndAverageByConstantVirtualColumn() aggs = ImmutableList.of( new FilteredAggregatorFactory( new CountAggregatorFactory("a0"), - not(selector("v0", null, null)) + notNull("v0") ), new LongSumAggregatorFactory("a1:sum", "v1"), new FilteredAggregatorFactory( new CountAggregatorFactory("a1:count"), - not(selector("v1", null, null)) + notNull("v1") ) ); virtualColumns = ImmutableList.of( @@ -12538,8 +12640,8 @@ public void testCountAndAverageByConstantVirtualColumn() .setInterval(querySegmentSpec(Filtration.eternity())) .setDimFilter( and( - selector("dim1", "10.1", null), - selector("l1", "325323", null) + equality("dim1", "10.1", ColumnType.STRING), + equality("l1", 325323L, ColumnType.LONG) ) ) .setGranularity(Granularities.ALL) @@ -12591,15 +12693,15 @@ public void 
testExpressionCounts() aggregators( new FilteredAggregatorFactory( new CountAggregatorFactory("a0"), - not(selector("v0", null, null)) + notNull("v0") ), new FilteredAggregatorFactory( new CountAggregatorFactory("a1"), - not(selector("v1", null, null)) + notNull("v1") ), new FilteredAggregatorFactory( new CountAggregatorFactory("a2"), - not(selector("v2", null, null)) + notNull("v2") ) ) ) @@ -12650,7 +12752,7 @@ public void testBitwiseAggregatorsTimeseries() ExpressionLambdaAggregatorFactory.DEFAULT_MAX_SIZE_BYTES, TestExprMacroTable.INSTANCE ), - not(selector("l1", null, null)) + notNull("l1") ), new FilteredAggregatorFactory( new ExpressionLambdaAggregatorFactory( @@ -12669,7 +12771,7 @@ public void testBitwiseAggregatorsTimeseries() ExpressionLambdaAggregatorFactory.DEFAULT_MAX_SIZE_BYTES, TestExprMacroTable.INSTANCE ), - not(selector("l1", null, null)) + notNull("l1") ), new FilteredAggregatorFactory( new ExpressionLambdaAggregatorFactory( @@ -12688,7 +12790,7 @@ public void testBitwiseAggregatorsTimeseries() ExpressionLambdaAggregatorFactory.DEFAULT_MAX_SIZE_BYTES, TestExprMacroTable.INSTANCE ), - not(selector("l1", null, null)) + notNull("l1") ) ) ) @@ -12739,7 +12841,7 @@ public void testBitwiseAggregatorsGroupBy() ExpressionLambdaAggregatorFactory.DEFAULT_MAX_SIZE_BYTES, TestExprMacroTable.INSTANCE ), - not(selector("l1", null, null)) + notNull("l1") ), new FilteredAggregatorFactory( new ExpressionLambdaAggregatorFactory( @@ -12758,7 +12860,7 @@ public void testBitwiseAggregatorsGroupBy() ExpressionLambdaAggregatorFactory.DEFAULT_MAX_SIZE_BYTES, TestExprMacroTable.INSTANCE ), - not(selector("l1", null, null)) + notNull("l1") ), new FilteredAggregatorFactory( new ExpressionLambdaAggregatorFactory( @@ -12777,7 +12879,7 @@ public void testBitwiseAggregatorsGroupBy() ExpressionLambdaAggregatorFactory.DEFAULT_MAX_SIZE_BYTES, TestExprMacroTable.INSTANCE ), - not(selector("l1", null, null)) + notNull("l1") ) ) ) @@ -12821,7 +12923,7 @@ public void testStringAgg() 
.dataSource(CalciteTests.DATASOURCE1) .intervals(querySegmentSpec(Filtration.eternity())) .granularity(Granularities.ALL) - .filters(not(selector("dim1", null, null))) + .filters(notNull("dim1")) .aggregators( aggregators( new FilteredAggregatorFactory( @@ -12841,7 +12943,7 @@ public void testStringAgg() ExpressionLambdaAggregatorFactory.DEFAULT_MAX_SIZE_BYTES, TestExprMacroTable.INSTANCE ), - not(selector("dim1", null, null)) + notNull("dim1") ), new FilteredAggregatorFactory( new ExpressionLambdaAggregatorFactory( @@ -12860,7 +12962,7 @@ public void testStringAgg() ExpressionLambdaAggregatorFactory.DEFAULT_MAX_SIZE_BYTES, TestExprMacroTable.INSTANCE ), - not(selector("dim1", null, null)) + notNull("dim1") ), new FilteredAggregatorFactory( new ExpressionLambdaAggregatorFactory( @@ -12880,8 +12982,8 @@ public void testStringAgg() TestExprMacroTable.INSTANCE ), and( - not(selector("dim1", null, null)), - selector("dim1", "shazbot", null) + notNull("dim1"), + equality("dim1", "shazbot", ColumnType.STRING) ) ) ) @@ -12927,7 +13029,7 @@ public void testStringAggMultiValue() ExpressionLambdaAggregatorFactory.DEFAULT_MAX_SIZE_BYTES, TestExprMacroTable.INSTANCE ), - not(selector("dim3", null, null)) + notNull("dim3") ), new FilteredAggregatorFactory( new ExpressionLambdaAggregatorFactory( @@ -12946,7 +13048,7 @@ public void testStringAggMultiValue() ExpressionLambdaAggregatorFactory.DEFAULT_MAX_SIZE_BYTES, TestExprMacroTable.INSTANCE ), - not(selector("dim3", null, null)) + notNull("dim3") ) ) ) @@ -12991,7 +13093,7 @@ public void testStringAggNumeric() ExpressionLambdaAggregatorFactory.DEFAULT_MAX_SIZE_BYTES, TestExprMacroTable.INSTANCE ), - not(selector("l1", null, null)) + notNull("l1") ), new FilteredAggregatorFactory( new ExpressionLambdaAggregatorFactory( @@ -13010,7 +13112,7 @@ public void testStringAggNumeric() ExpressionLambdaAggregatorFactory.DEFAULT_MAX_SIZE_BYTES, TestExprMacroTable.INSTANCE ), - not(selector("l1", null, null)) + notNull("l1") ), new 
FilteredAggregatorFactory( new ExpressionLambdaAggregatorFactory( @@ -13029,7 +13131,7 @@ public void testStringAggNumeric() ExpressionLambdaAggregatorFactory.DEFAULT_MAX_SIZE_BYTES, TestExprMacroTable.INSTANCE ), - not(selector("d1", null, null)) + notNull("d1") ), new FilteredAggregatorFactory( new ExpressionLambdaAggregatorFactory( @@ -13048,7 +13150,7 @@ public void testStringAggNumeric() ExpressionLambdaAggregatorFactory.DEFAULT_MAX_SIZE_BYTES, TestExprMacroTable.INSTANCE ), - not(selector("d1", null, null)) + notNull("d1") ), new FilteredAggregatorFactory( new ExpressionLambdaAggregatorFactory( @@ -13067,7 +13169,7 @@ public void testStringAggNumeric() ExpressionLambdaAggregatorFactory.DEFAULT_MAX_SIZE_BYTES, TestExprMacroTable.INSTANCE ), - not(selector("f1", null, null)) + notNull("f1") ), new FilteredAggregatorFactory( new ExpressionLambdaAggregatorFactory( @@ -13086,7 +13188,7 @@ public void testStringAggNumeric() ExpressionLambdaAggregatorFactory.DEFAULT_MAX_SIZE_BYTES, TestExprMacroTable.INSTANCE ), - not(selector("f1", null, null)) + notNull("f1") ) ) ) @@ -13148,7 +13250,7 @@ public void testStringAggExpression() ExpressionLambdaAggregatorFactory.DEFAULT_MAX_SIZE_BYTES, TestExprMacroTable.INSTANCE ), - not(selector("v0", null, null)) + notNull("v0") ), new FilteredAggregatorFactory( new ExpressionLambdaAggregatorFactory( @@ -13167,7 +13269,7 @@ public void testStringAggExpression() ExpressionLambdaAggregatorFactory.DEFAULT_MAX_SIZE_BYTES, TestExprMacroTable.INSTANCE ), - not(selector("v0", null, null)) + notNull("v0") ) ) ) @@ -13222,7 +13324,7 @@ public void testStringAggMaxBytes() new HumanReadableBytes(128), TestExprMacroTable.INSTANCE ), - not(selector("l1", null, null)) + notNull("l1") ), new FilteredAggregatorFactory( new ExpressionLambdaAggregatorFactory( @@ -13241,7 +13343,7 @@ public void testStringAggMaxBytes() new HumanReadableBytes(128), TestExprMacroTable.INSTANCE ), - not(selector("l1", null, null)) + notNull("l1") ) ) ) @@ -13306,7 
+13408,7 @@ public void testHumanReadableFormatFunction() expressionVirtualColumn("v7", "human_readable_decimal_format(\"l1\")", ColumnType.STRING) ) .columns("m1", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7") - .filters(selector("dim1", "1", null)) + .filters(equality("dim1", "1", ColumnType.STRING)) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .limit(1) .context(QUERY_CONTEXT_DEFAULT) @@ -13389,7 +13491,7 @@ public void testCommonVirtualExpressionWithDifferentValueType() ImmutableList.of(new TopNQueryBuilder() .dataSource(CalciteTests.DATASOURCE1) .intervals(querySegmentSpec(Filtration.eternity())) - .filters(selector("dim1", "none", null)) + .filters(equality("dim1", "none", ColumnType.STRING)) .granularity(Granularities.ALL) .virtualColumns( expressionVirtualColumn("v0", "'none'", ColumnType.STRING), @@ -13426,8 +13528,10 @@ public void testReturnEmptyRowWhenGroupByIsConvertedToTimeseriesWithSingleConsta .intervals(querySegmentSpec(Filtration.eternity())) .filters( and( - selector("m1", "50", null), - selector("dim1", "wat", null) + NullHandling.replaceWithDefault() + ? 
selector("m1", "50") + : equality("m1", 50.0, ColumnType.FLOAT), + equality("dim1", "wat", ColumnType.STRING) ) ) .granularity(Granularities.ALL) @@ -13453,7 +13557,7 @@ public void testReturnEmptyRowWhenGroupByIsConvertedToTimeseriesWithSingleConsta .setInterval(querySegmentSpec(Intervals.ETERNITY)) .setGranularity(Granularities.ALL) .addDimension(new DefaultDimensionSpec("dim1", "d0", ColumnType.STRING)) - .setDimFilter(selector("dim1", "wat", null)) + .setDimFilter(equality("dim1", "wat", ColumnType.STRING)) .setPostAggregatorSpecs( ImmutableList.of( new ExpressionPostAggregator("p0", "'A'", null, ExprMacroTable.nil()) @@ -13478,8 +13582,10 @@ public void testReturnEmptyRowWhenGroupByIsConvertedToTimeseriesWithMultipleCons .intervals(querySegmentSpec(Filtration.eternity())) .filters( and( - selector("m1", "50", null), - selector("dim1", "wat", null) + NullHandling.replaceWithDefault() + ? selector("m1", "50") + : equality("m1", 50.0, ColumnType.FLOAT), + equality("dim1", "wat", ColumnType.STRING) ) ) .granularity(Granularities.ALL) @@ -13503,8 +13609,8 @@ public void testReturnEmptyRowWhenGroupByIsConvertedToTimeseriesWithMultipleCons .intervals(querySegmentSpec(Filtration.eternity())) .filters( and( - selector("m1", "2.0", null), - selector("dim1", "10.1", null) + equality("m1", 2.0, ColumnType.DOUBLE), + equality("dim1", "10.1", ColumnType.STRING) ) ) .granularity(Granularities.ALL) @@ -13538,10 +13644,15 @@ public void testPlanWithInFilterLessThanInSubQueryThreshold() .context(QUERY_CONTEXT_DEFAULT) .legacy(false) .filters( - in( - "l1", - ImmutableList.of("4842", "4844", "4845", "14905", "4853", "29064"), - null + NullHandling.replaceWithDefault() + ? 
in("l1", ImmutableList.of("4842", "4844", "4845", "14905", "4853", "29064"), null) + : or( + equality("l1", 4842L, ColumnType.LONG), + equality("l1", 4844L, ColumnType.LONG), + equality("l1", 4845L, ColumnType.LONG), + equality("l1", 14905L, ColumnType.LONG), + equality("l1", 4853L, ColumnType.LONG), + equality("l1", 29064L, ColumnType.LONG) ) ) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) @@ -13926,10 +14037,10 @@ public void testFilterWithNVLAndNotIn() .columns(ImmutableList.of("__time", "dim1")) .filters(and( or( - not(selector("dim1", "a", null)), - selector("dim1", null, null) + not(equality("dim1", "a", ColumnType.STRING)), + isNull("dim1") ), - not(selector("dim1", NullHandling.sqlCompatible() ? "" : null, null)) + not(equality("dim1", "", ColumnType.STRING)) )) .build() ), diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteReplaceDmlTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteReplaceDmlTest.java index 282ea7d82517..f4da1520685f 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteReplaceDmlTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteReplaceDmlTest.java @@ -328,7 +328,7 @@ public void testReplaceFromView() .dataSource("foo") .intervals(querySegmentSpec(Filtration.eternity())) .virtualColumns(expressionVirtualColumn("v0", "substring(\"dim1\", 0, 1)", ColumnType.STRING)) - .filters(selector("dim2", "a", null)) + .filters(equality("dim2", "a", ColumnType.STRING)) .columns("v0") .context(REPLACE_ALL_TIME_CHUNKS) .build() diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteSelectQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteSelectQueryTest.java index cc60a27acdd1..cd947ac2ed61 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteSelectQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteSelectQueryTest.java @@ -592,8 +592,8 @@ public void testSelectStarWithDimFilter() 
.intervals(querySegmentSpec(Filtration.eternity())) .filters( or( - bound("dim1", "d", null, true, false, null, StringComparators.LEXICOGRAPHIC), - selector("dim2", "a", null) + range("dim1", ColumnType.STRING, "d", null, true, false), + equality("dim2", "a", ColumnType.STRING) ) ) .columns("__time", "cnt", "dim1", "dim2", "dim3", "m1", "m2", "unique_dim1") @@ -622,15 +622,14 @@ public void testSelectDistinctWithCascadeExtractionFilter() .setDimensions(dimensions(new DefaultDimensionSpec("dim1", "d0"))) .setDimFilter( or( - selector( + equality( "dim1", - "e", - cascade( + "e", cascade( new SubstringDimExtractionFn(1, null), new SubstringDimExtractionFn(0, 1) - ) + ), ColumnType.STRING ), - selector("dim2", "a", null) + equality("dim2", "a", ColumnType.STRING) ) ) .setContext(QUERY_CONTEXT_DEFAULT) @@ -670,8 +669,8 @@ public void testSelectDistinctWithStrlenFilter() .setDimensions(dimensions(new DefaultDimensionSpec("dim1", "d0"))) .setDimFilter( or( - selector("v0", "3", null), - selector("v1", "3", null) + equality("v0", 3L, ColumnType.LONG), + equality("v1", 3L, ColumnType.LONG) ) ) .setContext(QUERY_CONTEXT_DEFAULT) @@ -987,19 +986,23 @@ public void testSelectCountStar() "SELECT exp(count(*)) + 10, sum(m2) FROM druid.foo WHERE dim2 = 0", CalciteTests.REGULAR_USER_AUTH_RESULT, ImmutableList.of(Druids.newTimeseriesQueryBuilder() - .dataSource(CalciteTests.DATASOURCE1) - .intervals(querySegmentSpec(Filtration.eternity())) - .filters(bound("dim2", "0", "0", false, false, null, StringComparators.NUMERIC)) - .granularity(Granularities.ALL) - .aggregators(aggregators( - new CountAggregatorFactory("a0"), - new DoubleSumAggregatorFactory("a1", "m2") - )) - .postAggregators( - expressionPostAgg("p0", "(exp(\"a0\") + 10)") - ) - .context(QUERY_CONTEXT_DEFAULT) - .build()), + .dataSource(CalciteTests.DATASOURCE1) + .intervals(querySegmentSpec(Filtration.eternity())) + .filters( + NullHandling.replaceWithDefault() + ? 
numericSelector("dim2", "0", null) + : equality("dim2", 0L, ColumnType.LONG) + ) + .granularity(Granularities.ALL) + .aggregators(aggregators( + new CountAggregatorFactory("a0"), + new DoubleSumAggregatorFactory("a1", "m2") + )) + .postAggregators( + expressionPostAgg("p0", "(exp(\"a0\") + 10)") + ) + .context(QUERY_CONTEXT_DEFAULT) + .build()), ImmutableList.of( new Object[]{11.0, NullHandling.defaultDoubleValue()} ) @@ -1038,15 +1041,15 @@ public void testSelectCountStar() "SELECT COUNT(*) FROM foo WHERE dim1 = 'nonexistent'", CalciteTests.REGULAR_USER_AUTH_RESULT, ImmutableList.of(Druids.newTimeseriesQueryBuilder() - .dataSource(CalciteTests.DATASOURCE1) - .intervals(querySegmentSpec(Filtration.eternity())) - .filters(selector("dim1", "nonexistent", null)) - .granularity(Granularities.ALL) - .aggregators(aggregators( - new CountAggregatorFactory("a0") - )) - .context(TIMESERIES_CONTEXT_BY_GRAN) - .build()), + .dataSource(CalciteTests.DATASOURCE1) + .intervals(querySegmentSpec(Filtration.eternity())) + .filters(equality("dim1", "nonexistent", ColumnType.STRING)) + .granularity(Granularities.ALL) + .aggregators(aggregators( + new CountAggregatorFactory("a0") + )) + .context(TIMESERIES_CONTEXT_BY_GRAN) + .build()), ImmutableList.of() ); @@ -1054,15 +1057,15 @@ public void testSelectCountStar() testQuery( "SELECT COUNT(*) FROM foo WHERE dim1 = 'nonexistent' GROUP BY FLOOR(__time TO DAY)", ImmutableList.of(Druids.newTimeseriesQueryBuilder() - .dataSource(CalciteTests.DATASOURCE1) - .intervals(querySegmentSpec(Filtration.eternity())) - .filters(selector("dim1", "nonexistent", null)) - .granularity(Granularities.DAY) - .aggregators(aggregators( - new CountAggregatorFactory("a0") - )) - .context(getTimeseriesContextWithFloorTime(TIMESERIES_CONTEXT_BY_GRAN, "d0")) - .build()), + .dataSource(CalciteTests.DATASOURCE1) + .intervals(querySegmentSpec(Filtration.eternity())) + .filters(equality("dim1", "nonexistent", ColumnType.STRING)) + .granularity(Granularities.DAY) + 
.aggregators(aggregators( + new CountAggregatorFactory("a0") + )) + .context(getTimeseriesContextWithFloorTime(TIMESERIES_CONTEXT_BY_GRAN, "d0")) + .build()), ImmutableList.of() ); } @@ -1181,7 +1184,7 @@ public void testSelectStarOnForbiddenView() expressionVirtualColumn("v0", "substring(\"dim1\", 0, 1)", ColumnType.STRING), expressionVirtualColumn("v1", "'a'", ColumnType.STRING) ) - .filters(selector("dim2", "a", null)) + .filters(equality("dim2", "a", ColumnType.STRING)) .columns("__time", "v0", "v1") .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .context(QUERY_CONTEXT_DEFAULT) @@ -1212,7 +1215,7 @@ public void testSelectStarOnRestrictedView() ImmutableList.of( newScanQueryBuilder() .dataSource(CalciteTests.FORBIDDEN_DATASOURCE) - .filters(selector("dim2", "a", null)) + .filters(equality("dim2", "a", ColumnType.STRING)) .intervals(querySegmentSpec(Filtration.eternity())) .columns("__time", "dim1", "dim2", "m1") .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) @@ -1237,7 +1240,7 @@ public void testSelectStarOnRestrictedView() newScanQueryBuilder() .dataSource(CalciteTests.FORBIDDEN_DATASOURCE) .intervals(querySegmentSpec(Filtration.eternity())) - .filters(selector("dim2", "a", null)) + .filters(equality("dim2", "a", ColumnType.STRING)) .columns("__time", "dim1", "dim2", "m1") .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .context(QUERY_CONTEXT_DEFAULT) @@ -1412,7 +1415,11 @@ public void testSelectWithExpressionFilter() .virtualColumns( expressionVirtualColumn("v0", "(\"m1\" + 1)", ColumnType.FLOAT) ) - .filters(selector("v0", "7", null)) + .filters( + NullHandling.replaceWithDefault() + ? 
selector("v0", "7") + : equality("v0", 7.0, ColumnType.FLOAT) + ) .columns("dim1") .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .context(QUERY_CONTEXT_DEFAULT) diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteSubqueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteSubqueryTest.java index 40c04f358073..21853f3c3942 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteSubqueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteSubqueryTest.java @@ -132,7 +132,7 @@ public void testExactCountDistinctUsingSubqueryWithWhereToOuterFilter() .build() ) ) - .setDimFilter(bound("a0", "0", null, true, false, null, StringComparators.NUMERIC)) + .setDimFilter(range("a0", ColumnType.LONG, 0L, null, true, false)) .setInterval(querySegmentSpec(Filtration.eternity())) .setGranularity(Granularities.ALL) .setAggregatorSpecs(aggregators( @@ -182,7 +182,7 @@ public void testExactCountDistinctOfSemiJoinResult() .setDataSource(CalciteTests.DATASOURCE1) .setInterval(querySegmentSpec(Filtration.eternity())) .setGranularity(Granularities.ALL) - .setDimFilter(not(selector("dim1", "", null))) + .setDimFilter(not(equality("dim1", "", ColumnType.STRING))) .setDimensions( dimensions( new ExtractionDimensionSpec( @@ -258,7 +258,7 @@ public void testTwoExactCountDistincts() .setAggregatorSpecs( new FilteredAggregatorFactory( new CountAggregatorFactory("a0"), - not(selector("d0", null, null)) + notNull("d0") ) ) .setContext(QUERY_CONTEXT_DEFAULT) @@ -282,7 +282,7 @@ public void testTwoExactCountDistincts() .setAggregatorSpecs( new FilteredAggregatorFactory( new CountAggregatorFactory("a0"), - not(selector("d0", null, null)) + notNull("d0") ) ) .setContext(QUERY_CONTEXT_DEFAULT) @@ -329,7 +329,7 @@ public void testViewAndJoin() "j0.", "(\"dim2\" == \"j0.dim2\")", JoinType.INNER, - bound("dim2", "a", "a", false, false, null, null) + range("dim2", ColumnType.STRING, "a", "a", false, false) ), new QueryDataSource( 
newScanQueryBuilder().dataSource(CalciteTests.DATASOURCE1) @@ -344,7 +344,7 @@ public void testViewAndJoin() ) ) .intervals(querySegmentSpec(Filtration.eternity())) - .filters(not(selector("dim1", "z", new SubstringDimExtractionFn(0, 1)))) + .filters(not(equality("dim1", "z", new SubstringDimExtractionFn(0, 1), ColumnType.STRING))) .granularity(Granularities.ALL) .aggregators(aggregators(new CountAggregatorFactory("a0"))) .context(queryContextModified) @@ -481,7 +481,7 @@ public void testUsingSubqueryAsFilterOnTwoColumns() .setDataSource(CalciteTests.DATASOURCE1) .setInterval(querySegmentSpec(Filtration.eternity())) .setGranularity(Granularities.ALL) - .setDimFilter(selector("dim2", "abc", null)) + .setDimFilter(equality("dim2", "abc", ColumnType.STRING)) .setDimensions(dimensions( new DefaultDimensionSpec("dim1", "d0"), new DefaultDimensionSpec("dim2", "d1") @@ -490,7 +490,7 @@ public void testUsingSubqueryAsFilterOnTwoColumns() .setPostAggregatorSpecs( ImmutableList.of(expressionPostAgg("p0", "'abc'")) ) - .setHavingSpec(having(selector("a0", "1", null))) + .setHavingSpec(having(equality("a0", 1L, ColumnType.LONG))) .setContext(QUERY_CONTEXT_DEFAULT) .build() ), @@ -561,7 +561,7 @@ public void testMinMaxAvgDailyCountWithLimit() new LongSumAggregatorFactory("_a2:sum", "a0"), new FilteredAggregatorFactory( new CountAggregatorFactory("_a2:count"), - not(selector("a0", null, null)) + notNull("a0") ), new LongMaxAggregatorFactory("_a3", "d0"), new CountAggregatorFactory("_a4") @@ -697,7 +697,7 @@ public void testMaxSubqueryRows() .build() ) ) - .setDimFilter(bound("a0", "0", null, true, false, null, StringComparators.NUMERIC)) + .setDimFilter(range("a0", ColumnType.LONG, 0L, null, true, false)) .setInterval(querySegmentSpec(Filtration.eternity())) .setGranularity(Granularities.ALL) .setAggregatorSpecs(aggregators( From 97d4c2bb79ce814cd49c12d55f5d0393c3c73435 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Fri, 7 Jul 2023 02:02:16 -0700 Subject: [PATCH 02/44] more 
better --- .../HllSketchBuildColumnProcessorFactory.java | 23 ++++++++-- .../HllSketchBuildVectorProcessorFactory.java | 43 ++++++++++++++++++- .../KllDoublesSketchAggregatorFactory.java | 7 --- .../kll/KllFloatsSketchAggregatorFactory.java | 8 ---- .../DoublesSketchAggregatorFactory.java | 7 --- .../indexing/input/DruidSegmentReader.java | 6 ++- .../org/apache/druid/math/expr/ExprEval.java | 10 +++++ .../druid/query/filter/EqualityFilter.java | 10 ++--- .../vector/ObjectVectorValueMatcher.java | 1 - .../druid/segment/ColumnProcessorFactory.java | 3 +- .../druid/segment/ColumnProcessors.java | 5 ++- .../filter/ColumnComparisonFilter.java | 6 ++- .../filter/PredicateValueMatcherFactory.java | 8 +++- .../StringConstantValueMatcherFactory.java | 11 ++++- .../join/lookup/LookupJoinMatcher.java | 6 ++- .../join/table/IndexedTableJoinMatcher.java | 6 ++- .../druid/segment/filter/BaseFilterTest.java | 1 - .../druid/segment/join/JoinTestHelper.java | 6 ++- .../calcite/CalciteNestedDataQueryTest.java | 8 ++-- 19 files changed, 126 insertions(+), 49 deletions(-) diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildColumnProcessorFactory.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildColumnProcessorFactory.java index 4a8b15a85a00..e6561c6856fc 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildColumnProcessorFactory.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildColumnProcessorFactory.java @@ -21,14 +21,17 @@ import org.apache.datasketches.hll.HllSketch; import org.apache.druid.java.util.common.StringEncoding; -import org.apache.druid.java.util.common.UOE; +import org.apache.druid.math.expr.ExprEval; +import org.apache.druid.math.expr.ExpressionType; import 
org.apache.druid.segment.BaseDoubleColumnValueSelector; import org.apache.druid.segment.BaseFloatColumnValueSelector; import org.apache.druid.segment.BaseLongColumnValueSelector; import org.apache.druid.segment.BaseObjectColumnValueSelector; import org.apache.druid.segment.ColumnProcessorFactory; import org.apache.druid.segment.DimensionSelector; +import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.column.NullableTypeStrategy; import org.apache.druid.segment.data.IndexedInts; import java.util.function.Consumer; @@ -98,12 +101,24 @@ public Consumer> makeLongProcessor(BaseLongColumnValueSelect } @Override - public Consumer> makeArrayProcessor(BaseObjectColumnValueSelector selector) + public Consumer> makeArrayProcessor( + BaseObjectColumnValueSelector selector, + ColumnCapabilities columnCapabilities + ) { - // todo (clint): pass in type info so we can convert these arrays to byte arrays - throw new UOE("HLL sketch does not support ARRAY inputs"); + final ExpressionType expressionType = ExpressionType.fromColumnType(columnCapabilities); + final NullableTypeStrategy strategy = expressionType.getNullableStrategy(); + return sketch -> { + final Object o = selector.getObject(); + if (o != null) { + byte[] bytes = ExprEval.toBytes(expressionType, strategy, o); + sketch.get().update(bytes); + } + }; } + + @Override public Consumer> makeComplexProcessor(BaseObjectColumnValueSelector selector) { diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/vector/HllSketchBuildVectorProcessorFactory.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/vector/HllSketchBuildVectorProcessorFactory.java index 5c430c7ce4a9..b4e3eea4f1d6 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/vector/HllSketchBuildVectorProcessorFactory.java +++ 
b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/vector/HllSketchBuildVectorProcessorFactory.java @@ -19,15 +19,22 @@ package org.apache.druid.query.aggregation.datasketches.hll.vector; +import org.apache.datasketches.hll.HllSketch; import org.apache.druid.java.util.common.StringEncoding; +import org.apache.druid.math.expr.ExprEval; +import org.apache.druid.math.expr.ExpressionType; import org.apache.druid.query.aggregation.datasketches.hll.HllSketchBuildBufferAggregatorHelper; import org.apache.druid.segment.VectorColumnProcessorFactory; import org.apache.druid.segment.column.ColumnCapabilities; +import org.apache.druid.segment.column.NullableTypeStrategy; import org.apache.druid.segment.vector.MultiValueDimensionVectorSelector; import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector; import org.apache.druid.segment.vector.VectorObjectSelector; import org.apache.druid.segment.vector.VectorValueSelector; +import javax.annotation.Nullable; +import java.nio.ByteBuffer; + public class HllSketchBuildVectorProcessorFactory implements VectorColumnProcessorFactory { private final HllSketchBuildBufferAggregatorHelper helper; @@ -89,7 +96,41 @@ public HllSketchBuildVectorProcessor makeArrayProcessor( VectorObjectSelector selector ) { - return null; + final ExpressionType expressionType = ExpressionType.fromColumnType(capabilities); + final NullableTypeStrategy typeStrategy = expressionType.getNullableStrategy(); + return new HllSketchBuildVectorProcessor() + { + @Override + public void aggregate(ByteBuffer buf, int position, int startRow, int endRow) + { + final Object[] vector = selector.getObjectVector(); + final HllSketch sketch = helper.getSketchAtPosition(buf, position); + + for (int i = startRow; i < endRow; i++) { + if (vector[i] != null) { + byte[] bytes = ExprEval.toBytes(expressionType, typeStrategy, vector[i]); + sketch.update(bytes); + } + } + } + + @Override + public void aggregate(ByteBuffer 
buf, int numRows, int[] positions, @Nullable int[] rows, int positionOffset) + { + final Object[] vector = selector.getObjectVector(); + + for (int i = 0; i < numRows; i++) { + final int idx = rows != null ? rows[i] : i; + final int position = positions[i] + positionOffset; + final HllSketch sketch = helper.getSketchAtPosition(buf, position); + + if (vector[idx] != null) { + byte[] bytes = ExprEval.toBytes(expressionType, typeStrategy, vector[idx]); + sketch.update(bytes); + } + } + } + }; } @Override diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchAggregatorFactory.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchAggregatorFactory.java index bea0bc610e09..b18153067a2d 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchAggregatorFactory.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchAggregatorFactory.java @@ -212,13 +212,6 @@ public VectorAggregator makeLongProcessor(ColumnCapabilities capabilities, Vecto @Override public VectorAggregator makeArrayProcessor(ColumnCapabilities capabilities, VectorObjectSelector selector) { - // todo (clint): y tho? shouldn't this (and string inputs) be an error? 
- /* - throw new UOE( - "KLL Doubles sketch does not support[%s] inputs", - capabilities.toColumnType() - ); - */ return new KllSketchNoOpBufferAggregator<>(getEmptySketch()); } diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchAggregatorFactory.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchAggregatorFactory.java index e05e627a9702..c5506a3b86ab 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchAggregatorFactory.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchAggregatorFactory.java @@ -212,14 +212,6 @@ public VectorAggregator makeLongProcessor(ColumnCapabilities capabilities, Vecto @Override public VectorAggregator makeArrayProcessor(ColumnCapabilities capabilities, VectorObjectSelector selector) { - - // todo (clint): y tho? shouldn't this (and string inputs) be an error? 
- /* - throw new UOE( - "KLL Floats sketch does not support[%s] inputs", - capabilities.toColumnType() - ); - */ return new KllSketchNoOpBufferAggregator<>(getEmptySketch()); } diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/quantiles/DoublesSketchAggregatorFactory.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/quantiles/DoublesSketchAggregatorFactory.java index 2c54a3c3db5b..cbf72a2745c9 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/quantiles/DoublesSketchAggregatorFactory.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/quantiles/DoublesSketchAggregatorFactory.java @@ -211,13 +211,6 @@ public VectorAggregator makeLongProcessor(ColumnCapabilities capabilities, Vecto @Override public VectorAggregator makeArrayProcessor(ColumnCapabilities capabilities, VectorObjectSelector selector) { - /* - throw new UOE( - "Doubles sketch does not support[%s] inputs", - capabilities.toColumnType() - ); - */ - // todo (clint): y tho? shouldn't this (and string inputs) be an error? 
return new NoopDoublesSketchBufferAggregator(); } diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/input/DruidSegmentReader.java b/indexing-service/src/main/java/org/apache/druid/indexing/input/DruidSegmentReader.java index 301d718aab99..cff9081d57ed 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/input/DruidSegmentReader.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/input/DruidSegmentReader.java @@ -52,6 +52,7 @@ import org.apache.druid.segment.IndexIO; import org.apache.druid.segment.QueryableIndexStorageAdapter; import org.apache.druid.segment.VirtualColumns; +import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.data.IndexedInts; import org.apache.druid.segment.filter.Filters; @@ -277,7 +278,10 @@ public Supplier makeLongProcessor(BaseLongColumnValueSelector selector) } @Override - public Supplier makeArrayProcessor(BaseObjectColumnValueSelector selector) + public Supplier makeArrayProcessor( + BaseObjectColumnValueSelector selector, + ColumnCapabilities columnCapabilities + ) { return selector::getObject; } diff --git a/processing/src/main/java/org/apache/druid/math/expr/ExprEval.java b/processing/src/main/java/org/apache/druid/math/expr/ExprEval.java index 15334ce919be..8937123c1ccf 100644 --- a/processing/src/main/java/org/apache/druid/math/expr/ExprEval.java +++ b/processing/src/main/java/org/apache/druid/math/expr/ExprEval.java @@ -142,6 +142,16 @@ public static void serialize(ByteBuffer buffer, int position, ExpressionType typ } } + public static byte[] toBytes(ExpressionType expressionType, NullableTypeStrategy strategy, Object o) + { + // convert the array to byte[] form so that we take a hash of the whole array + final ExprEval eval = ExprEval.ofType(expressionType, o); + final int size = strategy.estimateSizeBytes(eval.valueOrDefault()); + final ByteBuffer buffer = ByteBuffer.allocate(size); + 
strategy.write(buffer, eval.valueOrDefault(), size); + return buffer.array(); + } + /** * Converts a List to an appropriate array type, optionally doing some conversion to make multi-valued strings * consistent across selector types, which are not consistent in treatment of null, [], and [null]. diff --git a/processing/src/main/java/org/apache/druid/query/filter/EqualityFilter.java b/processing/src/main/java/org/apache/druid/query/filter/EqualityFilter.java index debc00078190..90809ca9d024 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/EqualityFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/EqualityFilter.java @@ -217,7 +217,6 @@ public RangeSet getDimensionRangeSet(String dimension) return null; } RangeSet retSet = TreeRangeSet.create(); - // todo (clint): this is lame.. but matches how range partitioning currently works i think retSet.add(Range.singleton(String.valueOf(matchValue))); return retSet; } @@ -235,8 +234,6 @@ public BitmapColumnIndex getBitmapColumnIndex(ColumnIndexSelector selector) return Filters.makeNullIndex(false, selector); } - // todo (clint): do it for reals, i think we can do better than string value set ... 
- final StringValueSetIndex valueSetIndex = indexSupplier.as(StringValueSetIndex.class); if (valueSetIndex == null) { // column exists, but has no index @@ -519,11 +516,14 @@ public ValueMatcher makeLongProcessor(BaseLongColumnValueSelector selector) } @Override - public ValueMatcher makeArrayProcessor(BaseObjectColumnValueSelector selector) + public ValueMatcher makeArrayProcessor( + BaseObjectColumnValueSelector selector, + ColumnCapabilities columnCapabilities + ) { return new PredicateValueMatcherFactory( new EqualityPredicateFactory(matchValue.valueOrDefault(), matchValueType) - ).makeArrayProcessor(selector); + ).makeArrayProcessor(selector, columnCapabilities); } @Override diff --git a/processing/src/main/java/org/apache/druid/query/filter/vector/ObjectVectorValueMatcher.java b/processing/src/main/java/org/apache/druid/query/filter/vector/ObjectVectorValueMatcher.java index f62039012046..4b59454bbfaf 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/vector/ObjectVectorValueMatcher.java +++ b/processing/src/main/java/org/apache/druid/query/filter/vector/ObjectVectorValueMatcher.java @@ -56,7 +56,6 @@ public VectorValueMatcher makeMatcher(@Nullable String value) @Override public VectorValueMatcher makeMatcher(Object value, ColumnType type) { - // todo (clint): something cooler... 
return BooleanVectorValueMatcher.of(selector, value == null); } diff --git a/processing/src/main/java/org/apache/druid/segment/ColumnProcessorFactory.java b/processing/src/main/java/org/apache/druid/segment/ColumnProcessorFactory.java index 360ba5abf69c..a27e490ba69e 100644 --- a/processing/src/main/java/org/apache/druid/segment/ColumnProcessorFactory.java +++ b/processing/src/main/java/org/apache/druid/segment/ColumnProcessorFactory.java @@ -20,6 +20,7 @@ package org.apache.druid.segment; import org.apache.druid.query.dimension.ColumnSelectorStrategyFactory; +import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnType; /** @@ -76,7 +77,7 @@ public interface ColumnProcessorFactory */ T makeLongProcessor(BaseLongColumnValueSelector selector); - T makeArrayProcessor(BaseObjectColumnValueSelector selector); + T makeArrayProcessor(BaseObjectColumnValueSelector selector, ColumnCapabilities columnCapabilities); /** * Create a processor for a complex column. diff --git a/processing/src/main/java/org/apache/druid/segment/ColumnProcessors.java b/processing/src/main/java/org/apache/druid/segment/ColumnProcessors.java index 3cbfb7e05f65..3a32f52f6090 100644 --- a/processing/src/main/java/org/apache/druid/segment/ColumnProcessors.java +++ b/processing/src/main/java/org/apache/druid/segment/ColumnProcessors.java @@ -287,7 +287,10 @@ private static T makeProcessorInternal( case DOUBLE: return processorFactory.makeDoubleProcessor(valueSelectorFunction.apply(selectorFactory)); case ARRAY: - return processorFactory.makeArrayProcessor(valueSelectorFunction.apply(selectorFactory)); + return processorFactory.makeArrayProcessor( + valueSelectorFunction.apply(selectorFactory), + capabilities != null ? 
capabilities : ColumnCapabilitiesImpl.createDefault().setType(effectiveType) + ); case COMPLEX: return processorFactory.makeComplexProcessor(valueSelectorFunction.apply(selectorFactory)); default: diff --git a/processing/src/main/java/org/apache/druid/segment/filter/ColumnComparisonFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/ColumnComparisonFilter.java index b280be27a9f2..9fb866f2cdcb 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/ColumnComparisonFilter.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/ColumnComparisonFilter.java @@ -35,6 +35,7 @@ import org.apache.druid.segment.ColumnSelector; import org.apache.druid.segment.ColumnSelectorFactory; import org.apache.druid.segment.DimensionSelector; +import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.data.IndexedInts; import org.apache.druid.segment.index.BitmapColumnIndex; @@ -235,7 +236,10 @@ public Supplier makeLongProcessor(BaseLongColumnValueSelector selector } @Override - public Supplier makeArrayProcessor(BaseObjectColumnValueSelector selector) + public Supplier makeArrayProcessor( + BaseObjectColumnValueSelector selector, + ColumnCapabilities columnCapabilities + ) { return () -> { final Object o = selector.getObject(); diff --git a/processing/src/main/java/org/apache/druid/segment/filter/PredicateValueMatcherFactory.java b/processing/src/main/java/org/apache/druid/segment/filter/PredicateValueMatcherFactory.java index 7a536c7a2094..4b7f68a2899a 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/PredicateValueMatcherFactory.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/PredicateValueMatcherFactory.java @@ -36,6 +36,7 @@ import org.apache.druid.segment.ColumnProcessorFactory; import org.apache.druid.segment.DimensionSelector; import org.apache.druid.segment.NilColumnValueSelector; +import 
org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnType; import java.util.List; @@ -85,7 +86,10 @@ public ValueMatcher makeLongProcessor(BaseLongColumnValueSelector selector) } @Override - public ValueMatcher makeArrayProcessor(BaseObjectColumnValueSelector selector) + public ValueMatcher makeArrayProcessor( + BaseObjectColumnValueSelector selector, + ColumnCapabilities columnCapabilities + ) { if (selector instanceof NilColumnValueSelector) { // Column does not exist, or is unfilterable. Treat it as all nulls. @@ -103,7 +107,7 @@ public boolean matches() return predicate.apply((Object[]) o); } if (o instanceof List) { - ExprEval oEval = ExprEval.bestEffortArray((List) o); + ExprEval oEval = ExprEval.bestEffortArray((List) o); return predicate.apply(oEval.asArray()); } // upcast non-array to a single element array to behave consistently with expressions.. idk if this is cool diff --git a/processing/src/main/java/org/apache/druid/segment/filter/StringConstantValueMatcherFactory.java b/processing/src/main/java/org/apache/druid/segment/filter/StringConstantValueMatcherFactory.java index 017235c5694d..4c993977ad6e 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/StringConstantValueMatcherFactory.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/StringConstantValueMatcherFactory.java @@ -28,6 +28,7 @@ import org.apache.druid.segment.BaseObjectColumnValueSelector; import org.apache.druid.segment.ColumnProcessorFactory; import org.apache.druid.segment.DimensionSelector; +import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnType; import javax.annotation.Nullable; @@ -77,9 +78,15 @@ public ValueMatcher makeLongProcessor(BaseLongColumnValueSelector selector) } @Override - public ValueMatcher makeArrayProcessor(BaseObjectColumnValueSelector selector) + public ValueMatcher makeArrayProcessor( + BaseObjectColumnValueSelector selector, + 
ColumnCapabilities columnCapabilities + ) { - return new PredicateValueMatcherFactory(new SelectorPredicateFactory(matchValue)).makeArrayProcessor(selector); + // this is gonna fail because SelectorPredicateFactory does not implement array predicate... + return new PredicateValueMatcherFactory( + new SelectorPredicateFactory(matchValue) + ).makeArrayProcessor(selector, columnCapabilities); } @Override diff --git a/processing/src/main/java/org/apache/druid/segment/join/lookup/LookupJoinMatcher.java b/processing/src/main/java/org/apache/druid/segment/join/lookup/LookupJoinMatcher.java index 1e58646fcfef..9b68eb303a66 100644 --- a/processing/src/main/java/org/apache/druid/segment/join/lookup/LookupJoinMatcher.java +++ b/processing/src/main/java/org/apache/druid/segment/join/lookup/LookupJoinMatcher.java @@ -37,6 +37,7 @@ import org.apache.druid.segment.ColumnSelectorFactory; import org.apache.druid.segment.DimensionHandlerUtils; import org.apache.druid.segment.DimensionSelector; +import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.data.IndexedInts; import org.apache.druid.segment.join.Equality; @@ -113,7 +114,10 @@ public Supplier makeLongProcessor(BaseLongColumnValueSelector selector) } @Override - public Supplier makeArrayProcessor(BaseObjectColumnValueSelector selector) + public Supplier makeArrayProcessor( + BaseObjectColumnValueSelector selector, + ColumnCapabilities columnCapabilities + ) { throw new QueryUnsupportedException("Joining against a ARRAY columns is not supported."); } diff --git a/processing/src/main/java/org/apache/druid/segment/join/table/IndexedTableJoinMatcher.java b/processing/src/main/java/org/apache/druid/segment/join/table/IndexedTableJoinMatcher.java index 3935184e52b2..6640e24726eb 100644 --- a/processing/src/main/java/org/apache/druid/segment/join/table/IndexedTableJoinMatcher.java +++ 
b/processing/src/main/java/org/apache/druid/segment/join/table/IndexedTableJoinMatcher.java @@ -48,6 +48,7 @@ import org.apache.druid.segment.SimpleAscendingOffset; import org.apache.druid.segment.SimpleDescendingOffset; import org.apache.druid.segment.SimpleSettableOffset; +import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.data.IndexedInts; @@ -492,7 +493,10 @@ public ConditionMatcher makeLongProcessor(BaseLongColumnValueSelector selector) } @Override - public ConditionMatcher makeArrayProcessor(BaseObjectColumnValueSelector selector) + public ConditionMatcher makeArrayProcessor( + BaseObjectColumnValueSelector selector, + ColumnCapabilities columnCapabilities + ) { return () -> { throw new QueryUnsupportedException("Joining against ARRAY columns is not supported."); diff --git a/processing/src/test/java/org/apache/druid/segment/filter/BaseFilterTest.java b/processing/src/test/java/org/apache/druid/segment/filter/BaseFilterTest.java index e1b49bb7c1df..03471b31ca2e 100644 --- a/processing/src/test/java/org/apache/druid/segment/filter/BaseFilterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/filter/BaseFilterTest.java @@ -926,7 +926,6 @@ protected void assertFilterMatchesSkipArrays( Throwable.class, () -> assertFilterMatches(filter, expectedRows, testVectorized) ); - // todo (clint): maybe better? 
Assert.assertTrue(t.getMessage().contains("ARRAY")); } else { assertFilterMatches(filter, expectedRows, testVectorized); diff --git a/processing/src/test/java/org/apache/druid/segment/join/JoinTestHelper.java b/processing/src/test/java/org/apache/druid/segment/join/JoinTestHelper.java index efc90593c3a4..af359767bc73 100644 --- a/processing/src/test/java/org/apache/druid/segment/join/JoinTestHelper.java +++ b/processing/src/test/java/org/apache/druid/segment/join/JoinTestHelper.java @@ -53,6 +53,7 @@ import org.apache.druid.segment.IndexBuilder; import org.apache.druid.segment.RowAdapter; import org.apache.druid.segment.TestHelper; +import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnConfig; import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.RowSignature; @@ -153,7 +154,10 @@ public Supplier makeLongProcessor(BaseLongColumnValueSelector selector) } @Override - public Supplier makeArrayProcessor(BaseObjectColumnValueSelector selector) + public Supplier makeArrayProcessor( + BaseObjectColumnValueSelector selector, + ColumnCapabilities columnCapabilities + ) { return selector::getObject; } diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java index b5d9440faad6..16eb9e14b22a 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java @@ -2653,8 +2653,8 @@ public void testGroupByPathSelectorFilterVariant2Int() .build() ), ImmutableList.of( - // todo (clint): this is a bit wonky, we get extra matches for numeric 1 matcher because the virtual column - // is defined as long typed, which makes a long processor which will convert the 1.1 to a 1L + // this is a bit wonky, we get extra matches for numeric 1 matcher because the virtual column is defined + 
// as long typed, which makes a long processor which will convert the 1.1 to a 1L new Object[]{"100", 2L}, new Object[]{"200", 1L} ), @@ -2699,8 +2699,8 @@ public void testGroupByPathSelectorFilterVariant2BothTypesMatcher() .build() ), ImmutableList.of( - // todo (clint): this is a bit wonky, we get 2 matches for numeric 1 matcher because the virtual column - // is defined as long typed, which makes a long processor which will convert the 1.1 to a 1L + // this is a bit wonky, we get 2 matches for numeric 1 matcher because the virtual column is defined as + // long typed, which makes a long processor which will convert the 1.1 to a 1L new Object[]{"100", 2L}, new Object[]{"200", 1L} ), From 899bf58c424c605537d62a4314ac147e900eeeca Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Fri, 7 Jul 2023 02:15:42 -0700 Subject: [PATCH 03/44] more --- .../sql/calcite/expression/Expressions.java | 2 +- .../calcite/CalciteNestedDataQueryTest.java | 133 +++++++++++++++--- 2 files changed, 113 insertions(+), 22 deletions(-) diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/expression/Expressions.java b/sql/src/main/java/org/apache/druid/sql/calcite/expression/Expressions.java index 6618be846665..273348407deb 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/expression/Expressions.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/expression/Expressions.java @@ -670,7 +670,7 @@ private static DimFilter toSimpleLeafFilter( } } - if (plannerContext.isUseBoundsAndSelectors()) { + if (plannerContext.isUseBoundsAndSelectors() && rhs instanceof RexLiteral) { final String val; final RexLiteral rhsLiteral = (RexLiteral) rhs; if (SqlTypeName.NUMERIC_TYPES.contains(rhsLiteral.getTypeName())) { diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java index 16eb9e14b22a..f5443697fa10 100644 --- 
a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java @@ -928,17 +928,52 @@ public void testJsonValueArrays() ImmutableList.of( new Object[]{null, Arrays.asList(1L, 2L, 3L), Arrays.asList(1.1D, 2.2D, 3.3D), null}, new Object[]{null, null, null, null}, - new Object[]{Arrays.asList("d", "e"), Arrays.asList(1L, 4L), Arrays.asList(2.2D, 3.3D, 4.0D), Arrays.asList(1L, 2L)}, + new Object[]{ + Arrays.asList("d", "e"), + Arrays.asList(1L, 4L), + Arrays.asList(2.2D, 3.3D, 4.0D), + Arrays.asList(1L, 2L) + }, new Object[]{Arrays.asList("a", "b"), null, null, Collections.singletonList(1L)}, - new Object[]{Arrays.asList("a", "b"), Arrays.asList(1L, 2L, 3L), Arrays.asList(1.1D, 2.2D, 3.3D), Arrays.asList(1L, 2L, null)}, - new Object[]{Arrays.asList("b", "c"), Arrays.asList(1L, 2L, 3L, 4L), Arrays.asList(1.1D, 3.3D), Collections.singletonList(1L)}, - new Object[]{Arrays.asList("a", "b", "c"), Arrays.asList(2L, 3L), Arrays.asList(3.3D, 4.4D, 5.5D), null}, + new Object[]{ + Arrays.asList("a", "b"), + Arrays.asList(1L, 2L, 3L), + Arrays.asList(1.1D, 2.2D, 3.3D), + Arrays.asList(1L, 2L, null) + }, + new Object[]{ + Arrays.asList("b", "c"), + Arrays.asList(1L, 2L, 3L, 4L), + Arrays.asList(1.1D, 3.3D), + Collections.singletonList(1L) + }, + new Object[]{ + Arrays.asList("a", "b", "c"), + Arrays.asList(2L, 3L), + Arrays.asList(3.3D, 4.4D, 5.5D), + null + }, new Object[]{null, Arrays.asList(1L, 2L, 3L), Arrays.asList(1.1D, 2.2D, 3.3D), null}, new Object[]{null, null, null, null}, - new Object[]{Arrays.asList("d", "e"), Arrays.asList(1L, 4L), Arrays.asList(2.2D, 3.3D, 4.0D), Arrays.asList(1L, 2L)}, + new Object[]{ + Arrays.asList("d", "e"), + Arrays.asList(1L, 4L), + Arrays.asList(2.2D, 3.3D, 4.0D), + Arrays.asList(1L, 2L) + }, new Object[]{Arrays.asList("a", "b"), null, null, null}, - new Object[]{Arrays.asList("a", "b"), Arrays.asList(1L, 2L, 3L), Arrays.asList(1.1D, 2.2D, 
3.3D), Arrays.asList(2L, 3L)}, - new Object[]{Arrays.asList("b", "c"), Arrays.asList(1L, 2L, 3L, 4L), Arrays.asList(1.1D, 3.3D), Collections.singletonList(1L)}, + new Object[]{ + Arrays.asList("a", "b"), + Arrays.asList(1L, 2L, 3L), + Arrays.asList(1.1D, 2.2D, 3.3D), + Arrays.asList(2L, 3L) + }, + new Object[]{ + Arrays.asList("b", "c"), + Arrays.asList(1L, 2L, 3L, 4L), + Arrays.asList(1.1D, 3.3D), + Collections.singletonList(1L) + }, new Object[]{Arrays.asList("a", "b", "c"), Arrays.asList(2L, 3L), Arrays.asList(3.3D, 4.4D, 5.5D), null} ) @@ -1013,6 +1048,7 @@ public void testUnnestRootSingleTypeArrayLongNulls() ) .run(); } + @Test public void testUnnestRootSingleTypeArrayStringNulls() { @@ -1281,6 +1317,10 @@ public void testGroupByRootSingleTypeArrayLongNulls() @Test public void testGroupByRootSingleTypeArrayLongNullsFilteredArrayEquality() { + if (NullHandling.replaceWithDefault()) { + // this fails in default value mode because it relies on equality filter and null filter to behave correctly + return; + } cannotVectorize(); testBuilder() .sql( @@ -1406,10 +1446,18 @@ public void testGroupByRootSingleTypeArrayLongNullsFiltered() ) ) .setVirtualColumns( - new ExpressionVirtualColumn("v0", "array_length(\"arrayLongNulls\")", ColumnType.LONG, queryFramework().macroTable()) + new ExpressionVirtualColumn( + "v0", + "array_length(\"arrayLongNulls\")", + ColumnType.LONG, + queryFramework().macroTable() + ) ) .setDimFilter( - new ExpressionDimFilter("array_contains(\"arrayLongNulls\",1)", queryFramework().macroTable()) + new ExpressionDimFilter( + "array_contains(\"arrayLongNulls\",1)", + queryFramework().macroTable() + ) ) .setAggregatorSpecs( aggregators( @@ -1603,7 +1651,11 @@ public void testGroupByRootSingleTypeArrayStringNullsUnnest() .setDataSource( UnnestDataSource.create( TableDataSource.create(DATA_SOURCE_ARRAYS), - expressionVirtualColumn("j0.unnest", "\"arrayStringNulls\"", ColumnType.STRING_ARRAY), + expressionVirtualColumn( + "j0.unnest", + 
"\"arrayStringNulls\"", + ColumnType.STRING_ARRAY + ), null ) ) @@ -1663,10 +1715,18 @@ public void testGroupByRootSingleTypeArrayStringNullsFiltered() ) ) .setVirtualColumns( - new ExpressionVirtualColumn("v0", "array_length(\"arrayStringNulls\")", ColumnType.LONG, queryFramework().macroTable()) + new ExpressionVirtualColumn( + "v0", + "array_length(\"arrayStringNulls\")", + ColumnType.LONG, + queryFramework().macroTable() + ) ) .setDimFilter( - new ExpressionDimFilter("array_contains(\"arrayStringNulls\",'b')", queryFramework().macroTable()) + new ExpressionDimFilter( + "array_contains(\"arrayStringNulls\",'b')", + queryFramework().macroTable() + ) ) .setAggregatorSpecs( aggregators( @@ -1809,7 +1869,11 @@ public void testGroupByRootSingleTypeArrayDoubleNullsUnnest() .setDataSource( UnnestDataSource.create( TableDataSource.create(DATA_SOURCE_ARRAYS), - expressionVirtualColumn("j0.unnest", "\"arrayDoubleNulls\"", ColumnType.DOUBLE_ARRAY), + expressionVirtualColumn( + "j0.unnest", + "\"arrayDoubleNulls\"", + ColumnType.DOUBLE_ARRAY + ), null ) ) @@ -1870,10 +1934,18 @@ public void testGroupByRootSingleTypeArrayDoubleNullsFiltered() ) ) .setVirtualColumns( - new ExpressionVirtualColumn("v0", "array_length(\"arrayDoubleNulls\")", ColumnType.LONG, queryFramework().macroTable()) + new ExpressionVirtualColumn( + "v0", + "array_length(\"arrayDoubleNulls\")", + ColumnType.LONG, + queryFramework().macroTable() + ) ) .setDimFilter( - new ExpressionDimFilter("array_contains(\"arrayDoubleNulls\",2.2)", queryFramework().macroTable()) + new ExpressionDimFilter( + "array_contains(\"arrayDoubleNulls\",2.2)", + queryFramework().macroTable() + ) ) .setAggregatorSpecs( aggregators( @@ -5114,7 +5186,11 @@ public void testGroupByRootSingleTypeArrayLongNullsAsMvd() new DefaultDimensionSpec("v0", "d0", ColumnType.STRING) ) ) - .setVirtualColumns(expressionVirtualColumn("v0", "array_to_mv(\"arrayLongNulls\")", ColumnType.STRING)) + .setVirtualColumns(expressionVirtualColumn( + "v0", + 
"array_to_mv(\"arrayLongNulls\")", + ColumnType.STRING + )) .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) .setContext(QUERY_CONTEXT_NO_STRINGIFY_ARRAY) .build() @@ -5173,10 +5249,17 @@ public void testGroupByRootSingleTypeArrayLongNullsAsMvdFiltered() ) .setVirtualColumns( expressionVirtualColumn("v0", "array_to_mv(\"arrayLongNulls\")", ColumnType.STRING), - expressionVirtualColumn("v1", "array_length(array_to_mv(\"arrayLongNulls\"))", ColumnType.LONG) + expressionVirtualColumn( + "v1", + "array_length(array_to_mv(\"arrayLongNulls\"))", + ColumnType.LONG + ) ) .setDimFilter( - new ExpressionDimFilter("array_contains(array_to_mv(\"arrayLongNulls\"),'1')", queryFramework().macroTable()) + new ExpressionDimFilter( + "array_contains(array_to_mv(\"arrayLongNulls\"),'1')", + queryFramework().macroTable() + ) ) .setAggregatorSpecs( aggregators( @@ -5353,10 +5436,18 @@ public void testGroupByRootSingleTypeArrayStringNullsFilteredAsMvd() ) .setVirtualColumns( expressionVirtualColumn("v0", "array_to_mv(\"arrayStringNulls\")", ColumnType.STRING), - new ExpressionVirtualColumn("v1", "array_length(array_to_mv(\"arrayStringNulls\"))", ColumnType.LONG, queryFramework().macroTable()) + new ExpressionVirtualColumn( + "v1", + "array_length(array_to_mv(\"arrayStringNulls\"))", + ColumnType.LONG, + queryFramework().macroTable() + ) ) .setDimFilter( - new ExpressionDimFilter("array_contains(array_to_mv(\"arrayStringNulls\"),'b')", queryFramework().macroTable()) + new ExpressionDimFilter( + "array_contains(array_to_mv(\"arrayStringNulls\"),'b')", + queryFramework().macroTable() + ) ) .setAggregatorSpecs( aggregators( @@ -5587,7 +5678,7 @@ public void testScanAllTypesAuto() "true", "1", "{}", - "1", + "4", "{\"a\":400,\"b\":{\"x\":\"d\",\"y\":1.1,\"z\":[3,4]}}", "{\"x\":1234,\"z\":{\"a\":[1.1,2.2,3.3],\"b\":true}}", "[\"d\",\"e\"]", From b545dc2ed7709a1ef4b1f8f88596a8e2948c9539 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Fri, 7 Jul 2023 12:29:23 -0700 
Subject: [PATCH 04/44] more stuff --- .../hll/sql/HllSketchSqlAggregatorTest.java | 8 +- .../sql/DoublesSketchSqlAggregatorTest.java | 27 ++++--- .../sql/ThetaSketchSqlAggregatorTest.java | 6 +- .../bloom/ObjectBloomFilterAggregator.java | 3 + .../druid/query/filter/BloomDimFilter.java | 13 ++++ .../bloom/BloomFilterAggregatorTest.java | 77 +++++++++++++++++++ .../sql/BloomFilterSqlAggregatorTest.java | 6 +- .../query/filter/BloomDimFilterTest.java | 48 ++++++++++-- .../filter/sql/BloomDimFilterSqlTest.java | 1 + .../sql/VarianceSqlAggregatorTest.java | 6 +- .../org/apache/druid/math/expr/ExprEval.java | 10 +++ .../druid/query/filter/EqualityFilter.java | 2 +- .../vector/ArrayVectorValueMatcher.java | 5 -- ...torValueMatcherColumnProcessorFactory.java | 2 +- .../ArrayContainsOperatorConversion.java | 28 ++++++- .../ArrayOverlapOperatorConversion.java | 14 +++- 16 files changed, 222 insertions(+), 34 deletions(-) diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java index 0819d58854ae..534114afe9ea 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java @@ -347,7 +347,7 @@ public void testApproxCountDistinctHllSketch() new HllSketchBuildAggregatorFactory("a1", "dim2", null, null, null, null, ROUND), new FilteredAggregatorFactory( new HllSketchBuildAggregatorFactory("a2", "dim2", null, null, null, null, ROUND), - not(equality("dim2", "", null)) + not(equality("dim2", "", ColumnType.STRING)) ), new HllSketchBuildAggregatorFactory("a3", "v0", null, null, null, null, ROUND), new HllSketchBuildAggregatorFactory("a4", "v1", null, 
null, null, null, ROUND), @@ -855,7 +855,11 @@ public void testEmptyTimeseriesResults() ImmutableList.of(Druids.newTimeseriesQueryBuilder() .dataSource(CalciteTests.DATASOURCE1) .intervals(querySegmentSpec(Filtration.eternity())) - .filters(bound("dim2", "0", "0", false, false, null, StringComparators.NUMERIC)) + .filters( + NullHandling.replaceWithDefault() + ? bound("dim2", "0", "0", false, false, null, StringComparators.NUMERIC) + : equality("dim2", 0L, ColumnType.LONG) + ) .granularity(Granularities.ALL) .aggregators( aggregators( diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/quantiles/sql/DoublesSketchSqlAggregatorTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/quantiles/sql/DoublesSketchSqlAggregatorTest.java index 27a14bd17ef9..be1c3505232f 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/quantiles/sql/DoublesSketchSqlAggregatorTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/quantiles/sql/DoublesSketchSqlAggregatorTest.java @@ -48,10 +48,7 @@ import org.apache.druid.query.aggregation.post.FieldAccessPostAggregator; import org.apache.druid.query.dimension.DefaultDimensionSpec; import org.apache.druid.query.expression.TestExprMacroTable; -import org.apache.druid.query.filter.NotDimFilter; -import org.apache.druid.query.filter.SelectorDimFilter; import org.apache.druid.query.groupby.GroupByQuery; -import org.apache.druid.query.ordering.StringComparators; import org.apache.druid.query.spec.MultipleIntervalSegmentSpec; import org.apache.druid.segment.IndexBuilder; import org.apache.druid.segment.QueryableIndex; @@ -161,11 +158,11 @@ public void testQuantileOnFloatAndLongs() new DoublesSketchAggregatorFactory("a4:agg", "v0", null), new FilteredAggregatorFactory( new DoublesSketchAggregatorFactory("a5:agg", "m1", null), - new 
SelectorDimFilter("dim1", "abc", null) + equality("dim1", "abc", ColumnType.STRING) ), new FilteredAggregatorFactory( new DoublesSketchAggregatorFactory("a6:agg", "m1", null), - new NotDimFilter(new SelectorDimFilter("dim1", "abc", null)) + not(equality("dim1", "abc", ColumnType.STRING)) ), new DoublesSketchAggregatorFactory("a8:agg", "cnt", null) )) @@ -223,11 +220,11 @@ public void testQuantileOnComplexColumn() new DoublesSketchAggregatorFactory("a2:agg", "qsketch_m1", 256), new FilteredAggregatorFactory( new DoublesSketchAggregatorFactory("a4:agg", "qsketch_m1", null), - new SelectorDimFilter("dim1", "abc", null) + equality("dim1", "abc", ColumnType.STRING) ), new FilteredAggregatorFactory( new DoublesSketchAggregatorFactory("a5:agg", "qsketch_m1", null), - new NotDimFilter(new SelectorDimFilter("dim1", "abc", null)) + not(equality("dim1", "abc", ColumnType.STRING)) ) )) .postAggregators( @@ -325,11 +322,11 @@ public void testQuantileOnCastedString() new DoublesSketchAggregatorFactory("a4:agg", "v1", 128), new FilteredAggregatorFactory( new DoublesSketchAggregatorFactory("a5:agg", "v0", 128), - new SelectorDimFilter("dim2", "abc", null) + equality("dim2", "abc", ColumnType.STRING) ), new FilteredAggregatorFactory( new DoublesSketchAggregatorFactory("a6:agg", "v0", 128), - new NotDimFilter(new SelectorDimFilter("dim2", "abc", null)) + not(equality("dim2", "abc", ColumnType.STRING)) ) )) .postAggregators( @@ -728,7 +725,11 @@ public void testEmptyTimeseriesResults() .dataSource(CalciteTests.DATASOURCE1) .intervals(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity()))) .granularity(Granularities.ALL) - .filters(bound("dim2", "0", "0", false, false, null, StringComparators.NUMERIC)) + .filters( + NullHandling.replaceWithDefault() + ? 
numericSelector("dim2", "0", null) + : equality("dim2", 0L, ColumnType.LONG) + ) .aggregators(ImmutableList.of( new DoublesSketchAggregatorFactory("a0:agg", "m1", null), new DoublesSketchAggregatorFactory("a1:agg", "qsketch_m1", null), @@ -775,7 +776,11 @@ public void testEmptyTimeseriesResultsWithFinalizeSketches() .dataSource(CalciteTests.DATASOURCE1) .intervals(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity()))) .granularity(Granularities.ALL) - .filters(bound("dim2", "0", "0", false, false, null, StringComparators.NUMERIC)) + .filters( + NullHandling.replaceWithDefault() + ? numericSelector("dim2", "0", null) + : equality("dim2", 0L, ColumnType.LONG) + ) .aggregators(ImmutableList.of( new DoublesSketchAggregatorFactory("a0:agg", "m1", null), new DoublesSketchAggregatorFactory("a1:agg", "qsketch_m1", null), diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchSqlAggregatorTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchSqlAggregatorTest.java index 8086898f3c3b..7f8c1970b2d7 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchSqlAggregatorTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchSqlAggregatorTest.java @@ -831,7 +831,11 @@ public void testEmptyTimeseriesResults() .dataSource(CalciteTests.DATASOURCE1) .intervals(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity()))) .granularity(Granularities.ALL) - .filters(bound("dim2", "0", "0", false, false, null, StringComparators.NUMERIC)) + .filters( + NullHandling.replaceWithDefault() + ? 
numericSelector("dim2", "0", null) + : equality("dim2", 0L, ColumnType.LONG) + ) .aggregators( ImmutableList.of( new SketchMergeAggregatorFactory( diff --git a/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/aggregation/bloom/ObjectBloomFilterAggregator.java b/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/aggregation/bloom/ObjectBloomFilterAggregator.java index 0ad7a179fdb6..87e9f6721b7b 100644 --- a/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/aggregation/bloom/ObjectBloomFilterAggregator.java +++ b/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/aggregation/bloom/ObjectBloomFilterAggregator.java @@ -19,6 +19,7 @@ package org.apache.druid.query.aggregation.bloom; +import org.apache.druid.math.expr.ExprEval; import org.apache.druid.query.filter.BloomKFilter; import org.apache.druid.segment.BaseObjectColumnValueSelector; @@ -54,6 +55,8 @@ void bufferAdd(ByteBuffer buf) BloomKFilter.addFloat(buf, (float) object); } else if (object instanceof String) { BloomKFilter.addString(buf, (String) object); + } else if (object instanceof Object[]) { + BloomKFilter.addBytes(buf, ExprEval.toBytesBestEffort(object)); } else { BloomKFilter.addBytes(buf, null, 0, 0); } diff --git a/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/filter/BloomDimFilter.java b/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/filter/BloomDimFilter.java index a22fec727464..5a20822eca3d 100644 --- a/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/filter/BloomDimFilter.java +++ b/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/filter/BloomDimFilter.java @@ -28,6 +28,7 @@ import com.google.common.collect.ImmutableSet; import com.google.common.collect.RangeSet; import com.google.common.hash.HashCode; +import org.apache.druid.math.expr.ExprEval; import org.apache.druid.query.cache.CacheKeyBuilder; import 
org.apache.druid.query.extraction.ExtractionFn; import org.apache.druid.segment.filter.DimensionPredicateFilter; @@ -165,6 +166,18 @@ public boolean applyNull() } }; } + + @Override + public Predicate makeArrayPredicate() + { + return input -> { + if (input == null) { + return bloomKFilter.testBytes(null, 0, 0); + } + final byte[] bytes = ExprEval.toBytesBestEffort(input); + return bloomKFilter.testBytes(bytes); + }; + } }, extractionFn, filterTuning diff --git a/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/aggregation/bloom/BloomFilterAggregatorTest.java b/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/aggregation/bloom/BloomFilterAggregatorTest.java index 5888b7d13dde..e52d710ee6d9 100644 --- a/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/aggregation/bloom/BloomFilterAggregatorTest.java +++ b/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/aggregation/bloom/BloomFilterAggregatorTest.java @@ -28,6 +28,8 @@ import org.apache.druid.guice.BloomFilterSerializersModule; import org.apache.druid.jackson.DefaultObjectMapper; import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.math.expr.ExprEval; +import org.apache.druid.math.expr.ExpressionType; import org.apache.druid.query.aggregation.Aggregator; import org.apache.druid.query.aggregation.AggregatorFactory; import org.apache.druid.query.aggregation.BufferAggregator; @@ -91,6 +93,12 @@ public class BloomFilterAggregatorTest extends InitializedNullHandlingTest private static final Float[] FLOAT_VALUES1 = new Float[]{0.4f, 0.8f, 23.2f}; private static final Long[] LONG_VALUES1 = new Long[]{10241L, 12312355L, 0L, 81L}; + private static final Object[] ARRAY_VALUES = new Object[]{ + new Object[]{1L, 2L}, + new Object[]{3L, 4L}, + new Object[]{0L, 1000L} + }; + private static final int MAX_NUM_VALUES = 15; private static BloomKFilter filter1; @@ -102,6 +110,7 @@ public class BloomFilterAggregatorTest 
extends InitializedNullHandlingTest private static String serializedLongFilter; private static String serializedDoubleFilter; private static String serializedFloatFilter; + private static String serializedArrayFilter; static { try { @@ -134,6 +143,17 @@ public class BloomFilterAggregatorTest extends InitializedNullHandlingTest } serializedDoubleFilter = filterToString(doubleFilter); + BloomKFilter arrayFilter = new BloomKFilter(MAX_NUM_VALUES); + for (Object o : ARRAY_VALUES) { + arrayFilter.addBytes( + ExprEval.toBytes( + ExpressionType.LONG_ARRAY, + ExpressionType.LONG_ARRAY.getNullableStrategy(), + o + ) + ); + } + serializedArrayFilter = filterToString(arrayFilter); } catch (Exception ex) { throw new RuntimeException(ex); @@ -395,6 +415,49 @@ public void testBufferAggregateDoubleValues() throws IOException Assert.assertEquals(serializedDoubleFilter, serialized); } + @Test + public void testAggregateArrayValues() throws IOException + { + TestObjectColumnSelector selector = new TestObjectColumnSelector( + Arrays.asList(ARRAY_VALUES) + ); + ObjectBloomFilterAggregator agg = new ObjectBloomFilterAggregator(selector, MAX_NUM_VALUES, true); + + for (Object ignored : ARRAY_VALUES) { + aggregateColumn(Collections.singletonList(selector), agg); + } + + BloomKFilter bloomKFilter = BloomKFilter.deserialize( + (ByteBuffer) valueAggregatorFactory.finalizeComputation(agg.get()) + ); + String serialized = filterToString(bloomKFilter); + Assert.assertEquals(serializedArrayFilter, serialized); + } + + @Test + public void testBufferAggregateArrayValues() throws IOException + { + TestObjectColumnSelector selector = new TestObjectColumnSelector( + Arrays.asList(ARRAY_VALUES) + ); + ObjectBloomFilterAggregator agg = new ObjectBloomFilterAggregator(selector, MAX_NUM_VALUES, true); + + int maxSize = valueAggregatorFactory.getMaxIntermediateSizeWithNulls(); + ByteBuffer buf = ByteBuffer.allocate(maxSize + 64); + int pos = 10; + buf.limit(pos + maxSize); + + agg.init(buf, pos); + + 
IntStream.range(0, ARRAY_VALUES.length) + .forEach(i -> bufferAggregateColumn(Collections.singletonList(selector), agg, buf, pos)); + BloomKFilter bloomKFilter = BloomKFilter.deserialize( + (ByteBuffer) valueAggregatorFactory.finalizeComputation(agg.get(buf, pos)) + ); + String serialized = filterToString(bloomKFilter); + Assert.assertEquals(serializedArrayFilter, serialized); + } + @Test public void testCombineValues() throws IOException { @@ -672,4 +735,18 @@ public double getDouble() return values.get(pos); } } + + public static class TestObjectColumnSelector extends SteppableSelector implements ColumnValueSelector + { + public TestObjectColumnSelector(List values) + { + super(values); + } + + @Override + public Object getObject() + { + return values.get(pos); + } + } } diff --git a/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/aggregation/bloom/sql/BloomFilterSqlAggregatorTest.java b/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/aggregation/bloom/sql/BloomFilterSqlAggregatorTest.java index 90beea5bd87e..6eb402a7e1f2 100644 --- a/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/aggregation/bloom/sql/BloomFilterSqlAggregatorTest.java +++ b/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/aggregation/bloom/sql/BloomFilterSqlAggregatorTest.java @@ -488,7 +488,11 @@ public void testEmptyTimeseriesResults() throws Exception .dataSource(CalciteTests.DATASOURCE3) .intervals(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity()))) .granularity(Granularities.ALL) - .filters(equality("dim2", 0L, ColumnType.LONG)) + .filters( + NullHandling.replaceWithDefault() + ? 
numericSelector("dim2", "0", null) + : equality("dim2", 0L, ColumnType.LONG) + ) .aggregators( ImmutableList.of( new BloomFilterAggregatorFactory( diff --git a/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/filter/BloomDimFilterTest.java b/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/filter/BloomDimFilterTest.java index 1b9089baf11d..7c2daf31f1d0 100644 --- a/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/filter/BloomDimFilterTest.java +++ b/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/filter/BloomDimFilterTest.java @@ -33,6 +33,8 @@ import org.apache.druid.jackson.DefaultObjectMapper; import org.apache.druid.java.util.common.DateTimes; import org.apache.druid.java.util.common.Pair; +import org.apache.druid.math.expr.ExprEval; +import org.apache.druid.math.expr.ExpressionType; import org.apache.druid.query.extraction.MapLookupExtractor; import org.apache.druid.query.extraction.TimeDimExtractionFn; import org.apache.druid.query.lookup.LookupExtractionFn; @@ -201,18 +203,35 @@ public void testMultiValueStringColumn() throws IOException if (NullHandling.replaceWithDefault()) { assertFilterMatches( new BloomDimFilter("dim2", bloomKFilter(1000, (String) null), null), - ImmutableList.of("1", "2", "5") + isAutoSchema() ? ImmutableList.of("5") : ImmutableList.of("1", "2", "5") ); } else { assertFilterMatches( new BloomDimFilter("dim2", bloomKFilter(1000, (String) null), null), - ImmutableList.of("1", "5") + isAutoSchema() ? ImmutableList.of("5") : ImmutableList.of("1", "5") ); - assertFilterMatches(new BloomDimFilter("dim2", bloomKFilter(1000, ""), null), ImmutableList.of("2")); + assertFilterMatches( + new BloomDimFilter("dim2", bloomKFilter(1000, ""), null), + isAutoSchema() ? 
ImmutableList.of() : ImmutableList.of("2") + ); + if (isAutoSchema()) { + assertFilterMatches( + new BloomDimFilter( + "dim2", + bloomKFilter( + 1000, + ExprEval.toBytes(ExpressionType.STRING_ARRAY, ExpressionType.STRING_ARRAY.getNullableStrategy(), ImmutableList.of("a", "b") + ) + ), + null + ), + ImmutableList.of("0") + ); + } } - assertFilterMatches(new BloomDimFilter("dim2", bloomKFilter(1000, "a"), null), ImmutableList.of("0", "3")); - assertFilterMatches(new BloomDimFilter("dim2", bloomKFilter(1000, "b"), null), ImmutableList.of("0")); - assertFilterMatches(new BloomDimFilter("dim2", bloomKFilter(1000, "c"), null), ImmutableList.of("4")); + assertFilterMatches(new BloomDimFilter("dim2", bloomKFilter(1000, "a"), null), isAutoSchema() ? ImmutableList.of() : ImmutableList.of("0", "3")); + assertFilterMatches(new BloomDimFilter("dim2", bloomKFilter(1000, "b"), null), isAutoSchema() ? ImmutableList.of() : ImmutableList.of("0")); + assertFilterMatches(new BloomDimFilter("dim2", bloomKFilter(1000, "c"), null), isAutoSchema() ? ImmutableList.of() : ImmutableList.of("4")); assertFilterMatches(new BloomDimFilter("dim2", bloomKFilter(1000, "d"), null), ImmutableList.of()); } @@ -289,8 +308,8 @@ public void testSelectorWithLookupExtractionFn() throws IOException ImmutableList.of("0", "1", "2", "5") ); - assertFilterMatches(new BloomDimFilter("dim2", bloomKFilter(1000, "HELLO"), lookupFn), ImmutableList.of("0", "3")); - assertFilterMatches( + assertFilterMatchesSkipArrays(new BloomDimFilter("dim2", bloomKFilter(1000, "HELLO"), lookupFn), ImmutableList.of("0", "3")); + assertFilterMatchesSkipArrays( new BloomDimFilter("dim2", bloomKFilter(1000, "UNKNOWN"), lookupFn), ImmutableList.of("0", "1", "2", "4", "5") ); @@ -486,4 +505,17 @@ private static BloomKFilterHolder bloomKFilter(int expectedEntries, Long... valu } return BloomKFilterHolder.fromBloomKFilter(filter); } + + private static BloomKFilterHolder bloomKFilter(int expectedEntries, byte[]... 
values) throws IOException + { + BloomKFilter filter = new BloomKFilter(expectedEntries); + for (byte[] value : values) { + if (value == null) { + filter.addBytes(null, 0, 0); + } else { + filter.addBytes(value); + } + } + return BloomKFilterHolder.fromBloomKFilter(filter); + } } diff --git a/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/filter/sql/BloomDimFilterSqlTest.java b/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/filter/sql/BloomDimFilterSqlTest.java index c5d6e631c410..f10ad759f58f 100644 --- a/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/filter/sql/BloomDimFilterSqlTest.java +++ b/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/filter/sql/BloomDimFilterSqlTest.java @@ -92,6 +92,7 @@ public void testBloomFilterExprFilter() throws IOException } byte[] bytes = BloomFilterSerializersModule.bloomKFilterToBytes(filter); String base64 = StringUtils.encodeBase64String(bytes); + skipVectorize(); // fool the planner to make an expression virtual column to test bloom filter Druid expression testQuery( diff --git a/extensions-core/stats/src/test/java/org/apache/druid/query/aggregation/variance/sql/VarianceSqlAggregatorTest.java b/extensions-core/stats/src/test/java/org/apache/druid/query/aggregation/variance/sql/VarianceSqlAggregatorTest.java index f8e7fcbe3771..c84d94a2f735 100644 --- a/extensions-core/stats/src/test/java/org/apache/druid/query/aggregation/variance/sql/VarianceSqlAggregatorTest.java +++ b/extensions-core/stats/src/test/java/org/apache/druid/query/aggregation/variance/sql/VarianceSqlAggregatorTest.java @@ -518,7 +518,11 @@ public void testEmptyTimeseriesResults() .dataSource(CalciteTests.DATASOURCE3) .intervals(querySegmentSpec(Filtration.eternity())) .granularity(Granularities.ALL) - .filters(bound("dim2", "0", "0", false, false, null, StringComparators.NUMERIC)) + .filters( + NullHandling.replaceWithDefault() + ? 
numericSelector("dim2", "0", null) + : equality("dim2", 0L, ColumnType.LONG) + ) .aggregators( new VarianceAggregatorFactory("a0:agg", "d1", "population", "double"), new VarianceAggregatorFactory("a1:agg", "d1", "sample", "double"), diff --git a/processing/src/main/java/org/apache/druid/math/expr/ExprEval.java b/processing/src/main/java/org/apache/druid/math/expr/ExprEval.java index 8937123c1ccf..4569e8b2e94e 100644 --- a/processing/src/main/java/org/apache/druid/math/expr/ExprEval.java +++ b/processing/src/main/java/org/apache/druid/math/expr/ExprEval.java @@ -152,6 +152,16 @@ public static byte[] toBytes(ExpressionType expressionType, NullableTypeStrategy return buffer.array(); } + public static byte[] toBytesBestEffort(Object o) + { + final ExprEval eval = ExprEval.bestEffortOf(o); + final NullableTypeStrategy strategy = eval.type().getNullableStrategy(); + final int size = strategy.estimateSizeBytes(eval.valueOrDefault()); + final ByteBuffer buffer = ByteBuffer.allocate(size); + strategy.write(buffer, eval.valueOrDefault(), size); + return buffer.array(); + } + /** * Converts a List to an appropriate array type, optionally doing some conversion to make multi-valued strings * consistent across selector types, which are not consistent in treatment of null, [], and [null]. 
diff --git a/processing/src/main/java/org/apache/druid/query/filter/EqualityFilter.java b/processing/src/main/java/org/apache/druid/query/filter/EqualityFilter.java index 90809ca9d024..f93dc46233d3 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/EqualityFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/EqualityFilter.java @@ -190,7 +190,7 @@ public boolean equals(Object o) if (o == null || getClass() != o.getClass()) { return false; } - boolean valuesMatch = false; + boolean valuesMatch; EqualityFilter that = (EqualityFilter) o; if (matchValue instanceof Object[] && that.matchValue instanceof Object[]) { valuesMatch = Arrays.deepEquals((Object[]) matchValue, (Object[]) that.matchValue); diff --git a/processing/src/main/java/org/apache/druid/query/filter/vector/ArrayVectorValueMatcher.java b/processing/src/main/java/org/apache/druid/query/filter/vector/ArrayVectorValueMatcher.java index 5a559abbdcbd..da8e96d71a0e 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/vector/ArrayVectorValueMatcher.java +++ b/processing/src/main/java/org/apache/druid/query/filter/vector/ArrayVectorValueMatcher.java @@ -22,23 +22,18 @@ import com.google.common.base.Predicate; import org.apache.druid.query.filter.DruidPredicateFactory; import org.apache.druid.segment.column.ColumnType; -import org.apache.druid.segment.column.TypeSignature; -import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.vector.VectorObjectSelector; import javax.annotation.Nullable; public class ArrayVectorValueMatcher implements VectorValueMatcherFactory { - protected final TypeSignature columnType; protected final VectorObjectSelector selector; public ArrayVectorValueMatcher( - TypeSignature columnType, VectorObjectSelector selector ) { - this.columnType = columnType; this.selector = selector; } diff --git a/processing/src/main/java/org/apache/druid/query/filter/vector/VectorValueMatcherColumnProcessorFactory.java 
b/processing/src/main/java/org/apache/druid/query/filter/vector/VectorValueMatcherColumnProcessorFactory.java index 0d16ee24230b..0c20bbaf2ab6 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/vector/VectorValueMatcherColumnProcessorFactory.java +++ b/processing/src/main/java/org/apache/druid/query/filter/vector/VectorValueMatcherColumnProcessorFactory.java @@ -89,7 +89,7 @@ public VectorValueMatcherFactory makeLongProcessor( @Override public VectorValueMatcherFactory makeArrayProcessor(ColumnCapabilities capabilities, VectorObjectSelector selector) { - return new ArrayVectorValueMatcher(capabilities, selector); + return new ArrayVectorValueMatcher(selector); } @Override diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayContainsOperatorConversion.java b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayContainsOperatorConversion.java index 36c5fd77d099..6d66971476a8 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayContainsOperatorConversion.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayContainsOperatorConversion.java @@ -28,9 +28,11 @@ import org.apache.druid.math.expr.Evals; import org.apache.druid.math.expr.Expr; import org.apache.druid.math.expr.ExprEval; +import org.apache.druid.math.expr.ExpressionType; import org.apache.druid.math.expr.InputBindings; import org.apache.druid.query.filter.AndDimFilter; import org.apache.druid.query.filter.DimFilter; +import org.apache.druid.query.filter.EqualityFilter; import org.apache.druid.segment.column.RowSignature; import org.apache.druid.sql.calcite.expression.DruidExpression; import org.apache.druid.sql.calcite.expression.Expressions; @@ -112,11 +114,33 @@ public DimFilter toDruidFilter( // to create an empty array with no argument, we just return null. 
return null; } else if (arrayElements.length == 1) { - return newSelectorDimFilter(leftExpr.getSimpleExtraction(), Evals.asString(arrayElements[0])); + if (plannerContext.isUseBoundsAndSelectors()) { + return newSelectorDimFilter(leftExpr.getSimpleExtraction(), Evals.asString(arrayElements[0])); + } else { + return new EqualityFilter( + leftExpr.getSimpleExtraction().getColumn(), + ExpressionType.toColumnType(exprEval.type()), + arrayElements[0], + leftExpr.getSimpleExtraction().getExtractionFn(), + null + ); + } } else { final List selectFilters = Arrays .stream(arrayElements) - .map(val -> newSelectorDimFilter(leftExpr.getSimpleExtraction(), Evals.asString(val))) + .map(val -> { + if (plannerContext.isUseBoundsAndSelectors()) { + return newSelectorDimFilter(leftExpr.getSimpleExtraction(), Evals.asString(val)); + } else { + return new EqualityFilter( + leftExpr.getSimpleExtraction().getColumn(), + ExpressionType.toColumnType(exprEval.type()), + val, + leftExpr.getSimpleExtraction().getExtractionFn(), + null + ); + } + }) .collect(Collectors.toList()); return new AndDimFilter(selectFilters); } diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayOverlapOperatorConversion.java b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayOverlapOperatorConversion.java index b0e25e14737a..68a3d9ff7e93 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayOverlapOperatorConversion.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayOverlapOperatorConversion.java @@ -28,8 +28,10 @@ import org.apache.druid.math.expr.Evals; import org.apache.druid.math.expr.Expr; import org.apache.druid.math.expr.ExprEval; +import org.apache.druid.math.expr.ExpressionType; import org.apache.druid.math.expr.InputBindings; import org.apache.druid.query.filter.DimFilter; +import org.apache.druid.query.filter.EqualityFilter; import org.apache.druid.query.filter.InDimFilter; import 
org.apache.druid.segment.column.RowSignature; import org.apache.druid.sql.calcite.expression.DruidExpression; @@ -124,7 +126,17 @@ public DimFilter toDruidFilter( // to create an empty array with no argument, we just return null. return null; } else if (arrayElements.length == 1) { - return newSelectorDimFilter(simpleExtractionExpr.getSimpleExtraction(), Evals.asString(arrayElements[0])); + if (plannerContext.isUseBoundsAndSelectors()) { + return newSelectorDimFilter(simpleExtractionExpr.getSimpleExtraction(), Evals.asString(arrayElements[0])); + } else { + return new EqualityFilter( + simpleExtractionExpr.getSimpleExtraction().getColumn(), + ExpressionType.toColumnType(exprEval.type()), + arrayElements[0], + simpleExtractionExpr.getSimpleExtraction().getExtractionFn(), + null + ); + } } else { return new InDimFilter( simpleExtractionExpr.getSimpleExtraction().getColumn(), From e49a64a7abd83f2631b6fb27eab113dbd623bdb7 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Sun, 9 Jul 2023 17:26:15 -0700 Subject: [PATCH 05/44] fix stuff, more tests, etc --- .../sql/TDigestSketchSqlAggregatorTest.java | 6 +- .../hll/HllSketchAggregatorTest.java | 97 +++ .../druid/query/filter/BloomDimFilter.java | 12 +- .../input/DruidSegmentReaderTest.java | 112 ++++ .../query/filter/DruidPredicateFactory.java | 6 +- .../druid/query/filter/EqualityFilter.java | 8 +- .../apache/druid/query/filter/NullFilter.java | 4 +- .../druid/query/filter/RangeFilter.java | 45 +- .../vector/ArrayVectorValueMatcher.java | 19 +- ...torValueMatcherColumnProcessorFactory.java | 2 +- .../druid/segment/data/FrontCodedIndexed.java | 1 + .../segment/filter/ExpressionFilter.java | 4 +- .../filter/PredicateValueMatcherFactory.java | 6 +- .../virtual/ListFilteredVirtualColumn.java | 3 +- .../druid/segment/filter/RangeFilterTest.java | 575 ++++++++++-------- .../ArrayContainsOperatorConversion.java | 6 +- .../sql/calcite/CalciteArraysQueryTest.java | 10 +- .../CalciteMultiValueStringQueryTest.java | 10 +- 18 files 
changed, 642 insertions(+), 284 deletions(-) diff --git a/extensions-contrib/tdigestsketch/src/test/java/org/apache/druid/query/aggregation/tdigestsketch/sql/TDigestSketchSqlAggregatorTest.java b/extensions-contrib/tdigestsketch/src/test/java/org/apache/druid/query/aggregation/tdigestsketch/sql/TDigestSketchSqlAggregatorTest.java index 9359bee75e43..beef63e7a6b7 100644 --- a/extensions-contrib/tdigestsketch/src/test/java/org/apache/druid/query/aggregation/tdigestsketch/sql/TDigestSketchSqlAggregatorTest.java +++ b/extensions-contrib/tdigestsketch/src/test/java/org/apache/druid/query/aggregation/tdigestsketch/sql/TDigestSketchSqlAggregatorTest.java @@ -443,7 +443,11 @@ public void testEmptyTimeseriesResults() Druids.newTimeseriesQueryBuilder() .dataSource(CalciteTests.DATASOURCE1) .intervals(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity()))) - .filters(equality("dim2", 0L, ColumnType.LONG)) + .filters( + NullHandling.replaceWithDefault() + ? numericSelector("dim2", "0", null) + : equality("dim2", 0L, ColumnType.LONG) + ) .granularity(Granularities.ALL) .aggregators(ImmutableList.of( new TDigestSketchAggregatorFactory("a0:agg", "m1", TDigestSketchAggregatorFactory.DEFAULT_COMPRESSION), diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorTest.java index b8acb0ce2c22..71f1bb9be435 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorTest.java @@ -28,16 +28,24 @@ import org.apache.druid.java.util.common.granularity.Granularities; import org.apache.druid.java.util.common.granularity.Granularity; import org.apache.druid.java.util.common.guava.Sequence; 
+import org.apache.druid.java.util.common.io.Closer; +import org.apache.druid.query.NestedDataTestUtils; import org.apache.druid.query.QueryContexts; import org.apache.druid.query.Result; import org.apache.druid.query.aggregation.AggregationTestHelper; +import org.apache.druid.query.aggregation.AggregatorFactory; +import org.apache.druid.query.aggregation.CountAggregatorFactory; import org.apache.druid.query.aggregation.post.FieldAccessPostAggregator; import org.apache.druid.query.groupby.GroupByQuery; import org.apache.druid.query.groupby.GroupByQueryConfig; import org.apache.druid.query.groupby.GroupByQueryRunnerTest; import org.apache.druid.query.groupby.ResultRow; import org.apache.druid.query.timeseries.TimeseriesResultValue; +import org.apache.druid.segment.IndexSpec; +import org.apache.druid.segment.Segment; +import org.apache.druid.segment.transform.TransformSpec; import org.apache.druid.testing.InitializedNullHandlingTest; +import org.junit.After; import org.junit.Assert; import org.junit.Rule; import org.junit.Test; @@ -46,6 +54,7 @@ import org.junit.runners.Parameterized; import java.io.File; +import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; @@ -69,6 +78,8 @@ public class HllSketchAggregatorTest extends InitializedNullHandlingTest @Rule public final TemporaryFolder timeseriesFolder = new TemporaryFolder(); + private final Closer closer; + public HllSketchAggregatorTest(GroupByQueryConfig config, String vectorize, StringEncoding stringEncoding) { HllSketchModule.registerSerde(); @@ -80,6 +91,7 @@ public HllSketchAggregatorTest(GroupByQueryConfig config, String vectorize, Stri ); this.vectorize = QueryContexts.Vectorize.fromString(vectorize); this.stringEncoding = stringEncoding; + this.closer = Closer.create(); } @Parameterized.Parameters(name = "groupByConfig = {0}, vectorize = {1}, stringEncoding = {2}") @@ -98,6 +110,12 @@ public static Collection constructorFeeder() return constructors; } + 
@After + public void teardown() throws IOException + { + closer.close(); + } + @Test public void ingestSketches() throws Exception { @@ -417,6 +435,85 @@ public void testPostAggs() throws Exception Assert.assertEquals(expectedSummary, ((HllSketchHolder) row.get(4)).getSketch().toString()); } + @Test + public void testArrays() throws Exception + { + List realtimeSegs = ImmutableList.of( + NestedDataTestUtils.createIncrementalIndex( + groupByFolder, + NestedDataTestUtils.ARRAY_TYPES_DATA_FILE, + NestedDataTestUtils.DEFAULT_JSON_INPUT_FORMAT, + NestedDataTestUtils.TIMESTAMP_SPEC, + NestedDataTestUtils.AUTO_DISCOVERY, + TransformSpec.NONE, + new AggregatorFactory[0], + Granularities.NONE, + true + ) + ); + List segs = NestedDataTestUtils.createSegments( + groupByFolder, + closer, + NestedDataTestUtils.ARRAY_TYPES_DATA_FILE, + NestedDataTestUtils.DEFAULT_JSON_INPUT_FORMAT, + NestedDataTestUtils.TIMESTAMP_SPEC, + NestedDataTestUtils.AUTO_DISCOVERY, + TransformSpec.NONE, + new AggregatorFactory[0], + Granularities.NONE, + true, + IndexSpec.DEFAULT + ); + + GroupByQuery query = GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setInterval(Intervals.ETERNITY) + .setAggregatorSpecs( + new HllSketchBuildAggregatorFactory("a0", "arrayString", null, null, null, false, false), + new HllSketchBuildAggregatorFactory("a1", "arrayLong", null, null, null, false, false), + new HllSketchBuildAggregatorFactory("a2", "arrayDouble", null, null, null, false, false), + new CountAggregatorFactory("a3") + ) + .setPostAggregatorSpecs( + ImmutableList.of( + new HllSketchToEstimatePostAggregator( + "p0", + new FieldAccessPostAggregator("f0", "a0"), + false + ), + new HllSketchToEstimatePostAggregator( + "p1", + new FieldAccessPostAggregator("f1", "a1"), + false + ), + new HllSketchToEstimatePostAggregator( + "p2", + new FieldAccessPostAggregator("f2", "a2"), + false + ) + ) + ) + .build(); + + Sequence realtimeSeq = 
groupByHelper.runQueryOnSegmentsObjs(realtimeSegs, query); + Sequence seq = groupByHelper.runQueryOnSegmentsObjs(segs, query); + List realtimeList = realtimeSeq.toList(); + List list = seq.toList(); + + // expect 4 distinct arrays for each of these columns from 14 rows + Assert.assertEquals(1, realtimeList.size()); + Assert.assertEquals(14L, realtimeList.get(0).get(3)); + Assert.assertEquals(4.0, (Double) realtimeList.get(0).get(4), 0.01); + Assert.assertEquals(4.0, (Double) realtimeList.get(0).get(5), 0.01); + Assert.assertEquals(4.0, (Double) realtimeList.get(0).get(6), 0.01); + Assert.assertEquals(1, list.size()); + Assert.assertEquals(14L, list.get(0).get(3)); + Assert.assertEquals(4.0, (Double) list.get(0).get(4), 0.01); + Assert.assertEquals(4.0, (Double) list.get(0).get(5), 0.01); + Assert.assertEquals(4.0, (Double) list.get(0).get(6), 0.01); + } + private static String buildParserJson(List dimensions, List columns) { Map timestampSpec = ImmutableMap.of( diff --git a/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/filter/BloomDimFilter.java b/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/filter/BloomDimFilter.java index 5a20822eca3d..00c55e545cf6 100644 --- a/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/filter/BloomDimFilter.java +++ b/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/filter/BloomDimFilter.java @@ -29,8 +29,11 @@ import com.google.common.collect.RangeSet; import com.google.common.hash.HashCode; import org.apache.druid.math.expr.ExprEval; +import org.apache.druid.math.expr.ExpressionType; import org.apache.druid.query.cache.CacheKeyBuilder; import org.apache.druid.query.extraction.ExtractionFn; +import org.apache.druid.segment.column.TypeSignature; +import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.filter.DimensionPredicateFilter; import javax.annotation.Nullable; @@ -168,13 +171,18 @@ public boolean applyNull() } 
@Override - public Predicate makeArrayPredicate() + public Predicate makeArrayPredicate(@Nullable TypeSignature arrayType) { + final ExpressionType expressionType = arrayType == null || !arrayType.isArray() + ? null + : ExpressionType.fromColumnType(arrayType); return input -> { if (input == null) { return bloomKFilter.testBytes(null, 0, 0); } - final byte[] bytes = ExprEval.toBytesBestEffort(input); + final byte[] bytes = expressionType != null + ? ExprEval.toBytes(expressionType, expressionType.getNullableStrategy(), input) + : ExprEval.toBytesBestEffort(input); return bloomKFilter.testBytes(bytes); }; } diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/input/DruidSegmentReaderTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/input/DruidSegmentReaderTest.java index 5580bfc5cc0c..85e9acd3e84c 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/input/DruidSegmentReaderTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/input/DruidSegmentReaderTest.java @@ -51,6 +51,7 @@ import org.apache.druid.query.filter.NotDimFilter; import org.apache.druid.query.filter.OrDimFilter; import org.apache.druid.query.filter.SelectorDimFilter; +import org.apache.druid.segment.AutoTypeColumnSchema; import org.apache.druid.segment.IndexBuilder; import org.apache.druid.segment.IndexIO; import org.apache.druid.segment.IndexSpec; @@ -683,6 +684,117 @@ public void close() Assert.assertTrue("Sequence is not closed", isSequenceClosed.booleanValue()); } + @Test + public void testArrayColumns() throws IOException + { + // Write a segment with two rows in it, with columns: s (string), d (double), cnt (long), met_s (complex). 
+ DimensionsSpec dimensionsSpec = new DimensionsSpec( + ImmutableList.of( + StringDimensionSchema.create("strCol"), + new DoubleDimensionSchema("dblCol"), + new AutoTypeColumnSchema("arrayCol") + ) + ); + List metrics = ImmutableList.of( + new CountAggregatorFactory("cnt"), + new HyperUniquesAggregatorFactory("met_s", "strCol") + ); + final List rows = ImmutableList.of( + new MapBasedInputRow( + DateTimes.of("2000"), + ImmutableList.of("strCol", "dblCol", "arrayCol"), + ImmutableMap.builder() + .put("strCol", "foo") + .put("dblCol", 1.23) + .put("arrayCol", ImmutableList.of("a", "b", "c")) + .build() + ), + new MapBasedInputRow( + DateTimes.of("2000T01"), + ImmutableList.of("strCol", "dblCol", "arrayCol"), + ImmutableMap.builder() + .put("strCol", "bar") + .put("dblCol", 4.56) + .put("arrayCol", ImmutableList.of("x", "y", "z")) + .build() + ) + ); + + InputStats inputStats = new InputStatsImpl(); + final IncrementalIndex incrementalIndex = + IndexBuilder.create() + .schema( + new IncrementalIndexSchema.Builder() + .withDimensionsSpec(dimensionsSpec) + .withMetrics(metrics.toArray(new AggregatorFactory[0])) + .withRollup(false) + .build() + ) + .rows(rows) + .buildIncrementalIndex(); + + File segmentDirectory = temporaryFolder.newFolder(); + long segmentSize = 0; + try { + TestHelper.getTestIndexMergerV9( + OnHeapMemorySegmentWriteOutMediumFactory.instance() + ).persist( + incrementalIndex, + segmentDirectory, + IndexSpec.DEFAULT, + null + ); + segmentSize = FileUtils.getFileSize(segmentDirectory); + } + finally { + incrementalIndex.close(); + } + InputEntity entity = new BytesCountingInputEntity( + makeInputEntity( + Intervals.of("2000/P1D"), + segmentDirectory, + ImmutableList.of("strCol", "dblCol", "arrayCol"), + ImmutableList.of("cnt", "met_s") + ), + inputStats + ); + final DruidSegmentReader reader = new DruidSegmentReader( + entity, + indexIO, + new TimestampSpec("__time", "millis", DateTimes.of("1971")), + new DimensionsSpec( + ImmutableList.of( + 
StringDimensionSchema.create("strCol"), + new DoubleDimensionSchema("dblCol"), + new AutoTypeColumnSchema("arrayCol") + ) + ), + ColumnsFilter.all(), + null, + temporaryFolder.newFolder() + ); + + List readRows = readRows(reader); + + Assert.assertEquals(ImmutableList.of("strCol", "dblCol", "arrayCol"), readRows.get(0).getDimensions()); + Assert.assertEquals(DateTimes.of("2000T").getMillis(), readRows.get(0).getTimestampFromEpoch()); + Assert.assertEquals("foo", readRows.get(0).getRaw("strCol")); + Assert.assertEquals(1.23, readRows.get(0).getRaw("dblCol")); + Assert.assertArrayEquals(new Object[]{"a", "b", "c"}, (Object[]) readRows.get(0).getRaw("arrayCol")); + Assert.assertEquals(1L, readRows.get(0).getRaw("cnt")); + Assert.assertEquals(makeHLLC("foo"), readRows.get(0).getRaw("met_s")); + + Assert.assertEquals(DateTimes.of("2000T1").getMillis(), readRows.get(1).getTimestampFromEpoch()); + Assert.assertEquals("bar", readRows.get(1).getRaw("strCol")); + Assert.assertEquals(4.56, readRows.get(1).getRaw("dblCol")); + Assert.assertArrayEquals(new Object[]{"x", "y", "z"}, (Object[]) readRows.get(1).getRaw("arrayCol")); + Assert.assertEquals(1L, readRows.get(1).getRaw("cnt")); + Assert.assertEquals(makeHLLC("bar"), readRows.get(1).getRaw("met_s")); + + Assert.assertEquals(segmentSize, inputStats.getProcessedBytes()); + + } + private InputEntity makeInputEntity(final Interval interval) { return new BytesCountingInputEntity( diff --git a/processing/src/main/java/org/apache/druid/query/filter/DruidPredicateFactory.java b/processing/src/main/java/org/apache/druid/query/filter/DruidPredicateFactory.java index 9433973e8d2c..929f3acbba54 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/DruidPredicateFactory.java +++ b/processing/src/main/java/org/apache/druid/query/filter/DruidPredicateFactory.java @@ -22,6 +22,10 @@ import com.google.common.base.Predicate; import org.apache.druid.annotations.SubclassesMustOverrideEqualsAndHashCode; import 
org.apache.druid.java.util.common.UOE; +import org.apache.druid.segment.column.TypeSignature; +import org.apache.druid.segment.column.ValueType; + +import javax.annotation.Nullable; @SubclassesMustOverrideEqualsAndHashCode public interface DruidPredicateFactory @@ -34,7 +38,7 @@ public interface DruidPredicateFactory DruidDoublePredicate makeDoublePredicate(); - default Predicate makeArrayPredicate() + default Predicate makeArrayPredicate(@Nullable TypeSignature inputType) { throw new UOE("Predicate does not support ARRAY types"); } diff --git a/processing/src/main/java/org/apache/druid/query/filter/EqualityFilter.java b/processing/src/main/java/org/apache/druid/query/filter/EqualityFilter.java index f93dc46233d3..6a843c0e2223 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/EqualityFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/EqualityFilter.java @@ -49,7 +49,9 @@ import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnIndexSupplier; import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.column.TypeSignature; import org.apache.druid.segment.column.TypeStrategy; +import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.filter.DimensionPredicateFilter; import org.apache.druid.segment.filter.Filters; import org.apache.druid.segment.filter.PredicateValueMatcherFactory; @@ -362,9 +364,11 @@ public DruidDoublePredicate makeDoublePredicate() } @Override - public Predicate makeArrayPredicate() + public Predicate makeArrayPredicate(@Nullable TypeSignature arrayType) { - final Object[] arrayValue = matchValue.asArray(); + final Object[] arrayValue = arrayType != null + ? 
matchValue.castTo(ExpressionType.fromColumnType(arrayType)).asArray() + : matchValue.asArray(); return input -> Arrays.deepEquals(input, arrayValue); } diff --git a/processing/src/main/java/org/apache/druid/query/filter/NullFilter.java b/processing/src/main/java/org/apache/druid/query/filter/NullFilter.java index 7adad991c7e8..1891ed3bf9bc 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/NullFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/NullFilter.java @@ -39,6 +39,8 @@ import org.apache.druid.segment.ColumnSelector; import org.apache.druid.segment.ColumnSelectorFactory; import org.apache.druid.segment.column.ColumnIndexSupplier; +import org.apache.druid.segment.column.TypeSignature; +import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.filter.DimensionPredicateFilter; import org.apache.druid.segment.filter.Filters; import org.apache.druid.segment.index.BitmapColumnIndex; @@ -272,7 +274,7 @@ public DruidDoublePredicate makeDoublePredicate() } @Override - public Predicate makeArrayPredicate() + public Predicate makeArrayPredicate(@Nullable TypeSignature arrayType) { return Predicates.isNull(); } diff --git a/processing/src/main/java/org/apache/druid/query/filter/RangeFilter.java b/processing/src/main/java/org/apache/druid/query/filter/RangeFilter.java index 1d1ab9ab6bcf..8e6441b43608 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/RangeFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/RangeFilter.java @@ -33,6 +33,7 @@ import com.google.common.collect.RangeSet; import com.google.common.collect.TreeRangeSet; import org.apache.druid.common.config.NullHandling; +import org.apache.druid.error.DruidException; import org.apache.druid.java.util.common.IAE; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.math.expr.ExprEval; @@ -111,11 +112,25 @@ public RangeFilter( this.upperEval = ExprEval.ofType(expressionType, upper); 
this.lowerEval = ExprEval.ofType(expressionType, lower); if (expressionType.isNumeric()) { - if (upper != null && upperEval.isNumericNull()) { - throw new IAE("Match value is specified as [%s] but [%s] cannot be parsed", expressionType, upper); + if (lower != null && lowerEval.value() == null) { + throw DruidException.forPersona(DruidException.Persona.USER) + .ofCategory(DruidException.Category.INVALID_INPUT) + .build( + "Invalid range filter on column [%s], lower bound [%s] cannot be parsed as specified match value type [%s]", + column, + lower, + expressionType + ); } - if (lower != null && lowerEval.isNumericNull()) { - throw new IAE("Match value is specified as [%s] but [%s] cannot be parsed", expressionType, lower); + if (upper != null && upperEval.value() == null) { + throw DruidException.forPersona(DruidException.Persona.USER) + .ofCategory(DruidException.Category.INVALID_INPUT) + .build( + "Invalid range filter on column [%s], upper bound [%s] cannot be parsed as specified match value type [%s]", + column, + upper, + expressionType + ); } } this.lowerStrict = lowerStrict != null && lowerStrict; @@ -251,6 +266,9 @@ public byte[] getCacheKey() @Override public DimFilter optimize() { +// if (isEquality()) { +// return new EqualityFilter(column, matchValueType, lower, extractionFn, filterTuning); +// } return this; } @@ -302,10 +320,15 @@ public BitmapColumnIndex getBitmapColumnIndex(ColumnIndexSelector selector) } final LexicographicalRangeIndex rangeIndex = indexSupplier.as(LexicographicalRangeIndex.class); if (rangeIndex != null) { + final String lower = hasLowerBound() ? lowerEval.asString() : null; + final String upper = hasUpperBound() ? upperEval.asString() : null; + if (NullHandling.isNullOrEquivalent(lower) && NullHandling.isNullOrEquivalent(upper)) { + return Filters.makeNullIndex(false, selector); + } final BitmapColumnIndex rangeBitmaps = rangeIndex.forRange( - hasLowerBound() ? lowerEval.asString() : null, + lower, lowerStrict, - hasUpperBound() ? 
upperEval.asString() : null, + upper, upperStrict ); if (rangeBitmaps != null) { @@ -320,8 +343,8 @@ public BitmapColumnIndex getBitmapColumnIndex(ColumnIndexSelector selector) } final NumericRangeIndex rangeIndex = indexSupplier.as(NumericRangeIndex.class); if (rangeIndex != null) { - final Number lower = (Number) lowerEval.valueOrDefault(); - final Number upper = (Number) upperEval.valueOrDefault(); + final Number lower = (Number) lowerEval.value(); + final Number upper = (Number) upperEval.value(); final BitmapColumnIndex rangeBitmaps = rangeIndex.forRange( lower, isLowerStrict(), @@ -610,11 +633,11 @@ private Supplier makeDoublePredicateSupplier() private Supplier> makeStringPredicateSupplier() { return Suppliers.memoize(() -> { - Comparator stringComparator = matchValueType.isNumeric() + final Comparator stringComparator = matchValueType.isNumeric() ? StringComparators.NUMERIC : StringComparators.LEXICOGRAPHIC; - String lowerBound = lowerEval.castTo(ExpressionType.STRING).asString(); - String upperBound = upperEval.castTo(ExpressionType.STRING).asString(); + final String lowerBound = lowerEval.castTo(ExpressionType.STRING).asString(); + final String upperBound = upperEval.castTo(ExpressionType.STRING).asString(); if (hasLowerBound() && hasUpperBound()) { if (upperStrict && lowerStrict) { diff --git a/processing/src/main/java/org/apache/druid/query/filter/vector/ArrayVectorValueMatcher.java b/processing/src/main/java/org/apache/druid/query/filter/vector/ArrayVectorValueMatcher.java index da8e96d71a0e..f07e9d5f7ac2 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/vector/ArrayVectorValueMatcher.java +++ b/processing/src/main/java/org/apache/druid/query/filter/vector/ArrayVectorValueMatcher.java @@ -22,18 +22,23 @@ import com.google.common.base.Predicate; import org.apache.druid.query.filter.DruidPredicateFactory; import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.column.TypeSignature; +import 
org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.vector.VectorObjectSelector; import javax.annotation.Nullable; public class ArrayVectorValueMatcher implements VectorValueMatcherFactory { + protected final TypeSignature columnType; protected final VectorObjectSelector selector; public ArrayVectorValueMatcher( + TypeSignature columnType, VectorObjectSelector selector ) { + this.columnType = columnType; this.selector = selector; } @@ -56,7 +61,7 @@ public VectorValueMatcher makeMatcher(Object value, ColumnType type) @Override public VectorValueMatcher makeMatcher(DruidPredicateFactory predicateFactory) { - final Predicate predicate = predicateFactory.makeArrayPredicate(); + final Predicate predicate = predicateFactory.makeArrayPredicate(columnType); return new BaseVectorValueMatcher(selector) { @@ -73,10 +78,14 @@ public ReadableVectorMatch match(final ReadableVectorMatch mask) for (int i = 0; i < mask.getSelectionSize(); i++) { final int rowNum = mask.getSelection()[i]; Object o = vector[rowNum]; - if ((o == null || o instanceof Object[]) && predicate.apply((Object[]) o)) { - selection[numRows++] = rowNum; - } else if (predicate.apply(new Object[]{o})) { - selection[numRows++] = rowNum; + if (o == null || o instanceof Object[]) { + if (predicate.apply((Object[]) o)) { + selection[numRows++] = rowNum; + } + } else { + if (predicate.apply(new Object[]{o})) { + selection[numRows++] = rowNum; + } } } diff --git a/processing/src/main/java/org/apache/druid/query/filter/vector/VectorValueMatcherColumnProcessorFactory.java b/processing/src/main/java/org/apache/druid/query/filter/vector/VectorValueMatcherColumnProcessorFactory.java index 0c20bbaf2ab6..0d16ee24230b 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/vector/VectorValueMatcherColumnProcessorFactory.java +++ b/processing/src/main/java/org/apache/druid/query/filter/vector/VectorValueMatcherColumnProcessorFactory.java @@ -89,7 +89,7 @@ public VectorValueMatcherFactory 
makeLongProcessor( @Override public VectorValueMatcherFactory makeArrayProcessor(ColumnCapabilities capabilities, VectorObjectSelector selector) { - return new ArrayVectorValueMatcher(selector); + return new ArrayVectorValueMatcher(capabilities, selector); } @Override diff --git a/processing/src/main/java/org/apache/druid/segment/data/FrontCodedIndexed.java b/processing/src/main/java/org/apache/druid/segment/data/FrontCodedIndexed.java index ebbf13a91b09..46bca6c3b7f2 100644 --- a/processing/src/main/java/org/apache/druid/segment/data/FrontCodedIndexed.java +++ b/processing/src/main/java/org/apache/druid/segment/data/FrontCodedIndexed.java @@ -1,3 +1,4 @@ + /* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file diff --git a/processing/src/main/java/org/apache/druid/segment/filter/ExpressionFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/ExpressionFilter.java index c4d3fd3db0fa..f3c77f91163d 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/ExpressionFilter.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/ExpressionFilter.java @@ -49,6 +49,8 @@ import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnCapabilitiesImpl; import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.column.TypeSignature; +import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.index.BitmapColumnIndex; import org.apache.druid.segment.vector.VectorColumnSelectorFactory; import org.apache.druid.segment.virtual.ExpressionSelectors; @@ -417,7 +419,7 @@ public boolean applyNull() } @Override - public Predicate makeArrayPredicate() + public Predicate makeArrayPredicate(@Nullable TypeSignature arrayType) { if (inputCapabilites == null) { return input -> expr.get() diff --git a/processing/src/main/java/org/apache/druid/segment/filter/PredicateValueMatcherFactory.java 
b/processing/src/main/java/org/apache/druid/segment/filter/PredicateValueMatcherFactory.java index 4b7f68a2899a..9673248f8263 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/PredicateValueMatcherFactory.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/PredicateValueMatcherFactory.java @@ -93,10 +93,10 @@ public ValueMatcher makeArrayProcessor( { if (selector instanceof NilColumnValueSelector) { // Column does not exist, or is unfilterable. Treat it as all nulls. - return BooleanValueMatcher.of(predicateFactory.makeArrayPredicate().apply(null)); + return BooleanValueMatcher.of(predicateFactory.makeArrayPredicate(columnCapabilities).apply(null)); } else { // use the object predicate - final Predicate predicate = predicateFactory.makeArrayPredicate(); + final Predicate predicate = predicateFactory.makeArrayPredicate(columnCapabilities); return new ValueMatcher() { @Override @@ -244,7 +244,7 @@ private DruidDoublePredicate getDoublePredicate() private Predicate getArrayPredicate() { if (arrayPredicate == null) { - arrayPredicate = predicateFactory.makeArrayPredicate(); + arrayPredicate = predicateFactory.makeArrayPredicate(null); } return arrayPredicate; } diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/ListFilteredVirtualColumn.java b/processing/src/main/java/org/apache/druid/segment/virtual/ListFilteredVirtualColumn.java index f55512403d0f..75661070450e 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/ListFilteredVirtualColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/ListFilteredVirtualColumn.java @@ -571,8 +571,9 @@ public BitmapColumnIndex forRange( public Iterable getBitmapIterable() { int startIndex, endIndex; + final int firstValue = NullHandling.isNullOrEquivalent(delegate.getValue(idMapping.getReverseId(0))) ? 
1 : 0; if (startValue == null) { - startIndex = 0; + startIndex = firstValue; } else { final int found = getReverseIndex(NullHandling.emptyToNullIfNeeded(startValue)); if (found >= 0) { diff --git a/processing/src/test/java/org/apache/druid/segment/filter/RangeFilterTest.java b/processing/src/test/java/org/apache/druid/segment/filter/RangeFilterTest.java index d324a510f398..981c8740e672 100644 --- a/processing/src/test/java/org/apache/druid/segment/filter/RangeFilterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/filter/RangeFilterTest.java @@ -25,15 +25,14 @@ import nl.jqno.equalsverifier.EqualsVerifier; import org.apache.druid.common.config.NullHandling; import org.apache.druid.data.input.InputRow; +import org.apache.druid.error.DruidException; import org.apache.druid.java.util.common.IAE; import org.apache.druid.java.util.common.Pair; import org.apache.druid.js.JavaScriptConfig; import org.apache.druid.query.extraction.ExtractionFn; import org.apache.druid.query.extraction.JavaScriptExtractionFn; -import org.apache.druid.query.filter.BoundDimFilter; import org.apache.druid.query.filter.Filter; import org.apache.druid.query.filter.RangeFilter; -import org.apache.druid.query.ordering.StringComparators; import org.apache.druid.segment.IndexBuilder; import org.apache.druid.segment.StorageAdapter; import org.apache.druid.segment.column.ColumnType; @@ -49,27 +48,16 @@ @RunWith(Parameterized.class) public class RangeFilterTest extends BaseFilterTest { - private static final List ROWS = ImmutableList.builder() - .addAll(DEFAULT_ROWS) - .add(makeDefaultSchemaRow( - "6", - "-1000", - ImmutableList.of("a"), - null, - 6.6, - null, - 10L - )) - .add(makeDefaultSchemaRow( - "7", - "-10.012", - ImmutableList.of("d"), - null, - null, - 3.0f, - null - )) - .build(); + private static final List ROWS = + ImmutableList.builder() + .addAll(DEFAULT_ROWS) + .add( + makeDefaultSchemaRow("6", "-1000", ImmutableList.of("a"), null, 6.6, null, 10L) + ) + .add( + 
makeDefaultSchemaRow("7", "-10.012", ImmutableList.of("d"), null, null, 3.0f, null) + ) + .build(); public RangeFilterTest( String testName, @@ -188,21 +176,21 @@ public void testLexicographicMatchWithEmptyString() public void testLexicographicMatchNull() { assertFilterMatches( - new BoundDimFilter("dim0", "", "", false, false, false, null, StringComparators.LEXICOGRAPHIC), + new RangeFilter("dim0", ColumnType.STRING, "", "", false, false, null, null), ImmutableList.of() ); assertFilterMatches( - new BoundDimFilter("dim1", "", "", false, false, false, null, StringComparators.LEXICOGRAPHIC), - ImmutableList.of("0") + new RangeFilter("dim1", ColumnType.STRING, "", "", false, false, null, null), + NullHandling.replaceWithDefault() ? ImmutableList.of() : ImmutableList.of("0") ); if (NullHandling.replaceWithDefault()) { assertFilterMatchesSkipArrays( - new BoundDimFilter("dim2", "", "", false, false, false, null, StringComparators.LEXICOGRAPHIC), - isAutoSchema() ? ImmutableList.of() : ImmutableList.of("1", "2", "5") + new RangeFilter("dim2", ColumnType.STRING, "", "", false, false, null, null), + ImmutableList.of() ); } else { assertFilterMatchesSkipArrays( - new BoundDimFilter("dim2", "", "", false, false, false, null, StringComparators.LEXICOGRAPHIC), + new RangeFilter("dim2", ColumnType.STRING, "", "", false, false, null, null), isAutoSchema() ? 
ImmutableList.of() : ImmutableList.of("2") ); } @@ -211,57 +199,34 @@ public void testLexicographicMatchNull() @Test public void testLexicographicMatchMissingColumn() { - if (NullHandling.replaceWithDefault()) { - assertFilterMatches( - new BoundDimFilter("dim3", "", "", false, false, false, null, StringComparators.LEXICOGRAPHIC), - ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7") - ); - assertFilterMatches( - new BoundDimFilter("dim3", "", null, false, true, false, null, StringComparators.LEXICOGRAPHIC), - ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7") - ); - assertFilterMatches( - new BoundDimFilter("dim3", null, "", false, true, false, null, StringComparators.LEXICOGRAPHIC), - ImmutableList.of() - ); - } else { - assertFilterMatches( - new BoundDimFilter("dim3", "", "", false, false, false, null, StringComparators.LEXICOGRAPHIC), - ImmutableList.of() - ); - assertFilterMatches( - new BoundDimFilter("dim3", "", null, false, true, false, null, StringComparators.LEXICOGRAPHIC), - ImmutableList.of() - ); - assertFilterMatches( - new BoundDimFilter("dim3", null, "", false, true, false, null, StringComparators.LEXICOGRAPHIC), - ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7") - ); - } assertFilterMatches( - new BoundDimFilter("dim3", "", "", true, false, false, null, StringComparators.LEXICOGRAPHIC), + new RangeFilter("dim3", ColumnType.STRING, "", "", false, false, null, null), ImmutableList.of() ); assertFilterMatches( - new BoundDimFilter("dim3", "", "", false, true, false, null, StringComparators.LEXICOGRAPHIC), + new RangeFilter("dim3", ColumnType.STRING, "", null, false, true, null, null), ImmutableList.of() ); - assertFilterMatches( - new BoundDimFilter("dim3", null, "", false, false, false, null, StringComparators.LEXICOGRAPHIC), - ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7") + new RangeFilter("dim3", ColumnType.STRING, null, "", false, true, null, null), + ImmutableList.of() + ); + assertFilterMatches( + new 
RangeFilter("dim3", ColumnType.STRING, "", "", true, false, null, null), + ImmutableList.of() + ); + assertFilterMatches( + new RangeFilter("dim3", ColumnType.STRING, "", "", false, true, null, null), + ImmutableList.of() + ); + assertFilterMatches( + new RangeFilter("dim3", ColumnType.STRING, null, "", false, false, null, null), + ImmutableList.of() + ); + assertFilterMatches( + new RangeFilter("dim3", ColumnType.STRING, null, "", false, true, null, null), + ImmutableList.of() ); - if (NullHandling.sqlCompatible()) { - assertFilterMatches( - new BoundDimFilter("dim3", null, "", false, true, false, null, StringComparators.LEXICOGRAPHIC), - ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7") - ); - } else { - assertFilterMatches( - new BoundDimFilter("dim3", null, "", false, true, false, null, StringComparators.LEXICOGRAPHIC), - ImmutableList.of() - ); - } } @@ -269,15 +234,15 @@ public void testLexicographicMatchMissingColumn() public void testLexicographicMatchTooStrict() { assertFilterMatches( - new BoundDimFilter("dim1", "abc", "abc", true, false, false, null, StringComparators.LEXICOGRAPHIC), + new RangeFilter("dim1", ColumnType.STRING, "abc", "abc", true, false, null, null), ImmutableList.of() ); assertFilterMatches( - new BoundDimFilter("dim1", "abc", "abc", true, true, false, null, StringComparators.LEXICOGRAPHIC), + new RangeFilter("dim1", ColumnType.STRING, "abc", "abc", true, true, null, null), ImmutableList.of() ); assertFilterMatches( - new BoundDimFilter("dim1", "abc", "abc", false, true, false, null, StringComparators.LEXICOGRAPHIC), + new RangeFilter("dim1", ColumnType.STRING, "abc", "abc", false, true, null, null), ImmutableList.of() ); } @@ -286,7 +251,7 @@ public void testLexicographicMatchTooStrict() public void testLexicographicMatchExactlySingleValue() { assertFilterMatches( - new BoundDimFilter("dim1", "abc", "abc", false, false, false, null, StringComparators.LEXICOGRAPHIC), + new RangeFilter("dim1", ColumnType.STRING, "abc", "abc", 
false, false, null, null), ImmutableList.of("5") ); } @@ -295,7 +260,7 @@ public void testLexicographicMatchExactlySingleValue() public void testLexicographicMatchSurroundingSingleValue() { assertFilterMatches( - new BoundDimFilter("dim1", "ab", "abd", true, true, false, null, StringComparators.LEXICOGRAPHIC), + new RangeFilter("dim1", ColumnType.STRING, "ab", "abd", true, true, null, null), ImmutableList.of("5") ); } @@ -304,7 +269,7 @@ public void testLexicographicMatchSurroundingSingleValue() public void testLexicographicMatchNoUpperLimit() { assertFilterMatches( - new BoundDimFilter("dim1", "ab", null, true, true, false, null, StringComparators.LEXICOGRAPHIC), + new RangeFilter("dim1", ColumnType.STRING, "ab", null, true, true, null, null), ImmutableList.of("4", "5") ); } @@ -313,8 +278,8 @@ public void testLexicographicMatchNoUpperLimit() public void testLexicographicMatchNoLowerLimit() { assertFilterMatches( - new BoundDimFilter("dim1", null, "abd", true, true, false, null, StringComparators.LEXICOGRAPHIC), - ImmutableList.of("0", "1", "2", "3", "5", "6", "7") + new RangeFilter("dim1", ColumnType.STRING, null, "abd", true, true, null, null), + NullHandling.replaceWithDefault() ? 
ImmutableList.of("1", "2", "3", "5", "6", "7") : ImmutableList.of("0", "1", "2", "3", "5", "6", "7") ); } @@ -322,66 +287,86 @@ public void testLexicographicMatchNoLowerLimit() public void testLexicographicMatchNumbers() { assertFilterMatches( - new BoundDimFilter("dim1", "1", "3", false, false, false, null, StringComparators.LEXICOGRAPHIC), + new RangeFilter("dim1", ColumnType.STRING, "1", "3", false, false, null, null), ImmutableList.of("1", "2", "3") ); assertFilterMatches( - new BoundDimFilter("dim1", "1", "3", true, true, false, null, StringComparators.LEXICOGRAPHIC), + new RangeFilter("dim1", ColumnType.STRING, "1", "3", true, true, null, null), ImmutableList.of("1", "2") ); assertFilterMatches( - new BoundDimFilter("dim1", "-1", "3", true, true, false, null, StringComparators.LEXICOGRAPHIC), + new RangeFilter("dim1", ColumnType.STRING, "-1", "3", true, true, null, null), ImmutableList.of("1", "2", "3", "6", "7") ); } @Test - public void testNumericMatchNull() + public void testNumericMatchBadParameters() { - assertFilterMatches( - new BoundDimFilter("dim0", "", "", false, false, false, null, StringComparators.NUMERIC), - ImmutableList.of() + Throwable t = Assert.assertThrows( + DruidException.class, + () -> assertFilterMatches( + new RangeFilter("dim0", ColumnType.DOUBLE, "1234", "", false, false, null, null), + ImmutableList.of() + ) ); - assertFilterMatches( - new BoundDimFilter("dim1", "", "", false, false, false, null, StringComparators.NUMERIC), - ImmutableList.of("0") + Assert.assertEquals( + "Invalid range filter on column [dim0], upper bound [] cannot be parsed as specified match value type [DOUBLE]", + t.getMessage() ); - if (NullHandling.replaceWithDefault()) { - assertFilterMatchesSkipArrays( - new BoundDimFilter("dim2", "", "", false, false, false, null, StringComparators.NUMERIC), - ImmutableList.of("1", "2", "5") - ); - assertFilterMatches( - new BoundDimFilter("dim3", "", "", false, false, false, null, StringComparators.NUMERIC), - 
ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7") - ); - } else { - assertFilterMatchesSkipArrays( - new BoundDimFilter("dim2", "", "", false, false, false, null, StringComparators.NUMERIC), - ImmutableList.of("2") - ); - assertFilterMatches( - new BoundDimFilter("dim3", "", "", false, false, false, null, StringComparators.NUMERIC), - ImmutableList.of() - ); - } + t = Assert.assertThrows( + DruidException.class, + () -> assertFilterMatches( + new RangeFilter("dim0", ColumnType.DOUBLE, "abc", "1234", false, false, null, null), + ImmutableList.of() + ) + ); + Assert.assertEquals( + "Invalid range filter on column [dim0], lower bound [abc] cannot be parsed as specified match value type [DOUBLE]", + t.getMessage() + ); } @Test public void testNumericMatchTooStrict() { assertFilterMatches( - new BoundDimFilter("dim1", "2", "2", true, false, false, null, StringComparators.NUMERIC), + new RangeFilter("dim1", ColumnType.LONG, 2L, 2L, true, false, null, null), ImmutableList.of() ); assertFilterMatches( - new BoundDimFilter("dim1", "2", "2", true, true, false, null, StringComparators.NUMERIC), + new RangeFilter("dim1", ColumnType.LONG, 2L, 2L, true, true, null, null), ImmutableList.of() ); assertFilterMatches( - new BoundDimFilter("dim1", "2", "2", false, true, false, null, StringComparators.NUMERIC), + new RangeFilter("dim1", ColumnType.LONG, 2L, 2L, false, true, null, null), + ImmutableList.of() + ); + assertFilterMatches( + new RangeFilter("d0", ColumnType.LONG, 2L, 3L, false, true, null, null), + ImmutableList.of() + ); + assertFilterMatches( + new RangeFilter("d0", ColumnType.DOUBLE, 2L, 3L, false, true, null, null), + ImmutableList.of() + ); + + assertFilterMatches( + new RangeFilter("f0", ColumnType.LONG, 2L, 3L, false, true, null, null), + ImmutableList.of() + ); + assertFilterMatches( + new RangeFilter("f0", ColumnType.DOUBLE, 2L, 3L, false, true, null, null), + ImmutableList.of() + ); + assertFilterMatches( + new RangeFilter("l0", ColumnType.LONG, 2L, 3L, 
false, true, null, null), + ImmutableList.of() + ); + assertFilterMatches( + new RangeFilter("l0", ColumnType.DOUBLE, 2L, 3L, false, true, null, null), ImmutableList.of() ); } @@ -390,12 +375,28 @@ public void testNumericMatchTooStrict() public void testNumericMatchVirtualColumn() { assertFilterMatches( - new BoundDimFilter("expr", "1", "2", false, false, false, null, StringComparators.NUMERIC), + new RangeFilter("expr", ColumnType.LONG, 1L, 2L, false, false, null, null), + ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7") + ); + assertFilterMatches( + new RangeFilter("expr", ColumnType.DOUBLE, 1.1, 2.0, false, false, null, null), + ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7") + ); + assertFilterMatches( + new RangeFilter("expr", ColumnType.FLOAT, 1.1f, 2.0f, false, false, null, null), ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7") ); assertFilterMatches( - new BoundDimFilter("expr", "2", "3", false, false, false, null, StringComparators.NUMERIC), + new RangeFilter("expr", ColumnType.LONG, 2L, 3L, false, false, null, null), + ImmutableList.of() + ); + assertFilterMatches( + new RangeFilter("expr", ColumnType.DOUBLE, 2.0, 3.0, false, false, null, null), + ImmutableList.of() + ); + assertFilterMatches( + new RangeFilter("expr", ColumnType.FLOAT, 2.0f, 3.0f, false, false, null, null), ImmutableList.of() ); } @@ -404,45 +405,135 @@ public void testNumericMatchVirtualColumn() public void testNumericMatchExactlySingleValue() { assertFilterMatches( - new BoundDimFilter("dim1", "2", "2", false, false, false, null, StringComparators.NUMERIC), + new RangeFilter("dim1", ColumnType.LONG, 2L, 2L, false, false, null, null), ImmutableList.of("2") ); assertFilterMatches( - new BoundDimFilter("dim1", "-10.012", "-10.012", false, false, false, null, StringComparators.NUMERIC), + new RangeFilter("dim1", ColumnType.DOUBLE, -10.012, -10.012, false, false, null, null), ImmutableList.of("7") ); + assertFilterMatches( + new RangeFilter("d0", ColumnType.DOUBLE, 
120.0245, 120.0245, false, false, null, null), + ImmutableList.of("3") + ); + assertFilterMatches( + new RangeFilter("d0", ColumnType.FLOAT, 120.0245f, 120.0245f, false, false, null, null), + ImmutableList.of() + ); + assertFilterMatches( + new RangeFilter("d0", ColumnType.FLOAT, 60.0f, 60.0f, false, false, null, null), + ImmutableList.of("4") + ); + assertFilterMatches( + new RangeFilter("f0", ColumnType.DOUBLE, 10.1, 10.1, false, false, null, null), + ImmutableList.of() + ); + assertFilterMatches( + new RangeFilter("f0", ColumnType.FLOAT, 10.1f, 10.1f, false, false, null, null), + ImmutableList.of("1") + ); + assertFilterMatches( + new RangeFilter("l0", ColumnType.LONG, 12345L, 12345L, false, false, null, null), + ImmutableList.of("5") + ); + assertFilterMatches( + new RangeFilter("l0", ColumnType.DOUBLE, 12345.0, 12345.0, false, false, null, null), + ImmutableList.of("5") + ); + assertFilterMatches( + new RangeFilter("l0", ColumnType.FLOAT, 12345.0f, 12345.0f, false, false, null, null), + ImmutableList.of("5") + ); } @Test public void testNumericMatchSurroundingSingleValue() { assertFilterMatches( - new BoundDimFilter("dim1", "1", "3", true, true, false, null, StringComparators.NUMERIC), + new RangeFilter("dim1", ColumnType.LONG, 1L, 3L, true, true, null, null), ImmutableList.of("2") ); assertFilterMatches( - new BoundDimFilter("dim1", "-11", "-10", false, false, false, null, StringComparators.NUMERIC), + new RangeFilter("dim1", ColumnType.LONG, -11L, -10L, false, false, null, null), ImmutableList.of("7") ); + assertFilterMatches( + new RangeFilter("d0", ColumnType.DOUBLE, 120.0, 120.03, false, false, null, null), + ImmutableList.of("3") + ); + assertFilterMatches( + new RangeFilter("d0", ColumnType.FLOAT, 120.02f, 120.03f, false, false, null, null), + ImmutableList.of("3") + ); + assertFilterMatches( + new RangeFilter("d0", ColumnType.FLOAT, 59.5f, 60.01f, false, false, null, null), + ImmutableList.of("4") + ); + assertFilterMatches( + new RangeFilter("f0", 
ColumnType.DOUBLE, 10.0, 10.2, false, false, null, null), + ImmutableList.of("1") + ); + assertFilterMatches( + new RangeFilter("f0", ColumnType.FLOAT, 10.05f, 10.11f, false, false, null, null), + ImmutableList.of("1") + ); + assertFilterMatches( + new RangeFilter("l0", ColumnType.LONG, 12344L, 12346L, false, false, null, null), + ImmutableList.of("5") + ); + assertFilterMatches( + new RangeFilter("l0", ColumnType.DOUBLE, 12344.0, 12345.5, false, false, null, null), + ImmutableList.of("5") + ); + assertFilterMatches( + new RangeFilter("l0", ColumnType.FLOAT, 12344.0f, 12345.5f, false, false, null, null), + ImmutableList.of("5") + ); } @Test public void testNumericMatchNoUpperLimit() { assertFilterMatches( - new BoundDimFilter("dim1", "1", null, true, true, false, null, StringComparators.NUMERIC), + new RangeFilter("dim1", ColumnType.LONG,1L, null, true, true, null, null), ImmutableList.of("1", "2") ); + assertFilterMatches( + new RangeFilter("d0", ColumnType.DOUBLE,1.0, null, true, true, null, null), + ImmutableList.of("1", "3", "4", "5", "6") + ); + assertFilterMatches( + new RangeFilter("f0", ColumnType.FLOAT,1.0f, null, true, true, null, null), + ImmutableList.of("1", "2", "3", "5", "7") + ); + assertFilterMatches( + new RangeFilter("l0", ColumnType.LONG,1L, null, true, true, null, null), + ImmutableList.of("1", "2", "4", "5", "6") + ); } @Test public void testNumericMatchNoLowerLimit() { + // strings are weird... + assertFilterMatches( + new RangeFilter("dim1", ColumnType.LONG, null, 2L, false, true, null, null), + NullHandling.replaceWithDefault() ? ImmutableList.of("3", "4", "5", "6", "7") : ImmutableList.of("0", "3", "4", "5", "6", "7") + ); + // numbers are sane though + assertFilterMatches( + new RangeFilter("d0", ColumnType.DOUBLE, null, 10.0, false, true, null, null), + canTestNumericNullsAsDefaultValues ?
ImmutableList.of("0", "2", "6", "7") : ImmutableList.of("0","6") + ); + assertFilterMatches( + new RangeFilter("f0", ColumnType.FLOAT, null, 50.5, false, true, null, null), + canTestNumericNullsAsDefaultValues ? ImmutableList.of("0", "1", "2", "4", "6", "7") : ImmutableList.of("0", "1", "2", "7") + ); assertFilterMatches( - new BoundDimFilter("dim1", null, "2", true, true, false, null, StringComparators.NUMERIC), - ImmutableList.of("0", "3", "4", "5", "6", "7") + new RangeFilter("l0", ColumnType.LONG, null, 100L, false, true, null, null), + canTestNumericNullsAsDefaultValues ? ImmutableList.of("0", "2", "3", "6", "7") : ImmutableList.of("0", "2", "6") ); } @@ -450,7 +541,7 @@ public void testNumericMatchNoLowerLimit() public void testNumericMatchWithNegatives() { assertFilterMatches( - new BoundDimFilter("dim1", "-2000", "3", true, true, false, null, StringComparators.NUMERIC), + new RangeFilter("dim1", ColumnType.LONG, -2000L, 3L, true, true, null, null), ImmutableList.of("2", "3", "6", "7") ); } @@ -459,43 +550,43 @@ public void testNumericMatchWithNegatives() public void testNumericNullsAndZeros() { assertFilterMatches( - new BoundDimFilter( + new RangeFilter( "d0", - "0.0", - "1.0", - false, + ColumnType.DOUBLE, + 0.0, + 1.1, false, false, null, - StringComparators.NUMERIC + null ), canTestNumericNullsAsDefaultValues ? ImmutableList.of("0", "2", "7") : ImmutableList.of("0") ); assertFilterMatches( - new BoundDimFilter( + new RangeFilter( "f0", - "0.0", - "1.0", - false, + ColumnType.FLOAT, + 0.0, + 1.0, false, false, null, - StringComparators.NUMERIC + null ), canTestNumericNullsAsDefaultValues ? ImmutableList.of("0", "4", "6") : ImmutableList.of("0") ); assertFilterMatches( - new BoundDimFilter( + new RangeFilter( "l0", - "0.0", - "1.0", - false, + ColumnType.LONG, + 0L, + 1L, false, false, null, - StringComparators.NUMERIC + null ), NullHandling.replaceWithDefault() && canTestNumericNullsAsDefaultValues ? 
ImmutableList.of("0", "3", "7") @@ -507,43 +598,43 @@ public void testNumericNullsAndZeros() public void testVirtualNumericNullsAndZeros() { assertFilterMatches( - new BoundDimFilter( + new RangeFilter( "vd0", - "0.0", - "1.0", - false, + ColumnType.DOUBLE, + 0.0, + 1.0, false, false, null, - StringComparators.NUMERIC + null ), canTestNumericNullsAsDefaultValues ? ImmutableList.of("0", "2", "7") : ImmutableList.of("0") ); assertFilterMatches( - new BoundDimFilter( + new RangeFilter( "vf0", - "0.0", - "1.0", - false, + ColumnType.FLOAT, + 0.0, + 1.0, false, false, null, - StringComparators.NUMERIC + null ), canTestNumericNullsAsDefaultValues ? ImmutableList.of("0", "4", "6") : ImmutableList.of("0") ); assertFilterMatches( - new BoundDimFilter( + new RangeFilter( "vl0", - "0.0", - "1.0", - false, + ColumnType.LONG, + 0L, + 1L, false, false, null, - StringComparators.NUMERIC + null ), NullHandling.replaceWithDefault() && canTestNumericNullsAsDefaultValues ? ImmutableList.of("0", "3", "7") @@ -555,41 +646,41 @@ public void testVirtualNumericNullsAndZeros() public void testNumericNulls() { assertFilterMatches( - new BoundDimFilter( + new RangeFilter( "f0", - "1.0", + ColumnType.FLOAT, + 1.0, null, false, false, - false, null, - StringComparators.NUMERIC + null ), ImmutableList.of("1", "2", "3", "5", "7") ); assertFilterMatches( - new BoundDimFilter( + new RangeFilter( "d0", - "1", + ColumnType.DOUBLE, + 1.0, null, false, false, - false, null, - StringComparators.NUMERIC + null ), ImmutableList.of("1", "3", "4", "5", "6") ); assertFilterMatches( - new BoundDimFilter( + new RangeFilter( "l0", - "1", + ColumnType.LONG, + 1L, null, false, false, - false, null, - StringComparators.NUMERIC + null ), ImmutableList.of("1", "2", "4", "5", "6") ); @@ -604,145 +695,138 @@ public void testMatchWithExtractionFn() String nullJsFn = "function(str) { return null; }"; ExtractionFn makeNullFn = new JavaScriptExtractionFn(nullJsFn, false, JavaScriptConfig.getEnabledInstance()); - if 
(NullHandling.replaceWithDefault()) { - assertFilterMatches( - new BoundDimFilter("dim0", "", "", false, false, false, makeNullFn, StringComparators.LEXICOGRAPHIC), - ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7") - ); - } else { - assertFilterMatches( - new BoundDimFilter("dim0", "", "", false, false, false, makeNullFn, StringComparators.LEXICOGRAPHIC), - ImmutableList.of() - ); - } + assertFilterMatches( + new RangeFilter("dim0", ColumnType.STRING, "", "", false, false, makeNullFn, null), + ImmutableList.of() + ); assertFilterMatches( - new BoundDimFilter( + new RangeFilter( "dim1", + ColumnType.STRING, "super-ab", "super-abd", true, true, - false, superFn, - StringComparators.LEXICOGRAPHIC + null ), ImmutableList.of("5") ); assertFilterMatches( - new BoundDimFilter("dim1", "super-0", "super-10", false, false, true, superFn, StringComparators.ALPHANUMERIC), - ImmutableList.of("1", "2", "3") + new RangeFilter("dim1", ColumnType.STRING, "super-0", "super-10", false, false, superFn, null), + ImmutableList.of("1", "3") ); assertFilterMatchesSkipArrays( - new BoundDimFilter( + new RangeFilter( "dim2", + ColumnType.STRING, "super-", "super-zzzzzz", false, false, - false, superFn, - StringComparators.LEXICOGRAPHIC + null ), ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7") ); if (NullHandling.replaceWithDefault()) { assertFilterMatchesSkipArrays( - new BoundDimFilter( + new RangeFilter( "dim2", + ColumnType.STRING, "super-null", "super-null", false, false, - false, superFn, - StringComparators.LEXICOGRAPHIC + null ), ImmutableList.of("1", "2", "5") ); assertFilterMatchesSkipArrays( - new BoundDimFilter( + new RangeFilter( "dim2", + ColumnType.STRING, "super-null", "super-null", false, false, - false, superFn, - StringComparators.NUMERIC + null ), ImmutableList.of("1", "2", "5") ); } else { assertFilterMatchesSkipArrays( - new BoundDimFilter( + new RangeFilter( "dim2", + ColumnType.STRING, "super-null", "super-null", false, false, - false, superFn, - 
StringComparators.LEXICOGRAPHIC + null ), ImmutableList.of("1", "5") ); assertFilterMatchesSkipArrays( - new BoundDimFilter("dim2", "super-", "super-", false, false, false, superFn, StringComparators.NUMERIC), + new RangeFilter("dim2", ColumnType.STRING, "super-", "super-", false, false, superFn, null), ImmutableList.of("2") ); assertFilterMatchesSkipArrays( - new BoundDimFilter( + new RangeFilter( "dim2", + ColumnType.STRING, "super-null", "super-null", false, false, - false, superFn, - StringComparators.LEXICOGRAPHIC + null ), ImmutableList.of("1", "5") ); assertFilterMatchesSkipArrays( - new BoundDimFilter("dim2", "super-", "super-", false, false, false, superFn, StringComparators.NUMERIC), + new RangeFilter("dim2", ColumnType.STRING, "super-", "super-", false, false, superFn, null), ImmutableList.of("2") ); } assertFilterMatches( - new BoundDimFilter( + new RangeFilter( "dim3", + ColumnType.STRING, "super-null", "super-null", false, false, - false, superFn, - StringComparators.LEXICOGRAPHIC + null ), ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7") ); assertFilterMatches( - new BoundDimFilter( + new RangeFilter( "dim4", + ColumnType.STRING, "super-null", "super-null", false, false, - false, superFn, - StringComparators.LEXICOGRAPHIC + null ), ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7") ); assertFilterMatches( - new BoundDimFilter("dim4", "super-null", "super-null", false, false, false, superFn, StringComparators.NUMERIC), + new RangeFilter("dim4", ColumnType.STRING, "super-null", "super-null", false, false, superFn, null), ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7") ); } @@ -751,75 +835,83 @@ public void testMatchWithExtractionFn() public void testListFilteredVirtualColumn() { assertFilterMatchesSkipVectorize( - new BoundDimFilter("allow-dim0", "0", "2", false, false, false, null, StringComparators.LEXICOGRAPHIC), + new RangeFilter("allow-dim0", ColumnType.STRING, "0", "2", false, false, null, null), ImmutableList.of() ); 
assertFilterMatchesSkipVectorize( - new BoundDimFilter("allow-dim0", "0", "6", false, false, false, null, StringComparators.LEXICOGRAPHIC), + new RangeFilter("allow-dim0", ColumnType.STRING, "0", "6", false, false, null, null), ImmutableList.of("3", "4") ); - // the bound filter matches null, so it is what it is... assertFilterMatchesSkipVectorize( - new BoundDimFilter("allow-dim0", null, "6", false, false, false, null, StringComparators.LEXICOGRAPHIC), - ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7") + new RangeFilter("allow-dim0", ColumnType.STRING, null, "6", false, false, null, null), + ImmutableList.of("3", "4") ); assertFilterMatchesSkipVectorize( - new BoundDimFilter("deny-dim0", "0", "6", false, false, false, null, StringComparators.LEXICOGRAPHIC), + new RangeFilter("deny-dim0", ColumnType.STRING, "0", "6", false, false, null, null), ImmutableList.of("0", "1", "2", "5", "6") ); assertFilterMatchesSkipVectorize( - new BoundDimFilter("deny-dim0", "3", "4", false, false, false, null, StringComparators.LEXICOGRAPHIC), + new RangeFilter("deny-dim0", ColumnType.STRING, "3", "4", false, false, null, null), ImmutableList.of() ); - // the bound filter matches null, so it is what it is... 
assertFilterMatchesSkipVectorize( - new BoundDimFilter("deny-dim0", null, "6", false, false, false, null, StringComparators.LEXICOGRAPHIC), - ImmutableList.of("0", "1", "2", "3", "4", "5", "6") + new RangeFilter("deny-dim0", ColumnType.STRING, null, "6", false, false, null, null), + ImmutableList.of("0", "1", "2", "5", "6") ); if (isAutoSchema()) { // bail out, auto ingests arrays instead of mvds and this virtual column is for mvd stuff return; } + + + /* + makeDefaultSchemaRow("0", "", ImmutableList.of("a", "b"), "2017-07-25", 0.0, 0.0f, 0L), + makeDefaultSchemaRow("1", "10", ImmutableList.of(), "2017-07-25", 10.1, 10.1f, 100L), + makeDefaultSchemaRow("2", "2", ImmutableList.of(""), "2017-05-25", null, 5.5f, 40L), + makeDefaultSchemaRow("3", "1", ImmutableList.of("a"), "2020-01-25", 120.0245, 110.0f, null), + makeDefaultSchemaRow("4", "abdef", ImmutableList.of("c"), null, 60.0, null, 9001L), + makeDefaultSchemaRow("5", "abc", null, "2020-01-25", 765.432, 123.45f, 12345L) + makeDefaultSchemaRow("6", "-1000", ImmutableList.of("a"), null, 6.6, null, 10L) + makeDefaultSchemaRow("7", "-10.012", ImmutableList.of("d"), null, null, 3.0f, null) + + // allow 'a' + // deny 'a' + */ assertFilterMatchesSkipVectorize( - new BoundDimFilter("allow-dim2", "a", "c", false, false, false, null, StringComparators.LEXICOGRAPHIC), + new RangeFilter("allow-dim2", ColumnType.STRING, "a", "c", false, false, null, null), ImmutableList.of("0", "3", "6") ); assertFilterMatchesSkipVectorize( - new BoundDimFilter("allow-dim2", "c", "z", false, false, false, null, StringComparators.LEXICOGRAPHIC), + new RangeFilter("allow-dim2", ColumnType.STRING, "c", "z", false, false, null, null), ImmutableList.of() ); - // the bound filter matches null, so it is what it is... 
assertFilterMatchesSkipVectorize( - new BoundDimFilter("allow-dim2", null, "z", false, false, false, null, StringComparators.LEXICOGRAPHIC), - ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7") + new RangeFilter("allow-dim2", ColumnType.STRING, null, "z", false, false, null, null), + ImmutableList.of("0", "3", "6") ); assertFilterMatchesSkipVectorize( - new BoundDimFilter("deny-dim2", "a", "b", false, true, false, null, StringComparators.LEXICOGRAPHIC), + new RangeFilter("deny-dim2", ColumnType.STRING, "a", "b", false, true, null, null), ImmutableList.of() ); assertFilterMatchesSkipVectorize( - new BoundDimFilter("deny-dim2", "c", "z", false, false, false, null, StringComparators.LEXICOGRAPHIC), + new RangeFilter("deny-dim2", ColumnType.STRING, "c", "z", false, false, null, null), ImmutableList.of("4", "7") ); - // the bound filter matches null, so it is what it is... + assertFilterMatchesSkipVectorize( - new BoundDimFilter("deny-dim2", null, "z", false, false, false, null, StringComparators.LEXICOGRAPHIC), - ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7") + new RangeFilter("deny-dim2", ColumnType.STRING, null, "z", false, false, null, null), + NullHandling.replaceWithDefault() ? 
ImmutableList.of("0", "4", "7") : ImmutableList.of("0", "2", "4", "7") ); } @Test public void testRequiredColumnRewrite() { - BoundFilter filter = new BoundFilter( - new BoundDimFilter("dim0", "", "", false, false, true, null, StringComparators.ALPHANUMERIC) - ); - BoundFilter filter2 = new BoundFilter( - new BoundDimFilter("dim1", "", "", false, false, true, null, StringComparators.ALPHANUMERIC) - ); + RangeFilter filter = new RangeFilter("dim0", ColumnType.STRING, "", "", false, false, null, null); + RangeFilter filter2 = new RangeFilter("dim1", ColumnType.STRING, "", "", false, false, null, null); Assert.assertTrue(filter.supportsRequiredColumnRewrite()); Assert.assertTrue(filter2.supportsRequiredColumnRewrite()); @@ -840,18 +932,19 @@ public void testRequiredColumnRewrite() @Test public void test_equals() { - EqualsVerifier.forClass(BoundFilter.class) - .usingGetClass() - .withNonnullFields("boundDimFilter") - .verify(); - } - - @Test - public void test_equals_boundDimFilterDruidPredicateFactory() - { - EqualsVerifier.forClass(BoundFilter.BoundDimFilterDruidPredicateFactory.class) + EqualsVerifier.forClass(RangeFilter.class) + .withNonnullFields("column", "matchValueType") + .withIgnoredFields( + "lowerEval", + "upperEval", + "cachedOptimizedFilter", + "stringPredicateSupplier", + "longPredicateSupplier", + "floatPredicateSupplier", + "doublePredicateSupplier" + ) + .withPrefabValues(ColumnType.class, ColumnType.STRING, ColumnType.DOUBLE) .usingGetClass() - .withIgnoredFields("longPredicateSupplier", "floatPredicateSupplier", "doublePredicateSupplier") .verify(); } } diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayContainsOperatorConversion.java b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayContainsOperatorConversion.java index 6d66971476a8..7b4af1575560 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayContainsOperatorConversion.java +++ 
b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayContainsOperatorConversion.java @@ -96,6 +96,8 @@ public DimFilter toDruidFilter( final DruidExpression leftExpr = druidExpressions.get(0); final DruidExpression rightExpr = druidExpressions.get(1); + // if the input column is not actually an ARRAY type, but rather an MVD, we can optimize this into + // selector/equality filters on the individual array elements if (leftExpr.isSimpleExtraction() && !(leftExpr.isDirectColumnAccess() && leftExpr.getDruidType() != null && leftExpr.getDruidType().isArray())) { Expr expr = plannerContext.parseExpression(rightExpr.getExpression()); // To convert this expression filter into an And of Selector filters, we need to extract all array elements. @@ -119,7 +121,7 @@ public DimFilter toDruidFilter( } else { return new EqualityFilter( leftExpr.getSimpleExtraction().getColumn(), - ExpressionType.toColumnType(exprEval.type()), + ExpressionType.toColumnType((ExpressionType) exprEval.type().getElementType()), arrayElements[0], leftExpr.getSimpleExtraction().getExtractionFn(), null @@ -134,7 +136,7 @@ public DimFilter toDruidFilter( } else { return new EqualityFilter( leftExpr.getSimpleExtraction().getColumn(), - ExpressionType.toColumnType(exprEval.type()), + ExpressionType.toColumnType((ExpressionType) exprEval.type().getElementType()), val, leftExpr.getSimpleExtraction().getExtractionFn(), null diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java index addd48965a93..1a43a240f170 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java @@ -41,11 +41,9 @@ import org.apache.druid.query.aggregation.LongSumAggregatorFactory; import org.apache.druid.query.dimension.DefaultDimensionSpec; import org.apache.druid.query.expression.TestExprMacroTable; 
-import org.apache.druid.query.filter.AndDimFilter; import org.apache.druid.query.filter.ExpressionDimFilter; import org.apache.druid.query.filter.InDimFilter; import org.apache.druid.query.filter.LikeDimFilter; -import org.apache.druid.query.filter.SelectorDimFilter; import org.apache.druid.query.groupby.GroupByQuery; import org.apache.druid.query.groupby.having.DimFilterHavingSpec; import org.apache.druid.query.groupby.orderby.DefaultLimitSpec; @@ -551,9 +549,9 @@ public void testArrayContainsFilter() .dataSource(CalciteTests.DATASOURCE3) .intervals(querySegmentSpec(Filtration.eternity())) .filters( - new AndDimFilter( - new SelectorDimFilter("dim3", "a", null), - new SelectorDimFilter("dim3", "b", null) + and( + equality("dim3", "a", ColumnType.STRING), + equality("dim3", "b", ColumnType.STRING) ) ) .columns("dim3") @@ -577,7 +575,7 @@ public void testArrayContainsArrayOfOneElement() newScanQueryBuilder() .dataSource(CalciteTests.DATASOURCE3) .intervals(querySegmentSpec(Filtration.eternity())) - .filters(new SelectorDimFilter("dim3", "a", null)) + .filters(equality("dim3", "a", ColumnType.STRING)) .columns("dim3") .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .limit(5) diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteMultiValueStringQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteMultiValueStringQueryTest.java index 3520db7a4ac1..d4f25101545c 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteMultiValueStringQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteMultiValueStringQueryTest.java @@ -33,12 +33,10 @@ import org.apache.druid.query.aggregation.LongSumAggregatorFactory; import org.apache.druid.query.dimension.DefaultDimensionSpec; import org.apache.druid.query.expression.TestExprMacroTable; -import org.apache.druid.query.filter.AndDimFilter; import org.apache.druid.query.filter.ExpressionDimFilter; import org.apache.druid.query.filter.InDimFilter; import 
org.apache.druid.query.filter.LikeDimFilter; import org.apache.druid.query.filter.OrDimFilter; -import org.apache.druid.query.filter.SelectorDimFilter; import org.apache.druid.query.groupby.GroupByQuery; import org.apache.druid.query.groupby.GroupByQueryConfig; import org.apache.druid.query.groupby.orderby.DefaultLimitSpec; @@ -316,9 +314,9 @@ public void testMultiValueStringContainsFilter() .dataSource(CalciteTests.DATASOURCE3) .eternityInterval() .filters( - new AndDimFilter( - new SelectorDimFilter("dim3", "a", null), - new SelectorDimFilter("dim3", "b", null) + and( + equality("dim3", "a", ColumnType.STRING), + equality("dim3", "b", ColumnType.STRING) ) ) .columns("dim3") @@ -342,7 +340,7 @@ public void testMultiValueStringContainsArrayOfOneElement() newScanQueryBuilder() .dataSource(CalciteTests.DATASOURCE3) .eternityInterval() - .filters(new SelectorDimFilter("dim3", "a", null)) + .filters(equality("dim3", "a", ColumnType.STRING)) .columns("dim3") .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .limit(5) From cfcef4971c4158c3c300a5dc207496fcabc52cf3 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Sun, 9 Jul 2023 19:18:24 -0700 Subject: [PATCH 06/44] adjust --- .../input/DruidSegmentReaderTest.java | 2 +- .../druid/query/filter/RangeFilter.java | 3 --- .../druid/segment/filter/RangeFilterTest.java | 22 ++++++++++++------- .../ArrayContainsOperatorConversion.java | 4 ++-- 4 files changed, 17 insertions(+), 14 deletions(-) diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/input/DruidSegmentReaderTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/input/DruidSegmentReaderTest.java index 85e9acd3e84c..1ab45d631d4d 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/input/DruidSegmentReaderTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/input/DruidSegmentReaderTest.java @@ -734,7 +734,7 @@ public void testArrayColumns() throws IOException .buildIncrementalIndex(); 
File segmentDirectory = temporaryFolder.newFolder(); - long segmentSize = 0; + long segmentSize; try { TestHelper.getTestIndexMergerV9( OnHeapMemorySegmentWriteOutMediumFactory.instance() diff --git a/processing/src/main/java/org/apache/druid/query/filter/RangeFilter.java b/processing/src/main/java/org/apache/druid/query/filter/RangeFilter.java index 8e6441b43608..0fe2df5f5876 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/RangeFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/RangeFilter.java @@ -266,9 +266,6 @@ public byte[] getCacheKey() @Override public DimFilter optimize() { -// if (isEquality()) { -// return new EqualityFilter(column, matchValueType, lower, extractionFn, filterTuning); -// } return this; } diff --git a/processing/src/test/java/org/apache/druid/segment/filter/RangeFilterTest.java b/processing/src/test/java/org/apache/druid/segment/filter/RangeFilterTest.java index 981c8740e672..425d60fe3f7a 100644 --- a/processing/src/test/java/org/apache/druid/segment/filter/RangeFilterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/filter/RangeFilterTest.java @@ -279,7 +279,9 @@ public void testLexicographicMatchNoLowerLimit() { assertFilterMatches( new RangeFilter("dim1", ColumnType.STRING, null, "abd", true, true, null, null), - NullHandling.replaceWithDefault() ? ImmutableList.of("1", "2", "3", "5", "6", "7") : ImmutableList.of("0", "1", "2", "3", "5", "6", "7") + NullHandling.replaceWithDefault() + ? 
ImmutableList.of("1", "2", "3", "5", "6", "7") + : ImmutableList.of("0", "1", "2", "3", "5", "6", "7") ); } @@ -497,19 +499,19 @@ public void testNumericMatchSurroundingSingleValue() public void testNumericMatchNoUpperLimit() { assertFilterMatches( - new RangeFilter("dim1", ColumnType.LONG,1L, null, true, true, null, null), + new RangeFilter("dim1", ColumnType.LONG, 1L, null, true, true, null, null), ImmutableList.of("1", "2") ); assertFilterMatches( - new RangeFilter("d0", ColumnType.DOUBLE,1.0, null, true, true, null, null), + new RangeFilter("d0", ColumnType.DOUBLE, 1.0, null, true, true, null, null), ImmutableList.of("1", "3", "4", "5", "6") ); assertFilterMatches( - new RangeFilter("f0", ColumnType.FLOAT,1.0f, null, true, true, null, null), + new RangeFilter("f0", ColumnType.FLOAT, 1.0f, null, true, true, null, null), ImmutableList.of("1", "2", "3", "5", "7") ); assertFilterMatches( - new RangeFilter("l0", ColumnType.LONG,1L, null, true, true, null, null), + new RangeFilter("l0", ColumnType.LONG, 1L, null, true, true, null, null), ImmutableList.of("1", "2", "4", "5", "6") ); } @@ -520,16 +522,20 @@ public void testNumericMatchNoLowerLimit() // strings are wierd... assertFilterMatches( new RangeFilter("dim1", ColumnType.LONG, null, 2L, false, true, null, null), - NullHandling.replaceWithDefault() ? ImmutableList.of("3", "4", "5", "6", "7") : ImmutableList.of("0", "3", "4", "5", "6", "7") + NullHandling.replaceWithDefault() + ? ImmutableList.of("3", "4", "5", "6", "7") + : ImmutableList.of("0", "3", "4", "5", "6", "7") ); // numbers are sane though assertFilterMatches( new RangeFilter("d0", ColumnType.DOUBLE, null, 10.0, false, true, null, null), - canTestNumericNullsAsDefaultValues ? ImmutableList.of("0", "2", "6", "7") : ImmutableList.of("0","6") + canTestNumericNullsAsDefaultValues ? 
ImmutableList.of("0", "2", "6", "7") : ImmutableList.of("0", "6") ); assertFilterMatches( new RangeFilter("f0", ColumnType.FLOAT, null, 50.5, false, true, null, null), - canTestNumericNullsAsDefaultValues ? ImmutableList.of("0", "1", "2", "4", "6", "7") : ImmutableList.of("0", "1", "2", "7") + canTestNumericNullsAsDefaultValues + ? ImmutableList.of("0", "1", "2", "4", "6", "7") + : ImmutableList.of("0", "1", "2", "7") ); assertFilterMatches( new RangeFilter("l0", ColumnType.LONG, null, 100L, false, true, null, null), diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayContainsOperatorConversion.java b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayContainsOperatorConversion.java index 7b4af1575560..b53a96d45716 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayContainsOperatorConversion.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayContainsOperatorConversion.java @@ -121,7 +121,7 @@ public DimFilter toDruidFilter( } else { return new EqualityFilter( leftExpr.getSimpleExtraction().getColumn(), - ExpressionType.toColumnType((ExpressionType) exprEval.type().getElementType()), + ExpressionType.toColumnType(ExpressionType.elementType(exprEval.type())), arrayElements[0], leftExpr.getSimpleExtraction().getExtractionFn(), null @@ -136,7 +136,7 @@ public DimFilter toDruidFilter( } else { return new EqualityFilter( leftExpr.getSimpleExtraction().getColumn(), - ExpressionType.toColumnType((ExpressionType) exprEval.type().getElementType()), + ExpressionType.toColumnType(ExpressionType.elementType(exprEval.type())), val, leftExpr.getSimpleExtraction().getExtractionFn(), null From 3defa8aa8d552b32bf41bcc23c1bb1d66fd2d971 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Sun, 9 Jul 2023 21:53:39 -0700 Subject: [PATCH 07/44] fix tests --- ...etsHistogramQuantileSqlAggregatorTest.java | 21 ++-- .../sql/QuantileSqlAggregatorTest.java | 96 +++++++++++++++---- 2 
files changed, 88 insertions(+), 29 deletions(-) diff --git a/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/sql/FixedBucketsHistogramQuantileSqlAggregatorTest.java b/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/sql/FixedBucketsHistogramQuantileSqlAggregatorTest.java index ab3c0fda4a0c..03b194900336 100644 --- a/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/sql/FixedBucketsHistogramQuantileSqlAggregatorTest.java +++ b/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/sql/FixedBucketsHistogramQuantileSqlAggregatorTest.java @@ -40,10 +40,7 @@ import org.apache.druid.query.aggregation.post.FieldAccessPostAggregator; import org.apache.druid.query.dimension.DefaultDimensionSpec; import org.apache.druid.query.expression.TestExprMacroTable; -import org.apache.druid.query.filter.NotDimFilter; -import org.apache.druid.query.filter.SelectorDimFilter; import org.apache.druid.query.groupby.GroupByQuery; -import org.apache.druid.query.ordering.StringComparators; import org.apache.druid.query.spec.MultipleIntervalSegmentSpec; import org.apache.druid.segment.IndexBuilder; import org.apache.druid.segment.QueryableIndex; @@ -189,7 +186,7 @@ public void testQuantileOnFloatAndLongs() FixedBucketsHistogram.OutlierHandlingMode.IGNORE, false ), - new SelectorDimFilter("dim1", "abc", null) + equality("dim1", "abc", ColumnType.STRING) ), new FilteredAggregatorFactory( new FixedBucketsHistogramAggregatorFactory( @@ -201,7 +198,7 @@ public void testQuantileOnFloatAndLongs() FixedBucketsHistogram.OutlierHandlingMode.IGNORE, false ), - new NotDimFilter(new SelectorDimFilter("dim1", "abc", null)) + not(equality("dim1", "abc", ColumnType.STRING)) ), new FixedBucketsHistogramAggregatorFactory( "a8:agg", @@ -293,7 +290,7 @@ public void testQuantileOnCastedString() FixedBucketsHistogram.OutlierHandlingMode.IGNORE, false ), - new 
SelectorDimFilter("dim1", "abc", null) + equality("dim1", "abc", ColumnType.STRING) ), new FilteredAggregatorFactory( new FixedBucketsHistogramAggregatorFactory( @@ -305,7 +302,7 @@ public void testQuantileOnCastedString() FixedBucketsHistogram.OutlierHandlingMode.IGNORE, false ), - new NotDimFilter(new SelectorDimFilter("dim1", "abc", null)) + not(equality("dim1", "abc", ColumnType.STRING)) ) )) .postAggregators( @@ -408,7 +405,7 @@ public void testQuantileOnComplexColumn() FixedBucketsHistogram.OutlierHandlingMode.IGNORE, false ), - new SelectorDimFilter("dim1", "abc", null) + equality("dim1", "abc", ColumnType.STRING) ), new FilteredAggregatorFactory( new FixedBucketsHistogramAggregatorFactory( @@ -420,7 +417,7 @@ public void testQuantileOnComplexColumn() FixedBucketsHistogram.OutlierHandlingMode.IGNORE, false ), - new NotDimFilter(new SelectorDimFilter("dim1", "abc", null)) + not(equality("dim1", "abc", ColumnType.STRING)) ) )) .postAggregators( @@ -521,7 +518,11 @@ public void testEmptyTimeseriesResults() .dataSource(CalciteTests.DATASOURCE1) .intervals(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity()))) .granularity(Granularities.ALL) - .filters(bound("dim2", "0", "0", false, false, null, StringComparators.NUMERIC)) + .filters( + NullHandling.replaceWithDefault() + ? 
numericSelector("dim2", "0", null) + : equality("dim2", 0L, ColumnType.LONG) + ) .aggregators(ImmutableList.of( new FixedBucketsHistogramAggregatorFactory( "a0:agg", diff --git a/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/sql/QuantileSqlAggregatorTest.java b/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/sql/QuantileSqlAggregatorTest.java index 292f41b5be4b..42aa77914627 100644 --- a/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/sql/QuantileSqlAggregatorTest.java +++ b/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/sql/QuantileSqlAggregatorTest.java @@ -39,10 +39,7 @@ import org.apache.druid.query.aggregation.post.FieldAccessPostAggregator; import org.apache.druid.query.dimension.DefaultDimensionSpec; import org.apache.druid.query.expression.TestExprMacroTable; -import org.apache.druid.query.filter.NotDimFilter; -import org.apache.druid.query.filter.SelectorDimFilter; import org.apache.druid.query.groupby.GroupByQuery; -import org.apache.druid.query.ordering.StringComparators; import org.apache.druid.query.spec.MultipleIntervalSegmentSpec; import org.apache.druid.segment.IndexBuilder; import org.apache.druid.segment.QueryableIndex; @@ -151,11 +148,11 @@ public void testQuantileOnFloatAndLongs() new ApproximateHistogramAggregatorFactory("a4:agg", "v0", null, null, null, null, false), new FilteredAggregatorFactory( new ApproximateHistogramAggregatorFactory("a5:agg", "m1", null, null, null, null, false), - new SelectorDimFilter("dim1", "abc", null) + equality("dim1", "abc", ColumnType.STRING) ), new FilteredAggregatorFactory( new ApproximateHistogramAggregatorFactory("a6:agg", "m1", null, null, null, null, false), - new NotDimFilter(new SelectorDimFilter("dim1", "abc", null)) + not(equality("dim1", "abc", ColumnType.STRING)) ), new ApproximateHistogramAggregatorFactory("a8:agg", "cnt", null, null, null, null, 
false) )) @@ -208,15 +205,47 @@ public void testQuantileOnComplexColumn() .intervals(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity()))) .granularity(Granularities.ALL) .aggregators(ImmutableList.of( - new ApproximateHistogramFoldingAggregatorFactory("a0:agg", "hist_m1", null, null, null, null, false), - new ApproximateHistogramFoldingAggregatorFactory("a2:agg", "hist_m1", 200, null, null, null, false), + new ApproximateHistogramFoldingAggregatorFactory( + "a0:agg", + "hist_m1", + null, + null, + null, + null, + false + ), + new ApproximateHistogramFoldingAggregatorFactory( + "a2:agg", + "hist_m1", + 200, + null, + null, + null, + false + ), new FilteredAggregatorFactory( - new ApproximateHistogramFoldingAggregatorFactory("a4:agg", "hist_m1", null, null, null, null, false), - new SelectorDimFilter("dim1", "abc", null) + new ApproximateHistogramFoldingAggregatorFactory( + "a4:agg", + "hist_m1", + null, + null, + null, + null, + false + ), + equality("dim1", "abc", ColumnType.STRING) ), new FilteredAggregatorFactory( - new ApproximateHistogramFoldingAggregatorFactory("a5:agg", "hist_m1", null, null, null, null, false), - new NotDimFilter(new SelectorDimFilter("dim1", "abc", null)) + new ApproximateHistogramFoldingAggregatorFactory( + "a5:agg", + "hist_m1", + null, + null, + null, + null, + false + ), + not(equality("dim1", "abc", ColumnType.STRING)) ) )) .postAggregators( @@ -379,12 +408,25 @@ public void testEmptyTimeseriesResults() .dataSource(CalciteTests.DATASOURCE1) .intervals(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity()))) .granularity(Granularities.ALL) - .filters(bound("dim2", "0", "0", false, false, null, StringComparators.NUMERIC)) - .aggregators(ImmutableList.of( - new ApproximateHistogramFoldingAggregatorFactory("a0:agg", "hist_m1", null, null, null, null, false), - new ApproximateHistogramAggregatorFactory("a1:agg", "m1", null, null, null, null, false) - - )) + .filters( + NullHandling.replaceWithDefault() 
+ ? numericSelector("dim2", "0", null) + : equality("dim2", 0L, ColumnType.LONG) + ) + .aggregators( + ImmutableList.of( + new ApproximateHistogramFoldingAggregatorFactory( + "a0:agg", + "hist_m1", + null, + null, + null, + null, + false + ), + new ApproximateHistogramAggregatorFactory("a1:agg", "m1", null, null, null, null, false) + ) + ) .postAggregators( new QuantilePostAggregator("a0", "a0:agg", 0.01f), new QuantilePostAggregator("a1", "a1:agg", 0.01f) @@ -418,11 +460,27 @@ public void testGroupByAggregatorDefaultValues() .setAggregatorSpecs( aggregators( new FilteredAggregatorFactory( - new ApproximateHistogramFoldingAggregatorFactory("a0:agg", "hist_m1", null, null, null, null, false), + new ApproximateHistogramFoldingAggregatorFactory( + "a0:agg", + "hist_m1", + null, + null, + null, + null, + false + ), equality("dim1", "nonexistent", ColumnType.STRING) ), new FilteredAggregatorFactory( - new ApproximateHistogramAggregatorFactory("a1:agg", "m1", null, null, null, null, false), + new ApproximateHistogramAggregatorFactory( + "a1:agg", + "m1", + null, + null, + null, + null, + false + ), equality("dim1", "nonexistent", ColumnType.STRING) ) ) From ab97b3e4372be2cf98a00665fd356e06a23c9932 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Sun, 9 Jul 2023 21:56:09 -0700 Subject: [PATCH 08/44] remove ignored --- .../aggregation/bloom/BloomFilterAggregatorTest.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/aggregation/bloom/BloomFilterAggregatorTest.java b/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/aggregation/bloom/BloomFilterAggregatorTest.java index e52d710ee6d9..c15a55b64d14 100644 --- a/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/aggregation/bloom/BloomFilterAggregatorTest.java +++ b/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/aggregation/bloom/BloomFilterAggregatorTest.java 
@@ -281,7 +281,7 @@ public void testAggregateLongValues() throws IOException TestLongColumnSelector selector = new TestLongColumnSelector(Arrays.asList(LONG_VALUES1)); LongBloomFilterAggregator agg = new LongBloomFilterAggregator(selector, MAX_NUM_VALUES, true); - for (Long ignored : LONG_VALUES1) { + for (int i = 0; i < LONG_VALUES1.length; i++) { aggregateColumn(Collections.singletonList(selector), agg); } @@ -298,7 +298,7 @@ public void testAggregateFloatValues() throws IOException TestFloatColumnSelector selector = new TestFloatColumnSelector(Arrays.asList(FLOAT_VALUES1)); FloatBloomFilterAggregator agg = new FloatBloomFilterAggregator(selector, MAX_NUM_VALUES, true); - for (Float ignored : FLOAT_VALUES1) { + for (int i = 0; i < FLOAT_VALUES1.length; i++) { aggregateColumn(Collections.singletonList(selector), agg); } @@ -315,7 +315,7 @@ public void testAggregateDoubleValues() throws IOException TestDoubleColumnSelector selector = new TestDoubleColumnSelector(Arrays.asList(DOUBLE_VALUES1)); DoubleBloomFilterAggregator agg = new DoubleBloomFilterAggregator(selector, MAX_NUM_VALUES, true); - for (Double ignored : DOUBLE_VALUES1) { + for (int i = 0; i < DOUBLE_VALUES1.length; i++) { aggregateColumn(Collections.singletonList(selector), agg); } @@ -423,7 +423,7 @@ public void testAggregateArrayValues() throws IOException ); ObjectBloomFilterAggregator agg = new ObjectBloomFilterAggregator(selector, MAX_NUM_VALUES, true); - for (Object ignored : ARRAY_VALUES) { + for (int i = 0; i < ARRAY_VALUES.length; i++) { aggregateColumn(Collections.singletonList(selector), agg); } From 32097908f6f1d836c690da267eef4357bf699725 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Sun, 9 Jul 2023 21:58:14 -0700 Subject: [PATCH 09/44] adjust --- .../java/org/apache/druid/query/filter/EqualityFilter.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/filter/EqualityFilter.java 
b/processing/src/main/java/org/apache/druid/query/filter/EqualityFilter.java index 6a843c0e2223..98af3c2afbde 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/EqualityFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/EqualityFilter.java @@ -417,13 +417,13 @@ private void initFloatPredicate() floatPredicate = DruidFloatPredicate.MATCH_NULL_ONLY; return; } - final Float valueAsFloat = ((Number) matchValue.castTo(ExpressionType.DOUBLE).valueOrDefault()).floatValue(); + final Double doubleValue = (Double) matchValue.castTo(ExpressionType.DOUBLE).valueOrDefault(); - if (valueAsFloat == null) { + if (doubleValue == null) { floatPredicate = DruidFloatPredicate.ALWAYS_FALSE; } else { // Compare with floatToIntBits instead of == to canonicalize NaNs. - final int floatBits = Float.floatToIntBits(valueAsFloat); + final int floatBits = Float.floatToIntBits(doubleValue.floatValue()); floatPredicate = input -> Float.floatToIntBits(input) == floatBits; } } From e9d7e0437cb657bbb22ea209f622608b85763914 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Sun, 9 Jul 2023 23:49:37 -0700 Subject: [PATCH 10/44] fixes --- .../test/java/org/apache/druid/query/sql/SleepSqlTest.java | 4 +--- .../java/org/apache/druid/segment/data/FrontCodedIndexed.java | 1 - 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/extensions-core/testing-tools/src/test/java/org/apache/druid/query/sql/SleepSqlTest.java b/extensions-core/testing-tools/src/test/java/org/apache/druid/query/sql/SleepSqlTest.java index c3b1a102f3de..d8dc51f6c245 100644 --- a/extensions-core/testing-tools/src/test/java/org/apache/druid/query/sql/SleepSqlTest.java +++ b/extensions-core/testing-tools/src/test/java/org/apache/druid/query/sql/SleepSqlTest.java @@ -25,8 +25,6 @@ import org.apache.druid.guice.SleepModule; import org.apache.druid.query.Druids; import org.apache.druid.query.TableDataSource; -import org.apache.druid.query.filter.BoundDimFilter; -import 
org.apache.druid.query.ordering.StringComparators; import org.apache.druid.query.scan.ScanQuery.ResultFormat; import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.virtual.ExpressionVirtualColumn; @@ -61,7 +59,7 @@ public void testSleepFunction() ) ) .columns("v0") - .filters(new BoundDimFilter("m1", null, "2.0", null, true, null, null, StringComparators.NUMERIC)) + .filters(range("m1", ColumnType.DOUBLE, null, 2.0, false, true)) .resultFormat(ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .legacy(false) .context(QUERY_CONTEXT_DEFAULT) diff --git a/processing/src/main/java/org/apache/druid/segment/data/FrontCodedIndexed.java b/processing/src/main/java/org/apache/druid/segment/data/FrontCodedIndexed.java index 46bca6c3b7f2..ebbf13a91b09 100644 --- a/processing/src/main/java/org/apache/druid/segment/data/FrontCodedIndexed.java +++ b/processing/src/main/java/org/apache/druid/segment/data/FrontCodedIndexed.java @@ -1,4 +1,3 @@ - /* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file From 92a3fce546dec1de8d9868ab2ac38a08b718e499 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Mon, 10 Jul 2023 01:40:06 -0700 Subject: [PATCH 11/44] fix --- .../hll/sql/HllSketchSqlAggregatorTest.java | 2 +- .../apache/druid/msq/exec/MSQSelectTest.java | 13 ++++----- .../sql/calcite/BaseCalciteQueryTest.java | 27 +++++++++++++------ .../calcite/CalciteNestedDataQueryTest.java | 8 ++---- 4 files changed, 27 insertions(+), 23 deletions(-) diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java index 534114afe9ea..ba426523adc4 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java @@ -857,7 +857,7 @@ public void testEmptyTimeseriesResults() .intervals(querySegmentSpec(Filtration.eternity())) .filters( NullHandling.replaceWithDefault() - ? bound("dim2", "0", "0", false, false, null, StringComparators.NUMERIC) + ? 
numericSelector("dim2", "0", null) : equality("dim2", 0L, ColumnType.LONG) ) .granularity(Granularities.ALL) diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQSelectTest.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQSelectTest.java index 3e9a15c47934..2735acb86ae8 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQSelectTest.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQSelectTest.java @@ -56,8 +56,6 @@ import org.apache.druid.query.aggregation.post.FieldAccessPostAggregator; import org.apache.druid.query.dimension.DefaultDimensionSpec; import org.apache.druid.query.expression.TestExprMacroTable; -import org.apache.druid.query.filter.NotDimFilter; -import org.apache.druid.query.filter.SelectorDimFilter; import org.apache.druid.query.groupby.GroupByQuery; import org.apache.druid.query.groupby.orderby.DefaultLimitSpec; import org.apache.druid.query.groupby.orderby.OrderByColumnSpec; @@ -1446,14 +1444,13 @@ public void testHavingOnApproximateCountDistinct() ) .setHavingSpec( having( - bound( + range( "a0", - "1", + ColumnType.LONG, + 1L, null, true, - false, - null, - StringComparators.NUMERIC + false ) ) ) @@ -1753,7 +1750,7 @@ public void testGroupByMultiValueMeasureQuery() aggregators( new FilteredAggregatorFactory( new CountAggregatorFactory("a0"), - new NotDimFilter(new SelectorDimFilter("dim3", null, null)), + notNull("dim3"), "a0" ) ) diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/BaseCalciteQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/BaseCalciteQueryTest.java index b65a904756f3..8a903007f6d8 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/BaseCalciteQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/BaseCalciteQueryTest.java @@ -190,18 +190,27 @@ public static void setupNullValues() public static final String DUMMY_SQL_ID = "dummy"; public 
static final String PRETEND_CURRENT_TIME = "2000-01-01T00:00:00Z"; - private static final ImmutableMap.Builder DEFAULT_QUERY_CONTEXT_BUILDER = + + public static final Map QUERY_CONTEXT_DEFAULT = ImmutableMap.builder() .put(QueryContexts.CTX_SQL_QUERY_ID, DUMMY_SQL_ID) .put(PlannerContext.CTX_SQL_CURRENT_TIMESTAMP, "2000-01-01T00:00:00Z") .put(QueryContexts.DEFAULT_TIMEOUT_KEY, QueryContexts.DEFAULT_TIMEOUT_MILLIS) - .put(QueryContexts.MAX_SCATTER_GATHER_BYTES_KEY, Long.MAX_VALUE); - public static final Map QUERY_CONTEXT_DEFAULT = DEFAULT_QUERY_CONTEXT_BUILDER.build(); + .put(QueryContexts.MAX_SCATTER_GATHER_BYTES_KEY, Long.MAX_VALUE) + .build(); public static final Map QUERY_CONTEXT_NO_STRINGIFY_ARRAY = - DEFAULT_QUERY_CONTEXT_BUILDER.put(QueryContexts.CTX_SQL_STRINGIFY_ARRAYS, false) - .put(PlannerContext.CTX_ENABLE_UNNEST, true) - .build(); + ImmutableMap.builder() + .putAll(QUERY_CONTEXT_DEFAULT) + .put(QueryContexts.CTX_SQL_STRINGIFY_ARRAYS, false) + .put(PlannerContext.CTX_ENABLE_UNNEST, true) + .build(); + + public static final Map QUERY_CONTEXT_NO_STRINGIFY_ARRAY_USE_EQUALITY = + ImmutableMap.builder() + .putAll(QUERY_CONTEXT_NO_STRINGIFY_ARRAY) + .put(PlannerContext.CTX_SQL_USE_BOUNDS_AND_SELECTORS, false) + .build(); public static final Map QUERY_CONTEXT_DONT_SKIP_EMPTY_BUCKETS = ImmutableMap.of( QueryContexts.CTX_SQL_QUERY_ID, DUMMY_SQL_ID, @@ -245,8 +254,10 @@ public static void setupNullValues() ); public static final Map QUERY_CONTEXT_WITH_SUBQUERY_MEMORY_LIMIT = - DEFAULT_QUERY_CONTEXT_BUILDER.put(QueryContexts.MAX_SUBQUERY_BYTES_KEY, "100000") - .build(); + ImmutableMap.builder() + .putAll(QUERY_CONTEXT_DEFAULT) + .put(QueryContexts.MAX_SUBQUERY_BYTES_KEY, "100000") + .build(); // Add additional context to the given context map for when the // timeseries query has timestamp_floor expression on the timestamp dimension diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java 
b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java index f5443697fa10..7a8c719ee818 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java @@ -1221,10 +1221,6 @@ public void testGroupByRootSingleTypeArrayLong() @Test public void testGroupByRootSingleTypeArrayLongFilteredArrayEquality() { - if (NullHandling.replaceWithDefault()) { - // this fails in default value mode because it relies on equality filter - return; - } cannotVectorize(); testBuilder() .sql( @@ -1233,7 +1229,7 @@ public void testGroupByRootSingleTypeArrayLongFilteredArrayEquality() + "SUM(cnt) " + "FROM druid.arrays WHERE arrayLong = ARRAY[1, 2, 3] GROUP BY 1" ) - .queryContext(QUERY_CONTEXT_NO_STRINGIFY_ARRAY) + .queryContext(QUERY_CONTEXT_NO_STRINGIFY_ARRAY_USE_EQUALITY) .expectedQueries( ImmutableList.of( GroupByQuery.builder() @@ -1247,7 +1243,7 @@ public void testGroupByRootSingleTypeArrayLongFilteredArrayEquality() ) ) .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) - .setContext(QUERY_CONTEXT_NO_STRINGIFY_ARRAY) + .setContext(QUERY_CONTEXT_NO_STRINGIFY_ARRAY_USE_EQUALITY) .build() ) ) From 328d65eb623211905fb376ad6d1cf17bc00168cd Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Mon, 10 Jul 2023 11:47:21 -0700 Subject: [PATCH 12/44] more test --- .../druid/sql/calcite/CalciteNestedDataQueryTest.java | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java index a4c5fb22ada8..db37a763ab7f 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java @@ -2272,7 +2272,7 @@ public void 
testGroupByRootSingleTypeArrayDoubleElementFiltered() + "WHERE JSON_VALUE(arrayDoubleNulls, '$[2]' RETURNING DOUBLE) = 5.5" + "GROUP BY 1" ) - .queryContext(QUERY_CONTEXT_NO_STRINGIFY_ARRAY) + .queryContext(QUERY_CONTEXT_NO_STRINGIFY_ARRAY_USE_EQUALITY) .expectedQueries( ImmutableList.of( GroupByQuery.builder() @@ -2284,12 +2284,15 @@ public void testGroupByRootSingleTypeArrayDoubleElementFiltered() new DefaultDimensionSpec("v0", "d0", ColumnType.DOUBLE) ) ) - .setDimFilter(equality("v0", 5.5, ColumnType.DOUBLE)) + .setDimFilter( + // dont use static function since context flag indicates to always use equality + new EqualityFilter("v0", ColumnType.DOUBLE, 5.5, null, null) + ) .setVirtualColumns( new NestedFieldVirtualColumn("arrayDoubleNulls", "$[2]", "v0", ColumnType.DOUBLE) ) .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) - .setContext(QUERY_CONTEXT_NO_STRINGIFY_ARRAY) + .setContext(QUERY_CONTEXT_NO_STRINGIFY_ARRAY_USE_EQUALITY) .build() ) ) From 2d6e9ec2990efce2448dabfd379304d6f6140148 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Mon, 10 Jul 2023 13:11:54 -0700 Subject: [PATCH 13/44] javadoc for sql test filter functions --- .../sql/calcite/BaseCalciteQueryTest.java | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/BaseCalciteQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/BaseCalciteQueryTest.java index 8a903007f6d8..b487331ba9c0 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/BaseCalciteQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/BaseCalciteQueryTest.java @@ -400,11 +400,25 @@ public static DimFilter equality( return selector(fieldName, Evals.asString(matchValue), extractionFn); } + /** + * Callers should use {@link #equality(String, Object, ColumnType)} or + * {@link #equality(String, Object, ExtractionFn, ColumnType)} instead of this method, since they will correctly use + * either a {@link EqualityFilter} 
or {@link SelectorDimFilter} depending on the value of + * {@link NullHandling#sqlCompatible()}, which determines the default of + * {@link PlannerContext#CTX_SQL_USE_BOUNDS_AND_SELECTORS} + */ public static SelectorDimFilter selector(final String fieldName, final String value) { return selector(fieldName, value, null); } + /** + * Callers should use {@link #equality(String, Object, ColumnType)} or + * {@link #equality(String, Object, ExtractionFn, ColumnType)} instead of this method, since they will correctly use + * either a {@link EqualityFilter} or {@link SelectorDimFilter} depending on the value of + * {@link NullHandling#sqlCompatible()}, which determines the default of + * {@link PlannerContext#CTX_SQL_USE_BOUNDS_AND_SELECTORS} + */ public static SelectorDimFilter selector(final String fieldName, final String value, final ExtractionFn extractionFn) { return new SelectorDimFilter(fieldName, value, extractionFn); @@ -425,6 +439,13 @@ public static DimFilter numericSelector( return bound(fieldName, value, value, false, false, extractionFn, StringComparators.NUMERIC); } + /** + * Callers should use {@link #range(String, ColumnType, Object, Object, boolean, boolean)} or + * {@link #range(String, ColumnType, Object, Object, boolean, boolean, ExtractionFn)} instead of this method, since + * they will correctly use either a {@link RangeFilter} or {@link BoundDimFilter} depending on the value of + * {@link NullHandling#sqlCompatible()}, which determines the default of + * {@link PlannerContext#CTX_SQL_USE_BOUNDS_AND_SELECTORS} + */ public static BoundDimFilter bound( final String fieldName, final String lower, @@ -438,6 +459,11 @@ public static BoundDimFilter bound( return new BoundDimFilter(fieldName, lower, upper, lowerStrict, upperStrict, null, extractionFn, comparator); } + /** + * Callers should use {@link #timeRange(Object)} instead of this method, since it will correctly use either a + * {@link RangeFilter} or {@link BoundDimFilter} depending on the value of 
{@link NullHandling#sqlCompatible()}, + * which determines the default of {@link PlannerContext#CTX_SQL_USE_BOUNDS_AND_SELECTORS} + */ public static BoundDimFilter timeBound(final Object intervalObj) { final Interval interval = new Interval(intervalObj, ISOChronology.getInstanceUTC()); From 2af2350155937bbc6200783d2edeed24769e3ee8 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Mon, 10 Jul 2023 19:52:56 -0700 Subject: [PATCH 14/44] range filter support for arrays, tons more tests, fixes --- .../druid/query/filter/EqualityFilter.java | 69 ++- .../apache/druid/query/filter/NullFilter.java | 8 +- .../druid/query/filter/RangeFilter.java | 253 +++++++- .../druid/segment/AutoTypeColumnIndexer.java | 20 +- .../druid/segment/filter/BaseFilterTest.java | 159 ++++- .../segment/filter/EqualityFilterTest.java | 245 +++++++- .../segment/filter/ExpressionFilterTest.java | 4 + .../druid/segment/filter/NullFilterTest.java | 29 + .../druid/segment/filter/RangeFilterTest.java | 550 +++++++++++++++--- 9 files changed, 1198 insertions(+), 139 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/filter/EqualityFilter.java b/processing/src/main/java/org/apache/druid/query/filter/EqualityFilter.java index 98af3c2afbde..2721262425ee 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/EqualityFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/EqualityFilter.java @@ -22,13 +22,13 @@ import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonInclude; import com.fasterxml.jackson.annotation.JsonProperty; -import com.google.common.base.Preconditions; import com.google.common.base.Predicate; import com.google.common.base.Predicates; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Range; import com.google.common.collect.RangeSet; import com.google.common.collect.TreeRangeSet; +import org.apache.druid.error.DruidException; import org.apache.druid.java.util.common.IAE; 
import org.apache.druid.math.expr.ExprEval; import org.apache.druid.math.expr.ExpressionType; @@ -62,7 +62,7 @@ import javax.annotation.Nullable; import java.nio.ByteBuffer; -import java.util.Arrays; +import java.util.Comparator; import java.util.Map; import java.util.Objects; import java.util.Set; @@ -87,11 +87,23 @@ public EqualityFilter( @JsonProperty("filterTuning") @Nullable FilterTuning filterTuning ) { - Preconditions.checkArgument(column != null, "column must not be null"); - Preconditions.checkArgument(matchValue != null, "value must not be null"); - + if (column == null) { + throw DruidException.forPersona(DruidException.Persona.USER) + .ofCategory(DruidException.Category.INVALID_INPUT) + .build("Invalid equality filter, column cannot be null"); + } this.column = column; + if (matchValueType == null) { + throw DruidException.forPersona(DruidException.Persona.USER) + .ofCategory(DruidException.Category.INVALID_INPUT) + .build("Invalid equality filter on column [%s], matchValueType cannot be null", column); + } this.matchValueType = matchValueType; + if (matchValue == null) { + throw DruidException.forPersona(DruidException.Persona.USER) + .ofCategory(DruidException.Category.INVALID_INPUT) + .build("Invalid equality filter on column [%s], matchValue cannot be null", column); + } this.matchValue = matchValue; this.extractionFn = extractionFn; this.filterTuning = filterTuning; @@ -192,18 +204,30 @@ public boolean equals(Object o) if (o == null || getClass() != o.getClass()) { return false; } - boolean valuesMatch; EqualityFilter that = (EqualityFilter) o; - if (matchValue instanceof Object[] && that.matchValue instanceof Object[]) { - valuesMatch = Arrays.deepEquals((Object[]) matchValue, (Object[]) that.matchValue); + if (!column.equals(that.column)) { + return false; + } + if (!Objects.equals(matchValueType, that.matchValueType)) { + return false; + } + if (!Objects.equals(extractionFn, that.extractionFn)) { + return false; + } + if 
(!Objects.equals(filterTuning, that.filterTuning)) { + return false; + } + if (matchValueType.isArray()) { + // just use predicate to see if the values are the same + final ExprEval thatValue = ExprEval.ofType( + ExpressionType.fromColumnType(that.matchValueType), + that.matchValue + ); + final Predicate arrayPredicate = predicateFactory.makeArrayPredicate(matchValueType); + return arrayPredicate.apply(thatValue.asArray()); } else { - valuesMatch = Objects.equals(matchValue, that.matchValue); + return Objects.equals(matchValue, that.matchValue); } - return column.equals(that.column) && - Objects.equals(matchValueType, that.matchValueType) && - valuesMatch && - Objects.equals(extractionFn, that.extractionFn) && - Objects.equals(filterTuning, that.filterTuning); } @Override @@ -366,10 +390,19 @@ public DruidDoublePredicate makeDoublePredicate() @Override public Predicate makeArrayPredicate(@Nullable TypeSignature arrayType) { - final Object[] arrayValue = arrayType != null - ? matchValue.castTo(ExpressionType.fromColumnType(arrayType)).asArray() - : matchValue.asArray(); - return input -> Arrays.deepEquals(input, arrayValue); + if (arrayType != null) { + final Comparator arrayComparator = arrayType.getNullableStrategy(); + final Object[] matchArray = matchValue.castTo(ExpressionType.fromColumnType(arrayType)).asArray(); + return input -> arrayComparator.compare(input, matchArray) == 0; + } else { + // fall back to per row detection if input array type is unknown + return input -> { + final ExprEval eval = ExprEval.bestEffortOf(input); + final Comparator arrayComparator = arrayType.getNullableStrategy(); + final Object[] matchArray = matchValue.castTo(eval.type()).asArray(); + return arrayComparator.compare(input, matchArray) == 0; + }; + } } @Override diff --git a/processing/src/main/java/org/apache/druid/query/filter/NullFilter.java b/processing/src/main/java/org/apache/druid/query/filter/NullFilter.java index 1891ed3bf9bc..c6fcadb49428 100644 --- 
a/processing/src/main/java/org/apache/druid/query/filter/NullFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/NullFilter.java @@ -22,13 +22,13 @@ import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonInclude; import com.fasterxml.jackson.annotation.JsonProperty; -import com.google.common.base.Preconditions; import com.google.common.base.Predicate; import com.google.common.base.Predicates; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Range; import com.google.common.collect.RangeSet; import com.google.common.collect.TreeRangeSet; +import org.apache.druid.error.DruidException; import org.apache.druid.java.util.common.IAE; import org.apache.druid.query.cache.CacheKeyBuilder; import org.apache.druid.query.extraction.ExtractionFn; @@ -72,7 +72,11 @@ public NullFilter( @JsonProperty("filterTuning") @Nullable FilterTuning filterTuning ) { - Preconditions.checkArgument(column != null, "column must not be null"); + if (column == null) { + throw DruidException.forPersona(DruidException.Persona.USER) + .ofCategory(DruidException.Category.INVALID_INPUT) + .build("Invalid null filter, column cannot be null"); + } this.column = column; this.extractionFn = extractionFn; this.filterTuning = filterTuning; diff --git a/processing/src/main/java/org/apache/druid/query/filter/RangeFilter.java b/processing/src/main/java/org/apache/druid/query/filter/RangeFilter.java index 0fe2df5f5876..0c90b832047b 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/RangeFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/RangeFilter.java @@ -22,7 +22,6 @@ import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonInclude; import com.fasterxml.jackson.annotation.JsonProperty; -import com.google.common.base.Preconditions; import com.google.common.base.Predicate; import com.google.common.base.Predicates; import 
com.google.common.base.Supplier; @@ -49,6 +48,7 @@ import org.apache.druid.segment.ColumnSelectorFactory; import org.apache.druid.segment.column.ColumnIndexSupplier; import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.column.TypeSignature; import org.apache.druid.segment.column.TypeStrategy; import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.filter.DimensionPredicateFilter; @@ -83,13 +83,12 @@ public class RangeFilter extends AbstractOptimizableDimFilter implements Filter private final boolean upperStrict; @Nullable private final ExtractionFn extractionFn; - + @Nullable + private final FilterTuning filterTuning; private final Supplier> stringPredicateSupplier; private final Supplier longPredicateSupplier; private final Supplier floatPredicateSupplier; private final Supplier doublePredicateSupplier; - @Nullable - private final FilterTuning filterTuning; @JsonCreator public RangeFilter( @@ -103,9 +102,23 @@ public RangeFilter( @JsonProperty("filterTuning") @Nullable FilterTuning filterTuning ) { - this.column = Preconditions.checkNotNull(column, "column can not be null"); - this.matchValueType = Preconditions.checkNotNull(matchValueType, "matchValueType can not be null"); - Preconditions.checkState((lower != null) || (upper != null), "lower and upper can not be null at the same time"); + if (column == null) { + throw DruidException.forPersona(DruidException.Persona.USER) + .ofCategory(DruidException.Category.INVALID_INPUT) + .build("Invalid range filter, column cannot be null"); + } + this.column = column; + if (matchValueType == null) { + throw DruidException.forPersona(DruidException.Persona.USER) + .ofCategory(DruidException.Category.INVALID_INPUT) + .build("Invalid range filter on column [%s], matchValueType cannot be null", column); + } + this.matchValueType = matchValueType; + if(lower == null && upper == null) { + throw DruidException.forPersona(DruidException.Persona.USER) + 
.ofCategory(DruidException.Category.INVALID_INPUT) + .build("Invalid range filter on column [%s], lower and upper cannot be null at the same time", column); + }; final ExpressionType expressionType = ExpressionType.fromColumnType(matchValueType); this.upper = upper; this.lower = lower; @@ -715,6 +728,226 @@ private Supplier> makeStringPredicateSupplier() } }); } + private Predicate makeArrayPredicate(@Nullable TypeSignature arrayType) + { + if (hasLowerBound() && hasUpperBound()) { + if (upperStrict && lowerStrict) { + if (arrayType != null) { + final Object[] lowerBound = lowerEval.castTo(ExpressionType.fromColumnType(arrayType)).asArray(); + final Object[] upperBound = upperEval.castTo(ExpressionType.fromColumnType(arrayType)).asArray(); + final Comparator arrayComparator = arrayType.getNullableStrategy(); + return input -> { + if (input == null) { + return false; + } + final int lowerComparing = arrayComparator.compare(input, lowerBound); + final int upperComparing = arrayComparator.compare(upperBound, input); + return ((lowerComparing > 0)) && (upperComparing > 0); + }; + } else { + // fall back to per row type detection + return input -> { + if (input == null) { + return false; + } + ExprEval val = ExprEval.bestEffortOf(input); + final Object[] lowerBound = lowerEval.castTo(val.type()).asArray(); + final Object[] upperBound = upperEval.castTo(val.type()).asArray(); + final Comparator comparator = val.type().getNullableStrategy(); + final int lowerComparing = comparator.compare(val.asArray(), lowerBound); + final int upperComparing = comparator.compare(upperBound, val.asArray()); + return ((lowerComparing > 0)) && (upperComparing > 0); + }; + } + } else if (lowerStrict) { + if (arrayType != null) { + final Object[] lowerBound = lowerEval.castTo(ExpressionType.fromColumnType(arrayType)).asArray(); + final Object[] upperBound = upperEval.castTo(ExpressionType.fromColumnType(arrayType)).asArray(); + final Comparator arrayComparator = 
arrayType.getNullableStrategy(); + return input -> { + if (input == null) { + return false; + } + final int lowerComparing = arrayComparator.compare(input, lowerBound); + final int upperComparing = arrayComparator.compare(upperBound, input); + return (lowerComparing > 0) && (upperComparing >= 0); + }; + } else { + // fall back to per row type detection + return input -> { + if (input == null) { + return false; + } + ExprEval val = ExprEval.bestEffortOf(input); + final Object[] lowerBound = lowerEval.castTo(val.type()).asArray(); + final Object[] upperBound = upperEval.castTo(val.type()).asArray(); + final Comparator arrayComparator = val.type().getNullableStrategy(); + final int lowerComparing = arrayComparator.compare(val.asArray(), lowerBound); + final int upperComparing = arrayComparator.compare(upperBound, val.asArray()); + return (lowerComparing > 0) && (upperComparing >= 0); + }; + } + } else if (upperStrict) { + if (arrayType != null) { + final Object[] lowerBound = lowerEval.castTo(ExpressionType.fromColumnType(arrayType)).asArray(); + final Object[] upperBound = upperEval.castTo(ExpressionType.fromColumnType(arrayType)).asArray(); + final Comparator arrayComparator = arrayType.getNullableStrategy(); + return input -> { + if (input == null) { + return false; + } + final int lowerComparing = arrayComparator.compare(input, lowerBound); + final int upperComparing = arrayComparator.compare(upperBound, input); + return (lowerComparing >= 0) && (upperComparing > 0); + }; + } else { + // fall back to per row type detection + return input -> { + if (input == null) { + return false; + } + ExprEval val = ExprEval.bestEffortOf(input); + final Object[] lowerBound = lowerEval.castTo(val.type()).asArray(); + final Object[] upperBound = upperEval.castTo(val.type()).asArray(); + final Comparator arrayComparator = val.type().getNullableStrategy(); + final int lowerComparing = arrayComparator.compare(val.asArray(), lowerBound); + final int upperComparing = 
arrayComparator.compare(upperBound, val.asArray()); + return (lowerComparing >= 0) && (upperComparing > 0); + }; + } + } else { + if (arrayType != null) { + final Object[] lowerBound = lowerEval.castTo(ExpressionType.fromColumnType(arrayType)).asArray(); + final Object[] upperBound = upperEval.castTo(ExpressionType.fromColumnType(arrayType)).asArray(); + final Comparator arrayComparator = arrayType.getNullableStrategy(); + return input -> { + if (input == null) { + return false; + } + final int lowerComparing = arrayComparator.compare(input, lowerBound); + final int upperComparing = arrayComparator.compare(upperBound, input); + return (lowerComparing >= 0) && (upperComparing >= 0); + }; + } else { + // fall back to per row type detection + return input -> { + if (input == null) { + return false; + } + ExprEval val = ExprEval.bestEffortOf(input); + final Object[] lowerBound = lowerEval.castTo(val.type()).asArray(); + final Object[] upperBound = upperEval.castTo(val.type()).asArray(); + final Comparator arrayComparator = val.type().getNullableStrategy(); + final int lowerComparing = arrayComparator.compare(val.asArray(), lowerBound); + final int upperComparing = arrayComparator.compare(upperBound, val.asArray()); + return (lowerComparing >= 0) && (upperComparing >= 0); + }; + } + } + } else if (hasUpperBound()) { + if (upperStrict) { + if (arrayType != null) { + final Object[] upperBound = upperEval.castTo(ExpressionType.fromColumnType(arrayType)).asArray(); + final Comparator arrayComparator = arrayType.getNullableStrategy(); + return input -> { + if (input == null) { + return false; + } + final int upperComparing = arrayComparator.compare(upperBound, input); + return upperComparing > 0; + }; + } else { + // fall back to per row type detection + return input -> { + if (input == null) { + return false; + } + ExprEval val = ExprEval.bestEffortOf(input); + final Object[] upperBound = upperEval.castTo(val.type()).asArray(); + final Comparator arrayComparator = 
val.type().getNullableStrategy(); + final int upperComparing = arrayComparator.compare(upperBound, val.asArray()); + return upperComparing > 0; + }; + } + } else { + if (arrayType != null) { + final Object[] upperBound = upperEval.castTo(ExpressionType.fromColumnType(arrayType)).asArray(); + final Comparator arrayComparator = arrayType.getNullableStrategy(); + return input -> { + if (input == null) { + return false; + } + final int upperComparing = arrayComparator.compare(upperBound, input); + return upperComparing >= 0; + }; + } else { + // fall back to per row type detection + return input -> { + if (input == null) { + return false; + } + ExprEval val = ExprEval.bestEffortOf(input); + final Object[] upperBound = upperEval.castTo(val.type()).asArray(); + final Comparator arrayComparator = val.type().getNullableStrategy(); + final int upperComparing = arrayComparator.compare(upperBound, val.asArray()); + return upperComparing >= 0; + }; + } + } + } else if (hasLowerBound()) { + if (lowerStrict) { + if (arrayType != null) { + final Object[] lowerBound = lowerEval.castTo(ExpressionType.fromColumnType(arrayType)).asArray(); + final Comparator arrayComparator = arrayType.getNullableStrategy(); + return input -> { + if (input == null) { + return false; + } + final int lowerComparing = arrayComparator.compare(input, lowerBound); + return lowerComparing > 0; + }; + } else { + // fall back to per row type detection + return input -> { + if (input == null) { + return false; + } + ExprEval val = ExprEval.bestEffortOf(input); + final Object[] lowerBound = lowerEval.castTo(val.type()).asArray(); + final Comparator arrayComparator = val.type().getNullableStrategy(); + final int lowerComparing = arrayComparator.compare(lowerBound, val.asArray()); + return lowerComparing > 0; + }; + } + } else { + if (arrayType != null) { + final Object[] lowerBound = lowerEval.castTo(ExpressionType.fromColumnType(arrayType)).asArray(); + final Comparator arrayComparator = 
arrayType.getNullableStrategy(); + return input -> { + if (input == null) { + return false; + } + final int lowerComparing = arrayComparator.compare(input, lowerBound); + return lowerComparing >= 0; + }; + } else { + // fall back to per row type detection + return input -> { + if (input == null) { + return false; + } + ExprEval val = ExprEval.bestEffortOf(input); + final Object[] lowerBound = lowerEval.castTo(val.type()).asArray(); + final Comparator arrayComparator = val.type().getNullableStrategy(); + final int lowerComparing = arrayComparator.compare(lowerBound, val.asArray()); + return lowerComparing >= 0; + }; + } + } + } else { + return Predicates.notNull(); + } + } private class RangePredicateFactory implements DruidPredicateFactory { @@ -761,6 +994,12 @@ public DruidDoublePredicate makeDoublePredicate() return input -> stringPredicate.apply(String.valueOf(input)); } + @Override + public Predicate makeArrayPredicate(@Nullable TypeSignature inputType) + { + return RangeFilter.this.makeArrayPredicate(inputType); + } + @Override public int hashCode() { diff --git a/processing/src/main/java/org/apache/druid/segment/AutoTypeColumnIndexer.java b/processing/src/main/java/org/apache/druid/segment/AutoTypeColumnIndexer.java index 3ce8b5f4ba62..7c5fe88520b4 100644 --- a/processing/src/main/java/org/apache/druid/segment/AutoTypeColumnIndexer.java +++ b/processing/src/main/java/org/apache/druid/segment/AutoTypeColumnIndexer.java @@ -26,6 +26,7 @@ import org.apache.druid.java.util.common.IAE; import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.common.UOE; +import org.apache.druid.math.expr.Evals; import org.apache.druid.math.expr.ExprEval; import org.apache.druid.math.expr.ExpressionType; import org.apache.druid.query.dimension.DimensionSpec; @@ -223,10 +224,13 @@ public DimensionSelector makeDimensionSelector( ) { final int dimIndex = desc.getIndex(); + if (fieldIndexers.size() == 0 && isConstant && !hasNestedData) { + return 
DimensionSelector.constant(null, spec.getExtractionFn()); + } final ColumnValueSelector rootLiteralSelector = getRootLiteralValueSelector(currEntry, dimIndex); if (rootLiteralSelector != null) { final FieldIndexer root = fieldIndexers.get(NestedPathFinder.JSON_PATH_ROOT); - final ColumnType rootType = root.getTypes().getSingleType(); + final ColumnType rootType = root.isSingleType() ? root.getTypes().getSingleType() : getLogicalType(); if (rootType.isArray()) { throw new UOE( "makeDimensionSelector is not supported, column [%s] is [%s] typed and should only use makeColumnValueSelector", @@ -240,11 +244,11 @@ public DimensionSelector makeDimensionSelector( @Override protected String getValue() { - final Object o = rootLiteralSelector.getObject(); - if (o == null) { - return null; + final String o = Evals.asString(rootLiteralSelector.getObject()); + if (spec.getExtractionFn() != null) { + return spec.getExtractionFn().apply(o); } - return o.toString(); + return o; } @Override @@ -457,14 +461,14 @@ private ColumnValueSelector getRootLiteralValueSelector( int dimIndex ) { - if (fieldIndexers.size() > 1) { + if (fieldIndexers.size() > 1 || hasNestedData) { return null; } final FieldIndexer root = fieldIndexers.get(NestedPathFinder.JSON_PATH_ROOT); - if (root == null || !root.isSingleType()) { + if (root == null) { return null; } - final Object defaultValue = getDefaultValueForType(root.getTypes().getSingleType()); + final Object defaultValue = getDefaultValueForType(getLogicalType()); return new ColumnValueSelector() { @Override diff --git a/processing/src/test/java/org/apache/druid/segment/filter/BaseFilterTest.java b/processing/src/test/java/org/apache/druid/segment/filter/BaseFilterTest.java index 03471b31ca2e..b1e31638877e 100644 --- a/processing/src/test/java/org/apache/druid/segment/filter/BaseFilterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/filter/BaseFilterTest.java @@ -159,6 +159,9 @@ public abstract class BaseFilterTest extends 
InitializedNullHandlingTest .add(new DoubleDimensionSchema("d0")) .add(new FloatDimensionSchema("f0")) .add(new LongDimensionSchema("l0")) + .add(new AutoTypeColumnSchema("arrayString")) + .add(new AutoTypeColumnSchema("arrayLong")) + .add(new AutoTypeColumnSchema("arrayDouble")) .build() ); @@ -179,15 +182,84 @@ public abstract class BaseFilterTest extends InitializedNullHandlingTest .add("d0", ColumnType.DOUBLE) .add("f0", ColumnType.FLOAT) .add("l0", ColumnType.LONG) + .add("arrayString", ColumnType.STRING_ARRAY) + .add("arrayLong", ColumnType.LONG_ARRAY) + .add("arrayDouble", ColumnType.DOUBLE_ARRAY) .build(); static final List DEFAULT_ROWS = ImmutableList.of( - makeDefaultSchemaRow("0", "", ImmutableList.of("a", "b"), "2017-07-25", 0.0, 0.0f, 0L), - makeDefaultSchemaRow("1", "10", ImmutableList.of(), "2017-07-25", 10.1, 10.1f, 100L), - makeDefaultSchemaRow("2", "2", ImmutableList.of(""), "2017-05-25", null, 5.5f, 40L), - makeDefaultSchemaRow("3", "1", ImmutableList.of("a"), "2020-01-25", 120.0245, 110.0f, null), - makeDefaultSchemaRow("4", "abdef", ImmutableList.of("c"), null, 60.0, null, 9001L), - makeDefaultSchemaRow("5", "abc", null, "2020-01-25", 765.432, 123.45f, 12345L) + makeDefaultSchemaRow( + "0", + "", + ImmutableList.of("a", "b"), + "2017-07-25", + 0.0, + 0.0f, + 0L, + ImmutableList.of("a", "b", "c"), + ImmutableList.of(1L, 2L, 3L), + ImmutableList.of(1.1, 2.2, 3.3) + ), + makeDefaultSchemaRow( + "1", + "10", + ImmutableList.of(), + "2017-07-25", + 10.1, + 10.1f, + 100L, + ImmutableList.of(), + ImmutableList.of(), + new Object[]{1.1, 2.2, 3.3} + ), + makeDefaultSchemaRow( + "2", + "2", + ImmutableList.of(""), + "2017-05-25", + null, + 5.5f, + 40L, + null, + new Object[]{1L, 2L, 3L}, + Collections.singletonList(null) + ), + makeDefaultSchemaRow( + "3", + "1", + ImmutableList.of("a"), + "2020-01-25", + 120.0245, + 110.0f, + null, + new Object[]{"a", "b", "c"}, + null, + ImmutableList.of() + ), + makeDefaultSchemaRow( + "4", + "abdef", + 
ImmutableList.of("c"), + null, + 60.0, + null, + 9001L, + ImmutableList.of("c", "d"), + Collections.singletonList(null), + new Object[]{-1.1, -333.3} + ), + makeDefaultSchemaRow( + "5", + "abc", + null, + "2020-01-25", + 765.432, + 123.45f, + 12345L, + Collections.singletonList(null), + new Object[]{123L, 345L}, + null + ) ); static final IncrementalIndexSchema DEFAULT_INDEX_SCHEMA = new IncrementalIndexSchema.Builder() @@ -209,12 +281,15 @@ static InputRow makeSchemaRow( @Nullable Object... elements ) { - Preconditions.checkArgument(signature.size() == elements.length); Map mapRow = Maps.newHashMapWithExpectedSize(signature.size()); for (int i = 0; i < signature.size(); i++) { final String columnName = signature.getColumnName(i); - final Object value = elements[i]; - mapRow.put(columnName, value); + if (elements != null && i < elements.length) { + final Object value = elements[i]; + mapRow.put(columnName, value); + } else { + mapRow.put(columnName, null); + } } return parser.parseBatch(mapRow).get(0); } @@ -328,6 +403,34 @@ public static Collection makeConstructors() return Pair.of(new IncrementalIndexStorageAdapter(index), index); } ) + .put( + "incrementalAutoTypes", + input -> { + input.indexSpec(IndexSpec.builder().build()); + input.mapSchema( + schema -> + new IncrementalIndexSchema( + schema.getMinTimestamp(), + schema.getTimestampSpec(), + schema.getGran(), + schema.getVirtualColumns(), + schema.getDimensionsSpec().withDimensions( + schema.getDimensionsSpec() + .getDimensions() + .stream() + .map( + dimensionSchema -> new AutoTypeColumnSchema(dimensionSchema.getName()) + ) + .collect(Collectors.toList()) + ), + schema.getMetrics(), + schema.isRollup() + ) + ); + final IncrementalIndex index = input.buildIncrementalIndex(); + return Pair.of(new IncrementalIndexStorageAdapter(index), index); + } + ) .put( "mmappedAutoTypes", input -> { @@ -439,10 +542,48 @@ public static Collection makeConstructors() input -> 
Pair.of(input.buildRowBasedSegmentWithTypeSignature().asStorageAdapter(), () -> {}) ) .put("frame (row-based)", input -> { + // remove array type columns from frames since they aren't currently supported other than string + input.mapSchema( + schema -> + new IncrementalIndexSchema( + schema.getMinTimestamp(), + schema.getTimestampSpec(), + schema.getGran(), + schema.getVirtualColumns(), + schema.getDimensionsSpec().withDimensions( + schema.getDimensionsSpec() + .getDimensions() + .stream() + .filter(dimensionSchema -> !(dimensionSchema instanceof AutoTypeColumnSchema)) + .collect(Collectors.toList()) + ), + schema.getMetrics(), + schema.isRollup() + ) + ); final FrameSegment segment = input.buildFrameSegment(FrameType.ROW_BASED); return Pair.of(segment.asStorageAdapter(), segment); }) .put("frame (columnar)", input -> { + // remove array type columns from frames since they aren't currently supported other than string + input.mapSchema( + schema -> + new IncrementalIndexSchema( + schema.getMinTimestamp(), + schema.getTimestampSpec(), + schema.getGran(), + schema.getVirtualColumns(), + schema.getDimensionsSpec().withDimensions( + schema.getDimensionsSpec() + .getDimensions() + .stream() + .filter(dimensionSchema -> !(dimensionSchema instanceof AutoTypeColumnSchema)) + .collect(Collectors.toList()) + ), + schema.getMetrics(), + schema.isRollup() + ) + ); final FrameSegment segment = input.buildFrameSegment(FrameType.COLUMNAR); return Pair.of(segment.asStorageAdapter(), segment); }) diff --git a/processing/src/test/java/org/apache/druid/segment/filter/EqualityFilterTest.java b/processing/src/test/java/org/apache/druid/segment/filter/EqualityFilterTest.java index 95f856c0c4a7..87a62e86a721 100644 --- a/processing/src/test/java/org/apache/druid/segment/filter/EqualityFilterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/filter/EqualityFilterTest.java @@ -24,6 +24,7 @@ import com.google.common.collect.ImmutableMap; import 
nl.jqno.equalsverifier.EqualsVerifier; import org.apache.druid.common.config.NullHandling; +import org.apache.druid.error.DruidException; import org.apache.druid.java.util.common.Pair; import org.apache.druid.query.extraction.MapLookupExtractor; import org.apache.druid.query.extraction.TimeDimExtractionFn; @@ -63,15 +64,29 @@ public static void tearDown() throws Exception } @Test - public void testMatchNullThrowsError() + public void testInvalidParameters() { Throwable t = Assert.assertThrows( - IllegalArgumentException.class, + DruidException.class, + () -> assertFilterMatches( + new EqualityFilter(null, ColumnType.STRING, null, null, null), ImmutableList.of() + ) + ); + Assert.assertEquals("Invalid equality filter, column cannot be null", t.getMessage()); + t = Assert.assertThrows( + DruidException.class, + () -> assertFilterMatches( + new EqualityFilter("dim0", null, null, null, null), ImmutableList.of() + ) + ); + Assert.assertEquals("Invalid equality filter on column [dim0], matchValueType cannot be null", t.getMessage()); + t = Assert.assertThrows( + DruidException.class, () -> assertFilterMatches( new EqualityFilter("dim0", ColumnType.STRING, null, null, null), ImmutableList.of() ) ); - Assert.assertEquals("value must not be null", t.getMessage()); + Assert.assertEquals("Invalid equality filter on column [dim0], matchValue cannot be null", t.getMessage()); } @Test @@ -427,11 +442,231 @@ public void testVirtualNumericColumnNullsAndDefaults() } } + @Test + public void testNumeric() + { + /* + dim0 d0 f0 l0 + "0" .. 0.0, 0.0f, 0L + "1" .. 10.1, 10.1f, 100L + "2" .. null, 5.5f, 40L + "3" .. 120.0245, 110.0f, null + "4" .. 60.0, null, 9001L + "5" .. 
765.432, 123.45f, 12345L + */ + + assertFilterMatches(new EqualityFilter("d0", ColumnType.DOUBLE, 10.1, null, null), ImmutableList.of("1")); + assertFilterMatches(new EqualityFilter("d0", ColumnType.DOUBLE, 120.0245, null, null), ImmutableList.of("3")); + assertFilterMatches(new EqualityFilter("d0", ColumnType.DOUBLE, 765.432, null, null), ImmutableList.of("5")); + assertFilterMatches(new EqualityFilter("d0", ColumnType.DOUBLE, 765.431, null, null), ImmutableList.of()); + + assertFilterMatches(new EqualityFilter("l0", ColumnType.LONG, 100L, null, null), ImmutableList.of("1")); + assertFilterMatches(new EqualityFilter("l0", ColumnType.LONG, 40L, null, null), ImmutableList.of("2")); + assertFilterMatches(new EqualityFilter("l0", ColumnType.LONG, 9001L, null, null), ImmutableList.of("4")); + assertFilterMatches(new EqualityFilter("l0", ColumnType.LONG, 9000L, null, null), ImmutableList.of()); + if (!isAutoSchema()) { + // auto schema doesn't store float columns as floats, rather they are stored as doubles... the predicate matcher + // matches fine, but the string value set index does not match correctly if we expect the input float values + assertFilterMatches(new EqualityFilter("f0", ColumnType.FLOAT, 10.1f, null, null), ImmutableList.of("1")); + assertFilterMatches(new EqualityFilter("f0", ColumnType.FLOAT, 110.0f, null, null), ImmutableList.of("3")); + assertFilterMatches(new EqualityFilter("f0", ColumnType.FLOAT, 123.45f, null, null), ImmutableList.of("5")); + assertFilterMatches(new EqualityFilter("f0", ColumnType.FLOAT, 123.46f, null, null), ImmutableList.of()); + } else { + // .. 
so we need to cast them instead + assertFilterMatches( + new EqualityFilter("f0", ColumnType.DOUBLE, (double) 10.1f, null, null), + ImmutableList.of("1") + ); + assertFilterMatches( + new EqualityFilter("f0", ColumnType.DOUBLE, (double) 110.0f, null, null), + ImmutableList.of("3") + ); + assertFilterMatches( + new EqualityFilter("f0", ColumnType.DOUBLE, (double) 123.45f, null, null), + ImmutableList.of("5") + ); + assertFilterMatches( + new EqualityFilter("f0", ColumnType.DOUBLE, (double) 123.46f, null, null), + ImmutableList.of() + ); + } + } + + @Test + public void testArrays() + { + if (isAutoSchema()) { + // only auto schema supports array columns... skip other segment types + /* + dim0 .. arrayString arrayLong arrayDouble + "0", .. ["a", "b", "c"], [1L, 2L, 3L], [1.1, 2.2, 3.3] + "1", .. [], [], [1.1, 2.2, 3.3] + "2", .. null, [1L, 2L, 3L], [null] + "3", .. ["a", "b", "c"], null, [] + "4", .. ["c", "d"], [null], [-1.1, -333.3] + "5", .. [null], [123L, 345L], null + */ + + assertFilterMatches( + new EqualityFilter( + "arrayString", + ColumnType.STRING_ARRAY, + ImmutableList.of("a", "b", "c"), + null, + null + ), + ImmutableList.of("0", "3") + ); + assertFilterMatches( + new EqualityFilter( + "arrayString", + ColumnType.STRING_ARRAY, + new Object[]{"a", "b", "c"}, + null, + null + ), + ImmutableList.of("0", "3") + ); + assertFilterMatches( + new EqualityFilter( + "arrayString", + ColumnType.STRING_ARRAY, + ImmutableList.of(), + null, + null + ), + ImmutableList.of("1") + ); + assertFilterMatches( + new EqualityFilter( + "arrayString", + ColumnType.STRING_ARRAY, + new Object[]{null}, + null, + null + ), + ImmutableList.of("5") + ); + assertFilterMatches( + new EqualityFilter( + "arrayString", + ColumnType.STRING_ARRAY, + new Object[]{null, null}, + null, + null + ), + ImmutableList.of() + ); + assertFilterMatches( + new EqualityFilter( + "arrayLong", + ColumnType.LONG_ARRAY, + ImmutableList.of(1L, 2L, 3L), + null, + null + ), + ImmutableList.of("0", "2") + ); + 
assertFilterMatches( + new EqualityFilter( + "arrayLong", + ColumnType.LONG_ARRAY, + new Object[]{1L, 2L, 3L}, + null, + null + ), + ImmutableList.of("0", "2") + ); + assertFilterMatches( + new EqualityFilter( + "arrayLong", + ColumnType.LONG_ARRAY, + ImmutableList.of(), + null, + null + ), + ImmutableList.of("1") + ); + assertFilterMatches( + new EqualityFilter( + "arrayLong", + ColumnType.LONG_ARRAY, + new Object[]{null}, + null, + null + ), + ImmutableList.of("4") + ); + assertFilterMatches( + new EqualityFilter( + "arrayLong", + ColumnType.LONG_ARRAY, + new Object[]{null, null}, + null, + null + ), + ImmutableList.of() + ); + assertFilterMatches( + new EqualityFilter( + "arrayDouble", + ColumnType.DOUBLE_ARRAY, + ImmutableList.of(1.1, 2.2, 3.3), + null, + null + ), + ImmutableList.of("0", "1") + ); + assertFilterMatches( + new EqualityFilter( + "arrayDouble", + ColumnType.DOUBLE_ARRAY, + new Object[]{1.1, 2.2, 3.3}, + null, + null + ), + ImmutableList.of("0", "1") + ); + assertFilterMatches( + new EqualityFilter( + "arrayDouble", + ColumnType.DOUBLE_ARRAY, + ImmutableList.of(), + null, + null + ), + ImmutableList.of("3") + );assertFilterMatches( + new EqualityFilter( + "arrayDouble", + ColumnType.DOUBLE_ARRAY, + new Object[]{null}, + null, + null + ), + ImmutableList.of("2") + ); + assertFilterMatches( + new EqualityFilter( + "arrayDouble", + ColumnType.DOUBLE_ARRAY, + ImmutableList.of(1.1, 2.2, 3.4), + null, + null + ), + ImmutableList.of() + ); + + } + } + @Test public void test_equals() { EqualsVerifier.forClass(EqualityFilter.class).usingGetClass() - .withNonnullFields("column", "matchValueType", "matchValue") - .withIgnoredFields("predicateFactory", "cachedOptimizedFilter").verify(); + .withNonnullFields("column", "matchValueType", "matchValue", "predicateFactory", "cachedOptimizedFilter") + .withPrefabValues(ColumnType.class, ColumnType.STRING, ColumnType.DOUBLE) + .withIgnoredFields("predicateFactory", "cachedOptimizedFilter") + .verify(); } } diff --git 
a/processing/src/test/java/org/apache/druid/segment/filter/ExpressionFilterTest.java b/processing/src/test/java/org/apache/druid/segment/filter/ExpressionFilterTest.java index 0952dc7df10d..ac0ef94c2000 100644 --- a/processing/src/test/java/org/apache/druid/segment/filter/ExpressionFilterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/filter/ExpressionFilterTest.java @@ -140,6 +140,10 @@ public static void tearDown() throws Exception @Test public void testOneSingleValuedStringColumn() { + if (testName.contains("incrementalAutoTypes")) { + // dim 3 is mixed type in auto incrementalIndex, so presents as complex + return; + } assertFilterMatches(edf("dim3 == ''"), ImmutableList.of("0")); assertFilterMatches(edf("dim3 == '1'"), ImmutableList.of("3", "4", "6")); assertFilterMatches(edf("dim3 == 'a'"), ImmutableList.of("7")); diff --git a/processing/src/test/java/org/apache/druid/segment/filter/NullFilterTest.java b/processing/src/test/java/org/apache/druid/segment/filter/NullFilterTest.java index 22d928d11874..eebba4c75f24 100644 --- a/processing/src/test/java/org/apache/druid/segment/filter/NullFilterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/filter/NullFilterTest.java @@ -229,6 +229,35 @@ public void testSelectorWithLookupExtractionFn() } } + @Test + public void testArrays() + { + if (isAutoSchema()) { + // only auto schema ingests arrays + /* + dim0 .. arrayString arrayLong arrayDouble + "0", .. ["a", "b", "c"], [1L, 2L, 3L], [1.1, 2.2, 3.3] + "1", .. [], [], [1.1, 2.2, 3.3] + "2", .. null, [1L, 2L, 3L], [null] + "3", .. ["a", "b", "c"], null, [] + "4", .. ["c", "d"], [null], [-1.1, -333.3] + "5", .. 
[null], [123L, 345L], null + */ + assertFilterMatches( + new NullFilter("arrayString", null, null), + ImmutableList.of("2") + ); + assertFilterMatches( + new NullFilter("arrayLong", null, null), + ImmutableList.of("3") + ); + assertFilterMatches( + new NullFilter("arrayDouble", null, null), + ImmutableList.of("5") + ); + } + } + @Test public void test_equals() { diff --git a/processing/src/test/java/org/apache/druid/segment/filter/RangeFilterTest.java b/processing/src/test/java/org/apache/druid/segment/filter/RangeFilterTest.java index 425d60fe3f7a..eadbf64e0698 100644 --- a/processing/src/test/java/org/apache/druid/segment/filter/RangeFilterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/filter/RangeFilterTest.java @@ -51,12 +51,30 @@ public class RangeFilterTest extends BaseFilterTest private static final List ROWS = ImmutableList.builder() .addAll(DEFAULT_ROWS) - .add( - makeDefaultSchemaRow("6", "-1000", ImmutableList.of("a"), null, 6.6, null, 10L) - ) - .add( - makeDefaultSchemaRow("7", "-10.012", ImmutableList.of("d"), null, null, 3.0f, null) - ) + .add(makeDefaultSchemaRow( + "6", + "-1000", + ImmutableList.of("a"), + null, + 6.6, + null, + 10L, + new Object[]{"x", "y"}, + new Object[]{100, 200}, + new Object[]{1.1, null, 3.3} + )) + .add(makeDefaultSchemaRow( + "7", + "-10.012", + ImmutableList.of("d"), + null, + null, + 3.0f, + null, + new Object[]{null, "hello", "world"}, + new Object[]{1234, 3456L, null}, + new Object[]{1.23, 4.56, 6.78} + )) .build(); public RangeFilterTest( @@ -184,14 +202,15 @@ public void testLexicographicMatchNull() NullHandling.replaceWithDefault() ? 
ImmutableList.of() : ImmutableList.of("0") ); if (NullHandling.replaceWithDefault()) { - assertFilterMatchesSkipArrays( + assertFilterMatches( new RangeFilter("dim2", ColumnType.STRING, "", "", false, false, null, null), ImmutableList.of() ); } else { - assertFilterMatchesSkipArrays( + // still matches even with auto-schema because match-values are upcast to array types + assertFilterMatches( new RangeFilter("dim2", ColumnType.STRING, "", "", false, false, null, null), - isAutoSchema() ? ImmutableList.of() : ImmutableList.of("2") + ImmutableList.of("2") ); } } @@ -307,6 +326,40 @@ public void testLexicographicMatchNumbers() public void testNumericMatchBadParameters() { Throwable t = Assert.assertThrows( + DruidException.class, + () -> assertFilterMatches( + new RangeFilter(null, ColumnType.DOUBLE, "1234", "", false, false, null, null), + ImmutableList.of() + ) + ); + Assert.assertEquals( + "Invalid range filter, column cannot be null", + t.getMessage() + ); + t = Assert.assertThrows( + DruidException.class, + () -> assertFilterMatches( + new RangeFilter("dim0", null, "1234", "", false, false, null, null), + ImmutableList.of() + ) + ); + Assert.assertEquals( + "Invalid range filter on column [dim0], matchValueType cannot be null", + t.getMessage() + ); + t = Assert.assertThrows( + DruidException.class, + () -> assertFilterMatches( + new RangeFilter("dim0", ColumnType.DOUBLE, null, null, false, false, null, null), + ImmutableList.of() + ) + ); + Assert.assertEquals( + "Invalid range filter on column [dim0], lower and upper cannot be null at the same time", + t.getMessage() + ); + + t = Assert.assertThrows( DruidException.class, () -> assertFilterMatches( new RangeFilter("dim0", ColumnType.DOUBLE, "1234", "", false, false, null, null), @@ -725,82 +778,85 @@ public void testMatchWithExtractionFn() ImmutableList.of("1", "3") ); - assertFilterMatchesSkipArrays( - new RangeFilter( - "dim2", - ColumnType.STRING, - "super-", - "super-zzzzzz", - false, - false, - superFn, - 
null - ), - ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7") - ); - - if (NullHandling.replaceWithDefault()) { - assertFilterMatchesSkipArrays( - new RangeFilter( - "dim2", - ColumnType.STRING, - "super-null", - "super-null", - false, - false, - superFn, - null - ), - ImmutableList.of("1", "2", "5") - ); - assertFilterMatchesSkipArrays( - new RangeFilter( - "dim2", - ColumnType.STRING, - "super-null", - "super-null", - false, - false, - superFn, - null - ), - ImmutableList.of("1", "2", "5") - ); - } else { - assertFilterMatchesSkipArrays( - new RangeFilter( - "dim2", - ColumnType.STRING, - "super-null", - "super-null", - false, - false, - superFn, - null - ), - ImmutableList.of("1", "5") - ); - assertFilterMatchesSkipArrays( - new RangeFilter("dim2", ColumnType.STRING, "super-", "super-", false, false, superFn, null), - ImmutableList.of("2") - ); - assertFilterMatchesSkipArrays( + // auto schema ingests arrays instead of MVDs which aren't compatible with list filtered virtual column + if (!isAutoSchema()) { + assertFilterMatches( new RangeFilter( "dim2", ColumnType.STRING, - "super-null", - "super-null", + "super-", + "super-zzzzzz", false, false, superFn, null ), - ImmutableList.of("1", "5") - ); - assertFilterMatchesSkipArrays( - new RangeFilter("dim2", ColumnType.STRING, "super-", "super-", false, false, superFn, null), - ImmutableList.of("2") + ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7") ); + + if (NullHandling.replaceWithDefault()) { + assertFilterMatches( + new RangeFilter( + "dim2", + ColumnType.STRING, + "super-null", + "super-null", + false, + false, + superFn, + null + ), + ImmutableList.of("1", "2", "5") + ); + assertFilterMatches( + new RangeFilter( + "dim2", + ColumnType.STRING, + "super-null", + "super-null", + false, + false, + superFn, + null + ), + ImmutableList.of("1", "2", "5") + ); + } else { + assertFilterMatches( + new RangeFilter( + "dim2", + ColumnType.STRING, + "super-null", + "super-null", + false, + false, + superFn, 
+ null + ), + ImmutableList.of("1", "5") + ); + assertFilterMatches( + new RangeFilter("dim2", ColumnType.STRING, "super-", "super-", false, false, superFn, null), + ImmutableList.of("2") + ); + assertFilterMatches( + new RangeFilter( + "dim2", + ColumnType.STRING, + "super-null", + "super-null", + false, + false, + superFn, + null + ), + ImmutableList.of("1", "5") + ); + assertFilterMatches( + new RangeFilter("dim2", ColumnType.STRING, "super-", "super-", false, false, superFn, null), + ImmutableList.of("2") + ); + } } assertFilterMatches( @@ -871,20 +927,6 @@ public void testListFilteredVirtualColumn() return; } - - /* - makeDefaultSchemaRow("0", "", ImmutableList.of("a", "b"), "2017-07-25", 0.0, 0.0f, 0L), - makeDefaultSchemaRow("1", "10", ImmutableList.of(), "2017-07-25", 10.1, 10.1f, 100L), - makeDefaultSchemaRow("2", "2", ImmutableList.of(""), "2017-05-25", null, 5.5f, 40L), - makeDefaultSchemaRow("3", "1", ImmutableList.of("a"), "2020-01-25", 120.0245, 110.0f, null), - makeDefaultSchemaRow("4", "abdef", ImmutableList.of("c"), null, 60.0, null, 9001L), - makeDefaultSchemaRow("5", "abc", null, "2020-01-25", 765.432, 123.45f, 12345L) - makeDefaultSchemaRow("6", "-1000", ImmutableList.of("a"), null, 6.6, null, 10L) - makeDefaultSchemaRow("7", "-10.012", ImmutableList.of("d"), null, null, 3.0f, null) - - // allow 'a' - // deny 'a' - */ assertFilterMatchesSkipVectorize( new RangeFilter("allow-dim2", ColumnType.STRING, "a", "c", false, false, null, null), ImmutableList.of("0", "3", "6") @@ -935,6 +977,334 @@ public void testRequiredColumnRewrite() ); } + @Test + public void testArrayRanges() + { + if (isAutoSchema()) { + // only auto schema supports array columns currently, this means the match value will need to be coerceable to + // the column value type... + + /* dim0 .. arrayString arrayLong arrayDouble + "0", .. ["a", "b", "c"], [1L, 2L, 3L], [1.1, 2.2, 3.3] + "1", .. [], [], [1.1, 2.2, 3.3] + "2", .. null, [1L, 2L, 3L], [null] + "3", .. 
["a", "b", "c"], null, [] + "4", .. ["c", "d"], [null], [-1.1, -333.3] + "5", .. [null], [123L, 345L], null + "6", .. ["x", "y"], [100, 200], [1.1, null, 3.3] + "7", .. [null, "hello", "world"], [1234, 3456L, null], [1.23, 4.56, 6.78] + */ + assertFilterMatches( + new RangeFilter( + "arrayString", + ColumnType.STRING_ARRAY, + new Object[]{"a", "b", "c"}, + new Object[]{"a", "b", "c"}, + false, + false, + null, + null + ), + ImmutableList.of("0", "3") + ); + assertFilterMatches( + new RangeFilter( + "arrayString", + ColumnType.STRING_ARRAY, + null, + new Object[]{"a", "b", "c"}, + false, + false, + null, + null + ), + ImmutableList.of("0", "1", "3", "5", "7") + ); + + assertFilterMatches( + new RangeFilter( + "arrayString", + ColumnType.STRING_ARRAY, + new Object[]{"a", "b", "c"}, + null, + true, + false, + null, + null + ), + ImmutableList.of("4", "6") + ); + + assertFilterMatches( + new RangeFilter( + "arrayString", + ColumnType.STRING_ARRAY, + null, + new Object[]{"a", "b", "c"}, + false, + true, + null, + null + ), + ImmutableList.of("1", "5", "7") + ); + + assertFilterMatches( + new RangeFilter( + "arrayString", + ColumnType.STRING_ARRAY, + new Object[]{"a", "b"}, + new Object[]{"a", "b", "c", "d"}, + true, + true, + null, + null + ), + ImmutableList.of("0", "3") + ); + + assertFilterMatches( + new RangeFilter( + "arrayString", + ColumnType.STRING_ARRAY, + new Object[]{"c", "d"}, + new Object[]{"c", "d", "e"}, + false, + true, + null, + null + ), + ImmutableList.of("4") + ); + + assertFilterMatches( + new RangeFilter( + "arrayString", + ColumnType.STRING_ARRAY, + null, + new Object[]{}, + false, + false, + null, + null + ), + ImmutableList.of("1") + ); + + assertFilterMatches( + new RangeFilter( + "arrayString", + ColumnType.STRING_ARRAY, + null, + new Object[]{null}, + false, + false, + null, + null + ), + ImmutableList.of("1", "5") + ); + + assertFilterMatches( + new RangeFilter( + "arrayLong", + ColumnType.LONG_ARRAY, + null, + new Object[]{}, + false, + 
false, + null, + null + ), + ImmutableList.of("1") + ); + + assertFilterMatches( + new RangeFilter( + "arrayLong", + ColumnType.LONG_ARRAY, + new Object[]{}, + null, + true, + false, + null, + null + ), + ImmutableList.of("0", "2", "4", "5", "6", "7") + ); + + assertFilterMatches( + new RangeFilter( + "arrayLong", + ColumnType.LONG_ARRAY, + null, + new Object[]{null}, + false, + false, + null, + null + ), + ImmutableList.of("1", "4") + ); + + assertFilterMatches( + new RangeFilter( + "arrayLong", + ColumnType.LONG_ARRAY, + new Object[]{1L, 2L, 3L}, + new Object[]{1L, 2L, 3L}, + false, + false, + null, + null + ), + ImmutableList.of("0", "2") + ); + + + assertFilterMatches( + new RangeFilter( + "arrayLong", + ColumnType.LONG_ARRAY, + null, + new Object[]{1L, 2L, 3L}, + false, + true, + null, + null + ), + ImmutableList.of("1", "4") + ); + + assertFilterMatches( + new RangeFilter( + "arrayLong", + ColumnType.LONG_ARRAY, + new Object[]{1L, 2L, 3L}, + null, + true, + false, + null, + null + ), + ImmutableList.of("5", "6", "7") + ); + + // empties and nulls still sort before numbers + assertFilterMatches( + new RangeFilter( + "arrayLong", + ColumnType.LONG_ARRAY, + null, + new Object[]{-1L}, + false, + false, + null, + null + ), + ImmutableList.of("1", "4") + ); + + assertFilterMatches( + new RangeFilter( + "arrayDouble", + ColumnType.DOUBLE_ARRAY, + null, + new Object[]{}, + false, + false, + null, + null + ), + ImmutableList.of("3") + ); + + assertFilterMatches( + new RangeFilter( + "arrayDouble", + ColumnType.DOUBLE_ARRAY, + new Object[]{}, + null, + true, + false, + null, + null + ), + ImmutableList.of("0", "1", "2", "4", "6", "7") + ); + + assertFilterMatches( + new RangeFilter( + "arrayDouble", + ColumnType.DOUBLE_ARRAY, + null, + new Object[]{null}, + false, + false, + null, + null + ), + ImmutableList.of("2", "3") + ); + + assertFilterMatches( + new RangeFilter( + "arrayDouble", + ColumnType.DOUBLE_ARRAY, + new Object[]{1.1, 2.2, 3.3}, + new Object[]{1.1, 2.2, 
3.3}, + false, + false, + null, + null + ), + ImmutableList.of("0", "1") + ); + assertFilterMatches( + new RangeFilter( + "arrayDouble", + ColumnType.DOUBLE_ARRAY, + new Object[]{1.1, 2.2, 3.3}, + null, + true, + false, + null, + null + ), + ImmutableList.of("7") + ); + + assertFilterMatches( + new RangeFilter( + "arrayDouble", + ColumnType.DOUBLE_ARRAY, + null, + new Object[]{1.1, 2.2, 3.3}, + true, + false, + null, + null + ), + ImmutableList.of("0", "1", "2", "3", "4", "6") + ); + + // empties and nulls sort before numbers + assertFilterMatches( + new RangeFilter( + "arrayDouble", + ColumnType.DOUBLE_ARRAY, + null, + new Object[]{0.0}, + true, + false, + null, + null + ), + ImmutableList.of("2", "3", "4") + ); + } + } + @Test public void test_equals() { From 77a95f1e5e3fae1011715a3b32ba0404b680dd06 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Mon, 10 Jul 2023 22:07:49 -0700 Subject: [PATCH 15/44] add dimension selector tests for mixed type roots --- .../segment/AutoTypeColumnIndexerTest.java | 37 +++++++------------ .../segment/NestedDataColumnIndexerTest.java | 36 +++++++----------- 2 files changed, 27 insertions(+), 46 deletions(-) diff --git a/processing/src/test/java/org/apache/druid/segment/AutoTypeColumnIndexerTest.java b/processing/src/test/java/org/apache/druid/segment/AutoTypeColumnIndexerTest.java index 05a91cd658f1..fe8900c0dd8f 100644 --- a/processing/src/test/java/org/apache/druid/segment/AutoTypeColumnIndexerTest.java +++ b/processing/src/test/java/org/apache/druid/segment/AutoTypeColumnIndexerTest.java @@ -482,45 +482,36 @@ public void testNestedColumnIndexerSchemaDiscoveryRootVariant() throws IndexSize ColumnSelectorFactory columnSelectorFactory = cursorList.get(0).getColumnSelectorFactory(); ColumnValueSelector valueSelector = columnSelectorFactory.makeColumnValueSelector(VARIANT_COL); - Assert.assertThrows( - UnsupportedOperationException.class, - () -> cursorList.get(0).getColumnSelectorFactory().makeDimensionSelector(dimensionSpec) - ); - 
Assert.assertEquals(StructuredData.wrap("a"), valueSelector.getObject()); + DimensionSelector dimensionSelector = cursorList.get(0).getColumnSelectorFactory().makeDimensionSelector(dimensionSpec); + Assert.assertEquals("a", valueSelector.getObject()); + Assert.assertEquals("a", dimensionSelector.getObject()); columnSelectorFactory = cursorList.get(1).getColumnSelectorFactory(); valueSelector = columnSelectorFactory.makeColumnValueSelector(VARIANT_COL); - Assert.assertThrows( - UnsupportedOperationException.class, - () -> cursorList.get(1).getColumnSelectorFactory().makeDimensionSelector(dimensionSpec) - ); - Assert.assertEquals(StructuredData.wrap(2L), valueSelector.getObject()); + dimensionSelector = cursorList.get(1).getColumnSelectorFactory().makeDimensionSelector(dimensionSpec); + Assert.assertEquals(2L, valueSelector.getObject()); Assert.assertFalse(valueSelector.isNull()); + Assert.assertEquals("2", dimensionSelector.getObject()); columnSelectorFactory = cursorList.get(2).getColumnSelectorFactory(); valueSelector = columnSelectorFactory.makeColumnValueSelector(VARIANT_COL); - Assert.assertThrows( - UnsupportedOperationException.class, - () -> cursorList.get(2).getColumnSelectorFactory().makeDimensionSelector(dimensionSpec) - ); - Assert.assertEquals(StructuredData.wrap(3.3), valueSelector.getObject()); + dimensionSelector = cursorList.get(2).getColumnSelectorFactory().makeDimensionSelector(dimensionSpec); + Assert.assertEquals(3.3, valueSelector.getObject()); Assert.assertFalse(valueSelector.isNull()); + Assert.assertEquals("3.3", dimensionSelector.getObject()); + columnSelectorFactory = cursorList.get(3).getColumnSelectorFactory(); valueSelector = columnSelectorFactory.makeColumnValueSelector(VARIANT_COL); - Assert.assertThrows( - UnsupportedOperationException.class, - () -> cursorList.get(3).getColumnSelectorFactory().makeDimensionSelector(dimensionSpec) - ); + dimensionSelector = 
cursorList.get(3).getColumnSelectorFactory().makeDimensionSelector(dimensionSpec); Assert.assertNull(valueSelector.getObject()); + Assert.assertNull(dimensionSelector.getObject()); columnSelectorFactory = cursorList.get(4).getColumnSelectorFactory(); valueSelector = columnSelectorFactory.makeColumnValueSelector(VARIANT_COL); - Assert.assertThrows( - UnsupportedOperationException.class, - () -> cursorList.get(4).getColumnSelectorFactory().makeDimensionSelector(dimensionSpec) - ); + dimensionSelector = cursorList.get(4).getColumnSelectorFactory().makeDimensionSelector(dimensionSpec); Assert.assertNull(valueSelector.getObject()); + Assert.assertNull(dimensionSelector.getObject()); } @Test diff --git a/processing/src/test/java/org/apache/druid/segment/NestedDataColumnIndexerTest.java b/processing/src/test/java/org/apache/druid/segment/NestedDataColumnIndexerTest.java index a50df7114982..3c8b376dbcad 100644 --- a/processing/src/test/java/org/apache/druid/segment/NestedDataColumnIndexerTest.java +++ b/processing/src/test/java/org/apache/druid/segment/NestedDataColumnIndexerTest.java @@ -481,45 +481,35 @@ public void testNestedColumnIndexerSchemaDiscoveryRootVariant() throws IndexSize ColumnSelectorFactory columnSelectorFactory = cursorList.get(0).getColumnSelectorFactory(); ColumnValueSelector valueSelector = columnSelectorFactory.makeColumnValueSelector(VARIANT_COL); - Assert.assertThrows( - UnsupportedOperationException.class, - () -> cursorList.get(0).getColumnSelectorFactory().makeDimensionSelector(dimensionSpec) - ); - Assert.assertEquals(StructuredData.wrap("a"), valueSelector.getObject()); + DimensionSelector dimensionSelector = cursorList.get(0).getColumnSelectorFactory().makeDimensionSelector(dimensionSpec); + Assert.assertEquals("a", valueSelector.getObject()); + Assert.assertEquals("a", dimensionSelector.getObject()); columnSelectorFactory = cursorList.get(1).getColumnSelectorFactory(); valueSelector = 
columnSelectorFactory.makeColumnValueSelector(VARIANT_COL); - Assert.assertThrows( - UnsupportedOperationException.class, - () -> cursorList.get(1).getColumnSelectorFactory().makeDimensionSelector(dimensionSpec) - ); - Assert.assertEquals(StructuredData.wrap(2L), valueSelector.getObject()); + dimensionSelector = cursorList.get(1).getColumnSelectorFactory().makeDimensionSelector(dimensionSpec); + Assert.assertEquals(2L, valueSelector.getObject()); Assert.assertFalse(valueSelector.isNull()); + Assert.assertEquals("2", dimensionSelector.getObject()); columnSelectorFactory = cursorList.get(2).getColumnSelectorFactory(); valueSelector = columnSelectorFactory.makeColumnValueSelector(VARIANT_COL); - Assert.assertThrows( - UnsupportedOperationException.class, - () -> cursorList.get(2).getColumnSelectorFactory().makeDimensionSelector(dimensionSpec) - ); - Assert.assertEquals(StructuredData.wrap(3.3), valueSelector.getObject()); + dimensionSelector = cursorList.get(2).getColumnSelectorFactory().makeDimensionSelector(dimensionSpec); + Assert.assertEquals(3.3, valueSelector.getObject()); Assert.assertFalse(valueSelector.isNull()); + Assert.assertEquals("3.3", dimensionSelector.getObject()); columnSelectorFactory = cursorList.get(3).getColumnSelectorFactory(); valueSelector = columnSelectorFactory.makeColumnValueSelector(VARIANT_COL); - Assert.assertThrows( - UnsupportedOperationException.class, - () -> cursorList.get(3).getColumnSelectorFactory().makeDimensionSelector(dimensionSpec) - ); + dimensionSelector = cursorList.get(3).getColumnSelectorFactory().makeDimensionSelector(dimensionSpec); Assert.assertNull(valueSelector.getObject()); + Assert.assertNull(dimensionSelector.getObject()); columnSelectorFactory = cursorList.get(4).getColumnSelectorFactory(); valueSelector = columnSelectorFactory.makeColumnValueSelector(VARIANT_COL); - Assert.assertThrows( - UnsupportedOperationException.class, - () -> 
cursorList.get(4).getColumnSelectorFactory().makeDimensionSelector(dimensionSpec) - ); + dimensionSelector = cursorList.get(4).getColumnSelectorFactory().makeDimensionSelector(dimensionSpec); Assert.assertNull(valueSelector.getObject()); + Assert.assertNull(dimensionSelector.getObject()); } @Test From f5d4f7440536a01b15eb9ae902059f8bb86f3e21 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Mon, 10 Jul 2023 22:11:58 -0700 Subject: [PATCH 16/44] style --- .../org/apache/druid/query/filter/RangeFilter.java | 14 +++++++++----- .../druid/segment/filter/EqualityFilterTest.java | 12 +++++++++--- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/filter/RangeFilter.java b/processing/src/main/java/org/apache/druid/query/filter/RangeFilter.java index 0c90b832047b..20a987d7e2c3 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/RangeFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/RangeFilter.java @@ -114,11 +114,14 @@ public RangeFilter( .build("Invalid range filter on column [%s], matchValueType cannot be null", column); } this.matchValueType = matchValueType; - if(lower == null && upper == null) { + if (lower == null && upper == null) { throw DruidException.forPersona(DruidException.Persona.USER) .ofCategory(DruidException.Category.INVALID_INPUT) - .build("Invalid range filter on column [%s], lower and upper cannot be null at the same time", column); - }; + .build( + "Invalid range filter on column [%s], lower and upper cannot be null at the same time", + column + ); + } final ExpressionType expressionType = ExpressionType.fromColumnType(matchValueType); this.upper = upper; this.lower = lower; @@ -644,8 +647,8 @@ private Supplier> makeStringPredicateSupplier() { return Suppliers.memoize(() -> { final Comparator stringComparator = matchValueType.isNumeric() - ? StringComparators.NUMERIC - : StringComparators.LEXICOGRAPHIC; + ? 
StringComparators.NUMERIC + : StringComparators.LEXICOGRAPHIC; final String lowerBound = lowerEval.castTo(ExpressionType.STRING).asString(); final String upperBound = upperEval.castTo(ExpressionType.STRING).asString(); @@ -728,6 +731,7 @@ private Supplier> makeStringPredicateSupplier() } }); } + private Predicate makeArrayPredicate(@Nullable TypeSignature arrayType) { if (hasLowerBound() && hasUpperBound()) { diff --git a/processing/src/test/java/org/apache/druid/segment/filter/EqualityFilterTest.java b/processing/src/test/java/org/apache/druid/segment/filter/EqualityFilterTest.java index 87a62e86a721..7a63b3f6999f 100644 --- a/processing/src/test/java/org/apache/druid/segment/filter/EqualityFilterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/filter/EqualityFilterTest.java @@ -636,7 +636,8 @@ public void testArrays() null ), ImmutableList.of("3") - );assertFilterMatches( + ); + assertFilterMatches( new EqualityFilter( "arrayDouble", ColumnType.DOUBLE_ARRAY, @@ -656,7 +657,6 @@ public void testArrays() ), ImmutableList.of() ); - } } @@ -664,7 +664,13 @@ public void testArrays() public void test_equals() { EqualsVerifier.forClass(EqualityFilter.class).usingGetClass() - .withNonnullFields("column", "matchValueType", "matchValue", "predicateFactory", "cachedOptimizedFilter") + .withNonnullFields( + "column", + "matchValueType", + "matchValue", + "predicateFactory", + "cachedOptimizedFilter" + ) .withPrefabValues(ColumnType.class, ColumnType.STRING, ColumnType.DOUBLE) .withIgnoredFields("predicateFactory", "cachedOptimizedFilter") .verify(); From da894b41d6ed2bc8255fd79921d88a660833e7ef Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Tue, 11 Jul 2023 02:54:28 -0700 Subject: [PATCH 17/44] more coverage maybe --- .../druid/segment/AutoTypeColumnIndexer.java | 12 +-- .../sql/calcite/expression/Expressions.java | 4 +- .../druid/sql/calcite/CalciteQueryTest.java | 85 +++++++++++++++++++ 3 files changed, 91 insertions(+), 10 deletions(-) diff --git 
a/processing/src/main/java/org/apache/druid/segment/AutoTypeColumnIndexer.java b/processing/src/main/java/org/apache/druid/segment/AutoTypeColumnIndexer.java index 7c5fe88520b4..99ba022ec487 100644 --- a/processing/src/main/java/org/apache/druid/segment/AutoTypeColumnIndexer.java +++ b/processing/src/main/java/org/apache/druid/segment/AutoTypeColumnIndexer.java @@ -225,7 +225,7 @@ public DimensionSelector makeDimensionSelector( { final int dimIndex = desc.getIndex(); if (fieldIndexers.size() == 0 && isConstant && !hasNestedData) { - return DimensionSelector.constant(null, spec.getExtractionFn()); + return spec.decorate(DimensionSelector.nilSelector()); } final ColumnValueSelector rootLiteralSelector = getRootLiteralValueSelector(currEntry, dimIndex); if (rootLiteralSelector != null) { @@ -238,17 +238,13 @@ public DimensionSelector makeDimensionSelector( rootType ); } - return new BaseSingleValueDimensionSelector() + return spec.decorate(new BaseSingleValueDimensionSelector() { @Nullable @Override protected String getValue() { - final String o = Evals.asString(rootLiteralSelector.getObject()); - if (spec.getExtractionFn() != null) { - return spec.getExtractionFn().apply(o); - } - return o; + return Evals.asString(rootLiteralSelector.getObject()); } @Override @@ -256,7 +252,7 @@ public void inspectRuntimeShape(RuntimeShapeInspector inspector) { } - }; + }); } // column has nested data or is of mixed root type, cannot use throw new UOE( diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/expression/Expressions.java b/sql/src/main/java/org/apache/druid/sql/calcite/expression/Expressions.java index f9cb73031423..c0f32aa5744f 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/expression/Expressions.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/expression/Expressions.java @@ -519,7 +519,7 @@ private static DimFilter toSimpleLeafFilter( // column instead for filtering to ensure that results are correct if (druidExpression.isSimpleExtraction() 
&& !(isOutputNumeric && !rowSignature.isNumeric(druidExpression.getDirectColumn()))) { - if (NullHandling.sqlCompatible() && !plannerContext.isUseBoundsAndSelectors()) { + if (!plannerContext.isUseBoundsAndSelectors()) { equalFilter = new NullFilter( druidExpression.getSimpleExtraction().getColumn(), druidExpression.getSimpleExtraction().getExtractionFn(), @@ -538,7 +538,7 @@ private static DimFilter toSimpleLeafFilter( operand.getType() ); - if (NullHandling.sqlCompatible() && !plannerContext.isUseBoundsAndSelectors()) { + if (!plannerContext.isUseBoundsAndSelectors()) { equalFilter = new NullFilter(virtualColumn, null, null); } else { equalFilter = new SelectorDimFilter( diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java index bbd7b1526a26..6fe832123dd3 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java @@ -81,8 +81,11 @@ import org.apache.druid.query.extraction.RegexDimExtractionFn; import org.apache.druid.query.extraction.SubstringDimExtractionFn; import org.apache.druid.query.filter.DimFilter; +import org.apache.druid.query.filter.EqualityFilter; import org.apache.druid.query.filter.InDimFilter; import org.apache.druid.query.filter.LikeDimFilter; +import org.apache.druid.query.filter.NullFilter; +import org.apache.druid.query.filter.RangeFilter; import org.apache.druid.query.filter.RegexDimFilter; import org.apache.druid.query.groupby.GroupByQuery; import org.apache.druid.query.groupby.GroupByQueryConfig; @@ -3927,6 +3930,59 @@ public void testCoalesceColumnsFilter() ); } + @Test + public void testCoalesceColumnsFilterWithEquality() + { + // Cannot vectorize due to virtual columns. 
+ cannotVectorize(); + + // we can remove this test if PlannerContext.CTX_SQL_USE_BOUNDS_AND_SELECTORS ever defaults to false all the time + // since it otherwise is a duplicate of testCoalesceColumnsFilter + + testQuery( + "SELECT COALESCE(dim2, dim1), COUNT(*) FROM druid.foo WHERE COALESCE(dim2, dim1) IN ('a', 'abc') GROUP BY COALESCE(dim2, dim1)", + QUERY_CONTEXT_NO_STRINGIFY_ARRAY_USE_EQUALITY, + ImmutableList.of( + GroupByQuery.builder() + .setDataSource(CalciteTests.DATASOURCE1) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setVirtualColumns( + expressionVirtualColumn( + "v0", + "nvl(\"dim2\",\"dim1\")", + ColumnType.STRING + ) + ) + .setDimFilter( + or( + and( + new EqualityFilter("dim1", ColumnType.STRING, "a", null, null), + NullFilter.forColumn("dim2") + ), + and( + new EqualityFilter("dim1", ColumnType.STRING, "abc", null, null), + NullFilter.forColumn("dim2") + ), + in( + "dim2", + ImmutableSet.of("a", "abc"), + null + ) + ) + ) + .setDimensions(dimensions(new DefaultDimensionSpec("v0", "d0", ColumnType.STRING))) + .setAggregatorSpecs(aggregators(new CountAggregatorFactory("a0"))) + .setContext(QUERY_CONTEXT_NO_STRINGIFY_ARRAY_USE_EQUALITY) + .build() + ), + ImmutableList.of( + new Object[]{"a", 2L}, + new Object[]{"abc", 2L} + ) + ); + } + @Test public void testCoalesceMoreColumns() { @@ -4430,6 +4486,35 @@ public void testCountStarWithLongColumnFilters() ); } + @Test + public void testCountStarWithLongColumnFiltersForceRange() + { + // we can remove this test if PlannerContext.CTX_SQL_USE_BOUNDS_AND_SELECTORS ever defaults to false all the time + // since it otherwise is a duplicate of testCountStarWithLongColumnFilters + testQuery( + "SELECT COUNT(*) FROM druid.foo WHERE cnt >= 3 OR cnt = 1", + QUERY_CONTEXT_NO_STRINGIFY_ARRAY_USE_EQUALITY, + ImmutableList.of( + Druids.newTimeseriesQueryBuilder() + .dataSource(CalciteTests.DATASOURCE1) + .intervals(querySegmentSpec(Filtration.eternity())) + 
.granularity(Granularities.ALL) + .filters( + or( + new RangeFilter("cnt", ColumnType.LONG, 3L, null, false, false, null, null), + new EqualityFilter("cnt", ColumnType.LONG, 1L, null, null) + ) + ) + .aggregators(aggregators(new CountAggregatorFactory("a0"))) + .context(QUERY_CONTEXT_NO_STRINGIFY_ARRAY_USE_EQUALITY) + .build() + ), + ImmutableList.of( + new Object[]{6L} + ) + ); + } + @Test public void testCountStarWithLongColumnFiltersOnFloatLiterals() { From acb18a990e33fcbabe9f6b0c0481fd8609509d8c Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Tue, 11 Jul 2023 05:11:56 -0700 Subject: [PATCH 18/44] deprecated is lies, at least for this test... --- .../apache/druid/segment/AutoTypeColumnIndexer.java | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/AutoTypeColumnIndexer.java b/processing/src/main/java/org/apache/druid/segment/AutoTypeColumnIndexer.java index 99ba022ec487..f51117ed0696 100644 --- a/processing/src/main/java/org/apache/druid/segment/AutoTypeColumnIndexer.java +++ b/processing/src/main/java/org/apache/druid/segment/AutoTypeColumnIndexer.java @@ -225,7 +225,7 @@ public DimensionSelector makeDimensionSelector( { final int dimIndex = desc.getIndex(); if (fieldIndexers.size() == 0 && isConstant && !hasNestedData) { - return spec.decorate(DimensionSelector.nilSelector()); + return DimensionSelector.constant(null, spec.getExtractionFn()); } final ColumnValueSelector rootLiteralSelector = getRootLiteralValueSelector(currEntry, dimIndex); if (rootLiteralSelector != null) { @@ -238,13 +238,17 @@ public DimensionSelector makeDimensionSelector( rootType ); } - return spec.decorate(new BaseSingleValueDimensionSelector() + return new BaseSingleValueDimensionSelector() { @Nullable @Override protected String getValue() { - return Evals.asString(rootLiteralSelector.getObject()); + final String s = Evals.asString(rootLiteralSelector.getObject()); + if (spec.getExtractionFn() != null) { + 
return spec.getExtractionFn().apply(s); + } + return s; } @Override @@ -252,7 +256,7 @@ public void inspectRuntimeShape(RuntimeShapeInspector inspector) { } - }); + }; } // column has nested data or is of mixed root type, cannot use throw new UOE( From 2e9dc0b67a57e6152bd89b30a311454fa0f55a48 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Tue, 11 Jul 2023 11:46:08 -0700 Subject: [PATCH 19/44] fix build --- .../org/apache/druid/sql/calcite/CalciteJoinQueryTest.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteJoinQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteJoinQueryTest.java index 2f8b98f74ab3..d8f8246c6974 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteJoinQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteJoinQueryTest.java @@ -5744,7 +5744,7 @@ public void testJoinWithInputRefCondition() .aggregators(aggregators( new FilteredAggregatorFactory( new CountAggregatorFactory("a0"), - new SelectorDimFilter("j0.d1", null, null) + isNull("j0.d1") ) )) .context(getTimeseriesContextWithFloorTime(TIMESERIES_CONTEXT_BY_GRAN, "d0")) @@ -5803,9 +5803,9 @@ public void testJoinWithInputRefCondition() new FilteredAggregatorFactory( new CountAggregatorFactory("a0"), or( - new SelectorDimFilter("j0.a0", "0", null), + equality("j0.a0", 0L, ColumnType.LONG), and( - selector("_j0.d1", null, null), + isNull("_j0.d1"), expressionFilter("(\"j0.a1\" >= \"j0.a0\")") ) From cc80896052528fe74fd21ad340b1fc9faf99b881 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Tue, 11 Jul 2023 11:47:30 -0700 Subject: [PATCH 20/44] fix --- .../java/org/apache/druid/sql/calcite/CalciteJoinQueryTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteJoinQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteJoinQueryTest.java index d8f8246c6974..5ad733327577 100644 --- 
a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteJoinQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteJoinQueryTest.java @@ -5765,7 +5765,7 @@ public void testJoinWithInputRefCondition() new CountAggregatorFactory("a0"), new FilteredAggregatorFactory( new CountAggregatorFactory("a1"), - not(selector("m1", null, null)), + not(isNull("m1")), "a1" ) ) From 1bd041216467fb008b544df2051d6b6f70819b6c Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Tue, 11 Jul 2023 11:50:29 -0700 Subject: [PATCH 21/44] adjust --- .../org/apache/druid/sql/calcite/CalciteJoinQueryTest.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteJoinQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteJoinQueryTest.java index 5ad733327577..3a08e07a43ca 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteJoinQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteJoinQueryTest.java @@ -2570,7 +2570,7 @@ public void testNotInAggregationSubquery(Map queryContext) NullHandling.sqlCompatible() ? 
new FilteredAggregatorFactory( new CountAggregatorFactory("_a1"), - not(isNull("a0")) + notNull("a0") ) : new CountAggregatorFactory("_a1") ) @@ -5765,7 +5765,7 @@ public void testJoinWithInputRefCondition() new CountAggregatorFactory("a0"), new FilteredAggregatorFactory( new CountAggregatorFactory("a1"), - not(isNull("m1")), + notNull("m1"), "a1" ) ) From 71860ff305bdbfe8eb686d2c4bfabe3d305ef1c0 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Tue, 11 Jul 2023 12:47:36 -0700 Subject: [PATCH 22/44] fix --- ...onaryEncodedStringIndexSupplierBenchmark.java | 2 +- .../org/apache/druid/msq/exec/MSQSelectTest.java | 2 +- .../druid/query/filter/EqualityFilter.java | 2 +- .../apache/druid/query/filter/InDimFilter.java | 4 ++-- .../apache/druid/query/filter/NullFilter.java | 2 +- .../apache/druid/query/filter/RangeFilter.java | 4 ++-- .../druid/query/metadata/SegmentAnalyzer.java | 2 +- .../apache/druid/query/search/AutoStrategy.java | 2 +- .../druid/query/search/UseIndexesStrategy.java | 2 +- .../segment/QueryableIndexIndexableAdapter.java | 2 +- .../segment/QueryableIndexStorageAdapter.java | 2 +- .../druid/segment/column/ColumnConfig.java | 6 +++--- .../apache/druid/segment/filter/BoundFilter.java | 6 +++--- .../org/apache/druid/segment/filter/Filters.java | 4 ++-- .../apache/druid/segment/filter/LikeFilter.java | 4 ++-- .../druid/segment/filter/SelectorFilter.java | 4 ++-- .../druid/segment/filter/SpatialFilter.java | 2 +- ...dStringDictionaryEncodedStringValueIndex.java | 1 + .../index/IndexedStringDruidPredicateIndex.java | 1 + .../IndexedUtf8LexicographicalRangeIndex.java | 1 + .../segment/index/IndexedUtf8ValueSetIndex.java | 2 ++ .../DictionaryEncodedStringValueIndex.java | 2 +- .../DictionaryEncodedValueIndex.java | 2 +- .../{ => semantic}/DruidPredicateIndex.java | 3 ++- .../LexicographicalRangeIndex.java | 3 ++- .../index/{ => semantic}/NullValueIndex.java | 4 +++- .../index/{ => semantic}/NumericRangeIndex.java | 4 +++- .../index/{ => 
semantic}/SpatialIndex.java | 2 +- .../{ => semantic}/StringValueSetIndex.java | 3 ++- .../index/{ => semantic}/Utf8ValueSetIndex.java | 3 ++- .../nested/NestedFieldColumnIndexSupplier.java | 14 +++++++------- .../ScalarDoubleColumnAndIndexSupplier.java | 12 ++++++------ .../nested/ScalarLongColumnAndIndexSupplier.java | 12 ++++++------ .../nested/VariantColumnAndIndexSupplier.java | 2 +- .../segment/serde/NullValueIndexSupplier.java | 2 +- .../serde/StringUtf8ColumnIndexSupplier.java | 14 +++++++------- .../virtual/ListFilteredVirtualColumn.java | 12 ++++++------ .../druid/query/filter/InDimFilterTest.java | 4 ++-- .../druid/query/filter/LikeDimFilterTest.java | 4 ++-- .../ColumnSelectorColumnIndexSelectorTest.java | 4 ++-- .../segment/IndexMergerNullHandlingTest.java | 4 ++-- .../druid/segment/IndexMergerTestBase.java | 2 +- .../nested/NestedDataColumnSupplierTest.java | 6 +++--- .../nested/NestedDataColumnSupplierV4Test.java | 6 +++--- .../NestedFieldColumnIndexSupplierTest.java | 16 ++++++++-------- .../nested/ScalarDoubleColumnSupplierTest.java | 6 +++--- .../nested/ScalarLongColumnSupplierTest.java | 6 +++--- .../nested/ScalarStringColumnSupplierTest.java | 6 +++--- .../nested/VariantColumnSupplierTest.java | 6 +++--- ...DictionaryEncodedStringIndexSupplierTest.java | 2 +- .../ListFilteredVirtualColumnSelectorTest.java | 2 +- .../java/org/apache/druid/cli/DumpSegment.java | 2 +- .../org/apache/druid/cli/DumpSegmentTest.java | 2 +- 53 files changed, 121 insertions(+), 108 deletions(-) rename processing/src/main/java/org/apache/druid/segment/index/{ => semantic}/DictionaryEncodedStringValueIndex.java (97%) rename processing/src/main/java/org/apache/druid/segment/index/{ => semantic}/DictionaryEncodedValueIndex.java (97%) rename processing/src/main/java/org/apache/druid/segment/index/{ => semantic}/DruidPredicateIndex.java (93%) rename processing/src/main/java/org/apache/druid/segment/index/{ => semantic}/LexicographicalRangeIndex.java (96%) rename 
processing/src/main/java/org/apache/druid/segment/index/{ => semantic}/NullValueIndex.java (90%) rename processing/src/main/java/org/apache/druid/segment/index/{ => semantic}/NumericRangeIndex.java (94%) rename processing/src/main/java/org/apache/druid/segment/index/{ => semantic}/SpatialIndex.java (95%) rename processing/src/main/java/org/apache/druid/segment/index/{ => semantic}/StringValueSetIndex.java (93%) rename processing/src/main/java/org/apache/druid/segment/index/{ => semantic}/Utf8ValueSetIndex.java (92%) diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/DictionaryEncodedStringIndexSupplierBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/DictionaryEncodedStringIndexSupplierBenchmark.java index 739a9d01d82f..882c6789b560 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/DictionaryEncodedStringIndexSupplierBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/DictionaryEncodedStringIndexSupplierBenchmark.java @@ -31,7 +31,7 @@ import org.apache.druid.segment.data.RoaringBitmapSerdeFactory; import org.apache.druid.segment.index.BitmapColumnIndex; import org.apache.druid.segment.index.IndexedUtf8ValueSetIndex; -import org.apache.druid.segment.index.StringValueSetIndex; +import org.apache.druid.segment.index.semantic.StringValueSetIndex; import org.apache.druid.segment.serde.StringUtf8ColumnIndexSupplier; import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.annotations.BenchmarkMode; diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQSelectTest.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQSelectTest.java index aa4acf14115a..6d444ac70de5 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQSelectTest.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQSelectTest.java @@ -2070,7 +2070,7 @@ public void 
testJoinUsesDifferentAlgorithm() .setAggregatorSpecs( new FilteredAggregatorFactory( new CountAggregatorFactory("a0"), - new SelectorDimFilter("j0.d1", null, null), + isNull("j0.d1"), "a0" ) ) diff --git a/processing/src/main/java/org/apache/druid/query/filter/EqualityFilter.java b/processing/src/main/java/org/apache/druid/query/filter/EqualityFilter.java index 2721262425ee..71f9aec8fc00 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/EqualityFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/EqualityFilter.java @@ -57,7 +57,7 @@ import org.apache.druid.segment.filter.PredicateValueMatcherFactory; import org.apache.druid.segment.filter.ValueMatchers; import org.apache.druid.segment.index.BitmapColumnIndex; -import org.apache.druid.segment.index.StringValueSetIndex; +import org.apache.druid.segment.index.semantic.StringValueSetIndex; import org.apache.druid.segment.vector.VectorColumnSelectorFactory; import javax.annotation.Nullable; diff --git a/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java b/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java index 0759ebe58f3e..52f55cd5fa1f 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java @@ -60,8 +60,8 @@ import org.apache.druid.segment.column.ColumnIndexSupplier; import org.apache.druid.segment.filter.Filters; import org.apache.druid.segment.index.BitmapColumnIndex; -import org.apache.druid.segment.index.StringValueSetIndex; -import org.apache.druid.segment.index.Utf8ValueSetIndex; +import org.apache.druid.segment.index.semantic.StringValueSetIndex; +import org.apache.druid.segment.index.semantic.Utf8ValueSetIndex; import org.apache.druid.segment.vector.VectorColumnSelectorFactory; import javax.annotation.Nullable; diff --git a/processing/src/main/java/org/apache/druid/query/filter/NullFilter.java 
b/processing/src/main/java/org/apache/druid/query/filter/NullFilter.java index c6fcadb49428..ff84b6462741 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/NullFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/NullFilter.java @@ -44,7 +44,7 @@ import org.apache.druid.segment.filter.DimensionPredicateFilter; import org.apache.druid.segment.filter.Filters; import org.apache.druid.segment.index.BitmapColumnIndex; -import org.apache.druid.segment.index.NullValueIndex; +import org.apache.druid.segment.index.semantic.NullValueIndex; import org.apache.druid.segment.vector.VectorColumnSelectorFactory; import javax.annotation.Nullable; diff --git a/processing/src/main/java/org/apache/druid/query/filter/RangeFilter.java b/processing/src/main/java/org/apache/druid/query/filter/RangeFilter.java index 20a987d7e2c3..fe4e9d7d740a 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/RangeFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/RangeFilter.java @@ -54,8 +54,8 @@ import org.apache.druid.segment.filter.DimensionPredicateFilter; import org.apache.druid.segment.filter.Filters; import org.apache.druid.segment.index.BitmapColumnIndex; -import org.apache.druid.segment.index.LexicographicalRangeIndex; -import org.apache.druid.segment.index.NumericRangeIndex; +import org.apache.druid.segment.index.semantic.LexicographicalRangeIndex; +import org.apache.druid.segment.index.semantic.NumericRangeIndex; import org.apache.druid.segment.vector.VectorColumnSelectorFactory; import javax.annotation.Nullable; diff --git a/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java b/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java index 3f45f5c82992..81fcf34ed013 100644 --- a/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java +++ b/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java @@ -49,7 +49,7 @@ import 
org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.data.IndexedInts; import org.apache.druid.segment.incremental.IncrementalIndexStorageAdapter; -import org.apache.druid.segment.index.DictionaryEncodedStringValueIndex; +import org.apache.druid.segment.index.semantic.DictionaryEncodedStringValueIndex; import org.apache.druid.segment.serde.ComplexMetricSerde; import org.apache.druid.segment.serde.ComplexMetrics; import org.joda.time.DateTime; diff --git a/processing/src/main/java/org/apache/druid/query/search/AutoStrategy.java b/processing/src/main/java/org/apache/druid/query/search/AutoStrategy.java index 8e22aec5d6a1..29d5c3510040 100644 --- a/processing/src/main/java/org/apache/druid/query/search/AutoStrategy.java +++ b/processing/src/main/java/org/apache/druid/query/search/AutoStrategy.java @@ -29,7 +29,7 @@ import org.apache.druid.segment.Segment; import org.apache.druid.segment.column.ColumnHolder; import org.apache.druid.segment.column.ColumnIndexSupplier; -import org.apache.druid.segment.index.DictionaryEncodedStringValueIndex; +import org.apache.druid.segment.index.semantic.DictionaryEncodedStringValueIndex; import java.util.List; diff --git a/processing/src/main/java/org/apache/druid/query/search/UseIndexesStrategy.java b/processing/src/main/java/org/apache/druid/query/search/UseIndexesStrategy.java index 1f4751e56079..1f2307a6ee0e 100644 --- a/processing/src/main/java/org/apache/druid/query/search/UseIndexesStrategy.java +++ b/processing/src/main/java/org/apache/druid/query/search/UseIndexesStrategy.java @@ -44,7 +44,7 @@ import org.apache.druid.segment.column.ColumnIndexSupplier; import org.apache.druid.segment.column.NumericColumn; import org.apache.druid.segment.index.BitmapColumnIndex; -import org.apache.druid.segment.index.DictionaryEncodedStringValueIndex; +import org.apache.druid.segment.index.semantic.DictionaryEncodedStringValueIndex; import org.apache.druid.segment.virtual.VirtualizedColumnInspector; import 
org.joda.time.Interval; diff --git a/processing/src/main/java/org/apache/druid/segment/QueryableIndexIndexableAdapter.java b/processing/src/main/java/org/apache/druid/segment/QueryableIndexIndexableAdapter.java index 17e49cd7c38c..f97f4a1bade7 100644 --- a/processing/src/main/java/org/apache/druid/segment/QueryableIndexIndexableAdapter.java +++ b/processing/src/main/java/org/apache/druid/segment/QueryableIndexIndexableAdapter.java @@ -34,7 +34,7 @@ import org.apache.druid.segment.data.CloseableIndexed; import org.apache.druid.segment.data.ImmutableBitmapValues; import org.apache.druid.segment.data.IndexedIterable; -import org.apache.druid.segment.index.DictionaryEncodedValueIndex; +import org.apache.druid.segment.index.semantic.DictionaryEncodedValueIndex; import org.apache.druid.segment.nested.NestedCommonFormatColumn; import org.apache.druid.segment.nested.NestedDataComplexTypeSerde; import org.apache.druid.segment.nested.SortedValueDictionary; diff --git a/processing/src/main/java/org/apache/druid/segment/QueryableIndexStorageAdapter.java b/processing/src/main/java/org/apache/druid/segment/QueryableIndexStorageAdapter.java index a6f52e1fe5a0..ead7900c6a80 100644 --- a/processing/src/main/java/org/apache/druid/segment/QueryableIndexStorageAdapter.java +++ b/processing/src/main/java/org/apache/druid/segment/QueryableIndexStorageAdapter.java @@ -35,7 +35,7 @@ import org.apache.druid.segment.column.DictionaryEncodedColumn; import org.apache.druid.segment.column.NumericColumn; import org.apache.druid.segment.data.Indexed; -import org.apache.druid.segment.index.DictionaryEncodedStringValueIndex; +import org.apache.druid.segment.index.semantic.DictionaryEncodedStringValueIndex; import org.apache.druid.segment.vector.VectorCursor; import org.joda.time.DateTime; import org.joda.time.Interval; diff --git a/processing/src/main/java/org/apache/druid/segment/column/ColumnConfig.java b/processing/src/main/java/org/apache/druid/segment/column/ColumnConfig.java index 
924bc576cbb2..e85e7a4d4144 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/ColumnConfig.java +++ b/processing/src/main/java/org/apache/druid/segment/column/ColumnConfig.java @@ -20,9 +20,9 @@ package org.apache.druid.segment.column; import org.apache.druid.segment.index.BitmapColumnIndex; -import org.apache.druid.segment.index.DruidPredicateIndex; -import org.apache.druid.segment.index.LexicographicalRangeIndex; -import org.apache.druid.segment.index.NumericRangeIndex; +import org.apache.druid.segment.index.semantic.DruidPredicateIndex; +import org.apache.druid.segment.index.semantic.LexicographicalRangeIndex; +import org.apache.druid.segment.index.semantic.NumericRangeIndex; public interface ColumnConfig { diff --git a/processing/src/main/java/org/apache/druid/segment/filter/BoundFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/BoundFilter.java index f08b19813990..f43a226bf140 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/BoundFilter.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/BoundFilter.java @@ -47,9 +47,9 @@ import org.apache.druid.segment.column.ColumnIndexCapabilities; import org.apache.druid.segment.column.ColumnIndexSupplier; import org.apache.druid.segment.index.BitmapColumnIndex; -import org.apache.druid.segment.index.LexicographicalRangeIndex; -import org.apache.druid.segment.index.NullValueIndex; -import org.apache.druid.segment.index.NumericRangeIndex; +import org.apache.druid.segment.index.semantic.LexicographicalRangeIndex; +import org.apache.druid.segment.index.semantic.NullValueIndex; +import org.apache.druid.segment.index.semantic.NumericRangeIndex; import org.apache.druid.segment.vector.VectorColumnSelectorFactory; import javax.annotation.Nullable; diff --git a/processing/src/main/java/org/apache/druid/segment/filter/Filters.java b/processing/src/main/java/org/apache/druid/segment/filter/Filters.java index 69fef386952e..3e185dd39b17 100644 --- 
a/processing/src/main/java/org/apache/druid/segment/filter/Filters.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/Filters.java @@ -43,8 +43,8 @@ import org.apache.druid.segment.index.AllFalseBitmapColumnIndex; import org.apache.druid.segment.index.AllTrueBitmapColumnIndex; import org.apache.druid.segment.index.BitmapColumnIndex; -import org.apache.druid.segment.index.DictionaryEncodedStringValueIndex; -import org.apache.druid.segment.index.DruidPredicateIndex; +import org.apache.druid.segment.index.semantic.DictionaryEncodedStringValueIndex; +import org.apache.druid.segment.index.semantic.DruidPredicateIndex; import org.apache.druid.segment.join.filter.AllNullColumnSelectorFactory; import javax.annotation.Nullable; diff --git a/processing/src/main/java/org/apache/druid/segment/filter/LikeFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/LikeFilter.java index e3e8fe85ffb5..a6a86196c517 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/LikeFilter.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/LikeFilter.java @@ -38,8 +38,8 @@ import org.apache.druid.segment.index.AllFalseBitmapColumnIndex; import org.apache.druid.segment.index.AllTrueBitmapColumnIndex; import org.apache.druid.segment.index.BitmapColumnIndex; -import org.apache.druid.segment.index.LexicographicalRangeIndex; -import org.apache.druid.segment.index.StringValueSetIndex; +import org.apache.druid.segment.index.semantic.LexicographicalRangeIndex; +import org.apache.druid.segment.index.semantic.StringValueSetIndex; import org.apache.druid.segment.vector.VectorColumnSelectorFactory; import javax.annotation.Nullable; diff --git a/processing/src/main/java/org/apache/druid/segment/filter/SelectorFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/SelectorFilter.java index a0a648b91351..fc9044cb8377 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/SelectorFilter.java +++ 
b/processing/src/main/java/org/apache/druid/segment/filter/SelectorFilter.java @@ -35,8 +35,8 @@ import org.apache.druid.segment.ColumnSelectorFactory; import org.apache.druid.segment.column.ColumnIndexSupplier; import org.apache.druid.segment.index.BitmapColumnIndex; -import org.apache.druid.segment.index.NullValueIndex; -import org.apache.druid.segment.index.StringValueSetIndex; +import org.apache.druid.segment.index.semantic.NullValueIndex; +import org.apache.druid.segment.index.semantic.StringValueSetIndex; import org.apache.druid.segment.vector.VectorColumnSelectorFactory; import javax.annotation.Nullable; diff --git a/processing/src/main/java/org/apache/druid/segment/filter/SpatialFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/SpatialFilter.java index 0b5c7da80171..b174b7f4ddcc 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/SpatialFilter.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/SpatialFilter.java @@ -42,7 +42,7 @@ import org.apache.druid.segment.incremental.SpatialDimensionRowTransformer; import org.apache.druid.segment.index.AllFalseBitmapColumnIndex; import org.apache.druid.segment.index.BitmapColumnIndex; -import org.apache.druid.segment.index.SpatialIndex; +import org.apache.druid.segment.index.semantic.SpatialIndex; import javax.annotation.Nullable; import java.util.Objects; diff --git a/processing/src/main/java/org/apache/druid/segment/index/IndexedStringDictionaryEncodedStringValueIndex.java b/processing/src/main/java/org/apache/druid/segment/index/IndexedStringDictionaryEncodedStringValueIndex.java index 0c84bba8749d..c3c0c304410c 100644 --- a/processing/src/main/java/org/apache/druid/segment/index/IndexedStringDictionaryEncodedStringValueIndex.java +++ b/processing/src/main/java/org/apache/druid/segment/index/IndexedStringDictionaryEncodedStringValueIndex.java @@ -22,6 +22,7 @@ import org.apache.druid.collections.bitmap.BitmapFactory; import 
org.apache.druid.collections.bitmap.ImmutableBitmap; import org.apache.druid.segment.data.Indexed; +import org.apache.druid.segment.index.semantic.DictionaryEncodedStringValueIndex; import javax.annotation.Nullable; diff --git a/processing/src/main/java/org/apache/druid/segment/index/IndexedStringDruidPredicateIndex.java b/processing/src/main/java/org/apache/druid/segment/index/IndexedStringDruidPredicateIndex.java index 3ed85acd52c5..5fd63c8b3dc5 100644 --- a/processing/src/main/java/org/apache/druid/segment/index/IndexedStringDruidPredicateIndex.java +++ b/processing/src/main/java/org/apache/druid/segment/index/IndexedStringDruidPredicateIndex.java @@ -26,6 +26,7 @@ import org.apache.druid.segment.column.ColumnConfig; import org.apache.druid.segment.column.ColumnIndexSupplier; import org.apache.druid.segment.data.Indexed; +import org.apache.druid.segment.index.semantic.DruidPredicateIndex; import javax.annotation.Nullable; import java.util.Iterator; diff --git a/processing/src/main/java/org/apache/druid/segment/index/IndexedUtf8LexicographicalRangeIndex.java b/processing/src/main/java/org/apache/druid/segment/index/IndexedUtf8LexicographicalRangeIndex.java index 0eea6974341e..343ea3b5ed36 100644 --- a/processing/src/main/java/org/apache/druid/segment/index/IndexedUtf8LexicographicalRangeIndex.java +++ b/processing/src/main/java/org/apache/druid/segment/index/IndexedUtf8LexicographicalRangeIndex.java @@ -32,6 +32,7 @@ import org.apache.druid.segment.column.ColumnConfig; import org.apache.druid.segment.column.ColumnIndexSupplier; import org.apache.druid.segment.data.Indexed; +import org.apache.druid.segment.index.semantic.LexicographicalRangeIndex; import javax.annotation.Nullable; import java.nio.ByteBuffer; diff --git a/processing/src/main/java/org/apache/druid/segment/index/IndexedUtf8ValueSetIndex.java b/processing/src/main/java/org/apache/druid/segment/index/IndexedUtf8ValueSetIndex.java index 5e3146cefbbb..fb18891f51d7 100644 --- 
a/processing/src/main/java/org/apache/druid/segment/index/IndexedUtf8ValueSetIndex.java +++ b/processing/src/main/java/org/apache/druid/segment/index/IndexedUtf8ValueSetIndex.java @@ -29,6 +29,8 @@ import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.query.BitmapResultFactory; import org.apache.druid.segment.data.Indexed; +import org.apache.druid.segment.index.semantic.StringValueSetIndex; +import org.apache.druid.segment.index.semantic.Utf8ValueSetIndex; import javax.annotation.Nullable; import java.nio.ByteBuffer; diff --git a/processing/src/main/java/org/apache/druid/segment/index/DictionaryEncodedStringValueIndex.java b/processing/src/main/java/org/apache/druid/segment/index/semantic/DictionaryEncodedStringValueIndex.java similarity index 97% rename from processing/src/main/java/org/apache/druid/segment/index/DictionaryEncodedStringValueIndex.java rename to processing/src/main/java/org/apache/druid/segment/index/semantic/DictionaryEncodedStringValueIndex.java index 5579b54a7262..69a4c698dc9e 100644 --- a/processing/src/main/java/org/apache/druid/segment/index/DictionaryEncodedStringValueIndex.java +++ b/processing/src/main/java/org/apache/druid/segment/index/semantic/DictionaryEncodedStringValueIndex.java @@ -17,7 +17,7 @@ * under the License. 
*/ -package org.apache.druid.segment.index; +package org.apache.druid.segment.index.semantic; import org.apache.druid.collections.bitmap.BitmapFactory; import org.apache.druid.segment.column.DictionaryEncodedColumn; diff --git a/processing/src/main/java/org/apache/druid/segment/index/DictionaryEncodedValueIndex.java b/processing/src/main/java/org/apache/druid/segment/index/semantic/DictionaryEncodedValueIndex.java similarity index 97% rename from processing/src/main/java/org/apache/druid/segment/index/DictionaryEncodedValueIndex.java rename to processing/src/main/java/org/apache/druid/segment/index/semantic/DictionaryEncodedValueIndex.java index 35f6d381d0a1..a928a71a5261 100644 --- a/processing/src/main/java/org/apache/druid/segment/index/DictionaryEncodedValueIndex.java +++ b/processing/src/main/java/org/apache/druid/segment/index/semantic/DictionaryEncodedValueIndex.java @@ -17,7 +17,7 @@ * under the License. */ -package org.apache.druid.segment.index; +package org.apache.druid.segment.index.semantic; import org.apache.druid.collections.bitmap.ImmutableBitmap; import org.apache.druid.segment.column.DictionaryEncodedColumn; diff --git a/processing/src/main/java/org/apache/druid/segment/index/DruidPredicateIndex.java b/processing/src/main/java/org/apache/druid/segment/index/semantic/DruidPredicateIndex.java similarity index 93% rename from processing/src/main/java/org/apache/druid/segment/index/DruidPredicateIndex.java rename to processing/src/main/java/org/apache/druid/segment/index/semantic/DruidPredicateIndex.java index a14ca5f1d7e9..0ddbf4febeb7 100644 --- a/processing/src/main/java/org/apache/druid/segment/index/DruidPredicateIndex.java +++ b/processing/src/main/java/org/apache/druid/segment/index/semantic/DruidPredicateIndex.java @@ -17,9 +17,10 @@ * under the License. 
*/ -package org.apache.druid.segment.index; +package org.apache.druid.segment.index.semantic; import org.apache.druid.query.filter.DruidPredicateFactory; +import org.apache.druid.segment.index.BitmapColumnIndex; import javax.annotation.Nullable; diff --git a/processing/src/main/java/org/apache/druid/segment/index/LexicographicalRangeIndex.java b/processing/src/main/java/org/apache/druid/segment/index/semantic/LexicographicalRangeIndex.java similarity index 96% rename from processing/src/main/java/org/apache/druid/segment/index/LexicographicalRangeIndex.java rename to processing/src/main/java/org/apache/druid/segment/index/semantic/LexicographicalRangeIndex.java index 701d377ced9d..151f30a00a15 100644 --- a/processing/src/main/java/org/apache/druid/segment/index/LexicographicalRangeIndex.java +++ b/processing/src/main/java/org/apache/druid/segment/index/semantic/LexicographicalRangeIndex.java @@ -17,9 +17,10 @@ * under the License. */ -package org.apache.druid.segment.index; +package org.apache.druid.segment.index.semantic; import com.google.common.base.Predicate; +import org.apache.druid.segment.index.BitmapColumnIndex; import javax.annotation.Nullable; diff --git a/processing/src/main/java/org/apache/druid/segment/index/NullValueIndex.java b/processing/src/main/java/org/apache/druid/segment/index/semantic/NullValueIndex.java similarity index 90% rename from processing/src/main/java/org/apache/druid/segment/index/NullValueIndex.java rename to processing/src/main/java/org/apache/druid/segment/index/semantic/NullValueIndex.java index 7fccecb36380..e4627a4c39ef 100644 --- a/processing/src/main/java/org/apache/druid/segment/index/NullValueIndex.java +++ b/processing/src/main/java/org/apache/druid/segment/index/semantic/NullValueIndex.java @@ -17,7 +17,9 @@ * under the License. 
*/ -package org.apache.druid.segment.index; +package org.apache.druid.segment.index.semantic; + +import org.apache.druid.segment.index.BitmapColumnIndex; /** * Provides index for all null rows in a column, to use with IS/IS NOT NULL filters diff --git a/processing/src/main/java/org/apache/druid/segment/index/NumericRangeIndex.java b/processing/src/main/java/org/apache/druid/segment/index/semantic/NumericRangeIndex.java similarity index 94% rename from processing/src/main/java/org/apache/druid/segment/index/NumericRangeIndex.java rename to processing/src/main/java/org/apache/druid/segment/index/semantic/NumericRangeIndex.java index ca1b32e347f1..97dcd41f4d23 100644 --- a/processing/src/main/java/org/apache/druid/segment/index/NumericRangeIndex.java +++ b/processing/src/main/java/org/apache/druid/segment/index/semantic/NumericRangeIndex.java @@ -17,7 +17,9 @@ * under the License. */ -package org.apache.druid.segment.index; +package org.apache.druid.segment.index.semantic; + +import org.apache.druid.segment.index.BitmapColumnIndex; import javax.annotation.Nullable; diff --git a/processing/src/main/java/org/apache/druid/segment/index/SpatialIndex.java b/processing/src/main/java/org/apache/druid/segment/index/semantic/SpatialIndex.java similarity index 95% rename from processing/src/main/java/org/apache/druid/segment/index/SpatialIndex.java rename to processing/src/main/java/org/apache/druid/segment/index/semantic/SpatialIndex.java index 7b7705dd55e7..cf19d5c4d9a0 100644 --- a/processing/src/main/java/org/apache/druid/segment/index/SpatialIndex.java +++ b/processing/src/main/java/org/apache/druid/segment/index/semantic/SpatialIndex.java @@ -17,7 +17,7 @@ * under the License. 
*/ -package org.apache.druid.segment.index; +package org.apache.druid.segment.index.semantic; import org.apache.druid.collections.spatial.ImmutableRTree; diff --git a/processing/src/main/java/org/apache/druid/segment/index/StringValueSetIndex.java b/processing/src/main/java/org/apache/druid/segment/index/semantic/StringValueSetIndex.java similarity index 93% rename from processing/src/main/java/org/apache/druid/segment/index/StringValueSetIndex.java rename to processing/src/main/java/org/apache/druid/segment/index/semantic/StringValueSetIndex.java index 3845e8ca752a..d5178f7bf669 100644 --- a/processing/src/main/java/org/apache/druid/segment/index/StringValueSetIndex.java +++ b/processing/src/main/java/org/apache/druid/segment/index/semantic/StringValueSetIndex.java @@ -17,9 +17,10 @@ * under the License. */ -package org.apache.druid.segment.index; +package org.apache.druid.segment.index.semantic; import org.apache.druid.collections.bitmap.ImmutableBitmap; +import org.apache.druid.segment.index.BitmapColumnIndex; import javax.annotation.Nullable; import java.util.SortedSet; diff --git a/processing/src/main/java/org/apache/druid/segment/index/Utf8ValueSetIndex.java b/processing/src/main/java/org/apache/druid/segment/index/semantic/Utf8ValueSetIndex.java similarity index 92% rename from processing/src/main/java/org/apache/druid/segment/index/Utf8ValueSetIndex.java rename to processing/src/main/java/org/apache/druid/segment/index/semantic/Utf8ValueSetIndex.java index 50ef48724c03..7cf73b1fe32d 100644 --- a/processing/src/main/java/org/apache/druid/segment/index/Utf8ValueSetIndex.java +++ b/processing/src/main/java/org/apache/druid/segment/index/semantic/Utf8ValueSetIndex.java @@ -17,9 +17,10 @@ * under the License. 
*/ -package org.apache.druid.segment.index; +package org.apache.druid.segment.index.semantic; import org.apache.druid.collections.bitmap.ImmutableBitmap; +import org.apache.druid.segment.index.BitmapColumnIndex; import java.nio.ByteBuffer; import java.util.SortedSet; diff --git a/processing/src/main/java/org/apache/druid/segment/nested/NestedFieldColumnIndexSupplier.java b/processing/src/main/java/org/apache/druid/segment/nested/NestedFieldColumnIndexSupplier.java index 4f050fd5e430..adf9df1193d5 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/NestedFieldColumnIndexSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/NestedFieldColumnIndexSupplier.java @@ -51,16 +51,16 @@ import org.apache.druid.segment.data.GenericIndexed; import org.apache.druid.segment.data.Indexed; import org.apache.druid.segment.index.BitmapColumnIndex; -import org.apache.druid.segment.index.DictionaryEncodedStringValueIndex; -import org.apache.druid.segment.index.DictionaryEncodedValueIndex; -import org.apache.druid.segment.index.DruidPredicateIndex; -import org.apache.druid.segment.index.LexicographicalRangeIndex; -import org.apache.druid.segment.index.NullValueIndex; -import org.apache.druid.segment.index.NumericRangeIndex; +import org.apache.druid.segment.index.semantic.DictionaryEncodedStringValueIndex; +import org.apache.druid.segment.index.semantic.DictionaryEncodedValueIndex; +import org.apache.druid.segment.index.semantic.DruidPredicateIndex; +import org.apache.druid.segment.index.semantic.LexicographicalRangeIndex; +import org.apache.druid.segment.index.semantic.NullValueIndex; +import org.apache.druid.segment.index.semantic.NumericRangeIndex; import org.apache.druid.segment.index.SimpleBitmapColumnIndex; import org.apache.druid.segment.index.SimpleImmutableBitmapIndex; import org.apache.druid.segment.index.SimpleImmutableBitmapIterableIndex; -import org.apache.druid.segment.index.StringValueSetIndex; +import 
org.apache.druid.segment.index.semantic.StringValueSetIndex; import javax.annotation.Nullable; import java.nio.ByteBuffer; diff --git a/processing/src/main/java/org/apache/druid/segment/nested/ScalarDoubleColumnAndIndexSupplier.java b/processing/src/main/java/org/apache/druid/segment/nested/ScalarDoubleColumnAndIndexSupplier.java index 04165fd2d568..a829ae98cd55 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/ScalarDoubleColumnAndIndexSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/ScalarDoubleColumnAndIndexSupplier.java @@ -49,15 +49,15 @@ import org.apache.druid.segment.data.GenericIndexed; import org.apache.druid.segment.data.VByte; import org.apache.druid.segment.index.BitmapColumnIndex; -import org.apache.druid.segment.index.DictionaryEncodedStringValueIndex; -import org.apache.druid.segment.index.DictionaryEncodedValueIndex; -import org.apache.druid.segment.index.DruidPredicateIndex; -import org.apache.druid.segment.index.NullValueIndex; -import org.apache.druid.segment.index.NumericRangeIndex; +import org.apache.druid.segment.index.semantic.DictionaryEncodedStringValueIndex; +import org.apache.druid.segment.index.semantic.DictionaryEncodedValueIndex; +import org.apache.druid.segment.index.semantic.DruidPredicateIndex; +import org.apache.druid.segment.index.semantic.NullValueIndex; +import org.apache.druid.segment.index.semantic.NumericRangeIndex; import org.apache.druid.segment.index.SimpleBitmapColumnIndex; import org.apache.druid.segment.index.SimpleImmutableBitmapIndex; import org.apache.druid.segment.index.SimpleImmutableBitmapIterableIndex; -import org.apache.druid.segment.index.StringValueSetIndex; +import org.apache.druid.segment.index.semantic.StringValueSetIndex; import org.apache.druid.segment.serde.NestedCommonFormatColumnPartSerde; import javax.annotation.Nullable; diff --git a/processing/src/main/java/org/apache/druid/segment/nested/ScalarLongColumnAndIndexSupplier.java 
b/processing/src/main/java/org/apache/druid/segment/nested/ScalarLongColumnAndIndexSupplier.java index 47c9d1d78ac2..c8eac804fc5c 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/ScalarLongColumnAndIndexSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/ScalarLongColumnAndIndexSupplier.java @@ -48,15 +48,15 @@ import org.apache.druid.segment.data.GenericIndexed; import org.apache.druid.segment.data.VByte; import org.apache.druid.segment.index.BitmapColumnIndex; -import org.apache.druid.segment.index.DictionaryEncodedStringValueIndex; -import org.apache.druid.segment.index.DictionaryEncodedValueIndex; -import org.apache.druid.segment.index.DruidPredicateIndex; -import org.apache.druid.segment.index.NullValueIndex; -import org.apache.druid.segment.index.NumericRangeIndex; +import org.apache.druid.segment.index.semantic.DictionaryEncodedStringValueIndex; +import org.apache.druid.segment.index.semantic.DictionaryEncodedValueIndex; +import org.apache.druid.segment.index.semantic.DruidPredicateIndex; +import org.apache.druid.segment.index.semantic.NullValueIndex; +import org.apache.druid.segment.index.semantic.NumericRangeIndex; import org.apache.druid.segment.index.SimpleBitmapColumnIndex; import org.apache.druid.segment.index.SimpleImmutableBitmapIndex; import org.apache.druid.segment.index.SimpleImmutableBitmapIterableIndex; -import org.apache.druid.segment.index.StringValueSetIndex; +import org.apache.druid.segment.index.semantic.StringValueSetIndex; import org.apache.druid.segment.serde.NestedCommonFormatColumnPartSerde; import javax.annotation.Nullable; diff --git a/processing/src/main/java/org/apache/druid/segment/nested/VariantColumnAndIndexSupplier.java b/processing/src/main/java/org/apache/druid/segment/nested/VariantColumnAndIndexSupplier.java index f18f756ec5ef..54edf6807291 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/VariantColumnAndIndexSupplier.java +++ 
b/processing/src/main/java/org/apache/druid/segment/nested/VariantColumnAndIndexSupplier.java @@ -41,7 +41,7 @@ import org.apache.druid.segment.data.GenericIndexed; import org.apache.druid.segment.data.VByte; import org.apache.druid.segment.index.BitmapColumnIndex; -import org.apache.druid.segment.index.NullValueIndex; +import org.apache.druid.segment.index.semantic.NullValueIndex; import org.apache.druid.segment.index.SimpleImmutableBitmapIndex; import org.apache.druid.segment.serde.NestedCommonFormatColumnPartSerde; diff --git a/processing/src/main/java/org/apache/druid/segment/serde/NullValueIndexSupplier.java b/processing/src/main/java/org/apache/druid/segment/serde/NullValueIndexSupplier.java index 81390c685b9c..e37e499b425e 100644 --- a/processing/src/main/java/org/apache/druid/segment/serde/NullValueIndexSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/serde/NullValueIndexSupplier.java @@ -22,7 +22,7 @@ import org.apache.druid.collections.bitmap.ImmutableBitmap; import org.apache.druid.segment.column.ColumnIndexSupplier; import org.apache.druid.segment.index.BitmapColumnIndex; -import org.apache.druid.segment.index.NullValueIndex; +import org.apache.druid.segment.index.semantic.NullValueIndex; import org.apache.druid.segment.index.SimpleImmutableBitmapIndex; import javax.annotation.Nullable; diff --git a/processing/src/main/java/org/apache/druid/segment/serde/StringUtf8ColumnIndexSupplier.java b/processing/src/main/java/org/apache/druid/segment/serde/StringUtf8ColumnIndexSupplier.java index 90695317e582..0f58cbcff319 100644 --- a/processing/src/main/java/org/apache/druid/segment/serde/StringUtf8ColumnIndexSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/serde/StringUtf8ColumnIndexSupplier.java @@ -30,18 +30,18 @@ import org.apache.druid.segment.data.GenericIndexed; import org.apache.druid.segment.data.Indexed; import org.apache.druid.segment.index.BitmapColumnIndex; -import 
org.apache.druid.segment.index.DictionaryEncodedStringValueIndex; -import org.apache.druid.segment.index.DictionaryEncodedValueIndex; -import org.apache.druid.segment.index.DruidPredicateIndex; +import org.apache.druid.segment.index.semantic.DictionaryEncodedStringValueIndex; +import org.apache.druid.segment.index.semantic.DictionaryEncodedValueIndex; +import org.apache.druid.segment.index.semantic.DruidPredicateIndex; import org.apache.druid.segment.index.IndexedStringDictionaryEncodedStringValueIndex; import org.apache.druid.segment.index.IndexedStringDruidPredicateIndex; import org.apache.druid.segment.index.IndexedUtf8LexicographicalRangeIndex; import org.apache.druid.segment.index.IndexedUtf8ValueSetIndex; -import org.apache.druid.segment.index.LexicographicalRangeIndex; -import org.apache.druid.segment.index.NullValueIndex; +import org.apache.druid.segment.index.semantic.LexicographicalRangeIndex; +import org.apache.druid.segment.index.semantic.NullValueIndex; import org.apache.druid.segment.index.SimpleImmutableBitmapIndex; -import org.apache.druid.segment.index.SpatialIndex; -import org.apache.druid.segment.index.StringValueSetIndex; +import org.apache.druid.segment.index.semantic.SpatialIndex; +import org.apache.druid.segment.index.semantic.StringValueSetIndex; import javax.annotation.Nullable; import java.nio.ByteBuffer; diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/ListFilteredVirtualColumn.java b/processing/src/main/java/org/apache/druid/segment/virtual/ListFilteredVirtualColumn.java index 75661070450e..9a811bc1cec7 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/ListFilteredVirtualColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/ListFilteredVirtualColumn.java @@ -47,14 +47,14 @@ import org.apache.druid.segment.column.ColumnIndexSupplier; import org.apache.druid.segment.filter.Filters; import org.apache.druid.segment.index.BitmapColumnIndex; -import 
org.apache.druid.segment.index.DictionaryEncodedStringValueIndex; -import org.apache.druid.segment.index.DictionaryEncodedValueIndex; -import org.apache.druid.segment.index.DruidPredicateIndex; -import org.apache.druid.segment.index.LexicographicalRangeIndex; -import org.apache.druid.segment.index.NullValueIndex; +import org.apache.druid.segment.index.semantic.DictionaryEncodedStringValueIndex; +import org.apache.druid.segment.index.semantic.DictionaryEncodedValueIndex; +import org.apache.druid.segment.index.semantic.DruidPredicateIndex; +import org.apache.druid.segment.index.semantic.LexicographicalRangeIndex; +import org.apache.druid.segment.index.semantic.NullValueIndex; import org.apache.druid.segment.index.SimpleBitmapColumnIndex; import org.apache.druid.segment.index.SimpleImmutableBitmapIterableIndex; -import org.apache.druid.segment.index.StringValueSetIndex; +import org.apache.druid.segment.index.semantic.StringValueSetIndex; import javax.annotation.Nullable; import java.util.Collections; diff --git a/processing/src/test/java/org/apache/druid/query/filter/InDimFilterTest.java b/processing/src/test/java/org/apache/druid/query/filter/InDimFilterTest.java index 3d1f171ac4c0..ab3a7c525d44 100644 --- a/processing/src/test/java/org/apache/druid/query/filter/InDimFilterTest.java +++ b/processing/src/test/java/org/apache/druid/query/filter/InDimFilterTest.java @@ -35,8 +35,8 @@ import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.RowSignature; import org.apache.druid.segment.index.BitmapColumnIndex; -import org.apache.druid.segment.index.StringValueSetIndex; -import org.apache.druid.segment.index.Utf8ValueSetIndex; +import org.apache.druid.segment.index.semantic.StringValueSetIndex; +import org.apache.druid.segment.index.semantic.Utf8ValueSetIndex; import org.apache.druid.testing.InitializedNullHandlingTest; import org.junit.Assert; import org.junit.Rule; diff --git 
a/processing/src/test/java/org/apache/druid/query/filter/LikeDimFilterTest.java b/processing/src/test/java/org/apache/druid/query/filter/LikeDimFilterTest.java index 8b35ee32c942..00ae514761f4 100644 --- a/processing/src/test/java/org/apache/druid/query/filter/LikeDimFilterTest.java +++ b/processing/src/test/java/org/apache/druid/query/filter/LikeDimFilterTest.java @@ -26,8 +26,8 @@ import org.apache.druid.query.extraction.SubstringDimExtractionFn; import org.apache.druid.segment.column.ColumnIndexSupplier; import org.apache.druid.segment.index.BitmapColumnIndex; -import org.apache.druid.segment.index.LexicographicalRangeIndex; -import org.apache.druid.segment.index.StringValueSetIndex; +import org.apache.druid.segment.index.semantic.LexicographicalRangeIndex; +import org.apache.druid.segment.index.semantic.StringValueSetIndex; import org.apache.druid.testing.InitializedNullHandlingTest; import org.junit.Assert; import org.junit.Rule; diff --git a/processing/src/test/java/org/apache/druid/segment/ColumnSelectorColumnIndexSelectorTest.java b/processing/src/test/java/org/apache/druid/segment/ColumnSelectorColumnIndexSelectorTest.java index 391d0d705ea5..c57fdd45b6e7 100644 --- a/processing/src/test/java/org/apache/druid/segment/ColumnSelectorColumnIndexSelectorTest.java +++ b/processing/src/test/java/org/apache/druid/segment/ColumnSelectorColumnIndexSelectorTest.java @@ -28,8 +28,8 @@ import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.StringUtf8DictionaryEncodedColumn; import org.apache.druid.segment.index.BitmapColumnIndex; -import org.apache.druid.segment.index.DictionaryEncodedStringValueIndex; -import org.apache.druid.segment.index.StringValueSetIndex; +import org.apache.druid.segment.index.semantic.DictionaryEncodedStringValueIndex; +import org.apache.druid.segment.index.semantic.StringValueSetIndex; import org.apache.druid.segment.serde.NoIndexesColumnIndexSupplier; import org.easymock.EasyMock; import org.junit.Assert; 
diff --git a/processing/src/test/java/org/apache/druid/segment/IndexMergerNullHandlingTest.java b/processing/src/test/java/org/apache/druid/segment/IndexMergerNullHandlingTest.java index b84e209e37ff..034eec7ccca1 100644 --- a/processing/src/test/java/org/apache/druid/segment/IndexMergerNullHandlingTest.java +++ b/processing/src/test/java/org/apache/druid/segment/IndexMergerNullHandlingTest.java @@ -35,8 +35,8 @@ import org.apache.druid.segment.data.IncrementalIndexTest; import org.apache.druid.segment.data.IndexedInts; import org.apache.druid.segment.incremental.IncrementalIndex; -import org.apache.druid.segment.index.DictionaryEncodedStringValueIndex; -import org.apache.druid.segment.index.StringValueSetIndex; +import org.apache.druid.segment.index.semantic.DictionaryEncodedStringValueIndex; +import org.apache.druid.segment.index.semantic.StringValueSetIndex; import org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMediumFactory; import org.junit.Assert; import org.junit.Before; diff --git a/processing/src/test/java/org/apache/druid/segment/IndexMergerTestBase.java b/processing/src/test/java/org/apache/druid/segment/IndexMergerTestBase.java index 4f4471f9e85b..5b69ccfa14bd 100644 --- a/processing/src/test/java/org/apache/druid/segment/IndexMergerTestBase.java +++ b/processing/src/test/java/org/apache/druid/segment/IndexMergerTestBase.java @@ -60,7 +60,7 @@ import org.apache.druid.segment.incremental.IncrementalIndexAdapter; import org.apache.druid.segment.incremental.IncrementalIndexSchema; import org.apache.druid.segment.incremental.OnheapIncrementalIndex; -import org.apache.druid.segment.index.StringValueSetIndex; +import org.apache.druid.segment.index.semantic.StringValueSetIndex; import org.apache.druid.segment.writeout.SegmentWriteOutMediumFactory; import org.apache.druid.testing.InitializedNullHandlingTest; import org.joda.time.Interval; diff --git a/processing/src/test/java/org/apache/druid/segment/nested/NestedDataColumnSupplierTest.java 
b/processing/src/test/java/org/apache/druid/segment/nested/NestedDataColumnSupplierTest.java index 32b3eaa871bd..02ea10cc6d5c 100644 --- a/processing/src/test/java/org/apache/druid/segment/nested/NestedDataColumnSupplierTest.java +++ b/processing/src/test/java/org/apache/druid/segment/nested/NestedDataColumnSupplierTest.java @@ -55,9 +55,9 @@ import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.data.BitmapSerdeFactory; import org.apache.druid.segment.data.RoaringBitmapSerdeFactory; -import org.apache.druid.segment.index.DruidPredicateIndex; -import org.apache.druid.segment.index.NullValueIndex; -import org.apache.druid.segment.index.StringValueSetIndex; +import org.apache.druid.segment.index.semantic.DruidPredicateIndex; +import org.apache.druid.segment.index.semantic.NullValueIndex; +import org.apache.druid.segment.index.semantic.StringValueSetIndex; import org.apache.druid.segment.serde.ColumnPartSerde; import org.apache.druid.segment.serde.NestedCommonFormatColumnPartSerde; import org.apache.druid.segment.vector.BitmapVectorOffset; diff --git a/processing/src/test/java/org/apache/druid/segment/nested/NestedDataColumnSupplierV4Test.java b/processing/src/test/java/org/apache/druid/segment/nested/NestedDataColumnSupplierV4Test.java index 824f4f80bd77..7eefb0807677 100644 --- a/processing/src/test/java/org/apache/druid/segment/nested/NestedDataColumnSupplierV4Test.java +++ b/processing/src/test/java/org/apache/druid/segment/nested/NestedDataColumnSupplierV4Test.java @@ -53,9 +53,9 @@ import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.TypeStrategy; import org.apache.druid.segment.index.BitmapColumnIndex; -import org.apache.druid.segment.index.DruidPredicateIndex; -import org.apache.druid.segment.index.NullValueIndex; -import org.apache.druid.segment.index.StringValueSetIndex; +import org.apache.druid.segment.index.semantic.DruidPredicateIndex; +import 
org.apache.druid.segment.index.semantic.NullValueIndex; +import org.apache.druid.segment.index.semantic.StringValueSetIndex; import org.apache.druid.segment.serde.ColumnPartSerde; import org.apache.druid.segment.serde.ComplexColumnPartSerde; import org.apache.druid.segment.writeout.SegmentWriteOutMediumFactory; diff --git a/processing/src/test/java/org/apache/druid/segment/nested/NestedFieldColumnIndexSupplierTest.java b/processing/src/test/java/org/apache/druid/segment/nested/NestedFieldColumnIndexSupplierTest.java index 3164cc251c41..5090523bb626 100644 --- a/processing/src/test/java/org/apache/druid/segment/nested/NestedFieldColumnIndexSupplierTest.java +++ b/processing/src/test/java/org/apache/druid/segment/nested/NestedFieldColumnIndexSupplierTest.java @@ -39,14 +39,14 @@ import org.apache.druid.segment.data.Indexed; import org.apache.druid.segment.data.RoaringBitmapSerdeFactory; import org.apache.druid.segment.index.BitmapColumnIndex; -import org.apache.druid.segment.index.DictionaryEncodedStringValueIndex; -import org.apache.druid.segment.index.DictionaryEncodedValueIndex; -import org.apache.druid.segment.index.DruidPredicateIndex; -import org.apache.druid.segment.index.LexicographicalRangeIndex; -import org.apache.druid.segment.index.NullValueIndex; -import org.apache.druid.segment.index.NumericRangeIndex; -import org.apache.druid.segment.index.SpatialIndex; -import org.apache.druid.segment.index.StringValueSetIndex; +import org.apache.druid.segment.index.semantic.DictionaryEncodedStringValueIndex; +import org.apache.druid.segment.index.semantic.DictionaryEncodedValueIndex; +import org.apache.druid.segment.index.semantic.DruidPredicateIndex; +import org.apache.druid.segment.index.semantic.LexicographicalRangeIndex; +import org.apache.druid.segment.index.semantic.NullValueIndex; +import org.apache.druid.segment.index.semantic.NumericRangeIndex; +import org.apache.druid.segment.index.semantic.SpatialIndex; +import 
org.apache.druid.segment.index.semantic.StringValueSetIndex; import org.apache.druid.segment.serde.Serializer; import org.apache.druid.segment.writeout.OnHeapMemorySegmentWriteOutMedium; import org.apache.druid.testing.InitializedNullHandlingTest; diff --git a/processing/src/test/java/org/apache/druid/segment/nested/ScalarDoubleColumnSupplierTest.java b/processing/src/test/java/org/apache/druid/segment/nested/ScalarDoubleColumnSupplierTest.java index b65b56e220aa..6e02e5bca0d1 100644 --- a/processing/src/test/java/org/apache/druid/segment/nested/ScalarDoubleColumnSupplierTest.java +++ b/processing/src/test/java/org/apache/druid/segment/nested/ScalarDoubleColumnSupplierTest.java @@ -44,9 +44,9 @@ import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.data.BitmapSerdeFactory; import org.apache.druid.segment.data.RoaringBitmapSerdeFactory; -import org.apache.druid.segment.index.DruidPredicateIndex; -import org.apache.druid.segment.index.NullValueIndex; -import org.apache.druid.segment.index.StringValueSetIndex; +import org.apache.druid.segment.index.semantic.DruidPredicateIndex; +import org.apache.druid.segment.index.semantic.NullValueIndex; +import org.apache.druid.segment.index.semantic.StringValueSetIndex; import org.apache.druid.segment.vector.NoFilterVectorOffset; import org.apache.druid.segment.vector.VectorValueSelector; import org.apache.druid.segment.writeout.SegmentWriteOutMediumFactory; diff --git a/processing/src/test/java/org/apache/druid/segment/nested/ScalarLongColumnSupplierTest.java b/processing/src/test/java/org/apache/druid/segment/nested/ScalarLongColumnSupplierTest.java index c3cdff147772..408b608f4b6f 100644 --- a/processing/src/test/java/org/apache/druid/segment/nested/ScalarLongColumnSupplierTest.java +++ b/processing/src/test/java/org/apache/druid/segment/nested/ScalarLongColumnSupplierTest.java @@ -44,9 +44,9 @@ import org.apache.druid.segment.column.ColumnType; import 
org.apache.druid.segment.data.BitmapSerdeFactory; import org.apache.druid.segment.data.RoaringBitmapSerdeFactory; -import org.apache.druid.segment.index.DruidPredicateIndex; -import org.apache.druid.segment.index.NullValueIndex; -import org.apache.druid.segment.index.StringValueSetIndex; +import org.apache.druid.segment.index.semantic.DruidPredicateIndex; +import org.apache.druid.segment.index.semantic.NullValueIndex; +import org.apache.druid.segment.index.semantic.StringValueSetIndex; import org.apache.druid.segment.vector.NoFilterVectorOffset; import org.apache.druid.segment.vector.VectorValueSelector; import org.apache.druid.segment.writeout.SegmentWriteOutMediumFactory; diff --git a/processing/src/test/java/org/apache/druid/segment/nested/ScalarStringColumnSupplierTest.java b/processing/src/test/java/org/apache/druid/segment/nested/ScalarStringColumnSupplierTest.java index edecb52f7bf6..2bf5c1ace4ec 100644 --- a/processing/src/test/java/org/apache/druid/segment/nested/ScalarStringColumnSupplierTest.java +++ b/processing/src/test/java/org/apache/druid/segment/nested/ScalarStringColumnSupplierTest.java @@ -46,9 +46,9 @@ import org.apache.druid.segment.column.StringUtf8DictionaryEncodedColumn; import org.apache.druid.segment.data.BitmapSerdeFactory; import org.apache.druid.segment.data.RoaringBitmapSerdeFactory; -import org.apache.druid.segment.index.DruidPredicateIndex; -import org.apache.druid.segment.index.NullValueIndex; -import org.apache.druid.segment.index.StringValueSetIndex; +import org.apache.druid.segment.index.semantic.DruidPredicateIndex; +import org.apache.druid.segment.index.semantic.NullValueIndex; +import org.apache.druid.segment.index.semantic.StringValueSetIndex; import org.apache.druid.segment.writeout.SegmentWriteOutMediumFactory; import org.apache.druid.segment.writeout.TmpFileSegmentWriteOutMediumFactory; import org.apache.druid.testing.InitializedNullHandlingTest; diff --git 
a/processing/src/test/java/org/apache/druid/segment/nested/VariantColumnSupplierTest.java b/processing/src/test/java/org/apache/druid/segment/nested/VariantColumnSupplierTest.java index 6b689eacf950..fa7645592246 100644 --- a/processing/src/test/java/org/apache/druid/segment/nested/VariantColumnSupplierTest.java +++ b/processing/src/test/java/org/apache/druid/segment/nested/VariantColumnSupplierTest.java @@ -47,9 +47,9 @@ import org.apache.druid.segment.data.CompressionFactory; import org.apache.druid.segment.data.FrontCodedIndexed; import org.apache.druid.segment.data.RoaringBitmapSerdeFactory; -import org.apache.druid.segment.index.DruidPredicateIndex; -import org.apache.druid.segment.index.NullValueIndex; -import org.apache.druid.segment.index.StringValueSetIndex; +import org.apache.druid.segment.index.semantic.DruidPredicateIndex; +import org.apache.druid.segment.index.semantic.NullValueIndex; +import org.apache.druid.segment.index.semantic.StringValueSetIndex; import org.apache.druid.segment.vector.NoFilterVectorOffset; import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector; import org.apache.druid.segment.vector.VectorObjectSelector; diff --git a/processing/src/test/java/org/apache/druid/segment/serde/DictionaryEncodedStringIndexSupplierTest.java b/processing/src/test/java/org/apache/druid/segment/serde/DictionaryEncodedStringIndexSupplierTest.java index ad18650c9a38..d65c085d584b 100644 --- a/processing/src/test/java/org/apache/druid/segment/serde/DictionaryEncodedStringIndexSupplierTest.java +++ b/processing/src/test/java/org/apache/druid/segment/serde/DictionaryEncodedStringIndexSupplierTest.java @@ -29,7 +29,7 @@ import org.apache.druid.segment.data.GenericIndexedWriter; import org.apache.druid.segment.data.RoaringBitmapSerdeFactory; import org.apache.druid.segment.index.BitmapColumnIndex; -import org.apache.druid.segment.index.StringValueSetIndex; +import org.apache.druid.segment.index.semantic.StringValueSetIndex; import 
org.apache.druid.segment.writeout.OnHeapMemorySegmentWriteOutMedium; import org.apache.druid.testing.InitializedNullHandlingTest; import org.junit.Assert; diff --git a/processing/src/test/java/org/apache/druid/segment/virtual/ListFilteredVirtualColumnSelectorTest.java b/processing/src/test/java/org/apache/druid/segment/virtual/ListFilteredVirtualColumnSelectorTest.java index 6ba06956e2f0..d642acb4d69a 100644 --- a/processing/src/test/java/org/apache/druid/segment/virtual/ListFilteredVirtualColumnSelectorTest.java +++ b/processing/src/test/java/org/apache/druid/segment/virtual/ListFilteredVirtualColumnSelectorTest.java @@ -42,7 +42,7 @@ import org.apache.druid.segment.column.RowSignature; import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.filter.SelectorFilter; -import org.apache.druid.segment.index.DictionaryEncodedStringValueIndex; +import org.apache.druid.segment.index.semantic.DictionaryEncodedStringValueIndex; import org.apache.druid.testing.InitializedNullHandlingTest; import org.easymock.EasyMock; import org.junit.Assert; diff --git a/services/src/main/java/org/apache/druid/cli/DumpSegment.java b/services/src/main/java/org/apache/druid/cli/DumpSegment.java index b80e3ccb145d..175c4332a954 100644 --- a/services/src/main/java/org/apache/druid/cli/DumpSegment.java +++ b/services/src/main/java/org/apache/druid/cli/DumpSegment.java @@ -91,7 +91,7 @@ import org.apache.druid.segment.data.Indexed; import org.apache.druid.segment.data.RoaringBitmapSerdeFactory; import org.apache.druid.segment.filter.Filters; -import org.apache.druid.segment.index.DictionaryEncodedStringValueIndex; +import org.apache.druid.segment.index.semantic.DictionaryEncodedStringValueIndex; import org.apache.druid.segment.nested.CompressedNestedDataComplexColumn; import org.apache.druid.segment.nested.NestedFieldDictionaryEncodedColumn; import org.apache.druid.segment.nested.NestedPathFinder; diff --git 
a/services/src/test/java/org/apache/druid/cli/DumpSegmentTest.java b/services/src/test/java/org/apache/druid/cli/DumpSegmentTest.java index 9206f38ed0d4..10027ae73b50 100644 --- a/services/src/test/java/org/apache/druid/cli/DumpSegmentTest.java +++ b/services/src/test/java/org/apache/druid/cli/DumpSegmentTest.java @@ -50,7 +50,7 @@ import org.apache.druid.segment.TestHelper; import org.apache.druid.segment.column.ColumnHolder; import org.apache.druid.segment.column.ColumnIndexSupplier; -import org.apache.druid.segment.index.DictionaryEncodedStringValueIndex; +import org.apache.druid.segment.index.semantic.DictionaryEncodedStringValueIndex; import org.apache.druid.testing.InitializedNullHandlingTest; import org.apache.druid.timeline.SegmentId; import org.junit.After; From c74af4af492ba8fad886ace7a1b021b412a41b50 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Tue, 11 Jul 2023 13:06:43 -0700 Subject: [PATCH 23/44] fix style --- .../segment/nested/NestedFieldColumnIndexSupplier.java | 6 +++--- .../nested/ScalarDoubleColumnAndIndexSupplier.java | 6 +++--- .../segment/nested/ScalarLongColumnAndIndexSupplier.java | 6 +++--- .../segment/nested/VariantColumnAndIndexSupplier.java | 2 +- .../druid/segment/serde/NullValueIndexSupplier.java | 2 +- .../segment/serde/StringUtf8ColumnIndexSupplier.java | 8 ++++---- .../druid/segment/virtual/ListFilteredVirtualColumn.java | 4 ++-- 7 files changed, 17 insertions(+), 17 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/nested/NestedFieldColumnIndexSupplier.java b/processing/src/main/java/org/apache/druid/segment/nested/NestedFieldColumnIndexSupplier.java index adf9df1193d5..f4eb3dd58c55 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/NestedFieldColumnIndexSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/NestedFieldColumnIndexSupplier.java @@ -51,15 +51,15 @@ import org.apache.druid.segment.data.GenericIndexed; import org.apache.druid.segment.data.Indexed; 
import org.apache.druid.segment.index.BitmapColumnIndex; +import org.apache.druid.segment.index.SimpleBitmapColumnIndex; +import org.apache.druid.segment.index.SimpleImmutableBitmapIndex; +import org.apache.druid.segment.index.SimpleImmutableBitmapIterableIndex; import org.apache.druid.segment.index.semantic.DictionaryEncodedStringValueIndex; import org.apache.druid.segment.index.semantic.DictionaryEncodedValueIndex; import org.apache.druid.segment.index.semantic.DruidPredicateIndex; import org.apache.druid.segment.index.semantic.LexicographicalRangeIndex; import org.apache.druid.segment.index.semantic.NullValueIndex; import org.apache.druid.segment.index.semantic.NumericRangeIndex; -import org.apache.druid.segment.index.SimpleBitmapColumnIndex; -import org.apache.druid.segment.index.SimpleImmutableBitmapIndex; -import org.apache.druid.segment.index.SimpleImmutableBitmapIterableIndex; import org.apache.druid.segment.index.semantic.StringValueSetIndex; import javax.annotation.Nullable; diff --git a/processing/src/main/java/org/apache/druid/segment/nested/ScalarDoubleColumnAndIndexSupplier.java b/processing/src/main/java/org/apache/druid/segment/nested/ScalarDoubleColumnAndIndexSupplier.java index a829ae98cd55..aff15475ea0e 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/ScalarDoubleColumnAndIndexSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/ScalarDoubleColumnAndIndexSupplier.java @@ -49,14 +49,14 @@ import org.apache.druid.segment.data.GenericIndexed; import org.apache.druid.segment.data.VByte; import org.apache.druid.segment.index.BitmapColumnIndex; +import org.apache.druid.segment.index.SimpleBitmapColumnIndex; +import org.apache.druid.segment.index.SimpleImmutableBitmapIndex; +import org.apache.druid.segment.index.SimpleImmutableBitmapIterableIndex; import org.apache.druid.segment.index.semantic.DictionaryEncodedStringValueIndex; import org.apache.druid.segment.index.semantic.DictionaryEncodedValueIndex; import 
org.apache.druid.segment.index.semantic.DruidPredicateIndex; import org.apache.druid.segment.index.semantic.NullValueIndex; import org.apache.druid.segment.index.semantic.NumericRangeIndex; -import org.apache.druid.segment.index.SimpleBitmapColumnIndex; -import org.apache.druid.segment.index.SimpleImmutableBitmapIndex; -import org.apache.druid.segment.index.SimpleImmutableBitmapIterableIndex; import org.apache.druid.segment.index.semantic.StringValueSetIndex; import org.apache.druid.segment.serde.NestedCommonFormatColumnPartSerde; diff --git a/processing/src/main/java/org/apache/druid/segment/nested/ScalarLongColumnAndIndexSupplier.java b/processing/src/main/java/org/apache/druid/segment/nested/ScalarLongColumnAndIndexSupplier.java index c8eac804fc5c..08bdd6875766 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/ScalarLongColumnAndIndexSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/ScalarLongColumnAndIndexSupplier.java @@ -48,14 +48,14 @@ import org.apache.druid.segment.data.GenericIndexed; import org.apache.druid.segment.data.VByte; import org.apache.druid.segment.index.BitmapColumnIndex; +import org.apache.druid.segment.index.SimpleBitmapColumnIndex; +import org.apache.druid.segment.index.SimpleImmutableBitmapIndex; +import org.apache.druid.segment.index.SimpleImmutableBitmapIterableIndex; import org.apache.druid.segment.index.semantic.DictionaryEncodedStringValueIndex; import org.apache.druid.segment.index.semantic.DictionaryEncodedValueIndex; import org.apache.druid.segment.index.semantic.DruidPredicateIndex; import org.apache.druid.segment.index.semantic.NullValueIndex; import org.apache.druid.segment.index.semantic.NumericRangeIndex; -import org.apache.druid.segment.index.SimpleBitmapColumnIndex; -import org.apache.druid.segment.index.SimpleImmutableBitmapIndex; -import org.apache.druid.segment.index.SimpleImmutableBitmapIterableIndex; import org.apache.druid.segment.index.semantic.StringValueSetIndex; import 
org.apache.druid.segment.serde.NestedCommonFormatColumnPartSerde; diff --git a/processing/src/main/java/org/apache/druid/segment/nested/VariantColumnAndIndexSupplier.java b/processing/src/main/java/org/apache/druid/segment/nested/VariantColumnAndIndexSupplier.java index 54edf6807291..e49f180c2164 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/VariantColumnAndIndexSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/VariantColumnAndIndexSupplier.java @@ -41,8 +41,8 @@ import org.apache.druid.segment.data.GenericIndexed; import org.apache.druid.segment.data.VByte; import org.apache.druid.segment.index.BitmapColumnIndex; -import org.apache.druid.segment.index.semantic.NullValueIndex; import org.apache.druid.segment.index.SimpleImmutableBitmapIndex; +import org.apache.druid.segment.index.semantic.NullValueIndex; import org.apache.druid.segment.serde.NestedCommonFormatColumnPartSerde; import javax.annotation.Nullable; diff --git a/processing/src/main/java/org/apache/druid/segment/serde/NullValueIndexSupplier.java b/processing/src/main/java/org/apache/druid/segment/serde/NullValueIndexSupplier.java index e37e499b425e..39c6a5e44aa8 100644 --- a/processing/src/main/java/org/apache/druid/segment/serde/NullValueIndexSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/serde/NullValueIndexSupplier.java @@ -22,8 +22,8 @@ import org.apache.druid.collections.bitmap.ImmutableBitmap; import org.apache.druid.segment.column.ColumnIndexSupplier; import org.apache.druid.segment.index.BitmapColumnIndex; -import org.apache.druid.segment.index.semantic.NullValueIndex; import org.apache.druid.segment.index.SimpleImmutableBitmapIndex; +import org.apache.druid.segment.index.semantic.NullValueIndex; import javax.annotation.Nullable; diff --git a/processing/src/main/java/org/apache/druid/segment/serde/StringUtf8ColumnIndexSupplier.java b/processing/src/main/java/org/apache/druid/segment/serde/StringUtf8ColumnIndexSupplier.java 
index 0f58cbcff319..7e24af112e88 100644 --- a/processing/src/main/java/org/apache/druid/segment/serde/StringUtf8ColumnIndexSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/serde/StringUtf8ColumnIndexSupplier.java @@ -30,16 +30,16 @@ import org.apache.druid.segment.data.GenericIndexed; import org.apache.druid.segment.data.Indexed; import org.apache.druid.segment.index.BitmapColumnIndex; -import org.apache.druid.segment.index.semantic.DictionaryEncodedStringValueIndex; -import org.apache.druid.segment.index.semantic.DictionaryEncodedValueIndex; -import org.apache.druid.segment.index.semantic.DruidPredicateIndex; import org.apache.druid.segment.index.IndexedStringDictionaryEncodedStringValueIndex; import org.apache.druid.segment.index.IndexedStringDruidPredicateIndex; import org.apache.druid.segment.index.IndexedUtf8LexicographicalRangeIndex; import org.apache.druid.segment.index.IndexedUtf8ValueSetIndex; +import org.apache.druid.segment.index.SimpleImmutableBitmapIndex; +import org.apache.druid.segment.index.semantic.DictionaryEncodedStringValueIndex; +import org.apache.druid.segment.index.semantic.DictionaryEncodedValueIndex; +import org.apache.druid.segment.index.semantic.DruidPredicateIndex; import org.apache.druid.segment.index.semantic.LexicographicalRangeIndex; import org.apache.druid.segment.index.semantic.NullValueIndex; -import org.apache.druid.segment.index.SimpleImmutableBitmapIndex; import org.apache.druid.segment.index.semantic.SpatialIndex; import org.apache.druid.segment.index.semantic.StringValueSetIndex; diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/ListFilteredVirtualColumn.java b/processing/src/main/java/org/apache/druid/segment/virtual/ListFilteredVirtualColumn.java index 9a811bc1cec7..c7a93147c09f 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/ListFilteredVirtualColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/ListFilteredVirtualColumn.java @@ -47,13 
+47,13 @@ import org.apache.druid.segment.column.ColumnIndexSupplier; import org.apache.druid.segment.filter.Filters; import org.apache.druid.segment.index.BitmapColumnIndex; +import org.apache.druid.segment.index.SimpleBitmapColumnIndex; +import org.apache.druid.segment.index.SimpleImmutableBitmapIterableIndex; import org.apache.druid.segment.index.semantic.DictionaryEncodedStringValueIndex; import org.apache.druid.segment.index.semantic.DictionaryEncodedValueIndex; import org.apache.druid.segment.index.semantic.DruidPredicateIndex; import org.apache.druid.segment.index.semantic.LexicographicalRangeIndex; import org.apache.druid.segment.index.semantic.NullValueIndex; -import org.apache.druid.segment.index.SimpleBitmapColumnIndex; -import org.apache.druid.segment.index.SimpleImmutableBitmapIterableIndex; import org.apache.druid.segment.index.semantic.StringValueSetIndex; import javax.annotation.Nullable; From c174905ff1ba374960a94342b696cbee2f714cca Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Tue, 11 Jul 2023 13:52:35 -0700 Subject: [PATCH 24/44] support json equality --- .../java/org/apache/druid/query/filter/EqualityFilter.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/processing/src/main/java/org/apache/druid/query/filter/EqualityFilter.java b/processing/src/main/java/org/apache/druid/query/filter/EqualityFilter.java index 71f9aec8fc00..fb6dd536c843 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/EqualityFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/EqualityFilter.java @@ -58,6 +58,7 @@ import org.apache.druid.segment.filter.ValueMatchers; import org.apache.druid.segment.index.BitmapColumnIndex; import org.apache.druid.segment.index.semantic.StringValueSetIndex; +import org.apache.druid.segment.nested.StructuredData; import org.apache.druid.segment.vector.VectorColumnSelectorFactory; import javax.annotation.Nullable; @@ -408,6 +409,9 @@ public Predicate makeArrayPredicate(@Nullable TypeSignature 
@Override public Predicate makeObjectPredicate() { + if (matchValueType.equals(ColumnType.NESTED_DATA)) { + return input -> Objects.equals(StructuredData.unwrap(input), StructuredData.unwrap(matchValue.value())); + } return Predicates.equalTo(matchValue.valueOrDefault()); } From a02bff95b2016e8dbd055395f86c6bf86bb87e2c Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Wed, 12 Jul 2023 04:51:49 -0700 Subject: [PATCH 25/44] some adjustments, still more to do --- .../hll/HllSketchBuildAggregatorFactory.java | 6 +-- .../datasketches/hll/HllSketchBuildUtil.java | 2 + .../HllSketchBuildVectorProcessorFactory.java | 8 ++- .../bloom/BloomFilterAggregatorFactory.java | 22 ++++++-- .../bloom/ByteBloomFilterAggregator.java | 37 ++++++++++++++ .../druid/query/filter/BloomDimFilter.java | 31 ++++++++---- .../bloom/BloomFilterAggregatorTest.java | 11 ++-- .../query/filter/BloomDimFilterTest.java | 6 +-- .../input/DruidSegmentReaderTest.java | 3 +- .../org/apache/druid/math/expr/ExprEval.java | 13 ++--- .../druid/query/filter/BoundDimFilter.java | 4 +- .../druid/query/filter/EqualityFilter.java | 37 ++++---------- .../apache/druid/query/filter/NullFilter.java | 6 +-- .../druid/query/filter/RangeFilter.java | 50 ++++++++----------- 14 files changed, 131 insertions(+), 105 deletions(-) create mode 100644 extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/aggregation/bloom/ByteBloomFilterAggregator.java diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildAggregatorFactory.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildAggregatorFactory.java index 76d242786665..1e6734a84924 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildAggregatorFactory.java +++ 
b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildAggregatorFactory.java @@ -39,7 +39,6 @@ import org.apache.druid.segment.DimensionSelector; import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnType; -import org.apache.druid.segment.column.NullableTypeStrategy; import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.data.IndexedInts; import org.apache.druid.segment.vector.VectorColumnSelectorFactory; @@ -225,12 +224,11 @@ private HllSketchUpdater formulateSketchUpdater(ColumnSelectorFactory columnSele }; break; case ARRAY: - final ExpressionType expressionType = ExpressionType.fromColumnType(capabilities); - final NullableTypeStrategy strategy = expressionType.getNullableStrategy(); + final ExpressionType expressionType = ExpressionType.fromColumnTypeStrict(capabilities); updater = sketch -> { final Object o = selector.getObject(); if (o != null) { - byte[] bytes = ExprEval.toBytes(expressionType, strategy, o); + byte[] bytes = ExprEval.toBytes(expressionType, o); sketch.get().update(bytes); } }; diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildUtil.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildUtil.java index 7b24d4e14c6f..eb54dab877ce 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildUtil.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildUtil.java @@ -43,9 +43,11 @@ public static void updateSketch(final HllSketch sketch, final StringEncoding str } else if (value instanceof String) { updateSketchWithString(sketch, stringEncoding, (String) value); } else if (value instanceof Object[]) { + // Object arrays are handled as ARRAY types, which count the 
entire array as a single value byte[] arrayBytes = ExprEval.toBytesBestEffort(value); sketch.update(arrayBytes); } else if (value instanceof List) { + // Lists are treated as multi-value strings, which count each element as a separate distinct value // noinspection rawtypes for (Object entry : (List) value) { if (entry != null) { diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/vector/HllSketchBuildVectorProcessorFactory.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/vector/HllSketchBuildVectorProcessorFactory.java index b4e3eea4f1d6..e6ca1f933630 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/vector/HllSketchBuildVectorProcessorFactory.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/vector/HllSketchBuildVectorProcessorFactory.java @@ -26,7 +26,6 @@ import org.apache.druid.query.aggregation.datasketches.hll.HllSketchBuildBufferAggregatorHelper; import org.apache.druid.segment.VectorColumnProcessorFactory; import org.apache.druid.segment.column.ColumnCapabilities; -import org.apache.druid.segment.column.NullableTypeStrategy; import org.apache.druid.segment.vector.MultiValueDimensionVectorSelector; import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector; import org.apache.druid.segment.vector.VectorObjectSelector; @@ -96,8 +95,7 @@ public HllSketchBuildVectorProcessor makeArrayProcessor( VectorObjectSelector selector ) { - final ExpressionType expressionType = ExpressionType.fromColumnType(capabilities); - final NullableTypeStrategy typeStrategy = expressionType.getNullableStrategy(); + final ExpressionType expressionType = ExpressionType.fromColumnTypeStrict(capabilities); return new HllSketchBuildVectorProcessor() { @Override @@ -108,7 +106,7 @@ public void aggregate(ByteBuffer buf, int position, int startRow, int 
endRow) for (int i = startRow; i < endRow; i++) { if (vector[i] != null) { - byte[] bytes = ExprEval.toBytes(expressionType, typeStrategy, vector[i]); + byte[] bytes = ExprEval.toBytes(expressionType, vector[i]); sketch.update(bytes); } } @@ -125,7 +123,7 @@ public void aggregate(ByteBuffer buf, int numRows, int[] positions, @Nullable in final HllSketch sketch = helper.getSketchAtPosition(buf, position); if (vector[idx] != null) { - byte[] bytes = ExprEval.toBytes(expressionType, typeStrategy, vector[idx]); + byte[] bytes = ExprEval.toBytes(expressionType, vector[idx]); sketch.update(bytes); } } diff --git a/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/aggregation/bloom/BloomFilterAggregatorFactory.java b/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/aggregation/bloom/BloomFilterAggregatorFactory.java index 374f9ef3e6fb..ce99e5b9b9e5 100644 --- a/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/aggregation/bloom/BloomFilterAggregatorFactory.java +++ b/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/aggregation/bloom/BloomFilterAggregatorFactory.java @@ -287,13 +287,29 @@ private BaseBloomFilterAggregator factorizeInternal(ColumnSelectorFactory column maxNumEntries, onHeap ); - case COMPLEX: - // in an ideal world, we would check complex type, but until then assume it's a bloom filter - return new BloomFilterMergeAggregator( + case ARRAY: + return new ByteBloomFilterAggregator( columnFactory.makeColumnValueSelector(field.getDimension()), + capabilities, maxNumEntries, onHeap ); + case COMPLEX: + if (BloomFilterSerializersModule.BLOOM_FILTER_TYPE_NAME.equals(capabilities.getComplexTypeName())) { + return new BloomFilterMergeAggregator( + columnFactory.makeColumnValueSelector(field.getDimension()), + maxNumEntries, + onHeap + ); + } else { + // fall back to bytes aggregator + return new ByteBloomFilterAggregator( + 
columnFactory.makeColumnValueSelector(field.getDimension()), + capabilities, + maxNumEntries, + onHeap + ); + } default: throw new IAE( "Cannot create bloom filter %s for invalid column type [%s]", diff --git a/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/aggregation/bloom/ByteBloomFilterAggregator.java b/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/aggregation/bloom/ByteBloomFilterAggregator.java new file mode 100644 index 000000000000..255a1d66a612 --- /dev/null +++ b/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/aggregation/bloom/ByteBloomFilterAggregator.java @@ -0,0 +1,37 @@ +package org.apache.druid.query.aggregation.bloom; + +import org.apache.druid.math.expr.ExprEval; +import org.apache.druid.math.expr.ExpressionType; +import org.apache.druid.query.filter.BloomKFilter; +import org.apache.druid.segment.BaseObjectColumnValueSelector; +import org.apache.druid.segment.column.TypeSignature; +import org.apache.druid.segment.column.ValueType; + +import java.nio.ByteBuffer; + +public class ByteBloomFilterAggregator extends BaseBloomFilterAggregator> +{ + private final ExpressionType columnType; + + ByteBloomFilterAggregator( + BaseObjectColumnValueSelector baseObjectColumnValueSelector, + TypeSignature columnType, + int maxNumEntries, + boolean onHeap + ) + { + super(baseObjectColumnValueSelector, maxNumEntries, onHeap); + this.columnType = ExpressionType.fromColumnTypeStrict(columnType); + } + + @Override + void bufferAdd(ByteBuffer buf) + { + final Object val = selector.getObject(); + if (val == null) { + BloomKFilter.addBytes(buf, null, 0, 0); + } else { + BloomKFilter.addBytes(buf, ExprEval.toBytes(columnType, val)); + } + } +} diff --git a/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/filter/BloomDimFilter.java b/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/filter/BloomDimFilter.java index 00c55e545cf6..ab002496ee9c 100644 --- 
a/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/filter/BloomDimFilter.java +++ b/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/filter/BloomDimFilter.java @@ -173,18 +173,27 @@ public boolean applyNull() @Override public Predicate makeArrayPredicate(@Nullable TypeSignature arrayType) { - final ExpressionType expressionType = arrayType == null || !arrayType.isArray() + final ExpressionType expressionType = arrayType == null ? null - : ExpressionType.fromColumnType(arrayType); - return input -> { - if (input == null) { - return bloomKFilter.testBytes(null, 0, 0); - } - final byte[] bytes = expressionType != null - ? ExprEval.toBytes(expressionType, expressionType.getNullableStrategy(), input) - : ExprEval.toBytesBestEffort(input); - return bloomKFilter.testBytes(bytes); - }; + : ExpressionType.fromColumnTypeStrict(arrayType); + if (expressionType != null) { + return input -> { + if (input == null) { + return bloomKFilter.testBytes(null, 0, 0); + } + final byte[] bytes = ExprEval.toBytes(expressionType, input); + return bloomKFilter.testBytes(bytes); + }; + } else { + // fall back to per row detection + return input -> { + if (input == null) { + return bloomKFilter.testBytes(null, 0, 0); + } + final byte[] bytes = ExprEval.toBytesBestEffort(input); + return bloomKFilter.testBytes(bytes); + }; + } } }, extractionFn, diff --git a/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/aggregation/bloom/BloomFilterAggregatorTest.java b/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/aggregation/bloom/BloomFilterAggregatorTest.java index c15a55b64d14..41c553d56719 100644 --- a/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/aggregation/bloom/BloomFilterAggregatorTest.java +++ b/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/aggregation/bloom/BloomFilterAggregatorTest.java @@ -96,7 +96,8 @@ public class BloomFilterAggregatorTest 
extends InitializedNullHandlingTest private static final Object[] ARRAY_VALUES = new Object[]{ new Object[]{1L, 2L}, new Object[]{3L, 4L}, - new Object[]{0L, 1000L} + new Object[]{0L, 1000L}, + new Object[]{null, 123L} }; private static final int MAX_NUM_VALUES = 15; @@ -145,13 +146,7 @@ public class BloomFilterAggregatorTest extends InitializedNullHandlingTest BloomKFilter arrayFilter = new BloomKFilter(MAX_NUM_VALUES); for (Object o : ARRAY_VALUES) { - arrayFilter.addBytes( - ExprEval.toBytes( - ExpressionType.LONG_ARRAY, - ExpressionType.LONG_ARRAY.getNullableStrategy(), - o - ) - ); + arrayFilter.addBytes(ExprEval.toBytes(ExpressionType.LONG_ARRAY, o)); } serializedArrayFilter = filterToString(arrayFilter); } diff --git a/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/filter/BloomDimFilterTest.java b/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/filter/BloomDimFilterTest.java index 7c2daf31f1d0..2c4823f7e988 100644 --- a/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/filter/BloomDimFilterTest.java +++ b/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/filter/BloomDimFilterTest.java @@ -218,11 +218,7 @@ public void testMultiValueStringColumn() throws IOException assertFilterMatches( new BloomDimFilter( "dim2", - bloomKFilter( - 1000, - ExprEval.toBytes(ExpressionType.STRING_ARRAY, ExpressionType.STRING_ARRAY.getNullableStrategy(), ImmutableList.of("a", "b") - ) - ), + bloomKFilter(1000, ExprEval.toBytes(ExpressionType.STRING_ARRAY, ImmutableList.of("a", "b"))), null ), ImmutableList.of("0") diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/input/DruidSegmentReaderTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/input/DruidSegmentReaderTest.java index 1ab45d631d4d..09cec378b097 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/input/DruidSegmentReaderTest.java +++ 
b/indexing-service/src/test/java/org/apache/druid/indexing/input/DruidSegmentReaderTest.java @@ -687,7 +687,8 @@ public void close() @Test public void testArrayColumns() throws IOException { - // Write a segment with two rows in it, with columns: s (string), d (double), cnt (long), met_s (complex). + // make our own stuff here so that we don't pollute the shared spec, rows, and segment defined in setup and + // break all the other tests DimensionsSpec dimensionsSpec = new DimensionsSpec( ImmutableList.of( StringDimensionSchema.create("strCol"), diff --git a/processing/src/main/java/org/apache/druid/math/expr/ExprEval.java b/processing/src/main/java/org/apache/druid/math/expr/ExprEval.java index 4569e8b2e94e..2b855919a4d9 100644 --- a/processing/src/main/java/org/apache/druid/math/expr/ExprEval.java +++ b/processing/src/main/java/org/apache/druid/math/expr/ExprEval.java @@ -142,19 +142,20 @@ public static void serialize(ByteBuffer buffer, int position, ExpressionType typ } } - public static byte[] toBytes(ExpressionType expressionType, NullableTypeStrategy strategy, Object o) + public static byte[] toBytes(ExpressionType expressionType, Object o) { - // convert the array to byte[] form so that we take a hash of the whole array final ExprEval eval = ExprEval.ofType(expressionType, o); - final int size = strategy.estimateSizeBytes(eval.valueOrDefault()); - final ByteBuffer buffer = ByteBuffer.allocate(size); - strategy.write(buffer, eval.valueOrDefault(), size); - return buffer.array(); + return toBytes(eval); } public static byte[] toBytesBestEffort(Object o) { final ExprEval eval = ExprEval.bestEffortOf(o); + return toBytes(eval); + } + + public static byte[] toBytes(ExprEval eval) + { final NullableTypeStrategy strategy = eval.type().getNullableStrategy(); final int size = strategy.estimateSizeBytes(eval.valueOrDefault()); final ByteBuffer buffer = ByteBuffer.allocate(size); diff --git a/processing/src/main/java/org/apache/druid/query/filter/BoundDimFilter.java 
b/processing/src/main/java/org/apache/druid/query/filter/BoundDimFilter.java index 958e4306e073..9463308b5069 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/BoundDimFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/BoundDimFilter.java @@ -610,7 +610,7 @@ private Supplier makeDoublePredicateSupplier() return Suppliers.memoize(doublePredicate); } - static DruidLongPredicate makeLongPredicateFromBounds( + public static DruidLongPredicate makeLongPredicateFromBounds( final boolean hasLowerLongBound, final boolean hasUpperLongBound, final boolean lowerStrict, @@ -646,7 +646,7 @@ static DruidLongPredicate makeLongPredicateFromBounds( } } - static DruidDoublePredicate makeDoublePredicateFromBounds( + public static DruidDoublePredicate makeDoublePredicateFromBounds( final boolean hasLowerDoubleBound, final boolean hasUpperDoubleBound, final boolean lowerStrict, diff --git a/processing/src/main/java/org/apache/druid/query/filter/EqualityFilter.java b/processing/src/main/java/org/apache/druid/query/filter/EqualityFilter.java index fb6dd536c843..79bcfa373824 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/EqualityFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/EqualityFilter.java @@ -29,6 +29,7 @@ import com.google.common.collect.RangeSet; import com.google.common.collect.TreeRangeSet; import org.apache.druid.error.DruidException; +import org.apache.druid.error.InvalidInput; import org.apache.druid.java.util.common.IAE; import org.apache.druid.math.expr.ExprEval; import org.apache.druid.math.expr.ExpressionType; @@ -89,23 +90,18 @@ public EqualityFilter( ) { if (column == null) { - throw DruidException.forPersona(DruidException.Persona.USER) - .ofCategory(DruidException.Category.INVALID_INPUT) - .build("Invalid equality filter, column cannot be null"); + throw InvalidInput.exception("Invalid equality filter, column cannot be null"); } this.column = column; if (matchValueType == null) { - 
throw DruidException.forPersona(DruidException.Persona.USER) - .ofCategory(DruidException.Category.INVALID_INPUT) - .build("Invalid equality filter on column [%s], matchValueType cannot be null", column); + throw InvalidInput.exception("Invalid equality filter on column [%s], matchValueType cannot be null", column); } this.matchValueType = matchValueType; if (matchValue == null) { - throw DruidException.forPersona(DruidException.Persona.USER) - .ofCategory(DruidException.Category.INVALID_INPUT) - .build("Invalid equality filter on column [%s], matchValue cannot be null", column); + throw InvalidInput.exception("Invalid equality filter on column [%s], matchValue cannot be null", column); } this.matchValue = matchValue; + // remove once SQL planner no longer uses extractionFn this.extractionFn = extractionFn; this.filterTuning = filterTuning; this.predicateFactory = new EqualityPredicateFactory(matchValue, matchValueType); @@ -183,12 +179,10 @@ public FilterTuning getFilterTuning() @Override public String toString() { - DimFilter.DimFilterToStringBuilder bob = new DimFilter.DimFilterToStringBuilder().appendDimension( - column, - extractionFn - ) - .append(" = ") - .append(matchValue); + DimFilter.DimFilterToStringBuilder bob = + new DimFilter.DimFilterToStringBuilder().appendDimension(column, extractionFn) + .append(" = ") + .append(matchValue); if (!ColumnType.STRING.equals(matchValueType)) { bob.append(" (" + matchValueType.asTypeString() + ")"); @@ -424,10 +418,6 @@ private void initLongPredicate() if (longPredicate != null) { return; } - if (matchValue == null) { - longPredicate = DruidLongPredicate.MATCH_NULL_ONLY; - return; - } final Long valueAsLong = (Long) matchValue.castTo(ExpressionType.LONG).valueOrDefault(); if (valueAsLong == null) { @@ -449,11 +439,6 @@ private void initFloatPredicate() if (floatPredicate != null) { return; } - - if (matchValue == null) { - floatPredicate = DruidFloatPredicate.MATCH_NULL_ONLY; - return; - } final Double doubleValue = 
(Double) matchValue.castTo(ExpressionType.DOUBLE).valueOrDefault(); if (doubleValue == null) { @@ -475,10 +460,6 @@ private void initDoublePredicate() if (doublePredicate != null) { return; } - if (matchValue == null) { - doublePredicate = DruidDoublePredicate.MATCH_NULL_ONLY; - return; - } final Double aDouble = (Double) matchValue.castTo(ExpressionType.DOUBLE).valueOrDefault(); if (aDouble == null) { diff --git a/processing/src/main/java/org/apache/druid/query/filter/NullFilter.java b/processing/src/main/java/org/apache/druid/query/filter/NullFilter.java index ff84b6462741..d5a3ce3e83fe 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/NullFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/NullFilter.java @@ -29,6 +29,7 @@ import com.google.common.collect.RangeSet; import com.google.common.collect.TreeRangeSet; import org.apache.druid.error.DruidException; +import org.apache.druid.error.InvalidInput; import org.apache.druid.java.util.common.IAE; import org.apache.druid.query.cache.CacheKeyBuilder; import org.apache.druid.query.extraction.ExtractionFn; @@ -73,11 +74,10 @@ public NullFilter( ) { if (column == null) { - throw DruidException.forPersona(DruidException.Persona.USER) - .ofCategory(DruidException.Category.INVALID_INPUT) - .build("Invalid null filter, column cannot be null"); + throw InvalidInput.exception("Invalid null filter, column cannot be null"); } this.column = column; + // remove once SQL planner no longer uses extractionFn this.extractionFn = extractionFn; this.filterTuning = filterTuning; } diff --git a/processing/src/main/java/org/apache/druid/query/filter/RangeFilter.java b/processing/src/main/java/org/apache/druid/query/filter/RangeFilter.java index fe4e9d7d740a..152ffe3ac54b 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/RangeFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/RangeFilter.java @@ -33,6 +33,7 @@ import com.google.common.collect.TreeRangeSet; 
import org.apache.druid.common.config.NullHandling; import org.apache.druid.error.DruidException; +import org.apache.druid.error.InvalidInput; import org.apache.druid.java.util.common.IAE; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.math.expr.ExprEval; @@ -103,24 +104,18 @@ public RangeFilter( ) { if (column == null) { - throw DruidException.forPersona(DruidException.Persona.USER) - .ofCategory(DruidException.Category.INVALID_INPUT) - .build("Invalid range filter, column cannot be null"); + throw InvalidInput.exception("Invalid range filter, column cannot be null"); } this.column = column; if (matchValueType == null) { - throw DruidException.forPersona(DruidException.Persona.USER) - .ofCategory(DruidException.Category.INVALID_INPUT) - .build("Invalid range filter on column [%s], matchValueType cannot be null", column); + throw InvalidInput.exception("Invalid range filter on column [%s], matchValueType cannot be null", column); } this.matchValueType = matchValueType; if (lower == null && upper == null) { - throw DruidException.forPersona(DruidException.Persona.USER) - .ofCategory(DruidException.Category.INVALID_INPUT) - .build( - "Invalid range filter on column [%s], lower and upper cannot be null at the same time", - column - ); + throw InvalidInput.exception( + "Invalid range filter on column [%s], lower and upper cannot be null at the same time", + column + ); } final ExpressionType expressionType = ExpressionType.fromColumnType(matchValueType); this.upper = upper; @@ -129,34 +124,31 @@ public RangeFilter( this.lowerEval = ExprEval.ofType(expressionType, lower); if (expressionType.isNumeric()) { if (lower != null && lowerEval.value() == null) { - throw DruidException.forPersona(DruidException.Persona.USER) - .ofCategory(DruidException.Category.INVALID_INPUT) - .build( - "Invalid range filter on column [%s], lower bound [%s] cannot be parsed as specified match value type [%s]", - column, - lower, - expressionType - ); + throw 
InvalidInput.exception( + "Invalid range filter on column [%s], lower bound [%s] cannot be parsed as specified match value type [%s]", + column, + lower, + expressionType + ); } if (upper != null && upperEval.value() == null) { - throw DruidException.forPersona(DruidException.Persona.USER) - .ofCategory(DruidException.Category.INVALID_INPUT) - .build( - "Invalid range filter on column [%s], upper bound [%s] cannot be parsed as specified match value type [%s]", - column, - upper, - expressionType - ); + throw InvalidInput.exception( + "Invalid range filter on column [%s], upper bound [%s] cannot be parsed as specified match value type [%s]", + column, + upper, + expressionType + ); } } this.lowerStrict = lowerStrict != null && lowerStrict; this.upperStrict = upperStrict != null && upperStrict; + // remove once SQL planner no longer uses extractionFn this.extractionFn = extractionFn; + this.filterTuning = filterTuning; this.stringPredicateSupplier = makeStringPredicateSupplier(); this.longPredicateSupplier = makeLongPredicateSupplier(); this.floatPredicateSupplier = makeFloatPredicateSupplier(); this.doublePredicateSupplier = makeDoublePredicateSupplier(); - this.filterTuning = filterTuning; } @JsonProperty From 574b1e2bd01bdf5e900faf7ae91e3914a2e16a5a Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Wed, 12 Jul 2023 16:18:46 -0700 Subject: [PATCH 26/44] opt-in to array processing at ingest time for sketchy stuff --- .../hll/HllSketchAggregatorFactory.java | 22 +- .../hll/HllSketchBuildAggregatorFactory.java | 11 +- .../datasketches/hll/HllSketchBuildUtil.java | 26 +- .../hll/HllSketchMergeAggregatorFactory.java | 2 +- .../hll/sql/HllSketchBaseSqlAggregator.java | 3 +- .../ObjectHllSketchBuildVectorProcessor.java | 6 +- .../datasketches/theta/SketchAggregator.java | 28 +- .../theta/SketchAggregatorFactory.java | 13 +- .../theta/SketchBufferAggregator.java | 6 +- .../theta/SketchMergeAggregatorFactory.java | 27 +- .../theta/SketchVectorAggregator.java | 9 +- 
.../OldSketchBuildAggregatorFactory.java | 2 +- .../OldSketchMergeAggregatorFactory.java | 2 +- .../sql/ThetaSketchBaseSqlAggregator.java | 6 +- .../hll/HllSketchAggregatorFactoryTest.java | 8 +- .../hll/HllSketchAggregatorTest.java | 145 ++++++--- .../HllSketchBuildAggregatorFactoryTest.java | 4 +- .../hll/HllSketchBuildUtilTest.java | 4 +- .../HllSketchMergeAggregatorFactoryTest.java | 3 +- .../hll/sql/HllSketchSqlAggregatorTest.java | 68 +++-- ...UsingSketchMergeAggregatorFactoryTest.java | 2 +- .../theta/SketchAggregationTest.java | 274 ++++++++++++++---- .../theta/SketchAggregatorFactoryTest.java | 8 +- .../SketchToStringPostAggregatorTest.java | 2 +- .../sql/ThetaSketchSqlAggregatorTest.java | 82 ++++-- .../bloom/ByteBloomFilterAggregator.java | 19 ++ .../duty/ITAutoCompactionTest.java | 7 +- .../apache/druid/segment/IndexBuilder.java | 6 +- 28 files changed, 577 insertions(+), 218 deletions(-) diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorFactory.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorFactory.java index 4bc734dc0051..1f65b0480367 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorFactory.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorFactory.java @@ -62,6 +62,8 @@ public abstract class HllSketchAggregatorFactory extends AggregatorFactory private final boolean shouldFinalize; private final boolean round; + private final boolean processAsArray; + HllSketchAggregatorFactory( final String name, final String fieldName, @@ -69,7 +71,8 @@ public abstract class HllSketchAggregatorFactory extends AggregatorFactory @Nullable final String tgtHllType, @Nullable final StringEncoding stringEncoding, final Boolean shouldFinalize, - final boolean round + final 
boolean round, + final boolean processAsArray ) { this.name = Objects.requireNonNull(name); @@ -79,6 +82,7 @@ public abstract class HllSketchAggregatorFactory extends AggregatorFactory this.stringEncoding = stringEncoding == null ? DEFAULT_STRING_ENCODING : stringEncoding; this.shouldFinalize = shouldFinalize == null ? DEFAULT_SHOULD_FINALIZE : shouldFinalize; this.round = round; + this.processAsArray = processAsArray; } @Override @@ -127,6 +131,13 @@ public boolean isRound() return round; } + @JsonProperty + @JsonInclude(JsonInclude.Include.NON_DEFAULT) + public boolean isProcessAsArray() + { + return processAsArray; + } + @Override public List requiredFields() { @@ -149,7 +160,8 @@ public List getRequiredColumns() tgtHllType.toString(), stringEncoding, shouldFinalize, - round + round, + false ) ); } @@ -284,13 +296,14 @@ public boolean equals(Object o) && Objects.equals(name, that.name) && Objects.equals(fieldName, that.fieldName) && tgtHllType == that.tgtHllType - && stringEncoding == that.stringEncoding; + && stringEncoding == that.stringEncoding + && processAsArray == that.processAsArray; } @Override public int hashCode() { - return Objects.hash(name, fieldName, lgK, tgtHllType, stringEncoding, shouldFinalize, round); + return Objects.hash(name, fieldName, lgK, tgtHllType, stringEncoding, shouldFinalize, round, processAsArray); } @Override @@ -304,6 +317,7 @@ public String toString() (stringEncoding != DEFAULT_STRING_ENCODING ? ", stringEncoding=" + stringEncoding : "") + (shouldFinalize != DEFAULT_SHOULD_FINALIZE ? ", shouldFinalize=" + shouldFinalize : "") + (round != DEFAULT_ROUND ? ", round=" + round : "") + + (processAsArray ? 
", processAsArray=true" : "") + '}'; } diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildAggregatorFactory.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildAggregatorFactory.java index 1e6734a84924..ec2bf5a23334 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildAggregatorFactory.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildAggregatorFactory.java @@ -55,6 +55,7 @@ public class HllSketchBuildAggregatorFactory extends HllSketchAggregatorFactory { public static final ColumnType TYPE = ColumnType.ofComplex(HllSketchModule.BUILD_TYPE_NAME); + @JsonCreator public HllSketchBuildAggregatorFactory( @JsonProperty("name") final String name, @@ -63,10 +64,11 @@ public HllSketchBuildAggregatorFactory( @JsonProperty("tgtHllType") @Nullable final String tgtHllType, @JsonProperty("stringEncoding") @Nullable final StringEncoding stringEncoding, @JsonProperty("shouldFinalize") final Boolean shouldFinalize, - @JsonProperty("round") final boolean round + @JsonProperty("round") final boolean round, + @JsonProperty("processAsArray") final boolean processAsArray ) { - super(name, fieldName, lgK, tgtHllType, stringEncoding, shouldFinalize, round); + super(name, fieldName, lgK, tgtHllType, stringEncoding, shouldFinalize, round, processAsArray); } @@ -145,7 +147,8 @@ public AggregatorFactory withName(String newName) getTgtHllType(), getStringEncoding(), isShouldFinalize(), - isRound() + isRound(), + isProcessAsArray() ); } @@ -237,7 +240,7 @@ private HllSketchUpdater formulateSketchUpdater(ColumnSelectorFactory columnSele updater = sketch -> { Object obj = selector.getObject(); if (obj != null) { - HllSketchBuildUtil.updateSketch(sketch.get(), getStringEncoding(), obj); + 
HllSketchBuildUtil.updateSketch(sketch.get(), getStringEncoding(), obj, isProcessAsArray()); } }; } diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildUtil.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildUtil.java index eb54dab877ce..0437999ab85a 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildUtil.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildUtil.java @@ -34,7 +34,12 @@ public class HllSketchBuildUtil { - public static void updateSketch(final HllSketch sketch, final StringEncoding stringEncoding, final Object value) + public static void updateSketch( + final HllSketch sketch, + final StringEncoding stringEncoding, + final Object value, + final boolean processAsArray + ) { if (value instanceof Integer || value instanceof Long) { sketch.update(((Number) value).longValue()); @@ -42,16 +47,21 @@ public static void updateSketch(final HllSketch sketch, final StringEncoding str sketch.update(((Number) value).doubleValue()); } else if (value instanceof String) { updateSketchWithString(sketch, stringEncoding, (String) value); - } else if (value instanceof Object[]) { - // Object arrays are handled as ARRAY types, which count the entire array as a single value + } else if (value instanceof Object[] && processAsArray) { byte[] arrayBytes = ExprEval.toBytesBestEffort(value); sketch.update(arrayBytes); } else if (value instanceof List) { - // Lists are treated as multi-value strings, which count each element as a separate distinct value - // noinspection rawtypes - for (Object entry : (List) value) { - if (entry != null) { - updateSketchWithString(sketch, stringEncoding, entry.toString()); + if (processAsArray) { + final ExprEval eval = ExprEval.bestEffortArray((List) value); + final byte[] 
arrayBytes = ExprEval.toBytes(eval); + sketch.update(arrayBytes); + } else { + // Lists are treated as multi-value strings, which count each element as a separate distinct value + // noinspection rawtypes + for (Object entry : (List) value) { + if (entry != null) { + updateSketchWithString(sketch, stringEncoding, entry.toString()); + } } } } else if (value instanceof char[]) { diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchMergeAggregatorFactory.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchMergeAggregatorFactory.java index 833df8ab1a55..20d2a854c8cd 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchMergeAggregatorFactory.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchMergeAggregatorFactory.java @@ -64,7 +64,7 @@ public HllSketchMergeAggregatorFactory( @JsonProperty("round") final boolean round ) { - super(name, fieldName, lgK, tgtHllType, stringEncoding, shouldFinalize, round); + super(name, fieldName, lgK, tgtHllType, stringEncoding, shouldFinalize, round, false); } @Override diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchBaseSqlAggregator.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchBaseSqlAggregator.java index c6dd3e7afa02..a68227b01e7c 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchBaseSqlAggregator.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchBaseSqlAggregator.java @@ -193,7 +193,8 @@ public Aggregation toDruidAggregation( tgtHllType, stringEncoding, finalizeSketch || 
SketchQueryContext.isFinalizeOuterSketches(plannerContext), - ROUND + ROUND, + false ); } } diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/vector/ObjectHllSketchBuildVectorProcessor.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/vector/ObjectHllSketchBuildVectorProcessor.java index 56eceb15f5c1..51308db4a88d 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/vector/ObjectHllSketchBuildVectorProcessor.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/vector/ObjectHllSketchBuildVectorProcessor.java @@ -59,7 +59,8 @@ public void aggregate(ByteBuffer buf, int position, int startRow, int endRow) HllSketchBuildUtil.updateSketch( sketch, stringEncoding, - vector[i] + vector[i], + false ); } } @@ -79,7 +80,8 @@ public void aggregate(ByteBuffer buf, int numRows, int[] positions, @Nullable in HllSketchBuildUtil.updateSketch( sketch, stringEncoding, - vector[idx] + vector[idx], + false ); } } diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregator.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregator.java index db704a2733e5..b397622ad325 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregator.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregator.java @@ -36,14 +36,16 @@ public class SketchAggregator implements Aggregator private final BaseObjectColumnValueSelector selector; private final int size; + private final boolean processAsArray; @Nullable private Union union; - public SketchAggregator(BaseObjectColumnValueSelector selector, int size) + public 
SketchAggregator(BaseObjectColumnValueSelector selector, int size, boolean processAsArray) { this.selector = selector; this.size = size; + this.processAsArray = processAsArray; } private void initUnion() @@ -62,7 +64,7 @@ public void aggregate() if (union == null) { initUnion(); } - updateUnion(union, update); + updateUnion(union, update, processAsArray); } } @@ -86,7 +88,7 @@ public long aggregateWithSize() initialSketchSize = union.getCurrentBytes(); } - updateUnion(union, update); + updateUnion(union, update, processAsArray); long sketchSizeDelta = union.getCurrentBytes() - initialSketchSize; return sketchSizeDelta + unionSizeDelta; @@ -133,7 +135,7 @@ public void close() union = null; } - static void updateUnion(Union union, Object update) + static void updateUnion(Union union, Object update, boolean processAsArrays) { if (update instanceof SketchHolder) { ((SketchHolder) update).updateUnion(union); @@ -150,14 +152,20 @@ static void updateUnion(Union union, Object update) } else if (update instanceof long[]) { union.update((long[]) update); } else if (update instanceof Object[]) { - byte[] arrayBytes = ExprEval.toBytesBestEffort(update); + final byte[] arrayBytes = ExprEval.toBytesBestEffort(update); union.update(arrayBytes); } else if (update instanceof List) { - for (Object entry : (List) update) { - if (entry != null) { - final String asString = entry.toString(); - if (!NullHandling.isNullOrEquivalent(asString)) { - union.update(asString); + if (processAsArrays) { + final ExprEval eval = ExprEval.bestEffortArray((List) update); + final byte[] arrayBytes = ExprEval.toBytes(eval); + union.update(arrayBytes); + } else { + for (Object entry : (List) update) { + if (entry != null) { + final String asString = entry.toString(); + if (!NullHandling.isNullOrEquivalent(asString)) { + union.update(asString); + } } } } diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregatorFactory.java 
b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregatorFactory.java index a52ff1819e76..76b3be162c49 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregatorFactory.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregatorFactory.java @@ -63,7 +63,9 @@ public abstract class SketchAggregatorFactory extends AggregatorFactory protected final int size; private final byte cacheId; - public SketchAggregatorFactory(String name, String fieldName, Integer size, byte cacheId) + protected final boolean processAsArray; + + public SketchAggregatorFactory(String name, String fieldName, Integer size, byte cacheId, boolean processAsArray) { this.name = Preconditions.checkNotNull(name, "Must have a valid, non-null aggregator name"); this.fieldName = Preconditions.checkNotNull(fieldName, "Must have a valid, non-null fieldName"); @@ -72,6 +74,7 @@ public SketchAggregatorFactory(String name, String fieldName, Integer size, byte Util.checkIfIntPowerOf2(this.size, "size"); this.cacheId = cacheId; + this.processAsArray = processAsArray; } @SuppressWarnings("unchecked") @@ -79,14 +82,14 @@ public SketchAggregatorFactory(String name, String fieldName, Integer size, byte public Aggregator factorize(ColumnSelectorFactory metricFactory) { BaseObjectColumnValueSelector selector = metricFactory.makeColumnValueSelector(fieldName); - return new SketchAggregator(selector, size); + return new SketchAggregator(selector, size, processAsArray); } @Override public AggregatorAndSize factorizeWithSize(ColumnSelectorFactory metricFactory) { BaseObjectColumnValueSelector selector = metricFactory.makeColumnValueSelector(fieldName); - final SketchAggregator aggregator = new SketchAggregator(selector, size); + final SketchAggregator aggregator = new SketchAggregator(selector, size, processAsArray); return new 
AggregatorAndSize(aggregator, aggregator.getInitialSizeBytes()); } @@ -95,13 +98,13 @@ public AggregatorAndSize factorizeWithSize(ColumnSelectorFactory metricFactory) public BufferAggregator factorizeBuffered(ColumnSelectorFactory metricFactory) { BaseObjectColumnValueSelector selector = metricFactory.makeColumnValueSelector(fieldName); - return new SketchBufferAggregator(selector, size, getMaxIntermediateSizeWithNulls()); + return new SketchBufferAggregator(selector, size, getMaxIntermediateSizeWithNulls(), processAsArray); } @Override public VectorAggregator factorizeVector(VectorColumnSelectorFactory selectorFactory) { - return new SketchVectorAggregator(selectorFactory, fieldName, size, getMaxIntermediateSizeWithNulls()); + return new SketchVectorAggregator(selectorFactory, fieldName, size, getMaxIntermediateSizeWithNulls(), processAsArray); } @Override diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchBufferAggregator.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchBufferAggregator.java index 34aae3f36e18..787b3f84402b 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchBufferAggregator.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchBufferAggregator.java @@ -31,11 +31,13 @@ public class SketchBufferAggregator implements BufferAggregator { private final BaseObjectColumnValueSelector selector; private final SketchBufferAggregatorHelper helper; + private final boolean processAsArray; - public SketchBufferAggregator(BaseObjectColumnValueSelector selector, int size, int maxIntermediateSize) + public SketchBufferAggregator(BaseObjectColumnValueSelector selector, int size, int maxIntermediateSize, boolean processAsArray) { this.selector = selector; this.helper = new SketchBufferAggregatorHelper(size, 
maxIntermediateSize); + this.processAsArray = processAsArray; } @Override @@ -53,7 +55,7 @@ public void aggregate(ByteBuffer buf, int position) } Union union = helper.getOrCreateUnion(buf, position); - SketchAggregator.updateUnion(union, update); + SketchAggregator.updateUnion(union, update, processAsArray); } diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchMergeAggregatorFactory.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchMergeAggregatorFactory.java index 41869d5ea509..33626448e3dc 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchMergeAggregatorFactory.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchMergeAggregatorFactory.java @@ -46,10 +46,11 @@ public SketchMergeAggregatorFactory( @JsonProperty("size") @Nullable Integer size, @JsonProperty("shouldFinalize") @Nullable Boolean shouldFinalize, @JsonProperty("isInputThetaSketch") @Nullable Boolean isInputThetaSketch, - @JsonProperty("errorBoundsStdDev") @Nullable Integer errorBoundsStdDev + @JsonProperty("errorBoundsStdDev") @Nullable Integer errorBoundsStdDev, + @JsonProperty("processAsArray") boolean processAsArray ) { - super(name, fieldName, size, AggregatorUtil.SKETCH_MERGE_CACHE_TYPE_ID); + super(name, fieldName, size, AggregatorUtil.SKETCH_MERGE_CACHE_TYPE_ID, processAsArray); this.shouldFinalize = (shouldFinalize == null) ? true : shouldFinalize; this.isInputThetaSketch = (isInputThetaSketch == null) ? 
false : isInputThetaSketch; this.errorBoundsStdDev = errorBoundsStdDev; @@ -65,7 +66,8 @@ public List getRequiredColumns() size, shouldFinalize, isInputThetaSketch, - errorBoundsStdDev + errorBoundsStdDev, + processAsArray ) ); } @@ -73,7 +75,7 @@ public List getRequiredColumns() @Override public AggregatorFactory getCombiningFactory() { - return new SketchMergeAggregatorFactory(name, name, size, shouldFinalize, false, errorBoundsStdDev); + return new SketchMergeAggregatorFactory(name, name, size, shouldFinalize, false, errorBoundsStdDev, processAsArray); } @Override @@ -88,7 +90,8 @@ public AggregatorFactory getMergingFactory(AggregatorFactory other) throws Aggre Math.max(size, castedOther.size), shouldFinalize, false, - errorBoundsStdDev + errorBoundsStdDev, + processAsArray ); } else { throw new AggregatorFactoryNotMergeableException(this, other); @@ -117,6 +120,13 @@ public Integer getErrorBoundsStdDev() return errorBoundsStdDev; } + @JsonProperty + @JsonInclude(JsonInclude.Include.NON_DEFAULT) + public boolean isProcessAsArray() + { + return processAsArray; + } + /** * Finalize the computation on sketch object and returns estimate from underlying * sketch. @@ -178,7 +188,8 @@ public AggregatorFactory withName(String newName) getSize(), getShouldFinalize(), getIsInputThetaSketch(), - getErrorBoundsStdDev() + getErrorBoundsStdDev(), + isProcessAsArray() ); } @@ -212,7 +223,7 @@ public boolean equals(Object o) return false; } - return isInputThetaSketch == that.isInputThetaSketch; + return isInputThetaSketch == that.isInputThetaSketch && processAsArray == that.processAsArray; } @Override @@ -222,6 +233,7 @@ public int hashCode() result = 31 * result + (shouldFinalize ? 1 : 0); result = 31 * result + (isInputThetaSketch ? 1 : 0); result = 31 * result + (errorBoundsStdDev != null ? errorBoundsStdDev.hashCode() : 0); + result = 31 * result + (processAsArray ? 
1 : 0); return result; } @@ -235,6 +247,7 @@ public String toString() + ", shouldFinalize=" + shouldFinalize + ", isInputThetaSketch=" + isInputThetaSketch + ", errorBoundsStdDev=" + errorBoundsStdDev + + ", processAsArray=" + processAsArray + "}"; } } diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchVectorAggregator.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchVectorAggregator.java index a862265d561c..43125504177a 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchVectorAggregator.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchVectorAggregator.java @@ -33,15 +33,18 @@ public class SketchVectorAggregator implements VectorAggregator { private final SketchBufferAggregatorHelper helper; private final Supplier objectSupplier; + private final boolean processAsArray; SketchVectorAggregator( final VectorColumnSelectorFactory columnSelectorFactory, final String column, final int size, - final int maxIntermediateSize + final int maxIntermediateSize, + final boolean processAsArray ) { this.helper = new SketchBufferAggregatorHelper(size, maxIntermediateSize); + this.processAsArray = processAsArray; this.objectSupplier = ColumnProcessors.makeVectorProcessor( column, @@ -65,7 +68,7 @@ public void aggregate(final ByteBuffer buf, final int position, final int startR for (int i = startRow; i < endRow; i++) { final Object o = vector[i]; if (o != null) { - SketchAggregator.updateUnion(union, o); + SketchAggregator.updateUnion(union, o, processAsArray); } } } @@ -87,7 +90,7 @@ public void aggregate( if (o != null) { final int position = positions[i] + positionOffset; final Union union = helper.getOrCreateUnion(buf, position); - SketchAggregator.updateUnion(union, o); + SketchAggregator.updateUnion(union, o, processAsArray); 
} } } diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/oldapi/OldSketchBuildAggregatorFactory.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/oldapi/OldSketchBuildAggregatorFactory.java index 2c8d21941eb3..4868fb74d298 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/oldapi/OldSketchBuildAggregatorFactory.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/oldapi/OldSketchBuildAggregatorFactory.java @@ -35,7 +35,7 @@ public OldSketchBuildAggregatorFactory( @JsonProperty("size") Integer size ) { - super(name, fieldName, size, true, false, null); + super(name, fieldName, size, true, false, null, false); } @Override diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/oldapi/OldSketchMergeAggregatorFactory.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/oldapi/OldSketchMergeAggregatorFactory.java index e884b210a3a0..94070ec87a69 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/oldapi/OldSketchMergeAggregatorFactory.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/oldapi/OldSketchMergeAggregatorFactory.java @@ -36,7 +36,7 @@ public OldSketchMergeAggregatorFactory( @JsonProperty("shouldFinalize") Boolean shouldFinalize ) { - super(name, fieldName, size, shouldFinalize, true, null); + super(name, fieldName, size, shouldFinalize, true, null, false); } @Override diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchBaseSqlAggregator.java 
b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchBaseSqlAggregator.java index 6564b276c971..219753822c3e 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchBaseSqlAggregator.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchBaseSqlAggregator.java @@ -116,7 +116,8 @@ public Aggregation toDruidAggregation( sketchSize, finalizeSketch || SketchQueryContext.isFinalizeOuterSketches(plannerContext), null, - null + null, + false ); } else { final RelDataType dataType = columnRexNode.getType(); @@ -147,7 +148,8 @@ public Aggregation toDruidAggregation( sketchSize, finalizeSketch || SketchQueryContext.isFinalizeOuterSketches(plannerContext), null, - null + null, + false ); } diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorFactoryTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorFactoryTest.java index 2c4ff635faa4..5d8eb0d5ede3 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorFactoryTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorFactoryTest.java @@ -263,7 +263,7 @@ public void testToString() .collect(Collectors.toList()); for (Field field : toStringFields) { - if ("shouldFinalize".equals(field.getName()) || "stringEncoding".equals(field.getName())) { + if ("shouldFinalize".equals(field.getName()) || "stringEncoding".equals(field.getName()) || "processAsArray".equals(field.getName())) { // Skip; not included in the toString if it has the default value. 
continue; } @@ -290,6 +290,7 @@ public void testResultArraySignature() null, null, null, + false, false ), new HllSketchBuildAggregatorFactory( @@ -299,7 +300,8 @@ public void testResultArraySignature() null, null, null, - true + true, + false ), new HllSketchMergeAggregatorFactory( "hllMerge", @@ -382,7 +384,7 @@ private static class TestHllSketchAggregatorFactory extends HllSketchAggregatorF boolean round ) { - super(name, fieldName, lgK, tgtHllType, stringEncoding, null, round); + super(name, fieldName, lgK, tgtHllType, stringEncoding, null, round, false); } @Override diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorTest.java index 71f1bb9be435..353b9722c4d8 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorTest.java @@ -23,6 +23,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; +import org.apache.druid.data.input.ResourceInputSource; import org.apache.druid.java.util.common.Intervals; import org.apache.druid.java.util.common.StringEncoding; import org.apache.druid.java.util.common.granularity.Granularities; @@ -41,10 +42,14 @@ import org.apache.druid.query.groupby.GroupByQueryRunnerTest; import org.apache.druid.query.groupby.ResultRow; import org.apache.druid.query.timeseries.TimeseriesResultValue; -import org.apache.druid.segment.IndexSpec; +import org.apache.druid.segment.IncrementalIndexSegment; +import org.apache.druid.segment.IndexBuilder; +import org.apache.druid.segment.QueryableIndexSegment; import org.apache.druid.segment.Segment; +import 
org.apache.druid.segment.incremental.IncrementalIndexSchema; import org.apache.druid.segment.transform.TransformSpec; import org.apache.druid.testing.InitializedNullHandlingTest; +import org.apache.druid.timeline.SegmentId; import org.junit.After; import org.junit.Assert; import org.junit.Rule; @@ -438,31 +443,42 @@ public void testPostAggs() throws Exception @Test public void testArrays() throws Exception { + AggregatorFactory[] aggs = new AggregatorFactory[]{ + new HllSketchBuildAggregatorFactory("hll0", "arrayString", null, null, null, false, false, true), + new HllSketchBuildAggregatorFactory("hll1", "arrayLong", null, null, null, false, false, true), + new HllSketchBuildAggregatorFactory("hll2", "arrayDouble", null, null, null, false, false, true), + new HllSketchBuildAggregatorFactory("hll3", "arrayString", null, null, null, false, false, false), + new HllSketchBuildAggregatorFactory("hll4", "arrayLong", null, null, null, false, false, false), + new HllSketchBuildAggregatorFactory("hll5", "arrayDouble", null, null, null, false, false, false) + }; + + IndexBuilder bob = IndexBuilder.create(timeseriesHelper.getObjectMapper()) + .tmpDir(groupByFolder.newFolder()) + .schema( + IncrementalIndexSchema.builder() + .withTimestampSpec(NestedDataTestUtils.TIMESTAMP_SPEC) + .withDimensionsSpec(NestedDataTestUtils.AUTO_DISCOVERY) + .withMetrics(aggs) + .withQueryGranularity(Granularities.NONE) + .withRollup(true) + .withMinTimestamp(0) + .build() + ) + .inputSource( + ResourceInputSource.of( + NestedDataTestUtils.class.getClassLoader(), + NestedDataTestUtils.ARRAY_TYPES_DATA_FILE + ) + ) + .inputFormat(NestedDataTestUtils.DEFAULT_JSON_INPUT_FORMAT) + .transform(TransformSpec.NONE) + .inputTmpDir(groupByFolder.newFolder()); + List realtimeSegs = ImmutableList.of( - NestedDataTestUtils.createIncrementalIndex( - groupByFolder, - NestedDataTestUtils.ARRAY_TYPES_DATA_FILE, - NestedDataTestUtils.DEFAULT_JSON_INPUT_FORMAT, - NestedDataTestUtils.TIMESTAMP_SPEC, - 
NestedDataTestUtils.AUTO_DISCOVERY, - TransformSpec.NONE, - new AggregatorFactory[0], - Granularities.NONE, - true - ) + new IncrementalIndexSegment(bob.buildIncrementalIndex(), SegmentId.dummy("test_datasource")) ); - List segs = NestedDataTestUtils.createSegments( - groupByFolder, - closer, - NestedDataTestUtils.ARRAY_TYPES_DATA_FILE, - NestedDataTestUtils.DEFAULT_JSON_INPUT_FORMAT, - NestedDataTestUtils.TIMESTAMP_SPEC, - NestedDataTestUtils.AUTO_DISCOVERY, - TransformSpec.NONE, - new AggregatorFactory[0], - Granularities.NONE, - true, - IndexSpec.DEFAULT + List segs = ImmutableList.of( + new QueryableIndexSegment(bob.buildMMappedMergedIndex(), SegmentId.dummy("test_datasource")) ); GroupByQuery query = GroupByQuery.builder() @@ -470,10 +486,16 @@ public void testArrays() throws Exception .setGranularity(Granularities.ALL) .setInterval(Intervals.ETERNITY) .setAggregatorSpecs( - new HllSketchBuildAggregatorFactory("a0", "arrayString", null, null, null, false, false), - new HllSketchBuildAggregatorFactory("a1", "arrayLong", null, null, null, false, false), - new HllSketchBuildAggregatorFactory("a2", "arrayDouble", null, null, null, false, false), - new CountAggregatorFactory("a3") + new HllSketchBuildAggregatorFactory("a0", "arrayString", null, null, null, false, false, false), + new HllSketchBuildAggregatorFactory("a1", "arrayLong", null, null, null, false, false, false), + new HllSketchBuildAggregatorFactory("a2", "arrayDouble", null, null, null, false, false, false), + new HllSketchMergeAggregatorFactory("a3", "hll0", null, null, null, false, false), + new HllSketchMergeAggregatorFactory("a4", "hll1", null, null, null, false, false), + new HllSketchMergeAggregatorFactory("a5", "hll2", null, null, null, false, false), + new HllSketchMergeAggregatorFactory("a6", "hll3", null, null, null, false, false), + new HllSketchMergeAggregatorFactory("a7", "hll4", null, null, null, false, false), + new HllSketchMergeAggregatorFactory("a8", "hll5", null, null, null, false, 
false), + new CountAggregatorFactory("a9") ) .setPostAggregatorSpecs( ImmutableList.of( @@ -491,6 +513,38 @@ public void testArrays() throws Exception "p2", new FieldAccessPostAggregator("f2", "a2"), false + ), + // pre-aggregated array counts + new HllSketchToEstimatePostAggregator( + "p3", + new FieldAccessPostAggregator("f3", "a3"), + false + ), + new HllSketchToEstimatePostAggregator( + "p4", + new FieldAccessPostAggregator("f4", "a4"), + false + ), + new HllSketchToEstimatePostAggregator( + "p5", + new FieldAccessPostAggregator("f5", "a5"), + false + ), + // array element counts + new HllSketchToEstimatePostAggregator( + "p6", + new FieldAccessPostAggregator("f6", "a6"), + false + ), + new HllSketchToEstimatePostAggregator( + "p7", + new FieldAccessPostAggregator("f7", "a7"), + false + ), + new HllSketchToEstimatePostAggregator( + "p8", + new FieldAccessPostAggregator("f8", "a8"), + false ) ) ) @@ -503,15 +557,34 @@ public void testArrays() throws Exception // expect 4 distinct arrays for each of these columns from 14 rows Assert.assertEquals(1, realtimeList.size()); - Assert.assertEquals(14L, realtimeList.get(0).get(3)); - Assert.assertEquals(4.0, (Double) realtimeList.get(0).get(4), 0.01); - Assert.assertEquals(4.0, (Double) realtimeList.get(0).get(5), 0.01); - Assert.assertEquals(4.0, (Double) realtimeList.get(0).get(6), 0.01); + Assert.assertEquals(14L, realtimeList.get(0).get(9)); + // array column estimate counts + Assert.assertEquals(4.0, (Double) realtimeList.get(0).get(10), 0.01); + Assert.assertEquals(4.0, (Double) realtimeList.get(0).get(11), 0.01); + Assert.assertEquals(4.0, (Double) realtimeList.get(0).get(12), 0.01); + // pre-aggregated arrays counts + Assert.assertEquals(4.0, (Double) realtimeList.get(0).get(13), 0.01); + Assert.assertEquals(4.0, (Double) realtimeList.get(0).get(14), 0.01); + Assert.assertEquals(4.0, (Double) realtimeList.get(0).get(15), 0.01); + // if processAsArray is false, count is done as string mvds so it counts the total 
number of elements + Assert.assertEquals(5.0, (Double) realtimeList.get(0).get(16), 0.01); + Assert.assertEquals(4.0, (Double) realtimeList.get(0).get(17), 0.01); + Assert.assertEquals(6.0, (Double) realtimeList.get(0).get(18), 0.01); + Assert.assertEquals(1, list.size()); - Assert.assertEquals(14L, list.get(0).get(3)); - Assert.assertEquals(4.0, (Double) list.get(0).get(4), 0.01); - Assert.assertEquals(4.0, (Double) list.get(0).get(5), 0.01); - Assert.assertEquals(4.0, (Double) list.get(0).get(6), 0.01); + Assert.assertEquals(14L, list.get(0).get(9)); + // array column estimate counts + Assert.assertEquals(4.0, (Double) list.get(0).get(10), 0.01); + Assert.assertEquals(4.0, (Double) list.get(0).get(11), 0.01); + Assert.assertEquals(4.0, (Double) list.get(0).get(12), 0.01); + // pre-aggregated arrays counts + Assert.assertEquals(4.0, (Double) list.get(0).get(13), 0.01); + Assert.assertEquals(4.0, (Double) list.get(0).get(14), 0.01); + Assert.assertEquals(4.0, (Double) list.get(0).get(15), 0.01); + // if processAsArray is false, count is done as string mvds so it counts the total number of elements + Assert.assertEquals(5.0, (Double) list.get(0).get(16), 0.01); + Assert.assertEquals(4.0, (Double) list.get(0).get(17), 0.01); + Assert.assertEquals(6.0, (Double) list.get(0).get(18), 0.01); } private static String buildParserJson(List dimensions, List columns) diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildAggregatorFactoryTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildAggregatorFactoryTest.java index 51ca671cd0d7..8d8ab6fcd82c 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildAggregatorFactoryTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildAggregatorFactoryTest.java @@ -50,6 
+50,7 @@ public void testSerde() throws IOException TgtHllType.HLL_8.name(), StringEncoding.UTF8, false, + true, true ); @@ -57,7 +58,7 @@ public void testSerde() throws IOException Assert.assertEquals( "{\"type\":\"HLLSketchBuild\",\"name\":\"foo\",\"fieldName\":\"bar\",\"lgK\":18,\"tgtHllType\":\"HLL_8\"," - + "\"stringEncoding\":\"utf8\",\"shouldFinalize\":false,\"round\":true}", + + "\"stringEncoding\":\"utf8\",\"shouldFinalize\":false,\"round\":true,\"processAsArray\":true}", serializedString ); @@ -79,6 +80,7 @@ public void testSerdeWithDefaults() throws IOException null, null, null, + false, false ); diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildUtilTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildUtilTest.java index eca5e6f37a4f..98c129d29413 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildUtilTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildUtilTest.java @@ -204,12 +204,12 @@ private void updateSketch(final StringEncoding stringEncoding, final Object firs { // first != null check mimics how updateSketch is called: it's always guarded by a null check on the outer value. 
if (first != null) { - HllSketchBuildUtil.updateSketch(sketch, stringEncoding, first); + HllSketchBuildUtil.updateSketch(sketch, stringEncoding, first, false); } for (final Object o : others) { if (o != null) { - HllSketchBuildUtil.updateSketch(sketch, stringEncoding, o); + HllSketchBuildUtil.updateSketch(sketch, stringEncoding, o, false); } } } diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchMergeAggregatorFactoryTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchMergeAggregatorFactoryTest.java index 101b25b99be0..01bff8952591 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchMergeAggregatorFactoryTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchMergeAggregatorFactoryTest.java @@ -93,7 +93,8 @@ public void testGetMergingFactoryBadType() throws Exception TGT_HLL_TYPE, STRING_ENCODING, SHOULD_FINALIZE, - ROUND + ROUND, + false ); targetRound.getMergingFactory(other); } diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java index 1a2ef18a3d61..7d50ae16bdbb 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java @@ -171,13 +171,13 @@ public class HllSketchSqlAggregatorTest extends BaseCalciteQueryTest */ private static final List EXPECTED_PA_AGGREGATORS = ImmutableList.of( - new HllSketchBuildAggregatorFactory("a0", "dim2", null, null, null, false, true), - new 
HllSketchBuildAggregatorFactory("a1", "m1", null, null, null, false, true), - new HllSketchBuildAggregatorFactory("a2", "cnt", null, null, null, false, true), - new HllSketchBuildAggregatorFactory("a3", "v0", null, null, null, false, true), - new HllSketchBuildAggregatorFactory("a4", "v1", null, null, null, false, true), - new HllSketchBuildAggregatorFactory("a5", "dim2", null, null, null, true, true), - new HllSketchBuildAggregatorFactory("a6", "dim2", null, null, StringEncoding.UTF8, true, true) + new HllSketchBuildAggregatorFactory("a0", "dim2", null, null, null, false, true, false), + new HllSketchBuildAggregatorFactory("a1", "m1", null, null, null, false, true, false), + new HllSketchBuildAggregatorFactory("a2", "cnt", null, null, null, false, true, false), + new HllSketchBuildAggregatorFactory("a3", "v0", null, null, null, false, true, false), + new HllSketchBuildAggregatorFactory("a4", "v1", null, null, null, false, true, false), + new HllSketchBuildAggregatorFactory("a5", "dim2", null, null, null, true, true, false), + new HllSketchBuildAggregatorFactory("a6", "dim2", null, null, StringEncoding.UTF8, true, true, false) ); /** @@ -266,12 +266,12 @@ public SpecificSegmentsQuerySegmentWalker createQuerySegmentWalker( .withMetrics( new CountAggregatorFactory("cnt"), new DoubleSumAggregatorFactory("m1", "m1"), - new HllSketchBuildAggregatorFactory("hllsketch_dim1", "dim1", null, null, null, false, ROUND), - new HllSketchBuildAggregatorFactory("hllsketch_dim3", "dim3", null, null, null, false, false), - new HllSketchBuildAggregatorFactory("hllsketch_m1", "m1", null, null, null, false, ROUND), - new HllSketchBuildAggregatorFactory("hllsketch_f1", "f1", null, null, null, false, ROUND), - new HllSketchBuildAggregatorFactory("hllsketch_l1", "l1", null, null, null, false, ROUND), - new HllSketchBuildAggregatorFactory("hllsketch_d1", "d1", null, null, null, false, ROUND) + new HllSketchBuildAggregatorFactory("hllsketch_dim1", "dim1", null, null, null, false, ROUND, 
false), + new HllSketchBuildAggregatorFactory("hllsketch_dim3", "dim3", null, null, null, false, false, false), + new HllSketchBuildAggregatorFactory("hllsketch_m1", "m1", null, null, null, false, ROUND, false), + new HllSketchBuildAggregatorFactory("hllsketch_f1", "f1", null, null, null, false, ROUND, false), + new HllSketchBuildAggregatorFactory("hllsketch_l1", "l1", null, null, null, false, ROUND, false), + new HllSketchBuildAggregatorFactory("hllsketch_d1", "d1", null, null, null, false, ROUND, false) ) .withRollup(false) .build() @@ -344,13 +344,13 @@ public void testApproxCountDistinctHllSketch() .aggregators( ImmutableList.of( new LongSumAggregatorFactory("a0", "cnt"), - new HllSketchBuildAggregatorFactory("a1", "dim2", null, null, null, null, ROUND), + new HllSketchBuildAggregatorFactory("a1", "dim2", null, null, null, null, ROUND, false), new FilteredAggregatorFactory( - new HllSketchBuildAggregatorFactory("a2", "dim2", null, null, null, null, ROUND), + new HllSketchBuildAggregatorFactory("a2", "dim2", null, null, null, null, ROUND, false), not(equality("dim2", "", ColumnType.STRING)) ), - new HllSketchBuildAggregatorFactory("a3", "v0", null, null, null, null, ROUND), - new HllSketchBuildAggregatorFactory("a4", "v1", null, null, null, null, ROUND), + new HllSketchBuildAggregatorFactory("a3", "v0", null, null, null, null, ROUND, false), + new HllSketchBuildAggregatorFactory("a4", "v1", null, null, null, null, ROUND, false), new HllSketchMergeAggregatorFactory("a5", "hllsketch_dim1", 21, "HLL_8", null, null, ROUND), new HllSketchMergeAggregatorFactory("a6", "hllsketch_dim1", null, null, null, null, ROUND), new HllSketchMergeAggregatorFactory("a7", "hllsketch_dim1", 21, "HLL_4", null, null, ROUND) @@ -403,7 +403,8 @@ public void testAvgDailyCountDistinctHllSketch() null, null, null, - ROUND + ROUND, + false ) ) ) @@ -480,7 +481,7 @@ public void testApproxCountDistinctHllSketchIsRounded() .setGranularity(Granularities.ALL) .setAggregatorSpecs( aggregators( - 
new HllSketchBuildAggregatorFactory("a0", "m1", null, null, null, true, true) + new HllSketchBuildAggregatorFactory("a0", "m1", null, null, null, true, true, false) ) ) .setHavingSpec(having(equality("a0", 2L, ColumnType.LONG))) @@ -730,11 +731,11 @@ public void testHllSketchPostAggsFinalizeOuterSketches() ) .aggregators( ImmutableList.of( - new HllSketchBuildAggregatorFactory("a0", "dim2", null, null, null, true, true), - new HllSketchBuildAggregatorFactory("a1", "m1", null, null, null, true, true), - new HllSketchBuildAggregatorFactory("a2", "v0", null, null, null, true, true), - new HllSketchBuildAggregatorFactory("a3", "v1", null, null, null, true, true), - new HllSketchBuildAggregatorFactory("a4", "dim2", null, null, null, true, true) + new HllSketchBuildAggregatorFactory("a0", "dim2", null, null, null, true, true, false), + new HllSketchBuildAggregatorFactory("a1", "m1", null, null, null, true, true, false), + new HllSketchBuildAggregatorFactory("a2", "v0", null, null, null, true, true, false), + new HllSketchBuildAggregatorFactory("a3", "v1", null, null, null, true, true, false), + new HllSketchBuildAggregatorFactory("a4", "dim2", null, null, null, true, true, false) ) ) .postAggregators( @@ -820,7 +821,8 @@ public void testtHllSketchPostAggsPostSort() null, null, false, - true + true, + false ) ) ) @@ -870,7 +872,8 @@ public void testEmptyTimeseriesResults() null, null, null, - true + true, + false ), new HllSketchBuildAggregatorFactory( "a1", @@ -879,7 +882,8 @@ public void testEmptyTimeseriesResults() null, null, false, - true + true, + false ) ) ) @@ -916,7 +920,8 @@ public void testGroupByAggregatorDefaultValues() null, null, null, - true + true, + false ), equality("dim1", "nonexistent", ColumnType.STRING) ), @@ -928,7 +933,8 @@ public void testGroupByAggregatorDefaultValues() null, null, false, - true + true, + false ), equality("dim1", "nonexistent", ColumnType.STRING) ) @@ -968,11 +974,11 @@ public void 
testGroupByAggregatorDefaultValuesFinalizeOuterSketches() .setAggregatorSpecs( aggregators( new FilteredAggregatorFactory( - new HllSketchBuildAggregatorFactory("a0", "v0", null, null, null, null, true), + new HllSketchBuildAggregatorFactory("a0", "v0", null, null, null, null, true, false), equality("dim1", "nonexistent", ColumnType.STRING) ), new FilteredAggregatorFactory( - new HllSketchBuildAggregatorFactory("a1", "v0", null, null, null, null, true), + new HllSketchBuildAggregatorFactory("a1", "v0", null, null, null, null, true, false), equality("dim1", "nonexistent", ColumnType.STRING) ) ) diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/BufferHashGrouperUsingSketchMergeAggregatorFactoryTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/BufferHashGrouperUsingSketchMergeAggregatorFactoryTest.java index f6d922ba3ae6..7e90ba3398c9 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/BufferHashGrouperUsingSketchMergeAggregatorFactoryTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/BufferHashGrouperUsingSketchMergeAggregatorFactoryTest.java @@ -52,7 +52,7 @@ private static BufferHashGrouper makeGrouper( AggregatorAdapters.factorizeBuffered( columnSelectorFactory, ImmutableList.of( - new SketchMergeAggregatorFactory("sketch", "sketch", 16, false, true, 2), + new SketchMergeAggregatorFactory("sketch", "sketch", 16, false, true, 2, false), new CountAggregatorFactory("count") ) ), diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregationTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregationTest.java index 1ffb0bf7ced7..79d1353b833f 100644 --- 
a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregationTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregationTest.java @@ -31,6 +31,7 @@ import org.apache.datasketches.theta.Union; import org.apache.datasketches.theta.UpdateSketch; import org.apache.druid.data.input.MapBasedRow; +import org.apache.druid.data.input.ResourceInputSource; import org.apache.druid.java.util.common.DateTimes; import org.apache.druid.java.util.common.Intervals; import org.apache.druid.java.util.common.granularity.Granularities; @@ -52,9 +53,13 @@ import org.apache.druid.query.groupby.ResultRow; import org.apache.druid.query.groupby.epinephelinae.GroupByTestColumnSelectorFactory; import org.apache.druid.query.groupby.epinephelinae.GrouperTestUtil; -import org.apache.druid.segment.IndexSpec; +import org.apache.druid.segment.IncrementalIndexSegment; +import org.apache.druid.segment.IndexBuilder; +import org.apache.druid.segment.QueryableIndexSegment; import org.apache.druid.segment.Segment; +import org.apache.druid.segment.incremental.IncrementalIndexSchema; import org.apache.druid.segment.transform.TransformSpec; +import org.apache.druid.timeline.SegmentId; import org.junit.After; import org.junit.Assert; import org.junit.Rule; @@ -310,10 +315,10 @@ public void testThetaCardinalityOnSimpleColumn() throws Exception @Test public void testSketchMergeAggregatorFactorySerde() throws Exception { - assertAggregatorFactorySerde(new SketchMergeAggregatorFactory("name", "fieldName", 16, null, null, null)); - assertAggregatorFactorySerde(new SketchMergeAggregatorFactory("name", "fieldName", 16, false, true, null)); - assertAggregatorFactorySerde(new SketchMergeAggregatorFactory("name", "fieldName", 16, true, false, null)); - assertAggregatorFactorySerde(new SketchMergeAggregatorFactory("name", "fieldName", 16, true, false, 2)); + assertAggregatorFactorySerde(new 
SketchMergeAggregatorFactory("name", "fieldName", 16, null, null, null, false)); + assertAggregatorFactorySerde(new SketchMergeAggregatorFactory("name", "fieldName", 16, false, true, null, false)); + assertAggregatorFactorySerde(new SketchMergeAggregatorFactory("name", "fieldName", 16, true, false, null, false)); + assertAggregatorFactorySerde(new SketchMergeAggregatorFactory("name", "fieldName", 16, true, false, 2, false)); } @Test @@ -321,16 +326,16 @@ public void testSketchMergeFinalization() { SketchHolder sketch = SketchHolder.of(Sketches.updateSketchBuilder().setNominalEntries(128).build()); - SketchMergeAggregatorFactory agg = new SketchMergeAggregatorFactory("name", "fieldName", 16, null, null, null); + SketchMergeAggregatorFactory agg = new SketchMergeAggregatorFactory("name", "fieldName", 16, null, null, null, false); Assert.assertEquals(0.0, ((Double) agg.finalizeComputation(sketch)).doubleValue(), 0.0001); - agg = new SketchMergeAggregatorFactory("name", "fieldName", 16, true, null, null); + agg = new SketchMergeAggregatorFactory("name", "fieldName", 16, true, null, null, false); Assert.assertEquals(0.0, ((Double) agg.finalizeComputation(sketch)).doubleValue(), 0.0001); - agg = new SketchMergeAggregatorFactory("name", "fieldName", 16, false, null, null); + agg = new SketchMergeAggregatorFactory("name", "fieldName", 16, false, null, null, false); Assert.assertEquals(sketch, agg.finalizeComputation(sketch)); - agg = new SketchMergeAggregatorFactory("name", "fieldName", 16, true, null, 2); + agg = new SketchMergeAggregatorFactory("name", "fieldName", 16, true, null, 2, false); SketchEstimateWithErrorBounds est = (SketchEstimateWithErrorBounds) agg.finalizeComputation(sketch); Assert.assertEquals(0.0, est.getEstimate(), 0.0001); Assert.assertEquals(0.0, est.getHighBound(), 0.0001); @@ -342,31 +347,89 @@ public void testSketchMergeFinalization() @Test public void testArrays() throws Exception { - List realtimeSegs = ImmutableList.of( - 
NestedDataTestUtils.createIncrementalIndex( - tempFolder, - NestedDataTestUtils.ARRAY_TYPES_DATA_FILE, - NestedDataTestUtils.DEFAULT_JSON_INPUT_FORMAT, - NestedDataTestUtils.TIMESTAMP_SPEC, - NestedDataTestUtils.AUTO_DISCOVERY, - TransformSpec.NONE, - new AggregatorFactory[0], - Granularities.NONE, + AggregatorFactory[] aggs = new AggregatorFactory[]{ + new SketchMergeAggregatorFactory( + "sketch0", + "arrayString", + null, + null, + null, + null, + true + ), + new SketchMergeAggregatorFactory( + "sketch1", + "arrayLong", + null, + null, + null, + null, true + ), + new SketchMergeAggregatorFactory( + "sketch2", + "arrayDouble", + null, + null, + null, + null, + true + ), + new SketchMergeAggregatorFactory( + "sketch3", + "arrayString", + null, + null, + null, + null, + false + ), + new SketchMergeAggregatorFactory( + "sketch4", + "arrayLong", + null, + null, + null, + null, + false + ), + new SketchMergeAggregatorFactory( + "sketch5", + "arrayDouble", + null, + null, + null, + null, + false ) + }; + IndexBuilder bob = IndexBuilder.create(helper.getObjectMapper()) + .tmpDir(tempFolder.newFolder()) + .schema( + IncrementalIndexSchema.builder() + .withTimestampSpec(NestedDataTestUtils.TIMESTAMP_SPEC) + .withDimensionsSpec(NestedDataTestUtils.AUTO_DISCOVERY) + .withMetrics(aggs) + .withQueryGranularity(Granularities.NONE) + .withRollup(true) + .withMinTimestamp(0) + .build() + ) + .inputSource( + ResourceInputSource.of( + NestedDataTestUtils.class.getClassLoader(), + NestedDataTestUtils.ARRAY_TYPES_DATA_FILE + ) + ) + .inputFormat(NestedDataTestUtils.DEFAULT_JSON_INPUT_FORMAT) + .transform(TransformSpec.NONE) + .inputTmpDir(tempFolder.newFolder()); + + List realtimeSegs = ImmutableList.of( + new IncrementalIndexSegment(bob.buildIncrementalIndex(), SegmentId.dummy("test_datasource")) ); - List segs = NestedDataTestUtils.createSegments( - tempFolder, - closer, - NestedDataTestUtils.ARRAY_TYPES_DATA_FILE, - NestedDataTestUtils.DEFAULT_JSON_INPUT_FORMAT, - 
NestedDataTestUtils.TIMESTAMP_SPEC, - NestedDataTestUtils.AUTO_DISCOVERY, - TransformSpec.NONE, - new AggregatorFactory[0], - Granularities.NONE, - true, - IndexSpec.DEFAULT + List segs = ImmutableList.of( + new QueryableIndexSegment(bob.buildMMappedMergedIndex(), SegmentId.dummy("test_datasource")) ); GroupByQuery query = GroupByQuery.builder() @@ -380,17 +443,17 @@ public void testArrays() throws Exception null, null, null, - null - ) - { - }, + null, + false + ), new SketchMergeAggregatorFactory( "a1", "arrayLong", null, null, null, - null + null, + false ), new SketchMergeAggregatorFactory( "a2", @@ -398,9 +461,64 @@ public void testArrays() throws Exception null, null, null, - null + null, + false + ), + new SketchMergeAggregatorFactory( + "a3", + "sketch0", + null, + null, + true, + null, + false + ), + new SketchMergeAggregatorFactory( + "a4", + "sketch1", + null, + null, + true, + null, + false + ), + new SketchMergeAggregatorFactory( + "a5", + "sketch2", + null, + null, + true, + null, + false + ), + new SketchMergeAggregatorFactory( + "a6", + "sketch3", + null, + null, + true, + null, + false + ), + new SketchMergeAggregatorFactory( + "a7", + "sketch4", + null, + null, + true, + null, + false + ), + new SketchMergeAggregatorFactory( + "a8", + "sketch5", + null, + null, + true, + null, + false ), - new CountAggregatorFactory("a3") + new CountAggregatorFactory("a9") ) .setPostAggregatorSpecs( ImmutableList.of( @@ -418,6 +536,34 @@ public void testArrays() throws Exception "p2", new FieldAccessPostAggregator("f2", "a2"), null + ),new SketchEstimatePostAggregator( + "p3", + new FieldAccessPostAggregator("f3", "a3"), + null + ), + new SketchEstimatePostAggregator( + "p4", + new FieldAccessPostAggregator("f4", "a4"), + null + ), + new SketchEstimatePostAggregator( + "p5", + new FieldAccessPostAggregator("f5", "a5"), + null + ),new SketchEstimatePostAggregator( + "p6", + new FieldAccessPostAggregator("f6", "a6"), + null + ), + new SketchEstimatePostAggregator( + 
"p7", + new FieldAccessPostAggregator("f7", "a7"), + null + ), + new SketchEstimatePostAggregator( + "p8", + new FieldAccessPostAggregator("f8", "a8"), + null ) ) ) @@ -430,15 +576,34 @@ public void testArrays() throws Exception // expect 4 distinct arrays for each of these columns from 14 rows Assert.assertEquals(1, realtimeList.size()); - Assert.assertEquals(14L, realtimeList.get(0).get(3)); - Assert.assertEquals(4.0, (Double) realtimeList.get(0).get(4), 0.01); - Assert.assertEquals(4.0, (Double) realtimeList.get(0).get(5), 0.01); - Assert.assertEquals(4.0, (Double) realtimeList.get(0).get(6), 0.01); + Assert.assertEquals(14L, realtimeList.get(0).get(9)); + // array column estimate counts + Assert.assertEquals(4.0, (Double) realtimeList.get(0).get(10), 0.01); + Assert.assertEquals(4.0, (Double) realtimeList.get(0).get(11), 0.01); + Assert.assertEquals(4.0, (Double) realtimeList.get(0).get(12), 0.01); + // pre-aggregated arrays counts + Assert.assertEquals(4.0, (Double) realtimeList.get(0).get(13), 0.01); + Assert.assertEquals(4.0, (Double) realtimeList.get(0).get(14), 0.01); + Assert.assertEquals(4.0, (Double) realtimeList.get(0).get(15), 0.01); + // if processAsArray is false, count is done as string mvds so it counts the total number of elements + Assert.assertEquals(5.0, (Double) realtimeList.get(0).get(16), 0.01); + Assert.assertEquals(4.0, (Double) realtimeList.get(0).get(17), 0.01); + Assert.assertEquals(6.0, (Double) realtimeList.get(0).get(18), 0.01); + Assert.assertEquals(1, list.size()); - Assert.assertEquals(14L, list.get(0).get(3)); - Assert.assertEquals(4.0, (Double) list.get(0).get(4), 0.01); - Assert.assertEquals(4.0, (Double) list.get(0).get(5), 0.01); - Assert.assertEquals(4.0, (Double) list.get(0).get(6), 0.01); + Assert.assertEquals(14L, list.get(0).get(9)); + // array column estimate counts + Assert.assertEquals(4.0, (Double) list.get(0).get(10), 0.01); + Assert.assertEquals(4.0, (Double) list.get(0).get(11), 0.01); + Assert.assertEquals(4.0, 
(Double) list.get(0).get(12), 0.01); + // pre-aggregated arrays counts + Assert.assertEquals(4.0, (Double) list.get(0).get(13), 0.01); + Assert.assertEquals(4.0, (Double) list.get(0).get(14), 0.01); + Assert.assertEquals(4.0, (Double) list.get(0).get(15), 0.01); + // if processAsArray is false, count is done as string mvds so it counts the total number of elements + Assert.assertEquals(5.0, (Double) list.get(0).get(16), 0.01); + Assert.assertEquals(4.0, (Double) list.get(0).get(17), 0.01); + Assert.assertEquals(6.0, (Double) list.get(0).get(18), 0.01); } private void assertAggregatorFactorySerde(AggregatorFactory agg) throws Exception @@ -517,7 +682,8 @@ public void testCacheKey() 16, null, null, - null + null, + false ); final SketchMergeAggregatorFactory factory2 = new SketchMergeAggregatorFactory( "name", @@ -525,7 +691,8 @@ public void testCacheKey() 16, null, null, - null + null, + false ); final SketchMergeAggregatorFactory factory3 = new SketchMergeAggregatorFactory( "name", @@ -533,7 +700,8 @@ public void testCacheKey() 32, null, null, - null + null, + false ); Assert.assertTrue(Arrays.equals(factory1.getCacheKey(), factory2.getCacheKey())); @@ -618,7 +786,7 @@ public void testRelocation() columnSelectorFactory.setRow(new MapBasedRow(0, ImmutableMap.of("sketch", sketchHolder))); SketchHolder[] holders = helper.runRelocateVerificationTest( - new SketchMergeAggregatorFactory("sketch", "sketch", 16, false, true, 2), + new SketchMergeAggregatorFactory("sketch", "sketch", 16, false, true, 2, false), columnSelectorFactory, SketchHolder.class ); @@ -635,7 +803,7 @@ public void testUpdateUnionWithNullInList() value.add("bar"); List[] columnValues = new List[]{value}; final TestObjectColumnSelector selector = new TestObjectColumnSelector(columnValues); - final Aggregator agg = new SketchAggregator(selector, 4096); + final Aggregator agg = new SketchAggregator(selector, 4096, false); agg.aggregate(); Assert.assertFalse(agg.isNull()); Assert.assertNotNull(agg.get()); 
@@ -650,7 +818,7 @@ public void testUpdateUnionWithDouble() { Double[] columnValues = new Double[]{2.0}; final TestObjectColumnSelector selector = new TestObjectColumnSelector(columnValues); - final Aggregator agg = new SketchAggregator(selector, 4096); + final Aggregator agg = new SketchAggregator(selector, 4096, false); agg.aggregate(); Assert.assertFalse(agg.isNull()); Assert.assertNotNull(agg.get()); @@ -669,7 +837,7 @@ public void testAggregateWithSize() } final TestObjectColumnSelector selector = new TestObjectColumnSelector<>(columnValues); - final SketchAggregator agg = new SketchAggregator(selector, 128); + final SketchAggregator agg = new SketchAggregator(selector, 128, false); // Verify initial size of sketch Assert.assertEquals(48L, agg.getInitialSizeBytes()); diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregatorFactoryTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregatorFactoryTest.java index d64b4263ac1b..775271661696 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregatorFactoryTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregatorFactoryTest.java @@ -41,10 +41,10 @@ public class SketchAggregatorFactoryTest { private static final SketchMergeAggregatorFactory AGGREGATOR_16384 = - new SketchMergeAggregatorFactory("x", "x", 16384, null, false, null); + new SketchMergeAggregatorFactory("x", "x", 16384, null, false, null, false); private static final SketchMergeAggregatorFactory AGGREGATOR_32768 = - new SketchMergeAggregatorFactory("x", "x", 32768, null, false, null); + new SketchMergeAggregatorFactory("x", "x", 32768, null, false, null, false); @Test public void testGuessAggregatorHeapFootprint() @@ -93,8 +93,8 @@ public void testResultArraySignature() new 
OldSketchBuildAggregatorFactory("oldBuild", "col", 16), new OldSketchMergeAggregatorFactory("oldMerge", "col", 16, false), new OldSketchMergeAggregatorFactory("oldMergeFinalize", "col", 16, true), - new SketchMergeAggregatorFactory("merge", "col", 16, false, false, null), - new SketchMergeAggregatorFactory("mergeFinalize", "col", 16, true, false, null) + new SketchMergeAggregatorFactory("merge", "col", 16, false, false, null, false), + new SketchMergeAggregatorFactory("mergeFinalize", "col", 16, true, false, null, false) ) .postAggregators( new FieldAccessPostAggregator("oldBuild-access", "oldBuild"), diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/SketchToStringPostAggregatorTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/SketchToStringPostAggregatorTest.java index f2d19af813d1..b52d37a0b276 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/SketchToStringPostAggregatorTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/SketchToStringPostAggregatorTest.java @@ -111,7 +111,7 @@ public void testCompute() { // not going to iterate over the selector since getting a summary of an empty sketch is sufficient final TestObjectColumnSelector selector = new TestObjectColumnSelector(new Object[0]); - final Aggregator agg = new SketchAggregator(selector, 4096); + final Aggregator agg = new SketchAggregator(selector, 4096, false); final Map fields = new HashMap<>(); fields.put("sketch", agg.get()); diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchSqlAggregatorTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchSqlAggregatorTest.java index 7f8c1970b2d7..952080b859d9 100644 --- 
a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchSqlAggregatorTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchSqlAggregatorTest.java @@ -129,7 +129,8 @@ public SpecificSegmentsQuerySegmentWalker createQuerySegmentWalker( null, false, false, - null + null, + false ) ) .withRollup(false) @@ -230,7 +231,8 @@ public void testApproxCountDistinctThetaSketch() null, null, null, - null + null, + false ), new FilteredAggregatorFactory( new SketchMergeAggregatorFactory( @@ -239,7 +241,8 @@ public void testApproxCountDistinctThetaSketch() null, null, null, - null + null, + false ), not(equality("dim2", "", ColumnType.STRING)) ), @@ -249,7 +252,8 @@ public void testApproxCountDistinctThetaSketch() null, null, null, - null + null, + false ), new SketchMergeAggregatorFactory( "a4", @@ -257,10 +261,11 @@ public void testApproxCountDistinctThetaSketch() null, null, null, - null + null, + false ), - new SketchMergeAggregatorFactory("a5", "thetasketch_dim1", 32768, null, null, null), - new SketchMergeAggregatorFactory("a6", "thetasketch_dim1", null, null, null, null) + new SketchMergeAggregatorFactory("a5", "thetasketch_dim1", 32768, null, null, null, false), + new SketchMergeAggregatorFactory("a6", "thetasketch_dim1", null, null, null, null, false) ) ) .context(QUERY_CONTEXT_DEFAULT) @@ -307,7 +312,8 @@ public void testAvgDailyCountDistinctThetaSketch() null, null, null, - null + null, + false ) ) ) @@ -433,7 +439,8 @@ public void testThetaSketchPostAggs() null, false, null, - null + null, + false ), new SketchMergeAggregatorFactory( "a2", @@ -441,7 +448,8 @@ public void testThetaSketchPostAggs() null, false, null, - null + null, + false ), new SketchMergeAggregatorFactory( "a3", @@ -449,7 +457,8 @@ public void testThetaSketchPostAggs() null, false, null, - null + null, + false ) ) ) @@ -610,7 +619,8 @@ public void 
testThetaSketchPostAggsFinalizeOuterSketches() null, null, null, - null + null, + false ), new SketchMergeAggregatorFactory( "a2", @@ -618,7 +628,8 @@ public void testThetaSketchPostAggsFinalizeOuterSketches() null, null, null, - null + null, + false ), new SketchMergeAggregatorFactory( "a3", @@ -626,7 +637,8 @@ public void testThetaSketchPostAggsFinalizeOuterSketches() null, null, null, - null + null, + false ) ) ) @@ -736,7 +748,8 @@ public void testThetaSketchPostAggsPostSort() null, false, null, - null + null, + false ) ) ) @@ -792,7 +805,8 @@ public void testThetaSketchPostAggsPostSortFinalizeOuterSketches() null, null, null, - null + null, + false ) ) ) @@ -844,7 +858,8 @@ public void testEmptyTimeseriesResults() null, null, null, - null + null, + false ), new SketchMergeAggregatorFactory( "a1", @@ -852,7 +867,8 @@ public void testEmptyTimeseriesResults() null, null, null, - null + null, + false ), new SketchMergeAggregatorFactory( "a2", @@ -860,7 +876,8 @@ public void testEmptyTimeseriesResults() 1024, false, null, - null + null, + false ), new SketchMergeAggregatorFactory( "a3", @@ -868,7 +885,8 @@ public void testEmptyTimeseriesResults() 1024, false, null, - null + null, + false ) ) ) @@ -907,7 +925,8 @@ public void testGroupByAggregatorDefaultValues() null, true, null, - null + null, + false ), equality("dim1", "nonexistent", ColumnType.STRING) ), @@ -918,7 +937,8 @@ public void testGroupByAggregatorDefaultValues() null, true, null, - null + null, + false ), equality("dim1", "nonexistent", ColumnType.STRING) ), @@ -929,7 +949,8 @@ public void testGroupByAggregatorDefaultValues() 1024, false, null, - null + null, + false ), equality("dim1", "nonexistent", ColumnType.STRING) ), @@ -940,7 +961,8 @@ public void testGroupByAggregatorDefaultValues() 1024, false, null, - null + null, + false ), equality("dim1", "nonexistent", ColumnType.STRING) ) @@ -988,7 +1010,8 @@ public void testGroupByAggregatorDefaultValuesFinalizeOuterSketches() null, true, null, - null + 
null, + false ), equality("dim1", "nonexistent", ColumnType.STRING) ), @@ -999,7 +1022,8 @@ public void testGroupByAggregatorDefaultValuesFinalizeOuterSketches() null, true, null, - null + null, + false ), equality("dim1", "nonexistent", ColumnType.STRING) ), @@ -1010,7 +1034,8 @@ public void testGroupByAggregatorDefaultValuesFinalizeOuterSketches() 1024, true, null, - null + null, + false ), equality("dim1", "nonexistent", ColumnType.STRING) ), @@ -1021,7 +1046,8 @@ public void testGroupByAggregatorDefaultValuesFinalizeOuterSketches() 1024, true, null, - null + null, + false ), equality("dim1", "nonexistent", ColumnType.STRING) ) diff --git a/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/aggregation/bloom/ByteBloomFilterAggregator.java b/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/aggregation/bloom/ByteBloomFilterAggregator.java index 255a1d66a612..299b535321a7 100644 --- a/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/aggregation/bloom/ByteBloomFilterAggregator.java +++ b/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/aggregation/bloom/ByteBloomFilterAggregator.java @@ -1,3 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + package org.apache.druid.query.aggregation.bloom; import org.apache.druid.math.expr.ExprEval; diff --git a/integration-tests/src/test/java/org/apache/druid/tests/coordinator/duty/ITAutoCompactionTest.java b/integration-tests/src/test/java/org/apache/druid/tests/coordinator/duty/ITAutoCompactionTest.java index 3c40affa7834..8aabb8a344e5 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/coordinator/duty/ITAutoCompactionTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/coordinator/duty/ITAutoCompactionTest.java @@ -171,8 +171,8 @@ public void testAutoCompactionRowWithMetricAndRowWithoutMetricShouldPreserveExis new CountAggregatorFactory("count"), // FloatSumAggregator combine method takes in two Float but return Double new FloatSumAggregatorFactory("sum_added", "added"), - new SketchMergeAggregatorFactory("thetaSketch", "user", 16384, true, false, null), - new HllSketchBuildAggregatorFactory("HLLSketchBuild", "user", 12, TgtHllType.HLL_4.name(), null, false, false), + new SketchMergeAggregatorFactory("thetaSketch", "user", 16384, true, false, null, false), + new HllSketchBuildAggregatorFactory("HLLSketchBuild", "user", 12, TgtHllType.HLL_4.name(), null, false, false, false), new DoublesSketchAggregatorFactory("quantilesDoublesSketch", "delta", 128, 1000000000L, null) }, false @@ -265,7 +265,7 @@ public void testAutoCompactionRowWithMetricAndRowWithoutMetricShouldPreserveExis new AggregatorFactory[]{ new CountAggregatorFactory("count"), new LongSumAggregatorFactory("sum_added", "added"), - new SketchMergeAggregatorFactory("thetaSketch", "user", 16384, true, false, null), + new SketchMergeAggregatorFactory("thetaSketch", "user", 16384, true, false, null, false), new HllSketchBuildAggregatorFactory( "HLLSketchBuild", "user", @@ -273,6 +273,7 @@ public void 
testAutoCompactionRowWithMetricAndRowWithoutMetricShouldPreserveExis TgtHllType.HLL_4.name(), null, false, + false, false ), new DoublesSketchAggregatorFactory("quantilesDoublesSketch", "delta", 128, 1000000000L, null) diff --git a/processing/src/test/java/org/apache/druid/segment/IndexBuilder.java b/processing/src/test/java/org/apache/druid/segment/IndexBuilder.java index a04583a3b32d..b92b97fe6cb9 100644 --- a/processing/src/test/java/org/apache/druid/segment/IndexBuilder.java +++ b/processing/src/test/java/org/apache/druid/segment/IndexBuilder.java @@ -317,7 +317,7 @@ private QueryableIndex mergeIndexes( i++; } else { persisted.add( - TestHelper.getTestIndexIO().loadIndex( + indexIO.loadIndex( indexMerger.persist( incrementalIndex, new File(tmpDir, StringUtils.format("testIndex-%s", UUID.randomUUID().toString())), @@ -335,7 +335,7 @@ private QueryableIndex mergeIndexes( } if (i != 0) { persisted.add( - TestHelper.getTestIndexIO().loadIndex( + indexIO.loadIndex( indexMerger.persist( incrementalIndex, new File(tmpDir, StringUtils.format("testIndex-%s", UUID.randomUUID().toString())), @@ -346,7 +346,7 @@ private QueryableIndex mergeIndexes( ); } - final QueryableIndex merged = TestHelper.getTestIndexIO().loadIndex( + final QueryableIndex merged = indexIO.loadIndex( indexMerger.mergeQueryableIndex( persisted, true, From 3b235edf43c7fe07bdb1cf5e462d894a55b6d447 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Wed, 12 Jul 2023 21:05:54 -0700 Subject: [PATCH 27/44] add cooler equality index, fix missing string utf8 index supplier --- .../druid/query/filter/EqualityFilter.java | 20 ++- .../apache/druid/query/filter/NullFilter.java | 1 - .../druid/query/filter/RangeFilter.java | 1 - .../index/IndexedUtf8ValueSetIndex.java | 35 ++++- .../index/semantic/TypedValueIndex.java | 37 ++++++ .../ScalarDoubleColumnAndIndexSupplier.java | 59 ++++++++- .../ScalarLongColumnAndIndexSupplier.java | 59 ++++++++- .../nested/VariantColumnAndIndexSupplier.java | 122 +++++++++++++++++- 
.../serde/StringUtf8ColumnIndexSupplier.java | 14 +- 9 files changed, 318 insertions(+), 30 deletions(-) create mode 100644 processing/src/main/java/org/apache/druid/segment/index/semantic/TypedValueIndex.java diff --git a/processing/src/main/java/org/apache/druid/query/filter/EqualityFilter.java b/processing/src/main/java/org/apache/druid/query/filter/EqualityFilter.java index 79bcfa373824..23ed49d1eb15 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/EqualityFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/EqualityFilter.java @@ -28,7 +28,6 @@ import com.google.common.collect.Range; import com.google.common.collect.RangeSet; import com.google.common.collect.TreeRangeSet; -import org.apache.druid.error.DruidException; import org.apache.druid.error.InvalidInput; import org.apache.druid.java.util.common.IAE; import org.apache.druid.math.expr.ExprEval; @@ -59,6 +58,7 @@ import org.apache.druid.segment.filter.ValueMatchers; import org.apache.druid.segment.index.BitmapColumnIndex; import org.apache.druid.segment.index.semantic.StringValueSetIndex; +import org.apache.druid.segment.index.semantic.TypedValueIndex; import org.apache.druid.segment.nested.StructuredData; import org.apache.druid.segment.vector.VectorColumnSelectorFactory; @@ -255,12 +255,20 @@ public BitmapColumnIndex getBitmapColumnIndex(ColumnIndexSelector selector) return Filters.makeNullIndex(false, selector); } - final StringValueSetIndex valueSetIndex = indexSupplier.as(StringValueSetIndex.class); - if (valueSetIndex == null) { - // column exists, but has no index - return null; + final TypedValueIndex valueSetIndex = indexSupplier.as(TypedValueIndex.class); + if (valueSetIndex != null) { + return valueSetIndex.forValue(matchValue, matchValueType); + } + + if (matchValueType.isPrimitive()) { + final StringValueSetIndex stringValueSetIndex = indexSupplier.as(StringValueSetIndex.class); + if (stringValueSetIndex != null) { + + return 
stringValueSetIndex.forValue(String.valueOf(matchValue)); + } } - return valueSetIndex.forValue(String.valueOf(matchValue)); + // column exists, but has no indexes we can use + return null; } @Override diff --git a/processing/src/main/java/org/apache/druid/query/filter/NullFilter.java b/processing/src/main/java/org/apache/druid/query/filter/NullFilter.java index d5a3ce3e83fe..f2ec707b3d53 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/NullFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/NullFilter.java @@ -28,7 +28,6 @@ import com.google.common.collect.Range; import com.google.common.collect.RangeSet; import com.google.common.collect.TreeRangeSet; -import org.apache.druid.error.DruidException; import org.apache.druid.error.InvalidInput; import org.apache.druid.java.util.common.IAE; import org.apache.druid.query.cache.CacheKeyBuilder; diff --git a/processing/src/main/java/org/apache/druid/query/filter/RangeFilter.java b/processing/src/main/java/org/apache/druid/query/filter/RangeFilter.java index 152ffe3ac54b..90d80c26fec5 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/RangeFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/RangeFilter.java @@ -32,7 +32,6 @@ import com.google.common.collect.RangeSet; import com.google.common.collect.TreeRangeSet; import org.apache.druid.common.config.NullHandling; -import org.apache.druid.error.DruidException; import org.apache.druid.error.InvalidInput; import org.apache.druid.java.util.common.IAE; import org.apache.druid.java.util.common.StringUtils; diff --git a/processing/src/main/java/org/apache/druid/segment/index/IndexedUtf8ValueSetIndex.java b/processing/src/main/java/org/apache/druid/segment/index/IndexedUtf8ValueSetIndex.java index fb18891f51d7..078ee6a8907c 100644 --- a/processing/src/main/java/org/apache/druid/segment/index/IndexedUtf8ValueSetIndex.java +++ 
b/processing/src/main/java/org/apache/druid/segment/index/IndexedUtf8ValueSetIndex.java @@ -27,9 +27,14 @@ import org.apache.druid.collections.bitmap.ImmutableBitmap; import org.apache.druid.java.util.common.ByteBufferUtils; import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.math.expr.ExprEval; +import org.apache.druid.math.expr.ExpressionType; import org.apache.druid.query.BitmapResultFactory; +import org.apache.druid.segment.column.TypeSignature; +import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.data.Indexed; import org.apache.druid.segment.index.semantic.StringValueSetIndex; +import org.apache.druid.segment.index.semantic.TypedValueIndex; import org.apache.druid.segment.index.semantic.Utf8ValueSetIndex; import javax.annotation.Nullable; @@ -40,7 +45,7 @@ import java.util.SortedSet; public final class IndexedUtf8ValueSetIndex> - implements StringValueSetIndex, Utf8ValueSetIndex + implements StringValueSetIndex, Utf8ValueSetIndex, TypedValueIndex { // This determines the cut-off point to switch the merging algorithm from doing binary-search per element in the value // set to doing a sorted merge algorithm between value set and dictionary. The ratio here represents the ratio b/w @@ -87,20 +92,33 @@ public T computeBitmapResult(BitmapResultFactory bitmapResultFactory) private ImmutableBitmap getBitmapForValue() { - final ByteBuffer valueUtf8 = value == null ? 
null : ByteBuffer.wrap(StringUtils.toUtf8(value)); - final int idx = dictionary.indexOf(valueUtf8); + final int idx = dictionary.indexOf(StringUtils.toUtf8ByteBuffer(value)); return getBitmap(idx); } }; } + @Nullable + @Override + public BitmapColumnIndex forValue(Object value, TypeSignature valueType) + { + if (valueType.isPrimitive()) { + return forValue( + ExprEval.ofType(ExpressionType.fromColumnTypeStrict(valueType), value) + .castTo(ExpressionType.STRING) + .asString() + ); + } + return null; + } + @Override public BitmapColumnIndex forSortedValues(SortedSet values) { return getBitmapColumnIndexForSortedIterableUtf8( Iterables.transform( values, - input -> input != null ? ByteBuffer.wrap(StringUtils.toUtf8(input)) : null + StringUtils::toUtf8ByteBuffer ), values.size() ); @@ -198,6 +216,15 @@ private void findNext() // if the size of in-filter values is less than the threshold percentage of dictionary size, then use binary search // based lookup per value. The algorithm works well for smaller number of values. + return getSimpleImmutableBitmapIterableIndexFromIterator(valuesUtf8); + } + + /** + * Iterates over the value set, using binary search to look up each element. 
The algorithm works well for smaller + * number of values, and must be used if the values are not sorted in the same manner as {@link #dictionary} + */ + private SimpleImmutableBitmapIterableIndex getSimpleImmutableBitmapIterableIndexFromIterator(Iterable valuesUtf8) + { return new SimpleImmutableBitmapIterableIndex() { @Override diff --git a/processing/src/main/java/org/apache/druid/segment/index/semantic/TypedValueIndex.java b/processing/src/main/java/org/apache/druid/segment/index/semantic/TypedValueIndex.java new file mode 100644 index 000000000000..8d3e2160c31b --- /dev/null +++ b/processing/src/main/java/org/apache/druid/segment/index/semantic/TypedValueIndex.java @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.index.semantic; + +import org.apache.druid.collections.bitmap.ImmutableBitmap; +import org.apache.druid.segment.column.TypeSignature; +import org.apache.druid.segment.column.ValueType; +import org.apache.druid.segment.index.BitmapColumnIndex; + +import javax.annotation.Nullable; + +public interface TypedValueIndex +{ + /** + * Get the {@link ImmutableBitmap} corresponding to the supplied value. 
Generates an empty bitmap when passed a + * value that doesn't exist. May return null if a value index cannot be computed for the supplied value type. + */ + @Nullable + BitmapColumnIndex forValue(Object value, TypeSignature valueType); +} diff --git a/processing/src/main/java/org/apache/druid/segment/nested/ScalarDoubleColumnAndIndexSupplier.java b/processing/src/main/java/org/apache/druid/segment/nested/ScalarDoubleColumnAndIndexSupplier.java index aff15475ea0e..194153a3b6fa 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/ScalarDoubleColumnAndIndexSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/ScalarDoubleColumnAndIndexSupplier.java @@ -34,6 +34,8 @@ import org.apache.druid.java.util.common.RE; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.java.util.common.io.smoosh.SmooshedFileMapper; +import org.apache.druid.math.expr.ExprEval; +import org.apache.druid.math.expr.ExpressionType; import org.apache.druid.query.BitmapResultFactory; import org.apache.druid.query.filter.DruidDoublePredicate; import org.apache.druid.query.filter.DruidPredicateFactory; @@ -42,6 +44,8 @@ import org.apache.druid.segment.column.ColumnConfig; import org.apache.druid.segment.column.ColumnIndexSupplier; import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.column.TypeSignature; +import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.data.BitmapSerdeFactory; import org.apache.druid.segment.data.ColumnarDoubles; import org.apache.druid.segment.data.CompressedColumnarDoublesSuppliers; @@ -58,6 +62,7 @@ import org.apache.druid.segment.index.semantic.NullValueIndex; import org.apache.druid.segment.index.semantic.NumericRangeIndex; import org.apache.druid.segment.index.semantic.StringValueSetIndex; +import org.apache.druid.segment.index.semantic.TypedValueIndex; import org.apache.druid.segment.serde.NestedCommonFormatColumnPartSerde; import 
javax.annotation.Nullable; @@ -191,15 +196,19 @@ public T as(Class clazz) nullIndex = new SimpleImmutableBitmapIndex(nullValueBitmap); } return (T) (NullValueIndex) () -> nullIndex; - } else if (clazz.equals(DictionaryEncodedStringValueIndex.class) - || clazz.equals(DictionaryEncodedValueIndex.class)) { - return (T) new DoubleDictionaryEncodedValueSetIndex(); - } else if (clazz.equals(StringValueSetIndex.class)) { - return (T) new DoubleValueSetIndex(); + } else if (clazz.equals(TypedValueIndex.class)) { + return (T) new DoubleValueIndex(); + } else if (clazz.equals(StringValueSetIndex.class)) { + return (T) new DoubleStringValueSetIndex(); } else if (clazz.equals(NumericRangeIndex.class)) { return (T) new DoubleNumericRangeIndex(); } else if (clazz.equals(DruidPredicateIndex.class)) { return (T) new DoublePredicateIndex(); + } else if ( + clazz.equals(DictionaryEncodedStringValueIndex.class) || + clazz.equals(DictionaryEncodedValueIndex.class) + ) { + return (T) new DoubleDictionaryEncodedValueSetIndex(); } return null; @@ -215,7 +224,45 @@ private ImmutableBitmap getBitmap(int idx) return bitmap == null ? 
bitmapFactory.makeEmptyImmutableBitmap() : bitmap; } - private class DoubleValueSetIndex implements StringValueSetIndex + private class DoubleValueIndex implements TypedValueIndex + { + @Nullable + @Override + public BitmapColumnIndex forValue(Object value, TypeSignature valueType) + { + final ExprEval eval = ExprEval.ofType(ExpressionType.fromColumnTypeStrict(valueType), value) + .castTo(ExpressionType.DOUBLE); + if (eval.isNumericNull()) { + return null; + } + final double doubleValue = eval.asDouble(); + return new SimpleBitmapColumnIndex() + { + final FixedIndexed dictionary = doubleDictionarySupplier.get(); + @Override + public double estimateSelectivity(int totalRows) + { + final int id = dictionary.indexOf(doubleValue); + if (id < 0) { + return 0.0; + } + return (double) getBitmap(id).size() / totalRows; + } + + @Override + public T computeBitmapResult(BitmapResultFactory bitmapResultFactory) + { + final int id = dictionary.indexOf(doubleValue); + if (id < 0) { + return bitmapResultFactory.wrapDimensionValue(bitmapFactory.makeEmptyImmutableBitmap()); + } + return bitmapResultFactory.wrapDimensionValue(getBitmap(id)); + } + }; + } + } + + private class DoubleStringValueSetIndex implements StringValueSetIndex { @Override public BitmapColumnIndex forValue(@Nullable String value) diff --git a/processing/src/main/java/org/apache/druid/segment/nested/ScalarLongColumnAndIndexSupplier.java b/processing/src/main/java/org/apache/druid/segment/nested/ScalarLongColumnAndIndexSupplier.java index 08bdd6875766..8918413dd1f1 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/ScalarLongColumnAndIndexSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/ScalarLongColumnAndIndexSupplier.java @@ -33,6 +33,8 @@ import org.apache.druid.java.util.common.RE; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.java.util.common.io.smoosh.SmooshedFileMapper; +import org.apache.druid.math.expr.ExprEval; +import 
org.apache.druid.math.expr.ExpressionType; import org.apache.druid.query.BitmapResultFactory; import org.apache.druid.query.filter.DruidLongPredicate; import org.apache.druid.query.filter.DruidPredicateFactory; @@ -41,6 +43,8 @@ import org.apache.druid.segment.column.ColumnConfig; import org.apache.druid.segment.column.ColumnIndexSupplier; import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.column.TypeSignature; +import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.data.BitmapSerdeFactory; import org.apache.druid.segment.data.ColumnarLongs; import org.apache.druid.segment.data.CompressedColumnarLongsSupplier; @@ -57,6 +61,7 @@ import org.apache.druid.segment.index.semantic.NullValueIndex; import org.apache.druid.segment.index.semantic.NumericRangeIndex; import org.apache.druid.segment.index.semantic.StringValueSetIndex; +import org.apache.druid.segment.index.semantic.TypedValueIndex; import org.apache.druid.segment.serde.NestedCommonFormatColumnPartSerde; import javax.annotation.Nullable; @@ -191,15 +196,19 @@ public T as(Class clazz) nullIndex = new SimpleImmutableBitmapIndex(nullValueBitmap); } return (T) (NullValueIndex) () -> nullIndex; - } else if (clazz.equals(DictionaryEncodedStringValueIndex.class) - || clazz.equals(DictionaryEncodedValueIndex.class)) { - return (T) new LongDictionaryEncodedValueSetIndex(); - } else if (clazz.equals(StringValueSetIndex.class)) { - return (T) new LongValueSetIndex(); + } else if (clazz.equals(TypedValueIndex.class)) { + return (T) new LongValueIndex(); + } else if (clazz.equals(StringValueSetIndex.class)) { + return (T) new LongStringValueSetIndex(); } else if (clazz.equals(NumericRangeIndex.class)) { return (T) new LongNumericRangeIndex(); } else if (clazz.equals(DruidPredicateIndex.class)) { return (T) new LongPredicateIndex(); + } else if ( + clazz.equals(DictionaryEncodedStringValueIndex.class) || + clazz.equals(DictionaryEncodedValueIndex.class) + ) { + return 
(T) new LongDictionaryEncodedValueSetIndex(); } return null; @@ -215,7 +224,45 @@ private ImmutableBitmap getBitmap(int idx) return bitmap == null ? bitmapFactory.makeEmptyImmutableBitmap() : bitmap; } - private class LongValueSetIndex implements StringValueSetIndex + private class LongValueIndex implements TypedValueIndex + { + @Nullable + @Override + public BitmapColumnIndex forValue(Object value, TypeSignature valueType) + { + final ExprEval eval = ExprEval.ofType(ExpressionType.fromColumnTypeStrict(valueType), value) + .castTo(ExpressionType.LONG); + if (eval.isNumericNull()) { + return null; + } + final long longValue = eval.asLong(); + return new SimpleBitmapColumnIndex() + { + final FixedIndexed dictionary = longDictionarySupplier.get(); + @Override + public double estimateSelectivity(int totalRows) + { + final int id = dictionary.indexOf(longValue); + if (id < 0) { + return 0.0; + } + return (double) getBitmap(id).size() / totalRows; + } + + @Override + public T computeBitmapResult(BitmapResultFactory bitmapResultFactory) + { + final int id = dictionary.indexOf(longValue); + if (id < 0) { + return bitmapResultFactory.wrapDimensionValue(bitmapFactory.makeEmptyImmutableBitmap()); + } + return bitmapResultFactory.wrapDimensionValue(getBitmap(id)); + } + }; + } + } + + private class LongStringValueSetIndex implements StringValueSetIndex { final FixedIndexed dictionary = longDictionarySupplier.get(); int defaultValueIndex = dictionary.indexOf(NullHandling.defaultLongValue()); diff --git a/processing/src/main/java/org/apache/druid/segment/nested/VariantColumnAndIndexSupplier.java b/processing/src/main/java/org/apache/druid/segment/nested/VariantColumnAndIndexSupplier.java index e49f180c2164..70d0b97a9694 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/VariantColumnAndIndexSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/VariantColumnAndIndexSupplier.java @@ -22,15 +22,21 @@ import com.google.common.base.Supplier; 
import org.apache.druid.collections.bitmap.BitmapFactory; import org.apache.druid.collections.bitmap.ImmutableBitmap; +import org.apache.druid.error.DruidException; import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.common.RE; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.java.util.common.io.smoosh.SmooshedFileMapper; +import org.apache.druid.math.expr.ExprEval; +import org.apache.druid.math.expr.ExpressionType; +import org.apache.druid.query.BitmapResultFactory; import org.apache.druid.segment.column.ColumnBuilder; import org.apache.druid.segment.column.ColumnConfig; import org.apache.druid.segment.column.ColumnIndexSupplier; import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.StringEncodingStrategy; +import org.apache.druid.segment.column.TypeSignature; +import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.data.BitmapSerdeFactory; import org.apache.druid.segment.data.ColumnarInts; import org.apache.druid.segment.data.CompressedVSizeColumnarIntsSupplier; @@ -39,10 +45,13 @@ import org.apache.druid.segment.data.FrontCodedIndexed; import org.apache.druid.segment.data.FrontCodedIntArrayIndexed; import org.apache.druid.segment.data.GenericIndexed; +import org.apache.druid.segment.data.Indexed; import org.apache.druid.segment.data.VByte; import org.apache.druid.segment.index.BitmapColumnIndex; +import org.apache.druid.segment.index.SimpleBitmapColumnIndex; import org.apache.druid.segment.index.SimpleImmutableBitmapIndex; import org.apache.druid.segment.index.semantic.NullValueIndex; +import org.apache.druid.segment.index.semantic.TypedValueIndex; import org.apache.druid.segment.serde.NestedCommonFormatColumnPartSerde; import javax.annotation.Nullable; @@ -143,11 +152,21 @@ public static VariantColumnAndIndexSupplier read( columnName, NestedCommonFormatColumnSerializer.BITMAP_INDEX_FILE_NAME ); - GenericIndexed valueIndexes = 
GenericIndexed.read( + final GenericIndexed valueIndexes = GenericIndexed.read( valueIndexBuffer, bitmapSerdeFactory.getObjectStrategy(), columnBuilder.getFileMapper() ); + final ByteBuffer elementIndexBuffer = NestedCommonFormatColumnPartSerde.loadInternalFile( + mapper, + columnName, + NestedCommonFormatColumnSerializer.ARRAY_ELEMENT_BITMAP_INDEX_FILE_NAME + ); + final GenericIndexed arrayElementIndexes = GenericIndexed.read( + elementIndexBuffer, + bitmapSerdeFactory.getObjectStrategy(), + columnBuilder.getFileMapper() + ); longDictionarySupplier = FixedIndexed.read( longDictionaryBuffer, @@ -184,6 +203,7 @@ public static VariantColumnAndIndexSupplier read( arrayDictionarySupplier, ints, valueIndexes, + arrayElementIndexes, bitmapSerdeFactory.getBitmapFactory(), columnConfig, size @@ -201,7 +221,7 @@ public static VariantColumnAndIndexSupplier read( private final ColumnType logicalType; @Nullable private final Byte variantTypeSetByte; - + private final BitmapFactory bitmapFactory; private final GenericIndexed stringDictionary; private final Supplier frontCodedStringDictionarySupplier; private final Supplier> longDictionarySupplier; @@ -210,6 +230,8 @@ public static VariantColumnAndIndexSupplier read( private final Supplier encodedValueColumnSupplier; @SuppressWarnings("unused") private final GenericIndexed valueIndexes; + @SuppressWarnings("unused") + private final GenericIndexed arrayElementIndexes; private final ImmutableBitmap nullValueBitmap; public VariantColumnAndIndexSupplier( @@ -222,7 +244,8 @@ public VariantColumnAndIndexSupplier( Supplier arrayDictionarySupplier, Supplier encodedValueColumnSupplier, GenericIndexed valueIndexes, - @SuppressWarnings("unused") BitmapFactory bitmapFactory, + GenericIndexed elementIndexes, + BitmapFactory bitmapFactory, @SuppressWarnings("unused") ColumnConfig columnConfig, @SuppressWarnings("unused") int numRows ) @@ -236,6 +259,8 @@ public VariantColumnAndIndexSupplier( this.arrayDictionarySupplier = 
arrayDictionarySupplier; this.encodedValueColumnSupplier = encodedValueColumnSupplier; this.valueIndexes = valueIndexes; + this.arrayElementIndexes = elementIndexes; + this.bitmapFactory = bitmapFactory; this.nullValueBitmap = valueIndexes.get(0) == null ? bitmapFactory.makeEmptyImmutableBitmap() : valueIndexes.get(0); } @@ -279,8 +304,99 @@ public T as(Class clazz) if (clazz.equals(NullValueIndex.class)) { final BitmapColumnIndex nullIndex = new SimpleImmutableBitmapIndex(nullValueBitmap); return (T) (NullValueIndex) () -> nullIndex; + } else if (clazz.equals(TypedValueIndex.class) && variantTypeSetByte == null && logicalType.isArray()) { + return (T) new ArrayValueIndex(); } // coming soon... return null; } + + private ImmutableBitmap getBitmap(int idx) + { + if (idx < 0) { + return bitmapFactory.makeEmptyImmutableBitmap(); + } + + final ImmutableBitmap bitmap = valueIndexes.get(idx); + return bitmap == null ? bitmapFactory.makeEmptyImmutableBitmap() : bitmap; + } + + private class ArrayValueIndex implements TypedValueIndex + { + @Nullable + @Override + public BitmapColumnIndex forValue(Object value, TypeSignature valueType) + { + final ExprEval eval = ExprEval.ofType(ExpressionType.fromColumnTypeStrict(valueType), value) + .castTo(ExpressionType.fromColumnTypeStrict(logicalType)); + if (eval.value() == null) { + return null; + } + final Object[] arrayToMatch = eval.asArray(); + Indexed elements; + switch (logicalType.getElementType().getType()) { + case STRING: + elements = frontCodedStringDictionarySupplier != null + ? 
frontCodedStringDictionarySupplier.get() + : stringDictionary.singleThreaded(); + break; + case LONG: + elements = longDictionarySupplier.get(); + break; + case DOUBLE: + elements = doubleDictionarySupplier.get(); + break; + default: + throw DruidException.defensive( + "Unhandled array type [%s] how did this happen?", + logicalType.getElementType() + ); + } + + final int[] ids = new int[arrayToMatch.length]; + boolean hasMissingElement = false; + for (int i = 0; i < arrayToMatch.length; i++) { + if (logicalType.getElementType().is(ValueType.STRING)) { + ids[i] = elements.indexOf(StringUtils.toUtf8ByteBuffer((String) arrayToMatch[i])); + } else { + ids[i] = elements.indexOf(arrayToMatch[i]); + } + if (ids[i] < 0) { + hasMissingElement = true; + break; + } + } + + final boolean noMatch = hasMissingElement; + final FrontCodedIntArrayIndexed dictionary = arrayDictionarySupplier.get(); + return new SimpleBitmapColumnIndex() + { + @Override + public double estimateSelectivity(int totalRows) + { + if (noMatch) { + return 0.0; + } + final int id = dictionary.indexOf(ids); + if (id < 0) { + return 0.0; + } + return (double) getBitmap(id).size() / totalRows; + } + + @Override + public T computeBitmapResult(BitmapResultFactory bitmapResultFactory) + { + if (noMatch) { + return bitmapResultFactory.wrapDimensionValue(bitmapFactory.makeEmptyImmutableBitmap()); + } + final int id = dictionary.indexOf(ids); + if (id < 0) { + return bitmapResultFactory.wrapDimensionValue(bitmapFactory.makeEmptyImmutableBitmap()); + } + return bitmapResultFactory.wrapDimensionValue(getBitmap(id)); + } + }; + } + } } diff --git a/processing/src/main/java/org/apache/druid/segment/serde/StringUtf8ColumnIndexSupplier.java b/processing/src/main/java/org/apache/druid/segment/serde/StringUtf8ColumnIndexSupplier.java index 7e24af112e88..aee9824cde8d 100644 --- a/processing/src/main/java/org/apache/druid/segment/serde/StringUtf8ColumnIndexSupplier.java +++ 
b/processing/src/main/java/org/apache/druid/segment/serde/StringUtf8ColumnIndexSupplier.java @@ -42,6 +42,8 @@ import org.apache.druid.segment.index.semantic.NullValueIndex; import org.apache.druid.segment.index.semantic.SpatialIndex; import org.apache.druid.segment.index.semantic.StringValueSetIndex; +import org.apache.druid.segment.index.semantic.TypedValueIndex; +import org.apache.druid.segment.index.semantic.Utf8ValueSetIndex; import javax.annotation.Nullable; import java.nio.ByteBuffer; @@ -113,7 +115,11 @@ public T as(Class clazz) nullIndex = new SimpleImmutableBitmapIndex(bitmapFactory.makeEmptyImmutableBitmap()); } return (T) (NullValueIndex) () -> nullIndex; - } else if (clazz.equals(StringValueSetIndex.class)) { + } else if ( + clazz.equals(StringValueSetIndex.class) || + clazz.equals(Utf8ValueSetIndex.class) || + clazz.equals(TypedValueIndex.class) + ) { return (T) new IndexedUtf8ValueSetIndex<>( bitmapFactory, dict, @@ -136,8 +142,10 @@ public T as(Class clazz) columnConfig, numRows ); - } else if (clazz.equals(DictionaryEncodedStringValueIndex.class) - || clazz.equals(DictionaryEncodedValueIndex.class)) { + } else if ( + clazz.equals(DictionaryEncodedStringValueIndex.class) || + clazz.equals(DictionaryEncodedValueIndex.class) + ) { // Need string dictionary instead of UTF8 dictionary return (T) new IndexedStringDictionaryEncodedStringValueIndex<>( bitmapFactory, From 1b13fc71fbe2e9a88b25bfedeaad153b2b00a869 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Wed, 12 Jul 2023 21:45:36 -0700 Subject: [PATCH 28/44] style --- .../segment/nested/ScalarDoubleColumnAndIndexSupplier.java | 2 +- .../druid/segment/nested/ScalarLongColumnAndIndexSupplier.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/nested/ScalarDoubleColumnAndIndexSupplier.java b/processing/src/main/java/org/apache/druid/segment/nested/ScalarDoubleColumnAndIndexSupplier.java index 194153a3b6fa..a775217c7a88 100644 --- 
a/processing/src/main/java/org/apache/druid/segment/nested/ScalarDoubleColumnAndIndexSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/ScalarDoubleColumnAndIndexSupplier.java @@ -198,7 +198,7 @@ public T as(Class clazz) return (T) (NullValueIndex) () -> nullIndex; } else if (clazz.equals(TypedValueIndex.class)) { return (T) new DoubleValueIndex(); - } else if (clazz.equals(StringValueSetIndex.class)) { + } else if (clazz.equals(StringValueSetIndex.class)) { return (T) new DoubleStringValueSetIndex(); } else if (clazz.equals(NumericRangeIndex.class)) { return (T) new DoubleNumericRangeIndex(); diff --git a/processing/src/main/java/org/apache/druid/segment/nested/ScalarLongColumnAndIndexSupplier.java b/processing/src/main/java/org/apache/druid/segment/nested/ScalarLongColumnAndIndexSupplier.java index 8918413dd1f1..85107fbd8691 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/ScalarLongColumnAndIndexSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/ScalarLongColumnAndIndexSupplier.java @@ -198,7 +198,7 @@ public T as(Class clazz) return (T) (NullValueIndex) () -> nullIndex; } else if (clazz.equals(TypedValueIndex.class)) { return (T) new LongValueIndex(); - } else if (clazz.equals(StringValueSetIndex.class)) { + } else if (clazz.equals(StringValueSetIndex.class)) { return (T) new LongStringValueSetIndex(); } else if (clazz.equals(NumericRangeIndex.class)) { return (T) new LongNumericRangeIndex(); From 1d01db59c0df2caadcd180de0e0c04bf6c3d69a3 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Wed, 12 Jul 2023 23:21:13 -0700 Subject: [PATCH 29/44] sql tests --- .../hll/sql/HllSketchBaseSqlAggregator.java | 2 +- .../sql/ThetaSketchBaseSqlAggregator.java | 4 +- .../hll/HllSketchAggregatorTest.java | 12 +- .../hll/sql/HllSketchSqlAggregatorTest.java | 143 +++++++++++- .../theta/SketchAggregationTest.java | 6 +- .../sql/ThetaSketchSqlAggregatorTest.java | 205 +++++++++++++++++- 
.../resources/nested-all-types-test-data.json | 2 +- 7 files changed, 360 insertions(+), 14 deletions(-) diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchBaseSqlAggregator.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchBaseSqlAggregator.java index a68227b01e7c..ede81f0f21a8 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchBaseSqlAggregator.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchBaseSqlAggregator.java @@ -194,7 +194,7 @@ public Aggregation toDruidAggregation( stringEncoding, finalizeSketch || SketchQueryContext.isFinalizeOuterSketches(plannerContext), ROUND, - false + inputType.isArray() ); } } diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchBaseSqlAggregator.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchBaseSqlAggregator.java index 219753822c3e..69d5e58191ac 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchBaseSqlAggregator.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchBaseSqlAggregator.java @@ -117,7 +117,7 @@ public Aggregation toDruidAggregation( finalizeSketch || SketchQueryContext.isFinalizeOuterSketches(plannerContext), null, null, - false + columnArg.getDruidType() != null && columnArg.getDruidType().isArray() ); } else { final RelDataType dataType = columnRexNode.getType(); @@ -149,7 +149,7 @@ public Aggregation toDruidAggregation( finalizeSketch || SketchQueryContext.isFinalizeOuterSketches(plannerContext), null, null, - false + inputType.isArray() ); } diff --git 
a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorTest.java index 353b9722c4d8..29adea8b5445 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorTest.java @@ -444,12 +444,12 @@ public void testPostAggs() throws Exception public void testArrays() throws Exception { AggregatorFactory[] aggs = new AggregatorFactory[]{ - new HllSketchBuildAggregatorFactory("hll0", "arrayString", null, null, null, false, false, true), - new HllSketchBuildAggregatorFactory("hll1", "arrayLong", null, null, null, false, false, true), - new HllSketchBuildAggregatorFactory("hll2", "arrayDouble", null, null, null, false, false, true), - new HllSketchBuildAggregatorFactory("hll3", "arrayString", null, null, null, false, false, false), - new HllSketchBuildAggregatorFactory("hll4", "arrayLong", null, null, null, false, false, false), - new HllSketchBuildAggregatorFactory("hll5", "arrayDouble", null, null, null, false, false, false) + new HllSketchBuildAggregatorFactory("hll0", "arrayString", null, null, null, false, false, true), + new HllSketchBuildAggregatorFactory("hll1", "arrayLong", null, null, null, false, false, true), + new HllSketchBuildAggregatorFactory("hll2", "arrayDouble", null, null, null, false, false, true), + new HllSketchBuildAggregatorFactory("hll3", "arrayString", null, null, null, false, false, false), + new HllSketchBuildAggregatorFactory("hll4", "arrayLong", null, null, null, false, false, false), + new HllSketchBuildAggregatorFactory("hll5", "arrayDouble", null, null, null, false, false, false) }; IndexBuilder bob = IndexBuilder.create(timeseriesHelper.getObjectMapper()) diff --git 
a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java index 7d50ae16bdbb..871293442214 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java @@ -19,11 +19,13 @@ package org.apache.druid.query.aggregation.datasketches.hll.sql; +import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.base.Joiner; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.inject.Injector; import org.apache.druid.common.config.NullHandling; +import org.apache.druid.data.input.ResourceInputSource; import org.apache.druid.guice.DruidInjectorBuilder; import org.apache.druid.java.util.common.StringEncoding; import org.apache.druid.java.util.common.StringUtils; @@ -32,6 +34,7 @@ import org.apache.druid.math.expr.ExprMacroTable; import org.apache.druid.query.BaseQuery; import org.apache.druid.query.Druids; +import org.apache.druid.query.NestedDataTestUtils; import org.apache.druid.query.QueryDataSource; import org.apache.druid.query.QueryRunnerFactoryConglomerate; import org.apache.druid.query.aggregation.AggregatorFactory; @@ -257,8 +260,9 @@ public SpecificSegmentsQuerySegmentWalker createQuerySegmentWalker( ) throws IOException { HllSketchModule.registerSerde(); + ObjectMapper mapper = injector.getInstance(ObjectMapper.class); final QueryableIndex index = IndexBuilder - .create() + .create(mapper) .tmpDir(temporaryFolder.newFolder()) .segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance()) .schema( @@ -279,6 +283,36 @@ public 
SpecificSegmentsQuerySegmentWalker createQuerySegmentWalker( .rows(TestDataBuilder.ROWS1_WITH_NUMERIC_DIMS) .buildMMappedIndex(); + final QueryableIndex indexAllTypesAuto = + IndexBuilder.create(mapper) + .tmpDir(temporaryFolder.newFolder()) + .segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance()) + .schema( + new IncrementalIndexSchema.Builder() + .withTimestampSpec(NestedDataTestUtils.AUTO_SCHEMA.getTimestampSpec()) + .withDimensionsSpec(NestedDataTestUtils.AUTO_SCHEMA.getDimensionsSpec()) + .withMetrics( + new HllSketchBuildAggregatorFactory("hll0", "arrayString", null, null, null, false, false, true), + new HllSketchBuildAggregatorFactory("hll1", "arrayLong", null, null, null, false, false, true), + new HllSketchBuildAggregatorFactory("hll2", "arrayDouble", null, null, null, false, false, true), + new HllSketchBuildAggregatorFactory("hll3", "arrayString", null, null, null, false, false, false), + new HllSketchBuildAggregatorFactory("hll4", "arrayLong", null, null, null, false, false, false), + new HllSketchBuildAggregatorFactory("hll5", "arrayDouble", null, null, null, false, false, false), + new CountAggregatorFactory("cnt") + ) + .withRollup(false) + .build() + ) + .inputSource( + ResourceInputSource.of( + NestedDataTestUtils.class.getClassLoader(), + NestedDataTestUtils.ALL_TYPES_TEST_DATA_FILE + ) + ) + .inputFormat(TestDataBuilder.DEFAULT_JSON_INPUT_FORMAT) + .inputTmpDir(temporaryFolder.newFolder()) + .buildMMappedIndex(); + return new SpecificSegmentsQuerySegmentWalker(conglomerate).add( DataSegment.builder() .dataSource(CalciteTests.DATASOURCE1) @@ -288,6 +322,15 @@ public SpecificSegmentsQuerySegmentWalker createQuerySegmentWalker( .size(0) .build(), index + ).add( + DataSegment.builder() + .dataSource("all_types") + .interval(indexAllTypesAuto.getDataInterval()) + .version("1") + .shardSpec(new LinearShardSpec(0)) + .size(0) + .build(), + indexAllTypesAuto ); } @@ -1273,6 +1316,104 @@ public void 
testFloatAndDoubleAreConsideredTheSame() ); } + @Test + public void testArrays() + { + testQuery( + "SELECT" + + " HLL_SKETCH_ESTIMATE(DS_HLL(arrayString))," + + " HLL_SKETCH_ESTIMATE(DS_HLL(arrayLong))," + + " HLL_SKETCH_ESTIMATE(DS_HLL(arrayDouble))," + + " HLL_SKETCH_ESTIMATE(DS_HLL(hll0))," + + " HLL_SKETCH_ESTIMATE(DS_HLL(hll1))," + + " HLL_SKETCH_ESTIMATE(DS_HLL(hll2))," + + " HLL_SKETCH_ESTIMATE(DS_HLL(hll3))," + + " HLL_SKETCH_ESTIMATE(DS_HLL(hll4))," + + " HLL_SKETCH_ESTIMATE(DS_HLL(hll5))" + + " FROM druid.all_types", + ImmutableList.of( + Druids.newTimeseriesQueryBuilder() + .dataSource("all_types") + .intervals(querySegmentSpec(Filtration.eternity())) + .granularity(Granularities.ALL) + .aggregators( + new HllSketchBuildAggregatorFactory("a0", "arrayString", null, null, null, false, true, true), + new HllSketchBuildAggregatorFactory("a1", "arrayLong", null, null, null, false, true, true), + new HllSketchBuildAggregatorFactory("a2", "arrayDouble", null, null, null, false, true, true), + new HllSketchMergeAggregatorFactory("a3", "hll0", null, null, null, false, true), + new HllSketchMergeAggregatorFactory("a4", "hll1", null, null, null, false, true), + new HllSketchMergeAggregatorFactory("a5", "hll2", null, null, null, false, true), + new HllSketchMergeAggregatorFactory("a6", "hll3", null, null, null, false, true), + new HllSketchMergeAggregatorFactory("a7", "hll4", null, null, null, false, true), + new HllSketchMergeAggregatorFactory("a8", "hll5", null, null, null, false, true) + ) + .postAggregators( + new HllSketchToEstimatePostAggregator( + "p1", + new FieldAccessPostAggregator("p0", "a0"), + false + ), + new HllSketchToEstimatePostAggregator( + "p3", + new FieldAccessPostAggregator("p2", "a1"), + false + ), + new HllSketchToEstimatePostAggregator( + "p5", + new FieldAccessPostAggregator("p4", "a2"), + false + ), + // pre-aggregated array counts + new HllSketchToEstimatePostAggregator( + "p7", + new FieldAccessPostAggregator("p6", "a3"), + false + ), 
+ new HllSketchToEstimatePostAggregator( + "p9", + new FieldAccessPostAggregator("p8", "a4"), + false + ), + new HllSketchToEstimatePostAggregator( + "p11", + new FieldAccessPostAggregator("p10", "a5"), + false + ), + // array element counts + new HllSketchToEstimatePostAggregator( + "p13", + new FieldAccessPostAggregator("p12", "a6"), + false + ), + new HllSketchToEstimatePostAggregator( + "p15", + new FieldAccessPostAggregator("p14", "a7"), + false + ), + new HllSketchToEstimatePostAggregator( + "p17", + new FieldAccessPostAggregator("p16", "a8"), + false + ) + ) + .context(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{ + 4.000000029802323D, + 4.000000029802323D, + 4.000000029802323D, + 4.000000029802323D, + 4.000000029802323D, + 4.000000029802323D, + 5.000000049670538D, + 4.000000029802323D, + 6.000000074505807D} + ) + ); + } + private ExpressionVirtualColumn makeSketchEstimateExpression(String outputName, String field) { return new ExpressionVirtualColumn( diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregationTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregationTest.java index 79d1353b833f..1d9f19c20670 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregationTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregationTest.java @@ -536,7 +536,8 @@ public void testArrays() throws Exception "p2", new FieldAccessPostAggregator("f2", "a2"), null - ),new SketchEstimatePostAggregator( + ), + new SketchEstimatePostAggregator( "p3", new FieldAccessPostAggregator("f3", "a3"), null @@ -550,7 +551,8 @@ public void testArrays() throws Exception "p5", new FieldAccessPostAggregator("f5", "a5"), null - ),new SketchEstimatePostAggregator( + ), + new 
SketchEstimatePostAggregator( "p6", new FieldAccessPostAggregator("f6", "a6"), null diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchSqlAggregatorTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchSqlAggregatorTest.java index 952080b859d9..d0dfdd620d84 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchSqlAggregatorTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchSqlAggregatorTest.java @@ -19,16 +19,19 @@ package org.apache.druid.query.aggregation.datasketches.theta.sql; +import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.inject.Injector; import org.apache.druid.common.config.NullHandling; +import org.apache.druid.data.input.ResourceInputSource; import org.apache.druid.guice.DruidInjectorBuilder; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.java.util.common.granularity.Granularities; import org.apache.druid.java.util.common.granularity.PeriodGranularity; import org.apache.druid.math.expr.ExprMacroTable; import org.apache.druid.query.Druids; +import org.apache.druid.query.NestedDataTestUtils; import org.apache.druid.query.QueryDataSource; import org.apache.druid.query.QueryRunnerFactoryConglomerate; import org.apache.druid.query.aggregation.CountAggregatorFactory; @@ -115,7 +118,8 @@ public SpecificSegmentsQuerySegmentWalker createQuerySegmentWalker( { SketchModule.registerSerde(); - final QueryableIndex index = IndexBuilder.create() + ObjectMapper mapper = injector.getInstance(ObjectMapper.class); + final QueryableIndex index = IndexBuilder.create(mapper) .tmpDir(temporaryFolder.newFolder()) 
.segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance()) .schema( @@ -139,6 +143,84 @@ public SpecificSegmentsQuerySegmentWalker createQuerySegmentWalker( .rows(TestDataBuilder.ROWS1) .buildMMappedIndex(); + final QueryableIndex indexAllTypesAuto = + IndexBuilder.create(mapper) + .tmpDir(temporaryFolder.newFolder()) + .segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance()) + .schema( + new IncrementalIndexSchema.Builder() + .withTimestampSpec(NestedDataTestUtils.AUTO_SCHEMA.getTimestampSpec()) + .withDimensionsSpec(NestedDataTestUtils.AUTO_SCHEMA.getDimensionsSpec()) + .withMetrics( + new SketchMergeAggregatorFactory( + "sketch0", + "arrayString", + null, + null, + null, + null, + true + ), + new SketchMergeAggregatorFactory( + "sketch1", + "arrayLong", + null, + null, + null, + null, + true + ), + new SketchMergeAggregatorFactory( + "sketch2", + "arrayDouble", + null, + null, + null, + null, + true + ), + new SketchMergeAggregatorFactory( + "sketch3", + "arrayString", + null, + null, + null, + null, + false + ), + new SketchMergeAggregatorFactory( + "sketch4", + "arrayLong", + null, + null, + null, + null, + false + ), + new SketchMergeAggregatorFactory( + "sketch5", + "arrayDouble", + null, + null, + null, + null, + false + ), + new CountAggregatorFactory("cnt") + ) + .withRollup(false) + .build() + ) + .inputSource( + ResourceInputSource.of( + NestedDataTestUtils.class.getClassLoader(), + NestedDataTestUtils.ALL_TYPES_TEST_DATA_FILE + ) + ) + .inputFormat(TestDataBuilder.DEFAULT_JSON_INPUT_FORMAT) + .inputTmpDir(temporaryFolder.newFolder()) + .buildMMappedIndex(); + return new SpecificSegmentsQuerySegmentWalker(conglomerate).add( DataSegment.builder() .dataSource(DATA_SOURCE) @@ -148,6 +230,15 @@ public SpecificSegmentsQuerySegmentWalker createQuerySegmentWalker( .size(0) .build(), index + ).add( + DataSegment.builder() + .dataSource("all_types") + .interval(indexAllTypesAuto.getDataInterval()) + 
.version("1") + .shardSpec(new LinearShardSpec(0)) + .size(0) + .build(), + indexAllTypesAuto ); } @@ -1219,4 +1310,116 @@ public void testThetaEstimateAsVirtualColumnWithTopN() ) ); } + + @Test + public void testArrays() + { + testQuery( + "SELECT\n" + + " APPROX_COUNT_DISTINCT_DS_THETA(arrayString), " + + " APPROX_COUNT_DISTINCT_DS_THETA(arrayLong), " + + " APPROX_COUNT_DISTINCT_DS_THETA(arrayDouble), " + + " APPROX_COUNT_DISTINCT_DS_THETA(sketch0), " + + " APPROX_COUNT_DISTINCT_DS_THETA(sketch1), " + + " APPROX_COUNT_DISTINCT_DS_THETA(sketch2), " + + " APPROX_COUNT_DISTINCT_DS_THETA(sketch3), " + + " APPROX_COUNT_DISTINCT_DS_THETA(sketch4), " + + " APPROX_COUNT_DISTINCT_DS_THETA(sketch5) " + + "FROM druid.all_types", + ImmutableList.of( + Druids.newTimeseriesQueryBuilder() + .dataSource("all_types") + .intervals(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity()))) + .granularity(Granularities.ALL) + .aggregators( + ImmutableList.of( + new SketchMergeAggregatorFactory( + "a0", + "arrayString", + null, + null, + null, + null, + true + ), + new SketchMergeAggregatorFactory( + "a1", + "arrayLong", + null, + null, + null, + null, + true + ), + new SketchMergeAggregatorFactory( + "a2", + "arrayDouble", + null, + null, + null, + null, + true + ), + new SketchMergeAggregatorFactory( + "a3", + "sketch0", + null, + null, + false, + null, + false + ), + new SketchMergeAggregatorFactory( + "a4", + "sketch1", + null, + null, + false, + null, + false + ), + new SketchMergeAggregatorFactory( + "a5", + "sketch2", + null, + null, + false, + null, + false + ), + new SketchMergeAggregatorFactory( + "a6", + "sketch3", + null, + null, + false, + null, + false + ), + new SketchMergeAggregatorFactory( + "a7", + "sketch4", + null, + null, + false, + null, + false + ), + new SketchMergeAggregatorFactory( + "a8", + "sketch5", + null, + null, + false, + null, + false + ) + ) + ) + .context(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of(new Object[]{4L, 4L, 
4L, 4L, 4L, 4L, 5L, 4L, 6L}) + ); + } } diff --git a/processing/src/test/resources/nested-all-types-test-data.json b/processing/src/test/resources/nested-all-types-test-data.json index 05984a7d4a23..95a43a70d1cd 100644 --- a/processing/src/test/resources/nested-all-types-test-data.json +++ b/processing/src/test/resources/nested-all-types-test-data.json @@ -4,4 +4,4 @@ {"timestamp": "2023-01-01T00:00:00", "str":"b", "long":4, "double":3.3, "bool": true, "variant": "1", "variantEmptyObj":{}, "variantEmtpyArray":4, "obj":{"a": 400, "b": {"x": "d", "y": 1.1, "z": [3, 4]}}, "complexObj":{"x": 1234, "z": {"a": [1.1, 2.2, 3.3], "b": true}}, "arrayString": ["d", "e"], "arrayStringNulls": ["b", "b"], "arrayLong":[1, 4], "arrayLongNulls":[1], "arrayDouble":[2.2, 3.3, 4.0], "arrayVariant":["a", "b", "c"], "arrayBool":[null, false, true], "arrayNestedLong":[[1, 2], [3, 4], [5, 6, 7]], "arrayObject":[{"x": null},{"x":2}], "null": null, "cstr": "hello", "clong": 1234, "cdouble": 1.234, "cObj":{"x": 1, "y": "hello", "z": {"a": 1.1, "b": 1234, "c": ["a", "b", "c"]}}, "cstringArray": ["a", "b", "c"], "cLongArray": [1, 2, 3], "cDoubleArray": [1.1, 2.2, 3.3], "cEmptyArray":[], "cEmptyObj":{}, "cNullArray": [null, null], "cEmptyObjectArray": [{},{},{}], "cObjectArray": [{"a":"b", "x":1, "y":1.3}]} {"timestamp": "2023-01-01T00:00:00", "str":"c", "long": null, "double":4.4, "bool": true, "variant": "hello", "variantEmptyObj":{}, "variantEmtpyArray":[], "obj":{"a": 500, "b": {"x": "e", "z": [1, 2, 3, 4]}}, "complexObj":{"x": 11, "y": [], "z": {"a": [null], "b": false}}, "arrayString": null, "arrayLong":[1, 2, 3], "arrayLongNulls":[], "arrayDouble":[1.1, 2.2, 3.3], "arrayDoubleNulls":null, "arrayBool":[false], "arrayObject":[{"x": 1000},{"y":2000}], "null": null, "cstr": "hello", "clong": 1234, "cdouble": 1.234, "cObj":{"x": 1, "y": "hello", "z": {"a": 1.1, "b": 1234, "c": ["a", "b", "c"]}}, "cstringArray": ["a", "b", "c"], "cLongArray": [1, 2, 3], "cDoubleArray": [1.1, 2.2, 3.3], 
"cEmptyArray":[], "cEmptyObj":{}, "cNullArray": [null, null], "cEmptyObjectArray": [{},{},{}], "cObjectArray": [{"a":"b", "x":1, "y":1.3}]} {"timestamp": "2023-01-01T00:00:00", "str":"d", "long":5, "double":5.9, "bool": false, "variantEmptyObj":"a", "variantEmtpyArray":6, "obj":{"a": 600, "b": {"x": "f", "y": 1.1, "z": [6, 7, 8, 9]}}, "arrayString": ["a", "b"], "arrayStringNulls": null, "arrayLongNulls":[null, 2, 9], "arrayDouble":null, "arrayDoubleNulls":[999, 5.5, null], "arrayVariant":["a", 1, 2.2], "arrayBool":[], "arrayNestedLong":[[1], [1, 2, null]], "arrayObject":[{"a": 1},{"b":2}], "cstr": "hello", "clong": 1234, "cdouble": 1.234, "cObj":{"x": 1, "y": "hello", "z": {"a": 1.1, "b": 1234, "c": ["a", "b", "c"]}}, "cstringArray": ["a", "b", "c"], "cLongArray": [1, 2, 3], "cDoubleArray": [1.1, 2.2, 3.3], "cEmptyArray":[], "cEmptyObj":{}, "cNullArray": [null, null], "cEmptyObjectArray": [{},{},{}], "cObjectArray": [{"a":"b", "x":1, "y":1.3}]} -{"timestamp": "2023-01-01T00:00:00", "str":null, "double":null, "bool": true, "variant": 51, "variantEmptyObj":1, "variantEmtpyArray":[], "obj":{"a": 700, "b": {"x": "g", "y": 1.1, "z": [9, null, 9, 9]}}, "complexObj":{"x": 400, "y": [{"l": [null], "m": 100, "n": 5},{"l": ["a", "b", "c"], "m": "a", "n": 1}], "z": {}}, "arrayStringNulls": ["a", "b"], "arrayLong":null, "arrayLongNulls":[2, 3], "arrayDoubleNulls":[null], "arrayVariant":null, "arrayBool":[true, false, true], "arrayNestedLong":null, "arrayObject":[{"x": 1},{"x":2}], "null": null, "cstr": "hello", "clong": 1234, "cdouble": 1.234, "cObj":{"x": 1, "y": "hello", "z": {"a": 1.1, "b": 1234, "c": ["a", "b", "c"]}}, "cstringArray": ["a", "b", "c"], "cLongArray": [1, 2, 3], "cDoubleArray": [1.1, 2.2, 3.3], "cEmptyArray":[], "cEmptyObj":{}, "cNullArray": [null, null], "cEmptyObjectArray": [{},{},{}], "cObjectArray": [{"a":"b", "x":1, "y":1.3}]} +{"timestamp": "2023-01-01T00:00:00", "str":null, "double":null, "bool": true, "variant": 51, "variantEmptyObj":1, 
"variantEmtpyArray":[], "obj":{"a": 700, "b": {"x": "g", "y": 1.1, "z": [9, null, 9, 9]}}, "complexObj":{"x": 400, "y": [{"l": [null], "m": 100, "n": 5},{"l": ["a", "b", "c"], "m": "a", "n": 1}], "z": {}}, "arrayStringNulls": ["a", "b"], "arrayLong":null, "arrayLongNulls":[2, 3], "arrayDoubleNulls":[null], "arrayVariant":null, "arrayBool":[true, false, true], "arrayNestedLong":null, "arrayObject":[{"x": 1},{"x":2}], "null": null, "cstr": "hello", "clong": 1234, "cdouble": 1.234, "cObj":{"x": 1, "y": "hello", "z": {"a": 1.1, "b": 1234, "c": ["a", "b", "c"]}}, "cstringArray": ["a", "b", "c"], "cLongArray": [1, 2, 3], "cDoubleArray": [1.1, 2.2, 3.3], "cEmptyArray":[], "cEmptyObj":{}, "cNullArray": [null, null], "cEmptyObjectArray": [{},{},{}], "cObjectArray": [{"a":"b", "x":1, "y":1.3}]} From e7cb77c54047d8f266649bcc66aa87e66ba81dfb Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Thu, 13 Jul 2023 00:04:09 -0700 Subject: [PATCH 30/44] adjustment --- .../aggregation/datasketches/theta/SketchAggregator.java | 2 +- .../datasketches/theta/SketchAggregationTest.java | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregator.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregator.java index b397622ad325..c614220e4fce 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregator.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregator.java @@ -151,7 +151,7 @@ static void updateUnion(Union union, Object update, boolean processAsArrays) union.update((int[]) update); } else if (update instanceof long[]) { union.update((long[]) update); - } else if (update instanceof Object[]) { + } else if (update instanceof Object[] && processAsArrays) { final byte[] 
arrayBytes = ExprEval.toBytesBestEffort(update); union.update(arrayBytes); } else if (update instanceof List) { diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregationTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregationTest.java index 1d9f19c20670..91a8f5de8e37 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregationTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregationTest.java @@ -444,7 +444,7 @@ public void testArrays() throws Exception null, null, null, - false + true ), new SketchMergeAggregatorFactory( "a1", @@ -453,7 +453,7 @@ public void testArrays() throws Exception null, null, null, - false + true ), new SketchMergeAggregatorFactory( "a2", @@ -462,7 +462,7 @@ public void testArrays() throws Exception null, null, null, - false + true ), new SketchMergeAggregatorFactory( "a3", From 7efcd83409e3738a787cff1f86c168de7703be7a Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Thu, 13 Jul 2023 20:49:52 -0700 Subject: [PATCH 31/44] remove agg changes in favor of splitting into a separate PR --- .../hll/HllSketchAggregatorFactory.java | 22 +- .../hll/HllSketchBuildAggregatorFactory.java | 24 +- .../datasketches/hll/HllSketchBuildUtil.java | 26 +- .../hll/HllSketchMergeAggregatorFactory.java | 2 +- .../hll/sql/HllSketchBaseSqlAggregator.java | 3 +- .../HllSketchBuildVectorProcessorFactory.java | 42 +-- .../ObjectHllSketchBuildVectorProcessor.java | 6 +- .../datasketches/theta/SketchAggregator.java | 30 +- .../theta/SketchAggregatorFactory.java | 27 +- .../theta/SketchBufferAggregator.java | 6 +- .../theta/SketchMergeAggregatorFactory.java | 27 +- .../theta/SketchVectorAggregator.java | 9 +- .../OldSketchBuildAggregatorFactory.java | 2 +- .../OldSketchMergeAggregatorFactory.java | 
2 +- .../sql/ThetaSketchBaseSqlAggregator.java | 6 +- .../hll/HllSketchAggregatorFactoryTest.java | 8 +- .../hll/HllSketchAggregatorTest.java | 170 ---------- .../HllSketchBuildAggregatorFactoryTest.java | 4 +- .../hll/HllSketchBuildUtilTest.java | 4 +- .../HllSketchMergeAggregatorFactoryTest.java | 3 +- .../hll/sql/HllSketchSqlAggregatorTest.java | 216 ++---------- ...UsingSketchMergeAggregatorFactoryTest.java | 2 +- .../theta/SketchAggregationTest.java | 313 +----------------- .../theta/SketchAggregatorFactoryTest.java | 9 +- .../SketchToStringPostAggregatorTest.java | 2 +- .../sql/ThetaSketchSqlAggregatorTest.java | 287 ++-------------- .../bloom/BloomFilterAggregatorFactory.java | 22 +- .../bloom/ObjectBloomFilterAggregator.java | 3 - .../druid/query/filter/BloomDimFilter.java | 24 +- .../bloom/BloomFilterAggregatorTest.java | 78 +---- .../filter/sql/BloomDimFilterSqlTest.java | 2 +- .../duty/ITAutoCompactionTest.java | 7 +- 32 files changed, 177 insertions(+), 1211 deletions(-) diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorFactory.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorFactory.java index 1f65b0480367..4bc734dc0051 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorFactory.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorFactory.java @@ -62,8 +62,6 @@ public abstract class HllSketchAggregatorFactory extends AggregatorFactory private final boolean shouldFinalize; private final boolean round; - private final boolean processAsArray; - HllSketchAggregatorFactory( final String name, final String fieldName, @@ -71,8 +69,7 @@ public abstract class HllSketchAggregatorFactory extends AggregatorFactory @Nullable final String tgtHllType, @Nullable final 
StringEncoding stringEncoding, final Boolean shouldFinalize, - final boolean round, - final boolean processAsArray + final boolean round ) { this.name = Objects.requireNonNull(name); @@ -82,7 +79,6 @@ public abstract class HllSketchAggregatorFactory extends AggregatorFactory this.stringEncoding = stringEncoding == null ? DEFAULT_STRING_ENCODING : stringEncoding; this.shouldFinalize = shouldFinalize == null ? DEFAULT_SHOULD_FINALIZE : shouldFinalize; this.round = round; - this.processAsArray = processAsArray; } @Override @@ -131,13 +127,6 @@ public boolean isRound() return round; } - @JsonProperty - @JsonInclude(JsonInclude.Include.NON_DEFAULT) - public boolean isProcessAsArray() - { - return processAsArray; - } - @Override public List requiredFields() { @@ -160,8 +149,7 @@ public List getRequiredColumns() tgtHllType.toString(), stringEncoding, shouldFinalize, - round, - false + round ) ); } @@ -296,14 +284,13 @@ public boolean equals(Object o) && Objects.equals(name, that.name) && Objects.equals(fieldName, that.fieldName) && tgtHllType == that.tgtHllType - && stringEncoding == that.stringEncoding - && processAsArray == that.processAsArray; + && stringEncoding == that.stringEncoding; } @Override public int hashCode() { - return Objects.hash(name, fieldName, lgK, tgtHllType, stringEncoding, shouldFinalize, round, processAsArray); + return Objects.hash(name, fieldName, lgK, tgtHllType, stringEncoding, shouldFinalize, round); } @Override @@ -317,7 +304,6 @@ public String toString() (stringEncoding != DEFAULT_STRING_ENCODING ? ", stringEncoding=" + stringEncoding : "") + (shouldFinalize != DEFAULT_SHOULD_FINALIZE ? ", shouldFinalize=" + shouldFinalize : "") + (round != DEFAULT_ROUND ? ", round=" + round : "") + - (processAsArray ? 
", processAsArray=true" : "") + '}'; } diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildAggregatorFactory.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildAggregatorFactory.java index ec2bf5a23334..6d42c678bbb1 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildAggregatorFactory.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildAggregatorFactory.java @@ -23,10 +23,9 @@ import com.fasterxml.jackson.annotation.JsonProperty; import org.apache.datasketches.hll.HllSketch; import org.apache.datasketches.hll.TgtHllType; +import org.apache.druid.error.InvalidInput; import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.common.StringEncoding; -import org.apache.druid.math.expr.ExprEval; -import org.apache.druid.math.expr.ExpressionType; import org.apache.druid.query.aggregation.Aggregator; import org.apache.druid.query.aggregation.AggregatorFactory; import org.apache.druid.query.aggregation.AggregatorUtil; @@ -55,7 +54,6 @@ public class HllSketchBuildAggregatorFactory extends HllSketchAggregatorFactory { public static final ColumnType TYPE = ColumnType.ofComplex(HllSketchModule.BUILD_TYPE_NAME); - @JsonCreator public HllSketchBuildAggregatorFactory( @JsonProperty("name") final String name, @@ -64,11 +62,10 @@ public HllSketchBuildAggregatorFactory( @JsonProperty("tgtHllType") @Nullable final String tgtHllType, @JsonProperty("stringEncoding") @Nullable final StringEncoding stringEncoding, @JsonProperty("shouldFinalize") final Boolean shouldFinalize, - @JsonProperty("round") final boolean round, - @JsonProperty("processAsArray") final boolean processAsArray + @JsonProperty("round") final boolean round ) { - super(name, fieldName, lgK, tgtHllType, stringEncoding, 
shouldFinalize, round, processAsArray); + super(name, fieldName, lgK, tgtHllType, stringEncoding, shouldFinalize, round); } @@ -147,8 +144,7 @@ public AggregatorFactory withName(String newName) getTgtHllType(), getStringEncoding(), isShouldFinalize(), - isRound(), - isProcessAsArray() + isRound() ); } @@ -227,20 +223,12 @@ private HllSketchUpdater formulateSketchUpdater(ColumnSelectorFactory columnSele }; break; case ARRAY: - final ExpressionType expressionType = ExpressionType.fromColumnTypeStrict(capabilities); - updater = sketch -> { - final Object o = selector.getObject(); - if (o != null) { - byte[] bytes = ExprEval.toBytes(expressionType, o); - sketch.get().update(bytes); - } - }; - break; + throw InvalidInput.exception("ARRAY types are not supported for hll sketch"); default: updater = sketch -> { Object obj = selector.getObject(); if (obj != null) { - HllSketchBuildUtil.updateSketch(sketch.get(), getStringEncoding(), obj, isProcessAsArray()); + HllSketchBuildUtil.updateSketch(sketch.get(), getStringEncoding(), obj); } }; } diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildUtil.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildUtil.java index 0437999ab85a..bcd4c4eb6d90 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildUtil.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildUtil.java @@ -25,7 +25,6 @@ import org.apache.druid.java.util.common.StringEncoding; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.java.util.common.UOE; -import org.apache.druid.math.expr.ExprEval; import org.apache.druid.segment.DimensionDictionarySelector; import javax.annotation.Nullable; @@ -34,12 +33,7 @@ public class HllSketchBuildUtil { - public static void updateSketch( - 
final HllSketch sketch, - final StringEncoding stringEncoding, - final Object value, - final boolean processAsArray - ) + public static void updateSketch(final HllSketch sketch, final StringEncoding stringEncoding, final Object value) { if (value instanceof Integer || value instanceof Long) { sketch.update(((Number) value).longValue()); @@ -47,21 +41,11 @@ public static void updateSketch( sketch.update(((Number) value).doubleValue()); } else if (value instanceof String) { updateSketchWithString(sketch, stringEncoding, (String) value); - } else if (value instanceof Object[] && processAsArray) { - byte[] arrayBytes = ExprEval.toBytesBestEffort(value); - sketch.update(arrayBytes); } else if (value instanceof List) { - if (processAsArray) { - final ExprEval eval = ExprEval.bestEffortArray((List) value); - final byte[] arrayBytes = ExprEval.toBytes(eval); - sketch.update(arrayBytes); - } else { - // Lists are treated as multi-value strings, which count each element as a separate distinct value - // noinspection rawtypes - for (Object entry : (List) value) { - if (entry != null) { - updateSketchWithString(sketch, stringEncoding, entry.toString()); - } + // noinspection rawtypes + for (Object entry : (List) value) { + if (entry != null) { + updateSketchWithString(sketch, stringEncoding, entry.toString()); } } } else if (value instanceof char[]) { diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchMergeAggregatorFactory.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchMergeAggregatorFactory.java index 20d2a854c8cd..833df8ab1a55 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchMergeAggregatorFactory.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchMergeAggregatorFactory.java @@ -64,7 +64,7 @@ public 
HllSketchMergeAggregatorFactory( @JsonProperty("round") final boolean round ) { - super(name, fieldName, lgK, tgtHllType, stringEncoding, shouldFinalize, round, false); + super(name, fieldName, lgK, tgtHllType, stringEncoding, shouldFinalize, round); } @Override diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchBaseSqlAggregator.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchBaseSqlAggregator.java index ede81f0f21a8..c6dd3e7afa02 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchBaseSqlAggregator.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchBaseSqlAggregator.java @@ -193,8 +193,7 @@ public Aggregation toDruidAggregation( tgtHllType, stringEncoding, finalizeSketch || SketchQueryContext.isFinalizeOuterSketches(plannerContext), - ROUND, - inputType.isArray() + ROUND ); } } diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/vector/HllSketchBuildVectorProcessorFactory.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/vector/HllSketchBuildVectorProcessorFactory.java index e6ca1f933630..ca85ccd06fca 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/vector/HllSketchBuildVectorProcessorFactory.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/vector/HllSketchBuildVectorProcessorFactory.java @@ -19,10 +19,8 @@ package org.apache.druid.query.aggregation.datasketches.hll.vector; -import org.apache.datasketches.hll.HllSketch; +import org.apache.druid.error.DruidException; import org.apache.druid.java.util.common.StringEncoding; -import org.apache.druid.math.expr.ExprEval; -import 
org.apache.druid.math.expr.ExpressionType; import org.apache.druid.query.aggregation.datasketches.hll.HllSketchBuildBufferAggregatorHelper; import org.apache.druid.segment.VectorColumnProcessorFactory; import org.apache.druid.segment.column.ColumnCapabilities; @@ -31,9 +29,6 @@ import org.apache.druid.segment.vector.VectorObjectSelector; import org.apache.druid.segment.vector.VectorValueSelector; -import javax.annotation.Nullable; -import java.nio.ByteBuffer; - public class HllSketchBuildVectorProcessorFactory implements VectorColumnProcessorFactory { private final HllSketchBuildBufferAggregatorHelper helper; @@ -95,40 +90,7 @@ public HllSketchBuildVectorProcessor makeArrayProcessor( VectorObjectSelector selector ) { - final ExpressionType expressionType = ExpressionType.fromColumnTypeStrict(capabilities); - return new HllSketchBuildVectorProcessor() - { - @Override - public void aggregate(ByteBuffer buf, int position, int startRow, int endRow) - { - final Object[] vector = selector.getObjectVector(); - final HllSketch sketch = helper.getSketchAtPosition(buf, position); - - for (int i = startRow; i < endRow; i++) { - if (vector[i] != null) { - byte[] bytes = ExprEval.toBytes(expressionType, vector[i]); - sketch.update(bytes); - } - } - } - - @Override - public void aggregate(ByteBuffer buf, int numRows, int[] positions, @Nullable int[] rows, int positionOffset) - { - final Object[] vector = selector.getObjectVector(); - - for (int i = 0; i < numRows; i++) { - final int idx = rows != null ? 
rows[i] : i; - final int position = positions[i] + positionOffset; - final HllSketch sketch = helper.getSketchAtPosition(buf, position); - - if (vector[idx] != null) { - byte[] bytes = ExprEval.toBytes(expressionType, vector[idx]); - sketch.update(bytes); - } - } - } - }; + throw DruidException.defensive("ARRAY types are not supported for distinct count"); } @Override diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/vector/ObjectHllSketchBuildVectorProcessor.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/vector/ObjectHllSketchBuildVectorProcessor.java index 51308db4a88d..56eceb15f5c1 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/vector/ObjectHllSketchBuildVectorProcessor.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/vector/ObjectHllSketchBuildVectorProcessor.java @@ -59,8 +59,7 @@ public void aggregate(ByteBuffer buf, int position, int startRow, int endRow) HllSketchBuildUtil.updateSketch( sketch, stringEncoding, - vector[i], - false + vector[i] ); } } @@ -80,8 +79,7 @@ public void aggregate(ByteBuffer buf, int numRows, int[] positions, @Nullable in HllSketchBuildUtil.updateSketch( sketch, stringEncoding, - vector[idx], - false + vector[idx] ); } } diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregator.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregator.java index c614220e4fce..32344e408294 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregator.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregator.java @@ -24,7 +24,6 @@ import 
org.apache.datasketches.theta.Union; import org.apache.druid.common.config.NullHandling; import org.apache.druid.java.util.common.ISE; -import org.apache.druid.math.expr.ExprEval; import org.apache.druid.query.aggregation.Aggregator; import org.apache.druid.segment.BaseObjectColumnValueSelector; @@ -36,16 +35,14 @@ public class SketchAggregator implements Aggregator private final BaseObjectColumnValueSelector selector; private final int size; - private final boolean processAsArray; @Nullable private Union union; - public SketchAggregator(BaseObjectColumnValueSelector selector, int size, boolean processAsArray) + public SketchAggregator(BaseObjectColumnValueSelector selector, int size) { this.selector = selector; this.size = size; - this.processAsArray = processAsArray; } private void initUnion() @@ -64,7 +61,7 @@ public void aggregate() if (union == null) { initUnion(); } - updateUnion(union, update, processAsArray); + updateUnion(union, update); } } @@ -88,7 +85,7 @@ public long aggregateWithSize() initialSketchSize = union.getCurrentBytes(); } - updateUnion(union, update, processAsArray); + updateUnion(union, update); long sketchSizeDelta = union.getCurrentBytes() - initialSketchSize; return sketchSizeDelta + unionSizeDelta; @@ -135,7 +132,7 @@ public void close() union = null; } - static void updateUnion(Union union, Object update, boolean processAsArrays) + static void updateUnion(Union union, Object update) { if (update instanceof SketchHolder) { ((SketchHolder) update).updateUnion(union); @@ -151,21 +148,12 @@ static void updateUnion(Union union, Object update, boolean processAsArrays) union.update((int[]) update); } else if (update instanceof long[]) { union.update((long[]) update); - } else if (update instanceof Object[] && processAsArrays) { - final byte[] arrayBytes = ExprEval.toBytesBestEffort(update); - union.update(arrayBytes); } else if (update instanceof List) { - if (processAsArrays) { - final ExprEval eval = ExprEval.bestEffortArray((List) update); 
- final byte[] arrayBytes = ExprEval.toBytes(eval); - union.update(arrayBytes); - } else { - for (Object entry : (List) update) { - if (entry != null) { - final String asString = entry.toString(); - if (!NullHandling.isNullOrEquivalent(asString)) { - union.update(asString); - } + for (Object entry : (List) update) { + if (entry != null) { + final String asString = entry.toString(); + if (!NullHandling.isNullOrEquivalent(asString)) { + union.update(asString); } } } diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregatorFactory.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregatorFactory.java index 76b3be162c49..211373e873b0 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregatorFactory.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregatorFactory.java @@ -27,6 +27,7 @@ import org.apache.datasketches.theta.SetOperation; import org.apache.datasketches.theta.Union; import org.apache.datasketches.thetacommon.ThetaUtil; +import org.apache.druid.error.InvalidInput; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.query.aggregation.AggregateCombiner; import org.apache.druid.query.aggregation.Aggregator; @@ -39,6 +40,7 @@ import org.apache.druid.segment.ColumnInspector; import org.apache.druid.segment.ColumnSelectorFactory; import org.apache.druid.segment.ColumnValueSelector; +import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.vector.VectorColumnSelectorFactory; import javax.annotation.Nullable; @@ -63,9 +65,7 @@ public abstract class SketchAggregatorFactory extends AggregatorFactory protected final int size; private final byte cacheId; - protected final boolean processAsArray; - - public SketchAggregatorFactory(String name, String 
fieldName, Integer size, byte cacheId, boolean processAsArray) + public SketchAggregatorFactory(String name, String fieldName, Integer size, byte cacheId) { this.name = Preconditions.checkNotNull(name, "Must have a valid, non-null aggregator name"); this.fieldName = Preconditions.checkNotNull(fieldName, "Must have a valid, non-null fieldName"); @@ -74,22 +74,29 @@ public SketchAggregatorFactory(String name, String fieldName, Integer size, byte Util.checkIfIntPowerOf2(this.size, "size"); this.cacheId = cacheId; - this.processAsArray = processAsArray; } @SuppressWarnings("unchecked") @Override public Aggregator factorize(ColumnSelectorFactory metricFactory) { + ColumnCapabilities capabilities = metricFactory.getColumnCapabilities(fieldName); + if (capabilities != null && capabilities.isArray()) { + throw InvalidInput.exception("ARRAY types are not supported for theta sketch"); + } BaseObjectColumnValueSelector selector = metricFactory.makeColumnValueSelector(fieldName); - return new SketchAggregator(selector, size, processAsArray); + return new SketchAggregator(selector, size); } @Override public AggregatorAndSize factorizeWithSize(ColumnSelectorFactory metricFactory) { + ColumnCapabilities capabilities = metricFactory.getColumnCapabilities(fieldName); + if (capabilities != null && capabilities.isArray()) { + throw InvalidInput.exception("ARRAY types are not supported for theta sketch"); + } BaseObjectColumnValueSelector selector = metricFactory.makeColumnValueSelector(fieldName); - final SketchAggregator aggregator = new SketchAggregator(selector, size, processAsArray); + final SketchAggregator aggregator = new SketchAggregator(selector, size); return new AggregatorAndSize(aggregator, aggregator.getInitialSizeBytes()); } @@ -97,14 +104,18 @@ public AggregatorAndSize factorizeWithSize(ColumnSelectorFactory metricFactory) @Override public BufferAggregator factorizeBuffered(ColumnSelectorFactory metricFactory) { + ColumnCapabilities capabilities = 
metricFactory.getColumnCapabilities(fieldName); + if (capabilities != null && capabilities.isArray()) { + throw InvalidInput.exception("ARRAY types are not supported for theta sketch"); + } BaseObjectColumnValueSelector selector = metricFactory.makeColumnValueSelector(fieldName); - return new SketchBufferAggregator(selector, size, getMaxIntermediateSizeWithNulls(), processAsArray); + return new SketchBufferAggregator(selector, size, getMaxIntermediateSizeWithNulls()); } @Override public VectorAggregator factorizeVector(VectorColumnSelectorFactory selectorFactory) { - return new SketchVectorAggregator(selectorFactory, fieldName, size, getMaxIntermediateSizeWithNulls(), processAsArray); + return new SketchVectorAggregator(selectorFactory, fieldName, size, getMaxIntermediateSizeWithNulls()); } @Override diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchBufferAggregator.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchBufferAggregator.java index 787b3f84402b..34aae3f36e18 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchBufferAggregator.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchBufferAggregator.java @@ -31,13 +31,11 @@ public class SketchBufferAggregator implements BufferAggregator { private final BaseObjectColumnValueSelector selector; private final SketchBufferAggregatorHelper helper; - private final boolean processAsArray; - public SketchBufferAggregator(BaseObjectColumnValueSelector selector, int size, int maxIntermediateSize, boolean processAsArray) + public SketchBufferAggregator(BaseObjectColumnValueSelector selector, int size, int maxIntermediateSize) { this.selector = selector; this.helper = new SketchBufferAggregatorHelper(size, maxIntermediateSize); - this.processAsArray = processAsArray; } 
@Override @@ -55,7 +53,7 @@ public void aggregate(ByteBuffer buf, int position) } Union union = helper.getOrCreateUnion(buf, position); - SketchAggregator.updateUnion(union, update, processAsArray); + SketchAggregator.updateUnion(union, update); } diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchMergeAggregatorFactory.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchMergeAggregatorFactory.java index 33626448e3dc..41869d5ea509 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchMergeAggregatorFactory.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchMergeAggregatorFactory.java @@ -46,11 +46,10 @@ public SketchMergeAggregatorFactory( @JsonProperty("size") @Nullable Integer size, @JsonProperty("shouldFinalize") @Nullable Boolean shouldFinalize, @JsonProperty("isInputThetaSketch") @Nullable Boolean isInputThetaSketch, - @JsonProperty("errorBoundsStdDev") @Nullable Integer errorBoundsStdDev, - @JsonProperty("processAsArray") boolean processAsArray + @JsonProperty("errorBoundsStdDev") @Nullable Integer errorBoundsStdDev ) { - super(name, fieldName, size, AggregatorUtil.SKETCH_MERGE_CACHE_TYPE_ID, processAsArray); + super(name, fieldName, size, AggregatorUtil.SKETCH_MERGE_CACHE_TYPE_ID); this.shouldFinalize = (shouldFinalize == null) ? true : shouldFinalize; this.isInputThetaSketch = (isInputThetaSketch == null) ? 
false : isInputThetaSketch; this.errorBoundsStdDev = errorBoundsStdDev; @@ -66,8 +65,7 @@ public List getRequiredColumns() size, shouldFinalize, isInputThetaSketch, - errorBoundsStdDev, - processAsArray + errorBoundsStdDev ) ); } @@ -75,7 +73,7 @@ public List getRequiredColumns() @Override public AggregatorFactory getCombiningFactory() { - return new SketchMergeAggregatorFactory(name, name, size, shouldFinalize, false, errorBoundsStdDev, processAsArray); + return new SketchMergeAggregatorFactory(name, name, size, shouldFinalize, false, errorBoundsStdDev); } @Override @@ -90,8 +88,7 @@ public AggregatorFactory getMergingFactory(AggregatorFactory other) throws Aggre Math.max(size, castedOther.size), shouldFinalize, false, - errorBoundsStdDev, - processAsArray + errorBoundsStdDev ); } else { throw new AggregatorFactoryNotMergeableException(this, other); @@ -120,13 +117,6 @@ public Integer getErrorBoundsStdDev() return errorBoundsStdDev; } - @JsonProperty - @JsonInclude(JsonInclude.Include.NON_DEFAULT) - public boolean isProcessAsArray() - { - return processAsArray; - } - /** * Finalize the computation on sketch object and returns estimate from underlying * sketch. @@ -188,8 +178,7 @@ public AggregatorFactory withName(String newName) getSize(), getShouldFinalize(), getIsInputThetaSketch(), - getErrorBoundsStdDev(), - isProcessAsArray() + getErrorBoundsStdDev() ); } @@ -223,7 +212,7 @@ public boolean equals(Object o) return false; } - return isInputThetaSketch == that.isInputThetaSketch && processAsArray == that.processAsArray; + return isInputThetaSketch == that.isInputThetaSketch; } @Override @@ -233,7 +222,6 @@ public int hashCode() result = 31 * result + (shouldFinalize ? 1 : 0); result = 31 * result + (isInputThetaSketch ? 1 : 0); result = 31 * result + (errorBoundsStdDev != null ? errorBoundsStdDev.hashCode() : 0); - result = 31 * result + (processAsArray ? 
1 : 0); return result; } @@ -247,7 +235,6 @@ public String toString() + ", shouldFinalize=" + shouldFinalize + ", isInputThetaSketch=" + isInputThetaSketch + ", errorBoundsStdDev=" + errorBoundsStdDev - + ", processAsArray=" + processAsArray + "}"; } } diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchVectorAggregator.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchVectorAggregator.java index 43125504177a..a862265d561c 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchVectorAggregator.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchVectorAggregator.java @@ -33,18 +33,15 @@ public class SketchVectorAggregator implements VectorAggregator { private final SketchBufferAggregatorHelper helper; private final Supplier objectSupplier; - private final boolean processAsArray; SketchVectorAggregator( final VectorColumnSelectorFactory columnSelectorFactory, final String column, final int size, - final int maxIntermediateSize, - final boolean processAsArray + final int maxIntermediateSize ) { this.helper = new SketchBufferAggregatorHelper(size, maxIntermediateSize); - this.processAsArray = processAsArray; this.objectSupplier = ColumnProcessors.makeVectorProcessor( column, @@ -68,7 +65,7 @@ public void aggregate(final ByteBuffer buf, final int position, final int startR for (int i = startRow; i < endRow; i++) { final Object o = vector[i]; if (o != null) { - SketchAggregator.updateUnion(union, o, processAsArray); + SketchAggregator.updateUnion(union, o); } } } @@ -90,7 +87,7 @@ public void aggregate( if (o != null) { final int position = positions[i] + positionOffset; final Union union = helper.getOrCreateUnion(buf, position); - SketchAggregator.updateUnion(union, o, processAsArray); + SketchAggregator.updateUnion(union, o); 
} } } diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/oldapi/OldSketchBuildAggregatorFactory.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/oldapi/OldSketchBuildAggregatorFactory.java index 4868fb74d298..2c8d21941eb3 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/oldapi/OldSketchBuildAggregatorFactory.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/oldapi/OldSketchBuildAggregatorFactory.java @@ -35,7 +35,7 @@ public OldSketchBuildAggregatorFactory( @JsonProperty("size") Integer size ) { - super(name, fieldName, size, true, false, null, false); + super(name, fieldName, size, true, false, null); } @Override diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/oldapi/OldSketchMergeAggregatorFactory.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/oldapi/OldSketchMergeAggregatorFactory.java index 94070ec87a69..e884b210a3a0 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/oldapi/OldSketchMergeAggregatorFactory.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/oldapi/OldSketchMergeAggregatorFactory.java @@ -36,7 +36,7 @@ public OldSketchMergeAggregatorFactory( @JsonProperty("shouldFinalize") Boolean shouldFinalize ) { - super(name, fieldName, size, shouldFinalize, true, null, false); + super(name, fieldName, size, shouldFinalize, true, null); } @Override diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchBaseSqlAggregator.java 
b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchBaseSqlAggregator.java index 69d5e58191ac..6564b276c971 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchBaseSqlAggregator.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchBaseSqlAggregator.java @@ -116,8 +116,7 @@ public Aggregation toDruidAggregation( sketchSize, finalizeSketch || SketchQueryContext.isFinalizeOuterSketches(plannerContext), null, - null, - columnArg.getDruidType() != null && columnArg.getDruidType().isArray() + null ); } else { final RelDataType dataType = columnRexNode.getType(); @@ -148,8 +147,7 @@ public Aggregation toDruidAggregation( sketchSize, finalizeSketch || SketchQueryContext.isFinalizeOuterSketches(plannerContext), null, - null, - inputType.isArray() + null ); } diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorFactoryTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorFactoryTest.java index 5d8eb0d5ede3..2c4ff635faa4 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorFactoryTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorFactoryTest.java @@ -263,7 +263,7 @@ public void testToString() .collect(Collectors.toList()); for (Field field : toStringFields) { - if ("shouldFinalize".equals(field.getName()) || "stringEncoding".equals(field.getName()) || "processAsArray".equals(field.getName())) { + if ("shouldFinalize".equals(field.getName()) || "stringEncoding".equals(field.getName())) { // Skip; not included in the toString if it has the default value. 
continue; } @@ -290,7 +290,6 @@ public void testResultArraySignature() null, null, null, - false, false ), new HllSketchBuildAggregatorFactory( @@ -300,8 +299,7 @@ public void testResultArraySignature() null, null, null, - true, - false + true ), new HllSketchMergeAggregatorFactory( "hllMerge", @@ -384,7 +382,7 @@ private static class TestHllSketchAggregatorFactory extends HllSketchAggregatorF boolean round ) { - super(name, fieldName, lgK, tgtHllType, stringEncoding, null, round, false); + super(name, fieldName, lgK, tgtHllType, stringEncoding, null, round); } @Override diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorTest.java index 29adea8b5445..b8acb0ce2c22 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorTest.java @@ -23,34 +23,21 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; -import org.apache.druid.data.input.ResourceInputSource; import org.apache.druid.java.util.common.Intervals; import org.apache.druid.java.util.common.StringEncoding; import org.apache.druid.java.util.common.granularity.Granularities; import org.apache.druid.java.util.common.granularity.Granularity; import org.apache.druid.java.util.common.guava.Sequence; -import org.apache.druid.java.util.common.io.Closer; -import org.apache.druid.query.NestedDataTestUtils; import org.apache.druid.query.QueryContexts; import org.apache.druid.query.Result; import org.apache.druid.query.aggregation.AggregationTestHelper; -import org.apache.druid.query.aggregation.AggregatorFactory; -import 
org.apache.druid.query.aggregation.CountAggregatorFactory; import org.apache.druid.query.aggregation.post.FieldAccessPostAggregator; import org.apache.druid.query.groupby.GroupByQuery; import org.apache.druid.query.groupby.GroupByQueryConfig; import org.apache.druid.query.groupby.GroupByQueryRunnerTest; import org.apache.druid.query.groupby.ResultRow; import org.apache.druid.query.timeseries.TimeseriesResultValue; -import org.apache.druid.segment.IncrementalIndexSegment; -import org.apache.druid.segment.IndexBuilder; -import org.apache.druid.segment.QueryableIndexSegment; -import org.apache.druid.segment.Segment; -import org.apache.druid.segment.incremental.IncrementalIndexSchema; -import org.apache.druid.segment.transform.TransformSpec; import org.apache.druid.testing.InitializedNullHandlingTest; -import org.apache.druid.timeline.SegmentId; -import org.junit.After; import org.junit.Assert; import org.junit.Rule; import org.junit.Test; @@ -59,7 +46,6 @@ import org.junit.runners.Parameterized; import java.io.File; -import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; @@ -83,8 +69,6 @@ public class HllSketchAggregatorTest extends InitializedNullHandlingTest @Rule public final TemporaryFolder timeseriesFolder = new TemporaryFolder(); - private final Closer closer; - public HllSketchAggregatorTest(GroupByQueryConfig config, String vectorize, StringEncoding stringEncoding) { HllSketchModule.registerSerde(); @@ -96,7 +80,6 @@ public HllSketchAggregatorTest(GroupByQueryConfig config, String vectorize, Stri ); this.vectorize = QueryContexts.Vectorize.fromString(vectorize); this.stringEncoding = stringEncoding; - this.closer = Closer.create(); } @Parameterized.Parameters(name = "groupByConfig = {0}, vectorize = {1}, stringEncoding = {2}") @@ -115,12 +98,6 @@ public static Collection constructorFeeder() return constructors; } - @After - public void teardown() throws IOException - { - closer.close(); - } - @Test public 
void ingestSketches() throws Exception { @@ -440,153 +417,6 @@ public void testPostAggs() throws Exception Assert.assertEquals(expectedSummary, ((HllSketchHolder) row.get(4)).getSketch().toString()); } - @Test - public void testArrays() throws Exception - { - AggregatorFactory[] aggs = new AggregatorFactory[]{ - new HllSketchBuildAggregatorFactory("hll0", "arrayString", null, null, null, false, false, true), - new HllSketchBuildAggregatorFactory("hll1", "arrayLong", null, null, null, false, false, true), - new HllSketchBuildAggregatorFactory("hll2", "arrayDouble", null, null, null, false, false, true), - new HllSketchBuildAggregatorFactory("hll3", "arrayString", null, null, null, false, false, false), - new HllSketchBuildAggregatorFactory("hll4", "arrayLong", null, null, null, false, false, false), - new HllSketchBuildAggregatorFactory("hll5", "arrayDouble", null, null, null, false, false, false) - }; - - IndexBuilder bob = IndexBuilder.create(timeseriesHelper.getObjectMapper()) - .tmpDir(groupByFolder.newFolder()) - .schema( - IncrementalIndexSchema.builder() - .withTimestampSpec(NestedDataTestUtils.TIMESTAMP_SPEC) - .withDimensionsSpec(NestedDataTestUtils.AUTO_DISCOVERY) - .withMetrics(aggs) - .withQueryGranularity(Granularities.NONE) - .withRollup(true) - .withMinTimestamp(0) - .build() - ) - .inputSource( - ResourceInputSource.of( - NestedDataTestUtils.class.getClassLoader(), - NestedDataTestUtils.ARRAY_TYPES_DATA_FILE - ) - ) - .inputFormat(NestedDataTestUtils.DEFAULT_JSON_INPUT_FORMAT) - .transform(TransformSpec.NONE) - .inputTmpDir(groupByFolder.newFolder()); - - List realtimeSegs = ImmutableList.of( - new IncrementalIndexSegment(bob.buildIncrementalIndex(), SegmentId.dummy("test_datasource")) - ); - List segs = ImmutableList.of( - new QueryableIndexSegment(bob.buildMMappedMergedIndex(), SegmentId.dummy("test_datasource")) - ); - - GroupByQuery query = GroupByQuery.builder() - .setDataSource("test_datasource") - .setGranularity(Granularities.ALL) - 
.setInterval(Intervals.ETERNITY) - .setAggregatorSpecs( - new HllSketchBuildAggregatorFactory("a0", "arrayString", null, null, null, false, false, false), - new HllSketchBuildAggregatorFactory("a1", "arrayLong", null, null, null, false, false, false), - new HllSketchBuildAggregatorFactory("a2", "arrayDouble", null, null, null, false, false, false), - new HllSketchMergeAggregatorFactory("a3", "hll0", null, null, null, false, false), - new HllSketchMergeAggregatorFactory("a4", "hll1", null, null, null, false, false), - new HllSketchMergeAggregatorFactory("a5", "hll2", null, null, null, false, false), - new HllSketchMergeAggregatorFactory("a6", "hll3", null, null, null, false, false), - new HllSketchMergeAggregatorFactory("a7", "hll4", null, null, null, false, false), - new HllSketchMergeAggregatorFactory("a8", "hll5", null, null, null, false, false), - new CountAggregatorFactory("a9") - ) - .setPostAggregatorSpecs( - ImmutableList.of( - new HllSketchToEstimatePostAggregator( - "p0", - new FieldAccessPostAggregator("f0", "a0"), - false - ), - new HllSketchToEstimatePostAggregator( - "p1", - new FieldAccessPostAggregator("f1", "a1"), - false - ), - new HllSketchToEstimatePostAggregator( - "p2", - new FieldAccessPostAggregator("f2", "a2"), - false - ), - // pre-aggregated array counts - new HllSketchToEstimatePostAggregator( - "p3", - new FieldAccessPostAggregator("f3", "a3"), - false - ), - new HllSketchToEstimatePostAggregator( - "p4", - new FieldAccessPostAggregator("f4", "a4"), - false - ), - new HllSketchToEstimatePostAggregator( - "p5", - new FieldAccessPostAggregator("f5", "a5"), - false - ), - // array element counts - new HllSketchToEstimatePostAggregator( - "p6", - new FieldAccessPostAggregator("f6", "a6"), - false - ), - new HllSketchToEstimatePostAggregator( - "p7", - new FieldAccessPostAggregator("f7", "a7"), - false - ), - new HllSketchToEstimatePostAggregator( - "p8", - new FieldAccessPostAggregator("f8", "a8"), - false - ) - ) - ) - .build(); - - 
Sequence realtimeSeq = groupByHelper.runQueryOnSegmentsObjs(realtimeSegs, query); - Sequence seq = groupByHelper.runQueryOnSegmentsObjs(segs, query); - List realtimeList = realtimeSeq.toList(); - List list = seq.toList(); - - // expect 4 distinct arrays for each of these columns from 14 rows - Assert.assertEquals(1, realtimeList.size()); - Assert.assertEquals(14L, realtimeList.get(0).get(9)); - // array column estimate counts - Assert.assertEquals(4.0, (Double) realtimeList.get(0).get(10), 0.01); - Assert.assertEquals(4.0, (Double) realtimeList.get(0).get(11), 0.01); - Assert.assertEquals(4.0, (Double) realtimeList.get(0).get(12), 0.01); - // pre-aggregated arrays counts - Assert.assertEquals(4.0, (Double) realtimeList.get(0).get(13), 0.01); - Assert.assertEquals(4.0, (Double) realtimeList.get(0).get(14), 0.01); - Assert.assertEquals(4.0, (Double) realtimeList.get(0).get(15), 0.01); - // if processAsArray is false, count is done as string mvds so it counts the total number of elements - Assert.assertEquals(5.0, (Double) realtimeList.get(0).get(16), 0.01); - Assert.assertEquals(4.0, (Double) realtimeList.get(0).get(17), 0.01); - Assert.assertEquals(6.0, (Double) realtimeList.get(0).get(18), 0.01); - - Assert.assertEquals(1, list.size()); - Assert.assertEquals(14L, list.get(0).get(9)); - // array column estimate counts - Assert.assertEquals(4.0, (Double) list.get(0).get(10), 0.01); - Assert.assertEquals(4.0, (Double) list.get(0).get(11), 0.01); - Assert.assertEquals(4.0, (Double) list.get(0).get(12), 0.01); - // pre-aggregated arrays counts - Assert.assertEquals(4.0, (Double) list.get(0).get(13), 0.01); - Assert.assertEquals(4.0, (Double) list.get(0).get(14), 0.01); - Assert.assertEquals(4.0, (Double) list.get(0).get(15), 0.01); - // if processAsArray is false, count is done as string mvds so it counts the total number of elements - Assert.assertEquals(5.0, (Double) list.get(0).get(16), 0.01); - Assert.assertEquals(4.0, (Double) list.get(0).get(17), 0.01); - 
Assert.assertEquals(6.0, (Double) list.get(0).get(18), 0.01); - } - private static String buildParserJson(List dimensions, List columns) { Map timestampSpec = ImmutableMap.of( diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildAggregatorFactoryTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildAggregatorFactoryTest.java index 8d8ab6fcd82c..51ca671cd0d7 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildAggregatorFactoryTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildAggregatorFactoryTest.java @@ -50,7 +50,6 @@ public void testSerde() throws IOException TgtHllType.HLL_8.name(), StringEncoding.UTF8, false, - true, true ); @@ -58,7 +57,7 @@ public void testSerde() throws IOException Assert.assertEquals( "{\"type\":\"HLLSketchBuild\",\"name\":\"foo\",\"fieldName\":\"bar\",\"lgK\":18,\"tgtHllType\":\"HLL_8\"," - + "\"stringEncoding\":\"utf8\",\"shouldFinalize\":false,\"round\":true,\"processAsArray\":true}", + + "\"stringEncoding\":\"utf8\",\"shouldFinalize\":false,\"round\":true}", serializedString ); @@ -80,7 +79,6 @@ public void testSerdeWithDefaults() throws IOException null, null, null, - false, false ); diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildUtilTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildUtilTest.java index 98c129d29413..eca5e6f37a4f 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildUtilTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildUtilTest.java @@ -204,12 +204,12 @@ private 
void updateSketch(final StringEncoding stringEncoding, final Object firs { // first != null check mimics how updateSketch is called: it's always guarded by a null check on the outer value. if (first != null) { - HllSketchBuildUtil.updateSketch(sketch, stringEncoding, first, false); + HllSketchBuildUtil.updateSketch(sketch, stringEncoding, first); } for (final Object o : others) { if (o != null) { - HllSketchBuildUtil.updateSketch(sketch, stringEncoding, o, false); + HllSketchBuildUtil.updateSketch(sketch, stringEncoding, o); } } } diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchMergeAggregatorFactoryTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchMergeAggregatorFactoryTest.java index 01bff8952591..101b25b99be0 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchMergeAggregatorFactoryTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchMergeAggregatorFactoryTest.java @@ -93,8 +93,7 @@ public void testGetMergingFactoryBadType() throws Exception TGT_HLL_TYPE, STRING_ENCODING, SHOULD_FINALIZE, - ROUND, - false + ROUND ); targetRound.getMergingFactory(other); } diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java index 871293442214..688380fed7f6 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java @@ -19,13 +19,11 @@ package 
org.apache.druid.query.aggregation.datasketches.hll.sql; -import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.base.Joiner; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.inject.Injector; import org.apache.druid.common.config.NullHandling; -import org.apache.druid.data.input.ResourceInputSource; import org.apache.druid.guice.DruidInjectorBuilder; import org.apache.druid.java.util.common.StringEncoding; import org.apache.druid.java.util.common.StringUtils; @@ -34,7 +32,6 @@ import org.apache.druid.math.expr.ExprMacroTable; import org.apache.druid.query.BaseQuery; import org.apache.druid.query.Druids; -import org.apache.druid.query.NestedDataTestUtils; import org.apache.druid.query.QueryDataSource; import org.apache.druid.query.QueryRunnerFactoryConglomerate; import org.apache.druid.query.aggregation.AggregatorFactory; @@ -174,13 +171,13 @@ public class HllSketchSqlAggregatorTest extends BaseCalciteQueryTest */ private static final List EXPECTED_PA_AGGREGATORS = ImmutableList.of( - new HllSketchBuildAggregatorFactory("a0", "dim2", null, null, null, false, true, false), - new HllSketchBuildAggregatorFactory("a1", "m1", null, null, null, false, true, false), - new HllSketchBuildAggregatorFactory("a2", "cnt", null, null, null, false, true, false), - new HllSketchBuildAggregatorFactory("a3", "v0", null, null, null, false, true, false), - new HllSketchBuildAggregatorFactory("a4", "v1", null, null, null, false, true, false), - new HllSketchBuildAggregatorFactory("a5", "dim2", null, null, null, true, true, false), - new HllSketchBuildAggregatorFactory("a6", "dim2", null, null, StringEncoding.UTF8, true, true, false) + new HllSketchBuildAggregatorFactory("a0", "dim2", null, null, null, false, true), + new HllSketchBuildAggregatorFactory("a1", "m1", null, null, null, false, true), + new HllSketchBuildAggregatorFactory("a2", "cnt", null, null, null, false, true), + new 
HllSketchBuildAggregatorFactory("a3", "v0", null, null, null, false, true), + new HllSketchBuildAggregatorFactory("a4", "v1", null, null, null, false, true), + new HllSketchBuildAggregatorFactory("a5", "dim2", null, null, null, true, true), + new HllSketchBuildAggregatorFactory("a6", "dim2", null, null, StringEncoding.UTF8, true, true) ); /** @@ -190,10 +187,7 @@ public class HllSketchSqlAggregatorTest extends BaseCalciteQueryTest private static final List EXPECTED_FILTERED_AGGREGATORS = EXPECTED_PA_AGGREGATORS.stream() .limit(5) - .map(factory -> new FilteredAggregatorFactory( - factory, - equality("dim2", "a", ColumnType.STRING) - )) + .map(factory -> new FilteredAggregatorFactory(factory, equality("dim2", "a", ColumnType.STRING))) .collect(Collectors.toList()); /** @@ -260,9 +254,8 @@ public SpecificSegmentsQuerySegmentWalker createQuerySegmentWalker( ) throws IOException { HllSketchModule.registerSerde(); - ObjectMapper mapper = injector.getInstance(ObjectMapper.class); final QueryableIndex index = IndexBuilder - .create(mapper) + .create() .tmpDir(temporaryFolder.newFolder()) .segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance()) .schema( @@ -270,12 +263,12 @@ public SpecificSegmentsQuerySegmentWalker createQuerySegmentWalker( .withMetrics( new CountAggregatorFactory("cnt"), new DoubleSumAggregatorFactory("m1", "m1"), - new HllSketchBuildAggregatorFactory("hllsketch_dim1", "dim1", null, null, null, false, ROUND, false), - new HllSketchBuildAggregatorFactory("hllsketch_dim3", "dim3", null, null, null, false, false, false), - new HllSketchBuildAggregatorFactory("hllsketch_m1", "m1", null, null, null, false, ROUND, false), - new HllSketchBuildAggregatorFactory("hllsketch_f1", "f1", null, null, null, false, ROUND, false), - new HllSketchBuildAggregatorFactory("hllsketch_l1", "l1", null, null, null, false, ROUND, false), - new HllSketchBuildAggregatorFactory("hllsketch_d1", "d1", null, null, null, false, ROUND, false) + new 
HllSketchBuildAggregatorFactory("hllsketch_dim1", "dim1", null, null, null, false, ROUND), + new HllSketchBuildAggregatorFactory("hllsketch_dim3", "dim3", null, null, null, false, false), + new HllSketchBuildAggregatorFactory("hllsketch_m1", "m1", null, null, null, false, ROUND), + new HllSketchBuildAggregatorFactory("hllsketch_f1", "f1", null, null, null, false, ROUND), + new HllSketchBuildAggregatorFactory("hllsketch_l1", "l1", null, null, null, false, ROUND), + new HllSketchBuildAggregatorFactory("hllsketch_d1", "d1", null, null, null, false, ROUND) ) .withRollup(false) .build() @@ -283,36 +276,6 @@ public SpecificSegmentsQuerySegmentWalker createQuerySegmentWalker( .rows(TestDataBuilder.ROWS1_WITH_NUMERIC_DIMS) .buildMMappedIndex(); - final QueryableIndex indexAllTypesAuto = - IndexBuilder.create(mapper) - .tmpDir(temporaryFolder.newFolder()) - .segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance()) - .schema( - new IncrementalIndexSchema.Builder() - .withTimestampSpec(NestedDataTestUtils.AUTO_SCHEMA.getTimestampSpec()) - .withDimensionsSpec(NestedDataTestUtils.AUTO_SCHEMA.getDimensionsSpec()) - .withMetrics( - new HllSketchBuildAggregatorFactory("hll0", "arrayString", null, null, null, false, false, true), - new HllSketchBuildAggregatorFactory("hll1", "arrayLong", null, null, null, false, false, true), - new HllSketchBuildAggregatorFactory("hll2", "arrayDouble", null, null, null, false, false, true), - new HllSketchBuildAggregatorFactory("hll3", "arrayString", null, null, null, false, false, false), - new HllSketchBuildAggregatorFactory("hll4", "arrayLong", null, null, null, false, false, false), - new HllSketchBuildAggregatorFactory("hll5", "arrayDouble", null, null, null, false, false, false), - new CountAggregatorFactory("cnt") - ) - .withRollup(false) - .build() - ) - .inputSource( - ResourceInputSource.of( - NestedDataTestUtils.class.getClassLoader(), - NestedDataTestUtils.ALL_TYPES_TEST_DATA_FILE - ) - ) - 
.inputFormat(TestDataBuilder.DEFAULT_JSON_INPUT_FORMAT) - .inputTmpDir(temporaryFolder.newFolder()) - .buildMMappedIndex(); - return new SpecificSegmentsQuerySegmentWalker(conglomerate).add( DataSegment.builder() .dataSource(CalciteTests.DATASOURCE1) @@ -322,15 +285,6 @@ public SpecificSegmentsQuerySegmentWalker createQuerySegmentWalker( .size(0) .build(), index - ).add( - DataSegment.builder() - .dataSource("all_types") - .interval(indexAllTypesAuto.getDataInterval()) - .version("1") - .shardSpec(new LinearShardSpec(0)) - .size(0) - .build(), - indexAllTypesAuto ); } @@ -387,13 +341,13 @@ public void testApproxCountDistinctHllSketch() .aggregators( ImmutableList.of( new LongSumAggregatorFactory("a0", "cnt"), - new HllSketchBuildAggregatorFactory("a1", "dim2", null, null, null, null, ROUND, false), + new HllSketchBuildAggregatorFactory("a1", "dim2", null, null, null, null, ROUND), new FilteredAggregatorFactory( - new HllSketchBuildAggregatorFactory("a2", "dim2", null, null, null, null, ROUND, false), + new HllSketchBuildAggregatorFactory("a2", "dim2", null, null, null, null, ROUND), not(equality("dim2", "", ColumnType.STRING)) ), - new HllSketchBuildAggregatorFactory("a3", "v0", null, null, null, null, ROUND, false), - new HllSketchBuildAggregatorFactory("a4", "v1", null, null, null, null, ROUND, false), + new HllSketchBuildAggregatorFactory("a3", "v0", null, null, null, null, ROUND), + new HllSketchBuildAggregatorFactory("a4", "v1", null, null, null, null, ROUND), new HllSketchMergeAggregatorFactory("a5", "hllsketch_dim1", 21, "HLL_8", null, null, ROUND), new HllSketchMergeAggregatorFactory("a6", "hllsketch_dim1", null, null, null, null, ROUND), new HllSketchMergeAggregatorFactory("a7", "hllsketch_dim1", 21, "HLL_4", null, null, ROUND) @@ -446,8 +400,7 @@ public void testAvgDailyCountDistinctHllSketch() null, null, null, - ROUND, - false + ROUND ) ) ) @@ -524,7 +477,7 @@ public void testApproxCountDistinctHllSketchIsRounded() .setGranularity(Granularities.ALL) 
.setAggregatorSpecs( aggregators( - new HllSketchBuildAggregatorFactory("a0", "m1", null, null, null, true, true, false) + new HllSketchBuildAggregatorFactory("a0", "m1", null, null, null, true, true) ) ) .setHavingSpec(having(equality("a0", 2L, ColumnType.LONG))) @@ -774,11 +727,11 @@ public void testHllSketchPostAggsFinalizeOuterSketches() ) .aggregators( ImmutableList.of( - new HllSketchBuildAggregatorFactory("a0", "dim2", null, null, null, true, true, false), - new HllSketchBuildAggregatorFactory("a1", "m1", null, null, null, true, true, false), - new HllSketchBuildAggregatorFactory("a2", "v0", null, null, null, true, true, false), - new HllSketchBuildAggregatorFactory("a3", "v1", null, null, null, true, true, false), - new HllSketchBuildAggregatorFactory("a4", "dim2", null, null, null, true, true, false) + new HllSketchBuildAggregatorFactory("a0", "dim2", null, null, null, true, true), + new HllSketchBuildAggregatorFactory("a1", "m1", null, null, null, true, true), + new HllSketchBuildAggregatorFactory("a2", "v0", null, null, null, true, true), + new HllSketchBuildAggregatorFactory("a3", "v1", null, null, null, true, true), + new HllSketchBuildAggregatorFactory("a4", "dim2", null, null, null, true, true) ) ) .postAggregators( @@ -864,8 +817,7 @@ public void testtHllSketchPostAggsPostSort() null, null, false, - true, - false + true ) ) ) @@ -915,8 +867,7 @@ public void testEmptyTimeseriesResults() null, null, null, - true, - false + true ), new HllSketchBuildAggregatorFactory( "a1", @@ -925,8 +876,7 @@ public void testEmptyTimeseriesResults() null, null, false, - true, - false + true ) ) ) @@ -963,8 +913,7 @@ public void testGroupByAggregatorDefaultValues() null, null, null, - true, - false + true ), equality("dim1", "nonexistent", ColumnType.STRING) ), @@ -976,8 +925,7 @@ public void testGroupByAggregatorDefaultValues() null, null, false, - true, - false + true ), equality("dim1", "nonexistent", ColumnType.STRING) ) @@ -1017,11 +965,11 @@ public void 
testGroupByAggregatorDefaultValuesFinalizeOuterSketches() .setAggregatorSpecs( aggregators( new FilteredAggregatorFactory( - new HllSketchBuildAggregatorFactory("a0", "v0", null, null, null, null, true, false), + new HllSketchBuildAggregatorFactory("a0", "v0", null, null, null, null, true), equality("dim1", "nonexistent", ColumnType.STRING) ), new FilteredAggregatorFactory( - new HllSketchBuildAggregatorFactory("a1", "v0", null, null, null, null, true, false), + new HllSketchBuildAggregatorFactory("a1", "v0", null, null, null, null, true), equality("dim1", "nonexistent", ColumnType.STRING) ) ) @@ -1316,104 +1264,6 @@ public void testFloatAndDoubleAreConsideredTheSame() ); } - @Test - public void testArrays() - { - testQuery( - "SELECT" - + " HLL_SKETCH_ESTIMATE(DS_HLL(arrayString))," - + " HLL_SKETCH_ESTIMATE(DS_HLL(arrayLong))," - + " HLL_SKETCH_ESTIMATE(DS_HLL(arrayDouble))," - + " HLL_SKETCH_ESTIMATE(DS_HLL(hll0))," - + " HLL_SKETCH_ESTIMATE(DS_HLL(hll1))," - + " HLL_SKETCH_ESTIMATE(DS_HLL(hll2))," - + " HLL_SKETCH_ESTIMATE(DS_HLL(hll3))," - + " HLL_SKETCH_ESTIMATE(DS_HLL(hll4))," - + " HLL_SKETCH_ESTIMATE(DS_HLL(hll5))" - + " FROM druid.all_types", - ImmutableList.of( - Druids.newTimeseriesQueryBuilder() - .dataSource("all_types") - .intervals(querySegmentSpec(Filtration.eternity())) - .granularity(Granularities.ALL) - .aggregators( - new HllSketchBuildAggregatorFactory("a0", "arrayString", null, null, null, false, true, true), - new HllSketchBuildAggregatorFactory("a1", "arrayLong", null, null, null, false, true, true), - new HllSketchBuildAggregatorFactory("a2", "arrayDouble", null, null, null, false, true, true), - new HllSketchMergeAggregatorFactory("a3", "hll0", null, null, null, false, true), - new HllSketchMergeAggregatorFactory("a4", "hll1", null, null, null, false, true), - new HllSketchMergeAggregatorFactory("a5", "hll2", null, null, null, false, true), - new HllSketchMergeAggregatorFactory("a6", "hll3", null, null, null, false, true), - new 
HllSketchMergeAggregatorFactory("a7", "hll4", null, null, null, false, true), - new HllSketchMergeAggregatorFactory("a8", "hll5", null, null, null, false, true) - ) - .postAggregators( - new HllSketchToEstimatePostAggregator( - "p1", - new FieldAccessPostAggregator("p0", "a0"), - false - ), - new HllSketchToEstimatePostAggregator( - "p3", - new FieldAccessPostAggregator("p2", "a1"), - false - ), - new HllSketchToEstimatePostAggregator( - "p5", - new FieldAccessPostAggregator("p4", "a2"), - false - ), - // pre-aggregated array counts - new HllSketchToEstimatePostAggregator( - "p7", - new FieldAccessPostAggregator("p6", "a3"), - false - ), - new HllSketchToEstimatePostAggregator( - "p9", - new FieldAccessPostAggregator("p8", "a4"), - false - ), - new HllSketchToEstimatePostAggregator( - "p11", - new FieldAccessPostAggregator("p10", "a5"), - false - ), - // array element counts - new HllSketchToEstimatePostAggregator( - "p13", - new FieldAccessPostAggregator("p12", "a6"), - false - ), - new HllSketchToEstimatePostAggregator( - "p15", - new FieldAccessPostAggregator("p14", "a7"), - false - ), - new HllSketchToEstimatePostAggregator( - "p17", - new FieldAccessPostAggregator("p16", "a8"), - false - ) - ) - .context(QUERY_CONTEXT_DEFAULT) - .build() - ), - ImmutableList.of( - new Object[]{ - 4.000000029802323D, - 4.000000029802323D, - 4.000000029802323D, - 4.000000029802323D, - 4.000000029802323D, - 4.000000029802323D, - 5.000000049670538D, - 4.000000029802323D, - 6.000000074505807D} - ) - ); - } - private ExpressionVirtualColumn makeSketchEstimateExpression(String outputName, String field) { return new ExpressionVirtualColumn( diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/BufferHashGrouperUsingSketchMergeAggregatorFactoryTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/BufferHashGrouperUsingSketchMergeAggregatorFactoryTest.java index 
7e90ba3398c9..f6d922ba3ae6 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/BufferHashGrouperUsingSketchMergeAggregatorFactoryTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/BufferHashGrouperUsingSketchMergeAggregatorFactoryTest.java @@ -52,7 +52,7 @@ private static BufferHashGrouper makeGrouper( AggregatorAdapters.factorizeBuffered( columnSelectorFactory, ImmutableList.of( - new SketchMergeAggregatorFactory("sketch", "sketch", 16, false, true, 2, false), + new SketchMergeAggregatorFactory("sketch", "sketch", 16, false, true, 2), new CountAggregatorFactory("count") ) ), diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregationTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregationTest.java index 91a8f5de8e37..9487fb8651ef 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregationTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregationTest.java @@ -31,19 +31,14 @@ import org.apache.datasketches.theta.Union; import org.apache.datasketches.theta.UpdateSketch; import org.apache.druid.data.input.MapBasedRow; -import org.apache.druid.data.input.ResourceInputSource; import org.apache.druid.java.util.common.DateTimes; -import org.apache.druid.java.util.common.Intervals; import org.apache.druid.java.util.common.granularity.Granularities; import org.apache.druid.java.util.common.guava.Sequence; -import org.apache.druid.java.util.common.io.Closer; -import org.apache.druid.query.NestedDataTestUtils; import org.apache.druid.query.Query; import org.apache.druid.query.QueryContexts; import org.apache.druid.query.aggregation.AggregationTestHelper; import 
org.apache.druid.query.aggregation.Aggregator; import org.apache.druid.query.aggregation.AggregatorFactory; -import org.apache.druid.query.aggregation.CountAggregatorFactory; import org.apache.druid.query.aggregation.PostAggregator; import org.apache.druid.query.aggregation.TestObjectColumnSelector; import org.apache.druid.query.aggregation.post.FieldAccessPostAggregator; @@ -53,13 +48,6 @@ import org.apache.druid.query.groupby.ResultRow; import org.apache.druid.query.groupby.epinephelinae.GroupByTestColumnSelectorFactory; import org.apache.druid.query.groupby.epinephelinae.GrouperTestUtil; -import org.apache.druid.segment.IncrementalIndexSegment; -import org.apache.druid.segment.IndexBuilder; -import org.apache.druid.segment.QueryableIndexSegment; -import org.apache.druid.segment.Segment; -import org.apache.druid.segment.incremental.IncrementalIndexSchema; -import org.apache.druid.segment.transform.TransformSpec; -import org.apache.druid.timeline.SegmentId; import org.junit.After; import org.junit.Assert; import org.junit.Rule; @@ -90,8 +78,6 @@ public class SketchAggregationTest @Rule public final TemporaryFolder tempFolder = new TemporaryFolder(); - private final Closer closer; - public SketchAggregationTest(final GroupByQueryConfig config, final String vectorize) { SketchModule.registerSerde(); @@ -101,7 +87,6 @@ public SketchAggregationTest(final GroupByQueryConfig config, final String vecto tempFolder ); this.vectorize = QueryContexts.Vectorize.fromString(vectorize); - this.closer = Closer.create(); } @Parameterized.Parameters(name = "config = {0}, vectorize = {1}") @@ -119,7 +104,6 @@ public static Collection constructorFeeder() @After public void teardown() throws IOException { - closer.close(); helper.close(); } @@ -315,10 +299,10 @@ public void testThetaCardinalityOnSimpleColumn() throws Exception @Test public void testSketchMergeAggregatorFactorySerde() throws Exception { - assertAggregatorFactorySerde(new SketchMergeAggregatorFactory("name", 
"fieldName", 16, null, null, null, false)); - assertAggregatorFactorySerde(new SketchMergeAggregatorFactory("name", "fieldName", 16, false, true, null, false)); - assertAggregatorFactorySerde(new SketchMergeAggregatorFactory("name", "fieldName", 16, true, false, null, false)); - assertAggregatorFactorySerde(new SketchMergeAggregatorFactory("name", "fieldName", 16, true, false, 2, false)); + assertAggregatorFactorySerde(new SketchMergeAggregatorFactory("name", "fieldName", 16, null, null, null)); + assertAggregatorFactorySerde(new SketchMergeAggregatorFactory("name", "fieldName", 16, false, true, null)); + assertAggregatorFactorySerde(new SketchMergeAggregatorFactory("name", "fieldName", 16, true, false, null)); + assertAggregatorFactorySerde(new SketchMergeAggregatorFactory("name", "fieldName", 16, true, false, 2)); } @Test @@ -326,16 +310,16 @@ public void testSketchMergeFinalization() { SketchHolder sketch = SketchHolder.of(Sketches.updateSketchBuilder().setNominalEntries(128).build()); - SketchMergeAggregatorFactory agg = new SketchMergeAggregatorFactory("name", "fieldName", 16, null, null, null, false); + SketchMergeAggregatorFactory agg = new SketchMergeAggregatorFactory("name", "fieldName", 16, null, null, null); Assert.assertEquals(0.0, ((Double) agg.finalizeComputation(sketch)).doubleValue(), 0.0001); - agg = new SketchMergeAggregatorFactory("name", "fieldName", 16, true, null, null, false); + agg = new SketchMergeAggregatorFactory("name", "fieldName", 16, true, null, null); Assert.assertEquals(0.0, ((Double) agg.finalizeComputation(sketch)).doubleValue(), 0.0001); - agg = new SketchMergeAggregatorFactory("name", "fieldName", 16, false, null, null, false); + agg = new SketchMergeAggregatorFactory("name", "fieldName", 16, false, null, null); Assert.assertEquals(sketch, agg.finalizeComputation(sketch)); - agg = new SketchMergeAggregatorFactory("name", "fieldName", 16, true, null, 2, false); + agg = new SketchMergeAggregatorFactory("name", "fieldName", 16, 
true, null, 2); SketchEstimateWithErrorBounds est = (SketchEstimateWithErrorBounds) agg.finalizeComputation(sketch); Assert.assertEquals(0.0, est.getEstimate(), 0.0001); Assert.assertEquals(0.0, est.getHighBound(), 0.0001); @@ -344,270 +328,6 @@ public void testSketchMergeFinalization() } - @Test - public void testArrays() throws Exception - { - AggregatorFactory[] aggs = new AggregatorFactory[]{ - new SketchMergeAggregatorFactory( - "sketch0", - "arrayString", - null, - null, - null, - null, - true - ), - new SketchMergeAggregatorFactory( - "sketch1", - "arrayLong", - null, - null, - null, - null, - true - ), - new SketchMergeAggregatorFactory( - "sketch2", - "arrayDouble", - null, - null, - null, - null, - true - ), - new SketchMergeAggregatorFactory( - "sketch3", - "arrayString", - null, - null, - null, - null, - false - ), - new SketchMergeAggregatorFactory( - "sketch4", - "arrayLong", - null, - null, - null, - null, - false - ), - new SketchMergeAggregatorFactory( - "sketch5", - "arrayDouble", - null, - null, - null, - null, - false - ) - }; - IndexBuilder bob = IndexBuilder.create(helper.getObjectMapper()) - .tmpDir(tempFolder.newFolder()) - .schema( - IncrementalIndexSchema.builder() - .withTimestampSpec(NestedDataTestUtils.TIMESTAMP_SPEC) - .withDimensionsSpec(NestedDataTestUtils.AUTO_DISCOVERY) - .withMetrics(aggs) - .withQueryGranularity(Granularities.NONE) - .withRollup(true) - .withMinTimestamp(0) - .build() - ) - .inputSource( - ResourceInputSource.of( - NestedDataTestUtils.class.getClassLoader(), - NestedDataTestUtils.ARRAY_TYPES_DATA_FILE - ) - ) - .inputFormat(NestedDataTestUtils.DEFAULT_JSON_INPUT_FORMAT) - .transform(TransformSpec.NONE) - .inputTmpDir(tempFolder.newFolder()); - - List realtimeSegs = ImmutableList.of( - new IncrementalIndexSegment(bob.buildIncrementalIndex(), SegmentId.dummy("test_datasource")) - ); - List segs = ImmutableList.of( - new QueryableIndexSegment(bob.buildMMappedMergedIndex(), SegmentId.dummy("test_datasource")) - ); - 
- GroupByQuery query = GroupByQuery.builder() - .setDataSource("test_datasource") - .setGranularity(Granularities.ALL) - .setInterval(Intervals.ETERNITY) - .setAggregatorSpecs( - new SketchMergeAggregatorFactory( - "a0", - "arrayString", - null, - null, - null, - null, - true - ), - new SketchMergeAggregatorFactory( - "a1", - "arrayLong", - null, - null, - null, - null, - true - ), - new SketchMergeAggregatorFactory( - "a2", - "arrayDouble", - null, - null, - null, - null, - true - ), - new SketchMergeAggregatorFactory( - "a3", - "sketch0", - null, - null, - true, - null, - false - ), - new SketchMergeAggregatorFactory( - "a4", - "sketch1", - null, - null, - true, - null, - false - ), - new SketchMergeAggregatorFactory( - "a5", - "sketch2", - null, - null, - true, - null, - false - ), - new SketchMergeAggregatorFactory( - "a6", - "sketch3", - null, - null, - true, - null, - false - ), - new SketchMergeAggregatorFactory( - "a7", - "sketch4", - null, - null, - true, - null, - false - ), - new SketchMergeAggregatorFactory( - "a8", - "sketch5", - null, - null, - true, - null, - false - ), - new CountAggregatorFactory("a9") - ) - .setPostAggregatorSpecs( - ImmutableList.of( - new SketchEstimatePostAggregator( - "p0", - new FieldAccessPostAggregator("f0", "a0"), - null - ), - new SketchEstimatePostAggregator( - "p1", - new FieldAccessPostAggregator("f1", "a1"), - null - ), - new SketchEstimatePostAggregator( - "p2", - new FieldAccessPostAggregator("f2", "a2"), - null - ), - new SketchEstimatePostAggregator( - "p3", - new FieldAccessPostAggregator("f3", "a3"), - null - ), - new SketchEstimatePostAggregator( - "p4", - new FieldAccessPostAggregator("f4", "a4"), - null - ), - new SketchEstimatePostAggregator( - "p5", - new FieldAccessPostAggregator("f5", "a5"), - null - ), - new SketchEstimatePostAggregator( - "p6", - new FieldAccessPostAggregator("f6", "a6"), - null - ), - new SketchEstimatePostAggregator( - "p7", - new FieldAccessPostAggregator("f7", "a7"), - null - ), - 
new SketchEstimatePostAggregator( - "p8", - new FieldAccessPostAggregator("f8", "a8"), - null - ) - ) - ) - .build(); - - Sequence realtimeSeq = helper.runQueryOnSegmentsObjs(realtimeSegs, query); - Sequence seq = helper.runQueryOnSegmentsObjs(segs, query); - List realtimeList = realtimeSeq.toList(); - List list = seq.toList(); - - // expect 4 distinct arrays for each of these columns from 14 rows - Assert.assertEquals(1, realtimeList.size()); - Assert.assertEquals(14L, realtimeList.get(0).get(9)); - // array column estimate counts - Assert.assertEquals(4.0, (Double) realtimeList.get(0).get(10), 0.01); - Assert.assertEquals(4.0, (Double) realtimeList.get(0).get(11), 0.01); - Assert.assertEquals(4.0, (Double) realtimeList.get(0).get(12), 0.01); - // pre-aggregated arrays counts - Assert.assertEquals(4.0, (Double) realtimeList.get(0).get(13), 0.01); - Assert.assertEquals(4.0, (Double) realtimeList.get(0).get(14), 0.01); - Assert.assertEquals(4.0, (Double) realtimeList.get(0).get(15), 0.01); - // if processAsArray is false, count is done as string mvds so it counts the total number of elements - Assert.assertEquals(5.0, (Double) realtimeList.get(0).get(16), 0.01); - Assert.assertEquals(4.0, (Double) realtimeList.get(0).get(17), 0.01); - Assert.assertEquals(6.0, (Double) realtimeList.get(0).get(18), 0.01); - - Assert.assertEquals(1, list.size()); - Assert.assertEquals(14L, list.get(0).get(9)); - // array column estimate counts - Assert.assertEquals(4.0, (Double) list.get(0).get(10), 0.01); - Assert.assertEquals(4.0, (Double) list.get(0).get(11), 0.01); - Assert.assertEquals(4.0, (Double) list.get(0).get(12), 0.01); - // pre-aggregated arrays counts - Assert.assertEquals(4.0, (Double) list.get(0).get(13), 0.01); - Assert.assertEquals(4.0, (Double) list.get(0).get(14), 0.01); - Assert.assertEquals(4.0, (Double) list.get(0).get(15), 0.01); - // if processAsArray is false, count is done as string mvds so it counts the total number of elements - Assert.assertEquals(5.0, 
(Double) list.get(0).get(16), 0.01); - Assert.assertEquals(4.0, (Double) list.get(0).get(17), 0.01); - Assert.assertEquals(6.0, (Double) list.get(0).get(18), 0.01); - } - private void assertAggregatorFactorySerde(AggregatorFactory agg) throws Exception { Assert.assertEquals( @@ -684,8 +404,7 @@ public void testCacheKey() 16, null, null, - null, - false + null ); final SketchMergeAggregatorFactory factory2 = new SketchMergeAggregatorFactory( "name", @@ -693,8 +412,7 @@ public void testCacheKey() 16, null, null, - null, - false + null ); final SketchMergeAggregatorFactory factory3 = new SketchMergeAggregatorFactory( "name", @@ -702,8 +420,7 @@ public void testCacheKey() 32, null, null, - null, - false + null ); Assert.assertTrue(Arrays.equals(factory1.getCacheKey(), factory2.getCacheKey())); @@ -788,7 +505,7 @@ public void testRelocation() columnSelectorFactory.setRow(new MapBasedRow(0, ImmutableMap.of("sketch", sketchHolder))); SketchHolder[] holders = helper.runRelocateVerificationTest( - new SketchMergeAggregatorFactory("sketch", "sketch", 16, false, true, 2, false), + new SketchMergeAggregatorFactory("sketch", "sketch", 16, false, true, 2), columnSelectorFactory, SketchHolder.class ); @@ -805,7 +522,7 @@ public void testUpdateUnionWithNullInList() value.add("bar"); List[] columnValues = new List[]{value}; final TestObjectColumnSelector selector = new TestObjectColumnSelector(columnValues); - final Aggregator agg = new SketchAggregator(selector, 4096, false); + final Aggregator agg = new SketchAggregator(selector, 4096); agg.aggregate(); Assert.assertFalse(agg.isNull()); Assert.assertNotNull(agg.get()); @@ -820,7 +537,7 @@ public void testUpdateUnionWithDouble() { Double[] columnValues = new Double[]{2.0}; final TestObjectColumnSelector selector = new TestObjectColumnSelector(columnValues); - final Aggregator agg = new SketchAggregator(selector, 4096, false); + final Aggregator agg = new SketchAggregator(selector, 4096); agg.aggregate(); 
Assert.assertFalse(agg.isNull()); Assert.assertNotNull(agg.get()); @@ -839,7 +556,7 @@ public void testAggregateWithSize() } final TestObjectColumnSelector selector = new TestObjectColumnSelector<>(columnValues); - final SketchAggregator agg = new SketchAggregator(selector, 128, false); + final SketchAggregator agg = new SketchAggregator(selector, 128); // Verify initial size of sketch Assert.assertEquals(48L, agg.getInitialSizeBytes()); diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregatorFactoryTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregatorFactoryTest.java index 775271661696..1d70ff30f251 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregatorFactoryTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregatorFactoryTest.java @@ -41,10 +41,10 @@ public class SketchAggregatorFactoryTest { private static final SketchMergeAggregatorFactory AGGREGATOR_16384 = - new SketchMergeAggregatorFactory("x", "x", 16384, null, false, null, false); + new SketchMergeAggregatorFactory("x", "x", 16384, null, false, null); private static final SketchMergeAggregatorFactory AGGREGATOR_32768 = - new SketchMergeAggregatorFactory("x", "x", 32768, null, false, null, false); + new SketchMergeAggregatorFactory("x", "x", 32768, null, false, null); @Test public void testGuessAggregatorHeapFootprint() @@ -71,6 +71,7 @@ public void testFactorizeSized() ColumnSelectorFactory colSelectorFactory = EasyMock.mock(ColumnSelectorFactory.class); EasyMock.expect(colSelectorFactory.makeColumnValueSelector(EasyMock.anyString())) .andReturn(EasyMock.createMock(ColumnValueSelector.class)).anyTimes(); + EasyMock.expect(colSelectorFactory.getColumnCapabilities("x")).andReturn(null).anyTimes(); 
EasyMock.replay(colSelectorFactory); AggregatorAndSize aggregatorAndSize = AGGREGATOR_16384.factorizeWithSize(colSelectorFactory); @@ -93,8 +94,8 @@ public void testResultArraySignature() new OldSketchBuildAggregatorFactory("oldBuild", "col", 16), new OldSketchMergeAggregatorFactory("oldMerge", "col", 16, false), new OldSketchMergeAggregatorFactory("oldMergeFinalize", "col", 16, true), - new SketchMergeAggregatorFactory("merge", "col", 16, false, false, null, false), - new SketchMergeAggregatorFactory("mergeFinalize", "col", 16, true, false, null, false) + new SketchMergeAggregatorFactory("merge", "col", 16, false, false, null), + new SketchMergeAggregatorFactory("mergeFinalize", "col", 16, true, false, null) ) .postAggregators( new FieldAccessPostAggregator("oldBuild-access", "oldBuild"), diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/SketchToStringPostAggregatorTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/SketchToStringPostAggregatorTest.java index b52d37a0b276..f2d19af813d1 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/SketchToStringPostAggregatorTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/SketchToStringPostAggregatorTest.java @@ -111,7 +111,7 @@ public void testCompute() { // not going to iterate over the selector since getting a summary of an empty sketch is sufficient final TestObjectColumnSelector selector = new TestObjectColumnSelector(new Object[0]); - final Aggregator agg = new SketchAggregator(selector, 4096, false); + final Aggregator agg = new SketchAggregator(selector, 4096); final Map fields = new HashMap<>(); fields.put("sketch", agg.get()); diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchSqlAggregatorTest.java 
b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchSqlAggregatorTest.java index d0dfdd620d84..7f8c1970b2d7 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchSqlAggregatorTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchSqlAggregatorTest.java @@ -19,19 +19,16 @@ package org.apache.druid.query.aggregation.datasketches.theta.sql; -import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.inject.Injector; import org.apache.druid.common.config.NullHandling; -import org.apache.druid.data.input.ResourceInputSource; import org.apache.druid.guice.DruidInjectorBuilder; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.java.util.common.granularity.Granularities; import org.apache.druid.java.util.common.granularity.PeriodGranularity; import org.apache.druid.math.expr.ExprMacroTable; import org.apache.druid.query.Druids; -import org.apache.druid.query.NestedDataTestUtils; import org.apache.druid.query.QueryDataSource; import org.apache.druid.query.QueryRunnerFactoryConglomerate; import org.apache.druid.query.aggregation.CountAggregatorFactory; @@ -118,8 +115,7 @@ public SpecificSegmentsQuerySegmentWalker createQuerySegmentWalker( { SketchModule.registerSerde(); - ObjectMapper mapper = injector.getInstance(ObjectMapper.class); - final QueryableIndex index = IndexBuilder.create(mapper) + final QueryableIndex index = IndexBuilder.create() .tmpDir(temporaryFolder.newFolder()) .segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance()) .schema( @@ -133,8 +129,7 @@ public SpecificSegmentsQuerySegmentWalker createQuerySegmentWalker( null, false, false, - null, - false + null ) ) .withRollup(false) @@ -143,84 +138,6 
@@ public SpecificSegmentsQuerySegmentWalker createQuerySegmentWalker( .rows(TestDataBuilder.ROWS1) .buildMMappedIndex(); - final QueryableIndex indexAllTypesAuto = - IndexBuilder.create(mapper) - .tmpDir(temporaryFolder.newFolder()) - .segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance()) - .schema( - new IncrementalIndexSchema.Builder() - .withTimestampSpec(NestedDataTestUtils.AUTO_SCHEMA.getTimestampSpec()) - .withDimensionsSpec(NestedDataTestUtils.AUTO_SCHEMA.getDimensionsSpec()) - .withMetrics( - new SketchMergeAggregatorFactory( - "sketch0", - "arrayString", - null, - null, - null, - null, - true - ), - new SketchMergeAggregatorFactory( - "sketch1", - "arrayLong", - null, - null, - null, - null, - true - ), - new SketchMergeAggregatorFactory( - "sketch2", - "arrayDouble", - null, - null, - null, - null, - true - ), - new SketchMergeAggregatorFactory( - "sketch3", - "arrayString", - null, - null, - null, - null, - false - ), - new SketchMergeAggregatorFactory( - "sketch4", - "arrayLong", - null, - null, - null, - null, - false - ), - new SketchMergeAggregatorFactory( - "sketch5", - "arrayDouble", - null, - null, - null, - null, - false - ), - new CountAggregatorFactory("cnt") - ) - .withRollup(false) - .build() - ) - .inputSource( - ResourceInputSource.of( - NestedDataTestUtils.class.getClassLoader(), - NestedDataTestUtils.ALL_TYPES_TEST_DATA_FILE - ) - ) - .inputFormat(TestDataBuilder.DEFAULT_JSON_INPUT_FORMAT) - .inputTmpDir(temporaryFolder.newFolder()) - .buildMMappedIndex(); - return new SpecificSegmentsQuerySegmentWalker(conglomerate).add( DataSegment.builder() .dataSource(DATA_SOURCE) @@ -230,15 +147,6 @@ public SpecificSegmentsQuerySegmentWalker createQuerySegmentWalker( .size(0) .build(), index - ).add( - DataSegment.builder() - .dataSource("all_types") - .interval(indexAllTypesAuto.getDataInterval()) - .version("1") - .shardSpec(new LinearShardSpec(0)) - .size(0) - .build(), - indexAllTypesAuto ); } @@ -322,8 +230,7 @@ 
public void testApproxCountDistinctThetaSketch() null, null, null, - null, - false + null ), new FilteredAggregatorFactory( new SketchMergeAggregatorFactory( @@ -332,8 +239,7 @@ public void testApproxCountDistinctThetaSketch() null, null, null, - null, - false + null ), not(equality("dim2", "", ColumnType.STRING)) ), @@ -343,8 +249,7 @@ public void testApproxCountDistinctThetaSketch() null, null, null, - null, - false + null ), new SketchMergeAggregatorFactory( "a4", @@ -352,11 +257,10 @@ public void testApproxCountDistinctThetaSketch() null, null, null, - null, - false + null ), - new SketchMergeAggregatorFactory("a5", "thetasketch_dim1", 32768, null, null, null, false), - new SketchMergeAggregatorFactory("a6", "thetasketch_dim1", null, null, null, null, false) + new SketchMergeAggregatorFactory("a5", "thetasketch_dim1", 32768, null, null, null), + new SketchMergeAggregatorFactory("a6", "thetasketch_dim1", null, null, null, null) ) ) .context(QUERY_CONTEXT_DEFAULT) @@ -403,8 +307,7 @@ public void testAvgDailyCountDistinctThetaSketch() null, null, null, - null, - false + null ) ) ) @@ -530,8 +433,7 @@ public void testThetaSketchPostAggs() null, false, null, - null, - false + null ), new SketchMergeAggregatorFactory( "a2", @@ -539,8 +441,7 @@ public void testThetaSketchPostAggs() null, false, null, - null, - false + null ), new SketchMergeAggregatorFactory( "a3", @@ -548,8 +449,7 @@ public void testThetaSketchPostAggs() null, false, null, - null, - false + null ) ) ) @@ -710,8 +610,7 @@ public void testThetaSketchPostAggsFinalizeOuterSketches() null, null, null, - null, - false + null ), new SketchMergeAggregatorFactory( "a2", @@ -719,8 +618,7 @@ public void testThetaSketchPostAggsFinalizeOuterSketches() null, null, null, - null, - false + null ), new SketchMergeAggregatorFactory( "a3", @@ -728,8 +626,7 @@ public void testThetaSketchPostAggsFinalizeOuterSketches() null, null, null, - null, - false + null ) ) ) @@ -839,8 +736,7 @@ public void 
testThetaSketchPostAggsPostSort() null, false, null, - null, - false + null ) ) ) @@ -896,8 +792,7 @@ public void testThetaSketchPostAggsPostSortFinalizeOuterSketches() null, null, null, - null, - false + null ) ) ) @@ -949,8 +844,7 @@ public void testEmptyTimeseriesResults() null, null, null, - null, - false + null ), new SketchMergeAggregatorFactory( "a1", @@ -958,8 +852,7 @@ public void testEmptyTimeseriesResults() null, null, null, - null, - false + null ), new SketchMergeAggregatorFactory( "a2", @@ -967,8 +860,7 @@ public void testEmptyTimeseriesResults() 1024, false, null, - null, - false + null ), new SketchMergeAggregatorFactory( "a3", @@ -976,8 +868,7 @@ public void testEmptyTimeseriesResults() 1024, false, null, - null, - false + null ) ) ) @@ -1016,8 +907,7 @@ public void testGroupByAggregatorDefaultValues() null, true, null, - null, - false + null ), equality("dim1", "nonexistent", ColumnType.STRING) ), @@ -1028,8 +918,7 @@ public void testGroupByAggregatorDefaultValues() null, true, null, - null, - false + null ), equality("dim1", "nonexistent", ColumnType.STRING) ), @@ -1040,8 +929,7 @@ public void testGroupByAggregatorDefaultValues() 1024, false, null, - null, - false + null ), equality("dim1", "nonexistent", ColumnType.STRING) ), @@ -1052,8 +940,7 @@ public void testGroupByAggregatorDefaultValues() 1024, false, null, - null, - false + null ), equality("dim1", "nonexistent", ColumnType.STRING) ) @@ -1101,8 +988,7 @@ public void testGroupByAggregatorDefaultValuesFinalizeOuterSketches() null, true, null, - null, - false + null ), equality("dim1", "nonexistent", ColumnType.STRING) ), @@ -1113,8 +999,7 @@ public void testGroupByAggregatorDefaultValuesFinalizeOuterSketches() null, true, null, - null, - false + null ), equality("dim1", "nonexistent", ColumnType.STRING) ), @@ -1125,8 +1010,7 @@ public void testGroupByAggregatorDefaultValuesFinalizeOuterSketches() 1024, true, null, - null, - false + null ), equality("dim1", "nonexistent", ColumnType.STRING) 
), @@ -1137,8 +1021,7 @@ public void testGroupByAggregatorDefaultValuesFinalizeOuterSketches() 1024, true, null, - null, - false + null ), equality("dim1", "nonexistent", ColumnType.STRING) ) @@ -1310,116 +1193,4 @@ public void testThetaEstimateAsVirtualColumnWithTopN() ) ); } - - @Test - public void testArrays() - { - testQuery( - "SELECT\n" - + " APPROX_COUNT_DISTINCT_DS_THETA(arrayString), " - + " APPROX_COUNT_DISTINCT_DS_THETA(arrayLong), " - + " APPROX_COUNT_DISTINCT_DS_THETA(arrayDouble), " - + " APPROX_COUNT_DISTINCT_DS_THETA(sketch0), " - + " APPROX_COUNT_DISTINCT_DS_THETA(sketch1), " - + " APPROX_COUNT_DISTINCT_DS_THETA(sketch2), " - + " APPROX_COUNT_DISTINCT_DS_THETA(sketch3), " - + " APPROX_COUNT_DISTINCT_DS_THETA(sketch4), " - + " APPROX_COUNT_DISTINCT_DS_THETA(sketch5) " - + "FROM druid.all_types", - ImmutableList.of( - Druids.newTimeseriesQueryBuilder() - .dataSource("all_types") - .intervals(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity()))) - .granularity(Granularities.ALL) - .aggregators( - ImmutableList.of( - new SketchMergeAggregatorFactory( - "a0", - "arrayString", - null, - null, - null, - null, - true - ), - new SketchMergeAggregatorFactory( - "a1", - "arrayLong", - null, - null, - null, - null, - true - ), - new SketchMergeAggregatorFactory( - "a2", - "arrayDouble", - null, - null, - null, - null, - true - ), - new SketchMergeAggregatorFactory( - "a3", - "sketch0", - null, - null, - false, - null, - false - ), - new SketchMergeAggregatorFactory( - "a4", - "sketch1", - null, - null, - false, - null, - false - ), - new SketchMergeAggregatorFactory( - "a5", - "sketch2", - null, - null, - false, - null, - false - ), - new SketchMergeAggregatorFactory( - "a6", - "sketch3", - null, - null, - false, - null, - false - ), - new SketchMergeAggregatorFactory( - "a7", - "sketch4", - null, - null, - false, - null, - false - ), - new SketchMergeAggregatorFactory( - "a8", - "sketch5", - null, - null, - false, - null, - false - ) - ) - 
) - .context(QUERY_CONTEXT_DEFAULT) - .build() - ), - ImmutableList.of(new Object[]{4L, 4L, 4L, 4L, 4L, 4L, 5L, 4L, 6L}) - ); - } } diff --git a/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/aggregation/bloom/BloomFilterAggregatorFactory.java b/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/aggregation/bloom/BloomFilterAggregatorFactory.java index ce99e5b9b9e5..374f9ef3e6fb 100644 --- a/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/aggregation/bloom/BloomFilterAggregatorFactory.java +++ b/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/aggregation/bloom/BloomFilterAggregatorFactory.java @@ -287,29 +287,13 @@ private BaseBloomFilterAggregator factorizeInternal(ColumnSelectorFactory column maxNumEntries, onHeap ); - case ARRAY: - return new ByteBloomFilterAggregator( + case COMPLEX: + // in an ideal world, we would check complex type, but until then assume it's a bloom filter + return new BloomFilterMergeAggregator( columnFactory.makeColumnValueSelector(field.getDimension()), - capabilities, maxNumEntries, onHeap ); - case COMPLEX: - if (BloomFilterSerializersModule.BLOOM_FILTER_TYPE_NAME.equals(capabilities.getComplexTypeName())) { - return new BloomFilterMergeAggregator( - columnFactory.makeColumnValueSelector(field.getDimension()), - maxNumEntries, - onHeap - ); - } else { - // fall back to bytes aggregator - return new ByteBloomFilterAggregator( - columnFactory.makeColumnValueSelector(field.getDimension()), - capabilities, - maxNumEntries, - onHeap - ); - } default: throw new IAE( "Cannot create bloom filter %s for invalid column type [%s]", diff --git a/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/aggregation/bloom/ObjectBloomFilterAggregator.java b/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/aggregation/bloom/ObjectBloomFilterAggregator.java index 87e9f6721b7b..0ad7a179fdb6 100644 --- 
a/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/aggregation/bloom/ObjectBloomFilterAggregator.java +++ b/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/aggregation/bloom/ObjectBloomFilterAggregator.java @@ -19,7 +19,6 @@ package org.apache.druid.query.aggregation.bloom; -import org.apache.druid.math.expr.ExprEval; import org.apache.druid.query.filter.BloomKFilter; import org.apache.druid.segment.BaseObjectColumnValueSelector; @@ -55,8 +54,6 @@ void bufferAdd(ByteBuffer buf) BloomKFilter.addFloat(buf, (float) object); } else if (object instanceof String) { BloomKFilter.addString(buf, (String) object); - } else if (object instanceof Object[]) { - BloomKFilter.addBytes(buf, ExprEval.toBytesBestEffort(object)); } else { BloomKFilter.addBytes(buf, null, 0, 0); } diff --git a/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/filter/BloomDimFilter.java b/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/filter/BloomDimFilter.java index ab002496ee9c..1963aa8e7632 100644 --- a/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/filter/BloomDimFilter.java +++ b/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/filter/BloomDimFilter.java @@ -173,19 +173,9 @@ public boolean applyNull() @Override public Predicate makeArrayPredicate(@Nullable TypeSignature arrayType) { - final ExpressionType expressionType = arrayType == null - ? 
null - : ExpressionType.fromColumnTypeStrict(arrayType); - if (expressionType != null) { - return input -> { - if (input == null) { - return bloomKFilter.testBytes(null, 0, 0); - } - final byte[] bytes = ExprEval.toBytes(expressionType, input); - return bloomKFilter.testBytes(bytes); - }; - } else { - // fall back to per row detection + if (arrayType == null) { + // fall back to per row detection - the only time arrayType should ever be null is if an object predicate + // that detects an Object[] input return input -> { if (input == null) { return bloomKFilter.testBytes(null, 0, 0); @@ -194,6 +184,14 @@ public Predicate makeArrayPredicate(@Nullable TypeSignature return bloomKFilter.testBytes(bytes); }; } + final ExpressionType expressionType = ExpressionType.fromColumnTypeStrict(arrayType); + return input -> { + if (input == null) { + return bloomKFilter.testBytes(null, 0, 0); + } + final byte[] bytes = ExprEval.toBytes(expressionType, input); + return bloomKFilter.testBytes(bytes); + }; } }, extractionFn, diff --git a/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/aggregation/bloom/BloomFilterAggregatorTest.java b/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/aggregation/bloom/BloomFilterAggregatorTest.java index 41c553d56719..5888b7d13dde 100644 --- a/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/aggregation/bloom/BloomFilterAggregatorTest.java +++ b/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/aggregation/bloom/BloomFilterAggregatorTest.java @@ -28,8 +28,6 @@ import org.apache.druid.guice.BloomFilterSerializersModule; import org.apache.druid.jackson.DefaultObjectMapper; import org.apache.druid.java.util.common.StringUtils; -import org.apache.druid.math.expr.ExprEval; -import org.apache.druid.math.expr.ExpressionType; import org.apache.druid.query.aggregation.Aggregator; import org.apache.druid.query.aggregation.AggregatorFactory; import 
org.apache.druid.query.aggregation.BufferAggregator; @@ -93,13 +91,6 @@ public class BloomFilterAggregatorTest extends InitializedNullHandlingTest private static final Float[] FLOAT_VALUES1 = new Float[]{0.4f, 0.8f, 23.2f}; private static final Long[] LONG_VALUES1 = new Long[]{10241L, 12312355L, 0L, 81L}; - private static final Object[] ARRAY_VALUES = new Object[]{ - new Object[]{1L, 2L}, - new Object[]{3L, 4L}, - new Object[]{0L, 1000L}, - new Object[]{null, 123L} - }; - private static final int MAX_NUM_VALUES = 15; private static BloomKFilter filter1; @@ -111,7 +102,6 @@ public class BloomFilterAggregatorTest extends InitializedNullHandlingTest private static String serializedLongFilter; private static String serializedDoubleFilter; private static String serializedFloatFilter; - private static String serializedArrayFilter; static { try { @@ -144,11 +134,6 @@ public class BloomFilterAggregatorTest extends InitializedNullHandlingTest } serializedDoubleFilter = filterToString(doubleFilter); - BloomKFilter arrayFilter = new BloomKFilter(MAX_NUM_VALUES); - for (Object o : ARRAY_VALUES) { - arrayFilter.addBytes(ExprEval.toBytes(ExpressionType.LONG_ARRAY, o)); - } - serializedArrayFilter = filterToString(arrayFilter); } catch (Exception ex) { throw new RuntimeException(ex); @@ -276,7 +261,7 @@ public void testAggregateLongValues() throws IOException TestLongColumnSelector selector = new TestLongColumnSelector(Arrays.asList(LONG_VALUES1)); LongBloomFilterAggregator agg = new LongBloomFilterAggregator(selector, MAX_NUM_VALUES, true); - for (int i = 0; i < LONG_VALUES1.length; i++) { + for (Long ignored : LONG_VALUES1) { aggregateColumn(Collections.singletonList(selector), agg); } @@ -293,7 +278,7 @@ public void testAggregateFloatValues() throws IOException TestFloatColumnSelector selector = new TestFloatColumnSelector(Arrays.asList(FLOAT_VALUES1)); FloatBloomFilterAggregator agg = new FloatBloomFilterAggregator(selector, MAX_NUM_VALUES, true); - for (int i = 0; i < 
FLOAT_VALUES1.length; i++) { + for (Float ignored : FLOAT_VALUES1) { aggregateColumn(Collections.singletonList(selector), agg); } @@ -310,7 +295,7 @@ public void testAggregateDoubleValues() throws IOException TestDoubleColumnSelector selector = new TestDoubleColumnSelector(Arrays.asList(DOUBLE_VALUES1)); DoubleBloomFilterAggregator agg = new DoubleBloomFilterAggregator(selector, MAX_NUM_VALUES, true); - for (int i = 0; i < DOUBLE_VALUES1.length; i++) { + for (Double ignored : DOUBLE_VALUES1) { aggregateColumn(Collections.singletonList(selector), agg); } @@ -410,49 +395,6 @@ public void testBufferAggregateDoubleValues() throws IOException Assert.assertEquals(serializedDoubleFilter, serialized); } - @Test - public void testAggregateArrayValues() throws IOException - { - TestObjectColumnSelector selector = new TestObjectColumnSelector( - Arrays.asList(ARRAY_VALUES) - ); - ObjectBloomFilterAggregator agg = new ObjectBloomFilterAggregator(selector, MAX_NUM_VALUES, true); - - for (int i = 0; i < ARRAY_VALUES.length; i++) { - aggregateColumn(Collections.singletonList(selector), agg); - } - - BloomKFilter bloomKFilter = BloomKFilter.deserialize( - (ByteBuffer) valueAggregatorFactory.finalizeComputation(agg.get()) - ); - String serialized = filterToString(bloomKFilter); - Assert.assertEquals(serializedArrayFilter, serialized); - } - - @Test - public void testBufferAggregateArrayValues() throws IOException - { - TestObjectColumnSelector selector = new TestObjectColumnSelector( - Arrays.asList(ARRAY_VALUES) - ); - ObjectBloomFilterAggregator agg = new ObjectBloomFilterAggregator(selector, MAX_NUM_VALUES, true); - - int maxSize = valueAggregatorFactory.getMaxIntermediateSizeWithNulls(); - ByteBuffer buf = ByteBuffer.allocate(maxSize + 64); - int pos = 10; - buf.limit(pos + maxSize); - - agg.init(buf, pos); - - IntStream.range(0, ARRAY_VALUES.length) - .forEach(i -> bufferAggregateColumn(Collections.singletonList(selector), agg, buf, pos)); - BloomKFilter bloomKFilter = 
BloomKFilter.deserialize( - (ByteBuffer) valueAggregatorFactory.finalizeComputation(agg.get(buf, pos)) - ); - String serialized = filterToString(bloomKFilter); - Assert.assertEquals(serializedArrayFilter, serialized); - } - @Test public void testCombineValues() throws IOException { @@ -730,18 +672,4 @@ public double getDouble() return values.get(pos); } } - - public static class TestObjectColumnSelector extends SteppableSelector implements ColumnValueSelector - { - public TestObjectColumnSelector(List values) - { - super(values); - } - - @Override - public Object getObject() - { - return values.get(pos); - } - } } diff --git a/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/filter/sql/BloomDimFilterSqlTest.java b/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/filter/sql/BloomDimFilterSqlTest.java index f10ad759f58f..15152694ce4d 100644 --- a/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/filter/sql/BloomDimFilterSqlTest.java +++ b/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/filter/sql/BloomDimFilterSqlTest.java @@ -84,6 +84,7 @@ public void testBloomFilter() throws IOException @Test public void testBloomFilterExprFilter() throws IOException { + cannotVectorize(); BloomKFilter filter = new BloomKFilter(1500); filter.addString("a-foo"); filter.addString("-foo"); @@ -92,7 +93,6 @@ public void testBloomFilterExprFilter() throws IOException } byte[] bytes = BloomFilterSerializersModule.bloomKFilterToBytes(filter); String base64 = StringUtils.encodeBase64String(bytes); - skipVectorize(); // fool the planner to make an expression virtual column to test bloom filter Druid expression testQuery( diff --git a/integration-tests/src/test/java/org/apache/druid/tests/coordinator/duty/ITAutoCompactionTest.java b/integration-tests/src/test/java/org/apache/druid/tests/coordinator/duty/ITAutoCompactionTest.java index 8aabb8a344e5..3c40affa7834 100644 --- 
a/integration-tests/src/test/java/org/apache/druid/tests/coordinator/duty/ITAutoCompactionTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/coordinator/duty/ITAutoCompactionTest.java @@ -171,8 +171,8 @@ public void testAutoCompactionRowWithMetricAndRowWithoutMetricShouldPreserveExis new CountAggregatorFactory("count"), // FloatSumAggregator combine method takes in two Float but return Double new FloatSumAggregatorFactory("sum_added", "added"), - new SketchMergeAggregatorFactory("thetaSketch", "user", 16384, true, false, null, false), - new HllSketchBuildAggregatorFactory("HLLSketchBuild", "user", 12, TgtHllType.HLL_4.name(), null, false, false, false), + new SketchMergeAggregatorFactory("thetaSketch", "user", 16384, true, false, null), + new HllSketchBuildAggregatorFactory("HLLSketchBuild", "user", 12, TgtHllType.HLL_4.name(), null, false, false), new DoublesSketchAggregatorFactory("quantilesDoublesSketch", "delta", 128, 1000000000L, null) }, false @@ -265,7 +265,7 @@ public void testAutoCompactionRowWithMetricAndRowWithoutMetricShouldPreserveExis new AggregatorFactory[]{ new CountAggregatorFactory("count"), new LongSumAggregatorFactory("sum_added", "added"), - new SketchMergeAggregatorFactory("thetaSketch", "user", 16384, true, false, null, false), + new SketchMergeAggregatorFactory("thetaSketch", "user", 16384, true, false, null), new HllSketchBuildAggregatorFactory( "HLLSketchBuild", "user", @@ -273,7 +273,6 @@ public void testAutoCompactionRowWithMetricAndRowWithoutMetricShouldPreserveExis TgtHllType.HLL_4.name(), null, false, - false, false ), new DoublesSketchAggregatorFactory("quantilesDoublesSketch", "delta", 128, 1000000000L, null) From 7a14e897d1e8ffc4893159545a1d58c76a5fa631 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Thu, 13 Jul 2023 22:26:30 -0700 Subject: [PATCH 32/44] simplify --- .../sql/TDigestSketchSqlAggregatorTest.java | 4 +--- .../hll/sql/HllSketchSqlAggregatorTest.java | 4 +--- 
.../sql/DoublesSketchSqlAggregatorTest.java | 8 ++----- .../sql/ThetaSketchSqlAggregatorTest.java | 4 +--- .../sql/BloomFilterSqlAggregatorTest.java | 4 +--- ...etsHistogramQuantileSqlAggregatorTest.java | 4 +--- .../sql/QuantileSqlAggregatorTest.java | 4 +--- .../sql/VarianceSqlAggregatorTest.java | 4 +--- .../sql/calcite/BaseCalciteQueryTest.java | 24 ++++++++++++++++--- .../calcite/CalciteParameterQueryTest.java | 4 +--- .../druid/sql/calcite/CalciteQueryTest.java | 18 ++++---------- .../sql/calcite/CalciteSelectQueryTest.java | 4 +--- 12 files changed, 37 insertions(+), 49 deletions(-) diff --git a/extensions-contrib/tdigestsketch/src/test/java/org/apache/druid/query/aggregation/tdigestsketch/sql/TDigestSketchSqlAggregatorTest.java b/extensions-contrib/tdigestsketch/src/test/java/org/apache/druid/query/aggregation/tdigestsketch/sql/TDigestSketchSqlAggregatorTest.java index beef63e7a6b7..1c738976f683 100644 --- a/extensions-contrib/tdigestsketch/src/test/java/org/apache/druid/query/aggregation/tdigestsketch/sql/TDigestSketchSqlAggregatorTest.java +++ b/extensions-contrib/tdigestsketch/src/test/java/org/apache/druid/query/aggregation/tdigestsketch/sql/TDigestSketchSqlAggregatorTest.java @@ -444,9 +444,7 @@ public void testEmptyTimeseriesResults() .dataSource(CalciteTests.DATASOURCE1) .intervals(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity()))) .filters( - NullHandling.replaceWithDefault() - ? 
numericSelector("dim2", "0", null) - : equality("dim2", 0L, ColumnType.LONG) + numericEquality("dim2", 0L, ColumnType.LONG) ) .granularity(Granularities.ALL) .aggregators(ImmutableList.of( diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java index 688380fed7f6..3a07b86a17c7 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java @@ -853,9 +853,7 @@ public void testEmptyTimeseriesResults() .dataSource(CalciteTests.DATASOURCE1) .intervals(querySegmentSpec(Filtration.eternity())) .filters( - NullHandling.replaceWithDefault() - ? numericSelector("dim2", "0", null) - : equality("dim2", 0L, ColumnType.LONG) + numericEquality("dim2", 0L, ColumnType.LONG) ) .granularity(Granularities.ALL) .aggregators( diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/quantiles/sql/DoublesSketchSqlAggregatorTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/quantiles/sql/DoublesSketchSqlAggregatorTest.java index be1c3505232f..4777174192be 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/quantiles/sql/DoublesSketchSqlAggregatorTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/quantiles/sql/DoublesSketchSqlAggregatorTest.java @@ -726,9 +726,7 @@ public void testEmptyTimeseriesResults() .intervals(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity()))) .granularity(Granularities.ALL) .filters( - 
NullHandling.replaceWithDefault() - ? numericSelector("dim2", "0", null) - : equality("dim2", 0L, ColumnType.LONG) + numericEquality("dim2", 0L, ColumnType.LONG) ) .aggregators(ImmutableList.of( new DoublesSketchAggregatorFactory("a0:agg", "m1", null), @@ -777,9 +775,7 @@ public void testEmptyTimeseriesResultsWithFinalizeSketches() .intervals(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity()))) .granularity(Granularities.ALL) .filters( - NullHandling.replaceWithDefault() - ? numericSelector("dim2", "0", null) - : equality("dim2", 0L, ColumnType.LONG) + numericEquality("dim2", 0L, ColumnType.LONG) ) .aggregators(ImmutableList.of( new DoublesSketchAggregatorFactory("a0:agg", "m1", null), diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchSqlAggregatorTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchSqlAggregatorTest.java index 7f8c1970b2d7..a1573821ba74 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchSqlAggregatorTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchSqlAggregatorTest.java @@ -832,9 +832,7 @@ public void testEmptyTimeseriesResults() .intervals(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity()))) .granularity(Granularities.ALL) .filters( - NullHandling.replaceWithDefault() - ? 
numericSelector("dim2", "0", null) - : equality("dim2", 0L, ColumnType.LONG) + numericEquality("dim2", 0L, ColumnType.LONG) ) .aggregators( ImmutableList.of( diff --git a/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/aggregation/bloom/sql/BloomFilterSqlAggregatorTest.java b/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/aggregation/bloom/sql/BloomFilterSqlAggregatorTest.java index 6eb402a7e1f2..3ca9b9c2d691 100644 --- a/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/aggregation/bloom/sql/BloomFilterSqlAggregatorTest.java +++ b/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/aggregation/bloom/sql/BloomFilterSqlAggregatorTest.java @@ -489,9 +489,7 @@ public void testEmptyTimeseriesResults() throws Exception .intervals(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity()))) .granularity(Granularities.ALL) .filters( - NullHandling.replaceWithDefault() - ? numericSelector("dim2", "0", null) - : equality("dim2", 0L, ColumnType.LONG) + numericEquality("dim2", 0L, ColumnType.LONG) ) .aggregators( ImmutableList.of( diff --git a/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/sql/FixedBucketsHistogramQuantileSqlAggregatorTest.java b/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/sql/FixedBucketsHistogramQuantileSqlAggregatorTest.java index 03b194900336..6f29d60c1bd1 100644 --- a/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/sql/FixedBucketsHistogramQuantileSqlAggregatorTest.java +++ b/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/sql/FixedBucketsHistogramQuantileSqlAggregatorTest.java @@ -519,9 +519,7 @@ public void testEmptyTimeseriesResults() .intervals(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity()))) .granularity(Granularities.ALL) .filters( - NullHandling.replaceWithDefault() - ? 
numericSelector("dim2", "0", null) - : equality("dim2", 0L, ColumnType.LONG) + numericEquality("dim2", 0L, ColumnType.LONG) ) .aggregators(ImmutableList.of( new FixedBucketsHistogramAggregatorFactory( diff --git a/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/sql/QuantileSqlAggregatorTest.java b/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/sql/QuantileSqlAggregatorTest.java index 42aa77914627..aa7e5aed5ac5 100644 --- a/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/sql/QuantileSqlAggregatorTest.java +++ b/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/sql/QuantileSqlAggregatorTest.java @@ -409,9 +409,7 @@ public void testEmptyTimeseriesResults() .intervals(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity()))) .granularity(Granularities.ALL) .filters( - NullHandling.replaceWithDefault() - ? numericSelector("dim2", "0", null) - : equality("dim2", 0L, ColumnType.LONG) + numericEquality("dim2", 0L, ColumnType.LONG) ) .aggregators( ImmutableList.of( diff --git a/extensions-core/stats/src/test/java/org/apache/druid/query/aggregation/variance/sql/VarianceSqlAggregatorTest.java b/extensions-core/stats/src/test/java/org/apache/druid/query/aggregation/variance/sql/VarianceSqlAggregatorTest.java index c84d94a2f735..85434d2b33b8 100644 --- a/extensions-core/stats/src/test/java/org/apache/druid/query/aggregation/variance/sql/VarianceSqlAggregatorTest.java +++ b/extensions-core/stats/src/test/java/org/apache/druid/query/aggregation/variance/sql/VarianceSqlAggregatorTest.java @@ -519,9 +519,7 @@ public void testEmptyTimeseriesResults() .intervals(querySegmentSpec(Filtration.eternity())) .granularity(Granularities.ALL) .filters( - NullHandling.replaceWithDefault() - ? 
numericSelector("dim2", "0", null) - : equality("dim2", 0L, ColumnType.LONG) + numericEquality("dim2", 0L, ColumnType.LONG) ) .aggregators( new VarianceAggregatorFactory("a0:agg", "d1", "population", "double"), diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/BaseCalciteQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/BaseCalciteQueryTest.java index b487331ba9c0..012e48ac9454 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/BaseCalciteQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/BaseCalciteQueryTest.java @@ -429,14 +429,32 @@ public static ExpressionDimFilter expressionFilter(final String expression) return new ExpressionDimFilter(expression, CalciteTests.createExprMacroTable()); } + /** + * This method should be used instead of {@link #equality(String, Object, ColumnType)} when the match value type + * does not match the column type. If {@link NullHandling#sqlCompatible()} is true, this method is equivalent to + * {@link #equality(String, Object, ColumnType)}. When false, this method uses + * {@link #numericSelector(String, String)} so that the equality comparison uses a bound filter to correctly match + * numerical types. 
+ */ + public static DimFilter numericEquality( + final String fieldName, + final Object value, + final ColumnType matchValueType + ) + { + if (NullHandling.sqlCompatible()) { + return equality(fieldName, value, matchValueType); + } + return numericSelector(fieldName, String.valueOf(value)); + } + public static DimFilter numericSelector( final String fieldName, - final String value, - final ExtractionFn extractionFn + final String value ) { // We use Bound filters for numeric equality to achieve "10.0" = "10" - return bound(fieldName, value, value, false, false, extractionFn, StringComparators.NUMERIC); + return bound(fieldName, value, value, false, false, null, StringComparators.NUMERIC); } /** diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteParameterQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteParameterQueryTest.java index 9d3fecd075cd..f11c49389109 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteParameterQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteParameterQueryTest.java @@ -123,9 +123,7 @@ public void testParametersInSelectAndFilter() .dataSource(CalciteTests.DATASOURCE1) .intervals(querySegmentSpec(Filtration.eternity())) .filters( - NullHandling.replaceWithDefault() - ? numericSelector("dim2", "0", null) - : equality("dim2", 0L, ColumnType.LONG) + numericEquality("dim2", 0L, ColumnType.LONG) ) .granularity(Granularities.ALL) .aggregators(aggregators( diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java index 6fe832123dd3..69138e2ae004 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java @@ -4653,7 +4653,7 @@ public void testFilterOnStringAsNumber() .setDimFilter( NullHandling.replaceWithDefault() ? 
or( - numericSelector("dim1", "10", null), + numericSelector("dim1", "10"), and( selector("v0", "10.00"), bound("dim1", "9", "10.5", true, false, null, StringComparators.NUMERIC) @@ -5709,9 +5709,7 @@ public void testCountStarWithFilterOnCastedString() .intervals(querySegmentSpec(Filtration.eternity())) .granularity(Granularities.ALL) .filters( - NullHandling.replaceWithDefault() - ? numericSelector("dim1", "2", null) - : equality("dim1", 2L, ColumnType.LONG) + numericEquality("dim1", 2L, ColumnType.LONG) ) .aggregators(aggregators(new CountAggregatorFactory("a0"))) .context(QUERY_CONTEXT_DEFAULT) @@ -9257,9 +9255,7 @@ public void testTimeseriesEmptyResultsAggregatorDefaultValues() .dataSource(CalciteTests.DATASOURCE3) .intervals(querySegmentSpec(Filtration.eternity())) .filters( - NullHandling.replaceWithDefault() - ? numericSelector("dim2", "0", null) - : equality("dim2", 0L, ColumnType.LONG) + numericEquality("dim2", 0L, ColumnType.LONG) ) .granularity(Granularities.ALL) .aggregators( @@ -9368,9 +9364,7 @@ public void testTimeseriesEmptyResultsAggregatorDefaultValuesNonVectorized() .dataSource(CalciteTests.DATASOURCE3) .intervals(querySegmentSpec(Filtration.eternity())) .filters( - NullHandling.replaceWithDefault() - ? numericSelector("dim2", "0", null) - : equality("dim2", 0L, ColumnType.LONG) + numericEquality("dim2", 0L, ColumnType.LONG) ) .granularity(Granularities.ALL) .aggregators( @@ -11888,9 +11882,7 @@ public void testTrigonometricFunction() .dataSource(CalciteTests.DATASOURCE1) .intervals(querySegmentSpec(Filtration.eternity())) .filters( - NullHandling.replaceWithDefault() - ? 
numericSelector("dim2", "0", null) - : equality("dim2", 0L, ColumnType.LONG) + numericEquality("dim2", 0L, ColumnType.LONG) ) .granularity(Granularities.ALL) .aggregators(aggregators( diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteSelectQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteSelectQueryTest.java index cd947ac2ed61..7fa655565eba 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteSelectQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteSelectQueryTest.java @@ -989,9 +989,7 @@ public void testSelectCountStar() .dataSource(CalciteTests.DATASOURCE1) .intervals(querySegmentSpec(Filtration.eternity())) .filters( - NullHandling.replaceWithDefault() - ? numericSelector("dim2", "0", null) - : equality("dim2", 0L, ColumnType.LONG) + numericEquality("dim2", 0L, ColumnType.LONG) ) .granularity(Granularities.ALL) .aggregators(aggregators( From 8e46dbc9ecd2a9a510796cc7fd535f9aafa06b37 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Thu, 13 Jul 2023 23:55:06 -0700 Subject: [PATCH 33/44] adjustments --- .../sql/TDigestSketchSqlAggregatorTest.java | 4 +- .../HllSketchBuildVectorProcessorFactory.java | 2 +- .../hll/sql/HllSketchSqlAggregatorTest.java | 4 +- .../sql/DoublesSketchSqlAggregatorTest.java | 8 +-- .../sql/ThetaSketchSqlAggregatorTest.java | 4 +- .../bloom/ByteBloomFilterAggregator.java | 56 ------------------- .../sql/BloomFilterSqlAggregatorTest.java | 4 +- ...etsHistogramQuantileSqlAggregatorTest.java | 4 +- .../sql/QuantileSqlAggregatorTest.java | 4 +- .../sql/VarianceSqlAggregatorTest.java | 4 +- .../calcite/CalciteParameterQueryTest.java | 4 +- .../druid/sql/calcite/CalciteQueryTest.java | 16 ++---- .../sql/calcite/CalciteSelectQueryTest.java | 4 +- 13 files changed, 16 insertions(+), 102 deletions(-) delete mode 100644 extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/aggregation/bloom/ByteBloomFilterAggregator.java diff --git 
a/extensions-contrib/tdigestsketch/src/test/java/org/apache/druid/query/aggregation/tdigestsketch/sql/TDigestSketchSqlAggregatorTest.java b/extensions-contrib/tdigestsketch/src/test/java/org/apache/druid/query/aggregation/tdigestsketch/sql/TDigestSketchSqlAggregatorTest.java index 1c738976f683..f42cade76737 100644 --- a/extensions-contrib/tdigestsketch/src/test/java/org/apache/druid/query/aggregation/tdigestsketch/sql/TDigestSketchSqlAggregatorTest.java +++ b/extensions-contrib/tdigestsketch/src/test/java/org/apache/druid/query/aggregation/tdigestsketch/sql/TDigestSketchSqlAggregatorTest.java @@ -443,9 +443,7 @@ public void testEmptyTimeseriesResults() Druids.newTimeseriesQueryBuilder() .dataSource(CalciteTests.DATASOURCE1) .intervals(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity()))) - .filters( - numericEquality("dim2", 0L, ColumnType.LONG) - ) + .filters(numericEquality("dim2", 0L, ColumnType.LONG)) .granularity(Granularities.ALL) .aggregators(ImmutableList.of( new TDigestSketchAggregatorFactory("a0:agg", "m1", TDigestSketchAggregatorFactory.DEFAULT_COMPRESSION), diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/vector/HllSketchBuildVectorProcessorFactory.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/vector/HllSketchBuildVectorProcessorFactory.java index ca85ccd06fca..bce83d50d5cf 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/vector/HllSketchBuildVectorProcessorFactory.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/vector/HllSketchBuildVectorProcessorFactory.java @@ -90,7 +90,7 @@ public HllSketchBuildVectorProcessor makeArrayProcessor( VectorObjectSelector selector ) { - throw DruidException.defensive("ARRAY types are not supported for distinct count"); + throw DruidException.defensive("ARRAY types are not 
supported for hll sketch"); } @Override diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java index 3a07b86a17c7..349f1a57d1c0 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java @@ -852,9 +852,7 @@ public void testEmptyTimeseriesResults() ImmutableList.of(Druids.newTimeseriesQueryBuilder() .dataSource(CalciteTests.DATASOURCE1) .intervals(querySegmentSpec(Filtration.eternity())) - .filters( - numericEquality("dim2", 0L, ColumnType.LONG) - ) + .filters(numericEquality("dim2", 0L, ColumnType.LONG)) .granularity(Granularities.ALL) .aggregators( aggregators( diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/quantiles/sql/DoublesSketchSqlAggregatorTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/quantiles/sql/DoublesSketchSqlAggregatorTest.java index 4777174192be..6800a2f61eb7 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/quantiles/sql/DoublesSketchSqlAggregatorTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/quantiles/sql/DoublesSketchSqlAggregatorTest.java @@ -725,9 +725,7 @@ public void testEmptyTimeseriesResults() .dataSource(CalciteTests.DATASOURCE1) .intervals(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity()))) .granularity(Granularities.ALL) - .filters( - numericEquality("dim2", 0L, ColumnType.LONG) - ) + .filters(numericEquality("dim2", 0L, ColumnType.LONG)) 
.aggregators(ImmutableList.of( new DoublesSketchAggregatorFactory("a0:agg", "m1", null), new DoublesSketchAggregatorFactory("a1:agg", "qsketch_m1", null), @@ -774,9 +772,7 @@ public void testEmptyTimeseriesResultsWithFinalizeSketches() .dataSource(CalciteTests.DATASOURCE1) .intervals(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity()))) .granularity(Granularities.ALL) - .filters( - numericEquality("dim2", 0L, ColumnType.LONG) - ) + .filters(numericEquality("dim2", 0L, ColumnType.LONG)) .aggregators(ImmutableList.of( new DoublesSketchAggregatorFactory("a0:agg", "m1", null), new DoublesSketchAggregatorFactory("a1:agg", "qsketch_m1", null), diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchSqlAggregatorTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchSqlAggregatorTest.java index a1573821ba74..3946ce558b19 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchSqlAggregatorTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchSqlAggregatorTest.java @@ -831,9 +831,7 @@ public void testEmptyTimeseriesResults() .dataSource(CalciteTests.DATASOURCE1) .intervals(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity()))) .granularity(Granularities.ALL) - .filters( - numericEquality("dim2", 0L, ColumnType.LONG) - ) + .filters(numericEquality("dim2", 0L, ColumnType.LONG)) .aggregators( ImmutableList.of( new SketchMergeAggregatorFactory( diff --git a/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/aggregation/bloom/ByteBloomFilterAggregator.java b/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/aggregation/bloom/ByteBloomFilterAggregator.java deleted file mode 100644 index 299b535321a7..000000000000 --- 
a/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/aggregation/bloom/ByteBloomFilterAggregator.java +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.query.aggregation.bloom; - -import org.apache.druid.math.expr.ExprEval; -import org.apache.druid.math.expr.ExpressionType; -import org.apache.druid.query.filter.BloomKFilter; -import org.apache.druid.segment.BaseObjectColumnValueSelector; -import org.apache.druid.segment.column.TypeSignature; -import org.apache.druid.segment.column.ValueType; - -import java.nio.ByteBuffer; - -public class ByteBloomFilterAggregator extends BaseBloomFilterAggregator> -{ - private final ExpressionType columnType; - - ByteBloomFilterAggregator( - BaseObjectColumnValueSelector baseObjectColumnValueSelector, - TypeSignature columnType, - int maxNumEntries, - boolean onHeap - ) - { - super(baseObjectColumnValueSelector, maxNumEntries, onHeap); - this.columnType = ExpressionType.fromColumnTypeStrict(columnType); - } - - @Override - void bufferAdd(ByteBuffer buf) - { - final Object val = selector.getObject(); - if (val == null) { - BloomKFilter.addBytes(buf, null, 0, 0); - } else { - 
BloomKFilter.addBytes(buf, ExprEval.toBytes(columnType, val)); - } - } -} diff --git a/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/aggregation/bloom/sql/BloomFilterSqlAggregatorTest.java b/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/aggregation/bloom/sql/BloomFilterSqlAggregatorTest.java index 3ca9b9c2d691..1c77f0986e11 100644 --- a/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/aggregation/bloom/sql/BloomFilterSqlAggregatorTest.java +++ b/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/aggregation/bloom/sql/BloomFilterSqlAggregatorTest.java @@ -488,9 +488,7 @@ public void testEmptyTimeseriesResults() throws Exception .dataSource(CalciteTests.DATASOURCE3) .intervals(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity()))) .granularity(Granularities.ALL) - .filters( - numericEquality("dim2", 0L, ColumnType.LONG) - ) + .filters(numericEquality("dim2", 0L, ColumnType.LONG)) .aggregators( ImmutableList.of( new BloomFilterAggregatorFactory( diff --git a/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/sql/FixedBucketsHistogramQuantileSqlAggregatorTest.java b/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/sql/FixedBucketsHistogramQuantileSqlAggregatorTest.java index 6f29d60c1bd1..e7b36163efa5 100644 --- a/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/sql/FixedBucketsHistogramQuantileSqlAggregatorTest.java +++ b/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/sql/FixedBucketsHistogramQuantileSqlAggregatorTest.java @@ -518,9 +518,7 @@ public void testEmptyTimeseriesResults() .dataSource(CalciteTests.DATASOURCE1) .intervals(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity()))) .granularity(Granularities.ALL) - .filters( - numericEquality("dim2", 0L, ColumnType.LONG) - ) + 
.filters(numericEquality("dim2", 0L, ColumnType.LONG)) .aggregators(ImmutableList.of( new FixedBucketsHistogramAggregatorFactory( "a0:agg", diff --git a/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/sql/QuantileSqlAggregatorTest.java b/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/sql/QuantileSqlAggregatorTest.java index aa7e5aed5ac5..53bc115c18b9 100644 --- a/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/sql/QuantileSqlAggregatorTest.java +++ b/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/sql/QuantileSqlAggregatorTest.java @@ -408,9 +408,7 @@ public void testEmptyTimeseriesResults() .dataSource(CalciteTests.DATASOURCE1) .intervals(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity()))) .granularity(Granularities.ALL) - .filters( - numericEquality("dim2", 0L, ColumnType.LONG) - ) + .filters(numericEquality("dim2", 0L, ColumnType.LONG)) .aggregators( ImmutableList.of( new ApproximateHistogramFoldingAggregatorFactory( diff --git a/extensions-core/stats/src/test/java/org/apache/druid/query/aggregation/variance/sql/VarianceSqlAggregatorTest.java b/extensions-core/stats/src/test/java/org/apache/druid/query/aggregation/variance/sql/VarianceSqlAggregatorTest.java index 85434d2b33b8..8f4f88e5d924 100644 --- a/extensions-core/stats/src/test/java/org/apache/druid/query/aggregation/variance/sql/VarianceSqlAggregatorTest.java +++ b/extensions-core/stats/src/test/java/org/apache/druid/query/aggregation/variance/sql/VarianceSqlAggregatorTest.java @@ -518,9 +518,7 @@ public void testEmptyTimeseriesResults() .dataSource(CalciteTests.DATASOURCE3) .intervals(querySegmentSpec(Filtration.eternity())) .granularity(Granularities.ALL) - .filters( - numericEquality("dim2", 0L, ColumnType.LONG) - ) + .filters(numericEquality("dim2", 0L, ColumnType.LONG)) .aggregators( new VarianceAggregatorFactory("a0:agg", "d1", 
"population", "double"), new VarianceAggregatorFactory("a1:agg", "d1", "sample", "double"), diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteParameterQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteParameterQueryTest.java index f11c49389109..edda85260bda 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteParameterQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteParameterQueryTest.java @@ -122,9 +122,7 @@ public void testParametersInSelectAndFilter() ImmutableList.of(Druids.newTimeseriesQueryBuilder() .dataSource(CalciteTests.DATASOURCE1) .intervals(querySegmentSpec(Filtration.eternity())) - .filters( - numericEquality("dim2", 0L, ColumnType.LONG) - ) + .filters(numericEquality("dim2", 0L, ColumnType.LONG)) .granularity(Granularities.ALL) .aggregators(aggregators( new CountAggregatorFactory("a0"), diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java index 69138e2ae004..876ade4271ff 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java @@ -5708,9 +5708,7 @@ public void testCountStarWithFilterOnCastedString() .dataSource(CalciteTests.DATASOURCE1) .intervals(querySegmentSpec(Filtration.eternity())) .granularity(Granularities.ALL) - .filters( - numericEquality("dim1", 2L, ColumnType.LONG) - ) + .filters(numericEquality("dim1", 2L, ColumnType.LONG)) .aggregators(aggregators(new CountAggregatorFactory("a0"))) .context(QUERY_CONTEXT_DEFAULT) .build() @@ -9254,9 +9252,7 @@ public void testTimeseriesEmptyResultsAggregatorDefaultValues() Druids.newTimeseriesQueryBuilder() .dataSource(CalciteTests.DATASOURCE3) .intervals(querySegmentSpec(Filtration.eternity())) - .filters( - numericEquality("dim2", 0L, ColumnType.LONG) - ) + .filters(numericEquality("dim2", 0L, ColumnType.LONG)) 
.granularity(Granularities.ALL) .aggregators( aggregators( @@ -9363,9 +9359,7 @@ public void testTimeseriesEmptyResultsAggregatorDefaultValuesNonVectorized() Druids.newTimeseriesQueryBuilder() .dataSource(CalciteTests.DATASOURCE3) .intervals(querySegmentSpec(Filtration.eternity())) - .filters( - numericEquality("dim2", 0L, ColumnType.LONG) - ) + .filters(numericEquality("dim2", 0L, ColumnType.LONG)) .granularity(Granularities.ALL) .aggregators( aggregators( @@ -11881,9 +11875,7 @@ public void testTrigonometricFunction() ImmutableList.of(Druids.newTimeseriesQueryBuilder() .dataSource(CalciteTests.DATASOURCE1) .intervals(querySegmentSpec(Filtration.eternity())) - .filters( - numericEquality("dim2", 0L, ColumnType.LONG) - ) + .filters(numericEquality("dim2", 0L, ColumnType.LONG)) .granularity(Granularities.ALL) .aggregators(aggregators( new CountAggregatorFactory("a0") diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteSelectQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteSelectQueryTest.java index 7fa655565eba..be05b46c3716 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteSelectQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteSelectQueryTest.java @@ -988,9 +988,7 @@ public void testSelectCountStar() ImmutableList.of(Druids.newTimeseriesQueryBuilder() .dataSource(CalciteTests.DATASOURCE1) .intervals(querySegmentSpec(Filtration.eternity())) - .filters( - numericEquality("dim2", 0L, ColumnType.LONG) - ) + .filters(numericEquality("dim2", 0L, ColumnType.LONG)) .granularity(Granularities.ALL) .aggregators(aggregators( new CountAggregatorFactory("a0"), From aed201695c562688c0ab7fc0e342d22060bfe12d Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Fri, 14 Jul 2023 19:51:59 -0700 Subject: [PATCH 34/44] refactor a bunch of stuff --- ...ryEncodedStringIndexSupplierBenchmark.java | 8 +- .../indexing/input/DruidSegmentReader.java | 3 +- .../druid/query/filter/BoundDimFilter.java | 491 +++++-------- 
.../druid/query/filter/EqualityFilter.java | 176 +++-- .../druid/query/filter/InDimFilter.java | 10 +- .../apache/druid/query/filter/NullFilter.java | 5 +- .../druid/query/filter/RangeFilter.java | 691 +++++++++--------- .../vector/ArrayVectorValueMatcher.java | 1 - .../vector/DoubleVectorValueMatcher.java | 2 - .../vector/FloatVectorValueMatcher.java | 2 - .../filter/vector/LongVectorValueMatcher.java | 2 - .../MultiValueStringVectorValueMatcher.java | 3 - .../vector/ObjectVectorValueMatcher.java | 1 - .../SingleValueStringVectorValueMatcher.java | 3 - .../StringObjectVectorValueMatcher.java | 2 - .../druid/segment/AutoTypeColumnIndexer.java | 22 +- .../druid/segment/ColumnProcessorFactory.java | 10 +- .../druid/segment/ColumnProcessors.java | 2 +- .../druid/segment/column/ColumnConfig.java | 10 +- .../druid/segment/filter/BoundFilter.java | 75 +- .../filter/ColumnComparisonFilter.java | 2 +- .../segment/filter/ExpressionFilter.java | 2 +- .../apache/druid/segment/filter/Filters.java | 10 +- .../druid/segment/filter/LikeFilter.java | 18 +- .../filter/PredicateValueMatcherFactory.java | 3 +- .../druid/segment/filter/SelectorFilter.java | 12 +- .../StringConstantValueMatcherFactory.java | 2 +- ...> IndexedStringDruidPredicateIndexes.java} | 7 +- ...dexedUtf8LexicographicalRangeIndexes.java} | 8 +- ...ndex.java => IndexedUtf8ValueIndexes.java} | 13 +- .../DictionaryEncodedStringValueIndex.java | 4 +- .../semantic/DictionaryEncodedValueIndex.java | 4 +- ...eIndex.java => DruidPredicateIndexes.java} | 2 +- ....java => LexicographicalRangeIndexes.java} | 2 +- .../index/semantic/NullValueIndex.java | 2 +- ...ngeIndex.java => NumericRangeIndexes.java} | 2 +- ...tIndex.java => StringValueSetIndexes.java} | 2 +- ...TypedValueIndex.java => ValueIndexes.java} | 9 +- .../join/lookup/LookupJoinMatcher.java | 2 +- .../join/table/IndexedTableJoinMatcher.java | 2 +- .../NestedFieldColumnIndexSupplier.java | 82 +-- .../NestedFieldDictionaryEncodedColumn.java | 30 +- 
.../ScalarDoubleColumnAndIndexSupplier.java | 30 +- .../ScalarLongColumnAndIndexSupplier.java | 30 +- .../druid/segment/nested/VariantColumn.java | 44 +- .../nested/VariantColumnAndIndexSupplier.java | 9 +- .../segment/serde/NullValueIndexSupplier.java | 2 +- .../serde/StringUtf8ColumnIndexSupplier.java | 28 +- .../virtual/ListFilteredVirtualColumn.java | 38 +- .../virtual/NestedFieldVirtualColumn.java | 22 +- .../druid/query/filter/InDimFilterTest.java | 6 +- .../druid/query/filter/LikeDimFilterTest.java | 12 +- ...ColumnSelectorColumnIndexSelectorTest.java | 10 +- .../segment/IndexMergerNullHandlingTest.java | 6 +- .../druid/segment/IndexMergerTestBase.java | 4 +- .../druid/segment/filter/RangeFilterTest.java | 54 +- .../druid/segment/join/JoinTestHelper.java | 3 +- .../nested/NestedDataColumnSupplierTest.java | 62 +- .../NestedDataColumnSupplierV4Test.java | 36 +- .../NestedFieldColumnIndexSupplierTest.java | 192 ++--- .../ScalarDoubleColumnSupplierTest.java | 14 +- .../nested/ScalarLongColumnSupplierTest.java | 14 +- .../ScalarStringColumnSupplierTest.java | 12 +- .../nested/VariantColumnSupplierTest.java | 12 +- ...tionaryEncodedStringIndexSupplierTest.java | 4 +- .../druid/sql/calcite/filtration/Ranges.java | 20 +- 66 files changed, 1200 insertions(+), 1203 deletions(-) rename processing/src/main/java/org/apache/druid/segment/index/{IndexedStringDruidPredicateIndex.java => IndexedStringDruidPredicateIndexes.java} (95%) rename processing/src/main/java/org/apache/druid/segment/index/{IndexedUtf8LexicographicalRangeIndex.java => IndexedUtf8LexicographicalRangeIndexes.java} (97%) rename processing/src/main/java/org/apache/druid/segment/index/{IndexedUtf8ValueSetIndex.java => IndexedUtf8ValueIndexes.java} (96%) rename processing/src/main/java/org/apache/druid/segment/index/semantic/{DruidPredicateIndex.java => DruidPredicateIndexes.java} (97%) rename processing/src/main/java/org/apache/druid/segment/index/semantic/{LexicographicalRangeIndex.java => 
LexicographicalRangeIndexes.java} (98%) rename processing/src/main/java/org/apache/druid/segment/index/semantic/{NumericRangeIndex.java => NumericRangeIndexes.java} (98%) rename processing/src/main/java/org/apache/druid/segment/index/semantic/{StringValueSetIndex.java => StringValueSetIndexes.java} (97%) rename processing/src/main/java/org/apache/druid/segment/index/semantic/{TypedValueIndex.java => ValueIndexes.java} (77%) diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/DictionaryEncodedStringIndexSupplierBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/DictionaryEncodedStringIndexSupplierBenchmark.java index 882c6789b560..eef24302f5d6 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/DictionaryEncodedStringIndexSupplierBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/DictionaryEncodedStringIndexSupplierBenchmark.java @@ -30,8 +30,8 @@ import org.apache.druid.segment.data.GenericIndexed; import org.apache.druid.segment.data.RoaringBitmapSerdeFactory; import org.apache.druid.segment.index.BitmapColumnIndex; -import org.apache.druid.segment.index.IndexedUtf8ValueSetIndex; -import org.apache.druid.segment.index.semantic.StringValueSetIndex; +import org.apache.druid.segment.index.IndexedUtf8ValueIndexes; +import org.apache.druid.segment.index.semantic.StringValueSetIndexes; import org.apache.druid.segment.serde.StringUtf8ColumnIndexSupplier; import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.annotations.BenchmarkMode; @@ -72,7 +72,7 @@ public class DictionaryEncodedStringIndexSupplierBenchmark public static class BenchmarkState { @Nullable - private IndexedUtf8ValueSetIndex stringValueSetIndex; + private IndexedUtf8ValueIndexes stringValueSetIndex; private final TreeSet values = new TreeSet<>(); private static final int START_INT = 10_000_000; @@ -112,7 +112,7 @@ public void setup() ); StringUtf8ColumnIndexSupplier indexSupplier = new 
StringUtf8ColumnIndexSupplier<>(bitmapFactory, dictionaryUtf8::singleThreaded, bitmaps, null); - stringValueSetIndex = (IndexedUtf8ValueSetIndex) indexSupplier.as(StringValueSetIndex.class); + stringValueSetIndex = (IndexedUtf8ValueIndexes) indexSupplier.as(StringValueSetIndexes.class); List filterValues = new ArrayList<>(); List nonFilterValues = new ArrayList<>(); for (int i = 0; i < dictionarySize; i++) { diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/input/DruidSegmentReader.java b/indexing-service/src/main/java/org/apache/druid/indexing/input/DruidSegmentReader.java index cff9081d57ed..d048cf4d8b91 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/input/DruidSegmentReader.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/input/DruidSegmentReader.java @@ -61,6 +61,7 @@ import org.apache.druid.utils.CollectionUtils; import org.joda.time.Interval; +import javax.annotation.Nullable; import java.io.File; import java.io.IOException; import java.util.ArrayList; @@ -280,7 +281,7 @@ public Supplier makeLongProcessor(BaseLongColumnValueSelector selector) @Override public Supplier makeArrayProcessor( BaseObjectColumnValueSelector selector, - ColumnCapabilities columnCapabilities + @Nullable ColumnCapabilities columnCapabilities ) { return selector::getObject; diff --git a/processing/src/main/java/org/apache/druid/query/filter/BoundDimFilter.java b/processing/src/main/java/org/apache/druid/query/filter/BoundDimFilter.java index 9463308b5069..17cecc411511 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/BoundDimFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/BoundDimFilter.java @@ -279,8 +279,12 @@ && getExtractionFn() == null } else if (getUpper() == null) { range = isLowerStrict() ? Range.greaterThan(getLower()) : Range.atLeast(getLower()); } else { - range = Range.range(getLower(), isLowerStrict() ? BoundType.OPEN : BoundType.CLOSED, - getUpper(), isUpperStrict() ? 
BoundType.OPEN : BoundType.CLOSED); + range = Range.range( + getLower(), + isLowerStrict() ? BoundType.OPEN : BoundType.CLOSED, + getUpper(), + isUpperStrict() ? BoundType.OPEN : BoundType.CLOSED + ); } retSet.add(range); return retSet; @@ -323,7 +327,8 @@ public int hashCode() upperStrict, extractionFn, ordering, - filterTuning); + filterTuning + ); } @Override @@ -358,97 +363,212 @@ public String toString() return builder.appendFilterTuning(filterTuning).build(); } - private DruidLongPredicate createLongPredicate() + private Supplier makeLongPredicateSupplier() { - boolean hasLowerLongBound; - boolean hasUpperLongBound; - long lowerLongBound; - long upperLongBound; - boolean matchesNothing = false; - - if (hasLowerBound()) { - final Long lowerLong = GuavaUtils.tryParseLong(lower); - if (lowerLong == null) { - BigDecimal lowerBigDecimal = getBigDecimalLowerBoundFromFloatString(lower); - if (lowerBigDecimal == null) { - // Unparseable values fall before all actual numbers, so all numbers - // will match the lower bound. - hasLowerLongBound = false; - lowerLongBound = 0L; - } else { - try { - lowerLongBound = lowerBigDecimal.longValueExact(); - hasLowerLongBound = true; - } - catch (ArithmeticException ae) { // the BigDecimal can't be contained in a long + return Suppliers.memoize(() -> { + boolean hasLowerLongBound; + boolean hasUpperLongBound; + long lowerLongBound; + long upperLongBound; + boolean matchesNothing = false; + + if (hasLowerBound()) { + final Long lowerLong = GuavaUtils.tryParseLong(lower); + if (lowerLong == null) { + BigDecimal lowerBigDecimal = getBigDecimalLowerBoundFromFloatString(lower); + if (lowerBigDecimal == null) { + // Unparseable values fall before all actual numbers, so all numbers + // will match the lower bound. 
hasLowerLongBound = false; lowerLongBound = 0L; - if (lowerBigDecimal.compareTo(BigDecimal.ZERO) > 0) { - // positive lower bound, > all longs, will match nothing - matchesNothing = true; + } else { + try { + lowerLongBound = lowerBigDecimal.longValueExact(); + hasLowerLongBound = true; + } + catch (ArithmeticException ae) { // the BigDecimal can't be contained in a long + hasLowerLongBound = false; + lowerLongBound = 0L; + if (lowerBigDecimal.compareTo(BigDecimal.ZERO) > 0) { + // positive lower bound, > all longs, will match nothing + matchesNothing = true; + } } } + } else { + hasLowerLongBound = true; + lowerLongBound = lowerLong; } } else { - hasLowerLongBound = true; - lowerLongBound = lowerLong; + hasLowerLongBound = false; + lowerLongBound = 0L; } - } else { - hasLowerLongBound = false; - lowerLongBound = 0L; - } - if (hasUpperBound()) { - Long upperLong = GuavaUtils.tryParseLong(upper); - if (upperLong == null) { - BigDecimal upperBigDecimal = getBigDecimalUpperBoundFromFloatString(upper); - if (upperBigDecimal == null) { - // Unparseable values fall before all actual numbers, so no numbers - // can match the upper bound. - matchesNothing = true; - hasUpperLongBound = false; - upperLongBound = 0L; - } else { - try { - upperLongBound = upperBigDecimal.longValueExact(); - hasUpperLongBound = true; - } - catch (ArithmeticException ae) { // the BigDecimal can't be - // contained in a long + if (hasUpperBound()) { + Long upperLong = GuavaUtils.tryParseLong(upper); + if (upperLong == null) { + BigDecimal upperBigDecimal = getBigDecimalUpperBoundFromFloatString(upper); + if (upperBigDecimal == null) { + // Unparseable values fall before all actual numbers, so no numbers + // can match the upper bound. 
+ matchesNothing = true; hasUpperLongBound = false; upperLongBound = 0L; - if (upperBigDecimal.compareTo(BigDecimal.ZERO) < 0) { - // negative upper bound, < all longs, will match nothing - matchesNothing = true; + } else { + try { + upperLongBound = upperBigDecimal.longValueExact(); + hasUpperLongBound = true; + } + catch (ArithmeticException ae) { // the BigDecimal can't be + // contained in a long + hasUpperLongBound = false; + upperLongBound = 0L; + if (upperBigDecimal.compareTo(BigDecimal.ZERO) < 0) { + // negative upper bound, < all longs, will match nothing + matchesNothing = true; + } } } + } else { + hasUpperLongBound = true; + upperLongBound = upperLong; } } else { - hasUpperLongBound = true; - upperLongBound = upperLong; + hasUpperLongBound = false; + upperLongBound = 0L; } - } else { - hasUpperLongBound = false; - upperLongBound = 0L; - } - if (matchesNothing) { - return DruidLongPredicate.ALWAYS_FALSE; - } else { - return makeLongPredicateFromBounds( - hasLowerLongBound, - hasUpperLongBound, - lowerStrict, - upperStrict, - lowerLongBound, - upperLongBound); - } + if (matchesNothing) { + return DruidLongPredicate.ALWAYS_FALSE; + } else { + final RangeFilter.RangeType rangeType = RangeFilter.RangeType.of( + hasLowerLongBound, + lowerStrict, + hasUpperLongBound, + upperStrict + ); + return RangeFilter.makeLongPredicate(rangeType, lowerLongBound, upperLongBound); + } + }); } - private Supplier makeLongPredicateSupplier() + private Supplier makeFloatPredicateSupplier() + { + return Suppliers.memoize(() -> { + final boolean hasLowerFloatBound; + final boolean hasUpperFloatBound; + final float lowerFloatBound; + final float upperFloatBound; + boolean matchesNothing = false; + + if (hasLowerBound()) { + final Float lowerFloat = Floats.tryParse(lower); + if (lowerFloat == null) { + // Unparseable values fall before all actual numbers, so all numbers + // will match the lower bound. 
+ hasLowerFloatBound = false; + lowerFloatBound = 0L; + } else { + hasLowerFloatBound = true; + lowerFloatBound = lowerFloat; + } + } else { + hasLowerFloatBound = false; + lowerFloatBound = 0L; + } + + if (hasUpperBound()) { + Float upperFloat = Floats.tryParse(upper); + if (upperFloat == null) { + // Unparseable values fall before all actual numbers, so no numbers + // can match the upper bound. + matchesNothing = true; + hasUpperFloatBound = false; + upperFloatBound = 0L; + } else { + hasUpperFloatBound = true; + upperFloatBound = upperFloat; + } + } else { + hasUpperFloatBound = false; + upperFloatBound = 0L; + } + + + if (matchesNothing) { + return DruidFloatPredicate.ALWAYS_FALSE; + } else { + + final RangeFilter.RangeType rangeType = RangeFilter.RangeType.of( + hasLowerFloatBound, + lowerStrict, + hasUpperFloatBound, + upperStrict + ); + final DruidDoublePredicate doublePredicate = RangeFilter.makeDoublePredicate( + rangeType, + lowerFloatBound, + upperFloatBound + ); + return doublePredicate::applyDouble; + } + }); + } + + private Supplier makeDoublePredicateSupplier() { - Supplier longPredicate = () -> createLongPredicate(); - return Suppliers.memoize(longPredicate); + return Suppliers.memoize(() -> { + final boolean hasLowerBound; + final boolean hasUpperBound; + final double lowerDoubleBound; + final double upperDoubleBound; + boolean matchesNothing = false; + + if (hasLowerBound()) { + final Double lowerDouble = Doubles.tryParse(lower); + if (lowerDouble == null) { + // Unparseable values fall before all actual numbers, so all numbers + // will match the lower bound. + hasLowerBound = false; + lowerDoubleBound = 0L; + } else { + hasLowerBound = true; + lowerDoubleBound = lowerDouble; + } + } else { + hasLowerBound = false; + lowerDoubleBound = 0L; + } + + if (hasUpperBound()) { + Double upperDouble = Doubles.tryParse(upper); + if (upperDouble == null) { + // Unparseable values fall before all actual numbers, so no numbers can + // match the upper bound. 
+ matchesNothing = true; + hasUpperBound = false; + upperDoubleBound = 0L; + } else { + hasUpperBound = true; + upperDoubleBound = upperDouble; + } + } else { + hasUpperBound = false; + upperDoubleBound = 0L; + } + + if (matchesNothing) { + return DruidDoublePredicate.ALWAYS_FALSE; + } else { + final RangeFilter.RangeType rangeType = RangeFilter.RangeType.of( + hasLowerBound, + lowerStrict, + hasUpperBound, + upperStrict + ); + return RangeFilter.makeDoublePredicate(rangeType, lowerDoubleBound, upperDoubleBound); + } + }); } @Nullable @@ -486,227 +606,4 @@ private BigDecimal getBigDecimalUpperBoundFromFloatString(String floatStr) return convertedBD.setScale(0, RoundingMode.FLOOR); } } - - private DruidFloatPredicate createDruidFloatPredicate() - { - final boolean hasLowerFloatBound; - final boolean hasUpperFloatBound; - final float lowerFloatBound; - final float upperFloatBound; - boolean matchesNothing = false; - - if (hasLowerBound()) { - final Float lowerFloat = Floats.tryParse(lower); - if (lowerFloat == null) { - // Unparseable values fall before all actual numbers, so all numbers - // will match the lower bound. - hasLowerFloatBound = false; - lowerFloatBound = 0L; - } else { - hasLowerFloatBound = true; - lowerFloatBound = lowerFloat; - } - } else { - hasLowerFloatBound = false; - lowerFloatBound = 0L; - } - - if (hasUpperBound()) { - Float upperFloat = Floats.tryParse(upper); - if (upperFloat == null) { - // Unparseable values fall before all actual numbers, so no numbers - // can match the upper bound. 
- matchesNothing = true; - hasUpperFloatBound = false; - upperFloatBound = 0L; - } else { - hasUpperFloatBound = true; - upperFloatBound = upperFloat; - } - } else { - hasUpperFloatBound = false; - upperFloatBound = 0L; - } - - if (matchesNothing) { - return DruidFloatPredicate.ALWAYS_FALSE; - } else { - return input -> { - final DruidDoublePredicate druidDoublePredicate = makeDoublePredicateFromBounds( - hasLowerFloatBound, - hasUpperFloatBound, - lowerStrict, - upperStrict, - (double) lowerFloatBound, - (double) upperFloatBound); - return druidDoublePredicate.applyDouble((double) input); - }; - } - } - - private Supplier makeFloatPredicateSupplier() - { - Supplier floatPredicate = () -> createDruidFloatPredicate(); - return Suppliers.memoize(floatPredicate); - } - - private DruidDoublePredicate createDruidDoublePredicate() - { - final boolean hasLowerBound; - final boolean hasUpperBound; - final double lowerDoubleBound; - final double upperDoubleBound; - boolean matchesNothing = false; - - if (hasLowerBound()) { - final Double lowerDouble = Doubles.tryParse(lower); - if (lowerDouble == null) { - // Unparseable values fall before all actual numbers, so all numbers - // will match the lower bound. - hasLowerBound = false; - lowerDoubleBound = 0L; - } else { - hasLowerBound = true; - lowerDoubleBound = lowerDouble; - } - } else { - hasLowerBound = false; - lowerDoubleBound = 0L; - } - - if (hasUpperBound()) { - Double upperDouble = Doubles.tryParse(upper); - if (upperDouble == null) { - // Unparseable values fall before all actual numbers, so no numbers can - // match the upper bound. 
- matchesNothing = true; - hasUpperBound = false; - upperDoubleBound = 0L; - } else { - hasUpperBound = true; - upperDoubleBound = upperDouble; - } - } else { - hasUpperBound = false; - upperDoubleBound = 0L; - } - - if (matchesNothing) { - return DruidDoublePredicate.ALWAYS_FALSE; - } else { - return makeDoublePredicateFromBounds( - hasLowerBound, - hasUpperBound, - lowerStrict, - upperStrict, - lowerDoubleBound, - upperDoubleBound); - } - } - - private Supplier makeDoublePredicateSupplier() - { - Supplier doublePredicate = () -> createDruidDoublePredicate(); - return Suppliers.memoize(doublePredicate); - } - - public static DruidLongPredicate makeLongPredicateFromBounds( - final boolean hasLowerLongBound, - final boolean hasUpperLongBound, - final boolean lowerStrict, - final boolean upperStrict, - final long lowerLongBound, - final long upperLongBound - ) - { - if (hasLowerLongBound && hasUpperLongBound) { - if (upperStrict && lowerStrict) { - return input -> input > lowerLongBound && input < upperLongBound; - } else if (lowerStrict) { - return input -> input > lowerLongBound && input <= upperLongBound; - } else if (upperStrict) { - return input -> input >= lowerLongBound && input < upperLongBound; - } else { - return input -> input >= lowerLongBound && input <= upperLongBound; - } - } else if (hasUpperLongBound) { - if (upperStrict) { - return input -> input < upperLongBound; - } else { - return input -> input <= upperLongBound; - } - } else if (hasLowerLongBound) { - if (lowerStrict) { - return input -> input > lowerLongBound; - } else { - return input -> input >= lowerLongBound; - } - } else { - return DruidLongPredicate.ALWAYS_TRUE; - } - } - - public static DruidDoublePredicate makeDoublePredicateFromBounds( - final boolean hasLowerDoubleBound, - final boolean hasUpperDoubleBound, - final boolean lowerStrict, - final boolean upperStrict, - final double lowerDoubleBound, - final double upperDoubleBound - ) - { - if (hasLowerDoubleBound && 
hasUpperDoubleBound) { - if (upperStrict && lowerStrict) { - return input -> { - final int lowerComparing = Double.compare(input, lowerDoubleBound); - final int upperComparing = Double.compare(upperDoubleBound, input); - return ((lowerComparing > 0)) && (upperComparing > 0); - }; - } else if (lowerStrict) { - return input -> { - final int lowerComparing = Double.compare(input, lowerDoubleBound); - final int upperComparing = Double.compare(upperDoubleBound, input); - return (lowerComparing > 0) && (upperComparing >= 0); - }; - } else if (upperStrict) { - return input -> { - final int lowerComparing = Double.compare(input, lowerDoubleBound); - final int upperComparing = Double.compare(upperDoubleBound, input); - return (lowerComparing >= 0) && (upperComparing > 0); - }; - } else { - return input -> { - final int lowerComparing = Double.compare(input, lowerDoubleBound); - final int upperComparing = Double.compare(upperDoubleBound, input); - return (lowerComparing >= 0) && (upperComparing >= 0); - }; - } - } else if (hasUpperDoubleBound) { - if (upperStrict) { - return input -> { - final int upperComparing = Double.compare(upperDoubleBound, input); - return upperComparing > 0; - }; - } else { - return input -> { - final int upperComparing = Double.compare(upperDoubleBound, input); - return upperComparing >= 0; - }; - } - } else if (hasLowerDoubleBound) { - if (lowerStrict) { - return input -> { - final int lowerComparing = Double.compare(input, lowerDoubleBound); - return lowerComparing > 0; - }; - } else { - return input -> { - final int lowerComparing = Double.compare(input, lowerDoubleBound); - return lowerComparing >= 0; - }; - } - } else { - return DruidDoublePredicate.ALWAYS_TRUE; - } - } } diff --git a/processing/src/main/java/org/apache/druid/query/filter/EqualityFilter.java b/processing/src/main/java/org/apache/druid/query/filter/EqualityFilter.java index 23ed49d1eb15..4fd9b4aef539 100644 --- 
a/processing/src/main/java/org/apache/druid/query/filter/EqualityFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/EqualityFilter.java @@ -24,6 +24,8 @@ import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.base.Predicate; import com.google.common.base.Predicates; +import com.google.common.base.Supplier; +import com.google.common.base.Suppliers; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Range; import com.google.common.collect.RangeSet; @@ -56,9 +58,10 @@ import org.apache.druid.segment.filter.Filters; import org.apache.druid.segment.filter.PredicateValueMatcherFactory; import org.apache.druid.segment.filter.ValueMatchers; +import org.apache.druid.segment.index.AllFalseBitmapColumnIndex; import org.apache.druid.segment.index.BitmapColumnIndex; -import org.apache.druid.segment.index.semantic.StringValueSetIndex; -import org.apache.druid.segment.index.semantic.TypedValueIndex; +import org.apache.druid.segment.index.semantic.StringValueSetIndexes; +import org.apache.druid.segment.index.semantic.ValueIndexes; import org.apache.druid.segment.nested.StructuredData; import org.apache.druid.segment.vector.VectorColumnSelectorFactory; @@ -68,6 +71,7 @@ import java.util.Map; import java.util.Objects; import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; public class EqualityFilter extends AbstractOptimizableDimFilter implements Filter { @@ -252,19 +256,19 @@ public BitmapColumnIndex getBitmapColumnIndex(ColumnIndexSelector selector) final ColumnIndexSupplier indexSupplier = selector.getIndexSupplier(column); if (indexSupplier == null) { - return Filters.makeNullIndex(false, selector); + return new AllFalseBitmapColumnIndex(selector); } - final TypedValueIndex valueSetIndex = indexSupplier.as(TypedValueIndex.class); - if (valueSetIndex != null) { - return valueSetIndex.forValue(matchValue, matchValueType); + final ValueIndexes valueIndexes = 
indexSupplier.as(ValueIndexes.class); + if (valueIndexes != null) { + return valueIndexes.forValue(matchValue, matchValueType); } if (matchValueType.isPrimitive()) { - final StringValueSetIndex stringValueSetIndex = indexSupplier.as(StringValueSetIndex.class); - if (stringValueSetIndex != null) { + final StringValueSetIndexes stringValueSetIndexes = indexSupplier.as(StringValueSetIndexes.class); + if (stringValueSetIndexes != null) { - return stringValueSetIndex.forValue(String.valueOf(matchValue)); + return stringValueSetIndexes.forValue(String.valueOf(matchValue)); } } // column exists, but has no indexes we can use @@ -276,7 +280,7 @@ public ValueMatcher makeMatcher(ColumnSelectorFactory factory) { return ColumnProcessors.makeProcessor( column, - new TypedConstantValueMatcherFactory(matchValue, matchValueType), + new TypedConstantValueMatcherFactory(matchValue, matchValueType, predicateFactory), factory ); } @@ -350,134 +354,143 @@ private static class EqualityPredicateFactory implements DruidPredicateFactory { private final ExprEval matchValue; private final ColumnType matchValueType; - - private final Object initLock = new Object(); - - private volatile DruidLongPredicate longPredicate; - private volatile DruidFloatPredicate floatPredicate; - private volatile DruidDoublePredicate doublePredicate; + private final Supplier> stringPredicateSupplier; + private final Supplier longPredicateSupplier; + private final Supplier floatPredicateSupplier; + private final Supplier doublePredicateSupplier; + private final ConcurrentHashMap, Predicate> arrayPredicates; + private final Supplier> typeDetectingArrayPredicateSupplier; + private final Supplier> objectPredicateSupplier; public EqualityPredicateFactory(Object matchValue, ColumnType matchValueType) { this.matchValue = ExprEval.ofType(ExpressionType.fromColumnType(matchValueType), matchValue); this.matchValueType = matchValueType; + this.stringPredicateSupplier = makeStringPredicateSupplier(); + 
this.longPredicateSupplier = makeLongPredicateSupplier(); + this.floatPredicateSupplier = makeFloatPredicateSupplier(); + this.doublePredicateSupplier = makeDoublePredicateSupplier(); + this.objectPredicateSupplier = makeObjectPredicateSupplier(); + this.arrayPredicates = new ConcurrentHashMap<>(); + this.typeDetectingArrayPredicateSupplier = makeTypeDetectingArrayPredicate(); } @Override public Predicate makeStringPredicate() { - return Predicates.equalTo(matchValue.castTo(ExpressionType.STRING).asString()); + return stringPredicateSupplier.get(); } @Override public DruidLongPredicate makeLongPredicate() { - initLongPredicate(); - return longPredicate; + return longPredicateSupplier.get(); } @Override public DruidFloatPredicate makeFloatPredicate() { - initFloatPredicate(); - return floatPredicate; + return floatPredicateSupplier.get(); } @Override public DruidDoublePredicate makeDoublePredicate() { - initDoublePredicate(); - return doublePredicate; + return doublePredicateSupplier.get(); } @Override public Predicate makeArrayPredicate(@Nullable TypeSignature arrayType) { - if (arrayType != null) { - final Comparator arrayComparator = arrayType.getNullableStrategy(); - final Object[] matchArray = matchValue.castTo(ExpressionType.fromColumnType(arrayType)).asArray(); - return input -> arrayComparator.compare(input, matchArray) == 0; - } else { + if (arrayType == null) { // fall back to per row detection if input array type is unknown - return input -> { - final ExprEval eval = ExprEval.bestEffortOf(input); - final Comparator arrayComparator = arrayType.getNullableStrategy(); - final Object[] matchArray = matchValue.castTo(eval.type()).asArray(); - return arrayComparator.compare(input, matchArray) == 0; - }; + return typeDetectingArrayPredicateSupplier.get(); } + + return arrayPredicates.computeIfAbsent(arrayType, (existing) -> makeArrayPredicateInternal(arrayType)); } @Override public Predicate makeObjectPredicate() { - if 
(matchValueType.equals(ColumnType.NESTED_DATA)) { - return input -> Objects.equals(StructuredData.unwrap(input), StructuredData.unwrap(matchValue.value())); - } - return Predicates.equalTo(matchValue.valueOrDefault()); + return objectPredicateSupplier.get(); } - private void initLongPredicate() + private Supplier> makeStringPredicateSupplier() { - if (longPredicate != null) { - return; - } - synchronized (initLock) { - if (longPredicate != null) { - return; - } + return Suppliers.memoize(() -> Predicates.equalTo(matchValue.castTo(ExpressionType.STRING).asString())); + } + + private Supplier makeLongPredicateSupplier() + { + return Suppliers.memoize(() -> { final Long valueAsLong = (Long) matchValue.castTo(ExpressionType.LONG).valueOrDefault(); if (valueAsLong == null) { - longPredicate = DruidLongPredicate.ALWAYS_FALSE; + return DruidLongPredicate.ALWAYS_FALSE; } else { // store the primitive, so we don't unbox for every comparison final long unboxedLong = valueAsLong; - longPredicate = input -> input == unboxedLong; + return input -> input == unboxedLong; } - } + }); } - private void initFloatPredicate() + private Supplier makeFloatPredicateSupplier() { - if (floatPredicate != null) { - return; - } - synchronized (initLock) { - if (floatPredicate != null) { - return; - } + return Suppliers.memoize(() -> { final Double doubleValue = (Double) matchValue.castTo(ExpressionType.DOUBLE).valueOrDefault(); if (doubleValue == null) { - floatPredicate = DruidFloatPredicate.ALWAYS_FALSE; + return DruidFloatPredicate.ALWAYS_FALSE; } else { // Compare with floatToIntBits instead of == to canonicalize NaNs. 
final int floatBits = Float.floatToIntBits(doubleValue.floatValue()); - floatPredicate = input -> Float.floatToIntBits(input) == floatBits; + return input -> Float.floatToIntBits(input) == floatBits; } - } + }); } - private void initDoublePredicate() + private Supplier makeDoublePredicateSupplier() { - if (doublePredicate != null) { - return; - } - synchronized (initLock) { - if (doublePredicate != null) { - return; - } + return Suppliers.memoize(() -> { final Double aDouble = (Double) matchValue.castTo(ExpressionType.DOUBLE).valueOrDefault(); if (aDouble == null) { - doublePredicate = DruidDoublePredicate.ALWAYS_FALSE; + return DruidDoublePredicate.ALWAYS_FALSE; } else { // Compare with doubleToLongBits instead of == to canonicalize NaNs. final long bits = Double.doubleToLongBits(aDouble); - doublePredicate = input -> Double.doubleToLongBits(input) == bits; + return input -> Double.doubleToLongBits(input) == bits; } - } + }); + } + + private Supplier> makeObjectPredicateSupplier() + { + return Suppliers.memoize(() -> { + if (matchValueType.equals(ColumnType.NESTED_DATA)) { + return input -> Objects.equals(StructuredData.unwrap(input), StructuredData.unwrap(matchValue.value())); + } + return Predicates.equalTo(matchValue.valueOrDefault()); + }); + } + + private Supplier> makeTypeDetectingArrayPredicate() + { + return Suppliers.memoize(() -> input -> { + final ExprEval eval = ExprEval.bestEffortOf(input); + final Comparator arrayComparator = eval.type().getNullableStrategy(); + final Object[] matchArray = matchValue.castTo(eval.type()).asArray(); + return arrayComparator.compare(input, matchArray) == 0; + }); + } + private Predicate makeArrayPredicateInternal(TypeSignature arrayType) + { + final ExpressionType expressionType = ExpressionType.fromColumnTypeStrict(arrayType); + final Comparator arrayComparator = arrayType.getNullableStrategy(); + final Object[] matchArray = matchValue.castTo(expressionType).asArray(); + return input -> arrayComparator.compare(input, 
matchArray) == 0; } @Override @@ -493,6 +506,7 @@ public boolean equals(Object o) return Objects.equals(matchValue, that.matchValue) && Objects.equals(matchValueType, that.matchValueType); } + @Override public int hashCode() { @@ -503,12 +517,16 @@ public int hashCode() private static class TypedConstantValueMatcherFactory implements ColumnProcessorFactory { private final ExprEval matchValue; - private final ColumnType matchValueType; + private final PredicateValueMatcherFactory predicateMatcherFactory; - public TypedConstantValueMatcherFactory(Object matchValue, ColumnType matchValueType) + public TypedConstantValueMatcherFactory( + Object matchValue, + ColumnType matchValueType, + DruidPredicateFactory predicateFactory + ) { this.matchValue = ExprEval.ofType(ExpressionType.fromColumnType(matchValueType), matchValue); - this.matchValueType = matchValueType; + this.predicateMatcherFactory = new PredicateValueMatcherFactory(predicateFactory); } @Override @@ -548,20 +566,16 @@ public ValueMatcher makeLongProcessor(BaseLongColumnValueSelector selector) @Override public ValueMatcher makeArrayProcessor( BaseObjectColumnValueSelector selector, - ColumnCapabilities columnCapabilities + @Nullable ColumnCapabilities columnCapabilities ) { - return new PredicateValueMatcherFactory( - new EqualityPredicateFactory(matchValue.valueOrDefault(), matchValueType) - ).makeArrayProcessor(selector, columnCapabilities); + return predicateMatcherFactory.makeArrayProcessor(selector, columnCapabilities); } @Override public ValueMatcher makeComplexProcessor(BaseObjectColumnValueSelector selector) { - return new PredicateValueMatcherFactory( - new EqualityPredicateFactory(matchValue.valueOrDefault(), matchValueType) - ).makeComplexProcessor(selector); + return predicateMatcherFactory.makeComplexProcessor(selector); } } } diff --git a/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java b/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java index 
52f55cd5fa1f..6a39c46a96be 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java @@ -60,7 +60,7 @@ import org.apache.druid.segment.column.ColumnIndexSupplier; import org.apache.druid.segment.filter.Filters; import org.apache.druid.segment.index.BitmapColumnIndex; -import org.apache.druid.segment.index.semantic.StringValueSetIndex; +import org.apache.druid.segment.index.semantic.StringValueSetIndexes; import org.apache.druid.segment.index.semantic.Utf8ValueSetIndex; import org.apache.druid.segment.vector.VectorColumnSelectorFactory; @@ -292,7 +292,7 @@ public BitmapColumnIndex getBitmapColumnIndex(ColumnIndexSelector selector) if (indexSupplier == null) { // column doesn't exist, match against null - return Filters.makeNullIndex( + return Filters.makeMissingColumnNullIndex( predicateFactory.makeStringPredicate().apply(null), selector ); @@ -303,9 +303,9 @@ public BitmapColumnIndex getBitmapColumnIndex(ColumnIndexSelector selector) return utf8ValueSetIndex.forSortedValuesUtf8(valuesUtf8); } - final StringValueSetIndex stringValueSetIndex = indexSupplier.as(StringValueSetIndex.class); - if (stringValueSetIndex != null) { - return stringValueSetIndex.forSortedValues(values); + final StringValueSetIndexes stringValueSetIndexes = indexSupplier.as(StringValueSetIndexes.class); + if (stringValueSetIndexes != null) { + return stringValueSetIndexes.forSortedValues(values); } } return Filters.makePredicateIndex( diff --git a/processing/src/main/java/org/apache/druid/query/filter/NullFilter.java b/processing/src/main/java/org/apache/druid/query/filter/NullFilter.java index f2ec707b3d53..0d88e3214194 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/NullFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/NullFilter.java @@ -43,6 +43,7 @@ import org.apache.druid.segment.column.ValueType; import 
org.apache.druid.segment.filter.DimensionPredicateFilter; import org.apache.druid.segment.filter.Filters; +import org.apache.druid.segment.index.AllTrueBitmapColumnIndex; import org.apache.druid.segment.index.BitmapColumnIndex; import org.apache.druid.segment.index.semantic.NullValueIndex; import org.apache.druid.segment.vector.VectorColumnSelectorFactory; @@ -149,13 +150,13 @@ public BitmapColumnIndex getBitmapColumnIndex(ColumnIndexSelector selector) } final ColumnIndexSupplier indexSupplier = selector.getIndexSupplier(column); if (indexSupplier == null) { - return Filters.makeNullIndex(true, selector); + return new AllTrueBitmapColumnIndex(selector); } final NullValueIndex nullValueIndex = indexSupplier.as(NullValueIndex.class); if (nullValueIndex == null) { return null; } - return nullValueIndex.forNull(); + return nullValueIndex.get(); } @Override diff --git a/processing/src/main/java/org/apache/druid/query/filter/RangeFilter.java b/processing/src/main/java/org/apache/druid/query/filter/RangeFilter.java index 90d80c26fec5..d9c1fe608bdd 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/RangeFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/RangeFilter.java @@ -31,7 +31,6 @@ import com.google.common.collect.Range; import com.google.common.collect.RangeSet; import com.google.common.collect.TreeRangeSet; -import org.apache.druid.common.config.NullHandling; import org.apache.druid.error.InvalidInput; import org.apache.druid.java.util.common.IAE; import org.apache.druid.java.util.common.StringUtils; @@ -53,9 +52,11 @@ import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.filter.DimensionPredicateFilter; import org.apache.druid.segment.filter.Filters; +import org.apache.druid.segment.index.AllFalseBitmapColumnIndex; import org.apache.druid.segment.index.BitmapColumnIndex; -import org.apache.druid.segment.index.semantic.LexicographicalRangeIndex; -import 
org.apache.druid.segment.index.semantic.NumericRangeIndex; +import org.apache.druid.segment.index.semantic.DruidPredicateIndexes; +import org.apache.druid.segment.index.semantic.LexicographicalRangeIndexes; +import org.apache.druid.segment.index.semantic.NumericRangeIndexes; import org.apache.druid.segment.vector.VectorColumnSelectorFactory; import javax.annotation.Nullable; @@ -65,22 +66,22 @@ import java.util.Map; import java.util.Objects; import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; public class RangeFilter extends AbstractOptimizableDimFilter implements Filter { private final String column; private final ColumnType matchValueType; + private final ExpressionType matchValueExpressionType; @Nullable private final Object upper; - @Nullable private final Object lower; - private final ExprEval upperEval; private final ExprEval lowerEval; - private final boolean lowerStrict; - private final boolean upperStrict; + private final boolean lowerOpen; + private final boolean upperOpen; @Nullable private final ExtractionFn extractionFn; @Nullable @@ -89,6 +90,8 @@ public class RangeFilter extends AbstractOptimizableDimFilter implements Filter private final Supplier longPredicateSupplier; private final Supplier floatPredicateSupplier; private final Supplier doublePredicateSupplier; + private final ConcurrentHashMap, Predicate> arrayPredicates; + private final Supplier> typeDetectingArrayPredicateSupplier; @JsonCreator public RangeFilter( @@ -96,8 +99,8 @@ public RangeFilter( @JsonProperty("matchValueType") ColumnType matchValueType, @JsonProperty("lower") @Nullable Object lower, @JsonProperty("upper") @Nullable Object upper, - @JsonProperty("lowerStrict") @Nullable Boolean lowerStrict, - @JsonProperty("upperStrict") @Nullable Boolean upperStrict, + @JsonProperty("lowerOpen") @Nullable Boolean lowerOpen, + @JsonProperty("upperOpen") @Nullable Boolean upperOpen, @JsonProperty("extractionFn") @Nullable ExtractionFn extractionFn, 
@JsonProperty("filterTuning") @Nullable FilterTuning filterTuning ) @@ -110,24 +113,25 @@ public RangeFilter( throw InvalidInput.exception("Invalid range filter on column [%s], matchValueType cannot be null", column); } this.matchValueType = matchValueType; - if (lower == null && upper == null) { + this.matchValueExpressionType = ExpressionType.fromColumnType(matchValueType); + this.upper = upper; + this.lower = lower; + this.upperEval = ExprEval.ofType(matchValueExpressionType, upper); + this.lowerEval = ExprEval.ofType(matchValueExpressionType, lower); + + if (lowerEval.value() == null && upperEval.value() == null) { throw InvalidInput.exception( "Invalid range filter on column [%s], lower and upper cannot be null at the same time", column ); } - final ExpressionType expressionType = ExpressionType.fromColumnType(matchValueType); - this.upper = upper; - this.lower = lower; - this.upperEval = ExprEval.ofType(expressionType, upper); - this.lowerEval = ExprEval.ofType(expressionType, lower); - if (expressionType.isNumeric()) { + if (matchValueExpressionType.isNumeric()) { if (lower != null && lowerEval.value() == null) { throw InvalidInput.exception( "Invalid range filter on column [%s], lower bound [%s] cannot be parsed as specified match value type [%s]", column, lower, - expressionType + matchValueExpressionType ); } if (upper != null && upperEval.value() == null) { @@ -135,12 +139,12 @@ public RangeFilter( "Invalid range filter on column [%s], upper bound [%s] cannot be parsed as specified match value type [%s]", column, upper, - expressionType + matchValueExpressionType ); } } - this.lowerStrict = lowerStrict != null && lowerStrict; - this.upperStrict = upperStrict != null && upperStrict; + this.lowerOpen = lowerOpen != null && lowerOpen; + this.upperOpen = upperOpen != null && upperOpen; // remove once SQL planner no longer uses extractionFn this.extractionFn = extractionFn; this.filterTuning = filterTuning; @@ -148,6 +152,9 @@ public RangeFilter( 
this.longPredicateSupplier = makeLongPredicateSupplier(); this.floatPredicateSupplier = makeFloatPredicateSupplier(); this.doublePredicateSupplier = makeDoublePredicateSupplier(); + this.arrayPredicates = new ConcurrentHashMap<>(); + this.typeDetectingArrayPredicateSupplier = makeTypeDetectingArrayPredicate(); + } @JsonProperty @@ -180,26 +187,16 @@ public Object getLower() @JsonProperty @JsonInclude(JsonInclude.Include.NON_DEFAULT) - public boolean isLowerStrict() + public boolean isLowerOpen() { - return lowerStrict; + return lowerOpen; } @JsonProperty @JsonInclude(JsonInclude.Include.NON_DEFAULT) - public boolean isUpperStrict() - { - return upperStrict; - } - - public boolean hasLowerBound() - { - return lower != null; - } - - public boolean hasUpperBound() + public boolean isUpperOpen() { - return upper != null; + return upperOpen; } @Nullable @@ -218,6 +215,16 @@ public FilterTuning getFilterTuning() return filterTuning; } + public boolean hasLowerBound() + { + return lower != null; + } + + public boolean hasUpperBound() + { + return upper != null; + } + @Override public byte[] getCacheKey() { @@ -248,8 +255,8 @@ public byte[] getCacheKey() boundType = 0x3; } - final byte lowerStrictByte = this.isLowerStrict() ? (byte) 1 : 0x0; - final byte upperStrictByte = this.isUpperStrict() ? (byte) 1 : 0x0; + final byte lowerStrictByte = this.isLowerOpen() ? (byte) 1 : 0x0; + final byte upperStrictByte = this.isUpperOpen() ? (byte) 1 : 0x0; return new CacheKeyBuilder(DimFilterUtils.RANGE_CACHE_ID) .appendByte(boundType) @@ -288,22 +295,28 @@ public Filter toFilter() @Override public RangeSet getDimensionRangeSet(String dimension) { - // range partitioning converts stuff to strings.. so do that i guess + if (!(Objects.equals(column, dimension) && getExtractionFn() == null)) { + return null; + } + + // We need to return a RangeSet, but we have Object, not String. 
We align with the interface by + // converting things to String, but we'd probably be better off adjusting the interface to something that is + // more type aware in the future String lowerString = lowerEval.asString(); String upperString = upperEval.asString(); RangeSet retSet = TreeRangeSet.create(); Range range; if (getLower() == null) { - range = isUpperStrict() ? Range.lessThan(upperString) : Range.atMost(upperString); + range = isUpperOpen() ? Range.lessThan(upperString) : Range.atMost(upperString); } else if (getUpper() == null) { - range = isLowerStrict() ? Range.greaterThan(lowerString) : Range.atLeast(lowerString); + range = isLowerOpen() ? Range.greaterThan(lowerString) : Range.atLeast(lowerString); } else { range = Range.range( lowerString, - isLowerStrict() ? BoundType.OPEN : BoundType.CLOSED, + isLowerOpen() ? BoundType.OPEN : BoundType.CLOSED, upperString, - isUpperStrict() ? BoundType.OPEN : BoundType.CLOSED + isUpperOpen() ? BoundType.OPEN : BoundType.CLOSED ); } retSet.add(range); @@ -317,52 +330,35 @@ public BitmapColumnIndex getBitmapColumnIndex(ColumnIndexSelector selector) if (!Filters.checkFilterTuningUseIndex(column, selector, filterTuning)) { return null; } - if (matchValueType.is(ValueType.STRING) && extractionFn == null) { - final ColumnIndexSupplier indexSupplier = selector.getIndexSupplier(column); - if (indexSupplier == null) { - return Filters.makeNullIndex(false, selector); - } - final LexicographicalRangeIndex rangeIndex = indexSupplier.as(LexicographicalRangeIndex.class); - if (rangeIndex != null) { + final ColumnIndexSupplier indexSupplier = selector.getIndexSupplier(column); + if (indexSupplier == null) { + return new AllFalseBitmapColumnIndex(selector); + } + + if (matchValueType.is(ValueType.STRING)) { + final LexicographicalRangeIndexes rangeIndexes = indexSupplier.as(LexicographicalRangeIndexes.class); + if (rangeIndexes != null) { final String lower = hasLowerBound() ? 
lowerEval.asString() : null; final String upper = hasUpperBound() ? upperEval.asString() : null; - if (NullHandling.isNullOrEquivalent(lower) && NullHandling.isNullOrEquivalent(upper)) { - return Filters.makeNullIndex(false, selector); - } - final BitmapColumnIndex rangeBitmaps = rangeIndex.forRange( - lower, - lowerStrict, - upper, - upperStrict - ); - if (rangeBitmaps != null) { - return rangeBitmaps; - } + return rangeIndexes.forRange(lower, lowerOpen, upper, upperOpen); } } - if (matchValueType.isNumeric() && extractionFn == null) { - final ColumnIndexSupplier indexSupplier = selector.getIndexSupplier(column); - if (indexSupplier == null) { - return Filters.makeNullIndex(false, selector); - } - final NumericRangeIndex rangeIndex = indexSupplier.as(NumericRangeIndex.class); - if (rangeIndex != null) { + if (matchValueType.isNumeric()) { + final NumericRangeIndexes rangeIndexes = indexSupplier.as(NumericRangeIndexes.class); + if (rangeIndexes != null) { final Number lower = (Number) lowerEval.value(); final Number upper = (Number) upperEval.value(); - final BitmapColumnIndex rangeBitmaps = rangeIndex.forRange( - lower, - isLowerStrict(), - upper, - isUpperStrict() - ); - if (rangeBitmaps != null) { - return rangeBitmaps; - } + return rangeIndexes.forRange(lower, lowerOpen, upper, upperOpen); } } // fall back to predicate based index if it is available - return Filters.makePredicateIndex(column, selector, getPredicateFactory()); + final DruidPredicateIndexes predicateIndexes = indexSupplier.as(DruidPredicateIndexes.class); + if (predicateIndexes != null) { + return predicateIndexes.forPredicate(getPredicateFactory()); + } + // index doesn't exist + return null; } @Override @@ -422,8 +418,8 @@ public Filter rewriteRequiredColumns(Map columnRewrites) matchValueType, lower, upper, - lowerStrict, - upperStrict, + lowerOpen, + upperOpen, extractionFn, filterTuning ); @@ -431,14 +427,13 @@ public Filter rewriteRequiredColumns(Map columnRewrites) public boolean 
isEquality() { - if (!hasUpperBound() || !hasLowerBound() || lowerStrict || upperStrict) { + if (!hasUpperBound() || !hasLowerBound() || lowerOpen || upperOpen) { return false; } if (matchValueType.isArray()) { - ExpressionType matchArrayType = ExpressionType.fromColumnType(matchValueType); return Arrays.deepEquals( - ExprEval.ofType(matchArrayType, upper).asArray(), - ExprEval.ofType(matchArrayType, lower).asArray() + lowerEval.asArray(), + upperEval.asArray() ); } else { return Objects.equals(upper, lower); @@ -459,22 +454,21 @@ public boolean equals(Object o) boolean upperSame; boolean lowerSame; if (matchValueType.isArray()) { - ExpressionType matchArrayType = ExpressionType.fromColumnType(matchValueType); upperSame = Arrays.deepEquals( - ExprEval.ofType(matchArrayType, upper).asArray(), - ExprEval.ofType(matchArrayType, that.upper).asArray() + upperEval.asArray(), + that.upperEval.asArray() ); lowerSame = Arrays.deepEquals( - ExprEval.ofType(matchArrayType, lower).asArray(), - ExprEval.ofType(matchArrayType, that.lower).asArray() + lowerEval.asArray(), + that.lowerEval.asArray() ); } else { upperSame = Objects.equals(upper, that.upper); lowerSame = Objects.equals(lower, that.lower); } - return lowerStrict == that.lowerStrict && - upperStrict == that.upperStrict && + return lowerOpen == that.lowerOpen && + upperOpen == that.upperOpen && column.equals(that.column) && Objects.equals(matchValueType, that.matchValueType) && upperSame && @@ -491,8 +485,8 @@ public int hashCode() matchValueType, upper, lower, - lowerStrict, - upperStrict, + lowerOpen, + upperOpen, extractionFn, filterTuning ); @@ -505,7 +499,7 @@ public String toString() if (lower != null) { builder.append(lower); - if (lowerStrict) { + if (lowerOpen) { builder.append(" < "); } else { builder.append(" <= "); @@ -517,7 +511,7 @@ public String toString() builder.append(StringUtils.format(" as %s", matchValueType.toString())); if (upper != null) { - if (upperStrict) { + if (upperOpen) { builder.append(" 
< "); } else { builder.append(" <= "); @@ -536,10 +530,10 @@ private DruidPredicateFactory getPredicateFactory() private Supplier makeLongPredicateSupplier() { return Suppliers.memoize(() -> { - boolean hasLowerBound; - boolean hasUpperBound; - long lowerBound; - long upperBound; + final boolean hasLowerBound; + final boolean hasUpperBound; + final long lowerBound; + final long upperBound; if (hasLowerBound()) { ExprEval lowerCast = lowerEval.castTo(ExpressionType.LONG); @@ -568,14 +562,8 @@ private Supplier makeLongPredicateSupplier() hasUpperBound = false; upperBound = Long.MAX_VALUE; } - return BoundDimFilter.makeLongPredicateFromBounds( - hasLowerBound, - hasUpperBound, - lowerStrict, - upperStrict, - lowerBound, - upperBound - ); + final RangeType rangeType = RangeType.of(hasLowerBound, lowerOpen, hasUpperBound, upperOpen); + return makeLongPredicate(rangeType, lowerBound, upperBound); }); } @@ -590,10 +578,10 @@ private Supplier makeFloatPredicateSupplier() private Supplier makeDoublePredicateSupplier() { return Suppliers.memoize(() -> { - boolean hasLowerBound; - boolean hasUpperBound; - double lowerBound; - double upperBound; + final boolean hasLowerBound; + final boolean hasUpperBound; + final double lowerBound; + final double upperBound; if (hasLowerBound()) { ExprEval lowerCast = lowerEval.castTo(ExpressionType.DOUBLE); @@ -623,14 +611,8 @@ private Supplier makeDoublePredicateSupplier() upperBound = Double.POSITIVE_INFINITY; } - return BoundDimFilter.makeDoublePredicateFromBounds( - hasLowerBound, - hasUpperBound, - lowerStrict, - upperStrict, - lowerBound, - upperBound - ); + RangeType rangeType = RangeType.of(hasLowerBound, lowerOpen, hasUpperBound, upperOpen); + return makeDoublePredicate(rangeType, lowerBound, upperBound); }); } @@ -640,107 +622,33 @@ private Supplier> makeStringPredicateSupplier() final Comparator stringComparator = matchValueType.isNumeric() ? 
StringComparators.NUMERIC : StringComparators.LEXICOGRAPHIC; - final String lowerBound = lowerEval.castTo(ExpressionType.STRING).asString(); - final String upperBound = upperEval.castTo(ExpressionType.STRING).asString(); + final String lowerBound = hasLowerBound() ? lowerEval.castTo(ExpressionType.STRING).asString() : null; + final String upperBound = hasUpperBound() ? upperEval.castTo(ExpressionType.STRING).asString() : null; - if (hasLowerBound() && hasUpperBound()) { - if (upperStrict && lowerStrict) { - return input -> { - if (NullHandling.isNullOrEquivalent(input)) { - return false; - } - final int lowerComparing = stringComparator.compare(input, lowerBound); - final int upperComparing = stringComparator.compare(upperBound, input); - return ((lowerComparing > 0)) && (upperComparing > 0); - }; - } else if (lowerStrict) { - return input -> { - if (NullHandling.isNullOrEquivalent(input)) { - return false; - } - final int lowerComparing = stringComparator.compare(input, lowerBound); - final int upperComparing = stringComparator.compare(upperBound, input); - return (lowerComparing > 0) && (upperComparing >= 0); - }; - } else if (upperStrict) { - return input -> { - if (NullHandling.isNullOrEquivalent(input)) { - return false; - } - final int lowerComparing = stringComparator.compare(input, lowerBound); - final int upperComparing = stringComparator.compare(upperBound, input); - return (lowerComparing >= 0) && (upperComparing > 0); - }; - } else { - return input -> { - if (NullHandling.isNullOrEquivalent(input)) { - return false; - } - final int lowerComparing = stringComparator.compare(input, lowerBound); - final int upperComparing = stringComparator.compare(upperBound, input); - return (lowerComparing >= 0) && (upperComparing >= 0); - }; - } - } else if (hasUpperBound()) { - if (upperStrict) { - return input -> { - if (NullHandling.isNullOrEquivalent(input)) { - return false; - } - final int upperComparing = stringComparator.compare(upperBound, input); - return 
upperComparing > 0; - }; - } else { - return input -> { - if (NullHandling.isNullOrEquivalent(input)) { - return false; - } - final int upperComparing = stringComparator.compare(upperBound, input); - return upperComparing >= 0; - }; - } - } else if (hasLowerBound()) { - if (lowerStrict) { - return input -> { - if (NullHandling.isNullOrEquivalent(input)) { - return false; - } - final int lowerComparing = stringComparator.compare(input, lowerBound); - return lowerComparing > 0; - }; - } else { - return input -> { - if (NullHandling.isNullOrEquivalent(input)) { - return false; - } - final int lowerComparing = stringComparator.compare(input, lowerBound); - return lowerComparing >= 0; - }; - } - } else { - return Predicates.notNull(); - } + final RangeType rangeType = RangeType.of(hasLowerBound(), lowerOpen, hasUpperBound(), upperOpen); + + return makeComparatorPredicate(rangeType, stringComparator, lowerBound, upperBound); }); } - private Predicate makeArrayPredicate(@Nullable TypeSignature arrayType) + + + private Predicate makeArrayPredicate(TypeSignature inputType) { - if (hasLowerBound() && hasUpperBound()) { - if (upperStrict && lowerStrict) { - if (arrayType != null) { - final Object[] lowerBound = lowerEval.castTo(ExpressionType.fromColumnType(arrayType)).asArray(); - final Object[] upperBound = upperEval.castTo(ExpressionType.fromColumnType(arrayType)).asArray(); - final Comparator arrayComparator = arrayType.getNullableStrategy(); - return input -> { - if (input == null) { - return false; - } - final int lowerComparing = arrayComparator.compare(input, lowerBound); - final int upperComparing = arrayComparator.compare(upperBound, input); - return ((lowerComparing > 0)) && (upperComparing > 0); - }; - } else { - // fall back to per row type detection + final Comparator arrayComparator = inputType.getNullableStrategy(); + final ExpressionType expressionType = ExpressionType.fromColumnTypeStrict(inputType); + final RangeType rangeType = 
RangeType.of(hasLowerBound(), lowerOpen, hasUpperBound(), upperOpen); + final Object[] lowerBound = hasLowerBound() ? lowerEval.castTo(expressionType).asArray() : null; + final Object[] upperBound = hasUpperBound() ? upperEval.castTo(expressionType).asArray() : null; + return makeComparatorPredicate(rangeType, arrayComparator, lowerBound, upperBound); + } + + private Supplier> makeTypeDetectingArrayPredicate() + { + return Suppliers.memoize(() -> { + RangeType rangeType = RangeType.of(hasLowerBound(), lowerOpen, hasUpperBound(), upperOpen); + switch (rangeType) { + case OPEN: return input -> { if (input == null) { return false; @@ -753,22 +661,7 @@ private Predicate makeArrayPredicate(@Nullable TypeSignature 0)) && (upperComparing > 0); }; - } - } else if (lowerStrict) { - if (arrayType != null) { - final Object[] lowerBound = lowerEval.castTo(ExpressionType.fromColumnType(arrayType)).asArray(); - final Object[] upperBound = upperEval.castTo(ExpressionType.fromColumnType(arrayType)).asArray(); - final Comparator arrayComparator = arrayType.getNullableStrategy(); - return input -> { - if (input == null) { - return false; - } - final int lowerComparing = arrayComparator.compare(input, lowerBound); - final int upperComparing = arrayComparator.compare(upperBound, input); - return (lowerComparing > 0) && (upperComparing >= 0); - }; - } else { - // fall back to per row type detection + case LOWER_OPEN_UPPER_CLOSED: return input -> { if (input == null) { return false; @@ -781,22 +674,7 @@ private Predicate makeArrayPredicate(@Nullable TypeSignature 0) && (upperComparing >= 0); }; - } - } else if (upperStrict) { - if (arrayType != null) { - final Object[] lowerBound = lowerEval.castTo(ExpressionType.fromColumnType(arrayType)).asArray(); - final Object[] upperBound = upperEval.castTo(ExpressionType.fromColumnType(arrayType)).asArray(); - final Comparator arrayComparator = arrayType.getNullableStrategy(); - return input -> { - if (input == null) { - return false; - } - final 
int lowerComparing = arrayComparator.compare(input, lowerBound); - final int upperComparing = arrayComparator.compare(upperBound, input); - return (lowerComparing >= 0) && (upperComparing > 0); - }; - } else { - // fall back to per row type detection + case LOWER_CLOSED_UPPER_OPEN: return input -> { if (input == null) { return false; @@ -809,22 +687,7 @@ private Predicate makeArrayPredicate(@Nullable TypeSignature= 0) && (upperComparing > 0); }; - } - } else { - if (arrayType != null) { - final Object[] lowerBound = lowerEval.castTo(ExpressionType.fromColumnType(arrayType)).asArray(); - final Object[] upperBound = upperEval.castTo(ExpressionType.fromColumnType(arrayType)).asArray(); - final Comparator arrayComparator = arrayType.getNullableStrategy(); - return input -> { - if (input == null) { - return false; - } - final int lowerComparing = arrayComparator.compare(input, lowerBound); - final int upperComparing = arrayComparator.compare(upperBound, input); - return (lowerComparing >= 0) && (upperComparing >= 0); - }; - } else { - // fall back to per row type detection + case CLOSED: return input -> { if (input == null) { return false; @@ -837,22 +700,7 @@ private Predicate makeArrayPredicate(@Nullable TypeSignature= 0) && (upperComparing >= 0); }; - } - } - } else if (hasUpperBound()) { - if (upperStrict) { - if (arrayType != null) { - final Object[] upperBound = upperEval.castTo(ExpressionType.fromColumnType(arrayType)).asArray(); - final Comparator arrayComparator = arrayType.getNullableStrategy(); - return input -> { - if (input == null) { - return false; - } - final int upperComparing = arrayComparator.compare(upperBound, input); - return upperComparing > 0; - }; - } else { - // fall back to per row type detection + case LOWER_UNBOUNDED_UPPER_OPEN: return input -> { if (input == null) { return false; @@ -863,20 +711,7 @@ private Predicate makeArrayPredicate(@Nullable TypeSignature 0; }; - } - } else { - if (arrayType != null) { - final Object[] upperBound = 
upperEval.castTo(ExpressionType.fromColumnType(arrayType)).asArray(); - final Comparator arrayComparator = arrayType.getNullableStrategy(); - return input -> { - if (input == null) { - return false; - } - final int upperComparing = arrayComparator.compare(upperBound, input); - return upperComparing >= 0; - }; - } else { - // fall back to per row type detection + case LOWER_UNBOUNDED_UPPER_CLOSED: return input -> { if (input == null) { return false; @@ -887,22 +722,7 @@ private Predicate makeArrayPredicate(@Nullable TypeSignature= 0; }; - } - } - } else if (hasLowerBound()) { - if (lowerStrict) { - if (arrayType != null) { - final Object[] lowerBound = lowerEval.castTo(ExpressionType.fromColumnType(arrayType)).asArray(); - final Comparator arrayComparator = arrayType.getNullableStrategy(); - return input -> { - if (input == null) { - return false; - } - final int lowerComparing = arrayComparator.compare(input, lowerBound); - return lowerComparing > 0; - }; - } else { - // fall back to per row type detection + case LOWER_OPEN_UPPER_UNBOUNDED: return input -> { if (input == null) { return false; @@ -913,20 +733,7 @@ private Predicate makeArrayPredicate(@Nullable TypeSignature 0; }; - } - } else { - if (arrayType != null) { - final Object[] lowerBound = lowerEval.castTo(ExpressionType.fromColumnType(arrayType)).asArray(); - final Comparator arrayComparator = arrayType.getNullableStrategy(); - return input -> { - if (input == null) { - return false; - } - final int lowerComparing = arrayComparator.compare(input, lowerBound); - return lowerComparing >= 0; - }; - } else { - // fall back to per row type detection + case LOWER_CLOSED_UPPER_UNBOUNDED: return input -> { if (input == null) { return false; @@ -937,11 +744,11 @@ private Predicate makeArrayPredicate(@Nullable TypeSignature= 0; }; - } + case UNBOUNDED: + default: + return Predicates.notNull(); } - } else { - return Predicates.notNull(); - } + }); } private class RangePredicateFactory implements 
DruidPredicateFactory @@ -992,7 +799,13 @@ public DruidDoublePredicate makeDoublePredicate() @Override public Predicate makeArrayPredicate(@Nullable TypeSignature inputType) { - return RangeFilter.this.makeArrayPredicate(inputType); + if (inputType == null) { + return typeDetectingArrayPredicateSupplier.get(); + } + return arrayPredicates.computeIfAbsent( + inputType, + (existing) -> RangeFilter.this.makeArrayPredicate(inputType) + ); } @Override @@ -1022,4 +835,228 @@ public String toString() '}'; } } + + public static DruidLongPredicate makeLongPredicate( + final RangeType rangeType, + final long lowerLongBound, + final long upperLongBound + ) + { + switch (rangeType) { + case OPEN: + return input -> input > lowerLongBound && input < upperLongBound; + case LOWER_OPEN_UPPER_CLOSED: + return input -> input > lowerLongBound && input <= upperLongBound; + case LOWER_CLOSED_UPPER_OPEN: + return input -> input >= lowerLongBound && input < upperLongBound; + case CLOSED: + return input -> input >= lowerLongBound && input <= upperLongBound; + case LOWER_UNBOUNDED_UPPER_OPEN: + return input -> input < upperLongBound; + case LOWER_UNBOUNDED_UPPER_CLOSED: + return input -> input <= upperLongBound; + case LOWER_OPEN_UPPER_UNBOUNDED: + return input -> input > lowerLongBound; + case LOWER_CLOSED_UPPER_UNBOUNDED: + return input -> input >= lowerLongBound; + case UNBOUNDED: + default: + return DruidLongPredicate.ALWAYS_TRUE; + } + } + + public static DruidDoublePredicate makeDoublePredicate( + final RangeType rangeType, + final double lowerDoubleBound, + final double upperDoubleBound + ) + { + switch (rangeType) { + case OPEN: + return input -> { + final int lowerComparing = Double.compare(input, lowerDoubleBound); + final int upperComparing = Double.compare(upperDoubleBound, input); + return ((lowerComparing > 0)) && (upperComparing > 0); + }; + case LOWER_OPEN_UPPER_CLOSED: + return input -> { + final int lowerComparing = Double.compare(input, lowerDoubleBound); + final int 
upperComparing = Double.compare(upperDoubleBound, input); + return (lowerComparing > 0) && (upperComparing >= 0); + }; + case LOWER_CLOSED_UPPER_OPEN: + return input -> { + final int lowerComparing = Double.compare(input, lowerDoubleBound); + final int upperComparing = Double.compare(upperDoubleBound, input); + return (lowerComparing >= 0) && (upperComparing > 0); + }; + case CLOSED: + return input -> { + final int lowerComparing = Double.compare(input, lowerDoubleBound); + final int upperComparing = Double.compare(upperDoubleBound, input); + return (lowerComparing >= 0) && (upperComparing >= 0); + }; + case LOWER_UNBOUNDED_UPPER_OPEN: + return input -> { + final int upperComparing = Double.compare(upperDoubleBound, input); + return upperComparing > 0; + }; + case LOWER_UNBOUNDED_UPPER_CLOSED: + return input -> { + final int upperComparing = Double.compare(upperDoubleBound, input); + return upperComparing >= 0; + }; + case LOWER_OPEN_UPPER_UNBOUNDED: + return input -> { + final int lowerComparing = Double.compare(input, lowerDoubleBound); + return lowerComparing > 0; + }; + case LOWER_CLOSED_UPPER_UNBOUNDED: + return input -> { + final int lowerComparing = Double.compare(input, lowerDoubleBound); + return lowerComparing >= 0; + }; + case UNBOUNDED: + default: + return DruidDoublePredicate.ALWAYS_TRUE; + } + } + + public static Predicate makeComparatorPredicate( + RangeType rangeType, + Comparator comparator, + @Nullable T lowerBound, + @Nullable T upperBound + ) + { + switch (rangeType) { + case OPEN: + return input -> { + if (input == null) { + return false; + } + final int lowerComparing = comparator.compare(input, lowerBound); + final int upperComparing = comparator.compare(upperBound, input); + return ((lowerComparing > 0)) && (upperComparing > 0); + }; + case LOWER_OPEN_UPPER_CLOSED: + return input -> { + if (input == null) { + return false; + } + final int lowerComparing = comparator.compare(input, lowerBound); + final int upperComparing = 
comparator.compare(upperBound, input); + return (lowerComparing > 0) && (upperComparing >= 0); + }; + case LOWER_CLOSED_UPPER_OPEN: + return input -> { + if (input == null) { + return false; + } + final int lowerComparing = comparator.compare(input, lowerBound); + final int upperComparing = comparator.compare(upperBound, input); + return (lowerComparing >= 0) && (upperComparing > 0); + }; + case CLOSED: + return input -> { + if (input == null) { + return false; + } + final int lowerComparing = comparator.compare(input, lowerBound); + final int upperComparing = comparator.compare(upperBound, input); + return (lowerComparing >= 0) && (upperComparing >= 0); + }; + case LOWER_UNBOUNDED_UPPER_OPEN: + return input -> { + if (input == null) { + return false; + } + final int upperComparing = comparator.compare(upperBound, input); + return upperComparing > 0; + }; + case LOWER_UNBOUNDED_UPPER_CLOSED: + return input -> { + if (input == null) { + return false; + } + final int upperComparing = comparator.compare(upperBound, input); + return upperComparing >= 0; + }; + case LOWER_OPEN_UPPER_UNBOUNDED: + return input -> { + if (input == null) { + return false; + } + final int lowerComparing = comparator.compare(input, lowerBound); + return lowerComparing > 0; + }; + case LOWER_CLOSED_UPPER_UNBOUNDED: + return input -> { + if (input == null) { + return false; + } + final int lowerComparing = comparator.compare(input, lowerBound); + return lowerComparing >= 0; + }; + case UNBOUNDED: + default: + return Predicates.notNull(); + } + } + + public enum RangeType + { + /** + * (...) + */ + OPEN, + /** + * [...] + */ + CLOSED, + /** + * [...) + */ + LOWER_CLOSED_UPPER_OPEN, + /** + * (...] + */ + LOWER_OPEN_UPPER_CLOSED, + /** + * (...∞ + */ + LOWER_OPEN_UPPER_UNBOUNDED, + /** + * [...∞ + */ + LOWER_CLOSED_UPPER_UNBOUNDED, + /** + * -∞...) + */ + LOWER_UNBOUNDED_UPPER_OPEN, + /** + * -∞...] 
+ */ + LOWER_UNBOUNDED_UPPER_CLOSED, + /** + * -∞...∞ + */ + UNBOUNDED; + + public static RangeType of(boolean hasLower, boolean lowerOpen, boolean hasUpper, boolean upperOpen) + { + if (hasLower && hasUpper) { + if (lowerOpen) { + return upperOpen ? OPEN : LOWER_OPEN_UPPER_CLOSED; + } else { + return upperOpen ? LOWER_CLOSED_UPPER_OPEN : CLOSED; + } + } else if (hasLower) { + return lowerOpen ? LOWER_OPEN_UPPER_UNBOUNDED : LOWER_CLOSED_UPPER_UNBOUNDED; + } else if (hasUpper) { + return upperOpen ? LOWER_UNBOUNDED_UPPER_OPEN : LOWER_UNBOUNDED_UPPER_CLOSED; + } + return UNBOUNDED; + } + } } diff --git a/processing/src/main/java/org/apache/druid/query/filter/vector/ArrayVectorValueMatcher.java b/processing/src/main/java/org/apache/druid/query/filter/vector/ArrayVectorValueMatcher.java index f07e9d5f7ac2..13c53aeee023 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/vector/ArrayVectorValueMatcher.java +++ b/processing/src/main/java/org/apache/druid/query/filter/vector/ArrayVectorValueMatcher.java @@ -90,7 +90,6 @@ public ReadableVectorMatch match(final ReadableVectorMatch mask) } match.setSelectionSize(numRows); - assert match.isValid(mask); return match; } }; diff --git a/processing/src/main/java/org/apache/druid/query/filter/vector/DoubleVectorValueMatcher.java b/processing/src/main/java/org/apache/druid/query/filter/vector/DoubleVectorValueMatcher.java index 44e448e86d29..66064fc4693c 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/vector/DoubleVectorValueMatcher.java +++ b/processing/src/main/java/org/apache/druid/query/filter/vector/DoubleVectorValueMatcher.java @@ -91,7 +91,6 @@ public ReadableVectorMatch match(final ReadableVectorMatch mask) } match.setSelectionSize(numRows); - assert match.isValid(mask); return match; } }; @@ -129,7 +128,6 @@ public ReadableVectorMatch match(final ReadableVectorMatch mask) } match.setSelectionSize(numRows); - assert match.isValid(mask); return match; } }; diff --git 
a/processing/src/main/java/org/apache/druid/query/filter/vector/FloatVectorValueMatcher.java b/processing/src/main/java/org/apache/druid/query/filter/vector/FloatVectorValueMatcher.java index 46c0ea961f43..ed8b787668eb 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/vector/FloatVectorValueMatcher.java +++ b/processing/src/main/java/org/apache/druid/query/filter/vector/FloatVectorValueMatcher.java @@ -94,7 +94,6 @@ public ReadableVectorMatch match(final ReadableVectorMatch mask) } match.setSelectionSize(numRows); - assert match.isValid(mask); return match; } }; @@ -131,7 +130,6 @@ public ReadableVectorMatch match(final ReadableVectorMatch mask) } match.setSelectionSize(numRows); - assert match.isValid(mask); return match; } }; diff --git a/processing/src/main/java/org/apache/druid/query/filter/vector/LongVectorValueMatcher.java b/processing/src/main/java/org/apache/druid/query/filter/vector/LongVectorValueMatcher.java index 13fe7f07cbe1..a38703451c5c 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/vector/LongVectorValueMatcher.java +++ b/processing/src/main/java/org/apache/druid/query/filter/vector/LongVectorValueMatcher.java @@ -94,7 +94,6 @@ public ReadableVectorMatch match(final ReadableVectorMatch mask) } match.setSelectionSize(numRows); - assert match.isValid(mask); return match; } }; @@ -131,7 +130,6 @@ public ReadableVectorMatch match(final ReadableVectorMatch mask) } match.setSelectionSize(numRows); - assert match.isValid(mask); return match; } }; diff --git a/processing/src/main/java/org/apache/druid/query/filter/vector/MultiValueStringVectorValueMatcher.java b/processing/src/main/java/org/apache/druid/query/filter/vector/MultiValueStringVectorValueMatcher.java index e1fd144aaabd..866a9608fdd9 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/vector/MultiValueStringVectorValueMatcher.java +++ b/processing/src/main/java/org/apache/druid/query/filter/vector/MultiValueStringVectorValueMatcher.java @@ 
-92,7 +92,6 @@ public ReadableVectorMatch match(final ReadableVectorMatch mask) } match.setSelectionSize(numRows); - assert match.isValid(mask); return match; } }; @@ -170,7 +169,6 @@ public ReadableVectorMatch match(final ReadableVectorMatch mask) } match.setSelectionSize(numRows); - assert match.isValid(mask); return match; } }; @@ -210,7 +208,6 @@ public ReadableVectorMatch match(final ReadableVectorMatch mask) } match.setSelectionSize(numRows); - assert match.isValid(mask); return match; } }; diff --git a/processing/src/main/java/org/apache/druid/query/filter/vector/ObjectVectorValueMatcher.java b/processing/src/main/java/org/apache/druid/query/filter/vector/ObjectVectorValueMatcher.java index 4b59454bbfaf..42ca5eeb8190 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/vector/ObjectVectorValueMatcher.java +++ b/processing/src/main/java/org/apache/druid/query/filter/vector/ObjectVectorValueMatcher.java @@ -84,7 +84,6 @@ public ReadableVectorMatch match(final ReadableVectorMatch mask) } match.setSelectionSize(numRows); - assert match.isValid(mask); return match; } }; diff --git a/processing/src/main/java/org/apache/druid/query/filter/vector/SingleValueStringVectorValueMatcher.java b/processing/src/main/java/org/apache/druid/query/filter/vector/SingleValueStringVectorValueMatcher.java index c73f868abccb..181241726f4f 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/vector/SingleValueStringVectorValueMatcher.java +++ b/processing/src/main/java/org/apache/druid/query/filter/vector/SingleValueStringVectorValueMatcher.java @@ -100,7 +100,6 @@ public ReadableVectorMatch match(final ReadableVectorMatch mask) } match.setSelectionSize(numRows); - assert match.isValid(mask); return match; } }; @@ -168,7 +167,6 @@ public ReadableVectorMatch match(final ReadableVectorMatch mask) } match.setSelectionSize(numRows); - assert match.isValid(mask); return match; } }; @@ -194,7 +192,6 @@ public ReadableVectorMatch match(final ReadableVectorMatch 
mask) } match.setSelectionSize(numRows); - assert match.isValid(mask); return match; } }; diff --git a/processing/src/main/java/org/apache/druid/query/filter/vector/StringObjectVectorValueMatcher.java b/processing/src/main/java/org/apache/druid/query/filter/vector/StringObjectVectorValueMatcher.java index 26982013cf56..e587b313cae2 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/vector/StringObjectVectorValueMatcher.java +++ b/processing/src/main/java/org/apache/druid/query/filter/vector/StringObjectVectorValueMatcher.java @@ -64,7 +64,6 @@ public ReadableVectorMatch match(final ReadableVectorMatch mask) } match.setSelectionSize(numRows); - assert match.isValid(mask); return match; } }; @@ -103,7 +102,6 @@ public ReadableVectorMatch match(final ReadableVectorMatch mask) } match.setSelectionSize(numRows); - assert match.isValid(mask); return match; } }; diff --git a/processing/src/main/java/org/apache/druid/segment/AutoTypeColumnIndexer.java b/processing/src/main/java/org/apache/druid/segment/AutoTypeColumnIndexer.java index f51117ed0696..f0b52669fa2a 100644 --- a/processing/src/main/java/org/apache/druid/segment/AutoTypeColumnIndexer.java +++ b/processing/src/main/java/org/apache/druid/segment/AutoTypeColumnIndexer.java @@ -238,6 +238,23 @@ public DimensionSelector makeDimensionSelector( rootType ); } + if (spec.getExtractionFn() == null) { + return new BaseSingleValueDimensionSelector() + { + @Nullable + @Override + protected String getValue() + { + return Evals.asString(rootLiteralSelector.getObject()); + } + + @Override + public void inspectRuntimeShape(RuntimeShapeInspector inspector) + { + + } + }; + } return new BaseSingleValueDimensionSelector() { @Nullable @@ -245,10 +262,7 @@ public DimensionSelector makeDimensionSelector( protected String getValue() { final String s = Evals.asString(rootLiteralSelector.getObject()); - if (spec.getExtractionFn() != null) { - return spec.getExtractionFn().apply(s); - } - return s; + return 
spec.getExtractionFn().apply(s); } @Override diff --git a/processing/src/main/java/org/apache/druid/segment/ColumnProcessorFactory.java b/processing/src/main/java/org/apache/druid/segment/ColumnProcessorFactory.java index a27e490ba69e..1e243f275526 100644 --- a/processing/src/main/java/org/apache/druid/segment/ColumnProcessorFactory.java +++ b/processing/src/main/java/org/apache/druid/segment/ColumnProcessorFactory.java @@ -23,6 +23,8 @@ import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnType; +import javax.annotation.Nullable; + /** * Class that encapsulates knowledge about how to create "column processors", which are... objects that process columns * and want to have type-specific logic. Used by {@link ColumnProcessors#makeProcessor}. @@ -77,7 +79,13 @@ public interface ColumnProcessorFactory */ T makeLongProcessor(BaseLongColumnValueSelector selector); - T makeArrayProcessor(BaseObjectColumnValueSelector selector, ColumnCapabilities columnCapabilities); + /** + * + * @param selector array selector + * @param columnCapabilities information about the underlying column to match. Null here just means the capabilities + * are unknown, and not necessarily indicative that the column doesn't exist + */ + T makeArrayProcessor(BaseObjectColumnValueSelector selector, @Nullable ColumnCapabilities columnCapabilities); /** * Create a processor for a complex column. diff --git a/processing/src/main/java/org/apache/druid/segment/ColumnProcessors.java b/processing/src/main/java/org/apache/druid/segment/ColumnProcessors.java index 3a32f52f6090..bf6c399519f4 100644 --- a/processing/src/main/java/org/apache/druid/segment/ColumnProcessors.java +++ b/processing/src/main/java/org/apache/druid/segment/ColumnProcessors.java @@ -289,7 +289,7 @@ private static T makeProcessorInternal( case ARRAY: return processorFactory.makeArrayProcessor( valueSelectorFunction.apply(selectorFactory), - capabilities != null ? 
capabilities : ColumnCapabilitiesImpl.createDefault().setType(effectiveType) + capabilities ); case COMPLEX: return processorFactory.makeComplexProcessor(valueSelectorFunction.apply(selectorFactory)); diff --git a/processing/src/main/java/org/apache/druid/segment/column/ColumnConfig.java b/processing/src/main/java/org/apache/druid/segment/column/ColumnConfig.java index e85e7a4d4144..ae5ca7ff8a30 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/ColumnConfig.java +++ b/processing/src/main/java/org/apache/druid/segment/column/ColumnConfig.java @@ -20,9 +20,9 @@ package org.apache.druid.segment.column; import org.apache.druid.segment.index.BitmapColumnIndex; -import org.apache.druid.segment.index.semantic.DruidPredicateIndex; -import org.apache.druid.segment.index.semantic.LexicographicalRangeIndex; -import org.apache.druid.segment.index.semantic.NumericRangeIndex; +import org.apache.druid.segment.index.semantic.DruidPredicateIndexes; +import org.apache.druid.segment.index.semantic.LexicographicalRangeIndexes; +import org.apache.druid.segment.index.semantic.NumericRangeIndexes; public interface ColumnConfig { @@ -48,7 +48,7 @@ public double skipValuePredicateIndexScale() /** * If the total number of rows in a column multiplied by this value is smaller than the total number of bitmap - * index operations required to perform to use a {@link LexicographicalRangeIndex} or {@link NumericRangeIndex}, + * index operations required to perform to use {@link LexicographicalRangeIndexes} or {@link NumericRangeIndexes}, * then for any {@link ColumnIndexSupplier} which chooses to participate in this config it will skip computing the * index, indicated by a return value of null from the 'forRange' methods, to force the filter to be processed * with a scan using a {@link org.apache.druid.query.filter.ValueMatcher} instead. 
@@ -81,7 +81,7 @@ default double skipValueRangeIndexScale() /** * If the total number of rows in a column multiplied by this value is smaller than the total number of bitmap - * index operations required to perform to use a {@link DruidPredicateIndex} then for any {@link ColumnIndexSupplier} + * index operations required to perform to use {@link DruidPredicateIndexes} then for any {@link ColumnIndexSupplier} * which chooses to participate in this config it will skip computing the index, in favor of doing a full scan and * using a {@link org.apache.druid.query.filter.ValueMatcher} instead. This is indicated returning null from * {@link ColumnIndexSupplier#as(Class)} even though it would have otherwise been able to create a diff --git a/processing/src/main/java/org/apache/druid/segment/filter/BoundFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/BoundFilter.java index f43a226bf140..d08160a3166c 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/BoundFilter.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/BoundFilter.java @@ -46,10 +46,13 @@ import org.apache.druid.segment.ColumnSelectorFactory; import org.apache.druid.segment.column.ColumnIndexCapabilities; import org.apache.druid.segment.column.ColumnIndexSupplier; +import org.apache.druid.segment.index.AllFalseBitmapColumnIndex; +import org.apache.druid.segment.index.AllTrueBitmapColumnIndex; import org.apache.druid.segment.index.BitmapColumnIndex; -import org.apache.druid.segment.index.semantic.LexicographicalRangeIndex; +import org.apache.druid.segment.index.semantic.DruidPredicateIndexes; +import org.apache.druid.segment.index.semantic.LexicographicalRangeIndexes; import org.apache.druid.segment.index.semantic.NullValueIndex; -import org.apache.druid.segment.index.semantic.NumericRangeIndex; +import org.apache.druid.segment.index.semantic.NumericRangeIndexes; import org.apache.druid.segment.vector.VectorColumnSelectorFactory; import 
javax.annotation.Nullable; @@ -77,36 +80,37 @@ public BitmapColumnIndex getBitmapColumnIndex(ColumnIndexSelector selector) if (!Filters.checkFilterTuningUseIndex(boundDimFilter.getDimension(), selector, filterTuning)) { return null; } + final ColumnIndexSupplier indexSupplier = selector.getIndexSupplier(boundDimFilter.getDimension()); + if (indexSupplier == null) { + // missing column -> match all rows if the predicate matches null; match no rows otherwise + return getPredicateFactory().makeStringPredicate().apply(null) + ? new AllTrueBitmapColumnIndex(selector) + : new AllFalseBitmapColumnIndex(selector); + } + if (supportStringShortCircuit()) { - final ColumnIndexSupplier indexSupplier = selector.getIndexSupplier(boundDimFilter.getDimension()); - if (indexSupplier == null) { - return Filters.makeNullIndex(doesMatchNull(), selector); - } - final LexicographicalRangeIndex rangeIndex = indexSupplier.as(LexicographicalRangeIndex.class); - if (rangeIndex != null) { - final BitmapColumnIndex rangeBitmaps = rangeIndex.forRange( + final LexicographicalRangeIndexes rangeIndexes = indexSupplier.as(LexicographicalRangeIndexes.class); + if (rangeIndexes != null) { + final BitmapColumnIndex rangeBitmaps = rangeIndexes.forRange( boundDimFilter.getLower(), boundDimFilter.isLowerStrict(), boundDimFilter.getUpper(), boundDimFilter.isUpperStrict() ); - if (rangeBitmaps != null) { - // preserve sad backwards compatible behavior where bound filter matches 'null' if the lower bound is not set - if (doesMatchNull()) { - return wrapRangeIndexWithNullValueIndex(indexSupplier, rangeBitmaps); - } else { - return rangeBitmaps; - } + if (rangeBitmaps == null) { + return null; + } + // preserve sad backwards compatible behavior where bound filter matches 'null' if the lower bound is not set + if (doesMatchNull()) { + return wrapRangeIndexWithNullValueIndex(indexSupplier, rangeBitmaps); + } else { + return rangeBitmaps; } } } if (supportNumericShortCircuit()) { - final ColumnIndexSupplier 
indexSupplier = selector.getIndexSupplier(boundDimFilter.getDimension()); - if (indexSupplier == null) { - return Filters.makeNullIndex(doesMatchNull(), selector); - } - final NumericRangeIndex rangeIndex = indexSupplier.as(NumericRangeIndex.class); - if (rangeIndex != null) { + final NumericRangeIndexes rangeIndexes = indexSupplier.as(NumericRangeIndexes.class); + if (rangeIndexes != null) { final Number lower = boundDimFilter.hasLowerBound() ? Doubles.tryParse(boundDimFilter.getLower()) : null; final Number upper = boundDimFilter.hasUpperBound() ? Doubles.tryParse(boundDimFilter.getUpper()) : null; // valid number bounds are required to use the range index, otherwise we need to fall back to the predicate @@ -115,26 +119,31 @@ public BitmapColumnIndex getBitmapColumnIndex(ColumnIndexSelector selector) final boolean lowerValid = !(boundDimFilter.hasLowerBound() && lower == null); final boolean upperValid = !(boundDimFilter.hasUpperBound() && upper == null); if (lowerValid && upperValid) { - final BitmapColumnIndex rangeBitmaps = rangeIndex.forRange( + final BitmapColumnIndex rangeBitmaps = rangeIndexes.forRange( lower, boundDimFilter.isLowerStrict(), upper, boundDimFilter.isUpperStrict() ); - if (rangeBitmaps != null) { - // preserve sad backwards compatible behavior where bound filter matches 'null' if the lower bound is not set - if (doesMatchNull()) { - return wrapRangeIndexWithNullValueIndex(indexSupplier, rangeBitmaps); - } else { - return rangeBitmaps; - } + if (rangeBitmaps == null) { + return null; + } + // preserve sad backwards compatible behavior where bound filter matches 'null' if the lower bound is not set + if (doesMatchNull()) { + return wrapRangeIndexWithNullValueIndex(indexSupplier, rangeBitmaps); + } else { + return rangeBitmaps; } } } } - // fall back to predicate based index if it is available - return Filters.makePredicateIndex(boundDimFilter.getDimension(), selector, getPredicateFactory()); + final DruidPredicateIndexes predicateIndexes = 
indexSupplier.as(DruidPredicateIndexes.class); + if (predicateIndexes != null) { + return predicateIndexes.forPredicate(getPredicateFactory()); + } + // index doesn't exist + return null; } @Nullable @@ -150,7 +159,7 @@ private BitmapColumnIndex wrapRangeIndexWithNullValueIndex( if (nulls == null) { return null; } - nullBitmap = nulls.forNull(); + nullBitmap = nulls.get(); return new BitmapColumnIndex() { diff --git a/processing/src/main/java/org/apache/druid/segment/filter/ColumnComparisonFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/ColumnComparisonFilter.java index 9fb866f2cdcb..9be846786527 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/ColumnComparisonFilter.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/ColumnComparisonFilter.java @@ -238,7 +238,7 @@ public Supplier makeLongProcessor(BaseLongColumnValueSelector selector @Override public Supplier makeArrayProcessor( BaseObjectColumnValueSelector selector, - ColumnCapabilities columnCapabilities + @Nullable ColumnCapabilities columnCapabilities ) { return () -> { diff --git a/processing/src/main/java/org/apache/druid/segment/filter/ExpressionFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/ExpressionFilter.java index f3c77f91163d..711395cfe820 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/ExpressionFilter.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/ExpressionFilter.java @@ -194,7 +194,7 @@ public BitmapColumnIndex getBitmapColumnIndex(ColumnIndexSelector selector) final Expr.BindingAnalysis details = bindingDetails.get(); if (details.getRequiredBindings().isEmpty()) { // Constant expression. 
- return Filters.makeNullIndex( + return Filters.makeMissingColumnNullIndex( expr.get().eval(InputBindings.nilBindings()).asBoolean(), selector ); diff --git a/processing/src/main/java/org/apache/druid/segment/filter/Filters.java b/processing/src/main/java/org/apache/druid/segment/filter/Filters.java index 3e185dd39b17..c522f8b4aa7a 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/Filters.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/Filters.java @@ -44,7 +44,7 @@ import org.apache.druid.segment.index.AllTrueBitmapColumnIndex; import org.apache.druid.segment.index.BitmapColumnIndex; import org.apache.druid.segment.index.semantic.DictionaryEncodedStringValueIndex; -import org.apache.druid.segment.index.semantic.DruidPredicateIndex; +import org.apache.druid.segment.index.semantic.DruidPredicateIndexes; import org.apache.druid.segment.join.filter.AllNullColumnSelectorFactory; import javax.annotation.Nullable; @@ -131,9 +131,9 @@ public static BitmapColumnIndex makePredicateIndex( Preconditions.checkNotNull(predicateFactory, "predicateFactory"); final ColumnIndexSupplier indexSupplier = selector.getIndexSupplier(column); if (indexSupplier != null) { - final DruidPredicateIndex predicateIndex = indexSupplier.as(DruidPredicateIndex.class); - if (predicateIndex != null) { - return predicateIndex.forPredicate(predicateFactory); + final DruidPredicateIndexes predicateIndexes = indexSupplier.as(DruidPredicateIndexes.class); + if (predicateIndexes != null) { + return predicateIndexes.forPredicate(predicateFactory); } // index doesn't exist return null; @@ -144,7 +144,7 @@ public static BitmapColumnIndex makePredicateIndex( : new AllFalseBitmapColumnIndex(selector); } - public static BitmapColumnIndex makeNullIndex(boolean matchesNull, final ColumnIndexSelector selector) + public static BitmapColumnIndex makeMissingColumnNullIndex(boolean matchesNull, final ColumnIndexSelector selector) { return matchesNull ? 
new AllTrueBitmapColumnIndex(selector) : new AllFalseBitmapColumnIndex(selector); } diff --git a/processing/src/main/java/org/apache/druid/segment/filter/LikeFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/LikeFilter.java index a6a86196c517..ff3f9b51e96e 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/LikeFilter.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/LikeFilter.java @@ -38,8 +38,8 @@ import org.apache.druid.segment.index.AllFalseBitmapColumnIndex; import org.apache.druid.segment.index.AllTrueBitmapColumnIndex; import org.apache.druid.segment.index.BitmapColumnIndex; -import org.apache.druid.segment.index.semantic.LexicographicalRangeIndex; -import org.apache.druid.segment.index.semantic.StringValueSetIndex; +import org.apache.druid.segment.index.semantic.LexicographicalRangeIndexes; +import org.apache.druid.segment.index.semantic.StringValueSetIndexes; import org.apache.druid.segment.vector.VectorColumnSelectorFactory; import javax.annotation.Nullable; @@ -82,23 +82,23 @@ public BitmapColumnIndex getBitmapColumnIndex(ColumnIndexSelector selector) : new AllFalseBitmapColumnIndex(selector); } if (isSimpleEquals()) { - StringValueSetIndex valueIndex = indexSupplier.as(StringValueSetIndex.class); - if (valueIndex != null) { - return valueIndex.forValue( + StringValueSetIndexes valueIndexes = indexSupplier.as(StringValueSetIndexes.class); + if (valueIndexes != null) { + return valueIndexes.forValue( NullHandling.emptyToNullIfNeeded(likeMatcher.getPrefix()) ); } } if (isSimplePrefix()) { - final LexicographicalRangeIndex rangeIndex = indexSupplier.as(LexicographicalRangeIndex.class); - if (rangeIndex != null) { + final LexicographicalRangeIndexes rangeIndexes = indexSupplier.as(LexicographicalRangeIndexes.class); + if (rangeIndexes != null) { final String lower = NullHandling.nullToEmptyIfNeeded(likeMatcher.getPrefix()); final String upper = NullHandling.nullToEmptyIfNeeded(likeMatcher.getPrefix()) 
+ Character.MAX_VALUE; if (likeMatcher.getSuffixMatch() == LikeDimFilter.LikeMatcher.SuffixMatch.MATCH_ANY) { - return rangeIndex.forRange(lower, false, upper, false); + return rangeIndexes.forRange(lower, false, upper, false); } else { - return rangeIndex.forRange(lower, false, upper, false, likeMatcher::matchesSuffixOnly); + return rangeIndexes.forRange(lower, false, upper, false, likeMatcher::matchesSuffixOnly); } } } diff --git a/processing/src/main/java/org/apache/druid/segment/filter/PredicateValueMatcherFactory.java b/processing/src/main/java/org/apache/druid/segment/filter/PredicateValueMatcherFactory.java index 9673248f8263..36b97d6d4d5b 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/PredicateValueMatcherFactory.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/PredicateValueMatcherFactory.java @@ -39,6 +39,7 @@ import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnType; +import javax.annotation.Nullable; import java.util.List; /** @@ -88,7 +89,7 @@ public ValueMatcher makeLongProcessor(BaseLongColumnValueSelector selector) @Override public ValueMatcher makeArrayProcessor( BaseObjectColumnValueSelector selector, - ColumnCapabilities columnCapabilities + @Nullable ColumnCapabilities columnCapabilities ) { if (selector instanceof NilColumnValueSelector) { diff --git a/processing/src/main/java/org/apache/druid/segment/filter/SelectorFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/SelectorFilter.java index fc9044cb8377..a5307befb365 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/SelectorFilter.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/SelectorFilter.java @@ -36,7 +36,7 @@ import org.apache.druid.segment.column.ColumnIndexSupplier; import org.apache.druid.segment.index.BitmapColumnIndex; import org.apache.druid.segment.index.semantic.NullValueIndex; -import 
org.apache.druid.segment.index.semantic.StringValueSetIndex; +import org.apache.druid.segment.index.semantic.StringValueSetIndexes; import org.apache.druid.segment.vector.VectorColumnSelectorFactory; import javax.annotation.Nullable; @@ -87,21 +87,21 @@ public BitmapColumnIndex getBitmapColumnIndex(ColumnIndexSelector selector) final boolean isNull = NullHandling.isNullOrEquivalent(value); final ColumnIndexSupplier indexSupplier = selector.getIndexSupplier(dimension); if (indexSupplier == null) { - return Filters.makeNullIndex(isNull, selector); + return Filters.makeMissingColumnNullIndex(isNull, selector); } if (isNull) { final NullValueIndex nullValueIndex = indexSupplier.as(NullValueIndex.class); if (nullValueIndex == null) { return null; } - return nullValueIndex.forNull(); + return nullValueIndex.get(); } else { - final StringValueSetIndex valueSetIndex = indexSupplier.as(StringValueSetIndex.class); - if (valueSetIndex == null) { + final StringValueSetIndexes valueSetIndexes = indexSupplier.as(StringValueSetIndexes.class); + if (valueSetIndexes == null) { // column exists, but has no index return null; } - return valueSetIndex.forValue(value); + return valueSetIndexes.forValue(value); } } diff --git a/processing/src/main/java/org/apache/druid/segment/filter/StringConstantValueMatcherFactory.java b/processing/src/main/java/org/apache/druid/segment/filter/StringConstantValueMatcherFactory.java index 4c993977ad6e..a1a722e25273 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/StringConstantValueMatcherFactory.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/StringConstantValueMatcherFactory.java @@ -80,7 +80,7 @@ public ValueMatcher makeLongProcessor(BaseLongColumnValueSelector selector) @Override public ValueMatcher makeArrayProcessor( BaseObjectColumnValueSelector selector, - ColumnCapabilities columnCapabilities + @Nullable ColumnCapabilities columnCapabilities ) { // this is gonna fail because SelectorPredicateFactory 
does not implement array predicate... diff --git a/processing/src/main/java/org/apache/druid/segment/index/IndexedStringDruidPredicateIndex.java b/processing/src/main/java/org/apache/druid/segment/index/IndexedStringDruidPredicateIndexes.java similarity index 95% rename from processing/src/main/java/org/apache/druid/segment/index/IndexedStringDruidPredicateIndex.java rename to processing/src/main/java/org/apache/druid/segment/index/IndexedStringDruidPredicateIndexes.java index 5fd63c8b3dc5..9038ed560e11 100644 --- a/processing/src/main/java/org/apache/druid/segment/index/IndexedStringDruidPredicateIndex.java +++ b/processing/src/main/java/org/apache/druid/segment/index/IndexedStringDruidPredicateIndexes.java @@ -26,13 +26,14 @@ import org.apache.druid.segment.column.ColumnConfig; import org.apache.druid.segment.column.ColumnIndexSupplier; import org.apache.druid.segment.data.Indexed; -import org.apache.druid.segment.index.semantic.DruidPredicateIndex; +import org.apache.druid.segment.index.semantic.DruidPredicateIndexes; import javax.annotation.Nullable; import java.util.Iterator; import java.util.NoSuchElementException; -public final class IndexedStringDruidPredicateIndex> implements DruidPredicateIndex +public final class IndexedStringDruidPredicateIndexes> implements + DruidPredicateIndexes { private final BitmapFactory bitmapFactory; private final TDictionary dictionary; @@ -40,7 +41,7 @@ public final class IndexedStringDruidPredicateIndex bitmaps, diff --git a/processing/src/main/java/org/apache/druid/segment/index/IndexedUtf8LexicographicalRangeIndex.java b/processing/src/main/java/org/apache/druid/segment/index/IndexedUtf8LexicographicalRangeIndexes.java similarity index 97% rename from processing/src/main/java/org/apache/druid/segment/index/IndexedUtf8LexicographicalRangeIndex.java rename to processing/src/main/java/org/apache/druid/segment/index/IndexedUtf8LexicographicalRangeIndexes.java index 343ea3b5ed36..0dde5754bbe4 100644 --- 
a/processing/src/main/java/org/apache/druid/segment/index/IndexedUtf8LexicographicalRangeIndex.java +++ b/processing/src/main/java/org/apache/druid/segment/index/IndexedUtf8LexicographicalRangeIndexes.java @@ -32,15 +32,15 @@ import org.apache.druid.segment.column.ColumnConfig; import org.apache.druid.segment.column.ColumnIndexSupplier; import org.apache.druid.segment.data.Indexed; -import org.apache.druid.segment.index.semantic.LexicographicalRangeIndex; +import org.apache.druid.segment.index.semantic.LexicographicalRangeIndexes; import javax.annotation.Nullable; import java.nio.ByteBuffer; import java.util.Iterator; import java.util.NoSuchElementException; -public final class IndexedUtf8LexicographicalRangeIndex> - implements LexicographicalRangeIndex +public final class IndexedUtf8LexicographicalRangeIndexes> + implements LexicographicalRangeIndexes { private final BitmapFactory bitmapFactory; private final TDictionary dictionary; @@ -50,7 +50,7 @@ public final class IndexedUtf8LexicographicalRangeIndex bitmaps, diff --git a/processing/src/main/java/org/apache/druid/segment/index/IndexedUtf8ValueSetIndex.java b/processing/src/main/java/org/apache/druid/segment/index/IndexedUtf8ValueIndexes.java similarity index 96% rename from processing/src/main/java/org/apache/druid/segment/index/IndexedUtf8ValueSetIndex.java rename to processing/src/main/java/org/apache/druid/segment/index/IndexedUtf8ValueIndexes.java index 078ee6a8907c..5137958c6daa 100644 --- a/processing/src/main/java/org/apache/druid/segment/index/IndexedUtf8ValueSetIndex.java +++ b/processing/src/main/java/org/apache/druid/segment/index/IndexedUtf8ValueIndexes.java @@ -33,9 +33,9 @@ import org.apache.druid.segment.column.TypeSignature; import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.data.Indexed; -import org.apache.druid.segment.index.semantic.StringValueSetIndex; -import org.apache.druid.segment.index.semantic.TypedValueIndex; +import 
org.apache.druid.segment.index.semantic.StringValueSetIndexes; import org.apache.druid.segment.index.semantic.Utf8ValueSetIndex; +import org.apache.druid.segment.index.semantic.ValueIndexes; import javax.annotation.Nullable; import java.nio.ByteBuffer; @@ -44,8 +44,8 @@ import java.util.NoSuchElementException; import java.util.SortedSet; -public final class IndexedUtf8ValueSetIndex> - implements StringValueSetIndex, Utf8ValueSetIndex, TypedValueIndex +public final class IndexedUtf8ValueIndexes> + implements StringValueSetIndexes, Utf8ValueSetIndex, ValueIndexes { // This determines the cut-off point to switch the merging algorithm from doing binary-search per element in the value // set to doing a sorted merge algorithm between value set and dictionary. The ratio here represents the ratio b/w @@ -60,7 +60,7 @@ public final class IndexedUtf8ValueSetIndex bitmaps; - public IndexedUtf8ValueSetIndex( + public IndexedUtf8ValueIndexes( BitmapFactory bitmapFactory, TDictionary dictionary, Indexed bitmaps @@ -75,6 +75,7 @@ public IndexedUtf8ValueSetIndex( @Override public BitmapColumnIndex forValue(@Nullable String value) { + final ByteBuffer utf8 = StringUtils.toUtf8ByteBuffer(value); return new SimpleBitmapColumnIndex() { @Override @@ -92,7 +93,7 @@ public T computeBitmapResult(BitmapResultFactory bitmapResultFactory) private ImmutableBitmap getBitmapForValue() { - final int idx = dictionary.indexOf(StringUtils.toUtf8ByteBuffer(value)); + final int idx = dictionary.indexOf(utf8); return getBitmap(idx); } }; diff --git a/processing/src/main/java/org/apache/druid/segment/index/semantic/DictionaryEncodedStringValueIndex.java b/processing/src/main/java/org/apache/druid/segment/index/semantic/DictionaryEncodedStringValueIndex.java index 69a4c698dc9e..f7b76289a604 100644 --- a/processing/src/main/java/org/apache/druid/segment/index/semantic/DictionaryEncodedStringValueIndex.java +++ 
b/processing/src/main/java/org/apache/druid/segment/index/semantic/DictionaryEncodedStringValueIndex.java @@ -29,8 +29,8 @@ * to directly retrieve bitmaps via dictionary ids, as well as access to lower level details of such a column like * value lookup and value cardinality. * - * Most filter implementations should likely be using higher level index instead, such as {@link StringValueSetIndex}, - * {@link LexicographicalRangeIndex}, {@link NumericRangeIndex}, or {@link DruidPredicateIndex} + * Most filter implementations should likely be using higher level index instead, such as {@link StringValueSetIndexes}, + * {@link LexicographicalRangeIndexes}, {@link NumericRangeIndexes}, or {@link DruidPredicateIndexes} */ public interface DictionaryEncodedStringValueIndex extends DictionaryEncodedValueIndex { diff --git a/processing/src/main/java/org/apache/druid/segment/index/semantic/DictionaryEncodedValueIndex.java b/processing/src/main/java/org/apache/druid/segment/index/semantic/DictionaryEncodedValueIndex.java index a928a71a5261..b9e997e9d43a 100644 --- a/processing/src/main/java/org/apache/druid/segment/index/semantic/DictionaryEncodedValueIndex.java +++ b/processing/src/main/java/org/apache/druid/segment/index/semantic/DictionaryEncodedValueIndex.java @@ -30,8 +30,8 @@ * either already know what value the dictionary id represents, not care at all, or have some other means to know * exactly which bitmaps to retrieve. * - * Most filter implementations should likely be using higher level index instead, such as {@link StringValueSetIndex}, - * {@link LexicographicalRangeIndex}, {@link NumericRangeIndex}, or {@link DruidPredicateIndex}. + * Most filter implementations should likely be using higher level index instead, such as {@link StringValueSetIndexes}, + * {@link LexicographicalRangeIndexes}, {@link NumericRangeIndexes}, or {@link DruidPredicateIndexes}. 
*/ public interface DictionaryEncodedValueIndex { diff --git a/processing/src/main/java/org/apache/druid/segment/index/semantic/DruidPredicateIndex.java b/processing/src/main/java/org/apache/druid/segment/index/semantic/DruidPredicateIndexes.java similarity index 97% rename from processing/src/main/java/org/apache/druid/segment/index/semantic/DruidPredicateIndex.java rename to processing/src/main/java/org/apache/druid/segment/index/semantic/DruidPredicateIndexes.java index 0ddbf4febeb7..c7e87ec20f1e 100644 --- a/processing/src/main/java/org/apache/druid/segment/index/semantic/DruidPredicateIndex.java +++ b/processing/src/main/java/org/apache/druid/segment/index/semantic/DruidPredicateIndexes.java @@ -27,7 +27,7 @@ /** * Uses a {@link DruidPredicateFactory} to construct a {@link BitmapColumnIndex} */ -public interface DruidPredicateIndex +public interface DruidPredicateIndexes { /** * Get a {@link BitmapColumnIndex} corresponding to all the rows that match the supplied {@link DruidPredicateFactory} diff --git a/processing/src/main/java/org/apache/druid/segment/index/semantic/LexicographicalRangeIndex.java b/processing/src/main/java/org/apache/druid/segment/index/semantic/LexicographicalRangeIndexes.java similarity index 98% rename from processing/src/main/java/org/apache/druid/segment/index/semantic/LexicographicalRangeIndex.java rename to processing/src/main/java/org/apache/druid/segment/index/semantic/LexicographicalRangeIndexes.java index 151f30a00a15..2d3b71c49d84 100644 --- a/processing/src/main/java/org/apache/druid/segment/index/semantic/LexicographicalRangeIndex.java +++ b/processing/src/main/java/org/apache/druid/segment/index/semantic/LexicographicalRangeIndexes.java @@ -29,7 +29,7 @@ * allowing short-circuit processing of string value ranges. This index does not match null values, union the results * of this index with {@link NullValueIndex} if null values should be considered part of the value range. 
*/ -public interface LexicographicalRangeIndex +public interface LexicographicalRangeIndexes { /** * Get a {@link BitmapColumnIndex} corresponding to the values supplied in the specified range. If supplied starting diff --git a/processing/src/main/java/org/apache/druid/segment/index/semantic/NullValueIndex.java b/processing/src/main/java/org/apache/druid/segment/index/semantic/NullValueIndex.java index e4627a4c39ef..8768caa54627 100644 --- a/processing/src/main/java/org/apache/druid/segment/index/semantic/NullValueIndex.java +++ b/processing/src/main/java/org/apache/druid/segment/index/semantic/NullValueIndex.java @@ -26,5 +26,5 @@ */ public interface NullValueIndex { - BitmapColumnIndex forNull(); + BitmapColumnIndex get(); } diff --git a/processing/src/main/java/org/apache/druid/segment/index/semantic/NumericRangeIndex.java b/processing/src/main/java/org/apache/druid/segment/index/semantic/NumericRangeIndexes.java similarity index 98% rename from processing/src/main/java/org/apache/druid/segment/index/semantic/NumericRangeIndex.java rename to processing/src/main/java/org/apache/druid/segment/index/semantic/NumericRangeIndexes.java index 97dcd41f4d23..7f3caa75d2a7 100644 --- a/processing/src/main/java/org/apache/druid/segment/index/semantic/NumericRangeIndex.java +++ b/processing/src/main/java/org/apache/druid/segment/index/semantic/NumericRangeIndexes.java @@ -28,7 +28,7 @@ * This index does not match null values, union the results of this index with {@link NullValueIndex} if null values * should be considered part of the value range. */ -public interface NumericRangeIndex +public interface NumericRangeIndexes { /** * Get a {@link BitmapColumnIndex} corresponding to the values supplied in the specified range. 
If supplied starting diff --git a/processing/src/main/java/org/apache/druid/segment/index/semantic/StringValueSetIndex.java b/processing/src/main/java/org/apache/druid/segment/index/semantic/StringValueSetIndexes.java similarity index 97% rename from processing/src/main/java/org/apache/druid/segment/index/semantic/StringValueSetIndex.java rename to processing/src/main/java/org/apache/druid/segment/index/semantic/StringValueSetIndexes.java index d5178f7bf669..acb4b6712716 100644 --- a/processing/src/main/java/org/apache/druid/segment/index/semantic/StringValueSetIndex.java +++ b/processing/src/main/java/org/apache/druid/segment/index/semantic/StringValueSetIndexes.java @@ -28,7 +28,7 @@ /** * Index on individual values, and provides bitmaps for the rows which contain these values */ -public interface StringValueSetIndex +public interface StringValueSetIndexes { /** * Get the {@link ImmutableBitmap} corresponding to the supplied value. Generates an empty bitmap when passed a diff --git a/processing/src/main/java/org/apache/druid/segment/index/semantic/TypedValueIndex.java b/processing/src/main/java/org/apache/druid/segment/index/semantic/ValueIndexes.java similarity index 77% rename from processing/src/main/java/org/apache/druid/segment/index/semantic/TypedValueIndex.java rename to processing/src/main/java/org/apache/druid/segment/index/semantic/ValueIndexes.java index 8d3e2160c31b..28fcf0ae9b17 100644 --- a/processing/src/main/java/org/apache/druid/segment/index/semantic/TypedValueIndex.java +++ b/processing/src/main/java/org/apache/druid/segment/index/semantic/ValueIndexes.java @@ -26,11 +26,18 @@ import javax.annotation.Nullable; -public interface TypedValueIndex +public interface ValueIndexes { + /** * Get the {@link ImmutableBitmap} corresponding to the supplied value. Generates an empty bitmap when passed a * value that doesn't exist. May return null if a value index cannot be computed for the supplied value type. 
+ * + * @param value value to match + * @param valueType type of the value to match, used to assist conversion from the match value type to the column + * value type + * @return {@link ImmutableBitmap} corresponding to the rows which match the value, or null if an index + * connot be computed for the supplied value type */ @Nullable BitmapColumnIndex forValue(Object value, TypeSignature valueType); diff --git a/processing/src/main/java/org/apache/druid/segment/join/lookup/LookupJoinMatcher.java b/processing/src/main/java/org/apache/druid/segment/join/lookup/LookupJoinMatcher.java index 9b68eb303a66..319d4a81ef1b 100644 --- a/processing/src/main/java/org/apache/druid/segment/join/lookup/LookupJoinMatcher.java +++ b/processing/src/main/java/org/apache/druid/segment/join/lookup/LookupJoinMatcher.java @@ -116,7 +116,7 @@ public Supplier makeLongProcessor(BaseLongColumnValueSelector selector) @Override public Supplier makeArrayProcessor( BaseObjectColumnValueSelector selector, - ColumnCapabilities columnCapabilities + @Nullable ColumnCapabilities columnCapabilities ) { throw new QueryUnsupportedException("Joining against a ARRAY columns is not supported."); diff --git a/processing/src/main/java/org/apache/druid/segment/join/table/IndexedTableJoinMatcher.java b/processing/src/main/java/org/apache/druid/segment/join/table/IndexedTableJoinMatcher.java index 6640e24726eb..a433a0ae5522 100644 --- a/processing/src/main/java/org/apache/druid/segment/join/table/IndexedTableJoinMatcher.java +++ b/processing/src/main/java/org/apache/druid/segment/join/table/IndexedTableJoinMatcher.java @@ -495,7 +495,7 @@ public ConditionMatcher makeLongProcessor(BaseLongColumnValueSelector selector) @Override public ConditionMatcher makeArrayProcessor( BaseObjectColumnValueSelector selector, - ColumnCapabilities columnCapabilities + @Nullable ColumnCapabilities columnCapabilities ) { return () -> { diff --git 
a/processing/src/main/java/org/apache/druid/segment/nested/NestedFieldColumnIndexSupplier.java b/processing/src/main/java/org/apache/druid/segment/nested/NestedFieldColumnIndexSupplier.java index f4eb3dd58c55..06d29bcf9e7e 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/NestedFieldColumnIndexSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/NestedFieldColumnIndexSupplier.java @@ -56,11 +56,11 @@ import org.apache.druid.segment.index.SimpleImmutableBitmapIterableIndex; import org.apache.druid.segment.index.semantic.DictionaryEncodedStringValueIndex; import org.apache.druid.segment.index.semantic.DictionaryEncodedValueIndex; -import org.apache.druid.segment.index.semantic.DruidPredicateIndex; -import org.apache.druid.segment.index.semantic.LexicographicalRangeIndex; +import org.apache.druid.segment.index.semantic.DruidPredicateIndexes; +import org.apache.druid.segment.index.semantic.LexicographicalRangeIndexes; import org.apache.druid.segment.index.semantic.NullValueIndex; -import org.apache.druid.segment.index.semantic.NumericRangeIndex; -import org.apache.druid.segment.index.semantic.StringValueSetIndex; +import org.apache.druid.segment.index.semantic.NumericRangeIndexes; +import org.apache.druid.segment.index.semantic.StringValueSetIndexes; import javax.annotation.Nullable; import java.nio.ByteBuffer; @@ -132,7 +132,7 @@ public T as(Class clazz) if (clazz.equals(NullValueIndex.class)) { final BitmapColumnIndex nullIndex; if (localDictionarySupplier.get().get(0) == 0) { - // null index is always 0 in the global dictionary, even if there are no null rows in any of the literal columns + // null index is always 0 in the global dictionary, even if there are no null rows in any of the nested fields nullIndex = new SimpleImmutableBitmapIndex(bitmaps.get(0)); } else { nullIndex = new SimpleImmutableBitmapIndex(bitmapFactory.makeEmptyImmutableBitmap()); @@ -145,40 +145,40 @@ public T as(Class clazz) if (singleType != null) { 
switch (singleType.getType()) { case STRING: - if (clazz.equals(StringValueSetIndex.class)) { - return (T) new NestedStringValueSetIndex(); - } else if (clazz.equals(LexicographicalRangeIndex.class)) { - return (T) new NestedStringLexicographicalRangeIndex(); - } else if (clazz.equals(DruidPredicateIndex.class)) { - return (T) new NestedStringPredicateIndex(); + if (clazz.equals(StringValueSetIndexes.class)) { + return (T) new NestedStringValueSetIndexes(); + } else if (clazz.equals(LexicographicalRangeIndexes.class)) { + return (T) new NestedStringLexicographicalRangeIndexes(); + } else if (clazz.equals(DruidPredicateIndexes.class)) { + return (T) new NestedStringPredicateIndexes(); } return null; case LONG: - if (clazz.equals(StringValueSetIndex.class)) { - return (T) new NestedLongValueSetIndex(); - } else if (clazz.equals(NumericRangeIndex.class)) { - return (T) new NestedLongNumericRangeIndex(); - } else if (clazz.equals(DruidPredicateIndex.class)) { - return (T) new NestedLongPredicateIndex(); + if (clazz.equals(StringValueSetIndexes.class)) { + return (T) new NestedLongStringValueSetIndex(); + } else if (clazz.equals(NumericRangeIndexes.class)) { + return (T) new NestedLongNumericRangeIndexes(); + } else if (clazz.equals(DruidPredicateIndexes.class)) { + return (T) new NestedLongPredicateIndexes(); } return null; case DOUBLE: - if (clazz.equals(StringValueSetIndex.class)) { - return (T) new NestedDoubleValueSetIndex(); - } else if (clazz.equals(NumericRangeIndex.class)) { - return (T) new NestedDoubleNumericRangeIndex(); - } else if (clazz.equals(DruidPredicateIndex.class)) { - return (T) new NestedDoublePredicateIndex(); + if (clazz.equals(StringValueSetIndexes.class)) { + return (T) new NestedDoubleStringValueSetIndex(); + } else if (clazz.equals(NumericRangeIndexes.class)) { + return (T) new NestedDoubleNumericRangeIndexes(); + } else if (clazz.equals(DruidPredicateIndexes.class)) { + return (T) new NestedDoublePredicateIndexes(); } return null; default: 
return null; } } - if (clazz.equals(StringValueSetIndex.class)) { - return (T) new NestedVariantValueSetIndex(); - } else if (clazz.equals(DruidPredicateIndex.class)) { - return (T) new NestedVariantPredicateIndex(); + if (clazz.equals(StringValueSetIndexes.class)) { + return (T) new NestedVariantStringValueSetIndexes(); + } else if (clazz.equals(DruidPredicateIndexes.class)) { + return (T) new NestedVariantPredicateIndexes(); } return null; } @@ -353,7 +353,7 @@ public ImmutableBitmap getBitmap(int idx) } } - private class NestedStringValueSetIndex implements StringValueSetIndex + private class NestedStringValueSetIndexes implements StringValueSetIndexes { @Override public BitmapColumnIndex forValue(@Nullable String value) @@ -435,7 +435,7 @@ private void findNext() } } - private class NestedStringLexicographicalRangeIndex implements LexicographicalRangeIndex + private class NestedStringLexicographicalRangeIndexes implements LexicographicalRangeIndexes { @Override @Nullable @@ -533,7 +533,7 @@ public ImmutableBitmap next() } } - private class NestedStringPredicateIndex implements DruidPredicateIndex + private class NestedStringPredicateIndexes implements DruidPredicateIndexes { @Override @Nullable @@ -598,7 +598,7 @@ private void findNext() } } - private class NestedLongValueSetIndex implements StringValueSetIndex + private class NestedLongStringValueSetIndex implements StringValueSetIndexes { @Override public BitmapColumnIndex forValue(@Nullable String value) @@ -718,7 +718,7 @@ private void findNext() } } - private class NestedLongNumericRangeIndex implements NumericRangeIndex + private class NestedLongNumericRangeIndexes implements NumericRangeIndexes { @Override @Nullable @@ -741,7 +741,7 @@ public BitmapColumnIndex forRange( } } - private class NestedLongPredicateIndex implements DruidPredicateIndex + private class NestedLongPredicateIndexes implements DruidPredicateIndexes { @Override @Nullable @@ -811,7 +811,7 @@ private void findNext() } } - private class 
NestedDoubleValueSetIndex implements StringValueSetIndex + private class NestedDoubleStringValueSetIndex implements StringValueSetIndexes { @Override public BitmapColumnIndex forValue(@Nullable String value) @@ -930,7 +930,7 @@ private void findNext() } } - private class NestedDoubleNumericRangeIndex implements NumericRangeIndex + private class NestedDoubleNumericRangeIndexes implements NumericRangeIndexes { @Override @Nullable @@ -953,7 +953,7 @@ public BitmapColumnIndex forRange( } } - private class NestedDoublePredicateIndex implements DruidPredicateIndex + private class NestedDoublePredicateIndexes implements DruidPredicateIndexes { @Override @Nullable @@ -1023,7 +1023,7 @@ private void findNext() } } - private abstract class NestedVariantLiteralIndex + private abstract class NestedVariantIndexes { final FixedIndexed localDictionary = localDictionarySupplier.get(); final Indexed stringDictionary = globalStringDictionarySupplier.get(); @@ -1072,9 +1072,9 @@ IntList getIndexes(@Nullable String value) } /** - * {@link StringValueSetIndex} but for variant typed nested literal columns + * {@link StringValueSetIndexes} but for variant typed nested columns */ - private class NestedVariantValueSetIndex extends NestedVariantLiteralIndex implements StringValueSetIndex + private class NestedVariantStringValueSetIndexes extends NestedVariantIndexes implements StringValueSetIndexes { @Override public BitmapColumnIndex forValue(@Nullable String value) @@ -1151,9 +1151,9 @@ private void findNext() } /** - * {@link DruidPredicateIndex} but for variant typed nested literal columns + * {@link DruidPredicateIndexes} but for variant typed nested fields */ - private class NestedVariantPredicateIndex extends NestedVariantLiteralIndex implements DruidPredicateIndex + private class NestedVariantPredicateIndexes extends NestedVariantIndexes implements DruidPredicateIndexes { @Override @Nullable diff --git 
a/processing/src/main/java/org/apache/druid/segment/nested/NestedFieldDictionaryEncodedColumn.java b/processing/src/main/java/org/apache/druid/segment/nested/NestedFieldDictionaryEncodedColumn.java index b21760416fee..7dea845f707c 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/NestedFieldDictionaryEncodedColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/NestedFieldDictionaryEncodedColumn.java @@ -192,13 +192,21 @@ private int getIdFromGlobalDictionary(@Nullable String val) if (singleType != null) { switch (singleType.getType()) { case LONG: - final int globalLong = globalLongDictionary.indexOf(GuavaUtils.tryParseLong(val)); + final Long l = GuavaUtils.tryParseLong(val); + if (l == null) { + return -1; + } + final int globalLong = globalLongDictionary.indexOf(l); if (globalLong < 0) { return -1; } return globalLong + adjustLongId; case DOUBLE: - final int globalDouble = globalDoubleDictionary.indexOf(Doubles.tryParse(val)); + final Double d = Doubles.tryParse(val); + if (d == null) { + return -1; + } + final int globalDouble = globalDoubleDictionary.indexOf(d); if (globalDouble < 0) { return -1; } @@ -209,15 +217,21 @@ private int getIdFromGlobalDictionary(@Nullable String val) } else { int candidate = globalDictionary.indexOf(StringUtils.toUtf8ByteBuffer(val)); if (candidate < 0) { - candidate = globalLongDictionary.indexOf(GuavaUtils.tryParseLong(val)); - if (candidate >= 0) { - candidate += adjustLongId; + final Long l = GuavaUtils.tryParseLong(val); + if (l != null) { + candidate = globalLongDictionary.indexOf(l); + if (candidate >= 0) { + candidate += adjustLongId; + } } } if (candidate < 0) { - candidate = globalDoubleDictionary.indexOf(Doubles.tryParse(val)); - if (candidate >= 0) { - candidate += adjustDoubleId; + final Double d = Doubles.tryParse(val); + if (d != null) { + candidate = globalDoubleDictionary.indexOf(d); + if (candidate >= 0) { + candidate += adjustDoubleId; + } } } return candidate; diff --git 
a/processing/src/main/java/org/apache/druid/segment/nested/ScalarDoubleColumnAndIndexSupplier.java b/processing/src/main/java/org/apache/druid/segment/nested/ScalarDoubleColumnAndIndexSupplier.java index a775217c7a88..5624025a20c0 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/ScalarDoubleColumnAndIndexSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/ScalarDoubleColumnAndIndexSupplier.java @@ -58,11 +58,11 @@ import org.apache.druid.segment.index.SimpleImmutableBitmapIterableIndex; import org.apache.druid.segment.index.semantic.DictionaryEncodedStringValueIndex; import org.apache.druid.segment.index.semantic.DictionaryEncodedValueIndex; -import org.apache.druid.segment.index.semantic.DruidPredicateIndex; +import org.apache.druid.segment.index.semantic.DruidPredicateIndexes; import org.apache.druid.segment.index.semantic.NullValueIndex; -import org.apache.druid.segment.index.semantic.NumericRangeIndex; -import org.apache.druid.segment.index.semantic.StringValueSetIndex; -import org.apache.druid.segment.index.semantic.TypedValueIndex; +import org.apache.druid.segment.index.semantic.NumericRangeIndexes; +import org.apache.druid.segment.index.semantic.StringValueSetIndexes; +import org.apache.druid.segment.index.semantic.ValueIndexes; import org.apache.druid.segment.serde.NestedCommonFormatColumnPartSerde; import javax.annotation.Nullable; @@ -196,14 +196,14 @@ public T as(Class clazz) nullIndex = new SimpleImmutableBitmapIndex(nullValueBitmap); } return (T) (NullValueIndex) () -> nullIndex; - } else if (clazz.equals(TypedValueIndex.class)) { - return (T) new DoubleValueIndex(); - } else if (clazz.equals(StringValueSetIndex.class)) { - return (T) new DoubleStringValueSetIndex(); - } else if (clazz.equals(NumericRangeIndex.class)) { + } else if (clazz.equals(ValueIndexes.class)) { + return (T) new DoubleValueIndexes(); + } else if (clazz.equals(StringValueSetIndexes.class)) { + return (T) new 
DoubleStringValueSetIndexes(); + } else if (clazz.equals(NumericRangeIndexes.class)) { return (T) new DoubleNumericRangeIndex(); - } else if (clazz.equals(DruidPredicateIndex.class)) { - return (T) new DoublePredicateIndex(); + } else if (clazz.equals(DruidPredicateIndexes.class)) { + return (T) new DoublePredicateIndexes(); } else if ( clazz.equals(DictionaryEncodedStringValueIndex.class) || clazz.equals(DictionaryEncodedValueIndex.class) @@ -224,7 +224,7 @@ private ImmutableBitmap getBitmap(int idx) return bitmap == null ? bitmapFactory.makeEmptyImmutableBitmap() : bitmap; } - private class DoubleValueIndex implements TypedValueIndex + private class DoubleValueIndexes implements ValueIndexes { @Nullable @Override @@ -262,7 +262,7 @@ public T computeBitmapResult(BitmapResultFactory bitmapResultFactory) } } - private class DoubleStringValueSetIndex implements StringValueSetIndex + private class DoubleStringValueSetIndexes implements StringValueSetIndexes { @Override public BitmapColumnIndex forValue(@Nullable String value) @@ -403,7 +403,7 @@ private void findNext() } } - private class DoubleNumericRangeIndex implements NumericRangeIndex + private class DoubleNumericRangeIndex implements NumericRangeIndexes { @Nullable @Override @@ -453,7 +453,7 @@ public ImmutableBitmap next() } } - private class DoublePredicateIndex implements DruidPredicateIndex + private class DoublePredicateIndexes implements DruidPredicateIndexes { @Nullable @Override diff --git a/processing/src/main/java/org/apache/druid/segment/nested/ScalarLongColumnAndIndexSupplier.java b/processing/src/main/java/org/apache/druid/segment/nested/ScalarLongColumnAndIndexSupplier.java index 85107fbd8691..a5170ca9a58f 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/ScalarLongColumnAndIndexSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/ScalarLongColumnAndIndexSupplier.java @@ -57,11 +57,11 @@ import 
org.apache.druid.segment.index.SimpleImmutableBitmapIterableIndex; import org.apache.druid.segment.index.semantic.DictionaryEncodedStringValueIndex; import org.apache.druid.segment.index.semantic.DictionaryEncodedValueIndex; -import org.apache.druid.segment.index.semantic.DruidPredicateIndex; +import org.apache.druid.segment.index.semantic.DruidPredicateIndexes; import org.apache.druid.segment.index.semantic.NullValueIndex; -import org.apache.druid.segment.index.semantic.NumericRangeIndex; -import org.apache.druid.segment.index.semantic.StringValueSetIndex; -import org.apache.druid.segment.index.semantic.TypedValueIndex; +import org.apache.druid.segment.index.semantic.NumericRangeIndexes; +import org.apache.druid.segment.index.semantic.StringValueSetIndexes; +import org.apache.druid.segment.index.semantic.ValueIndexes; import org.apache.druid.segment.serde.NestedCommonFormatColumnPartSerde; import javax.annotation.Nullable; @@ -196,14 +196,14 @@ public T as(Class clazz) nullIndex = new SimpleImmutableBitmapIndex(nullValueBitmap); } return (T) (NullValueIndex) () -> nullIndex; - } else if (clazz.equals(TypedValueIndex.class)) { - return (T) new LongValueIndex(); - } else if (clazz.equals(StringValueSetIndex.class)) { - return (T) new LongStringValueSetIndex(); - } else if (clazz.equals(NumericRangeIndex.class)) { + } else if (clazz.equals(ValueIndexes.class)) { + return (T) new LongValueIndexes(); + } else if (clazz.equals(StringValueSetIndexes.class)) { + return (T) new LongStringValueSetIndexes(); + } else if (clazz.equals(NumericRangeIndexes.class)) { return (T) new LongNumericRangeIndex(); - } else if (clazz.equals(DruidPredicateIndex.class)) { - return (T) new LongPredicateIndex(); + } else if (clazz.equals(DruidPredicateIndexes.class)) { + return (T) new LongPredicateIndexes(); } else if ( clazz.equals(DictionaryEncodedStringValueIndex.class) || clazz.equals(DictionaryEncodedValueIndex.class) @@ -224,7 +224,7 @@ private ImmutableBitmap getBitmap(int idx) 
return bitmap == null ? bitmapFactory.makeEmptyImmutableBitmap() : bitmap; } - private class LongValueIndex implements TypedValueIndex + private class LongValueIndexes implements ValueIndexes { @Nullable @Override @@ -262,7 +262,7 @@ public T computeBitmapResult(BitmapResultFactory bitmapResultFactory) } } - private class LongStringValueSetIndex implements StringValueSetIndex + private class LongStringValueSetIndexes implements StringValueSetIndexes { final FixedIndexed dictionary = longDictionarySupplier.get(); int defaultValueIndex = dictionary.indexOf(NullHandling.defaultLongValue()); @@ -401,7 +401,7 @@ private void findNext() } } - private class LongNumericRangeIndex implements NumericRangeIndex + private class LongNumericRangeIndex implements NumericRangeIndexes { @Nullable @Override @@ -451,7 +451,7 @@ public ImmutableBitmap next() } } - private class LongPredicateIndex implements DruidPredicateIndex + private class LongPredicateIndexes implements DruidPredicateIndexes { @Nullable @Override diff --git a/processing/src/main/java/org/apache/druid/segment/nested/VariantColumn.java b/processing/src/main/java/org/apache/druid/segment/nested/VariantColumn.java index a1c9cd06e1a3..d5406ba8c45c 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/VariantColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/VariantColumn.java @@ -297,15 +297,21 @@ public int lookupId(String val) if (candidate >= 0) { return candidate; } - candidate = longDictionary.indexOf(GuavaUtils.tryParseLong(val)); - if (candidate >= 0) { - candidate += adjustLongId; - return candidate; + final Long l = GuavaUtils.tryParseLong(val); + if (l != null) { + candidate = longDictionary.indexOf(l); + if (candidate >= 0) { + candidate += adjustLongId; + return candidate; + } } - candidate = doubleDictionary.indexOf(Doubles.tryParse(val)); - if (candidate >= 0) { - candidate += adjustDoubleId; - return candidate; + final Double d = Doubles.tryParse(val); + if (d != 
null) { + candidate = doubleDictionary.indexOf(d); + if (candidate >= 0) { + candidate += adjustDoubleId; + return candidate; + } } // not in here, we can't really do anything cool here @@ -324,15 +330,21 @@ public IntSet lookupIds(String val) if (candidate >= 0) { intList.add(candidate); } - candidate = longDictionary.indexOf(GuavaUtils.tryParseLong(val)); - if (candidate >= 0) { - candidate += adjustLongId; - intList.add(candidate); + Long l = GuavaUtils.tryParseLong(val); + if (l != null) { + candidate = longDictionary.indexOf(l); + if (candidate >= 0) { + candidate += adjustLongId; + intList.add(candidate); + } } - candidate = doubleDictionary.indexOf(Doubles.tryParse(val)); - if (candidate >= 0) { - candidate += adjustDoubleId; - intList.add(candidate); + Double d = Doubles.tryParse(val); + if (d != null) { + candidate = doubleDictionary.indexOf(d); + if (candidate >= 0) { + candidate += adjustDoubleId; + intList.add(candidate); + } } return intList; diff --git a/processing/src/main/java/org/apache/druid/segment/nested/VariantColumnAndIndexSupplier.java b/processing/src/main/java/org/apache/druid/segment/nested/VariantColumnAndIndexSupplier.java index 70d0b97a9694..5fabfefef6c4 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/VariantColumnAndIndexSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/VariantColumnAndIndexSupplier.java @@ -51,7 +51,7 @@ import org.apache.druid.segment.index.SimpleBitmapColumnIndex; import org.apache.druid.segment.index.SimpleImmutableBitmapIndex; import org.apache.druid.segment.index.semantic.NullValueIndex; -import org.apache.druid.segment.index.semantic.TypedValueIndex; +import org.apache.druid.segment.index.semantic.ValueIndexes; import org.apache.druid.segment.serde.NestedCommonFormatColumnPartSerde; import javax.annotation.Nullable; @@ -304,10 +304,9 @@ public T as(Class clazz) if (clazz.equals(NullValueIndex.class)) { final BitmapColumnIndex nullIndex = new 
SimpleImmutableBitmapIndex(nullValueBitmap); return (T) (NullValueIndex) () -> nullIndex; - } else if (clazz.equals(TypedValueIndex.class) && variantTypeSetByte == null && logicalType.isArray()) { - return (T) new ArrayValueIndex(); + } else if (clazz.equals(ValueIndexes.class) && variantTypeSetByte == null && logicalType.isArray()) { + return (T) new ArrayValueIndexes(); } - // coming soon... return null; } @@ -321,7 +320,7 @@ private ImmutableBitmap getBitmap(int idx) return bitmap == null ? bitmapFactory.makeEmptyImmutableBitmap() : bitmap; } - private class ArrayValueIndex implements TypedValueIndex + private class ArrayValueIndexes implements ValueIndexes { @Nullable @Override diff --git a/processing/src/main/java/org/apache/druid/segment/serde/NullValueIndexSupplier.java b/processing/src/main/java/org/apache/druid/segment/serde/NullValueIndexSupplier.java index 39c6a5e44aa8..476d88a1ce00 100644 --- a/processing/src/main/java/org/apache/druid/segment/serde/NullValueIndexSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/serde/NullValueIndexSupplier.java @@ -55,7 +55,7 @@ public T as(Class clazz) private final class NullableNumericNullValueIndex implements NullValueIndex { @Override - public BitmapColumnIndex forNull() + public BitmapColumnIndex get() { return nullValueIndex; } diff --git a/processing/src/main/java/org/apache/druid/segment/serde/StringUtf8ColumnIndexSupplier.java b/processing/src/main/java/org/apache/druid/segment/serde/StringUtf8ColumnIndexSupplier.java index aee9824cde8d..3f717f802552 100644 --- a/processing/src/main/java/org/apache/druid/segment/serde/StringUtf8ColumnIndexSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/serde/StringUtf8ColumnIndexSupplier.java @@ -31,19 +31,19 @@ import org.apache.druid.segment.data.Indexed; import org.apache.druid.segment.index.BitmapColumnIndex; import org.apache.druid.segment.index.IndexedStringDictionaryEncodedStringValueIndex; -import 
org.apache.druid.segment.index.IndexedStringDruidPredicateIndex; -import org.apache.druid.segment.index.IndexedUtf8LexicographicalRangeIndex; -import org.apache.druid.segment.index.IndexedUtf8ValueSetIndex; +import org.apache.druid.segment.index.IndexedStringDruidPredicateIndexes; +import org.apache.druid.segment.index.IndexedUtf8LexicographicalRangeIndexes; +import org.apache.druid.segment.index.IndexedUtf8ValueIndexes; import org.apache.druid.segment.index.SimpleImmutableBitmapIndex; import org.apache.druid.segment.index.semantic.DictionaryEncodedStringValueIndex; import org.apache.druid.segment.index.semantic.DictionaryEncodedValueIndex; -import org.apache.druid.segment.index.semantic.DruidPredicateIndex; -import org.apache.druid.segment.index.semantic.LexicographicalRangeIndex; +import org.apache.druid.segment.index.semantic.DruidPredicateIndexes; +import org.apache.druid.segment.index.semantic.LexicographicalRangeIndexes; import org.apache.druid.segment.index.semantic.NullValueIndex; import org.apache.druid.segment.index.semantic.SpatialIndex; -import org.apache.druid.segment.index.semantic.StringValueSetIndex; -import org.apache.druid.segment.index.semantic.TypedValueIndex; +import org.apache.druid.segment.index.semantic.StringValueSetIndexes; import org.apache.druid.segment.index.semantic.Utf8ValueSetIndex; +import org.apache.druid.segment.index.semantic.ValueIndexes; import javax.annotation.Nullable; import java.nio.ByteBuffer; @@ -116,25 +116,25 @@ public T as(Class clazz) } return (T) (NullValueIndex) () -> nullIndex; } else if ( - clazz.equals(StringValueSetIndex.class) || + clazz.equals(StringValueSetIndexes.class) || clazz.equals(Utf8ValueSetIndex.class) || - clazz.equals(TypedValueIndex.class) + clazz.equals(ValueIndexes.class) ) { - return (T) new IndexedUtf8ValueSetIndex<>( + return (T) new IndexedUtf8ValueIndexes<>( bitmapFactory, dict, singleThreadedBitmaps ); - } else if (clazz.equals(DruidPredicateIndex.class)) { - return (T) new 
IndexedStringDruidPredicateIndex<>( + } else if (clazz.equals(DruidPredicateIndexes.class)) { + return (T) new IndexedStringDruidPredicateIndexes<>( bitmapFactory, new StringEncodingStrategies.Utf8ToStringIndexed(dict), singleThreadedBitmaps, columnConfig, numRows ); - } else if (clazz.equals(LexicographicalRangeIndex.class)) { - return (T) new IndexedUtf8LexicographicalRangeIndex<>( + } else if (clazz.equals(LexicographicalRangeIndexes.class)) { + return (T) new IndexedUtf8LexicographicalRangeIndexes<>( bitmapFactory, dict, singleThreadedBitmaps, diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/ListFilteredVirtualColumn.java b/processing/src/main/java/org/apache/druid/segment/virtual/ListFilteredVirtualColumn.java index c7a93147c09f..366b323bdefc 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/ListFilteredVirtualColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/ListFilteredVirtualColumn.java @@ -51,10 +51,10 @@ import org.apache.druid.segment.index.SimpleImmutableBitmapIterableIndex; import org.apache.druid.segment.index.semantic.DictionaryEncodedStringValueIndex; import org.apache.druid.segment.index.semantic.DictionaryEncodedValueIndex; -import org.apache.druid.segment.index.semantic.DruidPredicateIndex; -import org.apache.druid.segment.index.semantic.LexicographicalRangeIndex; +import org.apache.druid.segment.index.semantic.DruidPredicateIndexes; +import org.apache.druid.segment.index.semantic.LexicographicalRangeIndexes; import org.apache.druid.segment.index.semantic.NullValueIndex; -import org.apache.druid.segment.index.semantic.StringValueSetIndex; +import org.apache.druid.segment.index.semantic.StringValueSetIndexes; import javax.annotation.Nullable; import java.util.Collections; @@ -226,12 +226,12 @@ public T as(Class clazz) if (clazz.equals(NullValueIndex.class)) { return (T) new ListFilteredNullValueIndex(underlyingIndex, idMapping, numRows); - } else if 
(clazz.equals(StringValueSetIndex.class)) { - return (T) new ListFilteredStringValueSetIndex(underlyingIndex, idMapping); - } else if (clazz.equals(DruidPredicateIndex.class)) { - return (T) new ListFilteredDruidPredicateIndex(underlyingIndex, idMapping); - } else if (clazz.equals(LexicographicalRangeIndex.class)) { - return (T) new ListFilteredLexicographicalRangeIndex(underlyingIndex, idMapping); + } else if (clazz.equals(StringValueSetIndexes.class)) { + return (T) new ListFilteredStringValueSetIndexes(underlyingIndex, idMapping); + } else if (clazz.equals(DruidPredicateIndexes.class)) { + return (T) new ListFilteredDruidPredicateIndexes(underlyingIndex, idMapping); + } else if (clazz.equals(LexicographicalRangeIndexes.class)) { + return (T) new ListFilteredLexicographicalRangeIndexes(underlyingIndex, idMapping); } else if (clazz.equals(DictionaryEncodedStringValueIndex.class) || clazz.equals(DictionaryEncodedValueIndex.class)) { return (T) new ListFilteredDictionaryEncodedStringValueIndex(underlyingIndex, idMapping); } @@ -376,7 +376,7 @@ private ListFilteredNullValueIndex(DictionaryEncodedStringValueIndex delegate, I } @Override - public BitmapColumnIndex forNull() + public BitmapColumnIndex get() { return new SimpleImmutableBitmapIterableIndex() { @@ -405,11 +405,11 @@ protected Iterable getBitmapIterable() } } - private static class ListFilteredStringValueSetIndex extends BaseListFilteredColumnIndex - implements StringValueSetIndex + private static class ListFilteredStringValueSetIndexes extends BaseListFilteredColumnIndex + implements StringValueSetIndexes { - private ListFilteredStringValueSetIndex( + private ListFilteredStringValueSetIndexes( DictionaryEncodedStringValueIndex delegate, IdMapping idMapping ) @@ -494,11 +494,11 @@ private void findNext() } } - private static class ListFilteredDruidPredicateIndex extends BaseListFilteredColumnIndex - implements DruidPredicateIndex + private static class ListFilteredDruidPredicateIndexes extends 
BaseListFilteredColumnIndex + implements DruidPredicateIndexes { - private ListFilteredDruidPredicateIndex(DictionaryEncodedStringValueIndex delegate, IdMapping idMapping) + private ListFilteredDruidPredicateIndexes(DictionaryEncodedStringValueIndex delegate, IdMapping idMapping) { super(delegate, idMapping); } @@ -531,11 +531,11 @@ public T computeBitmapResult(BitmapResultFactory bitmapResultFactory) } } - private static class ListFilteredLexicographicalRangeIndex extends BaseListFilteredColumnIndex - implements LexicographicalRangeIndex + private static class ListFilteredLexicographicalRangeIndexes extends BaseListFilteredColumnIndex + implements LexicographicalRangeIndexes { - private ListFilteredLexicographicalRangeIndex( + private ListFilteredLexicographicalRangeIndexes( DictionaryEncodedStringValueIndex delegate, IdMapping idMapping ) diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumn.java b/processing/src/main/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumn.java index f024169d2b79..ff72fd37f106 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumn.java @@ -26,7 +26,6 @@ import com.google.common.base.Preconditions; import com.google.common.base.Predicate; import com.google.common.primitives.Doubles; -import org.apache.druid.common.guava.GuavaUtils; import org.apache.druid.java.util.common.IAE; import org.apache.druid.java.util.common.Numbers; import org.apache.druid.math.expr.Evals; @@ -41,6 +40,7 @@ import org.apache.druid.segment.ColumnSelector; import org.apache.druid.segment.ColumnSelectorFactory; import org.apache.druid.segment.ColumnValueSelector; +import org.apache.druid.segment.DimensionHandlerUtils; import org.apache.druid.segment.DimensionSelector; import org.apache.druid.segment.IdLookup; import org.apache.druid.segment.NilColumnValueSelector; @@ -1013,18 
+1013,7 @@ private void computeVectorsIfNeeded() longVector[i] = 0L; nullVector[i] = true; } else { - Long l; - if (v instanceof Number) { - l = ((Number) v).longValue(); - } else { - final String s = String.valueOf(v); - final Double d = Doubles.tryParse(s); - if (d != null) { - l = d.longValue(); - } else { - l = GuavaUtils.tryParseLong(s); - } - } + Long l = DimensionHandlerUtils.convertObjectToLong(v); if (l != null) { longVector[i] = l; if (nullVector != null) { @@ -1082,12 +1071,7 @@ private void computeVectorsIfNeeded() doubleVector[i] = 0.0; nullVector[i] = true; } else { - Double d; - if (v instanceof Number) { - d = ((Number) v).doubleValue(); - } else { - d = Doubles.tryParse(String.valueOf(v)); - } + Double d = DimensionHandlerUtils.convertObjectToDouble(v); if (d != null) { doubleVector[i] = d; if (nullVector != null) { diff --git a/processing/src/test/java/org/apache/druid/query/filter/InDimFilterTest.java b/processing/src/test/java/org/apache/druid/query/filter/InDimFilterTest.java index ab3a7c525d44..ffa793cecd43 100644 --- a/processing/src/test/java/org/apache/druid/query/filter/InDimFilterTest.java +++ b/processing/src/test/java/org/apache/druid/query/filter/InDimFilterTest.java @@ -35,7 +35,7 @@ import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.RowSignature; import org.apache.druid.segment.index.BitmapColumnIndex; -import org.apache.druid.segment.index.semantic.StringValueSetIndex; +import org.apache.druid.segment.index.semantic.StringValueSetIndexes; import org.apache.druid.segment.index.semantic.Utf8ValueSetIndex; import org.apache.druid.testing.InitializedNullHandlingTest; import org.junit.Assert; @@ -284,7 +284,7 @@ public void testUsesStringSetIndex() final ColumnIndexSelector indexSelector = Mockito.mock(ColumnIndexSelector.class); final ColumnIndexSupplier indexSupplier = Mockito.mock(ColumnIndexSupplier.class); - final StringValueSetIndex valueIndex = Mockito.mock(StringValueSetIndex.class); + final 
StringValueSetIndexes valueIndex = Mockito.mock(StringValueSetIndexes.class); final BitmapColumnIndex bitmapColumnIndex = Mockito.mock(BitmapColumnIndex.class); final InDimFilter.ValuesSet expectedValuesSet = new InDimFilter.ValuesSet(); @@ -292,7 +292,7 @@ public void testUsesStringSetIndex() Mockito.when(indexSelector.getIndexSupplier("dim0")).thenReturn(indexSupplier); Mockito.when(indexSupplier.as(Utf8ValueSetIndex.class)).thenReturn(null); // Will check for UTF-8 first. - Mockito.when(indexSupplier.as(StringValueSetIndex.class)).thenReturn(valueIndex); + Mockito.when(indexSupplier.as(StringValueSetIndexes.class)).thenReturn(valueIndex); Mockito.when(valueIndex.forSortedValues(expectedValuesSet)).thenReturn(bitmapColumnIndex); final BitmapColumnIndex retVal = inFilter.getBitmapColumnIndex(indexSelector); diff --git a/processing/src/test/java/org/apache/druid/query/filter/LikeDimFilterTest.java b/processing/src/test/java/org/apache/druid/query/filter/LikeDimFilterTest.java index 00ae514761f4..f0018b94f7cf 100644 --- a/processing/src/test/java/org/apache/druid/query/filter/LikeDimFilterTest.java +++ b/processing/src/test/java/org/apache/druid/query/filter/LikeDimFilterTest.java @@ -26,8 +26,8 @@ import org.apache.druid.query.extraction.SubstringDimExtractionFn; import org.apache.druid.segment.column.ColumnIndexSupplier; import org.apache.druid.segment.index.BitmapColumnIndex; -import org.apache.druid.segment.index.semantic.LexicographicalRangeIndex; -import org.apache.druid.segment.index.semantic.StringValueSetIndex; +import org.apache.druid.segment.index.semantic.LexicographicalRangeIndexes; +import org.apache.druid.segment.index.semantic.StringValueSetIndexes; import org.apache.druid.testing.InitializedNullHandlingTest; import org.junit.Assert; import org.junit.Rule; @@ -111,11 +111,11 @@ public void testPrefixMatchUsesRangeIndex() final ColumnIndexSelector indexSelector = Mockito.mock(ColumnIndexSelector.class); final ColumnIndexSupplier indexSupplier = 
Mockito.mock(ColumnIndexSupplier.class); - final LexicographicalRangeIndex rangeIndex = Mockito.mock(LexicographicalRangeIndex.class); + final LexicographicalRangeIndexes rangeIndex = Mockito.mock(LexicographicalRangeIndexes.class); final BitmapColumnIndex bitmapColumnIndex = Mockito.mock(BitmapColumnIndex.class); Mockito.when(indexSelector.getIndexSupplier("dim0")).thenReturn(indexSupplier); - Mockito.when(indexSupplier.as(LexicographicalRangeIndex.class)).thenReturn(rangeIndex); + Mockito.when(indexSupplier.as(LexicographicalRangeIndexes.class)).thenReturn(rangeIndex); Mockito.when( // Verify that likeFilter uses forRange without a matcher predicate; it's unnecessary and slows things down rangeIndex.forRange("f", false, "f" + Character.MAX_VALUE, false) @@ -135,11 +135,11 @@ public void testExactMatchUsesValueIndex() final ColumnIndexSelector indexSelector = Mockito.mock(ColumnIndexSelector.class); final ColumnIndexSupplier indexSupplier = Mockito.mock(ColumnIndexSupplier.class); - final StringValueSetIndex valueIndex = Mockito.mock(StringValueSetIndex.class); + final StringValueSetIndexes valueIndex = Mockito.mock(StringValueSetIndexes.class); final BitmapColumnIndex bitmapColumnIndex = Mockito.mock(BitmapColumnIndex.class); Mockito.when(indexSelector.getIndexSupplier("dim0")).thenReturn(indexSupplier); - Mockito.when(indexSupplier.as(StringValueSetIndex.class)).thenReturn(valueIndex); + Mockito.when(indexSupplier.as(StringValueSetIndexes.class)).thenReturn(valueIndex); Mockito.when(valueIndex.forValue("f")).thenReturn(bitmapColumnIndex); final BitmapColumnIndex retVal = likeFilter.getBitmapColumnIndex(indexSelector); diff --git a/processing/src/test/java/org/apache/druid/segment/ColumnSelectorColumnIndexSelectorTest.java b/processing/src/test/java/org/apache/druid/segment/ColumnSelectorColumnIndexSelectorTest.java index c57fdd45b6e7..e042fd39b74f 100644 --- a/processing/src/test/java/org/apache/druid/segment/ColumnSelectorColumnIndexSelectorTest.java +++ 
b/processing/src/test/java/org/apache/druid/segment/ColumnSelectorColumnIndexSelectorTest.java @@ -29,7 +29,7 @@ import org.apache.druid.segment.column.StringUtf8DictionaryEncodedColumn; import org.apache.druid.segment.index.BitmapColumnIndex; import org.apache.druid.segment.index.semantic.DictionaryEncodedStringValueIndex; -import org.apache.druid.segment.index.semantic.StringValueSetIndex; +import org.apache.druid.segment.index.semantic.StringValueSetIndexes; import org.apache.druid.segment.serde.NoIndexesColumnIndexSupplier; import org.easymock.EasyMock; import org.junit.Assert; @@ -73,8 +73,8 @@ public void setup() ).anyTimes(); EasyMock.expect(holder.getColumn()).andReturn(stringColumn).anyTimes(); EasyMock.expect(holder.getIndexSupplier()).andReturn(indexSupplier).anyTimes(); - StringValueSetIndex someIndex = EasyMock.createMock(StringValueSetIndex.class); - EasyMock.expect(indexSupplier.as(StringValueSetIndex.class)).andReturn(someIndex).anyTimes(); + StringValueSetIndexes someIndex = EasyMock.createMock(StringValueSetIndexes.class); + EasyMock.expect(indexSupplier.as(StringValueSetIndexes.class)).andReturn(someIndex).anyTimes(); DictionaryEncodedStringValueIndex valueIndex = EasyMock.createMock(DictionaryEncodedStringValueIndex.class); EasyMock.expect(indexSupplier.as(DictionaryEncodedStringValueIndex.class)).andReturn(valueIndex).anyTimes(); BitmapColumnIndex columnIndex = EasyMock.createMock(BitmapColumnIndex.class); @@ -108,7 +108,7 @@ public void testStringDictionaryUseIndex() ); Assert.assertNotNull(bitmapIndex); - StringValueSetIndex valueIndex = supplier.as(StringValueSetIndex.class); + StringValueSetIndexes valueIndex = supplier.as(StringValueSetIndexes.class); Assert.assertNotNull(valueIndex); ImmutableBitmap valueBitmap = valueIndex.forValue("foo") .computeBitmapResult( @@ -127,7 +127,7 @@ public void testNonStringDictionaryDoNotUseIndex() ); Assert.assertNull(bitmapIndex); - StringValueSetIndex valueIndex = supplier.as(StringValueSetIndex.class); 
+ StringValueSetIndexes valueIndex = supplier.as(StringValueSetIndexes.class); Assert.assertNull(valueIndex); EasyMock.verify(bitmapFactory, virtualColumns, index, indexSupplier); } diff --git a/processing/src/test/java/org/apache/druid/segment/IndexMergerNullHandlingTest.java b/processing/src/test/java/org/apache/druid/segment/IndexMergerNullHandlingTest.java index 034eec7ccca1..3b3706c6ea5a 100644 --- a/processing/src/test/java/org/apache/druid/segment/IndexMergerNullHandlingTest.java +++ b/processing/src/test/java/org/apache/druid/segment/IndexMergerNullHandlingTest.java @@ -36,7 +36,7 @@ import org.apache.druid.segment.data.IndexedInts; import org.apache.druid.segment.incremental.IncrementalIndex; import org.apache.druid.segment.index.semantic.DictionaryEncodedStringValueIndex; -import org.apache.druid.segment.index.semantic.StringValueSetIndex; +import org.apache.druid.segment.index.semantic.StringValueSetIndexes; import org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMediumFactory; import org.junit.Assert; import org.junit.Before; @@ -185,8 +185,8 @@ public void testStringColumnNullHandling() throws Exception final DictionaryEncodedStringValueIndex valueIndex = columnHolder.getIndexSupplier().as( DictionaryEncodedStringValueIndex.class ); - final StringValueSetIndex valueSetIndex = columnHolder.getIndexSupplier().as( - StringValueSetIndex.class + final StringValueSetIndexes valueSetIndex = columnHolder.getIndexSupplier().as( + StringValueSetIndexes.class ); // Read through the column to find all the rows that should match null. 
diff --git a/processing/src/test/java/org/apache/druid/segment/IndexMergerTestBase.java b/processing/src/test/java/org/apache/druid/segment/IndexMergerTestBase.java index 5b69ccfa14bd..5cc10619a72e 100644 --- a/processing/src/test/java/org/apache/druid/segment/IndexMergerTestBase.java +++ b/processing/src/test/java/org/apache/druid/segment/IndexMergerTestBase.java @@ -60,7 +60,7 @@ import org.apache.druid.segment.incremental.IncrementalIndexAdapter; import org.apache.druid.segment.incremental.IncrementalIndexSchema; import org.apache.druid.segment.incremental.OnheapIncrementalIndex; -import org.apache.druid.segment.index.semantic.StringValueSetIndex; +import org.apache.druid.segment.index.semantic.StringValueSetIndexes; import org.apache.druid.segment.writeout.SegmentWriteOutMediumFactory; import org.apache.druid.testing.InitializedNullHandlingTest; import org.joda.time.Interval; @@ -127,7 +127,7 @@ static BitmapValues getBitmapIndex(QueryableIndexIndexableAdapter adapter, Strin return BitmapValues.EMPTY; } - final StringValueSetIndex index = indexSupplier.as(StringValueSetIndex.class); + final StringValueSetIndexes index = indexSupplier.as(StringValueSetIndexes.class); if (index == null) { return BitmapValues.EMPTY; } diff --git a/processing/src/test/java/org/apache/druid/segment/filter/RangeFilterTest.java b/processing/src/test/java/org/apache/druid/segment/filter/RangeFilterTest.java index eadbf64e0698..622f2ea8c95a 100644 --- a/processing/src/test/java/org/apache/druid/segment/filter/RangeFilterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/filter/RangeFilterTest.java @@ -193,20 +193,29 @@ public void testLexicographicMatchWithEmptyString() @Test public void testLexicographicMatchNull() { - assertFilterMatches( - new RangeFilter("dim0", ColumnType.STRING, "", "", false, false, null, null), - ImmutableList.of() - ); - assertFilterMatches( - new RangeFilter("dim1", ColumnType.STRING, "", "", false, false, null, null), - 
NullHandling.replaceWithDefault() ? ImmutableList.of() : ImmutableList.of("0") - ); + if (NullHandling.replaceWithDefault()) { + // in default value mode this is null on both ends... + Throwable t = Assert.assertThrows( + DruidException.class, + () -> assertFilterMatches( + new RangeFilter("dim0", ColumnType.STRING, "", "", false, false, null, null), + ImmutableList.of() + ) + ); + Assert.assertEquals( + "Invalid range filter on column [dim0], lower and upper cannot be null at the same time", + t.getMessage() + ); + } else { assertFilterMatches( - new RangeFilter("dim2", ColumnType.STRING, "", "", false, false, null, null), + new RangeFilter("dim0", ColumnType.STRING, "", "", false, false, null, null), ImmutableList.of() ); - } else { + assertFilterMatches( + new RangeFilter("dim1", ColumnType.STRING, "", "", false, false, null, null), + ImmutableList.of("0") + ); // still matches even with auto-schema because match-values are upcast to array types assertFilterMatches( new RangeFilter("dim2", ColumnType.STRING, "", "", false, false, null, null), @@ -219,31 +228,31 @@ public void testLexicographicMatchNull() public void testLexicographicMatchMissingColumn() { assertFilterMatches( - new RangeFilter("dim3", ColumnType.STRING, "", "", false, false, null, null), + new RangeFilter("dim3", ColumnType.STRING, "", "z", false, false, null, null), ImmutableList.of() ); assertFilterMatches( - new RangeFilter("dim3", ColumnType.STRING, "", null, false, true, null, null), + new RangeFilter("dim3", ColumnType.STRING, "a", null, false, true, null, null), ImmutableList.of() ); assertFilterMatches( - new RangeFilter("dim3", ColumnType.STRING, null, "", false, true, null, null), + new RangeFilter("dim3", ColumnType.STRING, null, "z", false, true, null, null), ImmutableList.of() ); assertFilterMatches( - new RangeFilter("dim3", ColumnType.STRING, "", "", true, false, null, null), + new RangeFilter("dim3", ColumnType.STRING, "", "z", true, false, null, null), ImmutableList.of() ); 
assertFilterMatches( - new RangeFilter("dim3", ColumnType.STRING, "", "", false, true, null, null), + new RangeFilter("dim3", ColumnType.STRING, "", "z", false, true, null, null), ImmutableList.of() ); assertFilterMatches( - new RangeFilter("dim3", ColumnType.STRING, null, "", false, false, null, null), + new RangeFilter("dim3", ColumnType.STRING, null, "z", false, false, null, null), ImmutableList.of() ); assertFilterMatches( - new RangeFilter("dim3", ColumnType.STRING, null, "", false, true, null, null), + new RangeFilter("dim3", ColumnType.STRING, null, "z", false, true, null, null), ImmutableList.of() ); } @@ -755,7 +764,7 @@ public void testMatchWithExtractionFn() ExtractionFn makeNullFn = new JavaScriptExtractionFn(nullJsFn, false, JavaScriptConfig.getEnabledInstance()); assertFilterMatches( - new RangeFilter("dim0", ColumnType.STRING, "", "", false, false, makeNullFn, null), + new RangeFilter("dim0", ColumnType.STRING, "", "z", false, false, makeNullFn, null), ImmutableList.of() ); @@ -958,8 +967,8 @@ public void testListFilteredVirtualColumn() @Test public void testRequiredColumnRewrite() { - RangeFilter filter = new RangeFilter("dim0", ColumnType.STRING, "", "", false, false, null, null); - RangeFilter filter2 = new RangeFilter("dim1", ColumnType.STRING, "", "", false, false, null, null); + RangeFilter filter = new RangeFilter("dim0", ColumnType.STRING, "abc", "def", false, false, null, null); + RangeFilter filter2 = new RangeFilter("dim1", ColumnType.STRING, "abc", "def", false, false, null, null); Assert.assertTrue(filter.supportsRequiredColumnRewrite()); Assert.assertTrue(filter2.supportsRequiredColumnRewrite()); @@ -1311,13 +1320,16 @@ public void test_equals() EqualsVerifier.forClass(RangeFilter.class) .withNonnullFields("column", "matchValueType") .withIgnoredFields( + "matchValueExpressionType", "lowerEval", "upperEval", "cachedOptimizedFilter", "stringPredicateSupplier", "longPredicateSupplier", "floatPredicateSupplier", - "doublePredicateSupplier" 
+ "doublePredicateSupplier", + "arrayPredicates", + "typeDetectingArrayPredicateSupplier" ) .withPrefabValues(ColumnType.class, ColumnType.STRING, ColumnType.DOUBLE) .usingGetClass() diff --git a/processing/src/test/java/org/apache/druid/segment/join/JoinTestHelper.java b/processing/src/test/java/org/apache/druid/segment/join/JoinTestHelper.java index af359767bc73..b80f4a1e2ec1 100644 --- a/processing/src/test/java/org/apache/druid/segment/join/JoinTestHelper.java +++ b/processing/src/test/java/org/apache/druid/segment/join/JoinTestHelper.java @@ -61,6 +61,7 @@ import org.apache.druid.segment.join.table.RowBasedIndexedTable; import org.junit.Assert; +import javax.annotation.Nullable; import java.io.File; import java.io.IOException; import java.io.InputStream; @@ -156,7 +157,7 @@ public Supplier makeLongProcessor(BaseLongColumnValueSelector selector) @Override public Supplier makeArrayProcessor( BaseObjectColumnValueSelector selector, - ColumnCapabilities columnCapabilities + @Nullable ColumnCapabilities columnCapabilities ) { return selector::getObject; diff --git a/processing/src/test/java/org/apache/druid/segment/nested/NestedDataColumnSupplierTest.java b/processing/src/test/java/org/apache/druid/segment/nested/NestedDataColumnSupplierTest.java index 02ea10cc6d5c..80daa3549dcf 100644 --- a/processing/src/test/java/org/apache/druid/segment/nested/NestedDataColumnSupplierTest.java +++ b/processing/src/test/java/org/apache/druid/segment/nested/NestedDataColumnSupplierTest.java @@ -55,9 +55,9 @@ import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.data.BitmapSerdeFactory; import org.apache.druid.segment.data.RoaringBitmapSerdeFactory; -import org.apache.druid.segment.index.semantic.DruidPredicateIndex; +import org.apache.druid.segment.index.semantic.DruidPredicateIndexes; import org.apache.druid.segment.index.semantic.NullValueIndex; -import org.apache.druid.segment.index.semantic.StringValueSetIndex; +import 
org.apache.druid.segment.index.semantic.StringValueSetIndexes; import org.apache.druid.segment.serde.ColumnPartSerde; import org.apache.druid.segment.serde.NestedCommonFormatColumnPartSerde; import org.apache.druid.segment.vector.BitmapVectorOffset; @@ -350,8 +350,8 @@ private void smokeTest(NestedDataComplexColumn column) throws IOException DimensionSelector xDimSelector = column.makeDimensionSelector(xPath, offset, null); ColumnIndexSupplier xIndexSupplier = column.getColumnIndexSupplier(xPath); Assert.assertNotNull(xIndexSupplier); - StringValueSetIndex xValueIndex = xIndexSupplier.as(StringValueSetIndex.class); - DruidPredicateIndex xPredicateIndex = xIndexSupplier.as(DruidPredicateIndex.class); + StringValueSetIndexes xValueIndex = xIndexSupplier.as(StringValueSetIndexes.class); + DruidPredicateIndexes xPredicateIndex = xIndexSupplier.as(DruidPredicateIndexes.class); NullValueIndex xNulls = xIndexSupplier.as(NullValueIndex.class); final List yPath = NestedPathFinder.parseJsonPath("$.y"); @@ -361,8 +361,8 @@ private void smokeTest(NestedDataComplexColumn column) throws IOException DimensionSelector yDimSelector = column.makeDimensionSelector(yPath, offset, null); ColumnIndexSupplier yIndexSupplier = column.getColumnIndexSupplier(yPath); Assert.assertNotNull(yIndexSupplier); - StringValueSetIndex yValueIndex = yIndexSupplier.as(StringValueSetIndex.class); - DruidPredicateIndex yPredicateIndex = yIndexSupplier.as(DruidPredicateIndex.class); + StringValueSetIndexes yValueIndex = yIndexSupplier.as(StringValueSetIndexes.class); + DruidPredicateIndexes yPredicateIndex = yIndexSupplier.as(DruidPredicateIndexes.class); NullValueIndex yNulls = yIndexSupplier.as(NullValueIndex.class); final List zPath = NestedPathFinder.parseJsonPath("$.z"); @@ -372,8 +372,8 @@ private void smokeTest(NestedDataComplexColumn column) throws IOException DimensionSelector zDimSelector = column.makeDimensionSelector(zPath, offset, null); ColumnIndexSupplier zIndexSupplier = 
column.getColumnIndexSupplier(zPath); Assert.assertNotNull(zIndexSupplier); - StringValueSetIndex zValueIndex = zIndexSupplier.as(StringValueSetIndex.class); - DruidPredicateIndex zPredicateIndex = zIndexSupplier.as(DruidPredicateIndex.class); + StringValueSetIndexes zValueIndex = zIndexSupplier.as(StringValueSetIndexes.class); + DruidPredicateIndexes zPredicateIndex = zIndexSupplier.as(DruidPredicateIndexes.class); NullValueIndex zNulls = zIndexSupplier.as(NullValueIndex.class); final List vPath = NestedPathFinder.parseJsonPath("$.v"); @@ -386,8 +386,8 @@ private void smokeTest(NestedDataComplexColumn column) throws IOException DimensionSelector vDimSelector = column.makeDimensionSelector(vPath, offset, null); ColumnIndexSupplier vIndexSupplier = column.getColumnIndexSupplier(vPath); Assert.assertNotNull(vIndexSupplier); - StringValueSetIndex vValueIndex = vIndexSupplier.as(StringValueSetIndex.class); - DruidPredicateIndex vPredicateIndex = vIndexSupplier.as(DruidPredicateIndex.class); + StringValueSetIndexes vValueIndex = vIndexSupplier.as(StringValueSetIndexes.class); + DruidPredicateIndexes vPredicateIndex = vIndexSupplier.as(DruidPredicateIndexes.class); NullValueIndex vNulls = vIndexSupplier.as(NullValueIndex.class); final List nullishPath = NestedPathFinder.parseJsonPath("$.nullish"); @@ -397,8 +397,8 @@ private void smokeTest(NestedDataComplexColumn column) throws IOException DimensionSelector nullishDimSelector = column.makeDimensionSelector(nullishPath, offset, null); ColumnIndexSupplier nullishIndexSupplier = column.getColumnIndexSupplier(nullishPath); Assert.assertNotNull(nullishIndexSupplier); - StringValueSetIndex nullishValueIndex = nullishIndexSupplier.as(StringValueSetIndex.class); - DruidPredicateIndex nullishPredicateIndex = nullishIndexSupplier.as(DruidPredicateIndex.class); + StringValueSetIndexes nullishValueIndex = nullishIndexSupplier.as(StringValueSetIndexes.class); + DruidPredicateIndexes nullishPredicateIndex = 
nullishIndexSupplier.as(DruidPredicateIndexes.class); NullValueIndex nullishNulls = nullishIndexSupplier.as(NullValueIndex.class); Assert.assertEquals(ImmutableList.of(nullishPath, vPath, xPath, yPath, zPath), column.getNestedFields()); @@ -459,8 +459,8 @@ private void smokeTestArrays(NestedDataComplexColumn column) throws IOException VectorObjectSelector sVectorSelectorFiltered = column.makeVectorObjectSelector(sPath, bitmapVectorOffset); ColumnIndexSupplier sIndexSupplier = column.getColumnIndexSupplier(sPath); Assert.assertNotNull(sIndexSupplier); - Assert.assertNull(sIndexSupplier.as(StringValueSetIndex.class)); - Assert.assertNull(sIndexSupplier.as(DruidPredicateIndex.class)); + Assert.assertNull(sIndexSupplier.as(StringValueSetIndexes.class)); + Assert.assertNull(sIndexSupplier.as(DruidPredicateIndexes.class)); NullValueIndex sNulls = sIndexSupplier.as(NullValueIndex.class); final List sElementPath = NestedPathFinder.parseJsonPath("$.s[1]"); @@ -472,8 +472,8 @@ private void smokeTestArrays(NestedDataComplexColumn column) throws IOException ); ColumnIndexSupplier sElementIndexSupplier = column.getColumnIndexSupplier(sElementPath); Assert.assertNotNull(sElementIndexSupplier); - Assert.assertNull(sElementIndexSupplier.as(StringValueSetIndex.class)); - Assert.assertNull(sElementIndexSupplier.as(DruidPredicateIndex.class)); + Assert.assertNull(sElementIndexSupplier.as(StringValueSetIndexes.class)); + Assert.assertNull(sElementIndexSupplier.as(DruidPredicateIndexes.class)); Assert.assertNull(sElementIndexSupplier.as(NullValueIndex.class)); final List lPath = NestedPathFinder.parseJsonPath("$.l"); @@ -484,8 +484,8 @@ private void smokeTestArrays(NestedDataComplexColumn column) throws IOException VectorObjectSelector lVectorSelectorFiltered = column.makeVectorObjectSelector(lPath, bitmapVectorOffset); ColumnIndexSupplier lIndexSupplier = column.getColumnIndexSupplier(lPath); Assert.assertNotNull(lIndexSupplier); - 
Assert.assertNull(lIndexSupplier.as(StringValueSetIndex.class)); - Assert.assertNull(lIndexSupplier.as(DruidPredicateIndex.class)); + Assert.assertNull(lIndexSupplier.as(StringValueSetIndexes.class)); + Assert.assertNull(lIndexSupplier.as(DruidPredicateIndexes.class)); NullValueIndex lNulls = lIndexSupplier.as(NullValueIndex.class); final List lElementPath = NestedPathFinder.parseJsonPath("$.l[1]"); @@ -498,8 +498,8 @@ private void smokeTestArrays(NestedDataComplexColumn column) throws IOException ); ColumnIndexSupplier lElementIndexSupplier = column.getColumnIndexSupplier(lElementPath); Assert.assertNotNull(lElementIndexSupplier); - Assert.assertNull(lElementIndexSupplier.as(StringValueSetIndex.class)); - Assert.assertNull(lElementIndexSupplier.as(DruidPredicateIndex.class)); + Assert.assertNull(lElementIndexSupplier.as(StringValueSetIndexes.class)); + Assert.assertNull(lElementIndexSupplier.as(DruidPredicateIndexes.class)); Assert.assertNull(lElementIndexSupplier.as(NullValueIndex.class)); final List dPath = NestedPathFinder.parseJsonPath("$.d"); @@ -510,8 +510,8 @@ private void smokeTestArrays(NestedDataComplexColumn column) throws IOException VectorObjectSelector dVectorSelectorFiltered = column.makeVectorObjectSelector(dPath, bitmapVectorOffset); ColumnIndexSupplier dIndexSupplier = column.getColumnIndexSupplier(dPath); Assert.assertNotNull(dIndexSupplier); - Assert.assertNull(dIndexSupplier.as(StringValueSetIndex.class)); - Assert.assertNull(dIndexSupplier.as(DruidPredicateIndex.class)); + Assert.assertNull(dIndexSupplier.as(StringValueSetIndexes.class)); + Assert.assertNull(dIndexSupplier.as(DruidPredicateIndexes.class)); NullValueIndex dNulls = dIndexSupplier.as(NullValueIndex.class); final List dElementPath = NestedPathFinder.parseJsonPath("$.d[1]"); @@ -524,14 +524,14 @@ private void smokeTestArrays(NestedDataComplexColumn column) throws IOException ); ColumnIndexSupplier dElementIndexSupplier = column.getColumnIndexSupplier(dElementPath); 
Assert.assertNotNull(dElementIndexSupplier); - Assert.assertNull(dElementIndexSupplier.as(StringValueSetIndex.class)); - Assert.assertNull(dElementIndexSupplier.as(DruidPredicateIndex.class)); + Assert.assertNull(dElementIndexSupplier.as(StringValueSetIndexes.class)); + Assert.assertNull(dElementIndexSupplier.as(DruidPredicateIndexes.class)); Assert.assertNull(dElementIndexSupplier.as(NullValueIndex.class)); - ImmutableBitmap sNullIndex = sNulls.forNull().computeBitmapResult(resultFactory); - ImmutableBitmap lNullIndex = lNulls.forNull().computeBitmapResult(resultFactory); - ImmutableBitmap dNullIndex = dNulls.forNull().computeBitmapResult(resultFactory); + ImmutableBitmap sNullIndex = sNulls.get().computeBitmapResult(resultFactory); + ImmutableBitmap lNullIndex = lNulls.get().computeBitmapResult(resultFactory); + ImmutableBitmap dNullIndex = dNulls.get().computeBitmapResult(resultFactory); int rowCounter = 0; while (offset.withinBounds()) { @@ -679,8 +679,8 @@ private void testPath( String path, ColumnValueSelector valueSelector, DimensionSelector dimSelector, - StringValueSetIndex valueSetIndex, - DruidPredicateIndex predicateIndex, + StringValueSetIndexes valueSetIndex, + DruidPredicateIndexes predicateIndex, NullValueIndex nullValueIndex, @Nullable ColumnType singleType ) @@ -721,7 +721,7 @@ private void testPath( Assert.assertFalse(predicateIndex.forPredicate(new SelectorPredicateFactory(NO_MATCH)) .computeBitmapResult(resultFactory) .get(rowNumber)); - Assert.assertFalse(nullValueIndex.forNull().computeBitmapResult(resultFactory).get(rowNumber)); + Assert.assertFalse(nullValueIndex.get().computeBitmapResult(resultFactory).get(rowNumber)); Assert.assertTrue(dimSelector.makeValueMatcher(theString).matches()); Assert.assertFalse(dimSelector.makeValueMatcher(NO_MATCH).matches()); @@ -735,7 +735,7 @@ private void testPath( Assert.assertNull(dimSelector.getObject()); Assert.assertNull(dimSelector.lookupName(dimSelector.getRow().get(0))); - 
Assert.assertTrue(nullValueIndex.forNull().computeBitmapResult(resultFactory).get(rowNumber)); + Assert.assertTrue(nullValueIndex.get().computeBitmapResult(resultFactory).get(rowNumber)); Assert.assertTrue(valueSetIndex.forValue(null).computeBitmapResult(resultFactory).get(rowNumber)); Assert.assertTrue(predicateIndex.forPredicate(new SelectorPredicateFactory(null)) .computeBitmapResult(resultFactory) diff --git a/processing/src/test/java/org/apache/druid/segment/nested/NestedDataColumnSupplierV4Test.java b/processing/src/test/java/org/apache/druid/segment/nested/NestedDataColumnSupplierV4Test.java index 7eefb0807677..02b634c1f8a6 100644 --- a/processing/src/test/java/org/apache/druid/segment/nested/NestedDataColumnSupplierV4Test.java +++ b/processing/src/test/java/org/apache/druid/segment/nested/NestedDataColumnSupplierV4Test.java @@ -53,9 +53,9 @@ import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.TypeStrategy; import org.apache.druid.segment.index.BitmapColumnIndex; -import org.apache.druid.segment.index.semantic.DruidPredicateIndex; +import org.apache.druid.segment.index.semantic.DruidPredicateIndexes; import org.apache.druid.segment.index.semantic.NullValueIndex; -import org.apache.druid.segment.index.semantic.StringValueSetIndex; +import org.apache.druid.segment.index.semantic.StringValueSetIndexes; import org.apache.druid.segment.serde.ColumnPartSerde; import org.apache.druid.segment.serde.ComplexColumnPartSerde; import org.apache.druid.segment.writeout.SegmentWriteOutMediumFactory; @@ -312,7 +312,7 @@ public void testLegacyV3ReaderFormat() throws IOException ColumnIndexSupplier indexSupplier = v3.getColumnIndexSupplier(path); Assert.assertNotNull(indexSupplier); - StringValueSetIndex valueSetIndex = indexSupplier.as(StringValueSetIndex.class); + StringValueSetIndexes valueSetIndex = indexSupplier.as(StringValueSetIndexes.class); Assert.assertNotNull(valueSetIndex); BitmapColumnIndex indexForValue = 
valueSetIndex.forValue(firstValue); @@ -356,7 +356,7 @@ public void testLegacyV4ReaderFormat() throws IOException ColumnIndexSupplier indexSupplier = v4.getColumnIndexSupplier(path); Assert.assertNotNull(indexSupplier); - StringValueSetIndex valueSetIndex = indexSupplier.as(StringValueSetIndex.class); + StringValueSetIndexes valueSetIndex = indexSupplier.as(StringValueSetIndexes.class); Assert.assertNotNull(valueSetIndex); BitmapColumnIndex indexForValue = valueSetIndex.forValue(firstValue); @@ -377,8 +377,8 @@ private void smokeTest(NestedDataComplexColumn column) throws IOException DimensionSelector xDimSelector = column.makeDimensionSelector(xPath, offset, null); ColumnIndexSupplier xIndexSupplier = column.getColumnIndexSupplier(xPath); Assert.assertNotNull(xIndexSupplier); - StringValueSetIndex xValueIndex = xIndexSupplier.as(StringValueSetIndex.class); - DruidPredicateIndex xPredicateIndex = xIndexSupplier.as(DruidPredicateIndex.class); + StringValueSetIndexes xValueIndex = xIndexSupplier.as(StringValueSetIndexes.class); + DruidPredicateIndexes xPredicateIndex = xIndexSupplier.as(DruidPredicateIndexes.class); NullValueIndex xNulls = xIndexSupplier.as(NullValueIndex.class); final List yPath = NestedPathFinder.parseJsonPath("$.y"); @@ -388,8 +388,8 @@ private void smokeTest(NestedDataComplexColumn column) throws IOException DimensionSelector yDimSelector = column.makeDimensionSelector(yPath, offset, null); ColumnIndexSupplier yIndexSupplier = column.getColumnIndexSupplier(yPath); Assert.assertNotNull(yIndexSupplier); - StringValueSetIndex yValueIndex = yIndexSupplier.as(StringValueSetIndex.class); - DruidPredicateIndex yPredicateIndex = yIndexSupplier.as(DruidPredicateIndex.class); + StringValueSetIndexes yValueIndex = yIndexSupplier.as(StringValueSetIndexes.class); + DruidPredicateIndexes yPredicateIndex = yIndexSupplier.as(DruidPredicateIndexes.class); NullValueIndex yNulls = yIndexSupplier.as(NullValueIndex.class); final List zPath = 
NestedPathFinder.parseJsonPath("$.z"); @@ -399,8 +399,8 @@ private void smokeTest(NestedDataComplexColumn column) throws IOException DimensionSelector zDimSelector = column.makeDimensionSelector(zPath, offset, null); ColumnIndexSupplier zIndexSupplier = column.getColumnIndexSupplier(zPath); Assert.assertNotNull(zIndexSupplier); - StringValueSetIndex zValueIndex = zIndexSupplier.as(StringValueSetIndex.class); - DruidPredicateIndex zPredicateIndex = zIndexSupplier.as(DruidPredicateIndex.class); + StringValueSetIndexes zValueIndex = zIndexSupplier.as(StringValueSetIndexes.class); + DruidPredicateIndexes zPredicateIndex = zIndexSupplier.as(DruidPredicateIndexes.class); NullValueIndex zNulls = zIndexSupplier.as(NullValueIndex.class); final List vPath = NestedPathFinder.parseJsonPath("$.v"); @@ -413,8 +413,8 @@ private void smokeTest(NestedDataComplexColumn column) throws IOException DimensionSelector vDimSelector = column.makeDimensionSelector(vPath, offset, null); ColumnIndexSupplier vIndexSupplier = column.getColumnIndexSupplier(vPath); Assert.assertNotNull(vIndexSupplier); - StringValueSetIndex vValueIndex = vIndexSupplier.as(StringValueSetIndex.class); - DruidPredicateIndex vPredicateIndex = vIndexSupplier.as(DruidPredicateIndex.class); + StringValueSetIndexes vValueIndex = vIndexSupplier.as(StringValueSetIndexes.class); + DruidPredicateIndexes vPredicateIndex = vIndexSupplier.as(DruidPredicateIndexes.class); NullValueIndex vNulls = vIndexSupplier.as(NullValueIndex.class); final List nullishPath = NestedPathFinder.parseJsonPath("$.nullish"); @@ -424,8 +424,8 @@ private void smokeTest(NestedDataComplexColumn column) throws IOException DimensionSelector nullishDimSelector = column.makeDimensionSelector(nullishPath, offset, null); ColumnIndexSupplier nullishIndexSupplier = column.getColumnIndexSupplier(nullishPath); Assert.assertNotNull(nullishIndexSupplier); - StringValueSetIndex nullishValueIndex = nullishIndexSupplier.as(StringValueSetIndex.class); - 
DruidPredicateIndex nullishPredicateIndex = nullishIndexSupplier.as(DruidPredicateIndex.class); + StringValueSetIndexes nullishValueIndex = nullishIndexSupplier.as(StringValueSetIndexes.class); + DruidPredicateIndexes nullishPredicateIndex = nullishIndexSupplier.as(DruidPredicateIndexes.class); NullValueIndex nullishNulls = nullishIndexSupplier.as(NullValueIndex.class); Assert.assertEquals(ImmutableList.of(nullishPath, vPath, xPath, yPath, zPath), column.getNestedFields()); @@ -463,8 +463,8 @@ private void testPath( String path, ColumnValueSelector valueSelector, DimensionSelector dimSelector, - StringValueSetIndex valueSetIndex, - DruidPredicateIndex predicateIndex, + StringValueSetIndexes valueSetIndex, + DruidPredicateIndexes predicateIndex, NullValueIndex nullValueIndex, @Nullable ColumnType singleType ) @@ -505,7 +505,7 @@ private void testPath( Assert.assertFalse(predicateIndex.forPredicate(new SelectorPredicateFactory(NO_MATCH)) .computeBitmapResult(resultFactory) .get(rowNumber)); - Assert.assertFalse(nullValueIndex.forNull().computeBitmapResult(resultFactory).get(rowNumber)); + Assert.assertFalse(nullValueIndex.get().computeBitmapResult(resultFactory).get(rowNumber)); Assert.assertTrue(dimSelector.makeValueMatcher(theString).matches()); Assert.assertFalse(dimSelector.makeValueMatcher(NO_MATCH).matches()); @@ -520,7 +520,7 @@ private void testPath( Assert.assertNull(dimSelector.lookupName(dimSelector.getRow().get(0))); Assert.assertTrue(valueSetIndex.forValue(null).computeBitmapResult(resultFactory).get(rowNumber)); - Assert.assertTrue(nullValueIndex.forNull().computeBitmapResult(resultFactory).get(rowNumber)); + Assert.assertTrue(nullValueIndex.get().computeBitmapResult(resultFactory).get(rowNumber)); Assert.assertTrue(predicateIndex.forPredicate(new SelectorPredicateFactory(null)) .computeBitmapResult(resultFactory) .get(rowNumber)); diff --git a/processing/src/test/java/org/apache/druid/segment/nested/NestedFieldColumnIndexSupplierTest.java 
b/processing/src/test/java/org/apache/druid/segment/nested/NestedFieldColumnIndexSupplierTest.java index 5090523bb626..fffb9068a3ef 100644 --- a/processing/src/test/java/org/apache/druid/segment/nested/NestedFieldColumnIndexSupplierTest.java +++ b/processing/src/test/java/org/apache/druid/segment/nested/NestedFieldColumnIndexSupplierTest.java @@ -41,12 +41,12 @@ import org.apache.druid.segment.index.BitmapColumnIndex; import org.apache.druid.segment.index.semantic.DictionaryEncodedStringValueIndex; import org.apache.druid.segment.index.semantic.DictionaryEncodedValueIndex; -import org.apache.druid.segment.index.semantic.DruidPredicateIndex; -import org.apache.druid.segment.index.semantic.LexicographicalRangeIndex; +import org.apache.druid.segment.index.semantic.DruidPredicateIndexes; +import org.apache.druid.segment.index.semantic.LexicographicalRangeIndexes; import org.apache.druid.segment.index.semantic.NullValueIndex; -import org.apache.druid.segment.index.semantic.NumericRangeIndex; +import org.apache.druid.segment.index.semantic.NumericRangeIndexes; import org.apache.druid.segment.index.semantic.SpatialIndex; -import org.apache.druid.segment.index.semantic.StringValueSetIndex; +import org.apache.druid.segment.index.semantic.StringValueSetIndexes; import org.apache.druid.segment.serde.Serializer; import org.apache.druid.segment.writeout.OnHeapMemorySegmentWriteOutMedium; import org.apache.druid.testing.InitializedNullHandlingTest; @@ -168,7 +168,7 @@ public void testSingleTypeStringColumnValueIndex() throws IOException // local: [b, foo, fooo, z] // column: [foo, b, fooo, b, z, fooo, z, b, b, foo] - BitmapColumnIndex columnIndex = nullIndex.forNull(); + BitmapColumnIndex columnIndex = nullIndex.get(); Assert.assertNotNull(columnIndex); Assert.assertEquals(0.0, columnIndex.estimateSelectivity(ROW_COUNT), 0.0); ImmutableBitmap bitmap = columnIndex.computeBitmapResult(bitmapResultFactory); @@ -180,7 +180,7 @@ public void testSingleTypeStringColumnValueSetIndex() 
throws IOException { NestedFieldColumnIndexSupplier indexSupplier = makeSingleTypeStringSupplier(); - StringValueSetIndex valueSetIndex = indexSupplier.as(StringValueSetIndex.class); + StringValueSetIndexes valueSetIndex = indexSupplier.as(StringValueSetIndexes.class); Assert.assertNotNull(valueSetIndex); // 10 rows @@ -213,7 +213,7 @@ public void testSingleTypeStringColumnRangeIndex() throws IOException { NestedFieldColumnIndexSupplier indexSupplier = makeSingleTypeStringSupplier(); - LexicographicalRangeIndex rangeIndex = indexSupplier.as(LexicographicalRangeIndex.class); + LexicographicalRangeIndexes rangeIndex = indexSupplier.as(LexicographicalRangeIndexes.class); Assert.assertNotNull(rangeIndex); // 10 rows @@ -370,7 +370,7 @@ public void testSingleTypeStringColumnRangeIndexWithPredicate() throws IOExcepti { NestedFieldColumnIndexSupplier indexSupplier = makeSingleTypeStringSupplier(); - LexicographicalRangeIndex rangeIndex = indexSupplier.as(LexicographicalRangeIndex.class); + LexicographicalRangeIndexes rangeIndex = indexSupplier.as(LexicographicalRangeIndexes.class); Assert.assertNotNull(rangeIndex); // 10 rows @@ -438,7 +438,7 @@ public void testSingleTypeStringColumnPredicateIndex() throws IOException { NestedFieldColumnIndexSupplier indexSupplier = makeSingleTypeStringSupplier(); - DruidPredicateIndex predicateIndex = indexSupplier.as(DruidPredicateIndex.class); + DruidPredicateIndexes predicateIndex = indexSupplier.as(DruidPredicateIndexes.class); Assert.assertNotNull(predicateIndex); DruidPredicateFactory predicateFactory = new InDimFilter.InFilterDruidPredicateFactory( null, @@ -468,7 +468,7 @@ public void testSingleTypeStringColumnWithNullValueIndex() throws IOException // local: [null, b, foo, fooo, z] // column: [foo, null, fooo, b, z, fooo, z, null, null, foo] - BitmapColumnIndex columnIndex = nullIndex.forNull(); + BitmapColumnIndex columnIndex = nullIndex.get(); Assert.assertNotNull(columnIndex); Assert.assertEquals(0.3, 
columnIndex.estimateSelectivity(ROW_COUNT), 0.0); ImmutableBitmap bitmap = columnIndex.computeBitmapResult(bitmapResultFactory); @@ -480,7 +480,7 @@ public void testSingleTypeStringColumnWithNullValueSetIndex() throws IOException { NestedFieldColumnIndexSupplier indexSupplier = makeSingleTypeStringWithNullsSupplier(); - StringValueSetIndex valueSetIndex = indexSupplier.as(StringValueSetIndex.class); + StringValueSetIndexes valueSetIndex = indexSupplier.as(StringValueSetIndexes.class); Assert.assertNotNull(valueSetIndex); // 10 rows @@ -513,7 +513,7 @@ public void testSingleValueStringWithNullRangeIndex() throws IOException { NestedFieldColumnIndexSupplier indexSupplier = makeSingleTypeStringWithNullsSupplier(); - LexicographicalRangeIndex rangeIndex = indexSupplier.as(LexicographicalRangeIndex.class); + LexicographicalRangeIndexes rangeIndex = indexSupplier.as(LexicographicalRangeIndexes.class); Assert.assertNotNull(rangeIndex); // 10 rows @@ -603,7 +603,7 @@ public void testSingleValueStringWithNullPredicateIndex() throws IOException { NestedFieldColumnIndexSupplier indexSupplier = makeSingleTypeStringWithNullsSupplier(); - DruidPredicateIndex predicateIndex = indexSupplier.as(DruidPredicateIndex.class); + DruidPredicateIndexes predicateIndex = indexSupplier.as(DruidPredicateIndexes.class); Assert.assertNotNull(predicateIndex); DruidPredicateFactory predicateFactory = new InDimFilter.InFilterDruidPredicateFactory( null, @@ -626,7 +626,7 @@ public void testSingleTypeLongColumnValueSetIndex() throws IOException { NestedFieldColumnIndexSupplier indexSupplier = makeSingleTypeLongSupplier(); - StringValueSetIndex valueSetIndex = indexSupplier.as(StringValueSetIndex.class); + StringValueSetIndexes valueSetIndex = indexSupplier.as(StringValueSetIndexes.class); Assert.assertNotNull(valueSetIndex); // sanity check to make sure we don't return indexes we don't support @@ -655,65 +655,65 @@ public void testSingleTypeLongColumnRangeIndex() throws IOException { 
NestedFieldColumnIndexSupplier indexSupplier = makeSingleTypeLongSupplier(); - NumericRangeIndex rangeIndex = indexSupplier.as(NumericRangeIndex.class); - Assert.assertNotNull(rangeIndex); + NumericRangeIndexes rangeIndexes = indexSupplier.as(NumericRangeIndexes.class); + Assert.assertNotNull(rangeIndexes); // 10 rows // local: [1, 3, 100, 300] // column: [100, 1, 300, 1, 3, 3, 100, 300, 300, 1] - BitmapColumnIndex forRange = rangeIndex.forRange(10L, true, 400L, true); + BitmapColumnIndex forRange = rangeIndexes.forRange(10L, true, 400L, true); Assert.assertNotNull(forRange); Assert.assertEquals(0.5, forRange.estimateSelectivity(ROW_COUNT), 0.0); ImmutableBitmap bitmap = forRange.computeBitmapResult(bitmapResultFactory); checkBitmap(bitmap, 0, 2, 6, 7, 8); - forRange = rangeIndex.forRange(1, true, 3, true); + forRange = rangeIndexes.forRange(1, true, 3, true); Assert.assertEquals(0.0, forRange.estimateSelectivity(ROW_COUNT), 0.0); bitmap = forRange.computeBitmapResult(bitmapResultFactory); checkBitmap(bitmap); - forRange = rangeIndex.forRange(1, false, 3, true); + forRange = rangeIndexes.forRange(1, false, 3, true); Assert.assertEquals(0.3, forRange.estimateSelectivity(ROW_COUNT), 0.0); bitmap = forRange.computeBitmapResult(bitmapResultFactory); checkBitmap(bitmap, 1, 3, 9); - forRange = rangeIndex.forRange(1, false, 3, false); + forRange = rangeIndexes.forRange(1, false, 3, false); Assert.assertEquals(0.5, forRange.estimateSelectivity(ROW_COUNT), 0.0); bitmap = forRange.computeBitmapResult(bitmapResultFactory); checkBitmap(bitmap, 1, 3, 4, 5, 9); - forRange = rangeIndex.forRange(100L, true, 300L, true); + forRange = rangeIndexes.forRange(100L, true, 300L, true); Assert.assertEquals(0.0, forRange.estimateSelectivity(ROW_COUNT), 0.0); bitmap = forRange.computeBitmapResult(bitmapResultFactory); checkBitmap(bitmap); - forRange = rangeIndex.forRange(100L, true, 300L, false); + forRange = rangeIndexes.forRange(100L, true, 300L, false); Assert.assertEquals(0.3, 
forRange.estimateSelectivity(ROW_COUNT), 0.0); bitmap = forRange.computeBitmapResult(bitmapResultFactory); checkBitmap(bitmap, 2, 7, 8); - forRange = rangeIndex.forRange(100L, false, 300L, true); + forRange = rangeIndexes.forRange(100L, false, 300L, true); Assert.assertEquals(0.2, forRange.estimateSelectivity(ROW_COUNT), 0.0); bitmap = forRange.computeBitmapResult(bitmapResultFactory); checkBitmap(bitmap, 0, 6); - forRange = rangeIndex.forRange(100L, false, 300L, false); + forRange = rangeIndexes.forRange(100L, false, 300L, false); Assert.assertEquals(0.5, forRange.estimateSelectivity(ROW_COUNT), 0.0); bitmap = forRange.computeBitmapResult(bitmapResultFactory); checkBitmap(bitmap, 0, 2, 6, 7, 8); - forRange = rangeIndex.forRange(null, true, null, true); + forRange = rangeIndexes.forRange(null, true, null, true); Assert.assertEquals(1.0, forRange.estimateSelectivity(ROW_COUNT), 0.0); bitmap = forRange.computeBitmapResult(bitmapResultFactory); checkBitmap(bitmap, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9); - forRange = rangeIndex.forRange(null, false, null, false); + forRange = rangeIndexes.forRange(null, false, null, false); Assert.assertEquals(1.0, forRange.estimateSelectivity(ROW_COUNT), 0.0); bitmap = forRange.computeBitmapResult(bitmapResultFactory); checkBitmap(bitmap, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9); @@ -724,7 +724,7 @@ public void testSingleTypeLongColumnPredicateIndex() throws IOException { NestedFieldColumnIndexSupplier indexSupplier = makeSingleTypeLongSupplier(); - DruidPredicateIndex predicateIndex = indexSupplier.as(DruidPredicateIndex.class); + DruidPredicateIndexes predicateIndex = indexSupplier.as(DruidPredicateIndexes.class); Assert.assertNotNull(predicateIndex); DruidPredicateFactory predicateFactory = new InDimFilter.InFilterDruidPredicateFactory( null, @@ -754,7 +754,7 @@ public void testSingleTypeLongColumnWithNullValueIndex() throws IOException // local: [null, 1, 3, 100, 300] // column: [100, 1, null, 1, 3, null, 100, 300, null, 1] - BitmapColumnIndex 
columnIndex = nullIndex.forNull(); + BitmapColumnIndex columnIndex = nullIndex.get(); Assert.assertNotNull(columnIndex); Assert.assertEquals(0.3, columnIndex.estimateSelectivity(ROW_COUNT), 0.0); ImmutableBitmap bitmap = columnIndex.computeBitmapResult(bitmapResultFactory); @@ -766,7 +766,7 @@ public void testSingleTypeLongColumnWithNullValueSetIndex() throws IOException { NestedFieldColumnIndexSupplier indexSupplier = makeSingleTypeLongSupplierWithNull(); - StringValueSetIndex valueSetIndex = indexSupplier.as(StringValueSetIndex.class); + StringValueSetIndexes valueSetIndex = indexSupplier.as(StringValueSetIndexes.class); Assert.assertNotNull(valueSetIndex); // 10 rows @@ -811,61 +811,61 @@ public void testSingleValueLongWithNullRangeIndex() throws IOException { NestedFieldColumnIndexSupplier indexSupplier = makeSingleTypeLongSupplierWithNull(); - NumericRangeIndex rangeIndex = indexSupplier.as(NumericRangeIndex.class); - Assert.assertNotNull(rangeIndex); + NumericRangeIndexes rangeIndexes = indexSupplier.as(NumericRangeIndexes.class); + Assert.assertNotNull(rangeIndexes); // 10 rows // local: [null, 1, 3, 100, 300] // column: [100, 1, null, 1, 3, null, 100, 300, null, 1] - BitmapColumnIndex forRange = rangeIndex.forRange(100, false, 700, true); + BitmapColumnIndex forRange = rangeIndexes.forRange(100, false, 700, true); Assert.assertNotNull(forRange); Assert.assertEquals(0.3, forRange.estimateSelectivity(ROW_COUNT), 0.0); ImmutableBitmap bitmap = forRange.computeBitmapResult(bitmapResultFactory); checkBitmap(bitmap, 0, 6, 7); - forRange = rangeIndex.forRange(100, true, 300, true); + forRange = rangeIndexes.forRange(100, true, 300, true); Assert.assertEquals(0.0, forRange.estimateSelectivity(ROW_COUNT), 0.0); bitmap = forRange.computeBitmapResult(bitmapResultFactory); checkBitmap(bitmap); - forRange = rangeIndex.forRange(100, false, 300, true); + forRange = rangeIndexes.forRange(100, false, 300, true); Assert.assertEquals(0.2, 
forRange.estimateSelectivity(ROW_COUNT), 0.0); bitmap = forRange.computeBitmapResult(bitmapResultFactory); checkBitmap(bitmap, 0, 6); - forRange = rangeIndex.forRange(100, true, 300, false); + forRange = rangeIndexes.forRange(100, true, 300, false); Assert.assertEquals(0.1, forRange.estimateSelectivity(ROW_COUNT), 0.0); bitmap = forRange.computeBitmapResult(bitmapResultFactory); checkBitmap(bitmap, 7); - forRange = rangeIndex.forRange(100, false, 300, false); + forRange = rangeIndexes.forRange(100, false, 300, false); Assert.assertEquals(0.3, forRange.estimateSelectivity(ROW_COUNT), 0.0); bitmap = forRange.computeBitmapResult(bitmapResultFactory); checkBitmap(bitmap, 0, 6, 7); - forRange = rangeIndex.forRange(null, true, null, true); + forRange = rangeIndexes.forRange(null, true, null, true); Assert.assertEquals(0.7, forRange.estimateSelectivity(ROW_COUNT), 0.0); bitmap = forRange.computeBitmapResult(bitmapResultFactory); checkBitmap(bitmap, 0, 1, 3, 4, 6, 7, 9); - forRange = rangeIndex.forRange(null, false, null, false); + forRange = rangeIndexes.forRange(null, false, null, false); Assert.assertEquals(0.7, forRange.estimateSelectivity(ROW_COUNT), 0.0); bitmap = forRange.computeBitmapResult(bitmapResultFactory); checkBitmap(bitmap, 0, 1, 3, 4, 6, 7, 9); - forRange = rangeIndex.forRange(null, false, 0, false); + forRange = rangeIndexes.forRange(null, false, 0, false); Assert.assertEquals(0.0, forRange.estimateSelectivity(ROW_COUNT), 0.0); bitmap = forRange.computeBitmapResult(bitmapResultFactory); checkBitmap(bitmap); - forRange = rangeIndex.forRange(null, false, 1, false); + forRange = rangeIndexes.forRange(null, false, 1, false); Assert.assertEquals(0.3, forRange.estimateSelectivity(ROW_COUNT), 0.0); bitmap = forRange.computeBitmapResult(bitmapResultFactory); checkBitmap(bitmap, 1, 3, 9); - forRange = rangeIndex.forRange(null, false, 1, true); + forRange = rangeIndexes.forRange(null, false, 1, true); Assert.assertEquals(0.0, 
forRange.estimateSelectivity(ROW_COUNT), 0.0); bitmap = forRange.computeBitmapResult(bitmapResultFactory); checkBitmap(bitmap); @@ -876,7 +876,7 @@ public void testSingleValueLongWithNullPredicateIndex() throws IOException { NestedFieldColumnIndexSupplier indexSupplier = makeSingleTypeLongSupplierWithNull(); - DruidPredicateIndex predicateIndex = indexSupplier.as(DruidPredicateIndex.class); + DruidPredicateIndexes predicateIndex = indexSupplier.as(DruidPredicateIndexes.class); Assert.assertNotNull(predicateIndex); DruidPredicateFactory predicateFactory = new InDimFilter.InFilterDruidPredicateFactory( null, @@ -899,7 +899,7 @@ public void testSingleTypeDoubleColumnValueSetIndex() throws IOException { NestedFieldColumnIndexSupplier indexSupplier = makeSingleTypeDoubleSupplier(); - StringValueSetIndex valueSetIndex = indexSupplier.as(StringValueSetIndex.class); + StringValueSetIndexes valueSetIndex = indexSupplier.as(StringValueSetIndexes.class); Assert.assertNotNull(valueSetIndex); // sanity check to make sure we don't return indexes we don't support @@ -928,87 +928,87 @@ public void testSingleTypeDoubleColumnRangeIndex() throws IOException { NestedFieldColumnIndexSupplier indexSupplier = makeSingleTypeDoubleSupplier(); - NumericRangeIndex rangeIndex = indexSupplier.as(NumericRangeIndex.class); - Assert.assertNotNull(rangeIndex); + NumericRangeIndexes rangeIndexes = indexSupplier.as(NumericRangeIndexes.class); + Assert.assertNotNull(rangeIndexes); // 10 rows // local: [1.1, 1.2, 3.3, 6.6] // column: [1.1, 1.1, 1.2, 3.3, 1.2, 6.6, 3.3, 1.2, 1.1, 3.3] - BitmapColumnIndex forRange = rangeIndex.forRange(1.0, true, 5.0, true); + BitmapColumnIndex forRange = rangeIndexes.forRange(1.0, true, 5.0, true); Assert.assertNotNull(forRange); Assert.assertEquals(0.9, forRange.estimateSelectivity(ROW_COUNT), 0.0); ImmutableBitmap bitmap = forRange.computeBitmapResult(bitmapResultFactory); checkBitmap(bitmap, 0, 1, 2, 3, 4, 6, 7, 8, 9); - forRange = rangeIndex.forRange(1.1, false, 
3.3, false); + forRange = rangeIndexes.forRange(1.1, false, 3.3, false); Assert.assertNotNull(forRange); Assert.assertEquals(0.9, forRange.estimateSelectivity(ROW_COUNT), 0.0); bitmap = forRange.computeBitmapResult(bitmapResultFactory); checkBitmap(bitmap, 0, 1, 2, 3, 4, 6, 7, 8, 9); - forRange = rangeIndex.forRange(1.1, true, 3.3, true); + forRange = rangeIndexes.forRange(1.1, true, 3.3, true); Assert.assertNotNull(forRange); Assert.assertEquals(0.3, forRange.estimateSelectivity(ROW_COUNT), 0.0); bitmap = forRange.computeBitmapResult(bitmapResultFactory); checkBitmap(bitmap, 2, 4, 7); - forRange = rangeIndex.forRange(null, true, null, true); + forRange = rangeIndexes.forRange(null, true, null, true); Assert.assertEquals(1.0, forRange.estimateSelectivity(ROW_COUNT), 0.0); bitmap = forRange.computeBitmapResult(bitmapResultFactory); checkBitmap(bitmap, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9); - forRange = rangeIndex.forRange(null, false, null, false); + forRange = rangeIndexes.forRange(null, false, null, false); Assert.assertEquals(1.0, forRange.estimateSelectivity(ROW_COUNT), 0.0); bitmap = forRange.computeBitmapResult(bitmapResultFactory); checkBitmap(bitmap, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9); - forRange = rangeIndex.forRange(1.111, true, 1.19, true); + forRange = rangeIndexes.forRange(1.111, true, 1.19, true); Assert.assertNotNull(forRange); Assert.assertEquals(0.0, forRange.estimateSelectivity(ROW_COUNT), 0.0); bitmap = forRange.computeBitmapResult(bitmapResultFactory); checkBitmap(bitmap); - forRange = rangeIndex.forRange(1.01, true, 1.09, true); + forRange = rangeIndexes.forRange(1.01, true, 1.09, true); Assert.assertNotNull(forRange); Assert.assertEquals(0.0, forRange.estimateSelectivity(ROW_COUNT), 0.0); bitmap = forRange.computeBitmapResult(bitmapResultFactory); checkBitmap(bitmap); - forRange = rangeIndex.forRange(0.05, true, 0.98, true); + forRange = rangeIndexes.forRange(0.05, true, 0.98, true); Assert.assertNotNull(forRange); Assert.assertEquals(0.0, 
forRange.estimateSelectivity(ROW_COUNT), 0.0); bitmap = forRange.computeBitmapResult(bitmapResultFactory); checkBitmap(bitmap); - forRange = rangeIndex.forRange(0.05, true, 1.1, true); + forRange = rangeIndexes.forRange(0.05, true, 1.1, true); Assert.assertNotNull(forRange); Assert.assertEquals(0.0, forRange.estimateSelectivity(ROW_COUNT), 0.0); bitmap = forRange.computeBitmapResult(bitmapResultFactory); checkBitmap(bitmap); - forRange = rangeIndex.forRange(8.99, true, 10.10, true); + forRange = rangeIndexes.forRange(8.99, true, 10.10, true); Assert.assertNotNull(forRange); Assert.assertEquals(0.0, forRange.estimateSelectivity(ROW_COUNT), 0.0); bitmap = forRange.computeBitmapResult(bitmapResultFactory); checkBitmap(bitmap); - forRange = rangeIndex.forRange(8.99, true, 10.10, true); + forRange = rangeIndexes.forRange(8.99, true, 10.10, true); Assert.assertNotNull(forRange); Assert.assertEquals(0.0, forRange.estimateSelectivity(ROW_COUNT), 0.0); bitmap = forRange.computeBitmapResult(bitmapResultFactory); checkBitmap(bitmap); - forRange = rangeIndex.forRange(10.00, true, 10.10, true); + forRange = rangeIndexes.forRange(10.00, true, 10.10, true); Assert.assertNotNull(forRange); Assert.assertEquals(0.0, forRange.estimateSelectivity(ROW_COUNT), 0.0); @@ -1021,7 +1021,7 @@ public void testSingleTypeDoubleColumnPredicateIndex() throws IOException { NestedFieldColumnIndexSupplier indexSupplier = makeSingleTypeDoubleSupplier(); - DruidPredicateIndex predicateIndex = indexSupplier.as(DruidPredicateIndex.class); + DruidPredicateIndexes predicateIndex = indexSupplier.as(DruidPredicateIndexes.class); Assert.assertNotNull(predicateIndex); DruidPredicateFactory predicateFactory = new InDimFilter.InFilterDruidPredicateFactory( null, @@ -1051,7 +1051,7 @@ public void testSingleTypeDoubleColumnWithNullValueIndex() throws IOException // local: [null, 1.1, 1.2, 3.3, 6.6] // column: [1.1, null, 1.2, null, 1.2, 6.6, null, 1.2, 1.1, 3.3] - BitmapColumnIndex columnIndex = 
nullIndex.forNull(); + BitmapColumnIndex columnIndex = nullIndex.get(); Assert.assertNotNull(columnIndex); Assert.assertEquals(0.3, columnIndex.estimateSelectivity(ROW_COUNT), 0.0); ImmutableBitmap bitmap = columnIndex.computeBitmapResult(bitmapResultFactory); @@ -1063,7 +1063,7 @@ public void testSingleTypeDoubleColumnWithNullValueSetIndex() throws IOException { NestedFieldColumnIndexSupplier indexSupplier = makeSingleTypeDoubleSupplierWithNull(); - StringValueSetIndex valueSetIndex = indexSupplier.as(StringValueSetIndex.class); + StringValueSetIndexes valueSetIndex = indexSupplier.as(StringValueSetIndexes.class); Assert.assertNotNull(valueSetIndex); // 10 rows @@ -1108,46 +1108,46 @@ public void testSingleValueDoubleWithNullRangeIndex() throws IOException { NestedFieldColumnIndexSupplier indexSupplier = makeSingleTypeDoubleSupplierWithNull(); - NumericRangeIndex rangeIndex = indexSupplier.as(NumericRangeIndex.class); - Assert.assertNotNull(rangeIndex); + NumericRangeIndexes rangeIndexes = indexSupplier.as(NumericRangeIndexes.class); + Assert.assertNotNull(rangeIndexes); // 10 rows // local: [null, 1.1, 1.2, 3.3, 6.6] // column: [1.1, null, 1.2, null, 1.2, 6.6, null, 1.2, 1.1, 3.3] - BitmapColumnIndex forRange = rangeIndex.forRange(1.1, false, 5.0, true); + BitmapColumnIndex forRange = rangeIndexes.forRange(1.1, false, 5.0, true); Assert.assertNotNull(forRange); Assert.assertEquals(0.6, forRange.estimateSelectivity(ROW_COUNT), 0.0); ImmutableBitmap bitmap = forRange.computeBitmapResult(bitmapResultFactory); checkBitmap(bitmap, 0, 2, 4, 7, 8, 9); - forRange = rangeIndex.forRange(null, true, null, true); + forRange = rangeIndexes.forRange(null, true, null, true); Assert.assertEquals(0.7, forRange.estimateSelectivity(ROW_COUNT), 0.0); bitmap = forRange.computeBitmapResult(bitmapResultFactory); checkBitmap(bitmap, 0, 2, 4, 5, 7, 8, 9); - forRange = rangeIndex.forRange(null, false, null, false); + forRange = rangeIndexes.forRange(null, false, null, false); 
Assert.assertEquals(0.7, forRange.estimateSelectivity(ROW_COUNT), 0.0); bitmap = forRange.computeBitmapResult(bitmapResultFactory); checkBitmap(bitmap, 0, 2, 4, 5, 7, 8, 9); - forRange = rangeIndex.forRange(null, true, 1.0, true); + forRange = rangeIndexes.forRange(null, true, 1.0, true); Assert.assertEquals(0.0, forRange.estimateSelectivity(ROW_COUNT), 0.0); bitmap = forRange.computeBitmapResult(bitmapResultFactory); checkBitmap(bitmap); - forRange = rangeIndex.forRange(null, true, 1.1, false); + forRange = rangeIndexes.forRange(null, true, 1.1, false); Assert.assertEquals(0.2, forRange.estimateSelectivity(ROW_COUNT), 0.0); bitmap = forRange.computeBitmapResult(bitmapResultFactory); checkBitmap(bitmap, 0, 8); - forRange = rangeIndex.forRange(6.6, false, null, false); + forRange = rangeIndexes.forRange(6.6, false, null, false); Assert.assertEquals(0.1, forRange.estimateSelectivity(ROW_COUNT), 0.0); bitmap = forRange.computeBitmapResult(bitmapResultFactory); checkBitmap(bitmap, 5); - forRange = rangeIndex.forRange(6.6, true, null, false); + forRange = rangeIndexes.forRange(6.6, true, null, false); Assert.assertEquals(0.0, forRange.estimateSelectivity(ROW_COUNT), 0.0); bitmap = forRange.computeBitmapResult(bitmapResultFactory); checkBitmap(bitmap); @@ -1158,7 +1158,7 @@ public void testSingleValueDoubleWithNullPredicateIndex() throws IOException { NestedFieldColumnIndexSupplier indexSupplier = makeSingleTypeDoubleSupplierWithNull(); - DruidPredicateIndex predicateIndex = indexSupplier.as(DruidPredicateIndex.class); + DruidPredicateIndexes predicateIndex = indexSupplier.as(DruidPredicateIndexes.class); Assert.assertNotNull(predicateIndex); DruidPredicateFactory predicateFactory = new InDimFilter.InFilterDruidPredicateFactory( null, @@ -1191,7 +1191,7 @@ public void testVariantNullValueIndex() throws IOException // local: [null, b, z, 1, 300, 1.1, 9.9] // column: [1, b, null, 9.9, 300, 1, z, null, 1.1, b] - BitmapColumnIndex columnIndex = nullIndex.forNull(); + 
BitmapColumnIndex columnIndex = nullIndex.get(); Assert.assertNotNull(columnIndex); Assert.assertEquals(0.2, columnIndex.estimateSelectivity(ROW_COUNT), 0.0); ImmutableBitmap bitmap = columnIndex.computeBitmapResult(bitmapResultFactory); @@ -1203,7 +1203,7 @@ public void testVariantValueSetIndex() throws IOException { NestedFieldColumnIndexSupplier indexSupplier = makeVariantSupplierWithNull(); - StringValueSetIndex valueSetIndex = indexSupplier.as(StringValueSetIndex.class); + StringValueSetIndexes valueSetIndex = indexSupplier.as(StringValueSetIndexes.class); Assert.assertNotNull(valueSetIndex); // 10 rows @@ -1261,11 +1261,11 @@ public void testVariantRangeIndex() throws IOException { NestedFieldColumnIndexSupplier indexSupplier = makeVariantSupplierWithNull(); - LexicographicalRangeIndex rangeIndex = indexSupplier.as(LexicographicalRangeIndex.class); + LexicographicalRangeIndexes rangeIndex = indexSupplier.as(LexicographicalRangeIndexes.class); Assert.assertNull(rangeIndex); - NumericRangeIndex numericRangeIndex = indexSupplier.as(NumericRangeIndex.class); - Assert.assertNull(numericRangeIndex); + NumericRangeIndexes numericRangeIndexes = indexSupplier.as(NumericRangeIndexes.class); + Assert.assertNull(numericRangeIndexes); } @Test @@ -1273,7 +1273,7 @@ public void testVariantPredicateIndex() throws IOException { NestedFieldColumnIndexSupplier indexSupplier = makeVariantSupplierWithNull(); - DruidPredicateIndex predicateIndex = indexSupplier.as(DruidPredicateIndex.class); + DruidPredicateIndexes predicateIndex = indexSupplier.as(DruidPredicateIndexes.class); Assert.assertNotNull(predicateIndex); DruidPredicateFactory predicateFactory = new InDimFilter.InFilterDruidPredicateFactory( null, @@ -1433,7 +1433,7 @@ public void testEnsureNoImproperSelectionFromAdjustedGlobals() throws IOExceptio ROW_COUNT ); - StringValueSetIndex valueSetIndex = indexSupplier.as(StringValueSetIndex.class); + StringValueSetIndexes valueSetIndex = 
indexSupplier.as(StringValueSetIndexes.class); Assert.assertNotNull(valueSetIndex); // 3 rows @@ -1487,16 +1487,16 @@ public double skipValuePredicateIndexScale() null, new InDimFilter.ValuesSet(ImmutableSet.of("0")) ); - Assert.assertNull(singleTypeStringSupplier.as(DruidPredicateIndex.class).forPredicate(predicateFactory)); - Assert.assertNull(singleTypeLongSupplier.as(DruidPredicateIndex.class).forPredicate(predicateFactory)); - Assert.assertNull(singleTypeDoubleSupplier.as(DruidPredicateIndex.class).forPredicate(predicateFactory)); - Assert.assertNull(variantSupplierWithNull.as(DruidPredicateIndex.class).forPredicate(predicateFactory)); + Assert.assertNull(singleTypeStringSupplier.as(DruidPredicateIndexes.class).forPredicate(predicateFactory)); + Assert.assertNull(singleTypeLongSupplier.as(DruidPredicateIndexes.class).forPredicate(predicateFactory)); + Assert.assertNull(singleTypeDoubleSupplier.as(DruidPredicateIndexes.class).forPredicate(predicateFactory)); + Assert.assertNull(variantSupplierWithNull.as(DruidPredicateIndexes.class).forPredicate(predicateFactory)); // range index computation is a bit more complicated and done inside of the index maker gizmo because we don't know // the range up front - LexicographicalRangeIndex stringRange = singleTypeStringSupplier.as(LexicographicalRangeIndex.class); - NumericRangeIndex longRange = singleTypeLongSupplier.as(NumericRangeIndex.class); - NumericRangeIndex doubleRange = singleTypeDoubleSupplier.as(NumericRangeIndex.class); + LexicographicalRangeIndexes stringRange = singleTypeStringSupplier.as(LexicographicalRangeIndexes.class); + NumericRangeIndexes longRanges = singleTypeLongSupplier.as(NumericRangeIndexes.class); + NumericRangeIndexes doubleRanges = singleTypeDoubleSupplier.as(NumericRangeIndexes.class); // string: [b, foo, fooo, z] // small enough should be cool @@ -1508,40 +1508,40 @@ public double skipValuePredicateIndexScale() // long: [1, 3, 100, 300] // small enough should be cool - 
Assert.assertNotNull(longRange.forRange(1, false, 100, true)); + Assert.assertNotNull(longRanges.forRange(1, false, 100, true)); // range too big, no index - Assert.assertNull(longRange.forRange(1, false, null, false)); + Assert.assertNull(longRanges.forRange(1, false, null, false)); // double: [1.1, 1.2, 3.3, 6.6] // small enough should be cool - Assert.assertNotNull(doubleRange.forRange(null, false, 1.2, false)); + Assert.assertNotNull(doubleRanges.forRange(null, false, 1.2, false)); // range too big, no index - Assert.assertNull(doubleRange.forRange(null, false, 3.3, false)); + Assert.assertNull(doubleRanges.forRange(null, false, 3.3, false)); // other index types should not be impacted Assert.assertNotNull(singleTypeStringSupplier.as(DictionaryEncodedStringValueIndex.class)); Assert.assertNotNull(singleTypeStringSupplier.as(DictionaryEncodedValueIndex.class)); - Assert.assertNotNull(singleTypeStringSupplier.as(StringValueSetIndex.class).forValue("foo")); + Assert.assertNotNull(singleTypeStringSupplier.as(StringValueSetIndexes.class).forValue("foo")); Assert.assertNotNull( - singleTypeStringSupplier.as(StringValueSetIndex.class) + singleTypeStringSupplier.as(StringValueSetIndexes.class) .forSortedValues(new TreeSet<>(ImmutableSet.of("foo", "fooo", "z"))) ); Assert.assertNotNull(singleTypeStringSupplier.as(NullValueIndex.class)); Assert.assertNotNull(singleTypeLongSupplier.as(DictionaryEncodedStringValueIndex.class)); Assert.assertNotNull(singleTypeLongSupplier.as(DictionaryEncodedValueIndex.class)); - Assert.assertNotNull(singleTypeLongSupplier.as(StringValueSetIndex.class).forValue("1")); + Assert.assertNotNull(singleTypeLongSupplier.as(StringValueSetIndexes.class).forValue("1")); Assert.assertNotNull( - singleTypeLongSupplier.as(StringValueSetIndex.class) + singleTypeLongSupplier.as(StringValueSetIndexes.class) .forSortedValues(new TreeSet<>(ImmutableSet.of("1", "3", "100"))) ); Assert.assertNotNull(singleTypeLongSupplier.as(NullValueIndex.class)); 
Assert.assertNotNull(singleTypeDoubleSupplier.as(DictionaryEncodedStringValueIndex.class)); Assert.assertNotNull(singleTypeDoubleSupplier.as(DictionaryEncodedValueIndex.class)); - Assert.assertNotNull(singleTypeDoubleSupplier.as(StringValueSetIndex.class).forValue("1.1")); + Assert.assertNotNull(singleTypeDoubleSupplier.as(StringValueSetIndexes.class).forValue("1.1")); Assert.assertNotNull( - singleTypeDoubleSupplier.as(StringValueSetIndex.class) + singleTypeDoubleSupplier.as(StringValueSetIndexes.class) .forSortedValues(new TreeSet<>(ImmutableSet.of("1.1", "1.2", "3.3"))) ); Assert.assertNotNull(singleTypeDoubleSupplier.as(NullValueIndex.class)); @@ -1549,9 +1549,9 @@ public double skipValuePredicateIndexScale() // variant: [null, b, z, 1, 300, 1.1, 9.9] Assert.assertNotNull(variantSupplierWithNull.as(DictionaryEncodedStringValueIndex.class)); Assert.assertNotNull(variantSupplierWithNull.as(DictionaryEncodedValueIndex.class)); - Assert.assertNotNull(variantSupplierWithNull.as(StringValueSetIndex.class).forValue("b")); + Assert.assertNotNull(variantSupplierWithNull.as(StringValueSetIndexes.class).forValue("b")); Assert.assertNotNull( - variantSupplierWithNull.as(StringValueSetIndex.class) + variantSupplierWithNull.as(StringValueSetIndexes.class) .forSortedValues(new TreeSet<>(ImmutableSet.of("b", "1", "9.9"))) ); Assert.assertNotNull(variantSupplierWithNull.as(NullValueIndex.class)); diff --git a/processing/src/test/java/org/apache/druid/segment/nested/ScalarDoubleColumnSupplierTest.java b/processing/src/test/java/org/apache/druid/segment/nested/ScalarDoubleColumnSupplierTest.java index 6e02e5bca0d1..383a2046db9e 100644 --- a/processing/src/test/java/org/apache/druid/segment/nested/ScalarDoubleColumnSupplierTest.java +++ b/processing/src/test/java/org/apache/druid/segment/nested/ScalarDoubleColumnSupplierTest.java @@ -44,9 +44,9 @@ import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.data.BitmapSerdeFactory; import 
org.apache.druid.segment.data.RoaringBitmapSerdeFactory; -import org.apache.druid.segment.index.semantic.DruidPredicateIndex; +import org.apache.druid.segment.index.semantic.DruidPredicateIndexes; import org.apache.druid.segment.index.semantic.NullValueIndex; -import org.apache.druid.segment.index.semantic.StringValueSetIndex; +import org.apache.druid.segment.index.semantic.StringValueSetIndexes; import org.apache.druid.segment.vector.NoFilterVectorOffset; import org.apache.druid.segment.vector.VectorValueSelector; import org.apache.druid.segment.writeout.SegmentWriteOutMediumFactory; @@ -248,8 +248,8 @@ private void smokeTest(ScalarDoubleColumnAndIndexSupplier supplier, ScalarDouble ColumnValueSelector valueSelector = column.makeColumnValueSelector(offset); VectorValueSelector vectorValueSelector = column.makeVectorValueSelector(vectorOffset); - StringValueSetIndex valueSetIndex = supplier.as(StringValueSetIndex.class); - DruidPredicateIndex predicateIndex = supplier.as(DruidPredicateIndex.class); + StringValueSetIndexes valueSetIndex = supplier.as(StringValueSetIndexes.class); + DruidPredicateIndexes predicateIndex = supplier.as(DruidPredicateIndexes.class); NullValueIndex nullValueIndex = supplier.as(NullValueIndex.class); SortedMap fields = column.getFieldTypeInfo(); @@ -292,7 +292,7 @@ private void smokeTest(ScalarDoubleColumnAndIndexSupplier supplier, ScalarDouble Assert.assertFalse(predicateIndex.forPredicate(new SelectorPredicateFactory(NO_MATCH)) .computeBitmapResult(resultFactory) .get(i)); - Assert.assertFalse(nullValueIndex.forNull().computeBitmapResult(resultFactory).get(i)); + Assert.assertFalse(nullValueIndex.get().computeBitmapResult(resultFactory).get(i)); } else { if (NullHandling.sqlCompatible()) { @@ -300,7 +300,7 @@ private void smokeTest(ScalarDoubleColumnAndIndexSupplier supplier, ScalarDouble Assert.assertTrue(valueSelector.isNull()); Assert.assertTrue(vectorValueSelector.getNullVector()[0]); 
Assert.assertTrue(valueSetIndex.forValue(null).computeBitmapResult(resultFactory).get(i)); - Assert.assertTrue(nullValueIndex.forNull().computeBitmapResult(resultFactory).get(i)); + Assert.assertTrue(nullValueIndex.get().computeBitmapResult(resultFactory).get(i)); Assert.assertTrue(predicateIndex.forPredicate(new SelectorPredicateFactory(null)) .computeBitmapResult(resultFactory) .get(i)); @@ -311,7 +311,7 @@ private void smokeTest(ScalarDoubleColumnAndIndexSupplier supplier, ScalarDouble Assert.assertNull(vectorValueSelector.getNullVector()); Assert.assertFalse(valueSetIndex.forValue(null).computeBitmapResult(resultFactory).get(i)); - Assert.assertFalse(nullValueIndex.forNull().computeBitmapResult(resultFactory).get(i)); + Assert.assertFalse(nullValueIndex.get().computeBitmapResult(resultFactory).get(i)); Assert.assertFalse(predicateIndex.forPredicate(new SelectorPredicateFactory(null)) .computeBitmapResult(resultFactory) .get(i)); diff --git a/processing/src/test/java/org/apache/druid/segment/nested/ScalarLongColumnSupplierTest.java b/processing/src/test/java/org/apache/druid/segment/nested/ScalarLongColumnSupplierTest.java index 408b608f4b6f..87c4fcd8303a 100644 --- a/processing/src/test/java/org/apache/druid/segment/nested/ScalarLongColumnSupplierTest.java +++ b/processing/src/test/java/org/apache/druid/segment/nested/ScalarLongColumnSupplierTest.java @@ -44,9 +44,9 @@ import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.data.BitmapSerdeFactory; import org.apache.druid.segment.data.RoaringBitmapSerdeFactory; -import org.apache.druid.segment.index.semantic.DruidPredicateIndex; +import org.apache.druid.segment.index.semantic.DruidPredicateIndexes; import org.apache.druid.segment.index.semantic.NullValueIndex; -import org.apache.druid.segment.index.semantic.StringValueSetIndex; +import org.apache.druid.segment.index.semantic.StringValueSetIndexes; import org.apache.druid.segment.vector.NoFilterVectorOffset; import 
org.apache.druid.segment.vector.VectorValueSelector; import org.apache.druid.segment.writeout.SegmentWriteOutMediumFactory; @@ -248,8 +248,8 @@ private void smokeTest(ScalarLongColumnAndIndexSupplier supplier, ScalarLongColu ColumnValueSelector valueSelector = column.makeColumnValueSelector(offset); VectorValueSelector vectorValueSelector = column.makeVectorValueSelector(vectorOffset); - StringValueSetIndex valueSetIndex = supplier.as(StringValueSetIndex.class); - DruidPredicateIndex predicateIndex = supplier.as(DruidPredicateIndex.class); + StringValueSetIndexes valueSetIndex = supplier.as(StringValueSetIndexes.class); + DruidPredicateIndexes predicateIndex = supplier.as(DruidPredicateIndexes.class); NullValueIndex nullValueIndex = supplier.as(NullValueIndex.class); SortedMap fields = column.getFieldTypeInfo(); @@ -292,7 +292,7 @@ private void smokeTest(ScalarLongColumnAndIndexSupplier supplier, ScalarLongColu Assert.assertFalse(predicateIndex.forPredicate(new SelectorPredicateFactory(NO_MATCH)) .computeBitmapResult(resultFactory) .get(i)); - Assert.assertFalse(nullValueIndex.forNull().computeBitmapResult(resultFactory).get(i)); + Assert.assertFalse(nullValueIndex.get().computeBitmapResult(resultFactory).get(i)); } else { if (NullHandling.sqlCompatible()) { @@ -300,7 +300,7 @@ private void smokeTest(ScalarLongColumnAndIndexSupplier supplier, ScalarLongColu Assert.assertTrue(valueSelector.isNull()); Assert.assertTrue(vectorValueSelector.getNullVector()[0]); Assert.assertTrue(valueSetIndex.forValue(null).computeBitmapResult(resultFactory).get(i)); - Assert.assertTrue(nullValueIndex.forNull().computeBitmapResult(resultFactory).get(i)); + Assert.assertTrue(nullValueIndex.get().computeBitmapResult(resultFactory).get(i)); Assert.assertTrue(predicateIndex.forPredicate(new SelectorPredicateFactory(null)) .computeBitmapResult(resultFactory) .get(i)); @@ -309,7 +309,7 @@ private void smokeTest(ScalarLongColumnAndIndexSupplier supplier, ScalarLongColu 
Assert.assertFalse(valueSelector.isNull()); Assert.assertNull(vectorValueSelector.getNullVector()); Assert.assertFalse(valueSetIndex.forValue(null).computeBitmapResult(resultFactory).get(i)); - Assert.assertFalse(nullValueIndex.forNull().computeBitmapResult(resultFactory).get(i)); + Assert.assertFalse(nullValueIndex.get().computeBitmapResult(resultFactory).get(i)); Assert.assertFalse(predicateIndex.forPredicate(new SelectorPredicateFactory(null)) .computeBitmapResult(resultFactory) .get(i)); diff --git a/processing/src/test/java/org/apache/druid/segment/nested/ScalarStringColumnSupplierTest.java b/processing/src/test/java/org/apache/druid/segment/nested/ScalarStringColumnSupplierTest.java index 2bf5c1ace4ec..da5edc734e61 100644 --- a/processing/src/test/java/org/apache/druid/segment/nested/ScalarStringColumnSupplierTest.java +++ b/processing/src/test/java/org/apache/druid/segment/nested/ScalarStringColumnSupplierTest.java @@ -46,9 +46,9 @@ import org.apache.druid.segment.column.StringUtf8DictionaryEncodedColumn; import org.apache.druid.segment.data.BitmapSerdeFactory; import org.apache.druid.segment.data.RoaringBitmapSerdeFactory; -import org.apache.druid.segment.index.semantic.DruidPredicateIndex; +import org.apache.druid.segment.index.semantic.DruidPredicateIndexes; import org.apache.druid.segment.index.semantic.NullValueIndex; -import org.apache.druid.segment.index.semantic.StringValueSetIndex; +import org.apache.druid.segment.index.semantic.StringValueSetIndexes; import org.apache.druid.segment.writeout.SegmentWriteOutMediumFactory; import org.apache.druid.segment.writeout.TmpFileSegmentWriteOutMediumFactory; import org.apache.druid.testing.InitializedNullHandlingTest; @@ -248,8 +248,8 @@ private void smokeTest(ScalarStringColumnAndIndexSupplier supplier, StringUtf8Di ColumnValueSelector valueSelector = column.makeColumnValueSelector(offset); DimensionSelector dimSelector = column.makeDimensionSelector(offset, null); - StringValueSetIndex valueSetIndex = 
supplier.as(StringValueSetIndex.class); - DruidPredicateIndex predicateIndex = supplier.as(DruidPredicateIndex.class); + StringValueSetIndexes valueSetIndex = supplier.as(StringValueSetIndexes.class); + DruidPredicateIndexes predicateIndex = supplier.as(DruidPredicateIndexes.class); NullValueIndex nullValueIndex = supplier.as(NullValueIndex.class); SortedMap fields = column.getFieldTypeInfo(); @@ -283,7 +283,7 @@ private void smokeTest(ScalarStringColumnAndIndexSupplier supplier, StringUtf8Di Assert.assertFalse(predicateIndex.forPredicate(new SelectorPredicateFactory(NO_MATCH)) .computeBitmapResult(resultFactory) .get(i)); - Assert.assertFalse(nullValueIndex.forNull().computeBitmapResult(resultFactory).get(i)); + Assert.assertFalse(nullValueIndex.get().computeBitmapResult(resultFactory).get(i)); Assert.assertTrue(dimSelector.makeValueMatcher(row).matches()); Assert.assertFalse(dimSelector.makeValueMatcher(NO_MATCH).matches()); @@ -298,7 +298,7 @@ private void smokeTest(ScalarStringColumnAndIndexSupplier supplier, StringUtf8Di Assert.assertTrue(valueSetIndex.forValue(null).computeBitmapResult(resultFactory).get(i)); Assert.assertFalse(valueSetIndex.forValue(NO_MATCH).computeBitmapResult(resultFactory).get(i)); - Assert.assertTrue(nullValueIndex.forNull().computeBitmapResult(resultFactory).get(i)); + Assert.assertTrue(nullValueIndex.get().computeBitmapResult(resultFactory).get(i)); Assert.assertTrue(predicateIndex.forPredicate(new SelectorPredicateFactory(null)) .computeBitmapResult(resultFactory) .get(i)); diff --git a/processing/src/test/java/org/apache/druid/segment/nested/VariantColumnSupplierTest.java b/processing/src/test/java/org/apache/druid/segment/nested/VariantColumnSupplierTest.java index fa7645592246..69fe1dc9bfed 100644 --- a/processing/src/test/java/org/apache/druid/segment/nested/VariantColumnSupplierTest.java +++ b/processing/src/test/java/org/apache/druid/segment/nested/VariantColumnSupplierTest.java @@ -47,9 +47,9 @@ import 
org.apache.druid.segment.data.CompressionFactory; import org.apache.druid.segment.data.FrontCodedIndexed; import org.apache.druid.segment.data.RoaringBitmapSerdeFactory; -import org.apache.druid.segment.index.semantic.DruidPredicateIndex; +import org.apache.druid.segment.index.semantic.DruidPredicateIndexes; import org.apache.druid.segment.index.semantic.NullValueIndex; -import org.apache.druid.segment.index.semantic.StringValueSetIndex; +import org.apache.druid.segment.index.semantic.StringValueSetIndexes; import org.apache.druid.segment.vector.NoFilterVectorOffset; import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector; import org.apache.druid.segment.vector.VectorObjectSelector; @@ -371,9 +371,9 @@ private void smokeTest( SingleValueDimensionVectorSelector dimensionVectorSelector = expectedLogicalType.isPrimitive() ? column.makeSingleValueDimensionVectorSelector(vectorOffset) : null; - StringValueSetIndex valueSetIndex = supplier.as(StringValueSetIndex.class); + StringValueSetIndexes valueSetIndex = supplier.as(StringValueSetIndexes.class); Assert.assertNull(valueSetIndex); - DruidPredicateIndex predicateIndex = supplier.as(DruidPredicateIndex.class); + DruidPredicateIndexes predicateIndex = supplier.as(DruidPredicateIndexes.class); Assert.assertNull(predicateIndex); NullValueIndex nullValueIndex = supplier.as(NullValueIndex.class); Assert.assertNotNull(nullValueIndex); @@ -427,7 +427,7 @@ private void smokeTest( } } } - Assert.assertFalse(nullValueIndex.forNull().computeBitmapResult(resultFactory).get(i)); + Assert.assertFalse(nullValueIndex.get().computeBitmapResult(resultFactory).get(i)); } else { Assert.assertNull(valueSelector.getObject()); @@ -439,7 +439,7 @@ private void smokeTest( Assert.assertNull(dimensionVectorSelector.lookupName(dimensionVectorSelector.getRowVector()[0])); } } - Assert.assertTrue(nullValueIndex.forNull().computeBitmapResult(resultFactory).get(i)); + 
Assert.assertTrue(nullValueIndex.get().computeBitmapResult(resultFactory).get(i)); } offset.increment(); diff --git a/processing/src/test/java/org/apache/druid/segment/serde/DictionaryEncodedStringIndexSupplierTest.java b/processing/src/test/java/org/apache/druid/segment/serde/DictionaryEncodedStringIndexSupplierTest.java index d65c085d584b..8843bd788565 100644 --- a/processing/src/test/java/org/apache/druid/segment/serde/DictionaryEncodedStringIndexSupplierTest.java +++ b/processing/src/test/java/org/apache/druid/segment/serde/DictionaryEncodedStringIndexSupplierTest.java @@ -29,7 +29,7 @@ import org.apache.druid.segment.data.GenericIndexedWriter; import org.apache.druid.segment.data.RoaringBitmapSerdeFactory; import org.apache.druid.segment.index.BitmapColumnIndex; -import org.apache.druid.segment.index.semantic.StringValueSetIndex; +import org.apache.druid.segment.index.semantic.StringValueSetIndexes; import org.apache.druid.segment.writeout.OnHeapMemorySegmentWriteOutMedium; import org.apache.druid.testing.InitializedNullHandlingTest; import org.junit.Assert; @@ -53,7 +53,7 @@ public class DictionaryEncodedStringIndexSupplierTest extends InitializedNullHan public void testStringColumnWithNullValueSetIndex() throws IOException { StringUtf8ColumnIndexSupplier indexSupplier = makeStringWithNullsSupplier(); - StringValueSetIndex valueSetIndex = indexSupplier.as(StringValueSetIndex.class); + StringValueSetIndexes valueSetIndex = indexSupplier.as(StringValueSetIndexes.class); Assert.assertNotNull(valueSetIndex); // 10 rows diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/filtration/Ranges.java b/sql/src/main/java/org/apache/druid/sql/calcite/filtration/Ranges.java index 03f3366bcdf5..2165e8bc7dc1 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/filtration/Ranges.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/filtration/Ranges.java @@ -52,7 +52,7 @@ public static RangeFilter not(final RangeFilter range) range.getMatchValueType(), 
range.getUpper(), null, - !range.isUpperStrict(), + !range.isUpperOpen(), false, range.getExtractionFn(), range.getFilterTuning() @@ -65,7 +65,7 @@ public static RangeFilter not(final RangeFilter range) null, range.getLower(), false, - !range.isLowerStrict(), + !range.isLowerOpen(), range.getExtractionFn(), range.getFilterTuning() ); @@ -82,12 +82,12 @@ public static Range toRange(final RangeFilter range) : null; if (lower == null) { - return range.isUpperStrict() ? Range.lessThan(upper) : Range.atMost(upper); + return range.isUpperOpen() ? Range.lessThan(upper) : Range.atMost(upper); } else if (upper == null) { - return range.isLowerStrict() ? Range.greaterThan(lower) : Range.atLeast(lower); + return range.isLowerOpen() ? Range.greaterThan(lower) : Range.atLeast(lower); } else { - BoundType lowerBoundType = range.isLowerStrict() ? BoundType.OPEN : BoundType.CLOSED; - BoundType upperBoundType = range.isUpperStrict() ? BoundType.OPEN : BoundType.CLOSED; + BoundType lowerBoundType = range.isLowerOpen() ? BoundType.OPEN : BoundType.CLOSED; + BoundType upperBoundType = range.isUpperOpen() ? BoundType.OPEN : BoundType.CLOSED; return Range.range(lower, lowerBoundType, upper, upperBoundType); } } @@ -105,12 +105,12 @@ public static Range toRange(final RangeFilter range, final ColumnTyp : null; if (lower == null) { - return range.isUpperStrict() ? Range.lessThan(upper) : Range.atMost(upper); + return range.isUpperOpen() ? Range.lessThan(upper) : Range.atMost(upper); } else if (upper == null) { - return range.isLowerStrict() ? Range.greaterThan(lower) : Range.atLeast(lower); + return range.isLowerOpen() ? Range.greaterThan(lower) : Range.atLeast(lower); } else { - BoundType lowerBoundType = range.isLowerStrict() ? BoundType.OPEN : BoundType.CLOSED; - BoundType upperBoundType = range.isUpperStrict() ? BoundType.OPEN : BoundType.CLOSED; + BoundType lowerBoundType = range.isLowerOpen() ? BoundType.OPEN : BoundType.CLOSED; + BoundType upperBoundType = range.isUpperOpen() ? 
BoundType.OPEN : BoundType.CLOSED; return Range.range(lower, lowerBoundType, upper, upperBoundType); } } From bd8eecbc99ee5d819ff90b01beb16cf5cd92424a Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Fri, 14 Jul 2023 21:26:22 -0700 Subject: [PATCH 35/44] revert --- .../virtual/NestedFieldVirtualColumn.java | 23 ++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumn.java b/processing/src/main/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumn.java index ff72fd37f106..dd28ea2eb5e2 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumn.java @@ -26,6 +26,7 @@ import com.google.common.base.Preconditions; import com.google.common.base.Predicate; import com.google.common.primitives.Doubles; +import org.apache.druid.common.guava.GuavaUtils; import org.apache.druid.java.util.common.IAE; import org.apache.druid.java.util.common.Numbers; import org.apache.druid.math.expr.Evals; @@ -40,7 +41,6 @@ import org.apache.druid.segment.ColumnSelector; import org.apache.druid.segment.ColumnSelectorFactory; import org.apache.druid.segment.ColumnValueSelector; -import org.apache.druid.segment.DimensionHandlerUtils; import org.apache.druid.segment.DimensionSelector; import org.apache.druid.segment.IdLookup; import org.apache.druid.segment.NilColumnValueSelector; @@ -1013,7 +1013,19 @@ private void computeVectorsIfNeeded() longVector[i] = 0L; nullVector[i] = true; } else { - Long l = DimensionHandlerUtils.convertObjectToLong(v); + Long l; + if (v instanceof Number) { + l = ((Number) v).longValue(); + } else { + final String s = String.valueOf(v); + l = GuavaUtils.tryParseLong(s); + if (l == null) { + final Double d = Doubles.tryParse(s); + if (d != null) { + l = d.longValue(); + } + } + } if (l != null) { longVector[i] = l; if 
(nullVector != null) { @@ -1071,7 +1083,12 @@ private void computeVectorsIfNeeded() doubleVector[i] = 0.0; nullVector[i] = true; } else { - Double d = DimensionHandlerUtils.convertObjectToDouble(v); + Double d; + if (v instanceof Number) { + d = ((Number) v).doubleValue(); + } else { + d = Doubles.tryParse(String.valueOf(v)); + } if (d != null) { doubleVector[i] = d; if (nullVector != null) { From 5a00b5326ee9583ed10debe14cb9294c11b57a47 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Sun, 16 Jul 2023 14:38:23 -0700 Subject: [PATCH 36/44] remove bloom filter array support for now --- .../druid/query/filter/BloomDimFilter.java | 28 ------------------- .../query/filter/BloomDimFilterTest.java | 28 ++++++------------- 2 files changed, 8 insertions(+), 48 deletions(-) diff --git a/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/filter/BloomDimFilter.java b/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/filter/BloomDimFilter.java index 1963aa8e7632..a22fec727464 100644 --- a/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/filter/BloomDimFilter.java +++ b/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/filter/BloomDimFilter.java @@ -28,12 +28,8 @@ import com.google.common.collect.ImmutableSet; import com.google.common.collect.RangeSet; import com.google.common.hash.HashCode; -import org.apache.druid.math.expr.ExprEval; -import org.apache.druid.math.expr.ExpressionType; import org.apache.druid.query.cache.CacheKeyBuilder; import org.apache.druid.query.extraction.ExtractionFn; -import org.apache.druid.segment.column.TypeSignature; -import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.filter.DimensionPredicateFilter; import javax.annotation.Nullable; @@ -169,30 +165,6 @@ public boolean applyNull() } }; } - - @Override - public Predicate makeArrayPredicate(@Nullable TypeSignature arrayType) - { - if (arrayType == null) { - // fall back to per row 
detection - the only time arrayType should ever be null is if an object predicate - // that detects an Object[] input - return input -> { - if (input == null) { - return bloomKFilter.testBytes(null, 0, 0); - } - final byte[] bytes = ExprEval.toBytesBestEffort(input); - return bloomKFilter.testBytes(bytes); - }; - } - final ExpressionType expressionType = ExpressionType.fromColumnTypeStrict(arrayType); - return input -> { - if (input == null) { - return bloomKFilter.testBytes(null, 0, 0); - } - final byte[] bytes = ExprEval.toBytes(expressionType, input); - return bloomKFilter.testBytes(bytes); - }; - } }, extractionFn, filterTuning diff --git a/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/filter/BloomDimFilterTest.java b/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/filter/BloomDimFilterTest.java index 2c4823f7e988..a0a67270cfc7 100644 --- a/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/filter/BloomDimFilterTest.java +++ b/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/filter/BloomDimFilterTest.java @@ -33,8 +33,6 @@ import org.apache.druid.jackson.DefaultObjectMapper; import org.apache.druid.java.util.common.DateTimes; import org.apache.druid.java.util.common.Pair; -import org.apache.druid.math.expr.ExprEval; -import org.apache.druid.math.expr.ExpressionType; import org.apache.druid.query.extraction.MapLookupExtractor; import org.apache.druid.query.extraction.TimeDimExtractionFn; import org.apache.druid.query.lookup.LookupExtractionFn; @@ -206,29 +204,19 @@ public void testMultiValueStringColumn() throws IOException isAutoSchema() ? ImmutableList.of("5") : ImmutableList.of("1", "2", "5") ); } else { - assertFilterMatches( + assertFilterMatchesSkipArrays( new BloomDimFilter("dim2", bloomKFilter(1000, (String) null), null), - isAutoSchema() ? 
ImmutableList.of("5") : ImmutableList.of("1", "5") + ImmutableList.of("1", "5") ); - assertFilterMatches( + assertFilterMatchesSkipArrays( new BloomDimFilter("dim2", bloomKFilter(1000, ""), null), - isAutoSchema() ? ImmutableList.of() : ImmutableList.of("2") + ImmutableList.of("2") ); - if (isAutoSchema()) { - assertFilterMatches( - new BloomDimFilter( - "dim2", - bloomKFilter(1000, ExprEval.toBytes(ExpressionType.STRING_ARRAY, ImmutableList.of("a", "b"))), - null - ), - ImmutableList.of("0") - ); - } } - assertFilterMatches(new BloomDimFilter("dim2", bloomKFilter(1000, "a"), null), isAutoSchema() ? ImmutableList.of() : ImmutableList.of("0", "3")); - assertFilterMatches(new BloomDimFilter("dim2", bloomKFilter(1000, "b"), null), isAutoSchema() ? ImmutableList.of() : ImmutableList.of("0")); - assertFilterMatches(new BloomDimFilter("dim2", bloomKFilter(1000, "c"), null), isAutoSchema() ? ImmutableList.of() : ImmutableList.of("4")); - assertFilterMatches(new BloomDimFilter("dim2", bloomKFilter(1000, "d"), null), ImmutableList.of()); + assertFilterMatchesSkipArrays(new BloomDimFilter("dim2", bloomKFilter(1000, "a"), null), ImmutableList.of("0", "3")); + assertFilterMatchesSkipArrays(new BloomDimFilter("dim2", bloomKFilter(1000, "b"), null), ImmutableList.of("0")); + assertFilterMatchesSkipArrays(new BloomDimFilter("dim2", bloomKFilter(1000, "c"), null), ImmutableList.of("4")); + assertFilterMatchesSkipArrays(new BloomDimFilter("dim2", bloomKFilter(1000, "d"), null), ImmutableList.of()); } @Test From c72f98cd729c9ee5df85490aaeb26356f9fd005a Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Sun, 16 Jul 2023 21:19:09 -0700 Subject: [PATCH 37/44] fix test --- .../org/apache/druid/query/filter/BloomDimFilterTest.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/filter/BloomDimFilterTest.java 
b/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/filter/BloomDimFilterTest.java index a0a67270cfc7..26290da5578c 100644 --- a/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/filter/BloomDimFilterTest.java +++ b/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/filter/BloomDimFilterTest.java @@ -199,9 +199,9 @@ public void testSingleValueStringColumnWithNulls() throws IOException public void testMultiValueStringColumn() throws IOException { if (NullHandling.replaceWithDefault()) { - assertFilterMatches( + assertFilterMatchesSkipArrays( new BloomDimFilter("dim2", bloomKFilter(1000, (String) null), null), - isAutoSchema() ? ImmutableList.of("5") : ImmutableList.of("1", "2", "5") + ImmutableList.of("1", "2", "5") ); } else { assertFilterMatchesSkipArrays( From 4806c01caf5724ea7f7b86e65dc72f155b4451be Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Sun, 16 Jul 2023 22:25:44 -0700 Subject: [PATCH 38/44] remove unused --- .../org/apache/druid/math/expr/ExprEval.java | 21 ------------------- 1 file changed, 21 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/math/expr/ExprEval.java b/processing/src/main/java/org/apache/druid/math/expr/ExprEval.java index 2b855919a4d9..15334ce919be 100644 --- a/processing/src/main/java/org/apache/druid/math/expr/ExprEval.java +++ b/processing/src/main/java/org/apache/druid/math/expr/ExprEval.java @@ -142,27 +142,6 @@ public static void serialize(ByteBuffer buffer, int position, ExpressionType typ } } - public static byte[] toBytes(ExpressionType expressionType, Object o) - { - final ExprEval eval = ExprEval.ofType(expressionType, o); - return toBytes(eval); - } - - public static byte[] toBytesBestEffort(Object o) - { - final ExprEval eval = ExprEval.bestEffortOf(o); - return toBytes(eval); - } - - public static byte[] toBytes(ExprEval eval) - { - final NullableTypeStrategy strategy = eval.type().getNullableStrategy(); - final int size = 
strategy.estimateSizeBytes(eval.valueOrDefault()); - final ByteBuffer buffer = ByteBuffer.allocate(size); - strategy.write(buffer, eval.valueOrDefault(), size); - return buffer.array(); - } - /** * Converts a List to an appropriate array type, optionally doing some conversion to make multi-valued strings * consistent across selector types, which are not consistent in treatment of null, [], and [null]. From 0e02f3135f6dd875825ad7e4980356810d948f87 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Mon, 17 Jul 2023 12:42:19 -0700 Subject: [PATCH 39/44] expression array comparator stuff --- .../BloomFilterExpressionsTest.java | 2 +- .../math/expr/BinaryLogicalOperatorExpr.java | 78 ++++++++++--------- .../druid/math/expr/BinaryOperatorExpr.java | 21 +++-- .../math/expr/ExpressionTypeConversion.java | 3 + .../apache/druid/math/expr/InputBindings.java | 2 +- .../segment/virtual/ExpressionSelectors.java | 2 +- .../org/apache/druid/math/expr/EvalTest.java | 42 ++++++++++ .../druid/query/expression/ExprMacroTest.java | 2 +- .../HyperUniqueExpressionsTest.java | 2 +- .../expression/NestedDataExpressionsTest.java | 2 +- .../RegexpReplaceExprMacroTest.java | 2 +- .../query/expression/LookupExprMacroTest.java | 2 +- 12 files changed, 111 insertions(+), 49 deletions(-) diff --git a/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/expressions/BloomFilterExpressionsTest.java b/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/expressions/BloomFilterExpressionsTest.java index ca03eafb0a26..31d59a4423a8 100644 --- a/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/expressions/BloomFilterExpressionsTest.java +++ b/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/expressions/BloomFilterExpressionsTest.java @@ -51,7 +51,7 @@ public class BloomFilterExpressionsTest extends InitializedNullHandlingTest ExprMacroTable macroTable = new ExprMacroTable(ImmutableList.of(createMacro, addMacro, testMacro)); 
Expr.ObjectBinding inputBindings = InputBindings.forInputSuppliers( - new ImmutableMap.Builder() + new ImmutableMap.Builder>() .put("bloomy", InputBindings.inputSupplier(BloomFilterExpressions.BLOOM_FILTER_TYPE, () -> new BloomKFilter(100))) .put("string", InputBindings.inputSupplier(ExpressionType.STRING, () -> SOME_STRING)) .put("long", InputBindings.inputSupplier(ExpressionType.LONG, () -> SOME_LONG)) diff --git a/processing/src/main/java/org/apache/druid/math/expr/BinaryLogicalOperatorExpr.java b/processing/src/main/java/org/apache/druid/math/expr/BinaryLogicalOperatorExpr.java index de1de837818e..5fe60bdc9ef1 100644 --- a/processing/src/main/java/org/apache/druid/math/expr/BinaryLogicalOperatorExpr.java +++ b/processing/src/main/java/org/apache/druid/math/expr/BinaryLogicalOperatorExpr.java @@ -50,6 +50,13 @@ protected boolean evalString(@Nullable String left, @Nullable String right) return Comparators.naturalNullsFirst().compare(left, right) < 0; } + @Override + protected boolean evalArray(ExprEval left, ExprEval right) { + ExpressionType type = ExpressionTypeConversion.leastRestrictiveType(left.type(), right.type()); + // type cannot be null here because ExprEval type is not nullable + return type.getNullableStrategy().compare(left.castTo(type).asArray(), right.castTo(type).asArray()) < 0; + } + @Override protected final boolean evalLong(long left, long right) { @@ -63,12 +70,6 @@ protected final boolean evalDouble(double left, double right) return Double.compare(left, right) < 0; } - @Override - public boolean canVectorize(InputBindingInspector inspector) - { - return inspector.canVectorize(left, right); - } - @Override public ExprVectorProcessor asVectorProcessor(VectorInputBindingInspector inspector) { @@ -96,6 +97,13 @@ protected boolean evalString(@Nullable String left, @Nullable String right) return Comparators.naturalNullsFirst().compare(left, right) <= 0; } + @Override + protected boolean evalArray(ExprEval left, ExprEval right) { + ExpressionType 
type = ExpressionTypeConversion.leastRestrictiveType(left.type(), right.type()); + // type cannot be null here because ExprEval type is not nullable + return type.getNullableStrategy().compare(left.castTo(type).asArray(), right.castTo(type).asArray()) <= 0; + } + @Override protected final boolean evalLong(long left, long right) { @@ -109,12 +117,6 @@ protected final boolean evalDouble(double left, double right) return Double.compare(left, right) <= 0; } - @Override - public boolean canVectorize(InputBindingInspector inspector) - { - return inspector.canVectorize(left, right); - } - @Override public ExprVectorProcessor asVectorProcessor(VectorInputBindingInspector inspector) { @@ -142,6 +144,13 @@ protected boolean evalString(@Nullable String left, @Nullable String right) return Comparators.naturalNullsFirst().compare(left, right) > 0; } + @Override + protected boolean evalArray(ExprEval left, ExprEval right) { + ExpressionType type = ExpressionTypeConversion.leastRestrictiveType(left.type(), right.type()); + // type cannot be null here because ExprEval type is not nullable + return type.getNullableStrategy().compare(left.castTo(type).asArray(), right.castTo(type).asArray()) > 0; + } + @Override protected final boolean evalLong(long left, long right) { @@ -155,12 +164,6 @@ protected final boolean evalDouble(double left, double right) return Double.compare(left, right) > 0; } - @Override - public boolean canVectorize(InputBindingInspector inspector) - { - return inspector.canVectorize(left, right); - } - @Override public ExprVectorProcessor asVectorProcessor(VectorInputBindingInspector inspector) { @@ -188,6 +191,13 @@ protected boolean evalString(@Nullable String left, @Nullable String right) return Comparators.naturalNullsFirst().compare(left, right) >= 0; } + @Override + protected boolean evalArray(ExprEval left, ExprEval right) { + ExpressionType type = ExpressionTypeConversion.leastRestrictiveType(left.type(), right.type()); + // type cannot be null here because 
ExprEval type is not nullable + return type.getNullableStrategy().compare(left.castTo(type).asArray(), right.castTo(type).asArray()) >= 0; + } + @Override protected final boolean evalLong(long left, long right) { @@ -201,12 +211,6 @@ protected final boolean evalDouble(double left, double right) return Double.compare(left, right) >= 0; } - @Override - public boolean canVectorize(InputBindingInspector inspector) - { - return inspector.canVectorize(left, right); - } - @Override public ExprVectorProcessor asVectorProcessor(VectorInputBindingInspector inspector) { @@ -235,21 +239,22 @@ protected boolean evalString(@Nullable String left, @Nullable String right) } @Override - protected final boolean evalLong(long left, long right) - { - return left == right; + protected boolean evalArray(ExprEval left, ExprEval right) { + ExpressionType type = ExpressionTypeConversion.leastRestrictiveType(left.type(), right.type()); + // type cannot be null here because ExprEval type is not nullable + return type.getNullableStrategy().compare(left.castTo(type).asArray(), right.castTo(type).asArray()) == 0; } @Override - protected final boolean evalDouble(double left, double right) + protected final boolean evalLong(long left, long right) { return left == right; } @Override - public boolean canVectorize(InputBindingInspector inspector) + protected final boolean evalDouble(double left, double right) { - return inspector.canVectorize(left, right); + return left == right; } @Override @@ -280,21 +285,22 @@ protected boolean evalString(@Nullable String left, @Nullable String right) } @Override - protected final boolean evalLong(long left, long right) - { - return left != right; + protected boolean evalArray(ExprEval left, ExprEval right) { + ExpressionType type = ExpressionTypeConversion.leastRestrictiveType(left.type(), right.type()); + // type cannot be null here because ExprEval type is not nullable + return type.getNullableStrategy().compare(left.castTo(type).asArray(), 
right.castTo(type).asArray()) != 0; } @Override - protected final boolean evalDouble(double left, double right) + protected final boolean evalLong(long left, long right) { return left != right; } @Override - public boolean canVectorize(InputBindingInspector inspector) + protected final boolean evalDouble(double left, double right) { - return inspector.canVectorize(left, right); + return left != right; } @Override diff --git a/processing/src/main/java/org/apache/druid/math/expr/BinaryOperatorExpr.java b/processing/src/main/java/org/apache/druid/math/expr/BinaryOperatorExpr.java index 64acb04bb58f..5a26eb279f9a 100644 --- a/processing/src/main/java/org/apache/druid/math/expr/BinaryOperatorExpr.java +++ b/processing/src/main/java/org/apache/druid/math/expr/BinaryOperatorExpr.java @@ -195,6 +195,9 @@ public ExprEval eval(ObjectBinding bindings) case LONG: result = evalLong(leftVal.asLong(), rightVal.asLong()); break; + case ARRAY: + result = evalArray(leftVal, rightVal); + break; case DOUBLE: default: if (NullHandling.sqlCompatible() && (leftVal.isNumericNull() || rightVal.isNumericNull())) { @@ -203,21 +206,20 @@ public ExprEval eval(ObjectBinding bindings) result = evalDouble(leftVal.asDouble(), rightVal.asDouble()); break; } - if (!ExpressionProcessing.useStrictBooleans() && !type.is(ExprType.STRING)) { + if (!ExpressionProcessing.useStrictBooleans() && !type.is(ExprType.STRING) && !type.isArray()) { return ExprEval.ofBoolean(result, type.getType()); } return ExprEval.ofLongBoolean(result); } - protected boolean evalString(@Nullable String left, @Nullable String right) - { - throw new IllegalArgumentException("unsupported type " + ExprType.STRING); - } + protected abstract boolean evalString(@Nullable String left, @Nullable String right); protected abstract boolean evalLong(long left, long right); protected abstract boolean evalDouble(double left, double right); + protected abstract boolean evalArray(ExprEval left, ExprEval right); + @Nullable @Override public 
ExpressionType getOutputType(InputBindingInspector inspector) @@ -228,4 +230,13 @@ public ExpressionType getOutputType(InputBindingInspector inspector) } return implicitCast; } + + @Override + public boolean canVectorize(InputBindingInspector inspector) + { + ExpressionType leftType = left.getOutputType(inspector); + ExpressionType rightType = right.getOutputType(inspector); + ExpressionType commonType = ExpressionTypeConversion.leastRestrictiveType(leftType, rightType); + return inspector.canVectorize(left, right) && !(commonType == null || commonType.isArray()); + } } diff --git a/processing/src/main/java/org/apache/druid/math/expr/ExpressionTypeConversion.java b/processing/src/main/java/org/apache/druid/math/expr/ExpressionTypeConversion.java index eb0e0219463d..fbd63dc90874 100644 --- a/processing/src/main/java/org/apache/druid/math/expr/ExpressionTypeConversion.java +++ b/processing/src/main/java/org/apache/druid/math/expr/ExpressionTypeConversion.java @@ -58,6 +58,9 @@ public static ExpressionType autoDetect(ExprEval eval, ExprEval otherEval) if (Types.is(type, ExprType.STRING) && Types.is(otherType, ExprType.STRING)) { return ExpressionType.STRING; } + if (type.isArray() || otherType.isArray()) { + return leastRestrictiveType(type, otherType); + } type = eval.value() != null ? type : otherType; otherType = otherEval.value() != null ? 
otherType : type; diff --git a/processing/src/main/java/org/apache/druid/math/expr/InputBindings.java b/processing/src/main/java/org/apache/druid/math/expr/InputBindings.java index d1bc2be32ce4..469ea05706be 100644 --- a/processing/src/main/java/org/apache/druid/math/expr/InputBindings.java +++ b/processing/src/main/java/org/apache/druid/math/expr/InputBindings.java @@ -255,7 +255,7 @@ public static InputSupplier inputSupplier(ExpressionType type, Supplier bindings) + public static Expr.ObjectBinding forInputSuppliers(final Map> bindings) { return new Expr.ObjectBinding() { diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java index 41704fadb3da..46bd682d33ce 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java @@ -321,7 +321,7 @@ public static Expr.ObjectBinding createBindings( ) { final List columns = plan.getAnalysis().getRequiredBindingsList(); - final Map suppliers = new HashMap<>(); + final Map> suppliers = new HashMap<>(); for (String columnName : columns) { final ColumnCapabilities capabilities = columnSelectorFactory.getColumnCapabilities(columnName); final boolean multiVal = capabilities != null && capabilities.hasMultipleValues().isTrue(); diff --git a/processing/src/test/java/org/apache/druid/math/expr/EvalTest.java b/processing/src/test/java/org/apache/druid/math/expr/EvalTest.java index 1171e0f5098e..45191ade72ec 100644 --- a/processing/src/test/java/org/apache/druid/math/expr/EvalTest.java +++ b/processing/src/test/java/org/apache/druid/math/expr/EvalTest.java @@ -987,6 +987,48 @@ public void testBooleanInputs() } } + @Test + public void testArrayComparison() + { + Expr.ObjectBinding bindings = InputBindings.forInputSuppliers( + ImmutableMap.>builder() + .put( + "stringArray", + 
InputBindings.inputSupplier(ExpressionType.STRING_ARRAY, () -> new Object[]{"a", "b", null, "c"}) + ) + .put( + "longArray", + InputBindings.inputSupplier(ExpressionType.LONG_ARRAY, () -> new Object[]{1L, null, 2L, 3L}) + ) + .put( + "doubleArray", + InputBindings.inputSupplier(ExpressionType.DOUBLE_ARRAY, () -> new Object[]{1.1, 2.2, 3.3, null}) + ) + .build() + ); + + Assert.assertEquals(0L, eval("['a','b',null,'c'] > stringArray", bindings).value()); + Assert.assertEquals(1L, eval("['a','b',null,'c'] >= stringArray", bindings).value()); + Assert.assertEquals(1L, eval("['a','b',null,'c'] == stringArray", bindings).value()); + Assert.assertEquals(0L, eval("['a','b',null,'c'] != stringArray", bindings).value()); + Assert.assertEquals(1L, eval("['a','b',null,'c'] <= stringArray", bindings).value()); + Assert.assertEquals(0L, eval("['a','b',null,'c'] < stringArray", bindings).value()); + + Assert.assertEquals(0L, eval("[1,null,2,3] > longArray", bindings).value()); + Assert.assertEquals(1L, eval("[1,null,2,3] >= longArray", bindings).value()); + Assert.assertEquals(1L, eval("[1,null,2,3] == longArray", bindings).value()); + Assert.assertEquals(0L, eval("[1,null,2,3] != longArray", bindings).value()); + Assert.assertEquals(1L, eval("[1,null,2,3] <= longArray", bindings).value()); + Assert.assertEquals(0L, eval("[1,null,2,3] < longArray", bindings).value()); + + Assert.assertEquals(0L, eval("[1.1,2.2,3.3,null] > doubleArray", bindings).value()); + Assert.assertEquals(1L, eval("[1.1,2.2,3.3,null] >= doubleArray", bindings).value()); + Assert.assertEquals(1L, eval("[1.1,2.2,3.3,null] == doubleArray", bindings).value()); + Assert.assertEquals(0L, eval("[1.1,2.2,3.3,null] != doubleArray", bindings).value()); + Assert.assertEquals(1L, eval("[1.1,2.2,3.3,null] <= doubleArray", bindings).value()); + Assert.assertEquals(0L, eval("[1.1,2.2,3.3,null] < doubleArray", bindings).value()); + } + @Test public void testValueOrDefault() { diff --git 
a/processing/src/test/java/org/apache/druid/query/expression/ExprMacroTest.java b/processing/src/test/java/org/apache/druid/query/expression/ExprMacroTest.java index 4b70cec0e0b4..fb38e803ebaa 100644 --- a/processing/src/test/java/org/apache/druid/query/expression/ExprMacroTest.java +++ b/processing/src/test/java/org/apache/druid/query/expression/ExprMacroTest.java @@ -37,7 +37,7 @@ public class ExprMacroTest private static final String IPV4_STRING = "192.168.0.1"; private static final long IPV4_LONG = 3232235521L; private static final Expr.ObjectBinding BINDINGS = InputBindings.forInputSuppliers( - ImmutableMap.builder() + ImmutableMap.>builder() .put("t", InputBindings.inputSupplier( ExpressionType.LONG, diff --git a/processing/src/test/java/org/apache/druid/query/expression/HyperUniqueExpressionsTest.java b/processing/src/test/java/org/apache/druid/query/expression/HyperUniqueExpressionsTest.java index cb641c59e0d2..7109f51f44e6 100644 --- a/processing/src/test/java/org/apache/druid/query/expression/HyperUniqueExpressionsTest.java +++ b/processing/src/test/java/org/apache/druid/query/expression/HyperUniqueExpressionsTest.java @@ -52,7 +52,7 @@ public class HyperUniqueExpressionsTest extends InitializedNullHandlingTest private static final double SOME_DOUBLE = 1.234; Expr.ObjectBinding inputBindings = InputBindings.forInputSuppliers( - new ImmutableMap.Builder() + new ImmutableMap.Builder>() .put("hll", InputBindings.inputSupplier(HyperUniqueExpressions.TYPE, HyperLogLogCollector::makeLatestCollector)) .put("string", InputBindings.inputSupplier(ExpressionType.STRING, () -> SOME_STRING)) .put("long", InputBindings.inputSupplier(ExpressionType.LONG, () -> SOME_LONG)) diff --git a/processing/src/test/java/org/apache/druid/query/expression/NestedDataExpressionsTest.java b/processing/src/test/java/org/apache/druid/query/expression/NestedDataExpressionsTest.java index 06726289ce69..fae95259d4ad 100644 --- 
a/processing/src/test/java/org/apache/druid/query/expression/NestedDataExpressionsTest.java +++ b/processing/src/test/java/org/apache/druid/query/expression/NestedDataExpressionsTest.java @@ -81,7 +81,7 @@ public class NestedDataExpressionsTest extends InitializedNullHandlingTest ); Expr.ObjectBinding inputBindings = InputBindings.forInputSuppliers( - new ImmutableMap.Builder() + new ImmutableMap.Builder>() .put("nest", InputBindings.inputSupplier(ExpressionType.NESTED_DATA, () -> NEST)) .put("nestWrapped", InputBindings.inputSupplier(ExpressionType.NESTED_DATA, () -> new StructuredData(NEST))) .put("nester", InputBindings.inputSupplier(ExpressionType.NESTED_DATA, () -> NESTER)) diff --git a/processing/src/test/java/org/apache/druid/query/expression/RegexpReplaceExprMacroTest.java b/processing/src/test/java/org/apache/druid/query/expression/RegexpReplaceExprMacroTest.java index f0e3f3c843d7..3f3ce780c8c9 100644 --- a/processing/src/test/java/org/apache/druid/query/expression/RegexpReplaceExprMacroTest.java +++ b/processing/src/test/java/org/apache/druid/query/expression/RegexpReplaceExprMacroTest.java @@ -223,7 +223,7 @@ public void testUrlIdReplacementDynamic() "regexp_replace(regexp_replace(a, pattern1, replacement1), pattern2, replacement2)", InputBindings.forInputSuppliers( ImmutableMap - .builder() + .>builder() .put("a", InputBindings.inputSupplier(ExpressionType.STRING, () -> "http://example.com/path/to?query")) .put("pattern1", InputBindings.inputSupplier(ExpressionType.STRING, () -> "\\?(.*)$")) .put("pattern2", InputBindings.inputSupplier(ExpressionType.STRING, () -> "/(\\w+)(?=/|$)")) diff --git a/server/src/test/java/org/apache/druid/query/expression/LookupExprMacroTest.java b/server/src/test/java/org/apache/druid/query/expression/LookupExprMacroTest.java index d0097bcc8108..65a3e68dcfea 100644 --- a/server/src/test/java/org/apache/druid/query/expression/LookupExprMacroTest.java +++ 
b/server/src/test/java/org/apache/druid/query/expression/LookupExprMacroTest.java @@ -34,7 +34,7 @@ public class LookupExprMacroTest extends InitializedNullHandlingTest { private static final Expr.ObjectBinding BINDINGS = InputBindings.forInputSuppliers( - ImmutableMap.builder() + ImmutableMap.>builder() .put("x", InputBindings.inputSupplier(ExpressionType.STRING, () -> "foo")) .build() ); From 6b33e42957269e8263b51a4637ed9ce3ca33254d Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Mon, 17 Jul 2023 13:01:41 -0700 Subject: [PATCH 40/44] style --- .../math/expr/BinaryLogicalOperatorExpr.java | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/math/expr/BinaryLogicalOperatorExpr.java b/processing/src/main/java/org/apache/druid/math/expr/BinaryLogicalOperatorExpr.java index 5fe60bdc9ef1..c06f526288f8 100644 --- a/processing/src/main/java/org/apache/druid/math/expr/BinaryLogicalOperatorExpr.java +++ b/processing/src/main/java/org/apache/druid/math/expr/BinaryLogicalOperatorExpr.java @@ -51,7 +51,8 @@ protected boolean evalString(@Nullable String left, @Nullable String right) } @Override - protected boolean evalArray(ExprEval left, ExprEval right) { + protected boolean evalArray(ExprEval left, ExprEval right) + { ExpressionType type = ExpressionTypeConversion.leastRestrictiveType(left.type(), right.type()); // type cannot be null here because ExprEval type is not nullable return type.getNullableStrategy().compare(left.castTo(type).asArray(), right.castTo(type).asArray()) < 0; @@ -145,7 +146,8 @@ protected boolean evalString(@Nullable String left, @Nullable String right) } @Override - protected boolean evalArray(ExprEval left, ExprEval right) { + protected boolean evalArray(ExprEval left, ExprEval right) + { ExpressionType type = ExpressionTypeConversion.leastRestrictiveType(left.type(), right.type()); // type cannot be null here because ExprEval type is not nullable return 
type.getNullableStrategy().compare(left.castTo(type).asArray(), right.castTo(type).asArray()) > 0; @@ -192,7 +194,8 @@ protected boolean evalString(@Nullable String left, @Nullable String right) } @Override - protected boolean evalArray(ExprEval left, ExprEval right) { + protected boolean evalArray(ExprEval left, ExprEval right) + { ExpressionType type = ExpressionTypeConversion.leastRestrictiveType(left.type(), right.type()); // type cannot be null here because ExprEval type is not nullable return type.getNullableStrategy().compare(left.castTo(type).asArray(), right.castTo(type).asArray()) >= 0; @@ -239,7 +242,8 @@ protected boolean evalString(@Nullable String left, @Nullable String right) } @Override - protected boolean evalArray(ExprEval left, ExprEval right) { + protected boolean evalArray(ExprEval left, ExprEval right) + { ExpressionType type = ExpressionTypeConversion.leastRestrictiveType(left.type(), right.type()); // type cannot be null here because ExprEval type is not nullable return type.getNullableStrategy().compare(left.castTo(type).asArray(), right.castTo(type).asArray()) == 0; @@ -285,7 +289,8 @@ protected boolean evalString(@Nullable String left, @Nullable String right) } @Override - protected boolean evalArray(ExprEval left, ExprEval right) { + protected boolean evalArray(ExprEval left, ExprEval right) + { ExpressionType type = ExpressionTypeConversion.leastRestrictiveType(left.type(), right.type()); // type cannot be null here because ExprEval type is not nullable return type.getNullableStrategy().compare(left.castTo(type).asArray(), right.castTo(type).asArray()) != 0; From f8e52841d1fc5e88559f0dbffce69521ceacb91d Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Mon, 17 Jul 2023 13:02:18 -0700 Subject: [PATCH 41/44] missed one --- .../org/apache/druid/math/expr/BinaryLogicalOperatorExpr.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/processing/src/main/java/org/apache/druid/math/expr/BinaryLogicalOperatorExpr.java 
b/processing/src/main/java/org/apache/druid/math/expr/BinaryLogicalOperatorExpr.java index c06f526288f8..a65999c1ae99 100644 --- a/processing/src/main/java/org/apache/druid/math/expr/BinaryLogicalOperatorExpr.java +++ b/processing/src/main/java/org/apache/druid/math/expr/BinaryLogicalOperatorExpr.java @@ -99,7 +99,8 @@ protected boolean evalString(@Nullable String left, @Nullable String right) } @Override - protected boolean evalArray(ExprEval left, ExprEval right) { + protected boolean evalArray(ExprEval left, ExprEval right) + { ExpressionType type = ExpressionTypeConversion.leastRestrictiveType(left.type(), right.type()); // type cannot be null here because ExprEval type is not nullable return type.getNullableStrategy().compare(left.castTo(type).asArray(), right.castTo(type).asArray()) <= 0; From 65ad8e4a72304b60cef6efa9a7062127d06b2fca Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Mon, 17 Jul 2023 14:43:32 -0700 Subject: [PATCH 42/44] fix it --- .../java/org/apache/druid/math/expr/BinaryOperatorExpr.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/processing/src/main/java/org/apache/druid/math/expr/BinaryOperatorExpr.java b/processing/src/main/java/org/apache/druid/math/expr/BinaryOperatorExpr.java index 5a26eb279f9a..8dd4b9602518 100644 --- a/processing/src/main/java/org/apache/druid/math/expr/BinaryOperatorExpr.java +++ b/processing/src/main/java/org/apache/druid/math/expr/BinaryOperatorExpr.java @@ -237,6 +237,6 @@ public boolean canVectorize(InputBindingInspector inspector) ExpressionType leftType = left.getOutputType(inspector); ExpressionType rightType = right.getOutputType(inspector); ExpressionType commonType = ExpressionTypeConversion.leastRestrictiveType(leftType, rightType); - return inspector.canVectorize(left, right) && !(commonType == null || commonType.isArray()); + return inspector.canVectorize(left, right) && (commonType == null || commonType.isPrimitive()); } } From affcf7a5e3d56e71527fb9620e2219f8d53c982d Mon Sep 17 
00:00:00 2001 From: Clint Wylie Date: Mon, 17 Jul 2023 17:20:15 -0700 Subject: [PATCH 43/44] backwards compat --- .../org/apache/druid/math/expr/ExpressionTypeConversion.java | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/processing/src/main/java/org/apache/druid/math/expr/ExpressionTypeConversion.java b/processing/src/main/java/org/apache/druid/math/expr/ExpressionTypeConversion.java index fbd63dc90874..faa17d45e8d7 100644 --- a/processing/src/main/java/org/apache/druid/math/expr/ExpressionTypeConversion.java +++ b/processing/src/main/java/org/apache/druid/math/expr/ExpressionTypeConversion.java @@ -58,7 +58,10 @@ public static ExpressionType autoDetect(ExprEval eval, ExprEval otherEval) if (Types.is(type, ExprType.STRING) && Types.is(otherType, ExprType.STRING)) { return ExpressionType.STRING; } - if (type.isArray() || otherType.isArray()) { + // to preserve backwards compatibility, like with strings, we only use array type if both types are + // arrays... this is pretty wack, but it is what it is. 
we might want to consider changing this + // behavior in the future with a flag + if (type.isArray() && otherType.isArray()) { return leastRestrictiveType(type, otherType); } From 927fd71a081edaef3a910e78737767e4857dcb63 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Mon, 17 Jul 2023 19:21:44 -0700 Subject: [PATCH 44/44] consistent naming --- .../org/apache/druid/query/filter/InDimFilter.java | 8 ++++---- .../druid/segment/index/IndexedUtf8ValueIndexes.java | 4 ++-- ...Utf8ValueSetIndex.java => Utf8ValueSetIndexes.java} | 2 +- .../segment/serde/StringUtf8ColumnIndexSupplier.java | 4 ++-- .../org/apache/druid/query/filter/InDimFilterTest.java | 10 +++++----- 5 files changed, 14 insertions(+), 14 deletions(-) rename processing/src/main/java/org/apache/druid/segment/index/semantic/{Utf8ValueSetIndex.java => Utf8ValueSetIndexes.java} (97%) diff --git a/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java b/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java index 6a39c46a96be..47c3d78a237b 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java @@ -61,7 +61,7 @@ import org.apache.druid.segment.filter.Filters; import org.apache.druid.segment.index.BitmapColumnIndex; import org.apache.druid.segment.index.semantic.StringValueSetIndexes; -import org.apache.druid.segment.index.semantic.Utf8ValueSetIndex; +import org.apache.druid.segment.index.semantic.Utf8ValueSetIndexes; import org.apache.druid.segment.vector.VectorColumnSelectorFactory; import javax.annotation.Nullable; @@ -298,9 +298,9 @@ public BitmapColumnIndex getBitmapColumnIndex(ColumnIndexSelector selector) ); } - final Utf8ValueSetIndex utf8ValueSetIndex = indexSupplier.as(Utf8ValueSetIndex.class); - if (utf8ValueSetIndex != null) { - return utf8ValueSetIndex.forSortedValuesUtf8(valuesUtf8); + final Utf8ValueSetIndexes utf8ValueSetIndexes = 
indexSupplier.as(Utf8ValueSetIndexes.class); + if (utf8ValueSetIndexes != null) { + return utf8ValueSetIndexes.forSortedValuesUtf8(valuesUtf8); } final StringValueSetIndexes stringValueSetIndexes = indexSupplier.as(StringValueSetIndexes.class); diff --git a/processing/src/main/java/org/apache/druid/segment/index/IndexedUtf8ValueIndexes.java b/processing/src/main/java/org/apache/druid/segment/index/IndexedUtf8ValueIndexes.java index 5137958c6daa..251059747013 100644 --- a/processing/src/main/java/org/apache/druid/segment/index/IndexedUtf8ValueIndexes.java +++ b/processing/src/main/java/org/apache/druid/segment/index/IndexedUtf8ValueIndexes.java @@ -34,7 +34,7 @@ import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.data.Indexed; import org.apache.druid.segment.index.semantic.StringValueSetIndexes; -import org.apache.druid.segment.index.semantic.Utf8ValueSetIndex; +import org.apache.druid.segment.index.semantic.Utf8ValueSetIndexes; import org.apache.druid.segment.index.semantic.ValueIndexes; import javax.annotation.Nullable; @@ -45,7 +45,7 @@ import java.util.SortedSet; public final class IndexedUtf8ValueIndexes> - implements StringValueSetIndexes, Utf8ValueSetIndex, ValueIndexes + implements StringValueSetIndexes, Utf8ValueSetIndexes, ValueIndexes { // This determines the cut-off point to switch the merging algorithm from doing binary-search per element in the value // set to doing a sorted merge algorithm between value set and dictionary. 
The ratio here represents the ratio b/w diff --git a/processing/src/main/java/org/apache/druid/segment/index/semantic/Utf8ValueSetIndex.java b/processing/src/main/java/org/apache/druid/segment/index/semantic/Utf8ValueSetIndexes.java similarity index 97% rename from processing/src/main/java/org/apache/druid/segment/index/semantic/Utf8ValueSetIndex.java rename to processing/src/main/java/org/apache/druid/segment/index/semantic/Utf8ValueSetIndexes.java index 7cf73b1fe32d..1bf2792e444d 100644 --- a/processing/src/main/java/org/apache/druid/segment/index/semantic/Utf8ValueSetIndex.java +++ b/processing/src/main/java/org/apache/druid/segment/index/semantic/Utf8ValueSetIndexes.java @@ -25,7 +25,7 @@ import java.nio.ByteBuffer; import java.util.SortedSet; -public interface Utf8ValueSetIndex +public interface Utf8ValueSetIndexes { /** * Get an {@link Iterable} of {@link ImmutableBitmap} corresponding to the specified set of values (if they are diff --git a/processing/src/main/java/org/apache/druid/segment/serde/StringUtf8ColumnIndexSupplier.java b/processing/src/main/java/org/apache/druid/segment/serde/StringUtf8ColumnIndexSupplier.java index 3f717f802552..e80ca2595a36 100644 --- a/processing/src/main/java/org/apache/druid/segment/serde/StringUtf8ColumnIndexSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/serde/StringUtf8ColumnIndexSupplier.java @@ -42,7 +42,7 @@ import org.apache.druid.segment.index.semantic.NullValueIndex; import org.apache.druid.segment.index.semantic.SpatialIndex; import org.apache.druid.segment.index.semantic.StringValueSetIndexes; -import org.apache.druid.segment.index.semantic.Utf8ValueSetIndex; +import org.apache.druid.segment.index.semantic.Utf8ValueSetIndexes; import org.apache.druid.segment.index.semantic.ValueIndexes; import javax.annotation.Nullable; @@ -117,7 +117,7 @@ public T as(Class clazz) return (T) (NullValueIndex) () -> nullIndex; } else if ( clazz.equals(StringValueSetIndexes.class) || - 
clazz.equals(Utf8ValueSetIndex.class) || + clazz.equals(Utf8ValueSetIndexes.class) || clazz.equals(ValueIndexes.class) ) { return (T) new IndexedUtf8ValueIndexes<>( diff --git a/processing/src/test/java/org/apache/druid/query/filter/InDimFilterTest.java b/processing/src/test/java/org/apache/druid/query/filter/InDimFilterTest.java index ffa793cecd43..41f2480621da 100644 --- a/processing/src/test/java/org/apache/druid/query/filter/InDimFilterTest.java +++ b/processing/src/test/java/org/apache/druid/query/filter/InDimFilterTest.java @@ -36,7 +36,7 @@ import org.apache.druid.segment.column.RowSignature; import org.apache.druid.segment.index.BitmapColumnIndex; import org.apache.druid.segment.index.semantic.StringValueSetIndexes; -import org.apache.druid.segment.index.semantic.Utf8ValueSetIndex; +import org.apache.druid.segment.index.semantic.Utf8ValueSetIndexes; import org.apache.druid.testing.InitializedNullHandlingTest; import org.junit.Assert; import org.junit.Rule; @@ -260,15 +260,15 @@ public void testUsesUtf8SetIndex() final ColumnIndexSelector indexSelector = Mockito.mock(ColumnIndexSelector.class); final ColumnIndexSupplier indexSupplier = Mockito.mock(ColumnIndexSupplier.class); - final Utf8ValueSetIndex valueIndex = Mockito.mock(Utf8ValueSetIndex.class); + final Utf8ValueSetIndexes valueIndexes = Mockito.mock(Utf8ValueSetIndexes.class); final BitmapColumnIndex bitmapColumnIndex = Mockito.mock(BitmapColumnIndex.class); final InDimFilter.ValuesSet expectedValuesSet = new InDimFilter.ValuesSet(); expectedValuesSet.addAll(Arrays.asList("v1", "v2")); Mockito.when(indexSelector.getIndexSupplier("dim0")).thenReturn(indexSupplier); - Mockito.when(indexSupplier.as(Utf8ValueSetIndex.class)).thenReturn(valueIndex); - Mockito.when(valueIndex.forSortedValuesUtf8(expectedValuesSet.toUtf8())).thenReturn(bitmapColumnIndex); + Mockito.when(indexSupplier.as(Utf8ValueSetIndexes.class)).thenReturn(valueIndexes); + 
Mockito.when(valueIndexes.forSortedValuesUtf8(expectedValuesSet.toUtf8())).thenReturn(bitmapColumnIndex); final BitmapColumnIndex retVal = inFilter.getBitmapColumnIndex(indexSelector); Assert.assertSame("inFilter returns the intended bitmapColumnIndex", bitmapColumnIndex, retVal); @@ -291,7 +291,7 @@ public void testUsesStringSetIndex() expectedValuesSet.addAll(Arrays.asList("v1", "v2")); Mockito.when(indexSelector.getIndexSupplier("dim0")).thenReturn(indexSupplier); - Mockito.when(indexSupplier.as(Utf8ValueSetIndex.class)).thenReturn(null); // Will check for UTF-8 first. + Mockito.when(indexSupplier.as(Utf8ValueSetIndexes.class)).thenReturn(null); // Will check for UTF-8 first. Mockito.when(indexSupplier.as(StringValueSetIndexes.class)).thenReturn(valueIndex); Mockito.when(valueIndex.forSortedValues(expectedValuesSet)).thenReturn(bitmapColumnIndex);