Allow non-literal JSONPath arguments for the json_value and json_query expressions.

What the hunks below change:
  * NestedDataExpressions.java: JsonValueExprMacro.apply() and JsonQueryExprMacro.apply() no
    longer parse the path up front. A literal path is parsed once in the constructor of
    JsonValueExpr / JsonValueCastExpr / JsonQueryExpr; a non-literal path is handled by
    JsonValueDynamicExpr / JsonQueryDynamicExpr, which evaluate and parse the path on every
    eval() call. Each visit() re-selects the implementation class from the rewritten arguments.
  * NestedDataOperatorConversions.java: when the path expression is not a literal, two of the
    conversions now emit a plain json_query / json_value function call instead of returning null.
    The remaining hunks only reword a comment ("jq" -> "json path").
  * NestedDataExpressionsTest.java / CalciteNestedDataQueryTest.java: add cases that use
    array_offset(json_paths(...), 0) as the path argument.

Review fix carried in this copy:
  JsonValueDynamicExpr.eval() built its "invalid output type" message from
  args.get(2).getLiteralValue(), although apply() and every visit() send any non-literal path to
  this class whether or not the third argument is a literal. The message now reports the string
  that was actually evaluated and rejected. The change stays on the same added line, so the
  hunk header counts are untouched.

NOTE(review): this copy was rebuilt from a whitespace-collapsed paste. Line breaks were
reconstructed so that every Java hunk matches the old/new line counts in its @@ header, but
the indentation (including that of context lines) is a best guess. Generic type arguments
(e.g. on List) and the angle-bracket parts of the type names in the math-expr.md row appear to
have been stripped, and the math-expr.md hunk is missing its leading context and its removed
row; that hunk is left exactly as found. Confirm against the upstream commit before applying.

diff --git a/docs/querying/math-expr.md b/docs/querying/math-expr.md
index 340fa4ee8650..2de004618d2c 100644
--- a/docs/querying/math-expr.md
+++ b/docs/querying/math-expr.md
@@ -235,7 +235,7 @@ JSON functions provide facilities to extract, transform, and create `COMPLEX`, `ARRAY`, or `ARRAY`) value from `expr` using JSONPath syntax of `path`. The optional `type` argument can be set to `'LONG'`,`'DOUBLE'`, `'STRING'`, `'ARRAY'`, `'ARRAY'`, or `'ARRAY'` to cast values to that type. |
 | json_query(expr, path) | Extract a `COMPLEX` value from `expr` using JSONPath syntax of `path` |
 | json_object(expr1, expr2[, expr3, expr4 ...]) | Construct a `COMPLEX` with alternating 'key' and 'value' arguments|
 | parse_json(expr) | Deserialize a JSON `STRING` into a `COMPLEX`. If the input is not a `STRING` or it is invalid JSON, this function will result in an error.|
diff --git a/processing/src/main/java/org/apache/druid/query/expression/NestedDataExpressions.java b/processing/src/main/java/org/apache/druid/query/expression/NestedDataExpressions.java
index 57b81116fa8b..fec16ad99b1e 100644
--- a/processing/src/main/java/org/apache/druid/query/expression/NestedDataExpressions.java
+++ b/processing/src/main/java/org/apache/druid/query/expression/NestedDataExpressions.java
@@ -330,88 +330,163 @@ public String name()
     @Override
     public Expr apply(List args)
     {
-      final List parts = getJsonPathPartsFromLiteral(this, args.get(1));
-      if (args.size() == 3 && args.get(2).isLiteral()) {
-        final ExpressionType castTo = ExpressionType.fromString((String) args.get(2).getLiteralValue());
+      if (args.get(1).isLiteral()) {
+        if (args.size() == 3 && args.get(2).isLiteral()) {
+          return new JsonValueCastExpr(args);
+        } else {
+          return new JsonValueExpr(args);
+        }
+      } else {
+        return new JsonValueDynamicExpr(args);
+      }
+    }
+
+    final class JsonValueExpr extends ExprMacroTable.BaseScalarMacroFunctionExpr
+    {
+      private final List parts;
+
+      public JsonValueExpr(List args)
+      {
+        super(name(), args);
+        this.parts = getJsonPathPartsFromLiteral(JsonValueExprMacro.this, args.get(1));
+      }
+
+      @Override
+      public ExprEval eval(ObjectBinding bindings)
+      {
+        final ExprEval input = args.get(0).eval(bindings);
+        final ExprEval valAtPath = ExprEval.bestEffortOf(
+            NestedPathFinder.find(unwrap(input), parts)
+        );
+        if (valAtPath.type().isPrimitive() || valAtPath.type().isPrimitiveArray()) {
+          return valAtPath;
+        }
+        return ExprEval.of(null);
+      }
+
+      @Override
+      public Expr visit(Shuttle shuttle)
+      {
+        List newArgs = args.stream().map(x -> x.visit(shuttle)).collect(Collectors.toList());
+        if (newArgs.get(1).isLiteral()) {
+          return shuttle.visit(new JsonValueExpr(newArgs));
+        } else {
+          return shuttle.visit(new JsonValueDynamicExpr(newArgs));
+        }
+      }
+
+      @Nullable
+      @Override
+      public ExpressionType getOutputType(InputBindingInspector inspector)
+      {
+        // we cannot infer output type because there could be anything at the path, and, we lack a proper VARIANT type
+        return null;
+      }
+    }
+
+    final class JsonValueCastExpr extends ExprMacroTable.BaseScalarMacroFunctionExpr
+    {
+      private final List parts;
+      private final ExpressionType castTo;
+
+      public JsonValueCastExpr(List args)
+      {
+        super(name(), args);
+        this.parts = getJsonPathPartsFromLiteral(JsonValueExprMacro.this, args.get(1));
+        this.castTo = ExpressionType.fromString((String) args.get(2).getLiteralValue());
         if (castTo == null) {
           throw JsonValueExprMacro.this.validationFailed(
               "invalid output type: [%s]",
               args.get(2).getLiteralValue()
           );
         }
-        final class JsonValueCastExpr extends ExprMacroTable.BaseScalarMacroFunctionExpr
-        {
-          public JsonValueCastExpr(List args)
-          {
-            super(name(), args);
-          }
-
-          @Override
-          public ExprEval eval(ObjectBinding bindings)
-          {
-            final ExprEval input = args.get(0).eval(bindings);
-            final ExprEval valAtPath = ExprEval.bestEffortOf(
-                NestedPathFinder.find(unwrap(input), parts)
-            );
-            if (valAtPath.type().isPrimitive() || valAtPath.type().isPrimitiveArray()) {
-              return valAtPath.castTo(castTo);
-            }
-            return ExprEval.ofType(castTo, null);
-          }
+      }
 
-          @Override
-          public Expr visit(Shuttle shuttle)
-          {
-            List newArgs = args.stream().map(x -> x.visit(shuttle)).collect(Collectors.toList());
-            return shuttle.visit(new JsonValueCastExpr(newArgs));
-          }
+      @Override
+      public ExprEval eval(ObjectBinding bindings)
+      {
+        final ExprEval input = args.get(0).eval(bindings);
+        final ExprEval valAtPath = ExprEval.bestEffortOf(
+            NestedPathFinder.find(unwrap(input), parts)
+        );
+        if (valAtPath.type().isPrimitive() || valAtPath.type().isPrimitiveArray()) {
+          return valAtPath.castTo(castTo);
+        }
+        return ExprEval.ofType(castTo, null);
+      }
 
-          @Nullable
-          @Override
-          public ExpressionType getOutputType(InputBindingInspector inspector)
-          {
-            return castTo;
-          }
+      @Override
+      public Expr visit(Shuttle shuttle)
+      {
+        List newArgs = args.stream().map(x -> x.visit(shuttle)).collect(Collectors.toList());
+        if (newArgs.get(1).isLiteral()) {
+          return shuttle.visit(new JsonValueCastExpr(newArgs));
+        } else {
+          return shuttle.visit(new JsonValueDynamicExpr(newArgs));
         }
-        return new JsonValueCastExpr(args);
-      } else {
-        final class JsonValueExpr extends ExprMacroTable.BaseScalarMacroFunctionExpr
-        {
+      }
 
-          public JsonValueExpr(List args)
-          {
-            super(name(), args);
-          }
+      @Nullable
+      @Override
+      public ExpressionType getOutputType(InputBindingInspector inspector)
+      {
+        return castTo;
+      }
+    }
+
+    final class JsonValueDynamicExpr extends ExprMacroTable.BaseScalarMacroFunctionExpr
+    {
+      public JsonValueDynamicExpr(List args)
+      {
+        super(name(), args);
+      }
 
-          @Override
-          public ExprEval eval(ObjectBinding bindings)
-          {
-            final ExprEval input = args.get(0).eval(bindings);
-            final ExprEval valAtPath = ExprEval.bestEffortOf(
-                NestedPathFinder.find(unwrap(input), parts)
+      @Override
+      public ExprEval eval(ObjectBinding bindings)
+      {
+        final ExprEval input = args.get(0).eval(bindings);
+        final ExprEval path = args.get(1).eval(bindings);
+        final ExpressionType castTo;
+        if (args.size() == 3) {
+          castTo = ExpressionType.fromString(args.get(2).eval(bindings).asString());
+          if (castTo == null) {
+            throw JsonValueExprMacro.this.validationFailed(
+                "invalid output type: [%s]",
+                args.get(2).eval(bindings).asString() // type argument may be non-literal on this path
             );
-            if (valAtPath.type().isPrimitive() || valAtPath.type().isPrimitiveArray()) {
-              return valAtPath;
-            }
-            return ExprEval.of(null);
           }
+        } else {
+          castTo = null;
+        }
+        final List parts = NestedPathFinder.parseJsonPath(path.asString());
+        final ExprEval valAtPath = ExprEval.bestEffortOf(NestedPathFinder.find(unwrap(input), parts));
+        if (valAtPath.type().isPrimitive() || valAtPath.type().isPrimitiveArray()) {
+          return castTo == null ? valAtPath : valAtPath.castTo(castTo);
+        }
+        return castTo == null ? ExprEval.of(null) : ExprEval.ofType(castTo, null);
+      }
 
-          @Override
-          public Expr visit(Shuttle shuttle)
-          {
-            List newArgs = args.stream().map(x -> x.visit(shuttle)).collect(Collectors.toList());
+      @Override
+      public Expr visit(Shuttle shuttle)
+      {
+        List newArgs = args.stream().map(x -> x.visit(shuttle)).collect(Collectors.toList());
+        if (newArgs.get(1).isLiteral()) {
+          if (newArgs.size() == 3 && newArgs.get(2).isLiteral()) {
+            return shuttle.visit(new JsonValueCastExpr(newArgs));
+          } else {
             return shuttle.visit(new JsonValueExpr(newArgs));
           }
-
-          @Nullable
-          @Override
-          public ExpressionType getOutputType(InputBindingInspector inspector)
-          {
-            // we cannot infer output type because there could be anything at the path, and, we lack a proper VARIANT type
-            return null;
-          }
+        } else {
+          return shuttle.visit(new JsonValueDynamicExpr(newArgs));
         }
-        return new JsonValueExpr(args);
+      }
+
+      @Nullable
+      @Override
+      public ExpressionType getOutputType(InputBindingInspector inspector)
+      {
+        // we cannot infer output type because there could be anything at the path, and, we lack a proper VARIANT type
+        return null;
       }
     }
   }
@@ -429,40 +504,90 @@ public String name()
     @Override
     public Expr apply(List args)
     {
-      final List parts = getJsonPathPartsFromLiteral(this, args.get(1));
-      final class JsonQueryExpr extends ExprMacroTable.BaseScalarMacroFunctionExpr
+      if (args.get(1).isLiteral()) {
+        return new JsonQueryExpr(args);
+      } else {
+        return new JsonQueryDynamicExpr(args);
+      }
+    }
+
+    final class JsonQueryExpr extends ExprMacroTable.BaseScalarMacroFunctionExpr
+    {
+      private final List parts;
+
+      public JsonQueryExpr(List args)
       {
-        public JsonQueryExpr(List args)
-        {
-          super(name(), args);
-        }
+        super(name(), args);
+        this.parts = getJsonPathPartsFromLiteral(JsonQueryExprMacro.this, args.get(1));
+      }
 
-        @Override
-        public ExprEval eval(ObjectBinding bindings)
-        {
-          ExprEval input = args.get(0).eval(bindings);
-          return ExprEval.ofComplex(
-              ExpressionType.NESTED_DATA,
-              NestedPathFinder.find(unwrap(input), parts)
-          );
-        }
+      @Override
+      public ExprEval eval(ObjectBinding bindings)
+      {
+        ExprEval input = args.get(0).eval(bindings);
+        return ExprEval.ofComplex(
+            ExpressionType.NESTED_DATA,
+            NestedPathFinder.find(unwrap(input), parts)
+        );
+      }
 
-        @Override
-        public Expr visit(Shuttle shuttle)
-        {
-          List newArgs = args.stream().map(x -> x.visit(shuttle)).collect(Collectors.toList());
+      @Override
+      public Expr visit(Shuttle shuttle)
+      {
+        List newArgs = args.stream().map(x -> x.visit(shuttle)).collect(Collectors.toList());
+        if (newArgs.get(1).isLiteral()) {
           return shuttle.visit(new JsonQueryExpr(newArgs));
+        } else {
+          return shuttle.visit(new JsonQueryDynamicExpr(newArgs));
         }
+      }
 
-        @Nullable
-        @Override
-        public ExpressionType getOutputType(InputBindingInspector inspector)
-        {
-          // call all the output JSON typed
-          return ExpressionType.NESTED_DATA;
+      @Nullable
+      @Override
+      public ExpressionType getOutputType(InputBindingInspector inspector)
+      {
+        // call all the output JSON typed
+        return ExpressionType.NESTED_DATA;
+      }
+    }
+
+    final class JsonQueryDynamicExpr extends ExprMacroTable.BaseScalarMacroFunctionExpr
+    {
+      public JsonQueryDynamicExpr(List args)
+      {
+        super(name(), args);
+      }
+
+      @Override
+      public ExprEval eval(ObjectBinding bindings)
+      {
+        ExprEval input = args.get(0).eval(bindings);
+        ExprEval path = args.get(1).eval(bindings);
+        final List parts = NestedPathFinder.parseJsonPath(path.asString());
+        return ExprEval.ofComplex(
+            ExpressionType.NESTED_DATA,
+            NestedPathFinder.find(unwrap(input), parts)
+        );
+      }
+
+      @Override
+      public Expr visit(Shuttle shuttle)
+      {
+        List newArgs = args.stream().map(x -> x.visit(shuttle)).collect(Collectors.toList());
+        if (newArgs.get(1).isLiteral()) {
+          return shuttle.visit(new JsonQueryExpr(newArgs));
+        } else {
+          return shuttle.visit(new JsonQueryDynamicExpr(newArgs));
         }
       }
-      return new JsonQueryExpr(args);
+
+      @Nullable
+      @Override
+      public ExpressionType getOutputType(InputBindingInspector inspector)
+      {
+        // call all the output JSON typed
+        return ExpressionType.NESTED_DATA;
+      }
     }
   }
 
diff --git a/processing/src/test/java/org/apache/druid/query/expression/NestedDataExpressionsTest.java b/processing/src/test/java/org/apache/druid/query/expression/NestedDataExpressionsTest.java
index fae95259d4ad..d8bd2d93841b 100644
--- a/processing/src/test/java/org/apache/druid/query/expression/NestedDataExpressionsTest.java
+++ b/processing/src/test/java/org/apache/druid/query/expression/NestedDataExpressionsTest.java
@@ -272,6 +272,11 @@ public void testJsonValueExpression()
     eval = expr.eval(inputBindings);
     Assert.assertArrayEquals(new Object[]{"1", "2", "3"}, (Object[]) eval.value());
     Assert.assertEquals(ExpressionType.STRING_ARRAY, eval.type());
+
+    expr = Parser.parse("json_value(nester, array_offset(json_paths(nester), 0))", MACRO_TABLE);
+    eval = expr.eval(inputBindings);
+    Assert.assertArrayEquals(new Object[]{"a", "b", "c"}, (Object[]) eval.value());
+    Assert.assertEquals(ExpressionType.STRING_ARRAY, eval.type());
   }
 
   @Test
@@ -317,6 +322,11 @@ public void testJsonQueryExpression()
     eval = expr.eval(inputBindings);
     Assert.assertEquals(1234L, eval.value());
     Assert.assertEquals(ExpressionType.NESTED_DATA, eval.type());
+
+    expr = Parser.parse("json_query(nester, array_offset(json_paths(nester), 0))", MACRO_TABLE);
+    eval = expr.eval(inputBindings);
+    Assert.assertEquals(NESTER.get("x"), eval.value());
+    Assert.assertEquals(ExpressionType.NESTED_DATA, eval.type());
   }
 
   @Test
diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/NestedDataOperatorConversions.java b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/NestedDataOperatorConversions.java
index 2ba07c126afd..bfa8f47e56ec 100644
--- a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/NestedDataOperatorConversions.java
+++ b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/NestedDataOperatorConversions.java
@@ -199,9 +199,10 @@ public DruidExpression toDruidExpression(
 
       final Expr pathExpr = plannerContext.parseExpression(druidExpressions.get(1).getExpression());
       if (!pathExpr.isLiteral()) {
-        return null;
+        // if path argument is not constant, just use a pure expression
+        return DruidExpression.ofFunctionCall(ColumnType.NESTED_DATA, "json_query", druidExpressions);
       }
-      // pre-normalize path so that the same expressions with different jq syntax are collapsed
+      // pre-normalize path so that the same expressions with different json path syntax are collapsed
       final String path = (String) pathExpr.eval(InputBindings.nilBindings()).value();
       final List parts = extractNestedPathParts(call, path);
       final String jsonPath = NestedPathFinder.toNormalizedJsonPath(parts);
@@ -353,9 +354,17 @@ public DruidExpression toDruidExpression(
 
       final Expr pathExpr = plannerContext.parseExpression(druidExpressions.get(1).getExpression());
       if (!pathExpr.isLiteral()) {
-        return null;
+        // if path argument is not constant, just use a pure expression
+        return DruidExpression.ofFunctionCall(
+            druidType,
+            "json_value",
+            ImmutableList.builder()
+                         .addAll(druidExpressions)
+                         .add(DruidExpression.ofStringLiteral(druidType.asTypeString()))
+                         .build()
+        );
       }
-      // pre-normalize path so that the same expressions with different jq syntax are collapsed
+      // pre-normalize path so that the same expressions with different json path syntax are collapsed
       final String path = (String) pathExpr.eval(InputBindings.nilBindings()).value();
       final List parts = extractNestedPathParts(call, path);
 
@@ -477,7 +486,7 @@ public DruidExpression toDruidExpression(
       if (!pathExpr.isLiteral()) {
         return null;
       }
-      // pre-normalize path so that the same expressions with different jq syntax are collapsed
+      // pre-normalize path so that the same expressions with different json path syntax are collapsed
       final String path = (String) pathExpr.eval(InputBindings.nilBindings()).value();
       final List parts;
       try {
@@ -645,7 +654,7 @@ public DruidExpression toDruidExpression(
       if (!pathExpr.isLiteral()) {
         return null;
       }
-      // pre-normalize path so that the same expressions with different jq syntax are collapsed
+      // pre-normalize path so that the same expressions with different json path syntax are collapsed
       final String path = (String) pathExpr.eval(InputBindings.nilBindings()).value();
       final List parts = extractNestedPathParts(call, path);
       final String jsonPath = NestedPathFinder.toNormalizedJsonPath(parts);
diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java
index b1795fd98e48..65316b8a7601 100644
--- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java
+++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java
@@ -6443,4 +6443,91 @@ public void testCoalesceOnNestedColumnsLater()
                     .build()
     );
   }
+
+  @Test
+  public void testGroupByPathDynamicArg()
+  {
+    cannotVectorize();
+    testQuery(
+        "SELECT "
+        + "JSON_VALUE(nest, ARRAY_OFFSET(JSON_PATHS(nest), 0)), "
+        + "SUM(cnt) "
+        + "FROM druid.nested GROUP BY 1",
+        ImmutableList.of(
+            GroupByQuery.builder()
+                        .setDataSource(DATA_SOURCE)
+                        .setInterval(querySegmentSpec(Filtration.eternity()))
+                        .setGranularity(Granularities.ALL)
+                        .setVirtualColumns(
+                            expressionVirtualColumn(
+                                "v0",
+                                "json_value(\"nest\",array_offset(json_paths(\"nest\"),0),'STRING')",
+                                ColumnType.STRING
+                            )
+                        )
+                        .setDimensions(
+                            dimensions(
+                                new DefaultDimensionSpec("v0", "d0")
+                            )
+                        )
+                        .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt")))
+                        .setContext(QUERY_CONTEXT_DEFAULT)
+                        .build()
+        ),
+        ImmutableList.of(
+            new Object[]{NullHandling.defaultStringValue(), 4L},
+            new Object[]{"100", 2L},
+            new Object[]{"200", 1L}
+        ),
+        RowSignature.builder()
+                    .add("EXPR$0", ColumnType.STRING)
+                    .add("EXPR$1", ColumnType.LONG)
+                    .build()
+    );
+  }
+
+  @Test
+  public void testJsonQueryDynamicArg()
+  {
+    cannotVectorize();
+    testQuery(
+        "SELECT JSON_PATHS(nester), JSON_QUERY(nester, ARRAY_OFFSET(JSON_PATHS(nester), 0))\n"
+        + "FROM druid.nested",
+        ImmutableList.of(
+            Druids.newScanQueryBuilder()
+                  .dataSource(DATA_SOURCE)
+                  .intervals(querySegmentSpec(Filtration.eternity()))
+                  .virtualColumns(
+                      expressionVirtualColumn(
+                          "v0",
+                          "json_paths(\"nester\")",
+                          ColumnType.STRING_ARRAY
+                      ),
+                      expressionVirtualColumn(
+                          "v1",
+                          "json_query(\"nester\",array_offset(json_paths(\"nester\"),0))",
+                          ColumnType.NESTED_DATA
+                      )
+                  )
+                  .columns("v0", "v1")
+                  .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
+                  .legacy(false)
+                  .build()
+        ),
+        ImmutableList.of(
+            new Object[]{"[\"$.array\",\"$.n.x\"]", "[\"a\",\"b\"]"},
+            new Object[]{"[\"$\"]", "\"hello\""},
+            new Object[]{"[\"$\"]", null},
+            new Object[]{"[\"$\"]", null},
+            new Object[]{"[\"$\"]", null},
+            new Object[]{"[\"$.array\",\"$.n.x\"]", "[\"a\",\"b\"]"},
+            new Object[]{"[\"$\"]", "2"}
+        ),
+        RowSignature.builder()
+                    .add("EXPR$0", ColumnType.STRING_ARRAY)
+                    .add("EXPR$1", ColumnType.NESTED_DATA)
+                    .build()
+
+    );
+  }
 }