From d080341b0fd837a076f2be968596d0f6cf6fc70b Mon Sep 17 00:00:00 2001 From: cryptoe Date: Mon, 13 Mar 2023 17:29:53 +0530 Subject: [PATCH 1/3] Adding query stack fault to MSQ to capture native query errors. --- docs/multi-stage-query/reference.md | 1 + .../org/apache/druid/msq/exec/MSQTasks.java | 5 +- .../msq/indexing/error/MSQErrorReport.java | 10 ++- .../msq/indexing/error/QueryStackFault.java | 76 +++++++++++++++++++ .../apache/druid/msq/exec/MSQInsertTest.java | 7 +- .../apache/druid/msq/exec/MSQSelectTest.java | 19 ++--- .../msq/indexing/error/MSQFaultSerdeTest.java | 1 + 7 files changed, 100 insertions(+), 19 deletions(-) create mode 100644 extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/error/QueryStackFault.java diff --git a/docs/multi-stage-query/reference.md b/docs/multi-stage-query/reference.md index 5c05b68bf3b0..eda7b77fa527 100644 --- a/docs/multi-stage-query/reference.md +++ b/docs/multi-stage-query/reference.md @@ -679,6 +679,7 @@ The following table describes error codes you may encounter in the `multiStageQu | `InsertTimeOutOfBounds` | A REPLACE query generated a timestamp outside the bounds of the TIMESTAMP parameter for your OVERWRITE WHERE clause.

To avoid this error, verify that the you specified is valid. | `interval`: time chunk interval corresponding to the out-of-bounds timestamp | | `InvalidNullByte` | A string column included a null byte. Null bytes in strings are not permitted. | `column`: The column that included the null byte | | `QueryNotSupported` | QueryKit could not translate the provided native query to a multi-stage query.

This can happen if the query uses features that aren't supported, like GROUPING SETS. | | +| `QueryStackError` | MSQ uses the native query engine to run the leaf stages. This error tells MSQ that error is in native query engine.

Since this is a generic error, the user needs to look at the error message and stack trace to figure out the course of action. If the user is stuck, consider raising a github issue for assistance. | `baseErrorMessage` error message from the native stack. | | `RowTooLarge` | The query tried to process a row that was too large to write to a single frame. See the [Limits](#limits) table for specific limits on frame size. Note that the effective maximum row size is smaller than the maximum frame size due to alignment considerations during frame writing. | `maxFrameSize`: The limit on the frame size. | | `TaskStartTimeout` | Unable to launch all the worker tasks in time.

There might be insufficient available slots to start all the worker tasks simultaneously.

Try splitting up the query into smaller chunks with lesser `maxNumTasks` number. Another option is to increase capacity. | `numTasks`: The number of tasks attempted to launch. | | `TooManyAttemptsForJob` | Total relaunch attempt count across all workers exceeded max relaunch attempt limit. See the [Limits](#limits) table for the specific limit. | `maxRelaunchCount`: Max number of relaunches across all the workers defined in the [Limits](#limits) section.

`currentRelaunchCount`: current relaunch counter for the job across all workers.

`taskId`: Latest task id which failed

`rootErrorMessage`: Error message of the latest failed task.| diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/MSQTasks.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/MSQTasks.java index 623d4c737f19..5bfdffd8c942 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/MSQTasks.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/MSQTasks.java @@ -33,6 +33,7 @@ import org.apache.druid.msq.indexing.error.MSQException; import org.apache.druid.msq.indexing.error.MSQFault; import org.apache.druid.msq.indexing.error.MSQFaultUtils; +import org.apache.druid.msq.indexing.error.QueryStackFault; import org.apache.druid.msq.indexing.error.UnknownFault; import org.apache.druid.msq.indexing.error.WorkerFailedFault; import org.apache.druid.msq.indexing.error.WorkerRpcFailedFault; @@ -228,8 +229,8 @@ static String errorReportToLogMessage(final MSQErrorReport errorReport) logMessage.append(": ").append(MSQFaultUtils.generateMessageWithErrorCode(errorReport.getFault())); if (errorReport.getExceptionStackTrace() != null) { - if (errorReport.getFault() instanceof UnknownFault) { - // Log full stack trace for unknown faults. + if (errorReport.getFault() instanceof UnknownFault || errorReport.getFault() instanceof QueryStackFault) { + // Log full stack trace for unknown and QueryStack faults logMessage.append('\n').append(errorReport.getExceptionStackTrace()); } else { // Log first line only (error class, message) for known faults, to avoid polluting logs. 
diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/error/MSQErrorReport.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/error/MSQErrorReport.java index 31bc2753aa99..d66d307912f2 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/error/MSQErrorReport.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/error/MSQErrorReport.java @@ -26,8 +26,10 @@ import com.google.common.base.Throwables; import org.apache.druid.frame.processor.FrameRowTooLargeException; import org.apache.druid.frame.write.UnsupportedColumnTypeException; +import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.java.util.common.parsers.ParseException; import org.apache.druid.msq.statistics.TooManyBucketsException; +import org.apache.druid.query.groupby.epinephelinae.UnexpectedMultiValueDimensionException; import javax.annotation.Nullable; import java.util.Objects; @@ -190,7 +192,13 @@ public static MSQFault getFaultFromException(@Nullable final Throwable e) return new TooManyBucketsFault(((TooManyBucketsException) cause).getMaxBuckets()); } else if (cause instanceof FrameRowTooLargeException) { return new RowTooLargeFault(((FrameRowTooLargeException) cause).getMaxFrameSize()); - } else { + } else if (cause instanceof UnexpectedMultiValueDimensionException) { + return new QueryStackFault(StringUtils.format( + "Column [%s] is a multi value string. 
Please wrap the column using MV_TO_ARRAY() to proceed further.", + ((UnexpectedMultiValueDimensionException) cause).getDimensionName() + ), cause.getMessage()); + } + else { cause = cause.getCause(); } } diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/error/QueryStackFault.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/error/QueryStackFault.java new file mode 100644 index 000000000000..26e2785cdef6 --- /dev/null +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/error/QueryStackFault.java @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.msq.indexing.error; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.annotation.JsonTypeName; + +import java.util.Objects; + +/** + * Fault to throw when the error cames from the druid native query stack while running in the MSQ engine . 
+ */ +@JsonTypeName(QueryStackFault.CODE) +public class QueryStackFault extends BaseMSQFault +{ + public static final String CODE = "QueryStackError"; + private final String baseErrorMessage; + + + @JsonCreator + public QueryStackFault( + @JsonProperty("errorMessage") String errorMessage, + @JsonProperty("baseErrorMessage") String baseErrorMessage + ) + { + super(CODE, errorMessage); + this.baseErrorMessage = baseErrorMessage; + } + + @JsonProperty + public String getBaseErrorMessage() + { + return baseErrorMessage; + } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + if (!super.equals(o)) { + return false; + } + QueryStackFault that = (QueryStackFault) o; + return Objects.equals(baseErrorMessage, that.baseErrorMessage); + } + + @Override + public int hashCode() + { + return Objects.hash(super.hashCode(), baseErrorMessage); + } + +} diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQInsertTest.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQInsertTest.java index efd9e5e9d9f9..61ac07bd444a 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQInsertTest.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQInsertTest.java @@ -348,11 +348,8 @@ public void testInsertOnFoo1WithMultiValueDimGroupByWithoutGroupByEnable() .setQueryContext(localContext) .setExpectedExecutionErrorMatcher(CoreMatchers.allOf( CoreMatchers.instanceOf(ISE.class), - ThrowableMessageMatcher.hasMessage(!FAULT_TOLERANCE.equals(contextName) - ? 
CoreMatchers.containsString( - "Encountered multi-value dimension [dim3] that cannot be processed with 'groupByEnableMultiValueUnnesting' set to false.") - : - CoreMatchers.containsString("exceeded max relaunch count") + ThrowableMessageMatcher.hasMessage(CoreMatchers.containsString( + "Column [dim3] is a multi value string. Please wrap the column using MV_TO_ARRAY() to proceed further.") ) )) .verifyExecutionError(); diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQSelectTest.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQSelectTest.java index b871fc4a6e2a..e69072f3f802 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQSelectTest.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQSelectTest.java @@ -1233,12 +1233,8 @@ public void testGroupByWithMultiValueWithoutGroupByEnable() .setQueryContext(localContext) .setExpectedExecutionErrorMatcher(CoreMatchers.allOf( CoreMatchers.instanceOf(ISE.class), - ThrowableMessageMatcher.hasMessage( - !FAULT_TOLERANCE.equals(contextName) - ? CoreMatchers.containsString( - "Encountered multi-value dimension [dim3] that cannot be processed with 'groupByEnableMultiValueUnnesting' set to false.") - : - CoreMatchers.containsString("exceeded max relaunch count") + ThrowableMessageMatcher.hasMessage(CoreMatchers.containsString( + "Column [dim3] is a multi value string. Please wrap the column using MV_TO_ARRAY() to proceed further.") ) )) .verifyExecutionError(); @@ -1375,11 +1371,8 @@ public void testGroupByWithMultiValueMvToArrayWithoutGroupByEnable() .setExpectedExecutionErrorMatcher(CoreMatchers.allOf( CoreMatchers.instanceOf(ISE.class), ThrowableMessageMatcher.hasMessage( - !FAULT_TOLERANCE.equals(contextName) - ? 
CoreMatchers.containsString( + CoreMatchers.containsString( "Encountered multi-value dimension [dim3] that cannot be processed with 'groupByEnableMultiValueUnnesting' set to false.") - : - CoreMatchers.containsString("exceeded max relaunch count") ) )) .verifyExecutionError(); @@ -1506,7 +1499,11 @@ public void testGroupByOnFooWithDurableStoragePathAssertions() throws IOExceptio @Test public void testMultiValueStringWithIncorrectType() throws IOException { - final File toRead = MSQTestFileUtils.getResourceAsTemporaryFile(temporaryFolder, this, "/unparseable-mv-string-array.json"); + final File toRead = MSQTestFileUtils.getResourceAsTemporaryFile( + temporaryFolder, + this, + "/unparseable-mv-string-array.json" + ); final String toReadAsJson = queryFramework().queryJsonMapper().writeValueAsString(toRead.getAbsolutePath()); RowSignature rowSignature = RowSignature.builder() diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/error/MSQFaultSerdeTest.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/error/MSQFaultSerdeTest.java index aae47170a28a..209a81b52461 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/error/MSQFaultSerdeTest.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/error/MSQFaultSerdeTest.java @@ -62,6 +62,7 @@ public void testFaultSerde() throws IOException assertFaultSerde(new InvalidNullByteFault("the column")); assertFaultSerde(new NotEnoughMemoryFault(1000, 1000, 900, 1, 2)); assertFaultSerde(QueryNotSupportedFault.INSTANCE); + assertFaultSerde(new QueryStackFault("new error", "base error")); assertFaultSerde(new RowTooLargeFault(1000)); assertFaultSerde(new TaskStartTimeoutFault(10)); assertFaultSerde(new TooManyBucketsFault(10)); From ac51785a845c02a03bbf126b5f5eff703e42db63 Mon Sep 17 00:00:00 2001 From: cryptoe Date: Tue, 4 Apr 2023 17:53:53 +0530 Subject: [PATCH 2/3] Review comments --- 
docs/multi-stage-query/reference.md | 2 +- .../org/apache/druid/msq/exec/MSQTasks.java | 8 +- .../druid/msq/guice/MSQIndexingModule.java | 2 + .../msq/indexing/error/MSQErrorReport.java | 10 ++- ...StackFault.java => QueryRuntimeFault.java} | 17 ++-- .../indexing/error/MSQErrorReportTest.java | 77 +++++++++++++++++++ .../msq/indexing/error/MSQFaultSerdeTest.java | 3 +- 7 files changed, 102 insertions(+), 17 deletions(-) rename extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/error/{QueryStackFault.java => QueryRuntimeFault.java} (78%) create mode 100644 extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/error/MSQErrorReportTest.java diff --git a/docs/multi-stage-query/reference.md b/docs/multi-stage-query/reference.md index 0ce01cd31ae2..5bad444aad48 100644 --- a/docs/multi-stage-query/reference.md +++ b/docs/multi-stage-query/reference.md @@ -750,7 +750,7 @@ The following table describes error codes you may encounter in the `multiStageQu | `InsertTimeOutOfBounds` | A REPLACE query generated a timestamp outside the bounds of the TIMESTAMP parameter for your OVERWRITE WHERE clause.

To avoid this error, verify that the you specified is valid. | `interval`: time chunk interval corresponding to the out-of-bounds timestamp | | `InvalidNullByte` | A string column included a null byte. Null bytes in strings are not permitted. | `column`: The column that included the null byte | | `QueryNotSupported` | QueryKit could not translate the provided native query to a multi-stage query.

This can happen if the query uses features that aren't supported, like GROUPING SETS. | | -| `QueryStackError` | MSQ uses the native query engine to run the leaf stages. This error tells MSQ that error is in native query engine.

Since this is a generic error, the user needs to look at the error message and stack trace to figure out the course of action. If the user is stuck, consider raising a github issue for assistance. | `baseErrorMessage` error message from the native stack. | +| `QueryRuntimeError` | MSQ uses the native query engine to run the leaf stages. This error tells MSQ that error is in native query runtime.

Since this is a generic error, the user needs to look at logs for the error message and stack trace to figure out the next course of action. If the user is stuck, consider raising a github issue for assistance. | `baseErrorMessage` error message from the native query runtime. | | `RowTooLarge` | The query tried to process a row that was too large to write to a single frame. See the [Limits](#limits) table for specific limits on frame size. Note that the effective maximum row size is smaller than the maximum frame size due to alignment considerations during frame writing. | `maxFrameSize`: The limit on the frame size. | | `TaskStartTimeout` | Unable to launch `numTasks` tasks within `timeout` milliseconds.

There may be insufficient available slots to start all the worker tasks simultaneously. Try splitting up your query into smaller chunks using a smaller value of [`maxNumTasks`](#context-parameters). Another option is to increase capacity. | `numTasks`: The number of tasks attempted to launch.

`timeout`: Timeout, in milliseconds, that was exceeded. | | `TooManyAttemptsForJob` | Total relaunch attempt count across all workers exceeded max relaunch attempt limit. See the [Limits](#limits) table for the specific limit. | `maxRelaunchCount`: Max number of relaunches across all the workers defined in the [Limits](#limits) section.

`currentRelaunchCount`: current relaunch counter for the job across all workers.

`taskId`: Latest task id which failed

`rootErrorMessage`: Error message of the latest failed task.| diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/MSQTasks.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/MSQTasks.java index 5bfdffd8c942..3564ea59a9e0 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/MSQTasks.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/MSQTasks.java @@ -33,7 +33,7 @@ import org.apache.druid.msq.indexing.error.MSQException; import org.apache.druid.msq.indexing.error.MSQFault; import org.apache.druid.msq.indexing.error.MSQFaultUtils; -import org.apache.druid.msq.indexing.error.QueryStackFault; +import org.apache.druid.msq.indexing.error.QueryRuntimeFault; import org.apache.druid.msq.indexing.error.UnknownFault; import org.apache.druid.msq.indexing.error.WorkerFailedFault; import org.apache.druid.msq.indexing.error.WorkerRpcFailedFault; @@ -163,12 +163,12 @@ static StorageConnector makeStorageConnector(final Injector injector) /** * Builds an error report from a possible controller error report and a possible worker error report. Both may be * null, in which case this function will return a report with {@link UnknownFault}. - * + *
* We only include a single {@link MSQErrorReport} in the task report, because it's important that a query have * a single {@link MSQFault} explaining why it failed. To aid debugging * in cases where we choose the controller error over the worker error, we'll log the worker error too, even though * it doesn't appear in the report. - * + *
* Logic: we prefer the controller exception unless it's {@link WorkerFailedFault}, {@link WorkerRpcFailedFault}, * or {@link CanceledFault}. In these cases we prefer the worker error report. This ensures we get the best, most * useful exception even when the controller cancels worker tasks after a failure. (As tasks are canceled one by @@ -229,7 +229,7 @@ static String errorReportToLogMessage(final MSQErrorReport errorReport) logMessage.append(": ").append(MSQFaultUtils.generateMessageWithErrorCode(errorReport.getFault())); if (errorReport.getExceptionStackTrace() != null) { - if (errorReport.getFault() instanceof UnknownFault || errorReport.getFault() instanceof QueryStackFault) { + if (errorReport.getFault() instanceof UnknownFault || errorReport.getFault() instanceof QueryRuntimeFault) { // Log full stack trace for unknown and QueryStack faults logMessage.append('\n').append(errorReport.getExceptionStackTrace()); } else { diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/guice/MSQIndexingModule.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/guice/MSQIndexingModule.java index 49656729a028..ea6a3c5bdb8d 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/guice/MSQIndexingModule.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/guice/MSQIndexingModule.java @@ -53,6 +53,7 @@ import org.apache.druid.msq.indexing.error.MSQFault; import org.apache.druid.msq.indexing.error.NotEnoughMemoryFault; import org.apache.druid.msq.indexing.error.QueryNotSupportedFault; +import org.apache.druid.msq.indexing.error.QueryRuntimeFault; import org.apache.druid.msq.indexing.error.RowTooLargeFault; import org.apache.druid.msq.indexing.error.TaskStartTimeoutFault; import org.apache.druid.msq.indexing.error.TooManyAttemptsForJob; @@ -114,6 +115,7 @@ public class MSQIndexingModule implements DruidModule InvalidNullByteFault.class, NotEnoughMemoryFault.class, 
QueryNotSupportedFault.class, + QueryRuntimeFault.class, RowTooLargeFault.class, TaskStartTimeoutFault.class, TooManyBucketsFault.class, diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/error/MSQErrorReport.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/error/MSQErrorReport.java index d66d307912f2..2ed375c7911b 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/error/MSQErrorReport.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/error/MSQErrorReport.java @@ -49,7 +49,7 @@ public class MSQErrorReport MSQErrorReport( @JsonProperty("taskId") final String taskId, @JsonProperty("host") @Nullable final String host, - @JsonProperty("stageNumber") final Integer stageNumber, + @JsonProperty("stageNumber") @Nullable final Integer stageNumber, @JsonProperty("error") final MSQFault fault, @JsonProperty("exceptionStackTrace") @Nullable final String exceptionStackTrace ) @@ -193,12 +193,14 @@ public static MSQFault getFaultFromException(@Nullable final Throwable e) } else if (cause instanceof FrameRowTooLargeException) { return new RowTooLargeFault(((FrameRowTooLargeException) cause).getMaxFrameSize()); } else if (cause instanceof UnexpectedMultiValueDimensionException) { - return new QueryStackFault(StringUtils.format( + return new QueryRuntimeFault(StringUtils.format( "Column [%s] is a multi value string. Please wrap the column using MV_TO_ARRAY() to proceed further.", ((UnexpectedMultiValueDimensionException) cause).getDimensionName() ), cause.getMessage()); - } - else { + } else if (cause.getClass().getPackage().getName().startsWith("org.apache.druid.query")) { + // catch all for all query runtime exception faults. 
+ return new QueryRuntimeFault(e.getMessage(), null); + } else { cause = cause.getCause(); } } diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/error/QueryStackFault.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/error/QueryRuntimeFault.java similarity index 78% rename from extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/error/QueryStackFault.java rename to extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/error/QueryRuntimeFault.java index 26e2785cdef6..2a341305790c 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/error/QueryStackFault.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/error/QueryRuntimeFault.java @@ -23,22 +23,24 @@ import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.annotation.JsonTypeName; +import javax.annotation.Nullable; import java.util.Objects; /** - * Fault to throw when the error cames from the druid native query stack while running in the MSQ engine . + * Fault to throw when the error comes from the druid native query runtime while running in the MSQ engine . 
*/ -@JsonTypeName(QueryStackFault.CODE) -public class QueryStackFault extends BaseMSQFault +@JsonTypeName(QueryRuntimeFault.CODE) +public class QueryRuntimeFault extends BaseMSQFault { - public static final String CODE = "QueryStackError"; + public static final String CODE = "QueryRuntimeError"; + @Nullable private final String baseErrorMessage; @JsonCreator - public QueryStackFault( + public QueryRuntimeFault( @JsonProperty("errorMessage") String errorMessage, - @JsonProperty("baseErrorMessage") String baseErrorMessage + @Nullable @JsonProperty("baseErrorMessage") String baseErrorMessage ) { super(CODE, errorMessage); @@ -46,6 +48,7 @@ public QueryStackFault( } @JsonProperty + @Nullable public String getBaseErrorMessage() { return baseErrorMessage; @@ -63,7 +66,7 @@ public boolean equals(Object o) if (!super.equals(o)) { return false; } - QueryStackFault that = (QueryStackFault) o; + QueryRuntimeFault that = (QueryRuntimeFault) o; return Objects.equals(baseErrorMessage, that.baseErrorMessage); } diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/error/MSQErrorReportTest.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/error/MSQErrorReportTest.java new file mode 100644 index 000000000000..c04265850518 --- /dev/null +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/error/MSQErrorReportTest.java @@ -0,0 +1,77 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.msq.indexing.error; + +import org.apache.druid.frame.processor.FrameRowTooLargeException; +import org.apache.druid.frame.write.UnsupportedColumnTypeException; +import org.apache.druid.java.util.common.parsers.ParseException; +import org.apache.druid.msq.statistics.TooManyBucketsException; +import org.apache.druid.query.QueryTimeoutException; +import org.apache.druid.query.groupby.epinephelinae.UnexpectedMultiValueDimensionException; +import org.junit.Assert; +import org.junit.Test; + +public class MSQErrorReportTest +{ + + public static final String ERROR_MESSAGE = "test"; + + @Test + public void testErrorReportFault() + { + Assert.assertEquals(UnknownFault.forException(null), MSQErrorReport.getFaultFromException(null)); + + MSQException msqException = new MSQException(null, UnknownFault.forMessage(ERROR_MESSAGE)); + Assert.assertEquals(msqException.getFault(), MSQErrorReport.getFaultFromException(msqException)); + + ParseException parseException = new ParseException(null, ERROR_MESSAGE); + Assert.assertEquals( + new CannotParseExternalDataFault(ERROR_MESSAGE), + MSQErrorReport.getFaultFromException(parseException) + ); + + UnsupportedColumnTypeException columnTypeException = new UnsupportedColumnTypeException(ERROR_MESSAGE, null); + Assert.assertEquals( + new ColumnTypeNotSupportedFault(ERROR_MESSAGE, null), + MSQErrorReport.getFaultFromException(columnTypeException) + ); + + TooManyBucketsException tooManyBucketsException = new TooManyBucketsException(10); + Assert.assertEquals(new TooManyBucketsFault(10), 
MSQErrorReport.getFaultFromException(tooManyBucketsException)); + + FrameRowTooLargeException tooLargeException = new FrameRowTooLargeException(10); + Assert.assertEquals(new RowTooLargeFault(10), MSQErrorReport.getFaultFromException(tooLargeException)); + + UnexpectedMultiValueDimensionException mvException = new UnexpectedMultiValueDimensionException(ERROR_MESSAGE); + Assert.assertEquals(QueryRuntimeFault.CODE, MSQErrorReport.getFaultFromException(mvException).getErrorCode()); + + QueryTimeoutException queryException = new QueryTimeoutException(ERROR_MESSAGE); + Assert.assertEquals( + new QueryRuntimeFault(ERROR_MESSAGE, null), + MSQErrorReport.getFaultFromException(queryException) + ); + + RuntimeException runtimeException = new RuntimeException(ERROR_MESSAGE); + Assert.assertEquals( + UnknownFault.forException(runtimeException), + MSQErrorReport.getFaultFromException(runtimeException) + ); + } +} diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/error/MSQFaultSerdeTest.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/error/MSQFaultSerdeTest.java index a1d4748f2763..8342510c7688 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/error/MSQFaultSerdeTest.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/error/MSQFaultSerdeTest.java @@ -63,7 +63,8 @@ public void testFaultSerde() throws IOException assertFaultSerde(new InvalidNullByteFault("the column")); assertFaultSerde(new NotEnoughMemoryFault(1000, 1000, 900, 1, 2)); assertFaultSerde(QueryNotSupportedFault.INSTANCE); - assertFaultSerde(new QueryStackFault("new error", "base error")); + assertFaultSerde(new QueryRuntimeFault("new error", "base error")); + assertFaultSerde(new QueryRuntimeFault("new error", null)); assertFaultSerde(new RowTooLargeFault(1000)); assertFaultSerde(new TaskStartTimeoutFault(10, 11)); assertFaultSerde(new TooManyBucketsFault(10)); 
From 49ddb66a777d4c9fe4fa86e3ed8e94c3151e3068 Mon Sep 17 00:00:00 2001 From: cryptoe Date: Wed, 5 Apr 2023 12:01:24 +0530 Subject: [PATCH 3/3] Fixing spell check --- docs/multi-stage-query/reference.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/multi-stage-query/reference.md b/docs/multi-stage-query/reference.md index 5bad444aad48..d34f3e0cf9c4 100644 --- a/docs/multi-stage-query/reference.md +++ b/docs/multi-stage-query/reference.md @@ -750,7 +750,7 @@ The following table describes error codes you may encounter in the `multiStageQu | `InsertTimeOutOfBounds` | A REPLACE query generated a timestamp outside the bounds of the TIMESTAMP parameter for your OVERWRITE WHERE clause.

To avoid this error, verify that the you specified is valid. | `interval`: time chunk interval corresponding to the out-of-bounds timestamp | | `InvalidNullByte` | A string column included a null byte. Null bytes in strings are not permitted. | `column`: The column that included the null byte | | `QueryNotSupported` | QueryKit could not translate the provided native query to a multi-stage query.

This can happen if the query uses features that aren't supported, like GROUPING SETS. | | -| `QueryRuntimeError` | MSQ uses the native query engine to run the leaf stages. This error tells MSQ that error is in native query runtime.

Since this is a generic error, the user needs to look at logs for the error message and stack trace to figure out the next course of action. If the user is stuck, consider raising a github issue for assistance. | `baseErrorMessage` error message from the native query runtime. | +| `QueryRuntimeError` | MSQ uses the native query engine to run the leaf stages. This error tells MSQ that error is in native query runtime.

Since this is a generic error, the user needs to look at logs for the error message and stack trace to figure out the next course of action. If the user is stuck, consider raising a `github` issue for assistance. | `baseErrorMessage`: error message from the native query runtime. | | `RowTooLarge` | The query tried to process a row that was too large to write to a single frame. See the [Limits](#limits) table for specific limits on frame size. Note that the effective maximum row size is smaller than the maximum frame size due to alignment considerations during frame writing. | `maxFrameSize`: The limit on the frame size. | | `TaskStartTimeout` | Unable to launch `numTasks` tasks within `timeout` milliseconds.

There may be insufficient available slots to start all the worker tasks simultaneously. Try splitting up your query into smaller chunks using a smaller value of [`maxNumTasks`](#context-parameters). Another option is to increase capacity. | `numTasks`: The number of tasks attempted to launch.

`timeout`: Timeout, in milliseconds, that was exceeded. | | `TooManyAttemptsForJob` | Total relaunch attempt count across all workers exceeded max relaunch attempt limit. See the [Limits](#limits) table for the specific limit. | `maxRelaunchCount`: Max number of relaunches across all the workers defined in the [Limits](#limits) section.

`currentRelaunchCount`: current relaunch counter for the job across all workers.

`taskId`: Latest task id which failed

`rootErrorMessage`: Error message of the latest failed task.|