Skip to content

Commit d0d4aab

Browse files
bersprockets authored and cloud-fan committed
[SPARK-44154][SQL][FOLLOWUP] BitmapCount and BitmapOrAgg should use DataTypeMismatch to indicate unexpected input data type
### What changes were proposed in this pull request?

Change `BitmapCount` and `BitmapOrAgg` to use `DataTypeMismatch` rather than `TypeCheckResult.TypeCheckFailure` to indicate incorrect input types.

### Why are the changes needed?

It appears `TypeCheckResult.TypeCheckFailure` has been deprecated: no expressions except for the recently added `BitmapCount` and `BitmapOrAgg` are using it.

### Does this PR introduce _any_ user-facing change?

This PR changes an error message for two expressions that are not yet in any released version of Spark.

Before PR:

```
spark-sql (default)> select bitmap_count(12);
[DATATYPE_MISMATCH.TYPE_CHECK_FAILURE_WITH_HINT] Cannot resolve "bitmap_count(12)" due to data type mismatch: Bitmap must be a BinaryType.; line 1 pos 7;
'Project [unresolvedalias(bitmap_count(12), None)]
+- OneRowRelation

spark-sql (default)> select bitmap_or_agg(12);
[DATATYPE_MISMATCH.TYPE_CHECK_FAILURE_WITH_HINT] Cannot resolve "bitmap_or_agg(12)" due to data type mismatch: Bitmap must be a BinaryType.; line 1 pos 7;
'Aggregate [unresolvedalias(bitmap_or_agg(12, 0, 0), None)]
+- OneRowRelation
```

After PR:

```
spark-sql (default)> select bitmap_count(12);
[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "bitmap_count(12)" due to data type mismatch: Parameter 0 requires the "BINARY" type, however "12" has the type "INT".; line 1 pos 7;
'Project [unresolvedalias(bitmap_count(12), None)]
+- OneRowRelation

spark-sql (default)> select bitmap_or_agg(12);
[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "bitmap_or_agg(12)" due to data type mismatch: Parameter 0 requires the "BINARY" type, however "12" has the type "INT".; line 1 pos 7;
'Aggregate [unresolvedalias(bitmap_or_agg(12, 0, 0), None)]
+- OneRowRelation
```

### How was this patch tested?

New unit tests.

Closes #42139 from bersprockets/bitmap_type_check.

Authored-by: Bruce Robbins <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
1 parent 071feab commit d0d4aab

File tree

2 files changed

+66
-4
lines changed

2 files changed

+66
-4
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/bitmapExpressions.scala

+22-4
Original file line number | Diff line number | Diff line change
@@ -19,10 +19,12 @@ package org.apache.spark.sql.catalyst.expressions
1919

2020
import org.apache.spark.sql.catalyst.InternalRow
2121
import org.apache.spark.sql.catalyst.analysis.TypeCheckResult
22+
import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.{DataTypeMismatch, TypeCheckSuccess}
2223
import org.apache.spark.sql.catalyst.expressions.aggregate.ImperativeAggregate
2324
import org.apache.spark.sql.catalyst.expressions.objects.StaticInvoke
2425
import org.apache.spark.sql.catalyst.trees.UnaryLike
2526
import org.apache.spark.sql.catalyst.types.DataTypeUtils
27+
import org.apache.spark.sql.catalyst.util.TypeUtils._
2628
import org.apache.spark.sql.errors.QueryExecutionErrors
2729
import org.apache.spark.sql.types.{AbstractDataType, BinaryType, DataType, LongType, StructType}
2830

@@ -111,9 +113,17 @@ case class BitmapCount(child: Expression)
111113

112114
override def checkInputDataTypes(): TypeCheckResult = {
113115
if (child.dataType != BinaryType) {
114-
TypeCheckResult.TypeCheckFailure("Bitmap must be a BinaryType")
116+
DataTypeMismatch(
117+
errorSubClass = "UNEXPECTED_INPUT_TYPE",
118+
messageParameters = Map(
119+
"paramIndex" -> "0",
120+
"requiredType" -> toSQLType(BinaryType),
121+
"inputSql" -> toSQLExpr(child),
122+
"inputType" -> toSQLType(child.dataType)
123+
)
124+
)
115125
} else {
116-
TypeCheckResult.TypeCheckSuccess
126+
TypeCheckSuccess
117127
}
118128
}
119129

@@ -248,9 +258,17 @@ case class BitmapOrAgg(child: Expression,
248258

249259
override def checkInputDataTypes(): TypeCheckResult = {
250260
if (child.dataType != BinaryType) {
251-
TypeCheckResult.TypeCheckFailure("Bitmap must be a BinaryType")
261+
DataTypeMismatch(
262+
errorSubClass = "UNEXPECTED_INPUT_TYPE",
263+
messageParameters = Map(
264+
"paramIndex" -> "0",
265+
"requiredType" -> toSQLType(BinaryType),
266+
"inputSql" -> toSQLExpr(child),
267+
"inputType" -> toSQLType(child.dataType)
268+
)
269+
)
252270
} else {
253-
TypeCheckResult.TypeCheckSuccess
271+
TypeCheckSuccess
254272
}
255273
}
256274

sql/core/src/test/scala/org/apache/spark/sql/BitmapExpressionsQuerySuite.scala

+44
Original file line number | Diff line number | Diff line change
@@ -207,4 +207,48 @@ class BitmapExpressionsQuerySuite extends QueryTest with SharedSparkSession {
207207
Seq(Row("700000"))
208208
)
209209
}
210+
211+
test("bitmap_count called with non-binary type") {
212+
val df = Seq(12).toDF("a")
213+
checkError(
214+
exception = intercept[AnalysisException] {
215+
df.selectExpr("bitmap_count(a)")
216+
},
217+
errorClass = "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE",
218+
parameters = Map(
219+
"sqlExpr" -> "\"bitmap_count(a)\"",
220+
"paramIndex" -> "0",
221+
"requiredType" -> "\"BINARY\"",
222+
"inputSql" -> "\"a\"",
223+
"inputType" -> "\"INT\""
224+
),
225+
context = ExpectedContext(
226+
fragment = "bitmap_count(a)",
227+
start = 0,
228+
stop = 14
229+
)
230+
)
231+
}
232+
233+
test("bitmap_or_agg called with non-binary type") {
234+
val df = Seq(12).toDF("a")
235+
checkError(
236+
exception = intercept[AnalysisException] {
237+
df.selectExpr("bitmap_or_agg(a)")
238+
},
239+
errorClass = "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE",
240+
parameters = Map(
241+
"sqlExpr" -> "\"bitmap_or_agg(a)\"",
242+
"paramIndex" -> "0",
243+
"requiredType" -> "\"BINARY\"",
244+
"inputSql" -> "\"a\"",
245+
"inputType" -> "\"INT\""
246+
),
247+
context = ExpectedContext(
248+
fragment = "bitmap_or_agg(a)",
249+
start = 0,
250+
stop = 15
251+
)
252+
)
253+
}
210254
}

0 commit comments

Comments
 (0)