Skip to content

Commit 6c8bd94

Browse files
wangyum authored and dongjoon-hyun committed
[SPARK-45915][SQL] Treat decimal(x, 0) the same as IntegralType in PromoteStrings
### What changes were proposed in this pull request? The common type of decimal(x, 0) and string is double, but the common type of int/bigint and string is int/bigint. This PR updates `PromoteStrings` to make the common type of decimal(x, 0) and string decimal(x, 0). ### Why are the changes needed? 1. Make decimal(x, 0) behave the same as int/bigint in `PromoteStrings`. 2. Remove one cast in binary comparisons so that bucketed reads may be used. For example: `cast(stringCol as double) = cast(decimalCol as double)` vs `cast(stringCol as decimal(x, 0)) = decimalCol`. ### Does this PR introduce _any_ user-facing change? Yes. The common type of decimal(x, 0) and string in a binary comparison is now decimal(x, 0). ### How was this patch tested? Unit tests. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #43812 from wangyum/SPARK-45915. Authored-by: Yuming Wang <[email protected]> Signed-off-by: Dongjoon Hyun <[email protected]>
1 parent d10f0ee commit 6c8bd94

File tree

5 files changed

+71
-68
lines changed

5 files changed

+71
-68
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala

+2-2
Original file line numberDiff line numberDiff line change
@@ -934,8 +934,8 @@ object TypeCoercion extends TypeCoercionBase {
934934
// There is no proper decimal type we can pick,
935935
// using double type is the best we can do.
936936
// See SPARK-22469 for details.
937-
case (n: DecimalType, s: StringType) => Some(DoubleType)
938-
case (s: StringType, n: DecimalType) => Some(DoubleType)
937+
case (DecimalType.Fixed(_, s), _: StringType) if s > 0 => Some(DoubleType)
938+
case (_: StringType, DecimalType.Fixed(_, s)) if s > 0 => Some(DoubleType)
939939

940940
case (l: StringType, r: AtomicType) if canPromoteAsInBinaryComparison(r) => Some(r)
941941
case (l: AtomicType, r: StringType) if canPromoteAsInBinaryComparison(l) => Some(l)

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala

+3
Original file line numberDiff line numberDiff line change
@@ -1611,6 +1611,9 @@ class TypeCoercionSuite extends TypeCoercionSuiteBase {
16111611
GreaterThan(Literal("1.5"), Literal(BigDecimal("0.5"))),
16121612
GreaterThan(Cast(Literal("1.5"), DoubleType), Cast(Literal(BigDecimal("0.5")),
16131613
DoubleType)))
1614+
ruleTest(rule,
1615+
GreaterThan(Literal("1.0"), Literal(BigDecimal("1"))),
1616+
GreaterThan(Cast(Literal("1.0"), DecimalType(1, 0)), Literal(BigDecimal("1"))))
16141617
// Checks that dates/timestamps are not promoted to strings
16151618
val date0301 = Literal(java.sql.Date.valueOf("2017-03-01"))
16161619
val timestamp0301000000 = Literal(Timestamp.valueOf("2017-03-01 00:00:00"))

sql/core/src/test/resources/sql-tests/analyzer-results/typeCoercion/native/binaryComparison.sql.out

+24-24
Original file line numberDiff line numberDiff line change
@@ -1330,7 +1330,7 @@ Project [NOT (cast(cast(null as string) as bigint) = cast(1 as bigint)) AS (NOT
13301330
-- !query
13311331
SELECT cast(1 as decimal(10, 0)) = '1' FROM t
13321332
-- !query analysis
1333-
Project [(cast(cast(1 as decimal(10,0)) as double) = cast(1 as double)) AS (CAST(1 AS DECIMAL(10,0)) = 1)#x]
1333+
Project [(cast(1 as decimal(10,0)) = cast(1 as decimal(10,0))) AS (CAST(1 AS DECIMAL(10,0)) = 1)#x]
13341334
+- SubqueryAlias t
13351335
+- View (`t`, [1#x])
13361336
+- Project [cast(1#x as int) AS 1#x]
@@ -1341,7 +1341,7 @@ Project [(cast(cast(1 as decimal(10,0)) as double) = cast(1 as double)) AS (CAST
13411341
-- !query
13421342
SELECT cast(1 as decimal(10, 0)) > '2' FROM t
13431343
-- !query analysis
1344-
Project [(cast(cast(1 as decimal(10,0)) as double) > cast(2 as double)) AS (CAST(1 AS DECIMAL(10,0)) > 2)#x]
1344+
Project [(cast(1 as decimal(10,0)) > cast(2 as decimal(10,0))) AS (CAST(1 AS DECIMAL(10,0)) > 2)#x]
13451345
+- SubqueryAlias t
13461346
+- View (`t`, [1#x])
13471347
+- Project [cast(1#x as int) AS 1#x]
@@ -1352,7 +1352,7 @@ Project [(cast(cast(1 as decimal(10,0)) as double) > cast(2 as double)) AS (CAST
13521352
-- !query
13531353
SELECT cast(1 as decimal(10, 0)) >= '2' FROM t
13541354
-- !query analysis
1355-
Project [(cast(cast(1 as decimal(10,0)) as double) >= cast(2 as double)) AS (CAST(1 AS DECIMAL(10,0)) >= 2)#x]
1355+
Project [(cast(1 as decimal(10,0)) >= cast(2 as decimal(10,0))) AS (CAST(1 AS DECIMAL(10,0)) >= 2)#x]
13561356
+- SubqueryAlias t
13571357
+- View (`t`, [1#x])
13581358
+- Project [cast(1#x as int) AS 1#x]
@@ -1363,7 +1363,7 @@ Project [(cast(cast(1 as decimal(10,0)) as double) >= cast(2 as double)) AS (CAS
13631363
-- !query
13641364
SELECT cast(1 as decimal(10, 0)) < '2' FROM t
13651365
-- !query analysis
1366-
Project [(cast(cast(1 as decimal(10,0)) as double) < cast(2 as double)) AS (CAST(1 AS DECIMAL(10,0)) < 2)#x]
1366+
Project [(cast(1 as decimal(10,0)) < cast(2 as decimal(10,0))) AS (CAST(1 AS DECIMAL(10,0)) < 2)#x]
13671367
+- SubqueryAlias t
13681368
+- View (`t`, [1#x])
13691369
+- Project [cast(1#x as int) AS 1#x]
@@ -1374,7 +1374,7 @@ Project [(cast(cast(1 as decimal(10,0)) as double) < cast(2 as double)) AS (CAST
13741374
-- !query
13751375
SELECT cast(1 as decimal(10, 0)) <> '2' FROM t
13761376
-- !query analysis
1377-
Project [NOT (cast(cast(1 as decimal(10,0)) as double) = cast(2 as double)) AS (NOT (CAST(1 AS DECIMAL(10,0)) = 2))#x]
1377+
Project [NOT (cast(1 as decimal(10,0)) = cast(2 as decimal(10,0))) AS (NOT (CAST(1 AS DECIMAL(10,0)) = 2))#x]
13781378
+- SubqueryAlias t
13791379
+- View (`t`, [1#x])
13801380
+- Project [cast(1#x as int) AS 1#x]
@@ -1385,7 +1385,7 @@ Project [NOT (cast(cast(1 as decimal(10,0)) as double) = cast(2 as double)) AS (
13851385
-- !query
13861386
SELECT cast(1 as decimal(10, 0)) <= '2' FROM t
13871387
-- !query analysis
1388-
Project [(cast(cast(1 as decimal(10,0)) as double) <= cast(2 as double)) AS (CAST(1 AS DECIMAL(10,0)) <= 2)#x]
1388+
Project [(cast(1 as decimal(10,0)) <= cast(2 as decimal(10,0))) AS (CAST(1 AS DECIMAL(10,0)) <= 2)#x]
13891389
+- SubqueryAlias t
13901390
+- View (`t`, [1#x])
13911391
+- Project [cast(1#x as int) AS 1#x]
@@ -1396,7 +1396,7 @@ Project [(cast(cast(1 as decimal(10,0)) as double) <= cast(2 as double)) AS (CAS
13961396
-- !query
13971397
SELECT cast(1 as decimal(10, 0)) = cast(null as string) FROM t
13981398
-- !query analysis
1399-
Project [(cast(cast(1 as decimal(10,0)) as double) = cast(cast(null as string) as double)) AS (CAST(1 AS DECIMAL(10,0)) = CAST(NULL AS STRING))#x]
1399+
Project [(cast(1 as decimal(10,0)) = cast(cast(null as string) as decimal(10,0))) AS (CAST(1 AS DECIMAL(10,0)) = CAST(NULL AS STRING))#x]
14001400
+- SubqueryAlias t
14011401
+- View (`t`, [1#x])
14021402
+- Project [cast(1#x as int) AS 1#x]
@@ -1407,7 +1407,7 @@ Project [(cast(cast(1 as decimal(10,0)) as double) = cast(cast(null as string) a
14071407
-- !query
14081408
SELECT cast(1 as decimal(10, 0)) > cast(null as string) FROM t
14091409
-- !query analysis
1410-
Project [(cast(cast(1 as decimal(10,0)) as double) > cast(cast(null as string) as double)) AS (CAST(1 AS DECIMAL(10,0)) > CAST(NULL AS STRING))#x]
1410+
Project [(cast(1 as decimal(10,0)) > cast(cast(null as string) as decimal(10,0))) AS (CAST(1 AS DECIMAL(10,0)) > CAST(NULL AS STRING))#x]
14111411
+- SubqueryAlias t
14121412
+- View (`t`, [1#x])
14131413
+- Project [cast(1#x as int) AS 1#x]
@@ -1418,7 +1418,7 @@ Project [(cast(cast(1 as decimal(10,0)) as double) > cast(cast(null as string) a
14181418
-- !query
14191419
SELECT cast(1 as decimal(10, 0)) >= cast(null as string) FROM t
14201420
-- !query analysis
1421-
Project [(cast(cast(1 as decimal(10,0)) as double) >= cast(cast(null as string) as double)) AS (CAST(1 AS DECIMAL(10,0)) >= CAST(NULL AS STRING))#x]
1421+
Project [(cast(1 as decimal(10,0)) >= cast(cast(null as string) as decimal(10,0))) AS (CAST(1 AS DECIMAL(10,0)) >= CAST(NULL AS STRING))#x]
14221422
+- SubqueryAlias t
14231423
+- View (`t`, [1#x])
14241424
+- Project [cast(1#x as int) AS 1#x]
@@ -1429,7 +1429,7 @@ Project [(cast(cast(1 as decimal(10,0)) as double) >= cast(cast(null as string)
14291429
-- !query
14301430
SELECT cast(1 as decimal(10, 0)) < cast(null as string) FROM t
14311431
-- !query analysis
1432-
Project [(cast(cast(1 as decimal(10,0)) as double) < cast(cast(null as string) as double)) AS (CAST(1 AS DECIMAL(10,0)) < CAST(NULL AS STRING))#x]
1432+
Project [(cast(1 as decimal(10,0)) < cast(cast(null as string) as decimal(10,0))) AS (CAST(1 AS DECIMAL(10,0)) < CAST(NULL AS STRING))#x]
14331433
+- SubqueryAlias t
14341434
+- View (`t`, [1#x])
14351435
+- Project [cast(1#x as int) AS 1#x]
@@ -1440,7 +1440,7 @@ Project [(cast(cast(1 as decimal(10,0)) as double) < cast(cast(null as string) a
14401440
-- !query
14411441
SELECT cast(1 as decimal(10, 0)) <> cast(null as string) FROM t
14421442
-- !query analysis
1443-
Project [NOT (cast(cast(1 as decimal(10,0)) as double) = cast(cast(null as string) as double)) AS (NOT (CAST(1 AS DECIMAL(10,0)) = CAST(NULL AS STRING)))#x]
1443+
Project [NOT (cast(1 as decimal(10,0)) = cast(cast(null as string) as decimal(10,0))) AS (NOT (CAST(1 AS DECIMAL(10,0)) = CAST(NULL AS STRING)))#x]
14441444
+- SubqueryAlias t
14451445
+- View (`t`, [1#x])
14461446
+- Project [cast(1#x as int) AS 1#x]
@@ -1451,7 +1451,7 @@ Project [NOT (cast(cast(1 as decimal(10,0)) as double) = cast(cast(null as strin
14511451
-- !query
14521452
SELECT cast(1 as decimal(10, 0)) <= cast(null as string) FROM t
14531453
-- !query analysis
1454-
Project [(cast(cast(1 as decimal(10,0)) as double) <= cast(cast(null as string) as double)) AS (CAST(1 AS DECIMAL(10,0)) <= CAST(NULL AS STRING))#x]
1454+
Project [(cast(1 as decimal(10,0)) <= cast(cast(null as string) as decimal(10,0))) AS (CAST(1 AS DECIMAL(10,0)) <= CAST(NULL AS STRING))#x]
14551455
+- SubqueryAlias t
14561456
+- View (`t`, [1#x])
14571457
+- Project [cast(1#x as int) AS 1#x]
@@ -1462,7 +1462,7 @@ Project [(cast(cast(1 as decimal(10,0)) as double) <= cast(cast(null as string)
14621462
-- !query
14631463
SELECT '1' = cast(1 as decimal(10, 0)) FROM t
14641464
-- !query analysis
1465-
Project [(cast(1 as double) = cast(cast(1 as decimal(10,0)) as double)) AS (1 = CAST(1 AS DECIMAL(10,0)))#x]
1465+
Project [(cast(1 as decimal(10,0)) = cast(1 as decimal(10,0))) AS (1 = CAST(1 AS DECIMAL(10,0)))#x]
14661466
+- SubqueryAlias t
14671467
+- View (`t`, [1#x])
14681468
+- Project [cast(1#x as int) AS 1#x]
@@ -1473,7 +1473,7 @@ Project [(cast(1 as double) = cast(cast(1 as decimal(10,0)) as double)) AS (1 =
14731473
-- !query
14741474
SELECT '2' > cast(1 as decimal(10, 0)) FROM t
14751475
-- !query analysis
1476-
Project [(cast(2 as double) > cast(cast(1 as decimal(10,0)) as double)) AS (2 > CAST(1 AS DECIMAL(10,0)))#x]
1476+
Project [(cast(2 as decimal(10,0)) > cast(1 as decimal(10,0))) AS (2 > CAST(1 AS DECIMAL(10,0)))#x]
14771477
+- SubqueryAlias t
14781478
+- View (`t`, [1#x])
14791479
+- Project [cast(1#x as int) AS 1#x]
@@ -1484,7 +1484,7 @@ Project [(cast(2 as double) > cast(cast(1 as decimal(10,0)) as double)) AS (2 >
14841484
-- !query
14851485
SELECT '2' >= cast(1 as decimal(10, 0)) FROM t
14861486
-- !query analysis
1487-
Project [(cast(2 as double) >= cast(cast(1 as decimal(10,0)) as double)) AS (2 >= CAST(1 AS DECIMAL(10,0)))#x]
1487+
Project [(cast(2 as decimal(10,0)) >= cast(1 as decimal(10,0))) AS (2 >= CAST(1 AS DECIMAL(10,0)))#x]
14881488
+- SubqueryAlias t
14891489
+- View (`t`, [1#x])
14901490
+- Project [cast(1#x as int) AS 1#x]
@@ -1495,7 +1495,7 @@ Project [(cast(2 as double) >= cast(cast(1 as decimal(10,0)) as double)) AS (2 >
14951495
-- !query
14961496
SELECT '2' < cast(1 as decimal(10, 0)) FROM t
14971497
-- !query analysis
1498-
Project [(cast(2 as double) < cast(cast(1 as decimal(10,0)) as double)) AS (2 < CAST(1 AS DECIMAL(10,0)))#x]
1498+
Project [(cast(2 as decimal(10,0)) < cast(1 as decimal(10,0))) AS (2 < CAST(1 AS DECIMAL(10,0)))#x]
14991499
+- SubqueryAlias t
15001500
+- View (`t`, [1#x])
15011501
+- Project [cast(1#x as int) AS 1#x]
@@ -1506,7 +1506,7 @@ Project [(cast(2 as double) < cast(cast(1 as decimal(10,0)) as double)) AS (2 <
15061506
-- !query
15071507
SELECT '2' <= cast(1 as decimal(10, 0)) FROM t
15081508
-- !query analysis
1509-
Project [(cast(2 as double) <= cast(cast(1 as decimal(10,0)) as double)) AS (2 <= CAST(1 AS DECIMAL(10,0)))#x]
1509+
Project [(cast(2 as decimal(10,0)) <= cast(1 as decimal(10,0))) AS (2 <= CAST(1 AS DECIMAL(10,0)))#x]
15101510
+- SubqueryAlias t
15111511
+- View (`t`, [1#x])
15121512
+- Project [cast(1#x as int) AS 1#x]
@@ -1517,7 +1517,7 @@ Project [(cast(2 as double) <= cast(cast(1 as decimal(10,0)) as double)) AS (2 <
15171517
-- !query
15181518
SELECT '2' <> cast(1 as decimal(10, 0)) FROM t
15191519
-- !query analysis
1520-
Project [NOT (cast(2 as double) = cast(cast(1 as decimal(10,0)) as double)) AS (NOT (2 = CAST(1 AS DECIMAL(10,0))))#x]
1520+
Project [NOT (cast(2 as decimal(10,0)) = cast(1 as decimal(10,0))) AS (NOT (2 = CAST(1 AS DECIMAL(10,0))))#x]
15211521
+- SubqueryAlias t
15221522
+- View (`t`, [1#x])
15231523
+- Project [cast(1#x as int) AS 1#x]
@@ -1528,7 +1528,7 @@ Project [NOT (cast(2 as double) = cast(cast(1 as decimal(10,0)) as double)) AS (
15281528
-- !query
15291529
SELECT cast(null as string) = cast(1 as decimal(10, 0)) FROM t
15301530
-- !query analysis
1531-
Project [(cast(cast(null as string) as double) = cast(cast(1 as decimal(10,0)) as double)) AS (CAST(NULL AS STRING) = CAST(1 AS DECIMAL(10,0)))#x]
1531+
Project [(cast(cast(null as string) as decimal(10,0)) = cast(1 as decimal(10,0))) AS (CAST(NULL AS STRING) = CAST(1 AS DECIMAL(10,0)))#x]
15321532
+- SubqueryAlias t
15331533
+- View (`t`, [1#x])
15341534
+- Project [cast(1#x as int) AS 1#x]
@@ -1539,7 +1539,7 @@ Project [(cast(cast(null as string) as double) = cast(cast(1 as decimal(10,0)) a
15391539
-- !query
15401540
SELECT cast(null as string) > cast(1 as decimal(10, 0)) FROM t
15411541
-- !query analysis
1542-
Project [(cast(cast(null as string) as double) > cast(cast(1 as decimal(10,0)) as double)) AS (CAST(NULL AS STRING) > CAST(1 AS DECIMAL(10,0)))#x]
1542+
Project [(cast(cast(null as string) as decimal(10,0)) > cast(1 as decimal(10,0))) AS (CAST(NULL AS STRING) > CAST(1 AS DECIMAL(10,0)))#x]
15431543
+- SubqueryAlias t
15441544
+- View (`t`, [1#x])
15451545
+- Project [cast(1#x as int) AS 1#x]
@@ -1550,7 +1550,7 @@ Project [(cast(cast(null as string) as double) > cast(cast(1 as decimal(10,0)) a
15501550
-- !query
15511551
SELECT cast(null as string) >= cast(1 as decimal(10, 0)) FROM t
15521552
-- !query analysis
1553-
Project [(cast(cast(null as string) as double) >= cast(cast(1 as decimal(10,0)) as double)) AS (CAST(NULL AS STRING) >= CAST(1 AS DECIMAL(10,0)))#x]
1553+
Project [(cast(cast(null as string) as decimal(10,0)) >= cast(1 as decimal(10,0))) AS (CAST(NULL AS STRING) >= CAST(1 AS DECIMAL(10,0)))#x]
15541554
+- SubqueryAlias t
15551555
+- View (`t`, [1#x])
15561556
+- Project [cast(1#x as int) AS 1#x]
@@ -1561,7 +1561,7 @@ Project [(cast(cast(null as string) as double) >= cast(cast(1 as decimal(10,0))
15611561
-- !query
15621562
SELECT cast(null as string) < cast(1 as decimal(10, 0)) FROM t
15631563
-- !query analysis
1564-
Project [(cast(cast(null as string) as double) < cast(cast(1 as decimal(10,0)) as double)) AS (CAST(NULL AS STRING) < CAST(1 AS DECIMAL(10,0)))#x]
1564+
Project [(cast(cast(null as string) as decimal(10,0)) < cast(1 as decimal(10,0))) AS (CAST(NULL AS STRING) < CAST(1 AS DECIMAL(10,0)))#x]
15651565
+- SubqueryAlias t
15661566
+- View (`t`, [1#x])
15671567
+- Project [cast(1#x as int) AS 1#x]
@@ -1572,7 +1572,7 @@ Project [(cast(cast(null as string) as double) < cast(cast(1 as decimal(10,0)) a
15721572
-- !query
15731573
SELECT cast(null as string) <= cast(1 as decimal(10, 0)) FROM t
15741574
-- !query analysis
1575-
Project [(cast(cast(null as string) as double) <= cast(cast(1 as decimal(10,0)) as double)) AS (CAST(NULL AS STRING) <= CAST(1 AS DECIMAL(10,0)))#x]
1575+
Project [(cast(cast(null as string) as decimal(10,0)) <= cast(1 as decimal(10,0))) AS (CAST(NULL AS STRING) <= CAST(1 AS DECIMAL(10,0)))#x]
15761576
+- SubqueryAlias t
15771577
+- View (`t`, [1#x])
15781578
+- Project [cast(1#x as int) AS 1#x]
@@ -1583,7 +1583,7 @@ Project [(cast(cast(null as string) as double) <= cast(cast(1 as decimal(10,0))
15831583
-- !query
15841584
SELECT cast(null as string) <> cast(1 as decimal(10, 0)) FROM t
15851585
-- !query analysis
1586-
Project [NOT (cast(cast(null as string) as double) = cast(cast(1 as decimal(10,0)) as double)) AS (NOT (CAST(NULL AS STRING) = CAST(1 AS DECIMAL(10,0))))#x]
1586+
Project [NOT (cast(cast(null as string) as decimal(10,0)) = cast(1 as decimal(10,0))) AS (NOT (CAST(NULL AS STRING) = CAST(1 AS DECIMAL(10,0))))#x]
15871587
+- SubqueryAlias t
15881588
+- View (`t`, [1#x])
15891589
+- Project [cast(1#x as int) AS 1#x]

0 commit comments

Comments
 (0)