Skip to content

Commit cb4cd9e

Browse files
authored
Fix failing strict wildcard pushdown tests (#1683) (#1692)
* Handle changes to the wildcard query in strict mode
* Fix more tests
1 parent fb33366 commit cb4cd9e

File tree

3 files changed

+45
-21
lines changed

3 files changed

+45
-21
lines changed

spark/sql-13/src/itest/scala/org/elasticsearch/spark/integration/AbstractScalaEsSparkSQL.scala

Lines changed: 15 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -998,31 +998,39 @@ class AbstractScalaEsScalaSparkSQL(prefix: String, readMetadata: jl.Boolean, pus
998998
val df = esDataSource("pd_starts_with")
999999
var filter = df.filter(df("airport").startsWith("O"))
10001000

1001-
if (!keepHandledFilters) {
1001+
if (!keepHandledFilters && !strictPushDown) {
10021002
// term query pick field with multi values
10031003
assertEquals(2, filter.count())
10041004
return
10051005
}
10061006

10071007
filter.show
1008-
assertEquals(1, filter.count())
1009-
assertEquals("feb", filter.select("tag").take(1)(0)(0))
1008+
if (strictPushDown) {
1009+
assertEquals(0, filter.count()) // Strict means specific terms matching, and the terms are lowercased
1010+
} else {
1011+
assertEquals(1, filter.count())
1012+
assertEquals("feb", filter.select("tag").take(1)(0)(0))
1013+
}
10101014
}
10111015

10121016
@Test
10131017
def testDataSourcePushDown10EndsWith() {
10141018
val df = esDataSource("pd_ends_with")
10151019
var filter = df.filter(df("airport").endsWith("O"))
10161020

1017-
if (!keepHandledFilters) {
1021+
if (!keepHandledFilters && !strictPushDown) {
10181022
// term query pick field with multi values
10191023
assertEquals(2, filter.count())
10201024
return
10211025
}
10221026

10231027
filter.show
1024-
assertEquals(1, filter.count())
1025-
assertEquals("jan", filter.select("tag").take(1)(0)(0))
1028+
if (strictPushDown) {
1029+
assertEquals(0, filter.count()) // Strict means specific terms matching, and the terms are lowercased
1030+
} else {
1031+
assertEquals(1, filter.count())
1032+
assertEquals("jan", filter.select("tag").take(1)(0)(0))
1033+
}
10261034
}
10271035

10281036
@Test
@@ -1036,7 +1044,7 @@ class AbstractScalaEsScalaSparkSQL(prefix: String, readMetadata: jl.Boolean, pus
10361044
@Test
10371045
def testDataSourcePushDown12And() {
10381046
val df = esDataSource("pd_and")
1039-
var filter = df.filter(df("reason").isNotNull.and(df("airport").endsWith("O")))
1047+
var filter = df.filter(df("reason").isNotNull.and(df("tag").equalTo("jan")))
10401048

10411049
assertEquals(1, filter.count())
10421050
assertEquals("jan", filter.select("tag").take(1)(0)(0))

spark/sql-20/src/itest/scala/org/elasticsearch/spark/integration/AbstractScalaEsSparkSQL.scala

Lines changed: 15 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1055,31 +1055,39 @@ class AbstractScalaEsScalaSparkSQL(prefix: String, readMetadata: jl.Boolean, pus
10551055
val df = esDataSource("pd_starts_with")
10561056
var filter = df.filter(df("airport").startsWith("O"))
10571057

1058-
if (!keepHandledFilters) {
1058+
if (!keepHandledFilters && !strictPushDown) {
10591059
// term query pick field with multi values
10601060
assertEquals(2, filter.count())
10611061
return
10621062
}
10631063

10641064
filter.show
1065-
assertEquals(1, filter.count())
1066-
assertEquals("feb", filter.select("tag").take(1)(0)(0))
1065+
if (strictPushDown) {
1066+
assertEquals(0, filter.count()) // Strict means specific terms matching, and the terms are lowercased
1067+
} else {
1068+
assertEquals(1, filter.count())
1069+
assertEquals("feb", filter.select("tag").take(1)(0)(0))
1070+
}
10671071
}
10681072

10691073
@Test
10701074
def testDataSourcePushDown10EndsWith() {
10711075
val df = esDataSource("pd_ends_with")
10721076
var filter = df.filter(df("airport").endsWith("O"))
10731077

1074-
if (!keepHandledFilters) {
1078+
if (!keepHandledFilters && !strictPushDown) {
10751079
// term query pick field with multi values
10761080
assertEquals(2, filter.count())
10771081
return
10781082
}
10791083

10801084
filter.show
1081-
assertEquals(1, filter.count())
1082-
assertEquals("jan", filter.select("tag").take(1)(0)(0))
1085+
if (strictPushDown) {
1086+
assertEquals(0, filter.count()) // Strict means specific terms matching, and the terms are lowercased
1087+
} else {
1088+
assertEquals(1, filter.count())
1089+
assertEquals("jan", filter.select("tag").take(1)(0)(0))
1090+
}
10831091
}
10841092

10851093
@Test
@@ -1093,7 +1101,7 @@ class AbstractScalaEsScalaSparkSQL(prefix: String, readMetadata: jl.Boolean, pus
10931101
@Test
10941102
def testDataSourcePushDown12And() {
10951103
val df = esDataSource("pd_and")
1096-
var filter = df.filter(df("reason").isNotNull.and(df("airport").endsWith("O")))
1104+
var filter = df.filter(df("reason").isNotNull.and(df("tag").equalTo("jan")))
10971105

10981106
assertEquals(1, filter.count())
10991107
assertEquals("jan", filter.select("tag").take(1)(0)(0))

spark/sql-30/src/itest/scala/org/elasticsearch/spark/integration/AbstractScalaEsSparkSQL.scala

Lines changed: 15 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1055,31 +1055,39 @@ class AbstractScalaEsScalaSparkSQL(prefix: String, readMetadata: jl.Boolean, pus
10551055
val df = esDataSource("pd_starts_with")
10561056
var filter = df.filter(df("airport").startsWith("O"))
10571057

1058-
if (!keepHandledFilters) {
1058+
if (!keepHandledFilters && !strictPushDown) {
10591059
// term query pick field with multi values
10601060
assertEquals(2, filter.count())
10611061
return
10621062
}
10631063

10641064
filter.show
1065-
assertEquals(1, filter.count())
1066-
assertEquals("feb", filter.select("tag").take(1)(0)(0))
1065+
if (strictPushDown) {
1066+
assertEquals(0, filter.count()) // Strict means specific terms matching, and the terms are lowercased
1067+
} else {
1068+
assertEquals(1, filter.count())
1069+
assertEquals("feb", filter.select("tag").take(1)(0)(0))
1070+
}
10671071
}
10681072

10691073
@Test
10701074
def testDataSourcePushDown10EndsWith() {
10711075
val df = esDataSource("pd_ends_with")
10721076
var filter = df.filter(df("airport").endsWith("O"))
10731077

1074-
if (!keepHandledFilters) {
1078+
if (!keepHandledFilters && !strictPushDown) {
10751079
// term query pick field with multi values
10761080
assertEquals(2, filter.count())
10771081
return
10781082
}
10791083

10801084
filter.show
1081-
assertEquals(1, filter.count())
1082-
assertEquals("jan", filter.select("tag").take(1)(0)(0))
1085+
if (strictPushDown) {
1086+
assertEquals(0, filter.count()) // Strict means specific terms matching, and the terms are lowercased
1087+
} else {
1088+
assertEquals(1, filter.count())
1089+
assertEquals("jan", filter.select("tag").take(1)(0)(0))
1090+
}
10831091
}
10841092

10851093
@Test
@@ -1093,7 +1101,7 @@ class AbstractScalaEsScalaSparkSQL(prefix: String, readMetadata: jl.Boolean, pus
10931101
@Test
10941102
def testDataSourcePushDown12And() {
10951103
val df = esDataSource("pd_and")
1096-
var filter = df.filter(df("reason").isNotNull.and(df("airport").endsWith("O")))
1104+
var filter = df.filter(df("reason").isNotNull.and(df("tag").equalTo("jan")))
10971105

10981106
assertEquals(1, filter.count())
10991107
assertEquals("jan", filter.select("tag").take(1)(0)(0))

0 commit comments

Comments
 (0)