Description
Description
Java API client version
8.10.4
Java version
Java 17
Elasticsearch Version
8.11.4
Problem description
I have an Elasticsearch query like the one below, where I take the aggregations, filter them with bucket_filter, and then paginate with bucket_paging. If I execute this exact query, I get the expected output. But if I switch the order of bucket_filter and bucket_paging in the query, it returns fewer items than expected. My guess is that, in the latter case, Elasticsearch executes the bucket_paging step first (which returns at most 50 items) and then applies the bucket_filter step, which in turn filters out a few more items from those 50.
I have also contacted the Elasticsearch support team, and they confirmed that the order of pipeline aggregations (like bucket_selector and bucket_sort) does affect the query result.
My problem is that I'm using the elasticsearch-java client library to build the query, and it puts aggregations into a map instead of a list; as a result, the order of the aggregations in the final built query is effectively random.
Is there any workaround so that I can fix this?
Source code (Kotlin):
// Builds a search request that matches all documents, groups them with a terms
// aggregation on "root_planning_sum_id", computes per-bucket metrics, and then
// filters ("bucket_filter") and sorts/pages ("bucket_paging") the buckets.
val query = NativeQueryBuilder()
.withQuery({
MatchAllQuery.of { it }
} ()._toQuery())
query.withSearchType(Query.SearchType.QUERY_THEN_FETCH)
// Top-level terms aggregation; its size comes from maxBucketsSize.
.withAggregation("by_planning_sum_id", Aggregation.of {
it.terms { it.field("root_planning_sum_id")
.also{ aggregate -> "${maxBucketsSize}".let{ aggregate.size(it.toInt()) }}}
// "country_data": filter sub-aggregation. The term clause on sortCode/sortName is
// only added when sortName is non-empty; otherwise the filter falls back to match_all.
.aggregations("country_data", Aggregation.of {
it.filter( {
val subQuery = QueryBuilders.bool()
.apply {
if ("${sortName}".isNotEmpty()) {
must(TermQuery.of { it.field("${sortCode}").value("${sortName}") }._toQuery()
)
}
}
if (subQuery.hasClauses()) subQuery.build() else MatchAllQuery.of { it }
} ()._toQuery())
// Average review_score restricted to the documents matched by the filter above.
.aggregations("avg_score", Aggregation.of {
it.avg { it.field("review_score") }
})})
// "zero_flag": bucket_script that yields 0 when country_data matched no documents
// and 1 otherwise; used as the first sort key in "bucket_paging".
.aggregations(
"zero_flag", Aggregation.of { it.bucketScript {
it.bucketsPath { it.dict(mapOf("count" to "country_data>_count")) }
.script {it.inline {it.source("return ((params.count == 0) ? 0 : 1)")}}
.gapPolicy(GapPolicy.InsertZeros) } })
// Average review_score over every document in the bucket (not filtered).
.aggregations("avg_score", Aggregation.of {
it.avg { it.field("review_score") }
})
// "bad_count": documents whose review_score_class is "bad".
// NOTE(review): a must clause is always added here, so the hasClauses() fallback
// presumably never triggers; it mirrors the pattern used for "country_data".
.aggregations("bad_count", Aggregation.of {
it.filter( {
val subQuery = QueryBuilders.bool()
.must(TermQuery.of { it.field("review_score_class").value("bad") }._toQuery()
)
if (subQuery.hasClauses()) subQuery.build() else MatchAllQuery.of { it }
} ()._toQuery())
})
// "quality_negative_count": documents whose quality_label_class is "negative".
.aggregations("quality_negative_count", Aggregation.of {
it.filter( {
val subQuery = QueryBuilders.bool()
.must(TermQuery.of { it.field("quality_label_class").value("negative") }._toQuery()
)
if (subQuery.hasClauses()) subQuery.build() else MatchAllQuery.of { it }
} ()._toQuery())
})
// "bad_ratio": bad_count's document count divided by the bucket's document count.
.aggregations("bad_ratio", Aggregation.of { it.bucketScript {
it.bucketsPath { it.dict(mapOf("all" to "_count","bad" to "bad_count>_count"
)) }.script { it.inline { it.source("params.bad/params.all") } } } })
// "bucket_filter": bucket_selector that keeps only buckets with _count >= lowestCount.
.aggregations(
"bucket_filter", Aggregation.of { it.bucketSelector { it.bucketsPath { it.dict(mapOf(
"count" to "_count")) }
.script { it.inline { it.source("params.count>=${lowestCount}") } } } })
// "bucket_paging": bucket_sort that orders buckets by zero_flag, then by the
// sortKey metric inside country_data, then by sortKey and sortKey2 on the bucket,
// and finally applies from/size paging.
// NOTE: as reported in this issue, results differ when this aggregation is
// serialized before "bucket_filter" - paging then appears to truncate the buckets
// to pagerSize before the count filter removes more of them.
.aggregations(
"bucket_paging", Aggregation.of { it.bucketSort { it.sort(listOf(
SortOptions.of { it.field { it.field("zero_flag").order(SortOrder.Desc) } },
SortOptions.of { it.field { it.field("country_data>${sortKey}").order(if ("${sortValue}" == "asc") SortOrder.Asc else SortOrder.Desc) } },
SortOptions.of { it.field { it.field("${sortKey}").order(if ("${sortValue}" == "asc") SortOrder.Asc else SortOrder.Desc) } },
SortOptions.of { it.field { it.field("${sortKey2}").order(if ("${sortValue2}" == "asc") SortOrder.Asc else SortOrder.Desc) } }
)).from("${pagerFrom}".toInt())
.size("${pagerSize}".toInt())} })
})
The query:
{
"aggregations": {
"by_planning_sum_id": {
"aggregations": {
"bad_count": {
"filter": {
"bool": {
"must": [{ "term": { "review_score_class": { "value": "bad" } } }]
}
}
},
"country_data": {
"aggregations": {
"avg_score": { "avg": { "field": "review_score" } }
},
"filter": {
"bool": {
"must": [{ "term": { "region_code": { "value": "JP" } } }]
}
}
},
"bad_ratio": {
"bucket_script": {
"buckets_path": { "all": "_count", "bad": "bad_count>_count" },
"script": { "source": "params.bad/params.all" }
}
},
"zero_flag": {
"bucket_script": {
"buckets_path": { "count": "country_data>_count" },
"gap_policy": "insert_zeros",
"script": { "source": "return ((params.count == 0) ? 0 : 1)" }
}
},
"quality_negative_count": {
"filter": {
"bool": {
"must": [
{ "term": { "quality_label_class": { "value": "negative" } } }
]
}
}
},
"avg_score": { "avg": { "field": "review_score" } },
"bucket_filter": {
"bucket_selector": {
"buckets_path": { "count": "_count" },
"script": { "source": "params.count>=30" }
}
},
"bucket_paging": {
"bucket_sort": {
"from": 0,
"size": 50,
"sort": [
{ "zero_flag": { "order": "desc" } },
{ "country_data>avg_score": { "order": "desc" } },
{ "avg_score": { "order": "desc" } },
{ "_key": { "order": "desc" } }
]
}
}
},
"terms": { "field": "root_planning_sum_id", "size": 10000 }
}
},
"query": "..."
}