Skip to content

Adding aggregations that keep the original adding order in source code #932

Closed
@giangnvt

Description

@giangnvt

Description

Java API client version
8.10.4

Java version
Java 17

Elasticsearch Version
8.11.4

Problem description
I have an Elasticsearch query like the one below, where I take the aggregations, filter them with bucket_filter, and then paginate with bucket_paging. If I execute this exact query, I get the expected output. But if I switch the order of bucket_filter and bucket_paging in the query, it returns fewer documents than expected. My guess is that, in the latter case, Elasticsearch executes the bucket_paging pagination first (which returns at most 50 items) and then applies the bucket_filter filter, which in turn filters out a few more items from those 50.
I have also contacted the Elasticsearch support team, and they confirmed that the order of pipeline aggregations (like bucket_selector and bucket_sort) does affect the query result.

My problem is that I'm using the elasticsearch-java client library to build the query, which puts aggregations into a map instead of a list; as a result, the order of the aggregations is random in the final built query.
Is there any workaround so that I can fix this?

Source code (Kotlin):

// Builds a match-all search carrying a single top-level terms aggregation,
// "by_planning_sum_id", with several sub-aggregations attached to each bucket.
// The sub-aggregations are added below in the order the author intends them to run;
// the surrounding issue reports that the generated JSON does not keep this order.
val query = NativeQueryBuilder()
    .withQuery({
        MatchAllQuery.of { it }
    } ()._toQuery())
query.withSearchType(Query.SearchType.QUERY_THEN_FETCH)
      .withAggregation("by_planning_sum_id", Aggregation.of {
          // Terms buckets keyed on root_planning_sum_id; the bucket count limit comes from maxBucketsSize.
          it.terms { it.field("root_planning_sum_id")
                          .also{ aggregate -> "${maxBucketsSize}".let{ aggregate.size(it.toInt()) }}}
          // "country_data": filter aggregation with a nested average of review_score.
          .aggregations("country_data", Aggregation.of {
              it.filter( {
                  val subQuery = QueryBuilders.bool()
                      .apply {
                          // The term clause is only added when sortName is non-empty.
                          if ("${sortName}".isNotEmpty()) {
                              must(TermQuery.of { it.field("${sortCode}").value("${sortName}") }._toQuery()
                              )
                          }
                      }
                  // With no clauses, fall back to match-all instead of an empty bool query.
                  if (subQuery.hasClauses()) subQuery.build() else MatchAllQuery.of { it }
              } ()._toQuery())
              .aggregations("avg_score", Aggregation.of {
                  it.avg { it.field("review_score") }
              })})
          // "zero_flag": bucket_script yielding 0 when country_data's _count is 0, otherwise 1.
          .aggregations(
              "zero_flag", Aggregation.of { it.bucketScript {
                  it.bucketsPath { it.dict(mapOf("count" to "country_data>_count")) }
                      .script {it.inline {it.source("return ((params.count == 0) ? 0 : 1)")}}
                      .gapPolicy(GapPolicy.InsertZeros) } })
          // Bucket-level average of review_score (separate from country_data>avg_score).
          .aggregations("avg_score", Aggregation.of {
              it.avg { it.field("review_score") }
          })
          // "bad_count": filter on review_score_class == "bad".
          .aggregations("bad_count", Aggregation.of {
              it.filter( {
                  val subQuery = QueryBuilders.bool()
                      .must(TermQuery.of { it.field("review_score_class").value("bad") }._toQuery()
                      )
                  // NOTE(review): a must clause is always added above, so the match-all branch
                  // presumably never runs here - confirm.
                  if (subQuery.hasClauses()) subQuery.build() else MatchAllQuery.of { it }
              } ()._toQuery())
          })
          // "quality_negative_count": filter on quality_label_class == "negative".
          .aggregations("quality_negative_count", Aggregation.of {
              it.filter( {
                  val subQuery = QueryBuilders.bool()
                      .must(TermQuery.of { it.field("quality_label_class").value("negative") }._toQuery()
                      )
                  if (subQuery.hasClauses()) subQuery.build() else MatchAllQuery.of { it }
              } ()._toQuery())
          })
          // "bad_ratio": bucket_script dividing bad_count's _count by the bucket's own _count.
          .aggregations("bad_ratio", Aggregation.of { it.bucketScript {
              it.bucketsPath { it.dict(mapOf("all" to "_count","bad" to "bad_count>_count"
              )) }.script { it.inline { it.source("params.bad/params.all") } } } })
          // "bucket_filter": bucket_selector keeping buckets whose _count >= lowestCount.
          // Per the issue text, this is meant to run BEFORE "bucket_paging" below.
          .aggregations(
              "bucket_filter", Aggregation.of { it.bucketSelector { it.bucketsPath { it.dict(mapOf(
                 "count" to "_count")) }
                    .script { it.inline { it.source("params.count>=${lowestCount}") } } } })
          // "bucket_paging": bucket_sort that orders the buckets, then applies from/size paging.
          .aggregations(
              "bucket_paging", Aggregation.of { it.bucketSort { it.sort(listOf(
                  SortOptions.of { it.field { it.field("zero_flag").order(SortOrder.Desc) } },
                  SortOptions.of { it.field { it.field("country_data>${sortKey}").order(if ("${sortValue}" == "asc") SortOrder.Asc else SortOrder.Desc) } },
                  SortOptions.of { it.field { it.field("${sortKey}").order(if ("${sortValue}" == "asc") SortOrder.Asc else SortOrder.Desc) } },
                  SortOptions.of { it.field { it.field("${sortKey2}").order(if ("${sortValue2}" == "asc") SortOrder.Asc else SortOrder.Desc) } }
              )).from("${pagerFrom}".toInt())
                  .size("${pagerSize}".toInt())} })
})

The query:

{
"aggregations": {
    "by_planning_sum_id": {
        "aggregations": {
            "bad_count": {
                "filter": {
                    "bool": {
                        "must": [{ "term": { "review_score_class": { "value": "bad" } } }]
                    }
                }
            },
            "country_data": {
                "aggregations": {
                    "avg_score": { "avg": { "field": "review_score" } }
                },
                "filter": {
                    "bool": {
                        "must": [{ "term": { "region_code": { "value": "JP" } } }]
                    }
                }
            },
            "bad_ratio": {
                "bucket_script": {
                    "buckets_path": { "all": "_count", "bad": "bad_count>_count" },
                    "script": { "source": "params.bad/params.all" }
                }
            },
            "zero_flag": {
                "bucket_script": {
                    "buckets_path": { "count": "country_data>_count" },
                    "gap_policy": "insert_zeros",
                    "script": { "source": "return ((params.count == 0) ? 0 : 1)" }
                }
            },
            "quality_negative_count": {
                "filter": {
                    "bool": {
                        "must": [
                            { "term": { "quality_label_class": { "value": "negative" } } }
                        ]
                    }
                }
            },
            "avg_score": { "avg": { "field": "review_score" } },
            "bucket_filter": {
                "bucket_selector": {
                    "buckets_path": { "count": "_count" },
                    "script": { "source": "params.count>=30" }
                }
            },
            "bucket_paging": {
                "bucket_sort": {
                    "from": 0,
                    "size": 50,
                    "sort": [
                        { "zero_flag": { "order": "desc" } },
                        { "country_data>avg_score": { "order": "desc" } },
                        { "avg_score": { "order": "desc" } },
                        { "_key": { "order": "desc" } }
                    ]
                }
            }
        },
        "terms": { "field": "root_planning_sum_id", "size": 10000 }
    }
},
"query": "..."
}

Metadata

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions