Skip to content

Commit 717281a

Browse files
committed
Add shard_min_doc_count to Significant Terms Aggregation
Add MinimumDocumentCountAsLong field to align with the Elasticsearch implementation, and deprecate MinimumDocumentCount with ObsoleteAttribute. Closes #2847 (cherry picked from commit fdd0eb1)
1 parent bf78582 commit 717281a

File tree

2 files changed

+60
-15
lines changed

2 files changed

+60
-15
lines changed

src/Nest/Aggregations/Bucket/SignificantTerms/SignificantTermsAggregation.cs

Lines changed: 49 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -34,9 +34,26 @@ public interface ISignificantTermsAggregation : IBucketAggregation
3434
/// Return only terms that match equal to or more than a configurable
3535
/// number of hits
3636
/// </summary>
37-
[JsonProperty("min_doc_count")]
37+
[JsonIgnore]
38+
[Obsolete("Use MinimumDocumentCountAsLong. Fixed in NEST 6.x")]
3839
int? MinimumDocumentCount { get; set; }
3940

41+
/// <summary>
42+
/// Return only terms that match equal to or more than a configurable
43+
/// number of hits
44+
/// </summary>
45+
[JsonProperty("min_doc_count")]
46+
long? MinimumDocumentCountAsLong { get; set; }
47+
48+
/// <summary>
49+
/// Regulates the certainty a shard has if the term should actually be added to the candidate
50+
/// list or not with respect to the <see cref="MinimumDocumentCountAsLong"/>.
51+
/// Terms will only be considered if their local shard frequency within
52+
/// the set is higher than the <see cref="ShardMinimumDocumentCount"/>.
53+
/// </summary>
54+
[JsonProperty("shard_min_doc_count")]
55+
long? ShardMinimumDocumentCount { get; set; }
56+
4057
/// <summary>
4158
/// Determines the mechanism by which aggregations are executed
4259
/// </summary>
@@ -103,7 +120,6 @@ public interface ISignificantTermsAggregation : IBucketAggregation
103120
/// </summary>
104121
[JsonProperty("background_filter")]
105122
QueryContainer BackgroundFilter { get; set; }
106-
107123
}
108124

109125
public class SignificantTermsAggregation : BucketAggregationBase, ISignificantTermsAggregation
@@ -114,8 +130,19 @@ public class SignificantTermsAggregation : BucketAggregationBase, ISignificantTe
114130
public int? Size { get; set; }
115131
/// <inheritdoc />
116132
public int? ShardSize { get; set; }
133+
134+
[Obsolete("Use MinimumDocumentCountAsLong. Fixed in NEST 6.x")]
135+
/// <inheritdoc />
136+
public int? MinimumDocumentCount
137+
{
138+
get => MinimumDocumentCountAsLong > int.MaxValue ? int.MaxValue : (int?)MinimumDocumentCountAsLong;
139+
set => MinimumDocumentCountAsLong = value;
140+
}
141+
117142
/// <inheritdoc />
118-
public int? MinimumDocumentCount { get; set; }
143+
public long? MinimumDocumentCountAsLong { get; set; }
144+
/// <inheritdoc />
145+
public long? ShardMinimumDocumentCount { get; set; }
119146
/// <inheritdoc />
120147
public TermsAggregationExecutionHint? ExecutionHint { get; set; }
121148

@@ -161,7 +188,16 @@ public class SignificantTermsAggregationDescriptor<T>
161188

162189
int? ISignificantTermsAggregation.ShardSize { get; set; }
163190

164-
int? ISignificantTermsAggregation.MinimumDocumentCount { get; set; }
191+
[Obsolete("Use MinimumDocumentCountAsLong. Fixed in NEST 6.x")]
192+
int? ISignificantTermsAggregation.MinimumDocumentCount
193+
{
194+
get => Self.MinimumDocumentCountAsLong > int.MaxValue ? int.MaxValue : (int?)Self.MinimumDocumentCountAsLong;
195+
set => Self.MinimumDocumentCountAsLong = value;
196+
}
197+
198+
long? ISignificantTermsAggregation.MinimumDocumentCountAsLong { get; set; }
199+
200+
long? ISignificantTermsAggregation.ShardMinimumDocumentCount { get; set; }
165201

166202
TermsAggregationExecutionHint? ISignificantTermsAggregation.ExecutionHint { get; set; }
167203

@@ -226,10 +262,19 @@ public SignificantTermsAggregationDescriptor<T> Exclude(Func<FluentDictionary<st
226262
/// <inheritdoc />
227263
public SignificantTermsAggregationDescriptor<T> ShardSize(int shardSize) => Assign(a => a.ShardSize = shardSize);
228264

265+
[Obsolete("Use MinimumDocumentCountAsLong. Fixed in NEST 6.x")]
229266
/// <inheritdoc />
230267
public SignificantTermsAggregationDescriptor<T> MinimumDocumentCount(int minimumDocumentCount) =>
231268
Assign(a => a.MinimumDocumentCount = minimumDocumentCount);
232269

270+
/// <inheritdoc />
271+
public SignificantTermsAggregationDescriptor<T> MinimumDocumentCountAsLong(long minimumDocumentCount) =>
272+
Assign(a => a.MinimumDocumentCountAsLong = minimumDocumentCount);
273+
274+
/// <inheritdoc />
275+
public SignificantTermsAggregationDescriptor<T> ShardMinimumDocumentCount(long shardMinimumDocumentCount) =>
276+
Assign(a => a.ShardMinimumDocumentCount = shardMinimumDocumentCount);
277+
233278
/// <inheritdoc />
234279
public SignificantTermsAggregationDescriptor<T> MutualInformation(Func<MutualInformationHeuristicDescriptor, IMutualInformationHeuristic> mutualInformationSelector = null) =>
235280
Assign(a => a.MutualInformation = mutualInformationSelector.InvokeOrDefault(new MutualInformationHeuristicDescriptor()));

src/Tests/Aggregations/Bucket/SignificantTerms/SignificantTermsAggregationUsageTests.cs

Lines changed: 11 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -14,11 +14,11 @@ namespace Tests.Aggregations.Bucket.SignificantTerms
1414
*
1515
* [WARNING]
1616
* --
17-
* The significant_terms aggregation can be very heavy when run on large indices. Work is in progress
18-
* to provide more lightweight sampling techniques.
17+
* The significant_terms aggregation can be very heavy when run on large indices. Work is in progress
18+
* to provide more lightweight sampling techniques.
1919
* As a result, the API for this feature may change in non-backwards compatible ways
2020
* --
21-
*
21+
*
2222
* See the Elasticsearch documentation on {ref_current}/search-aggregations-bucket-significantterms-aggregation.html[significant terms aggregation] for more detail.
2323
*/
2424
public class SignificantTermsAggregationUsageTests : AggregationUsageTestBase
@@ -49,7 +49,7 @@ public SignificantTermsAggregationUsageTests(ReadOnlyCluster i, EndpointUsage us
4949
.Aggregations(a => a
5050
.SignificantTerms("significant_names", st => st
5151
.Field(p => p.Name)
52-
.MinimumDocumentCount(10)
52+
.MinimumDocumentCountAsLong(10)
5353
.MutualInformation(mi => mi
5454
.BackgroundIsSuperSet()
5555
.IncludeNegatives()
@@ -63,7 +63,7 @@ public SignificantTermsAggregationUsageTests(ReadOnlyCluster i, EndpointUsage us
6363
Aggregations = new SignificantTermsAggregation("significant_names")
6464
{
6565
Field = Field<Project>(p => p.Name),
66-
MinimumDocumentCount = 10,
66+
MinimumDocumentCountAsLong = 10,
6767
MutualInformation = new MutualInformationHeuristic
6868
{
6969
BackgroundIsSuperSet = true,
@@ -85,7 +85,7 @@ protected override void ExpectResponse(ISearchResponse<Project> response)
8585
* [float]
8686
* [[significant-terms-pattern-filter]]
8787
* == Filtering with a regular expression pattern
88-
*
88+
*
8989
* Using significant terms aggregation with filtering to include values using a regular expression pattern
9090
*/
9191
public class SignificantTermsIncludePatternAggregationUsageTests : AggregationUsageTestBase
@@ -117,7 +117,7 @@ public SignificantTermsIncludePatternAggregationUsageTests(ReadOnlyCluster i, En
117117
.Aggregations(a => a
118118
.SignificantTerms("significant_names", st => st
119119
.Field(p => p.Name)
120-
.MinimumDocumentCount(10)
120+
.MinimumDocumentCountAsLong(10)
121121
.MutualInformation(mi => mi
122122
.BackgroundIsSuperSet()
123123
.IncludeNegatives()
@@ -132,7 +132,7 @@ public SignificantTermsIncludePatternAggregationUsageTests(ReadOnlyCluster i, En
132132
Aggregations = new SignificantTermsAggregation("significant_names")
133133
{
134134
Field = Field<Project>(p => p.Name),
135-
MinimumDocumentCount = 10,
135+
MinimumDocumentCountAsLong = 10,
136136
MutualInformation = new MutualInformationHeuristic
137137
{
138138
BackgroundIsSuperSet = true,
@@ -155,7 +155,7 @@ protected override void ExpectResponse(ISearchResponse<Project> response)
155155
* [float]
156156
* [[significant-terms-exact-value-filter]]
157157
* == Filtering with exact values
158-
*
158+
*
159159
* Using significant terms aggregation with filtering to exclude specific values
160160
*/
161161
public class SignificantTermsExcludeExactValuesAggregationUsageTests : AggregationUsageTestBase
@@ -187,7 +187,7 @@ public SignificantTermsExcludeExactValuesAggregationUsageTests(ReadOnlyCluster i
187187
.Aggregations(a => a
188188
.SignificantTerms("significant_names", st => st
189189
.Field(p => p.Name)
190-
.MinimumDocumentCount(10)
190+
.MinimumDocumentCountAsLong(10)
191191
.MutualInformation(mi => mi
192192
.BackgroundIsSuperSet()
193193
.IncludeNegatives()
@@ -202,7 +202,7 @@ public SignificantTermsExcludeExactValuesAggregationUsageTests(ReadOnlyCluster i
202202
Aggregations = new SignificantTermsAggregation("significant_names")
203203
{
204204
Field = Field<Project>(p => p.Name),
205-
MinimumDocumentCount = 10,
205+
MinimumDocumentCountAsLong = 10,
206206
MutualInformation = new MutualInformationHeuristic
207207
{
208208
BackgroundIsSuperSet = true,

0 commit comments

Comments
 (0)