Skip to content

Commit efbb717

Browse files
committed
Add phrase_limit to highlighting
- Add Encoder and PhraseLimit to integration test - Deprecate Encoder on HighlightField - Add IndexOptions.Offsets to LeadDeveloper LastName to allow Postings highlighter to be used Closes #2851 (cherry picked from commit 5632144)
1 parent 99eea29 commit efbb717

File tree

4 files changed

+122
-86
lines changed

4 files changed

+122
-86
lines changed

docs/search/request/highlighting-usage.asciidoc

Lines changed: 31 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ s => s
3737
.Highlight(h => h
3838
.PreTags("<tag1>")
3939
.PostTags("</tag1>")
40+
.Encoder("html")
4041
.Fields(
4142
fs => fs
4243
.Field(p => p.Name.Suffix("standard"))
@@ -48,30 +49,28 @@ s => s
4849
fs => fs
4950
.Field(p => p.LeadDeveloper.FirstName)
5051
.Type(HighlighterType.Fvh)
51-
.BoundaryMaxScan(50)
5252
.PreTags("<name>")
5353
.PostTags("</name>")
54+
.BoundaryMaxScan(50)
55+
.PhraseLimit(10)
5456
.HighlightQuery(q => q
5557
.Match(m => m
5658
.Field(p => p.LeadDeveloper.FirstName)
5759
.Query("Kurt Edgardo Naomi Dariana Justice Felton")
5860
)
5961
),
6062
fs => fs
61-
.Field(p => p.State.Suffix("offsets"))
63+
.Field(p => p.LeadDeveloper.LastName)
6264
.Type(HighlighterType.Postings)
63-
.PreTags("<state>")
64-
.PostTags("</state>")
65+
.PreTags("<name>")
66+
.PostTags("</name>")
6567
.HighlightQuery(q => q
66-
.Terms(t => t
67-
.Field(f => f.State.Suffix("offsets"))
68-
.Terms(
69-
StateOfBeing.Stable.ToString().ToLowerInvariant(),
70-
StateOfBeing.BellyUp.ToString().ToLowerInvariant()
68+
.Match(m => m
69+
.Field(p => p.LeadDeveloper.LastName)
70+
.Query(LastNameSearch)
7171
)
7272
)
7373
)
74-
)
7574
)
7675
----
7776

@@ -91,6 +90,7 @@ new SearchRequest<Project>
9190
{
9291
PreTags = new[] { "<tag1>" },
9392
PostTags = new[] { "</tag1>" },
93+
Encoder = "html",
9494
Fields = new Dictionary<Field, IHighlightField>
9595
{
9696
{ "name.standard", new HighlightField
@@ -104,7 +104,8 @@ new SearchRequest<Project>
104104
},
105105
{ "leadDeveloper.firstName", new HighlightField
106106
{
107-
CustomType = "fvh",
107+
CustomType = "fvh", <1>
108+
PhraseLimit = 10,
108109
BoundaryMaxScan = 50,
109110
PreTags = new[] { "<name>"},
110111
PostTags = new[] { "</name>"},
@@ -115,22 +116,23 @@ new SearchRequest<Project>
115116
}
116117
}
117118
},
118-
{ "state.offsets", new HighlightField
119+
{ "leadDeveloper.lastName", new HighlightField
119120
{
120121
Type = HighlighterType.Postings,
121-
PreTags = new[] { "<state>"},
122-
PostTags = new[] { "</state>"},
123-
HighlightQuery = new TermsQuery
122+
PreTags = new[] { "<name>"},
123+
PostTags = new[] { "</name>"},
124+
HighlightQuery = new MatchQuery
124125
{
125-
Field = "state.offsets",
126-
Terms = new [] { "stable", "bellyup" }
126+
Field = "leadDeveloper.lastName",
127+
Query = LastNameSearch
127128
}
128129
}
129130
}
130131
}
131132
}
132133
}
133134
----
135+
<1> `CustomType` can be used to define a custom highlighter
134136

135137
[source,javascript]
136138
.Example json output
@@ -150,6 +152,7 @@ new SearchRequest<Project>
150152
"post_tags": [
151153
"</tag1>"
152154
],
155+
"encoder": "html",
153156
"fields": {
154157
"name.standard": {
155158
"type": "plain",
@@ -160,6 +163,7 @@ new SearchRequest<Project>
160163
},
161164
"leadDeveloper.firstName": {
162165
"type": "fvh",
166+
"phrase_limit": 10,
163167
"boundary_max_scan": 50,
164168
"pre_tags": [
165169
"<name>"
@@ -175,20 +179,19 @@ new SearchRequest<Project>
175179
}
176180
}
177181
},
178-
"state.offsets": {
182+
"leadDeveloper.lastName": {
179183
"type": "postings",
180184
"pre_tags": [
181-
"<state>"
185+
"<name>"
182186
],
183187
"post_tags": [
184-
"</state>"
188+
"</name>"
185189
],
186190
"highlight_query": {
187-
"terms": {
188-
"state.offsets": [
189-
"stable",
190-
"bellyup"
191-
]
191+
"match": {
192+
"leadDeveloper.lastName": {
193+
"query": "Stokes"
194+
}
192195
}
193196
}
194197
}
@@ -224,12 +227,12 @@ foreach (var highlightsByDocumentId in response.Highlights)
224227
highlight.Should().Contain("</name>");
225228
}
226229
}
227-
else if (highlightHit.Key == "state.offsets")
230+
else if (highlightHit.Key == "leadDeveloper.lastName")
228231
{
229232
foreach (var highlight in highlightHit.Value.Highlights)
230233
{
231-
highlight.Should().Contain("<state>");
232-
highlight.Should().Contain("</state>");
234+
highlight.Should().Contain("<name>");
235+
highlight.Should().Contain("</name>");
233236
}
234237
}
235238
else

src/Nest/Search/Search/Highlighting/HighlightField.cs

Lines changed: 42 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,8 @@ public interface IHighlightField
7979
/// Define how highlighted text will be encoded.
8080
/// It can be either default (no encoding) or html (will escape html, if you use html highlighting tags).
8181
/// </summary>
82-
[JsonProperty("encoder")]
82+
[JsonIgnore]
83+
[Obsolete("Encoder not valid on highlight field. Removed in NEST 6.x")]
8384
string Encoder { get; set; }
8485

8586
/// <summary>
@@ -118,14 +119,14 @@ public interface IHighlightField
118119
/// <summary>
119120
/// The type of highlighter to use
120121
/// </summary>
121-
HighlighterType? Type { get; set; }
122+
[JsonProperty("type")]
123+
[Obsolete("This is a temporary binary backwards compatible fix to make sure you can specify any custom highlighter type in 2.0.0. Removed in 5.0.0.")]
124+
string CustomType { get; set; }
122125

123126
/// <summary>
124127
/// The type of highlighter to use
125128
/// </summary>
126-
[JsonProperty("type")]
127-
[Obsolete("This is a temporary binary backwards compatible fix to make sure you can specify any custom highlighter type in 2.0.0. Removed in 5.0.0.")]
128-
string CustomType { get; set; }
129+
HighlighterType? Type { get; set; }
129130

130131
/// <summary>
131132
/// Forces the highlighting to highlight fields based on the source even if fields are stored separately.
@@ -147,6 +148,15 @@ public interface IHighlightField
147148
/// </summary>
148149
[JsonProperty("highlight_query")]
149150
QueryContainer HighlightQuery { get; set; }
151+
152+
/// <summary>
153+
/// Controls the number of matching phrases in a document that are considered. Prevents the
154+
/// <see cref="HighlighterType.Fvh"/> highlighter from analyzing too many phrases and consuming too much memory.
155+
/// When using matched_fields, <see cref="PhraseLimit"/> phrases per matched field are considered. Raising the limit increases query time
156+
/// and consumes more memory. Only supported by the <see cref="HighlighterType.Fvh"/> highlighter. Defaults to 256.
157+
/// </summary>
158+
[JsonProperty("phrase_limit")]
159+
int? PhraseLimit { get; set; }
150160
}
151161

152162
public class HighlightField : IHighlightField
@@ -208,11 +218,8 @@ public class HighlightField : IHighlightField
208218

209219
[Obsolete("Use BoundaryMaxScan")]
210220
public int? BoundaryMaxSize { get; set; }
211-
212-
/// <summary>
213-
/// Define how highlighted text will be encoded.
214-
/// It can be either default (no encoding) or html (will escape html, if you use html highlighting tags).
215-
/// </summary>
221+
/// <inheritdoc/>
222+
[Obsolete("Encoder not valid on highlight field. Removed in NEST 6.x")]
216223
public string Encoder { get; set; }
217224

218225
/// <summary>
@@ -244,6 +251,11 @@ public class HighlightField : IHighlightField
244251
/// </summary>
245252
public string BoundaryChars { get; set; }
246253

254+
/// <summary>
255+
/// The type of highlighter to use.
256+
/// </summary>
257+
public string CustomType { get; set; }
258+
247259
/// <summary>
248260
/// The type of highlighter to use.
249261
/// </summary>
@@ -253,11 +265,6 @@ public HighlighterType? Type
253265
set { this.CustomType = value.GetStringValue(); }
254266
}
255267

256-
/// <summary>
257-
/// The type of highlighter to use.
258-
/// </summary>
259-
public string CustomType { get; set; }
260-
261268
/// <summary>
262269
/// Forces the highlighting to highlight fields based on the source even if fields are stored separately.
263270
/// </summary>
@@ -275,6 +282,9 @@ public HighlighterType? Type
275282
/// The query to use for highlighting
276283
/// </summary>
277284
public QueryContainer HighlightQuery { get; set; }
285+
286+
/// <inheritdoc/>
287+
public int? PhraseLimit { get; set; }
278288
}
279289

280290
public class HighlightFieldDescriptor<T> : DescriptorBase<HighlightFieldDescriptor<T>,IHighlightField>, IHighlightField
@@ -289,23 +299,24 @@ public class HighlightFieldDescriptor<T> : DescriptorBase<HighlightFieldDescript
289299
int? IHighlightField.FragmentOffset { get; set; }
290300
int? IHighlightField.BoundaryMaxSize { get; set; }
291301
int? IHighlightField.BoundaryMaxScan { get; set; }
302+
[Obsolete("Encoder not valid on highlight field. Removed in NEST 6.x")]
292303
string IHighlightField.Encoder { get; set; }
293304
string IHighlightField.Order { get; set; }
294305
string IHighlightField.TagsSchema { get; set; }
295306
bool? IHighlightField.RequireFieldMatch { get; set; }
296307
string IHighlightField.BoundaryChars { get; set; }
308+
string IHighlightField.CustomType { get; set; }
297309
HighlighterType? IHighlightField.Type
298310
{
299311
#pragma warning disable CS0618 // Type or member is obsolete
300312
get { return Self.CustomType.ToEnum<HighlighterType>(); }
301313
set { Self.CustomType = value.GetStringValue(); }
302314
#pragma warning restore CS0618 // Type or member is obsolete
303315
}
304-
string IHighlightField.CustomType { get; set; }
305316
bool? IHighlightField.ForceSource { get; set; }
306317
Fields IHighlightField.MatchedFields { get; set; }
307-
308318
QueryContainer IHighlightField.HighlightQuery { get; set; }
319+
int? IHighlightField.PhraseLimit { get; set; }
309320

310321
/// <summary>
311322
/// The field on which to perform highlighting.
@@ -358,10 +369,10 @@ public class HighlightFieldDescriptor<T> : DescriptorBase<HighlightFieldDescript
358369
/// </summary>
359370
public HighlightFieldDescriptor<T> Type(HighlighterType type) => Assign(a => a.Type = type);
360371

372+
#pragma warning disable CS0618 // Type or member is obsolete
361373
/// <summary>
362374
/// The type of highlighter to use
363375
/// </summary>
364-
#pragma warning disable CS0618 // Type or member is obsolete
365376
public HighlightFieldDescriptor<T> Type(string type) => Assign(a => a.CustomType = type);
366377
#pragma warning restore CS0618 // Type or member is obsolete
367378

@@ -373,18 +384,18 @@ public class HighlightFieldDescriptor<T> : DescriptorBase<HighlightFieldDescript
373384
public HighlightFieldDescriptor<T> PreTags(string preTags) => Assign(a => a.PreTags = new[] { preTags });
374385

375386
/// <summary>
376-
/// Controls the pre tag in which to wrap highights.
387+
/// Controls the post tag in which to wrap highlights.
377388
/// By default, the highlighting will wrap highlighted text in &lt;em&gt; and &lt;/em&gt;.
378389
/// Using the fast vector highlighter, there can be more tags, and the importance is ordered.
379390
/// </summary>
380-
public HighlightFieldDescriptor<T> PreTags(IEnumerable<string> preTags) => Assign(a => a.PreTags = preTags);
391+
public HighlightFieldDescriptor<T> PostTags(IEnumerable<string> postTags) => Assign(a => a.PostTags = postTags);
381392

382393
/// <summary>
383-
/// Controls the post tag in which to wrap highights.
394+
/// Controls the pre tag in which to wrap highlights.
384395
/// By default, the highlighting will wrap highlighted text in &lt;em&gt; and &lt;/em&gt;.
385396
/// Using the fast vector highlighter, there can be more tags, and the importance is ordered.
386397
/// </summary>
387-
public HighlightFieldDescriptor<T> PostTags(IEnumerable<string> postTags) => Assign(a => a.PostTags = postTags);
398+
public HighlightFieldDescriptor<T> PreTags(IEnumerable<string> preTags) => Assign(a => a.PreTags = preTags);
388399

389400
/// <summary>
390401
/// Controls the post tag in which to wrap highlights.
@@ -423,6 +434,7 @@ public class HighlightFieldDescriptor<T> : DescriptorBase<HighlightFieldDescript
423434
/// Define how highlighted text will be encoded.
424435
/// It can be either default (no encoding) or html (will escape html, if you use html highlighting tags).
425436
/// </summary>
437+
[Obsolete("Encoder not valid on highlight field. Removed in NEST 6.x")]
426438
public HighlightFieldDescriptor<T> Encoder(string encoder) => Assign(a => a.Encoder = encoder);
427439

428440
/// <summary>
@@ -464,5 +476,13 @@ public HighlightFieldDescriptor<T> MatchedFields(Func<FieldsDescriptor<T>, IProm
464476
/// </summary>
465477
public HighlightFieldDescriptor<T> HighlightQuery(Func<QueryContainerDescriptor<T>, QueryContainer> querySelector) =>
466478
Assign(a => a.HighlightQuery = querySelector?.Invoke(new QueryContainerDescriptor<T>()));
479+
480+
/// <summary>
481+
/// Controls the number of matching phrases in a document that are considered. Prevents the
482+
/// <see cref="HighlighterType.Fvh"/> highlighter from analyzing too many phrases and consuming too much memory.
483+
/// When using matched_fields, <see cref="PhraseLimit"/> phrases per matched field are considered. Raising the limit increases query time
484+
/// and consumes more memory. Only supported by the <see cref="HighlighterType.Fvh"/> highlighter. Defaults to 256.
485+
/// </summary>
486+
public HighlightFieldDescriptor<T> PhraseLimit(int phraseLimit) => Assign(a => a.PhraseLimit = phraseLimit);
467487
}
468488
}

src/Tests/Framework/ManagedElasticsearch/NodeSeeders/DefaultSeeder.cs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -310,6 +310,10 @@ private static PropertiesDescriptor<Developer> DeveloperProperties(PropertiesDes
310310
.Name(p => p.FirstName)
311311
.TermVector(TermVectorOption.WithPositionsOffsetsPayloads)
312312
)
313+
.String(s => s
314+
.Name(p => p.LastName)
315+
.IndexOptions(IndexOptions.Offsets)
316+
)
313317
.Ip(s => s
314318
.Name(p => p.IPAddress)
315319
)

0 commit comments

Comments
 (0)