Skip to content

Commit d45687f

Browse files
authored
Speed up URL path encoding and remove dependency on httpclient (#576)
1 parent 4bd9029 commit d45687f

File tree

3 files changed

+94
-15
lines changed

3 files changed

+94
-15
lines changed

java-client/src/main/java/co/elastic/clients/transport/endpoints/EndpointBase.java

+52-4
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,11 @@
2222
import co.elastic.clients.elasticsearch._types.ErrorResponse;
2323
import co.elastic.clients.json.JsonpDeserializer;
2424
import co.elastic.clients.transport.Endpoint;
25-
import org.apache.http.client.utils.URLEncodedUtils;
2625

2726
import javax.annotation.Nullable;
27+
import java.nio.ByteBuffer;
28+
import java.nio.charset.StandardCharsets;
29+
import java.util.BitSet;
2830
import java.util.Collections;
2931
import java.util.Map;
3032
import java.util.function.Function;
@@ -145,8 +147,54 @@ public static RuntimeException noPathTemplateFound(String what) {
145147
"Please check the API documentation, or raise an issue if this should be a valid request.");
146148
}
147149

148-
public static void pathEncode(String src, StringBuilder dest) {
149-
// TODO: avoid dependency on HttpClient here (and use something more efficient)
150-
dest.append(URLEncodedUtils.formatSegments(src).substring(1));
150+
// Set of byte values that pathEncode() copies to the output unchanged; any byte whose bit is
// not set here is written as a percent-escape instead.
private static final BitSet PATH_SAFE;
151+
// Uppercase hexadecimal digits indexed by nibble value (0-15), used to build percent-escapes.
private static final char[] HEX_CHARS;
152+
153+
static {
154+
PATH_SAFE = new BitSet(256); // one bit per possible byte value
155+
// From RFC 3986
156+
// unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
157+
PATH_SAFE.set('a', 'z'+1); // BitSet.set(from, to) excludes the upper bound, hence the +1
158+
PATH_SAFE.set('A', 'Z'+1);
159+
PATH_SAFE.set('0', '9'+1);
160+
PATH_SAFE.set('-');
161+
PATH_SAFE.set('.');
162+
PATH_SAFE.set('_');
163+
PATH_SAFE.set('~');
164+
165+
// sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
166+
PATH_SAFE.set('!');
167+
PATH_SAFE.set('$');
168+
PATH_SAFE.set('&');
169+
PATH_SAFE.set('\'');
170+
PATH_SAFE.set('(');
171+
PATH_SAFE.set(')');
172+
PATH_SAFE.set('*');
173+
PATH_SAFE.set('+');
174+
PATH_SAFE.set(',');
175+
PATH_SAFE.set(';');
176+
PATH_SAFE.set('=');
177+
178+
// pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
// The pct-encoded alternative is not represented in the set: the percent sign is left
// unset, so a percent sign in the input is itself escaped (as %25) rather than passed through.
// The slash is also left unset, so a slash inside the input is escaped (as %2F).
179+
PATH_SAFE.set(':');
180+
PATH_SAFE.set('@');
181+
182+
HEX_CHARS = "0123456789ABCDEF".toCharArray();
183+
}
184+
185+
/**
 * Percent-encodes a string and appends the result to {@code dest}.
 * <p>
 * The string is converted to UTF-8. Each resulting byte that is marked in {@code PATH_SAFE}
 * is appended as a single character; every other byte is appended as a percent sign followed
 * by two hexadecimal digits taken from {@code HEX_CHARS}.
 *
 * @param src the text to encode
 * @param dest the builder that receives the encoded text; existing content is kept and the
 *             encoded text is appended after it
 */
public static void pathEncode(final String src, StringBuilder dest) {
186+
final ByteBuffer buf = StandardCharsets.UTF_8.encode(src);
187+
// In UTF-8 multibyte encoding, all bytes have the high bit set. This means we can iterate
188+
// on all bytes and percent-encode without having to care about code point context.
189+
while (buf.hasRemaining()) {
190+
int b = buf.get() & 0xff; // mask the signed byte to an unsigned 0..255 value
191+
if (PATH_SAFE.get(b)) {
192+
dest.append((char) b);
193+
} else {
194+
dest.append("%");
195+
dest.append(HEX_CHARS[b >> 4 & 0xF]); // high nibble
196+
dest.append(HEX_CHARS[b & 0xF]); // low nibble
197+
}
198+
}
151199
}
152200
}

java-client/src/main/java/co/elastic/clients/transport/endpoints/SimpleEndpoint.java

-11
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@
2222
import co.elastic.clients.elasticsearch._types.ErrorResponse;
2323
import co.elastic.clients.json.JsonpDeserializer;
2424
import co.elastic.clients.transport.JsonEndpoint;
25-
import org.apache.http.client.utils.URLEncodedUtils;
2625

2726
import java.util.Map;
2827
import java.util.function.Function;
@@ -88,14 +87,4 @@ public <NewResponseT> SimpleEndpoint<RequestT, NewResponseT> withResponseDeseria
8887
newResponseParser
8988
);
9089
}
91-
92-
public static RuntimeException noPathTemplateFound(String what) {
93-
return new RuntimeException("Could not find a request " + what + " with this set of properties. " +
94-
"Please check the API documentation, or raise an issue if this should be a valid request.");
95-
}
96-
97-
public static void pathEncode(String src, StringBuilder dest) {
98-
// TODO: avoid dependency on HttpClient here (and use something more efficient)
99-
dest.append(URLEncodedUtils.formatSegments(src).substring(1));
100-
}
10190
}
java-client/src/test/java/co/elastic/clients/transport/endpoints/EndpointBaseTest.java

+42
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
/*
2+
* Licensed to Elasticsearch B.V. under one or more contributor
3+
* license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright
5+
* ownership. Elasticsearch B.V. licenses this file to you under
6+
* the Apache License, Version 2.0 (the "License"); you may
7+
* not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package co.elastic.clients.transport.endpoints;
21+
22+
import org.junit.jupiter.api.Assertions;
23+
import org.junit.jupiter.api.Test;
24+
25+
/**
 * Tests for the static helpers of {@link EndpointBase}. Extends {@code Assertions} so that
 * assertion methods can be called without a static import.
 */
public class EndpointBaseTest extends Assertions {
26+
27+
@Test
28+
public void testPathEncoding() {
29+
assertEquals("abCD12;-_*", pathEncode("abCD12;-_*")); // letters, digits and safe punctuation are unchanged
30+
assertEquals("XYZ%5B", pathEncode("XYZ[")); // an opening square bracket is escaped
31+
assertEquals("xyz%7B", pathEncode("xyz{")); // an opening curly brace is escaped
32+
assertEquals("foo%2Fbar", pathEncode("foo/bar")); // a slash is escaped, not kept as a path separator
33+
assertEquals("foo%20bar", pathEncode("foo bar")); // a space becomes %20, not a plus sign
34+
assertEquals("f%C3%AAl%C3%A9", pathEncode("fêlé")); // each non-ASCII character is escaped as its two UTF-8 bytes
35+
}
36+
37+
// Convenience wrapper: runs EndpointBase.pathEncode into a fresh StringBuilder and returns its content.
private String pathEncode(String s) {
38+
StringBuilder sb = new StringBuilder();
39+
EndpointBase.pathEncode(s, sb);
40+
return sb.toString();
41+
}
42+
}

0 commit comments

Comments
 (0)