Skip to content

Commit

Permalink
Use own copy of PercentCodec for URI path encoding (opensearch-projec…
Browse files Browse the repository at this point in the history
…t#1109)

* Use own copy of PercentCodec for URI path encoding

Adapted from Apache HttpComponents HttpCore v5's https://github.com/apache/httpcomponents-core/blob/e009a923eefe79cf3593efbb0c18a3525ae63669/httpcore5/src/main/java/org/apache/hc/core5/net/PercentCodec.java

Signed-off-by: Thomas Farr <[email protected]>

* Refactor PercentCodec a bit

Signed-off-by: Thomas Farr <[email protected]>

* Add change log

Signed-off-by: Thomas Farr <[email protected]>

* Switch to system property

Signed-off-by: Thomas Farr <[email protected]>

* spotless

Signed-off-by: Thomas Farr <[email protected]>

* Add UPGRADING note

Signed-off-by: Thomas Farr <[email protected]>

---------

Signed-off-by: Thomas Farr <[email protected]>
  • Loading branch information
Xtansia authored Jul 31, 2024
1 parent 905aa34 commit 3d8061e
Show file tree
Hide file tree
Showing 8 changed files with 281 additions and 115 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ This section is for maintaining a changelog for all breaking changes for the cli
### Dependencies

### Changed
- Changed URL path encoding to own implementation adapted from Apache HTTP Client 5's ([#1109](https://github.com/opensearch-project/opensearch-java/pull/1109))

### Deprecated

Expand Down
5 changes: 4 additions & 1 deletion UPGRADING.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
# UPGRADING

## [UPGRADING 2.x to 3.0]
## Upgrading 2.x to 3.0
### URL Path Encoding
- The default URL path encoding has been changed to be more conservative. Previously the `!`, `$`, `&`, `'`, `(`, `)`, `*`, `+`, `,`, `;`, `=`, `@` and `:` characters were left un-encoded, they will now be percent-encoded. If you require the previous behavior you can specify the `org.opensearch.path.encoding=HTTP_CLIENT_V4_EQUIV` system property.

### SearchAfter of SearchRequest type
- Changed SearchAfter of SearchRequest type to FieldValue instead of String ([#769](https://github.com/opensearch-project/opensearch-java/pull/769))
- Consider using `FieldValue.of` to make string type values compatible.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,6 @@ public static RuntimeException noPathTemplateFound(String what) {
}

public static void pathEncode(String src, StringBuilder dest) {
dest.append(PathEncoder.encode(src));
PathEncoder.encode(dest, src);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,59 +8,43 @@

package org.opensearch.client.util;

/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

import java.lang.invoke.MethodHandle;
import java.lang.invoke.MethodHandles;
import java.lang.invoke.MethodType;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Collections;
import java.util.Optional;

public class PathEncoder {
private final static String HTTP_CLIENT4_UTILS_CLASS = "org.apache.http.client.utils.URLEncodedUtils";
private final static String HTTP_CLIENT5_UTILS_CLASS = "org.apache.hc.core5.net.URLEncodedUtils";
private final static MethodHandle FORMAT_SEGMENTS_MH;
private enum Encoding {
RFC3986_PATH(PercentCodec.RFC3986_PATH),
HTTP_CLIENT_V4_EQUIV(PercentCodec.RFC3986_PATH),

RFC3986_UNRESERVED(PercentCodec.RFC3986_UNRESERVED),
HTTP_CLIENT_V5_EQUIV(PercentCodec.RFC3986_UNRESERVED);

static {
Class<?> clazz = null;
try {
// Try Apache HttpClient5 first since this is a default one
clazz = Class.forName(HTTP_CLIENT5_UTILS_CLASS);
} catch (final ClassNotFoundException ex) {
private final PercentCodec percentCodec;

Encoding(PercentCodec percentCodec) {
this.percentCodec = percentCodec;
}

static Optional<Encoding> get(String name) {
try {
// Fallback to Apache HttpClient4
clazz = Class.forName(HTTP_CLIENT4_UTILS_CLASS);
} catch (final ClassNotFoundException ex1) {
clazz = null;
return Optional.of(Encoding.valueOf(name.toUpperCase()));
} catch (Exception ignored) {
return Optional.empty();
}
}
}

if (clazz == null) {
throw new IllegalStateException(
"Either '" + HTTP_CLIENT5_UTILS_CLASS + "' or '" + HTTP_CLIENT4_UTILS_CLASS + "' is required by not found on classpath"
);
}
private static final String ENCODING_PROPERTY = "org.opensearch.path.encoding";
private static final Encoding ENCODING_DEFAULT = Encoding.HTTP_CLIENT_V5_EQUIV;

try {
FORMAT_SEGMENTS_MH = MethodHandles.lookup()
.findStatic(clazz, "formatSegments", MethodType.methodType(String.class, Iterable.class, Charset.class));
} catch (final NoSuchMethodException | IllegalAccessException ex) {
throw new IllegalStateException("Unable to find 'formatSegments' method in " + clazz + " class");
}
private static final Encoding ENCODING = Optional.ofNullable(System.getProperty(ENCODING_PROPERTY))
.flatMap(Encoding::get)
.orElse(ENCODING_DEFAULT);

public static String encode(String pathSegment) {
return ENCODING.percentCodec.encode(pathSegment);
}

public static String encode(String uri) {
try {
return ((String) FORMAT_SEGMENTS_MH.invoke(Collections.singletonList(uri), StandardCharsets.UTF_8)).substring(1);
} catch (final Throwable ex) {
throw new RuntimeException("Unable to encode URI: " + uri, ex);
}
public static void encode(StringBuilder dest, CharSequence pathSegment) {
ENCODING.percentCodec.encode(dest, pathSegment);
}
}
180 changes: 180 additions & 0 deletions java-client/src/main/java/org/opensearch/client/util/PercentCodec.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.client.util;

import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.BitSet;

/**
* Percent-encoding.
* <p>
* Adapted from Apache HttpComponents HttpCore v5's <a href="https://github.com/apache/httpcomponents-core/blob/e009a923eefe79cf3593efbb0c18a3525ae63669/httpcore5/src/main/java/org/apache/hc/core5/net/PercentCodec.java">PercentCodec.java</a>
* </p>
*/
class PercentCodec {
private static class Chars {
private final BitSet set = new BitSet(256);

public void add(char... chars) {
for (char c : chars) {
set.set(c);
}
}

public void addRange(char start, char end) {
set.set(start, end + 1);
}

public void add(Chars set) {
this.set.or(set.set);
}

public boolean contains(int c) {
return set.get(c);
}
}

private static final Chars RFC3986_GEN_DELIMS_CHARS = new Chars() {
{
add(':', '/', '?', '#', '[', ']', '@');
}
};
private static final Chars RFC3986_SUB_DELIMS_CHARS = new Chars() {
{
add('!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=');
}
};
private static final Chars RFC3986_UNRESERVED_CHARS = new Chars() {
{
addRange('a', 'z');
addRange('A', 'Z');
addRange('0', '9');
add('-', '.', '_', '~');
}
};
private static final Chars RFC3986_PATH_NO_COLON_CHARS = new Chars() {
{
add(RFC3986_UNRESERVED_CHARS);
add(RFC3986_SUB_DELIMS_CHARS);
add('@');
}
};
private static final Chars RFC3986_PATH_CHARS = new Chars() {
{
add(RFC3986_PATH_NO_COLON_CHARS);
add(':');
}
};
private static final Chars RFC3986_URIC_CHARS = new Chars() {
{
add(RFC3986_SUB_DELIMS_CHARS);
add(RFC3986_UNRESERVED_CHARS);
}
};

private static final Chars RFC5987_UNRESERVED_CHARS = new Chars() {
{
addRange('a', 'z');
addRange('A', 'Z');
addRange('0', '9');
// Additional characters as per RFC 5987 attr-char
add('!', '#', '$', '&', '+', '-', '.', '^', '_', '`', '|', '~');
}
};

private static final int RADIX = 16;

private static void encode(
final StringBuilder buf,
final CharSequence content,
final Charset charset,
final Chars safeChars,
final boolean blankAsPlus
) {
if (content == null) {
return;
}
final CharBuffer cb = CharBuffer.wrap(content);
final ByteBuffer bb = (charset != null ? charset : StandardCharsets.UTF_8).encode(cb);
while (bb.hasRemaining()) {
final int b = bb.get() & 0xff;
if (safeChars.contains(b)) {
buf.append((char) b);
} else if (blankAsPlus && b == ' ') {
buf.append("+");
} else {
buf.append("%");
final char hex1 = Character.toUpperCase(Character.forDigit((b >> 4) & 0xF, RADIX));
final char hex2 = Character.toUpperCase(Character.forDigit(b & 0xF, RADIX));
buf.append(hex1);
buf.append(hex2);
}
}
}

private static String decode(final CharSequence content, final Charset charset, final boolean plusAsBlank) {
if (content == null) {
return null;
}
final ByteBuffer bb = ByteBuffer.allocate(content.length());
final CharBuffer cb = CharBuffer.wrap(content);
while (cb.hasRemaining()) {
final char c = cb.get();
if (c == '%' && cb.remaining() >= 2) {
final char uc = cb.get();
final char lc = cb.get();
final int u = Character.digit(uc, RADIX);
final int l = Character.digit(lc, RADIX);
if (u != -1 && l != -1) {
bb.put((byte) ((u << 4) + l));
} else {
bb.put((byte) '%');
bb.put((byte) uc);
bb.put((byte) lc);
}
} else if (plusAsBlank && c == '+') {
bb.put((byte) ' ');
} else {
bb.put((byte) c);
}
}
bb.flip();
return (charset != null ? charset : StandardCharsets.UTF_8).decode(bb).toString();
}

public static final PercentCodec RFC3986_UNRESERVED = new PercentCodec(RFC3986_UNRESERVED_CHARS);
public static final PercentCodec RFC3986_PATH = new PercentCodec(RFC3986_PATH_CHARS);
public static final PercentCodec RFC5987_UNRESERVED = new PercentCodec(RFC5987_UNRESERVED_CHARS);

private final Chars unreserved;

private PercentCodec(final Chars unreserved) {
this.unreserved = unreserved;
}

public void encode(final StringBuilder buf, final CharSequence content) {
encode(buf, content, StandardCharsets.UTF_8, unreserved, false);
}

public String encode(final CharSequence content) {
if (content == null) {
return null;
}
final StringBuilder buf = new StringBuilder();
encode(buf, content, StandardCharsets.UTF_8, unreserved, false);
return buf.toString();
}

public String decode(final CharSequence content) {
return decode(content, StandardCharsets.UTF_8, false);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -57,19 +57,10 @@ public void testArrayPathParameter() {
assertEquals("/a/_refresh", RefreshRequest._ENDPOINT.requestUrl(req));

req = RefreshRequest.of(b -> b.index("a", "b"));
if (isHttpClient5Present()) {
assertEquals("/a%2Cb/_refresh", RefreshRequest._ENDPOINT.requestUrl(req));

} else {
assertEquals("/a,b/_refresh", RefreshRequest._ENDPOINT.requestUrl(req));
}
assertEquals("/a%2Cb/_refresh", RefreshRequest._ENDPOINT.requestUrl(req));

req = RefreshRequest.of(b -> b.index("a", "b", "c"));
if (isHttpClient5Present()) {
assertEquals("/a%2Cb%2Cc/_refresh", RefreshRequest._ENDPOINT.requestUrl(req));
} else {
assertEquals("/a,b,c/_refresh", RefreshRequest._ENDPOINT.requestUrl(req));
}
assertEquals("/a%2Cb%2Cc/_refresh", RefreshRequest._ENDPOINT.requestUrl(req));
}

@Test
Expand All @@ -80,11 +71,7 @@ public void testPathEncoding() {
assertEquals("/a%2Fb/_refresh", RefreshRequest._ENDPOINT.requestUrl(req));

req = RefreshRequest.of(b -> b.index("a/b", "c/d"));
if (isHttpClient5Present()) {
assertEquals("/a%2Fb%2Cc%2Fd/_refresh", RefreshRequest._ENDPOINT.requestUrl(req));
} else {
assertEquals("/a%2Fb,c%2Fd/_refresh", RefreshRequest._ENDPOINT.requestUrl(req));
}
assertEquals("/a%2Fb%2Cc%2Fd/_refresh", RefreshRequest._ENDPOINT.requestUrl(req));

}

Expand All @@ -103,13 +90,4 @@ public void testArrayQueryParameter() {
req = RefreshRequest.of(b -> b.expandWildcards(ExpandWildcard.All, ExpandWildcard.Closed));
assertEquals("all,closed", RefreshRequest._ENDPOINT.queryParameters(req).get("expand_wildcards"));
}

private static boolean isHttpClient5Present() {
try {
Class.forName("org.apache.hc.core5.net.URLEncodedUtils");
return true;
} catch (ClassNotFoundException e) {
return false;
}
}
}

This file was deleted.

Loading

0 comments on commit 3d8061e

Please sign in to comment.