Skip to content

Commit 2ce9093

Browse files
j-tyler and Justin Marsh authored
Use Java Base64 instead of Apache Commons Base64 (#3166)
* Use Java Base64 instead of Apache Commons Base64. In Java 8+, Java has a set of native Base64 utilities which are much more performant than the Apache Commons library. We observed some significant memory usage coming from the Base64 usage in the codebase, and thus migrating to the Java native version will reduce memory allocation. In addition, this PR introduces a JMH benchmarking module which can be run via ./gradlew jmh. This module allows meaningful performance testing locally, allowing anyone to verify that the performance gains asserted in a PR are actually true. We start out with a set of simple benchmarks showing the significant performance difference between the two Base64 implementations across various byte sizes. On my M4 MacBook, encode is about 30x faster and decode is about 5x faster. * use missed static encoder reference * Remove import in AzureCloudDestinationTest * Respect legacy Apache 2 decode leniency. Apache 2 Base64 decode has special leniency for invalid encodings. To ensure full backwards compat we fall back to using Apache 2 when decoding fails. * fix typo * Add missing imports. --------- Co-authored-by: Justin Marsh <[email protected]>
1 parent 8e3fd47 commit 2ce9093

File tree

17 files changed

+271
-41
lines changed

17 files changed

+271
-41
lines changed

ambry-api/src/main/java/com/github/ambry/frontend/s3/S3MultipartETag.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,10 @@
2020
import com.fasterxml.jackson.databind.node.ObjectNode;
2121
import com.github.ambry.frontend.PutBlobMetaInfo;
2222
import com.github.ambry.utils.Pair;
23+
import com.github.ambry.utils.Utils;
2324
import java.io.IOException;
2425
import java.util.List;
2526
import java.util.Objects;
26-
import org.apache.commons.codec.binary.Base64;
2727

2828

2929
/**
@@ -93,15 +93,15 @@ public static String serialize(S3MultipartETag eTag) throws IOException {
9393
rootObject.put(DATA_CHUNK_LIST, chunks);
9494
rootObject.put(VERSION, CURRENT_VERSION);
9595

96-
return Base64.encodeBase64URLSafeString(rootObject.toString().getBytes());
96+
return Utils.base64EncodeUrlSafeWithoutPadding(rootObject.toString().getBytes());
9797
}
9898

9999
/**
100100
* Deserialize the Json String to {@link S3MultipartETag}
101101
* @return the {@link S3MultipartETag}
102102
*/
103103
public static S3MultipartETag deserialize(String encodedETagStr) throws IOException {
104-
String eTagStr = new String(Base64.decodeBase64(encodedETagStr));
104+
String eTagStr = new String(Utils.base64DecodeUrlSafe(encodedETagStr));
105105
JsonNode rootNode;
106106
try {
107107
rootNode = objectMapper.readTree(eTagStr);

ambry-benchmarks/.gitignore

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# JMH generated files
2+
/build/
3+
/out/
4+
5+
# JMH results
6+
*.log
7+
jmh-result.*
8+
9+
# IDE files
10+
.idea/
11+
*.iml
12+
.classpath
13+
.project
14+
.settings/
15+
bin/
16+
17+
# Generated JMH sources
18+
/generated/

ambry-benchmarks/README.md

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# Ambry Benchmarks
2+
3+
JMH (Java Microbenchmark Harness) performance benchmarks for Ambry components.
4+
5+
## Running Benchmarks
6+
7+
```bash
8+
### Run all benchmarks
9+
./gradlew jmh
10+
11+
### Run specific benchmarks
12+
./gradlew jmh -Pjmh.includes='.*java8.*'
13+
```
14+
## Output
15+
16+
Results saved to: `ambry-benchmarks/build/reports/jmh/results.txt`

ambry-benchmarks/build.gradle

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
// Copyright (C) 2025 LinkedIn Corp. All rights reserved.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use
4+
// this file except in compliance with the License. You may obtain a copy of the
5+
// License at http://www.apache.org/licenses/LICENSE-2.0
6+
//
7+
// Unless required by applicable law or agreed to in writing, software distributed
8+
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
9+
// CONDITIONS OF ANY KIND, either express or implied.
10+
buildscript {
11+
repositories {
12+
maven {
13+
url "https://plugins.gradle.org/m2/"
14+
}
15+
}
16+
dependencies {
17+
classpath "me.champeau.gradle:jmh-gradle-plugin:0.4.8"
18+
}
19+
}
20+
21+
apply plugin: "me.champeau.gradle.jmh"
22+
apply plugin: 'java'
23+
24+
dependencies {
25+
jmh "commons-codec:commons-codec:$commonsVersion"
26+
}
27+
28+
jmh {
29+
jmhVersion = '1.21'
30+
warmupIterations = 1
31+
warmup = '5s'
32+
iterations = 2
33+
timeOnIteration = '5s'
34+
fork = 1
35+
include = 'com\\.github\\.ambry\\..*'
36+
37+
// Enable GC profiler by default to track memory allocations
38+
// Shows allocation rate (MB/sec) and normalized allocation (B/op)
39+
profilers = ['gc']
40+
}
41+
42+
// To run without profilers (faster): ./gradlew jmh -Pjmh.profilers=
43+
// To add more profilers: ./gradlew jmh -Pjmh.profilers='gc,stack'
44+
45+
46+
tasks.withType(Test) {
47+
exclude '**/*Benchmark*'
48+
}
49+
tasks.build.enabled = false
50+
tasks.check.enabled = false
51+
tasks.assemble.enabled = false
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
/**
2+
* Copyright 2025 LinkedIn Corp. All rights reserved.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
*/
14+
package com.github.ambry.benchmarks;
15+
16+
import java.util.Random;
17+
import java.util.concurrent.ThreadLocalRandom;
18+
import java.util.concurrent.TimeUnit;
19+
import org.openjdk.jmh.annotations.Benchmark;
20+
import org.openjdk.jmh.annotations.BenchmarkMode;
21+
import org.openjdk.jmh.annotations.Fork;
22+
import org.openjdk.jmh.annotations.Level;
23+
import org.openjdk.jmh.annotations.Measurement;
24+
import org.openjdk.jmh.annotations.Mode;
25+
import org.openjdk.jmh.annotations.OutputTimeUnit;
26+
import org.openjdk.jmh.annotations.Param;
27+
import org.openjdk.jmh.annotations.Scope;
28+
import org.openjdk.jmh.annotations.Setup;
29+
import org.openjdk.jmh.annotations.State;
30+
import org.openjdk.jmh.annotations.Warmup;
31+
import org.openjdk.jmh.infra.Blackhole;
32+
33+
/**
34+
* Compares Apache Commons Base64 vs Java 8 Base64 across 1KB, 128KB, and 4MB blobs.
35+
* Tests encoding (bytes -> string) and decoding (string -> bytes) which are the primary operations.
36+
* Uses randomized data selection to prevent JVM constant folding and CPU cache optimizations.
37+
* Runtime: ~3 minutes with default config. Memory profiling enabled via GC profiler.
38+
*/
39+
@BenchmarkMode(Mode.Throughput)
40+
@OutputTimeUnit(TimeUnit.MILLISECONDS)
41+
@Warmup(iterations = 1, time = 5)
42+
@Measurement(iterations = 2, time = 5)
43+
@Fork(value = 1, jvmArgsAppend = {"-Xms2g", "-Xmx2g"})
44+
@State(Scope.Thread)
45+
public class Base64Benchmark {
46+
47+
@Param({"1024", "131072", "4194304"})
48+
private int blobSize;
49+
50+
private static final int DATA_SAMPLES = 10;
51+
private byte[][] randomData;
52+
private String[] apacheEncodedData;
53+
private String[] java8EncodedData;
54+
55+
private static final java.util.Base64.Encoder JAVA8_ENCODER =
56+
java.util.Base64.getUrlEncoder().withoutPadding();
57+
private static final java.util.Base64.Decoder JAVA8_DECODER =
58+
java.util.Base64.getUrlDecoder();
59+
60+
@Setup(Level.Trial)
61+
public void setup() {
62+
randomData = new byte[DATA_SAMPLES][];
63+
apacheEncodedData = new String[DATA_SAMPLES];
64+
java8EncodedData = new String[DATA_SAMPLES];
65+
66+
Random random = new Random(42);
67+
for (int i = 0; i < DATA_SAMPLES; i++) {
68+
randomData[i] = new byte[blobSize];
69+
random.nextBytes(randomData[i]);
70+
apacheEncodedData[i] = org.apache.commons.codec.binary.Base64.encodeBase64URLSafeString(randomData[i]);
71+
java8EncodedData[i] = JAVA8_ENCODER.encodeToString(randomData[i]);
72+
}
73+
}
74+
75+
@Benchmark
76+
public void apacheCommonsEncode(Blackhole blackhole) {
77+
int index = ThreadLocalRandom.current().nextInt(DATA_SAMPLES);
78+
blackhole.consume(org.apache.commons.codec.binary.Base64.encodeBase64URLSafeString(randomData[index]));
79+
}
80+
81+
@Benchmark
82+
public void apacheCommonsDecode(Blackhole blackhole) {
83+
int index = ThreadLocalRandom.current().nextInt(DATA_SAMPLES);
84+
blackhole.consume(org.apache.commons.codec.binary.Base64.decodeBase64(apacheEncodedData[index]));
85+
}
86+
87+
@Benchmark
88+
public void java8Encode(Blackhole blackhole) {
89+
int index = ThreadLocalRandom.current().nextInt(DATA_SAMPLES);
90+
blackhole.consume(JAVA8_ENCODER.encodeToString(randomData[index]));
91+
}
92+
93+
@Benchmark
94+
public void java8Decode(Blackhole blackhole) {
95+
int index = ThreadLocalRandom.current().nextInt(DATA_SAMPLES);
96+
blackhole.consume(JAVA8_DECODER.decode(java8EncodedData[index]));
97+
}
98+
}

ambry-cloud/src/test/java/com/github/ambry/cloud/azure/AzureCloudDestinationTest.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,6 @@
6464
import java.util.concurrent.TimeUnit;
6565
import java.util.stream.Collectors;
6666
import java.lang.reflect.Field;
67-
import org.apache.commons.codec.binary.Base64;
6867
import org.junit.After;
6968
import org.junit.Before;
7069
import org.junit.Ignore;
@@ -86,7 +85,7 @@
8685
@Ignore
8786
public class AzureCloudDestinationTest {
8887

89-
private final String base64key = Base64.encodeBase64String("ambrykey".getBytes());
88+
private final String base64key = Utils.base64EncodeWithPadding("ambrykey".getBytes());
9089
private final String storageConnection =
9190
"DefaultEndpointsProtocol=https;AccountName=ambry;AccountKey=" + base64key + ";EndpointSuffix=core.windows.net";
9291
private final String clusterName = "main";

ambry-cloud/src/test/java/com/github/ambry/cloud/azure/AzureStorageCompactorTest.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,6 @@
5050
import java.util.Map;
5151
import java.util.Properties;
5252
import java.util.concurrent.TimeUnit;
53-
import org.apache.commons.codec.binary.Base64;
5453
import org.junit.After;
5554
import org.junit.Before;
5655
import org.junit.Ignore;
@@ -73,7 +72,7 @@
7372
@Ignore
7473
public class AzureStorageCompactorTest {
7574

76-
private final String base64key = Base64.encodeBase64String("ambrykey".getBytes());
75+
private final String base64key = Utils.base64EncodeWithPadding("ambrykey".getBytes());
7776
private final String storageConnection =
7877
"DefaultEndpointsProtocol=https;AccountName=ambry;AccountKey=" + base64key + ";EndpointSuffix=core.windows.net";
7978
private final int blobSize = 1024;

ambry-cloud/src/test/java/com/github/ambry/cloud/azure/AzureTestUtils.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,6 @@
3636
import java.util.List;
3737
import java.util.Map;
3838
import java.util.Properties;
39-
import org.apache.commons.codec.binary.Base64;
4039
import reactor.core.publisher.Flux;
4140

4241
import static com.github.ambry.commons.BlobId.*;
@@ -49,7 +48,7 @@
4948
*/
5049
class AzureTestUtils {
5150

52-
static final String base64key = Base64.encodeBase64String("ambrykey".getBytes());
51+
static final String base64key = Utils.base64EncodeWithPadding("ambrykey".getBytes());
5352
static final String storageConnection =
5453
"DefaultEndpointsProtocol=https;AccountName=ambry;AccountKey=" + base64key + ";EndpointSuffix=core.windows.net";
5554
static final byte dataCenterId = 66;

ambry-commons/src/main/java/com/github/ambry/commons/BlobId.java

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@
2626
import java.nio.ByteBuffer;
2727
import java.util.Objects;
2828
import java.util.UUID;
29-
import org.apache.commons.codec.binary.Base64;
3029

3130
import static com.github.ambry.account.Account.*;
3231
import static com.github.ambry.account.Container.*;
@@ -326,7 +325,7 @@ private BlobId(DataInputStream stream, ClusterMap clusterMap, boolean ensureFull
326325
* @throws IOException
327326
*/
328327
public BlobId(String id, ClusterMap clusterMap) throws IOException {
329-
this(new DataInputStream(new ByteBufferInputStream(ByteBuffer.wrap(Base64.decodeBase64(id)))), clusterMap, true);
328+
this(new DataInputStream(new ByteBufferInputStream(ByteBuffer.wrap(Utils.base64DecodeUrlSafe(id)))), clusterMap, true);
330329
}
331330

332331
/**
@@ -442,7 +441,7 @@ public BlobDataType getBlobDataType() {
442441
*/
443442
public static boolean isEncrypted(String blobIdString) throws IOException {
444443
DataInputStream stream =
445-
new DataInputStream(new ByteBufferInputStream(ByteBuffer.wrap(Base64.decodeBase64(blobIdString))));
444+
new DataInputStream(new ByteBufferInputStream(ByteBuffer.wrap(Utils.base64DecodeUrlSafe(blobIdString))));
446445
return (stream.readShort() >= BLOB_ID_V3) && ((stream.readByte() & IS_ENCRYPTED_MASK) != 0);
447446
}
448447

@@ -529,7 +528,7 @@ public byte[] getUuidBytesArray() {
529528

530529
@Override
531530
public String getID() {
532-
return Base64.encodeBase64URLSafeString(toBytes());
531+
return Utils.base64EncodeUrlSafeWithoutPadding(toBytes());
533532
}
534533

535534
@Override
@@ -703,7 +702,7 @@ public static BlobId craft(BlobId inputId, short targetVersion, short accountId,
703702
*/
704703
public static boolean isCrafted(String idStr) throws IOException {
705704
BlobIdPreamble blobIdPreamble =
706-
new BlobIdPreamble(new DataInputStream(new ByteBufferInputStream(ByteBuffer.wrap(Base64.decodeBase64(idStr)))));
705+
new BlobIdPreamble(new DataInputStream(new ByteBufferInputStream(ByteBuffer.wrap(Utils.base64DecodeUrlSafe(idStr)))));
707706
return blobIdPreamble.type == BlobIdType.CRAFTED;
708707
}
709708

@@ -715,7 +714,7 @@ public static boolean isCrafted(String idStr) throws IOException {
715714
*/
716715
public static short getVersion(String idStr) throws IOException {
717716
BlobIdPreamble blobIdPreamble =
718-
new BlobIdPreamble(new DataInputStream(new ByteBufferInputStream(ByteBuffer.wrap(Base64.decodeBase64(idStr)))));
717+
new BlobIdPreamble(new DataInputStream(new ByteBufferInputStream(ByteBuffer.wrap(Utils.base64DecodeUrlSafe(idStr)))));
719718
return blobIdPreamble.version;
720719
}
721720

@@ -729,7 +728,7 @@ public static short getVersion(String idStr) throws IOException {
729728
*/
730729
public static Pair<Short, Short> getAccountAndContainerIds(String idStr) throws IOException {
731730
BlobIdPreamble blobIdPreamble =
732-
new BlobIdPreamble(new DataInputStream(new ByteBufferInputStream(ByteBuffer.wrap(Base64.decodeBase64(idStr)))));
731+
new BlobIdPreamble(new DataInputStream(new ByteBufferInputStream(ByteBuffer.wrap(Utils.base64DecodeUrlSafe(idStr)))));
733732
return new Pair<>(blobIdPreamble.accountId, blobIdPreamble.containerId);
734733
}
735734

@@ -741,7 +740,7 @@ public static Pair<Short, Short> getAccountAndContainerIds(String idStr) throws
741740
*/
742741
public static BlobDataType getBlobDataType(String idStr) throws IOException {
743742
BlobIdPreamble blobIdPreamble =
744-
new BlobIdPreamble(new DataInputStream(new ByteBufferInputStream(ByteBuffer.wrap(Base64.decodeBase64(idStr)))));
743+
new BlobIdPreamble(new DataInputStream(new ByteBufferInputStream(ByteBuffer.wrap(Utils.base64DecodeUrlSafe(idStr)))));
745744
return blobIdPreamble.blobDataType;
746745
}
747746

ambry-commons/src/test/java/com/github/ambry/commons/BlobIdTest.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import com.github.ambry.utils.ByteBufferInputStream;
2323
import com.github.ambry.utils.Pair;
2424
import com.github.ambry.utils.TestUtils;
25+
import com.github.ambry.utils.Utils;
2526
import java.io.ByteArrayInputStream;
2627
import java.io.DataInputStream;
2728
import java.io.IOException;
@@ -35,7 +36,6 @@
3536
import java.util.UUID;
3637
import java.util.function.Function;
3738
import java.util.stream.Collectors;
38-
import org.apache.commons.codec.binary.Base64;
3939
import org.junit.Test;
4040
import org.junit.runner.RunWith;
4141
import org.junit.runners.Parameterized;
@@ -517,7 +517,7 @@ private String buildBadBlobId(short version, BlobIdType type, Byte datacenterId,
517517
idBuf.put(uuid.getBytes());
518518
}
519519
idBuf.put(extraChars.getBytes());
520-
return Base64.encodeBase64URLSafeString(idBuf.array());
520+
return Utils.base64EncodeUrlSafeWithoutPadding(idBuf.array());
521521
}
522522

523523
/**
@@ -545,7 +545,7 @@ private void deserializeBlobIdAndAssert(short version, String srcBlobIdStr) thro
545545
* @return the {@link DataInputStream}
546546
*/
547547
private DataInputStream getStreamFromBase64(String base64String) {
548-
return new DataInputStream(new ByteBufferInputStream(ByteBuffer.wrap(Base64.decodeBase64(base64String))));
548+
return new DataInputStream(new ByteBufferInputStream(ByteBuffer.wrap(Utils.base64DecodeUrlSafe(base64String))));
549549
}
550550

551551
/**
@@ -635,7 +635,7 @@ private void assertBlobIdFieldValues(short version, BlobId blobId, BlobIdType ty
635635
* @throws Exception Any unexpected exception.
636636
*/
637637
private short getVersionFromBlobString(String blobId) throws Exception {
638-
DataInputStream dis = new DataInputStream(new ByteBufferInputStream(ByteBuffer.wrap(Base64.decodeBase64(blobId))));
638+
DataInputStream dis = new DataInputStream(new ByteBufferInputStream(ByteBuffer.wrap(Utils.base64DecodeUrlSafe(blobId))));
639639
try {
640640
return dis.readShort();
641641
} finally {

0 commit comments

Comments
 (0)