Skip to content

Commit d283f71

Browse files
authored
Add mock-mount-s3 to benchmark/ scripts (#1332)
This change allows us to run our benchmark scripts in `benchmark/` using the `mock-mount-s3` binary, which presents a Mountpoint file system backed by an in-memory mock S3 client. This change itself incorporates quite a few changes (which may have been better suited as separate commits). There are some changes to accommodate configuration of part sizes in `mock-mount-s3`, removal of throughput limits (which is useful for benchmarking!), and finally adding the configuration options to the benchmarking scripts. This change does include some hardcoded objects being added to `mock-mount-s3` which can accommodate the benchmarking scripts. This means that if the object keys change, the files will be created by FIO and "uploaded" / populated in memory, which probably isn't what you want. ### Does this change impact existing behavior? No, there are no changes to main Mountpoint code. ### Does this change need a changelog entry? Does it require a version change? No; there are no behavior changes, new or existing. --- By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license and I agree to the terms of the [Developer Certificate of Origin (DCO)](https://developercertificate.org/). --------- Signed-off-by: Daniel Carl Jones <[email protected]>
1 parent f12f84d commit d283f71

File tree

4 files changed

+65
-22
lines changed

4 files changed

+65
-22
lines changed

benchmark/benchmark.py

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -63,22 +63,32 @@ def _mount_mp(
6363
6464
Returns Mountpoint version string.
6565
"""
66+
bucket = cfg['s3_bucket']
67+
stub_mode = str(cfg["stub_mode"]).lower()
6668

6769
if cfg['mountpoint_binary'] is None:
6870
mountpoint_args = [
6971
"cargo",
7072
"run",
7173
"--quiet",
7274
"--release",
73-
"--",
75+
"--features=mock",
7476
]
77+
78+
if stub_mode == "s3_client":
79+
# `mock-mount-s3` requires bucket to be prefixed with `sthree-` to verify we're not actually reaching S3
80+
logging.debug("using mock-mount-s3 due to `stub_mode`, bucket will be prefixed with \"sthree-\"")
81+
bucket = f"sthree-{cfg['s3_bucket']}"
82+
83+
mountpoint_args.append("--bin=mock-mount-s3")
84+
85+
# End Cargo command, begin passing arguments to Mountpoint
86+
mountpoint_args.append("--")
7587
else:
7688
mountpoint_args = [cfg['mountpoint_binary']]
7789

7890
os.makedirs(MP_LOGS_DIRECTORY, exist_ok=True)
7991

80-
bucket = cfg['s3_bucket']
81-
8292
mountpoint_version_output = subprocess.check_output([*mountpoint_args, "--version"]).decode("utf-8")
8393
log.info("Mountpoint version: %s", mountpoint_version_output.strip())
8494

@@ -118,6 +128,10 @@ def _mount_mp(
118128
for network_interface in cfg['network']['interface_names']:
119129
subprocess_args.append(f"--bind={network_interface}")
120130
if (max_throughput := cfg['network']['maximum_throughput_gbps']) is not None:
131+
if stub_mode == "s3_client":
132+
raise ValueError(
133+
"should not use `stub_mode=s3_client` with `maximum_throughput_gbps`, throughput will be limited"
134+
)
121135
subprocess_args.append(f"--maximum-throughput-gbps={max_throughput}")
122136

123137
if cfg['mountpoint_max_background'] is not None:
@@ -126,14 +140,16 @@ def _mount_mp(
126140
if cfg['mountpoint_congestion_threshold'] is not None:
127141
subprocess_env["UNSTABLE_MOUNTPOINT_CONGESTION_THRESHOLD"] = str(cfg["mountpoint_congestion_threshold"])
128142

129-
stub_mode = str(cfg["stub_mode"]).lower()
130143
if stub_mode != "off" and cfg["mountpoint_binary"] is not None:
131144
raise ValueError("Cannot use `stub_mode` with `mountpoint_binary`, `stub_mode` requires recompilation")
132145
match stub_mode:
133146
case "off":
134147
pass
135148
case "fs_handler":
136149
subprocess_env["MOUNTPOINT_BUILD_STUB_FS_HANDLER"] = "1"
150+
case "s3_client":
151+
# Already handled when building cargo command
152+
pass
137153
case _:
138154
raise ValueError(f"Unknown stub_mode: {stub_mode}")
139155

benchmark/conf/config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ mountpoint_congestion_threshold: !!null
3737
with_bwm: false
3838

3939
# Works automatically ONLY where this script manages compilation. It has no effect if `mountpoint_binary` is set.
40-
stub_mode: "off" # fs_handler
40+
stub_mode: "off" # fs_handler, s3_client
4141

4242
iterations: 1
4343

mountpoint-s3-client/src/mock_client/throughput_client.rs

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,10 @@ use super::MockBackpressureHandle;
3030
/// TODO: make it bi-directional, so that upload throughput can be simulated as well.
3131
pub struct ThroughputMockClient {
3232
inner: MockClient,
33-
/// A throughput rate limiter with one token per byte
34-
rate_limiter: LeakyBucket,
33+
/// A throughput rate limiter with one token per byte.
34+
///
35+
/// If [None], there will be no limit on throughput.
36+
rate_limiter: Option<LeakyBucket>,
3537
}
3638

3739
impl ThroughputMockClient {
@@ -46,7 +48,8 @@ impl ThroughputMockClient {
4648
.refill_amount(bytes_per_interval as u32)
4749
.max(config.part_size as u32)
4850
.tokens(0)
49-
.build();
51+
.build()
52+
.into();
5053
tracing::info!(?rate_limiter, "new client");
5154

5255
Self {
@@ -55,6 +58,16 @@ impl ThroughputMockClient {
5558
}
5659
}
5760

61+
/// Create a new [ThroughputMockClient] with the given configuration and no throughput limits.
62+
///
63+
/// This is effectively the same as a [MockClient], but allows you to use the [ThroughputMockClient] type.
64+
pub fn new_unlimited_throughput(config: MockClientConfig) -> Self {
65+
Self {
66+
inner: MockClient::new(config),
67+
rate_limiter: None,
68+
}
69+
}
70+
5871
/// Add an object to this mock client's bucket
5972
pub fn add_object(&self, key: &str, value: MockObject) {
6073
self.inner.add_object(key, value);
@@ -65,7 +78,7 @@ impl ThroughputMockClient {
6578
pub struct ThroughputGetObjectResponse {
6679
#[pin]
6780
request: MockGetObjectResponse,
68-
rate_limiter: LeakyBucket,
81+
rate_limiter: Option<LeakyBucket>,
6982
}
7083

7184
#[cfg_attr(not(docsrs), async_trait)]
@@ -94,8 +107,10 @@ impl Stream for ThroughputGetObjectResponse {
94107
this.request.poll_next(cx).map(|next| {
95108
next.map(|item| {
96109
item.inspect(|body_part| {
97-
// Acquire enough tokens for the number of bytes we want to deliver
98-
block_on(this.rate_limiter.acquire(body_part.data.len() as u32));
110+
if let Some(rate_limiter) = this.rate_limiter {
111+
// Acquire enough tokens for the number of bytes we want to deliver
112+
block_on(rate_limiter.acquire(body_part.data.len() as u32));
113+
}
99114
})
100115
})
101116
})

mountpoint-s3/src/bin/mock-mount-s3.rs

Lines changed: 23 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
11
//! A version of `mount-s3` that targets an in-memory mock S3 backend rather than the real service.
22
//!
3-
//! The mock S3 backend supports simulating a target network throughput. The
4-
//! --maximum-throughput-gbps command-line argument can be used to set the target throughput, which
5-
//! defaults to 10Gbps.
3+
//! The mock S3 backend supports simulating a target network throughput.
4+
//! The `--maximum-throughput-gbps` command-line argument can be used to optionally limit download throughput.
65
//!
76
//! As a safety measure, this binary works only if the bucket name begins with "sthree-". This makes
87
//! sure we can't accidentally confuse this binary with a real `mount-s3` in any of our testing or
@@ -12,7 +11,6 @@
1211
1312
use std::sync::Arc;
1413

15-
use anyhow::anyhow;
1614
use clap::Parser;
1715
use futures::executor::ThreadPool;
1816

@@ -45,21 +43,28 @@ fn create_mock_client(args: &CliArgs) -> anyhow::Result<(Arc<ThroughputMockClien
4543

4644
tracing::warn!("using mock client");
4745

48-
let Some(max_throughput_gbps) = args.maximum_throughput_gbps else {
49-
return Err(anyhow!(
50-
"must set --maximum-throughput-gbps when using mock-mount-s3 binary"
51-
));
46+
// TODO: Actually update the mock client to support different part sizes
47+
let part_size = {
48+
if args.read_part_size.is_some() || args.write_part_size.is_some() {
49+
tracing::warn!("mock client does not support separate part sizes for reading and writing, ignoring");
50+
}
51+
args.part_size
5252
};
53-
tracing::info!("mock client target network throughput {max_throughput_gbps} Gbps");
5453

5554
let config = MockClientConfig {
5655
bucket: bucket_name,
57-
part_size: args.part_size as usize,
56+
part_size: part_size as usize,
5857
unordered_list_seed: None,
5958
enable_backpressure: true,
6059
initial_read_window_size: 1024 * 1024 + 128 * 1024, // matching real MP
6160
};
62-
let client = ThroughputMockClient::new(config, max_throughput_gbps as f64);
61+
62+
let client = if let Some(max_throughput_gbps) = args.maximum_throughput_gbps {
63+
tracing::info!("mock client limited to {max_throughput_gbps} Gb/s download throughput");
64+
ThroughputMockClient::new(config, max_throughput_gbps as f64)
65+
} else {
66+
ThroughputMockClient::new_unlimited_throughput(config)
67+
};
6368

6469
let runtime = Runtime::new(ThreadPool::builder().name_prefix("runtime").create()?);
6570

@@ -85,6 +90,13 @@ fn create_mock_client(args: &CliArgs) -> anyhow::Result<(Arc<ThroughputMockClien
8590
};
8691
client.add_object(&key, MockObject::ramp(0x11, size as usize, ETag::for_tests()));
8792
}
93+
// Some objects that are useful for benchmarking
94+
for job_num in 0..1024 {
95+
let size_gib = 100;
96+
let size_bytes = size_gib * 1024u64.pow(3);
97+
let key = format!("j{job_num}_{size_gib}GiB.bin");
98+
client.add_object(&key, MockObject::constant(1u8, size_bytes as usize, ETag::for_tests()));
99+
}
88100
client.add_object("hello.txt", MockObject::from_bytes(b"hello world", ETag::for_tests()));
89101
client.add_object("empty", MockObject::from_bytes(b"", ETag::for_tests()));
90102
client.add_object(

0 commit comments

Comments
 (0)