Skip to content

Expose access to io_uring in the form of submission and completion queues #298

Open
@saltatory

Description

@saltatory

I have been learning about and testing the io_uring and tokio_uring libraries over the past couple of days.

I'm working on a performance-oriented filesystem application where reads and writes can be randomly distributed.

In my benchmarking, I observed that submitting multiple operations simultaneously and then handling the results was significantly faster than submitting a single op, then waiting, then submitting another.

This makes me want to be able to handle the submission and completion queues myself.

Both of the tests displayed above write 4K blocks to random offsets. Neither test is O_DIRECT. Neither test performs sync_all so we are just testing the buffering of IO in the Linux filesystem.

As you can see, there is a significant difference in submission speed between await-ing each IO and simply continuing to submit as fast as the io_uring submission queue will allow.

image
image

use constants::GIBIBYTE;
use criterion::{
    criterion_group,
    criterion_main,
    Criterion,
    Throughput,
};
use rand::{
    Rng,
    RngCore as _,
};
use std::{
    io::Write as _,
    os::unix::fs::FileExt,
};

// Size of a single I/O block in bytes (4 KiB — a typical filesystem page size).
const SIZE_BLOCK: usize = 4096;

/// Benchmarks random writes to a 1 GiB test file, comparing tokio_uring
/// (awaited vs. fire-and-forget, with and without `sync_all`) against the
/// blocking `std::fs` + `FileExt::write_at` path.
///
/// NOTE(review): the tokio_uring `File` is opened inside one runtime
/// (`tokio_uring::start`) and then used from a fresh runtime on every
/// benchmark iteration — confirm tokio_uring supports a file handle
/// outliving the runtime that created it.
fn bench(c: &mut Criterion)
{
    // Open (or create) the shared test file once through tokio_uring.
    let file = tokio_uring::start(async {
        tokio_uring::fs::OpenOptions::new()
            .read(true)
            .write(true)
            .create(true)
            .open("test/device.dat")
            .await
            .unwrap()
    });

    // A second, independent handle for the blocking std::fs benchmarks.
    let file_standard = std::fs::OpenOptions::new()
        .read(true)
        .write(true)
        .create(true)
        .open("test/device.dat")
        .unwrap();

    // Pre-size the file to 1 GiB so random writes never extend it.
    file_standard.set_len(GIBIBYTE).unwrap();

    let mut group = c.benchmark_group("block_device_io_uring");
    group.throughput(Throughput::Bytes(SIZE_BLOCK as u64));

    // Await each write, then fsync: the worst-case per-operation latency.
    group.bench_function("io_uring_write_random_wait_sync", |b| {
        b.iter(|| {
            tokio_uring::start(async {
                let mut rng = rand::thread_rng();
                let mut block = vec![0u8; SIZE_BLOCK];
                rng.fill_bytes(&mut block);
                let block_number = rng.gen::<u64>() % (GIBIBYTE / SIZE_BLOCK as u64);
                // BUGFIX: `write_at` takes a *byte* offset; the original passed
                // the raw block index, clustering every write in the first ~1 MiB.
                let offset = block_number * SIZE_BLOCK as u64;

                let _r = file.write_at(block, offset).await;
                file.sync_all().await.unwrap();
            })
        });
    });

    // Fire-and-forget: neither await the write nor fsync.
    group.bench_function("io_uring_write_random_nowait_nosync", |b| {
        b.iter(|| {
            tokio_uring::start(async {
                let mut rng = rand::thread_rng();
                let mut block = vec![0u8; SIZE_BLOCK];
                rng.fill_bytes(&mut block);
                let block_number = rng.gen::<u64>() % (GIBIBYTE / SIZE_BLOCK as u64);
                let offset = block_number * SIZE_BLOCK as u64;

                // NOTE(review): the returned future is dropped without being
                // awaited. This relies on tokio_uring submitting the SQE eagerly
                // when the op is created — confirm against the tokio_uring docs.
                let _r = file.write_at(block, offset);
            })
        });
    });

    // Await the write but skip fsync: isolates submission-queue round-trip cost.
    group.bench_function("io_uring_write_random_wait_nosync", |b| {
        b.iter(|| {
            tokio_uring::start(async {
                let mut rng = rand::thread_rng();
                let mut block = vec![0u8; SIZE_BLOCK];
                rng.fill_bytes(&mut block);
                let block_number = rng.gen::<u64>() % (GIBIBYTE / SIZE_BLOCK as u64);
                let offset = block_number * SIZE_BLOCK as u64;

                let _r = file.write_at(block, offset).await;
            })
        });
    });

    // 4 MiB writes (SIZE_BLOCK * 1024) with an fsync per iteration.
    group.throughput(Throughput::Bytes(SIZE_BLOCK as u64 * 1024));
    group.bench_function("io_uring_write_random_4MB_sync", |b| {
        b.iter(|| {
            tokio_uring::start(async {
                let mut rng = rand::thread_rng();
                let mut block = vec![0u8; SIZE_BLOCK * 1024];
                rng.fill_bytes(&mut block);
                // Byte offset chosen so the whole 4 MiB write stays in-bounds.
                let offset = rng.gen_range(0..(GIBIBYTE - (SIZE_BLOCK as u64 * 1024)));

                // BUGFIX: the "sync" variant previously dropped the write future
                // without awaiting it; await so the write completes before fsync.
                let _r = file.write_at(block, offset).await;
                file.sync_all().await.unwrap();
            })
        });
    });

    // 32 MiB fire-and-forget writes.
    // BUGFIX: throughput was declared as 4 MiB while writing 32 MiB per
    // iteration, inflating the reported bytes/sec by 8x.
    group.throughput(Throughput::Bytes(SIZE_BLOCK as u64 * 1024 * 8));
    group.bench_function("io_uring_write_random_32MB_nowait", |b| {
        b.iter(|| {
            tokio_uring::start(async {
                let mut rng = rand::thread_rng();
                let mut block = vec![0u8; SIZE_BLOCK * 1024 * 8];
                rng.fill_bytes(&mut block);
                let offset = rng.gen_range(0..(GIBIBYTE - (SIZE_BLOCK as u64 * 1024 * 8)));

                // NOTE(review): future dropped without await — see note above on
                // the nowait benchmark; same eager-submission assumption applies.
                let _r = file.write_at(block, offset);
            })
        });
    });

    // Blocking baseline: pwrite(2) via FileExt::write_at, 4 KiB + fsync.
    group.throughput(Throughput::Bytes(SIZE_BLOCK as u64));
    group.bench_function("std_write_random_sync", |b| {
        b.iter(|| {
            let mut rng = rand::thread_rng();
            let mut block = vec![0u8; SIZE_BLOCK];
            rng.fill_bytes(&mut block);
            let offset = rng.gen_range(0..(GIBIBYTE - (SIZE_BLOCK as u64)));

            file_standard.write_at(&block, offset).unwrap();
            file_standard.sync_all().unwrap();
        });
    });

    // Blocking baseline, 4 MiB + fsync.
    // BUGFIX: throughput was declared as 4 KiB while writing 4 MiB per
    // iteration, deflating the reported bytes/sec by 1024x.
    group.throughput(Throughput::Bytes(SIZE_BLOCK as u64 * 1024));
    group.bench_function("std_write_random_4MB_sync", |b| {
        b.iter(|| {
            let mut rng = rand::thread_rng();
            let mut block = vec![0u8; SIZE_BLOCK * 1024];
            rng.fill_bytes(&mut block);
            let offset = rng.gen_range(0..(GIBIBYTE - (SIZE_BLOCK as u64 * 1024)));

            file_standard.write_at(&block, offset).unwrap();
            file_standard.sync_all().unwrap();
        });
    });

    // Emit the group's summary (Criterion convention; was missing).
    group.finish();
}

// Register the benchmark with Criterion's harness and generate the `main` entry point.
criterion_group!(benches, bench);
criterion_main!(benches);

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions