Skip to content

Expose access to io_uring in the form of submission and completion queues #298

Open
@saltatory

Description

@saltatory

I have been learning about and testing the io_uring and tokio_uring libraries over the past couple of days.

I'm working on a performance-oriented filesystem application where reads and writes can be randomly distributed.

In my benchmarking, I observed that submitting multiple operations simultaneously and then handling the results was significantly faster than submitting a single op, then waiting, then submitting another.

This makes me want to be able to handle the submission and completion queues myself.

Both of the tests displayed above write 4K blocks to random offsets. Neither test is O_DIRECT. Neither test performs sync_all so we are just testing the buffering of IO in the Linux filesystem.

As you can see, there is a significant difference in submission speed between await-ing each IO and simply continuing to submit as fast as the io_uring submission queue will allow.

image
image

use constants::GIBIBYTE;
use criterion::{
    criterion_group,
    criterion_main,
    Criterion,
    Throughput,
};
use rand::{
    Rng,
    RngCore as _,
};
use std::{
    io::Write as _,
    os::unix::fs::FileExt,
};

// Size of a single I/O block in bytes (4 KiB — a typical filesystem page size).
const SIZE_BLOCK: usize = 4096;

/// Benchmarks random writes to a 1 GiB test file, comparing tokio_uring
/// (awaited vs. fire-and-forget, with and without `sync_all`) against the
/// blocking `std::fs` + `FileExt::write_at` path.
///
/// NOTE(review): the tokio_uring `File` is opened inside one runtime
/// (`tokio_uring::start`) and then used from a fresh runtime on every
/// benchmark iteration — confirm tokio_uring supports a file handle
/// outliving the runtime that created it.
fn bench(c: &mut Criterion)
{
    // Open (or create) the shared test file once through tokio_uring.
    let file = tokio_uring::start(async {
        tokio_uring::fs::OpenOptions::new()
            .read(true)
            .write(true)
            .create(true)
            .open("test/device.dat")
            .await
            .unwrap()
    });

    // A second, independent handle for the blocking std::fs benchmarks.
    let file_standard = std::fs::OpenOptions::new()
        .read(true)
        .write(true)
        .create(true)
        .open("test/device.dat")
        .unwrap();

    // Pre-size the file to 1 GiB so random writes never extend it.
    file_standard.set_len(GIBIBYTE).unwrap();

    let mut group = c.benchmark_group("block_device_io_uring");
    group.throughput(Throughput::Bytes(SIZE_BLOCK as u64));

    // Await each write, then fsync: the worst-case per-operation latency.
    group.bench_function("io_uring_write_random_wait_sync", |b| {
        b.iter(|| {
            tokio_uring::start(async {
                let mut rng = rand::thread_rng();
                let mut block = vec![0u8; SIZE_BLOCK];
                rng.fill_bytes(&mut block);
                let block_number = rng.gen::<u64>() % (GIBIBYTE / SIZE_BLOCK as u64);
                // BUGFIX: `write_at` takes a *byte* offset; the original passed
                // the raw block index, clustering every write in the first ~1 MiB.
                let offset = block_number * SIZE_BLOCK as u64;

                let _r = file.write_at(block, offset).await;
                file.sync_all().await.unwrap();
            })
        });
    });

    // Fire-and-forget: neither await the write nor fsync.
    group.bench_function("io_uring_write_random_nowait_nosync", |b| {
        b.iter(|| {
            tokio_uring::start(async {
                let mut rng = rand::thread_rng();
                let mut block = vec![0u8; SIZE_BLOCK];
                rng.fill_bytes(&mut block);
                let block_number = rng.gen::<u64>() % (GIBIBYTE / SIZE_BLOCK as u64);
                let offset = block_number * SIZE_BLOCK as u64;

                // NOTE(review): the returned future is dropped without being
                // awaited. This relies on tokio_uring submitting the SQE eagerly
                // when the op is created — confirm against the tokio_uring docs.
                let _r = file.write_at(block, offset);
            })
        });
    });

    // Await the write but skip fsync: isolates submission-queue round-trip cost.
    group.bench_function("io_uring_write_random_wait_nosync", |b| {
        b.iter(|| {
            tokio_uring::start(async {
                let mut rng = rand::thread_rng();
                let mut block = vec![0u8; SIZE_BLOCK];
                rng.fill_bytes(&mut block);
                let block_number = rng.gen::<u64>() % (GIBIBYTE / SIZE_BLOCK as u64);
                let offset = block_number * SIZE_BLOCK as u64;

                let _r = file.write_at(block, offset).await;
            })
        });
    });

    // 4 MiB writes (SIZE_BLOCK * 1024) with an fsync per iteration.
    group.throughput(Throughput::Bytes(SIZE_BLOCK as u64 * 1024));
    group.bench_function("io_uring_write_random_4MB_sync", |b| {
        b.iter(|| {
            tokio_uring::start(async {
                let mut rng = rand::thread_rng();
                let mut block = vec![0u8; SIZE_BLOCK * 1024];
                rng.fill_bytes(&mut block);
                // Byte offset chosen so the whole 4 MiB write stays in-bounds.
                let offset = rng.gen_range(0..(GIBIBYTE - (SIZE_BLOCK as u64 * 1024)));

                // BUGFIX: the "sync" variant previously dropped the write future
                // without awaiting it; await so the write completes before fsync.
                let _r = file.write_at(block, offset).await;
                file.sync_all().await.unwrap();
            })
        });
    });

    // 32 MiB fire-and-forget writes.
    // BUGFIX: throughput was declared as 4 MiB while writing 32 MiB per
    // iteration, inflating the reported bytes/sec by 8x.
    group.throughput(Throughput::Bytes(SIZE_BLOCK as u64 * 1024 * 8));
    group.bench_function("io_uring_write_random_32MB_nowait", |b| {
        b.iter(|| {
            tokio_uring::start(async {
                let mut rng = rand::thread_rng();
                let mut block = vec![0u8; SIZE_BLOCK * 1024 * 8];
                rng.fill_bytes(&mut block);
                let offset = rng.gen_range(0..(GIBIBYTE - (SIZE_BLOCK as u64 * 1024 * 8)));

                // NOTE(review): future dropped without await — see note above on
                // the nowait benchmark; same eager-submission assumption applies.
                let _r = file.write_at(block, offset);
            })
        });
    });

    // Blocking baseline: pwrite(2) via FileExt::write_at, 4 KiB + fsync.
    group.throughput(Throughput::Bytes(SIZE_BLOCK as u64));
    group.bench_function("std_write_random_sync", |b| {
        b.iter(|| {
            let mut rng = rand::thread_rng();
            let mut block = vec![0u8; SIZE_BLOCK];
            rng.fill_bytes(&mut block);
            let offset = rng.gen_range(0..(GIBIBYTE - (SIZE_BLOCK as u64)));

            file_standard.write_at(&block, offset).unwrap();
            file_standard.sync_all().unwrap();
        });
    });

    // Blocking baseline, 4 MiB + fsync.
    // BUGFIX: throughput was declared as 4 KiB while writing 4 MiB per
    // iteration, deflating the reported bytes/sec by 1024x.
    group.throughput(Throughput::Bytes(SIZE_BLOCK as u64 * 1024));
    group.bench_function("std_write_random_4MB_sync", |b| {
        b.iter(|| {
            let mut rng = rand::thread_rng();
            let mut block = vec![0u8; SIZE_BLOCK * 1024];
            rng.fill_bytes(&mut block);
            let offset = rng.gen_range(0..(GIBIBYTE - (SIZE_BLOCK as u64 * 1024)));

            file_standard.write_at(&block, offset).unwrap();
            file_standard.sync_all().unwrap();
        });
    });

    // Emit the group's summary (Criterion convention; was missing).
    group.finish();
}

// Register the benchmark with Criterion's harness and generate the `main` entry point.
criterion_group!(benches, bench);
criterion_main!(benches);

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions