Skip to content

Commit 8aa59d9

Browse files
committed
[chore] add pinned mem buffer
1 parent 0ed8faa commit 8aa59d9

File tree

12 files changed, with 37 additions and 23 deletions.

csrc/aio.cpp

Lines changed: 7 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,3 @@
1-
#include <stdexcept>
2-
#include <memory>
31
#include "aio.h"
42

53
AIOAsyncIO::AIOAsyncIO(unsigned int n_entries)
@@ -128,9 +126,14 @@ void AIOAsyncIO::readv(int fd, const iovec *iov, unsigned int iovcnt, unsigned l
128126
this->n_read_events++;
129127
}
130128

131-
void AIOAsyncIO::write_tensor(int fd, torch::Tensor t, unsigned long long offset, callback_t callback) {
129+
void AIOAsyncIO::write_tensor(int fd, torch::Tensor t, unsigned long long offset, callback_t callback, std::optional<torch::Tensor> pinned) {
132130
if (t.is_cuda()) {
133-
t = t.to(torch::kCPU);
131+
if (pinned.has_value()) {
132+
pinned.value().copy_(t);
133+
t = pinned.value();
134+
} else {
135+
t = t.to(torch::kCPU);
136+
}
134137
}
135138
void *buffer = t.data_ptr();
136139
size_t n_bytes = t.numel() * t.element_size();

csrc/async_file_io.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -8,8 +8,8 @@ void AsyncFileWriter::write(size_t buffer, size_t n_bytes, unsigned long long of
88
this->aio->write(this->fd, ptr, n_bytes, offset, callback);
99
}
1010

11-
void AsyncFileWriter::write_tensor(torch::Tensor tensor, unsigned long long offset, callback_t callback) {
12-
this->aio->write_tensor(this->fd, tensor, offset, callback);
11+
void AsyncFileWriter::write_tensor(torch::Tensor tensor, unsigned long long offset, callback_t callback, std::optional<torch::Tensor> pinned) {
12+
this->aio->write_tensor(this->fd, tensor, offset, callback, pinned);
1313
}
1414

1515

csrc/pthread_backend.cpp

Lines changed: 8 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -80,14 +80,17 @@ void PthreadAsyncIO::synchronize() {
8080

8181
void PthreadAsyncIO::register_file(int fd) {}
8282

83-
void PthreadAsyncIO::write_tensor(int fd, torch::Tensor t, unsigned long long offset, callback_t callback) {
83+
void PthreadAsyncIO::write_tensor(int fd, torch::Tensor t, unsigned long long offset, callback_t callback, std::optional<torch::Tensor> pinned) {
8484
auto fut = this->pool.submit_task(
85-
[fd, t, offset] {
85+
[fd, t, offset, pinned] {
8686
torch::Tensor cpu_tensor;
8787
if (t.is_cuda()) {
88-
cpu_tensor = t.to(torch::kCPU);
89-
} else {
90-
cpu_tensor = t;
88+
if (pinned.has_value()) {
89+
pinned.value().copy_(t);
90+
cpu_tensor = pinned.value();
91+
} else {
92+
cpu_tensor = t.to(torch::kCPU);
93+
}
9194
}
9295
void *buf = cpu_tensor.data_ptr();
9396
size_t n_bytes = cpu_tensor.numel() * cpu_tensor.element_size();

csrc/py_api.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -29,6 +29,6 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m)
2929
py::class_<AsyncFileWriter>(m, "AsyncFileWriter")
3030
.def(py::init<int, unsigned int, const std::string &>(), py::arg("fd"), py::arg("n_entries"), py::arg("backend") = "aio")
3131
.def("write", &AsyncFileWriter::write, py::arg("buffer"), py::arg("n_bytes"), py::arg("offset"), py::arg("callback") = py::none())
32-
.def("write_tensor", &AsyncFileWriter::write_tensor, py::arg("tensor"), py::arg("offset"), py::arg("callback") = py::none())
32+
.def("write_tensor", &AsyncFileWriter::write_tensor, py::arg("tensor"), py::arg("offset"), py::arg("callback") = py::none(), py::arg("pinned") = py::none())
3333
.def("synchronize", &AsyncFileWriter::synchronize);
3434
}

csrc/uring.cpp

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -99,9 +99,14 @@ void UringAsyncIO::readv(int fd, const iovec *iov, unsigned int iovcnt, unsigned
9999
this->n_read_events++;
100100
}
101101

102-
void UringAsyncIO::write_tensor(int fd, torch::Tensor t, unsigned long long offset, callback_t callback) {
102+
void UringAsyncIO::write_tensor(int fd, torch::Tensor t, unsigned long long offset, callback_t callback, std::optional<torch::Tensor> pinned) {
103103
if (t.is_cuda()) {
104-
t = t.to(torch::kCPU);
104+
if (pinned.has_value()) {
105+
pinned.value().copy_(t);
106+
t = pinned.value();
107+
} else {
108+
t = t.to(torch::kCPU);
109+
}
105110
}
106111
void *buffer = t.data_ptr<float>();
107112
size_t n_bytes = t.numel() * t.element_size();

include/aio.h

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,8 @@
22

33
#include <libaio.h>
44
#include <torch/torch.h>
5+
#include <stdexcept>
6+
#include <memory>
57
#include "asyncio.h"
68

79
class AIOAsyncIO : public AsyncIO
@@ -30,5 +32,5 @@ class AIOAsyncIO : public AsyncIO
3032
void synchronize();
3133

3234
void register_file(int fd);
33-
void write_tensor(int fd, torch::Tensor t, unsigned long long offset, callback_t callback);
35+
void write_tensor(int fd, torch::Tensor t, unsigned long long offset, callback_t callback, std::optional<torch::Tensor> pinned);
3436
};

include/async_file_io.h

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
#pragma once
22
#include <string>
33
#include <torch/torch.h>
4+
#include <optional>
45

56
#include "asyncio.h"
67
#include "backend.h"
@@ -18,7 +19,7 @@ class AsyncFileWriter
1819
public:
1920
AsyncFileWriter(int fd, unsigned int n_entries, const std::string &backend);
2021
void write(size_t buffer, size_t n_bytes, unsigned long long offset, callback_t callback);
21-
void write_tensor(torch::Tensor tensor, unsigned long long offset, callback_t callback);
22+
void write_tensor(torch::Tensor tensor, unsigned long long offset, callback_t callback, std::optional<torch::Tensor> pinned);
2223
void synchronize();
2324
~AsyncFileWriter();
2425

include/asyncio.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -48,5 +48,5 @@ class AsyncIO
4848
virtual void synchronize() = 0;
4949

5050
virtual void register_file(int fd) = 0;
51-
virtual void write_tensor(int fd, torch::Tensor t, unsigned long long offset, callback_t callback) = 0;
51+
virtual void write_tensor(int fd, torch::Tensor t, unsigned long long offset, callback_t callback, std::optional<torch::Tensor> pinned) = 0;
5252
};

include/pthread_backend.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -39,5 +39,5 @@ class PthreadAsyncIO : public AsyncIO
3939

4040
void register_file(int fd);
4141

42-
void write_tensor(int fd, torch::Tensor t, unsigned long long offset, callback_t callback);
42+
void write_tensor(int fd, torch::Tensor t, unsigned long long offset, callback_t callback, std::optional<torch::Tensor> pinned);
4343
};

include/uring.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -26,5 +26,5 @@ class UringAsyncIO : public AsyncIO
2626
void synchronize();
2727

2828
void register_file(int fd);
29-
void write_tensor(int fd, torch::Tensor t, unsigned long long offset, callback_t callback);
29+
void write_tensor(int fd, torch::Tensor t, unsigned long long offset, callback_t callback, std::optional<torch::Tensor> pinned);
3030
};

tensornvme/_C/__init__.pyi

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -22,5 +22,5 @@ def probe_backend(backend: str) -> bool: ...
2222
class AsyncFileWriter:
2323
def __init__(self, fd: int, n_entries: int, backend: str = "aio") -> None: ...
2424
def write(self, buffer: int, n_bytes: int, offset: int, callback: Optional[Callable[[], None]] = None) -> None: ...
25-
def write_tensor(self, tensor: Tensor, offset: int, callback: Optional[Callable[[], None]] = None) -> None: ...
25+
def write_tensor(self, tensor: Tensor, offset: int, callback: Optional[Callable[[], None]] = None, pinned: Optional[Tensor] = None) -> None: ...
2626
def synchronize(self) -> None: ...

tensornvme/async_file_io.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
import ctypes
22
from functools import partial
33
from torch import Tensor
4-
from typing import List
4+
from typing import List, Optional
55
from io import IOBase
66
from tensornvme._C import AsyncFileWriter as AsyncFileWriterC
77

@@ -31,9 +31,9 @@ def write_raw(self, py_ref: object, buffer: int, n_bytes: int, offset: int) -> N
3131
self.io.write(buffer, n_bytes, offset, partial(AsyncFileWriter.gc_callback, self.buffers, len(self.buffers) - 1))
3232
self.offset += n_bytes
3333

34-
def write_tensor(self, tensor: Tensor) -> None:
34+
def write_tensor(self, tensor: Tensor, pinned: Optional[Tensor] = None) -> None:
3535
self.buffers.append(tensor) # append before callback is called
36-
self.io.write_tensor(tensor, self.offset, partial(AsyncFileWriter.gc_callback, self.buffers, len(self.buffers) - 1))
36+
self.io.write_tensor(tensor, self.offset, partial(AsyncFileWriter.gc_callback, self.buffers, len(self.buffers) - 1), pinned)
3737
self.offset += tensor.numel() * tensor.element_size()
3838

3939
@staticmethod

0 commit comments

Comments
 (0)