Skip to content

Commit bf770fd

Browse files
committed
nsfs support for direct io
Signed-off-by: Guy Margalit <[email protected]>
1 parent bda6a11 commit bf770fd

File tree

6 files changed

+134
-96
lines changed

6 files changed

+134
-96
lines changed

.clang-format

+1-1
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ BinPackParameters: false
1717
FixNamespaceComments: true
1818
NamespaceIndentation: Inner
1919
PointerAlignment: Left
20-
# IndentPPDirectives: AfterHash
20+
IndentPPDirectives: BeforeHash
2121

2222
AlignOperands: DontAlign
2323
AlignAfterOpenBracket: DontAlign

config.js

+1-1
Original file line numberDiff line numberDiff line change
@@ -638,7 +638,7 @@ config.NSFS_DIR_CACHE_MAX_DIR_SIZE = 64 * 1024 * 1024;
638638
config.NSFS_DIR_CACHE_MIN_DIR_SIZE = 64;
639639
config.NSFS_DIR_CACHE_MAX_TOTAL_SIZE = 24 * config.NSFS_DIR_CACHE_MAX_DIR_SIZE;
640640

641-
config.NSFS_OPEN_READ_MODE = 'r'; // use 'rd' for direct, or 'rs' for sync, or 'rds' for both
641+
config.NSFS_OPEN_READ_MODE = 'r'; // use 'rd' for direct io
642642

643643
config.BASE_MODE_FILE = 0o666;
644644
config.BASE_MODE_DIR = 0o777;

src/native/fs/fs_napi.cpp

+113-70
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
#include "./gpfs_fcntl.h"
88

99
#include <dirent.h>
10+
#include <errno.h>
1011
#include <fcntl.h>
1112
#include <limits.h>
1213
#include <map>
@@ -22,17 +23,16 @@
2223
#include <unistd.h>
2324
#include <uv.h>
2425
#include <vector>
25-
#include <errno.h>
2626

2727
#ifdef __APPLE__
28-
#include <sys/mount.h>
29-
#include <sys/param.h>
28+
#include <sys/mount.h>
29+
#include <sys/param.h>
3030
#else
31-
#include <sys/statfs.h>
31+
#include <sys/statfs.h>
3232
#endif
3333

3434
#ifndef __APPLE__
35-
#define ENOATTR ENODATA
35+
#define ENOATTR ENODATA
3636
#endif
3737

3838
#define ROUNDUP(X, Y) ((Y) * (((X) + (Y)-1) / (Y)))
@@ -121,11 +121,11 @@
121121
} while (0)
122122

123123
#ifdef __APPLE__
124-
#define flistxattr(a, b, c) ::flistxattr(a, b, c, 0)
125-
#define getxattr(a, b, c, d) ::getxattr(a, b, c, d, 0, 0)
126-
#define fgetxattr(a, b, c, d) ::fgetxattr(a, b, c, d, 0, 0)
127-
#define fsetxattr(a, b, c, d, e) ::fsetxattr(a, b, c, d, e, 0)
128-
#define fremovexattr(a, b) ::fremovexattr(a, b, 0)
124+
#define flistxattr(a, b, c) ::flistxattr(a, b, c, 0)
125+
#define getxattr(a, b, c, d) ::getxattr(a, b, c, d, 0, 0)
126+
#define fgetxattr(a, b, c, d) ::fgetxattr(a, b, c, d, 0, 0)
127+
#define fsetxattr(a, b, c, d, e) ::fsetxattr(a, b, c, d, e, 0)
128+
#define fremovexattr(a, b) ::fremovexattr(a, b, 0)
129129
#endif
130130

131131
namespace noobaa
@@ -146,41 +146,56 @@ static int (*dlsym_gpfs_linkatif)(
146146
static int (*dlsym_gpfs_unlinkat)(
147147
gpfs_file_t fileDesc, const char* path, gpfs_file_t fd) = 0;
148148

149-
const static std::map<std::string, int> flags_to_case = {
150-
{ "r", O_RDONLY },
151-
{ "rs", O_RDONLY | O_SYNC },
152-
{ "sr", O_RDONLY | O_SYNC },
153-
#ifdef O_DIRECT
154-
{ "rd", O_RDONLY | O_DIRECT },
155-
{ "dr", O_RDONLY | O_DIRECT },
156-
{ "rds", O_RDONLY | O_DIRECT | O_SYNC },
157-
#endif
158-
{ "r+", O_RDWR },
159-
{ "rs+", O_RDWR | O_SYNC },
160-
{ "sr+", O_RDWR | O_SYNC },
161-
{ "w", O_TRUNC | O_CREAT | O_WRONLY },
149+
static int
150+
parse_open_flags(std::string flags)
151+
{
152+
int bits = 0;
153+
for (char ch : flags) {
154+
switch (ch) {
155+
case 'r':
156+
bits |= O_RDONLY;
157+
break;
158+
case 'w':
159+
bits |= O_TRUNC | O_CREAT | O_WRONLY;
160+
break;
161+
case 'a':
162+
bits |= O_APPEND | O_CREAT | O_WRONLY;
163+
break;
164+
case '+':
165+
bits |= O_RDWR;
166+
bits &= ~(O_RDONLY | O_WRONLY);
167+
break;
168+
case 's':
169+
bits |= O_SYNC;
170+
break;
171+
case 'x':
172+
bits |= O_EXCL;
173+
break;
174+
case 't':
162175
#ifdef O_TMPFILE
163-
{ "wt", O_RDWR | O_TMPFILE },
176+
bits |= O_TMPFILE | O_RDWR;
177+
bits &= ~(O_RDONLY | O_WRONLY);
178+
#else
179+
LOG("FS: Unsupported O_TMPFILE " << flags);
164180
#endif
165-
{ "wx", O_TRUNC | O_CREAT | O_WRONLY | O_EXCL },
166-
{ "xw", O_TRUNC | O_CREAT | O_WRONLY | O_EXCL },
167-
{ "w+", O_TRUNC | O_CREAT | O_RDWR },
168-
{ "wx+", O_TRUNC | O_CREAT | O_RDWR | O_EXCL },
169-
{ "xw+", O_TRUNC | O_CREAT | O_RDWR | O_EXCL },
170-
{ "a", O_APPEND | O_CREAT | O_WRONLY },
171-
{ "ax", O_APPEND | O_CREAT | O_WRONLY | O_EXCL },
172-
{ "xa", O_APPEND | O_CREAT | O_WRONLY | O_EXCL },
173-
{ "as", O_APPEND | O_CREAT | O_WRONLY | O_SYNC },
174-
{ "sa", O_APPEND | O_CREAT | O_WRONLY | O_SYNC },
175-
{ "a+", O_APPEND | O_CREAT | O_RDWR },
176-
{ "ax+", O_APPEND | O_CREAT | O_RDWR | O_EXCL },
177-
{ "xa+", O_APPEND | O_CREAT | O_RDWR | O_EXCL },
178-
{ "as+", O_APPEND | O_CREAT | O_RDWR | O_SYNC },
179-
{ "sa+", O_APPEND | O_CREAT | O_RDWR | O_SYNC },
180-
};
181+
break;
182+
case 'd':
183+
#ifdef O_DIRECT
184+
bits |= O_DIRECT;
185+
#else
186+
LOG("FS: Unsupported O_DIRECT " << flags);
187+
#endif
188+
break;
189+
default:
190+
LOG("FS: Unexpected open flags " << flags);
191+
return -1;
192+
}
193+
}
194+
return bits;
195+
}
181196

182197
const static std::vector<std::string> GPFS_XATTRS{ GPFS_ENCRYPTION_XATTR_NAME };
183-
const static std::vector<std::string> USER_XATTRS{ "user.content_md5", "user.version_id", "user.prev_version_id", "user.delete_marker", "user.dir_content"};
198+
const static std::vector<std::string> USER_XATTRS{ "user.content_md5", "user.version_id", "user.prev_version_id", "user.delete_marker", "user.dir_content" };
184199

185200
struct Entry
186201
{
@@ -196,7 +211,7 @@ struct gpfsRequest_t
196211
char buffer[GPFS_XATTR_BUFFER_SIZE];
197212
};
198213

199-
void
214+
static void
200215
build_gpfs_get_ea_request(gpfsRequest_t* reqP, std::string key)
201216
{
202217
int nameLen = key.size();
@@ -224,7 +239,7 @@ api(const Napi::CallbackInfo& info)
224239
return promise;
225240
}
226241

227-
void
242+
static void
228243
set_stat_res(Napi::Object res, Napi::Env env, struct stat& stat_res, XattrMap& xattr_res)
229244
{
230245
res["dev"] = Napi::Number::New(env, stat_res.st_dev);
@@ -278,7 +293,7 @@ set_stat_res(Napi::Object res, Napi::Env env, struct stat& stat_res, XattrMap& x
278293
}
279294
}
280295

281-
void
296+
static void
282297
set_statfs_res(Napi::Object res, Napi::Env env, struct statfs& statfs_res)
283298
{
284299
res["type"] = Napi::Number::New(env, statfs_res.f_type);
@@ -302,26 +317,26 @@ set_statfs_res(Napi::Object res, Napi::Env env, struct statfs& statfs_res)
302317
#endif
303318
}
304319

305-
void
320+
static void
306321
set_fs_worker_stats(Napi::Env env, Napi::Object fs_worker_stats, std::string work_name, double took_time, int error)
307322
{
308323
fs_worker_stats["name"] = Napi::String::New(env, work_name);
309324
fs_worker_stats["took_time"] = Napi::Number::New(env, took_time);
310325
fs_worker_stats["error"] = Napi::Number::New(env, error);
311326
}
312327

313-
bool
328+
static bool
314329
cmp_ver_id(int64_t link_expected_mtime, int64_t link_expected_inode, struct stat& _stat_res)
315330
{
316331
// extract actual stat ino and mtime
317332
int64_t stat_actual_ino = _stat_res.st_ino;
318-
#ifdef __APPLE__
319-
auto actual_mtime_sec = _stat_res.st_mtimespec.tv_sec;
320-
auto actual_mtime_nsec = _stat_res.st_mtimespec.tv_nsec;
321-
#else
322-
auto actual_mtime_sec = _stat_res.st_mtim.tv_sec;
323-
auto actual_mtime_nsec = _stat_res.st_mtim.tv_nsec;
324-
#endif
333+
#ifdef __APPLE__
334+
auto actual_mtime_sec = _stat_res.st_mtimespec.tv_sec;
335+
auto actual_mtime_nsec = _stat_res.st_mtimespec.tv_nsec;
336+
#else
337+
auto actual_mtime_sec = _stat_res.st_mtim.tv_sec;
338+
auto actual_mtime_nsec = _stat_res.st_mtim.tv_nsec;
339+
#endif
325340
auto actual_mtimeNs = int64_t(round((double(1e9) * actual_mtime_sec)) + actual_mtime_nsec);
326341
return link_expected_mtime == actual_mtimeNs && link_expected_inode == stat_actual_ino;
327342
}
@@ -512,7 +527,7 @@ struct FSWorker : public Napi::AsyncWorker
512527
DBG1("FS::FSWorker::Execute: " << _desc << DVAL(_uid) << DVAL(_gid) << DVAL(_backend));
513528
ThreadScope tx;
514529
tx.set_user(_uid, _gid);
515-
DBG1("FS::FSWorker::Execute: " << _desc << DVAL(_uid) << DVAL(_gid) << DVAL(geteuid()) << DVAL(getegid()) << DVAL(getuid()) << DVAL(getgid()));
530+
DBG1("FS::FSWorker::Execute: " << _desc << DVAL(_uid) << DVAL(_gid) << DVAL(geteuid()) << DVAL(getegid()) << DVAL(getuid()) << DVAL(getgid()));
516531

517532
auto start_time = std::chrono::high_resolution_clock::now();
518533
Work();
@@ -838,11 +853,10 @@ struct Rmdir : public FSWorker
838853
}
839854
};
840855

841-
842856
/**
843857
* SafeLink is an fs op
844858
* 1. link
845-
* 2. check if the target has the expected version
859+
* 2. check if the target has the expected version
846860
* 2.1. if yes - return
847861
* 2.2. else - unlink and retry
848862
*/
@@ -864,16 +878,15 @@ struct SafeLink : public FSWorker
864878
Begin(XSTR() << "SafeLink " << DVAL(_link_from.c_str()) << DVAL(_link_to.c_str()) << DVAL(_link_expected_mtime) << DVAL(_link_expected_inode));
865879
}
866880
virtual void Work()
867-
{
881+
{
868882
SYSCALL_OR_RETURN(link(_link_from.c_str(), _link_to.c_str()));
869883
struct stat _stat_res;
870884
SYSCALL_OR_RETURN(stat(_link_to.c_str(), &_stat_res));
871885
if (cmp_ver_id(_link_expected_mtime, _link_expected_inode, _stat_res) == true) return;
872886
SYSCALL_OR_RETURN(unlink(_link_to.c_str()));
873-
DBG0("FS::SafeLink::Execute: ERROR link target doesn't match the expected inode + mtime" << DVAL(_link_to)
874-
<< DVAL(_link_expected_mtime) << DVAL(_link_expected_inode));
887+
DBG0("FS::SafeLink::Execute: ERROR link target doesn't match the expected inode + mtime" << DVAL(_link_to)
888+
<< DVAL(_link_expected_mtime) << DVAL(_link_expected_inode));
875889
SetError(XSTR() << "FS::SafeLink ERROR link target doesn't match expected inode and mtime");
876-
877890
}
878891
};
879892

@@ -895,7 +908,7 @@ struct SafeUnlink : public FSWorker
895908
{
896909
_to_unlink = info[1].As<Napi::String>();
897910
_mv_to = info[2].As<Napi::String>();
898-
if (info.Length() > 4 && !info[3].IsUndefined() && !info[4].IsUndefined()) {
911+
if (info.Length() > 4 && !info[3].IsUndefined() && !info[4].IsUndefined()) {
899912
// TODO: handle lossless
900913
bool lossless = true;
901914
_unlink_expected_mtime = info[3].As<Napi::BigInt>().Int64Value(&lossless);
@@ -908,13 +921,13 @@ struct SafeUnlink : public FSWorker
908921
SYSCALL_OR_RETURN(rename(_to_unlink.c_str(), _mv_to.c_str()));
909922
struct stat _stat_res;
910923
SYSCALL_OR_RETURN(stat(_mv_to.c_str(), &_stat_res));
911-
if (cmp_ver_id(_unlink_expected_mtime, _unlink_expected_inode, _stat_res) == true){
924+
if (cmp_ver_id(_unlink_expected_mtime, _unlink_expected_inode, _stat_res) == true) {
912925
SYSCALL_OR_RETURN(unlink(_mv_to.c_str()));
913926
return;
914927
}
915928
SYSCALL_OR_RETURN(link(_mv_to.c_str(), _to_unlink.c_str()));
916-
DBG0("FS::SafeUnlink::Execute: ERROR unlink target doesn't match the expected inode + mtime, retry" << DVAL(_to_unlink)
917-
<< DVAL(_unlink_expected_mtime) << DVAL(_unlink_expected_inode));
929+
DBG0("FS::SafeUnlink::Execute: ERROR unlink target doesn't match the expected inode + mtime, retry"
930+
<< DVAL(_to_unlink) << DVAL(_unlink_expected_mtime) << DVAL(_unlink_expected_inode));
918931
SetError(XSTR() << "FS::SafeUnlink ERROR unlink target doesn't match expected inode and mtime");
919932
}
920933
};
@@ -1245,7 +1258,8 @@ struct FileOpen : public FSWorker
12451258
{
12461259
_path = info[1].As<Napi::String>();
12471260
if (info.Length() > 2 && !info[2].IsUndefined()) {
1248-
_flags = flags_to_case.at(info[2].As<Napi::String>());
1261+
_flags = parse_open_flags(info[2].As<Napi::String>());
1262+
if (_flags < 0) SetError("Unexpected open flags");
12491263
}
12501264
if (info.Length() > 3 && !info[3].IsUndefined()) {
12511265
_mode = info[3].As<Napi::Number>().Uint32Value();
@@ -1442,7 +1456,7 @@ struct LinkFileAt : public FSWrapWorker<FileWrap>
14421456
{
14431457
// gpfs_linkat() is the same as Linux linkat() but we need a new function because
14441458
// Linux will fail the linkat() if the file already exist and we want to replace it if it existed.
1445-
if (_replace_fd == 0){
1459+
if (_replace_fd == 0) {
14461460
SYSCALL_OR_RETURN(dlsym_gpfs_linkat(_wrap->_fd, "", AT_FDCWD, _filepath.c_str(), AT_EMPTY_PATH));
14471461
} else {
14481462
SYSCALL_OR_RETURN(dlsym_gpfs_linkatif(_wrap->_fd, "", AT_FDCWD, _filepath.c_str(), AT_EMPTY_PATH, _replace_fd));
@@ -1465,12 +1479,11 @@ struct UnlinkFileAt : public FSWrapWorker<FileWrap>
14651479
Begin(XSTR() << "UnlinkFileAt" << DVAL(_wrap->_path.c_str()) << DVAL(_wrap->_fd) << DVAL(_filepath) << DVAL(_delete_fd));
14661480
}
14671481
virtual void Work()
1468-
{
1482+
{
14691483
SYSCALL_OR_RETURN(dlsym_gpfs_unlinkat(_wrap->_fd, _filepath.c_str(), _delete_fd));
14701484
}
14711485
};
14721486

1473-
14741487
struct FileStat : public FSWrapWorker<FileWrap>
14751488
{
14761489
struct stat _stat_res;
@@ -1613,7 +1626,6 @@ struct GetSingleXattr : public FSWorker
16131626
}
16141627
};
16151628

1616-
16171629
Napi::Value
16181630
FileWrap::close(const Napi::CallbackInfo& info)
16191631
{
@@ -1825,7 +1837,7 @@ DirWrap::read(const Napi::CallbackInfo& info)
18251837
return api<DirReadEntry>(info);
18261838
}
18271839

1828-
Napi::Value
1840+
static Napi::Value
18291841
set_debug_level(const Napi::CallbackInfo& info)
18301842
{
18311843
int level = info[0].As<Napi::Number>();
@@ -1834,6 +1846,29 @@ set_debug_level(const Napi::CallbackInfo& info)
18341846
return info.Env().Undefined();
18351847
}
18361848

1849+
static const int DIO_BUFFER_MEMALIGN = 4096;
1850+
1851+
static void
1852+
dio_buffer_free(Napi::Env env, uint8_t* buf)
1853+
{
1854+
if (buf) free(buf);
1855+
}
1856+
1857+
/**
1858+
* Allocate memory aligned buffer for direct IO.
1859+
*/
1860+
static Napi::Value
1861+
dio_buffer_alloc(const Napi::CallbackInfo& info)
1862+
{
1863+
int size = info[0].As<Napi::Number>();
1864+
uint8_t* buf = 0;
1865+
int r = posix_memalign((void**)&buf, DIO_BUFFER_MEMALIGN, size);
1866+
if (r || !buf) {
1867+
throw Napi::Error::New(info.Env(), "FS::dio_buffer_alloc: failed to allocate memory");
1868+
}
1869+
return Napi::Buffer<uint8_t>::New(info.Env(), buf, size, dio_buffer_free);
1870+
}
1871+
18371872
void
18381873
fs_napi(Napi::Env env, Napi::Object exports)
18391874
{
@@ -1906,6 +1941,14 @@ fs_napi(Napi::Env env, Napi::Object exports)
19061941
exports_fs["DT_LNK"] = Napi::Number::New(env, DT_LNK);
19071942
exports_fs["PLATFORM_IOV_MAX"] = Napi::Number::New(env, IOV_MAX);
19081943

1944+
#ifdef O_DIRECT
1945+
exports_fs["O_DIRECT"] = Napi::Number::New(env, O_DIRECT);
1946+
#endif
1947+
#ifdef O_TMPFILE
1948+
exports_fs["O_TMPFILE"] = Napi::Number::New(env, O_TMPFILE);
1949+
#endif
1950+
1951+
exports_fs["dio_buffer_alloc"] = Napi::Function::New(env, dio_buffer_alloc);
19091952
exports_fs["set_debug_level"] = Napi::Function::New(env, set_debug_level);
19101953

19111954
exports["fs"] = exports_fs;

0 commit comments

Comments
 (0)