diff --git a/.obm.cfg b/.obm.cfg index ada426f8e5..1803905eb0 100644 --- a/.obm.cfg +++ b/.obm.cfg @@ -1,2 +1,2 @@ container_name: curve-build-playground.master -container_image: opencurvedocker/curve-base:build-debian11 +container_image: opencurvedocker/curve-build:ubuntu22 diff --git a/CHANGELOG-2.7.md b/CHANGELOG-2.7.md new file mode 100644 index 0000000000..5f799aff4e --- /dev/null +++ b/CHANGELOG-2.7.md @@ -0,0 +1,5 @@ +# CHANGELOG of v2.7 + +[CHANGELOG-2.7](https://docs.opencurve.io/Release/release-notes-v2.7) + +> NOTE: All release notes will be published on our documents site from now on. diff --git a/README.md b/README.md index 1405e0be98..1a988a16ab 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@ **A sandbox project hosted by the CNCF Foundation** #### English | [简体中文](README_cn.md) -### 📄 [Documents](https://github.com/opencurve/curve/tree/master/docs) || 🌐 [Official Website](https://www.opencurve.io/Curve/HOME) || 🏠 [Forum](https://ask.opencurve.io/t/topic/7) +### 📄 [Documents](https://docs.opencurve.io/) || 🌐 [Official Website](https://www.opencurve.io/Curve/HOME) || 🏠 [Forum](https://ask.opencurve.io/t/topic/7)
diff --git a/README_cn.md b/README_cn.md index 12d4140388..b722921fe3 100644 --- a/README_cn.md +++ b/README_cn.md @@ -9,7 +9,7 @@ **CNCF基金会的沙箱托管项目** #### [English](README.md) | 简体中文 -### 📄 [文档](https://github.com/opencurve/curve/tree/master/docs) || 🌐 [官网](https://www.opencurve.io/Curve/HOME) || 🏠 [论坛](https://ask.opencurve.io/t/topic/7) +### 📄 [文档](https://docs.opencurve.io/) || 🌐 [官网](https://www.opencurve.io/Curve/HOME) || 🏠 [论坛](https://ask.opencurve.io/t/topic/7)
diff --git a/WORKSPACE b/WORKSPACE index a423f1c46a..6efa122eea 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -143,6 +143,11 @@ git_repository( "//:thirdparties/brpc/brpc.patch", "//:thirdparties/brpc/fix-gcc11.patch", "//:thirdparties/brpc/0001-bvar-warning-on-conflict-bvar-name.patch", + "//:thirdparties/brpc/0002-Support-fork-without-exec.patch", + "//:thirdparties/brpc/0003-Add-docs-on-fork-w-o-exec.patch", + "//:thirdparties/brpc/0004-not-register-pthread_atfork-in-child-process.patch", + "//:thirdparties/brpc/0005-Fix-LatencyRecorder-qps-not-accurate.patch", + "//:thirdparties/brpc/0006-fix-1973-1863.patch", ], patch_args = ["-p1"], ) diff --git a/curvefs/conf/client.conf b/curvefs/conf/client.conf index 755d624920..8ba0127ec2 100644 --- a/curvefs/conf/client.conf +++ b/curvefs/conf/client.conf @@ -85,6 +85,12 @@ fuseClient.getThreadPool=4 # it gurantee the consistent of file after rename, otherwise you should # disable it for performance. fuseClient.enableMultiMountPointRename=true + +# the rename transaction models are different between version 1 and version 2 +# the v2 version greatly improves the performance of rename, especially in concurrent scenarios. +# Note: v1 and v2 are incompatible and cannot be directly upgraded from a v1 cluster to v2. 
+fuseClient.txVersion=1 + # splice will bring higher performance in some cases # but there might be a kernel issue that will cause kernel panic when enabling it # see https://lore.kernel.org/all/CAAmZXrsGg2xsP1CK+cbuEMumtrqdvD-NKnWzhNcvn71RV3c1yw@mail.gmail.com/ diff --git a/curvefs/conf/metaserver.conf b/curvefs/conf/metaserver.conf index a16fd4d101..1140bef40d 100644 --- a/curvefs/conf/metaserver.conf +++ b/curvefs/conf/metaserver.conf @@ -258,11 +258,17 @@ storage.rocksdb.unordered_write_buffer_size=67108864 # for store inode which exclude its s3chunkinfo list (default: 3) storage.rocksdb.unordered_max_write_buffer_number=3 # rocksdb column family's write_buffer_size -# for store dentry and inode's s3chunkinfo list (unit: bytes, default: 128MB) +# for store dentry and inode's s3chunkinfo list (unit: bytes, default: 64MB) storage.rocksdb.ordered_write_buffer_size=67108864 # rocksdb column family's max_write_buffer_number # for store dentry and inode's s3chunkinfo list (default: 3) storage.rocksdb.ordered_max_write_buffer_number=3 +# rocksdb column family's write_buffer_size +# for store tx lock and write (unit: bytes, default: 64MB) +storage.rocksdb.tx_cf_write_buffer_size=67108864 +# rocksdb column family's max_write_buffer_number +# for store tx lock and write (default: 3) +storage.rocksdb.tx_cf_max_write_buffer_number=3 # The target number of write history bytes to hold in memory (default: 20MB) storage.rocksdb.max_write_buffer_size_to_maintain=20971520 # rocksdb memtable prefix bloom size ratio (size=write_buffer_size*memtable_prefix_bloom_size_ratio) @@ -286,6 +292,8 @@ storage.rocksdb.perf_sampling_ratio=0 # we will sending its with rpc streaming instead of # padding its into inode (default: 25000, about 25000 * 41 (byte) = 1MB) storage.s3_meta_inside_inode.limit_size=25000 +# TTL(millisecond) for tx lock +storage.tx_lock_ttl_ms=5000 # recycle options # metaserver scan recycle period, default 1h diff --git a/curvefs/devops/util/tmpl.sh 
b/curvefs/devops/util/tmpl.sh index cb1e5ea181..ed69596098 100755 --- a/curvefs/devops/util/tmpl.sh +++ b/curvefs/devops/util/tmpl.sh @@ -3,7 +3,7 @@ # Usage: # tmpl.sh DSV SOURCE DESTINATION # Example: -# tmpl.sh = /usr/local/metaserver.conf /tmp/metaserver.con +# tmpl.sh = /usr/local/metaserver.conf /tmp/metaserver.conf g_dsv=$1 g_src=$2 diff --git a/curvefs/docker/debian11/entrypoint.sh b/curvefs/docker/debian11/entrypoint.sh index 0ca397dace..35faec3cad 100755 --- a/curvefs/docker/debian11/entrypoint.sh +++ b/curvefs/docker/debian11/entrypoint.sh @@ -104,7 +104,10 @@ function prepare() { } function create_directory() { - chmod 700 "$g_prefix/data" + if [ "$g_role" != "monitor" ]; then + chmod 700 "$g_prefix/data" + fi + if [ "$g_role" == "etcd" ]; then mkdir -p "$g_prefix/data/wal" elif [ "$g_role" == "metaserver" ]; then diff --git a/curvefs/docker/openeuler/entrypoint.sh b/curvefs/docker/openeuler/entrypoint.sh index 0ca397dace..35faec3cad 100644 --- a/curvefs/docker/openeuler/entrypoint.sh +++ b/curvefs/docker/openeuler/entrypoint.sh @@ -104,7 +104,10 @@ function prepare() { } function create_directory() { - chmod 700 "$g_prefix/data" + if [ "$g_role" != "monitor" ]; then + chmod 700 "$g_prefix/data" + fi + if [ "$g_role" == "etcd" ]; then mkdir -p "$g_prefix/data/wal" elif [ "$g_role" == "metaserver" ]; then diff --git a/curvefs/proto/common.proto b/curvefs/proto/common.proto index a56402c744..25ba220f42 100644 --- a/curvefs/proto/common.proto +++ b/curvefs/proto/common.proto @@ -80,7 +80,7 @@ message PartitionInfo { // partition manage inodeid range [start, end] required uint64 start = 5; required uint64 end = 6; - required uint64 txId = 7; + optional uint64 txId = 7; optional uint64 nextId = 8; // status can change from READWRITE to READONLY, but can not chanage from READONLY to READWRITE // READWRITE/READONLY can change to DELETING, but DELETING can not change to READWRITE/READONLY diff --git a/curvefs/proto/mds.proto b/curvefs/proto/mds.proto index 
a3c6ca1c26..f89c13e733 100644 --- a/curvefs/proto/mds.proto +++ b/curvefs/proto/mds.proto @@ -194,6 +194,7 @@ message RefreshSessionRequest { required string fsName = 2; required Mountpoint mountpoint = 3; optional FsDelta fsDelta = 4; + optional string mdsAddrs = 5; } message RefreshSessionResponse { @@ -202,6 +203,7 @@ message RefreshSessionResponse { optional bool enableSumInDir = 3; optional uint64 fsCapacity = 4; optional uint64 fsUsedBytes = 5; + optional string mdsAddrsOverride = 6; } message DLockValue { @@ -234,6 +236,22 @@ message CommitTxResponse { required FSStatusCode statusCode = 1; } +message SetClientMdsAddrsOverrideRequest { + required string clientMdsAddrsOverride = 1; +} + +message SetClientMdsAddrsOverrideResponse { + required FSStatusCode statusCode = 1; +} + +message TsoRequest {} + +message TsoResponse { + required FSStatusCode statusCode = 1; + optional uint64 ts = 2; // transaction sequence number + optional uint64 timestamp = 3; +} + service MdsService { // fs interface rpc CreateFs(CreateFsRequest) returns (CreateFsResponse); @@ -249,6 +267,11 @@ service MdsService { rpc GetLatestTxId(GetLatestTxIdRequest) returns (GetLatestTxIdResponse); rpc CommitTx(CommitTxRequest) returns (CommitTxResponse); + rpc Tso(TsoRequest) returns (TsoResponse); + // client lease rpc RefreshSession(RefreshSessionRequest) returns (RefreshSessionResponse); + + // client mds addrs override, for mds migration + rpc SetClientMdsAddrsOverride(SetClientMdsAddrsOverrideRequest) returns (SetClientMdsAddrsOverrideResponse); } diff --git a/curvefs/proto/metaserver.proto b/curvefs/proto/metaserver.proto index f0ab8167a2..d22246069e 100644 --- a/curvefs/proto/metaserver.proto +++ b/curvefs/proto/metaserver.proto @@ -49,6 +49,15 @@ enum MetaStatusCode { RPC_STREAM_ERROR = 25; INODE_S3_META_TOO_LARGE = 26; STORAGE_CLOSED = 27; + // tx v2 related + TX_FAILED = 28; + TX_WRITE_CONFLICT = 29; + TX_KEY_LOCKED = 30; + TX_COMMITTED = 31; + TX_ROLLBACKED = 32; + TX_TIMEOUT = 33; + 
TX_INPROGRESS = 34; + TX_MISMATCH = 35; } // dentry interface @@ -59,7 +68,7 @@ message GetDentryRequest { required uint32 fsId = 4; required uint64 parentInodeId = 5; required string name = 6; - required uint64 txId = 7; + optional uint64 txId = 7; optional uint64 appliedIndex = 8; } @@ -74,7 +83,8 @@ message Dentry { required uint64 inodeId = 2; required uint64 parentInodeId = 3; required string name = 4; - required uint64 txId = 5; + // reused txId as ts in tx v2 for compatibility in metaserver + optional uint64 txId = 5; optional uint32 flag = 6; optional FsFileType type = 7; optional uint64 txSequence = 8; @@ -88,6 +98,7 @@ message GetDentryResponse { required MetaStatusCode statusCode = 1; optional Dentry dentry = 2; optional uint64 appliedIndex = 3; + optional TxLock txLock = 4; } message ListDentryRequest { @@ -96,7 +107,7 @@ message ListDentryRequest { required uint32 partitionId = 3; required uint32 fsId = 4; required uint64 dirInodeId = 5; - required uint64 txId = 6; + optional uint64 txId = 6; optional string last = 7; // the name of last entry optional uint32 count = 8; // the number of entry required optional bool onlyDir = 9; @@ -107,6 +118,7 @@ message ListDentryResponse { required MetaStatusCode statusCode = 1; repeated Dentry dentrys = 2; optional uint64 appliedIndex = 3; + optional TxLock txLock = 4; } message CreateDentryRequest { @@ -120,6 +132,7 @@ message CreateDentryRequest { message CreateDentryResponse { required MetaStatusCode statusCode = 1; optional uint64 appliedIndex = 2; + optional TxLock txLock = 3; } message DeleteDentryRequest { @@ -127,15 +140,17 @@ message DeleteDentryRequest { required uint32 copysetId = 2; required uint32 partitionId = 3; required uint32 fsId = 4; - required uint64 txId = 5; + optional uint64 txId = 5; required uint64 parentInodeId = 6; required string name = 7; optional FsFileType type = 8; + optional Time create = 9; } message DeleteDentryResponse { required MetaStatusCode statusCode = 1; optional uint64 
appliedIndex = 2; + optional TxLock txLock = 3; } message PrepareRenameTxRequest { @@ -159,6 +174,85 @@ message PrepareRenameTxResponse { optional uint64 appliedIndex = 2; } +message TxLock { + required string primaryKey = 1; + required uint64 startTs = 2; + required uint64 timestamp = 3; + optional uint32 index = 4; + optional int32 ttl = 5; +} + +enum TxWriteKind { + Commit = 1; + Rollback = 2; +} + +message TS { + required uint64 ts = 1; +} + +message TxWrite { + required uint64 startTs = 1; + required TxWriteKind kind = 2; +} + +message PrewriteRenameTxRequest { + required uint32 poolId = 1; + required uint32 copysetId = 2; + required uint32 partitionId = 3; + repeated Dentry dentrys = 4; + required TxLock txLock = 5; +} + +message PrewriteRenameTxResponse { + required MetaStatusCode statusCode = 1; + repeated Dentry dentrys = 2; + optional TxLock txLock = 3; + optional uint64 appliedIndex = 4; +} + +message CheckTxStatusRequest { + required uint32 poolId = 1; + required uint32 copysetId = 2; + required uint32 partitionId = 3; + required string primaryKey = 4; + required uint64 startTs = 5; + required uint64 curTimestamp = 6; +} + +message CheckTxStatusResponse { + required MetaStatusCode statusCode = 1; + optional uint64 appliedIndex = 2; +} + +message ResolveTxLockRequest { + required uint32 poolId = 1; + required uint32 copysetId = 2; + required uint32 partitionId = 3; + required Dentry dentry = 4; + required uint64 startTs = 5; + required uint64 commitTs = 6; +} + +message ResolveTxLockResponse { + required MetaStatusCode statusCode = 1; + optional uint64 appliedIndex = 2; +} + +message CommitTxRequest { + required uint32 poolId = 1; + required uint32 copysetId = 2; + required uint32 partitionId = 3; + repeated Dentry dentrys = 4; + required uint64 startTs = 5; + required uint64 commitTs = 6; +} + +message CommitTxResponse { + required MetaStatusCode statusCode = 1; + optional uint64 appliedIndex = 2; +} + // inode interface message GetInodeRequest { 
required uint32 poolId = 1; @@ -266,7 +360,7 @@ message Inode { optional uint64 rdev = 16; // field 17 is left for compatibility map s3ChunkInfoMap = 18; // TYPE_S3 only, first is chunk index - optional uint32 dtime = 19; + optional uint64 dtime = 19; optional uint32 openmpcount = 20; // openmpcount mount points had the file open map xattr = 21; repeated uint64 parent = 22; @@ -538,6 +632,11 @@ service MetaServerService { rpc CreateDentry(CreateDentryRequest) returns (CreateDentryResponse); rpc DeleteDentry(DeleteDentryRequest) returns (DeleteDentryResponse); rpc PrepareRenameTx(PrepareRenameTxRequest) returns (PrepareRenameTxResponse); + // tx v2 + rpc PrewriteRenameTx(PrewriteRenameTxRequest) returns (PrewriteRenameTxResponse); + rpc CheckTxStatus(CheckTxStatusRequest) returns (CheckTxStatusResponse); + rpc ResolveTxLock(ResolveTxLockRequest) returns (ResolveTxLockResponse); + rpc CommitTx(CommitTxRequest) returns (CommitTxResponse); // inode interface rpc GetInode(GetInodeRequest) returns (GetInodeResponse); diff --git a/curvefs/proto/topology.proto b/curvefs/proto/topology.proto index c9d70682b5..f248655fca 100644 --- a/curvefs/proto/topology.proto +++ b/curvefs/proto/topology.proto @@ -61,7 +61,6 @@ message ClusterInfoData { required string clusterId = 1; // map partitionIndexs = 2; - } message PoolData { diff --git a/curvefs/sdk/README.md b/curvefs/sdk/README.md index c58aa8feba..6848362017 100644 --- a/curvefs/sdk/README.md +++ b/curvefs/sdk/README.md @@ -6,12 +6,12 @@ How to build ``` bash $ git clone git@github.com:opencurve/curve.git -$ cd curve -$ make dep stor=fs +$ make playground +$ make ci-dep stor=fs $ make sdk ``` -It will generate a jar after build success: +It will generate a jar package after build success: ``` Build SDK success => /curve/curvefs/sdk/output/curvefs-hadoop-1.0-SNAPSHOT.jar diff --git a/curvefs/sdk/java/native/BUILD b/curvefs/sdk/java/native/BUILD index 3361aeea44..13dcef4778 100644 --- a/curvefs/sdk/java/native/BUILD +++ 
b/curvefs/sdk/java/native/BUILD @@ -26,6 +26,8 @@ cc_binary( copts = CURVE_DEFAULT_COPTS, linkopts = [ "-Wl,-rpath=/tmp/libcurvefs,--disable-new-dtags", + "-L/usr/lib/x86_64-linux-gnu/", + "-lhashkit", ], deps = [ "@com_google_absl//absl/cleanup", diff --git a/curvefs/sdk/java/pom.xml b/curvefs/sdk/java/pom.xml index b3f15f585c..a3ed98aea9 100644 --- a/curvefs/sdk/java/pom.xml +++ b/curvefs/sdk/java/pom.xml @@ -30,7 +30,25 @@ native/build + + src/main/resources + + + + + org.apache.maven.plugins + maven-shade-plugin + + + package + + shade + + + + + diff --git a/curvefs/sdk/java/src/main/java/io/opencurve/curve/fs/flink/CurveFileSystemFactory.java b/curvefs/sdk/java/src/main/java/io/opencurve/curve/fs/flink/CurveFileSystemFactory.java index ac42dfaf82..2a79fcc1c4 100644 --- a/curvefs/sdk/java/src/main/java/io/opencurve/curve/fs/flink/CurveFileSystemFactory.java +++ b/curvefs/sdk/java/src/main/java/io/opencurve/curve/fs/flink/CurveFileSystemFactory.java @@ -1,3 +1,19 @@ +/* + * Copyright (c) 2023 NetEase Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + package io.opencurve.curve.fs.flink; import io.opencurve.curve.fs.hadoop.CurveFileSystem; @@ -10,28 +26,17 @@ import java.net.URI; public class CurveFileSystemFactory implements FileSystemFactory { - private org.apache.hadoop.conf.Configuration conf; - + private org.apache.hadoop.conf.Configuration conf = new Configuration(); private static final String CURVE_FS_CONFIG_PREFIXES = "curvefs."; private static final String FLINK_CONFIG_PREFIXES = "fs."; public static String SCHEME = "curvefs"; @Override public void configure(org.apache.flink.configuration.Configuration config) { - conf = new Configuration(); - if (config != null) { - for (String key : config.keySet()) { - if (key.startsWith(CURVE_FS_CONFIG_PREFIXES) || key.startsWith(FLINK_CONFIG_PREFIXES)) { - String value = config.getString(key, null); - if (value != null) { - if (CurveFileSystem.class.getCanonicalName().equals(value.trim())) { - SCHEME = key.split("\\.")[1]; - } - conf.set(key, value); - } - } - } - } + config.keySet() + .stream() + .filter(key -> key.startsWith(CURVE_FS_CONFIG_PREFIXES) || key.startsWith(FLINK_CONFIG_PREFIXES)) + .forEach(key -> conf.set(key, config.getString(key, ""))); } @Override @@ -45,4 +50,4 @@ public FileSystem create(URI uri) throws IOException { fs.initialize(uri, conf); return new HadoopFileSystem(fs); } -} +} \ No newline at end of file diff --git a/curvefs/sdk/java/src/main/java/io/opencurve/curve/fs/flink/CurveFileSystemTableFactory.java b/curvefs/sdk/java/src/main/java/io/opencurve/curve/fs/flink/CurveFileSystemTableFactory.java index da68151bbd..d065492109 100644 --- a/curvefs/sdk/java/src/main/java/io/opencurve/curve/fs/flink/CurveFileSystemTableFactory.java +++ b/curvefs/sdk/java/src/main/java/io/opencurve/curve/fs/flink/CurveFileSystemTableFactory.java @@ -1,3 +1,19 @@ +/* + * Copyright (c) 2023 NetEase Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package io.opencurve.curve.fs.flink; import org.apache.flink.connector.file.table.FileSystemTableFactory; diff --git a/curvefs/sdk/java/src/main/java/io/opencurve/curve/fs/hadoop/CurveFSInputStream.java b/curvefs/sdk/java/src/main/java/io/opencurve/curve/fs/hadoop/CurveFSInputStream.java index 2dd1be8d1d..25ad56564f 100644 --- a/curvefs/sdk/java/src/main/java/io/opencurve/curve/fs/hadoop/CurveFSInputStream.java +++ b/curvefs/sdk/java/src/main/java/io/opencurve/curve/fs/hadoop/CurveFSInputStream.java @@ -59,8 +59,6 @@ public class CurveFSInputStream extends FSInputStream { */ public CurveFSInputStream(Configuration conf, CurveFSProto curvefs, int fh, long flength, int bufferSize) { - // Whoever's calling the constructor is responsible for doing the actual curve_open - // call and providing the file handle. fileLength = flength; fileHandle = fh; closed = false; @@ -73,6 +71,7 @@ public CurveFSInputStream(Configuration conf, CurveFSProto curvefs, /** Curve likes things to be closed before it shuts down, * so closing the IOStream stuff voluntarily in a finalizer is good */ + @Override protected void finalize() throws Throwable { try { if (!closed) { @@ -91,7 +90,6 @@ private synchronized boolean fillBuffer() throws IOException { bufValid = 0; - // attempt to reset to old position. If it fails, too bad. curve.lseek(fileHandle, curvePos, CurveFSMount.SEEK_SET); throw new IOException("Failed to fill read buffer! 
Error code:" + err); } @@ -102,6 +100,7 @@ private synchronized boolean fillBuffer() throws IOException { /* * Get the current position of the stream. */ + @Override public synchronized long getPos() throws IOException { return curvePos - bufValid + bufPos; } @@ -117,6 +116,7 @@ public synchronized int available() throws IOException { return (int) (fileLength - getPos()); } + @Override public synchronized void seek(long targetPos) throws IOException { LOG.trace("CurveInputStream.seek: Seeking to position " + targetPos + " on fd " + fileHandle); @@ -142,6 +142,7 @@ public synchronized void seek(long targetPos) throws IOException { * they'll be dealt with before anybody even tries to call this method! * @return false. */ + @Override public synchronized boolean seekToNewSource(long targetPos) { return false; } diff --git a/curvefs/sdk/java/src/main/java/io/opencurve/curve/fs/hadoop/CurveFSTalker.java b/curvefs/sdk/java/src/main/java/io/opencurve/curve/fs/hadoop/CurveFSTalker.java index 90366cc210..26d0492142 100644 --- a/curvefs/sdk/java/src/main/java/io/opencurve/curve/fs/hadoop/CurveFSTalker.java +++ b/curvefs/sdk/java/src/main/java/io/opencurve/curve/fs/hadoop/CurveFSTalker.java @@ -29,6 +29,7 @@ import io.opencurve.curve.fs.libfs.CurveFSStat; import io.opencurve.curve.fs.libfs.CurveFSStatVFS; +import java.util.UUID; import java.io.FileNotFoundException; import java.io.IOException; import java.net.URI; @@ -37,6 +38,7 @@ class CurveFSTalker extends CurveFSProto { private CurveFSMount mount; private String fsname = null; + private String mountpoint = null; private boolean inited = false; private static final String PREFIX_KEY = "curvefs"; @@ -72,14 +74,15 @@ void initialize(URI uri, Configuration conf) throws IOException { if (null == fsname || fsname.isEmpty()) { throw new IOException("curvefs.name is not set"); } - mount.mount(fsname, "/"); + mountpoint = UUID.randomUUID().toString(); + mount.mount(fsname, mountpoint); inited = true; } @Override void shutdown() 
throws IOException { if (inited) { - mount.umount(fsname, "/"); + mount.umount(fsname, mountpoint); mount = null; inited = false; } @@ -179,4 +182,4 @@ void chown(Path path, int uid, int gid) throws IOException { void rename(Path src, Path dst) throws IOException { mount.rename(tostr(src), tostr(dst)); } -} +} \ No newline at end of file diff --git a/curvefs/sdk/java/src/main/java/io/opencurve/curve/fs/hadoop/CurveFileSystem.java b/curvefs/sdk/java/src/main/java/io/opencurve/curve/fs/hadoop/CurveFileSystem.java index 09df042c1b..fc031d38d8 100644 --- a/curvefs/sdk/java/src/main/java/io/opencurve/curve/fs/hadoop/CurveFileSystem.java +++ b/curvefs/sdk/java/src/main/java/io/opencurve/curve/fs/hadoop/CurveFileSystem.java @@ -59,12 +59,14 @@ private Path makeAbsolute(Path path) { return new Path(workingDir, path); } + @Override public URI getUri() { return uri; } + @Override public String getScheme() { - return uri.getScheme(); + return "hdfs"; } @Override @@ -85,14 +87,12 @@ public void initialize(URI uri, Configuration conf) throws IOException { this.workingDir = getHomeDirectory(); } - + @Override public FSDataInputStream open(Path path, int bufferSize) throws IOException { path = makeAbsolute(path); - // throws filenotfoundexception if path is a directory int fd = curve.open(path, CurveFSMount.O_RDONLY, 0); - /* get file size */ CurveFSStat stat = new CurveFSStat(); curve.fstat(fd, stat); @@ -102,10 +102,11 @@ public FSDataInputStream open(Path path, int bufferSize) throws IOException { @Override public void close() throws IOException { - super.close(); // this method does stuff, make sure it's run! 
+ super.close(); curve.shutdown(); } + @Override public FSDataOutputStream append(Path path, int bufferSize, Progressable progress) throws IOException { path = makeAbsolute(path); @@ -122,6 +123,7 @@ public FSDataOutputStream append(Path path, int bufferSize, Progressable progres return new FSDataOutputStream(ostream, statistics); } + @Override public Path getWorkingDirectory() { return workingDir; } @@ -144,6 +146,7 @@ public boolean mkdirs(Path f) throws IOException { return mkdirs(f, perms); } + @Override public FileStatus getFileStatus(Path path) throws IOException { path = makeAbsolute(path); @@ -160,7 +163,7 @@ public FileStatus getFileStatus(Path path) throws IOException { return status; } - + @Override public FileStatus[] listStatus(Path path) throws IOException { path = makeAbsolute(path); @@ -174,12 +177,10 @@ public FileStatus[] listStatus(Path path) throws IOException { for (int i = 0; i < status.length; i++) { status[i] = getFileStatus(new Path(path, dirlist[i])); } - curve.shutdown(); return status; } else { throw new FileNotFoundException("File " + path + " does not exist."); } - } @Override @@ -208,9 +209,9 @@ public void setTimes(Path path, long mtime, long atime) throws IOException { curve.setattr(path, stat, mask); } + @Override public FSDataOutputStream create(Path path, FsPermission permission, boolean overwrite, int bufferSize, short replication, long blockSize, Progressable progress) throws IOException { - path = makeAbsolute(path); boolean exists = exists(path); @@ -268,6 +269,7 @@ public void setOwner(Path path, String username, String groupname) throws IOExce } @Deprecated + @Override public FSDataOutputStream createNonRecursive(Path path, FsPermission permission, boolean overwrite, int bufferSize, short replication, long blockSize, @@ -278,7 +280,7 @@ public FSDataOutputStream createNonRecursive(Path path, FsPermission permission, if (parent != null) { CurveFSStat stat = new CurveFSStat(); - curve.lstat(parent, stat); // handles 
FileNotFoundException case + curve.lstat(parent, stat); if (stat.isFile()) { throw new FileAlreadyExistsException(parent.toString()); } @@ -314,14 +316,15 @@ public boolean rename(Path src, Path dst) throws IOException { } @Deprecated + @Override public boolean delete(Path path) throws IOException { return delete(path, false); } + @Override public boolean delete(Path path, boolean recursive) throws IOException { path = makeAbsolute(path); - /* path exists? */ FileStatus status; try { status = getFileStatus(path); @@ -329,13 +332,11 @@ public boolean delete(Path path, boolean recursive) throws IOException { return false; } - /* we're done if its a file */ if (status.isFile()) { curve.unlink(path); return true; } - /* get directory contents */ FileStatus[] dirlist = listStatus(path); if (dirlist == null) { return false; @@ -383,6 +384,6 @@ protected int getDefaultPort() { @Override public String getCanonicalServiceName() { - return null; // Does not support Token + return null; } } diff --git a/curvefs/sdk/java/src/main/java/io/opencurve/curve/fs/hadoop/Main.java b/curvefs/sdk/java/src/main/java/io/opencurve/curve/fs/hadoop/Main.java deleted file mode 100644 index d488e309dc..0000000000 --- a/curvefs/sdk/java/src/main/java/io/opencurve/curve/fs/hadoop/Main.java +++ /dev/null @@ -1,7 +0,0 @@ -package io.opencurve.curve.fs.hadoop; - -public class Main { - public static void main(String[] args) { - System.out.println("Hello world!"); - } -} diff --git a/curvefs/sdk/libcurvefs/libcurvefs.cpp b/curvefs/sdk/libcurvefs/libcurvefs.cpp index d53c0b51b8..fb54c3b509 100644 --- a/curvefs/sdk/libcurvefs/libcurvefs.cpp +++ b/curvefs/sdk/libcurvefs/libcurvefs.cpp @@ -143,7 +143,7 @@ int curvefs_open(uintptr_t instance_ptr, } } - uint64_t fd; + uint64_t fd = 0; rc = mount->vfs->Open(path, flags, mode, &fd); if (rc != CURVEFS_ERROR::OK) { return SysErr(rc); @@ -164,7 +164,7 @@ ssize_t curvefs_read(uintptr_t instance_ptr, int fd, char* buffer, size_t count) { - size_t nread; + size_t 
nread = 0; auto mount = get_instance(instance_ptr); auto rc = mount->vfs->Read(fd, buffer, count, &nread); if (rc != CURVEFS_ERROR::OK) { diff --git a/curvefs/src/client/BUILD b/curvefs/src/client/BUILD index 450e8b6ced..33a3b45089 100644 --- a/curvefs/src/client/BUILD +++ b/curvefs/src/client/BUILD @@ -99,6 +99,7 @@ cc_library( "//curvefs/src/common:metric_utils", "//curvefs/src/common:dynamic_vlog", "//curvefs/src/common:threading", + "//curvefs/src/metaserver:metaserver_storage_conv", "@com_google_absl//absl/memory", "@com_google_absl//absl/strings", "@com_google_absl//absl/synchronization", diff --git a/curvefs/src/client/client_operator.cpp b/curvefs/src/client/client_operator.cpp index 720ce65ee3..fc41375caf 100644 --- a/curvefs/src/client/client_operator.cpp +++ b/curvefs/src/client/client_operator.cpp @@ -20,18 +20,22 @@ * Author: Jingli Chen (Wine93) */ +#include "curvefs/src/client/client_operator.h" + #include -#include "src/common/uuid.h" -#include "curvefs/src/client/client_operator.h" #include "curvefs/src/client/filesystem/error.h" +#include "curvefs/src/metaserver/storage/converter.h" +#include "src/common/uuid.h" namespace curvefs { namespace client { using ::curve::common::UUIDGenerator; -using ::curvefs::metaserver::DentryFlag; -using ::curvefs::mds::topology::PartitionTxId; using ::curvefs::client::filesystem::ToFSError; +using ::curvefs::mds::topology::PartitionTxId; +using ::curvefs::metaserver::DentryFlag; +using ::curvefs::metaserver::TxLock; +using ::curvefs::metaserver::storage::Key4Dentry; #define LOG_ERROR(action, rc) \ LOG(ERROR) << action << " failed, retCode = " << rc \ @@ -60,6 +64,7 @@ RenameOperator::RenameOperator(uint32_t fsId, dstTxId_(0), oldInodeId_(0), oldInodeSize_(-1), + startTs_(0), dentryManager_(dentryManager), inodeManager_(inodeManager), metaClient_(metaClient), @@ -77,6 +82,7 @@ std::string RenameOperator::DebugString() { << ", srcPartitionId = " << srcPartitionId_ << ", dstPartitionId = " << dstPartitionId_ << ", 
srcTxId = " << srcTxId_ << ", dstTxId_ = " << dstTxId_ + << ", startTs = " << startTs_ << ", oldInodeId = " << oldInodeId_ << ", srcDentry = [" << srcDentry_.ShortDebugString() << "]" << ", dstDentry = [" << dstDentry_.ShortDebugString() << "]" @@ -194,7 +200,6 @@ CURVEFS_ERROR RenameOperator::RecordOldInodeInfo() { return CURVEFS_ERROR::NOT_EXIST; } } - return CURVEFS_ERROR::OK; } @@ -204,7 +209,6 @@ CURVEFS_ERROR RenameOperator::PrepareRenameTx( if (rc != MetaStatusCode::OK) { LOG_ERROR("PrepareRenameTx", rc); } - return ToFSError(rc); } @@ -272,6 +276,118 @@ CURVEFS_ERROR RenameOperator::CommitTx() { return CURVEFS_ERROR::OK; } +CURVEFS_ERROR RenameOperator::PrewriteRenameTx( + const std::vector& dentrys, const TxLock& txLockIn) { + TxLock txLockOut; + uint32_t dcount = 0; + auto rc = metaClient_->PrewriteRenameTx(dentrys, txLockIn, &txLockOut); + while (rc == MetaStatusCode::TX_KEY_LOCKED) { + dcount += txLockOut.index(); + auto rt = dentryManager_->CheckAndResolveTx(dentrys[dcount], + txLockOut, txLockIn.timestamp(), txLockIn.startts()); + if (rt != MetaStatusCode::OK) { + LOG_ERROR("CheckAndResolveTx", rt); + return CURVEFS_ERROR::INTERNAL; + } + if (dcount < dentrys.size()) { + rc = metaClient_->PrewriteRenameTx(std::vector( + dentrys.begin() + dcount, dentrys.end()), + txLockIn, &txLockOut); + } else { + break; + } + } + if (rc != MetaStatusCode::OK) { + LOG_ERROR("PrewriteRenameTx", rc); + return CURVEFS_ERROR::INTERNAL; + } + return CURVEFS_ERROR::OK; +} + +CURVEFS_ERROR RenameOperator::PrewriteTx() { + uint64_t timestamp; + auto rc = mdsClient_->Tso(&startTs_, ×tamp); + if (rc != FSStatusCode::OK) { + LOG_ERROR("start Tso", rc); + return CURVEFS_ERROR::INTERNAL; + } + + dentry_ = Dentry(srcDentry_); + dentry_.set_flag(DentryFlag::DELETE_MARK_FLAG); + dentry_.set_type(srcDentry_.type()); + dentry_.set_txid(startTs_); + + newDentry_ = Dentry(srcDentry_); + newDentry_.set_parentinodeid(newParentId_); + newDentry_.set_name(newname_); + 
newDentry_.set_type(srcDentry_.type()); + newDentry_.set_txid(startTs_); + + Key4Dentry key4Dentry( + dentry_.fsid(), dentry_.parentinodeid(), dentry_.name()); + std::string primaryKey = key4Dentry.SerializeToString(); + + TxLock txLockIn; + txLockIn.set_primarykey(primaryKey); + txLockIn.set_startts(startTs_); + txLockIn.set_timestamp(timestamp); + + if (!metaClient_->GetPartitionId(dentry_.fsid(), dentry_.parentinodeid(), + &srcPartitionId_) || !metaClient_->GetPartitionId(newDentry_.fsid(), + newDentry_.parentinodeid(), &dstPartitionId_)) { + LOG_ERROR("GetPartitionId", rc); + return CURVEFS_ERROR::INTERNAL; + } + + // note: do not prewrite concurrently, the tx write table clear logic based primary key prewrite first // NOLINT + CURVEFS_ERROR rt = CURVEFS_ERROR::OK; + std::vector dentrys{dentry_}; + if (srcPartitionId_ == dstPartitionId_) { + dentrys.push_back(newDentry_); + rt = PrewriteRenameTx(dentrys, txLockIn); + } else { + rt = PrewriteRenameTx(dentrys, txLockIn); + if (rt == CURVEFS_ERROR::OK) { + dentrys[0] = newDentry_; + rt = PrewriteRenameTx(dentrys, txLockIn); + } + } + if (rt != CURVEFS_ERROR::OK) { + LOG_ERROR("PrepPrewriteTxareTx", rc); + return rt; + } + return CURVEFS_ERROR::OK; +} + +CURVEFS_ERROR RenameOperator::CommitTxV2() { + uint64_t commitTs; + uint64_t timestamp; + auto rc = mdsClient_->Tso(&commitTs, ×tamp); + if (rc != FSStatusCode::OK) { + LOG_ERROR("CommitTxV2 Tso", rc); + return CURVEFS_ERROR::INTERNAL; + } + + MetaStatusCode rt = MetaStatusCode::OK; + std::vector dentrys{dentry_}; + if (srcPartitionId_ == dstPartitionId_) { + dentrys.push_back(newDentry_); + rt = metaClient_->CommitTx(dentrys, startTs_, commitTs); + } else { + rt = metaClient_->CommitTx(dentrys, startTs_, commitTs); + if (rt == MetaStatusCode::OK) { + dentrys[0] = newDentry_; + // do not need check second key commit result + metaClient_->CommitTx(dentrys, startTs_, commitTs); + } + } + if (rt != MetaStatusCode::OK) { + LOG_ERROR("CommitTx", rt); + return 
CURVEFS_ERROR::INTERNAL; + } + return CURVEFS_ERROR::OK; +} + CURVEFS_ERROR RenameOperator::LinkInode(uint64_t inodeId, uint64_t parent) { std::shared_ptr inodeWrapper; auto rc = inodeManager_->GetInode(inodeId, inodeWrapper); @@ -413,8 +529,6 @@ CURVEFS_ERROR RenameOperator::UpdateInodeCtime() { LOG_ERROR("UpdateInodeCtime", rc); return rc; } - - LOG(INFO) << "UpdateInodeCtime inodeid = " << srcDentry_.inodeid(); return rc; } diff --git a/curvefs/src/client/client_operator.h b/curvefs/src/client/client_operator.h index 0f073a67c4..0408fd1181 100644 --- a/curvefs/src/client/client_operator.h +++ b/curvefs/src/client/client_operator.h @@ -27,13 +27,15 @@ #include #include -#include "curvefs/src/client/inode_cache_manager.h" -#include "curvefs/src/client/dentry_cache_manager.h" +#include "curvefs/src/client/inode_manager.h" +#include "curvefs/src/client/dentry_manager.h" #include "curvefs/src/client/rpcclient/mds_client.h" namespace curvefs { namespace client { +using ::curvefs::metaserver::MetaStatusCode; +using ::curvefs::metaserver::TxLock; using rpcclient::MdsClient; class RenameOperator { @@ -56,6 +58,8 @@ class RenameOperator { CURVEFS_ERROR LinkDestParentInode(); CURVEFS_ERROR PrepareTx(); CURVEFS_ERROR CommitTx(); + CURVEFS_ERROR PrewriteTx(); + CURVEFS_ERROR CommitTxV2(); CURVEFS_ERROR UnlinkSrcParentInode(); void UnlinkOldInode(); CURVEFS_ERROR UpdateInodeParent(); @@ -85,6 +89,9 @@ class RenameOperator { CURVEFS_ERROR PrepareRenameTx(const std::vector& dentrys); + CURVEFS_ERROR PrewriteRenameTx( + const std::vector& dentrys, const TxLock& txLockIn); + CURVEFS_ERROR LinkInode(uint64_t inodeId, uint64_t parent = 0); CURVEFS_ERROR UnLinkInode(uint64_t inodeId, uint64_t parent = 0); @@ -107,6 +114,7 @@ class RenameOperator { // if dest exist, record the size and type of file or empty dir int64_t oldInodeSize_; FsFileType oldInodeType_; + uint64_t startTs_; // tx sequence number Dentry srcDentry_; Dentry dstDentry_; Dentry dentry_; diff --git 
a/curvefs/src/client/common/common.cpp b/curvefs/src/client/common/common.cpp index b50898a630..1210287fc3 100644 --- a/curvefs/src/client/common/common.cpp +++ b/curvefs/src/client/common/common.cpp @@ -45,6 +45,18 @@ std::ostream &operator<<(std::ostream &os, MetaServerOpType optype) { case MetaServerOpType::PrepareRenameTx: os << "PrepareRenameTx"; break; + case MetaServerOpType::PrewriteRenameTx: + os << "PrewriteRenameTx"; + break; + case MetaServerOpType::CheckTxStatus: + os << "CheckTxStatus"; + break; + case MetaServerOpType::ResolveTxLock: + os << "ResolveTxLock"; + break; + case MetaServerOpType::CommitTx: + os << "CommitTx"; + break; case MetaServerOpType::GetInode: os << "GetInode"; break; diff --git a/curvefs/src/client/common/common.h b/curvefs/src/client/common/common.h index a76312b117..2a5a13d931 100644 --- a/curvefs/src/client/common/common.h +++ b/curvefs/src/client/common/common.h @@ -50,6 +50,10 @@ enum class MetaServerOpType { CreateDentry, DeleteDentry, PrepareRenameTx, + PrewriteRenameTx, + CheckTxStatus, + ResolveTxLock, + CommitTx, GetInode, BatchGetInodeAttr, BatchGetXAttr, diff --git a/curvefs/src/client/common/config.cpp b/curvefs/src/client/common/config.cpp index dc80177ab3..6125a22af9 100644 --- a/curvefs/src/client/common/config.cpp +++ b/curvefs/src/client/common/config.cpp @@ -105,6 +105,8 @@ DEFINE_uint64(fuseClientBurstReadIopsSecs, 180, "the times that Read burst iops can continue"); DEFINE_validator(fuseClientBurstReadIopsSecs, &pass_uint64); +DEFINE_int32(TxVersion, 1, "tx version"); + void InitMdsOption(Configuration *conf, MdsOption *mdsOpt) { conf->GetValueFatalIfFail("mdsOpt.mdsMaxRetryMS", &mdsOpt->mdsMaxRetryMS); conf->GetValueFatalIfFail("mdsOpt.rpcRetryOpt.maxRPCTimeoutMS", @@ -428,6 +430,8 @@ void InitFuseClientOption(Configuration *conf, FuseClientOption *clientOption) { &clientOption->dummyServerStartPort); conf->GetValueFatalIfFail("fuseClient.enableMultiMountPointRename", 
&clientOption->enableMultiMountPointRename); + conf->GetIntValue("fuseClient.txVersion", + &FLAGS_TxVersion); conf->GetValueFatalIfFail("fuseClient.downloadMaxRetryTimes", &clientOption->downloadMaxRetryTimes); conf->GetValueFatalIfFail("fuseClient.warmupThreadsNum", diff --git a/curvefs/src/client/curve_fuse_op.cpp b/curvefs/src/client/curve_fuse_op.cpp index dd249c8892..cd2128d805 100644 --- a/curvefs/src/client/curve_fuse_op.cpp +++ b/curvefs/src/client/curve_fuse_op.cpp @@ -48,6 +48,7 @@ #include "curvefs/src/common/metric_utils.h" #include "src/common/configuration.h" #include "src/common/gflags_helper.h" +#include "src/common/log_util.h" using ::curve::common::Configuration; using ::curvefs::client::CURVEFS_ERROR; @@ -152,6 +153,7 @@ int InitLog(const char *confPath, const char *argv0) { FLAGS_vlog_level = FLAGS_v; // initialize logging module + curve::common::DisableLoggingToStdErr(); google::InitGoogleLogging(argv0); bool succ = InitAccessLog(FLAGS_log_dir); diff --git a/curvefs/src/client/dentry_cache_manager.cpp b/curvefs/src/client/dentry_cache_manager.cpp deleted file mode 100644 index afb5e49eef..0000000000 --- a/curvefs/src/client/dentry_cache_manager.cpp +++ /dev/null @@ -1,160 +0,0 @@ -/* - * Copyright (c) 2021 NetEase Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - - -/* - * Project: curve - * Created Date: Thur May 27 2021 - * Author: xuchaojie - */ -#include "curvefs/src/client/dentry_cache_manager.h" - -#include -#include -#include -#include -#include -#include - -using ::curvefs::metaserver::MetaStatusCode_Name; - -namespace curvefs { -namespace client { -namespace common { -DECLARE_bool(enableCto); -} // namespace common -} // namespace client -} // namespace curvefs - -namespace curvefs { -namespace client { - -using curve::common::WriteLockGuard; -using NameLockGuard = ::curve::common::GenericNameLockGuard; -using ::curvefs::client::filesystem::ToFSError; - -CURVEFS_ERROR DentryCacheManagerImpl::GetDentry(uint64_t parent, - const std::string &name, - Dentry *out) { - std::string key = GetDentryCacheKey(parent, name); - NameLockGuard lock(nameLock_, key); - - MetaStatusCode ret = metaClient_->GetDentry(fsId_, parent, name, out); - if (ret != MetaStatusCode::OK) { - LOG_IF(ERROR, ret != MetaStatusCode::NOT_FOUND) - << "metaClient_ GetDentry failed, MetaStatusCode = " << ret - << ", MetaStatusCode_Name = " << MetaStatusCode_Name(ret) - << ", parent = " << parent << ", name = " << name; - return ToFSError(ret); - } - return CURVEFS_ERROR::OK; -} - -CURVEFS_ERROR DentryCacheManagerImpl::CreateDentry(const Dentry &dentry) { - std::string key = GetDentryCacheKey(dentry.parentinodeid(), dentry.name()); - NameLockGuard lock(nameLock_, key); - MetaStatusCode ret = metaClient_->CreateDentry(dentry); - if (ret != MetaStatusCode::OK) { - LOG(ERROR) << "metaClient_ CreateDentry failed, MetaStatusCode = " - << ret - << ", MetaStatusCode_Name = " << MetaStatusCode_Name(ret) - << ", parent = " << dentry.parentinodeid() - << ", name = " << dentry.name(); - return ToFSError(ret); - } - - return CURVEFS_ERROR::OK; -} - -CURVEFS_ERROR DentryCacheManagerImpl::DeleteDentry(uint64_t parent, - const std::string &name, - FsFileType type) { - std::string key = GetDentryCacheKey(parent, name); - NameLockGuard lock(nameLock_, key); - - 
MetaStatusCode ret = metaClient_->DeleteDentry(fsId_, parent, name, type); - if (ret != MetaStatusCode::OK && ret != MetaStatusCode::NOT_FOUND) { - LOG(ERROR) << "metaClient_ DeleteInode failed, MetaStatusCode = " << ret - << ", MetaStatusCode_Name = " << MetaStatusCode_Name(ret) - << ", parent = " << parent << ", name = " << name; - return ToFSError(ret); - } - return CURVEFS_ERROR::OK; -} - -CURVEFS_ERROR DentryCacheManagerImpl::ListDentry(uint64_t parent, - std::list *dentryList, - uint32_t limit, - bool onlyDir, - uint32_t nlink) { - dentryList->clear(); - // means no dir under this dir - if (onlyDir && nlink == 2) { - LOG(INFO) << "ListDentry parent = " << parent - << ", onlyDir = 1 and nlink = 2, return directly"; - return CURVEFS_ERROR::OK; - } - - MetaStatusCode ret = MetaStatusCode::OK; - bool perceed = true; - std::string last = ""; - do { - std::list part; - ret = metaClient_->ListDentry(fsId_, parent, last, limit, onlyDir, - &part); - VLOG(6) << "ListDentry fsId = " << fsId_ << ", parent = " << parent - << ", last = " << last << ", count = " << limit - << ", onlyDir = " << onlyDir - << ", ret = " << ret << ", part.size() = " << part.size(); - if (ret != MetaStatusCode::OK) { - LOG(ERROR) << "metaClient_ ListDentry failed" - << ", MetaStatusCode_Name = " << MetaStatusCode_Name(ret) - << ", parent = " << parent << ", last = " << last - << ", count = " << limit << ", onlyDir = " << onlyDir; - return ToFSError(ret); - } - - if (!onlyDir) { - if (part.size() < limit) { - perceed = false; - } - if (!part.empty()) { - last = part.back().name(); - dentryList->splice(dentryList->end(), part); - } - } else { - // means iterate over the range - if (part.empty()) { - perceed = false; - } else { - last = part.back().name(); - if (part.back().type() != FsFileType::TYPE_DIRECTORY) { - part.pop_back(); - } - dentryList->splice(dentryList->end(), part); - // means already get all the dir under this dir - if (nlink - dentryList->size() == 2) { - perceed = false; - } - } 
- } - } while (perceed); - - return CURVEFS_ERROR::OK; -} - -} // namespace client -} // namespace curvefs diff --git a/curvefs/src/client/dentry_manager.cpp b/curvefs/src/client/dentry_manager.cpp new file mode 100644 index 0000000000..9c5227ccfb --- /dev/null +++ b/curvefs/src/client/dentry_manager.cpp @@ -0,0 +1,287 @@ +/* + * Copyright (c) 2021 NetEase Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/* + * Project: curve + * Created Date: Thur May 27 2021 + * Author: xuchaojie + */ +#include "curvefs/src/client/dentry_manager.h" + +#include +#include +#include +#include +#include +#include +#include "curvefs/src/metaserver/storage/converter.h" + +using ::curvefs::metaserver::MetaStatusCode_Name; +using ::curvefs::metaserver::storage::Key4Dentry; + +namespace curvefs { +namespace client { +namespace common { +DECLARE_bool(enableCto); +} // namespace common +} // namespace client +} // namespace curvefs + +namespace curvefs { +namespace client { + +using curve::common::WriteLockGuard; +using NameLockGuard = ::curve::common::GenericNameLockGuard; +using ::curvefs::client::filesystem::ToFSError; + +MetaStatusCode DentryCacheManagerImpl::CheckTxStatus( + const std::string primaryKey, uint64_t startTs, uint64_t curTimestamp) { + Key4Dentry key4Dentry; + if (!key4Dentry.ParseFromString(primaryKey)) { + LOG(ERROR) << "CheckTxStatus parse primary key failed, primaryKey = " + << primaryKey; + return MetaStatusCode::PARSE_FROM_STRING_FAILED; + } + 
return metaClient_->CheckTxStatus(key4Dentry.fsId, key4Dentry.parentInodeId, + primaryKey, startTs, curTimestamp); +} + +MetaStatusCode DentryCacheManagerImpl::ResolveTxLock(const Dentry &dentry, + uint64_t startTs, uint64_t commitTs) { + return metaClient_->ResolveTxLock(dentry, startTs, commitTs); +} + +MetaStatusCode DentryCacheManagerImpl::CheckAndResolveTx(const Dentry& dentry, + const TxLock& txLock, uint64_t timestamp, uint64_t commitTs) { + auto rt = CheckTxStatus(txLock.primarykey(), txLock.startts(), timestamp); + switch (rt) { + case MetaStatusCode::TX_COMMITTED: + return ResolveTxLock(dentry, txLock.startts(), commitTs); + case MetaStatusCode::TX_ROLLBACKED: + case MetaStatusCode::TX_TIMEOUT: + return ResolveTxLock(dentry, txLock.startts()); + default: + LOG(ERROR) << "CheckTxStatus unexpected rt = " + << MetaStatusCode_Name(rt); + return rt; + } + return MetaStatusCode::OK; +} + +CURVEFS_ERROR DentryCacheManagerImpl::GetDentry(uint64_t parent, + const std::string &name, + Dentry *out) { + std::string key = GetDentryCacheKey(parent, name); + NameLockGuard lock(nameLock_, key); + TxLock txLockOut; + MetaStatusCode ret = metaClient_->GetDentry(fsId_, parent, name, out, + &txLockOut); + while (ret == MetaStatusCode::TX_KEY_LOCKED) { + uint64_t ts = 0; + uint64_t timestamp = 0; + if (mdsClient_->Tso(&ts, ×tamp) != FSStatusCode::OK) { + LOG(ERROR) << "GetDentry Tso failed, parent = " << parent + << ", name = " << name; + return CURVEFS_ERROR::INTERNAL; + } + Dentry dentry; + dentry.set_fsid(fsId_); + dentry.set_parentinodeid(parent); + dentry.set_name(name); + MetaStatusCode rc = CheckAndResolveTx(dentry, txLockOut, timestamp, ts); + if (rc != MetaStatusCode::OK) { + LOG(ERROR) << "GetDentry CheckAndResolveTx failed, rc = " + << MetaStatusCode_Name(rc) + << ", parent = " << parent << ", name = " << name; + return CURVEFS_ERROR::INTERNAL; + } + ret = metaClient_->GetDentry(fsId_, parent, name, out, &txLockOut); + } + + if (ret != MetaStatusCode::OK) { + 
LOG_IF(ERROR, ret != MetaStatusCode::NOT_FOUND) + << "metaClient_ GetDentry failed, MetaStatusCode = " << ret + << ", MetaStatusCode_Name = " << MetaStatusCode_Name(ret) + << ", parent = " << parent << ", name = " << name; + return ToFSError(ret); + } + return CURVEFS_ERROR::OK; +} + +CURVEFS_ERROR DentryCacheManagerImpl::CreateDentry(const Dentry &dentry) { + std::string key = GetDentryCacheKey(dentry.parentinodeid(), dentry.name()); + NameLockGuard lock(nameLock_, key); + TxLock txLockOut; + MetaStatusCode ret = metaClient_->CreateDentry(dentry, &txLockOut); + while (ret == MetaStatusCode::TX_KEY_LOCKED) { + uint64_t ts = 0; + uint64_t timestamp = 0; + if (mdsClient_->Tso(&ts, ×tamp) != FSStatusCode::OK) { + LOG(ERROR) << "CreateDentry Tso failed, dentry = " + << dentry.ShortDebugString(); + return CURVEFS_ERROR::INTERNAL; + } + MetaStatusCode rc = CheckAndResolveTx(dentry, txLockOut, timestamp, ts); + if (rc != MetaStatusCode::OK) { + LOG(ERROR) << "CreateDentry CheckAndResolveTx failed, rc = " + << MetaStatusCode_Name(rc) << ", dentry = " + << dentry.ShortDebugString(); + return CURVEFS_ERROR::INTERNAL; + } + ret = metaClient_->CreateDentry(dentry, &txLockOut); + } + if (ret != MetaStatusCode::OK) { + LOG(ERROR) << "metaClient_ CreateDentry failed" + << ", MetaStatusCode_Name = " << MetaStatusCode_Name(ret) + << ", parent = " << dentry.parentinodeid() + << ", name = " << dentry.name(); + return ToFSError(ret); + } + return CURVEFS_ERROR::OK; +} + +CURVEFS_ERROR DentryCacheManagerImpl::DeleteDentry(uint64_t parent, + const std::string &name, + FsFileType type) { + std::string key = GetDentryCacheKey(parent, name); + NameLockGuard lock(nameLock_, key); + + TxLock txLockOut; + MetaStatusCode ret = metaClient_->DeleteDentry( + fsId_, parent, name, type, &txLockOut); + while (ret == MetaStatusCode::TX_KEY_LOCKED) { + uint64_t ts = 0; + uint64_t timestamp = 0; + if (mdsClient_->Tso(&ts, ×tamp) != FSStatusCode::OK) { + LOG(ERROR) << "DeleteDentry Tso failed, parent = 
" << parent + << ", name = " << name; + return CURVEFS_ERROR::INTERNAL; + } + Dentry dentry; + dentry.set_fsid(fsId_); + dentry.set_parentinodeid(parent); + dentry.set_name(name); + MetaStatusCode rc = CheckAndResolveTx(dentry, txLockOut, timestamp, ts); + if (rc != MetaStatusCode::OK) { + LOG(ERROR) << "DeleteDentry CheckAndResolveTx failed, rc = " + << MetaStatusCode_Name(rc) << ", parent = " << parent + << ", name = " << name; + return CURVEFS_ERROR::INTERNAL; + } + ret = metaClient_->DeleteDentry(fsId_, parent, name, type, &txLockOut); + } + + if (ret != MetaStatusCode::OK && ret != MetaStatusCode::NOT_FOUND) { + LOG(ERROR) << "metaClient_ DeleteInode failed, MetaStatusCode = " << ret + << ", MetaStatusCode_Name = " << MetaStatusCode_Name(ret) + << ", parent = " << parent << ", name = " << name; + return ToFSError(ret); + } + return CURVEFS_ERROR::OK; +} + +CURVEFS_ERROR DentryCacheManagerImpl::ListDentry(uint64_t parent, + std::list *dentryList, + uint32_t limit, + bool onlyDir, + uint32_t nlink) { + dentryList->clear(); + // means no dir under this dir + if (onlyDir && nlink == 2) { + LOG(INFO) << "ListDentry parent = " << parent + << ", onlyDir = 1 and nlink = 2, return directly"; + return CURVEFS_ERROR::OK; + } + + MetaStatusCode ret = MetaStatusCode::OK; + bool perceed = true; + std::string last = ""; + TxLock txLockOut; + do { + std::list part; + ret = metaClient_->ListDentry(fsId_, parent, last, limit, onlyDir, + &part, &txLockOut); + VLOG(6) << "ListDentry fsId = " << fsId_ << ", parent = " << parent + << ", last = " << last << ", count = " << limit + << ", onlyDir = " << onlyDir + << ", ret = " << ret << ", part.size() = " << part.size(); + if (ret == MetaStatusCode::TX_KEY_LOCKED) { + uint64_t ts = 0; + uint64_t timestamp = 0; + if (mdsClient_->Tso(&ts, ×tamp) != FSStatusCode::OK) { + LOG(ERROR) << "ListDentry Tso failed, parent = " << parent; + return CURVEFS_ERROR::INTERNAL; + } + Dentry dentry; + dentry.set_fsid(fsId_); + 
dentry.set_parentinodeid(parent); + if (part.empty()) { + LOG(ERROR) << "ListDentry tx key locked, but part is empty" + << ", parent = " << parent; + return CURVEFS_ERROR::INTERNAL; + } + dentry.set_name(part.back().name()); + part.pop_back(); + MetaStatusCode rc = CheckAndResolveTx( + dentry, txLockOut, timestamp, ts); + if (rc != MetaStatusCode::OK) { + LOG(ERROR) << "ListDentry CheckAndResolveTx failed, rc = " + << MetaStatusCode_Name(rc) + << ", parent = " << parent; + return CURVEFS_ERROR::INTERNAL; + } + } else if (ret != MetaStatusCode::OK) { + LOG(ERROR) << "metaClient_ ListDentry failed" + << ", MetaStatusCode_Name = " << MetaStatusCode_Name(ret) + << ", parent = " << parent << ", last = " << last + << ", count = " << limit << ", onlyDir = " << onlyDir; + return ToFSError(ret); + } + + if (!onlyDir) { + if (part.size() < limit && ret != MetaStatusCode::TX_KEY_LOCKED) { + perceed = false; + } + if (!part.empty()) { + last = part.back().name(); + dentryList->splice(dentryList->end(), part); + } + } else { + // means iterate over the range + if (part.empty() && ret != MetaStatusCode::TX_KEY_LOCKED) { + perceed = false; + } else { + if (!part.empty()) { + last = part.back().name(); + if (part.back().type() != FsFileType::TYPE_DIRECTORY) { + part.pop_back(); + } + dentryList->splice(dentryList->end(), part); + } + // means already get all the dir under this dir + if (nlink - dentryList->size() == 2) { + perceed = false; + } + } + } + } while (perceed); + + return CURVEFS_ERROR::OK; +} + +} // namespace client +} // namespace curvefs diff --git a/curvefs/src/client/dentry_cache_manager.h b/curvefs/src/client/dentry_manager.h similarity index 76% rename from curvefs/src/client/dentry_cache_manager.h rename to curvefs/src/client/dentry_manager.h index 84f9f20f53..0014a28349 100644 --- a/curvefs/src/client/dentry_cache_manager.h +++ b/curvefs/src/client/dentry_manager.h @@ -21,8 +21,8 @@ * Author: xuchaojie */ -#ifndef CURVEFS_SRC_CLIENT_DENTRY_CACHE_MANAGER_H_ 
-#define CURVEFS_SRC_CLIENT_DENTRY_CACHE_MANAGER_H_ +#ifndef CURVEFS_SRC_CLIENT_DENTRY_MANAGER_H_ +#define CURVEFS_SRC_CLIENT_DENTRY_MANAGER_H_ #include #include @@ -33,6 +33,7 @@ #include #include "curvefs/src/client/rpcclient/metaserver_client.h" +#include "curvefs/src/client/rpcclient/mds_client.h" #include "src/common/concurrent/concurrent.h" #include "src/common/concurrent/name_lock.h" #include "curvefs/src/client/filesystem/error.h" @@ -44,6 +45,7 @@ namespace client { using rpcclient::MetaServerClient; using rpcclient::MetaServerClientImpl; +using rpcclient::MdsClient; using ::curvefs::client::filesystem::CURVEFS_ERROR; static const char* kDentryKeyDelimiter = ":"; @@ -57,6 +59,8 @@ class DentryCacheManager { fsId_ = fsId; } + virtual void Init(std::shared_ptr mdsClient) = 0; + virtual CURVEFS_ERROR GetDentry(uint64_t parent, const std::string &name, Dentry *out) = 0; @@ -70,6 +74,9 @@ class DentryCacheManager { std::list *dentryList, uint32_t limit, bool onlyDir = false, uint32_t nlink = 0) = 0; + virtual MetaStatusCode CheckAndResolveTx(const Dentry& dentry, + const TxLock& txLock, uint64_t timestamp, uint64_t commitTs) = 0; + protected: uint32_t fsId_; }; @@ -83,6 +90,10 @@ class DentryCacheManagerImpl : public DentryCacheManager { const std::shared_ptr &metaClient) : metaClient_(metaClient) {} + void Init(std::shared_ptr mdsClient) override { + mdsClient_ = mdsClient; + } + CURVEFS_ERROR GetDentry(uint64_t parent, const std::string &name, Dentry *out) override; @@ -96,11 +107,22 @@ class DentryCacheManagerImpl : public DentryCacheManager { std::list *dentryList, uint32_t limit, bool dirOnly = false, uint32_t nlink = 0) override; + MetaStatusCode CheckAndResolveTx(const Dentry& dentry, const TxLock& txLock, + uint64_t timestamp, uint64_t commitTs) override; + std::string GetDentryCacheKey(uint64_t parent, const std::string &name) { return std::to_string(parent) + kDentryKeyDelimiter + name; } private: + MetaStatusCode CheckTxStatus(const std::string 
primaryKey, uint64_t startTs, + uint64_t curTimestamp); + + MetaStatusCode ResolveTxLock(const Dentry& dentry, uint64_t startTs, + uint64_t commitTs = 0); + + private: + std::shared_ptr mdsClient_; std::shared_ptr metaClient_; curve::common::GenericNameLock nameLock_; }; @@ -108,4 +130,4 @@ class DentryCacheManagerImpl : public DentryCacheManager { } // namespace client } // namespace curvefs -#endif // CURVEFS_SRC_CLIENT_DENTRY_CACHE_MANAGER_H_ +#endif // CURVEFS_SRC_CLIENT_DENTRY_MANAGER_H_ diff --git a/curvefs/src/client/filesystem/defer_sync.cpp b/curvefs/src/client/filesystem/defer_sync.cpp index 2b33e33ba4..3a91709f9d 100644 --- a/curvefs/src/client/filesystem/defer_sync.cpp +++ b/curvefs/src/client/filesystem/defer_sync.cpp @@ -30,14 +30,94 @@ namespace curvefs { namespace client { namespace filesystem { -DeferSync::DeferSync(DeferSyncOption option) - : option_(option), +using ::curve::common::LockGuard; +using ::curve::common::ReadLockGuard; +using ::curve::common::WriteLockGuard; +using ::curvefs::client::filesystem::AttrCtime; + +#define RETURN_FALSE_IF_CTO_ON() \ + do { \ + if (cto_) { \ + return false; \ + } \ + } while (0) + +DeferInodes::DeferInodes(bool cto) + : cto_(cto), + rwlock_(), + inodes_() {} + +bool DeferInodes::Add(const std::shared_ptr& inode) { + RETURN_FALSE_IF_CTO_ON(); + WriteLockGuard lk(rwlock_); + Ino ino = inode->GetInodeId(); + auto ret = inodes_.emplace(ino, inode); + auto iter = ret.first; + bool yes = ret.second; + if (!yes) { // already exists + iter->second = inode; + } + return true; +} + +bool DeferInodes::Get(Ino ino, std::shared_ptr* inode) { + RETURN_FALSE_IF_CTO_ON(); + ReadLockGuard lk(rwlock_); + auto iter = inodes_.find(ino); + if (iter == inodes_.end()) { + return false; + } + *inode = iter->second; + return true; +} + +bool DeferInodes::Remove(const std::shared_ptr& inode) { + RETURN_FALSE_IF_CTO_ON(); + WriteLockGuard lk(rwlock_); + InodeAttr attr; + inode->GetInodeAttrLocked(&attr); + auto iter = 
inodes_.find(attr.inodeid()); + if (iter == inodes_.end()) { + return false; + } + + InodeAttr defered; + iter->second->GetInodeAttrLocked(&defered); + if (AttrCtime(attr) < AttrCtime(defered)) { + // it means the old defered inode already replaced by the lastest one, + // so we can't remove it before it synced yet. + return false; + } + inodes_.erase(iter); + return true; +} + +size_t DeferInodes::Size() { + ReadLockGuard lk(rwlock_); + return inodes_.size(); +} + +SyncInodeClosure::SyncInodeClosure(const std::shared_ptr& inodes, + const std::shared_ptr& inode) + : inodes_(inodes), inode_(inode) {} + +void SyncInodeClosure::Run() { + std::unique_ptr self_guard(this); + MetaStatusCode rc = GetStatusCode(); + if (rc == MetaStatusCode::OK || rc == MetaStatusCode::NOT_FOUND) { + inodes_->Remove(inode_); + } +} + +DeferSync::DeferSync(bool cto, DeferSyncOption option) + : cto_(cto), + option_(option), mutex_(), running_(false), thread_(), sleeper_(), - inodes_() { -} + pending_(), + inodes_(std::make_shared(cto)) {} void DeferSync::Start() { if (!running_.exchange(true)) { @@ -55,20 +135,32 @@ void DeferSync::Stop() { } } +SyncInodeClosure* DeferSync::NewSyncInodeClosure( + const std::shared_ptr& inode) { + // NOTE: we only store the defer inodes in nocto scenario, + // which means we don't need to remove the inode from defer inodes + // even if the inode already synced done in cto scenario. 
+ if (cto_) { + return nullptr; + } + return new SyncInodeClosure(inodes_, inode); +} + void DeferSync::SyncTask() { - std::vector> inodes; + std::vector> syncing; for ( ;; ) { bool running = sleeper_.wait_for(std::chrono::seconds(option_.delay)); { LockGuard lk(mutex_); - inodes.swap(inodes_); + syncing.swap(pending_); } - for (const auto& inode : inodes) { + for (const auto& inode : syncing) { + auto closure = NewSyncInodeClosure(inode); UniqueLock lk(inode->GetUniqueLock()); - inode->Async(nullptr, true); + inode->Async(closure, true); } - inodes.clear(); + syncing.clear(); if (!running) { break; @@ -78,18 +170,12 @@ void DeferSync::SyncTask() { void DeferSync::Push(const std::shared_ptr& inode) { LockGuard lk(mutex_); - inodes_.emplace_back(inode); + pending_.emplace_back(inode); + inodes_->Add(inode); } -bool DeferSync::IsDefered(Ino ino, InodeAttr* attr) { - LockGuard lk(mutex_); - for (const auto& inode : inodes_) { - if (inode->GetInodeId() == ino) { - inode->GetInodeAttr(attr); - return true; - } - } - return false; +bool DeferSync::IsDefered(Ino ino, std::shared_ptr* inode) { + return inodes_->Get(ino, inode); } } // namespace filesystem diff --git a/curvefs/src/client/filesystem/defer_sync.h b/curvefs/src/client/filesystem/defer_sync.h index 41264a7e00..17472ce11e 100644 --- a/curvefs/src/client/filesystem/defer_sync.h +++ b/curvefs/src/client/filesystem/defer_sync.h @@ -27,23 +27,56 @@ #include #include +#include "absl/container/btree_map.h" #include "src/common/interruptible_sleeper.h" #include "curvefs/src/client/common/config.h" +#include "curvefs/src/client/rpcclient/task_excutor.h" #include "curvefs/src/client/filesystem/meta.h" namespace curvefs { namespace client { namespace filesystem { -using ::curvefs::client::common::DeferSyncOption; - +using ::curve::common::RWLock; using ::curve::common::Mutex; -using ::curve::common::LockGuard; using ::curve::common::InterruptibleSleeper; +using ::curvefs::client::common::DeferSyncOption; +using 
::curvefs::client::rpcclient::MetaServerClientDone; + +// NOTE: we only store the defer inodes in nocto scenario. +class DeferInodes { + public: + explicit DeferInodes(bool cto); + + bool Add(const std::shared_ptr& inode); + + bool Get(Ino ino, std::shared_ptr* inode); + + bool Remove(const std::shared_ptr& inode); + + size_t Size(); + + private: + bool cto_; + RWLock rwlock_; + absl::btree_map> inodes_; +}; + +class SyncInodeClosure : public MetaServerClientDone { + public: + explicit SyncInodeClosure(const std::shared_ptr& inodes, + const std::shared_ptr& inode); + + void Run() override; + + private: + std::shared_ptr inodes_; + std::shared_ptr inode_; +}; class DeferSync { public: - explicit DeferSync(DeferSyncOption option); + explicit DeferSync(bool cto, DeferSyncOption option); void Start(); @@ -51,18 +84,26 @@ class DeferSync { void Push(const std::shared_ptr& inode); - bool IsDefered(Ino ino, InodeAttr* attr); + bool IsDefered(Ino ino, std::shared_ptr* inode); private: + SyncInodeClosure* NewSyncInodeClosure( + const std::shared_ptr& inode); + void SyncTask(); private: + friend class SyncInodeClosure; + + private: + bool cto_; DeferSyncOption option_; Mutex mutex_; std::atomic running_; std::thread thread_; InterruptibleSleeper sleeper_; - std::vector> inodes_; + std::vector> pending_; + std::shared_ptr inodes_; }; } // namespace filesystem diff --git a/curvefs/src/client/filesystem/filesystem.cpp b/curvefs/src/client/filesystem/filesystem.cpp index 2aad563051..4546ade2af 100644 --- a/curvefs/src/client/filesystem/filesystem.cpp +++ b/curvefs/src/client/filesystem/filesystem.cpp @@ -31,7 +31,8 @@ namespace filesystem { FileSystem::FileSystem(FileSystemOption option, ExternalMember member) : option_(option), member(member) { - deferSync_ = std::make_shared(option.deferSyncOption); + deferSync_ = std::make_shared(option.cto, + option.deferSyncOption); negative_ = std::make_shared(option.lookupCacheOption); dirCache_ = std::make_shared(option.dirCacheOption); 
openFiles_ = std::make_shared(option_.openFilesOption, @@ -257,11 +258,6 @@ CURVEFS_ERROR FileSystem::Lookup(Ino parent, CURVEFS_ERROR FileSystem::GetAttr(Ino ino, AttrOut* attrOut) { InodeAttr attr; - if (!option_.cto && deferSync_->IsDefered(ino, &attr)) { - *attrOut = AttrOut(attr); - return CURVEFS_ERROR::OK; - } - auto rc = rpc_->GetAttr(ino, &attr); if (rc == CURVEFS_ERROR::OK) { *attrOut = AttrOut(attr); @@ -319,7 +315,7 @@ CURVEFS_ERROR FileSystem::Open(Ino ino, FileInfo* fi) { bool yes = openFiles_->IsOpened(ino, &inode); if (yes) { openFiles_->Open(ino, inode); - // fi->keep_cache = 1; + // fi->keep_cache = 1; // FIXME(Wine93): let it works. return CURVEFS_ERROR::OK; } diff --git a/curvefs/src/client/filesystem/package.h b/curvefs/src/client/filesystem/package.h index 47f9fccc36..8eb7a14b23 100644 --- a/curvefs/src/client/filesystem/package.h +++ b/curvefs/src/client/filesystem/package.h @@ -25,8 +25,8 @@ #include -#include "curvefs/src/client/dentry_cache_manager.h" -#include "curvefs/src/client/inode_cache_manager.h" +#include "curvefs/src/client/dentry_manager.h" +#include "curvefs/src/client/inode_manager.h" #include "curvefs/src/client/xattr_manager.h" namespace curvefs { diff --git a/curvefs/src/client/fuse_client.cpp b/curvefs/src/client/fuse_client.cpp index 6f1c0b4771..f1e19721e1 100644 --- a/curvefs/src/client/fuse_client.cpp +++ b/curvefs/src/client/fuse_client.cpp @@ -104,6 +104,8 @@ DECLARE_uint64(fuseClientAvgReadBytes); DECLARE_uint64(fuseClientBurstReadBytes); DECLARE_uint64(fuseClientBurstReadBytesSecs); DECLARE_bool(fs_disableXattr); + +DECLARE_int32(TxVersion); } // namespace common } // namespace client } // namespace curvefs @@ -158,6 +160,8 @@ CURVEFS_ERROR FuseClient::Init(const FuseClientOption &option) { metaCache, mdsClient_, &enableSumInDir_); + dentryManager_->Init(mdsClient_); + xattrManager_ = std::make_shared(inodeManager_, dentryManager_, option_.listDentryLimit, option_.listDentryThreads); @@ -368,35 +372,6 @@ 
CURVEFS_ERROR FuseClient::FuseOpOpen(fuse_req_t req, return HandleOpenFlags(req, ino, fi, fileOut); } -CURVEFS_ERROR FuseClient::UpdateParentMCTimeAndNlink( - fuse_ino_t parent, FsFileType type, NlinkChange nlink) { - - std::shared_ptr parentInodeWrapper; - auto ret = inodeManager_->GetInode(parent, parentInodeWrapper); - if (ret != CURVEFS_ERROR::OK) { - LOG(ERROR) << "inodeManager get inode fail, ret = " << ret - << ", inodeid = " << parent; - return ret; - } - - { - curve::common::UniqueLock lk = parentInodeWrapper->GetUniqueLock(); - parentInodeWrapper->UpdateTimestampLocked(kModifyTime | kChangeTime); - - if (FsFileType::TYPE_DIRECTORY == type) { - parentInodeWrapper->UpdateNlinkLocked(nlink); - } - - if (option_.fileSystemOption.deferSyncOption.deferDirMtime) { - inodeManager_->ShipToFlush(parentInodeWrapper); - } else { - return parentInodeWrapper->SyncAttr(); - } - } - - return CURVEFS_ERROR::OK; -} - CURVEFS_ERROR FuseClient::MakeNode( fuse_req_t req, fuse_ino_t parent, @@ -535,15 +510,6 @@ CURVEFS_ERROR FuseClient::DeleteNode(uint64_t ino, fuse_ino_t parent, return ret; } - ret = UpdateParentMCTimeAndNlink(parent, type, NlinkChange::kSubOne); - if (ret != CURVEFS_ERROR::OK) { - LOG(ERROR) << "UpdateParentMCTimeAndNlink failed" - << ", parent: " << parent - << ", name: " << name - << ", type: " << type; - return ret; - } - std::shared_ptr inodeWrapper; ret = inodeManager_->GetInode(ino, inodeWrapper); if (ret != CURVEFS_ERROR::OK) { @@ -933,38 +899,63 @@ CURVEFS_ERROR FuseClient::FuseOpRename(fuse_req_t req, fuse_ino_t parent, return CURVEFS_ERROR::NAME_TOO_LONG; } - auto renameOp = - RenameOperator(fsInfo_->fsid(), fsInfo_->fsname(), - parent, name, newparent, newname, - dentryManager_, inodeManager_, metaClient_, mdsClient_, - option_.enableMultiMountPointRename); + auto renameOp = RenameOperator(fsInfo_->fsid(), fsInfo_->fsname(), parent, + name, newparent, newname, dentryManager_, inodeManager_, metaClient_, + mdsClient_, 
option_.enableMultiMountPointRename); - curve::common::LockGuard lg(renameMutex_); CURVEFS_ERROR rc = CURVEFS_ERROR::OK; - VLOG(3) << "FuseOpRename [start]: " << renameOp.DebugString(); - RETURN_IF_UNSUCCESS(GetTxId); - RETURN_IF_UNSUCCESS(Precheck); - RETURN_IF_UNSUCCESS(RecordOldInodeInfo); - // Do not move LinkDestParentInode behind CommitTx. - // If so, the nlink will be lost when the machine goes down - RETURN_IF_UNSUCCESS(LinkDestParentInode); - RETURN_IF_UNSUCCESS(PrepareTx); - RETURN_IF_UNSUCCESS(CommitTx); - VLOG(3) << "FuseOpRename [success]: " << renameOp.DebugString(); - // Do not check UnlinkSrcParentInode, beause rename is already success - renameOp.UnlinkSrcParentInode(); - renameOp.UnlinkOldInode(); - if (parent != newparent) { - renameOp.UpdateInodeParent(); - } - renameOp.UpdateInodeCtime(); - renameOp.UpdateCache(); + if (common::FLAGS_TxVersion == 1) { + curve::common::LockGuard lg(renameMutex_); + VLOG(3) << "FuseOpRename [start]: " << renameOp.DebugString(); + RETURN_IF_UNSUCCESS(GetTxId); + RETURN_IF_UNSUCCESS(Precheck); + RETURN_IF_UNSUCCESS(RecordOldInodeInfo); + // Do not move LinkDestParentInode behind CommitTx. 
+ // If so, the nlink will be lost when the machine goes down + RETURN_IF_UNSUCCESS(LinkDestParentInode); + RETURN_IF_UNSUCCESS(PrepareTx); + RETURN_IF_UNSUCCESS(CommitTx); + VLOG(3) << "FuseOpRename [success]: " << renameOp.DebugString(); + // Do not check UnlinkSrcParentInode, beause rename is already success + renameOp.UnlinkSrcParentInode(); + renameOp.UnlinkOldInode(); + if (parent != newparent) { + renameOp.UpdateInodeParent(); + } + renameOp.UpdateInodeCtime(); + renameOp.UpdateCache(); - if (enableSumInDir_.load()) { - xattrManager_->UpdateParentXattrAfterRename( - parent, newparent, newname, &renameOp); - } + if (enableSumInDir_.load()) { + xattrManager_->UpdateParentXattrAfterRename( + parent, newparent, newname, &renameOp); + } + } else if (common::FLAGS_TxVersion == 2) { + VLOG(3) << "FuseOpRename [start]: " << renameOp.DebugString(); + RETURN_IF_UNSUCCESS(Precheck); + RETURN_IF_UNSUCCESS(RecordOldInodeInfo); + // Do not move LinkDestParentInode behind CommitTx. + // If so, the nlink will be lost when the machine goes down + RETURN_IF_UNSUCCESS(LinkDestParentInode); + RETURN_IF_UNSUCCESS(PrewriteTx); + RETURN_IF_UNSUCCESS(CommitTxV2); + VLOG(3) << "FuseOpRename [success]: " << renameOp.DebugString(); + // Do not check UnlinkSrcParentInode, beause rename is already success + renameOp.UnlinkSrcParentInode(); + renameOp.UnlinkOldInode(); + if (parent != newparent) { + renameOp.UpdateInodeParent(); + } + renameOp.UpdateInodeCtime(); + if (enableSumInDir_.load()) { + xattrManager_->UpdateParentXattrAfterRename( + parent, newparent, newname, &renameOp); + } + } else { + LOG(ERROR) << "FuseOpRename not support tx version: " + << common::FLAGS_TxVersion; + return CURVEFS_ERROR::NOT_SUPPORT; + } return rc; } @@ -1175,6 +1166,13 @@ CURVEFS_ERROR FuseClient::FuseOpListXattr(fuse_req_t req, fuse_ino_t ino, // +1 because, the format is key\0key\0 *realSize += it.first.length() + 1; } + // add summary xattr key + if (inodeAttr.type() == FsFileType::TYPE_DIRECTORY) { 
+ *realSize += strlen(XATTR_DIR_RFILES) + 1; + *realSize += strlen(XATTR_DIR_RSUBDIRS) + 1; + *realSize += strlen(XATTR_DIR_RENTRIES) + 1; + *realSize += strlen(XATTR_DIR_RFBYTES) + 1; + } if (size == 0) { return CURVEFS_ERROR::OK; @@ -1188,6 +1186,16 @@ CURVEFS_ERROR FuseClient::FuseOpListXattr(fuse_req_t req, fuse_ino_t ino, memcpy(value, it.first.c_str(), tsize); value += tsize; } + if (inodeAttr.type() == FsFileType::TYPE_DIRECTORY) { + memcpy(value, XATTR_DIR_RFILES, strlen(XATTR_DIR_RFILES) + 1); + value += strlen(XATTR_DIR_RFILES) + 1; + memcpy(value, XATTR_DIR_RSUBDIRS, strlen(XATTR_DIR_RSUBDIRS) + 1); + value += strlen(XATTR_DIR_RSUBDIRS) + 1; + memcpy(value, XATTR_DIR_RENTRIES, strlen(XATTR_DIR_RENTRIES) + 1); + value += strlen(XATTR_DIR_RENTRIES) + 1; + memcpy(value, XATTR_DIR_RFBYTES, strlen(XATTR_DIR_RFBYTES) + 1); + value += strlen(XATTR_DIR_RFBYTES) + 1; + } return CURVEFS_ERROR::OK; } return CURVEFS_ERROR::OUT_OF_RANGE; diff --git a/curvefs/src/client/fuse_client.h b/curvefs/src/client/fuse_client.h index 13adc3c591..aae53041cb 100644 --- a/curvefs/src/client/fuse_client.h +++ b/curvefs/src/client/fuse_client.h @@ -41,13 +41,13 @@ #include "curvefs/src/client/client_operator.h" #include "curvefs/src/client/common/common.h" #include "curvefs/src/client/common/config.h" -#include "curvefs/src/client/dentry_cache_manager.h" +#include "curvefs/src/client/dentry_manager.h" #include "curvefs/src/client/dir_buffer.h" #include "curvefs/src/client/filesystem/error.h" #include "curvefs/src/client/filesystem/filesystem.h" #include "curvefs/src/client/filesystem/meta.h" #include "curvefs/src/client/fuse_common.h" -#include "curvefs/src/client/inode_cache_manager.h" +#include "curvefs/src/client/inode_manager.h" #include "curvefs/src/client/lease/lease_excutor.h" #include "curvefs/src/client/metric/client_metric.h" #include "curvefs/src/client/rpcclient/mds_client.h" @@ -415,9 +415,6 @@ class FuseClient { private: virtual void FlushData() = 0; - CURVEFS_ERROR 
UpdateParentMCTimeAndNlink( - fuse_ino_t parent, FsFileType type, NlinkChange nlink); - std::string GenerateNewRecycleName(fuse_ino_t ino, fuse_ino_t parent, const char* name) { std::string newName(name); diff --git a/curvefs/src/client/inode_cache_manager.cpp b/curvefs/src/client/inode_manager.cpp similarity index 80% rename from curvefs/src/client/inode_cache_manager.cpp rename to curvefs/src/client/inode_manager.cpp index e8601c1526..d3aa1c9fac 100644 --- a/curvefs/src/client/inode_cache_manager.cpp +++ b/curvefs/src/client/inode_manager.cpp @@ -21,7 +21,7 @@ * Author: xuchaojie */ -#include "curvefs/src/client/inode_cache_manager.h" +#include "curvefs/src/client/inode_manager.h" #include #include @@ -30,6 +30,8 @@ #include #include "curvefs/proto/metaserver.pb.h" #include "curvefs/src/client/filesystem/error.h" +#include "curvefs/src/client/filesystem/utils.h" +#include "curvefs/src/client/filesystem/defer_sync.h" #include "curvefs/src/client/inode_wrapper.h" using ::curvefs::metaserver::Inode; @@ -47,10 +49,73 @@ namespace curvefs { namespace client { using ::curvefs::client::filesystem::ToFSError; +using ::curvefs::client::filesystem::AttrCtime; using NameLockGuard = ::curve::common::GenericNameLockGuard; using curvefs::client::common::FLAGS_enableCto; +#define RETURN_IF_CTO_ON() \ + do { \ + if (cto_) { \ + return; \ + } \ + } while (0) + +DeferWatcher::DeferWatcher(bool cto, std::shared_ptr deferSync) + : cto_(cto), + deferSync_(deferSync), + deferAttrs_() {} + +void DeferWatcher::PreGetAttrs(const std::set& inos) { + RETURN_IF_CTO_ON(); + InodeAttr attr; + std::shared_ptr inode; + for (const auto& ino : inos) { + bool yes = deferSync_->IsDefered(ino, &inode); + if (!yes) { + continue; + } + inode->GetInodeAttr(&attr); + deferAttrs_.emplace(ino, attr); + } +} + +bool DeferWatcher::TryUpdate(InodeAttr* attr) { + Ino ino = attr->inodeid(); + auto iter = deferAttrs_.find(ino); + if (iter == deferAttrs_.end()) { + return false; + } + + auto& defered = 
iter->second; + if (AttrCtime(*attr) > AttrCtime(defered)) { + return false; + } + *attr = defered; + return true; +} + +void DeferWatcher::PostGetAttrs(std::list* attrs) { + RETURN_IF_CTO_ON(); + if (deferAttrs_.size() == 0) { + return; + } + for (auto& attr : *attrs) { + TryUpdate(&attr); + } +} + +void DeferWatcher::PostGetAttrs(std::map* attrs) { + RETURN_IF_CTO_ON(); + if (deferAttrs_.size() == 0) { + return; + } + for (auto& item : *attrs) { + auto& attr = item.second; + TryUpdate(&attr); + } +} + #define GET_INODE_REMOTE(FSID, INODEID, OUT, STREAMING) \ MetaStatusCode ret = metaClient_->GetInode(FSID, INODEID, OUT, STREAMING); \ if (ret != MetaStatusCode::OK) { \ @@ -76,6 +141,11 @@ InodeCacheManagerImpl::GetInode(uint64_t inodeId, return CURVEFS_ERROR::OK; } + bool cto = FLAGS_enableCto; + if (!cto && deferSync_->IsDefered(inodeId, &out)) { + return CURVEFS_ERROR::OK; + } + // get inode from metaserver Inode inode; bool streaming = false; @@ -97,6 +167,11 @@ CURVEFS_ERROR InodeCacheManagerImpl::GetInodeAttr(uint64_t inodeId, std::set inodeIds; std::list attrs; inodeIds.emplace(inodeId); + + bool cto = FLAGS_enableCto; + auto watcher = std::make_shared(cto, deferSync_); + watcher->PreGetAttrs(inodeIds); + MetaStatusCode ret = metaClient_->BatchGetInodeAttr(fsId_, inodeIds, &attrs); if (MetaStatusCode::OK != ret) { @@ -113,6 +188,7 @@ CURVEFS_ERROR InodeCacheManagerImpl::GetInodeAttr(uint64_t inodeId, return CURVEFS_ERROR::INTERNAL; } + watcher->PostGetAttrs(&attrs); *out = *attrs.begin(); return CURVEFS_ERROR::OK; } @@ -124,6 +200,10 @@ CURVEFS_ERROR InodeCacheManagerImpl::BatchGetInodeAttr( return CURVEFS_ERROR::OK; } + bool cto = FLAGS_enableCto; + auto watcher = std::make_shared(cto, deferSync_); + watcher->PreGetAttrs(*inodeIds); + MetaStatusCode ret = metaClient_->BatchGetInodeAttr(fsId_, *inodeIds, attrs); if (MetaStatusCode::OK != ret) { @@ -131,6 +211,7 @@ CURVEFS_ERROR InodeCacheManagerImpl::BatchGetInodeAttr( << ret << ", MetaStatusCode_Name = " << 
MetaStatusCode_Name(ret); } + watcher->PostGetAttrs(attrs); return ToFSError(ret); } @@ -144,6 +225,10 @@ CURVEFS_ERROR InodeCacheManagerImpl::BatchGetInodeAttrAsync( return CURVEFS_ERROR::OK; } + bool cto = FLAGS_enableCto; + auto watcher = std::make_shared(cto, deferSync_); + watcher->PreGetAttrs(*inodeIds); + // split inodeIds by partitionId and batch limit std::vector> inodeGroups; if (!metaClient_->SplitRequestInodes(fsId_, *inodeIds, &inodeGroups)) { @@ -168,6 +253,7 @@ CURVEFS_ERROR InodeCacheManagerImpl::BatchGetInodeAttrAsync( // wait for all sudrequest finished cond->Wait(); + watcher->PostGetAttrs(attrs); return CURVEFS_ERROR::OK; } diff --git a/curvefs/src/client/inode_cache_manager.h b/curvefs/src/client/inode_manager.h similarity index 91% rename from curvefs/src/client/inode_cache_manager.h rename to curvefs/src/client/inode_manager.h index ce0ebb3d17..093b5e750d 100644 --- a/curvefs/src/client/inode_cache_manager.h +++ b/curvefs/src/client/inode_manager.h @@ -21,8 +21,8 @@ * Author: xuchaojie */ -#ifndef CURVEFS_SRC_CLIENT_INODE_CACHE_MANAGER_H_ -#define CURVEFS_SRC_CLIENT_INODE_CACHE_MANAGER_H_ +#ifndef CURVEFS_SRC_CLIENT_INODE_MANAGER_H_ +#define CURVEFS_SRC_CLIENT_INODE_MANAGER_H_ #include #include @@ -46,6 +46,7 @@ #include "curvefs/src/client/common/config.h" #include "curvefs/src/client/filesystem/openfile.h" #include "curvefs/src/client/filesystem/defer_sync.h" +#include "absl/container/btree_map.h" using ::curve::common::LRUCache; using ::curve::common::CacheMetrics; @@ -66,6 +67,7 @@ using rpcclient::BatchGetInodeAttrDone; using curve::common::CountDownEvent; using metric::S3ChunkInfoMetric; using common::RefreshDataOption; +using ::curvefs::client::filesystem::Ino; using ::curvefs::client::filesystem::OpenFiles; using ::curvefs::client::filesystem::DeferSync; @@ -116,6 +118,25 @@ class InodeCacheManager { uint32_t fsId_; }; +class DeferWatcher { + public: + DeferWatcher(bool cto, std::shared_ptr deferSync); + + void PreGetAttrs(const 
std::set& inos); + + void PostGetAttrs(std::list* attrs); + + void PostGetAttrs(std::map* attrs); + + private: + bool TryUpdate(InodeAttr* attr); + + private: + bool cto_; + std::shared_ptr deferSync_; + absl::btree_map deferAttrs_; +}; + class InodeCacheManagerImpl : public InodeCacheManager, public std::enable_shared_from_this { public: @@ -221,4 +242,4 @@ class BatchGetInodeAttrAsyncDone : public BatchGetInodeAttrDone { } // namespace client } // namespace curvefs -#endif // CURVEFS_SRC_CLIENT_INODE_CACHE_MANAGER_H_ +#endif // CURVEFS_SRC_CLIENT_INODE_MANAGER_H_ diff --git a/curvefs/src/client/kvclient/kvclient_manager.cpp b/curvefs/src/client/kvclient/kvclient_manager.cpp index c87630758b..9e677cc212 100644 --- a/curvefs/src/client/kvclient/kvclient_manager.cpp +++ b/curvefs/src/client/kvclient/kvclient_manager.cpp @@ -62,10 +62,12 @@ void KVClientManager::Uninit() { } void KVClientManager::Set(std::shared_ptr task) { + kvClientManagerMetric_->setQueueSize << 1; threadPool_.Enqueue([task, this]() { std::string error_log; task->res = client_->Set(task->key, task->value, task->length, &error_log); + kvClientManagerMetric_->setQueueSize << -1; if (task->res) { kvClientManagerMetric_->count << 1; } @@ -96,12 +98,14 @@ void UpdateHitMissMetric(memcached_return_t retCode, } void KVClientManager::Get(std::shared_ptr task) { + kvClientManagerMetric_->getQueueSize << 1; threadPool_.Enqueue([task, this]() { std::string error_log; memcached_return_t retCode; task->res = client_->Get(task->key, task->value, task->offset, task->valueLength, &error_log, &task->length, &retCode); + kvClientManagerMetric_->getQueueSize << -1; UpdateHitMissMetric(retCode, kvClientManagerMetric_.get()); OnReturn(&kvClientManagerMetric_->get, task); }); diff --git a/curvefs/src/client/kvclient/memcache_client.h b/curvefs/src/client/kvclient/memcache_client.h index 57e82a7f44..7ca3ea1248 100644 --- a/curvefs/src/client/kvclient/memcache_client.h +++ b/curvefs/src/client/kvclient/memcache_client.h 
@@ -166,9 +166,10 @@ class MemCachedClient : public KVClient { *errorlog = ResError(ue); if (ue != MEMCACHED_NOTFOUND) { - LOG(ERROR) << "Get key = " << key << " error = " << *errorlog - << ", get_value_len = " << value_length - << ", expect_value_len = " << length; + LOG_EVERY_N(WARNING, 1000) << "Get key = " << key + << " error = " << *errorlog << ", get_value_len = " + << value_length << ", expect_value_len = " << length; + free(res); memcached_free(tcli); tcli = nullptr; } diff --git a/curvefs/src/client/lease/lease_excutor.cpp b/curvefs/src/client/lease/lease_excutor.cpp index 08dc400c88..13972db561 100644 --- a/curvefs/src/client/lease/lease_excutor.cpp +++ b/curvefs/src/client/lease/lease_excutor.cpp @@ -31,6 +31,10 @@ using curvefs::mds::topology::PartitionTxId; namespace curvefs { namespace client { +namespace common { +DECLARE_int32(TxVersion); +} // namespace common + LeaseExecutor::~LeaseExecutor() { if (task_) { task_->Stop(); @@ -71,26 +75,32 @@ void LeaseExecutor::Stop() { } bool LeaseExecutor::RefreshLease() { + // for tx v2 txIds and latestTxIdList will empty here // get partition txid list std::vector txIds; - metaCache_->GetAllTxIds(&txIds); - + if (common::FLAGS_TxVersion == 1) { + metaCache_->GetAllTxIds(&txIds); + } // refresh from mds std::vector latestTxIdList; - FSStatusCode ret = mdsCli_->RefreshSession(txIds, &latestTxIdList, - fsName_, mountpoint_, - enableSumInDir_); + std::string mdsAddrs = mdsCli_->GetMdsAddrs(); + std::string mdsAddrsOverride; + FSStatusCode ret = + mdsCli_->RefreshSession(txIds, &latestTxIdList, fsName_, mountpoint_, + enableSumInDir_, mdsAddrs, &mdsAddrsOverride); + if (ret != FSStatusCode::OK) { LOG(ERROR) << "LeaseExecutor refresh session fail, ret = " << ret << ", errorName = " << FSStatusCode_Name(ret); return true; } - // update to metacache std::for_each(latestTxIdList.begin(), latestTxIdList.end(), [&](const PartitionTxId &item) { metaCache_->SetTxId(item.partitionid(), item.txid()); }); + // update mds 
addrs + mdsCli_->SetMdsAddrs(mdsAddrsOverride); return true; } diff --git a/curvefs/src/client/metric/client_metric.cpp b/curvefs/src/client/metric/client_metric.cpp index d6c2592ebb..b9db382123 100644 --- a/curvefs/src/client/metric/client_metric.cpp +++ b/curvefs/src/client/metric/client_metric.cpp @@ -75,17 +75,18 @@ void AsyncContextCollectMetrics( std::shared_ptr s3Metric, const std::shared_ptr& context) { if (s3Metric.get() != nullptr) { - CollectMetrics(&s3Metric->adaptorReadS3, context->actualLen, - context->timer.u_elapsed()); + + CollectMetrics(&s3Metric->adaptorAsyncReadS3, context->actualLen, + butil::cpuwide_time_us() - context->start); switch (context->type) { case curve::common::ContextType::Disk: - CollectMetrics(&s3Metric->readFromDiskCache, context->actualLen, - context->timer.u_elapsed()); + CollectMetrics(&s3Metric->asyncReadDiskCache, context->actualLen, + butil::cpuwide_time_us() - context->start); break; case curve::common::ContextType::S3: - CollectMetrics(&s3Metric->readFromS3, context->actualLen, - context->timer.u_elapsed()); + CollectMetrics(&s3Metric->asyncReadFromS3, context->actualLen, + butil::cpuwide_time_us() - context->start); break; default: break; diff --git a/curvefs/src/client/metric/client_metric.h b/curvefs/src/client/metric/client_metric.h index bfbf0f3373..a55dc7e8c7 100644 --- a/curvefs/src/client/metric/client_metric.h +++ b/curvefs/src/client/metric/client_metric.h @@ -55,6 +55,7 @@ struct MDSClientMetric { InterfaceMetric refreshSession; InterfaceMetric getLatestTxId; InterfaceMetric commitTx; + InterfaceMetric tso; InterfaceMetric allocOrGetMemcacheCluster; MDSClientMetric() @@ -70,6 +71,7 @@ struct MDSClientMetric { refreshSession(prefix, "refreshSession"), getLatestTxId(prefix, "getLatestTxId"), commitTx(prefix, "commitTx"), + tso(prefix, "tso"), allocOrGetMemcacheCluster(prefix, "allocOrGetMemcacheCluster") {} }; @@ -93,6 +95,10 @@ struct MetaServerClientMetric { // tnx InterfaceMetric prepareRenameTx; + 
InterfaceMetric prewriteRenameTx; + InterfaceMetric checkTxStatus; + InterfaceMetric resolveTxLock; + InterfaceMetric commitTx; // volume extent InterfaceMetric updateVolumeExtent; @@ -100,9 +106,11 @@ struct MetaServerClientMetric { InterfaceMetric updateDeallocatableBlockGroup; MetaServerClientMetric() - : getDentry(prefix, "getDentry"), listDentry(prefix, "listDentry"), + : getDentry(prefix, "getDentry"), + listDentry(prefix, "listDentry"), createDentry(prefix, "createDentry"), - deleteDentry(prefix, "deleteDentry"), getInode(prefix, "getInode"), + deleteDentry(prefix, "deleteDentry"), + getInode(prefix, "getInode"), batchGetInodeAttr(prefix, "batchGetInodeAttr"), batchGetXattr(prefix, "batchGetXattr"), createInode(prefix, "createInode"), @@ -110,10 +118,14 @@ struct MetaServerClientMetric { deleteInode(prefix, "deleteInode"), appendS3ChunkInfo(prefix, "appendS3ChunkInfo"), prepareRenameTx(prefix, "prepareRenameTx"), + prewriteRenameTx(prefix, "prewriteRenameTx"), + checkTxStatus(prefix, "checkTxStatus"), + resolveTxLock(prefix, "resolveTxLock"), + commitTx(prefix, "commitTx"), updateVolumeExtent(prefix, "updateVolumeExtent"), getVolumeExtent(prefix, "getVolumeExtent"), - updateDeallocatableBlockGroup(prefix, - "updateDeallocatableBlockGroup") {} + updateDeallocatableBlockGroup( + prefix, "updateDeallocatableBlockGroup") {} }; struct InflightGuard { @@ -239,6 +251,17 @@ struct S3Metric { std::string fsName; InterfaceMetric adaptorWrite; InterfaceMetric adaptorRead; + + InterfaceMetric adaptorDequeue; + InterfaceMetric adaptorProcess; + + InterfaceMetric adaptorAsyncReadS3; + InterfaceMetric asyncReadDiskCache; + InterfaceMetric asyncReadFromS3; + + InterfaceMetric waitDownloading; + + InterfaceMetric adaptorWriteS3; InterfaceMetric adaptorWriteDiskCache; InterfaceMetric adaptorReadS3; @@ -258,11 +281,21 @@ struct S3Metric { bvar::Status readSize; bvar::Status writeSize; + bvar::Adder readAllHitsMemCounts; + bvar::Adder readRequestCounts; + bvar::Adder 
s3ReadRequestCounts; + explicit S3Metric(const std::string& name = "") : fsName(!name.empty() ? name : prefix + curve::common::ToHexString(this)), adaptorWrite(prefix, fsName + "_adaptor_write"), adaptorRead(prefix, fsName + "_adaptor_read"), + adaptorDequeue(prefix, fsName + "_adaptor_dequeue"), + adaptorProcess(prefix, fsName + "_adaptor_process"), + adaptorAsyncReadS3(prefix, fsName + "_adaptor_async_read"), + asyncReadDiskCache(prefix, fsName + "_async_read_from_disk"), + asyncReadFromS3(prefix, fsName + "_async_read_from_s3"), + waitDownloading(prefix, fsName + "_wait_download"), adaptorWriteS3(prefix, fsName + "_adaptor_write_s3"), adaptorWriteDiskCache(prefix, fsName + "_adaptor_write_disk_cache"), adaptorReadS3(prefix, fsName + "_adaptor_read_s3"), @@ -274,7 +307,11 @@ struct S3Metric { writeToKVCache(prefix, fsName + "_write_to_kv_cache"), readFromKVCache(prefix, fsName + "_read_from_kv_cache"), readSize(prefix, fsName + "_adaptor_read_size", 0), - writeSize(prefix, fsName + "_adaptor_write_size", 0) {} + writeSize(prefix, fsName + "_adaptor_write_size", 0) { + readAllHitsMemCounts.expose_as(prefix, "read_all_hits_mem"); + readRequestCounts.expose_as(prefix, "read_request_counts"); + s3ReadRequestCounts.expose_as(prefix, "s3_read_request_counts"); + } }; template @@ -317,6 +354,10 @@ struct KVClientManagerMetric { bvar::Adder hit; // kvcache miss bvar::Adder miss; + // kvcache getQueueSize + bvar::Adder getQueueSize; + // kvcache setQueueSize + bvar::Adder setQueueSize; explicit KVClientManagerMetric(const std::string& name = "") : fsName(!name.empty() ? 
name @@ -325,7 +366,9 @@ struct KVClientManagerMetric { set(prefix, fsName + "_set"), count(prefix, fsName + "_count"), hit(prefix, fsName + "_hit"), - miss(prefix, fsName + "_miss") {} + miss(prefix, fsName + "_miss"), + getQueueSize(prefix, fsName + "_get_queue_size"), + setQueueSize(prefix, fsName + "_set_queue_size") {} }; struct MemcacheClientMetric { diff --git a/curvefs/src/client/rpcclient/BUILD b/curvefs/src/client/rpcclient/BUILD index 13cdfccf1c..cd39cfa2f5 100644 --- a/curvefs/src/client/rpcclient/BUILD +++ b/curvefs/src/client/rpcclient/BUILD @@ -39,5 +39,6 @@ cc_library( "//src/client:curve_client", "@com_google_absl//absl/cleanup", "@com_google_absl//absl/types:optional", + "@com_google_absl//absl/strings", ], ) diff --git a/curvefs/src/client/rpcclient/base_client.cpp b/curvefs/src/client/rpcclient/base_client.cpp index ca504f5201..a2f75ae177 100644 --- a/curvefs/src/client/rpcclient/base_client.cpp +++ b/curvefs/src/client/rpcclient/base_client.cpp @@ -167,6 +167,12 @@ void MDSBaseClient::CommitTx(const CommitTxRequest& request, stub.CommitTx(cntl, &request, response, nullptr); } +void MDSBaseClient::Tso(const TsoRequest& request, TsoResponse* response, + brpc::Controller* cntl, brpc::Channel* channel) { + curvefs::mds::MdsService_Stub stub(channel); + stub.Tso(cntl, &request, response, nullptr); +} + // TODO(all): do we really need pass `fsId` all the time? 
// each curve-fuse process only mount one filesystem void MDSBaseClient::AllocateVolumeBlockGroup( diff --git a/curvefs/src/client/rpcclient/base_client.h b/curvefs/src/client/rpcclient/base_client.h index f43d2854ee..7f3bd8161f 100644 --- a/curvefs/src/client/rpcclient/base_client.h +++ b/curvefs/src/client/rpcclient/base_client.h @@ -57,11 +57,11 @@ using curvefs::metaserver::GetInodeResponse; using curvefs::metaserver::Inode; using curvefs::metaserver::ListDentryRequest; using curvefs::metaserver::ListDentryResponse; -using curvefs::metaserver::PrepareRenameTxRequest; -using curvefs::metaserver::PrepareRenameTxResponse; +using curvefs::metaserver::ManageInodeType; +using curvefs::metaserver::PrewriteRenameTxRequest; +using curvefs::metaserver::PrewriteRenameTxResponse; using curvefs::metaserver::UpdateInodeRequest; using curvefs::metaserver::UpdateInodeResponse; -using curvefs::metaserver::ManageInodeType; using curvefs::common::FSType; using curvefs::common::PartitionInfo; @@ -84,6 +84,8 @@ using curvefs::mds::CommitTxRequest; using curvefs::mds::CommitTxResponse; using curvefs::mds::RefreshSessionRequest; using curvefs::mds::RefreshSessionResponse; +using curvefs::mds::TsoRequest; +using curvefs::mds::TsoResponse; using curvefs::mds::UmountFsRequest; using curvefs::mds::UmountFsResponse; @@ -197,6 +199,9 @@ class MDSBaseClient { brpc::Controller* cntl, brpc::Channel* channel); + virtual void Tso(const TsoRequest& request, TsoResponse* response, + brpc::Controller* cntl, brpc::Channel* channel); + virtual void AllocateVolumeBlockGroup(uint32_t fsId, uint32_t count, const std::string& owner, diff --git a/curvefs/src/client/rpcclient/mds_client.cpp b/curvefs/src/client/rpcclient/mds_client.cpp index 3c52bf3a5a..2f53aa3d7d 100644 --- a/curvefs/src/client/rpcclient/mds_client.cpp +++ b/curvefs/src/client/rpcclient/mds_client.cpp @@ -25,6 +25,7 @@ #include #include #include +#include "absl/strings/str_join.h" #include "curvefs/proto/space.pb.h" #include 
"curvefs/src/client/rpcclient/fsdelta_updater.h" @@ -500,12 +501,11 @@ FSStatusCode MdsClientImpl::AllocS3ChunkId(uint32_t fsId, uint32_t idNum, return ReturnError(rpcexcutor_.DoRPCTask(task, mdsOpt_.mdsMaxRetryMS)); } -FSStatusCode -MdsClientImpl::RefreshSession(const std::vector &txIds, - std::vector *latestTxIdList, - const std::string& fsName, - const Mountpoint& mountpoint, - std::atomic* enableSumInDir) { +FSStatusCode MdsClientImpl::RefreshSession( + const std::vector &txIds, + std::vector *latestTxIdList, const std::string &fsName, + const Mountpoint &mountpoint, std::atomic *enableSumInDir, + const std::string &mdsAddrs, std::string *mdsAddrsOverride) { auto task = RPCTask { (void)addrindex; (void)rpctimeoutMS; @@ -520,6 +520,7 @@ MdsClientImpl::RefreshSession(const std::vector &txIds, fsDelta.set_bytes( FsDeltaUpdater::GetInstance().GetDeltaBytesAndReset()); *request.mutable_fsdelta() = std::move(fsDelta); + request.set_mdsaddrs(mdsAddrs); mdsbasecli_->RefreshSession(request, &response, cntl, channel); if (cntl->Failed()) { @@ -550,13 +551,31 @@ MdsClientImpl::RefreshSession(const std::vector &txIds, FsQuotaChecker::GetInstance().UpdateQuotaCache( response.fscapacity(), response.fsusedbytes()); } - + if (response.has_mdsaddrsoverride()) { + *mdsAddrsOverride = response.mdsaddrsoverride(); + } return ret; }; return ReturnError(rpcexcutor_.DoRPCTask(task, mdsOpt_.mdsMaxRetryMS)); } +std::string MdsClientImpl::GetMdsAddrs() { + auto option = rpcexcutor_.GetOption(); + return absl::StrJoin(option.addrs, ","); +} + +void MdsClientImpl::SetMdsAddrs(const std::string &mdsAddrs) { + std::vector mdsAddrsVec = {}; + curve::common::SplitString(mdsAddrs, ",", &mdsAddrsVec); + if (!mdsAddrsVec.empty()) { + auto option = rpcexcutor_.GetOption(); + option.addrs = mdsAddrsVec; + rpcexcutor_.SetOption(option); + LOG(WARNING) << "update mdsAddrs to " << mdsAddrs; + } +} + FSStatusCode MdsClientImpl::GetLatestTxId(const GetLatestTxIdRequest& request, GetLatestTxIdResponse* 
response) { auto task = RPCTask { @@ -684,6 +703,38 @@ FSStatusCode MdsClientImpl::CommitTxWithLock( return CommitTx(request); } +FSStatusCode MdsClientImpl::Tso(uint64_t* ts, uint64_t* timestamp) { + auto task = RPCTask { + (void)addrindex; + (void)rpctimeoutMS; + mdsClientMetric_.tso.qps.count << 1; + LatencyUpdater updater(&mdsClientMetric_.tso.latency); + TsoRequest request; + TsoResponse response; + mdsbasecli_->Tso(request, &response, cntl, channel); + if (cntl->Failed()) { + mdsClientMetric_.tso.eps.count << 1; + LOG(WARNING) << "Tso Failed, errorcode = " << cntl->ErrorCode() + << ", error content:" << cntl->ErrorText() + << ", log id = " << cntl->log_id(); + return -cntl->ErrorCode(); + } + + FSStatusCode ret = response.statuscode(); + if (ret != FSStatusCode::OK) { + LOG(ERROR) << "Tso: errcode = " << ret + << ", errmsg = " << FSStatusCode_Name(ret); + return ret; + } else { + *ts = response.ts(); + *timestamp = response.timestamp(); + } + return ret; + }; + // for rpc error or failed/timeout, we will retry until success + return ReturnError(rpcexcutor_.DoRPCTask(task, 0)); +} + FSStatusCode MdsClientImpl::ReturnError(int retcode) { // rpc error convert to FSStatusCode::RPC_ERROR if (retcode < 0) { diff --git a/curvefs/src/client/rpcclient/mds_client.h b/curvefs/src/client/rpcclient/mds_client.h index 3b7577c9e3..e74ec84c03 100644 --- a/curvefs/src/client/rpcclient/mds_client.h +++ b/curvefs/src/client/rpcclient/mds_client.h @@ -112,12 +112,15 @@ class MdsClient { virtual FSStatusCode AllocS3ChunkId(uint32_t fsId, uint32_t idNum, uint64_t *chunkId) = 0; - virtual FSStatusCode - RefreshSession(const std::vector &txIds, - std::vector *latestTxIdList, - const std::string& fsName, - const Mountpoint& mountpoint, - std::atomic* enableSumInDir) = 0; + virtual FSStatusCode RefreshSession( + const std::vector &txIds, + std::vector *latestTxIdList, const std::string &fsName, + const Mountpoint &mountpoint, std::atomic *enableSumInDir, + const std::string &mdsAddrs, 
std::string *mdsAddrsOverride) = 0; + + virtual std::string GetMdsAddrs() = 0; + + virtual void SetMdsAddrs(const std::string &mdsAddrs) = 0; virtual FSStatusCode GetLatestTxId(uint32_t fsId, std::vector* txIds) = 0; @@ -138,6 +141,8 @@ class MdsClient { const std::string& uuid, uint64_t sequence) = 0; + virtual FSStatusCode Tso(uint64_t* ts, uint64_t* timestamp) = 0; + // allocate block group virtual SpaceErrCode AllocateVolumeBlockGroup( uint32_t fsId, @@ -203,9 +208,15 @@ class MdsClientImpl : public MdsClient { FSStatusCode RefreshSession(const std::vector &txIds, std::vector *latestTxIdList, - const std::string& fsName, - const Mountpoint& mountpoint, - std::atomic* enableSumInDir) override; + const std::string &fsName, + const Mountpoint &mountpoint, + std::atomic *enableSumInDir, + const std::string &mdsAddrs, + std::string *mdsAddrsOverride) override; + + std::string GetMdsAddrs() override; + + void SetMdsAddrs(const std::string &mdsAddrs) override; FSStatusCode GetLatestTxId(uint32_t fsId, std::vector* txIds) override; @@ -225,6 +236,8 @@ class MdsClientImpl : public MdsClient { const std::string& uuid, uint64_t sequence) override; + FSStatusCode Tso(uint64_t* ts, uint64_t* timestamp) override; + // allocate block group SpaceErrCode AllocateVolumeBlockGroup( uint32_t fsId, @@ -233,11 +246,9 @@ class MdsClientImpl : public MdsClient { std::vector *groups) override; // acquire block group - SpaceErrCode AcquireVolumeBlockGroup( - uint32_t fsId, - uint64_t blockGroupOffset, - const std::string &owner, - curvefs::mds::space::BlockGroup *groups) override; + SpaceErrCode AcquireVolumeBlockGroup(uint32_t fsId, + uint64_t blockGroupOffset, const std::string& owner, + curvefs::mds::space::BlockGroup* groups) override; // release block group SpaceErrCode ReleaseVolumeBlockGroup( diff --git a/curvefs/src/client/rpcclient/metacache.cpp b/curvefs/src/client/rpcclient/metacache.cpp index d3bd4fd294..4ca48b4935 100644 --- a/curvefs/src/client/rpcclient/metacache.cpp +++ 
b/curvefs/src/client/rpcclient/metacache.cpp @@ -102,7 +102,7 @@ bool MetaCache::GetTarget(uint32_t fsID, uint64_t inodeID, } if (!GetCopysetIDwithInodeID(inodeID, &target->groupID, - &target->partitionID, &target->txId)) { + &target->partitionID, &target->txId)) { LOG(ERROR) << "{fsid:" << fsID << ", inodeid:" << inodeID << "} do not find partition"; return false; @@ -496,9 +496,7 @@ bool MetaCache::SelectPartition(CopysetTarget *target) { } bool MetaCache::GetCopysetIDwithInodeID(uint64_t inodeID, - CopysetGroupID *groupID, - PartitionID *partitionID, - uint64_t *txId) { + CopysetGroupID* groupID, PartitionID* partitionID, uint64_t *txId) { ReadLockGuard rl(rwlock4Partitions_); for (auto iter = partitionInfos_.begin(); iter != partitionInfos_.end(); ++iter) { diff --git a/curvefs/src/client/rpcclient/metacache.h b/curvefs/src/client/rpcclient/metacache.h index c4c5589761..a9a1505d86 100644 --- a/curvefs/src/client/rpcclient/metacache.h +++ b/curvefs/src/client/rpcclient/metacache.h @@ -54,8 +54,15 @@ using ::curvefs::common::PartitionStatus; namespace curvefs { namespace client { + +namespace common { + DECLARE_int32(TxVersion); +} // namespace common + namespace rpcclient { +using curvefs::client::common::FLAGS_TxVersion; + struct CopysetGroupID { LogicPoolID poolID = 0; CopysetID copysetID = 0; @@ -83,8 +90,9 @@ struct CopysetTarget { bool IsValid() const { return groupID.poolID != 0 && groupID.copysetID != 0 && - partitionID != 0 && txId != 0 && metaServerID != 0 && - endPoint.ip != butil::IP_ANY && endPoint.port != 0; + partitionID != 0 && metaServerID != 0 && + endPoint.ip != butil::IP_ANY && endPoint.port != 0 && + (FLAGS_TxVersion != 1 || txId != 0); } void Reset() { @@ -179,8 +187,8 @@ class MetaCache { bool SelectPartition(CopysetTarget *target); // get info from partitionMap or copysetMap - bool GetCopysetIDwithInodeID(uint64_t inodeID, CopysetGroupID *groupID, - PartitionID *patitionID, uint64_t *txId); + bool GetCopysetIDwithInodeID(uint64_t inodeID, 
CopysetGroupID* groupID, + PartitionID* patitionID, uint64_t *txId); bool GetCopysetInfowithCopySetID(const CopysetGroupID &groupID, CopysetInfo *targetInfo); diff --git a/curvefs/src/client/rpcclient/metaserver_client.cpp b/curvefs/src/client/rpcclient/metaserver_client.cpp index febde15dc2..0f5d951134 100644 --- a/curvefs/src/client/rpcclient/metaserver_client.cpp +++ b/curvefs/src/client/rpcclient/metaserver_client.cpp @@ -24,7 +24,6 @@ #include #include #include -#include #include #include @@ -46,8 +45,12 @@ using curvefs::metaserver::BatchGetInodeAttrRequest; using curvefs::metaserver::BatchGetInodeAttrResponse; using curvefs::metaserver::BatchGetXAttrRequest; using curvefs::metaserver::BatchGetXAttrResponse; +using curvefs::metaserver::CheckTxStatusRequest; +using curvefs::metaserver::CheckTxStatusResponse; using curvefs::metaserver::GetOrModifyS3ChunkInfoRequest; using curvefs::metaserver::GetOrModifyS3ChunkInfoResponse; +using curvefs::metaserver::ResolveTxLockRequest; +using curvefs::metaserver::ResolveTxLockResponse; namespace curvefs { namespace client { @@ -57,6 +60,10 @@ using GetDentryExcutor = TaskExecutor; using ListDentryExcutor = TaskExecutor; using DeleteDentryExcutor = TaskExecutor; using PrepareRenameTxExcutor = TaskExecutor; +using PrewriteRenameTxExcutor = TaskExecutor; +using CheckTxStatusExcutor = TaskExecutor; +using ResolveTxLockExcutor = TaskExecutor; +using CommitTxExcutor = TaskExecutor; using DeleteInodeExcutor = TaskExecutor; using UpdateInodeExcutor = TaskExecutor; using GetInodeExcutor = TaskExecutor; @@ -120,9 +127,15 @@ void MetaServerClientImpl::SetTxId(uint32_t partitionId, uint64_t txId) { metaCache_->SetTxId(partitionId, txId); } +bool MetaServerClientImpl::GetPartitionId(uint32_t fsId, uint64_t inodeId, + PartitionID *partitionId) { + return metaCache_->GetPartitionIdByInodeId(fsId, inodeId, partitionId); +} + MetaStatusCode MetaServerClientImpl::GetDentry(uint32_t fsId, uint64_t inodeid, const std::string &name, - Dentry 
*out) { + Dentry *out, + TxLock* txLockOut) { auto task = RPCTask { (void)taskExecutorDone; metric_.getDentry.qps.count << 1; @@ -151,6 +164,9 @@ MetaStatusCode MetaServerClientImpl::GetDentry(uint32_t fsId, uint64_t inodeid, MetaStatusCode ret = response.statuscode(); if (ret != MetaStatusCode::OK) { + if (ret == MetaStatusCode::TX_KEY_LOCKED) { + *txLockOut = response.txlock(); + } LOG_IF(WARNING, ret != MetaStatusCode::NOT_FOUND) << "GetDentry: fsId = " << fsId << ", inodeid = " << inodeid << ", name = " << name << ", errcode = " << ret @@ -172,8 +188,7 @@ MetaStatusCode MetaServerClientImpl::GetDentry(uint32_t fsId, uint64_t inodeid, }; auto taskCtx = std::make_shared(MetaServerOpType::GetDentry, - task, fsId, inodeid, false, - opt_.enableRenameParallel); + task, fsId, inodeid, false, opt_.enableRenameParallel); GetDentryExcutor excutor(opt_, metaCache_, channelManager_, std::move(taskCtx)); return ConvertToMetaStatusCode(excutor.DoRPCTask()); @@ -182,7 +197,8 @@ MetaStatusCode MetaServerClientImpl::GetDentry(uint32_t fsId, uint64_t inodeid, MetaStatusCode MetaServerClientImpl::ListDentry(uint32_t fsId, uint64_t inodeid, const std::string &last, uint32_t count, bool onlyDir, - std::list *dentryList) { + std::list *dentryList, + TxLock* txLockOut) { auto task = RPCTask { (void)taskExecutorDone; metric_.listDentry.qps.count << 1; @@ -213,6 +229,9 @@ MetaStatusCode MetaServerClientImpl::ListDentry(uint32_t fsId, uint64_t inodeid, MetaStatusCode ret = response.statuscode(); if (ret != MetaStatusCode::OK) { + if (ret == MetaStatusCode::TX_KEY_LOCKED) { + *txLockOut = response.txlock(); + } LOG(WARNING) << "ListDentry: fsId = " << fsId << ", inodeid = " << inodeid << ", last = " << last << ", count = " << count << ", onlyDir = " << onlyDir @@ -230,14 +249,14 @@ MetaStatusCode MetaServerClientImpl::ListDentry(uint32_t fsId, uint64_t inodeid, }; auto taskCtx = std::make_shared(MetaServerOpType::ListDentry, - task, fsId, inodeid, false, - opt_.enableRenameParallel); + 
task, fsId, inodeid, false, opt_.enableRenameParallel); ListDentryExcutor excutor(opt_, metaCache_, channelManager_, std::move(taskCtx)); return ConvertToMetaStatusCode(excutor.DoRPCTask()); } -MetaStatusCode MetaServerClientImpl::CreateDentry(const Dentry &dentry) { +MetaStatusCode MetaServerClientImpl::CreateDentry( + const Dentry &dentry, TxLock* txLockOut) { auto task = RPCTask { (void)taskExecutorDone; metric_.createDentry.qps.count << 1; @@ -255,12 +274,7 @@ MetaStatusCode MetaServerClientImpl::CreateDentry(const Dentry &dentry) { d->set_txid(txId); d->set_type(dentry.type()); request.set_allocated_dentry(d); - struct timespec now; - clock_gettime(CLOCK_REALTIME, &now); - Time *tm = new Time(); - tm->set_sec(now.tv_sec); - tm->set_nsec(now.tv_nsec); - request.set_allocated_create(tm); + SetCreateTime(request.mutable_create()); curvefs::metaserver::MetaServerService_Stub stub(channel); stub.CreateDentry(cntl, &request, &response, nullptr); @@ -280,7 +294,11 @@ MetaStatusCode MetaServerClientImpl::CreateDentry(const Dentry &dentry) { } MetaStatusCode ret = response.statuscode(); + if (ret != MetaStatusCode::OK) { + if (ret == MetaStatusCode::TX_KEY_LOCKED) { + *txLockOut = response.txlock(); + } LOG(WARNING) << "CreateDentry: dentry = " << dentry.DebugString() << ", errcode = " << ret << ", errmsg = " << MetaStatusCode_Name(ret); @@ -304,9 +322,8 @@ MetaStatusCode MetaServerClientImpl::CreateDentry(const Dentry &dentry) { } MetaStatusCode MetaServerClientImpl::DeleteDentry(uint32_t fsId, - uint64_t inodeid, - const std::string &name, - FsFileType type) { + uint64_t inodeid, const std::string &name, FsFileType type, + TxLock* txLockOut) { auto task = RPCTask { (void)taskExecutorDone; metric_.deleteDentry.qps.count << 1; @@ -321,7 +338,7 @@ MetaStatusCode MetaServerClientImpl::DeleteDentry(uint32_t fsId, request.set_name(name); request.set_txid(txId); request.set_type(type); - + SetCreateTime(request.mutable_create()); curvefs::metaserver::MetaServerService_Stub 
stub(channel); stub.DeleteDentry(cntl, &request, &response, nullptr); @@ -336,6 +353,9 @@ MetaStatusCode MetaServerClientImpl::DeleteDentry(uint32_t fsId, MetaStatusCode ret = response.statuscode(); if (ret != MetaStatusCode::OK) { + if (ret == MetaStatusCode::TX_KEY_LOCKED) { + *txLockOut = response.txlock(); + } LOG(WARNING) << "DeleteDentry: fsid = " << fsId << ", inodeid = " << inodeid << ", name = " << name << ", errcode = " << ret @@ -348,22 +368,21 @@ MetaStatusCode MetaServerClientImpl::DeleteDentry(uint32_t fsId, }; auto taskCtx = std::make_shared(MetaServerOpType::DeleteDentry, - task, fsId, inodeid, false, - opt_.enableRenameParallel); + task, fsId, inodeid, false, opt_.enableRenameParallel); DeleteDentryExcutor excutor(opt_, metaCache_, channelManager_, std::move(taskCtx)); return ConvertToMetaStatusCode(excutor.DoRPCTask()); } -MetaStatusCode -MetaServerClientImpl::PrepareRenameTx(const std::vector &dentrys) { +MetaStatusCode MetaServerClientImpl::PrepareRenameTx( + const std::vector &dentrys) { auto task = RPCTask { (void)txId; (void)taskExecutorDone; metric_.prepareRenameTx.qps.count << 1; LatencyUpdater updater(&metric_.prepareRenameTx.latency); - PrepareRenameTxRequest request; - PrepareRenameTxResponse response; + curvefs::metaserver::PrepareRenameTxRequest request; + curvefs::metaserver::PrepareRenameTxResponse response; request.set_poolid(poolID); request.set_copysetid(copysetID); request.set_partitionid(partitionID); @@ -401,6 +420,167 @@ MetaServerClientImpl::PrepareRenameTx(const std::vector &dentrys) { return ConvertToMetaStatusCode(excutor.DoRPCTask()); } +MetaStatusCode MetaServerClientImpl::PrewriteRenameTx( + const std::vector& dentrys, + const TxLock& txLockIn, TxLock* txLockOut) { + auto task = RPCTask { + (void)txId; + (void)taskExecutorDone; + metric_.prewriteRenameTx.qps.count << 1; + LatencyUpdater updater(&metric_.prewriteRenameTx.latency); + PrewriteRenameTxRequest request; + PrewriteRenameTxResponse response; + 
request.set_poolid(poolID); + request.set_copysetid(copysetID); + request.set_partitionid(partitionID); + *request.mutable_dentrys() = {dentrys.begin(), dentrys.end()}; + *request.mutable_txlock() = txLockIn; + + curvefs::metaserver::MetaServerService_Stub stub(channel); + stub.PrewriteRenameTx(cntl, &request, &response, nullptr); + + if (cntl->Failed()) { + metric_.prewriteRenameTx.eps.count << 1; + LOG(WARNING) << "PrewriteRenameTx Failed, errorcode = " + << cntl->ErrorCode() + << ", error content:" << cntl->ErrorText() + << ", request = " << request.DebugString() + << ", log id = " << cntl->log_id(); + return -cntl->ErrorCode(); + } + + MetaStatusCode ret = response.statuscode(); + if (ret != MetaStatusCode::OK) { + LOG(WARNING) << "PrewriteRenameTx: ret = " << ret + << ", errmsg = " << MetaStatusCode_Name(ret); + } else { + *txLockOut = response.txlock(); + } + + VLOG(6) << "PrewriteRenameTx done, request: " << request.DebugString() + << "response: " << response.DebugString(); + return ret; + }; + auto taskCtx = std::make_shared( + MetaServerOpType::PrewriteRenameTx, task, dentrys[0].fsid(), + dentrys[0].parentinodeid()); + PrewriteRenameTxExcutor excutor(opt_, metaCache_, channelManager_, + std::move(taskCtx)); + return ConvertToMetaStatusCode(excutor.DoRPCTask()); +} + +MetaStatusCode MetaServerClientImpl::CheckTxStatus(uint32_t fsId, + uint64_t inodeId, const std::string& primaryKey, uint64_t startTs, + uint64_t curTimestamp) { + auto task = RPCTask { + (void)txId; + (void)taskExecutorDone; + metric_.checkTxStatus.qps.count << 1; + LatencyUpdater updater(&metric_.checkTxStatus.latency); + CheckTxStatusRequest request; + CheckTxStatusResponse response; + request.set_poolid(poolID); + request.set_copysetid(copysetID); + request.set_partitionid(partitionID); + request.set_primarykey(primaryKey); + request.set_startts(startTs); + request.set_curtimestamp(curTimestamp); + + curvefs::metaserver::MetaServerService_Stub stub(channel); + stub.CheckTxStatus(cntl, 
&request, &response, nullptr); + + if (cntl->Failed()) { + metric_.checkTxStatus.eps.count << 1; + LOG(WARNING) << "CheckTxStatus failed" + << ", errorCode = " << cntl->ErrorCode() + << ", errorText = " << cntl->ErrorText() + << ", request = " << request.DebugString() + << ", logId = " << cntl->log_id(); + return -cntl->ErrorCode(); + } + + return response.statuscode(); + }; + auto taskCtx = std::make_shared( + MetaServerOpType::CheckTxStatus, task, fsId, inodeId); + CheckTxStatusExcutor excutor( + opt_, metaCache_, channelManager_, std::move(taskCtx)); + return ConvertToMetaStatusCode(excutor.DoRPCTask()); +} + +MetaStatusCode MetaServerClientImpl::ResolveTxLock(const Dentry& dentry, + uint64_t startTs, uint64_t commitTs) { + auto task = RPCTask { + (void)txId; + (void)taskExecutorDone; + metric_.resolveTxLock.qps.count << 1; + LatencyUpdater updater(&metric_.resolveTxLock.latency); + ResolveTxLockRequest request; + ResolveTxLockResponse response; + request.set_poolid(poolID); + request.set_copysetid(copysetID); + request.set_partitionid(partitionID); + *request.mutable_dentry() = dentry; + request.set_startts(startTs); + request.set_committs(commitTs); + + curvefs::metaserver::MetaServerService_Stub stub(channel); + stub.ResolveTxLock(cntl, &request, &response, nullptr); + + if (cntl->Failed()) { + metric_.resolveTxLock.eps.count << 1; + LOG(WARNING) << "ResolveTxLock failed" + << ", errorCode = " << cntl->ErrorCode() + << ", errorText = " << cntl->ErrorText() + << ", logId = " << cntl->log_id(); + return -cntl->ErrorCode(); + } + return response.statuscode(); + }; + auto taskCtx = + std::make_shared(MetaServerOpType::ResolveTxLock, task, + dentry.fsid(), dentry.parentinodeid()); + ResolveTxLockExcutor excutor( + opt_, metaCache_, channelManager_, std::move(taskCtx)); + return ConvertToMetaStatusCode(excutor.DoRPCTask()); +} + +MetaStatusCode MetaServerClientImpl::CommitTx( + const std::vector& dentrys, uint64_t startTs, uint64_t commitTs) { + auto task = RPCTask 
{ + (void)txId; + (void)taskExecutorDone; + metric_.commitTx.qps.count << 1; + LatencyUpdater updater(&metric_.commitTx.latency); + curvefs::metaserver::CommitTxRequest request; + curvefs::metaserver::CommitTxResponse response; + request.set_poolid(poolID); + request.set_copysetid(copysetID); + request.set_partitionid(partitionID); + *request.mutable_dentrys() = {dentrys.begin(), dentrys.end()}; + request.set_startts(startTs); + request.set_committs(commitTs); + + curvefs::metaserver::MetaServerService_Stub stub(channel); + stub.CommitTx(cntl, &request, &response, nullptr); + + if (cntl->Failed()) { + metric_.commitTx.eps.count << 1; + LOG(WARNING) << "CommitTx failed" + << ", errorCode = " << cntl->ErrorCode() + << ", errorText = " << cntl->ErrorText() + << ", logId = " << cntl->log_id(); + return -cntl->ErrorCode(); + } + return response.statuscode(); + }; + auto taskCtx = std::make_shared(MetaServerOpType::CommitTx, + task, dentrys[0].fsid(), dentrys[0].parentinodeid()); + CommitTxExcutor excutor( + opt_, metaCache_, channelManager_, std::move(taskCtx)); + return ConvertToMetaStatusCode(excutor.DoRPCTask()); +} + MetaStatusCode MetaServerClientImpl::GetInode(uint32_t fsId, uint64_t inodeid, Inode *out, bool *streaming) { auto task = RPCTask { @@ -1147,12 +1327,7 @@ MetaStatusCode MetaServerClientImpl::CreateInode(const InodeParam ¶m, request.set_rdev(param.rdev); request.set_symlink(param.symlink); request.set_parent(param.parent); - struct timespec now; - clock_gettime(CLOCK_REALTIME, &now); - Time *tm = new Time(); - tm->set_sec(now.tv_sec); - tm->set_nsec(now.tv_nsec); - request.set_allocated_create(tm); + SetCreateTime(request.mutable_create()); curvefs::metaserver::MetaServerService_Stub stub(channel); stub.CreateInode(cntl, &request, &response, nullptr); diff --git a/curvefs/src/client/rpcclient/metaserver_client.h b/curvefs/src/client/rpcclient/metaserver_client.h index cd2a233da3..ddce3c64ba 100644 --- a/curvefs/src/client/rpcclient/metaserver_client.h 
+++ b/curvefs/src/client/rpcclient/metaserver_client.h @@ -23,6 +23,8 @@ #ifndef CURVEFS_SRC_CLIENT_RPCCLIENT_METASERVER_CLIENT_H_ #define CURVEFS_SRC_CLIENT_RPCCLIENT_METASERVER_CLIENT_H_ +#include + #include #include #include @@ -43,17 +45,17 @@ #include "absl/types/optional.h" using ::curvefs::client::metric::MetaServerClientMetric; +using ::curvefs::common::StreamClient; +using ::curvefs::common::StreamStatus; +using ::curvefs::metaserver::DeallocatableBlockGroup; using ::curvefs::metaserver::Dentry; using ::curvefs::metaserver::FsFileType; using ::curvefs::metaserver::Inode; using ::curvefs::metaserver::InodeAttr; -using ::curvefs::metaserver::XAttr; -using ::curvefs::metaserver::MetaStatusCode; using ::curvefs::metaserver::S3ChunkInfoList; -using ::curvefs::metaserver::DeallocatableBlockGroup; -using ::curvefs::common::StreamStatus; -using ::curvefs::common::StreamClient; using ::curvefs::metaserver::Time; +using ::curvefs::metaserver::TxLock; +using ::curvefs::metaserver::XAttr; using DeallocatableBlockGroupMap = std::map; using S3ChunkInfoMap = google::protobuf::Map; @@ -70,6 +72,13 @@ struct DataIndices { absl::optional volumeExtents; }; +inline void SetCreateTime(Time* tm) { + struct timespec now; + clock_gettime(CLOCK_REALTIME, &now); + tm->set_sec(now.tv_sec); + tm->set_nsec(now.tv_nsec); +} + class MetaServerClient { public: virtual ~MetaServerClient() = default; @@ -85,22 +94,36 @@ class MetaServerClient { virtual void SetTxId(uint32_t partitionId, uint64_t txId) = 0; virtual MetaStatusCode GetDentry(uint32_t fsId, uint64_t inodeid, - const std::string &name, Dentry *out) = 0; + const std::string &name, Dentry *out, TxLock* txLockOut) = 0; virtual MetaStatusCode ListDentry(uint32_t fsId, uint64_t inodeid, const std::string &last, uint32_t count, bool onlyDir, - std::list *dentryList) = 0; + std::list *dentryList, + TxLock* txLockOut) = 0; - virtual MetaStatusCode CreateDentry(const Dentry &dentry) = 0; + virtual MetaStatusCode CreateDentry( + const 
Dentry &dentry, TxLock* txLockOut) = 0; virtual MetaStatusCode DeleteDentry(uint32_t fsId, uint64_t inodeid, - const std::string &name, - FsFileType type) = 0; + const std::string &name, FsFileType type, TxLock* txLockOut) = 0; virtual MetaStatusCode PrepareRenameTx(const std::vector &dentrys) = 0; + virtual MetaStatusCode PrewriteRenameTx(const std::vector& dentrys, + const TxLock& txLockIn, TxLock* txLockOut) = 0; + + virtual MetaStatusCode CheckTxStatus(uint32_t fsId, uint64_t inodeId, + const std::string& primaryKey, uint64_t startTs, + uint64_t curTimestamp) = 0; + + virtual MetaStatusCode ResolveTxLock(const Dentry& dentry, + uint64_t startTs, uint64_t commitTs) = 0; + + virtual MetaStatusCode CommitTx(const std::vector& dentry, + uint64_t startTs, uint64_t commitTs) = 0; + virtual MetaStatusCode GetInode(uint32_t fsId, uint64_t inodeid, Inode *out, bool* streaming) = 0; @@ -176,6 +199,9 @@ class MetaServerClient { virtual MetaStatusCode UpdateDeallocatableBlockGroup(uint32_t fsId, uint64_t inodeId, DeallocatableBlockGroupMap *statistic) = 0; + + virtual bool GetPartitionId(uint32_t fsId, uint64_t inodeId, + PartitionID* partitionId) = 0; }; class MetaServerClientImpl : public MetaServerClient { @@ -193,21 +219,35 @@ class MetaServerClientImpl : public MetaServerClient { void SetTxId(uint32_t partitionId, uint64_t txId) override; MetaStatusCode GetDentry(uint32_t fsId, uint64_t inodeid, - const std::string &name, Dentry *out) override; + const std::string &name, Dentry *out, TxLock* txLockOut) override; MetaStatusCode ListDentry(uint32_t fsId, uint64_t inodeid, const std::string &last, uint32_t count, bool onlyDir, - std::list *dentryList) override; + std::list *dentryList, + TxLock* txLockOut) override; - MetaStatusCode CreateDentry(const Dentry &dentry) override; + MetaStatusCode CreateDentry( + const Dentry &dentry, TxLock* txLockOut) override; MetaStatusCode DeleteDentry(uint32_t fsId, uint64_t inodeid, - const std::string &name, - FsFileType type) 
override; + const std::string &name, FsFileType type, TxLock* txLockOut) override; MetaStatusCode PrepareRenameTx(const std::vector &dentrys) override; + MetaStatusCode PrewriteRenameTx(const std::vector& dentrys, + const TxLock& txLockIn, TxLock* txLockOut) override; + + MetaStatusCode CheckTxStatus(uint32_t fsId, uint64_t inodeId, + const std::string& primaryKey, uint64_t startTs, + uint64_t curTimestamp) override; + + MetaStatusCode ResolveTxLock(const Dentry& dentry, + uint64_t startTs, uint64_t commitTs) override; + + MetaStatusCode CommitTx(const std::vector& dentrys, + uint64_t startTs, uint64_t commitTs) override; + MetaStatusCode GetInode(uint32_t fsId, uint64_t inodeid, Inode *out, bool* streaming) override; @@ -285,6 +325,9 @@ class MetaServerClientImpl : public MetaServerClient { uint32_t fsId, uint64_t inodeId, DeallocatableBlockGroupMap *statistic) override; + bool GetPartitionId(uint32_t fsId, uint64_t inodeId, + PartitionID *partitionId) override; + private: MetaStatusCode UpdateInode(const UpdateInodeRequest &request, bool internal = false); @@ -308,6 +351,7 @@ class MetaServerClientImpl : public MetaServerClient { StreamClient streamClient_; MetaServerClientMetric metric_; }; + } // namespace rpcclient } // namespace client } // namespace curvefs diff --git a/curvefs/src/client/rpcclient/task_excutor.cpp b/curvefs/src/client/rpcclient/task_excutor.cpp index 191cff6e08..6d5d939369 100644 --- a/curvefs/src/client/rpcclient/task_excutor.cpp +++ b/curvefs/src/client/rpcclient/task_excutor.cpp @@ -29,8 +29,8 @@ #include "curvefs/proto/metaserver.pb.h" #include "curvefs/src/common/define.h" -using ::curvefs::metaserver::MetaStatusCode; using ::curvefs::RECYCLEINODEID; +using ::curvefs::metaserver::MetaStatusCode; namespace curvefs { namespace client { @@ -160,6 +160,10 @@ bool TaskExecutor::OnReturn(int retCode) { needRetry = true; break; + case MetaStatusCode::TX_INPROGRESS: + needRetry = true; + break; + default: break; } @@ -233,9 +237,8 @@ int 
TaskExecutor::ExcuteTask(brpc::Channel *channel, task_->cntl_.Reset(); task_->cntl_.set_timeout_ms(task_->rpcTimeoutMs); return task_->rpctask(task_->target.groupID.poolID, - task_->target.groupID.copysetID, - task_->target.partitionID, task_->target.txId, - channel, &task_->cntl_, done); + task_->target.groupID.copysetID, task_->target.partitionID, + task_->target.txId, channel, &task_->cntl_, done); } void TaskExecutor::OnSuccess() {} diff --git a/curvefs/src/client/rpcclient/task_excutor.h b/curvefs/src/client/rpcclient/task_excutor.h index 18a69346e5..23df4d1938 100644 --- a/curvefs/src/client/rpcclient/task_excutor.h +++ b/curvefs/src/client/rpcclient/task_excutor.h @@ -45,13 +45,19 @@ using ::curvefs::client::common::ExcutorOpt; using ::curvefs::client::common::MetaserverID; using ::curvefs::client::common::MetaServerOpType; using ::curvefs::common::PartitionInfo; -using ::curvefs::metaserver::MetaStatusCode; -using ::google::protobuf::RepeatedPtrField; using ::curvefs::metaserver::Inode; using ::curvefs::metaserver::InodeAttr; +using ::curvefs::metaserver::MetaStatusCode; +using ::google::protobuf::RepeatedPtrField; +using ::curvefs::client::common::FLAGS_TxVersion; namespace curvefs { namespace client { + +namespace common { + DECLARE_int32(TxVersion); +} // namespace common + namespace rpcclient { class TaskExecutorDone; @@ -60,10 +66,9 @@ MetaStatusCode ConvertToMetaStatusCode(int retcode); class TaskContext { public: - using RpcFunc = std::function; + using RpcFunc = std::function; TaskContext() = default; TaskContext(MetaServerOpType type, @@ -77,7 +82,7 @@ class TaskContext { fsID(fsid), inodeID(inodeid), streaming(streaming), - refreshTxId(refreshTxId) {} + refreshTxId(FLAGS_TxVersion == 1 ? 
refreshTxId : false) {} std::string TaskContextStr() { std::ostringstream oss; diff --git a/curvefs/src/client/s3/client_s3_adaptor.h b/curvefs/src/client/s3/client_s3_adaptor.h index dcc25c3795..453efb8931 100644 --- a/curvefs/src/client/s3/client_s3_adaptor.h +++ b/curvefs/src/client/s3/client_s3_adaptor.h @@ -34,7 +34,7 @@ #include "curvefs/src/client/common/common.h" #include "curvefs/src/client/common/config.h" #include "curvefs/src/client/filesystem/error.h" -#include "curvefs/src/client/inode_cache_manager.h" +#include "curvefs/src/client/inode_manager.h" #include "curvefs/src/client/rpcclient/mds_client.h" #include "curvefs/src/client/s3/client_s3.h" #include "curvefs/src/client/s3/client_s3_cache_manager.h" diff --git a/curvefs/src/client/s3/client_s3_cache_manager.cpp b/curvefs/src/client/s3/client_s3_cache_manager.cpp index 1bd098eb71..ce0f18170d 100644 --- a/curvefs/src/client/s3/client_s3_cache_manager.cpp +++ b/curvefs/src/client/s3/client_s3_cache_manager.cpp @@ -444,7 +444,12 @@ int FileCacheManager::Read(uint64_t inodeId, uint64_t offset, uint64_t length, ReadFromMemCache(offset, length, dataBuf, &actualReadLen, &memCacheMissRequest); if (memCacheMissRequest.empty()) { + if (s3ClientAdaptor_->s3Metric_) { + s3ClientAdaptor_->s3Metric_->readAllHitsMemCounts << 1; + } return actualReadLen; + } else { + s3ClientAdaptor_->s3Metric_->readRequestCounts << memCacheMissRequest.size(); } VLOG(6) << "memcache miss request size: " << memCacheMissRequest.size(); @@ -579,22 +584,34 @@ bool FileCacheManager::ReadKVRequestFromS3(const std::string &name, } FileCacheManager::ReadStatus -FileCacheManager::ReadKVRequest(const std::vector &kvRequests, +FileCacheManager::ReadKVRequest(std::vector &kvRequests, char *dataBuf, uint64_t fileLen) { absl::BlockingCounter counter(kvRequests.size()); std::once_flag cancelFlag; std::atomic isCanceled{false}; std::atomic retCode{0}; - for (const auto &req : kvRequests) { + for (auto &req : kvRequests) { + req.enqueue = 
butil::cpuwide_time_us(); readTaskPool_->Enqueue([&]() { auto defer = absl::MakeCleanup([&]() { counter.DecrementCount(); }); if (isCanceled) { LOG(WARNING) << "kv request is canceled " << req.DebugString(); return; } + req.dequeue = butil::cpuwide_time_us() - req.enqueue; ProcessKVRequest(req, dataBuf, fileLen, cancelFlag, isCanceled, retCode); + req.processed = butil::cpuwide_time_us() - req.enqueue; + + if (s3ClientAdaptor_->s3Metric_) { + curve::client::CollectMetrics( + &s3ClientAdaptor_->s3Metric_->adaptorDequeue, req.len, req.dequeue); + curve::client::CollectMetrics( + &s3ClientAdaptor_->s3Metric_->adaptorProcess, req.len, req.processed); + s3ClientAdaptor_->s3Metric_->s3ReadRequestCounts << 1; + } + }); } @@ -620,7 +637,10 @@ void FileCacheManager::ProcessKVRequest(const S3ReadRequest &req, char *dataBuf, std::string prefetchName = curvefs::common::s3util::GenObjName( req.chunkId, blockIndex, req.compaction, req.fsId, req.inodeId, objectPrefix); + + uint64_t start = butil::cpuwide_time_us(); bool waitDownloading = false; + // if obj is in downloading, wait for it. 
while (true) { { @@ -642,6 +662,12 @@ void FileCacheManager::ProcessKVRequest(const S3ReadRequest &req, char *dataBuf, } } + if (waitDownloading && s3ClientAdaptor_->s3Metric_) { + curve::client::CollectMetrics( + &s3ClientAdaptor_->s3Metric_->waitDownloading, req.len, butil::cpuwide_time_us() - start); + } + + // prefetch if (s3ClientAdaptor_->HasDiskCache() && !waitDownloading && !IsCachedInLocal(prefetchName)) { @@ -852,7 +878,8 @@ void FileCacheManager::PrefetchS3Objs( if (fromS3) { auto context = std::make_shared( name, dataCacheS3, 0, readLen, - AsyncPrefetchCallback{inode_, s3ClientAdaptor_, true}); + AsyncPrefetchCallback{inode_, s3ClientAdaptor_, true}, ContextType::S3); + context->start = butil::cpuwide_time_us(); auto task = [this, context]() { s3ClientAdaptor_->GetS3Client()->DownloadAsync(context); }; @@ -860,7 +887,8 @@ void FileCacheManager::PrefetchS3Objs( } else { auto context = std::make_shared( name, dataCacheS3, 0, readLen, - AsyncPrefetchCallback{inode_, s3ClientAdaptor_, false}); + AsyncPrefetchCallback{inode_, s3ClientAdaptor_, false}, ContextType::Disk); + context->start = butil::cpuwide_time_us(); kvClientManager_->Enqueue(context); } } diff --git a/curvefs/src/client/s3/client_s3_cache_manager.h b/curvefs/src/client/s3/client_s3_cache_manager.h index df3d665ac4..f0e75f8bf7 100644 --- a/curvefs/src/client/s3/client_s3_cache_manager.h +++ b/curvefs/src/client/s3/client_s3_cache_manager.h @@ -61,6 +61,7 @@ using WeakDataCachePtr = std::weak_ptr; using curve::common::GetObjectAsyncCallBack; using curve::common::PutObjectAsyncCallBack; using curve::common::S3Adapter; +using curve::common::ContextType; using curvefs::metaserver::Inode; using curvefs::metaserver::S3ChunkInfo; using curvefs::metaserver::S3ChunkInfoList; @@ -96,6 +97,9 @@ struct S3ReadRequest { uint64_t fsId; uint64_t inodeId; uint64_t compaction; + uint64_t enqueue; + uint64_t dequeue; + uint64_t processed; std::string DebugString() const { std::ostringstream os; @@ -426,7 +430,7 
@@ class FileCacheManager { } // read kv request, need - ReadStatus ReadKVRequest(const std::vector &kvRequests, + ReadStatus ReadKVRequest(std::vector &kvRequests, char *dataBuf, uint64_t fileLen); // thread function for ReadKVRequest diff --git a/curvefs/src/client/s3/disk_cache_manager.cpp b/curvefs/src/client/s3/disk_cache_manager.cpp index 1bc2ac2473..33a09e307d 100644 --- a/curvefs/src/client/s3/disk_cache_manager.cpp +++ b/curvefs/src/client/s3/disk_cache_manager.cpp @@ -418,6 +418,7 @@ void DiskCacheManager::TrimCache() { cacheWriteFullDir = GetCacheWriteFullDir(); while (true) { UpdateDiskFsUsedRatio(); + waitIntervalSec_.Init(FLAGS_diskTrimCheckIntervalSec * 1000); waitIntervalSec_.WaitForNextExcution(); if (!isRunning_) { LOG(INFO) << "trim thread end."; @@ -427,6 +428,10 @@ void DiskCacheManager::TrimCache() { InitQosParam(); if (!IsDiskCacheSafe(kRatioLevel)) { while (!IsDiskCacheSafe(FLAGS_diskTrimRatio)) { + if (!isRunning_) { + LOG(INFO) << "trim thread end."; + return; + } UpdateDiskFsUsedRatio(); if (!cachedObjName_->GetBack(&cacheKey)) { VLOG_EVERY_N(9, 1000) << "obj is empty"; diff --git a/curvefs/src/client/volume/default_volume_storage.cpp b/curvefs/src/client/volume/default_volume_storage.cpp index 4b2618b170..010b1e1902 100644 --- a/curvefs/src/client/volume/default_volume_storage.cpp +++ b/curvefs/src/client/volume/default_volume_storage.cpp @@ -31,7 +31,7 @@ #include "absl/meta/type_traits.h" #include "curvefs/src/client/filesystem/error.h" -#include "curvefs/src/client/inode_cache_manager.h" +#include "curvefs/src/client/inode_manager.h" #include "curvefs/src/client/inode_wrapper.h" #include "curvefs/src/client/volume/extent_cache.h" #include "curvefs/src/client/volume/utils.h" diff --git a/curvefs/src/client/warmup/warmup_manager.cpp b/curvefs/src/client/warmup/warmup_manager.cpp index 1f7dde85cb..bcc9fb9d73 100644 --- a/curvefs/src/client/warmup/warmup_manager.cpp +++ b/curvefs/src/client/warmup/warmup_manager.cpp @@ -70,7 +70,8 @@ bool 
WarmupManagerS3Impl::AddWarmupFilelist(fuse_ino_t key, return false; } // add warmup Progress - if (AddWarmupProcess(key, path, type)) { + WriteLockGuard lock(inode2ProgressMutex_); + if (AddWarmupProcessLocked(key, path, type)) { LOG(INFO) << "add warmup list task:" << key; WriteLockGuard lock(warmupFilelistDequeMutex_); auto iter = FindWarmupFilelistByKeyLocked(key); @@ -96,7 +97,8 @@ bool WarmupManagerS3Impl::AddWarmupFile(fuse_ino_t key, const std::string& path, return false; } // add warmup Progress - if (AddWarmupProcess(key, path, type)) { + WriteLockGuard lock(inode2ProgressMutex_); + if (AddWarmupProcessLocked(key, path, type)) { LOG(INFO) << "add warmup single task:" << key; FetchDentryEnqueue(key, path); } diff --git a/curvefs/src/client/warmup/warmup_manager.h b/curvefs/src/client/warmup/warmup_manager.h index 5cb7342fd0..e83772ed35 100644 --- a/curvefs/src/client/warmup/warmup_manager.h +++ b/curvefs/src/client/warmup/warmup_manager.h @@ -40,9 +40,9 @@ #include #include "curvefs/src/client/common/common.h" -#include "curvefs/src/client/dentry_cache_manager.h" +#include "curvefs/src/client/dentry_manager.h" #include "curvefs/src/client/fuse_common.h" -#include "curvefs/src/client/inode_cache_manager.h" +#include "curvefs/src/client/inode_manager.h" #include "curvefs/src/client/kvclient/kvclient_manager.h" #include "curvefs/src/client/metric/client_metric.h" #include "curvefs/src/client/rpcclient/metaserver_client.h" @@ -285,9 +285,8 @@ class WarmupManager { * @return true * @return false warmupProcess has been added */ - virtual bool AddWarmupProcess(fuse_ino_t key, const std::string& path, - WarmupStorageType type) { - WriteLockGuard lock(inode2ProgressMutex_); + virtual bool AddWarmupProcessLocked(fuse_ino_t key, const std::string& path, + WarmupStorageType type) { auto retPg = inode2Progress_.emplace(key, WarmupProgress(type, path)); return retPg.second; } diff --git a/curvefs/src/mds/codec/codec.cpp b/curvefs/src/mds/codec/codec.cpp index 
3bee130c98..1089533842 100644 --- a/curvefs/src/mds/codec/codec.cpp +++ b/curvefs/src/mds/codec/codec.cpp @@ -33,6 +33,7 @@ namespace codec { using ::curve::common::EncodeBigEndian; using ::curve::common::EncodeBigEndian_uint32; +using ::curve::common::DecodeBigEndian_uint32; using ::curvefs::mds::BLOCKGROUP_KEY_END; using ::curvefs::mds::BLOCKGROUP_KEY_PREFIX; using ::curvefs::mds::COMMON_PREFIX_LENGTH; diff --git a/curvefs/src/mds/common/storage_key.h b/curvefs/src/mds/common/storage_key.h index 8b2eb9f1b1..81a7ae85b3 100644 --- a/curvefs/src/mds/common/storage_key.h +++ b/curvefs/src/mds/common/storage_key.h @@ -45,26 +45,27 @@ const char BLOCKGROUP_KEY_PREFIX[] = "fs_04"; const char BLOCKGROUP_KEY_END[] = "fs_05"; const char FS_USAGE_KEY_PREFIX[] = "fs_05"; const char FS_USAGE_KEY_END[] = "fs_06"; +const char TS_INFO_KEY_PREFIX[] = "fs_07"; constexpr uint32_t COMMON_PREFIX_LENGTH = 5; -const char POOLKEYPREFIX[] = "fs_1001"; -const char POOLKEYEND[] = "fs_1002"; -const char ZONEKEYPREFIX[] = "fs_1002"; -const char ZONEKEYEND[] = "fs_1003"; -const char SERVERKEYPREFIX[] = "fs_1003"; -const char SERVERKEYEND[] = "fs_1004"; -const char METASERVERKEYPREFIX[] = "fs_1004"; -const char METASERVERKEYEND[] = "fs_1005"; -const char CLUSTERINFOKEY[] = "fs_1006"; -const char COPYSETKEYPREFIX[] = "fs_1007"; -const char COPYSETKEYEND[] = "fs_1008"; -const char PARTITIONKEYPREFIX[] = "fs_1008"; -const char PARTITIONKEYEND[] = "fs_1009"; -const char MEMCACHECLUSTERKEYPREFIX[] = "fs_1009"; -const char MEMCACHECLUSTERKEYEND[] = "fs_1010"; -const char FS2MEMCACHECLUSTERKEYPREFIX[] = "fs_1010"; -const char FS2MEMCACHECLUSTERKEYEND[] = "fs_1011"; +const char POOL_KEY_PREFIX[] = "fs_1001"; +const char POOL_KEY_END[] = "fs_1002"; +const char ZONE_KEY_PREFIX[] = "fs_1002"; +const char ZONE_KEY_END[] = "fs_1003"; +const char SERVER_KEY_PREFIX[] = "fs_1003"; +const char SERVER_KEY_END[] = "fs_1004"; +const char METASERVER_KEY_PREFIX[] = "fs_1004"; +const char METASERVER_KEY_END[] = 
"fs_1005"; +const char CLUSTER_KEY[] = "fs_1006"; +const char COPYSET_KEY_PREFIX[] = "fs_1007"; +const char COPYSET_KEY_END[] = "fs_1008"; +const char PARTITION_KEY_PREFIX[] = "fs_1008"; +const char PARTITION_KEY_END[] = "fs_1009"; +const char MEMCACHE_CLUSTER_KEY_PREFIX[] = "fs_1009"; +const char MEMCACHE_CLUSTER_KEY_END[] = "fs_1010"; +const char FS_2_MEMCACHE_CLUSTER_KEY_PREFIX[] = "fs_1010"; +const char FS_2_MEMCACHE_CLUSTER_KEY_END[] = "fs_1011"; constexpr uint32_t TOPOLOGY_PREFIX_LENGTH = 7; diff --git a/curvefs/src/mds/fs_manager.cpp b/curvefs/src/mds/fs_manager.cpp index 6ab8090946..7c6a1aa51a 100644 --- a/curvefs/src/mds/fs_manager.cpp +++ b/curvefs/src/mds/fs_manager.cpp @@ -841,9 +841,7 @@ FSStatusCode FsManager::UpdateFsInfo( } int FsManager::IsExactlySameOrCreateUnComplete(const std::string& fsName, - FSType fsType, - uint64_t blocksize, - const FsDetail& detail) { + FSType fsType, uint64_t blocksize, const FsDetail& detail) { FsInfoWrapper existFs; auto volumeInfoComparator = [](common::Volume lhs, common::Volume rhs) { @@ -858,16 +856,34 @@ int FsManager::IsExactlySameOrCreateUnComplete(const std::string& fsName, return google::protobuf::util::MessageDifferencer::Equals(lhs, rhs); }; - auto checkFsInfo = [fsType, volumeInfoComparator](const FsDetail& lhs, - const FsDetail& rhs) { + auto s3InfoComparator = [](common::S3Info newFs, common::S3Info existFs) { + // If the s3info detail stored in mds doesn't have prefix, the new + // client which has prefix value bigger than 0 can't mount filesystem. + if (newFs.has_objectprefix() && !existFs.has_objectprefix() && + newFs.objectprefix() != 0) { + return false; + } + // If the s3info detail stored in mds has prefix value bigger than 0, + // the old client which doesn't have prefix can't mount filesystem. 
+ if (existFs.has_objectprefix() && !newFs.has_objectprefix() && + existFs.objectprefix() != 0) { + return false; + } + + return google::protobuf::util::MessageDifferencer::Equals( + newFs, existFs); + }; + + auto checkFsInfo = [fsType, volumeInfoComparator, s3InfoComparator]( + const FsDetail& newFs, const FsDetail& existFs) { switch (fsType) { case curvefs::common::FSType::TYPE_S3: - return MessageDifferencer::Equals(lhs.s3info(), rhs.s3info()); + return s3InfoComparator(newFs.s3info(), existFs.s3info()); case curvefs::common::FSType::TYPE_VOLUME: - return volumeInfoComparator(lhs.volume(), rhs.volume()); + return volumeInfoComparator(newFs.volume(), existFs.volume()); case curvefs::common::FSType::TYPE_HYBRID: - return MessageDifferencer::Equals(lhs.s3info(), rhs.s3info()) && - volumeInfoComparator(lhs.volume(), rhs.volume()); + return s3InfoComparator(newFs.s3info(), existFs.s3info()) && + volumeInfoComparator(newFs.volume(), existFs.volume()); } return false; @@ -976,6 +992,15 @@ void FsManager::RefreshSession(const RefreshSessionRequest* request, FsUsage usage; fsStorage_->GetFsUsage(request->fsname(), &usage, true); response->set_fsusedbytes(usage.usedbytes()); + { + ReadLockGuard lock(clientMdsAddrsOverrideMutex_); + VLOG(6) << "clientMdsAddrsOverride_ = " << clientMdsAddrsOverride_ + << ", request->mdsaddrs() = " << request->mdsaddrs(); + if (!clientMdsAddrsOverride_.empty() && request->has_mdsaddrs() && + request->mdsaddrs() != clientMdsAddrsOverride_) { + response->set_mdsaddrsoverride(clientMdsAddrsOverride_); + } + } } FSStatusCode FsManager::ReloadFsVolumeSpace() { @@ -1280,5 +1305,31 @@ bool FsManager::FillVolumeInfo(common::Volume* volume) { return true; } +std::string FsManager::GetClientMdsAddrsOverride() { + ReadLockGuard lock(clientMdsAddrsOverrideMutex_); + return clientMdsAddrsOverride_; +} + +void FsManager::SetClientMdsAddrsOverride(const std::string& addrs) { + // always add active mds to override to improve availability + auto 
addrsWithActiveMds = addrs + "," + option_.mdsListenAddr; + WriteLockGuard lock(clientMdsAddrsOverrideMutex_); + clientMdsAddrsOverride_ = addrsWithActiveMds; +} + +void FsManager::Tso(const TsoRequest* request, TsoResponse* response) { + uint64_t ts; + uint64_t timestamp; + auto ret = fsStorage_->Tso(&ts, &timestamp); + if (ret != FSStatusCode::OK) { + LOG(ERROR) << "Tso fail, ret = " << FSStatusCode_Name(ret); + response->set_statuscode(ret); + return; + } + response->set_ts(ts); + response->set_timestamp(timestamp); + response->set_statuscode(ret); +} + } // namespace mds } // namespace curvefs diff --git a/curvefs/src/mds/fs_manager.h b/curvefs/src/mds/fs_manager.h index c7803657a3..c1f3ed1e2b 100644 --- a/curvefs/src/mds/fs_manager.h +++ b/curvefs/src/mds/fs_manager.h @@ -74,6 +74,7 @@ struct FsManagerOption { uint32_t spaceReloadConcurrency = 10; uint32_t clientTimeoutSec = 20; curve::common::S3AdapterOption s3AdapterOption; + std::string mdsListenAddr; }; class FsManager { @@ -202,6 +203,8 @@ class FsManager { void CommitTx(const CommitTxRequest* request, CommitTxResponse* response); + void Tso(const TsoRequest* request, TsoResponse* response); + // periodically check if the mount point is alive void BackEndCheckMountPoint(); void CheckMountPoint(); @@ -210,6 +213,9 @@ class FsManager { bool GetClientAliveTime(const std::string& mountpoint, std::pair* out); + std::string GetClientMdsAddrsOverride(); + void SetClientMdsAddrsOverride(const std::string& addrs); + private: // return 0: ExactlySame; 1: uncomplete, -1: neither int IsExactlySameOrCreateUnComplete(const std::string& fsName, @@ -279,6 +285,9 @@ class FsManager { mutable RWLock recorderMutex_; // fsuage update lock mutable RWLock fsUsageMutex_; + // client mds addrs override + std::string clientMdsAddrsOverride_; + mutable RWLock clientMdsAddrsOverrideMutex_; }; } // namespace mds } // namespace curvefs diff --git a/curvefs/src/mds/fs_storage.cpp b/curvefs/src/mds/fs_storage.cpp index 
b38b9f9803..be5fc466ee 100644 --- a/curvefs/src/mds/fs_storage.cpp +++ b/curvefs/src/mds/fs_storage.cpp @@ -30,6 +30,7 @@ #include "curvefs/src/mds/codec/codec.h" #include "curvefs/src/mds/metric/fs_metric.h" +#include "src/common/timeutility.h" namespace curvefs { namespace mds { @@ -40,6 +41,7 @@ using ::curve::kvstorage::KVStorageClient; bool MemoryFsStorage::Init() { WriteLockGuard writeLockGuard(rwLock_); fsInfoMap_.clear(); + tsId_.store(1); return true; } @@ -186,6 +188,12 @@ FSStatusCode MemoryFsStorage::DeleteFsUsage(const std::string& fsName) { return FSStatusCode::OK; } +FSStatusCode MemoryFsStorage::Tso(uint64_t* ts, uint64_t* timestamp) { + *timestamp = curve::common::TimeUtility::GetTimeofDayMs(); + *ts = tsId_.fetch_add(1, std::memory_order_relaxed); + return FSStatusCode::OK; +} + PersisKVStorage::PersisKVStorage( const std::shared_ptr& storage) : storage_(storage), @@ -193,7 +201,8 @@ PersisKVStorage::PersisKVStorage( fsLock_(), fs_(), idToNameLock_(), - idToName_() {} + idToName_(), + tsIdGen_(new TsIdGenerator(storage_)) {} PersisKVStorage::~PersisKVStorage() = default; @@ -207,8 +216,11 @@ FSStatusCode PersisKVStorage::Get(uint64_t fsId, FsInfoWrapper* fsInfo) { } bool PersisKVStorage::Init() { - bool ret = LoadAllFs(); - return ret; + if (!LoadAllFs()) { + LOG(ERROR) << "Load all fs failed"; + return false; + } + return true; } void PersisKVStorage::Uninit() {} @@ -570,5 +582,14 @@ FSStatusCode PersisKVStorage::DeleteFsUsage(const std::string& fsName) { return FSStatusCode::OK; } +FSStatusCode PersisKVStorage::Tso(uint64_t* ts, uint64_t* timestamp) { + *timestamp = curve::common::TimeUtility::CLockRealTimeMs(); + if (tsIdGen_->GenTsId(ts)) { + return FSStatusCode::OK; + } + LOG(ERROR) << "Gen ts failed"; + return FSStatusCode::INTERNAL_ERROR; +} + } // namespace mds } // namespace curvefs diff --git a/curvefs/src/mds/fs_storage.h b/curvefs/src/mds/fs_storage.h index d2ed8d674d..fcf1105469 100644 --- a/curvefs/src/mds/fs_storage.h +++ 
b/curvefs/src/mds/fs_storage.h @@ -37,6 +37,7 @@ #include "src/common/concurrent/rw_lock.h" #include "src/idgenerator/etcd_id_generator.h" #include "src/kvstorageclient/etcd_client.h" +#include "curvefs/src/mds/idgenerator/ts_id_generator.h" namespace curvefs { namespace mds { @@ -76,6 +77,8 @@ class FsStorage { virtual FSStatusCode GetFsUsage( const std::string& fsName, FsUsage* fsUsage, bool fromCache) = 0; virtual FSStatusCode DeleteFsUsage(const std::string& fsName) = 0; + + virtual FSStatusCode Tso(uint64_t* ts, uint64_t* timestamp) = 0; }; class MemoryFsStorage : public FsStorage { @@ -182,6 +185,8 @@ class MemoryFsStorage : public FsStorage { const std::string& fsName, FsUsage*, bool fromCache) override; FSStatusCode DeleteFsUsage(const std::string& fsName) override; + FSStatusCode Tso(uint64_t* ts, uint64_t* timestamp) override; + private: std::unordered_map fsInfoMap_; curve::common::RWLock rwLock_; @@ -190,6 +195,8 @@ class MemoryFsStorage : public FsStorage { std::unordered_map fsUsageMap_; curve::common::RWLock fsUsedUsageLock_; + + std::atomic tsId_; }; // Persist all data to kvstorage and cache all fsinfo in memory @@ -225,6 +232,8 @@ class PersisKVStorage : public FsStorage { const std::string& fsName, FsUsage*, bool fromCache) override; FSStatusCode DeleteFsUsage(const std::string& fsName) override; + FSStatusCode Tso(uint64_t* ts, uint64_t* timestamp) override; + private: bool LoadAllFs(); @@ -259,6 +268,8 @@ class PersisKVStorage : public FsStorage { // fs usage cache map std::unordered_map fsUsageCache_; mutable RWLock fsUsageCacheMutex_; + + std::unique_ptr tsIdGen_; }; } // namespace mds diff --git a/curvefs/src/mds/idgenerator/BUILD b/curvefs/src/mds/idgenerator/BUILD index fdde4ecd6b..fc465923ab 100644 --- a/curvefs/src/mds/idgenerator/BUILD +++ b/curvefs/src/mds/idgenerator/BUILD @@ -18,7 +18,8 @@ load("//:copts.bzl", "CURVE_DEFAULT_COPTS") cc_library( name = "fs_mds_idgenerator", - hdrs = ["fs_id_generator.h"], + hdrs = 
["fs_id_generator.h", + "ts_id_generator.h"], copts = CURVE_DEFAULT_COPTS, visibility = ["//visibility:public"], deps = [ diff --git a/curvefs/src/mds/idgenerator/ts_id_generator.h b/curvefs/src/mds/idgenerator/ts_id_generator.h new file mode 100644 index 0000000000..68060d1384 --- /dev/null +++ b/curvefs/src/mds/idgenerator/ts_id_generator.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2023 NetEase Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Project: curve + * Created Date: 2023-12-07 + * Author: wanghai01 + */ + +#ifndef CURVEFS_SRC_MDS_IDGENERATOR_TS_ID_GENERATOR_H_ +#define CURVEFS_SRC_MDS_IDGENERATOR_TS_ID_GENERATOR_H_ + +#include "curvefs/src/mds/common/storage_key.h" +#include "src/idgenerator/etcd_id_generator.h" + +namespace curvefs { +namespace mds { + +class TsIdGenerator { + public: + explicit TsIdGenerator( + const std::shared_ptr& client) + : generator_(new curve::idgenerator::EtcdIdGenerator( + client, TS_INFO_KEY_PREFIX, TS_ID_INIT, TS_ID_ALLOCATE_BUNDLE)) {} + + bool GenTsId(uint64_t* id) { + return generator_->GenID(id); + } + + private: + static constexpr uint64_t TS_ID_INIT = 0; + static constexpr uint64_t TS_ID_ALLOCATE_BUNDLE = 100; + + private: + std::unique_ptr generator_; +}; + +} // namespace mds +} // namespace curvefs + +#endif // CURVEFS_SRC_MDS_IDGENERATOR_TS_ID_GENERATOR_H_ diff --git a/curvefs/src/mds/main.cpp b/curvefs/src/mds/main.cpp index 725c29e89c..f506b4e5c8 100644 --- a/curvefs/src/mds/main.cpp +++ 
b/curvefs/src/mds/main.cpp @@ -24,6 +24,7 @@ #include #include "curvefs/src/mds/mds.h" +#include "src/common/log_util.h" #include "src/common/configuration.h" #include "curvefs/src/common/dynamic_vlog.h" @@ -64,6 +65,7 @@ int main(int argc, char **argv) { } // initialize logging module + curve::common::DisableLoggingToStdErr(); google::InitGoogleLogging(argv[0]); conf->PrintConfig(); diff --git a/curvefs/src/mds/mds.cpp b/curvefs/src/mds/mds.cpp index 6e5d8644b5..f0c4403e83 100644 --- a/curvefs/src/mds/mds.cpp +++ b/curvefs/src/mds/mds.cpp @@ -161,6 +161,7 @@ void MDS::InitFsManagerOptions(FsManagerOption* fsManagerOption) { "default value: " << fsManagerOption->spaceReloadConcurrency; + fsManagerOption->mdsListenAddr = conf_->GetStringValue("mds.listen.addr"); ::curve::common::InitS3AdaptorOptionExceptS3InfoOption( conf_.get(), &fsManagerOption->s3AdapterOption); } diff --git a/curvefs/src/mds/mds_service.cpp b/curvefs/src/mds/mds_service.cpp index 0b1c32c76a..17c33156c2 100644 --- a/curvefs/src/mds/mds_service.cpp +++ b/curvefs/src/mds/mds_service.cpp @@ -29,10 +29,9 @@ namespace mds { using mds::Mountpoint; -void MdsServiceImpl::CreateFs(::google::protobuf::RpcController *controller, - const ::curvefs::mds::CreateFsRequest *request, - ::curvefs::mds::CreateFsResponse *response, - ::google::protobuf::Closure *done) { +void MdsServiceImpl::CreateFs(::google::protobuf::RpcController* controller, + const CreateFsRequest* request, CreateFsResponse* response, + ::google::protobuf::Closure* done) { (void)controller; brpc::ClosureGuard doneGuard(done); const std::string &fsName = request->fsname(); @@ -142,10 +141,9 @@ void MdsServiceImpl::CreateFs(::google::protobuf::RpcController *controller, << ", capacity = " << request->capacity(); } -void MdsServiceImpl::MountFs(::google::protobuf::RpcController *controller, - const ::curvefs::mds::MountFsRequest *request, - ::curvefs::mds::MountFsResponse *response, - ::google::protobuf::Closure *done) { +void 
MdsServiceImpl::MountFs(::google::protobuf::RpcController* controller, + const MountFsRequest* request, MountFsResponse* response, + ::google::protobuf::Closure* done) { (void)controller; brpc::ClosureGuard doneGuard(done); const std::string &fsName = request->fsname(); @@ -169,10 +167,9 @@ void MdsServiceImpl::MountFs(::google::protobuf::RpcController *controller, << ", mps: " << response->mutable_fsinfo()->mountpoints_size(); } -void MdsServiceImpl::UmountFs(::google::protobuf::RpcController *controller, - const ::curvefs::mds::UmountFsRequest *request, - ::curvefs::mds::UmountFsResponse *response, - ::google::protobuf::Closure *done) { +void MdsServiceImpl::UmountFs(::google::protobuf::RpcController* controller, + const UmountFsRequest* request, UmountFsResponse* response, + ::google::protobuf::Closure* done) { (void)controller; brpc::ClosureGuard doneGuard(done); const std::string &fsName = request->fsname(); @@ -192,10 +189,9 @@ void MdsServiceImpl::UmountFs(::google::protobuf::RpcController *controller, << ", mountPoint = " << mount.ShortDebugString(); } -void MdsServiceImpl::GetFsInfo(::google::protobuf::RpcController *controller, - const ::curvefs::mds::GetFsInfoRequest *request, - ::curvefs::mds::GetFsInfoResponse *response, - ::google::protobuf::Closure *done) { +void MdsServiceImpl::GetFsInfo(::google::protobuf::RpcController* controller, + const GetFsInfoRequest* request, GetFsInfoResponse* response, + ::google::protobuf::Closure* done) { (void)controller; brpc::ClosureGuard doneGuard(done); @@ -227,10 +223,8 @@ void MdsServiceImpl::GetFsInfo(::google::protobuf::RpcController *controller, << response->ShortDebugString(); } -void MdsServiceImpl::UpdateFsInfo( - ::google::protobuf::RpcController* controller, - const ::curvefs::mds::UpdateFsInfoRequest* request, - ::curvefs::mds::UpdateFsInfoResponse* response, +void MdsServiceImpl::UpdateFsInfo(::google::protobuf::RpcController* controller, + const UpdateFsInfoRequest* request, UpdateFsInfoResponse* 
response, ::google::protobuf::Closure* done) { (void)controller; brpc::ClosureGuard doneGuard(done); @@ -261,10 +255,9 @@ void MdsServiceImpl::UpdateFsInfo( << response->ShortDebugString(); } -void MdsServiceImpl::DeleteFs(::google::protobuf::RpcController *controller, - const ::curvefs::mds::DeleteFsRequest *request, - ::curvefs::mds::DeleteFsResponse *response, - ::google::protobuf::Closure *done) { +void MdsServiceImpl::DeleteFs(::google::protobuf::RpcController* controller, + const DeleteFsRequest* request, DeleteFsResponse* response, + ::google::protobuf::Closure* done) { (void)controller; brpc::ClosureGuard doneGuard(done); const std::string &fsName = request->fsname(); @@ -281,10 +274,9 @@ void MdsServiceImpl::DeleteFs(::google::protobuf::RpcController *controller, } void MdsServiceImpl::AllocateS3Chunk( - ::google::protobuf::RpcController *controller, - const ::curvefs::mds::AllocateS3ChunkRequest *request, - ::curvefs::mds::AllocateS3ChunkResponse *response, - ::google::protobuf::Closure *done) { + ::google::protobuf::RpcController* controller, + const AllocateS3ChunkRequest* request, AllocateS3ChunkResponse* response, + ::google::protobuf::Closure* done) { (void)controller; brpc::ClosureGuard guard(done); @@ -318,10 +310,9 @@ void MdsServiceImpl::AllocateS3Chunk( } void MdsServiceImpl::ListClusterFsInfo( - ::google::protobuf::RpcController *controller, - const ::curvefs::mds::ListClusterFsInfoRequest *request, - ::curvefs::mds::ListClusterFsInfoResponse *response, - ::google::protobuf::Closure *done) { + ::google::protobuf::RpcController* controller, + const ListClusterFsInfoRequest* request, + ListClusterFsInfoResponse* response, ::google::protobuf::Closure* done) { (void)controller; (void)request; @@ -333,10 +324,9 @@ void MdsServiceImpl::ListClusterFsInfo( } void MdsServiceImpl::RefreshSession( - ::google::protobuf::RpcController *controller, - const ::curvefs::mds::RefreshSessionRequest *request, - ::curvefs::mds::RefreshSessionResponse *response, - 
::google::protobuf::Closure *done) { + ::google::protobuf::RpcController* controller, + const RefreshSessionRequest* request, RefreshSessionResponse* response, + ::google::protobuf::Closure* done) { (void)controller; brpc::ClosureGuard guard(done); fsManager_->RefreshSession(request, response); @@ -365,5 +355,30 @@ void MdsServiceImpl::CommitTx(::google::protobuf::RpcController *controller, VLOG(3) << "CommitTx [response]: " << request->DebugString(); } +void MdsServiceImpl::SetClientMdsAddrsOverride( + ::google::protobuf::RpcController *controller, + const ::curvefs::mds::SetClientMdsAddrsOverrideRequest *request, + ::curvefs::mds::SetClientMdsAddrsOverrideResponse *response, + ::google::protobuf::Closure *done) { + (void)controller; + brpc::ClosureGuard guard(done); + VLOG(3) << "SetClientMdsAddrsOverride [request]: " + << request->DebugString(); + fsManager_->SetClientMdsAddrsOverride(request->clientmdsaddrsoverride()); + response->set_statuscode(FSStatusCode::OK); + VLOG(3) << "SetClientMdsAddrsOverride [response]: " + << response->DebugString(); +} + +void MdsServiceImpl::Tso(::google::protobuf::RpcController* controller, + const TsoRequest* request, TsoResponse* response, + ::google::protobuf::Closure* done) { + (void)controller; + brpc::ClosureGuard guard(done); + VLOG(3) << "Tso [request]: " << request->DebugString(); + fsManager_->Tso(request, response); + VLOG(3) << "Tso [response]: " << response->DebugString(); +} + } // namespace mds } // namespace curvefs diff --git a/curvefs/src/mds/mds_service.h b/curvefs/src/mds/mds_service.h index 343b946427..dbcfaaf6f4 100644 --- a/curvefs/src/mds/mds_service.h +++ b/curvefs/src/mds/mds_service.h @@ -76,31 +76,39 @@ class MdsServiceImpl : public MdsService { ::google::protobuf::Closure* done); void AllocateS3Chunk(::google::protobuf::RpcController* controller, - const ::curvefs::mds::AllocateS3ChunkRequest* request, - ::curvefs::mds::AllocateS3ChunkResponse* response, - ::google::protobuf::Closure* done); + const 
AllocateS3ChunkRequest* request, + AllocateS3ChunkResponse* response, ::google::protobuf::Closure* done); - void ListClusterFsInfo( - ::google::protobuf::RpcController* controller, - const ::curvefs::mds::ListClusterFsInfoRequest* request, - ::curvefs::mds::ListClusterFsInfoResponse* response, - ::google::protobuf::Closure* done); + void ListClusterFsInfo(::google::protobuf::RpcController* controller, + const ListClusterFsInfoRequest* request, + ListClusterFsInfoResponse* response, ::google::protobuf::Closure* done); - void RefreshSession(::google::protobuf::RpcController *controller, - const ::curvefs::mds::RefreshSessionRequest *request, - ::curvefs::mds::RefreshSessionResponse *response, - ::google::protobuf::Closure *done); + void RefreshSession(::google::protobuf::RpcController* controller, + const RefreshSessionRequest* request, RefreshSessionResponse* response, + ::google::protobuf::Closure* done); + // reserved for compatibility void GetLatestTxId(::google::protobuf::RpcController* controller, const GetLatestTxIdRequest* request, GetLatestTxIdResponse* response, ::google::protobuf::Closure* done); + // reserved for compatibility void CommitTx(::google::protobuf::RpcController* controller, const CommitTxRequest* request, CommitTxResponse* response, ::google::protobuf::Closure* done); + void SetClientMdsAddrsOverride( + ::google::protobuf::RpcController* controller, + const SetClientMdsAddrsOverrideRequest* request, + SetClientMdsAddrsOverrideResponse* response, + ::google::protobuf::Closure* done); + + void Tso(::google::protobuf::RpcController* controller, + const TsoRequest* request, TsoResponse* response, + ::google::protobuf::Closure* done); + private: std::shared_ptr fsManager_; std::shared_ptr chunkIdAllocator_; diff --git a/curvefs/src/mds/topology/topology.cpp b/curvefs/src/mds/topology/topology.cpp index a929336918..9ed8c90935 100644 --- a/curvefs/src/mds/topology/topology.cpp +++ b/curvefs/src/mds/topology/topology.cpp @@ -227,6 +227,7 @@ 
TopoStatusCode TopologyImpl::RemoveServer(ServerIdType id) { } TopoStatusCode TopologyImpl::RemoveMetaServer(MetaServerIdType id) { + WriteLockGuard wlockPool(poolMutex_); WriteLockGuard wlockServer(serverMutex_); WriteLockGuard wlockMetaServer(metaServerMutex_); auto it = metaServerMap_.find(id); @@ -243,7 +244,6 @@ TopoStatusCode TopologyImpl::RemoveMetaServer(MetaServerIdType id) { metaServerMap_.erase(it); // update pool - WriteLockGuard wlockPool(poolMutex_); PoolIdType poolId = ix->second.GetPoolId(); auto it = poolMap_.find(poolId); if (it != poolMap_.end()) { diff --git a/curvefs/src/mds/topology/topology_storage_codec.cpp b/curvefs/src/mds/topology/topology_storage_codec.cpp index 717a0a624b..7ba8edb105 100644 --- a/curvefs/src/mds/topology/topology_storage_codec.cpp +++ b/curvefs/src/mds/topology/topology_storage_codec.cpp @@ -30,12 +30,12 @@ namespace curvefs { namespace mds { namespace topology { -using curvefs::mds::FS2MEMCACHECLUSTERKEYPREFIX; -using curvefs::mds::MEMCACHECLUSTERKEYEND; -using curvefs::mds::MEMCACHECLUSTERKEYPREFIX; +using curvefs::mds::FS_2_MEMCACHE_CLUSTER_KEY_PREFIX; +using curvefs::mds::MEMCACHE_CLUSTER_KEY_END; +using curvefs::mds::MEMCACHE_CLUSTER_KEY_PREFIX; std::string TopologyStorageCodec::EncodePoolKey(PoolIdType id) { - std::string key = POOLKEYPREFIX; + std::string key = POOL_KEY_PREFIX; size_t prefixLen = TOPOLOGY_PREFIX_LENGTH; key.resize(prefixLen + sizeof(uint64_t)); EncodeBigEndian(&(key[prefixLen]), id); @@ -53,7 +53,7 @@ bool TopologyStorageCodec::DecodePoolData(const std::string &value, } std::string TopologyStorageCodec::EncodeZoneKey(ZoneIdType id) { - std::string key = ZONEKEYPREFIX; + std::string key = ZONE_KEY_PREFIX; size_t prefixLen = TOPOLOGY_PREFIX_LENGTH; key.resize(prefixLen + sizeof(uint64_t)); EncodeBigEndian(&(key[prefixLen]), id); @@ -71,7 +71,7 @@ bool TopologyStorageCodec::DecodeZoneData(const std::string &value, } std::string TopologyStorageCodec::EncodeServerKey(ServerIdType id) { - std::string 
key = SERVERKEYPREFIX; + std::string key = SERVER_KEY_PREFIX; size_t prefixLen = TOPOLOGY_PREFIX_LENGTH; key.resize(prefixLen + sizeof(uint64_t)); EncodeBigEndian(&(key[prefixLen]), id); @@ -89,7 +89,7 @@ bool TopologyStorageCodec::DecodeServerData(const std::string &value, } std::string TopologyStorageCodec::EncodeMetaServerKey(MetaServerIdType id) { - std::string key = METASERVERKEYPREFIX; + std::string key = METASERVER_KEY_PREFIX; size_t prefixLen = TOPOLOGY_PREFIX_LENGTH; key.resize(prefixLen + sizeof(uint64_t)); EncodeBigEndian(&(key[prefixLen]), id); @@ -107,7 +107,7 @@ bool TopologyStorageCodec::DecodeMetaServerData(const std::string &value, } std::string TopologyStorageCodec::EncodeCopySetKey(const CopySetKey &id) { - std::string key = COPYSETKEYPREFIX; + std::string key = COPYSET_KEY_PREFIX; size_t prefixLen = TOPOLOGY_PREFIX_LENGTH; key.resize(prefixLen + sizeof(uint64_t) + sizeof(uint64_t)); EncodeBigEndian(&(key[prefixLen]), id.first); @@ -126,7 +126,7 @@ bool TopologyStorageCodec::DecodeCopySetData(const std::string &value, } std::string TopologyStorageCodec::EncodePartitionKey(PartitionIdType id) { - std::string key = PARTITIONKEYPREFIX; + std::string key = PARTITION_KEY_PREFIX; size_t prefixLen = TOPOLOGY_PREFIX_LENGTH; key.resize(prefixLen + sizeof(uint64_t)); EncodeBigEndian(&(key[prefixLen]), id); @@ -155,7 +155,7 @@ bool TopologyStorageCodec::DecodeClusterInfoData(const std::string &value, std::string TopologyStorageCodec::EncodeMemcacheClusterKey( MetaServerIdType id) { - std::string key = MEMCACHECLUSTERKEYPREFIX; + std::string key = MEMCACHE_CLUSTER_KEY_PREFIX; size_t prefixLen = TOPOLOGY_PREFIX_LENGTH; key.resize(prefixLen + sizeof(uint64_t)); EncodeBigEndian(&(key[prefixLen]), id); @@ -173,7 +173,7 @@ bool TopologyStorageCodec::DecodeMemcacheClusterData(const std::string& value, } std::string TopologyStorageCodec::EncodeFs2MemcacheClusterKey(FsIdType fsId) { - std::string key = FS2MEMCACHECLUSTERKEYPREFIX; + std::string key = 
FS_2_MEMCACHE_CLUSTER_KEY_PREFIX; size_t prefixLen = TOPOLOGY_PREFIX_LENGTH; key.resize(prefixLen + sizeof(uint64_t)); EncodeBigEndian(&(key[prefixLen]), fsId); diff --git a/curvefs/src/mds/topology/topology_storage_codec.h b/curvefs/src/mds/topology/topology_storage_codec.h index d5bdc3df24..a6e3f7bb86 100644 --- a/curvefs/src/mds/topology/topology_storage_codec.h +++ b/curvefs/src/mds/topology/topology_storage_codec.h @@ -34,18 +34,18 @@ namespace curvefs { namespace mds { namespace topology { -using curvefs::mds::POOLKEYPREFIX; -using curvefs::mds::POOLKEYEND; -using curvefs::mds::ZONEKEYPREFIX; -using curvefs::mds::ZONEKEYEND; -using curvefs::mds::SERVERKEYPREFIX; -using curvefs::mds::SERVERKEYEND; -using curvefs::mds::METASERVERKEYPREFIX; -using curvefs::mds::METASERVERKEYEND; -using curvefs::mds::CLUSTERINFOKEY; -using curvefs::mds::COPYSETKEYPREFIX; -using curvefs::mds::COPYSETKEYEND; using curve::common::EncodeBigEndian; +using curvefs::mds::CLUSTER_KEY; +using curvefs::mds::COPYSET_KEY_END; +using curvefs::mds::COPYSET_KEY_PREFIX; +using curvefs::mds::METASERVER_KEY_END; +using curvefs::mds::METASERVER_KEY_PREFIX; +using curvefs::mds::POOL_KEY_END; +using curvefs::mds::POOL_KEY_PREFIX; +using curvefs::mds::SERVER_KEY_END; +using curvefs::mds::SERVER_KEY_PREFIX; +using curvefs::mds::ZONE_KEY_END; +using curvefs::mds::ZONE_KEY_PREFIX; class TopologyStorageCodec { public: diff --git a/curvefs/src/mds/topology/topology_storge_etcd.cpp b/curvefs/src/mds/topology/topology_storge_etcd.cpp index b59eb23a09..c70aa7dd8d 100644 --- a/curvefs/src/mds/topology/topology_storge_etcd.cpp +++ b/curvefs/src/mds/topology/topology_storge_etcd.cpp @@ -40,7 +40,7 @@ bool TopologyStorageEtcd::LoadPool( std::vector out; poolMap->clear(); *maxPoolId = 0; - int errCode = client_->List(POOLKEYPREFIX, POOLKEYEND, &out); + int errCode = client_->List(POOL_KEY_PREFIX, POOL_KEY_END, &out); if (errCode == EtcdErrCode::EtcdKeyNotExist) { return true; } @@ -76,7 +76,7 @@ bool 
TopologyStorageEtcd::LoadZone( std::vector out; zoneMap->clear(); *maxZoneId = 0; - int errCode = client_->List(ZONEKEYPREFIX, ZONEKEYEND, &out); + int errCode = client_->List(ZONE_KEY_PREFIX, ZONE_KEY_END, &out); if (errCode == EtcdErrCode::EtcdKeyNotExist) { return true; } @@ -112,7 +112,7 @@ bool TopologyStorageEtcd::LoadServer( std::vector out; serverMap->clear(); *maxServerId = 0; - int errCode = client_->List(SERVERKEYPREFIX, SERVERKEYEND, &out); + int errCode = client_->List(SERVER_KEY_PREFIX, SERVER_KEY_END, &out); if (errCode == EtcdErrCode::EtcdKeyNotExist) { return true; } @@ -148,7 +148,8 @@ bool TopologyStorageEtcd::LoadMetaServer( std::vector out; metaServerMap->clear(); *maxMetaServerId = 0; - int errCode = client_->List(METASERVERKEYPREFIX, METASERVERKEYEND, &out); + int errCode = + client_->List(METASERVER_KEY_PREFIX, METASERVER_KEY_END, &out); if (errCode == EtcdErrCode::EtcdKeyNotExist) { return true; } @@ -185,7 +186,7 @@ bool TopologyStorageEtcd::LoadCopySet( std::vector out; copySetMap->clear(); copySetIdMaxMap->clear(); - int errCode = client_->List(COPYSETKEYPREFIX, COPYSETKEYEND, &out); + int errCode = client_->List(COPYSET_KEY_PREFIX, COPYSET_KEY_END, &out); if (errCode == EtcdErrCode::EtcdKeyNotExist) { return true; } @@ -223,7 +224,7 @@ bool TopologyStorageEtcd::LoadPartition( std::vector out; partitionMap->clear(); *maxPartitionId = 0; - int errCode = client_->List(PARTITIONKEYPREFIX, PARTITIONKEYEND, &out); + int errCode = client_->List(PARTITION_KEY_PREFIX, PARTITION_KEY_END, &out); if (errCode == EtcdErrCode::EtcdKeyNotExist) { return true; } @@ -508,7 +509,7 @@ bool TopologyStorageEtcd::UpdatePartitions( bool TopologyStorageEtcd::LoadClusterInfo( std::vector *info) { std::string value; - int errCode = client_->Get(CLUSTERINFOKEY, &value); + int errCode = client_->Get(CLUSTER_KEY, &value); if (errCode == EtcdErrCode::EtcdKeyNotExist) { return true; } @@ -528,7 +529,7 @@ bool TopologyStorageEtcd::LoadClusterInfo( } bool 
TopologyStorageEtcd::StorageClusterInfo(const ClusterInformation &info) { - std::string key = CLUSTERINFOKEY; + std::string key = CLUSTER_KEY; std::string value; if (codec_->EncodeClusterInfoData(info, &value) != true) { @@ -551,8 +552,8 @@ bool TopologyStorageEtcd::LoadMemcacheCluster( std::vector out; memcacheClusterMap->clear(); *maxMemCacheClusterId = 0; - int errCode = - client_->List(MEMCACHECLUSTERKEYPREFIX, MEMCACHECLUSTERKEYEND, &out); + int errCode = client_->List( + MEMCACHE_CLUSTER_KEY_PREFIX, MEMCACHE_CLUSTER_KEY_END, &out); if (errCode == EtcdErrCode::EtcdKeyNotExist) { return true; } @@ -617,8 +618,8 @@ bool TopologyStorageEtcd::LoadFs2MemcacheCluster( std::unordered_map* fs2MemcacheCluster) { std::vector> out; fs2MemcacheCluster->clear(); - int errCode = client_->List(FS2MEMCACHECLUSTERKEYPREFIX, - FS2MEMCACHECLUSTERKEYEND, &out); + int errCode = client_->List( + FS_2_MEMCACHE_CLUSTER_KEY_PREFIX, FS_2_MEMCACHE_CLUSTER_KEY_END, &out); if (errCode == EtcdErrCode::EtcdKeyNotExist) { return true; } diff --git a/curvefs/src/metaserver/BUILD b/curvefs/src/metaserver/BUILD index df1696159a..1de9d79033 100644 --- a/curvefs/src/metaserver/BUILD +++ b/curvefs/src/metaserver/BUILD @@ -27,6 +27,9 @@ cc_library( ["copyset/*.cpp"], ) + glob( ["storage/*.cpp"], + exclude = [ + "storage/converter.cpp", + ], ) + glob( ["streaming/*.cpp"], ) + glob( @@ -40,6 +43,9 @@ cc_library( ["copyset/*.h"], ) + glob( ["storage/*.h"], + exclude = [ + "storage/converter.h", + ], ) + glob( ["streaming/*.h"], ) + glob( @@ -51,6 +57,7 @@ cc_library( visibility = ["//visibility:public"], deps = [ ":metaserver_s3_lib", + ":metaserver_storage_conv", "//curvefs/proto:cli2_cc_proto", "//curvefs/proto:copyset_cc_proto", "//curvefs/proto:curvefs_heartbeat_cc_proto", @@ -77,6 +84,21 @@ cc_library( ], ) +cc_library( + name = "metaserver_storage_conv", + srcs = ["storage/converter.cpp"], + hdrs = ["storage/converter.h", + "storage/common.h",], + copts = CURVE_DEFAULT_COPTS, + visibility = 
["//visibility:public"], + deps = [ + "//external:glog", + "//src/common:curve_common", + "//curvefs/proto:metaserver_cc_proto", + "@com_google_absl//absl/container:btree", + ], +) + cc_library( name = "metaserver_s3_lib", srcs = glob(["s3/*.cpp", "mdsclient/*.cpp"]), diff --git a/curvefs/src/metaserver/copyset/concurrent_apply_queue.cpp b/curvefs/src/metaserver/copyset/concurrent_apply_queue.cpp index a058163439..26f49e9b26 100644 --- a/curvefs/src/metaserver/copyset/concurrent_apply_queue.cpp +++ b/curvefs/src/metaserver/copyset/concurrent_apply_queue.cpp @@ -20,7 +20,6 @@ * Author: Xinlong-Chen */ - #include "curvefs/src/metaserver/copyset/concurrent_apply_queue.h" #include diff --git a/curvefs/src/metaserver/copyset/copyset_node.cpp b/curvefs/src/metaserver/copyset/copyset_node.cpp index 449886204f..a235a7d886 100644 --- a/curvefs/src/metaserver/copyset/copyset_node.cpp +++ b/curvefs/src/metaserver/copyset/copyset_node.cpp @@ -170,7 +170,7 @@ bool CopysetNode::Start() { LOG(ERROR) << "Fail to init raft node, copyset: " << name_; return false; } - + metaStore_->LoadDeletedInodes(); LOG(INFO) << "Run copyset success, copyset: " << name_; return true; } diff --git a/curvefs/src/metaserver/copyset/meta_operator.cpp b/curvefs/src/metaserver/copyset/meta_operator.cpp index 456fd9c361..356ab12d35 100644 --- a/curvefs/src/metaserver/copyset/meta_operator.cpp +++ b/curvefs/src/metaserver/copyset/meta_operator.cpp @@ -139,6 +139,7 @@ OPERATOR_CAN_BY_PASS_PROPOSE(GetInode); OPERATOR_CAN_BY_PASS_PROPOSE(BatchGetInodeAttr); OPERATOR_CAN_BY_PASS_PROPOSE(BatchGetXAttr); OPERATOR_CAN_BY_PASS_PROPOSE(GetVolumeExtent); +OPERATOR_CAN_BY_PASS_PROPOSE(CheckTxStatus); #undef OPERATOR_CAN_BY_PASS_PROPOSE @@ -184,6 +185,10 @@ OPERATOR_ON_APPLY(CreateManageInode); OPERATOR_ON_APPLY(CreatePartition); OPERATOR_ON_APPLY(DeletePartition); OPERATOR_ON_APPLY(PrepareRenameTx); +OPERATOR_ON_APPLY(PrewriteRenameTx); +OPERATOR_ON_APPLY(CheckTxStatus); +OPERATOR_ON_APPLY(ResolveTxLock); 
+OPERATOR_ON_APPLY(CommitTx); OPERATOR_ON_APPLY(UpdateVolumeExtent); OPERATOR_ON_APPLY(UpdateDeallocatableBlockGroup); @@ -311,6 +316,9 @@ OPERATOR_ON_APPLY_FROM_LOG(CreateManageInode); OPERATOR_ON_APPLY_FROM_LOG(CreatePartition); OPERATOR_ON_APPLY_FROM_LOG(DeletePartition); OPERATOR_ON_APPLY_FROM_LOG(PrepareRenameTx); +OPERATOR_ON_APPLY_FROM_LOG(PrewriteRenameTx); +OPERATOR_ON_APPLY_FROM_LOG(ResolveTxLock); +OPERATOR_ON_APPLY_FROM_LOG(CommitTx); OPERATOR_ON_APPLY_FROM_LOG(UpdateVolumeExtent); OPERATOR_ON_APPLY_FROM_LOG(UpdateDeallocatableBlockGroup); @@ -346,6 +354,7 @@ READONLY_OPERATOR_ON_APPLY_FROM_LOG(GetInode); READONLY_OPERATOR_ON_APPLY_FROM_LOG(BatchGetInodeAttr); READONLY_OPERATOR_ON_APPLY_FROM_LOG(BatchGetXAttr); READONLY_OPERATOR_ON_APPLY_FROM_LOG(GetVolumeExtent); +READONLY_OPERATOR_ON_APPLY_FROM_LOG(CheckTxStatus); #undef READONLY_OPERATOR_ON_APPLY_FROM_LOG @@ -371,6 +380,10 @@ OPERATOR_REDIRECT(CreateManageInode); OPERATOR_REDIRECT(CreatePartition); OPERATOR_REDIRECT(DeletePartition); OPERATOR_REDIRECT(PrepareRenameTx); +OPERATOR_REDIRECT(PrewriteRenameTx); +OPERATOR_REDIRECT(CheckTxStatus); +OPERATOR_REDIRECT(ResolveTxLock); +OPERATOR_REDIRECT(CommitTx); OPERATOR_REDIRECT(GetVolumeExtent); OPERATOR_REDIRECT(UpdateVolumeExtent); OPERATOR_REDIRECT(UpdateDeallocatableBlockGroup); @@ -398,6 +411,10 @@ OPERATOR_ON_FAILED(CreateManageInode); OPERATOR_ON_FAILED(CreatePartition); OPERATOR_ON_FAILED(DeletePartition); OPERATOR_ON_FAILED(PrepareRenameTx); +OPERATOR_ON_FAILED(PrewriteRenameTx); +OPERATOR_ON_FAILED(CheckTxStatus); +OPERATOR_ON_FAILED(ResolveTxLock); +OPERATOR_ON_FAILED(CommitTx); OPERATOR_ON_FAILED(GetVolumeExtent); OPERATOR_ON_FAILED(UpdateVolumeExtent); OPERATOR_ON_FAILED(UpdateDeallocatableBlockGroup); @@ -423,6 +440,10 @@ OPERATOR_HASH_CODE(DeleteInode); OPERATOR_HASH_CODE(CreateRootInode); OPERATOR_HASH_CODE(CreateManageInode); OPERATOR_HASH_CODE(PrepareRenameTx); +OPERATOR_HASH_CODE(PrewriteRenameTx); +OPERATOR_HASH_CODE(CheckTxStatus); 
+OPERATOR_HASH_CODE(ResolveTxLock); +OPERATOR_HASH_CODE(CommitTx); OPERATOR_HASH_CODE(DeletePartition); OPERATOR_HASH_CODE(GetVolumeExtent); OPERATOR_HASH_CODE(UpdateVolumeExtent); @@ -461,6 +482,10 @@ OPERATOR_TYPE(DeleteInode); OPERATOR_TYPE(CreateRootInode); OPERATOR_TYPE(CreateManageInode); OPERATOR_TYPE(PrepareRenameTx); +OPERATOR_TYPE(PrewriteRenameTx); +OPERATOR_TYPE(CheckTxStatus); +OPERATOR_TYPE(ResolveTxLock); +OPERATOR_TYPE(CommitTx); OPERATOR_TYPE(CreatePartition); OPERATOR_TYPE(DeletePartition); OPERATOR_TYPE(GetVolumeExtent); diff --git a/curvefs/src/metaserver/copyset/meta_operator.h b/curvefs/src/metaserver/copyset/meta_operator.h index b78aa3e78f..d861913999 100644 --- a/curvefs/src/metaserver/copyset/meta_operator.h +++ b/curvefs/src/metaserver/copyset/meta_operator.h @@ -470,6 +470,65 @@ class DeletePartitionOperator : public MetaOperator { void OnFailed(MetaStatusCode code) override; }; +class GetVolumeExtentOperator : public MetaOperator { + public: + using MetaOperator::MetaOperator; + + void OnApply(int64_t index, google::protobuf::Closure* done, + uint64_t startTimeUs) override; + + void OnApplyFromLog(int64_t index, uint64_t startTimeUs) override; + + uint64_t HashCode() const override; + + OperatorType GetOperatorType() const override; + + private: + void Redirect() override; + + void OnFailed(MetaStatusCode code) override; + + bool CanBypassPropose() const override; +}; + +class UpdateVolumeExtentOperator : public MetaOperator { + public: + using MetaOperator::MetaOperator; + + void OnApply(int64_t index, google::protobuf::Closure* done, + uint64_t startTimeUs) override; + + void OnApplyFromLog(int64_t index, uint64_t startTimeUs) override; + + uint64_t HashCode() const override; + + OperatorType GetOperatorType() const override; + + private: + void Redirect() override; + + void OnFailed(MetaStatusCode code) override; +}; + +class UpdateDeallocatableBlockGroupOperator : public MetaOperator { + public: + using MetaOperator::MetaOperator; + 
+ void OnApply(int64_t index, google::protobuf::Closure* done, + uint64_t startTimeUs) override; + + void OnApplyFromLog(int64_t index, uint64_t startTimeUs) override; + + uint64_t HashCode() const override; + + OperatorType GetOperatorType() const override; + + private: + void Redirect() override; + + void OnFailed(MetaStatusCode code) override; +}; + class PrepareRenameTxOperator : public MetaOperator { public: using MetaOperator::MetaOperator; @@ -489,7 +548,26 @@ class PrepareRenameTxOperator : public MetaOperator { void OnFailed(MetaStatusCode code) override; }; -class GetVolumeExtentOperator : public MetaOperator { +class PrewriteRenameTxOperator : public MetaOperator { + public: + using MetaOperator::MetaOperator; + + void OnApply(int64_t index, google::protobuf::Closure* done, + uint64_t startTimeUs) override; + + void OnApplyFromLog(int64_t index, uint64_t startTimeUs) override; + + uint64_t HashCode() const override; + + OperatorType GetOperatorType() const override; + + private: + void Redirect() override; + + void OnFailed(MetaStatusCode code) override; +}; + +class CheckTxStatusOperator : public MetaOperator { public: using MetaOperator::MetaOperator; @@ -510,7 +588,7 @@ class GetVolumeExtentOperator : public MetaOperator { bool CanBypassPropose() const override; }; -class UpdateVolumeExtentOperator : public MetaOperator { +class ResolveTxLockOperator : public MetaOperator { public: using MetaOperator::MetaOperator; @@ -529,7 +607,7 @@ class UpdateVolumeExtentOperator : public MetaOperator { void OnFailed(MetaStatusCode code) override; }; -class UpdateDeallocatableBlockGroupOperator : public MetaOperator { +class CommitTxOperator : public MetaOperator { public: using MetaOperator::MetaOperator; diff --git a/curvefs/src/metaserver/copyset/operator_type.cpp b/curvefs/src/metaserver/copyset/operator_type.cpp index adcee67671..e7627f6712 100644 --- a/curvefs/src/metaserver/copyset/operator_type.cpp +++ b/curvefs/src/metaserver/copyset/operator_type.cpp @@ 
-68,6 +68,14 @@ const char* OperatorTypeName(OperatorType type) { return "UpdateVolumeExtent"; case OperatorType::UpdateDeallocatableBlockGroup: return "UpdateDeallocatableBlockGroup"; + case OperatorType::PrewriteRenameTx: + return "PrewriteRenameTx"; + case OperatorType::CheckTxStatus: + return "CheckTxStatus"; + case OperatorType::ResolveTxLock: + return "ResolveTxLock"; + case OperatorType::CommitTx: + return "CommitTx"; // Add new case before `OperatorType::OperatorTypeMax` case OperatorType::OperatorTypeMax: break; diff --git a/curvefs/src/metaserver/copyset/operator_type.h b/curvefs/src/metaserver/copyset/operator_type.h index c2e54c44bf..cd8a97d987 100644 --- a/curvefs/src/metaserver/copyset/operator_type.h +++ b/curvefs/src/metaserver/copyset/operator_type.h @@ -53,6 +53,10 @@ enum class OperatorType : uint32_t { UpdateVolumeExtent = 16, CreateManageInode = 17, UpdateDeallocatableBlockGroup = 18, + PrewriteRenameTx = 19, + CheckTxStatus = 20, + ResolveTxLock = 21, + CommitTx = 22, // NOTE: // Add new operator before `OperatorTypeMax` diff --git a/curvefs/src/metaserver/copyset/raft_log_codec.cpp b/curvefs/src/metaserver/copyset/raft_log_codec.cpp index d70799f25c..c1f36e5233 100644 --- a/curvefs/src/metaserver/copyset/raft_log_codec.cpp +++ b/curvefs/src/metaserver/copyset/raft_log_codec.cpp @@ -154,6 +154,18 @@ std::unique_ptr RaftLogCodec::Decode(CopysetNode* node, case OperatorType::PrepareRenameTx: return ParseFromRaftLog(node, type, meta); + case OperatorType::PrewriteRenameTx: + return ParseFromRaftLog(node, type, meta); + case OperatorType::CheckTxStatus: + return ParseFromRaftLog(node, type, meta); + case OperatorType::ResolveTxLock: + return ParseFromRaftLog(node, type, meta); + case OperatorType::CommitTx: + return ParseFromRaftLog( + node, type, meta); case OperatorType::GetOrModifyS3ChunkInfo: return ParseFromRaftLog( diff --git a/curvefs/src/metaserver/dentry_manager.cpp b/curvefs/src/metaserver/dentry_manager.cpp index 46b0102369..d1bb0a053c 
100644 --- a/curvefs/src/metaserver/dentry_manager.cpp +++ b/curvefs/src/metaserver/dentry_manager.cpp @@ -76,10 +76,10 @@ void DentryManager::Log4Code(const std::string& request, MetaStatusCode rc) { } MetaStatusCode DentryManager::CreateDentry(const Dentry& dentry, - int64_t logIndex) { + int64_t logIndex, TxLock* txLock) { CHECK_APPLIED(); Log4Dentry("CreateDentry", dentry); - MetaStatusCode rc = dentryStorage_->Insert(dentry, logIndex); + MetaStatusCode rc = dentryStorage_->Insert(dentry, logIndex, txLock); Log4Code("CreateDentry", rc); return rc; } @@ -95,26 +95,28 @@ MetaStatusCode DentryManager::CreateDentry(const DentryVec& vec, bool merge, } MetaStatusCode DentryManager::DeleteDentry(const Dentry& dentry, - int64_t logIndex) { + int64_t logIndex, TxLock* txLock) { CHECK_APPLIED(); Log4Dentry("DeleteDentry", dentry); - MetaStatusCode rc = dentryStorage_->Delete(dentry, logIndex); + MetaStatusCode rc = dentryStorage_->Delete(dentry, logIndex, txLock); Log4Code("DeleteDentry", rc); return rc; } -MetaStatusCode DentryManager::GetDentry(Dentry* dentry) { +MetaStatusCode DentryManager::GetDentry(Dentry* dentry, TxLock* txLock) { Log4Dentry("GetDentry", *dentry); - MetaStatusCode rc = dentryStorage_->Get(dentry); + MetaStatusCode rc = dentryStorage_->Get(dentry, txLock); Log4Code("GetDentry", rc); return rc; } MetaStatusCode DentryManager::ListDentry(const Dentry& dentry, std::vector* dentrys, - uint32_t limit, bool onlyDir) { + uint32_t limit, bool onlyDir, + TxLock* txLock) { Log4Dentry("ListDentry", dentry); - MetaStatusCode rc = dentryStorage_->List(dentry, dentrys, limit, onlyDir); + MetaStatusCode rc = dentryStorage_->List( + dentry, dentrys, limit, onlyDir, txLock); Log4Code("ListDentry", rc); return rc; } @@ -134,5 +136,94 @@ MetaStatusCode DentryManager::HandleRenameTx(const std::vector& dentrys, return rc; } +MetaStatusCode DentryManager::PrewriteRenameTx( + const std::vector& dentrys, + const TxLock& txLock, int64_t logIndex, TxLock* out) { + 
std::stringstream ss; + for (const auto& dentry : dentrys) { + ss << dentry.ShortDebugString() << ", "; + } + VLOG(1) << "PrewriteRenameTx request, dentrys = (" << ss.str() + << "), txLock = ("<< txLock.ShortDebugString() << ")"; + auto rc = dentryStorage_->PrewriteTx(dentrys, txLock, logIndex, out); + if (rc != MetaStatusCode::OK && rc != MetaStatusCode::TX_WRITE_CONFLICT + && rc != MetaStatusCode::TX_KEY_LOCKED) { + LOG(ERROR) << "PrewriteRenameTx failed, dentrys = (" << ss.str() + << "), inLock = (" << txLock.ShortDebugString() + << "), retCode = " << MetaStatusCode_Name(rc); + } else { + VLOG(1) << "PrewriteRenameTx success, dentrys = (" << ss.str() + << "), inLock = ("<< txLock.ShortDebugString() << ")" + << ", outLock = (" << out->ShortDebugString() << ")" + << ", status = " << MetaStatusCode_Name(rc); + } + return rc; +} + +MetaStatusCode DentryManager::CheckTxStatus(const std::string& primaryKey, + uint64_t startTs, uint64_t curTimestamp, int64_t logIndex) { + LOG(INFO) << "CheckTxStatus request, primaryKey = " << primaryKey + << ", startTs = " << startTs + << ", curTimestamp = " << curTimestamp; + auto rc = dentryStorage_->CheckTxStatus(primaryKey, startTs, curTimestamp, + logIndex); + if (rc != MetaStatusCode::TX_COMMITTED && + rc != MetaStatusCode::TX_ROLLBACKED && + rc != MetaStatusCode::TX_INPROGRESS) { + LOG(ERROR) << "CheckTxStatus failed, primaryKey = " << primaryKey + << ", startTs = " << startTs + << ", curTimestamp = " << curTimestamp + << ", retCode = " << MetaStatusCode_Name(rc); + } else { + LOG(INFO) << "CheckTxStatus success, primaryKey = " << primaryKey + << ", startTs = " << startTs + << ", curTimestamp = " << curTimestamp; + } + return rc; +} + +MetaStatusCode DentryManager::ResolveTxLock(const Dentry& dentry, + uint64_t startTs, uint64_t commitTs, int64_t logIndex) { + LOG(INFO) << "ResolveTxLock request, dentry = (" + << dentry.ShortDebugString() << "), startTs = " << startTs + << ", commitTs = " << commitTs; + auto rc = 
dentryStorage_->ResolveTxLock( + dentry, startTs, commitTs, logIndex); + if (rc != MetaStatusCode::OK) { + LOG(ERROR) << "ResolveTxLock failed, dentry = (" + << dentry.ShortDebugString() << "), startTs = " << startTs + << ", commitTs = " << commitTs + << ", retCode = " << MetaStatusCode_Name(rc); + } else { + LOG(INFO) << "ResolveTxLock success, dentry = (" + << dentry.ShortDebugString() << "), startTs = " << startTs + << ", commitTs = " << commitTs; + } + return rc; +} + +MetaStatusCode DentryManager::CommitTx(const std::vector& dentrys, + uint64_t startTs, uint64_t commitTs, int64_t logIndex) { + std::stringstream ss; + for (const auto& dentry : dentrys) { + ss << dentry.ShortDebugString() << ", "; + } + VLOG(1) << "CommitTx request, dentrys = (" << ss.str() + << "), startTs = " << startTs + << ", commitTs = " << commitTs; + auto rc = dentryStorage_->CommitTx(dentrys, startTs, commitTs, logIndex); + if (rc != MetaStatusCode::OK) { + LOG(ERROR) << "CommitTx failed, dentrys = (" << ss.str() + << "), startTs = " << startTs + << ", commitTs = " << commitTs + << ", retCode = " << MetaStatusCode_Name(rc); + } else { + VLOG(1) << "CommitTx success, dentrys = (" << ss.str() + << "), startTs = " << startTs + << ", commitTs = " << commitTs; + } + return rc; +} + } // namespace metaserver } // namespace curvefs diff --git a/curvefs/src/metaserver/dentry_manager.h b/curvefs/src/metaserver/dentry_manager.h index ef5ca8305c..fbcf605473 100644 --- a/curvefs/src/metaserver/dentry_manager.h +++ b/curvefs/src/metaserver/dentry_manager.h @@ -42,25 +42,39 @@ class DentryManager { bool Init(); - MetaStatusCode CreateDentry(const Dentry& dentry, int64_t logIndex); + MetaStatusCode CreateDentry(const Dentry& dentry, int64_t logIndex, + TxLock* txLock = nullptr); // only invoked from snapshot loadding MetaStatusCode CreateDentry(const DentryVec& vec, bool merge, int64_t logIndex); - MetaStatusCode DeleteDentry(const Dentry& dentry, int64_t logIndex); + MetaStatusCode DeleteDentry(const 
Dentry& dentry, int64_t logIndex, + TxLock* txLock = nullptr); - MetaStatusCode GetDentry(Dentry* dentry); + MetaStatusCode GetDentry(Dentry* dentry, TxLock* txLock = nullptr); MetaStatusCode ListDentry(const Dentry& dentry, std::vector* dentrys, uint32_t limit, - bool onlyDir = false); + bool onlyDir = false, TxLock* txLock = nullptr); void ClearDentry(); MetaStatusCode HandleRenameTx(const std::vector& dentrys, int64_t logIndex); + MetaStatusCode PrewriteRenameTx(const std::vector& dentrys, + const TxLock& txLock, int64_t logIndex, TxLock* out); + + MetaStatusCode CheckTxStatus(const std::string& primaryKey, + uint64_t startTs, uint64_t curTimestamp, int64_t logIndex); + + MetaStatusCode ResolveTxLock(const Dentry& dentry, + uint64_t startTs, uint64_t commitTs, int64_t logIndex); + + MetaStatusCode CommitTx(const std::vector& dentrys, + uint64_t startTs, uint64_t commitTs, int64_t logIndex); + private: void Log4Dentry(const std::string& request, const Dentry& dentry); void Log4Code(const std::string& request, MetaStatusCode rc); diff --git a/curvefs/src/metaserver/dentry_storage.cpp b/curvefs/src/metaserver/dentry_storage.cpp index 57fc70c1cf..3023bbc9e9 100644 --- a/curvefs/src/metaserver/dentry_storage.cpp +++ b/curvefs/src/metaserver/dentry_storage.cpp @@ -38,17 +38,30 @@ namespace curvefs { namespace metaserver { +namespace storage { + DECLARE_int32(tx_lock_ttl_ms); +} + using ::curve::common::ReadLockGuard; using ::curve::common::StringStartWith; using ::curve::common::WriteLockGuard; using ::curvefs::metaserver::storage::Key4Dentry; using ::curvefs::metaserver::storage::Prefix4AllDentry; using ::curvefs::metaserver::storage::Prefix4SameParentDentry; +using ::curvefs::metaserver::storage::Prefix4TxWrite; +using ::curvefs::metaserver::storage::Key4TxWrite; using ::curvefs::metaserver::storage::Status; +using ::curvefs::metaserver::storage::FLAGS_tx_lock_ttl_ms; + +const char* DentryStorage::kDentryAppliedKey("dentry"); +const char* 
DentryStorage::kDentryCountKey("count"); +const char* DentryStorage::kHandleTxKey("handleTx"); +const char* DentryStorage::kPendingTxKey("pendingTx"); +const char* DentryStorage::kTxLatestCommit("latestCommit"); bool operator==(const Dentry& lhs, const Dentry& rhs) { return EQUAL(fsid) && EQUAL(parentinodeid) && EQUAL(name) && EQUAL(txid) && - EQUAL(inodeid) && EQUAL(flag); + EQUAL(inodeid); } bool operator<(const Dentry& lhs, const Dentry& rhs) { @@ -65,6 +78,9 @@ static bool HasDeleteMarkFlag(const Dentry& dentry) { return (dentry.flag() & DentryFlag::DELETE_MARK_FLAG) != 0; } +/* +* DentryVector is a wrapper of DentryVec +*/ DentryVector::DentryVector(DentryVec* vec) : vec_(vec), nPendingAdd_(0), nPendingDel_(0) {} @@ -114,6 +130,9 @@ void DentryVector::Confirm(uint64_t* count) { *count = *count + nPendingAdd_ - nPendingDel_; } +/* +* DentryList +*/ DentryList::DentryList(std::vector* list, uint32_t limit, const std::string& exclude, uint64_t maxTxId, bool onlyDir) @@ -124,7 +143,7 @@ DentryList::DentryList(std::vector* list, uint32_t limit, maxTxId_(maxTxId), onlyDir_(onlyDir) {} -void DentryList::PushBack(DentryVec* vec) { +void DentryList::PushBack(DentryVec* vec, bool* realEntry) { // NOTE: it's a cheap operation becacuse the size of // dentryVec must less than 2 BTree dentrys; @@ -148,28 +167,22 @@ void DentryList::PushBack(DentryVec* vec) { } return; } + *realEntry = true; list_->push_back(*last); VLOG(9) << "Push dentry, dentry = (" << last->ShortDebugString() << ")"; } -uint32_t DentryList::Size() { return size_; } - -bool DentryList::IsFull() { return limit_ != 0 && size_ >= limit_; } - -const char* DentryStorage::kDentryAppliedKey("dentry"); -const char* DentryStorage::kDentryCountKey("count"); -const char* DentryStorage::kHandleTxKey("handleTx"); -const char* DentryStorage::kPendingTxKey("pendingTx"); +uint32_t DentryList::Size() { + return size_; +} -bool DentryStorage::Init() { - auto s = GetDentryCount(&nDentry_); - if (s.ok() || s.IsNotFound()) { 
- s = GetHandleTxIndex(&handleTxIndex_); - return s.ok() || s.IsNotFound(); - } - return false; +bool DentryList::IsFull() { + return limit_ != 0 && size_ >= limit_; } +/* +* DentryStorage +*/ DentryStorage::DentryStorage(std::shared_ptr kvStorage, std::shared_ptr nameGenerator, uint64_t nDentry) @@ -178,135 +191,45 @@ DentryStorage::DentryStorage(std::shared_ptr kvStorage, table4AppliedIndex_(nameGenerator->GetAppliedIndexTableName()), table4Transaction_(nameGenerator->GetTransactionTableName()), table4DentryCount_(nameGenerator->GetDentryCountTableName()), + table4TxLock_(nameGenerator->GetTxLockTableName()), + table4TxWrite_(nameGenerator->GetTxWriteTableName()), handleTxIndex_(-1), nDentry_(nDentry), - conv_() { + conv_(), + latestCommit_(0) { // NOTE: for compatibility with older versions // we cannot ignore `nDentry` argument // try get dentry count for rocksdb // if we got it, replace old value } -std::string DentryStorage::DentryKey(const Dentry& dentry) { - Key4Dentry key(dentry.fsid(), dentry.parentinodeid(), dentry.name()); - return conv_.SerializeToString(key); -} - -bool DentryStorage::CompressDentry(storage::StorageTransaction* txn, - DentryVec* vec, BTree* dentrys, - uint64_t* outCount) { - DentryVector vector(vec); - std::vector deleted; - if (dentrys->size() == 2) { - deleted.push_back(*dentrys->begin()); +bool DentryStorage::Init() { + auto s = GetDentryCount(&nDentry_); + if (!s.ok() && !s.IsNotFound()) { + LOG(ERROR) << "Get dentry count failed, status = " << s.ToString(); + return false; } - if (HasDeleteMarkFlag(*dentrys->rbegin())) { - deleted.push_back(*dentrys->rbegin()); + s = GetHandleTxIndex(&handleTxIndex_); + if (!s.ok() && !s.IsNotFound()) { + LOG(ERROR) << "Get handle tx index failed, status = " << s.ToString(); + return false; } - for (const auto& dentry : deleted) { - vector.Delete(dentry); + s = GetLatestCommit(&latestCommit_); + if (!s.ok() && !s.IsNotFound()) { + LOG(ERROR) << "Get latest commit failed, status = " << 
s.ToString(); + return false; } - const char* step = "Compress dentry from transaction"; - Status s; - std::string skey = DentryKey(*dentrys->begin()); - do { - if (vec->dentrys_size() == 0) { // delete directly - s = txn->SDel(table4Dentry_, skey); - } else { - s = txn->SSet(table4Dentry_, skey, *vec); - } - if (!s.ok()) { - break; - } - uint64_t countCopy = *outCount; - vector.Confirm(&countCopy); - s = SetDentryCount(txn, countCopy); - if (!s.ok()) { - step = "Insert dentry count to transaction"; - break; - } - *outCount = countCopy; - return true; - } while (false); - LOG(ERROR) << step << " failed, status = " << s.ToString(); - return false; + return true; } -// NOTE: Find() return the dentry which has the latest txid, -// and it will clean the old txid's dentry if you specify compress to true -MetaStatusCode DentryStorage::Find(const Dentry& in, Dentry* out, - DentryVec* vec) { - std::string skey = DentryKey(in); - Status s = kvStorage_->SGet(table4Dentry_, skey, vec); - if (s.IsNotFound()) { - return MetaStatusCode::NOT_FOUND; - } else if (!s.ok()) { - return MetaStatusCode::STORAGE_INTERNAL_ERROR; - } - - // status = OK - BTree dentrys; - DentryVector vector(vec); - vector.Filter(in.txid(), &dentrys); - size_t size = dentrys.size(); - if (size > 2) { - LOG(ERROR) << "There are more than 2 dentrys"; - return MetaStatusCode::NOT_FOUND; - } else if (size == 0) { - return MetaStatusCode::NOT_FOUND; - } - - // size == 1 || size == 2 - MetaStatusCode rc; - if (HasDeleteMarkFlag(*dentrys.rbegin())) { - rc = MetaStatusCode::NOT_FOUND; - } else { - rc = MetaStatusCode::OK; - *out = *dentrys.rbegin(); - } - return rc; +std::string DentryStorage::DentryKey(const Dentry& dentry) { + Key4Dentry key(dentry.fsid(), dentry.parentinodeid(), dentry.name()); + return conv_.SerializeToString(key); } -// NOTE: Find() return the dentry which has the latest txid, -// and it will clean the old txid's dentry if you specify compressOutCount to -// non-nullptr compressOutCount must 
point to a variable that value is equal -// with `nDentry_` -MetaStatusCode DentryStorage::Find(storage::StorageTransaction* txn, - const Dentry& in, Dentry* out, - DentryVec* vec, uint64_t* compressOutCount) { - std::string skey = DentryKey(in); - Status s = txn->SGet(table4Dentry_, skey, vec); - if (s.IsNotFound()) { - return MetaStatusCode::NOT_FOUND; - } else if (!s.ok()) { - return MetaStatusCode::STORAGE_INTERNAL_ERROR; - } - // status = OK - BTree dentrys; - DentryVector vector(vec); - vector.Filter(in.txid(), &dentrys); - size_t size = dentrys.size(); - if (size > 2) { - LOG(ERROR) << "There are more than 2 dentrys"; - return MetaStatusCode::NOT_FOUND; - } else if (size == 0) { - return MetaStatusCode::NOT_FOUND; - } - - // size == 1 || size == 2 - MetaStatusCode rc; - if (HasDeleteMarkFlag(*dentrys.rbegin())) { - rc = MetaStatusCode::NOT_FOUND; - } else { - rc = MetaStatusCode::OK; - *out = *dentrys.rbegin(); - } - - if (compressOutCount != nullptr && - !CompressDentry(txn, vec, &dentrys, compressOutCount)) { - rc = MetaStatusCode::STORAGE_INTERNAL_ERROR; - } - return rc; +std::string DentryStorage::TxWriteKey(const Dentry& dentry, uint64_t ts) { + Key4TxWrite key(dentry.fsid(), dentry.parentinodeid(), dentry.name(), ts); + return conv_.SerializeToString(key); } MetaStatusCode DentryStorage::GetAppliedIndex(int64_t* index) { @@ -406,226 +329,147 @@ storage::Status DentryStorage::GetHandleTxIndex(int64_t* index) { return s; } -MetaStatusCode DentryStorage::Insert(const Dentry& dentry, int64_t logIndex) { - WriteLockGuard lg(rwLock_); - - Dentry out; - DentryVec vec; - std::shared_ptr txn; - storage::Status s; - const char* step = "Begin transaction"; +bool DentryStorage::CompressDentry(storage::StorageTransaction* txn, + DentryVec* vec, BTree* dentrys, + uint64_t* outCount) { + DentryVector vector(vec); + std::vector deleted; + if (dentrys->size() == 2) { + deleted.push_back(*dentrys->begin()); + } + if (HasDeleteMarkFlag(*dentrys->rbegin())) { + 
deleted.push_back(*dentrys->rbegin()); + } + for (const auto& dentry : deleted) { + vector.Delete(dentry); + } + const char* step = "Compress dentry from transaction"; + Status s; + std::string skey = DentryKey(*dentrys->begin()); do { - txn = kvStorage_->BeginTransaction(); - if (txn == nullptr) { - break; - } - uint64_t count = nDentry_; - s = SetAppliedIndex(txn.get(), logIndex); - if (!s.ok()) { - step = "Insert applied index to transaction"; - break; - } - MetaStatusCode rc = Find(txn.get(), dentry, &out, &vec, &count); - if (rc == MetaStatusCode::OK) { - auto s = txn->Commit(); - if (!s.ok()) { - step = "Commit compress dentry transaction"; - break; - } - // if compress is success - // we use output dentry count to replace old one - nDentry_ = count; - if (BelongSomeOne(out, dentry)) { - return MetaStatusCode::IDEMPOTENCE_OK; - } - return MetaStatusCode::DENTRY_EXIST; - } else if (rc != MetaStatusCode::NOT_FOUND) { - step = "Find dentry failed"; - break; + if (vec->dentrys_size() == 0) { // delete directly + s = txn->SDel(table4Dentry_, skey); + } else { + s = txn->SSet(table4Dentry_, skey, *vec); } - // rc == MetaStatusCode::NOT_FOUND - - // NOTE: `count` maybe already written by `Find()` in here - // so we continue use `count` in follow operations - DentryVector vector(&vec); - vector.Insert(dentry); - std::string skey = DentryKey(dentry); - s = txn->SSet(table4Dentry_, skey, vec); if (!s.ok()) { - step = "Insert dentry to transaction"; break; } - vector.Confirm(&count); - s = SetDentryCount(txn.get(), count); + uint64_t countCopy = *outCount; + vector.Confirm(&countCopy); + s = SetDentryCount(txn, countCopy); if (!s.ok()) { step = "Insert dentry count to transaction"; break; } - s = txn->Commit(); - if (!s.ok()) { - step = "Insert dentry"; - break; - } - nDentry_ = count; - return MetaStatusCode::OK; + *outCount = countCopy; + return true; } while (false); LOG(ERROR) << step << " failed, status = " << s.ToString(); - if (txn != nullptr && 
!txn->Rollback().ok()) { - LOG(ERROR) << "Rollback insert dentry transaction failed, status = " - << s.ToString(); - } - return MetaStatusCode::STORAGE_INTERNAL_ERROR; + return false; } -MetaStatusCode DentryStorage::Insert(const DentryVec& vec, bool merge, - int64_t logIndex) { - WriteLockGuard lg(rwLock_); - +// NOTE: Find() return the dentry which has the latest txid, +// and it will clean the old txid's dentry if you specify compressOutCount to +// non-nullptr compressOutCount must point to a variable that value is equal +// with `nDentry_` +MetaStatusCode DentryStorage::Find(storage::StorageTransaction* txn, + const Dentry& in, Dentry* out, + DentryVec* vec, uint64_t* compressOutCount, + TxLock* txLock) { + std::string skey = DentryKey(in); Status s; - DentryVec oldVec; - std::string skey = DentryKey(vec.dentrys(0)); - std::shared_ptr txn; - const char* step = "Begin transaction"; - do { - txn = kvStorage_->BeginTransaction(); - if (txn == nullptr) { - break; - } - if (merge) { // for old version dumpfile (v1) - s = txn->SGet(table4Dentry_, skey, &oldVec); - if (s.IsNotFound()) { - // do nothing - } else if (!s.ok()) { - step = "Find old version from transaction"; - break; - } - } - DentryVector vector(&oldVec); - vector.Merge(vec); - s = txn->SSet(table4Dentry_, skey, oldVec); - if (!s.ok()) { - step = "Insert dentry vector to tranasction"; - break; - } - s = SetAppliedIndex(txn.get(), logIndex); - if (!s.ok()) { - step = "Insert applied index to tranasction"; - break; - } - uint64_t count = nDentry_; - vector.Confirm(&count); - s = SetDentryCount(txn.get(), count); - if (!s.ok()) { - step = "Insert dentry count to transaction"; - break; - } - s = txn->Commit(); - if (!s.ok()) { - step = "Insert dentry vector"; - break; + // check tx lock on dentry + if (txLock != nullptr) { + s = txn->SGet(table4TxLock_, skey, txLock); + if (s.ok()) { + return MetaStatusCode::TX_KEY_LOCKED; + } else if (!s.IsNotFound()) { + return MetaStatusCode::STORAGE_INTERNAL_ERROR; } - 
nDentry_ = count; - return MetaStatusCode::OK; - } while (false); - LOG(ERROR) << step << " failed, status = " << s.ToString(); - if (txn != nullptr && !txn->Rollback().ok()) { - LOG(ERROR) << "Rollback insert dentry transaction failed, status = " - << s.ToString(); } - return MetaStatusCode::STORAGE_INTERNAL_ERROR; -} -MetaStatusCode DentryStorage::Delete(const Dentry& dentry, int64_t logIndex) { - WriteLockGuard lg(rwLock_); + s = txn->SGet(table4Dentry_, skey, vec); + if (s.IsNotFound()) { + return MetaStatusCode::NOT_FOUND; + } else if (!s.ok()) { + return MetaStatusCode::STORAGE_INTERNAL_ERROR; + } - Dentry out; - DentryVec vec; - const char* step = "Begin transaction"; - std::shared_ptr txn; - storage::Status s; - do { - txn = kvStorage_->BeginTransaction(); - if (txn == nullptr) { - break; - } - uint64_t count = nDentry_; - s = SetAppliedIndex(txn.get(), logIndex); - if (!s.ok()) { - step = "Insert applied index to transaction"; - break; - } - MetaStatusCode rc = Find(txn.get(), dentry, &out, &vec, &count); - if (rc == MetaStatusCode::NOT_FOUND) { - // NOTE: we should commit transaction - // even if rc is NOT_FOUND - // because Find() maybe write dentry count to rocksdb - s = txn->Commit(); - if (!s.ok()) { - step = "Commit transaction"; - break; - } - nDentry_ = count; - return MetaStatusCode::NOT_FOUND; - } else if (rc != MetaStatusCode::OK) { - step = "Find dentry"; - break; - } - DentryVector vector(&vec); - vector.Delete(out); - std::string skey = DentryKey(dentry); - if (vec.dentrys_size() == 0) { - s = txn->SDel(table4Dentry_, skey); - } else { - s = txn->SSet(table4Dentry_, skey, vec); - } - if (!s.ok()) { - step = "Delete dentry vector from transaction"; - break; - } - // NOTE: we should use count variable instead of nDentry_ - // (it means that we should not reset count to nDentry_) - // count is newest version of dentry count - vector.Confirm(&count); - s = SetDentryCount(txn.get(), count); - if (!s.ok()) { - step = "Insert applied index to 
transaction"; - break; - } - s = txn->Commit(); - if (!s.ok()) { - step = "Delete dentry vector"; - break; - } - nDentry_ = count; - return MetaStatusCode::OK; - } while (false); - LOG(ERROR) << step << " failed, status = " << s.ToString(); - if (txn != nullptr && !txn->Rollback().ok()) { - LOG(ERROR) << "Rollback transaction failed"; + // status = OK + // txId here means latest dentry version + uint64_t txId = latestCommit_ > 0 ? latestCommit_ : in.txid(); + BTree dentrys; + DentryVector vector(vec); + vector.Filter(txId, &dentrys); + size_t size = dentrys.size(); + + if (size > 2) { + LOG(ERROR) << "There are more than 2 dentrys"; + return MetaStatusCode::NOT_FOUND; + } else if (size == 0) { + return MetaStatusCode::NOT_FOUND; } - return MetaStatusCode::STORAGE_INTERNAL_ERROR; + + // size == 1 || size == 2 + MetaStatusCode rc; + if (HasDeleteMarkFlag(*dentrys.rbegin())) { + rc = MetaStatusCode::NOT_FOUND; + } else { + rc = MetaStatusCode::OK; + *out = *dentrys.rbegin(); + } + + if (compressOutCount != nullptr && + !CompressDentry(txn, vec, &dentrys, compressOutCount)) { + rc = MetaStatusCode::STORAGE_INTERNAL_ERROR; + } + return rc; } -MetaStatusCode DentryStorage::Get(Dentry* dentry) { +#define ON_ERROR(msg) \ + do \ + { \ + LOG(ERROR) << msg; \ + if (txn != nullptr && !txn->Rollback().ok()) { \ + LOG(ERROR) << "Rollback transaction fail"; \ + } \ + return rc; \ + } while (false) + +#define ON_COMMIT() \ + do \ + { \ + s = txn->Commit(); \ + if (!s.ok()) { \ + rc = MetaStatusCode::STORAGE_INTERNAL_ERROR; \ + ON_ERROR("Commit transaction failed, " + s.ToString()); \ + } \ + nDentry_ = count; \ + return rc; \ + } while (false) + + +MetaStatusCode DentryStorage::Get(Dentry* dentry, TxLock* txLock) { ReadLockGuard lg(rwLock_); - - Dentry out; - DentryVec vec; - MetaStatusCode rc = Find(*dentry, &out, &vec); - if (rc == MetaStatusCode::NOT_FOUND) { - return MetaStatusCode::NOT_FOUND; - } else if (rc != MetaStatusCode::OK) { - return 
MetaStatusCode::STORAGE_INTERNAL_ERROR; + Status s; + uint64_t count = nDentry_; + MetaStatusCode rc = MetaStatusCode::STORAGE_INTERNAL_ERROR; + std::shared_ptr txn; + txn = kvStorage_->BeginTransaction(); + if (txn == nullptr) { + ON_ERROR("Begin transaction failed"); } - // MetaStatusCode::OK - *dentry = out; - return MetaStatusCode::OK; + DentryVec vec; + rc = Find(txn.get(), *dentry, dentry, &vec, nullptr, txLock); + ON_COMMIT(); } MetaStatusCode DentryStorage::List(const Dentry& dentry, std::vector* dentrys, uint32_t limit, - bool onlyDir) { + bool onlyDir, + TxLock* txLock) { // TODO(all): consider store dir dentry and file dentry separately ReadLockGuard lg(rwLock_); @@ -643,7 +487,7 @@ MetaStatusCode DentryStorage::List(const Dentry& dentry, Prefix4SameParentDentry prefix(fsId, parentInodeId); std::string sprefix = conv_.SerializeToString(prefix); // "1:1:" Key4Dentry key(fsId, parentInodeId, name); - std::string lower = conv_.SerializeToString(key); // "1:1:", "1:1:/a/b/c" + std::string lower = conv_.SerializeToString(key); // "1:1:", "1:1:dir" // 3. iterator key/value pair one by one auto iterator = kvStorage_->SSeek(table4Dentry_, lower); @@ -652,8 +496,10 @@ MetaStatusCode DentryStorage::List(const Dentry& dentry, return MetaStatusCode::STORAGE_INTERNAL_ERROR; } + // get newest dentry version + uint64_t txId = latestCommit_ > 0 ? 
latestCommit_ : dentry.txid(); DentryVec current; - DentryList list(dentrys, limit, name, dentry.txid(), onlyDir); + DentryList list(dentrys, limit, name, txId, onlyDir); butil::Timer time; uint32_t seekTimes = 0; time.start(); @@ -667,81 +513,209 @@ MetaStatusCode DentryStorage::List(const Dentry& dentry, return MetaStatusCode::PARSE_FROM_STRING_FAILED; } - list.PushBack(¤t); + bool realEntry = false; + list.PushBack(¤t, &realEntry); + // check dentry tx lock + if (txLock != nullptr && realEntry) { + Status s = kvStorage_->SGet(table4TxLock_, skey, txLock); + if (s.ok()) { + return MetaStatusCode::TX_KEY_LOCKED; + } else if (!s.IsNotFound()) { + return MetaStatusCode::STORAGE_INTERNAL_ERROR; + } + } + if (list.IsFull()) { break; } } time.stop(); VLOG(1) << "ListDentry request: dentry = (" << dentry.ShortDebugString() - << ")" - << ", onlyDir = " << onlyDir << ", limit = " << limit + << "), onlyDir = " << onlyDir << ", limit = " << limit << ", lower key = " << lower << ", seekTimes = " << seekTimes << ", dentrySize = " << dentrys->size() << ", costUs = " << time.u_elapsed(); return MetaStatusCode::OK; } +MetaStatusCode DentryStorage::Insert( + const Dentry& dentry, int64_t logIndex, TxLock* txLock) { + WriteLockGuard lg(rwLock_); + storage::Status s; + uint64_t count = nDentry_; + MetaStatusCode rc = MetaStatusCode::STORAGE_INTERNAL_ERROR; + std::shared_ptr txn; + txn = kvStorage_->BeginTransaction(); + if (txn == nullptr) { + ON_ERROR("Begin transaction failed"); + } + // 1. 
set applied index + s = SetAppliedIndex(txn.get(), logIndex); + if (!s.ok()) { + ON_ERROR("Insert applied index to transaction"); + } + // find dentry + Dentry out; + DentryVec vec; + rc = Find(txn.get(), dentry, &out, &vec, &count, txLock); + if (rc == MetaStatusCode::TX_KEY_LOCKED) { + ON_COMMIT(); + } + if (rc == MetaStatusCode::OK) { + if (BelongSomeOne(out, dentry)) { + rc = MetaStatusCode::IDEMPOTENCE_OK; + } else { + rc = MetaStatusCode::DENTRY_EXIST; + } + ON_COMMIT(); + } else if (rc != MetaStatusCode::NOT_FOUND) { + ON_ERROR("Find dentry failed"); + } + // rc == MetaStatusCode::NOT_FOUND + DentryVector vector(&vec); + vector.Insert(dentry); + s = txn->SSet(table4Dentry_, DentryKey(dentry), vec); + if (!s.ok()) { + ON_ERROR("Insert dentry to transaction"); + } + vector.Confirm(&count); + s = SetDentryCount(txn.get(), count); + if (!s.ok()) { + ON_ERROR("Insert dentry count to transaction"); + } + rc = MetaStatusCode::OK; + ON_COMMIT(); +} + +MetaStatusCode DentryStorage::Insert(const DentryVec& vec, bool merge, + int64_t logIndex) { + WriteLockGuard lg(rwLock_); + storage::Status s; + uint64_t count = nDentry_; + MetaStatusCode rc = MetaStatusCode::STORAGE_INTERNAL_ERROR; + std::shared_ptr txn; + txn = kvStorage_->BeginTransaction(); + if (txn == nullptr) { + ON_ERROR("Begin transaction failed"); + } + + DentryVec oldVec; + std::string skey = DentryKey(vec.dentrys(0)); + if (merge) { // for old version dumpfile (v1) + s = txn->SGet(table4Dentry_, skey, &oldVec); + if (s.IsNotFound()) { + // do nothing + } else if (!s.ok()) { + ON_ERROR("Find old version from transaction"); + } + } + DentryVector vector(&oldVec); + vector.Merge(vec); + s = txn->SSet(table4Dentry_, skey, oldVec); + if (!s.ok()) { + ON_ERROR("Insert dentry vector to tranasction"); + } + s = SetAppliedIndex(txn.get(), logIndex); + if (!s.ok()) { + ON_ERROR("Insert applied index to tranasction"); + } + vector.Confirm(&count); + s = SetDentryCount(txn.get(), count); + if (!s.ok()) { + 
ON_ERROR("Insert dentry count to transaction"); + } + rc = MetaStatusCode::OK; + ON_COMMIT(); +} + +MetaStatusCode DentryStorage::Delete( + const Dentry& dentry, int64_t logIndex, TxLock* txLock) { + WriteLockGuard lg(rwLock_); + Status s; + uint64_t count = nDentry_; + MetaStatusCode rc = MetaStatusCode::STORAGE_INTERNAL_ERROR; + std::shared_ptr txn; + txn = kvStorage_->BeginTransaction(); + if (txn == nullptr) { + ON_ERROR("Begin transaction failed"); + } + s = SetAppliedIndex(txn.get(), logIndex); + if (!s.ok()) { + ON_ERROR("Insert applied index to transaction"); + } + Dentry out; + DentryVec vec; + rc = Find(txn.get(), dentry, &out, &vec, &count, txLock); + if (rc == MetaStatusCode::TX_KEY_LOCKED) { + ON_COMMIT(); + } + if (rc == MetaStatusCode::NOT_FOUND) { + ON_COMMIT(); + } else if (rc != MetaStatusCode::OK) { + ON_ERROR("Find dentry failed"); + } + // OK + DentryVector vector(&vec); + vector.Delete(out); + std::string skey = DentryKey(dentry); + if (vec.dentrys_size() == 0) { + s = txn->SDel(table4Dentry_, skey); + } else { + s = txn->SSet(table4Dentry_, skey, vec); + } + if (!s.ok()) { + ON_ERROR("Delete dentry vector from transaction"); + } + // NOTE: we should use count variable instead of nDentry_ + // (it means that we should not reset count to nDentry_) + // count is newest version of dentry count + vector.Confirm(&count); + s = SetDentryCount(txn.get(), count); + if (!s.ok()) { + ON_ERROR("Insert dentry count to transaction"); + } + rc = MetaStatusCode::OK; + ON_COMMIT(); +} + MetaStatusCode DentryStorage::PrepareTx( const std::vector& dentrys, const metaserver::TransactionRequest& txRequest, int64_t logIndex) { WriteLockGuard lg(rwLock_); - uint64_t count = nDentry_; Status s; - const char* step = "Begin transaction"; + uint64_t count = nDentry_; + MetaStatusCode rc = MetaStatusCode::STORAGE_INTERNAL_ERROR; std::shared_ptr txn; - do { - txn = kvStorage_->BeginTransaction(); - if (txn == nullptr) { - break; - } - bool quit = false; - for (const 
auto& dentry : dentrys) { - DentryVec vec; - DentryVector vector(&vec); - std::string skey = DentryKey(dentry); - s = txn->SGet(table4Dentry_, skey, &vec); - if (!s.ok() && !s.IsNotFound()) { - step = "Get dentry from transaction"; - quit = true; - break; - } - // OK || NOT_FOUND - vector.Insert(dentry); - s = txn->SSet(table4Dentry_, skey, vec); - if (!s.ok()) { - step = "Insert dentry to transaction"; - quit = true; - break; - } - vector.Confirm(&count); - } - if (quit) { - break; - } - s = SetAppliedIndex(txn.get(), logIndex); - if (!s.ok()) { - step = "Insert applied index to transaction"; - break; - } - s = SetPendingTx(txn.get(), txRequest); - if (!s.ok()) { - step = "Insert tx request to transaction"; - break; + txn = kvStorage_->BeginTransaction(); + if (txn == nullptr) { + ON_ERROR("Begin transaction failed"); + } + for (const auto& dentry : dentrys) { + DentryVec vec; + DentryVector vector(&vec); + std::string skey = DentryKey(dentry); + s = txn->SGet(table4Dentry_, skey, &vec); + if (!s.ok() && !s.IsNotFound()) { + ON_ERROR("Get dentry from transaction"); } - s = txn->Commit(); + // OK || NOT_FOUND + vector.Insert(dentry); + s = txn->SSet(table4Dentry_, skey, vec); if (!s.ok()) { - step = "Commit transaction"; - break; + ON_ERROR("Insert dentry to transaction"); } - nDentry_ = count; - return MetaStatusCode::OK; - } while (false); - LOG(ERROR) << step << " failed, status = " << s.ToString(); - if (txn != nullptr && !txn->Rollback().ok()) { - LOG(ERROR) << "Rollback transaction fail"; + vector.Confirm(&count); } - return MetaStatusCode::STORAGE_INTERNAL_ERROR; + s = SetAppliedIndex(txn.get(), logIndex); + if (!s.ok()) { + ON_ERROR("Insert applied index to transaction"); + } + s = SetPendingTx(txn.get(), txRequest); + if (!s.ok()) { + ON_ERROR("Insert tx request to transaction"); + } + rc = MetaStatusCode::OK; + ON_COMMIT(); } MetaStatusCode DentryStorage::CommitTx(const std::vector& dentrys, @@ -760,53 +734,31 @@ MetaStatusCode 
DentryStorage::CommitTx(const std::vector& dentrys, } WriteLockGuard lg(rwLock_); Status s; - const char* step = "Begin transaction"; + uint64_t count = nDentry_; + MetaStatusCode rc = MetaStatusCode::STORAGE_INTERNAL_ERROR; std::shared_ptr txn; - do { - txn = kvStorage_->BeginTransaction(); - if (txn == nullptr) { - break; - } - uint64_t count = nDentry_; - bool quit = false; - for (const auto& dentry : dentrys) { - Dentry out; - DentryVec vec; - std::string skey = DentryKey(dentry); - MetaStatusCode rc = MetaStatusCode::OK; - rc = Find(txn.get(), dentry, &out, &vec, &count); - if (rc != MetaStatusCode::OK && rc != MetaStatusCode::NOT_FOUND) { - step = "Find dentry from transaction"; - quit = true; - break; - } - } - if (quit) { - break; - } - s = SetHandleTxIndex(txn.get(), logIndex); - if (!s.ok()) { - step = "Insert handle tx index to transaction"; - break; - } - s = ClearPendingTx(txn.get()); - if (!s.ok()) { - step = "Delete pending tx from transaction"; - break; - } - s = txn->Commit(); - if (!s.ok()) { - step = "Commit transaction"; - break; + txn = kvStorage_->BeginTransaction(); + if (txn == nullptr) { + ON_ERROR("Begin transaction failed"); + } + for (const auto& dentry : dentrys) { + Dentry out; + DentryVec vec; + rc = Find(txn.get(), dentry, &out, &vec, &count, nullptr); + if (rc != MetaStatusCode::OK && rc != MetaStatusCode::NOT_FOUND) { + ON_ERROR("Find dentry from transaction"); } - nDentry_ = count; - return MetaStatusCode::OK; - } while (false); - LOG(ERROR) << step << " failed, status = " << s.ToString(); - if (txn != nullptr && !txn->Rollback().ok()) { - LOG(ERROR) << "Rollback transaction failed"; } - return MetaStatusCode::STORAGE_INTERNAL_ERROR; + s = SetHandleTxIndex(txn.get(), logIndex); + if (!s.ok()) { + ON_ERROR("Insert handle tx index to transaction"); + } + s = ClearPendingTx(txn.get()); + if (!s.ok()) { + ON_ERROR("Delete pending tx from transaction"); + } + rc = MetaStatusCode::OK; + ON_COMMIT(); } MetaStatusCode 
DentryStorage::RollbackTx(const std::vector& dentrys, @@ -825,70 +777,47 @@ MetaStatusCode DentryStorage::RollbackTx(const std::vector& dentrys, } WriteLockGuard lg(rwLock_); Status s; - const char* step = "Begin transaction"; + uint64_t count = nDentry_; + MetaStatusCode rc = MetaStatusCode::STORAGE_INTERNAL_ERROR; std::shared_ptr txn; - do { - txn = kvStorage_->BeginTransaction(); - if (txn == nullptr) { - break; - } - uint64_t count = nDentry_; - bool quit = false; - for (const auto& dentry : dentrys) { - DentryVec vec; - DentryVector vector(&vec); - std::string skey = DentryKey(dentry); - s = txn->SGet(table4Dentry_, skey, &vec); - if (!s.ok() && !s.IsNotFound()) { - step = "Find dentry"; - quit = true; - break; - } - // OK || NOT_FOUND - vector.Delete(dentry); - if (vec.dentrys_size() == 0) { // delete directly - s = txn->SDel(table4Dentry_, skey); - } else { - s = txn->SSet(table4Dentry_, skey, vec); - } - if (!s.ok()) { - step = "Delete dentry from transaction"; - quit = true; - break; - } - vector.Confirm(&count); - } - if (quit) { - break; - } - s = SetDentryCount(txn.get(), count); - if (!s.ok()) { - step = "Insert dentry count to transaction"; - break; - } - s = SetHandleTxIndex(txn.get(), logIndex); - if (!s.ok()) { - step = "Insert handle tx index to transaction"; - break; + txn = kvStorage_->BeginTransaction(); + if (txn == nullptr) { + ON_ERROR("Begin transaction failed"); + } + for (const auto& dentry : dentrys) { + DentryVec vec; + DentryVector vector(&vec); + std::string skey = DentryKey(dentry); + s = txn->SGet(table4Dentry_, skey, &vec); + if (!s.ok() && !s.IsNotFound()) { + ON_ERROR("Get dentry from transaction"); } - s = ClearPendingTx(txn.get()); - if (!s.ok()) { - step = "Delete pending tx from transaction"; - break; + // OK || NOT_FOUND + vector.Delete(dentry); + if (vec.dentrys_size() == 0) { // delete directly + s = txn->SDel(table4Dentry_, skey); + } else { + s = txn->SSet(table4Dentry_, skey, vec); } - s = txn->Commit(); if (!s.ok()) { 
- step = "Commit transaction"; - break; + ON_ERROR("Delete dentry from transaction"); } - nDentry_ = count; - return MetaStatusCode::OK; - } while (false); - LOG(ERROR) << step << " failed, status = " << s.ToString(); - if (txn != nullptr && !txn->Rollback().ok()) { - LOG(ERROR) << "Rollback transaction failed"; + vector.Confirm(&count); } - return MetaStatusCode::STORAGE_INTERNAL_ERROR; + s = SetDentryCount(txn.get(), count); + if (!s.ok()) { + ON_ERROR("Insert dentry count to transaction"); + } + s = SetHandleTxIndex(txn.get(), logIndex); + if (!s.ok()) { + ON_ERROR("Insert handle tx index to transaction"); + } + s = ClearPendingTx(txn.get()); + if (!s.ok()) { + ON_ERROR("Delete pending tx from transaction"); + } + rc = MetaStatusCode::OK; + ON_COMMIT(); } std::shared_ptr DentryStorage::GetAll() { @@ -932,6 +861,16 @@ MetaStatusCode DentryStorage::Clear() { LOG(ERROR) << "Clear dentry table failed, status = " << s.ToString(); return MetaStatusCode::STORAGE_INTERNAL_ERROR; } + s = kvStorage_->SClear(table4TxWrite_); + if (!s.ok()) { + LOG(ERROR) << "Clear tx write table failed, status = " << s.ToString(); + return MetaStatusCode::STORAGE_INTERNAL_ERROR; + } + s = kvStorage_->SClear(table4TxLock_); + if (!s.ok()) { + LOG(ERROR) << "Clear tx lock table failed, status = " << s.ToString(); + return MetaStatusCode::STORAGE_INTERNAL_ERROR; + } std::shared_ptr txn; const char* step = "Begin transaction"; do { @@ -974,5 +913,393 @@ MetaStatusCode DentryStorage::Clear() { return MetaStatusCode::STORAGE_INTERNAL_ERROR; } +MetaStatusCode DentryStorage::GetLastTxWriteTs(storage::StorageTransaction* txn, + const Dentry& dentry, uint64_t* commitTs) { + // 1. prepare seek lower key + Prefix4TxWrite prefix; + prefix.fsId = dentry.fsid(); + prefix.parentInodeId = dentry.parentinodeid(); + prefix.name = dentry.name(); + std::string sprefix = conv_.SerializeToString(prefix); // "1:1:name/" + + // 2. 
iterator key/value pair one by one + auto iterator = txn->SSeek(table4TxWrite_, sprefix); + if (iterator->Status() < 0) { + LOG(ERROR) << "failed to get iterator for prefix" << sprefix; + return MetaStatusCode::STORAGE_INTERNAL_ERROR; + } + + std::string lastWriteKey; + std::vector toDelete; + butil::Timer time; + uint32_t seekTimes = 0; + uint32_t compressCount = 0; + time.start(); + for (iterator->SeekToFirst(); iterator->Valid(); iterator->Next()) { + seekTimes++; + lastWriteKey = iterator->Key(); + TxWrite value; + if (!iterator->ParseFromValue(&value)) { + LOG(ERROR) << "parse value failed, key = " << lastWriteKey; + return MetaStatusCode::PARSE_FROM_STRING_FAILED; + } + if (value.kind() == TxWriteKind::Rollback) { + continue; + } + toDelete.push_back(lastWriteKey); + } + time.stop(); + + if (seekTimes == 0) { + *commitTs = 0; + return MetaStatusCode::OK; + } + Key4TxWrite key; + if (!conv_.ParseFromString(lastWriteKey, &key)) { + LOG(ERROR) << "parse key failed, key = " << lastWriteKey; + return MetaStatusCode::PARSE_FROM_STRING_FAILED; + } + *commitTs = key.ts; + compressCount = toDelete.size() == 0 ? 
0 : toDelete.size() - 1; + for (int i = 0; i < compressCount; i++) { + auto s = txn->SDel(table4TxWrite_, toDelete[i]); + if (!s.ok()) { + LOG(ERROR) << "delete tx write failed, key = " << toDelete[i]; + return MetaStatusCode::STORAGE_INTERNAL_ERROR; + } + } + VLOG(1) << "GetLastTxWriteTs request: dentry = (" + << dentry.ShortDebugString() << ")" + << ", lower key = " << sprefix << ", seekTimes = " << seekTimes + << ", costUs = " << time.u_elapsed() + << ", compressCount = " << compressCount; + return MetaStatusCode::OK; +} + +storage::Status DentryStorage::GetLatestCommit(uint64_t* statTs) { + TS commitTs; + Status s = kvStorage_->SGet(table4TxWrite_, kTxLatestCommit, &commitTs); + if (s.ok()) { + *statTs = commitTs.ts(); + } + return s; +} + +storage::Status DentryStorage::SetLatestCommit( + storage::StorageTransaction* txn, uint64_t ts) { + if (latestCommit_ >= ts) { + return Status::OK(); + } + TS commitTs; + commitTs.set_ts(ts); + Status s = txn->SSet(table4TxWrite_, kTxLatestCommit, commitTs); + if (s.ok()) { + latestCommit_ = ts; + } + return s; +} + +// based on tx lock not exist +MetaStatusCode DentryStorage::CheckTxStatus(storage::StorageTransaction* txn, + const std::string& primaryKey, uint64_t ts) { + Key4Dentry key; + if (!key.ParseFromString(primaryKey)) { + return MetaStatusCode::PARSE_FROM_STRING_FAILED; + } + Key4TxWrite wkey(key.fsId, key.parentInodeId, key.name, ts); + std::string skey = wkey.SerializeToString(); + TxWrite txWrite; + Status s = txn->SGet(table4TxWrite_, skey, &txWrite); + if (s.IsNotFound()) { + return MetaStatusCode::TX_COMMITTED; + } else if (!s.ok()) { + return MetaStatusCode::STORAGE_INTERNAL_ERROR; + } + + // status = OK + if (txWrite.startts() == ts && txWrite.kind() == TxWriteKind::Rollback) { + return MetaStatusCode::TX_ROLLBACKED; + } + return MetaStatusCode::TX_COMMITTED; +} + +storage::Status DentryStorage::SetTxWrite(storage::StorageTransaction* txn, + const std::string& key, const TxWrite& txWrite) { + return 
txn->SSet(table4TxWrite_, key, txWrite); +} + +storage::Status DentryStorage::GetTxLock( + storage::StorageTransaction* txn, const std::string& key, TxLock* out) { + return txn->SGet(table4TxLock_, key, out); +} + +storage::Status DentryStorage::SetTxLock(storage::StorageTransaction* txn, + const std::string& key, const TxLock& txLock) { + return txn->SSet(table4TxLock_, key, txLock); +} + +storage::Status DentryStorage::DelTxLock( + storage::StorageTransaction* txn, const std::string& key) { + return txn->SDel(table4TxLock_, key); +} + +MetaStatusCode DentryStorage::WriteTx(storage::StorageTransaction* txn, + const Dentry& dentry, TxLock txLock, uint64_t* count) { + // 1. set tx lock + txLock.set_ttl(FLAGS_tx_lock_ttl_ms); + Status s = SetTxLock(txn, DentryKey(dentry), txLock); + if (!s.ok()) { + return MetaStatusCode::STORAGE_INTERNAL_ERROR; + } + // 2. set dentry data and compress old version data + Dentry out; + DentryVec vec; + auto rc = Find(txn, dentry, &out, &vec, count, nullptr); + if (rc != MetaStatusCode::OK && rc != MetaStatusCode::NOT_FOUND) { + return rc; + } + DentryVector vector(&vec); + VLOG(3) << "WriteTx before insert = " << vec.DebugString(); + vector.Insert(dentry); + VLOG(3) << "WriteTx after insert = " << vec.DebugString(); + s = txn->SSet(table4Dentry_, DentryKey(dentry), vec); + if (!s.ok()) { + return MetaStatusCode::STORAGE_INTERNAL_ERROR; + } + vector.Confirm(count); + s = SetDentryCount(txn, *count); + if (!s.ok()) { + return MetaStatusCode::STORAGE_INTERNAL_ERROR; + } + return MetaStatusCode::OK; +} + +MetaStatusCode DentryStorage::PrewriteTx(const std::vector& dentrys, + TxLock txLock, int64_t logIndex, TxLock* out) { + WriteLockGuard lg(rwLock_); + Status s; + uint64_t count = nDentry_; + MetaStatusCode rc = MetaStatusCode::STORAGE_INTERNAL_ERROR; + std::shared_ptr txn; + txn = kvStorage_->BeginTransaction(); + if (txn == nullptr) { + ON_ERROR("Begin transaction failed"); + } + // 1. 
set applied index + s = SetAppliedIndex(txn.get(), logIndex); + if (!s.ok()) { + ON_ERROR("Insert applied index to transaction failed"); + } + for (int i = 0; i < dentrys.size(); i++) { + // 2. check write confict + uint64_t commitTs = 0; + if (MetaStatusCode::OK != + GetLastTxWriteTs(txn.get(), dentrys[i], &commitTs)) { + ON_ERROR("Get last tx write ts failed"); + } + if (commitTs >= txLock.startts()) { + rc = MetaStatusCode::TX_WRITE_CONFLICT; + ON_ERROR("Tx write conflict"); + } + // 3. check tx lock + s = GetTxLock(txn.get(), DentryKey(dentrys[i]), out); + if (s.ok()) { + if (out->startts() == txLock.startts()) { + continue; + } + out->set_index(i); + rc = MetaStatusCode::TX_KEY_LOCKED; + ON_COMMIT(); + } else if (!s.IsNotFound()) { + ON_ERROR("Get tx lock failed"); + } + // 4. write tx + if (WriteTx(txn.get(), dentrys[i], txLock, &count) + != MetaStatusCode::OK) { + ON_ERROR("Write tx failed"); + } + } + rc = MetaStatusCode::OK; + ON_COMMIT(); +} + +MetaStatusCode DentryStorage::CheckTxStatus(const std::string& primaryKey, + uint64_t startTs, uint64_t curTimestamp, int64_t logIndex) { + WriteLockGuard lg(rwLock_); + Status s; + uint64_t count = nDentry_; + MetaStatusCode rc = MetaStatusCode::STORAGE_INTERNAL_ERROR; + std::shared_ptr txn; + txn = kvStorage_->BeginTransaction(); + if (txn == nullptr) { + ON_ERROR("Begin transaction failed"); + } + // 1. set applied index + s = SetAppliedIndex(txn.get(), logIndex); + if (!s.ok()) { + ON_ERROR("Insert applied index to transaction failed"); + } + // 2. 
check tx lock + TxLock txLock; + s = GetTxLock(txn.get(), primaryKey, &txLock); + if (s.ok()) { + // inprogress or timeout + if (curTimestamp > txLock.timestamp() + txLock.ttl()) { + rc = MetaStatusCode::TX_TIMEOUT; + ON_COMMIT(); + } else { + rc = MetaStatusCode::TX_INPROGRESS; + ON_COMMIT(); + } + } else if (s.IsNotFound()) { + // committed or rollbacked + rc = CheckTxStatus(txn.get(), primaryKey, startTs); + ON_COMMIT(); + } else { + ON_ERROR("Get tx lock failed"); + } + rc = MetaStatusCode::OK; + ON_COMMIT(); +} + +MetaStatusCode DentryStorage::ResolveTxLock(const Dentry& dentry, + uint64_t startTs, uint64_t commitTs, int64_t logIndex) { + WriteLockGuard lg(rwLock_); + Status s; + uint64_t count = nDentry_; + MetaStatusCode rc = MetaStatusCode::STORAGE_INTERNAL_ERROR; + std::shared_ptr txn; + txn = kvStorage_->BeginTransaction(); + if (txn == nullptr) { + ON_ERROR("Begin transaction failed"); + } + // 1. set applied index + s = SetAppliedIndex(txn.get(), logIndex); + if (!s.ok()) { + ON_ERROR("Insert applied index to transaction failed"); + } + TxLock outLock; + s = GetTxLock(txn.get(), DentryKey(dentry), &outLock); + if (s.IsNotFound()) { + rc = MetaStatusCode::OK; + ON_COMMIT(); + } else if (!s.ok()) { + ON_ERROR("Get tx lock failed"); + } + if (outLock.startts() != startTs) { + rc = MetaStatusCode::TX_MISMATCH; + ON_ERROR("tx lock mismatch"); + } + // roll forward + if (commitTs > 0) { + if (!DelTxLock(txn.get(), DentryKey(dentry)).ok()) { + ON_ERROR("Delete tx lock failed"); + } + TxWrite txWrite; + txWrite.set_startts(startTs); + txWrite.set_kind(TxWriteKind::Commit); + if (!SetTxWrite(txn.get(), + TxWriteKey(dentry, commitTs), txWrite).ok()) { + ON_ERROR("Set tx write failed"); + } + // update latest commit + if (!SetLatestCommit(txn.get(), commitTs).ok()) { + ON_ERROR("update latest commit failed"); + } + } else { + // 1. delete tx lock + if (!DelTxLock(txn.get(), DentryKey(dentry)).ok()) { + ON_ERROR("Delete tx lock failed"); + } + // 2. 
delete tx data with startTs + DentryVec vec; + DentryVector vector(&vec); + std::string skey = DentryKey(dentry); + s = txn->SGet(table4Dentry_, skey, &vec); + if (!s.ok() && !s.IsNotFound()) { + ON_ERROR("Get dentry from transaction failed"); + } + // OK || NOT_FOUND + Dentry preDentry(dentry); + preDentry.set_txid(startTs); + vector.Delete(preDentry); + if (vec.dentrys_size() == 0) { // delete directly + s = txn->SDel(table4Dentry_, skey); + } else { + s = txn->SSet(table4Dentry_, skey, vec); + } + if (!s.ok()) { + ON_ERROR("Delete dentry from transaction failed"); + } + vector.Confirm(&count); + // 3. set tx write + TxWrite txWrite; + txWrite.set_startts(startTs); + txWrite.set_kind(TxWriteKind::Rollback); + if (!SetTxWrite( + txn.get(), TxWriteKey(dentry, startTs), txWrite).ok()) { + ON_ERROR("Set tx write failed"); + } + } + rc = MetaStatusCode::OK; + ON_COMMIT(); +} + +MetaStatusCode DentryStorage::CommitTx(const std::vector& dentrys, + uint64_t startTs, uint64_t commitTs, int64_t logIndex) { + WriteLockGuard lg(rwLock_); + Status s; + uint64_t count = nDentry_; + MetaStatusCode rc = MetaStatusCode::STORAGE_INTERNAL_ERROR; + std::shared_ptr txn; + txn = kvStorage_->BeginTransaction(); + if (txn == nullptr) { + ON_ERROR("Begin transaction failed"); + } + // 1. 
set applied index + s = SetAppliedIndex(txn.get(), logIndex); + if (!s.ok()) { + ON_ERROR("Insert applied index to transaction failed"); + } + for (const auto& dentry : dentrys) { + // check tx lock + TxLock txLock; + s = GetTxLock(txn.get(), DentryKey(dentry), &txLock); + if (s.IsNotFound()) { + // commited or rollbacked + rc = CheckTxStatus(txn.get(), DentryKey(dentry), startTs); + if (rc == MetaStatusCode::TX_COMMITTED) { + continue; + } else { + ON_ERROR("tx have been rollbacked when commit"); + } + } else if (!s.ok()) { + ON_ERROR("Get tx lock failed"); + } + if (txLock.startts() != startTs) { + rc = MetaStatusCode::TX_MISMATCH; + ON_ERROR("tx lock mismatch"); + } + // set tx write + TxWrite txWrite; + txWrite.set_startts(startTs); + txWrite.set_kind(TxWriteKind::Commit); + if (!SetTxWrite( + txn.get(), TxWriteKey(dentry, commitTs), txWrite).ok()) { + ON_ERROR("Set tx write failed"); + } + // delete tx lock + if (!DelTxLock(txn.get(), DentryKey(dentry)).ok()) { + ON_ERROR("Delete tx lock failed"); + } + } + // update latest commit + if (!SetLatestCommit(txn.get(), startTs).ok()) { + ON_ERROR("update latest commit failed"); + } + rc = MetaStatusCode::OK; + ON_COMMIT(); +} + } // namespace metaserver } // namespace curvefs diff --git a/curvefs/src/metaserver/dentry_storage.h b/curvefs/src/metaserver/dentry_storage.h index 102df6737a..c2bc141ba3 100644 --- a/curvefs/src/metaserver/dentry_storage.h +++ b/curvefs/src/metaserver/dentry_storage.h @@ -81,7 +81,7 @@ class DentryList { DentryList(std::vector* list, uint32_t limit, const std::string& exclude, uint64_t maxTxId, bool onlyDir); - void PushBack(DentryVec* vec); + void PushBack(DentryVec* vec, bool* realEntry); uint32_t Size(); @@ -104,17 +104,19 @@ class DentryStorage { bool Init(); - MetaStatusCode Insert(const Dentry& dentry, int64_t logIndex); + MetaStatusCode Get(Dentry* dentry, TxLock* txLock = nullptr); - // only for loadding from snapshot - MetaStatusCode Insert(const DentryVec& vec, bool merge, 
int64_t logIndex); + MetaStatusCode List(const Dentry& dentry, std::vector* dentrys, + uint32_t limit, bool onlyDir = false, TxLock* txLock = nullptr); - MetaStatusCode Delete(const Dentry& dentry, int64_t logIndex); + MetaStatusCode Insert(const Dentry& dentry, int64_t logIndex, + TxLock* txLock = nullptr); - MetaStatusCode Get(Dentry* dentry); + // only for loadding from snapshot + MetaStatusCode Insert(const DentryVec& vec, bool merge, int64_t logIndex); - MetaStatusCode List(const Dentry& dentry, std::vector* dentrys, - uint32_t limit, bool onlyDir = false); + MetaStatusCode Delete(const Dentry& dentry, int64_t logIndex, + TxLock* txLock = nullptr); MetaStatusCode PrepareTx(const std::vector& dentrys, const metaserver::TransactionRequest& txRequest, @@ -138,17 +140,22 @@ class DentryStorage { MetaStatusCode GetAppliedIndex(int64_t* index); - private: - std::string DentryKey(const Dentry& entry); + MetaStatusCode PrewriteTx(const std::vector& dentrys, + TxLock txLock, int64_t logIndex, TxLock* out); - bool CompressDentry(storage::StorageTransaction* txn, DentryVec* vec, - BTree* dentrys, uint64_t* outCount); + MetaStatusCode CheckTxStatus(const std::string& primaryKey, + uint64_t startTs, uint64_t curTimestamp, int64_t logIndex); - MetaStatusCode Find(const Dentry& in, Dentry* out, DentryVec* vec); + MetaStatusCode ResolveTxLock(const Dentry& dentry, + uint64_t startTs, uint64_t commitTs, int64_t logIndex); - MetaStatusCode Find(storage::StorageTransaction* txn, const Dentry& in, - Dentry* out, DentryVec* vec, - uint64_t* compressOutCount); + MetaStatusCode CommitTx(const std::vector& dentrys, + uint64_t startTs, uint64_t commitTs, int64_t logIndex); + + private: + std::string DentryKey(const Dentry& daemon); + + std::string TxWriteKey(const Dentry& dentry, uint64_t ts); storage::Status SetAppliedIndex(storage::StorageTransaction* transaction, int64_t index); @@ -176,21 +183,60 @@ class DentryStorage { storage::Status GetHandleTxIndex(int64_t* count); + bool 
CompressDentry(storage::StorageTransaction* txn, DentryVec* vec, + BTree* dentrys, uint64_t* outCount); + + MetaStatusCode Find(storage::StorageTransaction* txn, const Dentry& in, + Dentry* out, DentryVec* vec, + uint64_t* compressOutCount, + TxLock* txLock); + + MetaStatusCode GetLastTxWriteTs(storage::StorageTransaction* transaction, + const Dentry& dentry, uint64_t* commitTs); + + storage::Status GetLatestCommit(uint64_t* statTs); + + storage::Status SetLatestCommit(storage::StorageTransaction* transaction, + uint64_t ts); + + MetaStatusCode CheckTxStatus(storage::StorageTransaction* transaction, + const std::string& primaryKey, uint64_t ts); + + storage::Status SetTxWrite(storage::StorageTransaction* transaction, + const std::string& key, const TxWrite& txWrite); + + storage::Status GetTxLock(storage::StorageTransaction* transaction, + const std::string& key, TxLock* out); + + storage::Status SetTxLock(storage::StorageTransaction* transaction, + const std::string& key, const TxLock& txLock); + + storage::Status DelTxLock(storage::StorageTransaction* transaction, + const std::string& key); + + MetaStatusCode WriteTx(storage::StorageTransaction* transaction, + const Dentry& dentry, TxLock txLock, uint64_t* count); + private: RWLock rwLock_; std::shared_ptr kvStorage_; std::string table4Dentry_; std::string table4AppliedIndex_; std::string table4Transaction_; + // record dentry total count std::string table4DentryCount_; + std::string table4TxLock_; + std::string table4TxWrite_; int64_t handleTxIndex_; uint64_t nDentry_; Converter conv_; + uint64_t latestCommit_; static const char* kDentryCountKey; static const char* kDentryAppliedKey; static const char* kHandleTxKey; static const char* kPendingTxKey; + static const char* kTxLatestCommit; }; } // namespace metaserver diff --git a/curvefs/src/metaserver/inode_manager.cpp b/curvefs/src/metaserver/inode_manager.cpp index 91fb83a784..f459b7f77d 100644 --- a/curvefs/src/metaserver/inode_manager.cpp +++ 
b/curvefs/src/metaserver/inode_manager.cpp @@ -283,16 +283,17 @@ MetaStatusCode InodeManager::DeleteInode(uint32_t fsId, uint64_t inodeId, VLOG(6) << "DeleteInode, fsId = " << fsId << ", inodeId = " << inodeId; NameLockGuard lg(inodeLock_, GetInodeLockName(fsId, inodeId)); InodeAttr attr; - MetaStatusCode retGetAttr = - inodeStorage_->GetAttr(Key4Inode(fsId, inodeId), &attr); - if (retGetAttr != MetaStatusCode::OK) { - VLOG(9) << "GetInodeAttr fail, fsId = " << fsId - << ", inodeId = " << inodeId - << ", ret = " << MetaStatusCode_Name(retGetAttr); + auto ret = inodeStorage_->GetAttr(Key4Inode(fsId, inodeId), &attr); + if (ret == MetaStatusCode::NOT_FOUND) { + return MetaStatusCode::OK; + } else if (ret != MetaStatusCode::OK) { + LOG(ERROR) << "GetInodeAttr fail, fsId = " << fsId + << ", inodeId = " << inodeId + << ", ret = " << MetaStatusCode_Name(ret); + return ret; } - MetaStatusCode ret = - inodeStorage_->Delete(Key4Inode(fsId, inodeId), logIndex); + ret = inodeStorage_->Delete(Key4Inode(fsId, inodeId), logIndex); if (ret != MetaStatusCode::OK) { LOG(ERROR) << "DeleteInode fail, fsId = " << fsId << ", inodeId = " << inodeId @@ -300,9 +301,13 @@ MetaStatusCode InodeManager::DeleteInode(uint32_t fsId, uint64_t inodeId, return ret; } - if (retGetAttr == MetaStatusCode::OK) { + // if nlink is 0 means this inode is already in trash + if (attr.nlink() != 0) { // get attr success --(*type2InodeNum_)[attr.type()]; + } else { + // delete trash item + trash_->Remove(inodeId); } VLOG(6) << "DeleteInode success, fsId = " << fsId << ", inodeId = " << inodeId; @@ -353,8 +358,7 @@ MetaStatusCode InodeManager::UpdateInode(const UpdateInodeRequest& request, if (request.has_nlink()) { if (old.nlink() != 0 && request.nlink() == 0) { - uint32_t now = TimeUtility::GetTimeofDaySec(); - old.set_dtime(now); + old.set_dtime(TimeUtility::GetTimeofDaySec()); needAddTrash = true; } VLOG(9) << "update inode nlink, from " << old.nlink() << " to " @@ -373,7 +377,6 @@ MetaStatusCode 
InodeManager::UpdateInode(const UpdateInodeRequest& request, bool fileNeedDeallocate = (needAddTrash && (FsFileType::TYPE_FILE == old.type())); - bool s3NeedTrash = (needAddTrash && (FsFileType::TYPE_S3 == old.type())); std::shared_ptr txn; if (needUpdate) { @@ -388,11 +391,6 @@ MetaStatusCode InodeManager::UpdateInode(const UpdateInodeRequest& request, } } - if (s3NeedTrash) { - trash_->Add(old.fsid(), old.inodeid(), old.dtime()); - --(*type2InodeNum_)[old.type()]; - } - const S3ChunkInfoMap &map2add = request.s3chunkinfoadd(); const S3ChunkInfoList *list2add; VLOG(9) << "UpdateInode inode " << old.inodeid() << " map2add size " @@ -443,10 +441,27 @@ MetaStatusCode InodeManager::UpdateInode(const UpdateInodeRequest& request, return MetaStatusCode::STORAGE_INTERNAL_ERROR; } } + + if (needAddTrash) { + trash_->Add(old.inodeid(), old.dtime(), false); + --(*type2InodeNum_)[old.type()]; + } + VLOG(9) << "UpdateInode success, " << request.ShortDebugString(); return MetaStatusCode::OK; } +void InodeManager::LoadDeletedInodes() { + std::map items; + inodeStorage_->LoadDeletedInodes(&items); + VLOG(3) << "build trash items size: " << items.size(); + std::vector names; + for (auto& iter : items) { + curve::common::SplitString(iter.first , ":", &names); + trash_->Add(std::stoull(names[names.size() - 1 ]), iter.second, true); + } +} + MetaStatusCode InodeManager::GetOrModifyS3ChunkInfo( uint32_t fsId, uint64_t inodeId, const S3ChunkInfoMap& map2add, const S3ChunkInfoMap& map2del, bool returnS3ChunkInfoMap, @@ -541,10 +556,11 @@ MetaStatusCode InodeManager::PaddingInodeS3ChunkInfo(int32_t fsId, } MetaStatusCode InodeManager::UpdateInodeWhenCreateOrRemoveSubNode( - const Dentry& dentry, uint64_t now, uint32_t now_ns, bool isCreate, - int64_t logIndex) { + const Dentry& dentry, const Time& tm, bool isCreate, int64_t logIndex) { uint64_t fsId = dentry.fsid(); uint64_t parentInodeId = dentry.parentinodeid(); + uint64_t now = tm.sec(); + uint32_t now_ns = tm.nsec(); FsFileType type 
= dentry.type(); MetaStatusCode ret = MetaStatusCode::OK; @@ -609,25 +625,6 @@ MetaStatusCode InodeManager::UpdateInodeWhenCreateOrRemoveSubNode( return MetaStatusCode::OK; } -MetaStatusCode InodeManager::InsertInode(const Inode& inode, int64_t logIndex) { - CHECK_APPLIED(); - VLOG(6) << "InsertInode, " << inode.ShortDebugString(); - - // 2. insert inode - MetaStatusCode ret = inodeStorage_->Insert(inode, logIndex); - if (ret != MetaStatusCode::OK) { - LOG(ERROR) << "InsertInode fail, " << inode.ShortDebugString() - << ", ret = " << MetaStatusCode_Name(ret); - return ret; - } - - if (inode.nlink() == 0) { - trash_->Add(inode.fsid(), inode.inodeid(), inode.dtime()); - } - - return MetaStatusCode::OK; -} - bool InodeManager::GetInodeIdList(std::list* inodeIdList) { return inodeStorage_->GetAllInodeId(inodeIdList); } diff --git a/curvefs/src/metaserver/inode_manager.h b/curvefs/src/metaserver/inode_manager.h index bd28f65790..81ba1a4516 100644 --- a/curvefs/src/metaserver/inode_manager.h +++ b/curvefs/src/metaserver/inode_manager.h @@ -27,6 +27,7 @@ #include #include +#include #include #include #include @@ -96,6 +97,8 @@ class InodeManager { MetaStatusCode UpdateInode(const UpdateInodeRequest& request, int64_t logIndex); + void LoadDeletedInodes(); + MetaStatusCode GetOrModifyS3ChunkInfo( uint32_t fsId, uint64_t inodeId, const S3ChunkInfoMap& map2add, const S3ChunkInfoMap& map2del, bool returnS3ChunkInfoMap, @@ -105,13 +108,8 @@ class InodeManager { S3ChunkInfoMap* m, uint64_t limit = 0); - MetaStatusCode UpdateInodeWhenCreateOrRemoveSubNode(const Dentry& dentry, - uint64_t now, - uint32_t now_ns, - bool isCreate, - int64_t logIndex); - - MetaStatusCode InsertInode(const Inode& inode, int64_t logIndex); + MetaStatusCode UpdateInodeWhenCreateOrRemoveSubNode( + const Dentry& dentry, const Time& tm, bool isCreate, int64_t logIndex); bool GetInodeIdList(std::list* inodeIdList); diff --git a/curvefs/src/metaserver/inode_storage.cpp 
b/curvefs/src/metaserver/inode_storage.cpp index 8e5da8a913..53835a6a94 100644 --- a/curvefs/src/metaserver/inode_storage.cpp +++ b/curvefs/src/metaserver/inode_storage.cpp @@ -57,6 +57,7 @@ using ::curvefs::metaserver::storage::Prefix4ChunkIndexS3ChunkInfoList; using ::curvefs::metaserver::storage::Prefix4InodeS3ChunkInfoList; using ::curvefs::metaserver::storage::Prefix4InodeVolumeExtent; using ::curvefs::metaserver::storage::Status; +using curvefs::metaserver::Time; const char* InodeStorage::kInodeCountKey("count"); @@ -67,6 +68,7 @@ InodeStorage::InodeStorage(std::shared_ptr kvStorage, uint64_t nInode) : kvStorage_(std::move(kvStorage)), table4Inode_(nameGenerator->GetInodeTableName()), + table4DelInode_(nameGenerator->GetDelInodeTableName()), table4S3ChunkInfo_(nameGenerator->GetS3ChunkInfoTableName()), table4VolumeExtent_(nameGenerator->GetVolumeExtentTableName()), table4InodeAuxInfo_(nameGenerator->GetInodeAuxInfoTableName()), @@ -185,9 +187,78 @@ MetaStatusCode InodeStorage::Insert(const Inode& inode, int64_t logIndex) { return MetaStatusCode::STORAGE_INTERNAL_ERROR; } +MetaStatusCode InodeStorage::AddDeletedInode( + const Key4Inode& keyInode, uint64_t dtime) { + WriteLockGuard lg(rwLock_); + std::string skey = conv_.SerializeToString(keyInode); + VLOG(9) << "update deleting key, " << keyInode.inodeId << ", " << dtime; + const char* step = "Begin transaction"; + std::shared_ptr txn; + txn = kvStorage_->BeginTransaction(); + if (txn == nullptr) { + LOG(ERROR) << "Begin transaction failed"; + return MetaStatusCode::STORAGE_INTERNAL_ERROR; + } + Time dtimeInfo; + dtimeInfo.set_sec(dtime); + dtimeInfo.set_nsec(0); + auto rc = txn->HSet(table4DelInode_, skey, dtimeInfo); + step = "insert inode "; + if (rc.ok()) { + rc = txn->Commit(); + step = "commit"; + } + if (rc.ok()) { + VLOG(9) << "set deleting key ok"; + return MetaStatusCode::OK; + } + LOG(ERROR) << step << "failed, status = " << rc.ToString(); + if (!txn->Rollback().ok()) { + LOG(ERROR) << "Rollback 
delete inode transaction failed, status = " + << rc.ToString(); + } + return MetaStatusCode::STORAGE_INTERNAL_ERROR; +} + +MetaStatusCode InodeStorage::RemoveDeletedInode(const Key4Inode& key) { + WriteLockGuard lg(rwLock_); + std::string skey = conv_.SerializeToString(key); + VLOG(9) << "clear deleting key start, " << skey; + std::shared_ptr txn = nullptr; + const char* step = "Begin transaction"; + txn = kvStorage_->BeginTransaction(); + if (txn == nullptr) { + LOG(ERROR) << "Begin transaction failed"; + return MetaStatusCode::STORAGE_INTERNAL_ERROR; + } + step = "Delete inode from transaction"; + auto s = txn->HDel(table4DelInode_, skey); + if (s.ok()) { + step = "Delete inode"; + s = txn->Commit(); + } + if (s.ok()) { + VLOG(9) << "clear deleting key ok, " << skey; + return MetaStatusCode::OK; + } + LOG(ERROR) << step << " failed, status = " << s.ToString(); + if (!txn->Rollback().ok()) { + LOG(ERROR) << "Rollback delete inode transaction failed, status = " + << s.ToString(); + } + return MetaStatusCode::STORAGE_INTERNAL_ERROR; +} + +void InodeStorage::LoadDeletedInodes(std::map * inodes) { + VLOG(6) << "load deleted key start with: " << table4DelInode_; + kvStorage_->GetPrefix(inodes, table4DelInode_); + VLOG(6) << "load deleted over"; +} + MetaStatusCode InodeStorage::Get(const Key4Inode& key, Inode* inode) { ReadLockGuard lg(rwLock_); std::string skey = conv_.SerializeToString(key); + Status s = kvStorage_->HGet(table4Inode_, skey, inode); if (s.ok()) { return MetaStatusCode::OK; @@ -471,7 +542,6 @@ MetaStatusCode InodeStorage::Clear() { // because if we fail stop, we will replay // raft logs and clear it again WriteLockGuard lg(rwLock_); - Status s = kvStorage_->HClear(table4Inode_); if (!s.ok()) { LOG(ERROR) << "InodeStorage clear inode table failed, status = " @@ -492,7 +562,6 @@ MetaStatusCode InodeStorage::Clear() { << s.ToString(); return MetaStatusCode::STORAGE_INTERNAL_ERROR; } - s = kvStorage_->HClear(table4InodeAuxInfo_); if (!s.ok()) { LOG(ERROR) 
diff --git a/curvefs/src/metaserver/inode_storage.h b/curvefs/src/metaserver/inode_storage.h index 38ad3c5f56..a7654c5963 100644 --- a/curvefs/src/metaserver/inode_storage.h +++ b/curvefs/src/metaserver/inode_storage.h @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -76,6 +77,18 @@ class InodeStorage { */ MetaStatusCode Insert(const Inode& inode, int64_t logIndex); + /** + * @brief update deleting inode key in storage + * @param[in] inode: the inode want to update + * @param[in] logIndex: the index of raft log + * @return + */ + MetaStatusCode AddDeletedInode(const Key4Inode& inode, uint64_t dtime); + + MetaStatusCode RemoveDeletedInode(const Key4Inode& key); + + void LoadDeletedInodes(std::map * inodes); + /** * @brief get inode from storage * @param[in] key: the key of inode want to get @@ -237,6 +250,7 @@ class InodeStorage { RWLock rwLock_; std::shared_ptr kvStorage_; std::string table4Inode_; + std::string table4DelInode_; std::string table4S3ChunkInfo_; std::string table4VolumeExtent_; std::string table4InodeAuxInfo_; diff --git a/curvefs/src/metaserver/main.cpp b/curvefs/src/metaserver/main.cpp index ec390ab951..5552dd5bed 100644 --- a/curvefs/src/metaserver/main.cpp +++ b/curvefs/src/metaserver/main.cpp @@ -29,6 +29,7 @@ #include "src/common/configuration.h" #include "curvefs/src/common/dynamic_vlog.h" #include "curvefs/src/common/threading.h" +#include "src/common/log_util.h" DEFINE_string(confPath, "curvefs/conf/metaserver.conf", "metaserver confPath"); DEFINE_string(ip, "127.0.0.1", "metasetver listen ip"); @@ -126,6 +127,7 @@ int main(int argc, char **argv) { FLAGS_vlog_level = FLAGS_v; // initialize logging module + curve::common::DisableLoggingToStdErr(); google::InitGoogleLogging(argv[0]); conf->PrintConfig(); diff --git a/curvefs/src/metaserver/metaserver.cpp b/curvefs/src/metaserver/metaserver.cpp index 5798920f7d..d483e9d5a6 100644 --- a/curvefs/src/metaserver/metaserver.cpp +++ 
b/curvefs/src/metaserver/metaserver.cpp @@ -197,10 +197,8 @@ void InitExcutorOption(const std::shared_ptr& conf, &opts->minRetryTimesForceTimeoutBackoff); conf->GetValueFatalIfFail("excutorOpt.maxRetryTimesBeforeConsiderSuspend", &opts->maxRetryTimesBeforeConsiderSuspend); - conf->GetValueFatalIfFail("excutorOpt.batchInodeAttrLimit", - &opts->batchInodeAttrLimit); - conf->GetValueFatalIfFail("excutorOpt.enableMultiMountPointRename", - &opts->enableRenameParallel); + conf->GetValueFatalIfFail( + "excutorOpt.batchInodeAttrLimit", &opts->batchInodeAttrLimit); } void InitMetaCacheOption(const std::shared_ptr& conf, diff --git a/curvefs/src/metaserver/metaserver_service.cpp b/curvefs/src/metaserver/metaserver_service.cpp index 6cc3d9c914..b787c6587e 100644 --- a/curvefs/src/metaserver/metaserver_service.cpp +++ b/curvefs/src/metaserver/metaserver_service.cpp @@ -33,26 +33,30 @@ static bvar::LatencyRecorder g_oprequest_in_service_before_propose_latency( namespace curvefs { namespace metaserver { -using ::curvefs::metaserver::copyset::GetDentryOperator; -using ::curvefs::metaserver::copyset::ListDentryOperator; -using ::curvefs::metaserver::copyset::CreateDentryOperator; -using ::curvefs::metaserver::copyset::DeleteDentryOperator; -using ::curvefs::metaserver::copyset::GetInodeOperator; using ::curvefs::metaserver::copyset::BatchGetInodeAttrOperator; using ::curvefs::metaserver::copyset::BatchGetXAttrOperator; +using ::curvefs::metaserver::copyset::CheckTxStatusOperator; +using ::curvefs::metaserver::copyset::CommitTxOperator; +using ::curvefs::metaserver::copyset::CreateDentryOperator; using ::curvefs::metaserver::copyset::CreateInodeOperator; -using ::curvefs::metaserver::copyset::CreateRootInodeOperator; using ::curvefs::metaserver::copyset::CreateManageInodeOperator; -using ::curvefs::metaserver::copyset::UpdateInodeOperator; -using ::curvefs::metaserver::copyset::GetOrModifyS3ChunkInfoOperator; -using ::curvefs::metaserver::copyset::DeleteInodeOperator; -using 
::curvefs::metaserver::copyset::UpdateInodeS3VersionOperator; using ::curvefs::metaserver::copyset::CreatePartitionOperator; +using ::curvefs::metaserver::copyset::CreateRootInodeOperator; +using ::curvefs::metaserver::copyset::DeleteDentryOperator; +using ::curvefs::metaserver::copyset::DeleteInodeOperator; using ::curvefs::metaserver::copyset::DeletePartitionOperator; -using ::curvefs::metaserver::copyset::PrepareRenameTxOperator; +using ::curvefs::metaserver::copyset::GetDentryOperator; +using ::curvefs::metaserver::copyset::GetInodeOperator; +using ::curvefs::metaserver::copyset::GetOrModifyS3ChunkInfoOperator; using ::curvefs::metaserver::copyset::GetVolumeExtentOperator; -using ::curvefs::metaserver::copyset::UpdateVolumeExtentOperator; +using ::curvefs::metaserver::copyset::ListDentryOperator; +using ::curvefs::metaserver::copyset::PrepareRenameTxOperator; +using ::curvefs::metaserver::copyset::PrewriteRenameTxOperator; +using ::curvefs::metaserver::copyset::ResolveTxLockOperator; using ::curvefs::metaserver::copyset::UpdateDeallocatableBlockGroupOperator; +using ::curvefs::metaserver::copyset::UpdateInodeOperator; +using ::curvefs::metaserver::copyset::UpdateInodeS3VersionOperator; +using ::curvefs::metaserver::copyset::UpdateVolumeExtentOperator; namespace { @@ -267,16 +271,6 @@ void MetaServerServiceImpl::DeletePartition( request->copysetid()); } -void MetaServerServiceImpl::PrepareRenameTx( - google::protobuf::RpcController* controller, - const PrepareRenameTxRequest* request, PrepareRenameTxResponse* response, - google::protobuf::Closure* done) { - OperatorHelper helper(copysetNodeManager_, inflightThrottle_); - helper.operator()(controller, request, response, - done, request->poolid(), - request->copysetid()); -} - void MetaServerServiceImpl::GetVolumeExtent( ::google::protobuf::RpcController* controller, const GetVolumeExtentRequest* request, @@ -310,5 +304,49 @@ void MetaServerServiceImpl::UpdateDeallocatableBlockGroup( request->copysetid()); } +void 
MetaServerServiceImpl::PrepareRenameTx( + google::protobuf::RpcController* controller, + const PrepareRenameTxRequest* request, PrepareRenameTxResponse* response, + google::protobuf::Closure* done) { + OperatorHelper helper(copysetNodeManager_, inflightThrottle_); + helper.operator()(controller, request, response, + done, request->poolid(), request->copysetid()); +} + +void MetaServerServiceImpl::PrewriteRenameTx( + google::protobuf::RpcController* controller, + const PrewriteRenameTxRequest* request, PrewriteRenameTxResponse* response, + google::protobuf::Closure* done) { + OperatorHelper helper(copysetNodeManager_, inflightThrottle_); + helper.operator()(controller, request, response, + done, request->poolid(), request->copysetid()); +} + +void MetaServerServiceImpl::CheckTxStatus( + google::protobuf::RpcController* controller, + const CheckTxStatusRequest* request, CheckTxStatusResponse* response, + google::protobuf::Closure* done) { + OperatorHelper helper(copysetNodeManager_, inflightThrottle_); + helper.operator()(controller, request, response, + done, request->poolid(), request->copysetid()); +} + +void MetaServerServiceImpl::ResolveTxLock( + google::protobuf::RpcController* controller, + const ResolveTxLockRequest* request, ResolveTxLockResponse* response, + google::protobuf::Closure* done) { + OperatorHelper helper(copysetNodeManager_, inflightThrottle_); + helper.operator()(controller, request, response, + done, request->poolid(), request->copysetid()); +} + +void MetaServerServiceImpl::CommitTx( + google::protobuf::RpcController* controller, const CommitTxRequest* request, + CommitTxResponse* response, google::protobuf::Closure* done) { + OperatorHelper helper(copysetNodeManager_, inflightThrottle_); + helper.operator()(controller, request, response, done, + request->poolid(), request->copysetid()); +} + } // namespace metaserver } // namespace curvefs diff --git a/curvefs/src/metaserver/metaserver_service.h b/curvefs/src/metaserver/metaserver_service.h 
index 7e19e20b20..a5fc8e6211 100644 --- a/curvefs/src/metaserver/metaserver_service.h +++ b/curvefs/src/metaserver/metaserver_service.h @@ -108,11 +108,6 @@ class MetaServerServiceImpl : public MetaServerService { DeletePartitionResponse* response, google::protobuf::Closure* done) override; - void PrepareRenameTx(google::protobuf::RpcController* controller, - const PrepareRenameTxRequest* request, - PrepareRenameTxResponse* response, - google::protobuf::Closure* done) override; - void GetVolumeExtent(::google::protobuf::RpcController* controller, const GetVolumeExtentRequest* request, GetVolumeExtentResponse* response, @@ -129,6 +124,29 @@ class MetaServerServiceImpl : public MetaServerService { UpdateDeallocatableBlockGroupResponse *response, ::google::protobuf::Closure *done) override; + // reserved for compatibility + void PrepareRenameTx(google::protobuf::RpcController* controller, + const PrepareRenameTxRequest* request, + PrepareRenameTxResponse* response, + google::protobuf::Closure* done) override; + + void PrewriteRenameTx(google::protobuf::RpcController* controller, + const PrewriteRenameTxRequest* request, + PrewriteRenameTxResponse* response, + google::protobuf::Closure* done) override; + + void CheckTxStatus(google::protobuf::RpcController* controller, + const CheckTxStatusRequest* request, CheckTxStatusResponse* response, + google::protobuf::Closure* done) override; + + void ResolveTxLock(google::protobuf::RpcController* controller, + const ResolveTxLockRequest* request, ResolveTxLockResponse* response, + google::protobuf::Closure* done) override; + + void CommitTx(google::protobuf::RpcController* controller, + const CommitTxRequest* request, CommitTxResponse* response, + google::protobuf::Closure* done) override; + private: CopysetNodeManager* copysetNodeManager_; InflightThrottle* inflightThrottle_; diff --git a/curvefs/src/metaserver/metastore.cpp b/curvefs/src/metaserver/metastore.cpp index 875f71cffc..f102275a38 100644 --- 
a/curvefs/src/metaserver/metastore.cpp +++ b/curvefs/src/metaserver/metastore.cpp @@ -25,6 +25,7 @@ #include #include +#include #include #include // NOLINT #include @@ -60,6 +61,7 @@ using KVStorage = ::curvefs::metaserver::storage::KVStorage; using Key4S3ChunkInfoList = ::curvefs::metaserver::storage::Key4S3ChunkInfoList; using ::curvefs::metaserver::storage::MemoryStorage; +using ::curvefs::metaserver::storage::NameGenerator; using ::curvefs::metaserver::storage::RocksDBStorage; using ::curvefs::metaserver::storage::StorageOptions; @@ -147,6 +149,7 @@ bool MetaStoreImpl::Load(const std::string &pathname) { } startCompacts(); + return true; } @@ -358,6 +361,15 @@ std::shared_ptr MetaStoreImpl::GetStreamServer() { return status; \ } +#define GET_TIME_FROM_REQUEST(TIME) \ + uint64_t now = 0; \ + uint32_t now_ns = 0; \ + if (request->has_create()) { \ + now = request->create().sec(); \ + now_ns = request->create().nsec(); \ + } \ + TIME.set_sec(now); \ + TIME.set_nsec(now_ns); // dentry MetaStatusCode MetaStoreImpl::CreateDentry(const CreateDentryRequest* request, @@ -366,19 +378,15 @@ MetaStatusCode MetaStoreImpl::CreateDentry(const CreateDentryRequest* request, ReadLockGuard readLockGuard(rwLock_); std::shared_ptr partition; GET_PARTITION_OR_RETURN(partition); - uint64_t now = 0; - uint32_t now_ns = 0; - if (request->has_create()) { - now = request->create().sec(); - now_ns = request->create().nsec(); - } Time tm; - tm.set_sec(now); - tm.set_nsec(now_ns); - MetaStatusCode status = - partition->CreateDentry(request->dentry(), tm, logIndex); - response->set_statuscode(status); - return status; + GET_TIME_FROM_REQUEST(tm); + TxLock txLock; + auto rc = partition->CreateDentry(request->dentry(), tm, logIndex, &txLock); + response->set_statuscode(rc); + if (rc == MetaStatusCode::TX_KEY_LOCKED) { + *response->mutable_txlock() = std::move(txLock); + } + return rc; } MetaStatusCode MetaStoreImpl::GetDentry(const GetDentryRequest* request, @@ -401,10 +409,13 @@ 
MetaStatusCode MetaStoreImpl::GetDentry(const GetDentryRequest* request, dentry.set_name(name); dentry.set_txid(txId); - auto rc = partition->GetDentry(&dentry); + TxLock txLock; + auto rc = partition->GetDentry(&dentry, &txLock); response->set_statuscode(rc); if (rc == MetaStatusCode::OK) { *response->mutable_dentry() = std::move(dentry); + } else if (rc == MetaStatusCode::TX_KEY_LOCKED) { + *response->mutable_txlock() = std::move(txLock); } return rc; } @@ -428,8 +439,14 @@ MetaStatusCode MetaStoreImpl::DeleteDentry(const DeleteDentryRequest* request, dentry.set_txid(txId); dentry.set_type(request->type()); - auto rc = partition->DeleteDentry(dentry, logIndex); + Time tm; + GET_TIME_FROM_REQUEST(tm); + TxLock txLock; + auto rc = partition->DeleteDentry(dentry, tm, logIndex, &txLock); response->set_statuscode(rc); + if (rc == MetaStatusCode::TX_KEY_LOCKED) { + *response->mutable_txlock() = std::move(txLock); + } return rc; } @@ -460,12 +477,16 @@ MetaStatusCode MetaStoreImpl::ListDentry(const ListDentryRequest* request, } std::vector dentrys; - auto rc = - partition->ListDentry(dentry, &dentrys, request->count(), onlyDir); + TxLock txLock; + auto rc = partition->ListDentry( + dentry, &dentrys, request->count(), onlyDir, &txLock); response->set_statuscode(rc); if (rc == MetaStatusCode::OK && !dentrys.empty()) { *response->mutable_dentrys() = {dentrys.begin(), dentrys.end()}; } + if (rc == MetaStatusCode::TX_KEY_LOCKED) { + *response->mutable_txlock() = std::move(txLock); + } return rc; } @@ -487,6 +508,59 @@ MetaStatusCode MetaStoreImpl::PrepareRenameTx( return rc; } +MetaStatusCode MetaStoreImpl::PrewriteRenameTx( + const PrewriteRenameTxRequest* request, PrewriteRenameTxResponse* response, + int64_t logIndex) { + MetaStatusCode rc; + std::shared_ptr partition; + GET_PARTITION_OR_RETURN(partition); + TxLock txLock; + std::vector dentrys{request->dentrys().begin(), + request->dentrys().end()}; + rc = partition->PrewriteRenameTx( + dentrys, request->txlock(), 
logIndex, &txLock); + response->set_statuscode(rc); + if (rc == MetaStatusCode::TX_KEY_LOCKED) { + *response->mutable_txlock() = std::move(txLock); + } + return rc; +} + +MetaStatusCode MetaStoreImpl::CheckTxStatus(const CheckTxStatusRequest* request, + CheckTxStatusResponse* response, int64_t logIndex) { + MetaStatusCode rc; + std::shared_ptr partition; + GET_PARTITION_OR_RETURN(partition); + rc = partition->CheckTxStatus(request->primarykey(), request->startts(), + request->curtimestamp(), logIndex); + response->set_statuscode(rc); + return MetaStatusCode::OK; +} + +MetaStatusCode MetaStoreImpl::ResolveTxLock(const ResolveTxLockRequest* request, + ResolveTxLockResponse* response, int64_t logIndex) { + MetaStatusCode rc; + std::shared_ptr partition; + GET_PARTITION_OR_RETURN(partition); + rc = partition->ResolveTxLock(request->dentry(), + request->startts(), request->committs(), logIndex); + response->set_statuscode(rc); + return MetaStatusCode::OK; +} + +MetaStatusCode MetaStoreImpl::CommitTx(const CommitTxRequest* request, + CommitTxResponse* response, int64_t logIndex) { + MetaStatusCode rc; + std::shared_ptr partition; + GET_PARTITION_OR_RETURN(partition); + std::vector dentrys{request->dentrys().begin(), + request->dentrys().end()}; + rc = partition->CommitTx(dentrys, request->startts(), + request->committs(), logIndex); + response->set_statuscode(rc); + return MetaStatusCode::OK; +} + // inode MetaStatusCode MetaStoreImpl::CreateInode(const CreateInodeRequest* request, CreateInodeResponse* response, @@ -862,5 +936,17 @@ bool MetaStoreImpl::InitStorage() { return kvStorage_->Open(); } +void MetaStoreImpl::LoadDeletedInodes() { + VLOG(6) << "load deleted inodes start."; + WriteLockGuard writeLockGuard(rwLock_); + MetaStatusCode status; + for (auto it = partitionMap_.begin(); it != partitionMap_.end(); it++) { + uint32_t partitionId = it->second->GetPartitionId(); + VLOG(6) << "load deleted inodes, partitionId: " << partitionId; + 
it->second->LoadDeletedInodes(); + } + VLOG(6) << "load deleted inodes end."; +} + } // namespace metaserver } // namespace curvefs diff --git a/curvefs/src/metaserver/metastore.h b/curvefs/src/metaserver/metastore.h index a13c0a4980..4babaf657e 100644 --- a/curvefs/src/metaserver/metastore.h +++ b/curvefs/src/metaserver/metastore.h @@ -117,6 +117,7 @@ class MetaStore { virtual bool SaveData(const std::string& dir, std::vector* files) = 0; virtual bool Clear() = 0; + virtual void LoadDeletedInodes() {} virtual bool Destroy() = 0; virtual MetaStatusCode CreatePartition( const CreatePartitionRequest* request, @@ -155,6 +156,19 @@ class MetaStore { const PrepareRenameTxRequest* request, PrepareRenameTxResponse* response, int64_t logIndex) = 0; + virtual MetaStatusCode PrewriteRenameTx( + const PrewriteRenameTxRequest* request, + PrewriteRenameTxResponse* response, int64_t logIndex) = 0; + + virtual MetaStatusCode CheckTxStatus(const CheckTxStatusRequest* request, + CheckTxStatusResponse* response, int64_t logIndex) = 0; + + virtual MetaStatusCode ResolveTxLock(const ResolveTxLockRequest* request, + ResolveTxLockResponse* response, int64_t logIndex) = 0; + + virtual MetaStatusCode CommitTx(const CommitTxRequest* request, + CommitTxResponse* response, int64_t logIndex) = 0; + // inode virtual MetaStatusCode CreateInode(const CreateInodeRequest* request, CreateInodeResponse* response, @@ -223,6 +237,7 @@ class MetaStoreImpl : public MetaStore { std::vector* files) override; bool Clear() override; bool Destroy() override; + void LoadDeletedInodes() override; MetaStatusCode CreatePartition(const CreatePartitionRequest* request, CreatePartitionResponse* response, @@ -261,6 +276,18 @@ class MetaStoreImpl : public MetaStore { PrepareRenameTxResponse* response, int64_t logIndex) override; + MetaStatusCode PrewriteRenameTx(const PrewriteRenameTxRequest* request, + PrewriteRenameTxResponse* response, int64_t logIndex) override; + + MetaStatusCode CheckTxStatus(const 
CheckTxStatusRequest* request, + CheckTxStatusResponse* response, int64_t logIndex) override; + + MetaStatusCode ResolveTxLock(const ResolveTxLockRequest* request, + ResolveTxLockResponse* response, int64_t logIndex) override; + + MetaStatusCode CommitTx(const CommitTxRequest* request, + CommitTxResponse* response, int64_t logIndex) override; + // inode MetaStatusCode CreateInode(const CreateInodeRequest* request, CreateInodeResponse* response, diff --git a/curvefs/src/metaserver/partition.cpp b/curvefs/src/metaserver/partition.cpp index 3c3e7d7b99..c6e230428d 100644 --- a/curvefs/src/metaserver/partition.cpp +++ b/curvefs/src/metaserver/partition.cpp @@ -29,6 +29,7 @@ #include #include #include +#include #include "curvefs/proto/metaserver.pb.h" #include "curvefs/src/metaserver/copyset/copyset_node_manager.h" @@ -68,7 +69,9 @@ Partition::Partition(PartitionInfo partition, dentryStorage_ = std::make_shared(kvStorage_, nameGen_, nDentry); - auto trash = std::make_shared(inodeStorage_); + auto trash = std::make_shared(inodeStorage_, + partitionInfo_.fsid(), partitionInfo_.poolid(), + partitionInfo_.copysetid(), partitionInfo_.partitionid()); inodeManager_ = std::make_shared( inodeStorage_, trash, partitionInfo_.mutable_filetype2inodenum()); txManager_ = std::make_shared(dentryStorage_, partitionInfo_); @@ -80,7 +83,12 @@ Partition::Partition(PartitionInfo partition, } if (partitionInfo_.status() != PartitionStatus::DELETING) { - TrashManager::GetInstance().Add(partitionInfo_.partitionid(), trash); + auto handle = std::async(std::launch::async, [&]() { + TrashManager::GetInstance().Add( + partitionInfo_.partitionid(), trash); + }); + handle.wait(); + if (startCompact) { StartS3Compact(); } @@ -111,13 +119,13 @@ Partition::Partition(PartitionInfo partition, } while (0) MetaStatusCode Partition::CreateDentry(const Dentry& dentry, const Time& tm, - int64_t logIndex) { + int64_t logIndex, TxLock* txLock) { PRECHECK(dentry.fsid(), dentry.parentinodeid()); - MetaStatusCode 
ret = dentryManager_->CreateDentry(dentry, logIndex); + MetaStatusCode ret = dentryManager_->CreateDentry(dentry, logIndex, txLock); if (MetaStatusCode::OK == ret) { if (dentry.has_type()) { return inodeManager_->UpdateInodeWhenCreateOrRemoveSubNode( - dentry, tm.sec(), tm.nsec(), true, logIndex); + dentry, tm, true, logIndex); } else { LOG(ERROR) << "CreateDentry does not have type, " << dentry.ShortDebugString(); @@ -128,7 +136,7 @@ MetaStatusCode Partition::CreateDentry(const Dentry& dentry, const Time& tm, // NOTE: we enter here means that // this log maybe is "half apply" ret = inodeManager_->UpdateInodeWhenCreateOrRemoveSubNode( - dentry, tm.sec(), tm.nsec(), true, logIndex); + dentry, tm, true, logIndex); if (ret == MetaStatusCode::IDEMPOTENCE_OK) { ret = MetaStatusCode::OK; } @@ -155,14 +163,16 @@ MetaStatusCode Partition::LoadDentry(const DentryVec& vec, bool merge, return rc; } -MetaStatusCode Partition::DeleteDentry(const Dentry& dentry, int64_t logIndex) { - PRECHECK(dentry.fsid(), dentry.parentinodeid()); - MetaStatusCode ret = dentryManager_->DeleteDentry(dentry, logIndex); +MetaStatusCode Partition::DeleteDentry(const Dentry& dentry, + const Time& tm, int64_t logIndex, TxLock* txLock) { + PRECHECK(dentry.fsid(), dentry.parentinodeid()); + MetaStatusCode ret = dentryManager_->DeleteDentry( + dentry, logIndex, txLock); if (MetaStatusCode::OK == ret) { if (dentry.has_type()) { return inodeManager_->UpdateInodeWhenCreateOrRemoveSubNode( - dentry, 0, 0, false, logIndex); + dentry, tm, false, logIndex); } else { LOG(ERROR) << "DeleteDentry does not have type, " << dentry.ShortDebugString(); @@ -173,7 +183,7 @@ MetaStatusCode Partition::DeleteDentry(const Dentry& dentry, int64_t logIndex) { // NOTE: we enter here means that // this log maybe is "half apply" ret = inodeManager_->UpdateInodeWhenCreateOrRemoveSubNode( - dentry, 0, 0, false, logIndex); + dentry, tm, false, logIndex); } if (ret == MetaStatusCode::IDEMPOTENCE_OK) { ret = MetaStatusCode::OK; @@ 
-184,16 +194,18 @@ MetaStatusCode Partition::DeleteDentry(const Dentry& dentry, int64_t logIndex) { } } -MetaStatusCode Partition::GetDentry(Dentry* dentry) { +MetaStatusCode Partition::GetDentry(Dentry* dentry, TxLock* txLock) { PRECHECK(dentry->fsid(), dentry->parentinodeid()); - return dentryManager_->GetDentry(dentry); + return dentryManager_->GetDentry(dentry, txLock); } MetaStatusCode Partition::ListDentry(const Dentry& dentry, std::vector* dentrys, - uint32_t limit, bool onlyDir) { + uint32_t limit, bool onlyDir, + TxLock* txLock) { PRECHECK(dentry.fsid(), dentry.parentinodeid()); - return dentryManager_->ListDentry(dentry, dentrys, limit, onlyDir); + return dentryManager_->ListDentry( + dentry, dentrys, limit, onlyDir, txLock); } void Partition::ClearDentry() { dentryManager_->ClearDentry(); } @@ -248,6 +260,35 @@ bool Partition::FindPendingTx(PrepareRenameTxRequest* pendingTx) { return true; } +MetaStatusCode Partition::PrewriteRenameTx(const std::vector& dentrys, + const TxLock& txLock, int64_t logIndex, TxLock* out) { + for (const auto& it : dentrys) { + PRECHECK(it.fsid(), it.parentinodeid()); + } + return dentryManager_->PrewriteRenameTx(dentrys, txLock, logIndex, out); +} + +MetaStatusCode Partition::CheckTxStatus(const std::string& primaryKey, + uint64_t startTs, uint64_t curTimestamp, int64_t logIndex) { + return dentryManager_->CheckTxStatus(primaryKey, startTs, curTimestamp, + logIndex); +} + +MetaStatusCode Partition::ResolveTxLock(const Dentry& dentry, + uint64_t startTs, uint64_t commitTs, int64_t logIndex) { + PRECHECK(dentry.fsid(), dentry.parentinodeid()); + return dentryManager_->ResolveTxLock(dentry, startTs, + commitTs, logIndex); +} + +MetaStatusCode Partition::CommitTx(const std::vector& dentrys, + uint64_t startTs, uint64_t commitTs, int64_t logIndex) { + for (const auto& it : dentrys) { + PRECHECK(it.fsid(), it.parentinodeid()); + } + return dentryManager_->CommitTx(dentrys, startTs, commitTs, logIndex); +} + // inode MetaStatusCode 
Partition::CreateInode(const InodeParam& param, Inode* inode, int64_t logIndex) { @@ -335,6 +376,10 @@ MetaStatusCode Partition::UpdateInode(const UpdateInodeRequest& request, return ret; } +void Partition::LoadDeletedInodes() { + inodeManager_->LoadDeletedInodes(); +} + MetaStatusCode Partition::GetOrModifyS3ChunkInfo( uint32_t fsId, uint64_t inodeId, const S3ChunkInfoMap& map2add, const S3ChunkInfoMap& map2del, bool returnS3ChunkInfoMap, @@ -357,15 +402,6 @@ MetaStatusCode Partition::PaddingInodeS3ChunkInfo(int32_t fsId, return inodeManager_->PaddingInodeS3ChunkInfo(fsId, inodeId, m, limit); } -MetaStatusCode Partition::InsertInode(const Inode& inode, int64_t logIndex) { - PRECHECK(inode.fsid(), inode.inodeid()); - auto ret = inodeManager_->InsertInode(inode, logIndex); - if (ret == MetaStatusCode::IDEMPOTENCE_OK) { - ret = MetaStatusCode::OK; - } - return ret; -} - bool Partition::GetInodeIdList(std::list* InodeIdList) { return inodeManager_->GetInodeIdList(InodeIdList); } @@ -468,12 +504,12 @@ uint64_t Partition::GetNewInodeId() { return newInodeId; } -uint32_t Partition::GetInodeNum() { - return static_cast(inodeStorage_->Size()); +uint64_t Partition::GetInodeNum() { + return inodeStorage_->Size(); } -uint32_t Partition::GetDentryNum() { - return static_cast(dentryStorage_->Size()); +uint64_t Partition::GetDentryNum() { + return dentryStorage_->Size(); } bool Partition::EmptyInodeStorage() { return inodeStorage_->Empty(); } @@ -537,8 +573,17 @@ MetaStatusCode Partition::GetAllBlockGroup( } void Partition::StartS3Compact() { - S3CompactManager::GetInstance().Register( - S3Compact{inodeManager_, partitionInfo_}); + // register s3 compaction task in a separate thread, since the caller may + // holds a pthread wrlock when calling this function, and create `S3Compact` + // will acquire a bthread rwlock, may cause thread switching, thus causing a + // deadlock. 
+ // FIXME(wuhanqing): handle it in a more elegant way + auto handle = std::async(std::launch::async, [this]() { + S3CompactManager::GetInstance().Register( + S3Compact{inodeManager_, partitionInfo_}); + }); + + handle.wait(); } void Partition::CancelS3Compact() { @@ -546,45 +591,50 @@ void Partition::CancelS3Compact() { } void Partition::StartVolumeDeallocate() { - FsInfo fsInfo; - bool ok = - FsInfoManager::GetInstance().GetFsInfo(partitionInfo_.fsid(), &fsInfo); - if (!ok) { - LOG(ERROR) - << "Partition start volume deallocate fail, get fsinfo fail. fsid=" - << partitionInfo_.fsid(); - return; - } - - if (!fsInfo.detail().has_volume()) { - LOG(INFO) << "Partition not belong to volume, do not need start " - "deallocate. partitionInfo=" - << partitionInfo_.DebugString(); - return; - } - - VolumeDeallocateCalOption calOpt; - calOpt.kvStorage = kvStorage_; - calOpt.inodeStorage = inodeStorage_; - calOpt.nameGen = nameGen_; - auto copysetNode = - copyset::CopysetNodeManager::GetInstance().GetSharedCopysetNode( - partitionInfo_.poolid(), partitionInfo_.copysetid()); - if (copysetNode == nullptr) { - LOG(ERROR) << "Partition get copyset node failed. poolid=" - << partitionInfo_.poolid() - << ", copysetid=" << partitionInfo_.copysetid(); - return; - } - - InodeVolumeSpaceDeallocate task(partitionInfo_.fsid(), - partitionInfo_.partitionid(), copysetNode); - task.Init(calOpt); - - VolumeDeallocateManager::GetInstance().Register(std::move(task)); - - VLOG(3) << "Partition start volume deallocate success. partitionInfo=" - << partitionInfo_.DebugString(); + // FIXME(wuhanqing): same as `StartS3Compact` + auto handle = std::async(std::launch::async, [this]() { + FsInfo fsInfo; + bool ok = FsInfoManager::GetInstance().GetFsInfo(partitionInfo_.fsid(), + &fsInfo); + if (!ok) { + LOG(ERROR) << "Partition start volume deallocate fail, get fsinfo " + "fail. 
fsid=" + << partitionInfo_.fsid(); + return; + } + + if (!fsInfo.detail().has_volume()) { + LOG(INFO) << "Partition not belong to volume, do not need start " + "deallocate. partitionInfo=" + << partitionInfo_.DebugString(); + return; + } + + VolumeDeallocateCalOption calOpt; + calOpt.kvStorage = kvStorage_; + calOpt.inodeStorage = inodeStorage_; + calOpt.nameGen = nameGen_; + auto copysetNode = + copyset::CopysetNodeManager::GetInstance().GetSharedCopysetNode( + partitionInfo_.poolid(), partitionInfo_.copysetid()); + if (copysetNode == nullptr) { + LOG(ERROR) << "Partition get copyset node failed. poolid=" + << partitionInfo_.poolid() + << ", copysetid=" << partitionInfo_.copysetid(); + return; + } + + InodeVolumeSpaceDeallocate task( + partitionInfo_.fsid(), partitionInfo_.partitionid(), copysetNode); + task.Init(calOpt); + + VolumeDeallocateManager::GetInstance().Register(std::move(task)); + + VLOG(3) << "Partition start volume deallocate success. partitionInfo=" + << partitionInfo_.DebugString(); + }); + + handle.wait(); } void Partition::CancelVolumeDeallocate() { diff --git a/curvefs/src/metaserver/partition.h b/curvefs/src/metaserver/partition.h index c78738cb85..2ee1d2b1d1 100644 --- a/curvefs/src/metaserver/partition.h +++ b/curvefs/src/metaserver/partition.h @@ -56,21 +56,23 @@ class Partition { Partition(PartitionInfo partition, std::shared_ptr kvStorage, bool startCompact = true, bool startVolumeDeallocate = true); Partition() = default; + bool Init(); // dentry - MetaStatusCode CreateDentry(const Dentry& dentry, const Time& tm, - int64_t logIndex); + MetaStatusCode CreateDentry(const Dentry& dentry, + const Time& tm, int64_t logIndex, TxLock* txLock = nullptr); - MetaStatusCode LoadDentry(const DentryVec& vec, bool merge, - int64_t logIndex); + MetaStatusCode LoadDentry( + const DentryVec& vec, bool merge, int64_t logIndex); - MetaStatusCode DeleteDentry(const Dentry& dentry, int64_t logIndex); + MetaStatusCode DeleteDentry(const Dentry& dentry, + const 
Time& tm, int64_t logIndex, TxLock* txLock = nullptr); - MetaStatusCode GetDentry(Dentry* dentry); + MetaStatusCode GetDentry(Dentry* dentry, TxLock* txLock = nullptr); MetaStatusCode ListDentry(const Dentry& dentry, std::vector* dentrys, uint32_t limit, - bool onlyDir = false); + bool onlyDir = false, TxLock* txLock = nullptr); void ClearDentry(); @@ -83,7 +85,17 @@ class Partition { void SerializeRenameTx(const RenameTx& in, PrepareRenameTxRequest* out); - bool Init(); + MetaStatusCode PrewriteRenameTx(const std::vector& dentrys, + const TxLock& txLock, int64_t logIndex, TxLock* out); + + MetaStatusCode CheckTxStatus(const std::string& primaryKey, + uint64_t startTs, uint64_t curTimestamp, int64_t logIndex); + + MetaStatusCode ResolveTxLock(const Dentry& dentry, uint64_t startTs, + uint64_t commitTs, int64_t logIndex); + + MetaStatusCode CommitTx(const std::vector& dentrys, + uint64_t startTs, uint64_t commitTs, int64_t logIndex); // inode MetaStatusCode CreateInode(const InodeParam& param, Inode* inode, @@ -97,6 +109,8 @@ class Partition { MetaStatusCode GetInode(uint32_t fsId, uint64_t inodeId, Inode* inode); + void LoadDeletedInodes(); + MetaStatusCode GetInodeAttr(uint32_t fsId, uint64_t inodeId, InodeAttr* attr); @@ -137,8 +151,6 @@ class Partition { virtual MetaStatusCode GetAllBlockGroup( std::vector* deallocatableBlockGroupVec); - MetaStatusCode InsertInode(const Inode& inode, int64_t logIndex); - bool GetInodeIdList(std::list* InodeIdList); // if partition has no inode or no dentry, it is deletable @@ -166,9 +178,9 @@ class Partition { // if no available inode id in this partiton ,return UINT64_MAX uint64_t GetNewInodeId(); - uint32_t GetInodeNum(); + uint64_t GetInodeNum(); - uint32_t GetDentryNum(); + uint64_t GetDentryNum(); bool EmptyInodeStorage(); diff --git a/curvefs/src/metaserver/recycle_cleaner.cpp b/curvefs/src/metaserver/recycle_cleaner.cpp index bc087a704a..647ab864cb 100644 --- a/curvefs/src/metaserver/recycle_cleaner.cpp +++ 
b/curvefs/src/metaserver/recycle_cleaner.cpp @@ -87,7 +87,8 @@ bool RecycleCleaner::DeleteNode(const Dentry& dentry) { LOG(INFO) << "RecycleCleaner DeleteNode, " << dentry.ShortDebugString(); // Code refers to the implementation of fuse_client.cpp DeleteNode() // 1. delete dentry - auto ret = metaClient_->DeleteDentry(fsId, parent, name, type); + TxLock txLockOut; + auto ret = metaClient_->DeleteDentry(fsId, parent, name, type, &txLockOut); if (ret != MetaStatusCode::OK) { LOG(WARNING) << "delete dentry fail, ret = " << MetaStatusCode_Name(ret) << ", dentry: " << dentry.ShortDebugString(); @@ -189,8 +190,9 @@ bool RecycleCleaner::DeleteDirRecursive(const Dentry& dentry) { while (true) { // 1. list dir std::list dentryList; + TxLock txLockOut; auto ret = metaClient_->ListDentry(fsId, inodeid, last, limit_, onlyDir, - &dentryList); + &dentryList, &txLockOut); if (ret != MetaStatusCode::OK) { LOG(WARNING) << "DeleteDirRecursive list dentry fail, ret = " << MetaStatusCode_Name(ret) @@ -305,14 +307,17 @@ bool RecycleCleaner::ScanRecycle() { Dentry dentry; dentry.set_fsid(partition_->GetFsId()); dentry.set_parentinodeid(RECYCLEINODEID); + // TODO(ALL): fix it when rewrite recycle + // get txid will not be needed in tx v2 dentry.set_txid(GetTxId()); uint32_t count = 0; uint32_t timeoutCount = 0; while (true) { dentry.set_name(last); std::vector tempDentrys; - auto ret = - partition_->ListDentry(dentry, &tempDentrys, limit_, onlyDir); + TxLock txLockOut; + auto ret = partition_->ListDentry( + dentry, &tempDentrys, limit_, onlyDir, &txLockOut); if (ret != MetaStatusCode::OK) { LOG(WARNING) << "Scan recycle, list dentry fail, dentry = " << dentry.ShortDebugString(); diff --git a/curvefs/src/metaserver/storage/converter.cpp b/curvefs/src/metaserver/storage/converter.cpp index 2e48b20d50..cab13e9abf 100644 --- a/curvefs/src/metaserver/storage/converter.cpp +++ b/curvefs/src/metaserver/storage/converter.cpp @@ -23,6 +23,7 @@ #include #include +#include #include #include 
#include @@ -46,6 +47,9 @@ using ::curve::common::StringToUll; using ::curvefs::common::PartitionInfo; static const char* const kDelimiter = ":"; +// Key4TxWrite: kTypeTxWrite:parentInodeId:name/ts +// if name contains ":" will not work if use ":" kDelimiter +static const char* const kTxDelimiter = "/"; static bool CompareType(const std::string& str, KEY_TYPE keyType) { uint32_t n; @@ -54,6 +58,7 @@ static bool CompareType(const std::string& str, KEY_TYPE keyType) { NameGenerator::NameGenerator(uint32_t partitionId) : tableName4Inode_(Format(kTypeInode, partitionId)), + tableName4DelInode_(Format(kTypeDelInode, partitionId)), tableName4DeallocatableIndoe_( Format(kTypeDeallocatableInode, partitionId)), tableName4DeallocatableBlockGroup_( @@ -65,12 +70,18 @@ NameGenerator::NameGenerator(uint32_t partitionId) tableName4AppliedIndex_(Format(kTypeAppliedIndex, partitionId)), tableName4Transaction_(Format(kTypeTransaction, partitionId)), tableName4InodeCount_(Format(kTypeInodeCount, partitionId)), - tableName4DentryCount_(Format(kTypeDentryCount, partitionId)) {} + tableName4DentryCount_(Format(kTypeDentryCount, partitionId)), + tableName4TxLock_(Format(kTypeTxLock, partitionId)), + tableName4TxWrite_(Format(kTypeTxWrite, partitionId)) {} std::string NameGenerator::GetInodeTableName() const { return tableName4Inode_; } +std::string NameGenerator::GetDelInodeTableName() const { + return tableName4DelInode_; +} + std::string NameGenerator::GetDeallocatableInodeTableName() const { return tableName4DeallocatableIndoe_; } @@ -111,10 +122,16 @@ std::string NameGenerator::GetDentryCountTableName() const { return tableName4DentryCount_; } +std::string NameGenerator::GetTxLockTableName() const { + return tableName4TxLock_; +} + +std::string NameGenerator::GetTxWriteTableName() const { + return tableName4TxWrite_; +} + size_t NameGenerator::GetFixedLength() { - size_t length = sizeof(kTypeInode) + sizeof(uint32_t) + strlen(kDelimiter); - LOG(INFO) << "Tablename fixed length is " 
<< length; - return length; + return sizeof(kTypeInode) + sizeof(uint32_t) + strlen(kDelimiter); } std::string NameGenerator::Format(KEY_TYPE type, uint32_t partitionId) { @@ -123,6 +140,15 @@ std::string NameGenerator::Format(KEY_TYPE type, uint32_t partitionId) { return absl::StrCat(type, kDelimiter, absl::string_view(buf, sizeof(buf))); } +KEY_TYPE NameGenerator::DecodeKeyType(const std::string& name) { + std::vector items; + SplitString(name, kDelimiter, &items); + if (items.size() < 2) { + return KEY_TYPE::kTypeUnknown; + } + return static_cast(std::stoi(items[0])); +} + Key4Inode::Key4Inode() : fsId(0), inodeId(0) {} Key4Inode::Key4Inode(uint32_t fsId, uint64_t inodeId) @@ -136,23 +162,23 @@ bool Key4Inode::operator==(const Key4Inode& rhs) { } std::string Key4Inode::SerializeToString() const { - return absl::StrCat(keyType_, ":", fsId, ":", inodeId); + return absl::StrCat(keyType_, kDelimiter, fsId, kDelimiter, inodeId); } bool Key4Inode::ParseFromString(const std::string& value) { std::vector items; - SplitString(value, ":", &items); + SplitString(value, kDelimiter, &items); return items.size() == 3 && CompareType(items[0], keyType_) && StringToUl(items[1], &fsId) && StringToUll(items[2], &inodeId); } std::string Prefix4AllInode::SerializeToString() const { - return absl::StrCat(keyType_, ":"); + return absl::StrCat(keyType_, kDelimiter); } bool Prefix4AllInode::ParseFromString(const std::string& value) { std::vector items; - SplitString(value, ":", &items); + SplitString(value, kDelimiter, &items); return items.size() == 1 && CompareType(items[0], keyType_); } @@ -179,15 +205,16 @@ Key4S3ChunkInfoList::Key4S3ChunkInfoList(uint32_t fsId, uint64_t inodeId, size(size) {} std::string Key4S3ChunkInfoList::SerializeToString() const { - return absl::StrCat(keyType_, ":", fsId, ":", inodeId, ":", chunkIndex, ":", - absl::StrFormat("%020" PRIu64 "", firstChunkId), ":", - absl::StrFormat("%020" PRIu64 "", lastChunkId), ":", - size); + return absl::StrCat(keyType_, 
kDelimiter, fsId, kDelimiter, inodeId, + kDelimiter, chunkIndex, kDelimiter, + absl::StrFormat("%020" PRIu64 "", firstChunkId), kDelimiter, + absl::StrFormat("%020" PRIu64 "", lastChunkId), kDelimiter, + size); } bool Key4S3ChunkInfoList::ParseFromString(const std::string& value) { std::vector items; - SplitString(value, ":", &items); + SplitString(value, kDelimiter, &items); return items.size() == 7 && CompareType(items[0], keyType_) && StringToUl(items[1], &fsId) && StringToUll(items[2], &inodeId) && StringToUll(items[3], &chunkIndex) && @@ -203,14 +230,14 @@ Prefix4ChunkIndexS3ChunkInfoList::Prefix4ChunkIndexS3ChunkInfoList( : fsId(fsId), inodeId(inodeId), chunkIndex(chunkIndex) {} std::string Prefix4ChunkIndexS3ChunkInfoList::SerializeToString() const { - return absl::StrCat(keyType_, ":", fsId, ":", inodeId, ":", chunkIndex, - ":"); + return absl::StrCat(keyType_, kDelimiter, fsId, kDelimiter, inodeId, + kDelimiter, chunkIndex, kDelimiter); } bool Prefix4ChunkIndexS3ChunkInfoList::ParseFromString( const std::string& value) { std::vector items; - SplitString(value, ":", &items); + SplitString(value, kDelimiter, &items); return items.size() == 4 && CompareType(items[0], keyType_) && StringToUl(items[1], &fsId) && StringToUll(items[2], &inodeId) && StringToUll(items[3], &chunkIndex); @@ -224,23 +251,24 @@ Prefix4InodeS3ChunkInfoList::Prefix4InodeS3ChunkInfoList(uint32_t fsId, : fsId(fsId), inodeId(inodeId) {} std::string Prefix4InodeS3ChunkInfoList::SerializeToString() const { - return absl::StrCat(keyType_, ":", fsId, ":", inodeId, ":"); + return absl::StrCat(keyType_, kDelimiter, fsId, kDelimiter, + inodeId, kDelimiter); } bool Prefix4InodeS3ChunkInfoList::ParseFromString(const std::string& value) { std::vector items; - SplitString(value, ":", &items); + SplitString(value, kDelimiter, &items); return items.size() == 3 && CompareType(items[0], keyType_) && StringToUl(items[1], &fsId) && StringToUll(items[2], &inodeId); } std::string 
Prefix4AllS3ChunkInfoList::SerializeToString() const { - return absl::StrCat(kTypeS3ChunkInfo, ":"); + return absl::StrCat(kTypeS3ChunkInfo, kDelimiter); } bool Prefix4AllS3ChunkInfoList::ParseFromString(const std::string& value) { std::vector items; - SplitString(value, ":", &items); + SplitString(value, kDelimiter, &items); return items.size() == 1 && CompareType(items[0], keyType_); } @@ -255,7 +283,7 @@ std::string Key4Dentry::SerializeToString() const { bool Key4Dentry::ParseFromString(const std::string& value) { std::vector items; - SplitString(value, ":", &items); + SplitString(value, kDelimiter, &items); if (items.size() < 3 || !CompareType(items[0], keyType_) || !StringToUl(items[1], &fsId) || !StringToUll(items[2], &parentInodeId)) { @@ -282,21 +310,84 @@ std::string Prefix4SameParentDentry::SerializeToString() const { bool Prefix4SameParentDentry::ParseFromString(const std::string& value) { std::vector items; - SplitString(value, ":", &items); + SplitString(value, kDelimiter, &items); return items.size() == 3 && CompareType(items[0], keyType_) && StringToUl(items[1], &fsId) && StringToUll(items[2], &parentInodeId); } std::string Prefix4AllDentry::SerializeToString() const { - return absl::StrCat(keyType_, ":"); + return absl::StrCat(keyType_, kDelimiter); } bool Prefix4AllDentry::ParseFromString(const std::string& value) { std::vector items; - SplitString(value, ":", &items); + SplitString(value, kDelimiter, &items); return items.size() == 1 && CompareType(items[0], keyType_); } +std::string Key4TxWrite::SerializeToString() const { + return absl::StrCat(keyType_, kDelimiter, fsId, kDelimiter, parentInodeId, + kDelimiter, name, kTxDelimiter, ts); +} + +bool Key4TxWrite::ParseFromString(const std::string& value) { + // 1. split dentryKey and ts + std::vector keys; + SplitString(value, kTxDelimiter, &keys); + if (keys.size() != 2) { + return false; + } + if (!StringToUll(keys[1], &ts)) { + return false; + } + + // 2. 
decode dentryKey + std::vector items; + SplitString(keys[0], kDelimiter, &items); + if (items.size() < 3 || !CompareType(items[0], keyType_) || + !StringToUl(items[1], &fsId) || + !StringToUll(items[2], &parentInodeId)) { + return false; + } + + size_t prefixLength = items[0].size() + items[1].size() + items[2].size() + + 3 * strlen(kDelimiter); + if (keys[0].size() < prefixLength) { + return false; + } + name = keys[0].substr(prefixLength); + return true; +} + +std::string Prefix4TxWrite::SerializeToString() const { + return absl::StrCat(keyType_, kDelimiter, fsId, kDelimiter, parentInodeId, + kDelimiter, name, kTxDelimiter); +} + +bool Prefix4TxWrite::ParseFromString(const std::string& value) { + std::vector keys; + SplitString(value, kTxDelimiter, &keys); + if (keys.size() != 1) { + return false; + } + + std::vector items; + SplitString(value, kDelimiter, &items); + if (items.size() < 3 || !CompareType(items[0], keyType_) || + !StringToUl(items[1], &fsId) || + !StringToUll(items[2], &parentInodeId)) { + return false; + } + + size_t prefixLength = items[0].size() + items[1].size() + items[2].size() + + 3 * strlen(kDelimiter); + if (value.size() < prefixLength) { + return false; + } + name = value.substr(prefixLength); + return true; +} + Key4VolumeExtentSlice::Key4VolumeExtentSlice(uint32_t fsId, uint64_t inodeId, uint64_t offset) : fsId_(fsId), inodeId_(inodeId), offset_(offset) {} @@ -369,13 +460,13 @@ bool Key4DeallocatableBlockGroup::ParseFromString(const std::string& value) { } std::string Prefix4AllDeallocatableBlockGroup::SerializeToString() const { - return absl::StrCat(keyType_, ":"); + return absl::StrCat(keyType_, kDelimiter); } bool Prefix4AllDeallocatableBlockGroup::ParseFromString( const std::string& value) { std::vector items; - SplitString(value, ":", &items); + SplitString(value, kDelimiter, &items); return items.size() == 1 && CompareType(items[0], keyType_); } diff --git a/curvefs/src/metaserver/storage/converter.h 
b/curvefs/src/metaserver/storage/converter.h index 41870c6b67..20743dd5fd 100644 --- a/curvefs/src/metaserver/storage/converter.h +++ b/curvefs/src/metaserver/storage/converter.h @@ -34,10 +34,10 @@ namespace curvefs { namespace metaserver { class MetaStoreFStream; - namespace storage { enum KEY_TYPE : unsigned char { + kTypeUnknown = 0, kTypeInode = 1, kTypeS3ChunkInfo = 2, kTypeDentry = 3, @@ -49,7 +49,10 @@ enum KEY_TYPE : unsigned char { kTypeAppliedIndex = 9, kTypeTransaction = 10, kTypeInodeCount = 11, - kTypeDentryCount = 12 + kTypeDentryCount = 12, + kTypeTxLock = 13, + kTypeTxWrite = 14, + kTypeDelInode = 15 }; // NOTE: you must generate all table name by NameGenerator class for @@ -61,6 +64,8 @@ class NameGenerator { std::string GetInodeTableName() const; + std::string GetDelInodeTableName() const; + std::string GetDeallocatableInodeTableName() const; std::string GetS3ChunkInfoTableName() const; @@ -83,13 +88,20 @@ class NameGenerator { std::string GetDentryCountTableName() const; + std::string GetTxLockTableName() const; + + std::string GetTxWriteTableName() const; + static size_t GetFixedLength(); + static KEY_TYPE DecodeKeyType(const std::string& name); + private: std::string Format(KEY_TYPE type, uint32_t partitionId); private: std::string tableName4Inode_; + std::string tableName4DelInode_; std::string tableName4DeallocatableIndoe_; std::string tableName4DeallocatableBlockGroup_; std::string tableName4S3ChunkInfo_; @@ -100,6 +112,8 @@ class NameGenerator { std::string tableName4Transaction_; std::string tableName4InodeCount_; std::string tableName4DentryCount_; + std::string tableName4TxLock_; + std::string tableName4TxWrite_; }; class StorageKey { @@ -126,6 +140,8 @@ class StorageKey { * Key4InodeAuxInfo : kTypeInodeAuxInfo:fsId:inodeId * Key4DeallocatableBlockGroup : kTypeBlockGroup:fsId:volumeOffset * Prefix4AllDeallocatableBlockGroup: kTypeBlockGroup: + * Key4TxWrite : kTypeTxWrite:parentInodeId:name/ts + * Prefix4TxWrite : 
kTypeTxWrite:parentInodeId:name/ */ class Key4Inode : public StorageKey { @@ -290,6 +306,41 @@ class Prefix4AllDentry : public StorageKey { static const KEY_TYPE keyType_ = kTypeDentry; }; +class Key4TxWrite : public Key4Dentry { + public: + Key4TxWrite() = default; + + Key4TxWrite(uint32_t fsId, uint64_t parentInodeId, + const std::string& name, uint64_t ts) : + Key4Dentry(fsId, parentInodeId, name), ts(ts) {} + + std::string SerializeToString() const override; + + bool ParseFromString(const std::string& value) override; + + public: + uint64_t ts; + + private: + static const KEY_TYPE keyType_ = kTypeTxWrite; +}; + +class Prefix4TxWrite : public Key4Dentry { + public: + Prefix4TxWrite() = default; + + Prefix4TxWrite(uint32_t fsId, uint64_t parentInodeId, + const std::string& name) : + Key4Dentry(fsId, parentInodeId, name) {} + + std::string SerializeToString() const override; + + bool ParseFromString(const std::string& value) override; + + private: + static const KEY_TYPE keyType_ = kTypeTxWrite; +}; + class Key4VolumeExtentSlice : public StorageKey { public: Key4VolumeExtentSlice() = default; diff --git a/curvefs/src/metaserver/storage/rocksdb_options.cpp b/curvefs/src/metaserver/storage/rocksdb_options.cpp index 9ee96cac4e..50631cd75c 100644 --- a/curvefs/src/metaserver/storage/rocksdb_options.cpp +++ b/curvefs/src/metaserver/storage/rocksdb_options.cpp @@ -98,6 +98,14 @@ DEFINE_int32(rocksdb_ordered_cf_max_write_buffer_number, 2, "Number of writer buffer for ordered column family"); +DEFINE_int64(rocksdb_tx_cf_write_buffer_size, + 64ULL << 20, + "Writer buffer size for tx column family"); + +DEFINE_int32(rocksdb_tx_cf_max_write_buffer_number, + 2, + "Number of writer buffer for tx column family"); + DEFINE_int32(rocksdb_max_write_buffer_size_to_maintain, 20ULL << 20, "The target number of write history bytes to hold in memory"); @@ -106,6 +114,10 @@ DEFINE_int32(rocksdb_stats_dump_period_sec, 180, "Dump rocksdb.stats to LOG every stats_dump_period_sec"); 
+DEFINE_int32(tx_lock_ttl_ms, + 5000, + "tx lock timeout after ttl ms"); + namespace { std::shared_ptr rocksdbBlockCache; @@ -113,6 +125,7 @@ std::shared_ptr rocksdbWriteBufferManager; std::shared_ptr metricEventListener; const char* const kOrderedColumnFamilyName = "ordered_column_family"; +const char* const kTxColumnFamilyName = "tx_column_family"; void CreateBlockCacheAndWriterBufferManager() { static std::once_flag createBlockCache; @@ -208,10 +221,18 @@ void InitRocksdbOptions( unorderedCfOptions.max_write_buffer_number = FLAGS_rocksdb_unordered_cf_max_write_buffer_number; + rocksdb::ColumnFamilyOptions txCfOptions = defaultCfOptions; + txCfOptions.write_buffer_size = + FLAGS_rocksdb_tx_cf_write_buffer_size; + txCfOptions.max_write_buffer_number = + FLAGS_rocksdb_tx_cf_max_write_buffer_number; + columnFamilies->push_back(rocksdb::ColumnFamilyDescriptor{ rocksdb::kDefaultColumnFamilyName, unorderedCfOptions}); columnFamilies->push_back(rocksdb::ColumnFamilyDescriptor{ kOrderedColumnFamilyName, orderedCfOptions}); + columnFamilies->push_back(rocksdb::ColumnFamilyDescriptor{ + kTxColumnFamilyName, txCfOptions}); } void ParseRocksdbOptions(curve::common::Configuration* conf) { @@ -260,6 +281,14 @@ void ParseRocksdbOptions(curve::common::Configuration* conf) { "storage.rocksdb.ordered_max_write_buffer_number", &FLAGS_rocksdb_ordered_cf_max_write_buffer_number, /*fatalIfMissing*/ false); + dummy.Load(conf, "rocksdb_tx_cf_write_buffer_size", + "storage.rocksdb.tx_cf_write_buffer_size", + &FLAGS_rocksdb_tx_cf_write_buffer_size, + /*fatalIfMissing*/ false); + dummy.Load(conf, "rocksdb_tx_cf_max_write_buffer_number", + "storage.rocksdb.tx_cf_max_write_buffer_number", + &FLAGS_rocksdb_tx_cf_max_write_buffer_number, + /*fatalIfMissing*/ false); dummy.Load(conf, "rocksdb_max_write_buffer_size_to_maintain", "storage.rocksdb.max_write_buffer_size_to_maintain", &FLAGS_rocksdb_max_write_buffer_size_to_maintain, @@ -267,6 +296,9 @@ void 
ParseRocksdbOptions(curve::common::Configuration* conf) { dummy.Load(conf, "rocksdb_stats_dump_period_sec", "storage.rocksdb.stats_dump_period_sec", &FLAGS_rocksdb_stats_dump_period_sec, /*fatalIfMissing*/ false); + dummy.Load(conf, "tx_lock_ttl_ms", + "storage.tx_lock_ttl_ms", + &FLAGS_tx_lock_ttl_ms, /*fatalIfMissing*/ false); } } // namespace storage diff --git a/curvefs/src/metaserver/storage/rocksdb_storage.cpp b/curvefs/src/metaserver/storage/rocksdb_storage.cpp index 5875ba6817..d2b20ad507 100644 --- a/curvefs/src/metaserver/storage/rocksdb_storage.cpp +++ b/curvefs/src/metaserver/storage/rocksdb_storage.cpp @@ -26,10 +26,10 @@ #include #include +#include "src/common/string_util.h" #include "src/common/timeutility.h" #include "curvefs/src/metaserver/storage/utils.h" #include "curvefs/src/metaserver/storage/storage.h" -#include "curvefs/src/metaserver/storage/converter.h" #include "curvefs/src/metaserver/storage/rocksdb_perf.h" #include "curvefs/src/metaserver/storage/rocksdb_storage.h" #include "curvefs/src/metaserver/storage/rocksdb_options.h" @@ -57,8 +57,8 @@ size_t RocksDBStorage::GetKeyPrefixLength() { static const size_t length = []() { const std::string tableName = std::string(NameGenerator::GetFixedLength(), '0'); - const std::string iname = - RocksDBStorage::ToInternalName(tableName, true, true); + const std::string iname = RocksDBStorage::ToInternalName(tableName, + ColumnFamilyType::kUnordered, true); return iname.size(); }(); @@ -162,32 +162,29 @@ bool RocksDBStorage::Close() { return true; } -inline ColumnFamilyHandle* RocksDBStorage::GetColumnFamilyHandle(bool ordered) { - return ordered ? handles_[1] : handles_[0]; -} - /* NOTE: * 1. we use suffix 0/1 to determine the key range: - * [ordered:name:0, ordered:name:1) + * [type:name:0, type:name:1) * 2. 
please gurantee the length of name is fixed for * we can determine the rocksdb's prefix key */ std::string RocksDBStorage::ToInternalName(const std::string& name, - bool ordered, + ColumnFamilyType type, bool start) { std::ostringstream oss; - oss << ordered << kDelimiter_ << name << kDelimiter_ << (start ? "0" : "1"); + oss << static_cast(type) << kDelimiter_ << name + << kDelimiter_ << (start ? "0" : "1"); return oss.str(); } std::string RocksDBStorage::ToInternalKey(const std::string& name, const std::string& key, - bool ordered) { - std::string iname = ToInternalName(name, ordered, true); + ColumnFamilyType type) { + std::string iname = ToInternalName(name, type, true); std::ostringstream oss; oss << iname << kDelimiter_ << key; std::string ikey = oss.str(); - VLOG(9) << "ikey = " << ikey << " (ordered = " << ordered + VLOG(9) << "ikey = " << ikey << " (type = " << static_cast(type) << ", name = " << name << ", key = " << key << ")" << ", size = " << ikey.size(); return ikey; @@ -198,18 +195,63 @@ std::string RocksDBStorage::ToUserKey(const std::string& ikey) { return ikey.substr(GetKeyPrefixLength() + kDelimiter_.size()); } +ColumnFamilyType Table2FamilyType(const std::string& tableName) { + auto tableKey = NameGenerator::DecodeKeyType(tableName); + switch (tableKey) { + case kTypeInode: + case kTypeDelInode: + case kTypeInodeAuxInfo: + case kTypeDeallocatableInode: + case kTypeDeallocatableBlockGroup: + return ColumnFamilyType::kUnordered; + case kTypeS3ChunkInfo: + case kTypeDentry: + case kTypeVolumeExtent: + case kTypeAppliedIndex: + case kTypeTransaction: + case kTypeInodeCount: + case kTypeDentryCount: + return ColumnFamilyType::kOrdered; + case kTypeTxLock: + case kTypeTxWrite: + return ColumnFamilyType::kTx; + default: + break; + } + return ColumnFamilyType::kUnknown; +} + +ColumnFamilyHandle* RocksDBStorage::GetColumnFamilyHandle( + ColumnFamilyType type) { + if (type == ColumnFamilyType::kUnknown) { + return nullptr; + } + // handle index is same as 
dbCfDescriptors_ + // 0: kUnordered; 1: kOrdered; 2: kTxn + return handles_[static_cast(type)]; +} + +#define CHECK_COLUMN_TYPE(name) \ + auto type = Table2FamilyType(name); \ + do { \ + if (ColumnFamilyType::kUnknown == type) { \ + return Status::NotSupported(); \ + } \ + } while (0) + Status RocksDBStorage::Get(const std::string& name, const std::string& key, - ValueType* value, - bool ordered) { + ValueType* value) { if (!inited_) { return Status::DBClosed(); } + CHECK_COLUMN_TYPE(name); ROCKSDB_NAMESPACE::Status s; std::string svalue; - std::string ikey = ToInternalKey(name, key, ordered); - auto handle = GetColumnFamilyHandle(ordered); + std::string ikey = ToInternalKey(name, key, type); + VLOG(9) << "Get key: " << ikey << ", " << options_.dataDir; + auto handle = GetColumnFamilyHandle(type); { RocksDBPerfGuard guard(OP_GET); s = InTransaction_ ? txn_->Get(dbReadOptions_, handle, ikey, &svalue) : @@ -218,22 +260,24 @@ Status RocksDBStorage::Get(const std::string& name, if (s.ok() && !value->ParseFromString(svalue)) { return Status::ParsedFailed(); } + return ToStorageStatus(s); } Status RocksDBStorage::Set(const std::string& name, const std::string& key, - const ValueType& value, - bool ordered) { + const ValueType& value) { std::string svalue; if (!inited_) { return Status::DBClosed(); } else if (!value.SerializeToString(&svalue)) { return Status::SerializedFailed(); } + CHECK_COLUMN_TYPE(name); - auto handle = GetColumnFamilyHandle(ordered); - std::string ikey = ToInternalKey(name, key, ordered); + auto handle = GetColumnFamilyHandle(type); + std::string ikey = ToInternalKey(name, key, type); + VLOG(9) << "set key: " << ikey << ", " << options_.dataDir; RocksDBPerfGuard guard(OP_PUT); ROCKSDB_NAMESPACE::Status s = InTransaction_ ? 
txn_->Put(handle, ikey, svalue) : @@ -242,14 +286,15 @@ Status RocksDBStorage::Set(const std::string& name, } Status RocksDBStorage::Del(const std::string& name, - const std::string& key, - bool ordered) { + const std::string& key) { if (!inited_) { return Status::DBClosed(); } + CHECK_COLUMN_TYPE(name); - std::string ikey = ToInternalKey(name, key, ordered); - auto handle = GetColumnFamilyHandle(ordered); + std::string ikey = ToInternalKey(name, key, type); + auto handle = GetColumnFamilyHandle(type); + VLOG(9) << "del key: " << ikey << ", " << options_.dataDir; RocksDBPerfGuard guard(OP_DELETE); ROCKSDB_NAMESPACE::Status s = InTransaction_ ? txn_->Delete(handle, ikey) : @@ -259,22 +304,23 @@ Status RocksDBStorage::Del(const std::string& name, std::shared_ptr RocksDBStorage::Seek(const std::string& name, const std::string& prefix) { - int status = inited_ ? 0 : -1; - std::string ikey = ToInternalKey(name, prefix, true); + auto type = Table2FamilyType(name); + int status = (inited_ && ColumnFamilyType::kUnknown != type) ? 0 : -1; + std::string ikey = ToInternalKey(name, prefix, type); return std::make_shared( - this, ikey, 0, status, true); + this, std::move(ikey), 0, status, type); } -std::shared_ptr RocksDBStorage::GetAll(const std::string& name, - bool ordered) { - int status = inited_ ? 0 : -1; - std::string ikey = ToInternalKey(name, "", ordered); +std::shared_ptr RocksDBStorage::GetAll(const std::string& name) { + auto type = Table2FamilyType(name); + int status = (inited_ && ColumnFamilyType::kUnknown != type) ? 
0 : -1; + std::string ikey = ToInternalKey(name, "", type); return std::make_shared( - this, std::move(ikey), 0, status, ordered); + this, std::move(ikey), 0, status, type); } -size_t RocksDBStorage::Size(const std::string& name, bool ordered) { - auto iterator = GetAll(name, ordered); +size_t RocksDBStorage::Size(const std::string& name) { + auto iterator = GetAll(name); if (iterator->Status() != 0) { return 0; } @@ -286,7 +332,7 @@ size_t RocksDBStorage::Size(const std::string& name, bool ordered) { return size; } -Status RocksDBStorage::Clear(const std::string& name, bool ordered) { +Status RocksDBStorage::Clear(const std::string& name) { if (!inited_) { return Status::DBClosed(); } else if (InTransaction_) { @@ -294,6 +340,7 @@ Status RocksDBStorage::Clear(const std::string& name, bool ordered) { // maybe we can implement `Clear` by "iterate and delete" return Status::NotSupported(); } + CHECK_COLUMN_TYPE(name); // TODO(all): Maybe we should let `Clear` just do nothing, because it's only // called when recover state machine from raft snapshot, and in this case, @@ -301,13 +348,14 @@ Status RocksDBStorage::Clear(const std::string& name, bool ordered) { // database's checkpoint in raft snapshot // But, currently, many unittest cases depend it - auto handle = GetColumnFamilyHandle(ordered); - std::string lower = ToInternalName(name, ordered, true); - std::string upper = ToInternalName(name, ordered, false); + auto handle = GetColumnFamilyHandle(type); + std::string lower = ToInternalName(name, type, true); + std::string upper = ToInternalName(name, type, false); RocksDBPerfGuard guard(OP_DELETE_RANGE); ROCKSDB_NAMESPACE::Status s = db_->DeleteRange( dbWriteOptions_, handle, lower, upper); - LOG(INFO) << "Clear(), tablename = " << name << ", ordered = " << ordered + LOG(INFO) << "Clear(), tablename = " << name << ", type = " + << static_cast(type) << ", lower key = " << lower << ", upper key = " << upper; return ToStorageStatus(s); } @@ -505,6 +553,31 @@ bool 
RocksDBStorage::Recover(const std::string& dir) { return true; } +void RocksDBStorage::GetPrefix( + std::map* item, const std::string prefix) { + std::string sprefix = absl::StrCat("0", ":", prefix); + VLOG(3) << "load deleted inodes from: " << options_.dataDir + << ", " << sprefix << ", " << prefix; + int counts = 0; + rocksdb::Iterator* it = db_->NewIterator(rocksdb::ReadOptions()); + curvefs::metaserver::Time time; + for (it->Seek(sprefix); it->Valid() && + it->key().starts_with(sprefix); it->Next()) { + std::string key = it->key().ToString(); + if (!time.ParseFromString(it->value().ToString())) { + return; + } + VLOG(9) << "key: " << key << ", " << key.size() << ", " + << it->value().ToString() << ", " << time.sec(); + item->emplace(key, time.sec()); + counts++; + } + delete it; + VLOG(3) << "load deleted inodes end, size is: " << item->size() + << ", " << counts << ", " << options_.dataDir; +} + + } // namespace storage } // namespace metaserver } // namespace curvefs diff --git a/curvefs/src/metaserver/storage/rocksdb_storage.h b/curvefs/src/metaserver/storage/rocksdb_storage.h index e0023dd8e2..1eccc2ec35 100644 --- a/curvefs/src/metaserver/storage/rocksdb_storage.h +++ b/curvefs/src/metaserver/storage/rocksdb_storage.h @@ -23,7 +23,9 @@ #ifndef CURVEFS_SRC_METASERVER_STORAGE_ROCKSDB_STORAGE_H_ #define CURVEFS_SRC_METASERVER_STORAGE_ROCKSDB_STORAGE_H_ +#include #include +#include #include #include #include @@ -40,10 +42,10 @@ #include "rocksdb/utilities/transaction_db.h" #include "rocksdb/utilities/table_properties_collectors.h" #include "src/common/concurrent/rw_lock.h" +#include "curvefs/src/metaserver/storage/converter.h" #include "curvefs/src/metaserver/storage/utils.h" #include "curvefs/src/metaserver/storage/storage.h" #include "curvefs/src/metaserver/storage/rocksdb_perf.h" -#include "curvefs/src/metaserver/storage/rocksdb_storage.h" namespace curvefs { namespace metaserver { @@ -65,6 +67,15 @@ using ROCKSDB_NAMESPACE::NewFixedPrefixTransform; using 
ROCKSDB_NAMESPACE::NewBlockBasedTableFactory; using STORAGE_TYPE = KVStorage::STORAGE_TYPE; +enum class ColumnFamilyType : uint8_t { + kUnordered = 0, + kOrdered = 1, + kTx = 2, + + // unknown type + kUnknown = 255, +}; + // NOTE: The HSize() and SSize() is an expensive operation for rocksdb storage, // you should only invoke it in test cases. class RocksDBStorage : public KVStorage, public StorageTransaction { @@ -81,6 +92,9 @@ class RocksDBStorage : public KVStorage, public StorageTransaction { STORAGE_TYPE Type() override; +void GetPrefix( + std::map* item, const std::string prefix) override; + StorageOptions GetStorageOptions() const override; // unordered @@ -132,33 +146,30 @@ class RocksDBStorage : public KVStorage, public StorageTransaction { bool Recover(const std::string& dir) override; private: - ColumnFamilyHandle* GetColumnFamilyHandle(bool ordered); + ColumnFamilyHandle* GetColumnFamilyHandle(ColumnFamilyType type); static size_t GetKeyPrefixLength(); static std::string ToInternalName(const std::string& name, - bool ordered, + ColumnFamilyType type, bool start); std::string ToInternalKey(const std::string& name, const std::string& key, - bool ordered); + ColumnFamilyType type); std::string ToUserKey(const std::string& ikey); Status Get(const std::string& name, const std::string& key, - ValueType* value, - bool ordered); + ValueType* value); Status Set(const std::string& name, const std::string& key, - const ValueType& value, - bool ordered); + const ValueType& value); Status Del(const std::string& name, - const std::string& key, - bool ordered); + const std::string& key); std::shared_ptr Seek(const std::string& name, const std::string& prefix); @@ -166,11 +177,11 @@ class RocksDBStorage : public KVStorage, public StorageTransaction { // TODO(@Wine93): We do not support transactions for the // below 3 methods, maybe we should return Status::NotSupported // when user invoke it in transaction. 
- std::shared_ptr GetAll(const std::string& name, bool ordered); + std::shared_ptr GetAll(const std::string& name); - size_t Size(const std::string& name, bool ordered); + size_t Size(const std::string& name); - Status Clear(const std::string& name, bool ordered); + Status Clear(const std::string& name); private: friend class RocksDBStorageIterator; @@ -210,48 +221,48 @@ class RocksDBStorage : public KVStorage, public StorageTransaction { inline Status RocksDBStorage::HGet(const std::string& name, const std::string& key, ValueType* value) { - return Get(name, key, value, false); + return Get(name, key, value); } inline Status RocksDBStorage::HSet(const std::string& name, const std::string& key, const ValueType& value) { - return Set(name, key, value, false); + return Set(name, key, value); } inline Status RocksDBStorage::HDel(const std::string& name, const std::string& key) { - return Del(name, key, false); + return Del(name, key); } inline std::shared_ptr RocksDBStorage::HGetAll( const std::string& name) { - return GetAll(name, false); + return GetAll(name); } inline size_t RocksDBStorage::HSize(const std::string& name) { - return Size(name, false); + return Size(name); } inline Status RocksDBStorage::HClear(const std::string& name) { - return Clear(name, false); + return Clear(name); } inline Status RocksDBStorage::SGet(const std::string& name, const std::string& key, ValueType* value) { - return Get(name, key, value, true); + return Get(name, key, value); } inline Status RocksDBStorage::SSet(const std::string& name, const std::string& key, const ValueType& value) { - return Set(name, key, value, true); + return Set(name, key, value); } inline Status RocksDBStorage::SDel(const std::string& name, const std::string& key) { - return Del(name, key, true); + return Del(name, key); } inline std::shared_ptr RocksDBStorage::SSeek( @@ -261,15 +272,15 @@ inline std::shared_ptr RocksDBStorage::SSeek( inline std::shared_ptr RocksDBStorage::SGetAll( const std::string& name) { 
- return GetAll(name, true); + return GetAll(name); } inline size_t RocksDBStorage::SSize(const std::string& name) { - return Size(name, true); + return Size(name); } inline Status RocksDBStorage::SClear(const std::string& name) { - return Clear(name, true); + return Clear(name); } class RocksDBStorageIterator : public Iterator { @@ -278,13 +289,13 @@ class RocksDBStorageIterator : public Iterator { std::string prefix, size_t size, int status, - bool ordered) + ColumnFamilyType type) : storage_(storage), prefix_(std::move(prefix)), size_(size), status_(status), prefixChecking_(true), - ordered_(ordered), + type_(type), iter_(nullptr) { RocksDBPerfGuard guard(OP_GET_SNAPSHOT); if (status_ == 0) { @@ -324,7 +335,7 @@ class RocksDBStorageIterator : public Iterator { } void SeekToFirst() { - auto handler = storage_->GetColumnFamilyHandle(ordered_); + auto handler = storage_->GetColumnFamilyHandle(type_); { RocksDBPerfGuard guard(OP_GET_ITERATOR); if (storage_->InTransaction_) { @@ -379,7 +390,7 @@ class RocksDBStorageIterator : public Iterator { uint64_t size_; int status_; bool prefixChecking_; - bool ordered_; + ColumnFamilyType type_; std::unique_ptr iter_; rocksdb::ReadOptions readOptions_; }; diff --git a/curvefs/src/metaserver/storage/storage.h b/curvefs/src/metaserver/storage/storage.h index 97cef01fca..c7513e23eb 100644 --- a/curvefs/src/metaserver/storage/storage.h +++ b/curvefs/src/metaserver/storage/storage.h @@ -23,7 +23,9 @@ #ifndef CURVEFS_SRC_METASERVER_STORAGE_STORAGE_H_ #define CURVEFS_SRC_METASERVER_STORAGE_STORAGE_H_ +#include #include +#include #include #include @@ -51,6 +53,9 @@ class BaseStorage { const std::string& key, const ValueType& value) = 0; + virtual void GetPrefix(std::map* item, + const std::string prefix) {} + virtual Status HDel(const std::string& name, const std::string& key) = 0; virtual std::shared_ptr HGetAll(const std::string& name) = 0; diff --git a/curvefs/src/metaserver/trash.cpp b/curvefs/src/metaserver/trash.cpp index 
1175376e72..d39b53f737 100644 --- a/curvefs/src/metaserver/trash.cpp +++ b/curvefs/src/metaserver/trash.cpp @@ -25,6 +25,8 @@ #include "curvefs/proto/mds.pb.h" #include "curvefs/src/metaserver/inode_storage.h" #include "src/common/timeutility.h" +#include "curvefs/src/metaserver/copyset/copyset_node_manager.h" +#include "curvefs/src/metaserver/copyset/meta_operator.h" using ::curve::common::TimeUtility; @@ -52,165 +54,177 @@ void TrashImpl::Init(const TrashOption &option) { options_ = option; s3Adaptor_ = option.s3Adaptor; mdsClient_ = option.mdsClient; + copysetNode_ = copyset::CopysetNodeManager::GetInstance(). + GetSharedCopysetNode(poolId_, copysetId_); + isStop_ = false; } -void TrashImpl::Add(uint32_t fsId, uint64_t inodeId, uint32_t dtime) { - TrashItem item; - item.fsId = fsId; - item.inodeId = inodeId; - item.dtime = dtime; +void TrashImpl::StopScan() { + isStop_ = true; +} + +void TrashImpl::Add(uint64_t inodeId, uint64_t dtime, bool deleted) { + if (isStop_) { + return; + } LockGuard lg(itemsMutex_); + trashItems_.emplace(inodeId, dtime); + + VLOG(6) << "Add trash item success" + << ", fsId = " << fsId_ + << ", partitionId = " << partitionId_ + << ", inodeId = " << inodeId + << ", dtime = " << dtime + << ", deleted = " << deleted; + if (!deleted) { + inodeStorage_->AddDeletedInode(Key4Inode(fsId_, inodeId), dtime); + } +} + +bool TrashImpl::IsStop() { + return isStop_; +} + +void TrashImpl::Remove(uint64_t inodeId) { if (isStop_) { return; } - trashItems_.push_back(item); - VLOG(6) << "Add Trash Item success, item.fsId = " << item.fsId - << ", item.inodeId = " << item.inodeId - << ", item.dtime = " << item.dtime; + LockGuard lg(itemsMutex_); + trashItems_.erase(inodeId); + RemoveDeletedInode(inodeId); + VLOG(6) << "Remove Trash Item success, fsId = " << fsId_ + << ", partitionId = " << partitionId_ + << ", inodeId = " << inodeId; } void TrashImpl::ScanTrash() { LockGuard lgScan(scanMutex_); - std::list temp; + LOG(INFO) << "ScanTrash, fsId = " << fsId_ + 
<< ", partitionId = " << partitionId_ + << ", trashItems size = " << trashItems_.size(); + // only scan on leader + if (copysetNode_ == nullptr || !copysetNode_->IsLeaderTerm()) { + return; + } + + std::unordered_map temp; { LockGuard lgItems(itemsMutex_); trashItems_.swap(temp); } for (auto it = temp.begin(); it != temp.end();) { - if (isStop_) { + if (isStop_ || !copysetNode_->IsLeaderTerm()) { return; } - if (NeedDelete(*it)) { - MetaStatusCode ret = DeleteInodeAndData(*it); - if (MetaStatusCode::NOT_FOUND == ret) { - it = temp.erase(it); - continue; - } + if (NeedDelete(it->second)) { + MetaStatusCode ret = DeleteInodeAndData(it->first); if (ret != MetaStatusCode::OK) { - LOG(ERROR) << "DeleteInodeAndData fail, fsId = " << it->fsId - << ", inodeId = " << it->inodeId + LOG(ERROR) << "DeleteInodeAndData fail, fsId = " << fsId_ + << ", inodeId = " << it->first << ", ret = " << MetaStatusCode_Name(ret); it++; continue; } - VLOG(6) << "Trash Delete Inode, fsId = " << it->fsId - << ", inodeId = " << it->inodeId; + LOG(INFO) << "Trash delete inode, fsId = " << fsId_ + << ", partitionId = " << partitionId_ + << ", inodeId = " << it->first; it = temp.erase(it); } else { it++; } } - { LockGuard lgItems(itemsMutex_); - trashItems_.splice(trashItems_.end(), temp); + trashItems_.insert(temp.begin(), temp.end()); } } -void TrashImpl::StopScan() { - isStop_ = true; -} - -bool TrashImpl::IsStop() { - return isStop_; -} - -bool TrashImpl::NeedDelete(const TrashItem &item) { - uint32_t now = TimeUtility::GetTimeofDaySec(); - Inode inode; - MetaStatusCode ret = - inodeStorage_->Get(Key4Inode(item.fsId, item.inodeId), &inode); - if (MetaStatusCode::NOT_FOUND == ret) { - LOG(WARNING) << "GetInode find inode not exist, fsId = " << item.fsId - << ", inodeId = " << item.inodeId - << ", ret = " << MetaStatusCode_Name(ret); - return true; - } else if (ret != MetaStatusCode::OK) { - LOG(WARNING) << "GetInode fail, fsId = " << item.fsId - << ", inodeId = " << item.inodeId - << ", ret = " 
<< MetaStatusCode_Name(ret); - return false; +void TrashImpl::RemoveDeletedInode(uint64_t inodeId) { + VLOG(9) << "RemoveDeletedInode: " << fsId_ + << ", " << partitionId_ << ", " << ", " << inodeId; + if (MetaStatusCode::OK != + inodeStorage_->RemoveDeletedInode(Key4Inode(fsId_, inodeId))) { + LOG(WARNING) << "RemoveDeletedInode failed, " << fsId_ + << ", " << partitionId_ << ", " << ", " << inodeId; } +} +bool TrashImpl::NeedDelete(uint64_t dtime) { // for compatibility, if fs recycleTimeHour is 0, use old trash logic // if fs recycleTimeHour is 0, use trash wait until expiredAfterSec // if fs recycleTimeHour is not 0, return true - uint64_t recycleTimeHour = GetFsRecycleTimeHour(item.fsId); + uint64_t recycleTimeHour = GetFsRecycleTimeHour(fsId_); if (recycleTimeHour == 0) { - return ((now - item.dtime) >= FLAGS_trash_expiredAfterSec); + return ((TimeUtility::GetTimeofDaySec() - dtime) >= + FLAGS_trash_expiredAfterSec); } else { return true; } } uint64_t TrashImpl::GetFsRecycleTimeHour(uint32_t fsId) { - FsInfo fsInfo; uint64_t recycleTimeHour = 0; - if (fsInfoMap_.find(fsId) == fsInfoMap_.end()) { - auto ret = mdsClient_->GetFsInfo(fsId, &fsInfo); + if (fsInfo_.fsid() == 0) { + auto ret = mdsClient_->GetFsInfo(fsId, &fsInfo_); if (ret != FSStatusCode::OK) { if (FSStatusCode::NOT_FOUND == ret) { LOG(ERROR) << "The fs not exist, fsId = " << fsId; return 0; } else { - LOG(ERROR) - << "GetFsInfo failed, FSStatusCode = " << ret - << ", FSStatusCode_Name = " << FSStatusCode_Name(ret) - << ", fsId = " << fsId; + LOG(ERROR) << "GetFsInfo failed, FSStatusCode = " << ret + << ", FSStatusCode_Name = " << FSStatusCode_Name(ret) + << ", fsId = " << fsId; return 0; } } - fsInfoMap_.insert({fsId, fsInfo}); - } else { - fsInfo = fsInfoMap_.find(fsId)->second; } - if (fsInfo.has_recycletimehour()) { - recycleTimeHour = fsInfo.recycletimehour(); - } else { - recycleTimeHour = 0; + if (fsInfo_.has_recycletimehour()) { + recycleTimeHour = fsInfo_.recycletimehour(); } return 
recycleTimeHour; } -MetaStatusCode TrashImpl::DeleteInodeAndData(const TrashItem &item) { +MetaStatusCode TrashImpl::DeleteInodeAndData(uint64_t inodeId) { Inode inode; - MetaStatusCode ret = - inodeStorage_->Get(Key4Inode(item.fsId, item.inodeId), &inode); + auto ret = inodeStorage_->Get(Key4Inode(fsId_, inodeId), &inode); + if (ret == MetaStatusCode::NOT_FOUND) { + LOG(WARNING) << "GetInode fail, fsId = " << fsId_ + << ", inodeId = " << inodeId + << ", ret = " << MetaStatusCode_Name(ret); + return MetaStatusCode::OK; + } if (ret != MetaStatusCode::OK) { - LOG(WARNING) << "GetInode fail, fsId = " << item.fsId - << ", inodeId = " << item.inodeId + LOG(WARNING) << "GetInode fail, fsId = " << fsId_ + << ", inodeId = " << inodeId << ", ret = " << MetaStatusCode_Name(ret); return ret; } + // 1. delete data first if (FsFileType::TYPE_FILE == inode.type()) { // TODO(xuchaojie) : delete on volume } else if (FsFileType::TYPE_S3 == inode.type()) { // get s3info from mds - FsInfo fsInfo; - if (fsInfoMap_.find(item.fsId) == fsInfoMap_.end()) { - auto ret = mdsClient_->GetFsInfo(item.fsId, &fsInfo); + if (fsInfo_.fsid() == 0) { + auto ret = mdsClient_->GetFsInfo(fsId_, &fsInfo_); if (ret != FSStatusCode::OK) { if (FSStatusCode::NOT_FOUND == ret) { - LOG(ERROR) << "The fsName not exist, fsId = " << item.fsId; + LOG(ERROR) << "The fsName not exist, fsId = " << fsId_; return MetaStatusCode::S3_DELETE_ERR; } else { LOG(ERROR) << "GetFsInfo failed, FSStatusCode = " << ret << ", FSStatusCode_Name = " << FSStatusCode_Name(ret) - << ", fsId = " << item.fsId; + << ", fsId = " << fsId_; return MetaStatusCode::S3_DELETE_ERR; } } - fsInfoMap_.insert({item.fsId, fsInfo}); - } else { - fsInfo = fsInfoMap_.find(item.fsId)->second; } - const auto& s3Info = fsInfo.detail().s3info(); + const auto& s3Info = fsInfo_.detail().s3info(); // reinit s3 adaptor S3ClientAdaptorOption clientAdaptorOption; s3Adaptor_->GetS3ClientAdaptorOption(&clientAdaptorOption); @@ -219,43 +233,52 @@ MetaStatusCode 
TrashImpl::DeleteInodeAndData(const TrashItem &item) { clientAdaptorOption.objectPrefix = s3Info.objectprefix(); s3Adaptor_->Reinit(clientAdaptorOption, s3Info.ak(), s3Info.sk(), s3Info.endpoint(), s3Info.bucketname()); - ret = inodeStorage_->PaddingInodeS3ChunkInfo(item.fsId, - item.inodeId, inode.mutable_s3chunkinfomap()); + ret = inodeStorage_->PaddingInodeS3ChunkInfo(fsId_, inodeId, + inode.mutable_s3chunkinfomap()); if (ret != MetaStatusCode::OK) { - LOG(ERROR) << "GetInode chunklist fail, fsId = " << item.fsId - << ", inodeId = " << item.inodeId - << ", retCode = " << MetaStatusCode_Name(ret); + LOG(ERROR) << "GetInode chunklist fail, fsId = " << fsId_ + << ", inodeId = " << inodeId + << ", retCode = " << MetaStatusCode_Name(ret); return ret; } - if (inode.s3chunkinfomap().empty()) { - LOG(WARNING) << "GetInode chunklist empty, fsId = " << item.fsId - << ", inodeId = " << item.inodeId; - return MetaStatusCode::NOT_FOUND; - } - VLOG(9) << "DeleteInodeAndData, inode: " - << inode.ShortDebugString(); - int retVal = s3Adaptor_->Delete(inode); - if (retVal != 0) { - LOG(ERROR) << "S3ClientAdaptor delete s3 data failed" - << ", ret = " << retVal << ", fsId = " << item.fsId - << ", inodeId = " << item.inodeId; - return MetaStatusCode::S3_DELETE_ERR; + VLOG(9) << "DeleteInodeAndData, inode: " << inode.ShortDebugString(); + if (!inode.s3chunkinfomap().empty()) { + int retVal = s3Adaptor_->Delete(inode); + if (retVal != 0) { + LOG(ERROR) << "S3ClientAdaptor delete s3 data failed" + << ", ret = " << retVal << ", fsId = " << fsId_ + << ", inodeId = " << inodeId; + return MetaStatusCode::S3_DELETE_ERR; + } } } - ret = inodeStorage_->ForceDelete(Key4Inode(item.fsId, item.inodeId)); - if (ret != MetaStatusCode::OK && ret != MetaStatusCode::NOT_FOUND) { - LOG(ERROR) << "Delete Inode fail, fsId = " << item.fsId - << ", inodeId = " << item.inodeId - << ", ret = " << MetaStatusCode_Name(ret); - return ret; + // 2. 
delete metadata + if (copysetNode_->IsLeaderTerm()) { + return DeleteInode(inodeId); } return MetaStatusCode::OK; } -void TrashImpl::ListItems(std::list *items) { - LockGuard lgScan(scanMutex_); +MetaStatusCode TrashImpl::DeleteInode(uint64_t inodeId) { + DeleteInodeRequest request; + request.set_poolid(poolId_); + request.set_copysetid(copysetId_); + request.set_partitionid(partitionId_); + request.set_fsid(fsId_); + request.set_inodeid(inodeId); + + DeleteInodeResponse response; + DeleteInodeClosure done; + auto DeleteInodeOp = new copyset::DeleteInodeOperator( + copysetNode_.get(), nullptr, &request, &response, &done); + DeleteInodeOp->Propose(); + done.WaitRunned(); + return response.statuscode(); +} + +uint64_t TrashImpl::Size() { LockGuard lgItems(itemsMutex_); - *items = trashItems_; + return trashItems_.size(); } } // namespace metaserver diff --git a/curvefs/src/metaserver/trash.h b/curvefs/src/metaserver/trash.h index de9410ddc3..4ac3f995ff 100644 --- a/curvefs/src/metaserver/trash.h +++ b/curvefs/src/metaserver/trash.h @@ -25,6 +25,7 @@ #include #include +#include #include #include @@ -34,10 +35,15 @@ #include "curvefs/src/metaserver/inode_storage.h" #include "curvefs/src/metaserver/s3/metaserver_s3_adaptor.h" #include "curvefs/src/client/rpcclient/mds_client.h" +#include "curvefs/src/metaserver/common/types.h" namespace curvefs { namespace metaserver { +namespace copyset { +class CopysetNode; +} // namespace copyset + using ::curve::common::Configuration; using ::curve::common::Thread; using ::curve::common::Atomic; @@ -47,16 +53,6 @@ using ::curve::common::InterruptibleSleeper; using ::curvefs::client::rpcclient::MdsClient; using ::curvefs::client::rpcclient::MdsClientImpl; -struct TrashItem { - uint32_t fsId; - uint64_t inodeId; - uint32_t dtime; - TrashItem() - : fsId(0), - inodeId(0), - dtime(0) {} -}; - struct TrashOption { uint32_t scanPeriodSec; uint32_t expiredAfterSec; @@ -78,9 +74,12 @@ class Trash { virtual void Init(const TrashOption 
&option) = 0; - virtual void Add(uint32_t fsId, uint64_t inodeId, uint32_t dtime) = 0; + virtual void Add(uint64_t inodeId, + uint64_t dtime, bool deleted = false) = 0; + + virtual void Remove(uint64_t inodeId) = 0; - virtual void ListItems(std::list *items) = 0; + virtual uint64_t Size() = 0; virtual void ScanTrash() = 0; @@ -91,16 +90,21 @@ class Trash { class TrashImpl : public Trash { public: - explicit TrashImpl(const std::shared_ptr &inodeStorage) - : inodeStorage_(inodeStorage) {} + explicit TrashImpl(const std::shared_ptr &inodeStorage, + uint32_t fsId = 0, PoolId poolId = 0, CopysetId copysetId = 0, + PartitionId partitionId = 0) : + inodeStorage_(inodeStorage), fsId_(fsId), poolId_(poolId), + copysetId_(copysetId), partitionId_(partitionId) {} ~TrashImpl() {} void Init(const TrashOption &option) override; - void Add(uint32_t fsId, uint64_t inodeId, uint32_t dtime) override; + void Add(uint64_t inodeId, uint64_t dtime, bool deleted = false) override; - void ListItems(std::list *items) override; + void Remove(uint64_t inodeId) override; + + uint64_t Size() override; void ScanTrash() override; @@ -108,20 +112,35 @@ class TrashImpl : public Trash { bool IsStop() override; - private: - bool NeedDelete(const TrashItem &item); + // for utests + void SetCopysetNode(const std::shared_ptr &node) { + copysetNode_ = node; + } - MetaStatusCode DeleteInodeAndData(const TrashItem &item); + private: + bool NeedDelete(uint64_t dtime); uint64_t GetFsRecycleTimeHour(uint32_t fsId); + MetaStatusCode DeleteInodeAndData(uint64_t inodeId); + + MetaStatusCode DeleteInode(uint64_t inodeId); + + void RemoveDeletedInode(uint64_t inodeId); + private: std::shared_ptr inodeStorage_; std::shared_ptr s3Adaptor_; std::shared_ptr mdsClient_; - std::unordered_map fsInfoMap_; + std::shared_ptr copysetNode_; + FsInfo fsInfo_; - std::list trashItems_; + uint32_t fsId_; + PoolId poolId_; + CopysetId copysetId_; + PartitionId partitionId_; + + std::unordered_map trashItems_; mutable Mutex 
itemsMutex_; @@ -132,6 +151,25 @@ class TrashImpl : public Trash { bool isStop_; }; +class DeleteInodeClosure : public google::protobuf::Closure { + private: + std::mutex mutex_; + std::condition_variable cond_; + bool runned_ = false; + + public: + void Run() override { + std::lock_guard l(mutex_); + runned_ = true; + cond_.notify_one(); + } + + void WaitRunned() { + std::unique_lock ul(mutex_); + cond_.wait(ul, [this]() { return runned_; }); + } +}; + } // namespace metaserver } // namespace curvefs diff --git a/curvefs/src/metaserver/trash_manager.cpp b/curvefs/src/metaserver/trash_manager.cpp index 7f6341db1c..796c5e2da9 100644 --- a/curvefs/src/metaserver/trash_manager.cpp +++ b/curvefs/src/metaserver/trash_manager.cpp @@ -49,6 +49,13 @@ void TrashManager::Fini() { LOG(INFO) << "stop trash manager ok."; } +void TrashManager::Add(uint32_t partitionId, + const std::shared_ptr &trash) { + curve::common::WriteLockGuard lg(rwLock_); + trash->Init(options_); + trashs_.emplace(partitionId, trash); +} + void TrashManager::ScanLoop() { while (sleeper_.wait_for(std::chrono::seconds(FLAGS_trash_scanPeriodSec))) { ScanEveryTrash(); @@ -82,18 +89,17 @@ void TrashManager::Remove(uint32_t partitionId) { } } -void TrashManager::ListItems(std::list *items) { - items->clear(); +uint64_t TrashManager::Size() { + uint64_t size = 0; std::map> temp; { curve::common::ReadLockGuard lg(rwLock_); temp = trashs_; } for (auto &pair : temp) { - std::list newItems; - pair.second->ListItems(&newItems); - items->splice(items->end(), newItems); + size += pair.second->Size(); } + return size; } } // namespace metaserver diff --git a/curvefs/src/metaserver/trash_manager.h b/curvefs/src/metaserver/trash_manager.h index ca25c0145d..0e2d6ee2f6 100644 --- a/curvefs/src/metaserver/trash_manager.h +++ b/curvefs/src/metaserver/trash_manager.h @@ -27,6 +27,8 @@ #include #include +#include "src/common/string_util.h" + #include "src/common/concurrent/concurrent.h" #include 
"curvefs/src/metaserver/trash.h" @@ -44,13 +46,7 @@ class TrashManager { return instance_; } - void Add(uint32_t partitionId, const std::shared_ptr &trash) { - curve::common::WriteLockGuard lg(rwLock_); - trash->Init(options_); - trashs_.emplace(partitionId, trash); - LOG(INFO) << "add partition to trash manager, partitionId = " - << partitionId; - } + void Add(uint32_t partitionId, const std::shared_ptr &trash); void Remove(uint32_t partitionId); @@ -64,7 +60,7 @@ class TrashManager { void ScanEveryTrash(); - void ListItems(std::list *items); + uint64_t Size(); private: void ScanLoop(); @@ -79,6 +75,7 @@ class TrashManager { InterruptibleSleeper sleeper_; std::map> trashs_; + curve::common::RWLock rwLock_; }; diff --git a/curvefs/test/client/BUILD b/curvefs/test/client/BUILD index 65bfb51d50..84852b8903 100644 --- a/curvefs/test/client/BUILD +++ b/curvefs/test/client/BUILD @@ -110,6 +110,7 @@ cc_test( "//curvefs/proto:mds_cc_proto", "//curvefs/proto:space_cc_proto", "//curvefs/test/volume/mock", + "//curvefs/test/client/filesystem/helper:helper", ], linkopts = ["-lfuse3", "-L/usr/local/lib/x86_64-linux-gnu"], diff --git a/curvefs/test/client/client_operator_test.cpp b/curvefs/test/client/client_operator_test.cpp index 45574a56c6..41dda5c9fa 100644 --- a/curvefs/test/client/client_operator_test.cpp +++ b/curvefs/test/client/client_operator_test.cpp @@ -25,8 +25,8 @@ #include #include "curvefs/src/client/client_operator.h" -#include "curvefs/test/client/mock_dentry_cache_mamager.h" -#include "curvefs/test/client/mock_inode_cache_manager.h" +#include "curvefs/test/client/mock_dentry_mamager.h" +#include "curvefs/test/client/mock_inode_manager.h" #include "curvefs/test/client/mock_metaserver_client.h" #include "curvefs/test/client/rpcclient/mock_mds_client.h" @@ -191,5 +191,85 @@ TEST_F(ClientOperatorTest, CommitTx) { ASSERT_EQ(rc, CURVEFS_ERROR::OK); } +TEST_F(ClientOperatorTest, PrewriteTx) { + CURVEFS_ERROR rc = CURVEFS_ERROR::OK; + // 1. 
tso failed + EXPECT_CALL(*mdsClient_, Tso(_, _)) + .WillOnce(Return(FSStatusCode::UNKNOWN_ERROR)); + rc = renameOp_->PrewriteTx(); + ASSERT_EQ(rc, CURVEFS_ERROR::INTERNAL); + // 2. GetPartitionId failed + EXPECT_CALL(*mdsClient_, Tso(_, _)) + .WillOnce(Return(FSStatusCode::OK)) + .WillOnce(Return(FSStatusCode::OK)); + EXPECT_CALL(*metaClient_, GetPartitionId(_, _, _)) + .WillOnce(Return(false)) + .WillOnce(Return(true)) + .WillOnce(Return(false)); + rc = renameOp_->PrewriteTx(); + ASSERT_EQ(rc, CURVEFS_ERROR::INTERNAL); + rc = renameOp_->PrewriteTx(); + ASSERT_EQ(rc, CURVEFS_ERROR::INTERNAL); + // 3. PrewriteRenameTx failed + EXPECT_CALL(*mdsClient_, Tso(_, _)) + .WillOnce(Return(FSStatusCode::OK)); + EXPECT_CALL(*metaClient_, GetPartitionId(_, _, _)) + .WillOnce(DoAll(SetArgPointee<2>(1), Return(true))) + .WillOnce(DoAll(SetArgPointee<2>(2), Return(true))); + EXPECT_CALL(*metaClient_, PrewriteRenameTx(_, _, _)) + .WillOnce(Return(MetaStatusCode::STORAGE_INTERNAL_ERROR)); + rc = renameOp_->PrewriteTx(); + ASSERT_EQ(rc, CURVEFS_ERROR::INTERNAL); + // 4. PrewriteRenameTx key is locked and CheckAndResolveTx failed + EXPECT_CALL(*mdsClient_, Tso(_, _)) + .WillOnce(Return(FSStatusCode::OK)); + EXPECT_CALL(*metaClient_, GetPartitionId(_, _, _)) + .WillOnce(DoAll(SetArgPointee<2>(1), Return(true))) + .WillOnce(DoAll(SetArgPointee<2>(2), Return(true))); + EXPECT_CALL(*metaClient_, PrewriteRenameTx(_, _, _)) + .WillOnce(Return(MetaStatusCode::TX_KEY_LOCKED)); + EXPECT_CALL(*dentryManager_, CheckAndResolveTx(_, _, _, _)) + .WillOnce(Return(MetaStatusCode::STORAGE_INTERNAL_ERROR)); + rc = renameOp_->PrewriteTx(); + ASSERT_EQ(rc, CURVEFS_ERROR::INTERNAL); + // 5. 
PrewriteRenameTx key is locked and CheckAndResolveTx success + EXPECT_CALL(*mdsClient_, Tso(_, _)) + .WillOnce(Return(FSStatusCode::OK)); + EXPECT_CALL(*metaClient_, GetPartitionId(_, _, _)) + .WillOnce(DoAll(SetArgPointee<2>(1), Return(true))) + .WillOnce(DoAll(SetArgPointee<2>(2), Return(true))); + EXPECT_CALL(*metaClient_, PrewriteRenameTx(_, _, _)) + .WillOnce(Return(MetaStatusCode::TX_KEY_LOCKED)) + .WillOnce(Return(MetaStatusCode::OK)) + .WillOnce(Return(MetaStatusCode::OK)); + EXPECT_CALL(*dentryManager_, CheckAndResolveTx(_, _, _, _)) + .WillOnce(Return(MetaStatusCode::OK)); + rc = renameOp_->PrewriteTx(); + ASSERT_EQ(rc, CURVEFS_ERROR::OK); +} + +TEST_F(ClientOperatorTest, CommitTxV2) { + CURVEFS_ERROR rc = CURVEFS_ERROR::OK; + // 1. tso failed + EXPECT_CALL(*mdsClient_, Tso(_, _)) + .WillOnce(Return(FSStatusCode::UNKNOWN_ERROR)); + rc = renameOp_->CommitTxV2(); + ASSERT_EQ(rc, CURVEFS_ERROR::INTERNAL); + // 2. CommitTx failed + EXPECT_CALL(*mdsClient_, Tso(_, _)) + .WillOnce(Return(FSStatusCode::OK)); + EXPECT_CALL(*metaClient_, CommitTx(_, _, _)) + .WillOnce(Return(MetaStatusCode::STORAGE_INTERNAL_ERROR)); + rc = renameOp_->CommitTxV2(); + ASSERT_EQ(rc, CURVEFS_ERROR::INTERNAL); + // 3. 
CommitTx success + EXPECT_CALL(*mdsClient_, Tso(_, _)) + .WillOnce(Return(FSStatusCode::OK)); + EXPECT_CALL(*metaClient_, CommitTx(_, _, _)) + .WillOnce(Return(MetaStatusCode::OK)); + rc = renameOp_->CommitTxV2(); + ASSERT_EQ(rc, CURVEFS_ERROR::OK); +} + } // namespace client } // namespace curvefs diff --git a/curvefs/test/client/client_prefetch_test.cpp b/curvefs/test/client/client_prefetch_test.cpp index 712fbd208d..896467fcc0 100644 --- a/curvefs/test/client/client_prefetch_test.cpp +++ b/curvefs/test/client/client_prefetch_test.cpp @@ -33,7 +33,7 @@ #include "curvefs/test/client/mock_disk_cache_manager.h" #include "curvefs/test/client/mock_disk_cache_read.h" #include "curvefs/test/client/mock_disk_cache_write.h" -#include "curvefs/test/client/mock_inode_cache_manager.h" +#include "curvefs/test/client/mock_inode_manager.h" #include "curvefs/test/client/mock_kvclient.h" #include "curvefs/test/client/mock_test_posix_wapper.h" #include "src/common/concurrent/task_thread_pool.h" diff --git a/curvefs/test/client/client_s3_adaptor_Integration.cpp b/curvefs/test/client/client_s3_adaptor_Integration.cpp index 1b4ee2a280..1127dda78c 100644 --- a/curvefs/test/client/client_s3_adaptor_Integration.cpp +++ b/curvefs/test/client/client_s3_adaptor_Integration.cpp @@ -30,7 +30,7 @@ #include "curvefs/src/client/kvclient/kvclient_manager.h" #include "src/common/curve_define.h" #include "curvefs/test/client/mock_client_s3.h" -#include "curvefs/test/client/mock_inode_cache_manager.h" +#include "curvefs/test/client/mock_inode_manager.h" #include "curvefs/test/client/mock_metaserver_service.h" #include "curvefs/test/client/mock_kvclient.h" #include "curvefs/test/client/rpcclient/mock_mds_client.h" diff --git a/curvefs/test/client/client_s3_adaptor_test.cpp b/curvefs/test/client/client_s3_adaptor_test.cpp index 7430a14856..d062d5e674 100644 --- a/curvefs/test/client/client_s3_adaptor_test.cpp +++ b/curvefs/test/client/client_s3_adaptor_test.cpp @@ -30,7 +30,7 @@ #include 
"curvefs/test/client/mock_client_s3.h" #include "curvefs/test/client/mock_client_s3_cache_manager.h" #include "curvefs/test/client/mock_disk_cache_manager.h" -#include "curvefs/test/client/mock_inode_cache_manager.h" +#include "curvefs/test/client/mock_inode_manager.h" #include "curvefs/test/client/mock_metaserver_service.h" #include "curvefs/test/client/rpcclient/mock_mds_client.h" #include "src/common/curve_define.h" diff --git a/curvefs/test/client/file_cache_manager_test.cpp b/curvefs/test/client/file_cache_manager_test.cpp index 013e64b6f8..d9549611f7 100644 --- a/curvefs/test/client/file_cache_manager_test.cpp +++ b/curvefs/test/client/file_cache_manager_test.cpp @@ -26,7 +26,7 @@ #include "curvefs/src/client/s3/client_s3_adaptor.h" #include "curvefs/src/client/s3/client_s3_cache_manager.h" #include "curvefs/test/client/mock_client_s3_cache_manager.h" -#include "curvefs/test/client/mock_inode_cache_manager.h" +#include "curvefs/test/client/mock_inode_manager.h" #include "curvefs/test/client/mock_client_s3.h" #include "src/common/concurrent/task_thread_pool.h" diff --git a/curvefs/test/client/filesystem/defer_sync_test.cpp b/curvefs/test/client/filesystem/defer_sync_test.cpp index 0aabdac1df..5bfe011eda 100644 --- a/curvefs/test/client/filesystem/defer_sync_test.cpp +++ b/curvefs/test/client/filesystem/defer_sync_test.cpp @@ -27,6 +27,8 @@ namespace curvefs { namespace client { namespace filesystem { +class DeferInodesTest : public ::testing::Test {}; + class DeferSyncTest : public ::testing::Test { protected: void SetUp() override { @@ -41,9 +43,113 @@ class DeferSyncTest : public ::testing::Test { std::shared_ptr metaClient_; }; +TEST_F(DeferInodesTest, cto) { + // CASE 1: cto + bool cto = true; + auto deferInodes = std::make_shared(cto); + bool yes = deferInodes->Add(MkInode(100)); + ASSERT_FALSE(yes); + ASSERT_EQ(deferInodes->Size(), 0); + + // CASE 2: nocto + cto = false; + deferInodes = std::make_shared(cto); + yes = deferInodes->Add(MkInode(100)); + 
ASSERT_TRUE(yes); + ASSERT_EQ(deferInodes->Size(), 1); +} + +TEST_F(DeferInodesTest, Add) { + auto deferInodes = std::make_shared(false); + + // CASE 1: add inode(100) success + bool yes = deferInodes->Add(MkInode(100, InodeOption().length(1024))); + ASSERT_TRUE(yes); + ASSERT_EQ(deferInodes->Size(), 1); + + std::shared_ptr inode; + yes = deferInodes->Get(100, &inode); + ASSERT_TRUE(yes); + ASSERT_EQ(inode->GetLength(), 1024); + + // CASE 2: add inode(200) success + yes = deferInodes->Add(MkInode(200, InodeOption().length(2048))); + ASSERT_TRUE(yes); + ASSERT_EQ(deferInodes->Size(), 2); + + yes = deferInodes->Get(200, &inode); + ASSERT_TRUE(yes); + ASSERT_EQ(inode->GetLength(), 2048); + + // CASE 3: add inode(200) which alreay exists + yes = deferInodes->Add(MkInode(200, InodeOption().length(2049))); + ASSERT_TRUE(yes); + ASSERT_EQ(deferInodes->Size(), 2); + + yes = deferInodes->Get(200, &inode); + ASSERT_TRUE(yes); + ASSERT_EQ(inode->GetLength(), 2049); +} + +TEST_F(DeferInodesTest, Get) { + auto deferInodes = std::make_shared(false); + bool yes = deferInodes->Add(MkInode(100, InodeOption().length(1024))); + ASSERT_TRUE(yes); + ASSERT_EQ(deferInodes->Size(), 1); + + // CASE 1: get exist inode + std::shared_ptr inode; + yes = deferInodes->Get(100, &inode); + ASSERT_TRUE(yes); + ASSERT_EQ(inode->GetLength(), 1024); + + // CASE 2: get non-exist inode + yes = deferInodes->Get(200, &inode); + ASSERT_FALSE(yes); +} + +TEST_F(DeferInodesTest, Remove_Basic) { + auto deferInodes = std::make_shared(false); + auto inode = MkInode(100, InodeOption().length(1024)); + bool yes = deferInodes->Add(inode); + ASSERT_TRUE(yes); + ASSERT_EQ(deferInodes->Size(), 1); + + // CASE 1: remove exist inode + yes = deferInodes->Remove(inode); + ASSERT_TRUE(yes); + ASSERT_EQ(deferInodes->Size(), 0); + + yes = deferInodes->Get(100, &inode); + ASSERT_FALSE(yes); + + // CASE 2: remove non-exist inode + yes = deferInodes->Remove(inode); + ASSERT_FALSE(yes); +} + +TEST_F(DeferInodesTest, 
Remove_CompareCtime) { + auto deferInodes = std::make_shared(false); + auto deferInode = MkInode(100, InodeOption().length(1024).ctime(123, 456)); + bool yes = deferInodes->Add(deferInode); + ASSERT_EQ(deferInodes->Size(), 1); + + // CASE 1: attr ctime < defered ctime => remove fail + auto inode = MkInode(100, InodeOption().length(1024).ctime(123, 455)); + yes = deferInodes->Remove(inode); + ASSERT_FALSE(yes); + ASSERT_EQ(deferInodes->Size(), 1); + + // CASE 2: attr ctime > defered ctime => remove success + inode = MkInode(100, InodeOption().length(1024).ctime(123, 457)); + yes = deferInodes->Remove(inode); + ASSERT_TRUE(yes); + ASSERT_EQ(deferInodes->Size(), 0); +} + TEST_F(DeferSyncTest, Basic) { auto builder = DeferSyncBuilder(); - auto deferSync = builder.SetOption([&](DeferSyncOption* option){ + auto deferSync = builder.SetOption([&](bool* cto, DeferSyncOption* option) { option->delay = 3; }).Build(); deferSync->Start(); @@ -57,7 +163,7 @@ TEST_F(DeferSyncTest, Basic) { TEST_F(DeferSyncTest, Dirty) { auto builder = DeferSyncBuilder(); - auto deferSync = builder.SetOption([&](DeferSyncOption* option){ + auto deferSync = builder.SetOption([&](bool* cto, DeferSyncOption* option) { option->delay = 3; }).Build(); deferSync->Start(); @@ -68,6 +174,41 @@ TEST_F(DeferSyncTest, Dirty) { deferSync->Stop(); } +TEST_F(DeferSyncTest, IsDefered_cto) { + auto builder = DeferSyncBuilder(); + auto deferSync = builder.SetOption([&](bool* cto, DeferSyncOption* option) { + *cto = true; + option->delay = 3; + }).Build(); + deferSync->Start(); + + std::shared_ptr inode; + deferSync->Push(MkInode(100, InodeOption())); + bool yes = deferSync->IsDefered(100, &inode); + ASSERT_FALSE(yes); + deferSync->Stop(); +} + +TEST_F(DeferSyncTest, IsDefered_nocto) { + auto builder = DeferSyncBuilder(); + auto deferSync = builder.SetOption([&](bool* cto, DeferSyncOption* option) { + *cto = false; + option->delay = 3; + }).Build(); + deferSync->Start(); + + std::shared_ptr inode; + 
deferSync->Push(MkInode(100, InodeOption())); + bool yes = deferSync->IsDefered(100, &inode); + ASSERT_TRUE(yes); + + // wait inode synced and defer inode removed + std::this_thread::sleep_for(std::chrono::seconds(4)); + yes = deferSync->IsDefered(100, &inode); + ASSERT_FALSE(yes); + deferSync->Stop(); +} + } // namespace filesystem } // namespace client } // namespace curvefs diff --git a/curvefs/test/client/filesystem/helper/builder.h b/curvefs/test/client/filesystem/helper/builder.h index feece16aa0..31178733af 100644 --- a/curvefs/test/client/filesystem/helper/builder.h +++ b/curvefs/test/client/filesystem/helper/builder.h @@ -32,8 +32,8 @@ #include "curvefs/src/client/filesystem/meta.h" #include "curvefs/src/client/filesystem/filesystem.h" #include "curvefs/test/client/mock_metaserver_client.h" -#include "curvefs/test/client/mock_inode_cache_manager.h" -#include "curvefs/test/client/mock_dentry_cache_mamager.h" +#include "curvefs/test/client/mock_inode_manager.h" +#include "curvefs/test/client/mock_dentry_mamager.h" namespace curvefs { namespace client { @@ -46,7 +46,7 @@ using ::curvefs::client::common::KernelCacheOption; class DeferSyncBuilder { public: - using Callback = std::function; + using Callback = std::function; static DeferSyncOption DefaultOption() { return DeferSyncOption { @@ -57,17 +57,18 @@ class DeferSyncBuilder { public: DeferSyncBuilder() - : option_(DefaultOption()), + : cto_(true), + option_(DefaultOption()), dentryManager_(std::make_shared()), inodeManager_(std::make_shared()) {} DeferSyncBuilder SetOption(Callback callback) { - callback(&option_); + callback(&cto_, &option_); return *this; } std::shared_ptr Build() { - return std::make_shared(option_); + return std::make_shared(cto_, option_); } std::shared_ptr GetDentryManager() { @@ -79,6 +80,7 @@ class DeferSyncBuilder { } private: + bool cto_; DeferSyncOption option_; std::shared_ptr dentryManager_; std::shared_ptr inodeManager_; diff --git 
a/curvefs/test/client/filesystem/helper/expect.h b/curvefs/test/client/filesystem/helper/expect.h index 5daad73d9c..a3df7b5b91 100644 --- a/curvefs/test/client/filesystem/helper/expect.h +++ b/curvefs/test/client/filesystem/helper/expect.h @@ -26,8 +26,8 @@ #include #include "curvefs/test/client/mock_metaserver_client.h" -#include "curvefs/test/client/mock_inode_cache_manager.h" -#include "curvefs/test/client/mock_dentry_cache_mamager.h" +#include "curvefs/test/client/mock_inode_manager.h" +#include "curvefs/test/client/mock_dentry_mamager.h" namespace curvefs { namespace client { diff --git a/curvefs/test/client/filesystem/helper/meta.cpp b/curvefs/test/client/filesystem/helper/meta.cpp index 49d9540d61..3cab5b6720 100644 --- a/curvefs/test/client/filesystem/helper/meta.cpp +++ b/curvefs/test/client/filesystem/helper/meta.cpp @@ -76,11 +76,21 @@ AttrOption AttrOption::ctime(uint64_t seconds, uint32_t naoSeconds) { return *this; } +InodeOption InodeOption::ctime(uint64_t seconds, uint32_t naoSeconds) { + ctime_ = TimeSpec(seconds, naoSeconds); + return *this; +} + InodeOption InodeOption::mtime(uint64_t seconds, uint32_t naoSeconds) { mtime_ = TimeSpec(seconds, naoSeconds); return *this; } +InodeOption InodeOption::length(uint64_t length) { + length_ = length; + return *this; +} + InodeOption InodeOption::metaClient( std::shared_ptr metaClient) { metaClient_ = metaClient; @@ -109,8 +119,11 @@ InodeAttr MkAttr(Ino ino, AttrOption option) { std::shared_ptr MkInode(Ino ino, InodeOption option) { Inode inode; inode.set_inodeid(ino); + inode.set_ctime(option.ctime_.seconds); + inode.set_ctime_ns(option.ctime_.nanoSeconds); inode.set_mtime(option.mtime_.seconds); inode.set_mtime_ns(option.mtime_.nanoSeconds); + inode.set_length(option.length_); return std::make_shared(inode, option.metaClient_); } diff --git a/curvefs/test/client/filesystem/helper/meta.h b/curvefs/test/client/filesystem/helper/meta.h index ffa5899a67..a63b7297fc 100644 --- 
a/curvefs/test/client/filesystem/helper/meta.h +++ b/curvefs/test/client/filesystem/helper/meta.h @@ -71,14 +71,17 @@ struct AttrOption { class InodeOption { public: InodeOption() = default; + InodeOption ctime(uint64_t seconds, uint32_t naoSeconds); InodeOption mtime(uint64_t seconds, uint32_t naoSeconds); + InodeOption length(uint64_t length); InodeOption metaClient(std::shared_ptr metaClient); private: friend std::shared_ptr MkInode(Ino ino, InodeOption option); private: - TimeSpec mtime_; + TimeSpec ctime_, mtime_; + uint64_t length_; std::shared_ptr metaClient_; }; diff --git a/curvefs/test/client/lease/lease_executor_test.cpp b/curvefs/test/client/lease/lease_executor_test.cpp index 15e87bf16b..6031a0f63b 100644 --- a/curvefs/test/client/lease/lease_executor_test.cpp +++ b/curvefs/test/client/lease/lease_executor_test.cpp @@ -89,7 +89,7 @@ TEST_F(LeaseExecutorTest, test_start_stop) { EXPECT_CALL(*metaCache_, GetAllTxIds(_)) .WillOnce(SetArgPointee<0>(std::vector{})) .WillRepeatedly(SetArgPointee<0>(txIds)); - EXPECT_CALL(*mdsCli_, RefreshSession(_, _, _, _, _)) + EXPECT_CALL(*mdsCli_, RefreshSession(_, _, _, _, _, _, _)) .WillOnce(Return(FSStatusCode::UNKNOWN_ERROR)) .WillRepeatedly( DoAll(SetArgPointee<1>(txIds), Return(FSStatusCode::OK))); diff --git a/curvefs/test/client/mock_dentry_cache_mamager.h b/curvefs/test/client/mock_dentry_mamager.h similarity index 78% rename from curvefs/test/client/mock_dentry_cache_mamager.h rename to curvefs/test/client/mock_dentry_mamager.h index 2e5e8ac41e..f20a3217c4 100644 --- a/curvefs/test/client/mock_dentry_cache_mamager.h +++ b/curvefs/test/client/mock_dentry_mamager.h @@ -20,14 +20,15 @@ * Author: xuchaojie */ -#ifndef CURVEFS_TEST_CLIENT_MOCK_DENTRY_CACHE_MAMAGER_H_ -#define CURVEFS_TEST_CLIENT_MOCK_DENTRY_CACHE_MAMAGER_H_ +#ifndef CURVEFS_TEST_CLIENT_MOCK_DENTRY_MAMAGER_H_ +#define CURVEFS_TEST_CLIENT_MOCK_DENTRY_MAMAGER_H_ #include #include #include #include -#include "curvefs/src/client/dentry_cache_manager.h" 
+#include +#include "curvefs/src/client/dentry_manager.h" namespace curvefs { namespace client { @@ -37,6 +38,8 @@ class MockDentryCacheManager : public DentryCacheManager { MockDentryCacheManager() {} ~MockDentryCacheManager() {} + MOCK_METHOD(void, Init, (std::shared_ptr mdsClient), (override)); + MOCK_METHOD3(GetDentry, CURVEFS_ERROR(uint64_t parent, const std::string &name, Dentry *out)); @@ -51,10 +54,14 @@ class MockDentryCacheManager : public DentryCacheManager { uint32_t limit, bool onlyDir, uint32_t nlink)); + + MOCK_METHOD(MetaStatusCode, CheckAndResolveTx, (const Dentry& dentry, + const TxLock& txLock, uint64_t timestamp, uint64_t commitTs), + (override)); }; } // namespace client } // namespace curvefs -#endif // CURVEFS_TEST_CLIENT_MOCK_DENTRY_CACHE_MAMAGER_H_ +#endif // CURVEFS_TEST_CLIENT_MOCK_DENTRY_MAMAGER_H_ diff --git a/curvefs/test/client/mock_inode_cache_manager.h b/curvefs/test/client/mock_inode_manager.h similarity index 91% rename from curvefs/test/client/mock_inode_cache_manager.h rename to curvefs/test/client/mock_inode_manager.h index 39c332ce04..adc043d4a5 100644 --- a/curvefs/test/client/mock_inode_cache_manager.h +++ b/curvefs/test/client/mock_inode_manager.h @@ -20,8 +20,8 @@ * Author: xuchaojie */ -#ifndef CURVEFS_TEST_CLIENT_MOCK_INODE_CACHE_MANAGER_H_ -#define CURVEFS_TEST_CLIENT_MOCK_INODE_CACHE_MANAGER_H_ +#ifndef CURVEFS_TEST_CLIENT_MOCK_INODE_MANAGER_H_ +#define CURVEFS_TEST_CLIENT_MOCK_INODE_MANAGER_H_ #include #include @@ -30,7 +30,7 @@ #include #include -#include "curvefs/src/client/inode_cache_manager.h" +#include "curvefs/src/client/inode_manager.h" namespace curvefs { namespace client { @@ -77,4 +77,4 @@ class MockInodeCacheManager : public InodeCacheManager { } // namespace curvefs -#endif // CURVEFS_TEST_CLIENT_MOCK_INODE_CACHE_MANAGER_H_ +#endif // CURVEFS_TEST_CLIENT_MOCK_INODE_MANAGER_H_ diff --git a/curvefs/test/client/mock_metaserver_base_client.h b/curvefs/test/client/mock_metaserver_base_client.h deleted file 
mode 100644 index dec0dbcdba..0000000000 --- a/curvefs/test/client/mock_metaserver_base_client.h +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright (c) 2021 NetEase Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Project: curve - * Created Date: Thur Jun 16 2021 - * Author: lixiaocui - */ - - -#ifndef CURVEFS_TEST_CLIENT_MOCK_METASERVER_BASE_CLIENT_H_ -#define CURVEFS_TEST_CLIENT_MOCK_METASERVER_BASE_CLIENT_H_ - -#include -#include -#include -#include "curvefs/src/client/base_client.h" - -namespace curvefs { -namespace client { - -class MockMetaServerBaseClient : public MetaServerBaseClient { - public: - MockMetaServerBaseClient() : MetaServerBaseClient() {} - ~MockMetaServerBaseClient() = default; - - MOCK_METHOD6(GetDentry, - void(uint32_t fsId, uint64_t inodeid, const std::string &name, - GetDentryResponse *response, brpc::Controller *cntl, - brpc::Channel *channel)); - - MOCK_METHOD7(ListDentry, - void(uint32_t fsId, uint64_t inodeid, const std::string &last, - uint32_t count, ListDentryResponse *response, - brpc::Controller *cntl, brpc::Channel *channel)); - - MOCK_METHOD4(CreateDentry, - void(const Dentry &dentry, CreateDentryResponse *response, - brpc::Controller *cntl, brpc::Channel *channel)); - - MOCK_METHOD6(DeleteDentry, - void(uint32_t fsId, uint64_t inodeid, const std::string &name, - DeleteDentryResponse *response, brpc::Controller *cntl, - brpc::Channel *channel)); - - MOCK_METHOD4(PrepareRenameTx, void(const std::vector& 
dentrys, - PrepareRenameTxResponse* response, - brpc::Controller* cntl, - brpc::Channel* channel)); - - MOCK_METHOD5(GetInode, - void(uint32_t fsId, uint64_t inodeid, - GetInodeResponse *response, brpc::Controller *cntl, - brpc::Channel *channel)); - - MOCK_METHOD4(UpdateInode, - void(const Inode &inode, UpdateInodeResponse *response, - brpc::Controller *cntl, brpc::Channel *channel)); - - MOCK_METHOD4(CreateInode, - void(const InodeParam ¶m, CreateInodeResponse *response, - brpc::Controller *cntl, brpc::Channel *channel)); - - MOCK_METHOD5(DeleteInode, - void(uint32_t fsId, uint64_t inodeid, - DeleteInodeResponse *response, brpc::Controller *cntl, - brpc::Channel *channel)); -}; -} // namespace client -} // namespace curvefs - -#endif // CURVEFS_TEST_CLIENT_MOCK_METASERVER_BASE_CLIENT_H_ diff --git a/curvefs/test/client/mock_metaserver_client.h b/curvefs/test/client/mock_metaserver_client.h index b0fac7b1fe..db2cb6f432 100644 --- a/curvefs/test/client/mock_metaserver_client.h +++ b/curvefs/test/client/mock_metaserver_client.h @@ -55,18 +55,19 @@ class MockMetaServerClient : public MetaServerClient { MOCK_METHOD2(SetTxId, void(uint32_t partitionId, uint64_t txId)); - MOCK_METHOD4(GetDentry, MetaStatusCode(uint32_t fsId, uint64_t inodeid, - const std::string &name, Dentry *out)); + MOCK_METHOD(MetaStatusCode, GetDentry, (uint32_t fsId, uint64_t inodeid, + const std::string &name, Dentry *out, TxLock* txLockOut), (override)); - MOCK_METHOD6(ListDentry, MetaStatusCode(uint32_t fsId, uint64_t inodeid, + MOCK_METHOD(MetaStatusCode, ListDentry, (uint32_t fsId, uint64_t inodeid, const std::string &last, uint32_t count, bool onlyDir, - std::list *dentryList)); + std::list *dentryList, TxLock* txLockOut), (override)); - MOCK_METHOD1(CreateDentry, MetaStatusCode(const Dentry &dentry)); + MOCK_METHOD(MetaStatusCode, CreateDentry, ( + const Dentry &dentry, TxLock* txLockOut), (override)); - MOCK_METHOD4(DeleteDentry, MetaStatusCode( + MOCK_METHOD(MetaStatusCode, DeleteDentry, 
( uint32_t fsId, uint64_t inodeid, const std::string &name, - FsFileType type)); + FsFileType type, TxLock* txLockOut), (override)); MOCK_METHOD1(PrepareRenameTx, MetaStatusCode(const std::vector& dentrys)); @@ -161,6 +162,23 @@ class MockMetaServerClient : public MetaServerClient { MOCK_METHOD3(UpdateDeallocatableBlockGroup, MetaStatusCode(uint32_t, uint64_t, DeallocatableBlockGroupMap *)); + + MOCK_METHOD(MetaStatusCode, PrewriteRenameTx, + (const std::vector& dentrys, + const TxLock& txLockIn, TxLock* txLockOut), (override)); + + MOCK_METHOD(MetaStatusCode, CheckTxStatus, (uint32_t fsId, uint64_t inodeId, + const std::string& primaryKey, uint64_t startTs, uint64_t curTimestamp), + (override)); + + MOCK_METHOD(MetaStatusCode, ResolveTxLock, (const Dentry& dentry, + uint64_t startTs, uint64_t commitTs), (override)); + + MOCK_METHOD(MetaStatusCode, CommitTx, (const std::vector& dentrys, + uint64_t startTs, uint64_t commitTs), (override)); + + MOCK_METHOD(bool, GetPartitionId, (uint32_t fsId, uint64_t inodeId, + PartitionID *partitionId), (override)); }; } // namespace rpcclient diff --git a/curvefs/test/client/rpcclient/mds_client_test.cpp b/curvefs/test/client/rpcclient/mds_client_test.cpp index 024fecb61a..33e9221b44 100644 --- a/curvefs/test/client/rpcclient/mds_client_test.cpp +++ b/curvefs/test/client/rpcclient/mds_client_test.cpp @@ -839,6 +839,7 @@ TEST_F(MdsClientImplTest, RefreshSession) { // out std::vector out; std::atomic* enableSumInDir = new std::atomic (true); + std::string mdsAddrsOverride; RefreshSessionResponse response; { @@ -846,8 +847,9 @@ TEST_F(MdsClientImplTest, RefreshSession) { response.set_statuscode(FSStatusCode::OK); EXPECT_CALL(mockmdsbasecli_, RefreshSession(_, _, _, _)) .WillOnce(SetArgPointee<1>(response)); - ASSERT_FALSE(mdsclient_.RefreshSession(txIds, &out, - fsName, mountpoint, enableSumInDir)); + ASSERT_FALSE(mdsclient_.RefreshSession(txIds, &out, fsName, mountpoint, + enableSumInDir, std::string(), + &mdsAddrsOverride)); 
ASSERT_TRUE(out.empty()); } @@ -857,8 +859,9 @@ TEST_F(MdsClientImplTest, RefreshSession) { *response.mutable_latesttxidlist() = {txIds.begin(), txIds.end()}; EXPECT_CALL(mockmdsbasecli_, RefreshSession(_, _, _, _)) .WillOnce(SetArgPointee<1>(response)); - ASSERT_FALSE(mdsclient_.RefreshSession(txIds, &out, - fsName, mountpoint, enableSumInDir)); + ASSERT_FALSE(mdsclient_.RefreshSession(txIds, &out, fsName, mountpoint, + enableSumInDir, std::string(), + &mdsAddrsOverride)); ASSERT_EQ(1, out.size()); ASSERT_TRUE( google::protobuf::util::MessageDifferencer::Equals(out[0], tmp)) @@ -874,8 +877,9 @@ TEST_F(MdsClientImplTest, RefreshSession) { EXPECT_CALL(mockmdsbasecli_, RefreshSession(_, _, _, _)) .WillRepeatedly(Invoke(RefreshSessionRpcFailed)); ASSERT_EQ(FSStatusCode::RPC_ERROR, - mdsclient_.RefreshSession(txIds, &out, fsName, mountpoint, - enableSumInDir)); + mdsclient_.RefreshSession(txIds, &out, fsName, mountpoint, + enableSumInDir, std::string(), + &mdsAddrsOverride)); } } @@ -1042,6 +1046,61 @@ TEST_F(MdsClientImplTest, test_AllocOrGetMemcacheCluster) { mdsclient_.AllocOrGetMemcacheCluster(1, &cluster2)); } +TEST_F(MdsClientImplTest, test_GetMdsAddrs) { + ASSERT_EQ(mdsclient_.GetMdsAddrs(), addr_); +} + +TEST_F(MdsClientImplTest, test_SetMdsAddrs) { + auto addr_new = addr_ + ",127.0.0.1:5600"; + mdsclient_.SetMdsAddrs(addr_new); + ASSERT_EQ(mdsclient_.GetMdsAddrs(), addr_new); +} + +TEST_F(MdsClientImplTest, Tso) { + curvefs::mds::TsoResponse response; + + // CASE 1: Tso success + response.set_statuscode(FSStatusCode::OK); + response.set_ts(1); + response.set_timestamp(100); + EXPECT_CALL(mockmdsbasecli_, Tso(_, _, _, _)) + .WillOnce(SetArgPointee<1>(response)); + + uint64_t ts; + uint64_t timestamp; + auto rc = mdsclient_.Tso(&ts, ×tamp); + ASSERT_EQ(rc, FSStatusCode::OK); + ASSERT_EQ(ts, 1); + ASSERT_EQ(timestamp, 100); + + // CASE 2: Tso fail + response.set_statuscode(FSStatusCode::UNKNOWN_ERROR); + EXPECT_CALL(mockmdsbasecli_, Tso(_, _, _, _)) + 
.WillOnce(SetArgPointee<1>(response)); + + rc = mdsclient_.Tso(&ts, ×tamp); + ASSERT_EQ(rc, FSStatusCode::UNKNOWN_ERROR); + + // CASE 3: RPC error, retry until success + int count = 0; + EXPECT_CALL(mockmdsbasecli_, Tso(_, _, _, _)) + .Times(6) + .WillRepeatedly( + Invoke([&](const TsoRequest& request, + TsoResponse *response, + brpc::Controller *cntl, + brpc::Channel *channel) { + if (++count <= 5) { + cntl->SetFailed(112, "Not connected to"); + } else { + response->set_statuscode(FSStatusCode::OK); + } + })); + + rc = mdsclient_.Tso(&ts, ×tamp); + ASSERT_EQ(rc, FSStatusCode::OK); +} + } // namespace rpcclient } // namespace client } // namespace curvefs diff --git a/curvefs/test/client/rpcclient/metaserver_client_test.cpp b/curvefs/test/client/rpcclient/metaserver_client_test.cpp index bfffefce54..3cd751cbe4 100644 --- a/curvefs/test/client/rpcclient/metaserver_client_test.cpp +++ b/curvefs/test/client/rpcclient/metaserver_client_test.cpp @@ -63,6 +63,12 @@ using ::curvefs::metaserver::BatchGetXAttrRequest; using ::curvefs::metaserver::BatchGetXAttrResponse; using ::curvefs::metaserver::UpdateDeallocatableBlockGroupRequest; using ::curvefs::metaserver::UpdateDeallocatableBlockGroupResponse; +using ::curvefs::metaserver::PrepareRenameTxRequest; +using ::curvefs::metaserver::PrepareRenameTxResponse; +using ::curvefs::metaserver::CheckTxStatusRequest; +using ::curvefs::metaserver::CheckTxStatusResponse; +using ::curvefs::metaserver::ResolveTxLockRequest; +using ::curvefs::metaserver::ResolveTxLockResponse; using ::curvefs::common::StreamServer; using ::curvefs::common::StreamOptions; using ::curvefs::common::StreamConnection; @@ -134,6 +140,7 @@ TEST_F(MetaServerClientImplTest, test_GetDentry) { // out Dentry out; uint64_t txID = 1; + TxLock txLockOut; // set response curvefs::metaserver::GetDentryResponse response; @@ -151,8 +158,11 @@ TEST_F(MetaServerClientImplTest, test_GetDentry) { Invoke(SetRpcService)); EXPECT_CALL(*mockMetacache_.get(), GetTarget(_, _, _, 
_)) .WillRepeatedly(DoAll(SetArgPointee<2>(target_), Return(true))); + EXPECT_CALL(*mockMetacache_.get(), GetTargetLeader(_, _)) + .WillRepeatedly(Return(true)); - MetaStatusCode status = metaserverCli_.GetDentry(fsID, inodeID, name, &out); + MetaStatusCode status = metaserverCli_.GetDentry( + fsID, inodeID, name, &out, &txLockOut); ASSERT_EQ(MetaStatusCode::RPC_ERROR, status); // test1: get dentry ok @@ -165,7 +175,7 @@ TEST_F(MetaServerClientImplTest, test_GetDentry) { EXPECT_CALL(*mockMetacache_.get(), GetTarget(_, _, _, _)) .WillOnce(DoAll(SetArgPointee<2>(target_), Return(true))); - status = metaserverCli_.GetDentry(fsID, inodeID, name, &out); + status = metaserverCli_.GetDentry(fsID, inodeID, name, &out, &txLockOut); ASSERT_EQ(MetaStatusCode::OK, status); ASSERT_TRUE(google::protobuf::util::MessageDifferencer::Equals(out, *d)) << "out:\n" @@ -175,7 +185,7 @@ TEST_F(MetaServerClientImplTest, test_GetDentry) { // test2: get dentry get target fail EXPECT_CALL(*mockMetacache_.get(), GetTarget(_, _, _, _)) .WillRepeatedly(Return(false)); - status = metaserverCli_.GetDentry(fsID, inodeID, name, &out); + status = metaserverCli_.GetDentry(fsID, inodeID, name, &out, &txLockOut); ASSERT_EQ(MetaStatusCode::RPC_ERROR, status); // test3: get dentry over load and fail retry ok @@ -192,12 +202,32 @@ TEST_F(MetaServerClientImplTest, test_GetDentry) { DoAll(SetArgPointee<2>(response), Invoke(SetRpcService))); - status = metaserverCli_.GetDentry(fsID, inodeID, name, &out); + status = metaserverCli_.GetDentry(fsID, inodeID, name, &out, &txLockOut); ASSERT_EQ(MetaStatusCode::OK, status); - // test4: test response do not have dentry - response.clear_dentry(); + // test4: test dentry has tx lock + TxLock txLock; + txLock.set_primarykey("key"); + txLock.set_startts(1); + txLock.set_timestamp(100); + response.set_statuscode(MetaStatusCode::TX_KEY_LOCKED); + *response.mutable_txlock() = txLock; + EXPECT_CALL(mockMetaServerService_, GetDentry(_, _, _, _)) + .WillOnce( + 
DoAll(SetArgPointee<2>(response), + Invoke(SetRpcService))); + + EXPECT_CALL(*mockMetacache_.get(), GetTarget(_, _, _, _)) + .WillOnce(DoAll(SetArgPointee<2>(target_), Return(true))); + + status = metaserverCli_.GetDentry(fsID, inodeID, name, &out, &txLockOut); + ASSERT_EQ(MetaStatusCode::TX_KEY_LOCKED, status); + ASSERT_TRUE(google::protobuf::util::MessageDifferencer::Equals(txLockOut, + txLock)); + // test5: test response do not have dentry + response.set_statuscode(MetaStatusCode::OK); + response.clear_dentry(); EXPECT_CALL(mockMetaServerService_, GetDentry(_, _, _, _)) .WillRepeatedly( DoAll(SetArgPointee<2>(response), @@ -206,7 +236,7 @@ TEST_F(MetaServerClientImplTest, test_GetDentry) { EXPECT_CALL(*mockMetacache_.get(), GetTarget(_, _, _, _)) .WillRepeatedly(DoAll(SetArgPointee<2>(target_), Return(true))); - status = metaserverCli_.GetDentry(fsID, inodeID, name, &out); + status = metaserverCli_.GetDentry(fsID, inodeID, name, &out, &txLockOut); ASSERT_EQ(MetaStatusCode::RPC_ERROR, status); } @@ -220,6 +250,7 @@ TEST_F(MetaServerClientImplTest, test_ListDentry) { // out std::list out; uint64_t txID = 10; + TxLock txLockOut; curvefs::metaserver::ListDentryResponse response; auto *d = response.add_dentrys(); @@ -235,12 +266,34 @@ TEST_F(MetaServerClientImplTest, test_ListDentry) { Invoke(SetRpcService)); EXPECT_CALL(*mockMetacache_.get(), GetTarget(_, _, _, _)) .WillRepeatedly(DoAll(SetArgPointee<2>(target_), Return(true))); + EXPECT_CALL(*mockMetacache_.get(), GetTargetLeader(_, _)) + .WillRepeatedly(Return(true)); - MetaStatusCode status = - metaserverCli_.ListDentry(fsID, inodeID, last, count, onlyDir, &out); + MetaStatusCode status = metaserverCli_.ListDentry( + fsID, inodeID, last, count, onlyDir, &out, &txLockOut); ASSERT_EQ(MetaStatusCode::RPC_ERROR, status); + // test: dentry has tx lock + TxLock txLock; + txLock.set_primarykey("key"); + txLock.set_startts(1); + txLock.set_timestamp(100); + response.set_statuscode(MetaStatusCode::TX_KEY_LOCKED); + 
*response.mutable_txlock() = txLock; + EXPECT_CALL(mockMetaServerService_, ListDentry(_, _, _, _)) + .WillOnce(DoAll( + SetArgPointee<2>(response), + Invoke(SetRpcService))); + EXPECT_CALL(*mockMetacache_.get(), GetTarget(_, _, _, _)) + .WillOnce(DoAll(SetArgPointee<2>(target_), Return(true))); + + status = metaserverCli_.ListDentry( + fsID, inodeID, last, count, onlyDir, &out, &txLockOut); + ASSERT_EQ(MetaStatusCode::TX_KEY_LOCKED, status); + ASSERT_TRUE( + google::protobuf::util::MessageDifferencer::Equals(txLockOut, txLock)); + // test1: list dentry ok response.set_statuscode(MetaStatusCode::OK); EXPECT_CALL(mockMetaServerService_, ListDentry(_, _, _, _)) @@ -251,7 +304,7 @@ TEST_F(MetaServerClientImplTest, test_ListDentry) { .WillOnce(DoAll(SetArgPointee<2>(target_), Return(true))); status = metaserverCli_.ListDentry( - fsID, inodeID, last, count, onlyDir, &out); + fsID, inodeID, last, count, onlyDir, &out, &txLockOut); ASSERT_EQ(MetaStatusCode::OK, status); ASSERT_EQ(1, out.size()); ASSERT_TRUE( @@ -274,7 +327,7 @@ TEST_F(MetaServerClientImplTest, test_ListDentry) { SetArgPointee<2>(response), Invoke(SetRpcService))); status = metaserverCli_.ListDentry( - fsID, inodeID, last, count, onlyDir, &out); + fsID, inodeID, last, count, onlyDir, &out, &txLockOut); ASSERT_EQ(MetaStatusCode::OK, status); // test3: test response do not have dentrys @@ -288,7 +341,7 @@ TEST_F(MetaServerClientImplTest, test_ListDentry) { .WillRepeatedly(DoAll(SetArgPointee<2>(target_), Return(true))); status = metaserverCli_.ListDentry( - fsID, inodeID, last, count, onlyDir, &out); + fsID, inodeID, last, count, onlyDir, &out, &txLockOut); ASSERT_EQ(MetaStatusCode::OK, status); } @@ -302,8 +355,7 @@ TEST_F(MetaServerClientImplTest, test_CreateDentry_rpc_error) { d.set_txid(10); // out - butil::EndPoint target; - butil::str2endpoint(addr_.c_str(), &target); + TxLock txLockOut; curvefs::metaserver::CreateDentryResponse response; @@ -316,11 +368,11 @@ TEST_F(MetaServerClientImplTest, 
test_CreateDentry_rpc_error) { .Times(1 + opt_.maxRetry) .WillRepeatedly(Return(true)); - MetaStatusCode status = metaserverCli_.CreateDentry(d); + MetaStatusCode status = metaserverCli_.CreateDentry(d, &txLockOut); ASSERT_EQ(MetaStatusCode::RPC_ERROR, status); } -TEST_F(MetaServerClientImplTest, test_CreateDentry_create_dentry_ok) { +TEST_F(MetaServerClientImplTest, test_CreateDentry_ok) { // in Dentry d; d.set_fsid(1); @@ -330,8 +382,7 @@ TEST_F(MetaServerClientImplTest, test_CreateDentry_create_dentry_ok) { d.set_txid(10); // out - butil::EndPoint target; - butil::str2endpoint(addr_.c_str(), &target); + TxLock txLockOut; curvefs::metaserver::CreateDentryResponse response; @@ -343,10 +394,44 @@ TEST_F(MetaServerClientImplTest, test_CreateDentry_create_dentry_ok) { EXPECT_CALL(*mockMetacache_.get(), GetTarget(_, _, _, _)) .WillOnce(DoAll(SetArgPointee<2>(target_), Return(true))); - auto status = metaserverCli_.CreateDentry(d); + auto status = metaserverCli_.CreateDentry(d, &txLockOut); ASSERT_EQ(MetaStatusCode::OK, status); } +TEST_F(MetaServerClientImplTest, test_CreateDentry_tx_locked) { + // in + Dentry d; + d.set_fsid(1); + d.set_inodeid(2); + d.set_parentinodeid(1); + d.set_name("test11"); + d.set_txid(10); + + // out + TxLock txLockOut; + + curvefs::metaserver::CreateDentryResponse response; + + TxLock txLock; + txLock.set_primarykey("key"); + txLock.set_startts(1); + txLock.set_timestamp(100); + response.set_statuscode(MetaStatusCode::TX_KEY_LOCKED); + *response.mutable_txlock() = txLock; + + EXPECT_CALL(mockMetaServerService_, CreateDentry(_, _, _, _)) + .WillOnce(DoAll( + SetArgPointee<2>(response), + Invoke(SetRpcService))); + EXPECT_CALL(*mockMetacache_.get(), GetTarget(_, _, _, _)) + .WillOnce(DoAll(SetArgPointee<2>(target_), Return(true))); + + auto status = metaserverCli_.CreateDentry(d, &txLockOut); + ASSERT_EQ(MetaStatusCode::TX_KEY_LOCKED, status); + ASSERT_TRUE( + google::protobuf::util::MessageDifferencer::Equals(txLockOut, txLock)); +} + 
TEST_F(MetaServerClientImplTest, test_CreateDentry_copyset_not_exist) { // in Dentry d; @@ -357,8 +442,7 @@ TEST_F(MetaServerClientImplTest, test_CreateDentry_copyset_not_exist) { d.set_txid(10); // out - butil::EndPoint target; - butil::str2endpoint(addr_.c_str(), &target); + TxLock txLockOut; curvefs::metaserver::CreateDentryResponse response; @@ -380,7 +464,7 @@ TEST_F(MetaServerClientImplTest, test_CreateDentry_copyset_not_exist) { EXPECT_CALL(*mockMetacache_.get(), GetTargetLeader(_, _)) .WillOnce(Return(true)); - auto status = metaserverCli_.CreateDentry(d); + auto status = metaserverCli_.CreateDentry(d, &txLockOut); ASSERT_EQ(MetaStatusCode::OK, status); } @@ -390,10 +474,7 @@ TEST_F(MetaServerClientImplTest, test_DeleteDentry) { uint64_t inodeid = 2; std::string name = "test"; - // out - butil::EndPoint target; - butil::str2endpoint(addr_.c_str(), &target); - + TxLock txLockOut; curvefs::metaserver::DeleteDentryResponse response; // test1: delete dentry ok @@ -405,9 +486,11 @@ TEST_F(MetaServerClientImplTest, test_DeleteDentry) { Invoke(SetRpcService))); EXPECT_CALL(*mockMetacache_.get(), GetTarget(_, _, _, _)) .WillOnce(DoAll(SetArgPointee<2>(target_), Return(true))); + EXPECT_CALL(*mockMetacache_.get(), GetTargetLeader(_, _)) + .WillRepeatedly(Return(true)); MetaStatusCode status = metaserverCli_.DeleteDentry( - fsid, inodeid, name, FsFileType::TYPE_FILE); + fsid, inodeid, name, FsFileType::TYPE_FILE, &txLockOut); ASSERT_EQ(MetaStatusCode::OK, status); // test2: rpc error @@ -418,7 +501,7 @@ TEST_F(MetaServerClientImplTest, test_DeleteDentry) { .WillRepeatedly(DoAll(SetArgPointee<2>(target_), Return(true))); status = metaserverCli_.DeleteDentry( - fsid, inodeid, name, FsFileType::TYPE_FILE); + fsid, inodeid, name, FsFileType::TYPE_FILE, &txLockOut); ASSERT_EQ(MetaStatusCode::RPC_ERROR, status); // test3: delete response with unknown error @@ -430,8 +513,29 @@ TEST_F(MetaServerClientImplTest, test_DeleteDentry) { EXPECT_CALL(*mockMetacache_.get(), 
GetTarget(_, _, _, _)) .WillRepeatedly(DoAll(SetArgPointee<2>(target_), Return(true))); status = metaserverCli_.DeleteDentry( - fsid, inodeid, name, FsFileType::TYPE_FILE); + fsid, inodeid, name, FsFileType::TYPE_FILE, &txLockOut); ASSERT_EQ(MetaStatusCode::UNKNOWN_ERROR, status); + + // test: delete dentry with tx lock + TxLock txLock; + txLock.set_primarykey("key"); + txLock.set_startts(1); + txLock.set_timestamp(100); + response.set_statuscode(MetaStatusCode::TX_KEY_LOCKED); + *response.mutable_txlock() = txLock; + + EXPECT_CALL(mockMetaServerService_, DeleteDentry(_, _, _, _)) + .WillOnce(DoAll( + SetArgPointee<2>(response), + Invoke(SetRpcService))); + EXPECT_CALL(*mockMetacache_.get(), GetTarget(_, _, _, _)) + .WillOnce(DoAll(SetArgPointee<2>(target_), Return(true))); + + status = metaserverCli_.DeleteDentry( + fsid, inodeid, name, FsFileType::TYPE_FILE, &txLockOut); + ASSERT_EQ(MetaStatusCode::TX_KEY_LOCKED, status); + ASSERT_TRUE( + google::protobuf::util::MessageDifferencer::Equals(txLockOut, txLock)); } TEST_F(MetaServerClientImplTest, PrepareRenameTx) { @@ -443,6 +547,8 @@ TEST_F(MetaServerClientImplTest, PrepareRenameTx) { dentry.set_name("A"); dentry.set_txid(4); + EXPECT_CALL(*mockMetacache_.get(), GetTargetLeader(_, _)) + .WillRepeatedly(Return(true)); EXPECT_CALL(*mockMetacache_.get(), GetTarget(_, _, _, _)) .WillRepeatedly(DoAll(SetArgPointee<2>(target_), Return(true))); @@ -478,6 +584,190 @@ TEST_F(MetaServerClientImplTest, PrepareRenameTx) { ASSERT_EQ(rc, MetaStatusCode::RPC_ERROR); } +TEST_F(MetaServerClientImplTest, PrewriteRenameTx) { + curvefs::metaserver::PrewriteRenameTxResponse response; + Dentry dentry; + dentry.set_fsid(1); + dentry.set_inodeid(2); + dentry.set_parentinodeid(3); + dentry.set_name("A"); + TxLock txLockIn; + TxLock txLockOut; + txLockIn.set_primarykey("key"); + txLockIn.set_startts(1); + txLockIn.set_timestamp(100); + + EXPECT_CALL(*mockMetacache_.get(), GetTargetLeader(_, _)) + .WillRepeatedly(Return(true)); + 
EXPECT_CALL(*mockMetacache_.get(), GetTarget(_, _, _, _)) + .WillRepeatedly(DoAll(SetArgPointee<2>(target_), Return(true))); + + // CASE 1: PrewriteRenameTx success + response.set_statuscode(MetaStatusCode::OK); + EXPECT_CALL(mockMetaServerService_, PrewriteRenameTx(_, _, _, _)) + .WillOnce(DoAll(SetArgPointee<2>(response), + Invoke(SetRpcService))); + + auto dentrys = std::vector{dentry}; + auto rc = metaserverCli_.PrewriteRenameTx(dentrys, txLockIn, &txLockOut); + ASSERT_EQ(rc, MetaStatusCode::OK); + + // CASE 2: PrewriteRenameTx fail + response.set_statuscode(MetaStatusCode::STORAGE_INTERNAL_ERROR); + EXPECT_CALL(mockMetaServerService_, PrewriteRenameTx(_, _, _, _)) + .WillOnce(DoAll(SetArgPointee<2>(response), + Invoke(SetRpcService))); + + dentrys = std::vector{dentry}; + rc = metaserverCli_.PrewriteRenameTx(dentrys, txLockIn, &txLockOut); + ASSERT_EQ(rc, MetaStatusCode::STORAGE_INTERNAL_ERROR); + + // CASE 3: RPC error + EXPECT_CALL(mockMetaServerService_, PrewriteRenameTx(_, _, _, _)) + .WillRepeatedly(Invoke(SetRpcService)); + + dentrys = std::vector{dentry}; + rc = metaserverCli_.PrewriteRenameTx(dentrys, txLockIn, &txLockOut); + ASSERT_EQ(rc, MetaStatusCode::RPC_ERROR); +} + +TEST_F(MetaServerClientImplTest, CheckTxStatus) { + curvefs::metaserver::CheckTxStatusResponse response; + uint32_t poolId = 1; + uint64_t inodeId = 1; + std::string primaryKey = "key"; + uint64_t startTs = 1; + uint64_t curTimestamp = 100; + + EXPECT_CALL(*mockMetacache_.get(), GetTargetLeader(_, _)) + .WillRepeatedly(Return(true)); + EXPECT_CALL(*mockMetacache_.get(), GetTarget(_, _, _, _)) + .WillRepeatedly(DoAll(SetArgPointee<2>(target_), Return(true))); + + // CASE 1: CheckTxStatus success + response.set_statuscode(MetaStatusCode::OK); + EXPECT_CALL(mockMetaServerService_, CheckTxStatus(_, _, _, _)) + .WillOnce(DoAll(SetArgPointee<2>(response), + Invoke(SetRpcService))); + + auto rc = metaserverCli_.CheckTxStatus(poolId, inodeId, primaryKey, + startTs, curTimestamp); + 
ASSERT_EQ(rc, MetaStatusCode::OK); + + // CASE 2: CheckTxStatus fail + response.set_statuscode(MetaStatusCode::STORAGE_INTERNAL_ERROR); + EXPECT_CALL(mockMetaServerService_, CheckTxStatus(_, _, _, _)) + .WillOnce(DoAll(SetArgPointee<2>(response), + Invoke(SetRpcService))); + + rc = metaserverCli_.CheckTxStatus(poolId, inodeId, primaryKey, startTs, + curTimestamp); + ASSERT_EQ(rc, MetaStatusCode::STORAGE_INTERNAL_ERROR); + + // CASE 3: RPC error + EXPECT_CALL(mockMetaServerService_, CheckTxStatus(_, _, _, _)) + .WillRepeatedly(Invoke(SetRpcService)); + + rc = metaserverCli_.CheckTxStatus(poolId, inodeId, primaryKey, startTs, + curTimestamp); + ASSERT_EQ(rc, MetaStatusCode::RPC_ERROR); +} + +TEST_F(MetaServerClientImplTest, ResolveTxLock) { + curvefs::metaserver::ResolveTxLockResponse response; + Dentry dentry; + dentry.set_fsid(1); + dentry.set_inodeid(2); + dentry.set_parentinodeid(3); + dentry.set_name("A"); + uint64_t startTs = 1; + + EXPECT_CALL(*mockMetacache_.get(), GetTargetLeader(_, _)) + .WillRepeatedly(Return(true)); + EXPECT_CALL(*mockMetacache_.get(), GetTarget(_, _, _, _)) + .WillRepeatedly(DoAll(SetArgPointee<2>(target_), Return(true))); + + // CASE 1: ResolveTxLock success + response.set_statuscode(MetaStatusCode::OK); + EXPECT_CALL(mockMetaServerService_, ResolveTxLock(_, _, _, _)) + .WillOnce(DoAll(SetArgPointee<2>(response), + Invoke(SetRpcService))); + + auto rc = metaserverCli_.ResolveTxLock(dentry, startTs, true); + ASSERT_EQ(rc, MetaStatusCode::OK); + + // CASE 2: ResolveTxLock fail + response.set_statuscode(MetaStatusCode::STORAGE_INTERNAL_ERROR); + EXPECT_CALL(mockMetaServerService_, ResolveTxLock(_, _, _, _)) + .WillOnce(DoAll(SetArgPointee<2>(response), + Invoke(SetRpcService))); + + rc = metaserverCli_.ResolveTxLock(dentry, startTs, true); + ASSERT_EQ(rc, MetaStatusCode::STORAGE_INTERNAL_ERROR); + + // CASE 3: RPC error + EXPECT_CALL(mockMetaServerService_, ResolveTxLock(_, _, _, _)) + .WillRepeatedly(Invoke(SetRpcService)); + + rc = 
metaserverCli_.ResolveTxLock(dentry, startTs, true); + ASSERT_EQ(rc, MetaStatusCode::RPC_ERROR); +} + +TEST_F(MetaServerClientImplTest, CommitTx) { + curvefs::metaserver::CommitTxResponse response; + Dentry dentry; + dentry.set_fsid(1); + dentry.set_inodeid(2); + dentry.set_parentinodeid(3); + dentry.set_name("A"); + uint64_t startTs = 1; + uint64_t commitTs = 2; + + EXPECT_CALL(*mockMetacache_.get(), GetTargetLeader(_, _)) + .WillRepeatedly(Return(true)); + EXPECT_CALL(*mockMetacache_.get(), GetTarget(_, _, _, _)) + .WillRepeatedly(DoAll(SetArgPointee<2>(target_), Return(true))); + + // CASE 1: CommitTx success + response.set_statuscode(MetaStatusCode::OK); + EXPECT_CALL(mockMetaServerService_, CommitTx(_, _, _, _)) + .WillOnce(DoAll(SetArgPointee<2>(response), + Invoke(SetRpcService))); + + auto dentrys = std::vector{dentry}; + auto rc = metaserverCli_.CommitTx(dentrys, startTs, commitTs); + ASSERT_EQ(rc, MetaStatusCode::OK); + + // CASE 2: CommitTx fail + response.set_statuscode(MetaStatusCode::STORAGE_INTERNAL_ERROR); + EXPECT_CALL(mockMetaServerService_, CommitTx(_, _, _, _)) + .WillOnce(DoAll(SetArgPointee<2>(response), + Invoke(SetRpcService))); + + rc = metaserverCli_.CommitTx(dentrys, startTs, commitTs); + ASSERT_EQ(rc, MetaStatusCode::STORAGE_INTERNAL_ERROR); + + // CASE 3: RPC error + EXPECT_CALL(mockMetaServerService_, CommitTx(_, _, _, _)) + .WillRepeatedly(Invoke(SetRpcService< + curvefs::metaserver::CommitTxRequest, + curvefs::metaserver::CommitTxResponse, true>)); + + rc = metaserverCli_.CommitTx(dentrys, startTs, commitTs); + ASSERT_EQ(rc, MetaStatusCode::RPC_ERROR); +} + TEST_F(MetaServerClientImplTest, test_GetInode) { // in uint32_t fsid = 1; @@ -514,6 +804,8 @@ TEST_F(MetaServerClientImplTest, test_GetInode) { Invoke(SetRpcService)); EXPECT_CALL(*mockMetacache_.get(), GetTarget(_, _, _, _)) .WillRepeatedly(DoAll(SetArgPointee<2>(target_), Return(true))); + EXPECT_CALL(*mockMetacache_.get(), GetTargetLeader(_, _)) + 
.WillRepeatedly(Return(true)); MetaStatusCode status = metaserverCli_.GetInode( fsid, inodeid, &out, &streaming); @@ -587,6 +879,8 @@ TEST_F(MetaServerClientImplTest, test_UpdateInodeAttr) { SetRpcService)); EXPECT_CALL(*mockMetacache_.get(), GetTarget(_, _, _, _)) .WillRepeatedly(DoAll(SetArgPointee<2>(target_), Return(true))); + EXPECT_CALL(*mockMetacache_.get(), GetTargetLeader(_, _)) + .WillRepeatedly(Return(true)); MetaStatusCode status = metaserverCli_.UpdateInodeAttr( inode.fsid(), inode.inodeid(), ToInodeAttr(inode)); @@ -786,6 +1080,8 @@ TEST_F(MetaServerClientImplTest, test_CreateInode) { SetRpcService)); EXPECT_CALL(*mockMetacache_.get(), SelectTarget(_, _)) .WillRepeatedly(Return(true)); + EXPECT_CALL(*mockMetacache_.get(), GetTargetLeader(_, _)) + .WillRepeatedly(Return(true)); MetaStatusCode status = metaserverCli_.CreateInode(inode, &out); ASSERT_EQ(MetaStatusCode::RPC_ERROR, status); @@ -844,6 +1140,8 @@ TEST_F(MetaServerClientImplTest, test_DeleteInode) { SetRpcService)); EXPECT_CALL(*mockMetacache_.get(), GetTarget(_, _, _, _)) .WillRepeatedly(DoAll(SetArgPointee<2>(target_), Return(true))); + EXPECT_CALL(*mockMetacache_.get(), GetTargetLeader(_, _)) + .WillRepeatedly(Return(true)); MetaStatusCode status = metaserverCli_.DeleteInode(fsId, inodeid); ASSERT_EQ(MetaStatusCode::RPC_ERROR, status); @@ -917,6 +1215,8 @@ TEST_F(MetaServerClientImplTest, test_BatchGetInodeAttr) { BatchGetInodeAttrResponse, true>)); EXPECT_CALL(*mockMetacache_.get(), GetTarget(_, _, _, _)) .WillRepeatedly(DoAll(SetArgPointee<2>(target_), Return(true))); + EXPECT_CALL(*mockMetacache_.get(), GetTargetLeader(_, _)) + .WillRepeatedly(Return(true)); MetaStatusCode status = metaserverCli_.BatchGetInodeAttr( fsid, inodeIds, &attr); @@ -993,6 +1293,8 @@ TEST_F(MetaServerClientImplTest, test_BatchGetXAttr) { BatchGetXAttrResponse, true>)); EXPECT_CALL(*mockMetacache_.get(), GetTarget(_, _, _, _)) .WillRepeatedly(DoAll(SetArgPointee<2>(target_), Return(true))); + 
EXPECT_CALL(*mockMetacache_.get(), GetTargetLeader(_, _)) + .WillRepeatedly(Return(true)); MetaStatusCode status = metaserverCli_.BatchGetXAttr( fsid, inodeIds, &xattr); diff --git a/curvefs/test/client/rpcclient/mock_mds_base_client.h b/curvefs/test/client/rpcclient/mock_mds_base_client.h index 6f06d72dd0..6aa7b99e88 100644 --- a/curvefs/test/client/rpcclient/mock_mds_base_client.h +++ b/curvefs/test/client/rpcclient/mock_mds_base_client.h @@ -123,6 +123,9 @@ class MockMDSBaseClient : public MDSBaseClient { void(uint32_t fsId, AllocOrGetMemcacheClusterResponse* response, brpc::Controller* cntl, brpc::Channel* channel)); + + MOCK_METHOD(void, Tso, (const TsoRequest& request, TsoResponse* response, + brpc::Controller* cntl, brpc::Channel* channel), (override)); }; } // namespace rpcclient } // namespace client diff --git a/curvefs/test/client/rpcclient/mock_mds_client.h b/curvefs/test/client/rpcclient/mock_mds_client.h index 2e86d48902..c77a2296cc 100644 --- a/curvefs/test/client/rpcclient/mock_mds_client.h +++ b/curvefs/test/client/rpcclient/mock_mds_client.h @@ -106,12 +106,18 @@ class MockMdsClient : public MdsClient { bool(uint32_t fsID, std::vector* partitionInfos)); - MOCK_METHOD5(RefreshSession, - FSStatusCode(const std::vector &txIds, - std::vector *latestTxIdList, + MOCK_METHOD7(RefreshSession, + FSStatusCode(const std::vector& txIds, + std::vector* latestTxIdList, const std::string& fsName, const Mountpoint& mountpoint, - std::atomic* enableSumInDir)); + std::atomic* enableSumInDir, + const std::string& mdsAddrs, + std::string* mdsAddrsOverride)); + + MOCK_METHOD0(GetMdsAddrs, std::string()); + + MOCK_METHOD1(SetMdsAddrs, void(const std::string& mdsAddrs)); MOCK_METHOD4(AllocateVolumeBlockGroup, SpaceErrCode(uint32_t, @@ -132,6 +138,9 @@ class MockMdsClient : public MdsClient { MOCK_METHOD2(AllocOrGetMemcacheCluster, bool(uint32_t, curvefs::mds::topology::MemcacheClusterInfo*)); + + MOCK_METHOD(FSStatusCode, Tso, (uint64_t*, uint64_t*), + (override)); }; } 
// namespace rpcclient } // namespace client diff --git a/curvefs/test/client/rpcclient/mock_metaserver_service.h b/curvefs/test/client/rpcclient/mock_metaserver_service.h index f40dd6b15f..961ce9229f 100644 --- a/curvefs/test/client/rpcclient/mock_metaserver_service.h +++ b/curvefs/test/client/rpcclient/mock_metaserver_service.h @@ -66,6 +66,30 @@ class MockMetaServerService : public curvefs::metaserver::MetaServerService { ::curvefs::metaserver::PrepareRenameTxResponse* response, ::google::protobuf::Closure* done)); + MOCK_METHOD(void, PrewriteRenameTx, + (::google::protobuf::RpcController* controller, + const ::curvefs::metaserver::PrewriteRenameTxRequest* request, + ::curvefs::metaserver::PrewriteRenameTxResponse* response, + ::google::protobuf::Closure* done), (override)); + + MOCK_METHOD(void, CheckTxStatus, + (::google::protobuf::RpcController* controller, + const ::curvefs::metaserver::CheckTxStatusRequest* request, + ::curvefs::metaserver::CheckTxStatusResponse* response, + ::google::protobuf::Closure* done), (override)); + + MOCK_METHOD(void, ResolveTxLock, + (::google::protobuf::RpcController* controller, + const ::curvefs::metaserver::ResolveTxLockRequest* request, + ::curvefs::metaserver::ResolveTxLockResponse* response, + ::google::protobuf::Closure* done), (override)); + + MOCK_METHOD(void, CommitTx, + (::google::protobuf::RpcController* controller, + const ::curvefs::metaserver::CommitTxRequest* request, + ::curvefs::metaserver::CommitTxResponse* response, + ::google::protobuf::Closure* done), (override)); + MOCK_METHOD4(GetInode, void(::google::protobuf::RpcController *controller, const ::curvefs::metaserver::GetInodeRequest *request, diff --git a/curvefs/test/client/test_dentry_cache_manager.cpp b/curvefs/test/client/test_dentry_cache_manager.cpp index d67feff8bc..2f80e82b00 100644 --- a/curvefs/test/client/test_dentry_cache_manager.cpp +++ b/curvefs/test/client/test_dentry_cache_manager.cpp @@ -25,8 +25,9 @@ #include #include +#include 
"curvefs/test/client/rpcclient/mock_mds_client.h" #include "curvefs/test/client/mock_metaserver_client.h" -#include "curvefs/src/client/dentry_cache_manager.h" +#include "curvefs/src/client/dentry_manager.h" namespace curvefs { namespace client { @@ -47,6 +48,7 @@ using ::testing::DoAll; using ::testing::Invoke; using rpcclient::MockMetaServerClient; +using rpcclient::MockMdsClient; class TestDentryCacheManager : public ::testing::Test { protected: @@ -54,8 +56,10 @@ class TestDentryCacheManager : public ::testing::Test { ~TestDentryCacheManager() {} virtual void SetUp() { + mdsClient_ = std::make_shared(); metaClient_ = std::make_shared(); dCacheManager_ = std::make_shared(metaClient_); + dCacheManager_->Init(mdsClient_); dCacheManager_->SetFsId(fsId_); } @@ -67,10 +71,82 @@ class TestDentryCacheManager : public ::testing::Test { protected: std::shared_ptr dCacheManager_; std::shared_ptr metaClient_; + std::shared_ptr mdsClient_; uint32_t fsId_ = 888; uint32_t timeout_ = 3; }; +TEST_F(TestDentryCacheManager, CheckAndResolveTx) { + // In + std::string primaryKey = "3:1:1:A"; + std::string fakePrimaryKey = "ABC"; + uint64_t startTs = 1; + uint64_t commitTs = 2; + uint64_t curTimestamp = 100; + Dentry dentry; + TxLock txLock; + txLock.set_startts(startTs); + // 1. check tx status failed + // case: check tx status parse primary key failed + txLock.set_primarykey(fakePrimaryKey); + ASSERT_EQ(MetaStatusCode::PARSE_FROM_STRING_FAILED, + dCacheManager_->CheckAndResolveTx( + dentry, txLock, curTimestamp, commitTs)); + // case: check failed + txLock.set_primarykey(primaryKey); + EXPECT_CALL(*metaClient_, CheckTxStatus(_, _, _, _, _)) + .WillOnce(Return(MetaStatusCode::TX_MISMATCH)); + ASSERT_EQ(MetaStatusCode::TX_MISMATCH, + dCacheManager_->CheckAndResolveTx( + dentry, txLock, curTimestamp, commitTs)); + + // 2. 
check tx status success and resolve tx failed + EXPECT_CALL(*metaClient_, CheckTxStatus(_, _, _, _, _)) + .WillOnce(Return(MetaStatusCode::TX_COMMITTED)); + EXPECT_CALL(*metaClient_, ResolveTxLock(_, startTs, commitTs)) + .WillOnce(Return(MetaStatusCode::STORAGE_INTERNAL_ERROR)); + ASSERT_EQ(MetaStatusCode::STORAGE_INTERNAL_ERROR, + dCacheManager_->CheckAndResolveTx( + dentry, txLock, curTimestamp, commitTs)); + + EXPECT_CALL(*metaClient_, CheckTxStatus(_, _, _, _, _)) + .WillOnce(Return(MetaStatusCode::TX_ROLLBACKED)); + EXPECT_CALL(*metaClient_, ResolveTxLock(_, startTs, 0)) + .WillOnce(Return(MetaStatusCode::STORAGE_INTERNAL_ERROR)); + ASSERT_EQ(MetaStatusCode::STORAGE_INTERNAL_ERROR, + dCacheManager_->CheckAndResolveTx( + dentry, txLock, curTimestamp, commitTs)); + + EXPECT_CALL(*metaClient_, CheckTxStatus(_, _, _, _, _)) + .WillOnce(Return(MetaStatusCode::TX_TIMEOUT)); + EXPECT_CALL(*metaClient_, ResolveTxLock(_, startTs, 0)) + .WillOnce(Return(MetaStatusCode::STORAGE_INTERNAL_ERROR)); + ASSERT_EQ(MetaStatusCode::STORAGE_INTERNAL_ERROR, + dCacheManager_->CheckAndResolveTx( + dentry, txLock, curTimestamp, commitTs)); + + EXPECT_CALL(*metaClient_, CheckTxStatus(_, _, _, _, _)) + .WillOnce(Return(MetaStatusCode::TX_INPROGRESS)); + ASSERT_EQ(MetaStatusCode::TX_INPROGRESS, + dCacheManager_->CheckAndResolveTx( + dentry, txLock, curTimestamp, commitTs)); + + EXPECT_CALL(*metaClient_, CheckTxStatus(_, _, _, _, _)) + .WillOnce(Return(MetaStatusCode::TX_FAILED)); + ASSERT_EQ(MetaStatusCode::TX_FAILED, + dCacheManager_->CheckAndResolveTx( + dentry, txLock, curTimestamp, commitTs)); + + // 3. 
check tx status success and resolve tx success + EXPECT_CALL(*metaClient_, CheckTxStatus(_, _, _, _, _)) + .WillOnce(Return(MetaStatusCode::TX_COMMITTED)); + EXPECT_CALL(*metaClient_, ResolveTxLock(_, startTs, commitTs)) + .WillOnce(Return(MetaStatusCode::OK)); + ASSERT_EQ(MetaStatusCode::OK, + dCacheManager_->CheckAndResolveTx( + dentry, txLock, curTimestamp, commitTs)); +} + TEST_F(TestDentryCacheManager, GetDentry) { curvefs::client::common::FLAGS_enableCto = false; uint64_t parent = 99; @@ -84,7 +160,7 @@ TEST_F(TestDentryCacheManager, GetDentry) { dentryExp.set_parentinodeid(parent); dentryExp.set_inodeid(inodeid); - EXPECT_CALL(*metaClient_, GetDentry(fsId_, parent, name, _)) + EXPECT_CALL(*metaClient_, GetDentry(fsId_, parent, name, _, _)) .WillOnce(Return(MetaStatusCode::NOT_FOUND)) .WillOnce(DoAll(SetArgPointee<3>(dentryExp), Return(MetaStatusCode::OK))) @@ -106,16 +182,51 @@ TEST_F(TestDentryCacheManager, GetDentry) { curvefs::client::common::FLAGS_enableCto = true; EXPECT_CALL(*metaClient_, DeleteDentry( - fsId_, parent, name, FsFileType::TYPE_FILE)) + fsId_, parent, name, FsFileType::TYPE_FILE, _)) .WillOnce(Return(MetaStatusCode::OK)); dCacheManager_->DeleteDentry(parent, name, FsFileType::TYPE_FILE); - EXPECT_CALL(*metaClient_, GetDentry(fsId_, parent, name, _)) + EXPECT_CALL(*metaClient_, GetDentry(fsId_, parent, name, _, _)) .WillOnce( DoAll(SetArgPointee<3>(dentryExp), Return(MetaStatusCode::OK))); ret = dCacheManager_->GetDentry(parent, name, &out); ASSERT_EQ(CURVEFS_ERROR::OK, ret); ASSERT_TRUE( google::protobuf::util::MessageDifferencer::Equals(dentryExp, out)); + + // get dentry but dentry tx key is locked + // 1. Tso failed + EXPECT_CALL(*metaClient_, GetDentry(fsId_, parent, name, _, _)) + .WillOnce(Return(MetaStatusCode::TX_KEY_LOCKED)); + EXPECT_CALL(*mdsClient_, Tso(_, _)) + .WillOnce(Return(FSStatusCode::UNKNOWN_ERROR)); + ret = dCacheManager_->GetDentry(parent, name, &out); + ASSERT_EQ(CURVEFS_ERROR::INTERNAL, ret); + // 2. 
CheckAndResolveTx failed + EXPECT_CALL(*metaClient_, GetDentry(fsId_, parent, name, _, _)) + .WillOnce(Return(MetaStatusCode::TX_KEY_LOCKED)); + EXPECT_CALL(*mdsClient_, Tso(_, _)) + .WillOnce(Return(FSStatusCode::OK)); + ret = dCacheManager_->GetDentry(parent, name, &out); + ASSERT_EQ(CURVEFS_ERROR::INTERNAL, ret); + // 3. success + TxLock txLock; + txLock.set_primarykey("3:1:1:A"); + txLock.set_startts(1); + EXPECT_CALL(*metaClient_, GetDentry(fsId_, parent, name, _, _)) + .WillOnce(DoAll(SetArgPointee<4>(txLock), + Return(MetaStatusCode::TX_KEY_LOCKED))) + .WillOnce(DoAll(SetArgPointee<3>(dentryExp), + Return(MetaStatusCode::OK))); + EXPECT_CALL(*mdsClient_, Tso(_, _)) + .WillOnce(Return(FSStatusCode::OK)); + EXPECT_CALL(*metaClient_, CheckTxStatus(_, _, _, _, _)) + .WillOnce(Return(MetaStatusCode::TX_COMMITTED)); + EXPECT_CALL(*metaClient_, ResolveTxLock(_, _, _)) + .WillOnce(Return(MetaStatusCode::OK)); + ret = dCacheManager_->GetDentry(parent, name, &out); + ASSERT_EQ(CURVEFS_ERROR::OK, ret); + ASSERT_TRUE( + google::protobuf::util::MessageDifferencer::Equals(dentryExp, out)); } TEST_F(TestDentryCacheManager, CreateAndGetDentry) { @@ -131,11 +242,11 @@ TEST_F(TestDentryCacheManager, CreateAndGetDentry) { dentryExp.set_parentinodeid(parent); dentryExp.set_inodeid(inodeid); - EXPECT_CALL(*metaClient_, CreateDentry(_)) + EXPECT_CALL(*metaClient_, CreateDentry(_, _)) .WillOnce(Return(MetaStatusCode::UNKNOWN_ERROR)) .WillOnce(Return(MetaStatusCode::OK)); - EXPECT_CALL(*metaClient_, GetDentry(fsId_, parent, name, _)) + EXPECT_CALL(*metaClient_, GetDentry(fsId_, parent, name, _, _)) .WillOnce(DoAll(SetArgPointee<3>(dentryExp), Return(MetaStatusCode::OK))); @@ -152,12 +263,12 @@ TEST_F(TestDentryCacheManager, CreateAndGetDentry) { curvefs::client::common::FLAGS_enableCto = true; EXPECT_CALL(*metaClient_, DeleteDentry( - fsId_, parent, name, FsFileType::TYPE_FILE)) + fsId_, parent, name, FsFileType::TYPE_FILE, _)) .WillOnce(Return(MetaStatusCode::OK)); 
dCacheManager_->DeleteDentry(parent, name, FsFileType::TYPE_FILE); - EXPECT_CALL(*metaClient_, CreateDentry(_)) + EXPECT_CALL(*metaClient_, CreateDentry(_, _)) .WillOnce(Return(MetaStatusCode::OK)); - EXPECT_CALL(*metaClient_, GetDentry(fsId_, parent, name, _)) + EXPECT_CALL(*metaClient_, GetDentry(fsId_, parent, name, _, _)) .WillOnce( DoAll(SetArgPointee<3>(dentryExp), Return(MetaStatusCode::OK))); @@ -167,6 +278,38 @@ TEST_F(TestDentryCacheManager, CreateAndGetDentry) { ASSERT_EQ(CURVEFS_ERROR::OK, ret); ASSERT_TRUE( google::protobuf::util::MessageDifferencer::Equals(dentryExp, out)); + + // create dentry but dentry tx key is locked + // 1. Tso failed + EXPECT_CALL(*metaClient_, CreateDentry(_, _)) + .WillOnce(Return(MetaStatusCode::TX_KEY_LOCKED)); + EXPECT_CALL(*mdsClient_, Tso(_, _)) + .WillOnce(Return(FSStatusCode::UNKNOWN_ERROR)); + ret = dCacheManager_->CreateDentry(dentryExp); + ASSERT_EQ(CURVEFS_ERROR::INTERNAL, ret); + // 2. CheckAndResolveTx failed + EXPECT_CALL(*metaClient_, CreateDentry(_, _)) + .WillOnce(Return(MetaStatusCode::TX_KEY_LOCKED)); + EXPECT_CALL(*mdsClient_, Tso(_, _)) + .WillOnce(Return(FSStatusCode::OK)); + ret = dCacheManager_->CreateDentry(dentryExp); + ASSERT_EQ(CURVEFS_ERROR::INTERNAL, ret); + // 3. 
success + TxLock txLock; + txLock.set_primarykey("3:1:1:A"); + txLock.set_startts(1); + EXPECT_CALL(*metaClient_, CreateDentry(_, _)) + .WillOnce(DoAll(SetArgPointee<1>(txLock), + Return(MetaStatusCode::TX_KEY_LOCKED))) + .WillOnce(Return(MetaStatusCode::OK)); + EXPECT_CALL(*mdsClient_, Tso(_, _)) + .WillOnce(Return(FSStatusCode::OK)); + EXPECT_CALL(*metaClient_, CheckTxStatus(_, _, _, _, _)) + .WillOnce(Return(MetaStatusCode::TX_COMMITTED)); + EXPECT_CALL(*metaClient_, ResolveTxLock(_, _, _)) + .WillOnce(Return(MetaStatusCode::OK)); + ret = dCacheManager_->CreateDentry(dentryExp); + ASSERT_EQ(CURVEFS_ERROR::OK, ret); } TEST_F(TestDentryCacheManager, DeleteDentry) { @@ -174,7 +317,7 @@ TEST_F(TestDentryCacheManager, DeleteDentry) { const std::string name = "test"; EXPECT_CALL(*metaClient_, DeleteDentry( - fsId_, parent, name, FsFileType::TYPE_FILE)) + fsId_, parent, name, FsFileType::TYPE_FILE, _)) .WillOnce(Return(MetaStatusCode::NOT_FOUND)) .WillOnce(Return(MetaStatusCode::OK)); @@ -184,6 +327,41 @@ TEST_F(TestDentryCacheManager, DeleteDentry) { ret = dCacheManager_->DeleteDentry(parent, name, FsFileType::TYPE_FILE); ASSERT_EQ(CURVEFS_ERROR::OK, ret); + + // delete dentry but dentry tx key is locked + // 1. Tso failed + EXPECT_CALL(*metaClient_, DeleteDentry( + fsId_, parent, name, FsFileType::TYPE_FILE, _)) + .WillOnce(Return(MetaStatusCode::TX_KEY_LOCKED)); + EXPECT_CALL(*mdsClient_, Tso(_, _)) + .WillOnce(Return(FSStatusCode::UNKNOWN_ERROR)); + ret = dCacheManager_->DeleteDentry(parent, name, FsFileType::TYPE_FILE); + ASSERT_EQ(CURVEFS_ERROR::INTERNAL, ret); + // 2. CheckAndResolveTx failed + EXPECT_CALL(*metaClient_, DeleteDentry( + fsId_, parent, name, FsFileType::TYPE_FILE, _)) + .WillOnce(Return(MetaStatusCode::TX_KEY_LOCKED)); + EXPECT_CALL(*mdsClient_, Tso(_, _)) + .WillOnce(Return(FSStatusCode::OK)); + ret = dCacheManager_->DeleteDentry(parent, name, FsFileType::TYPE_FILE); + ASSERT_EQ(CURVEFS_ERROR::INTERNAL, ret); + // 3. 
success + TxLock txLock; + txLock.set_primarykey("3:1:1:A"); + txLock.set_startts(1); + EXPECT_CALL(*metaClient_, DeleteDentry( + fsId_, parent, name, FsFileType::TYPE_FILE, _)) + .WillOnce(DoAll(SetArgPointee<4>(txLock), + Return(MetaStatusCode::TX_KEY_LOCKED))) + .WillOnce(Return(MetaStatusCode::OK)); + EXPECT_CALL(*mdsClient_, Tso(_, _)) + .WillOnce(Return(FSStatusCode::OK)); + EXPECT_CALL(*metaClient_, CheckTxStatus(_, _, _, _, _)) + .WillOnce(Return(MetaStatusCode::TX_COMMITTED)); + EXPECT_CALL(*metaClient_, ResolveTxLock(_, _, _)) + .WillOnce(Return(MetaStatusCode::OK)); + ret = dCacheManager_->DeleteDentry(parent, name, FsFileType::TYPE_FILE); + ASSERT_EQ(CURVEFS_ERROR::OK, ret); } TEST_F(TestDentryCacheManager, ListDentryNomal) { @@ -194,7 +372,7 @@ TEST_F(TestDentryCacheManager, ListDentryNomal) { part1.resize(limit); part2.resize(limit - 1); - EXPECT_CALL(*metaClient_, ListDentry(fsId_, parent, _, _, _, _)) + EXPECT_CALL(*metaClient_, ListDentry(fsId_, parent, _, _, _, _, _)) .WillOnce(DoAll(SetArgPointee<5>(part1), Return(MetaStatusCode::OK))) .WillOnce(DoAll(SetArgPointee<5>(part2), @@ -209,7 +387,7 @@ TEST_F(TestDentryCacheManager, ListDentryNomal) { TEST_F(TestDentryCacheManager, ListDentryEmpty) { uint64_t parent = 99; - EXPECT_CALL(*metaClient_, ListDentry(fsId_, parent, _, _, _, _)) + EXPECT_CALL(*metaClient_, ListDentry(fsId_, parent, _, _, _, _, _)) .WillOnce(Return(MetaStatusCode::OK)); std::list out; @@ -229,7 +407,7 @@ TEST_F(TestDentryCacheManager, ListDentryOnlyDir) { TEST_F(TestDentryCacheManager, ListDentryFailed) { uint64_t parent = 99; - EXPECT_CALL(*metaClient_, ListDentry(fsId_, parent, _, _, _, _)) + EXPECT_CALL(*metaClient_, ListDentry(fsId_, parent, _, _, _, _, _)) .WillOnce(Return(MetaStatusCode::UNKNOWN_ERROR)); std::list out; @@ -238,6 +416,57 @@ TEST_F(TestDentryCacheManager, ListDentryFailed) { ASSERT_EQ(0, out.size()); } +TEST_F(TestDentryCacheManager, ListDentry_txLocked) { + uint64_t parent = 99; + std::list out; + std::list 
part; + // 1. Tso failed + EXPECT_CALL(*metaClient_, ListDentry(fsId_, parent, _, _, _, _, _)) + .WillOnce(Return(MetaStatusCode::TX_KEY_LOCKED)); + EXPECT_CALL(*mdsClient_, Tso(_, _)) + .WillOnce(Return(FSStatusCode::UNKNOWN_ERROR)); + CURVEFS_ERROR ret = dCacheManager_->ListDentry(parent, &out, 100); + ASSERT_EQ(CURVEFS_ERROR::INTERNAL, ret); + // 2. tx key locked but part empty + EXPECT_CALL(*metaClient_, ListDentry(fsId_, parent, _, _, _, _, _)) + .WillOnce(Return(MetaStatusCode::TX_KEY_LOCKED)); + EXPECT_CALL(*mdsClient_, Tso(_, _)) + .WillOnce(Return(FSStatusCode::OK)); + ret = dCacheManager_->ListDentry(parent, &out, 100); + ASSERT_EQ(CURVEFS_ERROR::INTERNAL, ret); + // 3. CheckAndResolveTx failed + EXPECT_CALL(*metaClient_, ListDentry(fsId_, parent, _, _, _, _, _)) + .WillOnce(DoAll(SetArgPointee<5>(part), + Return(MetaStatusCode::TX_KEY_LOCKED))); + EXPECT_CALL(*mdsClient_, Tso(_, _)) + .WillOnce(Return(FSStatusCode::OK)); + Dentry dentry; + dentry.set_fsid(fsId_); + dentry.set_name("test"); + dentry.set_parentinodeid(parent); + dentry.set_inodeid(100); + part.emplace_back(dentry); + ret = dCacheManager_->ListDentry(parent, &out, 100); + ASSERT_EQ(CURVEFS_ERROR::INTERNAL, ret); + // 4. 
success + TxLock txLock; + txLock.set_primarykey("3:1:1:A"); + txLock.set_startts(1); + EXPECT_CALL(*metaClient_, ListDentry(fsId_, parent, _, _, _, _, _)) + .WillOnce(DoAll(SetArgPointee<5>(part), SetArgPointee<6>(txLock), + Return(MetaStatusCode::TX_KEY_LOCKED))) + .WillOnce(DoAll(SetArgPointee<5>(part), + Return(MetaStatusCode::OK))); + EXPECT_CALL(*mdsClient_, Tso(_, _)) + .WillOnce(Return(FSStatusCode::OK)); + EXPECT_CALL(*metaClient_, CheckTxStatus(_, _, _, _, _)) + .WillOnce(Return(MetaStatusCode::TX_COMMITTED)); + EXPECT_CALL(*metaClient_, ResolveTxLock(_, _, _)) + .WillOnce(Return(MetaStatusCode::OK)); + ret = dCacheManager_->ListDentry(parent, &out, 100); + ASSERT_EQ(CURVEFS_ERROR::OK, ret); +} + TEST_F(TestDentryCacheManager, GetTimeOutDentry) { curvefs::client::common::FLAGS_enableCto = false; uint64_t parent = 99; @@ -251,10 +480,10 @@ TEST_F(TestDentryCacheManager, GetTimeOutDentry) { dentryExp.set_parentinodeid(parent); dentryExp.set_inodeid(inodeid); - EXPECT_CALL(*metaClient_, CreateDentry(_)) + EXPECT_CALL(*metaClient_, CreateDentry(_, _)) .WillOnce(Return(MetaStatusCode::OK)); - EXPECT_CALL(*metaClient_, GetDentry(fsId_, parent, name, _)) + EXPECT_CALL(*metaClient_, GetDentry(fsId_, parent, name, _, _)) .WillOnce(DoAll(SetArgPointee<3>(dentryExp), Return(MetaStatusCode::OK))); @@ -269,7 +498,7 @@ TEST_F(TestDentryCacheManager, GetTimeOutDentry) { // get from metaserver when timeout sleep(timeout_); - EXPECT_CALL(*metaClient_, GetDentry(fsId_, parent, name, _)) + EXPECT_CALL(*metaClient_, GetDentry(fsId_, parent, name, _, _)) .WillOnce(Return(MetaStatusCode::OK)); ret = dCacheManager_->GetDentry(parent, name, &out); ASSERT_EQ(CURVEFS_ERROR::OK, ret); diff --git a/curvefs/test/client/test_fuse_s3_client.cpp b/curvefs/test/client/test_fuse_s3_client.cpp index dd71313917..c948fdf25e 100644 --- a/curvefs/test/client/test_fuse_s3_client.cpp +++ b/curvefs/test/client/test_fuse_s3_client.cpp @@ -41,9 +41,9 @@ #include "curvefs/src/common/define.h" 
#include "curvefs/test/client/mock_client_s3.h" #include "curvefs/test/client/mock_client_s3_adaptor.h" -#include "curvefs/test/client/mock_dentry_cache_mamager.h" +#include "curvefs/test/client/mock_dentry_mamager.h" #include "curvefs/test/client/mock_disk_cache_manager.h" -#include "curvefs/test/client/mock_inode_cache_manager.h" +#include "curvefs/test/client/mock_inode_manager.h" #include "curvefs/test/client/mock_metaserver_client.h" #include "curvefs/test/client/rpcclient/mock_mds_client.h" #include "fuse3/fuse_lowlevel.h" @@ -3941,8 +3941,6 @@ TEST_F(TestFuseS3Client, FuseOpUnlink_EnableSummary) { EXPECT_CALL(*inodeManager_, GetInode(_, _)) .WillOnce( DoAll(SetArgReferee<1>(inodeWrapper), Return(CURVEFS_ERROR::OK))) - .WillOnce(DoAll( - SetArgReferee<1>(parentInodeWrapper), Return(CURVEFS_ERROR::OK))) .WillOnce( DoAll(SetArgReferee<1>(inodeWrapper), Return(CURVEFS_ERROR::OK))) .WillOnce(DoAll( @@ -3952,12 +3950,8 @@ TEST_F(TestFuseS3Client, FuseOpUnlink_EnableSummary) { EXPECT_CALL(*metaClient_, UpdateInodeAttr(_, _, _)) .WillRepeatedly(Return(MetaStatusCode::OK)); - EXPECT_CALL(*inodeManager_, ShipToFlush(_)).Times(1); - CURVEFS_ERROR ret = client_->FuseOpUnlink(req, parent, name.c_str()); ASSERT_EQ(CURVEFS_ERROR::OK, ret); - Inode inode2 = inodeWrapper->GetInode(); - ASSERT_EQ(nlink - 1, inode2.nlink()); auto p = parentInodeWrapper->GetInode(); ASSERT_EQ(3, p.nlink()); @@ -4071,7 +4065,10 @@ TEST_F(TestFuseS3Client, FuseOpListXattr) { .WillOnce(DoAll(SetArgPointee<1>(inode), Return(CURVEFS_ERROR::OK))); ret = client_->FuseOpListXattr(req, ino, buf, size, &realSize); ASSERT_EQ(CURVEFS_ERROR::OK, ret); - auto expected = key.length() + 1; + auto expected = key.length() + 1 + strlen(XATTR_DIR_RFILES) + 1 + + strlen(XATTR_DIR_RSUBDIRS) + 1 + + strlen(XATTR_DIR_RENTRIES) + 1 + + strlen(XATTR_DIR_RFBYTES) + 1; ASSERT_EQ(realSize, expected); realSize = 0; diff --git a/curvefs/test/client/test_fuse_volume_client.cpp b/curvefs/test/client/test_fuse_volume_client.cpp 
index 7f89072c6f..4aafa17fee 100644 --- a/curvefs/test/client/test_fuse_volume_client.cpp +++ b/curvefs/test/client/test_fuse_volume_client.cpp @@ -27,8 +27,8 @@ #include "curvefs/proto/metaserver.pb.h" #include "curvefs/src/client/fuse_volume_client.h" #include "curvefs/src/common/define.h" -#include "curvefs/test/client/mock_dentry_cache_mamager.h" -#include "curvefs/test/client/mock_inode_cache_manager.h" +#include "curvefs/test/client/mock_dentry_mamager.h" +#include "curvefs/test/client/mock_inode_manager.h" #include "curvefs/test/client/rpcclient/mock_mds_client.h" #include "curvefs/test/client/mock_metaserver_client.h" #include "curvefs/test/client/mock_volume_storage.h" @@ -571,8 +571,6 @@ TEST_F(TestFuseVolumeClient, FuseOpUnlink) { EXPECT_CALL(*inodeManager_, GetInode(_, _)) .WillOnce( DoAll(SetArgReferee<1>(inodeWrapper), Return(CURVEFS_ERROR::OK))) - .WillOnce(DoAll( - SetArgReferee<1>(parentInodeWrapper), Return(CURVEFS_ERROR::OK))) .WillOnce( DoAll(SetArgReferee<1>(inodeWrapper), Return(CURVEFS_ERROR::OK))); EXPECT_CALL(*metaClient_, GetInodeAttr(_, _, _)) @@ -604,6 +602,11 @@ TEST_F(TestFuseVolumeClient, FuseOpRmDir) { EXPECT_CALL(*dentryManager_, GetDentry(parent, name, _)) .WillOnce(DoAll(SetArgPointee<2>(dentry), Return(CURVEFS_ERROR::OK))); + std::list dentryList; + EXPECT_CALL(*dentryManager_, ListDentry(inodeid, _, _, _, _)) + .WillOnce(DoAll(SetArgPointee<1>(dentryList), + Return(CURVEFS_ERROR::OK))); + EXPECT_CALL(*dentryManager_, DeleteDentry(parent, name, FsFileType::TYPE_DIRECTORY)) .WillOnce(Return(CURVEFS_ERROR::OK)); @@ -634,8 +637,6 @@ TEST_F(TestFuseVolumeClient, FuseOpRmDir) { EXPECT_CALL(*inodeManager_, GetInode(_, _)) .WillOnce( DoAll(SetArgReferee<1>(inodeWrapper), Return(CURVEFS_ERROR::OK))) - .WillOnce(DoAll( - SetArgReferee<1>(parentInodeWrapper), Return(CURVEFS_ERROR::OK))) .WillOnce( DoAll(SetArgReferee<1>(inodeWrapper), Return(CURVEFS_ERROR::OK))); EXPECT_CALL(*metaClient_, GetInodeAttr(_, _, _)) @@ -647,7 +648,6 @@ 
TEST_F(TestFuseVolumeClient, FuseOpRmDir) { ASSERT_EQ(CURVEFS_ERROR::OK, ret); Inode inode2 = inodeWrapper->GetInode(); ASSERT_EQ(nlink - 1, inode2.nlink()); - ASSERT_EQ(2, parentInodeWrapper->GetNlinkLocked()); } TEST_F(TestFuseVolumeClient, FuseOpUnlinkFailed) { @@ -674,7 +674,6 @@ TEST_F(TestFuseVolumeClient, FuseOpUnlinkFailed) { EXPECT_CALL(*dentryManager_, DeleteDentry(parent, name, FsFileType::TYPE_FILE)) .WillOnce(Return(CURVEFS_ERROR::INTERNAL)) - .WillOnce(Return(CURVEFS_ERROR::OK)) .WillOnce(Return(CURVEFS_ERROR::OK)); Inode inode; @@ -701,13 +700,9 @@ TEST_F(TestFuseVolumeClient, FuseOpUnlinkFailed) { EXPECT_CALL(*inodeManager_, GetInode(_, _)) .WillOnce( DoAll(SetArgReferee<1>(inodeWrapper), Return(CURVEFS_ERROR::OK))) - .WillOnce(DoAll( - SetArgReferee<1>(parentInodeWrapper), Return(CURVEFS_ERROR::OK))) .WillOnce(Return(CURVEFS_ERROR::INTERNAL)) .WillOnce( DoAll(SetArgReferee<1>(inodeWrapper), Return(CURVEFS_ERROR::OK))) - .WillOnce(DoAll( - SetArgReferee<1>(parentInodeWrapper), Return(CURVEFS_ERROR::OK))) .WillOnce( DoAll(SetArgReferee<1>(inodeWrapper), Return(CURVEFS_ERROR::OK))); EXPECT_CALL(*metaClient_, GetInodeAttr(_, _, _)) @@ -715,12 +710,15 @@ TEST_F(TestFuseVolumeClient, FuseOpUnlinkFailed) { EXPECT_CALL(*metaClient_, UpdateInodeAttr(_, _, _)) .WillOnce(Return(MetaStatusCode::UNKNOWN_ERROR)); + // get dentry internal failed CURVEFS_ERROR ret = client_->FuseOpUnlink(req, parent, name.c_str()); ASSERT_EQ(CURVEFS_ERROR::INTERNAL, ret); + // delete dentry internal failed ret = client_->FuseOpUnlink(req, parent, name.c_str()); ASSERT_EQ(CURVEFS_ERROR::INTERNAL, ret); + // get inode internal failed ret = client_->FuseOpUnlink(req, parent, name.c_str()); ASSERT_EQ(CURVEFS_ERROR::INTERNAL, ret); diff --git a/curvefs/test/client/test_inode_cache_manager.cpp b/curvefs/test/client/test_inode_cache_manager.cpp index b004acd62f..2499913c9a 100644 --- a/curvefs/test/client/test_inode_cache_manager.cpp +++ b/curvefs/test/client/test_inode_cache_manager.cpp 
@@ -30,11 +30,12 @@ #include "curvefs/src/client/inode_wrapper.h" #include "curvefs/src/client/rpcclient/metaserver_client.h" #include "curvefs/test/client/mock_metaserver_client.h" -#include "curvefs/src/client/inode_cache_manager.h" +#include "curvefs/src/client/inode_manager.h" #include "curvefs/src/common/define.h" #include "curvefs/src/client/filesystem/defer_sync.h" #include "curvefs/src/client/filesystem/openfile.h" #include "curvefs/src/client/filesystem/dir_cache.h" +#include "curvefs/test/client/filesystem/helper/helper.h" namespace curvefs { namespace client { @@ -66,6 +67,13 @@ using ::curvefs::client::common::OpenFilesOption; using ::curvefs::client::filesystem::DeferSync; using ::curvefs::client::filesystem::DirCache; using ::curvefs::client::filesystem::OpenFiles; +using ::curvefs::client::filesystem::DeferSyncBuilder; +using ::curvefs::client::filesystem::MkInode; +using ::curvefs::client::filesystem::MkAttr; +using ::curvefs::client::filesystem::InodeOption; +using ::curvefs::client::filesystem::AttrOption; + +class DeferWatcherTest : public ::testing::Test {}; class TestInodeCacheManager : public ::testing::Test { protected: @@ -80,7 +88,7 @@ class TestInodeCacheManager : public ::testing::Test { RefreshDataOption option; option.maxDataSize = 1; option.refreshDataIntervalSec = 0; - auto deferSync = std::make_shared(DeferSyncOption()); + auto deferSync = std::make_shared(true, DeferSyncOption()); auto openFiles = std::make_shared( OpenFilesOption(), deferSync); iCacheManager_->Init(option, openFiles, deferSync); @@ -98,6 +106,87 @@ class TestInodeCacheManager : public ::testing::Test { uint32_t timeout_ = 3; }; +TEST_F(DeferWatcherTest, Basic_cto) { + auto builder = DeferSyncBuilder(); + auto deferSync = builder.SetOption([&](bool* cto, DeferSyncOption* option) { + *cto = true; + option->delay = 3; + }).Build(); + deferSync->Start(); + deferSync->Push(MkInode(100, InodeOption().length(1024).ctime(123, 456))); + + auto watcher = 
std::make_shared(false, deferSync); + std::set inos { 100 }; + watcher->PreGetAttrs(inos); + + InodeAttr attr = MkAttr(100, AttrOption().length(0).ctime(123, 455)); + std::list attrs; + attrs.emplace_back(attr); // mock get attr from remote + watcher->PostGetAttrs(&attrs); + + InodeAttr out = attrs.front(); + ASSERT_EQ(out.length(), 0); + ASSERT_EQ(out.ctime(), 123); + ASSERT_EQ(out.ctime_ns(), 455); + + deferSync->Stop(); +} + +TEST_F(DeferWatcherTest, Basic) { + auto builder = DeferSyncBuilder(); + auto deferSync = builder.SetOption([&](bool* cto, DeferSyncOption* option) { + *cto = false; + option->delay = 3; + }).Build(); + deferSync->Start(); + deferSync->Push(MkInode(100, InodeOption().length(1024).ctime(123, 456))); + + auto watcher = std::make_shared(false, deferSync); + std::set inos { 100 }; + watcher->PreGetAttrs(inos); + + // CASE 1: attr ctime < defered ctime => update success + { + InodeAttr attr = MkAttr(100, AttrOption().length(0).ctime(123, 455)); + std::list attrs; + attrs.emplace_back(attr); // mock get attr from remote + watcher->PostGetAttrs(&attrs); + + InodeAttr out = attrs.front(); + ASSERT_EQ(out.length(), 1024); + ASSERT_EQ(out.ctime(), 123); + ASSERT_EQ(out.ctime_ns(), 456); + } + + // CASE 2: attr ctime > defered ctime => update failed + { + InodeAttr attr = MkAttr(100, AttrOption().length(0).ctime(123, 457)); + std::list attrs; + attrs.emplace_back(attr); // mock get attr from remote + watcher->PostGetAttrs(&attrs); + + InodeAttr out = attrs.front(); + ASSERT_EQ(out.length(), 0); + ASSERT_EQ(out.ctime(), 123); + ASSERT_EQ(out.ctime_ns(), 457); + } + + // CASE 3: another interface + { + InodeAttr attr = MkAttr(100, AttrOption().length(0).ctime(123, 455)); + std::map attrs; + attrs.emplace(100, attr); // mock get attr from remote + watcher->PostGetAttrs(&attrs); + + InodeAttr out = attrs[100]; + ASSERT_EQ(out.length(), 1024); + ASSERT_EQ(out.ctime(), 123); + ASSERT_EQ(out.ctime_ns(), 456); + } + + deferSync->Stop(); +} + 
TEST_F(TestInodeCacheManager, GetInode) { uint64_t inodeId = 100; uint64_t fileLength = 100; diff --git a/curvefs/test/client/volume/default_volume_storage_test.cpp b/curvefs/test/client/volume/default_volume_storage_test.cpp index 008b0d2004..879addb5f2 100644 --- a/curvefs/test/client/volume/default_volume_storage_test.cpp +++ b/curvefs/test/client/volume/default_volume_storage_test.cpp @@ -27,7 +27,7 @@ #include "curvefs/src/client/filesystem/error.h" #include "curvefs/src/client/filesystem/meta.h" -#include "curvefs/test/client/mock_inode_cache_manager.h" +#include "curvefs/test/client/mock_inode_manager.h" #include "curvefs/test/client/mock_metaserver_client.h" #include "curvefs/test/volume/mock/mock_block_device_client.h" #include "curvefs/test/volume/mock/mock_space_manager.h" diff --git a/curvefs/test/common/arc_cache_test.cpp b/curvefs/test/common/arc_cache_test.cpp new file mode 100644 index 0000000000..ad2aa952cc --- /dev/null +++ b/curvefs/test/common/arc_cache_test.cpp @@ -0,0 +1,204 @@ +/* + * Copyright (c) 2020 NetEase Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * Project: curve + * Created Date: 20231213 + * Author: xuyifeng + */ + +#include +#include +#include + +#include "src/common/lru_cache.h" + +namespace curve { +namespace common { + +static void +assert_cache_metrics(std::shared_ptr> cache) { + auto metrics = cache->GetCacheMetrics(); + auto arcSize = cache->ArcSize(); + /* sizeof(key) + sizeof(value), yet sizeof(int) + sizeof(value) */ + auto sizeofKey = sizeof(int); + auto sizeofValue = sizeof(int); + ASSERT_EQ(arcSize.BSize() * sizeofKey + + arcSize.TSize() * (sizeofKey + sizeofValue), + metrics->cacheBytes.get_value()); +} + +TEST(ArcTest, test_cache_create) { + int maxCount = 5; + auto cache = std::make_shared>(maxCount, + std::make_shared("Cache")); + + ASSERT_EQ(cache->Capacity(), 5); + ASSERT_EQ(cache->Size(), 0); +} + +TEST(ArcTest, test_cache_put) { + int maxCount = 5; + auto cache = std::make_shared>(maxCount, + std::make_shared("Cache")); + auto metrics = cache->GetCacheMetrics(); + + for (int i = 0; i < maxCount; ++i) { + cache->Put(i, i); + } + + ASSERT_TRUE(cache->Size() == maxCount); + + int v; + for (int i = 0; i < maxCount; ++i) { + ASSERT_TRUE(cache->Get(i, &v)); + ASSERT_EQ(v, i); + } + + // run again trigger Touch() internally + for (int i = 0; i < maxCount; ++i) { + ASSERT_TRUE(cache->Get(i, &v)); + ASSERT_EQ(v, i); + } + + assert_cache_metrics(cache); +} + +TEST(ArcTest, test_cache_getlast) { + int maxCount = 5; + auto cache = std::make_shared>(maxCount, + std::make_shared("Cache")); + auto metrics = cache->GetCacheMetrics(); + + for (int i = 0; i < maxCount; ++i) { + cache->Put(i, i); + } + + for (int i = 0; i < maxCount; ++i) { + int k; + ASSERT_TRUE(cache->GetLast(i, &k)); + ASSERT_EQ(k, i); + } +} + +TEST(ArcTest, test_cache_getlast2) { + int maxCount = 5; + auto cache = std::make_shared>(maxCount, + std::make_shared("Cache")); + auto metrics = cache->GetCacheMetrics(); + + for (int i = 0; i < maxCount; ++i) { + cache->Put(i, i); + } + + int k, v; + 
ASSERT_TRUE(cache->GetLast(&k, &v)); + ASSERT_EQ(k, 0); + ASSERT_EQ(v, 0); +} + +static bool filter_check_3(const int &v) { + return v == 3; +} + +TEST(ArcTest, test_cache_getlast3) { + int maxCount = 5; + auto cache = std::make_shared>(maxCount, + std::make_shared("Cache")); + auto metrics = cache->GetCacheMetrics(); + + for (int i = 0; i < maxCount; ++i) { + cache->Put(i, i); + } + + int k, v; + ASSERT_TRUE(cache->GetLast(&k, &v, filter_check_3)); + ASSERT_EQ(k, 3); + ASSERT_EQ(v, 3); +} + +TEST(ArcTest, test_cache_retire) { + int maxCount = 5; + auto cache = std::make_shared>(maxCount, + std::make_shared("Cache")); + auto metrics = cache->GetCacheMetrics(); + + for (int i = 0; i < maxCount+1; ++i) { + cache->Put(i, i); + } + + ASSERT_TRUE(cache->Size() == maxCount); + + int v; + ASSERT_TRUE(cache->Get(0, &v) == false); + for (int i = 1; i < maxCount+1; ++i) { + ASSERT_TRUE(cache->Get(i, &v)); + ASSERT_EQ(v, i); + } + + int removed_count = 0; + for (int i = 100; i < 200; ++i) { + int removed; + int ret = cache->Put(i, i, &removed); + if (ret) { + removed_count++; + // This checks Arc cache's scan-resistent behavior, + // first an item in t2 is eliminated, after that, + // items in t1 is replaced + ASSERT_TRUE(removed == 1 || removed >= 100); + } + (void)removed; + } + ASSERT_EQ(removed_count, (200 - 100)); + + auto s = cache->ArcSize(); + ASSERT_TRUE(s.BSize() + s.TSize() <= 2 * maxCount); + + assert_cache_metrics(cache); +} + +TEST(ArcTest, test_cache_remove) { + int maxCount = 5; + auto cache = std::make_shared>(maxCount, + std::make_shared("Cache")); + auto metrics = cache->GetCacheMetrics(); + + for (int i = 0; i < maxCount; ++i) { + cache->Put(i, i); + } + + cache->Remove(0); + int v; + ASSERT_FALSE(cache->Get(0, &v)); + ASSERT_TRUE(cache->Size() == maxCount-1); + + for (int i = 1; i < maxCount; ++i) { + ASSERT_TRUE(cache->Get(i, &v)); + ASSERT_EQ(v, i); + } + + for (int i = 100; i < 200; ++i) { + cache->Put(i, i); + } + + auto s = cache->ArcSize(); + 
ASSERT_TRUE(s.BSize() + s.TSize() <= 2 * maxCount); + assert_cache_metrics(cache); +} + +} // namespace common +} // namespace curve + diff --git a/curvefs/test/mds/fs_manager_test.cpp b/curvefs/test/mds/fs_manager_test.cpp index af5d78d80d..f949efd188 100644 --- a/curvefs/test/mds/fs_manager_test.cpp +++ b/curvefs/test/mds/fs_manager_test.cpp @@ -135,6 +135,7 @@ class FSManagerTest : public ::testing::Test { FsManagerOption fsManagerOption; fsManagerOption.backEndThreadRunInterSec = 1; fsManagerOption.clientTimeoutSec = 1; + fsManagerOption.mdsListenAddr = addr_; s3Adapter_ = std::make_shared(); fsManager_ = std::make_shared(fsStorage_, spaceManager_, metaserverClient_, @@ -1032,5 +1033,89 @@ TEST_F(FSManagerTest, test_success_get_latest_txid_with_fsid) { ASSERT_EQ(response.txids_size(), 1); } +TEST_F(FSManagerTest, test_GetClientMdsAddrsOverride) { + ASSERT_EQ(fsManager_->GetClientMdsAddrsOverride(), std::string()); +} + +TEST_F(FSManagerTest, test_SetClientMdsAddrsOverride) { + std::string addr("127.0.0.1:9999,127.0.0.1:10000"); + fsManager_->SetClientMdsAddrsOverride(addr); + ASSERT_EQ(fsManager_->GetClientMdsAddrsOverride(), addr + "," + addr_); +} + +TEST_F(FSManagerTest, test_refresh_session_with_mdsoverride) { + CreateS3Fs(); + // set override + std::string mds_new("127.0.0.1:9999"); + fsManager_->SetClientMdsAddrsOverride(mds_new); + + PartitionTxId tmp; + tmp.set_partitionid(1); + tmp.set_txid(1); + std::string fsName = kFsName2; + Mountpoint mountpoint; + mountpoint.set_hostname("127.0.0.1"); + mountpoint.set_port(9000); + mountpoint.set_path("/mnt"); + + RefreshSessionResponse response; + RefreshSessionRequest request; + request.set_fsname(fsName); + *request.mutable_mountpoint() = mountpoint; + request.set_mdsaddrs(addr_); + fsManager_->RefreshSession(&request, &response); + ASSERT_EQ(response.mdsaddrsoverride(), mds_new + "," + addr_); + fsManager_->SetClientMdsAddrsOverride(std::string()); +} + +TEST_F(FSManagerTest, 
test_refresh_session_with_same_mdsoverride) { + CreateS3Fs(); + // set override + std::string mds_new("127.0.0.1:9999"); + fsManager_->SetClientMdsAddrsOverride(mds_new); + PartitionTxId tmp; + tmp.set_partitionid(1); + tmp.set_txid(1); + std::string fsName = kFsName2; + Mountpoint mountpoint; + mountpoint.set_hostname("127.0.0.1"); + mountpoint.set_port(9000); + mountpoint.set_path("/mnt"); + + RefreshSessionResponse response; + RefreshSessionRequest request; + request.set_fsname(fsName); + *request.mutable_mountpoint() = mountpoint; + request.set_mdsaddrs(mds_new + "," + addr_); + fsManager_->RefreshSession(&request, &response); + ASSERT_FALSE(response.has_mdsaddrsoverride()); + fsManager_->SetClientMdsAddrsOverride(std::string()); +} + +TEST_F(FSManagerTest, test_refresh_session_with_old_client) { + CreateS3Fs(); + // set override + auto mds_new = "127.0.0.1:9999"; + fsManager_->SetClientMdsAddrsOverride(mds_new); + + PartitionTxId tmp; + tmp.set_partitionid(1); + tmp.set_txid(1); + std::string fsName = kFsName2; + Mountpoint mountpoint; + mountpoint.set_hostname("127.0.0.1"); + mountpoint.set_port(9000); + mountpoint.set_path("/mnt"); + + RefreshSessionResponse response; + RefreshSessionRequest request; + request.set_fsname(fsName); + *request.mutable_mountpoint() = mountpoint; + // old client do not have mdsaddr in request + fsManager_->RefreshSession(&request, &response); + ASSERT_FALSE(response.has_mdsaddrsoverride()); + fsManager_->SetClientMdsAddrsOverride(std::string()); +} + } // namespace mds } // namespace curvefs diff --git a/curvefs/test/mds/fs_manager_test2.cpp b/curvefs/test/mds/fs_manager_test2.cpp index c0c2bc6685..fb3ee29ec6 100644 --- a/curvefs/test/mds/fs_manager_test2.cpp +++ b/curvefs/test/mds/fs_manager_test2.cpp @@ -241,6 +241,31 @@ TEST_F(FsManagerTest2, CreateFoundConflictFsNameAndNotIdenticalToPreviousOne) { EXPECT_EQ(FSStatusCode::FS_EXIST, fsManager_->CreateFs(&req, nullptr)); } + + // prefix is different + { + FsInfo fsinfo; + 
fsinfo.set_status(FsStatus::NEW); + fsinfo.set_fsname(fsname); + fsinfo.set_blocksize(4 * 1024); + fsinfo.set_fstype(FSType::TYPE_S3); + + auto s3Info2 = *s3Info; + s3Info2.set_objectprefix(1); + fsinfo.mutable_detail()->set_allocated_s3info( + new curvefs::common::S3Info(s3Info2)); + + FsInfoWrapper wrapper(fsinfo); + + EXPECT_CALL(*storage_, Exist(Matcher(_))) + .WillOnce(Return(true)); + + EXPECT_CALL(*storage_, Get(Matcher(_), _)) + .WillOnce( + DoAll(SetArgPointee<1>(wrapper), Return(FSStatusCode::OK))); + + EXPECT_EQ(FSStatusCode::FS_EXIST, fsManager_->CreateFs(&req, nullptr)); + } } TEST_F(FsManagerTest2, CreateFoundUnCompleteOperation) { @@ -411,5 +436,25 @@ TEST_F(FsManagerTest2, checkFsName) { EXPECT_FALSE(FsManager::CheckFsName("curve-test--01")); } +TEST_F(FsManagerTest2, test_tso) { + // Tso success + TsoRequest request; + TsoResponse response; + EXPECT_CALL(*storage_, Tso(_, _)). + WillOnce(DoAll(SetArgPointee<0>(1), + SetArgPointee<1>(100), + Return(FSStatusCode::OK))); + fsManager_->Tso(&request, &response); + ASSERT_EQ(response.statuscode(), FSStatusCode::OK); + ASSERT_EQ(response.ts(), 1); + ASSERT_EQ(response.timestamp(), 100); + + // Tso failed + EXPECT_CALL(*storage_, Tso(_, _)). 
+ WillOnce(Return(FSStatusCode::INTERNAL_ERROR)); + fsManager_->Tso(&request, &response); + ASSERT_EQ(response.statuscode(), FSStatusCode::INTERNAL_ERROR); +} + } // namespace mds } // namespace curvefs diff --git a/curvefs/test/mds/fs_storage_test.cpp b/curvefs/test/mds/fs_storage_test.cpp index c2d56b4618..f293434628 100644 --- a/curvefs/test/mds/fs_storage_test.cpp +++ b/curvefs/test/mds/fs_storage_test.cpp @@ -23,6 +23,7 @@ #include #include #include +#include "curvefs/test/mds/mock/mock_kvstorage_client.h" using ::testing::AtLeast; using ::testing::StrEq; @@ -50,6 +51,8 @@ class FSStorageTest : public ::testing::Test { TEST_F(FSStorageTest, test1) { MemoryFsStorage storage; + ASSERT_TRUE(storage.Init()); + common::Volume volume; uint32_t fsId = 1; uint64_t rootInodeId = 1; @@ -115,6 +118,15 @@ TEST_F(FSStorageTest, test1) { ASSERT_EQ(FSStatusCode::NOT_FOUND, storage.Delete(fs1.GetFsName())); ASSERT_EQ(FSStatusCode::OK, storage.Delete(fs5.GetFsName())); ASSERT_EQ(FSStatusCode::NOT_FOUND, storage.Delete(fs5.GetFsName())); + + // test tso + uint64_t ts; + uint64_t timestamp; + for (int i = 1; i < 5; i++) { + ASSERT_EQ(FSStatusCode::OK, storage.Tso(&ts, ×tamp)); + ASSERT_EQ(ts, i); + } } + } // namespace mds } // namespace curvefs diff --git a/curvefs/test/mds/mds_service_test.cpp b/curvefs/test/mds/mds_service_test.cpp index bdae7f7691..cb5a60b765 100644 --- a/curvefs/test/mds/mds_service_test.cpp +++ b/curvefs/test/mds/mds_service_test.cpp @@ -952,5 +952,21 @@ TEST_F(MdsServiceTest, test_update_fsinfo_parameter_error) { } } +TEST_F(MdsServiceTest, test_tso) { + TsoRequest tsoRequest; + TsoResponse tsoResponse; + for (int i = 1; i < 5; i++) { + cntl.Reset(); + stub_->Tso(&cntl, &tsoRequest, &tsoResponse, nullptr); + if (!cntl.Failed()) { + ASSERT_EQ(tsoResponse.statuscode(), FSStatusCode::OK); + ASSERT_EQ(tsoResponse.ts(), i); + } else { + LOG(ERROR) << "error = " << cntl.ErrorText(); + ASSERT_TRUE(false); + } + } +} + } // namespace mds } // namespace curvefs 
diff --git a/curvefs/test/mds/mock/mock_fs_stroage.h b/curvefs/test/mds/mock/mock_fs_stroage.h index c3d08e4ea9..aa178c5526 100644 --- a/curvefs/test/mds/mock/mock_fs_stroage.h +++ b/curvefs/test/mds/mock/mock_fs_stroage.h @@ -61,6 +61,8 @@ class MockFsStorage : public FsStorage { MOCK_METHOD3( GetFsUsage, FSStatusCode(const std::string&, FsUsage*, bool fromCache)); MOCK_METHOD1(DeleteFsUsage, FSStatusCode(const std::string&)); + MOCK_METHOD(FSStatusCode, Tso, (uint64_t*, uint64_t*), + (override)); }; } // namespace mds diff --git a/curvefs/test/mds/persist_kvstorage_test.cpp b/curvefs/test/mds/persist_kvstorage_test.cpp index c5d25b0760..8595fd86ee 100644 --- a/curvefs/test/mds/persist_kvstorage_test.cpp +++ b/curvefs/test/mds/persist_kvstorage_test.cpp @@ -122,6 +122,17 @@ class PersistKVStorageTest : public ::testing::Test { std::shared_ptr storageCli_; }; +#define DO_INIT(storage, storageCli_) \ + do { \ + std::vector> encoded = \ + PrepareFsInfoSamples(); \ + EXPECT_CALL(*storageCli_, List(_, _, Matcher(_))) \ + .WillOnce( \ + DoAll(SetArgPointee<2>(encoded), Return(EtcdErrCode::EtcdOK)));\ + EXPECT_CALL(*storageCli_, Get(_, _)) \ + .WillRepeatedly(Return(EtcdErrCode::EtcdOK)); \ + } while (false) + TEST_F(PersistKVStorageTest, TestInit) { // list from storage failed { @@ -152,14 +163,7 @@ TEST_F(PersistKVStorageTest, TestInit) { { PersisKVStorage storage(storageCli_); - - std::vector> encoded = - PrepareFsInfoSamples(); - - EXPECT_CALL(*storageCli_, List(_, _, Matcher(_))) - .WillOnce( - DoAll(SetArgPointee<2>(encoded), Return(EtcdErrCode::EtcdOK))); - + DO_INIT(storage, storageCli_); EXPECT_TRUE(storage.Init()); EXPECT_TRUE(storage.Exist(1)); @@ -185,6 +189,8 @@ TEST_F(PersistKVStorageTest, TestGetAndExist) { EXPECT_CALL(*storageCli_, List(_, _, Matcher(_))) .WillOnce( DoAll(SetArgPointee<2>(encoded), Return(EtcdErrCode::EtcdOK))); + EXPECT_CALL(*storageCli_, Get(_, _)) + .WillRepeatedly(Return(EtcdErrCode::EtcdOK)); EXPECT_TRUE(storage.Init()); 
EXPECT_FALSE(storage.Exist(1)); @@ -197,14 +203,9 @@ TEST_F(PersistKVStorageTest, TestGetAndExist) { { PersisKVStorage storage(storageCli_); - std::vector> encoded = - PrepareFsInfoSamples(); - - EXPECT_CALL(*storageCli_, List(_, _, Matcher(_))) - .WillOnce( - DoAll(SetArgPointee<2>(encoded), Return(EtcdErrCode::EtcdOK))); - + DO_INIT(storage, storageCli_); EXPECT_TRUE(storage.Init()); + EXPECT_TRUE(storage.Exist(1)); EXPECT_TRUE(storage.Exist("hello")); EXPECT_TRUE(storage.Exist(2)); @@ -290,17 +291,12 @@ TEST_F(PersistKVStorageTest, TestInsert) { // fs already exists { PersisKVStorage storage(storageCli_); - std::vector> encoded = - PrepareFsInfoSamples(); + DO_INIT(storage, storageCli_); + EXPECT_TRUE(storage.Init()); - EXPECT_CALL(*storageCli_, List(_, _, Matcher(_))) - .WillOnce( - DoAll(SetArgPointee<2>(encoded), Return(EtcdErrCode::EtcdOK))); EXPECT_CALL(*storageCli_, Put(_, _)) .Times(0); - EXPECT_TRUE(storage.Init()); - FsInfoWrapper wrapper; EXPECT_EQ(FSStatusCode::OK, storage.Get("hello", &wrapper)); @@ -310,18 +306,13 @@ TEST_F(PersistKVStorageTest, TestInsert) { // kvstorage error { - PersisKVStorage storage(storageCli_); - std::vector> encoded = - PrepareFsInfoSamples(); + PersisKVStorage storage(storageCli_); + DO_INIT(storage, storageCli_); + EXPECT_TRUE(storage.Init()); - EXPECT_CALL(*storageCli_, List(_, _, Matcher(_))) - .WillOnce( - DoAll(SetArgPointee<2>(encoded), Return(EtcdErrCode::EtcdOK))); EXPECT_CALL(*storageCli_, Put(_, _)) .WillOnce(Return(EtcdErrCode::EtcdInternal)); - EXPECT_TRUE(storage.Init()); - FsInfoWrapper wrapper; EXPECT_EQ(FSStatusCode::OK, storage.Get("hello", &wrapper)); @@ -337,17 +328,12 @@ TEST_F(PersistKVStorageTest, TestInsert) { // kvstorage persist ok { PersisKVStorage storage(storageCli_); - std::vector> encoded = - PrepareFsInfoSamples(); + DO_INIT(storage, storageCli_); + EXPECT_TRUE(storage.Init()); - EXPECT_CALL(*storageCli_, List(_, _, Matcher(_))) - .WillOnce( - DoAll(SetArgPointee<2>(encoded), 
Return(EtcdErrCode::EtcdOK))); EXPECT_CALL(*storageCli_, Put(_, _)) .WillOnce(Return(EtcdErrCode::EtcdOK)); - EXPECT_TRUE(storage.Init()); - FsInfoWrapper wrapper; EXPECT_EQ(FSStatusCode::OK, storage.Get("hello", &wrapper)); @@ -370,17 +356,12 @@ TEST_F(PersistKVStorageTest, TestUpdate) { // fs not found { PersisKVStorage storage(storageCli_); - std::vector> encoded = - PrepareFsInfoSamples(); + DO_INIT(storage, storageCli_); + EXPECT_TRUE(storage.Init()); - EXPECT_CALL(*storageCli_, List(_, _, Matcher(_))) - .WillOnce( - DoAll(SetArgPointee<2>(encoded), Return(EtcdErrCode::EtcdOK))); EXPECT_CALL(*storageCli_, Put(_, _)) .Times(0); - EXPECT_TRUE(storage.Init()); - FsInfoWrapper wrapper; EXPECT_EQ(FSStatusCode::OK, storage.Get("hello", &wrapper)); @@ -395,17 +376,12 @@ TEST_F(PersistKVStorageTest, TestUpdate) { // fs id mismatch { PersisKVStorage storage(storageCli_); - std::vector> encoded = - PrepareFsInfoSamples(); + DO_INIT(storage, storageCli_); + EXPECT_TRUE(storage.Init()); - EXPECT_CALL(*storageCli_, List(_, _, Matcher(_))) - .WillOnce( - DoAll(SetArgPointee<2>(encoded), Return(EtcdErrCode::EtcdOK))); EXPECT_CALL(*storageCli_, Put(_, _)) .Times(0); - EXPECT_TRUE(storage.Init()); - FsInfoWrapper wrapper; EXPECT_EQ(FSStatusCode::OK, storage.Get("hello", &wrapper)); @@ -420,17 +396,12 @@ TEST_F(PersistKVStorageTest, TestUpdate) { // storage failed { PersisKVStorage storage(storageCli_); - std::vector> encoded = - PrepareFsInfoSamples(); + DO_INIT(storage, storageCli_); + EXPECT_TRUE(storage.Init()); - EXPECT_CALL(*storageCli_, List(_, _, Matcher(_))) - .WillOnce( - DoAll(SetArgPointee<2>(encoded), Return(EtcdErrCode::EtcdOK))); EXPECT_CALL(*storageCli_, Put(_, _)) .WillOnce(Return(EtcdErrCode::EtcdInternal)); - EXPECT_TRUE(storage.Init()); - FsInfoWrapper wrapper; EXPECT_EQ(FSStatusCode::OK, storage.Get("hello", &wrapper)); @@ -449,17 +420,12 @@ TEST_F(PersistKVStorageTest, TestUpdate) { // storage ok { PersisKVStorage storage(storageCli_); - std::vector> 
encoded = - PrepareFsInfoSamples(); + DO_INIT(storage, storageCli_); + EXPECT_TRUE(storage.Init()); - EXPECT_CALL(*storageCli_, List(_, _, Matcher(_))) - .WillOnce( - DoAll(SetArgPointee<2>(encoded), Return(EtcdErrCode::EtcdOK))); EXPECT_CALL(*storageCli_, Put(_, _)) .WillOnce(Return(EtcdErrCode::EtcdOK)); - EXPECT_TRUE(storage.Init()); - FsInfoWrapper wrapper; EXPECT_EQ(FSStatusCode::OK, storage.Get("hello", &wrapper)); @@ -480,34 +446,22 @@ TEST_F(PersistKVStorageTest, TestDelete) { // fs not found { PersisKVStorage storage(storageCli_); - std::vector> encoded = - PrepareFsInfoSamples(); + DO_INIT(storage, storageCli_); + EXPECT_TRUE(storage.Init()); - EXPECT_CALL(*storageCli_, List(_, _, Matcher(_))) - .WillOnce( - DoAll(SetArgPointee<2>(encoded), Return(EtcdErrCode::EtcdOK))); EXPECT_CALL(*storageCli_, Delete(_)) .Times(0); - - EXPECT_TRUE(storage.Init()); - EXPECT_EQ(FSStatusCode::NOT_FOUND, storage.Delete("bvar")); } // storage failed { PersisKVStorage storage(storageCli_); - std::vector> encoded = - PrepareFsInfoSamples(); + DO_INIT(storage, storageCli_); + EXPECT_TRUE(storage.Init()); - EXPECT_CALL(*storageCli_, List(_, _, Matcher(_))) - .WillOnce( - DoAll(SetArgPointee<2>(encoded), Return(EtcdErrCode::EtcdOK))); EXPECT_CALL(*storageCli_, Delete(_)) .WillOnce(Return(EtcdErrCode::EtcdInternal)); - - EXPECT_TRUE(storage.Init()); - EXPECT_EQ(FSStatusCode::STORAGE_ERROR, storage.Delete("hello")); EXPECT_TRUE(storage.Exist("hello")); EXPECT_TRUE(storage.Exist(1)); @@ -516,17 +470,11 @@ TEST_F(PersistKVStorageTest, TestDelete) { // storage ok { PersisKVStorage storage(storageCli_); - std::vector> encoded = - PrepareFsInfoSamples(); + DO_INIT(storage, storageCli_); + EXPECT_TRUE(storage.Init()); - EXPECT_CALL(*storageCli_, List(_, _, Matcher(_))) - .WillOnce( - DoAll(SetArgPointee<2>(encoded), Return(EtcdErrCode::EtcdOK))); EXPECT_CALL(*storageCli_, Delete(_)) .WillOnce(Return(EtcdErrCode::EtcdOK)); - - EXPECT_TRUE(storage.Init()); - EXPECT_EQ(FSStatusCode::OK, 
storage.Delete("hello")); EXPECT_FALSE(storage.Exist("hello")); EXPECT_FALSE(storage.Exist(1)); @@ -535,5 +483,51 @@ TEST_F(PersistKVStorageTest, TestDelete) { EXPECT_TRUE(storage.Exist(2)); } } + +TEST_F(PersistKVStorageTest, TestTso) { + uint64_t ts; + uint64_t timestamp; + // tsid not exist + { + PersisKVStorage storage(storageCli_); + // get failed + EXPECT_CALL(*storageCli_, Get(_, _)) + .WillOnce(Return(EtcdErrCode::EtcdUnknown)); + ASSERT_EQ(FSStatusCode::INTERNAL_ERROR, + storage.Tso(&ts, ×tamp)); + // CompareAndSwap failed + EXPECT_CALL(*storageCli_, Get(_, _)) + .WillOnce(Return(EtcdErrCode::EtcdKeyNotExist)); + EXPECT_CALL(*storageCli_, CompareAndSwap(_, _, _)) + .WillOnce(Return(EtcdErrCode::EtcdUnknown)); + ASSERT_EQ(FSStatusCode::INTERNAL_ERROR, + storage.Tso(&ts, ×tamp)); + // success + EXPECT_CALL(*storageCli_, Get(_, _)) + .WillOnce(Return(EtcdErrCode::EtcdKeyNotExist)); + EXPECT_CALL(*storageCli_, CompareAndSwap(_, _, _)) + .WillOnce(Return(EtcdErrCode::EtcdOK)); + for (int i = 1; i < 5; i++) { + ASSERT_EQ(FSStatusCode::OK, storage.Tso(&ts, ×tamp)); + ASSERT_EQ(ts, i); + } + } + // txid exist + { + PersisKVStorage storage(storageCli_); + uint64_t tsId = 10000; + // success + EXPECT_CALL(*storageCli_, Get(_, _)) + .WillOnce(DoAll(SetArgPointee<1>(std::to_string(tsId)), + Return(EtcdErrCode::EtcdOK))); + EXPECT_CALL(*storageCli_, CompareAndSwap(_, _, _)) + .WillOnce(Return(EtcdErrCode::EtcdOK)); + for (int i = 1; i < 5; i++) { + ASSERT_EQ(FSStatusCode::OK, storage.Tso(&ts, ×tamp)); + ASSERT_EQ(ts, i + tsId); + } + } +} + } // namespace mds } // namespace curvefs diff --git a/curvefs/test/metaserver/copyset/raft_cli_service2_test.cpp b/curvefs/test/metaserver/copyset/raft_cli_service2_test.cpp index 86162d3686..a3e037074c 100644 --- a/curvefs/test/metaserver/copyset/raft_cli_service2_test.cpp +++ b/curvefs/test/metaserver/copyset/raft_cli_service2_test.cpp @@ -472,7 +472,7 @@ TEST_F(RaftCliService2Test, ChangePeerTest) { // change peer succeed { - 
sleep(20); + sleep(60); ChangePeersRequest2 request; ChangePeersResponse2 response; SetRequestPoolAndCopysetId(&request); @@ -495,7 +495,7 @@ TEST_F(RaftCliService2Test, ChangePeerTest) { brpc::Controller cntl; CliService2_Stub stub(&channel_); stub.ChangePeers(&cntl, &request, &response, nullptr); - ASSERT_FALSE(cntl.Failed()); + ASSERT_FALSE(cntl.Failed()) << cntl.ErrorText(); // check response ASSERT_EQ(3, response.oldpeers_size()); diff --git a/curvefs/test/metaserver/dentry_manager_test.cpp b/curvefs/test/metaserver/dentry_manager_test.cpp index f113eca4dc..905ef307c1 100644 --- a/curvefs/test/metaserver/dentry_manager_test.cpp +++ b/curvefs/test/metaserver/dentry_manager_test.cpp @@ -35,6 +35,10 @@ namespace curvefs { namespace metaserver { +namespace storage { + DECLARE_int32(tx_lock_ttl_ms); +} + using ::curvefs::metaserver::storage::KVStorage; using ::curvefs::metaserver::storage::RandomStoragePath; using ::curvefs::metaserver::storage::RocksDBStorage; @@ -198,5 +202,141 @@ TEST_F(DentryManagerTest, HandleRenameTx) { ASSERT_EQ(dentryStorage_->Size(), 1); } +TEST_F(DentryManagerTest, PrewriteRenameTx) { + TxLock txLockIn; + TxLock txLockOut; + int64_t logIndex = 1; + uint64_t startTs = 2; + uint64_t commitTs = 3; + Dentry dentryA = GenDentry(1, 0, "A", startTs, 1, false); + // 1. prewrite success + std::vector dentrys = std::vector{dentryA}; + txLockIn.set_primarykey(storage::Key4Dentry(1, 0, "A").SerializeToString()); + txLockIn.set_startts(startTs); + txLockIn.set_timestamp(100); + auto rc = dentryManager_->PrewriteRenameTx(dentrys, txLockIn, logIndex, + &txLockOut); + ASSERT_EQ(rc, MetaStatusCode::OK); + // 2. tx locked + txLockIn.set_startts(1); + rc = dentryManager_->PrewriteRenameTx(dentrys, txLockIn, logIndex, + &txLockOut); + ASSERT_EQ(rc, MetaStatusCode::TX_KEY_LOCKED); + ASSERT_EQ(txLockOut.startts(), startTs); + ASSERT_EQ(txLockOut.primarykey(), txLockIn.primarykey()); + // 3. 
tx write conflict + rc = dentryManager_->CommitTx(dentrys, startTs, commitTs, logIndex++); + ASSERT_EQ(rc, MetaStatusCode::OK); + rc = dentryManager_->PrewriteRenameTx(dentrys, txLockIn, logIndex, + &txLockOut); + ASSERT_EQ(rc, MetaStatusCode::TX_WRITE_CONFLICT); +} + +TEST_F(DentryManagerTest, CheckTxStatus) { + storage::FLAGS_tx_lock_ttl_ms = 100; + + TxLock txLockIn; + TxLock txLockOut; + int64_t logIndex = 1; + uint64_t startTs = 2; + uint64_t commitTs = 3; + Dentry dentryA = GenDentry(1, 0, "A", startTs, 1, false); + std::vector dentrys = std::vector{dentryA}; + txLockIn.set_primarykey(storage::Key4Dentry(1, 0, "A").SerializeToString()); + txLockIn.set_startts(startTs); + txLockIn.set_timestamp(1000); + auto rc = dentryManager_->PrewriteRenameTx(dentrys, txLockIn, logIndex, + &txLockOut); + ASSERT_EQ(rc, MetaStatusCode::OK); + + // timeout + rc = dentryManager_->CheckTxStatus(txLockIn.primarykey(), startTs, 1500, + logIndex); + ASSERT_EQ(rc, MetaStatusCode::TX_TIMEOUT); + // inprogress + rc = dentryManager_->CheckTxStatus(txLockIn.primarykey(), startTs, 1050, + logIndex); + ASSERT_EQ(rc, MetaStatusCode::TX_INPROGRESS); + // commited + rc = dentryManager_->CommitTx(dentrys, startTs, commitTs, logIndex++); + ASSERT_EQ(rc, MetaStatusCode::OK); + rc = dentryManager_->CheckTxStatus(txLockIn.primarykey(), startTs, 1500, + logIndex); + ASSERT_EQ(rc, MetaStatusCode::TX_COMMITTED); +} + + +TEST_F(DentryManagerTest, ResolveTxLock) { + TxLock txLockIn; + TxLock txLockOut; + int64_t logIndex = 1; + uint64_t startTs = 2; + uint64_t commitTs = 3; + Dentry dentryA = GenDentry(1, 0, "A", startTs, 1, false); + std::vector dentrys = std::vector{dentryA}; + txLockIn.set_primarykey(storage::Key4Dentry(1, 0, "A").SerializeToString()); + txLockIn.set_startts(startTs); + txLockIn.set_timestamp(1000); + + // 1. tx lock not exist + auto rc = dentryManager_->ResolveTxLock(dentryA, startTs, commitTs, + logIndex++); + ASSERT_EQ(rc, MetaStatusCode::OK); + // 2. 
tx lock exist, but startts not match + rc = dentryManager_->PrewriteRenameTx(dentrys, txLockIn, logIndex++, + &txLockOut); + ASSERT_EQ(rc, MetaStatusCode::OK); + rc = dentryManager_->ResolveTxLock(dentryA, startTs + 1, commitTs, + logIndex++); + ASSERT_EQ(rc, MetaStatusCode::TX_MISMATCH); + // 3. roll forward success + rc = dentryManager_->ResolveTxLock(dentryA, startTs, commitTs, logIndex++); + ASSERT_EQ(rc, MetaStatusCode::OK); + rc = dentryManager_->CheckTxStatus(txLockIn.primarykey(), startTs, 1500, + logIndex++); + ASSERT_EQ(rc, MetaStatusCode::TX_COMMITTED); + // 4. roll back success + dentrys[0].set_txid(startTs + 2); + txLockIn.set_startts(startTs + 2); + commitTs++; + rc = dentryManager_->PrewriteRenameTx(dentrys, txLockIn, logIndex++, + &txLockOut); + ASSERT_EQ(rc, MetaStatusCode::OK); + rc = dentryManager_->ResolveTxLock(dentryA, startTs + 2, 0, logIndex++); + ASSERT_EQ(rc, MetaStatusCode::OK); + rc = dentryManager_->CheckTxStatus(txLockIn.primarykey(), startTs + 2, 1500, + logIndex++); + ASSERT_EQ(rc, MetaStatusCode::TX_ROLLBACKED); +} + +TEST_F(DentryManagerTest, CommitTx) { + TxLock txLockIn; + TxLock txLockOut; + int64_t logIndex = 1; + uint64_t startTs = 2; + uint64_t commitTs = 3; + Dentry dentryA = GenDentry(1, 0, "A", startTs, 1, false); + std::vector dentrys = std::vector{dentryA}; + + // 1. tx lock not exist + auto rc = dentryManager_->CommitTx(dentrys, startTs, commitTs, logIndex++); + ASSERT_EQ(rc, MetaStatusCode::OK); + // 2. tx lock exist, but startts not match + txLockIn.set_primarykey(storage::Key4Dentry(1, 0, "A").SerializeToString()); + txLockIn.set_startts(startTs); + txLockIn.set_timestamp(1000); + rc = dentryManager_->PrewriteRenameTx(dentrys, txLockIn, logIndex++, + &txLockOut); + ASSERT_EQ(rc, MetaStatusCode::OK); + rc = dentryManager_->CommitTx(dentrys, startTs + 1, commitTs, logIndex++); + ASSERT_EQ(rc, MetaStatusCode::TX_MISMATCH); + // 3. 
commit success + rc = dentryManager_->CommitTx(dentrys, startTs, commitTs, logIndex++); + ASSERT_EQ(rc, MetaStatusCode::OK); + rc = dentryManager_->CheckTxStatus(txLockIn.primarykey(), startTs, 1500, + logIndex++); + ASSERT_EQ(rc, MetaStatusCode::TX_COMMITTED); +} + } // namespace metaserver } // namespace curvefs diff --git a/curvefs/test/metaserver/dentry_storage_test.cpp b/curvefs/test/metaserver/dentry_storage_test.cpp index 4464f20c76..218248bca8 100644 --- a/curvefs/test/metaserver/dentry_storage_test.cpp +++ b/curvefs/test/metaserver/dentry_storage_test.cpp @@ -29,6 +29,7 @@ #include "curvefs/src/metaserver/storage/rocksdb_storage.h" #include "curvefs/test/metaserver/storage/utils.h" #include "src/fs/ext4_filesystem_impl.h" +#include "src/common/timeutility.h" namespace curvefs { namespace metaserver { @@ -38,6 +39,9 @@ using ::curvefs::metaserver::storage::NameGenerator; using ::curvefs::metaserver::storage::RandomStoragePath; using ::curvefs::metaserver::storage::RocksDBStorage; using ::curvefs::metaserver::storage::StorageOptions; +using ::curvefs::metaserver::storage::Status; +using ::curvefs::metaserver::storage::Key4Dentry; +using ::curvefs::metaserver::storage::Key4TxWrite; namespace { auto localfs = curve::fs::Ext4FileSystemImpl::getInstance(); @@ -55,6 +59,8 @@ class DentryStorageTest : public ::testing::Test { kvStorage_ = std::make_shared(options); ASSERT_TRUE(kvStorage_->Open()); logIndex_ = 0; + table4TxWrite_ = nameGenerator_->GetTxWriteTableName(); + table4TxLock_ = nameGenerator_->GetTxLockTableName(); } void TearDown() override { @@ -108,11 +114,25 @@ class DentryStorageTest : public ::testing::Test { ASSERT_EQ(lhs, rhs); } + std::string DentryKey(const Dentry& dentry) { + Key4Dentry key(dentry.fsid(), dentry.parentinodeid(), dentry.name()); + return conv_.SerializeToString(key); + } + + std::string TxWriteKey(const Dentry& dentry, uint64_t ts) { + Key4TxWrite key(dentry.fsid(), dentry.parentinodeid(), + dentry.name(), ts); + return 
conv_.SerializeToString(key); + } + protected: std::string dataDir_; std::shared_ptr nameGenerator_; std::shared_ptr kvStorage_; int64_t logIndex_; + Converter conv_; + std::string table4TxWrite_; + std::string table4TxLock_; }; TEST_F(DentryStorageTest, Insert) { @@ -573,5 +593,254 @@ TEST_F(DentryStorageTest, HandleTx) { ASSERT_EQ(dentry.inodeid(), 1); } +TEST_F(DentryStorageTest, PrewriteTx) { + DentryStorage storage(kvStorage_, nameGenerator_, 0); + ASSERT_TRUE(storage.Init()); + + // 1. prepare original dentry + // { fsId, parentId, name, txId, inodeId, deleteMarkFlag } + Dentry dentry = GenDentry(1, 1, "A", 0, 2, false); + ASSERT_EQ(storage.Insert(dentry, logIndex_++), MetaStatusCode::OK); + ASSERT_EQ(storage.Size(), 1); + + // 2. prepare prewrite dentry + uint64_t startTs = 2; + Dentry dentryA = GenDentry(1, 1, "A", startTs, 2, true); + Dentry dentryB = GenDentry(1, 1, "B", startTs, 3, false); + std::vector dentrys = {dentryA, dentryB}; + TxLock txLock; + txLock.set_primarykey(DentryKey(dentryA)); + txLock.set_startts(startTs); + txLock.set_timestamp(curve::common::TimeUtility::GetTimeofDayMs()); + + // 2.1 write conflict + TxLock outLock; + TxWrite txWrite; + txWrite.set_startts(startTs); + txWrite.set_kind(TxWriteKind::Commit); + Status s = kvStorage_->SSet(table4TxWrite_, + TxWriteKey(dentry, startTs + 1), txWrite); + ASSERT_TRUE(s.ok()); + ASSERT_EQ(storage.PrewriteTx(dentrys, txLock, logIndex_++, &outLock), + MetaStatusCode::TX_WRITE_CONFLICT); + s = kvStorage_->SDel(table4TxWrite_, TxWriteKey(dentry, startTs + 1)); + ASSERT_TRUE(s.ok()); + + // 2.2 key locked and IDEMPOTENCE OK + s = kvStorage_->SSet(table4TxLock_, DentryKey(dentryA), txLock); + ASSERT_TRUE(s.ok()); + ASSERT_EQ(storage.PrewriteTx(dentrys, txLock, logIndex_++, &outLock), + MetaStatusCode::OK); + s = kvStorage_->SDel(table4TxLock_, DentryKey(dentryA)); + ASSERT_TRUE(s.ok()); + + // 2.3 key locked + TxLock preLock(txLock); + preLock.set_startts(startTs + 1); + s = 
kvStorage_->SSet(table4TxLock_, DentryKey(dentryA), preLock); + ASSERT_TRUE(s.ok()); + ASSERT_EQ(storage.PrewriteTx(dentrys, txLock, logIndex_++, &outLock), + MetaStatusCode::TX_KEY_LOCKED); + s = kvStorage_->SDel(table4TxLock_, DentryKey(dentryA)); + ASSERT_TRUE(s.ok()); + + // 2.4 prewrite success + ASSERT_EQ(storage.PrewriteTx( + std::vector(dentrys.begin() + outLock.index(), dentrys.end()), + txLock, logIndex_++, &outLock), MetaStatusCode::OK); + ASSERT_EQ(storage.Size(), 3); + Dentry entryOut; + entryOut.set_fsid(1); + entryOut.set_parentinodeid(1); + entryOut.set_name("A"); + ASSERT_EQ(storage.Get(&entryOut), MetaStatusCode::OK); + ASSERT_TRUE(dentry == entryOut); + entryOut.set_name("B"); + ASSERT_EQ(storage.Get(&entryOut), MetaStatusCode::NOT_FOUND); +} + +TEST_F(DentryStorageTest, CheckTxStatus) { + DentryStorage storage(kvStorage_, nameGenerator_, 0); + ASSERT_TRUE(storage.Init()); + + // 1. tx lock exist, tx timeout + uint64_t startTs = 1; + uint64_t now = curve::common::TimeUtility::GetTimeofDayMs(); + Dentry dentry = GenDentry(1, 1, "A", startTs, 2, false); + TxLock txLock; + txLock.set_primarykey(DentryKey(dentry)); + txLock.set_startts(startTs); + txLock.set_timestamp(now - 10); + txLock.set_ttl(5); + Status s = kvStorage_->SSet(table4TxLock_, DentryKey(dentry), txLock); + ASSERT_TRUE(s.ok()); + ASSERT_EQ( + storage.CheckTxStatus(DentryKey(dentry), startTs, now, logIndex_++), + MetaStatusCode::TX_TIMEOUT); + + // 2. tx lock exist, tx in progress + txLock.set_timestamp(now); + s = kvStorage_->SSet(table4TxLock_, DentryKey(dentry), txLock); + ASSERT_TRUE(s.ok()); + ASSERT_EQ( + storage.CheckTxStatus(DentryKey(dentry), startTs, now, logIndex_++), + MetaStatusCode::TX_INPROGRESS); + s = kvStorage_->SDel(table4TxLock_, DentryKey(dentry)); + ASSERT_TRUE(s.ok()); + + // 3. 
tx lock not exist, tx write not exit, committed + ASSERT_EQ( + storage.CheckTxStatus(DentryKey(dentry), startTs, now, logIndex_++), + MetaStatusCode::TX_COMMITTED); + TxWrite txWrite; + txWrite.set_startts(startTs); + txWrite.set_kind(TxWriteKind::Commit); + s = kvStorage_->SSet(table4TxWrite_, + TxWriteKey(dentry, startTs + 1), txWrite); + ASSERT_TRUE(s.ok()); + ASSERT_EQ( + storage.CheckTxStatus(DentryKey(dentry), startTs, now, logIndex_++), + MetaStatusCode::TX_COMMITTED); + + // 4. tx lock not exist, rollbacked + txWrite.set_kind(TxWriteKind::Rollback); + s = kvStorage_->SSet(table4TxWrite_, TxWriteKey(dentry, startTs), txWrite); + ASSERT_TRUE(s.ok()); + ASSERT_EQ( + storage.CheckTxStatus(DentryKey(dentry), startTs, now, logIndex_++), + MetaStatusCode::TX_ROLLBACKED); +} + +TEST_F(DentryStorageTest, ResolveTxLock) { + DentryStorage storage(kvStorage_, nameGenerator_, 0); + ASSERT_TRUE(storage.Init()); + + uint64_t preTxStartTs = 1; + uint64_t startTs = 10; + uint64_t commitTs = 11; + uint64_t now = curve::common::TimeUtility::GetTimeofDayMs(); + Dentry dentry = GenDentry(1, 1, "A", startTs, 2, false); + + // 1. tx lock not exist + ASSERT_EQ(storage.ResolveTxLock(dentry, preTxStartTs, commitTs, + logIndex_++), MetaStatusCode::OK); + + // 2. 
roll forward + // 2.1 tx lock exist but startts mismatch + TxLock preTxLock; + preTxLock.set_primarykey(DentryKey(dentry)); + preTxLock.set_startts(preTxStartTs + 1); + preTxLock.set_timestamp(now-100); + Status s = kvStorage_->SSet(table4TxLock_, DentryKey(dentry), preTxLock); + ASSERT_TRUE(s.ok()); + ASSERT_EQ(storage.ResolveTxLock(dentry, preTxStartTs, commitTs, + logIndex_++), MetaStatusCode::TX_MISMATCH); + // 2.2 success + preTxLock.set_startts(preTxStartTs); + s = kvStorage_->SSet(table4TxLock_, DentryKey(dentry), preTxLock); + ASSERT_TRUE(s.ok()); + ASSERT_EQ(storage.ResolveTxLock(dentry, preTxStartTs, commitTs, + logIndex_++), MetaStatusCode::OK); + TxLock lockOut; + s = kvStorage_->SGet(table4TxLock_, DentryKey(dentry), &lockOut); + ASSERT_TRUE(s.IsNotFound()); + TxWrite txWriteOut; + s = kvStorage_->SGet(table4TxWrite_, TxWriteKey(dentry, commitTs), + &txWriteOut); + ASSERT_TRUE(s.ok()); + ASSERT_EQ(txWriteOut.kind(), TxWriteKind::Commit); + ASSERT_EQ(txWriteOut.startts(), preTxStartTs); + TS tsOut; + s = kvStorage_->SGet(table4TxWrite_, "latestCommit", &tsOut); + ASSERT_TRUE(s.ok()); + ASSERT_EQ(tsOut.ts(), commitTs); + + // 3. 
roll backward + // prepare rollback site + TxLock txLock(preTxLock); + txLock.set_startts(startTs); + s = kvStorage_->SSet(table4TxLock_, DentryKey(dentry), txLock); + ASSERT_TRUE(s.ok()); + dentry.set_txid(startTs); + ASSERT_EQ(storage.Insert(dentry, logIndex_++), MetaStatusCode::OK); + ASSERT_EQ(storage.Size(), 1); + ASSERT_EQ(storage.ResolveTxLock(dentry, startTs, 0, + logIndex_++), MetaStatusCode::OK); + s = kvStorage_->SGet(table4TxLock_, DentryKey(dentry), &lockOut); + ASSERT_TRUE(s.IsNotFound()); + ASSERT_EQ(storage.Size(), 0); + s = kvStorage_->SGet(table4TxWrite_, TxWriteKey(dentry, startTs), + &txWriteOut); + ASSERT_TRUE(s.ok()); + ASSERT_EQ(txWriteOut.kind(), TxWriteKind::Rollback); + ASSERT_EQ(txWriteOut.startts(), startTs); +} + +TEST_F(DentryStorageTest, CommitTx) { + DentryStorage storage(kvStorage_, nameGenerator_, 0); + ASSERT_TRUE(storage.Init()); + + uint64_t startTs = 1; + uint64_t commitTs = 2; + uint64_t now = curve::common::TimeUtility::GetTimeofDayMs(); + Dentry dentryA = GenDentry(1, 1, "A", startTs, 2, true); + Dentry dentryB = GenDentry(1, 1, "B", startTs, 2, false); + + // 1. tx lock not exist + ASSERT_EQ(storage.CommitTx( + {dentryA, dentryB}, startTs, commitTs, logIndex_++), + MetaStatusCode::OK); + TS tsOut; + Status s = kvStorage_->SGet(table4TxWrite_, "latestCommit", &tsOut); + ASSERT_TRUE(s.ok()); + ASSERT_EQ(tsOut.ts(), startTs); + // 2. tx lock exist, but startts mismatch + TxLock txLock; + txLock.set_primarykey(DentryKey(dentryA)); + txLock.set_startts(startTs + 1); + txLock.set_timestamp(now - 100); + s = kvStorage_->SSet(table4TxLock_, DentryKey(dentryA), txLock); + ASSERT_TRUE(s.ok()); + ASSERT_EQ(storage.CommitTx( + {dentryA, dentryB}, startTs, commitTs, logIndex_++), + MetaStatusCode::TX_MISMATCH); + // 3. 
commit success + TxLock txLockA; + txLockA.set_primarykey(DentryKey(dentryA)); + txLockA.set_startts(startTs); + txLockA.set_timestamp(now - 100); + s = kvStorage_->SSet(table4TxLock_, DentryKey(dentryA), txLockA); + ASSERT_TRUE(s.ok()); + TxLock txLockB; + txLockB.set_primarykey(DentryKey(dentryB)); + txLockB.set_startts(startTs); + txLockB.set_timestamp(now - 100); + s = kvStorage_->SSet(table4TxLock_, DentryKey(dentryB), txLockB); + ASSERT_TRUE(s.ok()); + ASSERT_EQ(storage.CommitTx( + {dentryA, dentryB}, startTs, commitTs, logIndex_++), + MetaStatusCode::OK); + TxLock lockOut; + s = kvStorage_->SGet(table4TxLock_, DentryKey(dentryA), &lockOut); + ASSERT_TRUE(s.IsNotFound()); + s = kvStorage_->SGet(table4TxLock_, DentryKey(dentryB), &lockOut); + ASSERT_TRUE(s.IsNotFound()); + TxWrite txWriteOut; + s = kvStorage_->SGet(table4TxWrite_, TxWriteKey(dentryA, commitTs), + &txWriteOut); + ASSERT_TRUE(s.ok()); + ASSERT_EQ(txWriteOut.kind(), TxWriteKind::Commit); + ASSERT_EQ(txWriteOut.startts(), startTs); + s = kvStorage_->SGet(table4TxWrite_, TxWriteKey(dentryB, commitTs), + &txWriteOut); + ASSERT_TRUE(s.ok()); + ASSERT_EQ(txWriteOut.kind(), TxWriteKind::Commit); + ASSERT_EQ(txWriteOut.startts(), startTs); + s = kvStorage_->SGet(table4TxWrite_, "latestCommit", &tsOut); + ASSERT_TRUE(s.ok()); + ASSERT_EQ(tsOut.ts(), startTs); +} + + } // namespace metaserver } // namespace curvefs diff --git a/curvefs/test/metaserver/inode_storage_test.cpp b/curvefs/test/metaserver/inode_storage_test.cpp index 4ea0bc3f54..4eab5c03c4 100644 --- a/curvefs/test/metaserver/inode_storage_test.cpp +++ b/curvefs/test/metaserver/inode_storage_test.cpp @@ -1033,5 +1033,26 @@ TEST_F(InodeStorageTest, Test_UpdateDeallocatableBlockGroup) { ASSERT_EQ(1, deallocatableBlockGroupVec.size()); } +TEST_F(InodeStorageTest, test_deleting_key) { + InodeStorage storage(kvStorage_, nameGenerator_, 0); + ASSERT_TRUE(storage.Init()); + Inode inode1 = GenInode(1, 1); + Inode inode2 = GenInode(2, 2); + + // insert 
+ ASSERT_EQ(storage.Insert(inode1, logIndex_++), MetaStatusCode::OK); + ASSERT_EQ(storage.Insert(inode2, logIndex_++), MetaStatusCode::OK); + + ASSERT_EQ(storage.AddDeletedInode(Key4Inode( + inode1.fsid(), inode1.inodeid()), 1), MetaStatusCode::OK); + ASSERT_EQ(storage.RemoveDeletedInode(Key4Inode( + inode1.fsid(), inode1.inodeid())), MetaStatusCode::OK); + + ASSERT_EQ(storage.AddDeletedInode(Key4Inode( + inode2.fsid(), inode2.inodeid()), 1), MetaStatusCode::OK); + ASSERT_EQ(storage.RemoveDeletedInode(Key4Inode( + inode2.fsid(), inode2.inodeid())), MetaStatusCode::OK); +} + } // namespace metaserver } // namespace curvefs diff --git a/curvefs/test/metaserver/metaserver_service_test2.cpp b/curvefs/test/metaserver/metaserver_service_test2.cpp index e7376b2e4f..6bb900b171 100644 --- a/curvefs/test/metaserver/metaserver_service_test2.cpp +++ b/curvefs/test/metaserver/metaserver_service_test2.cpp @@ -92,6 +92,10 @@ TEST_F(MetaServerServiceTest2, ServiceOverload) { TEST_SERVICE_OVERLOAD(CreatePartition); TEST_SERVICE_OVERLOAD(DeletePartition); TEST_SERVICE_OVERLOAD(PrepareRenameTx); + TEST_SERVICE_OVERLOAD(PrewriteRenameTx); + TEST_SERVICE_OVERLOAD(CheckTxStatus); + TEST_SERVICE_OVERLOAD(ResolveTxLock); + TEST_SERVICE_OVERLOAD(CommitTx); TEST_SERVICE_OVERLOAD(GetVolumeExtent); TEST_SERVICE_OVERLOAD(UpdateVolumeExtent); TEST_SERVICE_OVERLOAD(UpdateDeallocatableBlockGroup); @@ -130,6 +134,10 @@ TEST_F(MetaServerServiceTest2, CopysetNodeNotFound) { TEST_COPYSETNODE_NOTFOUND(CreatePartition); TEST_COPYSETNODE_NOTFOUND(DeletePartition); TEST_COPYSETNODE_NOTFOUND(PrepareRenameTx); + TEST_COPYSETNODE_NOTFOUND(PrewriteRenameTx); + TEST_COPYSETNODE_NOTFOUND(CheckTxStatus); + TEST_COPYSETNODE_NOTFOUND(ResolveTxLock); + TEST_COPYSETNODE_NOTFOUND(CommitTx); TEST_COPYSETNODE_NOTFOUND(GetVolumeExtent); TEST_COPYSETNODE_NOTFOUND(UpdateVolumeExtent); TEST_COPYSETNODE_NOTFOUND(UpdateDeallocatableBlockGroup); diff --git a/curvefs/test/metaserver/mock/mock_metastore.h 
b/curvefs/test/metaserver/mock/mock_metastore.h index a8c923d3aa..13188aae72 100644 --- a/curvefs/test/metaserver/mock/mock_metastore.h +++ b/curvefs/test/metaserver/mock/mock_metastore.h @@ -122,6 +122,21 @@ class MockMetaStore : public curvefs::metaserver::MetaStore { MetaStatusCode(const UpdateDeallocatableBlockGroupRequest*, UpdateDeallocatableBlockGroupResponse*, int64_t logIndex)); + + MOCK_METHOD(MetaStatusCode, PrewriteRenameTx, + (const PrewriteRenameTxRequest* request, + PrewriteRenameTxResponse* response, int64_t logIndex), (override)); + + MOCK_METHOD(MetaStatusCode, CheckTxStatus, + (const CheckTxStatusRequest* request, + CheckTxStatusResponse* response, int64_t logIndex), (override)); + + MOCK_METHOD(MetaStatusCode, ResolveTxLock, + (const ResolveTxLockRequest* request, + ResolveTxLockResponse* response, int64_t logIndex), (override)); + + MOCK_METHOD(MetaStatusCode, CommitTx, (const CommitTxRequest* request, + CommitTxResponse* response, int64_t logIndex), (override)); }; } // namespace mock diff --git a/curvefs/test/metaserver/partition_test.cpp b/curvefs/test/metaserver/partition_test.cpp index 9d29461e39..7852786dca 100644 --- a/curvefs/test/metaserver/partition_test.cpp +++ b/curvefs/test/metaserver/partition_test.cpp @@ -292,11 +292,12 @@ TEST_F(PartitionTest, dentrynum) { Time tm; tm.set_sec(0); tm.set_nsec(0); - ASSERT_EQ(partition1.CreateDentry(dentry, tm, logIndex_++), - MetaStatusCode::OK); + ASSERT_EQ( + partition1.CreateDentry(dentry, tm, logIndex_++), MetaStatusCode::OK); ASSERT_EQ(partition1.GetDentryNum(), 1); - ASSERT_EQ(partition1.DeleteDentry(dentry, logIndex_++), MetaStatusCode::OK); + ASSERT_EQ( + partition1.DeleteDentry(dentry, tm, logIndex_++), MetaStatusCode::OK); ASSERT_EQ(partition1.GetDentryNum(), 0); } @@ -331,10 +332,10 @@ TEST_F(PartitionTest, PARTITION_ID_MISSMATCH_ERROR) { MetaStatusCode::PARTITION_ID_MISSMATCH); // test DeleteDentry - ASSERT_EQ(partition1.DeleteDentry(dentry1, logIndex_++), - 
MetaStatusCode::PARTITION_ID_MISSMATCH); - ASSERT_EQ(partition1.DeleteDentry(dentry2, logIndex_++), - MetaStatusCode::PARTITION_ID_MISSMATCH); + ASSERT_EQ(partition1.DeleteDentry(dentry1, tm, logIndex_++), + MetaStatusCode::PARTITION_ID_MISSMATCH); + ASSERT_EQ(partition1.DeleteDentry(dentry2, tm, logIndex_++), + MetaStatusCode::PARTITION_ID_MISSMATCH); // test GetDentry ASSERT_EQ(partition1.GetDentry(&dentry1), @@ -404,12 +405,6 @@ TEST_F(PartitionTest, PARTITION_ID_MISSMATCH_ERROR) { UpdateInodeRequest inode3Request = MakeUpdateInodeRequestFromInode(inode3); ASSERT_EQ(partition1.UpdateInode(inode3Request, logIndex_++), MetaStatusCode::PARTITION_ID_MISSMATCH); - - // test InsertInode - ASSERT_EQ(partition1.InsertInode(inode2, logIndex_++), - MetaStatusCode::PARTITION_ID_MISSMATCH); - ASSERT_EQ(partition1.InsertInode(inode3, logIndex_++), - MetaStatusCode::PARTITION_ID_MISSMATCH); } TEST_F(PartitionTest, testGetInodeAttr) { diff --git a/curvefs/test/metaserver/recycle_cleaner_test.cpp b/curvefs/test/metaserver/recycle_cleaner_test.cpp index f7b6481ac4..3a596303b4 100644 --- a/curvefs/test/metaserver/recycle_cleaner_test.cpp +++ b/curvefs/test/metaserver/recycle_cleaner_test.cpp @@ -213,7 +213,7 @@ TEST_F(RecycleCleanerTest, delete_node_test) { // delete dentry fail { - EXPECT_CALL(*metaClient_, DeleteDentry(_, _, _, _)) + EXPECT_CALL(*metaClient_, DeleteDentry(_, _, _, _, _)) .WillOnce(Return(MetaStatusCode::UNKNOWN_ERROR)); ASSERT_FALSE(cleaner_->DeleteNode(dentry)); @@ -221,7 +221,7 @@ TEST_F(RecycleCleanerTest, delete_node_test) { // get parent inode fail { - EXPECT_CALL(*metaClient_, DeleteDentry(_, _, _, _)) + EXPECT_CALL(*metaClient_, DeleteDentry(_, _, _, _, _)) .WillOnce(Return(MetaStatusCode::OK)); EXPECT_CALL(*metaClient_, GetInode(_, _, _, _)) .WillOnce(Return(MetaStatusCode::UNKNOWN_ERROR)); @@ -231,7 +231,7 @@ TEST_F(RecycleCleanerTest, delete_node_test) { // update parent inode fail { - EXPECT_CALL(*metaClient_, DeleteDentry(_, _, _, _)) + 
EXPECT_CALL(*metaClient_, DeleteDentry(_, _, _, _, _)) .WillOnce(Return(MetaStatusCode::OK)); EXPECT_CALL(*metaClient_, GetInode(_, _, _, _)) .WillOnce(Return(MetaStatusCode::OK)); @@ -243,7 +243,7 @@ TEST_F(RecycleCleanerTest, delete_node_test) { // get inode fail { - EXPECT_CALL(*metaClient_, DeleteDentry(_, _, _, _)) + EXPECT_CALL(*metaClient_, DeleteDentry(_, _, _, _, _)) .WillOnce(Return(MetaStatusCode::OK)); EXPECT_CALL(*metaClient_, GetInode(_, _, _, _)) .WillOnce(Return(MetaStatusCode::OK)) @@ -258,7 +258,7 @@ TEST_F(RecycleCleanerTest, delete_node_test) { { Inode inode; inode.set_nlink(0); - EXPECT_CALL(*metaClient_, DeleteDentry(_, _, _, _)) + EXPECT_CALL(*metaClient_, DeleteDentry(_, _, _, _, _)) .WillOnce(Return(MetaStatusCode::OK)); EXPECT_CALL(*metaClient_, GetInode(_, _, _, _)) .WillOnce(Return(MetaStatusCode::OK)) @@ -274,7 +274,7 @@ TEST_F(RecycleCleanerTest, delete_node_test) { { Inode inode; inode.set_nlink(1); - EXPECT_CALL(*metaClient_, DeleteDentry(_, _, _, _)) + EXPECT_CALL(*metaClient_, DeleteDentry(_, _, _, _, _)) .WillOnce(Return(MetaStatusCode::OK)); EXPECT_CALL(*metaClient_, GetInode(_, _, _, _)) .WillOnce(Return(MetaStatusCode::OK)) @@ -292,7 +292,7 @@ TEST_F(RecycleCleanerTest, delete_node_test) { { Inode inode; inode.set_nlink(1); - EXPECT_CALL(*metaClient_, DeleteDentry(_, _, _, _)) + EXPECT_CALL(*metaClient_, DeleteDentry(_, _, _, _, _)) .WillOnce(Return(MetaStatusCode::OK)); EXPECT_CALL(*metaClient_, GetInode(_, _, _, _)) .WillOnce(Return(MetaStatusCode::OK)) @@ -406,7 +406,7 @@ TEST_F(RecycleCleanerTest, scan_recycle_test6) { LOG(INFO) << "create dentry1 " << dentry1.ShortDebugString(); LOG(INFO) << "create dentry2 " << dentry2.ShortDebugString(); - EXPECT_CALL(*metaClient_, ListDentry(_, _, _, _, _, _)) + EXPECT_CALL(*metaClient_, ListDentry(_, _, _, _, _, _, _)) .WillOnce(Return(MetaStatusCode::OK)); ASSERT_FALSE(cleaner_->ScanRecycle()); diff --git a/curvefs/test/metaserver/storage/dumpfile_test.cpp 
b/curvefs/test/metaserver/storage/dumpfile_test.cpp index ee618e955c..cfa9c8ee11 100644 --- a/curvefs/test/metaserver/storage/dumpfile_test.cpp +++ b/curvefs/test/metaserver/storage/dumpfile_test.cpp @@ -39,6 +39,7 @@ namespace storage { using Hash = std::unordered_map; +namespace { class HashIterator : public Iterator { public: explicit HashIterator(Hash* hash) @@ -56,6 +57,7 @@ class HashIterator : public Iterator { Hash::iterator iter_; Hash* hash_; }; +} // namespace class DumpFileTest : public ::testing::Test { protected: diff --git a/curvefs/test/metaserver/storage/iterator_test.cpp b/curvefs/test/metaserver/storage/iterator_test.cpp index ca76d40c30..f28a4ffcaa 100644 --- a/curvefs/test/metaserver/storage/iterator_test.cpp +++ b/curvefs/test/metaserver/storage/iterator_test.cpp @@ -38,8 +38,8 @@ namespace storage { using Hash = std::unordered_map; using ContainerType = std::map; -using google::protobuf::util::MessageDifferencer; +namespace { class HashIterator : public Iterator { public: explicit HashIterator(Hash* hash) @@ -58,6 +58,7 @@ class HashIterator : public Iterator { Hash* hash_; Hash::iterator iter_; }; +} // namespace class IteratorTest : public ::testing::Test { protected: diff --git a/curvefs/test/metaserver/storage/rocksdb_storage_test.cpp b/curvefs/test/metaserver/storage/rocksdb_storage_test.cpp index 2d4425671e..b2772735f2 100644 --- a/curvefs/test/metaserver/storage/rocksdb_storage_test.cpp +++ b/curvefs/test/metaserver/storage/rocksdb_storage_test.cpp @@ -116,32 +116,32 @@ TEST_F(RocksDBStorageTest, OpenCloseTest) { ASSERT_TRUE(kvStorage_->Close()); - s = kvStorage_->HSet("partition:1", "key1", Value("value1")); + s = kvStorage_->HSet("1:1", "key1", Value("value1")); ASSERT_TRUE(s.IsDBClosed()); - s = kvStorage_->HGet("partition:1", "key1", &value); + s = kvStorage_->HGet("1:1", "key1", &value); ASSERT_TRUE(s.IsDBClosed()); - s = kvStorage_->HDel("partition:1", "key1"); + s = kvStorage_->HDel("1:1", "key1"); ASSERT_TRUE(s.IsDBClosed()); - 
iterator = kvStorage_->HGetAll("partition:1"); + iterator = kvStorage_->HGetAll("1:1"); ASSERT_EQ(iterator->Status(), -1); - size = kvStorage_->HSize("partition:1"); + size = kvStorage_->HSize("1:1"); ASSERT_EQ(size, 0); - s = kvStorage_->HClear("partition:1"); + s = kvStorage_->HClear("1:1"); ASSERT_TRUE(s.IsDBClosed()); - s = kvStorage_->SSet("partition:1", "key1", Value("value1")); + s = kvStorage_->SSet("3:1", "key1", Value("value1")); ASSERT_TRUE(s.IsDBClosed()); - s = kvStorage_->SGet("partition:1", "key1", &value); + s = kvStorage_->SGet("3:1", "key1", &value); ASSERT_TRUE(s.IsDBClosed()); - s = kvStorage_->SDel("partition:1", "key1"); + s = kvStorage_->SDel("3:1", "key1"); ASSERT_TRUE(s.IsDBClosed()); - iterator = kvStorage_->SGetAll("partition:1"); + iterator = kvStorage_->SGetAll("3:1"); ASSERT_EQ(iterator->Status(), -1); - iterator = kvStorage_->SSeek("partition:1", "key1"); + iterator = kvStorage_->SSeek("3:1", "key1"); ASSERT_EQ(iterator->Status(), -1); - size = kvStorage_->SSize("partition:1"); + size = kvStorage_->SSize("3:1"); ASSERT_EQ(size, 0); - s = kvStorage_->SClear("partition:1"); + s = kvStorage_->SClear("3:1"); ASSERT_TRUE(s.IsDBClosed()); } @@ -250,14 +250,14 @@ TEST_F(RocksDBStorageTest, TestCheckpointAndRecover) { ASSERT_TRUE(kvStorage_->Open()); // put some values - auto s = kvStorage_->SSet("1", "1", Value("1")); - s = kvStorage_->SSet("2", "2", Value("2")); - s = kvStorage_->SSet("3", "3", Value("3")); - s = kvStorage_->SSet("4", "4", Value("4")); - s = kvStorage_->SSet("5", "5", Value("5")); - s = kvStorage_->SSet("6", "6", Value("6")); - s = kvStorage_->SSet("7", "7", Value("7")); - s = kvStorage_->SDel("3", "3"); + auto s = kvStorage_->SSet("1:1", "1", Value("1")); + s = kvStorage_->SSet("1:2", "2", Value("2")); + s = kvStorage_->SSet("1:3", "3", Value("3")); + s = kvStorage_->SSet("1:4", "4", Value("4")); + s = kvStorage_->SSet("1:5", "5", Value("5")); + s = kvStorage_->SSet("1:6", "6", Value("6")); + s = kvStorage_->SSet("1:7", 
"7", Value("7")); + s = kvStorage_->SDel("1:3", "3"); ASSERT_TRUE(s.ok()) << s.ToString(); @@ -269,28 +269,28 @@ TEST_F(RocksDBStorageTest, TestCheckpointAndRecover) { // get values that checkpoint should have Dentry dummyDentry; - kvStorage_->SGet("1", "1", &dummyDentry); + kvStorage_->SGet("1:1", "1", &dummyDentry); EXPECT_EQ(Value("1"), dummyDentry) << "Expect: " << Value("1").ShortDebugString() << ", actual: " << dummyDentry.ShortDebugString(); - kvStorage_->SGet("2", "2", &dummyDentry); + kvStorage_->SGet("1:2", "2", &dummyDentry); EXPECT_EQ(Value("2"), dummyDentry); // "3" is deleted - s = kvStorage_->SGet("3", "3", &dummyDentry); + s = kvStorage_->SGet("1:3", "3", &dummyDentry); EXPECT_TRUE(s.IsNotFound()) << s.ToString(); - kvStorage_->SGet("4", "4", &dummyDentry); + kvStorage_->SGet("1:4", "4", &dummyDentry); EXPECT_EQ(Value("4"), dummyDentry); - kvStorage_->SGet("5", "5", &dummyDentry); + kvStorage_->SGet("1:5", "5", &dummyDentry); EXPECT_EQ(Value("5"), dummyDentry); - kvStorage_->SGet("6", "6", &dummyDentry); + kvStorage_->SGet("1:6", "6", &dummyDentry); EXPECT_EQ(Value("6"), dummyDentry); - kvStorage_->SGet("7", "7", &dummyDentry); + kvStorage_->SGet("1:7", "7", &dummyDentry); EXPECT_EQ(Value("7"), dummyDentry); } diff --git a/curvefs/test/metaserver/storage/storage_test.cpp b/curvefs/test/metaserver/storage/storage_test.cpp index d47f41a1ee..e897727352 100644 --- a/curvefs/test/metaserver/storage/storage_test.cpp +++ b/curvefs/test/metaserver/storage/storage_test.cpp @@ -783,7 +783,7 @@ void TestMixOperator(std::shared_ptr kvStorage) { // CASE 1: get s = kvStorage->HGet(TableName(1), "key1", &value); ASSERT_TRUE(s.IsNotFound()); - s = kvStorage->SGet(TableName(1), "key1", &value); + s = kvStorage->SGet(TableName(2), "key1", &value); ASSERT_TRUE(s.IsNotFound()); // CASE 2: set @@ -793,25 +793,25 @@ void TestMixOperator(std::shared_ptr kvStorage) { s = kvStorage->HGet(TableName(1), "key1", &value); ASSERT_TRUE(s.ok()); ASSERT_EQ(value, Value("value1")); 
- s = kvStorage->SGet(TableName(1), "key1", &value); + s = kvStorage->SGet(TableName(2), "key1", &value); ASSERT_TRUE(s.IsNotFound()); // CASE 3: del s = kvStorage->HDel(TableName(1), "key1"); ASSERT_TRUE(s.ok()); - s = kvStorage->SDel(TableName(1), "key1"); + s = kvStorage->SDel(TableName(2), "key1"); ASSERT_TRUE(s.ok()); - s = kvStorage->SSet(TableName(1), "key1", Value("value1")); + s = kvStorage->SSet(TableName(2), "key1", Value("value1")); ASSERT_TRUE(s.ok()); s = kvStorage->HGet(TableName(1), "key1", &value); ASSERT_TRUE(s.IsNotFound()); - s = kvStorage->SGet(TableName(1), "key1", &value); + s = kvStorage->SGet(TableName(2), "key1", &value); ASSERT_TRUE(s.ok()); ASSERT_EQ(value, Value("value1")); // CASE 4: range - iterator = kvStorage->SSeek(TableName(1), "key1"); + iterator = kvStorage->SSeek(TableName(2), "key1"); ASSERT_EQ(iterator->Status(), 0); size = 0; for (iterator->SeekToFirst(); iterator->Valid(); iterator->Next()) { @@ -825,22 +825,22 @@ void TestMixOperator(std::shared_ptr kvStorage) { // CASE 5: clear s = kvStorage->HClear(TableName(1)); ASSERT_TRUE(s.ok()); - s = kvStorage->SClear(TableName(1)); + s = kvStorage->SClear(TableName(2)); ASSERT_TRUE(s.ok()); s = kvStorage->HGet(TableName(1), "key1", &value); ASSERT_TRUE(s.IsNotFound()); - s = kvStorage->SGet(TableName(1), "key1", &value); + s = kvStorage->SGet(TableName(2), "key1", &value); ASSERT_TRUE(s.IsNotFound()); // CASE 6: size s = kvStorage->HSet(TableName(1), "key1", Value("value1")); ASSERT_TRUE(s.ok()); - s = kvStorage->SSet(TableName(1), "key2", Value("value2")); + s = kvStorage->SSet(TableName(2), "key2", Value("value2")); ASSERT_TRUE(s.ok()); ASSERT_EQ(kvStorage->HSize(TableName(1)), 1); - ASSERT_EQ(kvStorage->SSize(TableName(1)), 1); + ASSERT_EQ(kvStorage->SSize(TableName(2)), 1); } void TestTransaction(std::shared_ptr kvStorage) { diff --git a/curvefs/test/metaserver/trash_test.cpp b/curvefs/test/metaserver/trash_test.cpp index 56edf05f7a..1145cfaa4b 100644 --- 
a/curvefs/test/metaserver/trash_test.cpp +++ b/curvefs/test/metaserver/trash_test.cpp @@ -30,6 +30,8 @@ #include "src/fs/ext4_filesystem_impl.h" #include "curvefs/test/client/rpcclient/mock_mds_client.h" #include "curvefs/test/metaserver/mock_metaserver_s3_adaptor.h" +#include "src/common/timeutility.h" +#include "curvefs/test/metaserver/copyset/mock/mock_copyset_node.h" using ::testing::_; using ::testing::AtLeast; @@ -39,19 +41,30 @@ using ::testing::ReturnArg; using ::testing::SaveArg; using ::testing::SetArgPointee; using ::testing::StrEq; +using ::testing::Invoke; namespace curvefs { namespace metaserver { +DECLARE_uint32(trash_scanPeriodSec); +DECLARE_uint32(trash_expiredAfterSec); + namespace { auto localfs = curve::fs::Ext4FileSystemImpl::getInstance(); } +DECLARE_uint32(trash_expiredAfterSec); +DECLARE_uint32(trash_scanPeriodSec); + using ::curvefs::client::rpcclient::MockMdsClient; using ::curvefs::metaserver::storage::KVStorage; using ::curvefs::metaserver::storage::RandomStoragePath; using ::curvefs::metaserver::storage::RocksDBStorage; using ::curvefs::metaserver::storage::StorageOptions; +using ::curvefs::metaserver::copyset::MockCopysetNode; + +DECLARE_uint32(trash_expiredAfterSec); +DECLARE_uint32(trash_scanPeriodSec); class TestTrash : public ::testing::Test { protected: @@ -68,9 +81,13 @@ class TestTrash : public ::testing::Test { std::make_shared(kvStorage_, nameGenerator, 0); trashManager_ = std::make_shared(); logIndex_ = 0; + copysetNode_ = std::make_shared(); + mdsClient_ = std::make_shared(); + s3Adaptor_ = std::make_shared(); } void TearDown() override { + trashManager_->Fini(); inodeStorage_ = nullptr; trashManager_ = nullptr; ASSERT_TRUE(kvStorage_->Close()); @@ -138,68 +155,78 @@ class TestTrash : public ::testing::Test { std::shared_ptr kvStorage_; std::shared_ptr inodeStorage_; std::shared_ptr trashManager_; + std::shared_ptr copysetNode_; + std::shared_ptr mdsClient_; + std::shared_ptr s3Adaptor_; int64_t logIndex_; }; 
TEST_F(TestTrash, testAdd3ItemAndDelete) { TrashOption option; - option.scanPeriodSec = 1; - option.expiredAfterSec = 1; - option.mdsClient = std::make_shared(); - option.s3Adaptor = std::make_shared(); + option.mdsClient = mdsClient_; + option.s3Adaptor = s3Adaptor_; + FLAGS_trash_scanPeriodSec = 1; + FLAGS_trash_expiredAfterSec = 4; trashManager_->Init(option); trashManager_->Run(); - auto trash1 = std::make_shared(inodeStorage_); - auto trash2 = std::make_shared(inodeStorage_); + auto trash1 = std::make_shared(inodeStorage_, 1, 1, 1, 1); + auto trash2 = std::make_shared(inodeStorage_, 2, 1, 2, 2); trashManager_->Add(1, trash1); trashManager_->Add(2, trash2); + trash1->SetCopysetNode(copysetNode_); + trash2->SetCopysetNode(copysetNode_); inodeStorage_->Insert(GenInodeHasChunks(1, 1), logIndex_++); inodeStorage_->Insert(GenInodeHasChunks(1, 2), logIndex_++); inodeStorage_->Insert(GenInodeHasChunks(2, 1), logIndex_++); - ASSERT_EQ(inodeStorage_->Size(), 3); - trash1->Add(1, 1, 0); - trash1->Add(1, 2, 0); - trash2->Add(2, 1, 0); + EXPECT_CALL(*copysetNode_, IsLeaderTerm()) + .WillRepeatedly(Return(true)); + FsInfo fsInfo; + fsInfo.set_fsid(1); + fsInfo.set_recycletimehour(0); + EXPECT_CALL(*mdsClient_, GetFsInfo(1, _)) + .WillOnce(DoAll(SetArgPointee<1>(fsInfo), Return(FSStatusCode::OK))); + fsInfo.set_fsid(2); + EXPECT_CALL(*mdsClient_, GetFsInfo(2, _)) + .WillOnce(DoAll(SetArgPointee<1>(fsInfo), Return(FSStatusCode::OK))); + EXPECT_CALL(*s3Adaptor_, GetS3ClientAdaptorOption(_)) + .Times(3); + EXPECT_CALL(*s3Adaptor_, Reinit(_, _, _, _, _)) + .Times(3); + EXPECT_CALL(*s3Adaptor_, Delete(_)) + .Times(3) + .WillRepeatedly(Return(0)); + EXPECT_CALL(*copysetNode_, Propose(_)) + .WillOnce(Invoke([&](const braft::Task& task) { + ASSERT_EQ(inodeStorage_->Delete(Key4Inode(1, 1), logIndex_++), + MetaStatusCode::OK); + LOG(INFO) << "trash deleteInode 1:1"; + task.done->Run(); + })) + .WillOnce(Invoke([&](const braft::Task& task) { + 
ASSERT_EQ(inodeStorage_->Delete(Key4Inode(1, 2), logIndex_++), + MetaStatusCode::OK); + LOG(INFO) << "trash deleteInode 1:2"; + task.done->Run(); + })) + .WillOnce(Invoke([&](const braft::Task& task) { + ASSERT_EQ(inodeStorage_->Delete(Key4Inode(2, 1), logIndex_++), + MetaStatusCode::OK); + LOG(INFO) << "trash deleteInode 2:1"; + task.done->Run(); + })); + + uint64_t dtime = curve::common::TimeUtility::GetTimeofDaySec(); + trash1->Add(1, dtime - 6, false); + trash1->Add(2, dtime - 2, false); + trash2->Add(1, dtime, false); std::this_thread::sleep_for(std::chrono::seconds(5)); - std::list list; - - trashManager_->ListItems(&list); - ASSERT_EQ(0, list.size()); + ASSERT_EQ(0, trashManager_->Size()); ASSERT_EQ(inodeStorage_->Size(), 0); - - trashManager_->Fini(); -} - -TEST_F(TestTrash, testAdd3ItemAndNoDelete) { - TrashOption option; - option.scanPeriodSec = 1; - option.expiredAfterSec = 1; - option.mdsClient = std::make_shared(); - option.s3Adaptor = std::make_shared(); - trashManager_->Init(option); - trashManager_->Run(); - - auto trash1 = std::make_shared(inodeStorage_); - trashManager_->Add(1, trash1); - - inodeStorage_->Insert(GenInode(1, 1), logIndex_++); - inodeStorage_->Insert(GenInode(1, 2), logIndex_++); - inodeStorage_->Insert(GenInode(2, 1), logIndex_++); - ASSERT_EQ(inodeStorage_->Size(), 3); - trash1->Add(1, 1, 0); - trash1->Add(1, 2, 0); - std::this_thread::sleep_for(std::chrono::seconds(5)); - std::list list; - - trashManager_->ListItems(&list); - ASSERT_EQ(0, list.size()); - ASSERT_EQ(inodeStorage_->Size(), 3); - trashManager_->Fini(); } } // namespace metaserver diff --git a/docker/debian11/entrypoint.sh b/docker/debian11/entrypoint.sh index 16d47fce63..81fcceecc9 100644 --- a/docker/debian11/entrypoint.sh +++ b/docker/debian11/entrypoint.sh @@ -109,7 +109,10 @@ function prepare() { } function create_directory() { - chmod 700 "$g_prefix/data" + if [ "$g_role" != "monitor" ]; then + chmod 700 "$g_prefix/data" + fi + if [ "$g_role" == "etcd" ]; then 
mkdir -p "$g_prefix/data/wal" elif [ "$g_role" == "client" ]; then diff --git a/docker/openeuler/entrypoint.sh b/docker/openeuler/entrypoint.sh index 476cc65f3c..b4adaeca92 100644 --- a/docker/openeuler/entrypoint.sh +++ b/docker/openeuler/entrypoint.sh @@ -108,7 +108,10 @@ function prepare() { } function create_directory() { - chmod 700 "$g_prefix/data" + if [ "$g_role" != "monitor" ]; then + chmod 700 "$g_prefix/data" + fi + if [ "$g_role" == "etcd" ]; then mkdir -p "$g_prefix/data/wal" elif [ "$g_role" == "client" ]; then diff --git a/docker/ubuntu22/compile/Dockerfile b/docker/ubuntu22/compile/Dockerfile new file mode 100644 index 0000000000..76f34e44be --- /dev/null +++ b/docker/ubuntu22/compile/Dockerfile @@ -0,0 +1,10 @@ +FROM ubuntu:22.04 + +LABEL maintainer="Wine93 " + +ENV TZ=Asia/Shanghai \ + LANG=C.UTF-8 \ + LC_ALL=C.UTF-8 + +COPY setup.sh .setup.sh +RUN bash .setup.sh diff --git a/docker/ubuntu22/compile/Makefile b/docker/ubuntu22/compile/Makefile new file mode 100644 index 0000000000..3f2bfbacfe --- /dev/null +++ b/docker/ubuntu22/compile/Makefile @@ -0,0 +1,4 @@ +.PHONY: build + +build: + docker build -t opencurvedocker/curve-build:ubuntu22 . 
diff --git a/docker/ubuntu22/compile/setup.sh b/docker/ubuntu22/compile/setup.sh new file mode 100644 index 0000000000..ea7fe73891 --- /dev/null +++ b/docker/ubuntu22/compile/setup.sh @@ -0,0 +1,63 @@ +#!/usr/bin/env bash + +g_bazelisk_url="https://github.com/bazelbuild/bazelisk/releases/download/v1.18.0/bazelisk-linux-amd64" +g_protoc_url="https://github.com/protocolbuffers/protobuf/releases/download/v21.8/protoc-21.8-linux-x86_64.zip" + +cat << EOF > /etc/apt/sources.list +deb http://mirrors.aliyun.com/ubuntu/ jammy main restricted universe multiverse +deb-src http://mirrors.aliyun.com/ubuntu/ jammy main restricted universe multiverse +deb http://mirrors.aliyun.com/ubuntu/ jammy-security main restricted universe multiverse +deb-src http://mirrors.aliyun.com/ubuntu/ jammy-security main restricted universe multiverse +deb http://mirrors.aliyun.com/ubuntu/ jammy-updates main restricted universe multiverse +deb-src http://mirrors.aliyun.com/ubuntu/ jammy-updates main restricted universe multiverse +deb http://mirrors.aliyun.com/ubuntu/ jammy-proposed main restricted universe multiverse +deb-src http://mirrors.aliyun.com/ubuntu/ jammy-proposed main restricted universe multiverse +deb http://mirrors.aliyun.com/ubuntu/ jammy-backports main restricted universe multiverse +deb-src http://mirrors.aliyun.com/ubuntu/ jammy-backports main restricted universe multiverse +EOF + +apt-get clean +apt-get -y update +apt-get -y install --no-install-recommends \ + bison \ + build-essential \ + cmake \ + default-jdk \ + flex \ + git \ + golang \ + libcurl4-gnutls-dev \ + libfiu-dev \ + libfuse3-dev \ + libhashkit-dev \ + liblz4-dev \ + libsnappy-dev \ + libssl-dev \ + libz-dev \ + make \ + maven \ + musl \ + musl-dev \ + musl-tools \ + python3-pip \ + sudo \ + tree \ + unzip \ + uuid-dev \ + vim \ + wget +apt-get autoremove -y + +wget "${g_bazelisk_url}" -O /usr/bin/bazel +chmod a+x /usr/bin/bazel + +g_protoc_zip="/tmp/protoc.zip" +wget "${g_protoc_url}" -O ${g_protoc_zip} +unzip 
${g_protoc_zip} "bin/protoc" -d /usr +rm -f ${g_protoc_zip} + +cat << EOF >> ~/.bashrc +export JAVA_HOME="/usr/lib/jvm/java-11-openjdk-amd64" +export GOPATH=${HOME}/go +export PATH=\$JAVA_HOME/bin:\$PATH +EOF diff --git a/docs/cn/curvefs_improve_rename_design.md b/docs/cn/curvefs_improve_rename_design.md index c2a33a2b65..5e83cf121a 100644 --- a/docs/cn/curvefs_improve_rename_design.md +++ b/docs/cn/curvefs_improve_rename_design.md @@ -1,267 +1,193 @@ -# rename 接口实现优化 +# Rename 优化方案 -## 背景 +## 背景描述 在目前 CurveFS 的 rename 实现中,我们保证了该操作的原子性,但是仍然存在以下 2 个问题: -* 在多挂载的场景下,我们为了保证 txid 的正确性,在 MDS 中加了一把分布式锁来保证所有的事务都是串行执行,这严重影响了 rename 接口的性能,即事务不能并发执行 -* 同样地,为了保证其他操作接口(如 GetDentry、DeleteDentry 等)能拿到正确版本的 dentry,在这些接口执行前都要去 MDS 获取最新的 txid,对于这些接口来说,多了一次 RPC 请求,降低了整体的元数据性能 +1. 在多挂载的场景下,我们为了保证 txid 的正确性,在 MDS 中加了一把分布式锁来保证同一 FS 上所有的事务都是串行执行,这严重影响了 rename 接口的性能,即事务不能并发执行。 +2. 同样地,为了保证其他操作接口(如 GetDentry、DeleteDentry 等)能拿到正确版本的 dentry,在这些接口执行前都要去 MDS 获取最新的 txid,对于这些接口来说,多了一次 RPC 请求,降低了整体的元数据性能。 -针对以上 2 个问题,调研了 Google Percolator 事务模型以及现有的开源实现,并发现其能很好解决我们以上 2 个已知问题。 - -## 方案调研 - -### Google Percolator - -资料: - -[Large-scale Incremental Processing Using Distributed Transactions and Notifications](https://www.usenix.org/legacy/event/osdi10/tech/full_papers/Peng.pdf) - -[Google Percolator 的事务模型](https://github.com/ngaut/builddatabase/blob/master/percolator/README.md) - -### TiKV 中 percolator 事务实现优化 - -资料: - -[Optimized Percolator](https://tikv.org/deep-dive/distributed-transaction/optimized-percolator/) +针对以上 2 个问题,调研了 [Google Percolator](https://www.usenix.org/legacy/event/osdi10/tech/full_papers/Peng.pdf) 事务模型以及现有的开源实现([TiKV中 Percolator的优化](https://tikv.org/deep-dive/distributed-transaction/optimized-percolator/)),并发现其能很好解决我们以上 2 个已知问题。 ## 方案实现 -我们先阐述以原论文、没有任何优化的情况下如何在我们 CurveFS 中实现 pecolator 事务,然后再逐一将每一个优化点加入其中。 - -### 基础实现 - -基于我们之前的 rename 实现,我们可以比较好理解 percolator 的实现,percolator 主要对每一个 key 都有一把锁来解决写写冲突。percolator 论文中的 data 就是跟我们之前写入 txid 版本的 dentry 是一样的,而往 write 列写入时间戳则跟我们之前往 MDS 提交 
txid 是一样的,也是提交版本号。 - -#### 整体实现 - -percalator 也是一个 2PC 的实现方案,分为 Prewrite 和 Commit 阶段: - -* 客户端首先会通过 Tso() 接口从 MDS 获取全局递增的时间戳,作为当前数据的版本号(start_ts),然后对所有涉及到的 key 进行 prewrite 操作,prewrite 包括对 key(会从所有的 key 中选出一个主 key) 加锁已经写入对应版本的数据。这一步与我们目前实现中的 prepare 是一样的,准备最终版本的数据 -* 当 prewrite 成功后,客户端会再次通过 Tso() 接口从 MDS 获取时间戳,作为提交时间戳 (commit_ts),然后首先对主键进行提交版本号,并进行解锁。如果主键成功后,再并行地对所有从键进行解锁。 - -异常处理: +### 整体实现 -percolator 中的异常处理是由下一个事务完成的: - -* 如果发现当前操作的 key 返回 WriteConflict 则表示写冲突了,需要重新 retry -* 而如果发现 key 被锁住,即 KeyIsLocked 状态,客户端需要调用 CheckTxnStatus 接口来获取上一个事务的状态,如果已经提交,则推动从键提交,如果是锁已经操作,则推动所有键 rollback,而如果前一个事务正在进行中,则需要等待。 +percolator 也是一个 2PC 的实现方案,分为 Prewrite 和 Commit 阶段。基于我们之前的 rename 实现,可以比较好理解 percolator 的实现,它主要对每一个 key 都有一把锁来解决写写冲突。percolator 论文中的 data 就是跟我们之前写入带 txid 版本的 dentry 是一样的,而往 write 列写入时间戳则类似我们之前往 MDS 提交 txid,也是提交版本号。 #### 数据结构 -考虑到性能,我们要保证 lock 和 write 这 2 张表永远保存在内存当中,因为这 2 张表中的内容比较小但是读写却很频繁。对于 RocksDB 来说,我们要给这 2 张表单独配置 column failmy,保证其独立性。 - -| table | key | value | 说明 | -|-----------------|---------|--------------|:----------| -| data | key:timestamp | | 实际的数据,这里保存多版本数据 | -| lock | key | struct lock { primary、timestamp、ttl } | 保护当前 key 的锁 | -| write | key:timestamp | struct write { timestamp、kind } | 记录已提交的版本号 | +在介绍整体流程之前,先约定必要的数据结构,percolator 事务模型中涉及到的三个表:data、lock、write,lock 和 write表内容比较小,但访问频繁,在 Rocksdb 中为其单独配置 column family,保证其独立性。 -**struct lock** +| Table name | Key | Value | 说明 | +| ---------- | --------------------------- | --------------------------------------------------- | -------------------------------- | +| data | dentryKey | struct DentryVec {dentrys} | 实际的dentry数据,保存多版本数据 | +| lock | dentryKey | struct TxLock {primaryKey, startTs, timestamp, ttl} | 包含当前key的锁 | +| write | dentryKey/commitTs或startTs | struct TxWrite {startTs, writeKind} | 记录已提交或已回滚的版本号 | -``` -struct Lock { - std::string primary; // 锁的主键相关信息 - uint64_t timestamp; // 加锁的时间戳 - uint64_t ttl; // 锁的 TTL。我们通过这个来判断锁有没有超时 +```protobuf +message DentryVec { + repeated Dentry dentrys = 1; //
记录多版本dentry信息,版本信息复用之前的txid字段 } -``` -**struct write** - -``` -struct Write { - uint64_t timestamp; // 最新版本数据的时间戳。我们通过这个时间戳可以在 data 表中找到相应版本的数据 - char kind; // 写入的类型,详见 WriteKind +message TxLock { + required string primaryKey = 1; // 事务的主键,rename中涉及两个dentry,默认以src dentry为primaryKey + required uint64 startTs = 2; // 事务开始序列号 + required uint64 timestamp = 3; // 事务开始物理时间 + optional uint32 index = 4; + optional int32 ttl = 5; } -``` -**事务状态码** +enum TxWriteKind { + Commit = 1; + Rollback = 2; +} -``` -enum class TxStatus { - WriteConflict, // 事务写写冲突。为解决该冲突,我们的处理原则是只保证一个事务成功 - KeyIsLocked, // 当前事务涉及的键已被锁住。 - OK, // 成功 -}; +message TxWrite { + required uint64 startTs = 1; + required TxWriteKind kind = 2; +} ``` -**写入类型** +data 表的清理:复用现有逻辑,在insert、delete 等 dentry 操作时会进行压缩操作,正常情况下最多存在两个版本的 dentry 数据。 -``` -enum class WriteKind { - Put, - Delete, - Rollback, -}; -``` - -以上的数据结构中,各数据结构的主要目的如下: +lock 表的清理:每个 key 最多只会有一条记录,事务提交或回滚都会清理对应记录。 -* timestamp:时间戳主要为了确定事务的顺序,为事务冲突提供判断依据。另一个作用是作为锁 TTL 的开始时间戳 -* Lock:主要是为了保证对某一个 key 来说,只能有一个写操作,以此来解决写写冲突 -* Lock.ttl:锁的 TTL 主要用来解决当客户端挂掉或 hang 住,锁仍然遗留在相应的 key 上而导致其他事务无法执行,或 -* Lock.primary:事务/锁的主键。在我们的实现中,我们可能涉及到多个 dentry,我们会选任意一个 -* Write:这个作用跟我们当前事务实现中的 txId 一样,用来保存当前 dentry 的版本号,对外提供统一的视图,通过版本号找到最新的 dentry。而在 Write 表中写入 timeStamp 就跟我们现在实现的往 MDS 提交 txId 一样。 +write 表的清理:key 的每次提交或回滚都会在 write 表中记录一条记录,如果不清理则会越来越多,清理规则是对应 writeKind==TxWriteKind::Commit的记录每个 key 只会保留最新的一条记录,对应 writeKind==TxWriteKind::Rollback 的记录都保留,便于后续判断事务状态,且发生回滚的事务本身极少,不会对容量造成负担。 -#### 相关函数 +#### 整体流程 -**一些约定** +1. 客户端首先会通过 Tso() 接口从 MDS 获取全局递增的事务序列号 startTs 和物理时间戳 timestamp。startTs 作为当前数据的版本号,timestamp用于验证 lock 的超时。 +2. 对涉及到的 dentry 进行 prewrite 操作,会任意选择一个 key 作为 primaryKey(默认选择 src dentry),先 prewrite primaryKey 再 prewrite 其他 key,原因见 write 表清理逻辑。 +3. 所有 key prewrite 完成后,客户端会再次通过 Tso() 接口从 MDS 获取 commitTs。 +4. 
对 primaryKey 进行 commit 操作,成功后,对其他 key 并行 commit。 -* 对于每一个函数,我们会先阐述正常的处理流程,之后会阐述每个步骤出现异常、不符合预期时的异常处理流程 -* 对于 dentry 来说,我们以 dentry_key 作为实际存储 dentry 的键,这个键的编码格式如下:fsId:parentInodeId:name +异常处理:percolator 中的异常处理是延迟到下一个事务解决的 -**Tso** +1. prewrite 时从 write 表中获取该 key 最新的 ts,如果 ts >= startTs 则说明期间已有新的事务发生,返回 WriteConflict 错误。 +2. prewrite 时发现该 key 已经被 lock 住了,则返回 TxkeyLocked,客户端需要调用 CheckTxStatus 接口去主键获取上一个事务的状态,如果已经提交,则推动从键提交,如已回滚或锁已经超时,则推动所有键 rollback,而如果前一个事务正在进行中,则需要等待。 -客户端开启事务后,需要通过 Tso() 接口向 MDS 端获取一个全局递增的时间戳(该时间戳的作用类似于我们目前实现中的 txid)用于 prewrite,而在提交事务时同样需要通过 Tso() 接口获取一个时间戳用于 commit。 +#### 相关函数说明 -时间戳是一个 64 位整数,其由物理时间和逻辑时间两部分组成合成,其编码规则如下: +##### Tso -`TSO = 物理时间 + 序列号` +客户端开启事务后,需要通过 Tso 接口向 MDS 端获取一个全局递增的事务序列号 sn 和 一个物理时间 timestamp,sn 标识一次事务,同时表示该数据的版本,timestamp 记录于 lock 表中的 TxLock 中,便于在 client 异常时导致当前事务挂起,其他事务在 CheckTxStatus 时判断超时。 -**Prewrite** +```protobuf +message TsoRequest {} +message TsoResponse { + required FSStatusCode statusCode = 1; + optional uint64 sn = 2; + optional uint64 timestamp = 3; +} ``` -struct PrewriteRequest { - std::string primaryKey; // 事务/锁的主键 - uint64_t startTimestamp; // 开始时间戳。该时间戳从调用 Tso 接口从 MDS 端获取 - uint64_t lockTTL; // 锁的 TTL。用来判断当前 key 的锁是否超时 - ... 
// 涉及修改的 dentry 相关信息 -}; - -struct PrewriteResponse { - Status status; -}; -``` - -客户端通过 Tso() 接口获取到时间戳后,会调用 Prewrite() 接口依次对事务到涉及到所有 dentry 依次做 prewrite,MetaServer 端在接收到该请求后,会做如下处理: -* 获取当前请求的 dentry 在 write 表中最近一次写入的信息,如果该 dentry 最新一次写入的 Write.timestamp >= PrewriteRequest.startTimestamp,则表明在该事务开始后有其他事务修改了这个 key,返回 TXStatus::WriteConfict; -* 检查当前请求的 dentry 在 lock 表中是否有锁,如果有锁,则返回 status::KeyIsLocked; -* 若前面检查都成功,则调用 RocksDB 的事务接口,执行以下 2 个操作: - * 在 lock 表中对该 key 进行加锁。key="dentry", value=Lock{} - * 在 data 表中写入当前版本的 dentry。key="dentry:PrewriteRequest.startTimeStamp", value=mut_dentry -* 若以上事务成功,则返回 status::OK, 否则返回 status::Error +##### PrewriteRenameTx -注意:在客户端对所有涉及的 dentry 依次加锁的过程中,只要有一个 dentry 不是返回 status::OK,则表明 Prewrite 阶段失败。而当其中一个 dentry 收到 status::KeyIsLocked 的相应时,客户端需要调用 CheckTxnStatus() 接口来判断当前锁的状态,并根据锁的状态来执行相关的操作。而关于锁的状态,我们可以分 +首先 prewrite primaryKey,成功后再 prewrite 其他 key,如果rename 涉及的 dentry 位于同一个 partition 上,则可以合并成一次请求。metaserver 收到该请求进行如下处理: -**Commit** - -``` -struct CommitRequest { - uint64_t startTimestamp; // 该事务开始时间戳 - uint64_t commitTimestamp; // 该事务提交时间戳 +1. 在 write 表中获取该 dentryKey 最新的一次写入的 ts,如果 ts >= PrewriteRenameTxRequest.txLock.startts() 则返回错误码 MetaStatusCode::TX_WRITE_CONFLICT。 +2. 在 lock 表中检查该 dentryKey 是否存在,如果存在,则返回错误码MetaStatusCode::TX_KEY_LOCKED。 +3. 如果以上检查都通过,则通过 Rocksdb 事务接口完成如下两个操作: + 1. 在 lock 表中插入记录: key=dentryKey, value=TxLock + 2. 
在 data 表中写入当前版本的 dentry -}; - -struct CommitResponse { +```protobuf +message PrewriteRenameTxRequest { + required uint32 poolId = 1; + required uint32 copysetId = 2; + required uint32 partitionId = 3; + repeated Dentry dentrys = 4; + required TxLock txLock = 5; +} -}; +message PrewriteRenameTxResponse { + required MetaStatusCode statusCode = 1; + repeated Dentry dentrys = 2; + optional TxLock txLock = 3; // 如果返回 MetaStatusCode::TX_KEY_LOCKED,则该字段为前一个事务的 txlock 信息 + optional uint64 appliedIndex = 4; +} ``` -客户端在 prewrite 阶段成功后,就会进入提交事务阶段。客户端首先通过 Tso() 接口获取一个时间戳作为事务提交时间戳 commit_ts,再去该事务主键所在 MetaServer 提交事务,如果主键提交事务成功,则代表整个事务已经成功(这个步骤类似于我们现在的 CommitTxId),之后会并行去所有次键提交事务。MetaServer 在接收到该请求后,会做如下处理: +##### CheckTxStatus -* 以 dentry_key 为键在 lock 表中查找当前 dentry 是都有锁,如果无锁则代表当前 dentry 已被提交,直接返回 Status::OK -* 如果有锁,则判断上锁的时间戳是否等于事务提交的时间戳,即 Lock.timestamp == CommitRequest.startTimestamp -* 若前面检查都成功,则调用 RocksDB 的事务接口,执行以下 2 个操作: - * 在 write 表中写入最新版本的 dentry 的时间戳,即更新该 dentry 的版本号。KEY=dentry_key:CommitRequest.commitTimestamp,VALUE=Write{ timestamp=CommitRequest.startTimestamp }。这一步跟我们目前的 CommitTxId 是一样的,主要为了更新版本号。 - * 在 lock 表中删除以 dentry_key 为键的锁 -* 失败见以下 +在 prewrite 过程如果返回 MetaStatusCode::TX_KEY_LOCKED,则表示事务冲突,客户端需要其检查前一个事务的状态,依据状态再去处理 locked key。metaserver 收到该请求进行如下处理,通过 Rocksdb 事务接口完成: -Commit 接口用于提交事务,客户端会先对事务中的主键进行提交,成功后并发对所有次键进行提交。如果提交事务失败,客户端会通过 CheckTxnStatus 接口获取锁的状态,再进行下一步动作。 +1. 在 lock 表中获取 primaryKey 对应的 TxLock。如果存在,如果 CheckTxStatusRequest.curTimestamp > txLock.startTs + txLock.ttl,则表明事务已超时,返回错误码 MetaStatusCode::TX_TIMEOUT,否则说明事务正在进行中,返回错误码 MetaStatusCode::TX_TX_INPROGRESS。 +2. 
如果对应 TxLock 不存在,则在 write 表中用 key=CheckTxStatusRequest.startTs 进行查找,如果找到,并且 TxWrite.kind=TxWriteKind::rollback 则说明该事务已经回滚,返回错误码 MetaStatusCode::TX_ROLLBACKED;如果没找到,则说明事务已提交,返回错误码 MetaStatusCode::TX_COMMITTED。 -**Rollback** - -``` -struct RollbackRequest { - uint64_t StartTimestamp; -}; - -struct RollbackResponse { - TXStatus Status; -}; -``` - -* 如果当前 key 的 write 列已经写入 Rollback 的数据,则表明已经 rollback 过了,直接跳过 -* 而 write 列中如果不是 rollback,则直接 abort 当前事务 -* 若前面检查都成功,则调用 RocksDB 的事务接口,执行以下 3 个操作: - * 在 lock 表中删除以 dentry.key 为键的锁 - * 在 data 表中删除之前 prewrite 写入以 dentry.key+RollbackRequest.startTimestamp 为键的数据 - * 在 write 表中写入以下键值对 - * KEY: dentry.key + RollbackRequest.startTimestamp - * VALUE: Write{ timestamp=RollbackRequest.startTimestamp, kind=WriteKind::Rollback } - -**CheckTxnStatus** +```protobuf +message CheckTxStatusRequest { + required uint32 poolId = 1; + required uint32 copysetId = 2; + required uint32 partitionId = 3; + required string primaryKey = 4; + required uint64 startTs = 5; + required uint64 curTimestamp = 6; +} -``` -struct CheckTxnStatusRequest { - std::string primaryKey; - uint64_t lockTimestamp; - uint64_t currentTimestamp; -}; - -struct CheckTxnStatusResponse { -}; +message CheckTxStatusResponse { + required MetaStatusCode statusCode = 1; + optional uint64 appliedIndex = 2; +} ``` -当客户端在执行某事务时,发现与另一事务(我们称之为前事务)冲突时,即 prewrite 阶段接收到 **TxStatus::LockIsKey** 响应时,客户端需要去前事务的主键所在 MetaServer 获取前事务的状态,并根据前事务的状态来做相应的操作: +##### ResolveTxLock -* roll-forward:表明前事务已提交,我们也需要推动这个冲突的 key 去提交 -* roll-back:表明前事务失败、rollback、超时或 hang 住,我们需要推动这个冲突的 key 去回归 -* wait: 表明前事务正在进行中,我们可等待一段时间后再进行 retry。**特别需要注意**的是,重新 retry 开启的事务都需要重新通过 Tso() 获取时间戳并重头开始 +CheckTxStatus 完成后,需要根据返回的事务状态进行处理: -我们用一个 { primary, start_ts, commit_ts } 三元组来描述一个事务,以下的表格是 primary key 在事务中所有状态的列举,我们正是根据这个状态来得知事务处于什么状态: +MetaStatusCode::TX_COMMITTED:rollforward -| data@start_ts | lock? | lock is TTL? 
| write@start_ts | write@commit_ts | 说明 | 客户端需执行动作 | -|-----------------|---------|------|-------|-------|:----------|-------| -| | | | | timestamp = start_ts | 这个状态说明该事务已被成功提交 | roll-forward | -| | | | kind = rollback | | 这个状态说明事务已被 rollback | roll-back | -| | | | | |这个状态说明在此次事务中,该 primary key 没有被成功 prewrite | roll-back | -| <新版本数据> | Y | Y | | | 这个状态说明事务已超时。有可能是客户端挂了或 hang 住了 | roll-back | -| <新版本数据> | Y | N | | | 这个状态说明 primary key 已被成功 prewrite,事务正在进行当中 | wait | +MetaStatusCode::TX_TIMEOUT; MetaStatusCode::TX_ROLLBACKED : rollback - 之所以我们能根据主键的各个表中的值就能判断事务的状态,主要是因为我们操作的时候以锁为主要元素,不管是 prewrite、commit、rollback 都是原子操作,而且事务提交的成功是以主键是否成功提交为依据,所以该主键的状态就在由以上 5 个状态组成的状态机中转变。 +MetaStatusCode::TX_TX_INPROGRESS: retry -<补充状态机转换图> +metaserver 在收到该请求时进行如下处理,通过 Rocksdb 事务接口完成: -**ResolveLock** +1. 在 lock 表中查找该 key 是否仍然处于 locked 的状态,如果不存在,则直接返回成功;如果存在,判断 TxLock.startTs == ResolveTxLockRequest.startTs,如果不成立,则返回 MetaStatusCode::TX_MISMATCH。 +2. 如上对 lock 的检查通过后,如果 ResolveTxLockRequest.commitTs > 0,表示需要 rollforward:删除该 key 的 lock 记录;在 write 表中插入该版本的 dentry。如果 ResolveTxLockRequest.commitTs==0,表示需要 rollback:删除该 key 的 lock 记录;在 data 表中删除对应版本的 dentry; 在 write 表中写入回退的记录,key=dentryKey/startTs, value=TxWrite{startTs, TxWriteKind::Rollback}。 -``` -struct ResolveLockRequest { - uint64_t commitTimestamp; -}; +```protobuf +message ResolveTxLockRequest { + required uint32 poolId = 1; + required uint32 copysetId = 2; + required uint32 partitionId = 3; + required Dentry dentry = 4; + required uint64 startTs = 5; // 待处理事务标识 + required uint64 commitTs = 6; // commitTs > 0 表示rollforward, 否则 rollback +} -struct CheckTxnStatusResponse { -}; +message ResolveTxLockResponse { + required MetaStatusCode statusCode = 1; + optional uint64 appliedIndex = 2; +} ``` -RevolveLock 接口比较简单,主要是根据当前的请求中是否有 commitTimestamp 字段来决定是 commit 事务,还是 rollback 事务。这个函数是一个接口的聚合。 - -**Get** - -Get 接口主要用来描述 GetDentry/DeleteDentry 等非 rename 事务怎么获取当前 dentry。如果当前键没有锁,则直接获取最近 write 列对应版本号的数据,如果有所,则通过 CheckTxnStatus 获取前一个事务的状态,并推动事务
commit 还是 rollback 后再进行 Get 等操作。 +##### CommitTx -#### FAQ +在没有异常情况下,prewrite 完成后会进行 commit 操作,先进行 primaryKey 的 commit,再同时 commit 其他key,只要 primaryKey commit 成功就代表了该事务成功。如果rename 涉及的 dentry 位于同一个 partition 上,则可以合并成一次请求,metaserver 收到该请求会进行如下处理: -1、如果事务 A prewrite 成功,但是此时事务 B 发现与事务 A 存在冲突,将 A 的主键解锁了。这时候事务 A 再去提交事务,能否成功? -<这种情况是否会出现?> +1. 在 lock 表中检查该 key 是否存在,如果不存在,则表示该事务已结束(commit 或 rollback),直接返回。 +2. 判断 txLock.startTs == CommitTxRequest.startTs ,如果不等则返回错误码 MetaStatusCode::TX_MISMATCH。 +3. 在 write 表中更新 dentry 版本,key=dentryKey/commitTs,value=TxWrite{startTs, TxWriteKind::Commit};删除 lock 表中对应 key 的记录。 -### 优化点 1:并发 Prewrite - -论文中的 prewrite 是序列进行的,而我们可以推动所有的键并发进行,提高性能 +```protobuf +message CommitTxRequest { + required uint32 poolId = 1; + required uint32 copysetId = 2; + required uint32 partitionId = 3; + repeated Dentry dentrys = 4; + required uint64 startTs = 5; + required uint64 commitTs = 6; +} -## 优化点 2:去除每次读取操作获取时间戳的流程 +message CommitTxResponse { + required MetaStatusCode statusCode = 1; + optional uint64 appliedIndex = 2; +} +``` -论文中的每一个获取也需要获取一个 Tso,这对于我们的实现来说会增加一个 RPC 时间,而其实这个 Tso 是多余的,我们只需要在无锁获取当前 key 的最新数据即可,而有锁的话则需要推动前一个事务。 +##### 其他 dentry 操作 -``` -Point Read Without Timestamp -Timestamps are critical to providing isolation for transactions. For every transaction, -we allocate a unique start_ts for it, and ensures transaction T can only see -the data committed before T’s start_ts. -But if transaction T does nothing but reads a single key, is it really necessary to allocate it a start_ts? -The answer is no. We can simply read the newest version directly, -because it’s equivalent to reading with start_ts which is exactly the instant when the key is read. - It’s even ok to read a locked key, because it’s equivalent to reading with -the start_ts allocated before the lock’s start_ts.
-``` +CreateDentry、GetDentry、ListDentry、DeleteDentry 等非 rename 事务操作,在操作 dentry 时,如果对应 key 没有被 lock,则直接操作 write 表该 key 最新提交版本在 data 表中的数据即可;如果有锁,则需要通过 CheckTxStatus 来获取事务的状态,并进一步推动事务的完成,之后再处理对应 dentry。所以在这几种操作的 response 中增加 optional TxLock 字段,用于在有锁情况下,返回事务对应的 lock 信息。 diff --git a/docs/images/Curve-arch.odg b/docs/images/Curve-arch.odg index 2003f8bd2e..84aa3bde83 100644 Binary files a/docs/images/Curve-arch.odg and b/docs/images/Curve-arch.odg differ diff --git a/docs/images/Curve-arch.png b/docs/images/Curve-arch.png index a1e04569f2..c7bf79fc4b 100644 Binary files a/docs/images/Curve-arch.png and b/docs/images/Curve-arch.png differ diff --git a/nebd/src/part2/main.cpp b/nebd/src/part2/main.cpp index f8c742fe9a..e72bb27cbf 100644 --- a/nebd/src/part2/main.cpp +++ b/nebd/src/part2/main.cpp @@ -24,12 +24,14 @@ #include #include #include "nebd/src/part2/nebd_server.h" +#include "src/common/log_util.h" DEFINE_string(confPath, "/etc/nebd/nebd-server.conf", "nebd server conf path"); int main(int argc, char* argv[]) { // 解析参数 google::ParseCommandLineFlags(&argc, &argv, false); + curve::common::DisableLoggingToStdErr(); google::InitGoogleLogging(argv[0]); std::string confPath = FLAGS_confPath.c_str(); diff --git a/src/chunkserver/chunkserver.cpp b/src/chunkserver/chunkserver.cpp index 784628447f..398e938240 100644 --- a/src/chunkserver/chunkserver.cpp +++ b/src/chunkserver/chunkserver.cpp @@ -45,6 +45,7 @@ #include "src/common/concurrent/task_thread_pool.h" #include "src/common/curve_version.h" #include "src/common/uri_parser.h" +#include "src/common/log_util.h" using ::curve::fs::LocalFileSystem; using ::curve::fs::LocalFileSystemOption; @@ -105,6 +106,7 @@ int ChunkServer::Run(int argc, char** argv) { LoadConfigFromCmdline(&conf); // 初始化日志模块 + curve::common::DisableLoggingToStdErr(); google::InitGoogleLogging(argv[0]); // 打印参数 diff --git a/src/client/mds_client.cpp b/src/client/mds_client.cpp index 9ace95e823..5b857c229c 100644 --- a/src/client/mds_client.cpp +++ 
b/src/client/mds_client.cpp @@ -107,23 +107,15 @@ int RPCExcutorRetryPolicy::PreProcessBeforeRetry(int status, bool retryUnlimit, bool rpcTimeout = false; bool needChangeMDS = false; - // If retryUnlimit is set, sleep a long time to retry no matter what the - // error it is. - if (retryUnlimit) { - if (++(*normalRetryCount) > - retryOpt_.normalRetryTimesBeforeTriggerWait) { - bthread_usleep(retryOpt_.waitSleepMs * 1000); - } - - // 1. 访问存在的IP地址,但无人监听:ECONNREFUSED - // 2. 正常发送RPC情况下,对端进程挂掉了:EHOSTDOWN - // 3. 对端server调用了Stop:ELOGOFF - // 4. 对端链接已关闭:ECONNRESET - // 5. 在一个mds节点上rpc失败超过限定次数 - // 在这几种场景下,主动切换mds。 - } else if (status == -EHOSTDOWN || status == -ECONNRESET || - status == -ECONNREFUSED || status == -brpc::ELOGOFF || - *curMDSRetryCount >= retryOpt_.maxFailedTimesBeforeChangeAddr) { + // 1. 访问存在的IP地址,但无人监听:ECONNREFUSED + // 2. 正常发送RPC情况下,对端进程挂掉了:EHOSTDOWN + // 3. 对端server调用了Stop:ELOGOFF + // 4. 对端链接已关闭:ECONNRESET + // 5. 在一个mds节点上rpc失败超过限定次数 + // 在这几种场景下,主动切换mds。 + if (status == -EHOSTDOWN || status == -ECONNRESET || + status == -ECONNREFUSED || status == -brpc::ELOGOFF || + *curMDSRetryCount >= retryOpt_.maxFailedTimesBeforeChangeAddr) { needChangeMDS = true; // 在开启健康检查的情况下,在底层tcp连接失败时 @@ -140,6 +132,13 @@ int RPCExcutorRetryPolicy::PreProcessBeforeRetry(int status, bool retryUnlimit, *timeOutMS *= 2; *timeOutMS = std::min(*timeOutMS, retryOpt_.maxRPCTimeoutMS); *timeOutMS = std::max(*timeOutMS, retryOpt_.rpcTimeoutMs); + // If retryUnlimit is set, sleep a long time to retry no matter what the + // error it is. 
+ } else if (retryUnlimit) { + if (++(*normalRetryCount) > + retryOpt_.normalRetryTimesBeforeTriggerWait) { + bthread_usleep(retryOpt_.waitSleepMs * 1000); + } } // 获取下一次需要重试的mds索引 @@ -179,7 +178,11 @@ int RPCExcutorRetryPolicy::GetNextMDSIndex(bool needChangeMDS, if (std::atomic_compare_exchange_strong( ¤tWorkingMDSAddrIndex_, lastWorkingindex, currentWorkingMDSAddrIndex_.load())) { - int size = retryOpt_.addrs.size(); + int size; + { + ReadLockGuard lock(retryOptLock_); + size = retryOpt_.addrs.size(); + } nextMDSIndex = needChangeMDS ? (currentRetryIndex + 1) % size : currentRetryIndex; } else { @@ -191,10 +194,14 @@ int RPCExcutorRetryPolicy::GetNextMDSIndex(bool needChangeMDS, int RPCExcutorRetryPolicy::ExcuteTask(int mdsindex, uint64_t rpcTimeOutMS, RPCFunc task) { - assert(mdsindex >= 0 && - mdsindex < static_cast(retryOpt_.addrs.size())); - - const std::string &mdsaddr = retryOpt_.addrs[mdsindex]; + std::string mdsaddr; + { + ReadLockGuard lock(retryOptLock_); + // happen when mds scaling down + if (mdsindex >= static_cast(retryOpt_.addrs.size())) + return -brpc::ELOGOFF; + mdsaddr = retryOpt_.addrs[mdsindex]; + } brpc::Channel channel; int ret = channel.Init(mdsaddr.c_str(), nullptr); diff --git a/src/client/mds_client.h b/src/client/mds_client.h index 36822fa31c..36c4dd97f1 100644 --- a/src/client/mds_client.h +++ b/src/client/mds_client.h @@ -48,7 +48,13 @@ class RPCExcutorRetryPolicy { RPCExcutorRetryPolicy() : retryOpt_(), currentWorkingMDSAddrIndex_(0), cntlID_(1) {} + MetaServerOption::RpcRetryOption GetOption() { + ReadLockGuard lock(retryOptLock_); + return retryOpt_; + } + void SetOption(const MetaServerOption::RpcRetryOption &option) { + WriteLockGuard lock(retryOptLock_); retryOpt_ = option; } using RPCFunc = std::function currentWorkingMDSAddrIndex_; diff --git a/src/common/arc_cache.h b/src/common/arc_cache.h index 98e4eb7bfc..f83c10e5e4 100644 --- a/src/common/arc_cache.h +++ b/src/common/arc_cache.h @@ -26,18 +26,28 @@ #include #include 
+#include #include #include #include #include +// Adaptive Replacement Cache (ARC) template , typename ValueTraits = CacheTraits> class ARCCache : public LRUCacheInterface { public: - ARCCache(int64_t max_count, + struct ArcSizeInfo { + uint64_t b1, t1; + uint64_t b2, t2; + + uint64_t BSize() const { return b1 + b2; } + uint64_t TSize() const { return t1 + t2; } + }; + + ARCCache(uint64_t max_count, std::shared_ptr cacheMetrics = nullptr) - : c_(max_count/2), p_(0), cacheMetrics_(cacheMetrics) {} + : c_(max_count), p_(0), cacheMetrics_(cacheMetrics) {} void Put(const K& key, const V& value); bool Put(const K& key, const V& value, V* eliminated); @@ -49,8 +59,13 @@ class ARCCache : public LRUCacheInterface { bool GetLast(const V value, K* key); bool GetLast(K* key, V* value); bool GetLast(K* key, V* value, bool (*f)(const V& value)); + uint64_t Capacity() const { return c_; } + ArcSizeInfo ArcSize() const; private: + ARCCache(const ARCCache&) = delete; + void operator=(const ARCCache&) = delete; + struct BMapVal; struct TMapVal; struct BListVal; @@ -69,33 +84,27 @@ class ARCCache : public LRUCacheInterface { struct BMapVal { blist_iter list_iter; - BMapVal() {} - BMapVal(const BMapVal& o) { list_iter = o.list_iter; } - BMapVal& operator=(const BMapVal& o) { list_iter = o.list_iter; } + BMapVal() = default; + BMapVal(const BMapVal& o) = default; + BMapVal& operator=(const BMapVal& o) = default; BMapVal(const blist_iter& iter) // NOLINT : list_iter(iter) {} }; struct TMapVal { tlist_iter list_iter; - TMapVal() {} - TMapVal(const TMapVal& o) { list_iter = o.list_iter; } - TMapVal& operator=(const TMapVal& o) { - list_iter = o.list_iter; - return *this; - } + TMapVal() = default; + TMapVal(const TMapVal& o) = default; + TMapVal& operator=(const TMapVal& o) = default; TMapVal(const tlist_iter& iter) // NOLINT : list_iter(iter) {} }; struct BListVal { bmap_iter map_iter; - BListVal() {} - BListVal(const BListVal& o) { map_iter = o.map_iter; } - BListVal& operator=(const 
BListVal& o) { - map_iter = o.map_iter; - return *this; - } + BListVal() = default; + BListVal(const BListVal& o) = default; + BListVal& operator=(const BListVal& o) = default; BListVal(const bmap_iter& iter) // NOLINT :map_iter(iter) {} }; @@ -103,29 +112,22 @@ class ARCCache : public LRUCacheInterface { tmap_iter map_iter; V value; - TListVal() {} - TListVal(const TListVal& o) { - map_iter = o.map_iter; - value = o.value; - } + TListVal() = default; + TListVal(const TListVal& o) = default; TListVal(const tmap_iter& iter, const V& v) : map_iter(iter), value(v) {} - TListVal(const V& v) // NOLINT + explicit TListVal(const V& v) : value(v) {} - TListVal& operator=(const TListVal& o) { - map_iter = o.map_iter; - value = o.value; - return *this; - } + TListVal& operator=(const TListVal& o) = default; }; struct B { - BMap map; - BList list; + BMap map_; + BList list_; bool Find(const K& k, bmap_iter* iter) { - auto it = map.find(k); - if (it != map.end()) { + auto it = map_.find(k); + if (it != map_.end()) { if (iter != nullptr) *iter = it; return true; } @@ -133,20 +135,20 @@ class ARCCache : public LRUCacheInterface { } void Insert(const K& k) { - auto r = map.insert({k, blist_iter()}); + auto r = map_.insert({k, blist_iter()}); assert(r.second); - list.push_back(r.first); - r.first->second.list_iter = --list.end(); + list_.push_back(r.first); + r.first->second.list_iter = --list_.end(); } void RemoveLRU(CacheMetrics* m) { - if (list.empty()) return; + if (list_.empty()) return; if (m) { m->UpdateRemoveFromCacheBytes( - KeyTraits::CountBytes(list.front().map_iter->first)); + KeyTraits::CountBytes(list_.front().map_iter->first)); } - map.erase(list.front().map_iter); - list.pop_front(); + map_.erase(list_.front().map_iter); + list_.pop_front(); } void Remove(bmap_iter&& map_iter, CacheMetrics* m) { @@ -154,22 +156,22 @@ class ARCCache : public LRUCacheInterface { m->UpdateRemoveFromCacheBytes( KeyTraits::CountBytes(map_iter->first)); } - 
list.erase(map_iter->second.list_iter); - map.erase(map_iter); + list_.erase(map_iter->second.list_iter); + map_.erase(map_iter); } - int Count() const { return map.size(); } + uint64_t Count() const { return map_.size(); } }; struct T { - TMap map; - TList list; + TMap map_; + TList list_; void Insert(const K& k, const V& v, CacheMetrics* m) { - auto r = map.insert({k, tlist_iter()}); + auto r = map_.insert({k, tlist_iter()}); assert(r.second); - list.emplace_back(r.first, v); - r.first->second.list_iter = --list.end(); + list_.emplace_back(r.first, v); + r.first->second.list_iter = --list_.end(); if (m != nullptr) { m->UpdateAddToCacheCount(); m->UpdateAddToCacheBytes(KeyTraits::CountBytes(k) + @@ -177,9 +179,42 @@ class ARCCache : public LRUCacheInterface { } } + // Move key-value from another to this. For C++17 and later, + // we use extract() and insert() to improve performance. + void Move(const K& k, T* other, tmap_iter&& other_it, const V *v, + CacheMetrics *m) { + list_.splice(list_.end(), other->list_, other_it->second.list_iter); + if (v != nullptr) { + if (m != nullptr) { + uint64_t oldSize = + ValueTraits::CountBytes(list_.back().value); + uint64_t newSize = ValueTraits::CountBytes(*v); + if (oldSize != newSize) { + if (oldSize != 0) + m->UpdateRemoveFromCacheBytes(oldSize); + m->UpdateAddToCacheBytes(newSize); + } + } + list_.back().value = *v; + } +#if __cplusplus >= 201703L + auto node = other->map_.extract(other_it); + auto r = map_.insert(std::move(node)); + assert(r.inserted); + list_.back().map_iter = r.position; + r.position->second.list_iter = --list_.end(); +#else + auto r = map_.insert({k, tlist_iter()}); + assert(r.second); + list_.back().map_iter = r.first; + r.first->second.list_iter = --list_.end(); + other->map_.erase(other_it); +#endif + } + bool Find(const K& k, tmap_iter* map_iter) { - auto it = map.find(k); - if (it != map.end()) { + auto it = map_.find(k); + if (it != map_.end()) { if (map_iter != nullptr) *map_iter = it; return 
true; } @@ -194,28 +229,28 @@ class ARCCache : public LRUCacheInterface { KeyTraits::CountBytes(map_iter->first) + ValueTraits::CountBytes(map_iter->second.list_iter->value)); } - list.erase(map_iter->second.list_iter); - map.erase(map_iter); + list_.erase(map_iter->second.list_iter); + map_.erase(map_iter); } bool RemoveLRU(V* eliminated, CacheMetrics* m) { - if (list.empty()) return false; + if (list_.empty()) return false; if (eliminated != nullptr) { - *eliminated = list.front().value; + *eliminated = list_.front().value; } if (m != nullptr) { m->UpdateRemoveFromCacheCount(); m->UpdateRemoveFromCacheBytes( - KeyTraits::CountBytes(list.front().map_iter->first) + - ValueTraits::CountBytes(list.front().value)); + KeyTraits::CountBytes(list_.front().map_iter->first) + + ValueTraits::CountBytes(list_.front().value)); } - map.erase(list.front().map_iter); - list.pop_front(); + map_.erase(list_.front().map_iter); + list_.pop_front(); return true; } void Touch(const tmap_iter& map_iter, const V* v, CacheMetrics* m) { - auto list_iter = --list.end(); + auto list_iter = --list_.end(); if (v && m) { uint64_t oldSize = ValueTraits::CountBytes(map_iter->second.list_iter->value); @@ -229,18 +264,17 @@ class ARCCache : public LRUCacheInterface { if (v != nullptr) list_iter->value = *v; return; } + + list_.splice(list_.end(), list_, map_iter->second.list_iter); if (v != nullptr) { - list.push_back(*v); - } else { - list.push_back(map_iter->second.list_iter->value); + list_.back().value = *v; } - list.erase(map_iter->second.list_iter); - map_iter->second.list_iter = --list.end(); + map_iter->second.list_iter = --list_.end(); } - int Count() const { return map.size(); } + uint64_t Count() const { return map_.size(); } - tmap_iter GetLRU() const { return list.begin()->map_iter; } + tmap_iter GetLRU() const { return list_.begin()->map_iter; } }; bool Move_T_B(T* t, B* b, V* eliminated) { @@ -262,11 +296,33 @@ class ARCCache : public LRUCacheInterface { return true; } + bool 
IsCacheFull() const { + return t1_.Count() + t2_.Count() == c_; + } + + void IncreaseP(uint64_t delta) { + if (!IsCacheFull()) + return; + if (delta > c_ - p_) + p_ = c_; + else + p_ += delta; + } + + void DecreaseP(uint64_t delta) { + if (!IsCacheFull()) + return; + if (delta > p_) + p_ = 0; + else + p_ -= delta; + } + bool Replace(const K& k, V* eliminated); - ::curve::common::RWLock lock_; - int64_t c_; - int64_t p_; + mutable ::curve::common::RWLock lock_; + uint64_t c_; + uint64_t p_; B b1_, b2_; T t1_, t2_; std::shared_ptr cacheMetrics_; @@ -279,8 +335,7 @@ bool ARCCache::Get(const K& key, V* value) { if (t1_.Find(key, &it)) { if (value) *value = it->second.list_iter->value; - t2_.Insert(key, it->second.list_iter->value, nullptr); - t1_.Remove(std::move(it), nullptr); + t2_.Move(key, &t1_, std::move(it), nullptr, cacheMetrics_.get()); if (cacheMetrics_ != nullptr) { cacheMetrics_->OnCacheHit(); } @@ -313,23 +368,24 @@ bool ARCCache::Put(const K& key, const V& value, bool ret = false; if (t1_.Find(key, &it)) { - t2_.Insert(key, value, nullptr); - t1_.Remove(std::move(it), nullptr); + t2_.Move(key, &t1_, std::move(it), &value, cacheMetrics_.get()); + if (cacheMetrics_ != nullptr) { + cacheMetrics_->OnCacheHit(); + } return false; } if (t2_.Find(key, &it)) { t2_.Touch(it, &value, cacheMetrics_.get()); + if (cacheMetrics_ != nullptr) { + cacheMetrics_->OnCacheHit(); + } return false; } bmap_iter b_it; if (b1_.Find(key, &b_it)) { - if (b1_.Count() >= b2_.Count()) { - p_ += 1; - } else { - p_ += b2_.Count() / b1_.Count(); - } - if (p_ > c_) p_ = c_; + uint64_t delta = std::min((uint64_t)1, b2_.Count() / b1_.Count()); + IncreaseP(delta); ret = Replace(key, eliminated); b1_.Remove(std::move(b_it), cacheMetrics_.get()); @@ -338,12 +394,8 @@ bool ARCCache::Put(const K& key, const V& value, } if (b2_.Find(key, &b_it)) { - if (b2_.Count() >= b1_.Count()) { - p_ -= 1; - } else { - p_ -= b1_.Count() / b2_.Count(); - } - if (p_ < 0) p_ = 0; + uint64_t delta = 
std::max((uint64_t)1, b1_.Count() / b2_.Count()); + DecreaseP(delta); ret = Replace(key, eliminated); b2_.Remove(std::move(b_it), cacheMetrics_.get()); @@ -351,7 +403,7 @@ bool ARCCache::Put(const K& key, const V& value, return ret; } - if (t1_.Count() + b1_.Count() == c_) { + if (IsCacheFull() && t1_.Count() + b1_.Count() == c_) { if (t1_.Count() < c_) { b1_.RemoveLRU(cacheMetrics_.get()); ret = Replace(key, eliminated); @@ -361,8 +413,14 @@ bool ARCCache::Put(const K& key, const V& value, } else if (t1_.Count() + b1_.Count() < c_) { auto total = t1_.Count() + b1_.Count() + t2_.Count() + b2_.Count(); if (total >= c_) { - if (total == 2 * c_) b2_.RemoveLRU(cacheMetrics_.get()); - Replace(key, eliminated); + if (total == 2 * c_) { + if (b2_.Count() > 0) { + b2_.RemoveLRU(cacheMetrics_.get()); + } else { + b1_.RemoveLRU(cacheMetrics_.get()); + } + } + ret = Replace(key, eliminated); } } t1_.Insert(key, value, cacheMetrics_.get()); @@ -372,11 +430,16 @@ bool ARCCache::Put(const K& key, const V& value, template bool ARCCache::Replace(const K& k, V* eliminated) { + if (!IsCacheFull()) { + return false; + } if (t1_.Count() != 0 && ((t1_.Count() > p_) || (b2_.Find(k, nullptr) && t1_.Count() == p_))) { return Move_T_B(&t1_, &b1_, eliminated); - } else { + } else if (t2_.Count() > 0) { return Move_T_B(&t2_, &b2_, eliminated); + } else { + return Move_T_B(&t1_, &b1_, eliminated); } } @@ -386,6 +449,16 @@ uint64_t ARCCache::Size() { return t1_.Count() + t2_.Count(); } +template +typename ARCCache::ArcSizeInfo +ARCCache::ArcSize() const { + ::curve::common::ReadLockGuard guard(lock_); + + return {b1_.Count(), t1_.Count(), + b2_.Count(), t2_.Count()}; +} + +// This operation detach the key from cache template void ARCCache::Remove(const K& key) { ::curve::common::WriteLockGuard guard(lock_); @@ -397,6 +470,14 @@ void ARCCache::Remove(const K& key) { return; } } + B* bs[]{&b1_, &b2_}; + for (auto b : bs) { + bmap_iter it; + if (b->Find(key, &it)) { + b->Remove(std::move(it), 
cacheMetrics_.get()); + return; + } + } } template @@ -412,9 +493,9 @@ bool ARCCache::GetLast(const V value, K* key) { ::curve::common::ReadLockGuard guard(lock_); T* ts[]{&t1_, &t2_}; for (auto t : ts) { - for (const auto& item : t->list) { + for (const auto& item : t->list_) { if (item.value == value) { - *key = item->map_iter->first; + *key = item.map_iter->first; return true; } } @@ -428,9 +509,9 @@ bool ARCCache::GetLast(K* key, V* value) { T* ts[]{&t1_, &t2_}; for (auto t : ts) { - if (!t->list.empty()) { - *key = t->list.front().map_iter->first; - *value = t->list.front().value; + if (!t->list_.empty()) { + *key = t->list_.front().map_iter->first; + *value = t->list_.front().value; return true; } } @@ -445,7 +526,7 @@ bool ARCCache::GetLast( T* ts[]{&t1_, &t2_}; for (auto t : ts) { - for (const auto& item : t->list) { + for (const auto& item : t->list_) { bool ok = f(item.value); if (ok) { *key = item.map_iter->first; diff --git a/src/common/concurrent/rw_lock.h b/src/common/concurrent/rw_lock.h index d7c47c7d3c..807afb3b8c 100644 --- a/src/common/concurrent/rw_lock.h +++ b/src/common/concurrent/rw_lock.h @@ -23,13 +23,31 @@ #ifndef SRC_COMMON_CONCURRENT_RW_LOCK_H_ #define SRC_COMMON_CONCURRENT_RW_LOCK_H_ -#include #include -#include #include +#include +#include +#include // gettid +#include "include/curve_compiler_specific.h" #include "src/common/uncopyable.h" +// Due to the mixed use of bthread and pthread in some cases, acquiring another +// bthread lock(mutex/rwlock) after acquiring a write lock on a pthread rwlock +// may result in switching the bthread coroutine, and then the operation of +// releasing the previous write lock in the other pthread will not take effect +// (implying that the write lock is still held), thus causing a deadlock. 
+ +// Check pthread rwlock tid between wrlock and unlock +#if defined(ENABLE_CHECK_PTHREAD_WRLOCK_TID) && \ + (ENABLE_CHECK_PTHREAD_WRLOCK_TID == 1) +#define CURVE_CHECK_PTHREAD_WRLOCK_TID 1 +#elif !defined(ENABLE_CHECK_PTHREAD_WRLOCK_TID) +#define CURVE_CHECK_PTHREAD_WRLOCK_TID 1 +#else +#define CURVE_CHECK_PTHREAD_WRLOCK_TID 0 +#endif + namespace curve { namespace common { @@ -51,10 +69,21 @@ class PthreadRWLockBase : public RWLockBase { void WRLock() override { int ret = pthread_rwlock_wrlock(&rwlock_); CHECK(0 == ret) << "wlock failed: " << ret << ", " << strerror(ret); +#if CURVE_CHECK_PTHREAD_WRLOCK_TID + tid_ = gettid(); +#endif } int TryWRLock() override { - return pthread_rwlock_trywrlock(&rwlock_); + int ret = pthread_rwlock_trywrlock(&rwlock_); + if (CURVE_UNLIKELY(ret != 0)) { + return ret; + } + +#if CURVE_CHECK_PTHREAD_WRLOCK_TID + tid_ = gettid(); +#endif + return 0; } void RDLock() override { @@ -67,6 +96,19 @@ class PthreadRWLockBase : public RWLockBase { } void Unlock() override { +#if CURVE_CHECK_PTHREAD_WRLOCK_TID + if (tid_ != 0) { + const pid_t current = gettid(); + // If CHECK here is triggered, please look at the comments at the + // beginning of the file. + // In the meantime, the simplest solution might be to use + // `BthreadRWLock` locks everywhere. 
+ CHECK(tid_ == current) + << ", tid has changed, previous tid: " << tid_ + << ", current tid: " << current; + tid_ = 0; + } +#endif pthread_rwlock_unlock(&rwlock_); } @@ -76,8 +118,14 @@ class PthreadRWLockBase : public RWLockBase { pthread_rwlock_t rwlock_; pthread_rwlockattr_t rwlockAttr_; + +#if CURVE_CHECK_PTHREAD_WRLOCK_TID + pid_t tid_ = 0; +#endif }; +#undef CURVE_CHECK_PTHREAD_WRLOCK_TID + class RWLock : public PthreadRWLockBase { public: RWLock() { @@ -122,7 +170,7 @@ class BthreadRWLock : public RWLockBase { } int TryWRLock() override { - // not support yet + LOG(WARNING) << "TryWRLock not support yet"; return EINVAL; } @@ -132,7 +180,7 @@ class BthreadRWLock : public RWLockBase { } int TryRDLock() override { - // not support yet + LOG(WARNING) << "TryRDLock not support yet"; return EINVAL; } diff --git a/src/common/configuration.cpp b/src/common/configuration.cpp index 0956423a3c..a35db6d357 100644 --- a/src/common/configuration.cpp +++ b/src/common/configuration.cpp @@ -21,13 +21,14 @@ * 2018/08/30 Wenyu Zhou Initial version */ -#include "src/common/configuration.h" - #include -#include +#include #include +#include #include -#include + +#include "src/common/configuration.h" +#include "src/common/string_util.h" namespace curve { namespace common { @@ -38,16 +39,17 @@ bool Configuration::LoadConfig() { if (cFile.is_open()) { std::string line; while (getline(cFile, line)) { - // FIXME: may not remove middle spaces - line.erase(std::remove_if(line.begin(), line.end(), isspace), - line.end()); + Trim(line); if (line.empty() || line[0] == '#') continue; int delimiterPos = line.find("="); std::string key = line.substr(0, delimiterPos); + Trim(key); + int commentPos = line.find("#"); std::string value = line.substr(delimiterPos + 1, commentPos - delimiterPos - 1); + Trim(value); SetValue(key, value); } } else { diff --git a/src/common/encode.h b/src/common/encode.h index 0540ee0193..ab28d11321 100644 --- a/src/common/encode.h +++ b/src/common/encode.h @@ -54,6 
+54,11 @@ inline void EncodeBigEndian_uint32(char* buf, uint32_t value) { buf[3] = value & 0xff; } +inline uint32_t DecodeBigEndian_uint32(const char* buf) { + return (uint32_t(buf[0]) << 24) | (uint32_t(buf[1]) << 16) | + (uint32_t(buf[2]) << 8) | uint32_t(buf[3]); +} + } // namespace common } // namespace curve diff --git a/src/common/log_util.h b/src/common/log_util.h new file mode 100644 index 0000000000..458bc01a87 --- /dev/null +++ b/src/common/log_util.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2023 NetEase Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef SRC_COMMON_LOG_UTIL_H_ +#define SRC_COMMON_LOG_UTIL_H_ + +#include + +namespace curve { +namespace common { + inline void DisableLoggingToStdErr() { + // NOTE: https://github.com/google/glog#setting-flags + FLAGS_stderrthreshold = 3; + } +} // namespace common +} // namespace curve + +#endif // SRC_COMMON_LOG_UTIL_H_ diff --git a/src/common/s3_adapter.h b/src/common/s3_adapter.h index 2adbbfb3bc..1e3203e033 100644 --- a/src/common/s3_adapter.h +++ b/src/common/s3_adapter.h @@ -130,6 +130,7 @@ struct GetObjectAsyncContext : public Aws::Client::AsyncCallerContext { GetObjectAsyncCallBack cb; butil::Timer timer; ContextType type = ContextType::Unkown; + uint64_t start; explicit GetObjectAsyncContext( std::string key, char* buf, off_t offset, size_t len, @@ -143,7 +144,8 @@ struct GetObjectAsyncContext : public Aws::Client::AsyncCallerContext { len(len), cb(std::move(cb)), type(type), - timer(butil::Timer::STARTED) {} + timer(butil::Timer::STARTED), + start(0) {} }; /* diff --git a/src/common/string_util.h b/src/common/string_util.h index 71a43919b1..30f23d1c83 100644 --- a/src/common/string_util.h +++ b/src/common/string_util.h @@ -35,6 +35,7 @@ #include #include #include +#include #include namespace curve { @@ -165,6 +166,27 @@ inline std::string ToHexString(void* p) { return oss.str(); } +// trim from start (in place) +inline void LTrim(std::string &s) { // NOLINT + s.erase(s.begin(), std::find_if(s.begin(), s.end(), [](const char &ch) { + return !std::isspace(ch); + })); +} + +// trim from end (in place) +inline void RTrim(std::string &s) { // NOLINT + s.erase(std::find_if(s.rbegin(), s.rend(), + [](const char &ch) { return !std::isspace(ch); }) + .base(), + s.end()); +} + +// trim from both ends (in place) +inline void Trim(std::string &s) { // NOLINT + LTrim(s); + RTrim(s); +} + } // namespace common } // namespace curve diff --git a/src/common/timeutility.h b/src/common/timeutility.h index 1ba3483d34..4bdd91e6b4 100644 --- 
a/src/common/timeutility.h +++ b/src/common/timeutility.h @@ -57,6 +57,12 @@ class TimeUtility { return localtime(&now)->tm_hour; } + static uint64_t CLockRealTimeMs() { + struct timespec now; + clock_gettime(CLOCK_REALTIME, &now); + return now.tv_sec * 1000L + now.tv_nsec / 1000000; + } + // 时间戳转成标准时间输出在standard里面,时间戳单位为秒 static inline void TimeStampToStandard(time_t timeStamp, std::string* standard) { diff --git a/src/mds/main/main.cpp b/src/mds/main/main.cpp index c0824f3bca..9897226322 100644 --- a/src/mds/main/main.cpp +++ b/src/mds/main/main.cpp @@ -24,6 +24,7 @@ #include "src/mds/server/mds.h" #include "src/mds/common/mds_define.h" +#include "src/common/log_util.h" DEFINE_string(confPath, "conf/mds.conf", "mds confPath"); DEFINE_string(mdsAddr, "127.0.0.1:6666", "mds listen addr"); @@ -107,6 +108,7 @@ int main(int argc, char **argv) { } // initialize logging module + curve::common::DisableLoggingToStdErr(); google::InitGoogleLogging(argv[0]); // reset SIGPIPE handler diff --git a/src/snapshotcloneserver/main.cpp b/src/snapshotcloneserver/main.cpp index b44468b857..3430ff0118 100644 --- a/src/snapshotcloneserver/main.cpp +++ b/src/snapshotcloneserver/main.cpp @@ -22,6 +22,7 @@ #include #include #include "src/snapshotcloneserver/snapshotclone_server.h" +#include "src/common/log_util.h" DEFINE_string(conf, "conf/snapshot_clone_server.conf", "snapshot&clone server config file path"); //NOLINT DEFINE_string(addr, "127.0.0.1:5555", "snapshotcloneserver address"); @@ -80,6 +81,7 @@ int main(int argc, char **argv) { LoadConfigFromCmdline(conf.get()); conf->PrintConfig(); conf->ExposeMetric("snapshot_clone_server_config"); + curve::common::DisableLoggingToStdErr(); google::InitGoogleLogging(argv[0]); snapshotcloneserver_main(conf); } diff --git a/test/common/string_util_test.cpp b/test/common/string_util_test.cpp index 12316db991..d72d6493f1 100644 --- a/test/common/string_util_test.cpp +++ b/test/common/string_util_test.cpp @@ -28,7 +28,7 @@ namespace curve { 
namespace common { -TEST(Common, SpliteString) { +TEST(Common, SplitString) { const struct {std::string path; std::string sep; const int size; std::vector items;} testCases[] = { @@ -59,5 +59,54 @@ TEST(Common, StringToUll) { str = "ffff"; ASSERT_FALSE(StringToUll(str, &out)); } + +TEST(StringUtilTest, LTrim) { + std::array, 4> cases = { + std::array{"hello", "hello"}, + std::array{"", ""}, + std::array{" ", ""}, + std::array{" hello", "hello"}, + }; + + for (auto &c : cases) { + curve::common::LTrim(c[0]); + EXPECT_EQ(c[0], c[1]); + } +} + +TEST(StringUtilTest, RTrim) { + std::array, 4> cases = { + std::array{"hello", "hello"}, + std::array{"", ""}, + std::array{" ", ""}, + std::array{"hello ", "hello"}, + }; + + for (auto &c : cases) { + curve::common::RTrim(c[0]); + EXPECT_EQ(c[0], c[1]); + } +} + +TEST(StringUtilTest, Trim) { + std::array, 10> cases = { + std::array{"hello", "hello"}, + std::array{" hello", "hello"}, + std::array{"hello ", "hello"}, + std::array{" hello ", "hello"}, + std::array{"S3 Browser", "S3 Browser"}, + std::array{"S3 Browser ", "S3 Browser"}, + std::array{" S3 Browser", "S3 Browser"}, + std::array{" S3 Browser ", "S3 Browser"}, + std::array{" ", ""}, + std::array{"", ""}, + }; + + for (auto &c : cases) { + curve::common::Trim(c[0]); + EXPECT_EQ(c[0], c[1]); + } +} + } // namespace common } // namespace curve diff --git a/thirdparties/brpc/0002-Support-fork-without-exec.patch b/thirdparties/brpc/0002-Support-fork-without-exec.patch new file mode 100644 index 0000000000..96b590e2b9 --- /dev/null +++ b/thirdparties/brpc/0002-Support-fork-without-exec.patch @@ -0,0 +1,120 @@ +From 2d88cfa7b52ed6553f1884a669a2b9bc17aff656 Mon Sep 17 00:00:00 2001 +From: jamesge +Date: Fri, 15 Nov 2019 01:06:03 -0800 +Subject: [PATCH 1/5] Support fork without exec + +--- + src/bvar/detail/sampler.cpp | 72 ++++++++++++++++++++++++++++--------- + 1 file changed, 56 insertions(+), 16 deletions(-) + +diff --git a/src/bvar/detail/sampler.cpp 
b/src/bvar/detail/sampler.cpp +index 1de80970..23cfbd8b 100644 +--- a/src/bvar/detail/sampler.cpp ++++ b/src/bvar/detail/sampler.cpp +@@ -54,13 +54,11 @@ struct CombineSampler { + // deletion is taken place in the thread as well. + class SamplerCollector : public bvar::Reducer { + public: +- SamplerCollector() : _created(false), _stop(false), _cumulated_time_us(0) { +- int rc = pthread_create(&_tid, NULL, sampling_thread, this); +- if (rc != 0) { +- LOG(FATAL) << "Fail to create sampling_thread, " << berror(rc); +- } else { +- _created = true; +- } ++ SamplerCollector() ++ : _created(false) ++ , _stop(false) ++ , _cumulated_time_us(0) { ++ create_sampling_thread(); + } + ~SamplerCollector() { + if (_created) { +@@ -70,33 +68,75 @@ public: + } + } + +- static double get_cumulated_time(void* arg) { +- return ((SamplerCollector*)arg)->_cumulated_time_us / 1000.0 / 1000.0; ++private: ++ // Support for fork: ++ // * The singleton can be null before forking, the child callback will not ++ // be registered. ++ // * If the singleton is not null before forking, the child callback will ++ // be registered and the sampling thread will be re-created. ++ // * A forked program can be forked again. 
++ ++ static void child_callback_atfork() { ++ butil::get_leaky_singleton()->after_forked_as_child(); ++ } ++ ++ void create_sampling_thread() { ++ const int rc = pthread_create(&_tid, NULL, sampling_thread, this); ++ if (rc != 0) { ++ LOG(FATAL) << "Fail to create sampling_thread, " << berror(rc); ++ } else { ++ _created = true; ++ pthread_atfork(NULL, NULL, child_callback_atfork); ++ } ++ } ++ ++ void after_forked_as_child() { ++ _created = false; ++ create_sampling_thread(); + } + +-private: + void run(); + + static void* sampling_thread(void* arg) { +- ((SamplerCollector*)arg)->run(); ++ static_cast(arg)->run(); + return NULL; + } + ++ static double get_cumulated_time(void* arg) { ++ return static_cast(arg)->_cumulated_time_us / 1000.0 / 1000.0; ++ } ++ + private: + bool _created; + bool _stop; ++ pid_t _created_pid; + int64_t _cumulated_time_us; + pthread_t _tid; + }; + ++PassiveStatus* g_cumulated_time_bvar = NULL; ++bvar::PerSecond >* g_sampling_thread_usage_bvar = NULL; ++ + void SamplerCollector::run() { +- butil::LinkNode root; +- int consecutive_nosleep = 0; + #ifndef UNIT_TEST +- // PassiveStatus cumulated_time(get_cumulated_time, this); +- // bvar::PerSecond > usage( +- // "bvar_sampler_collector_usage", &cumulated_time, 10); ++ // NOTE: ++ // * Following vars can't be created on thread's stack since this thread ++ // may be adandoned at any time after forking. ++ // * They can't created inside the constructor of SamplerCollector as well, ++ // which results in deadlock. 
++ if (g_cumulated_time_bvar == NULL) { ++ g_cumulated_time_bvar = ++ new PassiveStatus(get_cumulated_time, this); ++ } ++ if (g_sampling_thread_usage_bvar == NULL) { ++ g_sampling_thread_usage_bvar = ++ new bvar::PerSecond >( ++ "bvar_sampler_collector_usage", g_cumulated_time_bvar, 10); ++ } + #endif ++ ++ butil::LinkNode root; ++ int consecutive_nosleep = 0; + while (!_stop) { + int64_t abstime = butil::gettimeofday_us(); + Sampler* s = this->reset(); +-- +2.37.2 + diff --git a/thirdparties/brpc/0003-Add-docs-on-fork-w-o-exec.patch b/thirdparties/brpc/0003-Add-docs-on-fork-w-o-exec.patch new file mode 100644 index 0000000000..6c8253f5b5 --- /dev/null +++ b/thirdparties/brpc/0003-Add-docs-on-fork-w-o-exec.patch @@ -0,0 +1,57 @@ +From 0a200eebce9abad390342d880bb446099bcfd1c3 Mon Sep 17 00:00:00 2001 +From: Ge Jun +Date: Fri, 15 Nov 2019 10:18:06 +0000 +Subject: [PATCH 2/5] Add docs on fork w/o exec + +--- + docs/cn/server.md | 11 +++++++++++ + docs/en/server.md | 11 +++++++++++ + 2 files changed, 22 insertions(+) + +diff --git a/docs/cn/server.md b/docs/cn/server.md +index a2262c6d..9f519871 100644 +--- a/docs/cn/server.md ++++ b/docs/cn/server.md +@@ -341,6 +341,17 @@ server端会自动尝试其支持的协议,无需用户指定。`cntl->protoco + + 如果你有更多的协议需求,可以联系我们。 + ++# fork without exec ++一般来说,[fork](https://linux.die.net/man/3/fork)出的子进程应尽快调用[exec](https://linux.die.net/man/3/exec)以重置所有状态,中间只应调用满足async-signal-safe的函数。这么使用fork的brpc程序在之前的版本也不会有问题。 ++ ++但在一些场景中,用户想直接运行fork出的子进程,而不调用exec。由于fork只复制其调用者的线程,其余线程便随之消失了。对应到brpc中,bvar会依赖一个sampling_thread采样各种信息,在fork后便消失了,现象是很多bvar归零。 ++ ++最新版本的brpc会在fork后重建这个线程(如有必要),从而使bvar在fork后能正常工作,再次fork也可以。已知问题是fork后cpu profiler不正常。然而,这并不意味着用户可随意地fork,不管是brpc还是上层应用都会大量地创建线程,它们在fork后不会被重建,因为: ++* 大部分fork会紧接exec,浪费了重建 ++* 给代码编写带来很多的麻烦和复杂度 ++ ++brpc的策略是按需创建这类线程,同时fork without exec必须发生在所有可能创建这些线程的代码前。具体地说,至少**发生在初始化所有Server/Channel/应用代码前**,越早越好,不遵守这个约定的fork会导致程序不正常。另外,不支持fork without exec的lib相当普遍,最好还是避免这种用法。 ++ + # 设置 + + ## 版本 +diff --git a/docs/en/server.md 
b/docs/en/server.md +index d604c1fe..f28fd96f 100644 +--- a/docs/en/server.md ++++ b/docs/en/server.md +@@ -344,6 +344,17 @@ Server detects supported protocols automatically, without assignment from users. + + If you need more protocols, contact us. + ++# fork without exec ++In general, [forked](https://linux.die.net/man/3/fork) subprocess should call [exec](https://linux.die.net/man/3/exec) ASAP, before which only async-signal-safe functions should be called. brpc programs using fork like this should work correctly even in previous versions. ++ ++But in some scenarios, users continue the subprocess without exec. Since fork only copies its caller's thread, which causes other threads to disappear after fork. In the case of brpc, bvar depends on a sampling_thread to sample various information, which disappears after fork and causes many bvars to be zeros. ++ ++Latest brpc re-creates the thread after fork(when necessary) to make bvar work correctly, and can be forked again. A known problem is that the cpu profiler does not work after fork. However users still can't call fork at any time, since brpc and its applications create threads extensively, which are not re-created after fork: ++* most fork continues with exec, which wastes re-creations ++* bring too many troubles and complexities to the code ++ ++brpc's strategy is to create these threads on demand and fork without exec should happen before all code that may create the threads. Specifically, **fork without exec should happen before initializing all Servers/Channels/Applications, earlier is better. fork not obeying this causes the program dysfunctional. BTW, fork without exec better be avoided because many libraries do not support it. 
++ + # Settings + + ## Version +-- +2.37.2 + diff --git a/thirdparties/brpc/0004-not-register-pthread_atfork-in-child-process.patch b/thirdparties/brpc/0004-not-register-pthread_atfork-in-child-process.patch new file mode 100644 index 0000000000..88cc9a3e29 --- /dev/null +++ b/thirdparties/brpc/0004-not-register-pthread_atfork-in-child-process.patch @@ -0,0 +1,39 @@ +From 7077d2c4d71bfa9699b738252fe06a468a4eca34 Mon Sep 17 00:00:00 2001 +From: jamesge +Date: Wed, 18 Mar 2020 17:34:09 +0800 +Subject: [PATCH 3/5] not register pthread_atfork in child process + +--- + src/bvar/detail/sampler.cpp | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +diff --git a/src/bvar/detail/sampler.cpp b/src/bvar/detail/sampler.cpp +index 23cfbd8b..06d5cdbd 100644 +--- a/src/bvar/detail/sampler.cpp ++++ b/src/bvar/detail/sampler.cpp +@@ -41,6 +41,10 @@ struct CombineSampler { + } + }; + ++// True iff pthread_atfork was called. The callback to atfork works for child ++// of child as well, no need to register in the child again. ++static bool registered_atfork = false; ++ + // Call take_sample() of all scheduled samplers. + // This can be done with regular timer thread, but it's way too slow(global + // contention + log(N) heap manipulations). 
We need it to be super fast so that +@@ -86,7 +90,10 @@ private: + LOG(FATAL) << "Fail to create sampling_thread, " << berror(rc); + } else { + _created = true; +- pthread_atfork(NULL, NULL, child_callback_atfork); ++ if (!registered_atfork) { ++ registered_atfork = true; ++ pthread_atfork(NULL, NULL, child_callback_atfork); ++ } + } + } + +-- +2.37.2 + diff --git a/thirdparties/brpc/0005-Fix-LatencyRecorder-qps-not-accurate.patch b/thirdparties/brpc/0005-Fix-LatencyRecorder-qps-not-accurate.patch new file mode 100644 index 0000000000..e970d86ffd --- /dev/null +++ b/thirdparties/brpc/0005-Fix-LatencyRecorder-qps-not-accurate.patch @@ -0,0 +1,103 @@ +From 1e866ba4f2ea633a15e0bcaebe0f84f7b0fcc1c2 Mon Sep 17 00:00:00 2001 +From: wangweibing +Date: Wed, 2 Mar 2022 04:26:51 +0000 +Subject: [PATCH 4/5] Fix LatencyRecorder qps not accurate + +--- + src/bvar/latency_recorder.cpp | 15 +++++++++--- + test/bvar_recorder_unittest.cpp | 42 +++++++++++++++++++++++++++++++++ + 2 files changed, 54 insertions(+), 3 deletions(-) + +diff --git a/src/bvar/latency_recorder.cpp b/src/bvar/latency_recorder.cpp +index 7a27e170..fe8c776e 100644 +--- a/src/bvar/latency_recorder.cpp ++++ b/src/bvar/latency_recorder.cpp +@@ -87,14 +87,23 @@ int CDF::describe_series( + return 0; + } + ++// Return random int value with expectation = `dval' ++static int64_t double_to_random_int(double dval) { ++ int64_t ival = static_cast(dval); ++ if (dval > ival + butil::fast_rand_double()) { ++ ival += 1; ++ } ++ return ival; ++} ++ + static int64_t get_window_recorder_qps(void* arg) { + detail::Sample s; +- static_cast(arg)->get_span(1, &s); ++ static_cast(arg)->get_span(&s); + // Use floating point to avoid overflow. 
+ if (s.time_us <= 0) { + return 0; + } +- return static_cast(round(s.data.num * 1000000.0 / s.time_us)); ++ return double_to_random_int(s.data.num * 1000000.0 / s.time_us); + } + + static int64_t get_recorder_count(void* arg) { +@@ -174,7 +183,7 @@ int64_t LatencyRecorder::qps(time_t window_size) const { + if (s.time_us <= 0) { + return 0; + } +- return static_cast(round(s.data.num * 1000000.0 / s.time_us)); ++ return detail::double_to_random_int(s.data.num * 1000000.0 / s.time_us); + } + + int LatencyRecorder::expose(const butil::StringPiece& prefix1, +diff --git a/test/bvar_recorder_unittest.cpp b/test/bvar_recorder_unittest.cpp +index 412ec36c..823e88b2 100644 +--- a/test/bvar_recorder_unittest.cpp ++++ b/test/bvar_recorder_unittest.cpp +@@ -192,4 +192,46 @@ TEST(RecorderTest, perf) { + << "ns per sample with " << ARRAY_SIZE(threads) + << " threads"; + } ++ ++TEST(RecorderTest, latency_recorder_qps_accuracy) { ++ bvar::LatencyRecorder lr1(2); // set windows size to 2s ++ bvar::LatencyRecorder lr2(2); ++ bvar::LatencyRecorder lr3(2); ++ bvar::LatencyRecorder lr4(2); ++ usleep(2000000); // wait sampler to sample 2 times ++ ++ auto write = [](bvar::LatencyRecorder& lr, int times) { ++ for (int i = 0; i < times; ++i) { ++ lr << 1; ++ } ++ }; ++ write(lr1, 100); ++ write(lr2, 101); ++ write(lr3, 3); ++ write(lr4, 1); ++ usleep(1000000); // wait sampler to sample 1 time ++ ++ auto read = [](bvar::LatencyRecorder& lr, double exp_qps, int window_size = 0) { ++ int64_t qps_sum = 0; ++ int64_t exp_qps_int = (int64_t)exp_qps; ++ for (int i = 0; i < 1000; ++i) { ++ int64_t qps = window_size ? 
lr.qps(window_size): lr.qps(); ++ EXPECT_GE(qps, exp_qps_int - 1); ++ EXPECT_LE(qps, exp_qps_int + 1); ++ qps_sum += qps; ++ } ++ double err = fabs(qps_sum / 1000.0 - exp_qps); ++ return err; ++ }; ++ ASSERT_GT(0.1, read(lr1, 100/2.0)); ++ ASSERT_GT(0.1, read(lr2, 101/2.0)); ++ ASSERT_GT(0.1, read(lr3, 3/2.0)); ++ ASSERT_GT(0.1, read(lr4, 1/2.0)); ++ ++ ASSERT_GT(0.1, read(lr1, 100/3.0, 3)); ++ ASSERT_GT(0.1, read(lr2, 101/3.0, 3)); ++ ASSERT_GT(0.1, read(lr3, 3/3.0, 3)); ++ ASSERT_GT(0.1, read(lr4, 1/3.0, 3)); ++} ++ + } // namespace +-- +2.37.2 + diff --git a/thirdparties/brpc/0006-fix-1973-1863.patch b/thirdparties/brpc/0006-fix-1973-1863.patch new file mode 100644 index 0000000000..0a9ddd32b5 --- /dev/null +++ b/thirdparties/brpc/0006-fix-1973-1863.patch @@ -0,0 +1,37 @@ +From aa9987efe7ce1d13969f03194d8894060a6d58b2 Mon Sep 17 00:00:00 2001 +From: HU +Date: Mon, 1 Aug 2022 10:15:44 +0800 +Subject: [PATCH 5/5] fix #1973 (#1863) + +Co-authored-by: XiguoHu +--- + src/bvar/detail/sampler.cpp | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/src/bvar/detail/sampler.cpp b/src/bvar/detail/sampler.cpp +index 06d5cdbd..7baf20b7 100644 +--- a/src/bvar/detail/sampler.cpp ++++ b/src/bvar/detail/sampler.cpp +@@ -15,6 +15,7 @@ + // Author: Ge,Jun (gejun@baidu.com) + // Date: Tue Jul 28 18:14:40 CST 2015 + ++#include + #include "butil/time.h" + #include "butil/memory/singleton_on_pthread_once.h" + #include "bvar/reducer.h" +@@ -124,7 +125,11 @@ private: + PassiveStatus* g_cumulated_time_bvar = NULL; + bvar::PerSecond >* g_sampling_thread_usage_bvar = NULL; + ++DEFINE_int32(bvar_sampler_thread_start_delay_us, 10000, "bvar sampler thread start delay us"); ++ + void SamplerCollector::run() { ++ ::usleep(FLAGS_bvar_sampler_thread_start_delay_us); ++ + #ifndef UNIT_TEST + // NOTE: + // * Following vars can't be created on thread's stack since this thread +-- +2.37.2 + diff --git a/tools-v2/README.md b/tools-v2/README.md index cc24dc284f..309ed57734 100644 --- 
a/tools-v2/README.md +++ b/tools-v2/README.md @@ -95,6 +95,8 @@ A tool for CurveFS & CurveBs. - [check consistency](#check-consistency) - [snapshot](#snapshot) - [snapshot copyset](#snapshot-copyset) + - [stop](#stop) + - [stop snapshot](#stop-snapshot) - [Comparison of old and new commands](#comparison-of-old-and-new-commands) - [curve fs](#curve-fs) - [curve bs](#curve-bs) @@ -1971,6 +1973,30 @@ Output: +----------------+---------+ ``` +#### stop + +##### stop snapshot + +stop snapshot in curve bs + +Usage: + +```shell +curve bs stop snapshot +``` + +Output: + +```shell ++--------------------------------------+--------------+---------+ +| SNAPSHOTID | SNAPSHOTNAME | RESULT | ++--------------------------------------+--------------+---------+ +| 9aa2b4c5-f27b-40a2-82c9-4e0ad6093567 | testsnap | success | ++--------------------------------------+--------------+---------+ +| 0171a33b-17b7-4215-9f00-6d8de2686f77 | testsnap1 | success | ++--------------------------------------+--------------+---------+ +``` + ## Comparison of old and new commands ### curve fs diff --git a/tools-v2/internal/error/error.go b/tools-v2/internal/error/error.go index 7a1b71d870..aea8ad92d3 100644 --- a/tools-v2/internal/error/error.go +++ b/tools-v2/internal/error/error.go @@ -380,15 +380,12 @@ var ( ErrBsListZone = func() *CmdError { return NewInternalCmdError(39, "list zone fail. the error is %s") } - ErrBsDeleteFile = func() *CmdError { return NewInternalCmdError(40, "delete file fail. 
the error is %s") } - ErrRespTypeNoExpected = func() *CmdError { return NewInternalCmdError(41, "the response type is not as expected, should be: %s") } - ErrGetPeer = func() *CmdError { return NewInternalCmdError(42, "invalid peer args, err: %s") } @@ -498,7 +495,10 @@ var ( return NewInternalCmdError(78, fmt.Sprintf("list snapshot fail, requestId: %s, code: %s, message: %s", requestId, code, message)) } ErrBsGetCloneRecover = func() *CmdError { - return NewInternalCmdError(73, "get clone-recover fail, err: %s") + return NewInternalCmdError(79, "get clone-recover fail, err: %s") + } + ErrInvalidMetaServerAddr = func() *CmdError { + return NewInternalCmdError(80, "invalid metaserver external addr: %s") } // http error @@ -573,6 +573,19 @@ var ( } return NewRpcReultCmdError(statusCode, message) } + ErrUpdateFs = func(statusCode int) *CmdError { + var message string + code := mds.FSStatusCode(statusCode) + switch code { + case mds.FSStatusCode_OK: + message = "success" + case mds.FSStatusCode_NOT_FOUND: + message = "fs not found!" 
+ default: + message = fmt.Sprintf("update fs failed!, error is %s", mds.FSStatusCode_name[int32(code)]) + } + return NewRpcReultCmdError(statusCode, message) + } ErrGetCopysetsInfo = func(statusCode int) *CmdError { code := topology.TopoStatusCode(statusCode) message := fmt.Sprintf("get copysets info failed: status code is %s", code.String()) diff --git a/tools-v2/internal/utils/snapshot/snapshot.go b/tools-v2/internal/utils/snapshot/snapshot.go index b4fdbf7df0..f160f79e55 100644 --- a/tools-v2/internal/utils/snapshot/snapshot.go +++ b/tools-v2/internal/utils/snapshot/snapshot.go @@ -28,6 +28,8 @@ import ( ) func NewQuerySubUri(params map[string]any) string { + params[QueryVersion] = Version + values := strings.Builder{} for key, value := range params { if value != "" && value != nil { diff --git a/tools-v2/pkg/cli/command/curvebs/bs.go b/tools-v2/pkg/cli/command/curvebs/bs.go index 3fa7fa86c6..c82ca69528 100644 --- a/tools-v2/pkg/cli/command/curvebs/bs.go +++ b/tools-v2/pkg/cli/command/curvebs/bs.go @@ -34,6 +34,7 @@ import ( "github.com/opencurve/curve/tools-v2/pkg/cli/command/curvebs/query" "github.com/opencurve/curve/tools-v2/pkg/cli/command/curvebs/snapshot" "github.com/opencurve/curve/tools-v2/pkg/cli/command/curvebs/status" + "github.com/opencurve/curve/tools-v2/pkg/cli/command/curvebs/stop" "github.com/opencurve/curve/tools-v2/pkg/cli/command/curvebs/update" ) @@ -54,6 +55,7 @@ func (bsCmd *CurveBsCommand) AddSubCommands() { clean_recycle.NewCleanRecycleCommand(), check.NewCheckCommand(), snapshot.NewSnapshotCommand(), + stop.NewStopCommand(), ) } diff --git a/tools-v2/pkg/cli/command/curvebs/stop/snapshot/snapshot.go b/tools-v2/pkg/cli/command/curvebs/stop/snapshot/snapshot.go new file mode 100644 index 0000000000..7dd826f392 --- /dev/null +++ b/tools-v2/pkg/cli/command/curvebs/stop/snapshot/snapshot.go @@ -0,0 +1,152 @@ +/* + * Copyright (c) 2023 NetEase Inc. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Project: CurveCli + * Created Date: 2023-11-02 + * Author: ZackSoul + */ +package snapshot + +import ( + "time" + + cmderror "github.com/opencurve/curve/tools-v2/internal/error" + cobrautil "github.com/opencurve/curve/tools-v2/internal/utils" + snapshotutil "github.com/opencurve/curve/tools-v2/internal/utils/snapshot" + basecmd "github.com/opencurve/curve/tools-v2/pkg/cli/command" + listSnapshot "github.com/opencurve/curve/tools-v2/pkg/cli/command/curvebs/list/snapshot" + "github.com/opencurve/curve/tools-v2/pkg/config" + "github.com/opencurve/curve/tools-v2/pkg/output" + "github.com/spf13/cobra" +) + +const ( + snapshotExample = `$ curve bs stop snapshot` +) + +type SnapShotCommand struct { + basecmd.FinalCurveCmd + snapshotAddrs []string + timeout time.Duration + + user string + file string + uuid string +} + +var _ basecmd.FinalCurveCmdFunc = (*SnapShotCommand)(nil) + +func NewSnapShotCommand() *cobra.Command { + return NewStopSnapShotCommand().Cmd +} + +func NewStopSnapShotCommand() *SnapShotCommand { + snapShotCommand := &SnapShotCommand{ + FinalCurveCmd: basecmd.FinalCurveCmd{ + Use: "snapshot", + Short: "stop snapshot in curvebs", + Example: snapshotExample, + }, + } + + basecmd.NewFinalCurveCli(&snapShotCommand.FinalCurveCmd, snapShotCommand) + return snapShotCommand +} + +func (sCmd *SnapShotCommand) AddFlags() { + config.AddBsSnapshotCloneFlagOption(sCmd.Cmd) + 
config.AddHttpTimeoutFlag(sCmd.Cmd) + config.AddBsUserOptionFlag(sCmd.Cmd) + config.AddBsSnapshotIDOptionFlag(sCmd.Cmd) + config.AddBsPathOptionFlag(sCmd.Cmd) +} + +func (sCmd *SnapShotCommand) Init(cmd *cobra.Command, args []string) error { + snapshotAddrs, err := config.GetBsSnapshotAddrSlice(sCmd.Cmd) + if err.TypeCode() != cmderror.CODE_SUCCESS || len(snapshotAddrs) == 0 { + return err.ToError() + } + sCmd.snapshotAddrs = snapshotAddrs + sCmd.timeout = config.GetFlagDuration(sCmd.Cmd, config.HTTPTIMEOUT) + sCmd.user = config.GetBsFlagString(sCmd.Cmd, config.CURVEBS_USER) + sCmd.file = config.GetBsFlagString(sCmd.Cmd, config.CURVEBS_PATH) + sCmd.uuid = config.GetBsFlagString(sCmd.Cmd, config.CURVEBS_SNAPSHOT_ID) + header := []string{ + cobrautil.ROW_SNAPSHOT_ID, + cobrautil.ROW_SNAPSHOT_NAME, + cobrautil.ROW_RESULT, + } + sCmd.SetHeader(header) + return nil +} + +func (sCmd *SnapShotCommand) Print(cmd *cobra.Command, args []string) error { + return output.FinalCmdOutput(&sCmd.FinalCurveCmd, sCmd) +} + +func (sCmd *SnapShotCommand) RunCommand(cmd *cobra.Command, args []string) error { + params := map[string]any{ + snapshotutil.QueryAction: snapshotutil.ActionGetFileSnapshotList, + snapshotutil.QueryUser: sCmd.user, + snapshotutil.QueryFile: sCmd.file, + snapshotutil.QueryLimit: snapshotutil.Limit, + snapshotutil.QueryOffset: snapshotutil.Offset, + } + if sCmd.uuid != "*" { + params[snapshotutil.QueryUUID] = sCmd.uuid + } + snapshotsList, err := listSnapshot.ListSnapShot(sCmd.snapshotAddrs, sCmd.timeout, params) + if err != nil { + sCmd.Error = err + return sCmd.Error.ToError() + } + rows := make([]map[string]string, 0) + for _, snapshot := range snapshotsList { + row := make(map[string]string) + err := StopSnapShot(sCmd.snapshotAddrs, sCmd.timeout, snapshot) + row[cobrautil.ROW_SNAPSHOT_ID] = snapshot.UUID + row[cobrautil.ROW_SNAPSHOT_NAME] = snapshot.Name + if err.TypeCode() == cmderror.CODE_SUCCESS { + row[cobrautil.ROW_RESULT] = cobrautil.ROW_VALUE_SUCCESS + } 
else { + row[cobrautil.ROW_RESULT] = cobrautil.ROW_VALUE_FAILED + } + rows = append(rows, row) + } + list := cobrautil.ListMap2ListSortByKeys(rows, sCmd.Header, []string{cobrautil.ROW_SNAPSHOT_NAME, cobrautil.ROW_SNAPSHOT_ID}) + sCmd.TableNew.AppendBulk(list) + sCmd.Result = rows + sCmd.Error = cmderror.Success() + return nil +} + +func (sCmd *SnapShotCommand) ResultPlainOutput() error { + return output.FinalCmdOutputPlain(&sCmd.FinalCurveCmd) +} + +func StopSnapShot(addrs []string, timeout time.Duration, snapshot *snapshotutil.SnapshotInfo) *cmderror.CmdError { + params := map[string]any{ + snapshotutil.QueryAction: snapshotutil.ActionCancelSnapshot, + snapshotutil.QueryUser: snapshot.User, + snapshotutil.QueryUUID: snapshot.UUID, + snapshotutil.QueryFile: snapshot.File, + } + subUri := snapshotutil.NewQuerySubUri(params) + metric := basecmd.NewMetric(addrs, subUri, timeout) + _, err := basecmd.QueryMetric(metric) + return err +} diff --git a/tools-v2/pkg/cli/command/curvebs/stop/stop.go b/tools-v2/pkg/cli/command/curvebs/stop/stop.go new file mode 100644 index 0000000000..d4bbb01a93 --- /dev/null +++ b/tools-v2/pkg/cli/command/curvebs/stop/stop.go @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2023 NetEase Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * Project: CurveCli + * Created Date: 2023-11-02 + * Author: ZackSoul + */ +package stop + +import ( + basecmd "github.com/opencurve/curve/tools-v2/pkg/cli/command" + snapshot "github.com/opencurve/curve/tools-v2/pkg/cli/command/curvebs/stop/snapshot" + "github.com/spf13/cobra" +) + +type StopCommand struct { + basecmd.MidCurveCmd +} + +var _ basecmd.MidCurveCmdFunc = (*StopCommand)(nil) + +func (sCmd *StopCommand) AddSubCommands() { + sCmd.Cmd.AddCommand( + snapshot.NewSnapShotCommand(), + ) +} + +func NewStopCommand() *cobra.Command { + sCmd := &StopCommand{ + basecmd.MidCurveCmd{ + Use: "stop", + Short: "stop resources in the curvebs", + }, + } + return basecmd.NewMidCurveCli(&sCmd.MidCurveCmd, sCmd) +} diff --git a/tools-v2/pkg/cli/command/curvefs/status/copyset/copyset.go b/tools-v2/pkg/cli/command/curvefs/status/copyset/copyset.go index 42c8c51187..29334a3a79 100644 --- a/tools-v2/pkg/cli/command/curvefs/status/copyset/copyset.go +++ b/tools-v2/pkg/cli/command/curvefs/status/copyset/copyset.go @@ -74,11 +74,10 @@ func (cCmd *CopysetCommand) Init(cmd *cobra.Command, args []string) error { poolIdVec = append(poolIdVec, fmt.Sprintf("%d", info.GetPoolId())) } if len(copysetIdVec) == 0 { - var err error cCmd.Error = cmderror.ErrSuccess() cCmd.Result = "No copyset found" cCmd.health = cobrautil.HEALTH_OK - return err + return nil } copysetIds := strings.Join(copysetIdVec, ",") poolIds := strings.Join(poolIdVec, ",") diff --git a/tools-v2/pkg/cli/command/curvefs/status/etcd/etcd.go b/tools-v2/pkg/cli/command/curvefs/status/etcd/etcd.go index ab25299415..8eee09d842 100644 --- a/tools-v2/pkg/cli/command/curvefs/status/etcd/etcd.go +++ b/tools-v2/pkg/cli/command/curvefs/status/etcd/etcd.go @@ -83,6 +83,7 @@ func (eCmd *EtcdCommand) Init(cmd *cobra.Command, args []string) error { // set main addr etcdAddrs, addrErr := config.GetFsEtcdAddrSlice(eCmd.Cmd) if addrErr.TypeCode() != cmderror.CODE_SUCCESS { + eCmd.Error = addrErr return 
fmt.Errorf(addrErr.Message) } for _, addr := range etcdAddrs { diff --git a/tools-v2/pkg/cli/command/curvefs/status/mds/mds.go b/tools-v2/pkg/cli/command/curvefs/status/mds/mds.go index df7a29651a..b1f5702f11 100644 --- a/tools-v2/pkg/cli/command/curvefs/status/mds/mds.go +++ b/tools-v2/pkg/cli/command/curvefs/status/mds/mds.go @@ -76,12 +76,14 @@ func (mCmd *MdsCommand) Init(cmd *cobra.Command, args []string) error { // set main addr mainAddrs, addrErr := config.GetFsMdsAddrSlice(mCmd.Cmd) if addrErr.TypeCode() != cmderror.CODE_SUCCESS { + mCmd.Error = addrErr return fmt.Errorf(addrErr.Message) } // set dummy addr dummyAddrs, addrErr := config.GetFsMdsDummyAddrSlice(mCmd.Cmd) if addrErr.TypeCode() != cmderror.CODE_SUCCESS { + mCmd.Error = addrErr return fmt.Errorf(addrErr.Message) } for _, addr := range dummyAddrs { diff --git a/tools-v2/pkg/cli/command/curvefs/status/metaserver/metaserver.go b/tools-v2/pkg/cli/command/curvefs/status/metaserver/metaserver.go index 26e264e5fa..f2ab22dce1 100644 --- a/tools-v2/pkg/cli/command/curvefs/status/metaserver/metaserver.go +++ b/tools-v2/pkg/cli/command/curvefs/status/metaserver/metaserver.go @@ -88,7 +88,9 @@ func (mCmd *MetaserverCommand) Init(cmd *cobra.Command, args []string) error { for i, addr := range externalAddrs { if !config.IsValidAddr(addr) { - return fmt.Errorf("invalid metaserver external addr: %s", addr) + mCmd.Error = cmderror.ErrInvalidMetaServerAddr() + mCmd.Error.Format(addr) + return fmt.Errorf(mCmd.Error.Message) } // set metrics diff --git a/tools-v2/pkg/cli/command/curvefs/update/fs/fs.go b/tools-v2/pkg/cli/command/curvefs/update/fs/fs.go index f4b5ad2c2d..54628fc1d6 100644 --- a/tools-v2/pkg/cli/command/curvefs/update/fs/fs.go +++ b/tools-v2/pkg/cli/command/curvefs/update/fs/fs.go @@ -83,7 +83,7 @@ func (fCmd *FsCommand) AddFlags() { func (fCmd *FsCommand) Init(cmd *cobra.Command, args []string) error { // args check - fsName, _ := cmd.Flags().GetString("fsName") + fsName, _ := 
cmd.Flags().GetString(config.CURVEFS_FSNAME) request := &mds.UpdateFsInfoRequest{ FsName: &fsName, } @@ -131,10 +131,10 @@ func (fCmd *FsCommand) RunCommand(cmd *cobra.Command, args []string) error { } response := result.(*mds.UpdateFsInfoResponse) - errCreate := cmderror.ErrCreateFs(int(response.GetStatusCode())) + errUpdateFs := cmderror.ErrUpdateFs(int(response.GetStatusCode())) row := map[string]string{ cobrautil.ROW_FS_NAME: fCmd.Rpc.Request.GetFsName(), - cobrautil.ROW_RESULT: errCreate.Message, + cobrautil.ROW_RESULT: errUpdateFs.Message, } fCmd.TableNew.Append(cobrautil.Map2List(row, fCmd.Header)) diff --git a/tools-v2/pkg/cli/command/curvefs/update/mds/clientMdsAddrsOverride.go b/tools-v2/pkg/cli/command/curvefs/update/mds/clientMdsAddrsOverride.go new file mode 100644 index 0000000000..9f5da47ccb --- /dev/null +++ b/tools-v2/pkg/cli/command/curvefs/update/mds/clientMdsAddrsOverride.go @@ -0,0 +1,140 @@ +/* + * Copyright (c) 2023 NetEase Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package mds + +import ( + "context" + "fmt" + + cmderror "github.com/opencurve/curve/tools-v2/internal/error" + cobrautil "github.com/opencurve/curve/tools-v2/internal/utils" + basecmd "github.com/opencurve/curve/tools-v2/pkg/cli/command" + "github.com/opencurve/curve/tools-v2/pkg/config" + "github.com/opencurve/curve/tools-v2/pkg/output" + mds "github.com/opencurve/curve/tools-v2/proto/curvefs/proto/mds" + "github.com/spf13/cobra" + "github.com/spf13/viper" + "google.golang.org/grpc" +) + +const ( + example = `$ curve fs update mds client-mds-addrs-override 1.2.3.4:1234,2.3.4.5:2345` +) + +type SetClientMdsAddrsOverride struct { + Info *basecmd.Rpc + Request *mds.SetClientMdsAddrsOverrideRequest + mdsClient mds.MdsServiceClient +} + +var _ basecmd.RpcFunc = (*SetClientMdsAddrsOverride)(nil) // check interface + +type SetClientMdsAddrsOverrideCommand struct { + basecmd.FinalCurveCmd + Rpc *SetClientMdsAddrsOverride +} + +var _ basecmd.FinalCurveCmdFunc = (*SetClientMdsAddrsOverrideCommand)(nil) // check interface + +func (ufRp *SetClientMdsAddrsOverride) NewRpcClient(cc grpc.ClientConnInterface) { + ufRp.mdsClient = mds.NewMdsServiceClient(cc) +} + +func (ufRp *SetClientMdsAddrsOverride) Stub_Func(ctx context.Context) (interface{}, error) { + return ufRp.mdsClient.SetClientMdsAddrsOverride(ctx, ufRp.Request) +} + +func NewSetClientMdsAddrsOverrideCommand() *cobra.Command { + cmd := &SetClientMdsAddrsOverrideCommand{ + FinalCurveCmd: basecmd.FinalCurveCmd{ + Use: "client-mds-addrs-override", + Short: "set client mds addrs override", + Long: "set client mds addrs override", + Example: example, + }, + } + basecmd.NewFinalCurveCli(&cmd.FinalCurveCmd, cmd) + return cmd.Cmd +} + +func (fCmd *SetClientMdsAddrsOverrideCommand) AddFlags() { + config.AddRpcRetryTimesFlag(fCmd.Cmd) + config.AddRpcTimeoutFlag(fCmd.Cmd) + config.AddFsMdsAddrFlag(fCmd.Cmd) +} + +func (fCmd *SetClientMdsAddrsOverrideCommand) Init(cmd *cobra.Command, args []string) error { + addrs, addrErr 
:= config.GetFsMdsAddrSlice(fCmd.Cmd) + if addrErr.TypeCode() != cmderror.CODE_SUCCESS { + return fmt.Errorf(addrErr.Message) + } + timeout := viper.GetDuration(config.VIPER_GLOBALE_RPCTIMEOUT) + retrytimes := viper.GetInt32(config.VIPER_GLOBALE_RPCRETRYTIMES) + + // output format + header := []string{cobrautil.ROW_RESULT} + fCmd.SetHeader(header) + + if len(args) != 1 { + return fmt.Errorf("please specify client mds addrs override, example: " + example) + } + + request := &mds.SetClientMdsAddrsOverrideRequest{ + ClientMdsAddrsOverride: &args[0], + } + // set rpc + fCmd.Rpc = &SetClientMdsAddrsOverride{ + Request: request, + } + fCmd.Rpc.Info = basecmd.NewRpc(addrs, timeout, retrytimes, "change client mds addrs override") + return nil +} + +func (fCmd *SetClientMdsAddrsOverrideCommand) Print(cmd *cobra.Command, args []string) error { + return output.FinalCmdOutput(&fCmd.FinalCurveCmd, fCmd) +} + +func (fCmd *SetClientMdsAddrsOverrideCommand) RunCommand(cmd *cobra.Command, args []string) error { + result, errCmd := basecmd.GetRpcResponse(fCmd.Rpc.Info, fCmd.Rpc) + if errCmd.TypeCode() != cmderror.CODE_SUCCESS { + return fmt.Errorf(errCmd.Message) + } + + response := result.(*mds.SetClientMdsAddrsOverrideResponse) + errMsg := mds.FSStatusCode_name[int32(response.GetStatusCode())] + row := map[string]string{ + cobrautil.ROW_RESULT: errMsg, + } + + fCmd.TableNew.Append(cobrautil.Map2List(row, fCmd.Header)) + + var errs []*cmderror.CmdError + res, errTranslate := output.MarshalProtoJson(response) + if errTranslate != nil { + errMar := cmderror.ErrMarShalProtoJson() + errMar.Format(errTranslate.Error()) + errs = append(errs, errMar) + } + + fCmd.Result = res + fCmd.Error = cmderror.MostImportantCmdError(errs) + return nil +} + +func (fCmd *SetClientMdsAddrsOverrideCommand) ResultPlainOutput() error { + return output.FinalCmdOutputPlain(&fCmd.FinalCurveCmd) +} diff --git a/tools-v2/pkg/cli/command/curvefs/update/mds/mds.go 
b/tools-v2/pkg/cli/command/curvefs/update/mds/mds.go new file mode 100644 index 0000000000..c0e14a3afc --- /dev/null +++ b/tools-v2/pkg/cli/command/curvefs/update/mds/mds.go @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2023 NetEase Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package mds + +import ( + basecmd "github.com/opencurve/curve/tools-v2/pkg/cli/command" + "github.com/spf13/cobra" +) + +type MdsCommand struct { + basecmd.MidCurveCmd +} + +var _ basecmd.MidCurveCmdFunc = (*MdsCommand)(nil) // check interface + +func (mdsCmd *MdsCommand) AddSubCommands() { + mdsCmd.Cmd.AddCommand( + NewSetClientMdsAddrsOverrideCommand(), + ) +} + +func NewMdsCommand() *cobra.Command { + mdsCmd := &MdsCommand{ + basecmd.MidCurveCmd{ + Use: "mds", + Short: "mds resources in the curvefs", + }, + } + return basecmd.NewMidCurveCli(&mdsCmd.MidCurveCmd, mdsCmd) +} diff --git a/tools-v2/pkg/cli/command/curvefs/update/update.go b/tools-v2/pkg/cli/command/curvefs/update/update.go index 2b7cf4513f..5882a195b4 100644 --- a/tools-v2/pkg/cli/command/curvefs/update/update.go +++ b/tools-v2/pkg/cli/command/curvefs/update/update.go @@ -19,6 +19,7 @@ package update import ( basecmd "github.com/opencurve/curve/tools-v2/pkg/cli/command" "github.com/opencurve/curve/tools-v2/pkg/cli/command/curvefs/update/fs" + "github.com/opencurve/curve/tools-v2/pkg/cli/command/curvefs/update/mds" "github.com/spf13/cobra" ) @@ -31,6 +32,7 @@ var _ basecmd.MidCurveCmdFunc = (*UpdateCommand)(nil) 
// check interface func (updateCmd *UpdateCommand) AddSubCommands() { updateCmd.Cmd.AddCommand( fs.NewFsCommand(), + mds.NewMdsCommand(), ) } diff --git a/ut.sh b/ut.sh index 8a3599f800..d405ccc028 100644 --- a/ut.sh +++ b/ut.sh @@ -86,7 +86,7 @@ get_options() { main() { get_options "$@" - sudo docker run --rm -w /var/lib/jenkins/workspace/curve/curve_multijob/ -v /var/lib/jenkins:/var/lib/jenkins -v $(pwd):/var/lib/jenkins/workspace/curve/curve_multijob/ -v ${HOME}:${HOME} --user $(id -u ${USER}):$(id -g ${USER}) -v /etc/passwd:/etc/passwd:ro -v /etc/group:/etc/group:ro -v /etc/sudoers:/etc/sudoers:ro -v /etc/shadow:/etc/shadow:ro --ulimit core=-1 --privileged opencurvedocker/curve-base:build-$g_os bash util/ut_in_image.sh "$@" + sudo docker run --rm -w /var/lib/jenkins/workspace/curve/curve_multijob/ -v /var/lib/jenkins:/var/lib/jenkins -v $(pwd):/var/lib/jenkins/workspace/curve/curve_multijob/ -e BUILD_NUMBER=$BUILD_NUMBER -v ${HOME}:${HOME} --user $(id -u ${USER}):$(id -g ${USER}) -v /etc/passwd:/etc/passwd:ro -v /etc/group:/etc/group:ro -v /etc/sudoers:/etc/sudoers:ro -v /etc/shadow:/etc/shadow:ro --ulimit core=-1 --privileged opencurvedocker/curve-base:build-$g_os bash util/ut_in_image.sh "$@" } ############################ MAIN() diff --git a/util/playground.sh b/util/playground.sh index 8b1680e67d..c8d584c1ab 100755 --- a/util/playground.sh +++ b/util/playground.sh @@ -2,13 +2,11 @@ # Copyright (C) 2023 Jingli Chen (Wine93), NetEase Inc. 
-# see also: https://github.com/Burnett01/rsync-deployments/issues/21 - ############################ GLOBAL VARIABLES g_obm_cfg=".obm.cfg" g_worker_dir="/curve" g_container_name="curve-build-playground.master" -g_container_image="opencurvedocker/curve-base:build-debian11" +g_container_image="opencurvedocker/curve-build:ubuntu22" g_init_script=$(cat << EOF useradd -m -s /bin/bash -N -u $UID $USER echo "${USER} ALL=(ALL) NOPASSWD: ALL" > /etc/sudoers @@ -17,15 +15,22 @@ chmod g+w /etc/passwd echo 'alias ls="ls --color"' >> /home/${USER}/.bashrc EOF ) -g_install_script=$(cat << EOF -apt-get update -apt-get -y install tree rsync golang jq vim python3-pip maven >/dev/null -curl -sSL https://bit.ly/install-xq | sudo bash >/dev/null 2>&1 -pip3 install cpplint >/dev/null 2>/dev/null -EOF -) ############################ BASIC FUNCTIONS +msg() { + printf '%b' "$1" >&2 +} + +success() { + msg "\33[32m[✔]\33[0m ${1}${2}" +} + +die() { + msg "\33[31m[✘]\33[0m ${1}${2}" + exit 1 +} + +############################ FUNCTIONS parse_cfg() { local args=`getopt -o v: --long version: -n "playground.sh" -- "$@"` eval set -- "${args}" @@ -51,13 +56,13 @@ create_container() { --env "UID=$(id -u)" \ --env "USER=${USER}" \ --env "TZ=Asia/Shanghai" \ - --hostname "playground" \ + --hostname "${g_container_name}" \ --name "${g_container_name}" \ --workdir ${g_worker_dir} \ "${g_container_image}" + docker exec "${g_container_name}" bash -c "${g_init_script}" - docker exec "${g_container_name}" bash -c "${g_install_script}" - success "create ${g_container_name} (${g_container_image}) success :)" + success "create ${g_container_name} (${g_container_image}) success :)\n" } enter_container() { @@ -68,9 +73,7 @@ enter_container() { "${g_container_name}" /bin/bash } - main() { - source "util/basic.sh" parse_cfg "$@" create_container enter_container diff --git a/util/ut_in_image.sh b/util/ut_in_image.sh index ee2010fe28..f0650a29f4 100755 --- a/util/ut_in_image.sh +++ b/util/ut_in_image.sh @@ -133,8 
+133,8 @@ do now_test=`ps -ef | grep test | grep -v 'test[0-9]' | grep -v grep | awk '{print $8}'` echo "now_test case is "$now_test - for i in `find ${test_bin_dirs} -type f -executable -exec file -i '{}' \; | grep -E 'executable|sharedlib' | grep "charset=binary" | grep -v ".so"|grep test | grep -Ev $exclude_test_names | awk -F":" '{print $1'}`;do a=`cat $i.log | grep "FAILED ]" | wc -l`;if [ $a -gt 0 ];then f1=`cat $i.log | grep "FAILED ]"`;f1_file="${i}.log"; echo "fail test is $i"; check=1; fi;done - for i in `find ${test_bin_dirs} -type f -executable -exec file -i '{}' \; | grep -E 'executable|sharedlib' | grep "charset=binary" | grep -v ".so"|grep test | grep -Ev $exclude_test_names | awk -F":" '{print $1'}`;do b=`cat $i.log | grep "Failure" | wc -l`;if [ $b -gt 0 ];then f2=`cat $i.log | grep "Failure"`; f2_file="${i}.log";echo "fail test is $i"; check=1; fi;done + for i in `find ${test_bin_dirs} -type f -executable -exec file -i '{}' \; | grep -E 'executable|sharedlib' | grep "charset=binary" | grep -v ".so"|grep test | grep -Ev $exclude_test_names | awk -F":" '{print $1'}`;do a=`cat $i.log | grep -a "FAILED ]" | wc -l`;if [ $a -gt 0 ];then f1=`cat $i.log | grep -a "FAILED ]"`;f1_file="${i}.log"; echo "fail test is $i"; check=1; fi;done + for i in `find ${test_bin_dirs} -type f -executable -exec file -i '{}' \; | grep -E 'executable|sharedlib' | grep "charset=binary" | grep -v ".so"|grep test | grep -Ev $exclude_test_names | awk -F":" '{print $1'}`;do b=`cat $i.log | grep -a "Failure" | wc -l`;if [ $b -gt 0 ];then f2=`cat $i.log | grep -a "Failure"`; f2_file="${i}.log";echo "fail test is $i"; check=1; fi;done if [ $check -eq 1 ];then echo "=========================test fail,Here is the logs of failed use cases=========================" echo "=========================test fail,Here is the logs of failed use cases========================="