Skip to content

Commit 9310805

Browse files
committed
[Enhancement](lock) Optimize CatalogRecycleBin lock granularity to improve concurrency
1 parent 69ff4f3 commit 9310805

File tree

1 file changed

+120
-73
lines changed

1 file changed

+120
-73
lines changed

fe/fe-core/src/main/java/org/apache/doris/catalog/CatalogRecycleBin.java

Lines changed: 120 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -52,12 +52,15 @@
5252
import java.io.DataInput;
5353
import java.io.DataOutput;
5454
import java.io.IOException;
55+
import java.util.ArrayList;
5556
import java.util.Collection;
5657
import java.util.HashMap;
5758
import java.util.Iterator;
5859
import java.util.List;
5960
import java.util.Map;
6061
import java.util.Set;
62+
import java.util.concurrent.locks.ReadWriteLock;
63+
import java.util.concurrent.locks.ReentrantReadWriteLock;
6164
import java.util.stream.Collectors;
6265
import java.util.stream.Stream;
6366

@@ -68,6 +71,7 @@ public class CatalogRecycleBin extends MasterDaemon implements Writable, GsonPos
6871
// to avoid erase log ahead of drop log
6972
private static final long minEraseLatency = 10 * 60 * 1000; // 10 min
7073

74+
private final ReadWriteLock lock = new ReentrantReadWriteLock(true);
7175
private Map<Long, RecycleDatabaseInfo> idToDatabase;
7276
private Map<Long, RecycleTableInfo> idToTable;
7377
private Map<Long, RecyclePartitionInfo> idToPartition;
@@ -186,23 +190,28 @@ public synchronized boolean recycleTable(long dbId, Table table, boolean isRepla
186190
return true;
187191
}
188192

189-
public synchronized boolean recyclePartition(long dbId, long tableId, String tableName, Partition partition,
193+
public boolean recyclePartition(long dbId, long tableId, String tableName, Partition partition,
190194
Range<PartitionKey> range, PartitionItem listPartitionItem,
191195
DataProperty dataProperty, ReplicaAllocation replicaAlloc,
192196
boolean isInMemory, boolean isMutable) {
193-
if (idToPartition.containsKey(partition.getId())) {
194-
LOG.error("partition[{}] already in recycle bin.", partition.getId());
195-
return false;
196-
}
197+
lock.writeLock().lock();
198+
try {
199+
if (idToPartition.containsKey(partition.getId())) {
200+
LOG.error("partition[{}] already in recycle bin.", partition.getId());
201+
return false;
202+
}
197203

198-
// recycle partition
199-
RecyclePartitionInfo partitionInfo = new RecyclePartitionInfo(dbId, tableId, partition,
200-
range, listPartitionItem, dataProperty, replicaAlloc, isInMemory, isMutable);
201-
idToRecycleTime.put(partition.getId(), System.currentTimeMillis());
202-
idToPartition.put(partition.getId(), partitionInfo);
203-
LOG.info("recycle partition[{}-{}] of table [{}-{}]", partition.getId(), partition.getName(),
204-
tableId, tableName);
205-
return true;
204+
// recycle partition
205+
RecyclePartitionInfo partitionInfo = new RecyclePartitionInfo(dbId, tableId, partition,
206+
range, listPartitionItem, dataProperty, replicaAlloc, isInMemory, isMutable);
207+
idToRecycleTime.put(partition.getId(), System.currentTimeMillis());
208+
idToPartition.put(partition.getId(), partitionInfo);
209+
LOG.info("recycle partition[{}-{}] of table [{}-{}]", partition.getId(), partition.getName(),
210+
tableId, tableName);
211+
return true;
212+
} finally {
213+
lock.writeLock().unlock();
214+
}
206215
}
207216

208217
public synchronized Long getRecycleTimeById(long id) {
@@ -474,47 +483,66 @@ public synchronized void replayEraseTable(long tableId) {
474483
LOG.info("replay erase table[{}]", tableId);
475484
}
476485

477-
private synchronized void erasePartition(long currentTimeMs, int keepNum) {
486+
private void erasePartition(long currentTimeMs, int keepNum) {
478487
int eraseNum = 0;
479488
StopWatch watch = StopWatch.createStarted();
480-
try {
481-
// 1. erase expired partitions
482-
Iterator<Map.Entry<Long, RecyclePartitionInfo>> iterator = idToPartition.entrySet().iterator();
483-
while (iterator.hasNext()) {
484-
Map.Entry<Long, RecyclePartitionInfo> entry = iterator.next();
485-
RecyclePartitionInfo partitionInfo = entry.getValue();
486-
Partition partition = partitionInfo.getPartition();
489+
List<Partition> partitionsToErase = new ArrayList<>();
487490

488-
long partitionId = entry.getKey();
489-
if (isExpire(partitionId, currentTimeMs)) {
490-
Env.getCurrentEnv().onErasePartition(partition);
491-
// erase partition
492-
iterator.remove();
493-
idToRecycleTime.remove(partitionId);
494-
// log
495-
Env.getCurrentEnv().getEditLog().logErasePartition(partitionId);
496-
LOG.info("erase partition[{}]. reason: expired", partitionId);
497-
eraseNum++;
491+
try {
492+
// 1. First collect expired partitions under write lock
493+
lock.writeLock().lock();
494+
try {
495+
Iterator<Map.Entry<Long, RecyclePartitionInfo>> iterator = idToPartition.entrySet().iterator();
496+
while (iterator.hasNext()) {
497+
Map.Entry<Long, RecyclePartitionInfo> entry = iterator.next();
498+
RecyclePartitionInfo partitionInfo = entry.getValue();
499+
Partition partition = partitionInfo.getPartition();
500+
long partitionId = entry.getKey();
501+
502+
if (isExpire(partitionId, currentTimeMs)) {
503+
partitionsToErase.add(partition);
504+
iterator.remove();
505+
idToRecycleTime.remove(partitionId);
506+
eraseNum++;
507+
}
498508
}
499-
} // end for partitions
500-
501-
// 2. erase exceed number
502-
if (keepNum < 0) {
503-
return;
509+
} finally {
510+
lock.writeLock().unlock();
504511
}
505-
com.google.common.collect.Table<Long, Long, Set<String>> dbTblId2PartitionNames = HashBasedTable.create();
506-
for (RecyclePartitionInfo partitionInfo : idToPartition.values()) {
507-
Set<String> partitionNames = dbTblId2PartitionNames.get(partitionInfo.dbId, partitionInfo.tableId);
508-
if (partitionNames == null) {
509-
partitionNames = Sets.newHashSet();
510-
dbTblId2PartitionNames.put(partitionInfo.dbId, partitionInfo.tableId, partitionNames);
511-
}
512-
partitionNames.add(partitionInfo.getPartition().getName());
512+
513+
// 2. Then erase partitions outside of lock
514+
for (Partition partition : partitionsToErase) {
515+
Env.getCurrentEnv().onErasePartition(partition);
516+
// log
517+
Env.getCurrentEnv().getEditLog().logErasePartition(partition.getId());
518+
LOG.info("erase partition[{}]. reason: expired", partition.getId());
513519
}
514-
for (Cell<Long, Long, Set<String>> cell : dbTblId2PartitionNames.cellSet()) {
515-
for (String partitionName : cell.getValue()) {
516-
erasePartitionWithSameName(cell.getRowKey(), cell.getColumnKey(), partitionName, currentTimeMs,
517-
keepNum);
520+
521+
// 3. Handle exceed number case
522+
if (keepNum >= 0) {
523+
lock.readLock().lock();
524+
try {
525+
com.google.common.collect.Table<Long, Long, Set<String>> dbTblId2PartitionNames =
526+
HashBasedTable.create();
527+
for (RecyclePartitionInfo partitionInfo : idToPartition.values()) {
528+
Set<String> partitionNames =
529+
dbTblId2PartitionNames.get(partitionInfo.dbId, partitionInfo.tableId);
530+
if (partitionNames == null) {
531+
partitionNames = Sets.newHashSet();
532+
dbTblId2PartitionNames.put(partitionInfo.dbId, partitionInfo.tableId, partitionNames);
533+
}
534+
partitionNames.add(partitionInfo.getPartition().getName());
535+
}
536+
537+
for (Cell<Long, Long, Set<String>> cell : dbTblId2PartitionNames.cellSet()) {
538+
for (String partitionName : cell.getValue()) {
539+
erasePartitionWithSameName(cell.getRowKey(), cell.getColumnKey(),
540+
partitionName, currentTimeMs,
541+
keepNum);
542+
}
543+
}
544+
} finally {
545+
lock.readLock().unlock();
518546
}
519547
}
520548
} finally {
@@ -523,26 +551,31 @@ private synchronized void erasePartition(long currentTimeMs, int keepNum) {
523551
}
524552
}
525553

526-
private synchronized List<Long> getSameNamePartitionIdListToErase(long dbId, long tableId, String partitionName,
554+
private List<Long> getSameNamePartitionIdListToErase(long dbId, long tableId, String partitionName,
527555
int maxSameNameTrashNum) {
528-
Iterator<Map.Entry<Long, RecyclePartitionInfo>> iterator = idToPartition.entrySet().iterator();
529556
List<List<Long>> partitionRecycleTimeLists = Lists.newArrayList();
530-
while (iterator.hasNext()) {
531-
Map.Entry<Long, RecyclePartitionInfo> entry = iterator.next();
532-
RecyclePartitionInfo partitionInfo = entry.getValue();
533-
if (partitionInfo.getDbId() != dbId || partitionInfo.getTableId() != tableId) {
534-
continue;
535-
}
536557

537-
Partition partition = partitionInfo.getPartition();
538-
if (partition.getName().equals(partitionName)) {
539-
List<Long> partitionRecycleTimeInfo = Lists.newArrayList();
540-
partitionRecycleTimeInfo.add(entry.getKey());
541-
partitionRecycleTimeInfo.add(idToRecycleTime.get(entry.getKey()));
558+
lock.readLock().lock();
559+
try {
560+
for (Map.Entry<Long, RecyclePartitionInfo> entry : idToPartition.entrySet()) {
561+
RecyclePartitionInfo partitionInfo = entry.getValue();
562+
if (partitionInfo.getDbId() != dbId || partitionInfo.getTableId() != tableId) {
563+
continue;
564+
}
542565

543-
partitionRecycleTimeLists.add(partitionRecycleTimeInfo);
566+
Partition partition = partitionInfo.getPartition();
567+
if (partition.getName().equals(partitionName)) {
568+
List<Long> partitionRecycleTimeInfo = Lists.newArrayList();
569+
partitionRecycleTimeInfo.add(entry.getKey());
570+
partitionRecycleTimeInfo.add(idToRecycleTime.get(entry.getKey()));
571+
572+
partitionRecycleTimeLists.add(partitionRecycleTimeInfo);
573+
}
544574
}
575+
} finally {
576+
lock.readLock().unlock();
545577
}
578+
546579
List<Long> partitionIdToErase = Lists.newArrayList();
547580
if (partitionRecycleTimeLists.size() <= maxSameNameTrashNum) {
548581
return partitionIdToErase;
@@ -559,20 +592,34 @@ private synchronized List<Long> getSameNamePartitionIdListToErase(long dbId, lon
559592

560593
private synchronized void erasePartitionWithSameName(long dbId, long tableId, String partitionName,
561594
long currentTimeMs, int maxSameNameTrashNum) {
562-
List<Long> partitionIdToErase = getSameNamePartitionIdListToErase(dbId, tableId, partitionName,
563-
maxSameNameTrashNum);
564-
for (Long partitionId : partitionIdToErase) {
565-
RecyclePartitionInfo partitionInfo = idToPartition.get(partitionId);
566-
if (!isExpireMinLatency(partitionId, currentTimeMs)) {
567-
continue;
595+
List<Long> partitionIdToErase;
596+
List<Partition> partitionsToErase = new ArrayList<>();
597+
598+
// First get partitions to erase under write lock
599+
lock.writeLock().lock();
600+
try {
601+
partitionIdToErase = getSameNamePartitionIdListToErase(dbId, tableId, partitionName, maxSameNameTrashNum);
602+
for (Long partitionId : partitionIdToErase) {
603+
RecyclePartitionInfo partitionInfo = idToPartition.get(partitionId);
604+
if (!isExpireMinLatency(partitionId, currentTimeMs)) {
605+
continue;
606+
}
607+
Partition partition = partitionInfo.getPartition();
608+
partitionsToErase.add(partition);
609+
610+
idToPartition.remove(partitionId);
611+
idToRecycleTime.remove(partitionId);
568612
}
569-
Partition partition = partitionInfo.getPartition();
613+
} finally {
614+
lock.writeLock().unlock();
615+
}
570616

617+
// Then erase partitions outside of lock
618+
for (Partition partition : partitionsToErase) {
571619
Env.getCurrentEnv().onErasePartition(partition);
572-
idToPartition.remove(partitionId);
573-
idToRecycleTime.remove(partitionId);
574-
Env.getCurrentEnv().getEditLog().logErasePartition(partitionId);
575-
LOG.info("erase partition[{}] name: {} from table[{}] from db[{}]", partitionId, partitionName, tableId,
620+
Env.getCurrentEnv().getEditLog().logErasePartition(partition.getId());
621+
LOG.info("erase partition[{}] name: {} from table[{}] from db[{}]",
622+
partition.getId(), partitionName, tableId,
576623
dbId);
577624
}
578625
}

0 commit comments

Comments
 (0)