Skip to content

Pipe: implement external sources strategy and MQTT extractor #15275

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 24 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 16 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,10 @@
import org.apache.iotdb.confignode.consensus.request.write.pipe.task.OperateMultiplePipesPlanV2;
import org.apache.iotdb.confignode.consensus.request.write.pipe.task.SetPipeStatusPlanV2;
import org.apache.iotdb.confignode.consensus.response.pipe.task.PipeTableResp;
import org.apache.iotdb.confignode.manager.ConfigManager;
import org.apache.iotdb.confignode.manager.pipe.resource.PipeConfigNodeResourceManager;
import org.apache.iotdb.confignode.procedure.impl.pipe.runtime.PipeHandleMetaChangeProcedure;
import org.apache.iotdb.confignode.procedure.impl.pipe.util.ExternalLoadBalancer;
import org.apache.iotdb.confignode.rpc.thrift.TAlterPipeReq;
import org.apache.iotdb.confignode.rpc.thrift.TCreatePipeReq;
import org.apache.iotdb.confignode.service.ConfigNode;
Expand Down Expand Up @@ -78,6 +80,9 @@
import java.util.stream.StreamSupport;

import static org.apache.iotdb.commons.pipe.agent.plugin.builtin.BuiltinPipePlugin.IOTDB_THRIFT_CONNECTOR;
import static org.apache.iotdb.commons.pipe.config.constant.PipeExtractorConstant.EXTERNAL_EXTRACTOR_PARALLELISM_DEFAULT_VALUE;
import static org.apache.iotdb.commons.pipe.config.constant.PipeExtractorConstant.EXTERNAL_EXTRACTOR_PARALLELISM_KEY;
import static org.apache.iotdb.commons.pipe.config.constant.PipeExtractorConstant.EXTERNAL_SOURCE_PARALLELISM_KEY;
import static org.apache.iotdb.commons.pipe.config.constant.PipeRPCMessageConstant.PIPE_ALREADY_EXIST_MSG;
import static org.apache.iotdb.commons.pipe.config.constant.PipeRPCMessageConstant.PIPE_NOT_EXIST_MSG;

Expand Down Expand Up @@ -594,6 +599,10 @@ private TSStatus handleLeaderChangeInternal(final PipeHandleLeaderChangePlan pla
return; // pipe consensus pipe task will not change
}

if (pipeMeta.getStaticMeta().isSourceExternal()) {
return;
}

final Map<Integer, PipeTaskMeta> consensusGroupIdToTaskMetaMap =
pipeMeta.getRuntimeMeta().getConsensusGroupId2TaskMetaMap();

Expand Down Expand Up @@ -623,7 +632,39 @@ private TSStatus handleLeaderChangeInternal(final PipeHandleLeaderChangePlan pla
// the data region group has already been removed"
}
}));

final ConfigManager configManager = ConfigNode.getInstance().getConfigManager();
pipeMetaKeeper
.getPipeMetaList()
.forEach(
pipeMeta -> {
if (pipeMeta.getStaticMeta().isSourceExternal()) {
final ExternalLoadBalancer loadBalancer = new ExternalLoadBalancer(configManager);
final int parallelism =
pipeMeta
.getStaticMeta()
.getExtractorParameters()
.getIntOrDefault(
Arrays.asList(
EXTERNAL_EXTRACTOR_PARALLELISM_KEY,
EXTERNAL_SOURCE_PARALLELISM_KEY),
EXTERNAL_EXTRACTOR_PARALLELISM_DEFAULT_VALUE);
loadBalancer
.balance(
parallelism,
configManager.getLoadManager().getRegionLeaderMap(),
pipeMeta.getStaticMeta())
.forEach(
(taskIndex, newLeader) -> {
if (newLeader != -1) {
pipeMeta
.getRuntimeMeta()
.getConsensusGroupId2TaskMetaMap()
.get(taskIndex)
.setLeaderNodeId(newLeader);
}
});
}
});
return new TSStatus(TSStatusCode.SUCCESS_STATUS.getStatusCode());
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -520,6 +520,10 @@ public static PipeMeta copyAndFilterOutNonWorkingDataRegionPipeTasks(PipeMeta or
.entrySet()
.removeIf(
consensusGroupId2TaskMeta -> {
if (originalPipeMeta.getStaticMeta().isSourceExternal()) {
// should keep the external source tasks
return false;
}
final String database;
try {
database =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
import org.apache.iotdb.confignode.procedure.env.ConfigNodeProcedureEnv;
import org.apache.iotdb.confignode.procedure.impl.pipe.AbstractOperatePipeProcedureV2;
import org.apache.iotdb.confignode.procedure.impl.pipe.PipeTaskOperation;
import org.apache.iotdb.confignode.procedure.impl.pipe.util.ExternalLoadBalancer;
import org.apache.iotdb.confignode.procedure.store.ProcedureType;
import org.apache.iotdb.confignode.rpc.thrift.TCreatePipeReq;
import org.apache.iotdb.consensus.exception.ConsensusException;
Expand All @@ -66,6 +67,9 @@
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.atomic.AtomicReference;

import static org.apache.iotdb.commons.pipe.config.constant.PipeExtractorConstant.EXTERNAL_EXTRACTOR_PARALLELISM_DEFAULT_VALUE;
import static org.apache.iotdb.commons.pipe.config.constant.PipeExtractorConstant.EXTERNAL_EXTRACTOR_PARALLELISM_KEY;
import static org.apache.iotdb.commons.pipe.config.constant.PipeExtractorConstant.EXTERNAL_SOURCE_PARALLELISM_KEY;
import static org.apache.iotdb.commons.pipe.config.constant.PipeExtractorConstant.EXTRACTOR_CONSENSUS_GROUP_ID_KEY;
import static org.apache.iotdb.commons.pipe.config.constant.PipeExtractorConstant.EXTRACTOR_CONSENSUS_SENDER_DATANODE_ID_KEY;

Expand Down Expand Up @@ -269,6 +273,26 @@ public void executeFromCalculateInfoForTask(final ConfigNodeProcedureEnv env) {
new PipeTaskMeta(
new RecoverProgressIndex(senderDataNodeId, new SimpleProgressIndex(0, 0)),
senderDataNodeId));
} else if (pipeStaticMeta.isSourceExternal()) {
// external source
final ExternalLoadBalancer loadBalancer = new ExternalLoadBalancer(env.getConfigManager());
final int parallelism =
pipeStaticMeta
.getExtractorParameters()
.getIntOrDefault(
Arrays.asList(
EXTERNAL_EXTRACTOR_PARALLELISM_KEY, EXTERNAL_SOURCE_PARALLELISM_KEY),
EXTERNAL_EXTRACTOR_PARALLELISM_DEFAULT_VALUE);
loadBalancer
.balance(
parallelism,
env.getConfigManager().getLoadManager().getRegionLeaderMap(),
pipeStaticMeta)
.forEach(
(taskIndex, leaderNodeId) -> {
consensusGroupIdToTaskMetaMap.put(
taskIndex, new PipeTaskMeta(MinimumProgressIndex.INSTANCE, leaderNodeId));
});
} else {
// data regions & schema regions
env.getConfigManager()
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.iotdb.confignode.procedure.impl.pipe.util;

import org.apache.iotdb.common.rpc.thrift.TConsensusGroupId;
import org.apache.iotdb.common.rpc.thrift.TConsensusGroupType;
import org.apache.iotdb.commons.cluster.NodeStatus;
import org.apache.iotdb.commons.pipe.agent.task.meta.PipeStaticMeta;
import org.apache.iotdb.commons.pipe.config.constant.PipeExtractorConstant;
import org.apache.iotdb.confignode.manager.ConfigManager;

import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

/**
 * Assigns the parallel tasks of an external-source pipe to data nodes.
 *
 * <p>Tasks are identified by negative indexes ({@code -1, -2, ...}) so that they never share a key
 * with a real (non-negative) consensus group id in the pipe's task meta map.
 */
public class ExternalLoadBalancer {
  private final ConfigManager configManager;

  /**
   * @param configManager used to look up the data nodes that are currently in {@link
   *     NodeStatus#Running} state
   */
  public ExternalLoadBalancer(final ConfigManager configManager) {
    this.configManager = configManager;
  }

  /**
   * Distribute the parallel tasks of an external source over the cluster.
   *
   * <ul>
   *   <li>If the extractor is configured as "single instance per node", at most one task is placed
   *       on each running data node (in ascending node id order), so fewer than {@code
   *       parallelCount} tasks may be returned.
   *   <li>Otherwise the tasks are spread round-robin over the leaders of the data region groups,
   *       visited in ascending region group id order. Groups without a known leader ({@code -1} or
   *       {@code null}) are ignored.
   *   <li>If no data region group has a leader, the tasks are spread round-robin over the running
   *       data nodes in ascending node id order.
   * </ul>
   *
   * @param parallelCount the number of parallel tasks from external source
   * @param regionLeaderMap the current leader node id of every consensus group; only data region
   *     entries are considered
   * @param pipestaticMeta static meta of the pipe, read for the "single instance per node" option
   * @return a mapping from task index ({@code -1} down to {@code -parallelCount}) to leader node id
   * @throws RuntimeException if running data nodes are required but none is available
   */
  public Map<Integer, Integer> balance(
      int parallelCount,
      final Map<TConsensusGroupId, Integer> regionLeaderMap,
      PipeStaticMeta pipestaticMeta) {
    final boolean singleInstancePerNode =
        pipestaticMeta
            .getExtractorParameters()
            .getBooleanOrDefault(
                Arrays.asList(
                    PipeExtractorConstant.EXTERNAL_EXTRACTOR_SINGLE_INSTANCE_PER_NODE_KEY,
                    PipeExtractorConstant.EXTERNAL_SOURCE_SINGLE_INSTANCE_PER_NODE_KEY),
                PipeExtractorConstant.EXTERNAL_EXTRACTOR_SINGLE_INSTANCE_PER_NODE_DEFAULT_VALUE);

    if (singleInstancePerNode) {
      // Capping the task count at the node count guarantees that no node receives two tasks.
      final List<Integer> runningDataNodes = getSortedRunningDataNodeIds();
      return assignRoundRobin(runningDataNodes, Math.min(runningDataNodes.size(), parallelCount));
    }

    // Leaders of all data region groups that currently have one, ordered by region group id.
    // The leader is read from the entry itself; a null leader is treated like -1 (no leader)
    // instead of failing on auto-unboxing.
    final List<Integer> sortedRegionLeaders =
        regionLeaderMap.entrySet().stream()
            .filter(
                entry -> {
                  final Integer leaderNodeId = entry.getValue();
                  return entry.getKey().getType() == TConsensusGroupType.DataRegion
                      && leaderNodeId != null
                      && leaderNodeId != -1;
                })
            .sorted((a, b) -> Integer.compare(a.getKey().getId(), b.getKey().getId()))
            .map(Map.Entry::getValue)
            .collect(Collectors.toList());

    if (sortedRegionLeaders.isEmpty()) {
      // No data region leader to follow: fall back to the running data nodes.
      return assignRoundRobin(getSortedRunningDataNodeIds(), parallelCount);
    }
    return assignRoundRobin(sortedRegionLeaders, parallelCount);
  }

  /** Returns the ids of all running data nodes in ascending order; fails if there is none. */
  private List<Integer> getSortedRunningDataNodeIds() {
    final List<Integer> runningDataNodes =
        configManager.getLoadManager().filterDataNodeThroughStatus(NodeStatus.Running).stream()
            .sorted()
            .collect(Collectors.toList());
    if (runningDataNodes.isEmpty()) {
      throw new RuntimeException("No available datanode to assign tasks");
    }
    return runningDataNodes;
  }

  /** Maps task indexes -1..-taskCount onto {@code nodeIds}, cycling through the (non-empty) list. */
  private static Map<Integer, Integer> assignRoundRobin(
      final List<Integer> nodeIds, final int taskCount) {
    final Map<Integer, Integer> parallelAssignment = new HashMap<>();
    final int numNodes = nodeIds.size();
    for (int i = 1; i <= taskCount; i++) {
      parallelAssignment.put(-i, nodeIds.get((i - 1) % numNodes));
    }
    return parallelAssignment;
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import org.apache.iotdb.commons.pipe.agent.plugin.constructor.PipeExtractorConstructor;
import org.apache.iotdb.commons.pipe.agent.plugin.meta.DataNodePipePluginMetaKeeper;
import org.apache.iotdb.db.pipe.extractor.dataregion.IoTDBDataRegionExtractor;
import org.apache.iotdb.db.pipe.extractor.mqtt.MQTTExtractor;

class PipeDataRegionExtractorConstructor extends PipeExtractorConstructor {

Expand All @@ -42,5 +43,8 @@ protected void initConstructors() {
BuiltinPipePlugin.DO_NOTHING_SOURCE.getPipePluginName(), DoNothingExtractor::new);
pluginConstructors.put(
BuiltinPipePlugin.IOTDB_SOURCE.getPipePluginName(), IoTDBDataRegionExtractor::new);

pluginConstructors.put(
BuiltinPipePlugin.MQTT_EXTRACTOR.getPipePluginName(), MQTTExtractor::new);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,9 @@ protected boolean isShutdown() {
@Override
protected Map<Integer, PipeTask> buildPipeTasks(final PipeMeta pipeMetaFromConfigNode)
throws IllegalPathException {
if (pipeMetaFromConfigNode.getStaticMeta().isSourceExternal()) {
return new PipeDataNodeBuilder(pipeMetaFromConfigNode).buildExternalPipeTasks();
}
return new PipeDataNodeBuilder(pipeMetaFromConfigNode).build();
}

Expand Down Expand Up @@ -192,7 +195,8 @@ protected void createPipeTask(
.isEmpty();

// Parse the extractor parameters up front to avoid creating irrelevant pipeTasks
if (needConstructDataRegionTask || needConstructSchemaRegionTask) {
// consensusGroupId < 0 means an external source task, should create it
if (needConstructDataRegionTask || needConstructSchemaRegionTask || consensusGroupId < 0) {
final PipeDataNodeTask pipeTask =
new PipeDataNodeTaskBuilder(pipeStaticMeta, consensusGroupId, pipeTaskMeta).build();
pipeTask.create();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,4 +84,23 @@ public Map<Integer, PipeTask> build() throws IllegalPathException {
}
return consensusGroupIdToPipeTaskMap;
}

public Map<Integer, PipeTask> buildExternalPipeTasks() {
final Map<Integer, PipeTask> consensusGroupIdToPipeTaskMap = new HashMap<>();
final PipeStaticMeta pipeStaticMeta = pipeMeta.getStaticMeta();
final PipeRuntimeMeta pipeRuntimeMeta = pipeMeta.getRuntimeMeta();

for (Map.Entry<Integer, PipeTaskMeta> consensusGroupIdToPipeTaskMeta :
pipeRuntimeMeta.getConsensusGroupId2TaskMetaMap().entrySet()) {
final int consensusGroupId = consensusGroupIdToPipeTaskMeta.getKey();
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

better rename to "taskId"

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

makes sense, fixed it

final PipeTaskMeta pipeTaskMeta = consensusGroupIdToPipeTaskMeta.getValue();
if (pipeTaskMeta.getLeaderNodeId() == CONFIG.getDataNodeId()) {
consensusGroupIdToPipeTaskMap.put(
consensusGroupId,
new PipeDataNodeTaskBuilder(pipeStaticMeta, consensusGroupId, pipeTaskMeta).build());
}
}

return consensusGroupIdToPipeTaskMap;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,9 @@ public PipeTaskExtractorStage(
PipeTaskMeta pipeTaskMeta) {
pipeExtractor =
StorageEngine.getInstance().getAllDataRegionIds().contains(new DataRegionId(regionId))
// regionId that is less than 0 means an external pipe source, use
// dataRegionExtractor
|| regionId < 0
? PipeDataNodeAgent.plugin().dataRegion().reflectExtractor(extractorParameters)
: PipeDataNodeAgent.plugin().schemaRegion().reflectExtractor(extractorParameters);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,9 @@ public PipeTaskProcessorStage(
pipeName, creationTime, regionId, pipeTaskMeta));
final PipeProcessor pipeProcessor =
StorageEngine.getInstance().getAllDataRegionIds().contains(new DataRegionId(regionId))
// regionId that is less than 0 means an external pipe source, use
// dataRegionProcessor
|| regionId < 0
? PipeDataNodeAgent.plugin()
.dataRegion()
.getConfiguredProcessor(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,10 @@ public synchronized String register(

final boolean isDataRegionConnector =
StorageEngine.getInstance()
.getAllDataRegionIds()
.contains(new DataRegionId(environment.getRegionId()));
.getAllDataRegionIds()
.contains(new DataRegionId(environment.getRegionId()))
// regionId that is less than 0 means an external pipe source, use dataRegionConnector
|| environment.getRegionId() < 0;

final int connectorNum;
boolean realTimeFirst = false;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,9 @@ public PipeProcessorSubtask(
this.subtaskCreationTime = System.currentTimeMillis();

// Only register dataRegions
if (StorageEngine.getInstance().getAllDataRegionIds().contains(new DataRegionId(regionId))) {
if (StorageEngine.getInstance().getAllDataRegionIds().contains(new DataRegionId(regionId))
// regionId that is less than 0 means an external pipe source, should register it
|| regionId < 0) {
PipeProcessorMetrics.getInstance().register(this);
}
}
Expand Down
Loading
Loading