Skip to content

Commit ace8a2d

Browse files
committed
commit
1 parent 4ae3982 commit ace8a2d

32 files changed

+1007
-183
lines changed

src/backend/distributed/commands/create_distributed_table.c

Lines changed: 242 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,13 @@ static CitusTableParams DecideCitusTableParams(CitusTableType tableType,
139139
distributedTableParams);
140140
static void CreateCitusTable(Oid relationId, CitusTableType tableType,
141141
DistributedTableParams *distributedTableParams);
142+
static void ConvertCitusLocalTableToTableType(Oid relationId,
143+
CitusTableType tableType,
144+
DistributedTableParams *
145+
distributedTableParams);
146+
static uint32 SingleShardTableColocationNodeId(uint32 colocationId);
147+
static uint32 SingleShardTableGetNodeId(Oid relationId);
148+
static int64 NoneDistTableGetShardId(Oid relationId);
142149
static void CreateHashDistributedTableShards(Oid relationId, int shardCount,
143150
Oid colocatedTableId, bool localTableEmpty);
144151
static void CreateSingleShardTableShard(Oid relationId, Oid colocatedTableId,
@@ -1095,23 +1102,36 @@ CreateCitusTable(Oid relationId, CitusTableType tableType,
10951102
}
10961103

10971104
/*
1098-
* EnsureTableNotDistributed errors out when relation is a citus table but
1099-
* we don't want to ask user to first undistribute their citus local tables
1100-
* when creating reference or distributed tables from them.
1101-
* For this reason, here we undistribute citus local tables beforehand.
1102-
* But since UndistributeTable does not support undistributing relations
1103-
* involved in foreign key relationships, we first drop foreign keys that
1104-
* given relation is involved, then we undistribute the relation and finally
1105-
* we re-create dropped foreign keys at the end of this function.
1105+
* EnsureTableNotDistributed errors out when relation is a Citus table.
1106+
*
1107+
* For this reason, we either undistribute the Citus Local table first
1108+
* and then follow the usual code-path to create distributed table; or
1109+
* we simply move / replicate its shard to create a single-shard table /
1110+
* reference table, and then we update the metadata accordingly.
1111+
*
1112+
* If we're about to undistribute it (because we will create a distributed
1113+
* table soon), then we first drop foreign keys that given relation is
1114+
* involved because UndistributeTable does not support undistributing
1115+
* relations involved in foreign key relationships. At the end of this
1116+
* function, we then re-create the dropped foreign keys.
11061117
*/
11071118
List *originalForeignKeyRecreationCommands = NIL;
11081119
if (IsCitusTableType(relationId, CITUS_LOCAL_TABLE))
11091120
{
1110-
/* store foreign key creation commands that relation is involved */
1111-
originalForeignKeyRecreationCommands =
1112-
GetFKeyCreationCommandsRelationInvolvedWithTableType(relationId,
1113-
INCLUDE_ALL_TABLE_TYPES);
1114-
relationId = DropFKeysAndUndistributeTable(relationId);
1121+
if (tableType == REFERENCE_TABLE || tableType == SINGLE_SHARD_DISTRIBUTED)
1122+
{
1123+
ConvertCitusLocalTableToTableType(relationId, tableType,
1124+
distributedTableParams);
1125+
return;
1126+
}
1127+
else
1128+
{
1129+
/* store foreign key creation commands that relation is involved */
1130+
originalForeignKeyRecreationCommands =
1131+
GetFKeyCreationCommandsRelationInvolvedWithTableType(relationId,
1132+
INCLUDE_ALL_TABLE_TYPES);
1133+
relationId = DropFKeysAndUndistributeTable(relationId);
1134+
}
11151135
}
11161136
/*
11171137
* To support foreign keys between reference tables and local tables,
@@ -1319,6 +1339,215 @@ CreateCitusTable(Oid relationId, CitusTableType tableType,
13191339
}
13201340

13211341

1342+
/*
1343+
* ConvertCitusLocalTableToTableType converts given Citus local table to
1344+
* given table type.
1345+
*
1346+
* This only supports converting Citus local tables to reference tables
1347+
* (by replicating the shard to workers) and single-shard distributed
1348+
* tables (by moving the shard to appropriate worker).
1349+
*/
1350+
static void
1351+
ConvertCitusLocalTableToTableType(Oid relationId, CitusTableType tableType,
1352+
DistributedTableParams *distributedTableParams)
1353+
{
1354+
if (!IsCitusTableType(relationId, CITUS_LOCAL_TABLE))
1355+
{
1356+
ereport(ERROR, (errmsg("table is not a local table added to metadata")));
1357+
}
1358+
1359+
if (tableType != REFERENCE_TABLE && tableType != SINGLE_SHARD_DISTRIBUTED)
1360+
{
1361+
ereport(ERROR, (errmsg("table type is not supported for conversion")));
1362+
}
1363+
1364+
LockRelationOid(relationId, ExclusiveLock);
1365+
1366+
Var *distributionColumn = NULL;
1367+
CitusTableParams citusTableParams = DecideCitusTableParams(tableType,
1368+
distributedTableParams);
1369+
1370+
uint32 colocationId = INVALID_COLOCATION_ID;
1371+
if (distributedTableParams &&
1372+
distributedTableParams->colocationParam.colocationParamType ==
1373+
COLOCATE_WITH_COLOCATION_ID)
1374+
{
1375+
colocationId = distributedTableParams->colocationParam.colocationId;
1376+
}
1377+
else
1378+
{
1379+
colocationId = ColocationIdForNewTable(relationId, tableType,
1380+
distributedTableParams,
1381+
distributionColumn);
1382+
}
1383+
1384+
/* check constraints etc. on table based on new distribution params */
1385+
EnsureRelationCanBeDistributed(relationId, distributionColumn,
1386+
citusTableParams.distributionMethod,
1387+
colocationId, citusTableParams.replicationModel);
1388+
1389+
/*
1390+
* Regarding the foreign key relationships that given relation is involved,
1391+
* EnsureRelationCanBeDistributed() only checks the ones where the relation is
1392+
* the referencing table.
1393+
*
1394+
* And given that the table at hand is a Citus local table, right now it may
1395+
* only be referenced by a reference table or a Citus local table.
1396+
*
1397+
* However, given that neither of those two cases is applicable for a
1398+
* distributed table, here we throw an error assuming that the referencing
1399+
* relation is a reference table or a Citus local table.
1400+
*
1401+
* While doing so, we use the same error message used in
1402+
* ErrorIfUnsupportedForeignConstraintExists(), which is eventually called
1403+
* by EnsureRelationCanBeDistributed().
1404+
*
1405+
* Note that we don't need to check the same if we're creating a reference
1406+
* table from a Citus local table because all the foreign keys referencing
1407+
* Citus local tables are supported by reference tables.
1408+
*/
1409+
if (tableType == SINGLE_SHARD_DISTRIBUTED)
1410+
{
1411+
int fkeyFlags = (INCLUDE_REFERENCED_CONSTRAINTS | EXCLUDE_SELF_REFERENCES |
1412+
INCLUDE_ALL_TABLE_TYPES);
1413+
List *externalReferencedFkeyIds = GetForeignKeyOids(relationId, fkeyFlags);
1414+
if (list_length(externalReferencedFkeyIds) != 0)
1415+
{
1416+
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1417+
errmsg("cannot create foreign key constraint "
1418+
"since foreign keys from reference tables "
1419+
"and local tables to distributed tables "
1420+
"are not supported"),
1421+
errdetail("Reference tables and local tables "
1422+
"can only have foreign keys to reference "
1423+
"tables and local tables")));
1424+
}
1425+
}
1426+
1427+
EnsureReferenceTablesExistOnAllNodes();
1428+
1429+
LockColocationId(colocationId, ShareLock);
1430+
1431+
int64 shardId = NoneDistTableGetShardId(relationId);
1432+
WorkerNode *sourceNode = CoordinatorNodeIfAddedAsWorkerOrError();
1433+
1434+
if (tableType == SINGLE_SHARD_DISTRIBUTED)
1435+
{
1436+
uint32 targetNodeId = SingleShardTableColocationNodeId(colocationId);
1437+
if (targetNodeId != sourceNode->nodeId)
1438+
{
1439+
bool missingOk = false;
1440+
WorkerNode *targetNode = FindNodeWithNodeId(targetNodeId, missingOk);
1441+
1442+
TransferCitusLocalTableShardInXact(shardId, sourceNode->workerName,
1443+
sourceNode->workerPort,
1444+
targetNode->workerName,
1445+
targetNode->workerPort,
1446+
SHARD_TRANSFER_MOVE);
1447+
}
1448+
}
1449+
else if (tableType == REFERENCE_TABLE)
1450+
{
1451+
List *nodeList = ActivePrimaryNonCoordinatorNodeList(ShareLock);
1452+
nodeList = SortList(nodeList, CompareWorkerNodes);
1453+
1454+
WorkerNode *targetNode = NULL;
1455+
foreach_ptr(targetNode, nodeList)
1456+
{
1457+
TransferCitusLocalTableShardInXact(shardId, sourceNode->workerName,
1458+
sourceNode->workerPort,
1459+
targetNode->workerName,
1460+
targetNode->workerPort,
1461+
SHARD_TRANSFER_COPY);
1462+
}
1463+
}
1464+
1465+
bool autoConverted = false;
1466+
UpdateNoneDistTableMetadataGlobally(
1467+
relationId, citusTableParams.replicationModel,
1468+
colocationId, autoConverted);
1469+
1470+
/*
1471+
* TransferCitusLocalTableShardInXact() moves / copies partition shards
1472+
* to the target node too, but we still need to update the metadata
1473+
* for them.
1474+
*/
1475+
if (PartitionedTable(relationId))
1476+
{
1477+
Oid partitionRelationId = InvalidOid;
1478+
List *partitionList = PartitionList(relationId);
1479+
foreach_oid(partitionRelationId, partitionList)
1480+
{
1481+
UpdateNoneDistTableMetadataGlobally(
1482+
partitionRelationId, citusTableParams.replicationModel,
1483+
colocationId, autoConverted);
1484+
}
1485+
}
1486+
}
1487+
1488+
1489+
/*
1490+
* SingleShardTableColocationNodeId takes a colocation id that is known to be
1491+
* used / going to be used to colocate a set of single-shard tables and returns
1492+
* id of the node that should store the shards of those tables.
1493+
*/
1494+
static uint32
1495+
SingleShardTableColocationNodeId(uint32 colocationId)
1496+
{
1497+
List *tablesInColocationGroup = ColocationGroupTableList(colocationId, 1);
1498+
if (list_length(tablesInColocationGroup) == 0)
1499+
{
1500+
int workerNodeIndex =
1501+
EmptySingleShardTableColocationDecideNodeId(colocationId);
1502+
List *workerNodeList = DistributedTablePlacementNodeList(RowShareLock);
1503+
WorkerNode *workerNode = (WorkerNode *) list_nth(workerNodeList, workerNodeIndex);
1504+
1505+
return workerNode->nodeId;
1506+
}
1507+
else
1508+
{
1509+
Oid colocatedTableId = linitial_oid(tablesInColocationGroup);
1510+
return SingleShardTableGetNodeId(colocatedTableId);
1511+
}
1512+
}
1513+
1514+
1515+
/*
1516+
* SingleShardTableGetNodeId returns id of the node that stores shard of
1517+
* given single-shard table.
1518+
*/
1519+
static uint32
1520+
SingleShardTableGetNodeId(Oid relationId)
1521+
{
1522+
int64 shardId = NoneDistTableGetShardId(relationId);
1523+
1524+
List *shardPlacementList = ShardPlacementList(shardId);
1525+
if (list_length(shardPlacementList) != 1)
1526+
{
1527+
ereport(ERROR, (errmsg("table shard does not have a single shard placement")));
1528+
}
1529+
1530+
return ((ShardPlacement *) linitial(shardPlacementList))->nodeId;
1531+
}
1532+
1533+
1534+
/*
1535+
* NoneDistTableGetShardId returns shard id of given table that is known
1536+
* to be a none-distributed table.
1537+
*/
1538+
static int64
1539+
NoneDistTableGetShardId(Oid relationId)
1540+
{
1541+
if (HasDistributionKey(relationId))
1542+
{
1543+
ereport(ERROR, (errmsg("table is not a none-distributed table")));
1544+
}
1545+
1546+
List *shardIntervalList = LoadShardIntervalList(relationId);
1547+
return ((ShardInterval *) linitial(shardIntervalList))->shardId;
1548+
}
1549+
1550+
13221551
/*
13231552
* DecideCitusTableParams decides CitusTableParams based on given CitusTableType
13241553
* and DistributedTableParams if it's a distributed table.

src/backend/distributed/metadata/metadata_sync.c

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,7 @@ static char * RemoteSchemaIdExpressionById(Oid schemaId);
150150
static char * RemoteSchemaIdExpressionByName(char *schemaName);
151151
static char * RemoteTypeIdExpression(Oid typeId);
152152
static char * RemoteCollationIdExpression(Oid colocationId);
153+
static char * RemoteTableIdExpression(Oid relationId);
153154

154155

155156
PG_FUNCTION_INFO_V1(start_metadata_sync_to_all_nodes);
@@ -176,6 +177,7 @@ PG_FUNCTION_INFO_V1(citus_internal_add_colocation_metadata);
176177
PG_FUNCTION_INFO_V1(citus_internal_delete_colocation_metadata);
177178
PG_FUNCTION_INFO_V1(citus_internal_add_tenant_schema);
178179
PG_FUNCTION_INFO_V1(citus_internal_delete_tenant_schema);
180+
PG_FUNCTION_INFO_V1(citus_internal_update_none_dist_table_metadata);
179181

180182

181183
static bool got_SIGTERM = false;
@@ -3836,6 +3838,33 @@ citus_internal_delete_tenant_schema(PG_FUNCTION_ARGS)
38363838
}
38373839

38383840

3841+
/*
3842+
* citus_internal_update_none_dist_table_metadata is an internal UDF to
3843+
* update a row in pg_dist_partition that belongs to given none-distributed
3844+
* table.
3845+
*/
3846+
Datum
3847+
citus_internal_update_none_dist_table_metadata(PG_FUNCTION_ARGS)
3848+
{
3849+
CheckCitusVersion(ERROR);
3850+
3851+
Oid relationId = PG_GETARG_OID(0);
3852+
char replicationModel = PG_GETARG_CHAR(1);
3853+
uint32 colocationId = PG_GETARG_INT32(2);
3854+
bool autoConverted = PG_GETARG_BOOL(3);
3855+
3856+
if (!ShouldSkipMetadataChecks())
3857+
{
3858+
EnsureCoordinatorInitiatedOperation();
3859+
}
3860+
3861+
UpdateNoneDistTableMetadata(relationId, replicationModel,
3862+
colocationId, autoConverted);
3863+
3864+
PG_RETURN_VOID();
3865+
}
3866+
3867+
38393868
/*
38403869
* SyncNewColocationGroup synchronizes a new pg_dist_colocation entry to a worker.
38413870
*/
@@ -4017,6 +4046,24 @@ TenantSchemaDeleteCommand(char *schemaName)
40174046
}
40184047

40194048

4049+
/*
4050+
* UpdateNoneDistTableMetadataCommand returns a command to call
4051+
* citus_internal_update_none_dist_table_metadata().
4052+
*/
4053+
char *
4054+
UpdateNoneDistTableMetadataCommand(Oid relationId, char replicationModel,
4055+
uint32 colocationId, bool autoConverted)
4056+
{
4057+
StringInfo command = makeStringInfo();
4058+
appendStringInfo(command,
4059+
"SELECT pg_catalog.citus_internal_update_none_dist_table_metadata(%s, '%c', %u, %s)",
4060+
RemoteTableIdExpression(relationId), replicationModel, colocationId,
4061+
autoConverted ? "true" : "false");
4062+
4063+
return command->data;
4064+
}
4065+
4066+
40204067
/*
40214068
* RemoteSchemaIdExpressionById returns an expression in text form that
40224069
* can be used to obtain the OID of the schema with given schema id on a
@@ -4051,6 +4098,22 @@ RemoteSchemaIdExpressionByName(char *schemaName)
40514098
}
40524099

40534100

4101+
/*
4102+
* RemoteTableIdExpression returns an expression in text form that
4103+
* can be used to obtain the OID of the given table, by qualified name, on a
4104+
* different node when included in a query string.
4105+
*/
4106+
static char *
4107+
RemoteTableIdExpression(Oid relationId)
4108+
{
4109+
StringInfo regnamespaceExpr = makeStringInfo();
4110+
appendStringInfo(regnamespaceExpr, "%s::regclass",
4111+
quote_literal_cstr(generate_qualified_relation_name(relationId)));
4112+
4113+
return regnamespaceExpr->data;
4114+
}
4115+
4116+
40544117
/*
40554118
* SetMetadataSyncNodesFromNodeList sets list of nodes that needs to be metadata
40564119
* synced among given node list into metadataSyncContext.

0 commit comments

Comments
 (0)