Skip to content

[FLINK-37507] Fix MySQL CDC accidentally captures common-prefix database when scan.binlog.newly-added-table is enabled #3957

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Apr 21, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -497,31 +497,57 @@ && doubleCompare(distributionFactorLower, 1.0d) <= 0,
distributionFactorLower));
}

/**
 * Temporary stand-in for an escaped dot ({@code \.}) while {@link
 * #validateTableAndReturnDebeziumStyle(String)} swaps escaped and unescaped dots. It contains no
 * dot itself, so the intermediate replacement of plain dots leaves it untouched.
 */
private static final String DOT_PLACEHOLDER = "_$dot_placeholder$_";

/**
 * Converts a CDC-style table capture list into a Debezium-style regular expression.
 *
 * <p>Currently, the supported regular syntax is not exactly the same in {@link Selectors} and
 * {@link Tables.TableFilter}. The main distinctions are:
 *
 * <p>1) {@link Selectors} use {@code ,} to split table names and {@link Tables.TableFilter} use
 * {@code |} to split table names.
 *
 * <p>2) If there is a need to use a dot ({@code .}) in a regular expression to match any
 * character, it is necessary to escape the dot with a backslash, refer to {@link
 * MySqlDataSourceOptions#TABLES}.
 *
 * <p>3) The unescaped {@code .} is used as the separator of database and table name. When
 * converting to Debezium style, it is expected to be escaped to match the dot ({@code .})
 * literally instead of the meta-character.
 *
 * <p>For example, {@code db\.*.tbl\.*,other.t1} becomes {@code db.*\.tbl.*|other\.t1}.
 *
 * @param tables the CDC-style table capture list
 * @return the equivalent Debezium-style table capture list
 */
private String validateTableAndReturnDebeziumStyle(String tables) {
    LOG.info("Rewriting CDC style table capture list: {}", tables);

    // In CDC-style table matching, table names could be separated by `,` character.
    // Convert it to `|` as it's standard RegEx syntax.
    String vertSeparatedTables = tables.replace(",", "|");
    LOG.info("Expression after replacing comma with vert separator: {}", vertSeparatedTables);

    // Essentially, we're just trying to swap escaped `\\.` and unescaped `.`.
    // In our table matching syntax, `\\.` means RegEx token matcher and `.` means database &
    // table name separator.
    // On the contrary, while we're matching TableId string, `\\.` means matching the "dot"
    // literal and `.` is the meta-character.
    //
    // NOTE(review): the swap goes through DOT_PLACEHOLDER, so it assumes the capture list never
    // contains the placeholder text literally; such an occurrence would be turned into `.` in
    // step 3.

    // Step 1: park every escaped dot (`\.`) behind the placeholder (shown as `$` below).
    // For example, `db\.*.tbl\.*` => `db$*.tbl$*`
    String unescapedTables = vertSeparatedTables.replace("\\.", DOT_PLACEHOLDER);
    LOG.info("Expression after unescaping dots as RegEx meta-character: {}", unescapedTables);

    // Step 2: replace all remaining dots (`.`) to quoted version (`\.`), as a separator between
    // database and table names.
    // For example, `db$*.tbl$*` => `db$*\.tbl$*`
    String unescapedTablesWithDbTblSeparator = unescapedTables.replace(".", "\\.");
    LOG.info("Re-escaping dots as TableId delimiter: {}", unescapedTablesWithDbTblSeparator);

    // Step 3: restore placeholder to normal RegEx matcher (`.`)
    // For example, `db$*\.tbl$*` => `db.*\.tbl.*`
    String debeziumStyleTableCaptureList =
            unescapedTablesWithDbTblSeparator.replace(DOT_PLACEHOLDER, ".");
    LOG.info("Final Debezium-style table capture list: {}", debeziumStyleTableCaptureList);

    return debeziumStyleTableCaptureList;
}

/** Replaces the default timezone placeholder with session timezone, if applicable. */
Expand Down
Loading
Loading