Skip to content

Mixed case identifier support #24551

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Jun 9, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/product-tests-basic-environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -69,4 +69,4 @@ jobs:
if: needs.changes.outputs.codechange == 'true'
env:
OVERRIDE_JDK_DIR: ${{ env.JAVA_HOME }}
run: presto-product-tests/bin/run_on_docker.sh multinode -x quarantine,big_query,storage_formats,profile_specific_tests,tpcds,cassandra,mysql_connector,postgresql_connector,mysql,kafka,avro
run: presto-product-tests/bin/run_on_docker.sh multinode -x quarantine,big_query,storage_formats,profile_specific_tests,tpcds,cassandra,mysql_connector,mysql_mixed_case,postgresql_connector,mysql,kafka,avro,mixed_case
7 changes: 6 additions & 1 deletion .github/workflows/product-tests-specific-environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ jobs:
if: needs.changes.outputs.codechange == 'true'
env:
OVERRIDE_JDK_DIR: ${{ env.JAVA_HOME }}
run: presto-product-tests/bin/run_on_docker.sh singlenode -g hdfs_no_impersonation,avro
run: presto-product-tests/bin/run_on_docker.sh singlenode -g hdfs_no_impersonation,avro,mixed_case
- name: Product Tests Specific 1.2
if: needs.changes.outputs.codechange == 'true'
env:
Expand Down Expand Up @@ -169,3 +169,8 @@ jobs:
env:
OVERRIDE_JDK_DIR: ${{ env.JAVA_HOME }}
run: presto-product-tests/bin/run_on_docker.sh singlenode-sqlserver -g sqlserver
- name: Product Tests Specific 2.9
if: needs.changes.outputs.codechange == 'true'
env:
OVERRIDE_JDK_DIR: ${{ env.JAVA_HOME }}
run: presto-product-tests/bin/run_on_docker.sh singlenode-mysql-mixed-case-on -g mysql_connector,mysql_mixed_case
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,10 @@
import com.facebook.presto.spi.ConnectorTableMetadata;
import com.facebook.presto.spi.FixedSplitSource;
import com.facebook.presto.spi.PrestoException;
import com.facebook.presto.spi.PrestoWarning;
import com.facebook.presto.spi.SchemaTableName;
import com.facebook.presto.spi.TableNotFoundException;
import com.facebook.presto.spi.statistics.TableStatistics;
import com.google.common.base.CharMatcher;
import com.google.common.base.Joiner;
import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;
Expand Down Expand Up @@ -70,14 +70,14 @@
import static com.facebook.presto.common.type.VarbinaryType.VARBINARY;
import static com.facebook.presto.common.type.Varchars.isVarcharType;
import static com.facebook.presto.plugin.jdbc.JdbcErrorCode.JDBC_ERROR;
import static com.facebook.presto.plugin.jdbc.JdbcWarningCode.USE_OF_DEPRECATED_CONFIGURATION_PROPERTY;
import static com.facebook.presto.plugin.jdbc.StandardReadMappings.jdbcTypeToPrestoType;
import static com.facebook.presto.spi.StandardErrorCode.NOT_FOUND;
import static com.facebook.presto.spi.StandardErrorCode.NOT_SUPPORTED;
import static com.google.common.base.MoreObjects.firstNonNull;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Strings.emptyToNull;
import static com.google.common.base.Strings.isNullOrEmpty;
import static com.google.common.base.Verify.verify;
import static com.google.common.collect.ImmutableMap.toImmutableMap;
import static com.google.common.collect.ImmutableSet.toImmutableSet;
import static com.google.common.collect.Iterables.getOnlyElement;
Expand Down Expand Up @@ -118,6 +118,7 @@ public class BaseJdbcClient
protected final Cache<JdbcIdentity, Map<String, String>> remoteSchemaNames;
protected final Cache<RemoteTableNameCacheKey, Map<String, String>> remoteTableNames;
protected final Set<String> listSchemasIgnoredSchemas;
protected final boolean caseSensitiveNameMatchingEnabled;

public BaseJdbcClient(JdbcConnectorId connectorId, BaseJdbcConfig config, String identifierQuote, ConnectionFactory connectionFactory)
{
Expand All @@ -132,6 +133,7 @@ public BaseJdbcClient(JdbcConnectorId connectorId, BaseJdbcConfig config, String
this.remoteSchemaNames = remoteNamesCacheBuilder.build();
this.remoteTableNames = remoteNamesCacheBuilder.build();
this.listSchemasIgnoredSchemas = config.getlistSchemasIgnoredSchemas();
this.caseSensitiveNameMatchingEnabled = config.isCaseSensitiveNameMatching();
}

@PreDestroy
Expand All @@ -152,7 +154,7 @@ public final Set<String> getSchemaNames(ConnectorSession session, JdbcIdentity i
{
try (Connection connection = connectionFactory.openConnection(identity)) {
return listSchemas(connection).stream()
.map(schemaName -> schemaName.toLowerCase(ENGLISH))
.map(schemaName -> normalizeIdentifier(session, schemaName))
.collect(toImmutableSet());
}
catch (SQLException e) {
Expand Down Expand Up @@ -182,13 +184,14 @@ protected Collection<String> listSchemas(Connection connection)
public List<SchemaTableName> getTableNames(ConnectorSession session, JdbcIdentity identity, Optional<String> schema)
{
try (Connection connection = connectionFactory.openConnection(identity)) {
Optional<String> remoteSchema = schema.map(schemaName -> toRemoteSchemaName(identity, connection, schemaName));
Optional<String> remoteSchema = schema.map(schemaName -> toRemoteSchemaName(session, identity, connection, schemaName));
try (ResultSet resultSet = getTables(connection, remoteSchema, Optional.empty())) {
ImmutableList.Builder<SchemaTableName> list = ImmutableList.builder();
while (resultSet.next()) {
String tableSchema = getTableSchemaName(resultSet);
String tableName = resultSet.getString("TABLE_NAME");
list.add(new SchemaTableName(tableSchema.toLowerCase(ENGLISH), tableName.toLowerCase(ENGLISH)));
list.add(new SchemaTableName(normalizeIdentifier(session, tableSchema),
normalizeIdentifier(session, tableName)));
}
return list.build();
}
Expand All @@ -203,8 +206,8 @@ public List<SchemaTableName> getTableNames(ConnectorSession session, JdbcIdentit
public JdbcTableHandle getTableHandle(ConnectorSession session, JdbcIdentity identity, SchemaTableName schemaTableName)
{
try (Connection connection = connectionFactory.openConnection(identity)) {
String remoteSchema = toRemoteSchemaName(identity, connection, schemaTableName.getSchemaName());
String remoteTable = toRemoteTableName(identity, connection, remoteSchema, schemaTableName.getTableName());
String remoteSchema = toRemoteSchemaName(session, identity, connection, schemaTableName.getSchemaName());
String remoteTable = toRemoteTableName(session, identity, connection, remoteSchema, schemaTableName.getTableName());
try (ResultSet resultSet = getTables(connection, Optional.of(remoteSchema), Optional.of(remoteTable))) {
List<JdbcTableHandle> tableHandles = new ArrayList<>();
while (resultSet.next()) {
Expand Down Expand Up @@ -363,8 +366,8 @@ protected JdbcOutputTableHandle createTable(ConnectorTableMetadata tableMetadata

try (Connection connection = connectionFactory.openConnection(identity)) {
boolean uppercase = connection.getMetaData().storesUpperCaseIdentifiers();
String remoteSchema = toRemoteSchemaName(identity, connection, schemaTableName.getSchemaName());
String remoteTable = toRemoteTableName(identity, connection, remoteSchema, schemaTableName.getTableName());
String remoteSchema = toRemoteSchemaName(session, identity, connection, schemaTableName.getSchemaName());
String remoteTable = toRemoteTableName(session, identity, connection, remoteSchema, schemaTableName.getTableName());
if (uppercase) {
tableName = tableName.toUpperCase(ENGLISH);
}
Expand Down Expand Up @@ -607,6 +610,12 @@ public PreparedStatement getPreparedStatement(ConnectorSession session, Connecti
return connection.prepareStatement(sql);
}

@Override
public String normalizeIdentifier(ConnectorSession session, String identifier)
{
return identifier.toLowerCase(ENGLISH);
}

protected ResultSet getTables(Connection connection, Optional<String> schemaName, Optional<String> tableName)
throws SQLException
{
Expand All @@ -625,12 +634,14 @@ protected String getTableSchemaName(ResultSet resultSet)
return resultSet.getString("TABLE_SCHEM");
}

protected String toRemoteSchemaName(JdbcIdentity identity, Connection connection, String schemaName)
protected String toRemoteSchemaName(ConnectorSession session, JdbcIdentity identity, Connection connection, String schemaName)
{
requireNonNull(schemaName, "schemaName is null");
verify(CharMatcher.forPredicate(Character::isUpperCase).matchesNoneOf(schemaName), "Expected schema name from internal metadata to be lowercase: %s", schemaName);

if (caseInsensitiveNameMatching) {
session.getWarningCollector().add(new PrestoWarning(USE_OF_DEPRECATED_CONFIGURATION_PROPERTY,
"'case-insensitive-name-matching' is deprecated. Use of this configuration value may lead to query failures. " +
"Please switch to using 'case-sensitive-name-matching' for proper case sensitivity behavior."));
try {
Map<String, String> mapping = remoteSchemaNames.getIfPresent(identity);
if (mapping != null && !mapping.containsKey(schemaName)) {
Expand Down Expand Up @@ -669,13 +680,15 @@ protected Map<String, String> listSchemasByLowerCase(Connection connection)
.collect(toImmutableMap(schemaName -> schemaName.toLowerCase(ENGLISH), schemaName -> schemaName));
}

protected String toRemoteTableName(JdbcIdentity identity, Connection connection, String remoteSchema, String tableName)
protected String toRemoteTableName(ConnectorSession session, JdbcIdentity identity, Connection connection, String remoteSchema, String tableName)
{
requireNonNull(remoteSchema, "remoteSchema is null");
requireNonNull(tableName, "tableName is null");
verify(CharMatcher.forPredicate(Character::isUpperCase).matchesNoneOf(tableName), "Expected table name from internal metadata to be lowercase: %s", tableName);

if (caseInsensitiveNameMatching) {
session.getWarningCollector().add(new PrestoWarning(USE_OF_DEPRECATED_CONFIGURATION_PROPERTY,
"'case-insensitive-name-matching' is deprecated. Use of this configuration value may lead to query failures. " +
"Please switch to using 'case-sensitive-name-matching' for proper case sensitivity behavior."));
try {
RemoteTableNameCacheKey cacheKey = new RemoteTableNameCacheKey(identity, remoteSchema);
Map<String, String> mapping = remoteTableNames.getIfPresent(cacheKey);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,18 @@
package com.facebook.presto.plugin.jdbc;

import com.facebook.airlift.configuration.Config;
import com.facebook.airlift.configuration.ConfigDescription;
import com.facebook.airlift.configuration.ConfigSecuritySensitive;
import com.google.common.base.Splitter;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import com.google.inject.ConfigurationException;
import com.google.inject.spi.Message;
import io.airlift.units.Duration;
import io.airlift.units.MinDuration;

import javax.annotation.Nullable;
import javax.annotation.PostConstruct;
import javax.validation.constraints.NotNull;

import java.util.Set;
Expand All @@ -38,6 +43,7 @@ public class BaseJdbcConfig
private boolean caseInsensitiveNameMatching;
private Duration caseInsensitiveNameMatchingCacheTtl = new Duration(1, MINUTES);
private Set<String> listSchemasIgnoredSchemas = ImmutableSet.of("information_schema");
private boolean caseSensitiveNameMatchingEnabled;

@NotNull
public String getConnectionUrl()
Expand Down Expand Up @@ -105,12 +111,18 @@ public BaseJdbcConfig setPasswordCredentialName(String passwordCredentialName)
return this;
}

@Deprecated
public boolean isCaseInsensitiveNameMatching()
{
return caseInsensitiveNameMatching;
}

@Deprecated
@Config("case-insensitive-name-matching")
@ConfigDescription("Deprecated: This will be removed in future releases. Use 'case-sensitive-name-matching=true' instead for mysql. " +
"This configuration setting converts all schema/table names to lowercase. " +
"If your source database contains names differing only by case (e.g., 'Testdb' and 'testdb'), " +
"this setting can lead to conflicts and query failures.")
public BaseJdbcConfig setCaseInsensitiveNameMatching(boolean caseInsensitiveNameMatching)
{
this.caseInsensitiveNameMatching = caseInsensitiveNameMatching;
Expand Down Expand Up @@ -142,4 +154,27 @@ public BaseJdbcConfig setlistSchemasIgnoredSchemas(String listSchemasIgnoredSche
this.listSchemasIgnoredSchemas = ImmutableSet.copyOf(Splitter.on(",").trimResults().omitEmptyStrings().split(listSchemasIgnoredSchemas.toLowerCase(ENGLISH)));
return this;
}

public boolean isCaseSensitiveNameMatching()
{
return caseSensitiveNameMatchingEnabled;
}

@Config("case-sensitive-name-matching")
@ConfigDescription("Enable case-sensitive matching of schema, table names across the connector. " +
"When disabled, names are matched case-insensitively using lowercase normalization.")
public BaseJdbcConfig setCaseSensitiveNameMatching(boolean caseSensitiveNameMatchingEnabled)
{
this.caseSensitiveNameMatchingEnabled = caseSensitiveNameMatchingEnabled;
return this;
}

@PostConstruct
public void validateConfig()
{
if (isCaseInsensitiveNameMatching() && isCaseSensitiveNameMatching()) {
throw new ConfigurationException(ImmutableList.of(new Message("Only one of 'case-insensitive-name-matching=true' or 'case-sensitive-name-matching=true' can be set. " +
"These options are mutually exclusive.")));
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -98,4 +98,6 @@ PreparedStatement getPreparedStatement(ConnectorSession session, Connection conn
throws SQLException;

TableStatistics getTableStatistics(ConnectorSession session, JdbcTableHandle handle, List<JdbcColumnHandle> columnHandles, TupleDomain<ColumnHandle> tupleDomain);

String normalizeIdentifier(ConnectorSession session, String identifier);
}
Original file line number Diff line number Diff line change
Expand Up @@ -267,4 +267,10 @@ public TableStatistics getTableStatistics(ConnectorSession session, ConnectorTab
List<JdbcColumnHandle> columns = columnHandles.stream().map(JdbcColumnHandle.class::cast).collect(Collectors.toList());
return jdbcClient.getTableStatistics(session, handle, columns, constraint.getSummary());
}

@Override
public String normalizeIdentifier(ConnectorSession session, String identifier)
{
return jdbcClient.normalizeIdentifier(session, identifier);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.plugin.jdbc;

import com.facebook.presto.spi.WarningCode;
import com.facebook.presto.spi.WarningCodeSupplier;

public enum JdbcWarningCode
implements WarningCodeSupplier
{
USE_OF_DEPRECATED_CONFIGURATION_PROPERTY(1),
/**/;
private final WarningCode warningCode;

public static final int WARNING_CODE_MASK = 0x0300_0000;

JdbcWarningCode(int code)
{
warningCode = new WarningCode(code + WARNING_CODE_MASK, name());
}

@Override
public WarningCode toWarningCode()
{
return warningCode;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@ public void testDefaults()
.setPasswordCredentialName(null)
.setCaseInsensitiveNameMatching(false)
.setCaseInsensitiveNameMatchingCacheTtl(new Duration(1, MINUTES))
.setlistSchemasIgnoredSchemas("information_schema"));
.setlistSchemasIgnoredSchemas("information_schema")
.setCaseSensitiveNameMatching(false));
}

@Test
Expand All @@ -51,6 +52,7 @@ public void testExplicitPropertyMappings()
.put("case-insensitive-name-matching", "true")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now there are two configs, one is existing case-insensitive-name-matching and second, case-sensitive-name-matching that we are trying to add. Do you think we can say case-insensitive-name-matching when set to false is same as case-sensitive-name-matching set to true?

Copy link
Member Author

@agrawalreetika agrawalreetika Apr 23, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not really. Existing case-insensitive-name-matching has multiple limitations. So it converts all the actual schema/tables to lower case on Presto side, gives user a way to query databases that are not lower case in actual data source(like MySQL). But this will have issue in case, suppose MySQL has (Testdb & testdb) and on Presto config we have case-insensitive-name-matching=true then all the queries from that catalog in Presto will start failing with below error -

Query 20250423_055900_00003_xdtrw failed: Failed to find remote schema name: Multiple entries with same key: testdb=testdb and testdb=TestDb

So case-insensitive-name-matching=true shows users all the data sets/tables in lower case on Presto side, could work fine if actual data source strictly doesn't have same names in difference cases.

And default case-insensitive-name-matching=false will still have catalog in working condition in above case since it will convert all data sets/tables in lower case and allow user to at least query the dataset fine which are in lower case in actual data source.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also I feel now, even though the config names sounds appropriate but renaming currrent PR config to case-sensitive-name-matching could be little confusing since there is an existing similar flag there.
I think we should be eventually able to deprecate that once we have this PR changes in.

So I think it would be better to name the current PR configuration something different and then eventually we can deprecate exiting one. Let me know your thoughts around this?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay, thanks for clarifying. If all that can be achieved via case-insensitive-name-matching=true is a subset of the current enhancement, then definitely adding a deprecation note(including the recommended way) is a way to go. Regarding config name change, the deprecation warning along with suggestion of how to achieve the same using new flag is enough.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I understand the differences you pointed out between the two config keys, but from a user experience perspective this is very confusing. I think we need to make the new property name a little different or deprecate+change the name of the old one because the way it stands currently the names are too similar and it will be too easy for users to configure the wrong thing

.put("case-insensitive-name-matching.cache-ttl", "1s")
.put("list-schemas-ignored-schemas", "test,test2")
.put("case-sensitive-name-matching", "true")
.build();

BaseJdbcConfig expected = new BaseJdbcConfig()
Expand All @@ -61,7 +63,8 @@ public void testExplicitPropertyMappings()
.setPasswordCredentialName("bar")
.setCaseInsensitiveNameMatching(true)
.setlistSchemasIgnoredSchemas("test,test2")
.setCaseInsensitiveNameMatchingCacheTtl(new Duration(1, SECONDS));
.setCaseInsensitiveNameMatchingCacheTtl(new Duration(1, SECONDS))
.setCaseSensitiveNameMatching(true);

ConfigAssertions.assertFullMapping(properties, expected);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ public final class CatalogSchemaName
public CatalogSchemaName(String catalogName, String schemaName)
{
this.catalogName = requireNonNull(catalogName, "catalogName is null").toLowerCase(ENGLISH);
this.schemaName = requireNonNull(schemaName, "schemaName is null").toLowerCase(ENGLISH);
this.schemaName = requireNonNull(schemaName, "schemaName is null");
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you want to normalize catalogName and schemaName here?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So here I kept it original then all the places like accesscontrol & MetadataManager where getSchemaName is accessed, normalized it

}

@ThriftField(1)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,6 @@ public static QualifiedObjectName valueOf(String catalogName, String schemaName,
public QualifiedObjectName(String catalogName, String schemaName, String objectName)
{
checkLowerCase(catalogName, "catalogName");
checkLowerCase(schemaName, "schemaName");
checkLowerCase(objectName, "objectName");
this.catalogName = catalogName;
this.schemaName = schemaName;
this.objectName = objectName;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ private RuntimeMetricName()
public static final String GET_CANONICAL_INFO_TIME_NANOS = "getCanonicalInfoTimeNanos";
public static final String FRAGMENT_PLAN_TIME_NANOS = "fragmentPlanTimeNanos";
public static final String GET_LAYOUT_TIME_NANOS = "getLayoutTimeNanos";
public static final String GET_IDENTIFIER_NORMALIZATION_TIME_NANOS = "getIdentifierNormalizationTimeNanos";
public static final String REWRITE_AGGREGATION_IF_TO_FILTER_APPLIED = "rewriteAggregationIfToFilterApplied";
// Time between task creation and start.
public static final String TASK_QUEUED_TIME_NANOS = "taskQueuedTimeNanos";
Expand Down
4 changes: 4 additions & 0 deletions presto-docs/src/main/sphinx/connector/mysql.rst
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,10 @@ Property Name Description
cached. Set to ``0ms`` to disable the cache. ``1m``

``list-schemas-ignored-schemas`` List of schemas to ignore when listing schemas. ``information_schema,mysql``

``case-sensitive-name-matching`` Enable case sensitive identifier support for schema and table ``false``
names for the connector. When disabled, names are matched
case-insensitively using lowercase normalization.
================================================== ==================================================================== ===========

Querying MySQL
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2548,6 +2548,7 @@ private Table updateTable(String tableName)

protected Table loadTable(String tableName)
{
tableName = normalizeIdentifier(tableName, ICEBERG_CATALOG);
Catalog catalog = CatalogUtil.loadCatalog(catalogType.getCatalogImpl(), ICEBERG_CATALOG, getProperties(), new Configuration());
return catalog.loadTable(TableIdentifier.of("tpch", tableName));
}
Expand Down
Loading
Loading