Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -234,15 +234,12 @@ struct BaseModeFunction {
}

template <class STATE, class OP>
static void Combine(const STATE &source, STATE &target, AggregateInputData &) {
static void Combine(const STATE &source, STATE &target, AggregateInputData &aggr_input_data) {
if (!source.frequency_map) {
return;
}
if (!target.frequency_map) {
// Copy - don't destroy! Otherwise windowing will break.
target.frequency_map = new typename STATE::Counts(*source.frequency_map);
target.count = source.count;
return;
target.frequency_map = TYPE_OP::CreateEmpty(aggr_input_data.allocator);
}
for (auto &val : *source.frequency_map) {
auto &i = (*target.frequency_map)[val.first];
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -612,7 +612,8 @@ unique_ptr<BaseStatistics> DateTruncStatistics(vector<BaseStatistics> &child_sta
auto result = NumericStats::CreateEmpty(min_value.type());
NumericStats::SetMin(result, min_value);
NumericStats::SetMax(result, max_value);
result.CopyValidity(child_stats[0]);

result.CombineValidity(child_stats[0], child_stats[1]);
return result.ToUnique();
}

Expand Down
9 changes: 6 additions & 3 deletions src/duckdb/extension/icu/icu-makedate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,12 @@
#include "duckdb/common/operator/cast_operators.hpp"
#include "duckdb/common/operator/subtract.hpp"
#include "duckdb/common/types/date.hpp"
#include "duckdb/common/types/time.hpp"
#include "duckdb/common/types/timestamp.hpp"
#include "duckdb/common/vector_operations/senary_executor.hpp"
#include "duckdb/common/vector_operations/septenary_executor.hpp"
#include "duckdb/function/cast/cast_function_set.hpp"
#include "duckdb/main/extension/extension_loader.hpp"
#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp"
#include "duckdb/main/settings.hpp"
#include "include/icu-casts.hpp"
#include "include/icu-datefunc.hpp"
#include "include/icu-datetrunc.hpp"
Expand Down Expand Up @@ -57,6 +56,10 @@ BoundCastInfo ICUMakeDate::BindCastToDate(BindCastInput &input, const LogicalTyp
if (!input.context) {
throw InternalException("Missing context for TIMESTAMPTZ to DATE cast.");
}
if (DBConfig::GetSetting<DisableTimestamptzCastsSetting>(*input.context)) {
throw BinderException("Casting from TIMESTAMP WITH TIME ZONE to DATE without an explicit time zone "
"has been disabled - use \"AT TIME ZONE ...\"");
}

auto cast_data = make_uniq<CastData>(make_uniq<BindData>(*input.context));

Expand All @@ -80,7 +83,7 @@ struct ICUMakeTimestampTZFunc : public ICUDateFunc {
ss -= secs;
ss *= Interval::MSECS_PER_SEC;
const auto millis = int32_t(ss);
int64_t micros = std::round((ss - millis) * Interval::MICROS_PER_MSEC);
int64_t micros = LossyNumericCast<int64_t, double>(std::round((ss - millis) * Interval::MICROS_PER_MSEC));

calendar->set(UCAL_YEAR, year);
calendar->set(UCAL_MONTH, month);
Expand Down

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,9 @@ class StringColumnReader : public ColumnReader {
const StringColumnType string_column_type;

public:
static void VerifyString(const char *str_data, uint32_t str_len, const bool isVarchar);
static bool IsValid(const char *str_data, uint32_t str_len, bool is_varchar);
static bool IsValid(const string &str, bool is_varchar);
static void VerifyString(const char *str_data, uint32_t str_len, bool is_varchar);
void VerifyString(const char *str_data, uint32_t str_len) const;

static void ReferenceBlock(Vector &result, shared_ptr<ResizeableBuffer> &block);
Expand Down
12 changes: 4 additions & 8 deletions src/duckdb/extension/parquet/parquet_statistics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -396,18 +396,14 @@ unique_ptr<BaseStatistics> ParquetStatisticsUtils::TransformColumnStatistics(con
break;
case LogicalTypeId::VARCHAR: {
auto string_stats = StringStats::CreateUnknown(type);
if (parquet_stats.__isset.min_value) {
StringColumnReader::VerifyString(parquet_stats.min_value.c_str(), parquet_stats.min_value.size(), true);
if (parquet_stats.__isset.min_value && StringColumnReader::IsValid(parquet_stats.min_value, true)) {
StringStats::SetMin(string_stats, parquet_stats.min_value);
} else if (parquet_stats.__isset.min) {
StringColumnReader::VerifyString(parquet_stats.min.c_str(), parquet_stats.min.size(), true);
} else if (parquet_stats.__isset.min && StringColumnReader::IsValid(parquet_stats.min, true)) {
StringStats::SetMin(string_stats, parquet_stats.min);
}
if (parquet_stats.__isset.max_value) {
StringColumnReader::VerifyString(parquet_stats.max_value.c_str(), parquet_stats.max_value.size(), true);
if (parquet_stats.__isset.max_value && StringColumnReader::IsValid(parquet_stats.max_value, true)) {
StringStats::SetMax(string_stats, parquet_stats.max_value);
} else if (parquet_stats.__isset.max) {
StringColumnReader::VerifyString(parquet_stats.max.c_str(), parquet_stats.max.size(), true);
} else if (parquet_stats.__isset.max && StringColumnReader::IsValid(parquet_stats.max, true)) {
StringStats::SetMax(string_stats, parquet_stats.max);
}
row_group_stats = string_stats.ToUnique();
Expand Down
13 changes: 10 additions & 3 deletions src/duckdb/extension/parquet/reader/string_column_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,16 +16,23 @@ StringColumnReader::StringColumnReader(ParquetReader &reader, const ParquetColum
}
}

void StringColumnReader::VerifyString(const char *str_data, uint32_t str_len, const bool is_varchar) {
bool StringColumnReader::IsValid(const char *str_data, uint32_t str_len, const bool is_varchar) {
if (!is_varchar) {
return;
return true;
}
// verify if a string is actually UTF8, and if there are no null bytes in the middle of the string
// technically Parquet should guarantee this, but reality is often disappointing
UnicodeInvalidReason reason;
size_t pos;
auto utf_type = Utf8Proc::Analyze(str_data, str_len, &reason, &pos);
if (utf_type == UnicodeType::INVALID) {
return utf_type != UnicodeType::INVALID;
}

// Convenience overload: checks a whole string object by handing its character buffer
// and length to the pointer-based IsValid overload.
bool StringColumnReader::IsValid(const string &str, bool is_varchar) {
	const char *str_data = str.c_str();
	const auto str_len = str.size();
	// str_len is converted to the uint32_t length parameter of the pointer-based overload
	return IsValid(str_data, str_len, is_varchar);
}
void StringColumnReader::VerifyString(const char *str_data, uint32_t str_len, const bool is_varchar) {
if (!IsValid(str_data, str_len, is_varchar)) {
throw InvalidInputException("Invalid string encoding found in Parquet file: value \"%s\" is not valid UTF8!",
Blob::ToString(string_t(str_data, str_len)));
}
Expand Down
19 changes: 13 additions & 6 deletions src/duckdb/src/catalog/catalog_search_path.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -195,8 +195,15 @@ void CatalogSearchPath::Set(CatalogSearchEntry new_value, CatalogSetPathType set
Set(std::move(new_paths), set_type);
}

const vector<CatalogSearchEntry> &CatalogSearchPath::Get() const {
return paths;
// Returns a copy of the search path restricted to entries that name a schema.
// Entries whose schema is empty stay in `paths` but are left out of the result.
vector<CatalogSearchEntry> CatalogSearchPath::Get() const {
	vector<CatalogSearchEntry> entries_with_schema;
	for (const auto &entry : paths) {
		if (!entry.schema.empty()) {
			entries_with_schema.push_back(entry);
		}
	}
	return entries_with_schema;
}

string CatalogSearchPath::GetDefaultSchema(const string &catalog) const {
Expand Down Expand Up @@ -248,7 +255,7 @@ vector<string> CatalogSearchPath::GetCatalogsForSchema(const string &schema) con
catalogs.push_back(SYSTEM_CATALOG);
} else {
for (auto &path : paths) {
if (StringUtil::CIEquals(path.schema, schema)) {
if (StringUtil::CIEquals(path.schema, schema) || path.schema.empty()) {
catalogs.push_back(path.catalog);
}
}
Expand All @@ -259,24 +266,24 @@ vector<string> CatalogSearchPath::GetCatalogsForSchema(const string &schema) con
vector<string> CatalogSearchPath::GetSchemasForCatalog(const string &catalog) const {
vector<string> schemas;
for (auto &path : paths) {
if (StringUtil::CIEquals(path.catalog, catalog)) {
if (!path.schema.empty() && StringUtil::CIEquals(path.catalog, catalog)) {
schemas.push_back(path.schema);
}
}
return schemas;
}

const CatalogSearchEntry &CatalogSearchPath::GetDefault() const {
const auto &paths = Get();
D_ASSERT(paths.size() >= 2);
D_ASSERT(!paths[1].schema.empty());
return paths[1];
}

void CatalogSearchPath::SetPathsInternal(vector<CatalogSearchEntry> new_paths) {
this->set_paths = std::move(new_paths);

paths.clear();
paths.reserve(set_paths.size() + 3);
paths.reserve(set_paths.size() + 4);
paths.emplace_back(TEMP_CATALOG, DEFAULT_SCHEMA);
for (auto &path : set_paths) {
paths.push_back(path);
Expand Down
73 changes: 55 additions & 18 deletions src/duckdb/src/common/adbc/adbc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
#include "duckdb/common/adbc/single_batch_array_stream.hpp"
#include "duckdb/function/table/arrow.hpp"
#include "duckdb/common/adbc/wrappers.hpp"
#include <algorithm>
#include <cstring>
#include <stdlib.h>
#include <string.h>

Expand Down Expand Up @@ -862,6 +864,22 @@ AdbcStatusCode StatementExecuteQuery(struct AdbcStatement *statement, struct Arr
if (has_stream && to_table) {
return IngestToTableFromBoundStream(wrapper, error);
}

if (!wrapper->statement) {
if (out) {
out->private_data = nullptr;
out->get_schema = nullptr;
out->get_next = nullptr;
out->release = nullptr;
out->get_last_error = nullptr;
}

if (rows_affected) {
*rows_affected = 0;
}
return ADBC_STATUS_OK;
}

auto stream_wrapper = static_cast<DuckDBAdbcStreamWrapper *>(malloc(sizeof(DuckDBAdbcStreamWrapper)));
if (has_stream) {
// A stream was bound to the statement, use that to bind parameters
Expand Down Expand Up @@ -987,6 +1005,12 @@ AdbcStatusCode StatementSetSqlQuery(struct AdbcStatement *statement, const char
return ADBC_STATUS_INVALID_ARGUMENT;
}

auto query_len = strlen(query);
if (std::all_of(query, query + query_len, duckdb::StringUtil::CharacterIsSpace)) {
SetError(error, "No statements found");
return ADBC_STATUS_INVALID_ARGUMENT;
}

auto wrapper = static_cast<DuckDBAdbcStatementWrapper *>(statement->private_data);
if (wrapper->ingestion_stream.release) {
// Release any resources currently held by the ingestion stream before we overwrite it
Expand All @@ -1006,6 +1030,13 @@ AdbcStatusCode StatementSetSqlQuery(struct AdbcStatement *statement, const char
duckdb_destroy_extracted(&extracted_statements);
return ADBC_STATUS_INTERNAL;
}

if (extract_statements_size == 0) {
// Query is non-empty, but there are no actual statements.
duckdb_destroy_extracted(&extracted_statements);
return ADBC_STATUS_OK;
}

// Now lets loop over the statements, and execute every one
for (idx_t i = 0; i < extract_statements_size - 1; i++) {
duckdb_prepared_statement statement_internal = nullptr;
Expand Down Expand Up @@ -1161,12 +1192,21 @@ AdbcStatusCode StatementSetOption(struct AdbcStatement *statement, const char *k
return ADBC_STATUS_INVALID_ARGUMENT;
}

// Produces the text placed after LIKE in the metadata queries built by ConnectionGetObjects.
// A null input yields the match-everything pattern '%' (already wrapped in single quotes);
// otherwise the input is passed through KeywordHelper::WriteQuoted with a single-quote character.
// NOTE(review): WriteQuoted presumably also escapes embedded quotes - confirm against its definition.
std::string createFilter(const char *input) {
	if (!input) {
		return "'%'";
	}
	return duckdb::KeywordHelper::WriteQuoted(input, '\'');
}

AdbcStatusCode ConnectionGetObjects(struct AdbcConnection *connection, int depth, const char *catalog,
const char *db_schema, const char *table_name, const char **table_type,
const char *column_name, struct ArrowArrayStream *out, struct AdbcError *error) {
std::string catalog_filter = catalog ? catalog : "%";
std::string db_schema_filter = db_schema ? db_schema : "%";
std::string table_name_filter = table_name ? table_name : "%";
std::string catalog_filter = createFilter(catalog);
std::string db_schema_filter = createFilter(db_schema);
std::string table_name_filter = createFilter(table_name);
std::string column_name_filter = createFilter(column_name);
std::string table_type_condition = "";
if (table_type && table_type[0]) {
table_type_condition = " AND table_type IN (";
Expand All @@ -1182,13 +1222,10 @@ AdbcStatusCode ConnectionGetObjects(struct AdbcConnection *connection, int depth
if (i > 0) {
table_type_condition += ", ";
}
table_type_condition += "'";
table_type_condition += table_type[i];
table_type_condition += "'";
table_type_condition += createFilter(table_type[i]);
}
table_type_condition += ")";
}
std::string column_name_filter = column_name ? column_name : "%";

std::string query;
switch (depth) {
Expand Down Expand Up @@ -1233,7 +1270,7 @@ AdbcStatusCode ConnectionGetObjects(struct AdbcConnection *connection, int depth
)[] catalog_db_schemas
FROM
information_schema.schemata
WHERE catalog_name LIKE '%s'
WHERE catalog_name LIKE %s
GROUP BY catalog_name
)",
catalog_filter);
Expand All @@ -1246,7 +1283,7 @@ AdbcStatusCode ConnectionGetObjects(struct AdbcConnection *connection, int depth
catalog_name,
schema_name,
FROM information_schema.schemata
WHERE schema_name LIKE '%s'
WHERE schema_name LIKE %s
)

SELECT
Expand Down Expand Up @@ -1289,7 +1326,7 @@ AdbcStatusCode ConnectionGetObjects(struct AdbcConnection *connection, int depth
information_schema.schemata
LEFT JOIN db_schemas dbs
USING (catalog_name, schema_name)
WHERE catalog_name LIKE '%s'
WHERE catalog_name LIKE %s
GROUP BY catalog_name
)",
db_schema_filter, catalog_filter);
Expand Down Expand Up @@ -1333,7 +1370,7 @@ AdbcStatusCode ConnectionGetObjects(struct AdbcConnection *connection, int depth
)[],
}) db_schema_tables
FROM information_schema.tables
WHERE table_name LIKE '%s'%s
WHERE table_name LIKE %s%s
GROUP BY table_catalog, table_schema
),
db_schemas AS (
Expand All @@ -1344,7 +1381,7 @@ AdbcStatusCode ConnectionGetObjects(struct AdbcConnection *connection, int depth
FROM information_schema.schemata
LEFT JOIN tables
USING (catalog_name, schema_name)
WHERE schema_name LIKE '%s'
WHERE schema_name LIKE %s
)

SELECT
Expand All @@ -1357,7 +1394,7 @@ AdbcStatusCode ConnectionGetObjects(struct AdbcConnection *connection, int depth
information_schema.schemata
LEFT JOIN db_schemas dbs
USING (catalog_name, schema_name)
WHERE catalog_name LIKE '%s'
WHERE catalog_name LIKE %s
GROUP BY catalog_name
)",
table_name_filter, table_type_condition, db_schema_filter, catalog_filter);
Expand Down Expand Up @@ -1392,7 +1429,7 @@ AdbcStatusCode ConnectionGetObjects(struct AdbcConnection *connection, int depth
xdbc_is_generatedcolumn: NULL::BOOLEAN,
}) table_columns
FROM information_schema.columns
WHERE column_name LIKE '%s'
WHERE column_name LIKE %s
GROUP BY table_catalog, table_schema, table_name
),
constraints AS (
Expand Down Expand Up @@ -1421,7 +1458,7 @@ AdbcStatusCode ConnectionGetObjects(struct AdbcConnection *connection, int depth
constraint_column_names,
list_filter(
constraint_column_names,
lambda name: name LIKE '%s'
lambda name: name LIKE %s
)
)
GROUP BY database_name, schema_name, table_name
Expand All @@ -1441,7 +1478,7 @@ AdbcStatusCode ConnectionGetObjects(struct AdbcConnection *connection, int depth
USING (table_catalog, table_schema, table_name)
LEFT JOIN constraints
USING (table_catalog, table_schema, table_name)
WHERE table_name LIKE '%s'%s
WHERE table_name LIKE %s%s
GROUP BY table_catalog, table_schema
),
db_schemas AS (
Expand All @@ -1452,7 +1489,7 @@ AdbcStatusCode ConnectionGetObjects(struct AdbcConnection *connection, int depth
FROM information_schema.schemata
LEFT JOIN tables
USING (catalog_name, schema_name)
WHERE schema_name LIKE '%s'
WHERE schema_name LIKE %s
)

SELECT
Expand All @@ -1465,7 +1502,7 @@ AdbcStatusCode ConnectionGetObjects(struct AdbcConnection *connection, int depth
information_schema.schemata
LEFT JOIN db_schemas dbs
USING (catalog_name, schema_name)
WHERE catalog_name LIKE '%s'
WHERE catalog_name LIKE %s
GROUP BY catalog_name
)",
column_name_filter, column_name_filter, table_name_filter,
Expand Down
Loading
Loading