Skip to content

Commit c7e068a

Browse files
authored
Merge pull request #175 from duckdb/bump143-2
bump to duckdb 1.4.3
2 parents e252e49 + aa46fbc commit c7e068a

File tree

91 files changed

+868
-369
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

91 files changed

+868
-369
lines changed

src/duckdb/extension/core_functions/aggregate/holistic/approximate_quantile.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -355,11 +355,11 @@ AggregateFunction GetApproxQuantileListAggregateFunction(const LogicalType &type
355355
return GetTypedApproxQuantileListAggregateFunction<int16_t, int16_t>(type);
356356
case LogicalTypeId::INTEGER:
357357
case LogicalTypeId::DATE:
358-
case LogicalTypeId::TIME:
359358
return GetTypedApproxQuantileListAggregateFunction<int32_t, int32_t>(type);
360359
case LogicalTypeId::BIGINT:
361360
case LogicalTypeId::TIMESTAMP:
362361
case LogicalTypeId::TIMESTAMP_TZ:
362+
case LogicalTypeId::TIME:
363363
return GetTypedApproxQuantileListAggregateFunction<int64_t, int64_t>(type);
364364
case LogicalTypeId::TIME_TZ:
365365
// Not binary comparable

src/duckdb/extension/icu/icu_extension.cpp

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -230,8 +230,16 @@ static string NormalizeTimeZone(const string &tz_str) {
230230
}
231231

232232
idx_t pos = 3;
233-
const auto sign = tz_str[pos++];
234-
if (sign != '+' && sign != '-') {
233+
const auto utc = tz_str[pos++];
234+
// Invert the sign (UTC and Etc use opposite sign conventions)
235+
// https://en.wikipedia.org/wiki/Tz_database#Area
236+
auto sign = utc;
237+
if (utc == '+') {
238+
sign = '-';
239+
;
240+
} else if (utc == '-') {
241+
sign = '+';
242+
} else {
235243
break;
236244
}
237245

@@ -424,12 +432,13 @@ static void LoadInternal(ExtensionLoader &loader) {
424432
auto locales = icu::Collator::getAvailableLocales(count);
425433
for (int32_t i = 0; i < count; i++) {
426434
string collation;
427-
if (string(locales[i].getCountry()).empty()) {
435+
const auto &locale = locales[i]; // NOLINT
436+
if (string(locale.getCountry()).empty()) {
428437
// language only
429-
collation = locales[i].getLanguage();
438+
collation = locale.getLanguage();
430439
} else {
431440
// language + country
432-
collation = locales[i].getLanguage() + string("_") + locales[i].getCountry();
441+
collation = locale.getLanguage() + string("_") + locale.getCountry();
433442
}
434443
collation = StringUtil::Lower(collation);
435444

src/duckdb/extension/parquet/column_writer.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -534,10 +534,10 @@ ColumnWriter::CreateWriterRecursive(ClientContext &context, ParquetWriter &write
534534
template <>
535535
struct NumericLimits<float_na_equal> {
536536
static constexpr float Minimum() {
537-
return std::numeric_limits<float>::lowest();
537+
return NumericLimits<float>::Minimum();
538538
};
539539
static constexpr float Maximum() {
540-
return std::numeric_limits<float>::max();
540+
return NumericLimits<float>::Maximum();
541541
};
542542
static constexpr bool IsSigned() {
543543
return std::is_signed<float>::value;
@@ -550,10 +550,10 @@ struct NumericLimits<float_na_equal> {
550550
template <>
551551
struct NumericLimits<double_na_equal> {
552552
static constexpr double Minimum() {
553-
return std::numeric_limits<double>::lowest();
553+
return NumericLimits<double>::Minimum();
554554
};
555555
static constexpr double Maximum() {
556-
return std::numeric_limits<double>::max();
556+
return NumericLimits<double>::Maximum();
557557
};
558558
static constexpr bool IsSigned() {
559559
return std::is_signed<double>::value;

src/duckdb/extension/parquet/include/writer/templated_column_writer.hpp

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,8 @@ class StandardColumnWriter : public PrimitiveColumnWriter {
126126
public:
127127
// Creates the write state for this column within the given row group and registers the
// column with that row group. The state's column index argument is the row group's
// current column count, read before RegisterToRowGroup is called.
unique_ptr<ColumnWriterState> InitializeWriteState(duckdb_parquet::RowGroup &row_group) override {
128128
auto result = make_uniq<StandardColumnWriterState<SRC, TGT, OP>>(writer, row_group, row_group.columns.size());
129-
result->encoding = duckdb_parquet::Encoding::RLE_DICTIONARY;
129+
// The initial encoding is always a dictionary encoding; which enum value is used depends
// on the Parquet version reported by the writer: PLAIN_DICTIONARY for V1, RLE_DICTIONARY otherwise.
result->encoding = writer.GetParquetVersion() == ParquetVersion::V1 ? duckdb_parquet::Encoding::PLAIN_DICTIONARY
130+
: duckdb_parquet::Encoding::RLE_DICTIONARY;
130131
RegisterToRowGroup(row_group);
131132
return std::move(result);
132133
}
@@ -150,6 +151,8 @@ class StandardColumnWriter : public PrimitiveColumnWriter {
150151
}
151152
page_state.dbp_encoder.FinishWrite(temp_writer);
152153
break;
154+
case duckdb_parquet::Encoding::PLAIN_DICTIONARY:
155+
// PLAIN_DICTIONARY can be treated the same as RLE_DICTIONARY
153156
case duckdb_parquet::Encoding::RLE_DICTIONARY:
154157
D_ASSERT(page_state.dict_bit_width != 0);
155158
if (!page_state.dict_written_value) {
@@ -265,7 +268,8 @@ class StandardColumnWriter : public PrimitiveColumnWriter {
265268

266269
// Returns true when the column state is using a dictionary encoding.
// Both RLE_DICTIONARY and PLAIN_DICTIONARY count as dictionary encodings here.
bool HasDictionary(PrimitiveColumnWriterState &state_p) override {
267270
auto &state = state_p.Cast<StandardColumnWriterState<SRC, TGT, OP>>();
268-
return state.encoding == duckdb_parquet::Encoding::RLE_DICTIONARY;
271+
return state.encoding == duckdb_parquet::Encoding::RLE_DICTIONARY ||
272+
state.encoding == duckdb_parquet::Encoding::PLAIN_DICTIONARY;
269273
}
270274

271275
idx_t DictionarySize(PrimitiveColumnWriterState &state_p) override {
@@ -285,7 +289,8 @@ class StandardColumnWriter : public PrimitiveColumnWriter {
285289

286290
void FlushDictionary(PrimitiveColumnWriterState &state_p, ColumnWriterStatistics *stats) override {
287291
auto &state = state_p.Cast<StandardColumnWriterState<SRC, TGT, OP>>();
288-
D_ASSERT(state.encoding == duckdb_parquet::Encoding::RLE_DICTIONARY);
292+
D_ASSERT(state.encoding == duckdb_parquet::Encoding::RLE_DICTIONARY ||
293+
state.encoding == duckdb_parquet::Encoding::PLAIN_DICTIONARY);
289294

290295
if (writer.EnableBloomFilters()) {
291296
state.bloom_filter =
@@ -310,7 +315,8 @@ class StandardColumnWriter : public PrimitiveColumnWriter {
310315
idx_t GetRowSize(const Vector &vector, const idx_t index,
311316
const PrimitiveColumnWriterState &state_p) const override {
312317
auto &state = state_p.Cast<StandardColumnWriterState<SRC, TGT, OP>>();
313-
if (state.encoding == duckdb_parquet::Encoding::RLE_DICTIONARY) {
318+
if (state.encoding == duckdb_parquet::Encoding::RLE_DICTIONARY ||
319+
state.encoding == duckdb_parquet::Encoding::PLAIN_DICTIONARY) {
314320
return (state.key_bit_width + 7) / 8;
315321
} else {
316322
return OP::template GetRowSize<SRC, TGT>(vector, index);
@@ -328,6 +334,8 @@ class StandardColumnWriter : public PrimitiveColumnWriter {
328334
const auto *data_ptr = FlatVector::GetData<SRC>(input_column);
329335

330336
switch (page_state.encoding) {
337+
case duckdb_parquet::Encoding::PLAIN_DICTIONARY:
338+
// PLAIN_DICTIONARY can be treated the same as RLE_DICTIONARY
331339
case duckdb_parquet::Encoding::RLE_DICTIONARY: {
332340
idx_t r = chunk_start;
333341
if (!page_state.dict_written_value) {

src/duckdb/src/common/encryption_key_manager.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,21 +72,25 @@ string EncryptionKeyManager::GenerateRandomKeyID() {
7272
}
7373

7474
// Stores an EncryptionKey built from `key` under `key_name`, then wipes the caller's buffer.
// `lock` is held for the entire call, including the wipe.
// After this returns, the DERIVED_KEY_LENGTH bytes at `key` have been cleared, so the
// caller must not expect to read the key material from its own buffer again.
void EncryptionKeyManager::AddKey(const string &key_name, data_ptr_t key) {
75+
lock_guard<mutex> guard(lock);
7576
// NOTE(review): if derived_keys follows std map semantics, emplace leaves an existing
// entry for key_name untouched; the buffer is still cleared below either way - confirm intended.
derived_keys.emplace(key_name, EncryptionKey(key));
7677
// Zero-out the encryption key
7778
duckdb_mbedtls::MbedTlsWrapper::AESStateMBEDTLS::SecureClearData(key, DERIVED_KEY_LENGTH);
7879
}
7980

8081
// Returns true if an entry for `key_name` exists in derived_keys.
// Acquires `lock` for the duration of the lookup; the answer can be stale as soon as
// the function returns, since the lock is released on return.
bool EncryptionKeyManager::HasKey(const string &key_name) const {
82+
lock_guard<mutex> guard(lock);
8183
return derived_keys.find(key_name) != derived_keys.end();
8384
}
8385

8486
// Returns a pointer to the stored key material for `key_name`.
// The key is expected to exist (checked by the assertion below).
const_data_ptr_t EncryptionKeyManager::GetKey(const string &key_name) const {
8587
// The assertion runs before the guard is taken: HasKey acquires `lock` itself, so
// calling it while already holding `lock` would presumably self-deadlock (assumes a
// non-recursive mutex - TODO confirm).
D_ASSERT(HasKey(key_name));
88+
lock_guard<mutex> guard(lock);
8689
// NOTE(review): the guard is released when this function returns, so the returned
// pointer is not protected against a concurrent DeleteKey of the same name - confirm
// callers guarantee the entry outlives their use of the pointer.
return derived_keys.at(key_name).GetPtr();
8790
}
8891

8992
// Removes the entry for `key_name` from derived_keys while holding `lock`.
// No check is made here that the entry exists before erasing.
void EncryptionKeyManager::DeleteKey(const string &key_name) {
93+
lock_guard<mutex> guard(lock);
9094
derived_keys.erase(key_name);
9195
}
9296

src/duckdb/src/common/local_file_system.cpp

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1283,6 +1283,29 @@ bool LocalFileSystem::OnDiskFile(FileHandle &handle) {
12831283
return true;
12841284
}
12851285

1286+
// Builds a tag identifying the current on-disk version of the file behind `handle`.
// On Windows builds (_WIN32) this returns an empty string.
// Elsewhere it returns a 32-byte string holding four uint64 values taken from fstat:
// device id, inode number, file size and modification time. The result is raw bytes,
// not printable text.
// Throws IOException if fstat fails.
string LocalFileSystem::GetVersionTag(FileHandle &handle) {
1287+
// TODO: Fix using FileSystem::Stats for v1.5, which should also fix it for Windows
1288+
#ifdef _WIN32
1289+
return "";
1290+
#else
1291+
int fd = handle.Cast<UnixFileHandle>().fd;
1292+
struct stat s;
1293+
if (fstat(fd, &s) == -1) {
1294+
// NOTE(review): the message text says "file size" although this is a general fstat
// failure while computing the version tag - consider rewording.
throw IOException("Failed to get file size for file \"%s\": %s", {{"errno", std::to_string(errno)}},
1295+
handle.path, strerror(errno));
1296+
}
1297+
1298+
// dev/ino should be enough, but to guard against in-place writes we also add file size and modification time
1299+
uint64_t version_tag[4];
1300+
Store(NumericCast<uint64_t>(s.st_dev), data_ptr_cast(&version_tag[0]));
1301+
Store(NumericCast<uint64_t>(s.st_ino), data_ptr_cast(&version_tag[1]));
1302+
Store(NumericCast<uint64_t>(s.st_size), data_ptr_cast(&version_tag[2]));
1303+
// The modification time comes from st_mtime, i.e. whole seconds since the epoch.
Store(Timestamp::FromEpochSeconds(s.st_mtime).value, data_ptr_cast(&version_tag[3]));
1304+
1305+
// Copy all four slots out as raw bytes; the explicit length keeps embedded zero bytes.
return string(char_ptr_cast(version_tag), sizeof(uint64_t) * 4);
1306+
#endif
1307+
}
1308+
12861309
void LocalFileSystem::Seek(FileHandle &handle, idx_t location) {
12871310
if (!CanSeek()) {
12881311
throw IOException("Cannot seek in files of this type");

src/duckdb/src/common/types/column/column_data_collection.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1036,6 +1036,7 @@ void ColumnDataCollection::InitializeScan(ColumnDataParallelScanState &state, ve
10361036

10371037
bool ColumnDataCollection::Scan(ColumnDataParallelScanState &state, ColumnDataLocalScanState &lstate,
10381038
DataChunk &result) const {
1039+
D_ASSERT(result.GetTypes() == types);
10391040
result.Reset();
10401041

10411042
idx_t chunk_index;
@@ -1129,6 +1130,10 @@ void ColumnDataCollection::ScanAtIndex(ColumnDataParallelScanState &state, Colum
11291130
}
11301131

11311132
bool ColumnDataCollection::Scan(ColumnDataScanState &state, DataChunk &result) const {
1133+
for (idx_t i = 0; i < state.column_ids.size(); i++) {
1134+
D_ASSERT(result.GetTypes()[i] == types[state.column_ids[i]]);
1135+
}
1136+
11321137
result.Reset();
11331138

11341139
idx_t chunk_index;
@@ -1213,6 +1218,7 @@ idx_t ColumnDataCollection::ChunkCount() const {
12131218
}
12141219

12151220
void ColumnDataCollection::FetchChunk(idx_t chunk_idx, DataChunk &result) const {
1221+
D_ASSERT(result.GetTypes() == types);
12161222
D_ASSERT(chunk_idx < ChunkCount());
12171223
for (auto &segment : segments) {
12181224
if (chunk_idx >= segment->ChunkCount()) {

src/duckdb/src/common/types/conflict_manager.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ optional_idx ConflictManager::GetFirstInvalidIndex(const idx_t count, const bool
8787
for (idx_t i = 0; i < count; i++) {
8888
if (negate && !validity.RowIsValid(i)) {
8989
return i;
90-
} else if (validity.RowIsValid(i)) {
90+
} else if (!negate && validity.RowIsValid(i)) {
9191
return i;
9292
}
9393
}

src/duckdb/src/execution/index/art/base_node.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,9 @@ void Node4::DeleteChild(ART &art, Node &node, Node &parent, const uint8_t byte,
9595

9696
auto prev_node4_status = node.GetGateStatus();
9797
Node::FreeNode(art, node);
98-
Prefix::Concat(art, parent, node, child, remaining_byte, prev_node4_status);
98+
// Propagate both the prev_node_4 status and the general gate status (if the gate was earlier on),
99+
// since the concatenation logic depends on both.
100+
Prefix::Concat(art, parent, node, child, remaining_byte, prev_node4_status, status);
99101
}
100102

101103
void Node4::ShrinkNode16(ART &art, Node &node4, Node &node16) {

src/duckdb/src/execution/index/art/prefix.cpp

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -65,8 +65,8 @@ void Prefix::New(ART &art, reference<Node> &ref, const ARTKey &key, const idx_t
6565
}
6666
}
6767

68-
void Prefix::Concat(ART &art, Node &parent, Node &node4, const Node child, uint8_t byte,
69-
const GateStatus node4_status) {
68+
void Prefix::Concat(ART &art, Node &parent, Node &node4, const Node child, uint8_t byte, const GateStatus node4_status,
69+
const GateStatus status) {
7070
// We have four situations from which we enter here:
7171
// 1: PREFIX (parent) - Node4 (prev_node4) - PREFIX (child) - INLINED_LEAF, or
7272
// 2: PREFIX (parent) - Node4 (prev_node4) - INLINED_LEAF (child), or
@@ -90,10 +90,7 @@ void Prefix::Concat(ART &art, Node &parent, Node &node4, const Node child, uint8
9090
ConcatChildIsGate(art, parent, node4, child, byte);
9191
return;
9292
}
93-
94-
auto inside_gate = parent.GetGateStatus() == GateStatus::GATE_SET;
95-
ConcatInternal(art, parent, node4, child, byte, inside_gate);
96-
return;
93+
ConcatInternal(art, parent, node4, child, byte, status);
9794
}
9895

9996
void Prefix::Reduce(ART &art, Node &node, const idx_t pos) {
@@ -286,9 +283,9 @@ Prefix Prefix::GetTail(ART &art, const Node &node) {
286283
}
287284

288285
void Prefix::ConcatInternal(ART &art, Node &parent, Node &node4, const Node child, uint8_t byte,
289-
const bool inside_gate) {
286+
const GateStatus status) {
290287
if (child.GetType() == NType::LEAF_INLINED) {
291-
if (inside_gate) {
288+
if (status == GateStatus::GATE_SET) {
292289
if (parent.GetType() == NType::PREFIX) {
293290
// The parent only contained the Node4, so we can now inline 'all the way up',
294291
// and the gate is no longer nested.

0 commit comments

Comments
 (0)