Skip to content

Commit a49c40f

Browse files
committed
apacheGH-38304: [C++][Parquet] Fix Valgrind memory leak in arrow-dataset-file-parquet-encryption-test
If OpenSSL initializes itself from a non-main thread, it can fail to deallocate all of its memory at shutdown. This is really a benign leak, but we don't want any spurious CI errors.
1 parent 39298fe commit a49c40f

7 files changed

+93
-2
lines changed

cpp/src/arrow/dataset/file_parquet_encryption_test.cc

+7
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
#include "parquet/arrow/reader.h"
3535
#include "parquet/encryption/crypto_factory.h"
3636
#include "parquet/encryption/encryption.h"
37+
#include "parquet/encryption/encryption_internal.h"
3738
#include "parquet/encryption/kms_client.h"
3839
#include "parquet/encryption/test_in_memory_kms.h"
3940

@@ -58,6 +59,12 @@ class DatasetEncryptionTest : public ::testing::Test {
5859
// partitioning scheme. The function also checks if the written files exist in the file
5960
// system.
6061
static void SetUpTestSuite() {
62+
#ifdef ARROW_VALGRIND
63+
// Not necessary otherwise, but prevents a Valgrind leak by making sure
64+
// OpenSSL initialization is done from the main thread.
65+
::parquet::encryption::EnsureBackendInitialized();
66+
#endif
67+
6168
// Creates a mock file system using the current time point.
6269
EXPECT_OK_AND_ASSIGN(file_system_, fs::internal::MockFileSystem::Make(
6370
std::chrono::system_clock::now(), {}));

cpp/src/parquet/CMakeLists.txt

+2-1
Original file line numberDiff line numberDiff line change
@@ -234,7 +234,8 @@ if(ARROW_HAVE_RUNTIME_AVX2)
234234
endif()
235235

236236
if(PARQUET_REQUIRE_ENCRYPTION)
237-
set(PARQUET_SRCS ${PARQUET_SRCS} encryption/encryption_internal.cc)
237+
set(PARQUET_SRCS ${PARQUET_SRCS} encryption/encryption_internal.cc
238+
encryption/openssl_internal.cc)
238239
# Encryption key management
239240
set(PARQUET_SRCS
240241
${PARQUET_SRCS}

cpp/src/parquet/encryption/encryption_internal.cc

+12-1
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
// under the License.
1717

1818
#include "parquet/encryption/encryption_internal.h"
19+
1920
#include <openssl/aes.h>
2021
#include <openssl/evp.h>
2122
#include <openssl/rand.h>
@@ -27,6 +28,7 @@
2728
#include <string>
2829
#include <vector>
2930

31+
#include "parquet/encryption/openssl_internal.h"
3032
#include "parquet/exception.h"
3133

3234
using parquet::ParquetException;
@@ -92,6 +94,8 @@ class AesEncryptor::AesEncryptorImpl {
9294

9395
AesEncryptor::AesEncryptorImpl::AesEncryptorImpl(ParquetCipher::type alg_id, int key_len,
9496
bool metadata, bool write_length) {
97+
openssl::EnsureInitialized();
98+
9599
ctx_ = nullptr;
96100

97101
length_buffer_length_ = write_length ? kBufferSizeLength : 0;
@@ -358,6 +362,8 @@ AesDecryptor::~AesDecryptor() {}
358362

359363
AesDecryptor::AesDecryptorImpl::AesDecryptorImpl(ParquetCipher::type alg_id, int key_len,
360364
bool metadata, bool contains_length) {
365+
openssl::EnsureInitialized();
366+
361367
ctx_ = nullptr;
362368
length_buffer_length_ = contains_length ? kBufferSizeLength : 0;
363369
ciphertext_size_delta_ = length_buffer_length_ + kNonceLength;
@@ -646,6 +652,11 @@ void QuickUpdatePageAad(int32_t new_page_ordinal, std::string* AAD) {
646652
std::memcpy(AAD->data() + AAD->length() - 2, page_ordinal_bytes.data(), 2);
647653
}
648654

649-
void RandBytes(unsigned char* buf, int num) { RAND_bytes(buf, num); }
655+
void RandBytes(unsigned char* buf, int num) {
656+
openssl::EnsureInitialized();
657+
RAND_bytes(buf, num);
658+
}
659+
660+
// Backend-agnostic entry point; in this build it forwards to the OpenSSL
// initializer.
void EnsureBackendInitialized() {
  openssl::EnsureInitialized();
}
650661

651662
} // namespace parquet::encryption

cpp/src/parquet/encryption/encryption_internal.h

+8
Original file line numberDiff line numberDiff line change
@@ -130,4 +130,12 @@ void QuickUpdatePageAad(int32_t new_page_ordinal, std::string* AAD);
130130
// Wraps OpenSSL RAND_bytes function
131131
void RandBytes(unsigned char* buf, int num);
132132

133+
// Ensure OpenSSL is initialized.
134+
//
135+
// This is only necessary in specific situations since OpenSSL otherwise
136+
// initializes itself automatically. For example, under Valgrind, a memory
137+
// leak will be reported if OpenSSL is initialized for the first time from
138+
// a worker thread; calling this function from the main thread prevents this.
139+
void EnsureBackendInitialized();
140+
133141
} // namespace parquet::encryption

cpp/src/parquet/encryption/encryption_internal_nossl.cc

+2
Original file line numberDiff line numberDiff line change
@@ -114,4 +114,6 @@ void QuickUpdatePageAad(int32_t new_page_ordinal, std::string* AAD) {
114114

115115
// Delegates to ThrowOpenSSLRequiredException(); both arguments are ignored.
void RandBytes(unsigned char* buf, int num) {
  ThrowOpenSSLRequiredException();
}
116116

117+
// Intentionally a no-op in this build: nothing is initialized here.
void EnsureBackendInitialized() {
}
118+
117119
} // namespace parquet::encryption
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
#include "parquet/encryption/openssl_internal.h"
19+
20+
#include <openssl/crypto.h>
21+
22+
#include "parquet/exception.h"
23+
24+
namespace parquet::encryption::openssl {
25+
26+
void EnsureInitialized() {
27+
// Initialize ciphers and random engines
28+
if (!OPENSSL_init_crypto(OPENSSL_INIT_ENGINE_ALL_BUILTIN | OPENSSL_INIT_ADD_ALL_CIPHERS,
29+
NULL)) {
30+
throw ParquetException("OpenSSL initialization failed");
31+
}
32+
}
33+
34+
} // namespace parquet::encryption::openssl
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
#pragma once
19+
20+
#include <memory>
21+
#include <string>
22+
#include <vector>
23+
24+
namespace parquet::encryption::openssl {
25+
26+
void EnsureInitialized();
27+
28+
} // namespace parquet::encryption::openssl

0 commit comments

Comments
 (0)