Skip to content

Commit 2040e74

Browse files
authored
feat: Add IPC writer scaffolding (#564)
Add `ArrowIpcEncoder`, init/reset, and tests. Extracted from #555 (review)
1 parent d6368d0 commit 2040e74

File tree

8 files changed

+256
-64
lines changed

8 files changed

+256
-64
lines changed

CMakeLists.txt

Lines changed: 27 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -223,8 +223,9 @@ if(NANOARROW_IPC)
223223
endif()
224224

225225
if(NOT NANOARROW_BUNDLE)
226-
set(NANOARROW_IPC_BUILD_SOURCES src/nanoarrow/ipc/decoder.c
227-
src/nanoarrow/ipc/reader.c)
226+
set(NANOARROW_IPC_BUILD_SOURCES
227+
src/nanoarrow/ipc/decoder.c src/nanoarrow/ipc/encoder.c
228+
src/nanoarrow/ipc/reader.c)
228229
endif()
229230

230231
add_library(nanoarrow_ipc ${NANOARROW_IPC_BUILD_SOURCES})
@@ -418,51 +419,37 @@ if(NANOARROW_BUILD_TESTS)
418419
endif()
419420

420421
enable_testing()
421-
422-
add_executable(nanoarrow_ipc_decoder_test src/nanoarrow/ipc/decoder_test.cc)
423-
add_executable(nanoarrow_ipc_reader_test src/nanoarrow/ipc/reader_test.cc)
424-
add_executable(nanoarrow_ipc_files_test src/nanoarrow/ipc/files_test.cc)
425-
add_executable(nanoarrow_ipc_hpp_test src/nanoarrow/ipc/ipc_hpp_test.cc)
422+
include(GoogleTest)
426423

427424
if(NANOARROW_CODE_COVERAGE)
428425
target_compile_options(ipc_coverage_config INTERFACE -O0 -g --coverage)
429426
target_link_options(ipc_coverage_config INTERFACE --coverage)
430427
target_link_libraries(nanoarrow_ipc PRIVATE ipc_coverage_config)
431428
endif()
432-
target_link_libraries(nanoarrow_ipc_decoder_test
433-
nanoarrow_ipc
434-
nanoarrow
435-
flatccrt
436-
${NANOARROW_ARROW_TARGET}
437-
gtest_main
438-
ipc_coverage_config)
439-
target_link_libraries(nanoarrow_ipc_reader_test
440-
nanoarrow_ipc
441-
nanoarrow
442-
flatccrt
443-
gtest_main
444-
ipc_coverage_config)
445-
target_link_libraries(nanoarrow_ipc_files_test
446-
nanoarrow_ipc
447-
nanoarrow
448-
flatccrt
449-
${NANOARROW_ARROW_TARGET}
450-
nlohmann_json
451-
ZLIB::ZLIB
452-
gtest_main
453-
ipc_coverage_config)
454-
target_link_libraries(nanoarrow_ipc_hpp_test
455-
nanoarrow_ipc
456-
nanoarrow
457-
${NANOARROW_ARROW_TARGET}
458-
gtest_main
459-
ipc_coverage_config)
460429

461-
include(GoogleTest)
462-
gtest_discover_tests(nanoarrow_ipc_decoder_test)
463-
gtest_discover_tests(nanoarrow_ipc_reader_test)
464-
gtest_discover_tests(nanoarrow_ipc_files_test)
465-
gtest_discover_tests(nanoarrow_ipc_hpp_test)
430+
# Build, link, and register one GoogleTest executable per IPC test source
# (src/nanoarrow/ipc/<name>_test.cc -> nanoarrow_ipc_<name>_test).
foreach(name
        decoder
        encoder
        reader
        files
        ipc_hpp)
  add_executable(nanoarrow_ipc_${name}_test src/nanoarrow/ipc/${name}_test.cc)

  target_link_libraries(nanoarrow_ipc_${name}_test
                        nanoarrow_ipc
                        nanoarrow
                        ${NANOARROW_ARROW_TARGET}
                        gtest_main
                        ipc_coverage_config)

  # Every test except the C++ wrapper test (ipc_hpp) links flatccrt directly.
  # The pattern is anchored at the end of the name: the loop value is
  # "ipc_hpp", which never contains "_hpp_".
  if(NOT (name MATCHES "_hpp$"))
    target_link_libraries(nanoarrow_ipc_${name}_test flatccrt)
  endif()

  gtest_discover_tests(nanoarrow_ipc_${name}_test)
endforeach()
451+
452+
target_link_libraries(nanoarrow_ipc_files_test nlohmann_json ZLIB::ZLIB)
466453
endif()
467454

468455
if(NANOARROW_DEVICE)

ci/scripts/bundle.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,7 @@ def bundle_nanoarrow_ipc(
203203
[
204204
src_dir / "ipc" / "flatcc_generated.h",
205205
src_dir / "ipc" / "decoder.c",
206+
src_dir / "ipc" / "encoder.c",
206207
src_dir / "ipc" / "reader.c",
207208
]
208209
)

src/nanoarrow/ipc/decoder.c

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -110,17 +110,6 @@ ArrowErrorCode ArrowIpcCheckRuntime(struct ArrowError* error) {
110110
return NANOARROW_OK;
111111
}
112112

113-
static enum ArrowIpcEndianness ArrowIpcSystemEndianness(void) {
114-
uint32_t check = 1;
115-
char first_byte;
116-
memcpy(&first_byte, &check, sizeof(char));
117-
if (first_byte) {
118-
return NANOARROW_IPC_ENDIANNESS_LITTLE;
119-
} else {
120-
return NANOARROW_IPC_ENDIANNESS_BIG;
121-
}
122-
}
123-
124113
#if NANOARROW_IPC_USE_STDATOMIC
125114
struct ArrowIpcSharedBufferPrivate {
126115
struct ArrowBuffer src;

src/nanoarrow/ipc/decoder_test.cc

Lines changed: 1 addition & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,7 @@
3030

3131
using namespace arrow;
3232

33-
// Copied from nanoarrow_ipc.c so we can test the internal state
34-
// of the decoder
33+
// Copied from decoder.c so we can test the internal state
3534
extern "C" {
3635
struct ArrowIpcField {
3736
struct ArrowArrayView* array_view;
@@ -51,17 +50,6 @@ struct ArrowIpcDecoderPrivate {
5150
};
5251
}
5352

54-
static enum ArrowIpcEndianness ArrowIpcSystemEndianness(void) {
55-
uint32_t check = 1;
56-
char first_byte;
57-
memcpy(&first_byte, &check, sizeof(char));
58-
if (first_byte) {
59-
return NANOARROW_IPC_ENDIANNESS_LITTLE;
60-
} else {
61-
return NANOARROW_IPC_ENDIANNESS_BIG;
62-
}
63-
}
64-
6553
TEST(NanoarrowIpcCheckRuntime, CheckRuntime) {
6654
EXPECT_EQ(ArrowIpcCheckRuntime(nullptr), NANOARROW_OK);
6755
}

src/nanoarrow/ipc/encoder.c

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
#include <errno.h>
19+
#include <inttypes.h>
20+
#include <stdio.h>
21+
#include <string.h>
22+
23+
#include "flatcc/flatcc_builder.h"
24+
#include "nanoarrow/nanoarrow.h"
25+
#include "nanoarrow/nanoarrow_ipc.h"
26+
27+
// Private state stored behind ArrowIpcEncoder::private_data. It is allocated
// by ArrowIpcEncoderInit() and released by ArrowIpcEncoderReset().
struct ArrowIpcEncoderPrivate {
28+
flatcc_builder_t builder;  // flatcc builder holding the message currently being built
29+
struct ArrowBuffer buffers;  // NOTE(review): only initialized/reset in this file so far; presumably RecordBatch buffer entries - confirm
30+
struct ArrowBuffer nodes;  // NOTE(review): only initialized/reset in this file so far; presumably RecordBatch field nodes - confirm
31+
};
32+
33+
ArrowErrorCode ArrowIpcEncoderInit(struct ArrowIpcEncoder* encoder) {
34+
NANOARROW_DCHECK(encoder != NULL);
35+
memset(encoder, 0, sizeof(struct ArrowIpcEncoder));
36+
encoder->encode_buffer = NULL;
37+
encoder->encode_buffer_state = NULL;
38+
encoder->codec = NANOARROW_IPC_COMPRESSION_TYPE_NONE;
39+
encoder->private_data = ArrowMalloc(sizeof(struct ArrowIpcEncoderPrivate));
40+
struct ArrowIpcEncoderPrivate* private =
41+
(struct ArrowIpcEncoderPrivate*)encoder->private_data;
42+
if (flatcc_builder_init(&private->builder) == -1) {
43+
ArrowFree(private);
44+
return ESPIPE;
45+
}
46+
ArrowBufferInit(&private->buffers);
47+
ArrowBufferInit(&private->nodes);
48+
return NANOARROW_OK;
49+
}
50+
51+
void ArrowIpcEncoderReset(struct ArrowIpcEncoder* encoder) {
52+
NANOARROW_DCHECK(encoder != NULL && encoder->private_data != NULL);
53+
struct ArrowIpcEncoderPrivate* private =
54+
(struct ArrowIpcEncoderPrivate*)encoder->private_data;
55+
flatcc_builder_clear(&private->builder);
56+
ArrowBufferReset(&private->nodes);
57+
ArrowBufferReset(&private->buffers);
58+
ArrowFree(private);
59+
memset(encoder, 0, sizeof(struct ArrowIpcEncoder));
60+
}
61+
62+
ArrowErrorCode ArrowIpcEncoderFinalizeBuffer(struct ArrowIpcEncoder* encoder,
63+
struct ArrowBuffer* out) {
64+
NANOARROW_DCHECK(encoder != NULL && encoder->private_data != NULL && out != NULL);
65+
struct ArrowIpcEncoderPrivate* private =
66+
(struct ArrowIpcEncoderPrivate*)encoder->private_data;
67+
68+
int64_t size = (int64_t)flatcc_builder_get_buffer_size(&private->builder);
69+
if (size == 0) {
70+
// Finalizing an empty flatcc_builder_t triggers an assertion
71+
return NANOARROW_OK;
72+
}
73+
74+
void* data = flatcc_builder_get_direct_buffer(&private->builder, NULL);
75+
if (data == NULL) {
76+
return ENOMEM;
77+
}
78+
79+
NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(out, data, size));
80+
81+
// don't deallocate yet, just wipe the builder's current Message
82+
flatcc_builder_reset(&private->builder);
83+
return NANOARROW_OK;
84+
}

src/nanoarrow/ipc/encoder_test.cc

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
#include <gtest/gtest.h>
19+
20+
#include "flatcc/flatcc_builder.h"
21+
#include "nanoarrow/nanoarrow.hpp"
22+
#include "nanoarrow/nanoarrow_ipc.hpp"
23+
24+
// Copied from encoder.c so we can test the internal state
25+
extern "C" {
26+
struct ArrowIpcEncoderPrivate {
27+
flatcc_builder_t builder;
28+
struct ArrowBuffer buffers;
29+
struct ArrowBuffer nodes;
30+
};
31+
}
32+
33+
// Checks the state of a freshly initialized encoder (public fields and the
// private buffers) and that finalizing works for both an empty and a
// non-empty flatcc builder.
TEST(NanoarrowIpcTest, NanoarrowIpcEncoderConstruction) {
  nanoarrow::ipc::UniqueEncoder encoder;

  // Everything below inspects state created by Init, so stop here on failure
  ASSERT_EQ(ArrowIpcEncoderInit(encoder.get()), NANOARROW_OK);

  EXPECT_EQ(encoder->codec, NANOARROW_IPC_COMPRESSION_TYPE_NONE);
  EXPECT_EQ(encoder->body_length, 0);
  EXPECT_EQ(encoder->encode_buffer, nullptr);
  EXPECT_EQ(encoder->encode_buffer_state, nullptr);

  auto* priv = static_cast<struct ArrowIpcEncoderPrivate*>(encoder->private_data);
  ASSERT_NE(priv, nullptr);
  for (auto* b : {&priv->buffers, &priv->nodes}) {
    // Buffers are empty but initialized with the default allocator
    EXPECT_EQ(b->size_bytes, 0);

    auto default_allocator = ArrowBufferAllocatorDefault();
    EXPECT_EQ(memcmp(&b->allocator, &default_allocator, sizeof(b->allocator)), 0);
  }

  // Empty buffer works
  nanoarrow::UniqueBuffer buffer;
  EXPECT_EQ(ArrowIpcEncoderFinalizeBuffer(encoder.get(), buffer.get()), NANOARROW_OK);
  EXPECT_EQ(buffer->size_bytes, 0);

  // Append a string (finalizing an empty buffer is an error for flatcc_builder_t)
  EXPECT_NE(flatcc_builder_create_string_str(&priv->builder, "hello world"), 0);
  EXPECT_EQ(ArrowIpcEncoderFinalizeBuffer(encoder.get(), buffer.get()), NANOARROW_OK);
  // Cast so both operands are signed (size_bytes is a signed 64-bit count)
  EXPECT_GT(buffer->size_bytes, static_cast<int64_t>(sizeof("hello world")));
}

src/nanoarrow/nanoarrow_ipc.h

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,10 @@
5757
NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowIpcInputStreamMove)
5858
#define ArrowIpcArrayStreamReaderInit \
5959
NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowIpcArrayStreamReaderInit)
60+
#define ArrowIpcEncoderInit NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowIpcEncoderInit)
61+
#define ArrowIpcEncoderReset NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowIpcEncoderReset)
62+
#define ArrowIpcEncoderFinalizeBuffer \
63+
NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowIpcEncoderFinalizeBuffer)
6064

6165
#endif
6266

@@ -117,6 +121,18 @@ enum ArrowIpcCompressionType {
117121
/// \brief Checks the nanoarrow runtime to make sure the run/build versions match
118122
ArrowErrorCode ArrowIpcCheckRuntime(struct ArrowError* error);
119123

124+
/// \brief Get the endianness of the current runtime
125+
static inline enum ArrowIpcEndianness ArrowIpcSystemEndianness(void) {
126+
uint32_t check = 1;
127+
char first_byte;
128+
memcpy(&first_byte, &check, sizeof(char));
129+
if (first_byte) {
130+
return NANOARROW_IPC_ENDIANNESS_LITTLE;
131+
} else {
132+
return NANOARROW_IPC_ENDIANNESS_BIG;
133+
}
134+
}
135+
120136
/// \brief A structure representing a reference-counted buffer that may be passed to
121137
/// ArrowIpcDecoderDecodeArrayFromShared().
122138
struct ArrowIpcSharedBuffer {
@@ -379,6 +395,52 @@ ArrowErrorCode ArrowIpcArrayStreamReaderInit(
379395
struct ArrowArrayStream* out, struct ArrowIpcInputStream* input_stream,
380396
struct ArrowIpcArrayStreamReaderOptions* options);
381397

398+
/// \brief Encoder for Arrow IPC messages
399+
///
400+
/// This structure is intended to be allocated by the caller,
401+
/// initialized using ArrowIpcEncoderInit(), and released with
402+
/// ArrowIpcEncoderReset().
403+
struct ArrowIpcEncoder {
404+
/// \brief Compression to encode in the next RecordBatch message (set to NANOARROW_IPC_COMPRESSION_TYPE_NONE by ArrowIpcEncoderInit()).
405+
enum ArrowIpcCompressionType codec;
406+
407+
/// \brief Callback invoked against each buffer to be encoded (NULL after ArrowIpcEncoderInit())
408+
///
409+
/// Encoding of buffers is left as a callback to accommodate dissociated data storage.
410+
/// One implementation of this callback might copy all buffers into a contiguous body
411+
/// for use in an Arrow IPC stream; another implementation might store offsets and
412+
/// lengths relative to a known arena.
413+
ArrowErrorCode (*encode_buffer)(struct ArrowBufferView buffer_view,
414+
struct ArrowIpcEncoder* encoder, int64_t* offset,
415+
int64_t* length, struct ArrowError* error);
416+
417+
/// \brief Pointer to arbitrary data used by encode_buffer() (NULL after ArrowIpcEncoderInit())
418+
void* encode_buffer_state;
419+
420+
/// \brief Finalized body length of the most recently encoded RecordBatch message
421+
///
422+
/// (This is initially 0 and encode_buffer() is expected to update it. After all
423+
/// buffers are encoded, this will be written to the RecordBatch's .bodyLength)
424+
int64_t body_length;
425+
426+
/// \brief Private resources managed by this library (allocated by ArrowIpcEncoderInit(), freed by ArrowIpcEncoderReset())
427+
void* private_data;
428+
};
429+
430+
/// \brief Initialize an encoder
431+
///
432+
/// If NANOARROW_OK is returned, the caller must call ArrowIpcEncoderReset()
433+
/// to release resources allocated by this function.
434+
ArrowErrorCode ArrowIpcEncoderInit(struct ArrowIpcEncoder* encoder);
435+
436+
/// \brief Release all resources attached to an encoder
437+
void ArrowIpcEncoderReset(struct ArrowIpcEncoder* encoder);
438+
439+
/// \brief Finalize the most recently encoded message to a buffer
440+
///
441+
/// The bytes of the encoded message will be appended to the provided buffer.
442+
ArrowErrorCode ArrowIpcEncoderFinalizeBuffer(struct ArrowIpcEncoder* encoder,
443+
struct ArrowBuffer* out);
382444
/// @}
383445

384446
#ifdef __cplusplus

0 commit comments

Comments
 (0)