Skip to content

Commit f0ce845

Browse files
committed
apacheGH-38798: [Integration] Enable C Data Interface integration testing on Rust
1 parent c1b12ca commit f0ce845

File tree

7 files changed

+128
-22
lines changed

7 files changed

+128
-22
lines changed

ci/scripts/integration_arrow.sh

+1
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ fi
4343
# Get more detailed context on crashes
4444
export PYTHONFAULTHANDLER=1
4545

46+
4647
# Rust can be enabled by exporting ARCHERY_INTEGRATION_WITH_RUST=1
4748
time archery integration \
4849
--run-c-data \

ci/scripts/rust_build.sh

+2-1
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ set -e
2121

2222
arrow_dir=${1}
2323
source_dir=${1}/rust
24+
build_dir=${2}/rust
2425

2526
# This file is used to build the rust binaries needed for the archery
2627
# integration tests. Testing of the rust implementation in normal CI is handled
@@ -54,7 +55,7 @@ rustup show
5455
pushd ${source_dir}
5556

5657
# build only the integration testing binaries
57-
cargo build -p arrow-integration-testing
58+
cargo build -p arrow-integration-testing --target-dir "${build_dir}"

# Save disk space by removing large temporary build products.
# The build output is written to ${build_dir} (see --target-dir above), not
# to ./target in the source directory, so the cleanup must look there too.
rm -rf "${build_dir}/debug/deps"

dev/archery/archery/integration/cdata.py

+9
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,19 @@
1818
import cffi
1919
from contextlib import contextmanager
2020
import functools
21+
import os
22+
import sys
2123

2224
from .tester import CDataExporter, CDataImporter
2325

2426

27+
# File extension of shared libraries on the current platform: ".dylib" when
# sys.platform reports "darwin", ".dll" when os.name reports "nt", and ".so"
# in every other case.
dll_suffix = (
    ".dylib" if sys.platform == "darwin"
    else ".dll" if os.name == "nt"
    else ".so"
)
33+
2534
_c_data_decls = """
2635
struct ArrowSchema {
2736
// Array type description

dev/archery/archery/integration/tester_cpp.py

+1-9
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
import contextlib
1919
import functools
2020
import os
21-
import sys
2221
import subprocess
2322

2423
from . import cdata
@@ -42,15 +41,8 @@
4241
"localhost",
4342
]
4443

45-
if sys.platform == "darwin":
46-
_dll_suffix = ".dylib"
47-
elif os.name == "nt":
48-
_dll_suffix = ".dll"
49-
else:
50-
_dll_suffix = ".so"
51-
5244
_DLL_PATH = _EXE_PATH
53-
_ARROW_DLL = os.path.join(_DLL_PATH, "libarrow" + _dll_suffix)
45+
_ARROW_DLL = os.path.join(_DLL_PATH, "libarrow" + cdata.dll_suffix)
5446

5547

5648
class CppTester(Tester):

dev/archery/archery/integration/tester_go.py

+1-9
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
import contextlib
1919
import functools
2020
import os
21-
import sys
2221
import subprocess
2322

2423
from . import cdata
@@ -43,17 +42,10 @@
4342
"localhost",
4443
]
4544

46-
if sys.platform == "darwin":
47-
_dll_suffix = ".dylib"
48-
elif os.name == "nt":
49-
_dll_suffix = ".dll"
50-
else:
51-
_dll_suffix = ".so"
52-
5345
_DLL_PATH = os.path.join(
5446
ARROW_ROOT_DEFAULT,
5547
"go/arrow/internal/cdata_integration")
56-
_INTEGRATION_DLL = os.path.join(_DLL_PATH, "arrow_go_integration" + _dll_suffix)
48+
_INTEGRATION_DLL = os.path.join(_DLL_PATH, "arrow_go_integration" + cdata.dll_suffix)
5749

5850

5951
class GoTester(Tester):

dev/archery/archery/integration/tester_rust.py

+112-2
Original file line numberDiff line numberDiff line change
@@ -16,15 +16,19 @@
1616
# under the License.
1717

1818
import contextlib
19+
import functools
1920
import os
2021
import subprocess
2122

22-
from .tester import Tester
23+
from . import cdata
24+
from .tester import Tester, CDataExporter, CDataImporter
2325
from .util import run_cmd, log
2426
from ..utils.source import ARROW_ROOT_DEFAULT
2527

2628

27-
_EXE_PATH = os.path.join(ARROW_ROOT_DEFAULT, "rust/target/debug")
29+
# Directory holding the Rust integration binaries.  The ARROW_RUST_EXE_PATH
# environment variable takes precedence when it is set; otherwise fall back
# to "rust/target/debug" under ARROW_ROOT_DEFAULT.
_EXE_PATH = os.getenv(
    "ARROW_RUST_EXE_PATH",
    os.path.join(ARROW_ROOT_DEFAULT, "rust/target/debug"))
2832
_INTEGRATION_EXE = os.path.join(_EXE_PATH, "arrow-json-integration-test")
2933
_STREAM_TO_FILE = os.path.join(_EXE_PATH, "arrow-stream-to-file")
3034
_FILE_TO_STREAM = os.path.join(_EXE_PATH, "arrow-file-to-stream")
@@ -37,12 +41,19 @@
3741
"localhost",
3842
]
3943

44+
# Path of the Rust C Data integration shared library inside _EXE_PATH.
# Rust names a cdylib "lib<name>.so" / "lib<name>.dylib" on Unix-like
# platforms but plain "<name>.dll" on Windows, so the "lib" prefix is only
# added when not running on Windows.
_INTEGRATION_DLL = os.path.join(
    _EXE_PATH,
    ("" if os.name == "nt" else "lib")
    + "arrow_integration_testing" + cdata.dll_suffix)
46+
4047

4148
class RustTester(Tester):
4249
PRODUCER = True
4350
CONSUMER = True
4451
FLIGHT_SERVER = True
4552
FLIGHT_CLIENT = True
53+
C_DATA_SCHEMA_EXPORTER = True
54+
C_DATA_ARRAY_EXPORTER = True
55+
C_DATA_SCHEMA_IMPORTER = True
56+
C_DATA_ARRAY_IMPORTER = True
4657

4758
name = 'Rust'
4859

@@ -117,3 +128,102 @@ def flight_request(self, port, json_path=None, scenario_name=None):
117128
if self.debug:
118129
log(' '.join(cmd))
119130
run_cmd(cmd)
131+
132+
def make_c_data_exporter(self):
    """Return a new `RustCDataExporter` built from this tester's
    `debug` flag and `args`."""
    exporter = RustCDataExporter(self.debug, self.args)
    return exporter
134+
135+
def make_c_data_importer(self):
    """Return a new `RustCDataImporter` built from this tester's
    `debug` flag and `args`."""
    importer = RustCDataImporter(self.debug, self.args)
    return importer
137+
138+
139+
# C declarations of the entrypoints exposed by the Rust integration library.
# The text is handed to `ffi.cdef()` before the library is opened.  The four
# `arrow_rs_cdata_integration_*` functions return a `const char*`, and
# `arrow_rs_free_error` takes a `const char*`.
_rust_c_data_entrypoints = """
const char* arrow_rs_cdata_integration_export_schema_from_json(
const char* json_path, uintptr_t out);
const char* arrow_rs_cdata_integration_import_schema_and_compare_to_json(
const char* json_path, uintptr_t c_schema);

const char* arrow_rs_cdata_integration_export_batch_from_json(
const char* json_path, int num_batch, uintptr_t out);
const char* arrow_rs_cdata_integration_import_batch_and_compare_to_json(
const char* json_path, int num_batch, uintptr_t c_array);

void arrow_rs_free_error(const char*);
"""
152+
153+
154+
@functools.lru_cache
def _load_ffi(ffi, lib_path=_INTEGRATION_DLL):
    """
    Register the Rust entrypoint declarations on `ffi` and open the library.

    The declarations in `_rust_c_data_entrypoints` are passed to
    `ffi.cdef()`, then `lib_path` is opened with `ffi.dlopen()` and the
    resulting library handle is returned.  Because of the `lru_cache`
    decorator, the result is memoized per `(ffi, lib_path)` argument pair.
    """
    ffi.cdef(_rust_c_data_entrypoints)
    return ffi.dlopen(lib_path)
159+
160+
161+
class _CDataBase:
    """
    Common state and helpers for the Rust C Data exporter and importer.

    Keeps the `debug` flag and `args`, an FFI object obtained from
    `cdata.ffi()`, and the library handle returned by `_load_ffi()`.
    """

    def __init__(self, debug, args):
        self.debug, self.args = debug, args
        ffi = cdata.ffi()
        self.ffi = ffi
        self.dll = _load_ffi(ffi)

    def _pointer_to_int(self, c_ptr):
        """Return `c_ptr` cast to `uintptr_t`, the type the Rust
        entrypoints declare for their pointer arguments."""
        as_integer = self.ffi.cast('uintptr_t', c_ptr)
        return as_integer

    def _check_rust_error(self, rs_error):
        """
        Raise if an integration entrypoint reported an error.

        `rs_error` is the `const char*` returned by the entrypoint.  A null
        pointer means success and nothing happens.  Otherwise the pointed-to
        string is decoded as UTF-8 (undecodable bytes are replaced) and
        raised as a `RuntimeError`; the string, which is allocated on the
        Rust side, is released with `arrow_rs_free_error` in all cases.
        """
        ffi = self.ffi
        assert ffi.typeof(rs_error) is ffi.typeof("const char*")
        if rs_error == ffi.NULL:
            return
        try:
            raw_message = ffi.string(rs_error)
            error = raw_message.decode('utf8', errors='replace')
            raise RuntimeError(
                f"Rust C Data Integration call failed: {error}")
        finally:
            # Hand the buffer back to the library even though we are raising.
            self.dll.arrow_rs_free_error(rs_error)
188+
189+
190+
class RustCDataExporter(CDataExporter, _CDataBase):
    """C Data exporter that delegates to the Rust integration library."""

    def export_schema_from_json(self, json_path, c_schema_ptr):
        """
        Call the Rust `..._export_schema_from_json` entrypoint.

        The path is passed as encoded bytes and `c_schema_ptr` as an
        integer address; a returned error raises `RuntimeError`.
        """
        export = self.dll.arrow_rs_cdata_integration_export_schema_from_json
        encoded_path = str(json_path).encode()
        schema_address = self._pointer_to_int(c_schema_ptr)
        self._check_rust_error(export(encoded_path, schema_address))

    def export_batch_from_json(self, json_path, num_batch, c_array_ptr):
        """
        Call the Rust `..._export_batch_from_json` entrypoint.

        The path is passed as encoded bytes, `num_batch` unchanged and
        `c_array_ptr` as an integer address; a returned error raises
        `RuntimeError`.
        """
        export = self.dll.arrow_rs_cdata_integration_export_batch_from_json
        encoded_path = str(json_path).encode()
        array_address = self._pointer_to_int(c_array_ptr)
        self._check_rust_error(
            export(encoded_path, num_batch, array_address))

    @property
    def supports_releasing_memory(self):
        """Always true for this exporter."""
        return True

    def record_allocation_state(self):
        """Return a constant 0; no real allocation figure is reported."""
        # FIXME is it possible to measure the amount of Rust-allocated memory?
        return 0
210+
211+
212+
class RustCDataImporter(CDataImporter, _CDataBase):
    """C Data importer that delegates to the Rust integration library."""

    def import_schema_and_compare_to_json(self, json_path, c_schema_ptr):
        """
        Call the Rust `..._import_schema_and_compare_to_json` entrypoint.

        The path is passed as encoded bytes and `c_schema_ptr` as an
        integer address; a returned error raises `RuntimeError`.
        """
        compare = (
            self.dll
            .arrow_rs_cdata_integration_import_schema_and_compare_to_json)
        encoded_path = str(json_path).encode()
        schema_address = self._pointer_to_int(c_schema_ptr)
        self._check_rust_error(compare(encoded_path, schema_address))

    def import_batch_and_compare_to_json(self, json_path, num_batch,
                                         c_array_ptr):
        """
        Call the Rust `..._import_batch_and_compare_to_json` entrypoint.

        The path is passed as encoded bytes, `num_batch` unchanged and
        `c_array_ptr` as an integer address; a returned error raises
        `RuntimeError`.
        """
        compare = (
            self.dll
            .arrow_rs_cdata_integration_import_batch_and_compare_to_json)
        encoded_path = str(json_path).encode()
        array_address = self._pointer_to_int(c_array_ptr)
        self._check_rust_error(
            compare(encoded_path, num_batch, array_address))

    @property
    def supports_releasing_memory(self):
        """Always true for this importer."""
        return True

docker-compose.yml

+2-1
Original file line numberDiff line numberDiff line change
@@ -1716,8 +1716,9 @@ services:
17161716
environment:
17171717
<<: [*common, *ccache]
17181718
ARCHERY_INTEGRATION_WITH_RUST: 0
1719-
# Tell Archery where the arrow C++ binaries are located
1719+
# Tell Archery where Arrow binaries are located
17201720
ARROW_CPP_EXE_PATH: /build/cpp/debug
1721+
ARROW_RUST_EXE_PATH: /build/rust/debug
17211722
command:
17221723
["/arrow/ci/scripts/integration_arrow_build.sh /arrow /build &&
17231724
/arrow/ci/scripts/integration_arrow.sh /arrow /build"]

0 commit comments

Comments
 (0)