16
16
# under the License.
17
17
18
18
import contextlib
19
+ import functools
19
20
import os
20
21
import subprocess
21
22
22
- from .tester import Tester
23
+ from . import cdata
24
+ from .tester import Tester , CDataExporter , CDataImporter
23
25
from .util import run_cmd , log
24
26
from ..utils .source import ARROW_ROOT_DEFAULT
25
27
@@ -42,18 +44,25 @@ def load_version_from_pom():
42
44
"ARROW_JAVA_INTEGRATION_JAR" ,
43
45
os .path .join (
44
46
ARROW_ROOT_DEFAULT ,
45
- "java/tools/target/arrow-tools-{}-"
46
- "jar-with-dependencies.jar" .format (_arrow_version ),
47
- ),
47
+ "java/tools/target" ,
48
+ f"arrow-tools-{ _arrow_version } -jar-with-dependencies.jar"
49
+ )
50
+ )
51
# Jar providing the Arrow Java C Data Interface bridge, used by the
# JPype-based exporter/importer below.  Overridable via the environment.
_ARROW_C_DATA_JAR = os.environ.get(
    "ARROW_C_DATA_JAVA_INTEGRATION_JAR",
    os.path.join(
        ARROW_ROOT_DEFAULT,
        "java/c/target",
        f"arrow-c-data-{_arrow_version}.jar"
    )
)
49
59
# Jar bundling the Flight integration test server/client entry points.
# Overridable via the environment.
_ARROW_FLIGHT_JAR = os.environ.get(
    "ARROW_FLIGHT_JAVA_INTEGRATION_JAR",
    os.path.join(
        ARROW_ROOT_DEFAULT,
        "java/flight/flight-integration-tests/target",
        f"flight-integration-tests-{_arrow_version}-jar-with-dependencies.jar"
    )
)
58
67
_ARROW_FLIGHT_SERVER = (
59
68
"org.apache.arrow.flight.integration.tests.IntegrationTestServer"
@@ -63,11 +72,157 @@ def load_version_from_pom():
63
72
)
64
73
65
74
75
+ @functools .lru_cache
76
+ def setup_jpype ():
77
+ import jpype
78
+ jar_path = f"{ _ARROW_TOOLS_JAR } :{ _ARROW_C_DATA_JAR } "
79
+ # XXX Didn't manage to tone down the logging level here (DEBUG -> INFO)
80
+ jpype .startJVM (jpype .getDefaultJVMPath (),
81
+ "-Djava.class.path=" + jar_path , * _JAVA_OPTS )
82
+
83
+
84
class _CDataBase:
    """Shared plumbing for the JPype-backed C Data exporter/importer.

    Holds the cffi handle, JPype shortcuts into ``java.io`` and
    ``org.apache.arrow``, and a dedicated Java allocator, plus helpers to
    wrap raw C pointers into the Arrow Java C Data wrapper objects.
    """

    def __init__(self, debug, args):
        import jpype
        self.debug = debug
        self.args = args
        self.ffi = cdata.ffi()
        # The JVM must be up before any JPackage lookup below.
        setup_jpype()
        # JPype pointers to java.io, org.apache.arrow...
        self.java_io = jpype.JPackage("java").io
        self.java_arrow = jpype.JPackage("org").apache.arrow
        self.java_allocator = self._make_java_allocator()

    def _pointer_to_int(self, c_ptr):
        # cffi pointer -> plain Python int holding its address.
        return int(self.ffi.cast('uintptr_t', c_ptr))

    def _wrap_c_schema_ptr(self, c_schema_ptr):
        # Raw ArrowSchema* -> Java-side ArrowSchema wrapper.
        return self.java_arrow.c.ArrowSchema.wrap(
            self._pointer_to_int(c_schema_ptr))

    def _wrap_c_array_ptr(self, c_array_ptr):
        # Raw ArrowArray* -> Java-side ArrowArray wrapper.
        return self.java_arrow.c.ArrowArray.wrap(
            self._pointer_to_int(c_array_ptr))

    def _make_java_allocator(self):
        # Return a new allocator
        return self.java_arrow.memory.RootAllocator()

    def _assert_schemas_equal(self, expected, actual):
        # XXX This is fragile for dictionaries, as Schema.equals compares
        # dictionary ids.
        self.java_arrow.vector.util.Validator.compareSchemas(
            expected, actual)

    def _assert_batches_equal(self, expected, actual):
        self.java_arrow.vector.util.Validator.compareVectorSchemaRoot(
            expected, actual)

    def _assert_dict_providers_equal(self, expected, actual):
        self.java_arrow.vector.util.Validator.compareDictionaryProviders(
            expected, actual)
+
126
+
127
+ class JavaCDataExporter (CDataExporter , _CDataBase ):
128
+
129
+ def export_schema_from_json (self , json_path , c_schema_ptr ):
130
+ json_file = self .java_io .File (json_path )
131
+ with self .java_arrow .vector .ipc .JsonFileReader (
132
+ json_file , self .java_allocator ) as json_reader :
133
+ schema = json_reader .start ()
134
+ dict_provider = json_reader
135
+ self .java_arrow .c .Data .exportSchema (
136
+ self .java_allocator , schema , dict_provider ,
137
+ self ._wrap_c_schema_ptr (c_schema_ptr )
138
+ )
139
+
140
+ def export_batch_from_json (self , json_path , num_batch , c_array_ptr ):
141
+ json_file = self .java_io .File (json_path )
142
+ with self .java_arrow .vector .ipc .JsonFileReader (
143
+ json_file , self .java_allocator ) as json_reader :
144
+ json_reader .start ()
145
+ if num_batch > 0 :
146
+ actually_skipped = json_reader .skip (num_batch )
147
+ assert actually_skipped == num_batch
148
+ with json_reader .read () as batch :
149
+ dict_provider = json_reader
150
+ self .java_arrow .c .Data .exportVectorSchemaRoot (
151
+ self .java_allocator , batch , dict_provider ,
152
+ self ._wrap_c_array_ptr (c_array_ptr ))
153
+
154
+ @property
155
+ def supports_releasing_memory (self ):
156
+ return True
157
+
158
+ def record_allocation_state (self ):
159
+ return self .java_allocator .getAllocatedMemory ()
160
+
161
+ def compare_allocation_state (self , recorded , gc_until ):
162
+ def pred ():
163
+ return self .java_allocator .getAllocatedMemory () == recorded
164
+
165
+ return gc_until (pred )
166
+
167
+
168
+ class JavaCDataImporter (CDataImporter , _CDataBase ):
169
+
170
+ def import_schema_and_compare_to_json (self , json_path , c_schema_ptr ):
171
+ json_file = self .java_io .File (json_path )
172
+ with self .java_arrow .vector .ipc .JsonFileReader (
173
+ json_file , self .java_allocator ) as json_reader :
174
+ json_schema = json_reader .start ()
175
+ with self .java_arrow .c .CDataDictionaryProvider () as dict_provider :
176
+ imported_schema = self .java_arrow .c .Data .importSchema (
177
+ self .java_allocator ,
178
+ self ._wrap_c_schema_ptr (c_schema_ptr ),
179
+ dict_provider )
180
+ self ._assert_schemas_equal (json_schema , imported_schema )
181
+
182
+ def import_batch_and_compare_to_json (self , json_path , num_batch ,
183
+ c_array_ptr ):
184
+ json_file = self .java_io .File (json_path )
185
+ with self .java_arrow .vector .ipc .JsonFileReader (
186
+ json_file , self .java_allocator ) as json_reader :
187
+ schema = json_reader .start ()
188
+ if num_batch > 0 :
189
+ actually_skipped = json_reader .skip (num_batch )
190
+ assert actually_skipped == num_batch
191
+ with (json_reader .read () as batch ,
192
+ self .java_arrow .vector .VectorSchemaRoot .create (
193
+ schema , self .java_allocator ) as imported_batch ):
194
+ # We need to pass a dict provider primed with dictionary ids
195
+ # matching those in the schema, hence an empty
196
+ # CDataDictionaryProvider would not work here!
197
+ dict_provider = (self .java_arrow .vector .dictionary
198
+ .DictionaryProvider .MapDictionaryProvider ())
199
+ dict_provider .copyStructureFrom (json_reader , self .java_allocator )
200
+ with dict_provider :
201
+ self .java_arrow .c .Data .importIntoVectorSchemaRoot (
202
+ self .java_allocator ,
203
+ self ._wrap_c_array_ptr (c_array_ptr ),
204
+ imported_batch , dict_provider )
205
+ self ._assert_batches_equal (batch , imported_batch )
206
+ self ._assert_dict_providers_equal (json_reader , dict_provider )
207
+
208
+ @property
209
+ def supports_releasing_memory (self ):
210
+ return True
211
+
212
+ def gc_until (self , predicate ):
213
+ # No need to call the Java GC thanks to AutoCloseable (?)
214
+ return predicate ()
215
+
216
+
66
217
class JavaTester (Tester ):
67
218
PRODUCER = True
68
219
CONSUMER = True
69
220
FLIGHT_SERVER = True
70
221
FLIGHT_CLIENT = True
222
+ C_DATA_SCHEMA_EXPORTER = True
223
+ C_DATA_SCHEMA_IMPORTER = True
224
+ C_DATA_ARRAY_EXPORTER = True
225
+ C_DATA_ARRAY_IMPORTER = True
71
226
72
227
name = 'Java'
73
228
@@ -186,3 +341,9 @@ def flight_server(self, scenario_name=None):
186
341
finally :
187
342
server .kill ()
188
343
server .wait (5 )
344
+
345
+ def make_c_data_exporter (self ):
346
+ return JavaCDataExporter (self .debug , self .args )
347
+
348
+ def make_c_data_importer (self ):
349
+ return JavaCDataImporter (self .debug , self .args )
0 commit comments