 # under the License.

 import contextlib
+import functools
 import os
 import subprocess

-from .tester import Tester
+from . import cdata
+from .tester import Tester, CDataExporter, CDataImporter
 from .util import run_cmd, log
 from ..utils.source import ARROW_ROOT_DEFAULT

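For reviewers: `functools` is imported so that `setup_jpype()` (added below) can use `@functools.lru_cache` as a run-once guard, since JPype can start at most one JVM per process. A minimal standalone sketch of the idiom, with illustrative names not taken from this diff:

```python
import functools

@functools.lru_cache
def init_once():
    # The body runs only on the first call; subsequent calls return the
    # cached result (None here), so repeated initialization is harmless.
    print("initializing")

init_once()
init_once()  # served from the cache; nothing is printed
```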
@@ -42,18 +44,25 @@ def load_version_from_pom():
     "ARROW_JAVA_INTEGRATION_JAR",
     os.path.join(
         ARROW_ROOT_DEFAULT,
-        "java/tools/target/arrow-tools-{}-"
-        "jar-with-dependencies.jar".format(_arrow_version),
-    ),
+        "java/tools/target",
+        f"arrow-tools-{_arrow_version}-jar-with-dependencies.jar"
+    )
+)
+_ARROW_C_DATA_JAR = os.environ.get(
+    "ARROW_C_DATA_JAVA_INTEGRATION_JAR",
+    os.path.join(
+        ARROW_ROOT_DEFAULT,
+        "java/c/target",
+        f"arrow-c-data-{_arrow_version}.jar"
+    )
 )
 _ARROW_FLIGHT_JAR = os.environ.get(
     "ARROW_FLIGHT_JAVA_INTEGRATION_JAR",
     os.path.join(
         ARROW_ROOT_DEFAULT,
-        "java/flight/flight-integration-tests/target/"
-        "flight-integration-tests-{}-jar-with-dependencies.jar".format(
-            _arrow_version),
-    ),
+        "java/flight/flight-integration-tests/target",
+        f"flight-integration-tests-{_arrow_version}-jar-with-dependencies.jar"
+    )
 )
 _ARROW_FLIGHT_SERVER = (
     "org.apache.arrow.flight.integration.tests.IntegrationTestServer"
@@ -63,11 +72,151 @@ def load_version_from_pom():
 )


+@functools.lru_cache
+def setup_jpype():
+    import jpype
+    jar_path = f"{_ARROW_TOOLS_JAR}:{_ARROW_C_DATA_JAR}"
+    # XXX Didn't manage to tone down the logging level here (DEBUG -> INFO)
+    jpype.startJVM(jpype.getDefaultJVMPath(),
+                   "-Djava.class.path=" + jar_path)
+
+
+class _CDataBase:
+
+    def __init__(self, debug, args):
+        import jpype
+        self.debug = debug
+        self.args = args
+        self.ffi = cdata.ffi()
+        setup_jpype()
+        # JPype pointers to java.io, org.apache.arrow...
+        self.java_io = jpype.JPackage("java").io
+        self.java_arrow = jpype.JPackage("org").apache.arrow
+        self.java_allocator = self._make_java_allocator()
+
+    def _pointer_to_int(self, c_ptr):
+        return int(self.ffi.cast('uintptr_t', c_ptr))
+
+    def _wrap_c_schema_ptr(self, c_schema_ptr):
+        return self.java_arrow.c.ArrowSchema.wrap(
+            self._pointer_to_int(c_schema_ptr))
+
+    def _wrap_c_array_ptr(self, c_array_ptr):
+        return self.java_arrow.c.ArrowArray.wrap(
+            self._pointer_to_int(c_array_ptr))
+
+    def _make_java_allocator(self):
+        # Return a new allocator
+        return self.java_arrow.memory.RootAllocator()
+
+    def _assert_schemas_equal(self, expected, actual):
+        # XXX This is fragile for dictionaries, as Schema.equals compares
+        # dictionary ids!
+        # Should perhaps instead add a logical comparison function in
+        # org.apache.arrow.vector.util.DictionaryUtil
+        if not expected.equals(actual):
+            raise AssertionError(
+                f"Java Schemas are not equal:\n"
+                f"* expected = {expected.toString()}\n"
+                f"* actual = {actual.toString()}")
+
+
+class JavaCDataExporter(CDataExporter, _CDataBase):
+
+    def export_schema_from_json(self, json_path, c_schema_ptr):
+        json_file = self.java_io.File(json_path)
+        with self.java_arrow.vector.ipc.JsonFileReader(
+                json_file, self.java_allocator) as json_reader:
+            schema = json_reader.start()
+            dict_provider = json_reader
+            self.java_arrow.c.Data.exportSchema(
+                self.java_allocator, schema, dict_provider,
+                self._wrap_c_schema_ptr(c_schema_ptr)
+            )
+
+    def export_batch_from_json(self, json_path, num_batch, c_array_ptr):
+        json_file = self.java_io.File(json_path)
+        with self.java_arrow.vector.ipc.JsonFileReader(
+                json_file, self.java_allocator) as json_reader:
+            json_reader.start()
+            if num_batch > 0:
+                actually_skipped = json_reader.skip(num_batch)
+                assert actually_skipped == num_batch
+            with json_reader.read() as batch:
+                dict_provider = json_reader
+                self.java_arrow.c.Data.exportVectorSchemaRoot(
+                    self.java_allocator, batch, dict_provider,
+                    self._wrap_c_array_ptr(c_array_ptr))
+
+    @property
+    def supports_releasing_memory(self):
+        return True
+
+    def record_allocation_state(self):
+        return self.java_allocator.getAllocatedMemory()
+
+    def compare_allocation_state(self, recorded, gc_until):
+        def pred():
+            return self.java_allocator.getAllocatedMemory() == recorded
+
+        return gc_until(pred)
+
+
+class JavaCDataImporter(CDataImporter, _CDataBase):
+
+    def import_schema_and_compare_to_json(self, json_path, c_schema_ptr):
+        json_file = self.java_io.File(json_path)
+        with self.java_arrow.vector.ipc.JsonFileReader(
+                json_file, self.java_allocator) as json_reader:
+            json_schema = json_reader.start()
+            with self.java_arrow.c.CDataDictionaryProvider() as dict_provider:
+                imported_schema = self.java_arrow.c.Data.importSchema(
+                    self.java_allocator,
+                    self._wrap_c_schema_ptr(c_schema_ptr),
+                    dict_provider)
+                self._assert_schemas_equal(json_schema, imported_schema)
+
+    def import_batch_and_compare_to_json(self, json_path, num_batch,
+                                         c_array_ptr):
+        json_file = self.java_io.File(json_path)
+        with self.java_arrow.vector.ipc.JsonFileReader(
+                json_file, self.java_allocator) as json_reader:
+            schema = json_reader.start()
+            if num_batch > 0:
+                actually_skipped = json_reader.skip(num_batch)
+                assert actually_skipped == num_batch
+            with (json_reader.read() as batch,
+                  self.java_arrow.vector.VectorSchemaRoot.create(
+                      schema, self.java_allocator) as imported_batch):
+                # We need to pass a dict provider primed with dictionary ids
+                # matching those in the schema, hence an empty
+                # CDataDictionaryProvider would not work here!
+                dict_provider = json_reader
+                self.java_arrow.c.Data.importIntoVectorSchemaRoot(
+                    self.java_allocator,
+                    self._wrap_c_array_ptr(c_array_ptr),
+                    imported_batch, dict_provider)
+                # TODO print nice error message if not equal
+                assert imported_batch.equals(batch)
+
+    @property
+    def supports_releasing_memory(self):
+        return True
+
+    def gc_until(self, predicate):
+        # No need to call the Java GC thanks to AutoCloseable (?)
+        return predicate()
+
+
 class JavaTester(Tester):
     PRODUCER = True
     CONSUMER = True
     FLIGHT_SERVER = True
     FLIGHT_CLIENT = True
+    C_DATA_SCHEMA_EXPORTER = True
+    C_DATA_SCHEMA_IMPORTER = True
+    C_DATA_ARRAY_EXPORTER = True
+    C_DATA_ARRAY_IMPORTER = True

     name = 'Java'

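A note on the pointer handshake used throughout `_CDataBase` above: the C Data structs are allocated on the Python side with cffi, and only their integer addresses cross the JPype boundary, where `ArrowSchema.wrap()` / `ArrowArray.wrap()` rebuild a Java view of the same memory. A rough sketch of the Python half, assuming `cdata.ffi()` returns a cffi `FFI` object with the Arrow C Data structs already declared:

```python
ffi = cdata.ffi()  # assumed to have cdef'd the Arrow C Data structs

# Allocate an empty ArrowSchema; Python keeps ownership of the memory.
c_schema = ffi.new("struct ArrowSchema*")

# Only the raw address crosses the FFI boundary, as in _pointer_to_int().
address = int(ffi.cast("uintptr_t", c_schema))

# On the Java side (via JPype), the struct is then re-materialized with:
#   java_arrow.c.ArrowSchema.wrap(address)
```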
@@ -186,3 +335,9 @@ def flight_server(self, scenario_name=None):
         finally:
             server.kill()
             server.wait(5)
+
+    def make_c_data_exporter(self):
+        return JavaCDataExporter(self.debug, self.args)
+
+    def make_c_data_importer(self):
+        return JavaCDataImporter(self.debug, self.args)
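Taken together, these hooks let the integration runner round-trip data between producers and consumers without going through IPC. A hedged sketch of the wiring, with placeholder constructor arguments and gold-file path (the real driver lives in the integration runner, not in this diff):

```python
# Placeholder wiring; `debug` and `args` normally come from the JavaTester.
exporter = JavaCDataExporter(False, None)
importer = JavaCDataImporter(False, None)

ffi = cdata.ffi()
c_schema = ffi.new("struct ArrowSchema*")

# Java exports the schema read from a JSON gold file into the C struct...
exporter.export_schema_from_json("primitive.json", c_schema)
# ...then imports it back and asserts it matches the same gold file.
importer.import_schema_and_compare_to_json("primitive.json", c_schema)
```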