@@ -4,7 +4,7 @@ use std::collections::HashMap;
44use std:: sync:: { Arc , Mutex } ;
55
66use delta_kernel:: scan:: state:: { visit_scan_files, DvInfo , GlobalScanState } ;
7- use delta_kernel:: scan:: { Scan , ScanData } ;
7+ use delta_kernel:: scan:: { Scan , ScanMetadata } ;
88use delta_kernel:: snapshot:: Snapshot ;
99use delta_kernel:: { DeltaResult , Error , Expression , ExpressionRef } ;
1010use delta_kernel_ffi_macros:: handle_descriptor;
@@ -24,7 +24,7 @@ use crate::{
2424use super :: handle:: Handle ;
2525
2626// TODO: Why do we even need to expose a scan, when the only thing an engine can do with it is
27- // handit back to the kernel by calling `kernel_scan_data_init `? There isn't even an FFI method to
27+ // hand it back to the kernel by calling `scan_metadata_iter_init`? There isn't even an FFI method to
2828// drop it!
2929#[ handle_descriptor( target=Scan , mutable=false , sized=true ) ]
3030pub struct SharedScan ;
@@ -125,70 +125,70 @@ pub unsafe extern "C" fn free_global_scan_state(state: Handle<SharedGlobalScanSt
125125// means kernel made the decision of how to achieve thread safety. This may not be desirable if the
126126// engine is single-threaded, or has its own mutual exclusion mechanisms. Deadlock is even a
127127// conceivable risk, if this interacts poorly with engine's mutual exclusion mechanism.
128- pub struct KernelScanDataIterator {
128+ pub struct ScanMetadataIterator {
129129 // Mutex -> Allow the iterator to be accessed safely by multiple threads.
130130 // Box -> Wrap its unsized content so this struct is fixed-size with thin pointers.
131- // Item = DeltaResult<ScanData >
132- data : Mutex < Box < dyn Iterator < Item = DeltaResult < ScanData > > + Send > > ,
131+ // Item = DeltaResult<ScanMetadata >
132+ data : Mutex < Box < dyn Iterator < Item = DeltaResult < ScanMetadata > > + Send > > ,
133133
134134 // Also keep a reference to the external engine for its error allocator. The default Parquet and
135135 // Json handlers don't hold any reference to the tokio reactor they rely on, so the iterator
136136 // terminates early if the last engine goes out of scope.
137137 engine : Arc < dyn ExternEngine > ,
138138}
139139
140- #[ handle_descriptor( target=KernelScanDataIterator , mutable=false , sized=true ) ]
141- pub struct SharedScanDataIterator ;
140+ #[ handle_descriptor( target=ScanMetadataIterator , mutable=false , sized=true ) ]
141+ pub struct SharedScanMetadataIterator ;
142142
143- impl Drop for KernelScanDataIterator {
143+ impl Drop for ScanMetadataIterator {
144144 fn drop ( & mut self ) {
145- debug ! ( "dropping KernelScanDataIterator " ) ;
145+ debug ! ( "dropping ScanMetadataIterator " ) ;
146146 }
147147}
148148
149149/// Get an iterator over the data needed to perform a scan. This will return a
150- /// [`KernelScanDataIterator `] which can be passed to [`kernel_scan_data_next `] to get the actual
151- /// data in the iterator.
150+ /// [`ScanMetadataIterator `] which can be passed to [`scan_metadata_next `] to get the
151+ /// actual data in the iterator.
152152///
153153/// # Safety
154154///
155155/// Engine is responsible for passing a valid [`SharedExternEngine`] and [`SharedScan`]
156156#[ no_mangle]
157- pub unsafe extern "C" fn kernel_scan_data_init (
157+ pub unsafe extern "C" fn scan_metadata_iter_init (
158158 engine : Handle < SharedExternEngine > ,
159159 scan : Handle < SharedScan > ,
160- ) -> ExternResult < Handle < SharedScanDataIterator > > {
160+ ) -> ExternResult < Handle < SharedScanMetadataIterator > > {
161161 let engine = unsafe { engine. clone_as_arc ( ) } ;
162162 let scan = unsafe { scan. as_ref ( ) } ;
163- kernel_scan_data_init_impl ( & engine, scan) . into_extern_result ( & engine. as_ref ( ) )
163+ scan_metadata_iter_init_impl ( & engine, scan) . into_extern_result ( & engine. as_ref ( ) )
164164}
165165
166- fn kernel_scan_data_init_impl (
166+ fn scan_metadata_iter_init_impl (
167167 engine : & Arc < dyn ExternEngine > ,
168168 scan : & Scan ,
169- ) -> DeltaResult < Handle < SharedScanDataIterator > > {
170- let scan_data = scan. scan_data ( engine. engine ( ) . as_ref ( ) ) ?;
171- let data = KernelScanDataIterator {
172- data : Mutex :: new ( Box :: new ( scan_data ) ) ,
169+ ) -> DeltaResult < Handle < SharedScanMetadataIterator > > {
170+ let scan_metadata = scan. scan_metadata ( engine. engine ( ) . as_ref ( ) ) ?;
171+ let data = ScanMetadataIterator {
172+ data : Mutex :: new ( Box :: new ( scan_metadata ) ) ,
173173 engine : engine. clone ( ) ,
174174 } ;
175175 Ok ( Arc :: new ( data) . into ( ) )
176176}
177177
178- /// Call the provided `engine_visitor` on the next scan data item. The visitor will be provided with
179- /// a selection vector and engine data. It is the responsibility of the _engine_ to free these when
180- /// it is finished by calling [`free_bool_slice`] and [`free_engine_data`] respectively.
178+ /// Call the provided `engine_visitor` on the next scan metadata item. The visitor will be provided
179+ /// with a selection vector and engine data. It is the responsibility of the _engine_ to free these
180+ /// when it is finished by calling [`free_bool_slice`] and [`free_engine_data`] respectively.
181181///
182182/// # Safety
183183///
184- /// The iterator must be valid (returned by [kernel_scan_data_init ]) and not yet freed by
185- /// [`free_kernel_scan_data `]. The visitor function pointer must be non-null.
184+ /// The iterator must be valid (returned by [scan_metadata_iter_init ]) and not yet freed by
185+ /// [`free_scan_metadata_iter `]. The visitor function pointer must be non-null.
186186///
187187/// [`free_bool_slice`]: crate::free_bool_slice
188188/// [`free_engine_data`]: crate::free_engine_data
189189#[ no_mangle]
190- pub unsafe extern "C" fn kernel_scan_data_next (
191- data : Handle < SharedScanDataIterator > ,
190+ pub unsafe extern "C" fn scan_metadata_next (
191+ data : Handle < SharedScanMetadataIterator > ,
192192 engine_context : NullableCvoid ,
193193 engine_visitor : extern "C" fn (
194194 engine_context : NullableCvoid ,
@@ -198,11 +198,11 @@ pub unsafe extern "C" fn kernel_scan_data_next(
198198 ) ,
199199) -> ExternResult < bool > {
200200 let data = unsafe { data. as_ref ( ) } ;
201- kernel_scan_data_next_impl ( data, engine_context, engine_visitor)
201+ scan_metadata_next_impl ( data, engine_context, engine_visitor)
202202 . into_extern_result ( & data. engine . as_ref ( ) )
203203}
204- fn kernel_scan_data_next_impl (
205- data : & KernelScanDataIterator ,
204+ fn scan_metadata_next_impl (
205+ data : & ScanMetadataIterator ,
206206 engine_context : NullableCvoid ,
207207 engine_visitor : extern "C" fn (
208208 engine_context : NullableCvoid ,
@@ -228,11 +228,11 @@ fn kernel_scan_data_next_impl(
228228/// # Safety
229229///
230230/// Caller is responsible for (at most once) passing a valid pointer returned by a call to
231- /// [`kernel_scan_data_init `].
231+ /// [`scan_metadata_iter_init `].
232232// we should probably be consistent with drop vs. free on engine side (probably the latter is more
233233// intuitive to non-rust code)
234234#[ no_mangle]
235- pub unsafe extern "C" fn free_kernel_scan_data ( data : Handle < SharedScanDataIterator > ) {
235+ pub unsafe extern "C" fn free_scan_metadata_iter ( data : Handle < SharedScanMetadataIterator > ) {
236236 data. drop_handle ( ) ;
237237}
238238
@@ -297,14 +297,14 @@ pub unsafe extern "C" fn get_from_string_map(
297297 . and_then ( |v| allocate_fn ( kernel_string_slice ! ( v) ) )
298298}
299299
300- /// Transformation expressions that need to be applied to each row `i` in ScanData . You can use
300+ /// Transformation expressions that need to be applied to each row `i` in ScanMetadata . You can use
301301/// [`get_transform_for_row`] to get the transform for a particular row. If that returns an
302302/// associated expression, it _must_ be applied to the data read from the file specified by the
303303/// row. The resultant schema for this expression is guaranteed to be `Scan.schema()`. If
304304/// `get_transform_for_row` returns `NULL` no expression need be applied and the data read from disk
305305/// is already in the correct logical state.
306306///
307- /// NB: If you are using `visit_scan_data ` you don't need to worry about dealing with probing
307+ /// NB: If you are using `visit_scan_metadata ` you don't need to worry about dealing with probing
308308/// `CTransforms`. The callback will be invoked with the correct transform for you.
309309pub struct CTransforms {
310310 transforms : Vec < Option < ExpressionRef > > ,
@@ -420,13 +420,13 @@ struct ContextWrapper {
420420 callback : CScanCallback ,
421421}
422422
423- /// Shim for ffi to call visit_scan_data . This will generally be called when iterating through scan
423+ /// Shim for ffi to call visit_scan_metadata . This will generally be called when iterating through scan
424424/// metadata, which provides the data handle and selection vector as each element in the iterator.
425425///
426426/// # Safety
427427/// engine is responsible for passing a valid [`ExclusiveEngineData`] and selection vector.
428428#[ no_mangle]
429- pub unsafe extern "C" fn visit_scan_data (
429+ pub unsafe extern "C" fn visit_scan_metadata (
430430 data : Handle < ExclusiveEngineData > ,
431431 selection_vec : KernelBoolSlice ,
432432 transforms : & CTransforms ,
0 commit comments