From 6e1bbf18ca70f48841a1f2760a293e631f850ca1 Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Fri, 27 Sep 2024 10:07:09 -0700 Subject: [PATCH 01/82] Move ffi code, add kernel expr -> engine expr FFI code for schema and expressions has been moved to their own modules. Add prototype of the visitor to enable kernel expression -> engine expression --- ffi/src/expressions.rs | 361 +++++++++++++++++++++++++++++++++ ffi/src/lib.rs | 449 +---------------------------------------- ffi/src/scan.rs | 8 +- ffi/src/schema.rs | 207 +++++++++++++++++++ 4 files changed, 575 insertions(+), 450 deletions(-) create mode 100644 ffi/src/expressions.rs create mode 100644 ffi/src/schema.rs diff --git a/ffi/src/expressions.rs b/ffi/src/expressions.rs new file mode 100644 index 000000000..1f8b723aa --- /dev/null +++ b/ffi/src/expressions.rs @@ -0,0 +1,361 @@ +use std::ffi::c_void; + +use crate::{ + handle::Handle, AllocateErrorFn, EngineIterator, ExternResult, IntoExternResult, + KernelStringSlice, ReferenceSet, TryFromStringSlice, +}; +use delta_kernel::{ + expressions::{BinaryOperator, Expression, Scalar, UnaryOperator, VariadicOperator}, + DeltaResult, +}; + +#[derive(Default)] +pub struct KernelExpressionVisitorState { + // TODO: ReferenceSet> instead? + inflight_expressions: ReferenceSet, +} +impl KernelExpressionVisitorState { + pub fn new() -> Self { + Self { + inflight_expressions: Default::default(), + } + } +} + +/// A predicate that can be used to skip data when scanning. +/// +/// When invoking [`scan::scan`], The engine provides a pointer to the (engine's native) predicate, +/// along with a visitor function that can be invoked to recursively visit the predicate. This +/// engine state must be valid until the call to `scan::scan` returns. Inside that method, the +/// kernel allocates visitor state, which becomes the second argument to the predicate visitor +/// invocation along with the engine-provided predicate pointer.
The visitor state is valid for the +/// lifetime of the predicate visitor invocation. Thanks to this double indirection, engine and +/// kernel each retain ownership of their respective objects, with no need to coordinate memory +/// lifetimes with the other. +#[repr(C)] +pub struct EnginePredicate { + pub predicate: *mut c_void, + pub visitor: + extern "C" fn(predicate: *mut c_void, state: &mut KernelExpressionVisitorState) -> usize, +} + +fn wrap_expression(state: &mut KernelExpressionVisitorState, expr: Expression) -> usize { + state.inflight_expressions.insert(expr) +} + +pub fn unwrap_kernel_expression( + state: &mut KernelExpressionVisitorState, + exprid: usize, +) -> Option { + state.inflight_expressions.take(exprid) +} + +fn visit_expression_binary( + state: &mut KernelExpressionVisitorState, + op: BinaryOperator, + a: usize, + b: usize, +) -> usize { + let left = unwrap_kernel_expression(state, a).map(Box::new); + let right = unwrap_kernel_expression(state, b).map(Box::new); + match left.zip(right) { + Some((left, right)) => { + wrap_expression(state, Expression::BinaryOperation { op, left, right }) + } + None => 0, // invalid child => invalid node + } +} + +fn visit_expression_unary( + state: &mut KernelExpressionVisitorState, + op: UnaryOperator, + inner_expr: usize, +) -> usize { + unwrap_kernel_expression(state, inner_expr).map_or(0, |expr| { + wrap_expression(state, Expression::unary(op, expr)) + }) +} + +// The EngineIterator is not thread safe, not reentrant, not owned by callee, not freed by callee. 
+#[no_mangle] +pub extern "C" fn visit_expression_and( + state: &mut KernelExpressionVisitorState, + children: &mut EngineIterator, +) -> usize { + let result = Expression::and_from( + children.flat_map(|child| unwrap_kernel_expression(state, child as usize)), + ); + wrap_expression(state, result) +} + +#[no_mangle] +pub extern "C" fn visit_expression_lt( + state: &mut KernelExpressionVisitorState, + a: usize, + b: usize, +) -> usize { + visit_expression_binary(state, BinaryOperator::LessThan, a, b) +} + +#[no_mangle] +pub extern "C" fn visit_expression_le( + state: &mut KernelExpressionVisitorState, + a: usize, + b: usize, +) -> usize { + visit_expression_binary(state, BinaryOperator::LessThanOrEqual, a, b) +} + +#[no_mangle] +pub extern "C" fn visit_expression_gt( + state: &mut KernelExpressionVisitorState, + a: usize, + b: usize, +) -> usize { + visit_expression_binary(state, BinaryOperator::GreaterThan, a, b) +} + +#[no_mangle] +pub extern "C" fn visit_expression_ge( + state: &mut KernelExpressionVisitorState, + a: usize, + b: usize, +) -> usize { + visit_expression_binary(state, BinaryOperator::GreaterThanOrEqual, a, b) +} + +#[no_mangle] +pub extern "C" fn visit_expression_eq( + state: &mut KernelExpressionVisitorState, + a: usize, + b: usize, +) -> usize { + visit_expression_binary(state, BinaryOperator::Equal, a, b) +} + +/// # Safety +/// The string slice must be valid +#[no_mangle] +pub unsafe extern "C" fn visit_expression_column( + state: &mut KernelExpressionVisitorState, + name: KernelStringSlice, + allocate_error: AllocateErrorFn, +) -> ExternResult { + let name = unsafe { String::try_from_slice(&name) }; + visit_expression_column_impl(state, name).into_extern_result(&allocate_error) +} +fn visit_expression_column_impl( + state: &mut KernelExpressionVisitorState, + name: DeltaResult, +) -> DeltaResult { + Ok(wrap_expression(state, Expression::Column(name?))) +} + +#[no_mangle] +pub extern "C" fn visit_expression_not( + state: &mut 
KernelExpressionVisitorState, + inner_expr: usize, +) -> usize { + visit_expression_unary(state, UnaryOperator::Not, inner_expr) +} + +#[no_mangle] +pub extern "C" fn visit_expression_is_null( + state: &mut KernelExpressionVisitorState, + inner_expr: usize, +) -> usize { + visit_expression_unary(state, UnaryOperator::IsNull, inner_expr) +} + +/// # Safety +/// The string slice must be valid +#[no_mangle] +pub unsafe extern "C" fn visit_expression_literal_string( + state: &mut KernelExpressionVisitorState, + value: KernelStringSlice, + allocate_error: AllocateErrorFn, +) -> ExternResult { + let value = unsafe { String::try_from_slice(&value) }; + visit_expression_literal_string_impl(state, value).into_extern_result(&allocate_error) +} +fn visit_expression_literal_string_impl( + state: &mut KernelExpressionVisitorState, + value: DeltaResult, +) -> DeltaResult { + Ok(wrap_expression( + state, + Expression::Literal(Scalar::from(value?)), + )) +} + +// We need to get parse.expand working to be able to macro everything below, see issue #255 + +#[no_mangle] +pub extern "C" fn visit_expression_literal_int( + state: &mut KernelExpressionVisitorState, + value: i32, +) -> usize { + wrap_expression(state, Expression::literal(value)) +} + +#[no_mangle] +pub extern "C" fn visit_expression_literal_long( + state: &mut KernelExpressionVisitorState, + value: i64, +) -> usize { + wrap_expression(state, Expression::literal(value)) +} + +#[no_mangle] +pub extern "C" fn visit_expression_literal_short( + state: &mut KernelExpressionVisitorState, + value: i16, +) -> usize { + wrap_expression(state, Expression::literal(value)) +} + +#[no_mangle] +pub extern "C" fn visit_expression_literal_byte( + state: &mut KernelExpressionVisitorState, + value: i8, +) -> usize { + wrap_expression(state, Expression::literal(value)) +} + +#[no_mangle] +pub extern "C" fn visit_expression_literal_float( + state: &mut KernelExpressionVisitorState, + value: f32, +) -> usize { + wrap_expression(state, 
Expression::literal(value)) +} + +#[no_mangle] +pub extern "C" fn visit_expression_literal_double( + state: &mut KernelExpressionVisitorState, + value: f64, +) -> usize { + wrap_expression(state, Expression::literal(value)) +} + +#[no_mangle] +pub extern "C" fn visit_expression_literal_bool( + state: &mut KernelExpressionVisitorState, + value: bool, +) -> usize { + wrap_expression(state, Expression::literal(value)) +} + +/// Visitor used to convert a kernel expression into an engine expression. +/// Each callback receives the opaque `data` pointer as its first argument. +#[repr(C)] +pub struct EngineExpressionVisitor { + /// opaque state pointer + pub data: *mut c_void, + /// Creates a new expression list, optionally reserving capacity up front + pub make_expr_list: extern "C" fn(data: *mut c_void, reserve: usize) -> usize, + + /// Visit an `integer` literal with the given `value`. + pub visit_int: extern "C" fn(data: *mut c_void, value: i32) -> usize, + pub visit_long: extern "C" fn(data: *mut c_void, value: i64) -> usize, + pub visit_short: extern "C" fn(data: *mut c_void, value: i16) -> usize, + pub visit_byte: extern "C" fn(data: *mut c_void, value: i8) -> usize, + pub visit_float: extern "C" fn(data: *mut c_void, value: f32) -> usize, + pub visit_double: extern "C" fn(data: *mut c_void, value: f64) -> usize, + pub visit_bool: extern "C" fn(data: *mut c_void, value: bool) -> usize, + pub visit_string: extern "C" fn(data: *mut c_void, value: KernelStringSlice) -> usize, + + pub visit_and: extern "C" fn(data: *mut c_void, len: usize) -> usize, + pub visit_or: extern "C" fn(data: *mut c_void, len: usize) -> usize, + pub visit_variadic_item: + extern "C" fn(data: *mut c_void, variadic_id: usize, sub_expr_id: usize), + pub visit_not: extern "C" fn(data: *mut c_void, inner_expr: usize) -> usize, + pub visit_is_null: extern "C" fn(data: *mut c_void, inner_expr: usize) -> usize, + + pub visit_lt: extern "C" fn(data: *mut c_void, a: usize, b: usize) -> usize, + pub visit_le: extern "C" fn(data: *mut c_void, a: usize, b: usize) -> usize, + pub visit_gt:
extern "C" fn(data: *mut c_void, a: usize, b: usize) -> usize, + pub visit_ge: extern "C" fn(data: *mut c_void, a: usize, b: usize) -> usize, + pub visit_eq: extern "C" fn(data: *mut c_void, a: usize, b: usize) -> usize, + + pub visit_column: extern "C" fn(data: *mut c_void, name: KernelStringSlice) -> usize, +} + +#[no_mangle] +pub unsafe extern "C" fn visit_expression( + expression: &Expression, + visitor: &mut EngineExpressionVisitor, +) -> usize { + fn visit_variadic( + visitor: &mut EngineExpressionVisitor, + op: &VariadicOperator, + exprs: &Vec, + ) -> usize { + let variadic_id = match op { + VariadicOperator::And => (visitor.visit_and)(visitor.data, exprs.len()), + VariadicOperator::Or => (visitor.visit_or)(visitor.data, exprs.len()), + }; + for expr in exprs { + let expr_id = visit_expression(visitor, expr); + (visitor.visit_variadic_item)(visitor.data, variadic_id, expr_id) + } + variadic_id + } + fn visit_binary_op( + visitor: &mut EngineExpressionVisitor, + op: &BinaryOperator, + a: &Expression, + b: &Expression, + ) -> usize { + let a_id = visit_expression(visitor, a); + let b_id = visit_expression(visitor, b); + match op { + BinaryOperator::Plus => todo!(), + BinaryOperator::Minus => todo!(), + BinaryOperator::Multiply => todo!(), + BinaryOperator::Divide => todo!(), + BinaryOperator::LessThan => todo!(), + BinaryOperator::LessThanOrEqual => todo!(), + BinaryOperator::GreaterThan => todo!(), + BinaryOperator::GreaterThanOrEqual => todo!(), + BinaryOperator::Equal => todo!(), + BinaryOperator::NotEqual => todo!(), + BinaryOperator::Distinct => todo!(), + BinaryOperator::In => todo!(), + BinaryOperator::NotIn => todo!(), + } + } + fn visit_expression(visitor: &mut EngineExpressionVisitor, expression: &Expression) -> usize { + macro_rules! 
call { + ( $visitor_fn:ident $(, $extra_args:expr) *) => { + (visitor.$visitor_fn)(visitor.data $(, $extra_args) *) + }; + } + match expression { + Expression::Literal(lit) => match lit { + Scalar::Integer(val) => call!(visit_int, *val), + Scalar::Long(val) => call!(visit_long, *val), + Scalar::Short(val) => call!(visit_short, *val), + Scalar::Byte(val) => call!(visit_byte, *val), + Scalar::Float(val) => call!(visit_float, *val), + Scalar::Double(val) => call!(visit_double, *val), + Scalar::String(val) => call!(visit_string, val.into()), + Scalar::Boolean(val) => call!(visit_bool, *val), + Scalar::Timestamp(val) => todo!(), + Scalar::TimestampNtz(_) => todo!(), + Scalar::Date(_) => todo!(), + Scalar::Binary(_) => todo!(), + Scalar::Decimal(_, _, _) => todo!(), + Scalar::Null(_) => todo!(), + Scalar::Struct(_) => todo!(), + Scalar::Array(_) => todo!(), + }, + Expression::Column(name) => call!(visit_column, name.into()), + Expression::Struct(_) => todo!(), + Expression::BinaryOperation { op, left, right } => todo!(), + Expression::UnaryOperation { op, expr } => todo!(), + Expression::VariadicOperation { op, exprs } => visit_variadic(visitor, op, exprs), + } + } + visit_expression(visitor, expression) +} diff --git a/ffi/src/lib.rs b/ffi/src/lib.rs index 1466ab65e..923277e86 100644 --- a/ffi/src/lib.rs +++ b/ffi/src/lib.rs @@ -10,8 +10,6 @@ use std::sync::Arc; use tracing::debug; use url::Url; -use delta_kernel::expressions::{BinaryOperator, Expression, Scalar, UnaryOperator}; -use delta_kernel::schema::{ArrayType, DataType, MapType, PrimitiveType, StructType}; use delta_kernel::snapshot::Snapshot; use delta_kernel::{DeltaResult, Engine, EngineData, Error, Table}; use delta_kernel_ffi_macros::handle_descriptor; @@ -32,7 +30,9 @@ use handle::Handle; extern crate self as delta_kernel_ffi; pub mod engine_funcs; -pub mod scan; +pub(crate) mod expressions; +pub(crate) mod scan; +pub(crate) mod schema; pub(crate) type NullableCvoid = Option>; @@ -790,212 +790,6 @@ pub 
unsafe extern "C" fn free_string_slice_data(data: Handle usize, - - // visitor methods that should instantiate and append the appropriate type to the field list - /// Indicate that the schema contains a `Struct` type. The top level of a Schema is always a - /// `Struct`. The fields of the `Struct` are in the list identified by `child_list_id`. - pub visit_struct: extern "C" fn( - data: *mut c_void, - sibling_list_id: usize, - name: KernelStringSlice, - child_list_id: usize, - ), - - /// Indicate that the schema contains an Array type. `child_list_id` will be a _one_ item list - /// with the array's element type - pub visit_array: extern "C" fn( - data: *mut c_void, - sibling_list_id: usize, - name: KernelStringSlice, - contains_null: bool, // if this array can contain null values - child_list_id: usize, - ), - - /// Indicate that the schema contains an Map type. `child_list_id` will be a _two_ item list - /// where the first element is the map's key type and the second element is the - /// map's value type - pub visit_map: extern "C" fn( - data: *mut c_void, - sibling_list_id: usize, - name: KernelStringSlice, - value_contains_null: bool, // if this map can contain null values - child_list_id: usize, - ), - - /// visit a `decimal` with the specified `precision` and `scale` - pub visit_decimal: extern "C" fn( - data: *mut c_void, - sibling_list_id: usize, - name: KernelStringSlice, - precision: u8, - scale: u8, - ), - - /// Visit a `string` belonging to the list identified by `sibling_list_id`. - pub visit_string: - extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), - - /// Visit a `long` belonging to the list identified by `sibling_list_id`. - pub visit_long: - extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), - - /// Visit an `integer` belonging to the list identified by `sibling_list_id`. 
- pub visit_integer: - extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), - - /// Visit a `short` belonging to the list identified by `sibling_list_id`. - pub visit_short: - extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), - - /// Visit a `byte` belonging to the list identified by `sibling_list_id`. - pub visit_byte: - extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), - - /// Visit a `float` belonging to the list identified by `sibling_list_id`. - pub visit_float: - extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), - - /// Visit a `double` belonging to the list identified by `sibling_list_id`. - pub visit_double: - extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), - - /// Visit a `boolean` belonging to the list identified by `sibling_list_id`. - pub visit_boolean: - extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), - - /// Visit `binary` belonging to the list identified by `sibling_list_id`. - pub visit_binary: - extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), - - /// Visit a `date` belonging to the list identified by `sibling_list_id`. - pub visit_date: - extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), - - /// Visit a `timestamp` belonging to the list identified by `sibling_list_id`. - pub visit_timestamp: - extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), - - /// Visit a `timestamp` with no timezone belonging to the list identified by `sibling_list_id`. - pub visit_timestamp_ntz: - extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), -} - -/// Visit the schema of the passed `SnapshotHandle`, using the provided `visitor`. See the -/// documentation of [`EngineSchemaVisitor`] for a description of how this visitor works. 
-/// -/// This method returns the id of the list allocated to hold the top level schema columns. -/// -/// # Safety -/// -/// Caller is responsible for passing a valid snapshot handle and schema visitor. -#[no_mangle] -pub unsafe extern "C" fn visit_schema( - snapshot: Handle, - visitor: &mut EngineSchemaVisitor, -) -> usize { - let snapshot = unsafe { snapshot.as_ref() }; - // Visit all the fields of a struct and return the list of children - fn visit_struct_fields(visitor: &EngineSchemaVisitor, s: &StructType) -> usize { - let child_list_id = (visitor.make_field_list)(visitor.data, s.fields.len()); - for field in s.fields() { - visit_schema_item(field.data_type(), field.name(), visitor, child_list_id); - } - child_list_id - } - - fn visit_array_item(visitor: &EngineSchemaVisitor, at: &ArrayType) -> usize { - let child_list_id = (visitor.make_field_list)(visitor.data, 1); - visit_schema_item(&at.element_type, "array_element", visitor, child_list_id); - child_list_id - } - - fn visit_map_types(visitor: &EngineSchemaVisitor, mt: &MapType) -> usize { - let child_list_id = (visitor.make_field_list)(visitor.data, 2); - visit_schema_item(&mt.key_type, "map_key", visitor, child_list_id); - visit_schema_item(&mt.value_type, "map_value", visitor, child_list_id); - child_list_id - } - - // Visit a struct field (recursively) and add the result to the list of siblings. - fn visit_schema_item( - data_type: &DataType, - name: &str, - visitor: &EngineSchemaVisitor, - sibling_list_id: usize, - ) { - macro_rules! 
call { - ( $visitor_fn:ident $(, $extra_args:expr) *) => { - (visitor.$visitor_fn)(visitor.data, sibling_list_id, name.into() $(, $extra_args) *) - }; - } - match data_type { - DataType::Struct(st) => call!(visit_struct, visit_struct_fields(visitor, st)), - DataType::Map(mt) => { - call!( - visit_map, - mt.value_contains_null, - visit_map_types(visitor, mt) - ) - } - DataType::Array(at) => { - call!(visit_array, at.contains_null, visit_array_item(visitor, at)) - } - DataType::Primitive(PrimitiveType::Decimal(precision, scale)) => { - call!(visit_decimal, *precision, *scale) - } - &DataType::STRING => call!(visit_string), - &DataType::LONG => call!(visit_long), - &DataType::INTEGER => call!(visit_integer), - &DataType::SHORT => call!(visit_short), - &DataType::BYTE => call!(visit_byte), - &DataType::FLOAT => call!(visit_float), - &DataType::DOUBLE => call!(visit_double), - &DataType::BOOLEAN => call!(visit_boolean), - &DataType::BINARY => call!(visit_binary), - &DataType::DATE => call!(visit_date), - &DataType::TIMESTAMP => call!(visit_timestamp), - &DataType::TIMESTAMP_NTZ => call!(visit_timestamp_ntz), - } - } - - visit_struct_fields(visitor, snapshot.schema()) -} - -// TODO move expression visitors to separate module - // A set that can identify its contents by address pub struct ReferenceSet { map: std::collections::HashMap, @@ -1047,240 +841,3 @@ impl Default for ReferenceSet { } } } - -#[derive(Default)] -pub struct KernelExpressionVisitorState { - // TODO: ReferenceSet> instead? - inflight_expressions: ReferenceSet, -} -impl KernelExpressionVisitorState { - fn new() -> Self { - Self { - inflight_expressions: Default::default(), - } - } -} - -/// A predicate that can be used to skip data when scanning. -/// -/// When invoking [`scan::scan`], The engine provides a pointer to the (engine's native) predicate, -/// along with a visitor function that can be invoked to recursively visit the predicate. 
This -/// engine state must be valid until the call to `scan::scan` returns. Inside that method, the -/// kernel allocates visitor state, which becomes the second argument to the predicate visitor -/// invocation along with the engine-provided predicate pointer. The visitor state is valid for the -/// lifetime of the predicate visitor invocation. Thanks to this double indirection, engine and -/// kernel each retain ownership of their respective objects, with no need to coordinate memory -/// lifetimes with the other. -#[repr(C)] -pub struct EnginePredicate { - predicate: *mut c_void, - visitor: - extern "C" fn(predicate: *mut c_void, state: &mut KernelExpressionVisitorState) -> usize, -} - -fn wrap_expression(state: &mut KernelExpressionVisitorState, expr: Expression) -> usize { - state.inflight_expressions.insert(expr) -} - -fn unwrap_kernel_expression( - state: &mut KernelExpressionVisitorState, - exprid: usize, -) -> Option { - state.inflight_expressions.take(exprid) -} - -fn visit_expression_binary( - state: &mut KernelExpressionVisitorState, - op: BinaryOperator, - a: usize, - b: usize, -) -> usize { - let left = unwrap_kernel_expression(state, a).map(Box::new); - let right = unwrap_kernel_expression(state, b).map(Box::new); - match left.zip(right) { - Some((left, right)) => { - wrap_expression(state, Expression::BinaryOperation { op, left, right }) - } - None => 0, // invalid child => invalid node - } -} - -fn visit_expression_unary( - state: &mut KernelExpressionVisitorState, - op: UnaryOperator, - inner_expr: usize, -) -> usize { - unwrap_kernel_expression(state, inner_expr).map_or(0, |expr| { - wrap_expression(state, Expression::unary(op, expr)) - }) -} - -// The EngineIterator is not thread safe, not reentrant, not owned by callee, not freed by callee. 
-#[no_mangle] -pub extern "C" fn visit_expression_and( - state: &mut KernelExpressionVisitorState, - children: &mut EngineIterator, -) -> usize { - let result = Expression::and_from( - children.flat_map(|child| unwrap_kernel_expression(state, child as usize)), - ); - wrap_expression(state, result) -} - -#[no_mangle] -pub extern "C" fn visit_expression_lt( - state: &mut KernelExpressionVisitorState, - a: usize, - b: usize, -) -> usize { - visit_expression_binary(state, BinaryOperator::LessThan, a, b) -} - -#[no_mangle] -pub extern "C" fn visit_expression_le( - state: &mut KernelExpressionVisitorState, - a: usize, - b: usize, -) -> usize { - visit_expression_binary(state, BinaryOperator::LessThanOrEqual, a, b) -} - -#[no_mangle] -pub extern "C" fn visit_expression_gt( - state: &mut KernelExpressionVisitorState, - a: usize, - b: usize, -) -> usize { - visit_expression_binary(state, BinaryOperator::GreaterThan, a, b) -} - -#[no_mangle] -pub extern "C" fn visit_expression_ge( - state: &mut KernelExpressionVisitorState, - a: usize, - b: usize, -) -> usize { - visit_expression_binary(state, BinaryOperator::GreaterThanOrEqual, a, b) -} - -#[no_mangle] -pub extern "C" fn visit_expression_eq( - state: &mut KernelExpressionVisitorState, - a: usize, - b: usize, -) -> usize { - visit_expression_binary(state, BinaryOperator::Equal, a, b) -} - -/// # Safety -/// The string slice must be valid -#[no_mangle] -pub unsafe extern "C" fn visit_expression_column( - state: &mut KernelExpressionVisitorState, - name: KernelStringSlice, - allocate_error: AllocateErrorFn, -) -> ExternResult { - let name = unsafe { String::try_from_slice(&name) }; - visit_expression_column_impl(state, name).into_extern_result(&allocate_error) -} -fn visit_expression_column_impl( - state: &mut KernelExpressionVisitorState, - name: DeltaResult, -) -> DeltaResult { - Ok(wrap_expression(state, Expression::Column(name?))) -} - -#[no_mangle] -pub extern "C" fn visit_expression_not( - state: &mut 
KernelExpressionVisitorState, - inner_expr: usize, -) -> usize { - visit_expression_unary(state, UnaryOperator::Not, inner_expr) -} - -#[no_mangle] -pub extern "C" fn visit_expression_is_null( - state: &mut KernelExpressionVisitorState, - inner_expr: usize, -) -> usize { - visit_expression_unary(state, UnaryOperator::IsNull, inner_expr) -} - -/// # Safety -/// The string slice must be valid -#[no_mangle] -pub unsafe extern "C" fn visit_expression_literal_string( - state: &mut KernelExpressionVisitorState, - value: KernelStringSlice, - allocate_error: AllocateErrorFn, -) -> ExternResult { - let value = unsafe { String::try_from_slice(&value) }; - visit_expression_literal_string_impl(state, value).into_extern_result(&allocate_error) -} -fn visit_expression_literal_string_impl( - state: &mut KernelExpressionVisitorState, - value: DeltaResult, -) -> DeltaResult { - Ok(wrap_expression( - state, - Expression::Literal(Scalar::from(value?)), - )) -} - -// We need to get parse.expand working to be able to macro everything below, see issue #255 - -#[no_mangle] -pub extern "C" fn visit_expression_literal_int( - state: &mut KernelExpressionVisitorState, - value: i32, -) -> usize { - wrap_expression(state, Expression::literal(value)) -} - -#[no_mangle] -pub extern "C" fn visit_expression_literal_long( - state: &mut KernelExpressionVisitorState, - value: i64, -) -> usize { - wrap_expression(state, Expression::literal(value)) -} - -#[no_mangle] -pub extern "C" fn visit_expression_literal_short( - state: &mut KernelExpressionVisitorState, - value: i16, -) -> usize { - wrap_expression(state, Expression::literal(value)) -} - -#[no_mangle] -pub extern "C" fn visit_expression_literal_byte( - state: &mut KernelExpressionVisitorState, - value: i8, -) -> usize { - wrap_expression(state, Expression::literal(value)) -} - -#[no_mangle] -pub extern "C" fn visit_expression_literal_float( - state: &mut KernelExpressionVisitorState, - value: f32, -) -> usize { - wrap_expression(state, 
Expression::literal(value)) -} - -#[no_mangle] -pub extern "C" fn visit_expression_literal_double( - state: &mut KernelExpressionVisitorState, - value: f64, -) -> usize { - wrap_expression(state, Expression::literal(value)) -} - -#[no_mangle] -pub extern "C" fn visit_expression_literal_bool( - state: &mut KernelExpressionVisitorState, - value: bool, -) -> usize { - wrap_expression(state, Expression::literal(value)) -} diff --git a/ffi/src/scan.rs b/ffi/src/scan.rs index 495965def..92d067621 100644 --- a/ffi/src/scan.rs +++ b/ffi/src/scan.rs @@ -13,11 +13,11 @@ use delta_kernel_ffi_macros::handle_descriptor; use tracing::debug; use url::Url; +use crate::expressions::{unwrap_kernel_expression, EnginePredicate, KernelExpressionVisitorState}; use crate::{ - unwrap_kernel_expression, AllocateStringFn, EnginePredicate, ExclusiveEngineData, ExternEngine, - ExternResult, IntoExternResult, KernelBoolSlice, KernelExpressionVisitorState, - KernelRowIndexArray, KernelStringSlice, NullableCvoid, SharedExternEngine, SharedSnapshot, - StringIter, StringSliceIterator, TryFromStringSlice, + AllocateStringFn, ExclusiveEngineData, ExternEngine, ExternResult, IntoExternResult, + KernelBoolSlice, KernelRowIndexArray, KernelStringSlice, NullableCvoid, SharedExternEngine, + SharedSnapshot, StringIter, StringSliceIterator, TryFromStringSlice, }; use super::handle::Handle; diff --git a/ffi/src/schema.rs b/ffi/src/schema.rs new file mode 100644 index 000000000..55bf28dce --- /dev/null +++ b/ffi/src/schema.rs @@ -0,0 +1,207 @@ +use std::os::raw::c_void; + +use crate::{handle::Handle, KernelStringSlice, SharedSnapshot}; +use delta_kernel::schema::{ArrayType, DataType, MapType, PrimitiveType, StructType}; +/// The `EngineSchemaVisitor` defines a visitor system to allow engines to build their own +/// representation of a schema from a particular schema within kernel. +/// +/// The model is list based. When the kernel needs a list, it will ask engine to allocate one of a +/// particular size. 
Once allocated the engine returns an `id`, which can be any integer identifier +/// ([`usize`]) the engine wants, and will be passed back to the engine to identify the list in the +/// future. +/// +/// Every schema element the kernel visits belongs to some list of "sibling" elements. The schema +/// itself is a list of schema elements, and every complex type (struct, map, array) contains a list +/// of "child" elements. +/// 1. Before visiting schema or any complex type, the kernel asks the engine to allocate a list to +/// hold its children +/// 2. When visiting any schema element, the kernel passes its parent's "child list" as the +/// "sibling list" the element should be appended to: +/// - For the top-level schema, visit each top-level column, passing the column's name and type +/// - For a struct, first visit each struct field, passing the field's name, type, nullability, +/// and metadata +/// - For a map, visit the key and value, passing its special name ("map_key" or "map_value"), +/// type, and value nullability (keys are never nullable) +/// - For a list, visit the element, passing its special name ("array_element"), type, and +/// nullability +/// 3. When visiting a complex schema element, the kernel also passes the "child list" containing +/// that element's (already-visited) children. +/// 4. The [`visit_schema`] method returns the id of the list of top-level columns +// WARNING: the visitor MUST NOT retain internal references to the string slices passed to visitor methods +// TODO: struct nullability and field metadata +#[repr(C)] +pub struct EngineSchemaVisitor { + /// opaque state pointer + pub data: *mut c_void, + /// Creates a new field list, optionally reserving capacity up front + pub make_field_list: extern "C" fn(data: *mut c_void, reserve: usize) -> usize, + + // visitor methods that should instantiate and append the appropriate type to the field list + /// Indicate that the schema contains a `Struct` type. 
The top level of a Schema is always a + /// `Struct`. The fields of the `Struct` are in the list identified by `child_list_id`. + pub visit_struct: extern "C" fn( + data: *mut c_void, + sibling_list_id: usize, + name: KernelStringSlice, + child_list_id: usize, + ), + + /// Indicate that the schema contains an Array type. `child_list_id` will be a _one_ item list + /// with the array's element type + pub visit_array: extern "C" fn( + data: *mut c_void, + sibling_list_id: usize, + name: KernelStringSlice, + contains_null: bool, // if this array can contain null values + child_list_id: usize, + ), + + /// Indicate that the schema contains an Map type. `child_list_id` will be a _two_ item list + /// where the first element is the map's key type and the second element is the + /// map's value type + pub visit_map: extern "C" fn( + data: *mut c_void, + sibling_list_id: usize, + name: KernelStringSlice, + value_contains_null: bool, // if this map can contain null values + child_list_id: usize, + ), + + /// visit a `decimal` with the specified `precision` and `scale` + pub visit_decimal: extern "C" fn( + data: *mut c_void, + sibling_list_id: usize, + name: KernelStringSlice, + precision: u8, + scale: u8, + ), + + /// Visit a `string` belonging to the list identified by `sibling_list_id`. + pub visit_string: + extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), + + /// Visit a `long` belonging to the list identified by `sibling_list_id`. + pub visit_long: + extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), + + /// Visit an `integer` belonging to the list identified by `sibling_list_id`. + pub visit_integer: + extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), + + /// Visit a `short` belonging to the list identified by `sibling_list_id`. 
+ pub visit_short: + extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), + + /// Visit a `byte` belonging to the list identified by `sibling_list_id`. + pub visit_byte: + extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), + + /// Visit a `float` belonging to the list identified by `sibling_list_id`. + pub visit_float: + extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), + + /// Visit a `double` belonging to the list identified by `sibling_list_id`. + pub visit_double: + extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), + + /// Visit a `boolean` belonging to the list identified by `sibling_list_id`. + pub visit_boolean: + extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), + + /// Visit `binary` belonging to the list identified by `sibling_list_id`. + pub visit_binary: + extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), + + /// Visit a `date` belonging to the list identified by `sibling_list_id`. + pub visit_date: + extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), + + /// Visit a `timestamp` belonging to the list identified by `sibling_list_id`. + pub visit_timestamp: + extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), + + /// Visit a `timestamp` with no timezone belonging to the list identified by `sibling_list_id`. + pub visit_timestamp_ntz: + extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), +} + +/// Visit the schema of the passed `SnapshotHandle`, using the provided `visitor`. See the +/// documentation of [`EngineSchemaVisitor`] for a description of how this visitor works. +/// +/// This method returns the id of the list allocated to hold the top level schema columns. +/// +/// # Safety +/// +/// Caller is responsible for passing a valid snapshot handle and schema visitor. 
+#[no_mangle] +pub unsafe extern "C" fn visit_schema( + snapshot: Handle, + visitor: &mut EngineSchemaVisitor, +) -> usize { + let snapshot = unsafe { snapshot.as_ref() }; + // Visit all the fields of a struct and return the list of children + fn visit_struct_fields(visitor: &EngineSchemaVisitor, s: &StructType) -> usize { + let child_list_id = (visitor.make_field_list)(visitor.data, s.fields.len()); + for field in s.fields() { + visit_schema_item(field.data_type(), field.name(), visitor, child_list_id); + } + child_list_id + } + + fn visit_array_item(visitor: &EngineSchemaVisitor, at: &ArrayType) -> usize { + let child_list_id = (visitor.make_field_list)(visitor.data, 1); + visit_schema_item(&at.element_type, "array_element", visitor, child_list_id); + child_list_id + } + + fn visit_map_types(visitor: &EngineSchemaVisitor, mt: &MapType) -> usize { + let child_list_id = (visitor.make_field_list)(visitor.data, 2); + visit_schema_item(&mt.key_type, "map_key", visitor, child_list_id); + visit_schema_item(&mt.value_type, "map_value", visitor, child_list_id); + child_list_id + } + + // Visit a struct field (recursively) and add the result to the list of siblings. + fn visit_schema_item( + data_type: &DataType, + name: &str, + visitor: &EngineSchemaVisitor, + sibling_list_id: usize, + ) { + macro_rules! 
call { + ( $visitor_fn:ident $(, $extra_args:expr) *) => { + (visitor.$visitor_fn)(visitor.data, sibling_list_id, name.into() $(, $extra_args) *) + }; + } + match data_type { + DataType::Struct(st) => call!(visit_struct, visit_struct_fields(visitor, st)), + DataType::Map(mt) => { + call!( + visit_map, + mt.value_contains_null, + visit_map_types(visitor, mt) + ) + } + DataType::Array(at) => { + call!(visit_array, at.contains_null, visit_array_item(visitor, at)) + } + DataType::Primitive(PrimitiveType::Decimal(precision, scale)) => { + call!(visit_decimal, *precision, *scale) + } + &DataType::STRING => call!(visit_string), + &DataType::LONG => call!(visit_long), + &DataType::INTEGER => call!(visit_integer), + &DataType::SHORT => call!(visit_short), + &DataType::BYTE => call!(visit_byte), + &DataType::FLOAT => call!(visit_float), + &DataType::DOUBLE => call!(visit_double), + &DataType::BOOLEAN => call!(visit_boolean), + &DataType::BINARY => call!(visit_binary), + &DataType::DATE => call!(visit_date), + &DataType::TIMESTAMP => call!(visit_timestamp), + &DataType::TIMESTAMP_NTZ => call!(visit_timestamp_ntz), + } + } + + visit_struct_fields(visitor, snapshot.schema()) +} From b367df1ca617c7e301cd787d1e820308f6c46d5a Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Fri, 27 Sep 2024 10:44:40 -0700 Subject: [PATCH 02/82] Remove new visitor --- ffi/src/expressions.rs | 120 ++--------------------------------------- 1 file changed, 3 insertions(+), 117 deletions(-) diff --git a/ffi/src/expressions.rs b/ffi/src/expressions.rs index 1f8b723aa..9bd1a01cb 100644 --- a/ffi/src/expressions.rs +++ b/ffi/src/expressions.rs @@ -1,11 +1,11 @@ use std::ffi::c_void; use crate::{ - handle::Handle, AllocateErrorFn, EngineIterator, ExternResult, IntoExternResult, - KernelStringSlice, ReferenceSet, TryFromStringSlice, + AllocateErrorFn, EngineIterator, ExternResult, IntoExternResult, KernelStringSlice, + ReferenceSet, TryFromStringSlice, }; use delta_kernel::{ - 
expressions::{BinaryOperator, Expression, Scalar, UnaryOperator, VariadicOperator}, + expressions::{BinaryOperator, Expression, Scalar, UnaryOperator}, DeltaResult, }; @@ -245,117 +245,3 @@ pub extern "C" fn visit_expression_literal_bool( ) -> usize { wrap_expression(state, Expression::literal(value)) } - -/// Kernel Expression to Engine Expression -/// -#[repr(C)] -pub struct EngineExpressionVisitor { - /// opaque state pointer - pub data: *mut c_void, - /// Creates a new field list, optionally reserving capacity up front - pub make_expr_list: extern "C" fn(data: *mut c_void, reserve: usize) -> usize, - - /// Visit an `integer` belonging to the list identified by `sibling_list_id`. - pub visit_int: extern "C" fn(data: *mut c_void, value: i32) -> usize, - pub visit_long: extern "C" fn(data: *mut c_void, value: i64) -> usize, - pub visit_short: extern "C" fn(data: *mut c_void, value: i16) -> usize, - pub visit_byte: extern "C" fn(data: *mut c_void, value: i8) -> usize, - pub visit_float: extern "C" fn(data: *mut c_void, value: f32) -> usize, - pub visit_double: extern "C" fn(data: *mut c_void, value: f64) -> usize, - pub visit_bool: extern "C" fn(data: *mut c_void, value: bool) -> usize, - pub visit_string: extern "C" fn(data: *mut c_void, value: KernelStringSlice) -> usize, - - pub visit_and: extern "C" fn(data: *mut c_void, len: usize) -> usize, - pub visit_or: extern "C" fn(data: *mut c_void, len: usize) -> usize, - pub visit_variadic_item: - extern "C" fn(data: *mut c_void, variadic_id: usize, sub_expr_id: usize), - pub visit_not: extern "C" fn(data: *mut c_void, inner_expr: usize) -> usize, - pub visit_is_null: extern "C" fn(data: *mut c_void, inner_expr: usize) -> usize, - - pub visit_lt: extern "C" fn(data: *mut c_void, a: usize, b: usize) -> usize, - pub visit_le: extern "C" fn(data: *mut c_void, a: usize, b: usize) -> usize, - pub visit_gt: extern "C" fn(data: *mut c_void, a: usize, b: usize) -> usize, - pub visit_ge: extern "C" fn(data: *mut c_void, a: 
usize, b: usize) -> usize, - pub visit_eq: extern "C" fn(data: *mut c_void, a: usize, b: usize) -> usize, - - pub visit_column: extern "C" fn(data: *mut c_void, name: KernelStringSlice) -> usize, -} - -#[no_mangle] -pub unsafe extern "C" fn visit_expression( - expression: &Expression, - visitor: &mut EngineExpressionVisitor, -) -> usize { - fn visit_variadic( - visitor: &mut EngineExpressionVisitor, - op: &VariadicOperator, - exprs: &Vec, - ) -> usize { - let variadic_id = match op { - VariadicOperator::And => (visitor.visit_and)(visitor.data, exprs.len()), - VariadicOperator::Or => (visitor.visit_or)(visitor.data, exprs.len()), - }; - for expr in exprs { - let expr_id = visit_expression(visitor, expr); - (visitor.visit_variadic_item)(visitor.data, variadic_id, expr_id) - } - variadic_id - } - fn visit_binary_op( - visitor: &mut EngineExpressionVisitor, - op: &BinaryOperator, - a: &Expression, - b: &Expression, - ) -> usize { - let a_id = visit_expression(visitor, a); - let b_id = visit_expression(visitor, b); - match op { - BinaryOperator::Plus => todo!(), - BinaryOperator::Minus => todo!(), - BinaryOperator::Multiply => todo!(), - BinaryOperator::Divide => todo!(), - BinaryOperator::LessThan => todo!(), - BinaryOperator::LessThanOrEqual => todo!(), - BinaryOperator::GreaterThan => todo!(), - BinaryOperator::GreaterThanOrEqual => todo!(), - BinaryOperator::Equal => todo!(), - BinaryOperator::NotEqual => todo!(), - BinaryOperator::Distinct => todo!(), - BinaryOperator::In => todo!(), - BinaryOperator::NotIn => todo!(), - } - } - fn visit_expression(visitor: &mut EngineExpressionVisitor, expression: &Expression) -> usize { - macro_rules! 
call { - ( $visitor_fn:ident $(, $extra_args:expr) *) => { - (visitor.$visitor_fn)(visitor.data $(, $extra_args) *) - }; - } - match expression { - Expression::Literal(lit) => match lit { - Scalar::Integer(val) => call!(visit_int, *val), - Scalar::Long(val) => call!(visit_long, *val), - Scalar::Short(val) => call!(visit_short, *val), - Scalar::Byte(val) => call!(visit_byte, *val), - Scalar::Float(val) => call!(visit_float, *val), - Scalar::Double(val) => call!(visit_double, *val), - Scalar::String(val) => call!(visit_string, val.into()), - Scalar::Boolean(val) => call!(visit_bool, *val), - Scalar::Timestamp(val) => todo!(), - Scalar::TimestampNtz(_) => todo!(), - Scalar::Date(_) => todo!(), - Scalar::Binary(_) => todo!(), - Scalar::Decimal(_, _, _) => todo!(), - Scalar::Null(_) => todo!(), - Scalar::Struct(_) => todo!(), - Scalar::Array(_) => todo!(), - }, - Expression::Column(name) => call!(visit_column, name.into()), - Expression::Struct(_) => todo!(), - Expression::BinaryOperation { op, left, right } => todo!(), - Expression::UnaryOperation { op, expr } => todo!(), - Expression::VariadicOperation { op, exprs } => visit_variadic(visitor, op, exprs), - } - } - visit_expression(visitor, expression) -} From 1bc700ab2b3b596d9f003adc3d1db2239f1b958c Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Fri, 27 Sep 2024 10:45:28 -0700 Subject: [PATCH 03/82] Add kernel expression to engine expression --- ffi/src/expressions.rs | 120 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 117 insertions(+), 3 deletions(-) diff --git a/ffi/src/expressions.rs b/ffi/src/expressions.rs index 9bd1a01cb..1f8b723aa 100644 --- a/ffi/src/expressions.rs +++ b/ffi/src/expressions.rs @@ -1,11 +1,11 @@ use std::ffi::c_void; use crate::{ - AllocateErrorFn, EngineIterator, ExternResult, IntoExternResult, KernelStringSlice, - ReferenceSet, TryFromStringSlice, + handle::Handle, AllocateErrorFn, EngineIterator, ExternResult, IntoExternResult, + KernelStringSlice, ReferenceSet, 
TryFromStringSlice, }; use delta_kernel::{ - expressions::{BinaryOperator, Expression, Scalar, UnaryOperator}, + expressions::{BinaryOperator, Expression, Scalar, UnaryOperator, VariadicOperator}, DeltaResult, }; @@ -245,3 +245,117 @@ pub extern "C" fn visit_expression_literal_bool( ) -> usize { wrap_expression(state, Expression::literal(value)) } + +/// Kernel Expression to Engine Expression +/// +#[repr(C)] +pub struct EngineExpressionVisitor { + /// opaque state pointer + pub data: *mut c_void, + /// Creates a new field list, optionally reserving capacity up front + pub make_expr_list: extern "C" fn(data: *mut c_void, reserve: usize) -> usize, + + /// Visit an `integer` belonging to the list identified by `sibling_list_id`. + pub visit_int: extern "C" fn(data: *mut c_void, value: i32) -> usize, + pub visit_long: extern "C" fn(data: *mut c_void, value: i64) -> usize, + pub visit_short: extern "C" fn(data: *mut c_void, value: i16) -> usize, + pub visit_byte: extern "C" fn(data: *mut c_void, value: i8) -> usize, + pub visit_float: extern "C" fn(data: *mut c_void, value: f32) -> usize, + pub visit_double: extern "C" fn(data: *mut c_void, value: f64) -> usize, + pub visit_bool: extern "C" fn(data: *mut c_void, value: bool) -> usize, + pub visit_string: extern "C" fn(data: *mut c_void, value: KernelStringSlice) -> usize, + + pub visit_and: extern "C" fn(data: *mut c_void, len: usize) -> usize, + pub visit_or: extern "C" fn(data: *mut c_void, len: usize) -> usize, + pub visit_variadic_item: + extern "C" fn(data: *mut c_void, variadic_id: usize, sub_expr_id: usize), + pub visit_not: extern "C" fn(data: *mut c_void, inner_expr: usize) -> usize, + pub visit_is_null: extern "C" fn(data: *mut c_void, inner_expr: usize) -> usize, + + pub visit_lt: extern "C" fn(data: *mut c_void, a: usize, b: usize) -> usize, + pub visit_le: extern "C" fn(data: *mut c_void, a: usize, b: usize) -> usize, + pub visit_gt: extern "C" fn(data: *mut c_void, a: usize, b: usize) -> usize, + pub 
visit_ge: extern "C" fn(data: *mut c_void, a: usize, b: usize) -> usize, + pub visit_eq: extern "C" fn(data: *mut c_void, a: usize, b: usize) -> usize, + + pub visit_column: extern "C" fn(data: *mut c_void, name: KernelStringSlice) -> usize, +} + +#[no_mangle] +pub unsafe extern "C" fn visit_expression( + expression: &Expression, + visitor: &mut EngineExpressionVisitor, +) -> usize { + fn visit_variadic( + visitor: &mut EngineExpressionVisitor, + op: &VariadicOperator, + exprs: &Vec, + ) -> usize { + let variadic_id = match op { + VariadicOperator::And => (visitor.visit_and)(visitor.data, exprs.len()), + VariadicOperator::Or => (visitor.visit_or)(visitor.data, exprs.len()), + }; + for expr in exprs { + let expr_id = visit_expression(visitor, expr); + (visitor.visit_variadic_item)(visitor.data, variadic_id, expr_id) + } + variadic_id + } + fn visit_binary_op( + visitor: &mut EngineExpressionVisitor, + op: &BinaryOperator, + a: &Expression, + b: &Expression, + ) -> usize { + let a_id = visit_expression(visitor, a); + let b_id = visit_expression(visitor, b); + match op { + BinaryOperator::Plus => todo!(), + BinaryOperator::Minus => todo!(), + BinaryOperator::Multiply => todo!(), + BinaryOperator::Divide => todo!(), + BinaryOperator::LessThan => todo!(), + BinaryOperator::LessThanOrEqual => todo!(), + BinaryOperator::GreaterThan => todo!(), + BinaryOperator::GreaterThanOrEqual => todo!(), + BinaryOperator::Equal => todo!(), + BinaryOperator::NotEqual => todo!(), + BinaryOperator::Distinct => todo!(), + BinaryOperator::In => todo!(), + BinaryOperator::NotIn => todo!(), + } + } + fn visit_expression(visitor: &mut EngineExpressionVisitor, expression: &Expression) -> usize { + macro_rules! 
call { + ( $visitor_fn:ident $(, $extra_args:expr) *) => { + (visitor.$visitor_fn)(visitor.data $(, $extra_args) *) + }; + } + match expression { + Expression::Literal(lit) => match lit { + Scalar::Integer(val) => call!(visit_int, *val), + Scalar::Long(val) => call!(visit_long, *val), + Scalar::Short(val) => call!(visit_short, *val), + Scalar::Byte(val) => call!(visit_byte, *val), + Scalar::Float(val) => call!(visit_float, *val), + Scalar::Double(val) => call!(visit_double, *val), + Scalar::String(val) => call!(visit_string, val.into()), + Scalar::Boolean(val) => call!(visit_bool, *val), + Scalar::Timestamp(val) => todo!(), + Scalar::TimestampNtz(_) => todo!(), + Scalar::Date(_) => todo!(), + Scalar::Binary(_) => todo!(), + Scalar::Decimal(_, _, _) => todo!(), + Scalar::Null(_) => todo!(), + Scalar::Struct(_) => todo!(), + Scalar::Array(_) => todo!(), + }, + Expression::Column(name) => call!(visit_column, name.into()), + Expression::Struct(_) => todo!(), + Expression::BinaryOperation { op, left, right } => todo!(), + Expression::UnaryOperation { op, expr } => todo!(), + Expression::VariadicOperation { op, exprs } => visit_variadic(visitor, op, exprs), + } + } + visit_expression(visitor, expression) +} From dc1ed230c0ed86b756d8b822bc8c4713041a18b1 Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Mon, 30 Sep 2024 11:50:10 -0700 Subject: [PATCH 04/82] Add ffi validation and more visitor features --- ffi/examples/read-table/expression.h | 214 +++++++++++++++++++++++++++ ffi/examples/read-table/read_table.c | 9 +- ffi/src/expressions.rs | 107 +++++++++----- ffi/src/lib.rs | 5 +- 4 files changed, 297 insertions(+), 38 deletions(-) create mode 100644 ffi/examples/read-table/expression.h diff --git a/ffi/examples/read-table/expression.h b/ffi/examples/read-table/expression.h new file mode 100644 index 000000000..fb6a57770 --- /dev/null +++ b/ffi/examples/read-table/expression.h @@ -0,0 +1,214 @@ +#include "delta_kernel_ffi.h" +#include "assert.h" +#include 
"read_table.h" +#include +#include +#include +#include + +enum OpType { + Add, + Sub +}; +enum LitType { + i32, + i16, + i8 +}; +struct Literal { + enum LitType type; + int64_t value; +}; +struct BinOp { + enum OpType op; + struct Literal *left; + struct Literal *right; +}; + +enum VariadicType { + And, + Or +}; +enum ExpressionType { + BinOp, + Variadic, + Literal +}; +struct Variadic { + enum VariadicType op; + size_t len; + size_t max_len; + struct ExpressionRef *expr_list; +}; +struct ExpressionRef { + void* ref; + enum ExpressionType type; +}; +struct Data { + size_t len; + struct ExpressionRef handles[100]; +}; + +size_t put_handle(void *data, void *ref, enum ExpressionType type) { + struct Data * data_ptr = (struct Data *) data; + struct ExpressionRef expr = {.ref= ref, .type = type}; + data_ptr->handles[data_ptr->len] = expr; + return data_ptr->len++; +} +struct ExpressionRef *get_handle(void *data, size_t handle_index) { + struct Data * data_ptr = (struct Data *) data; + if (handle_index > data_ptr->len) { + return NULL; + } + return &data_ptr->handles[handle_index]; +} + +uintptr_t visit_add(void *data, uintptr_t a, uintptr_t b) { + struct BinOp *op = malloc(sizeof(struct BinOp)); + op->op = Add; + op->left = (struct Literal *) a; + op->right = (struct Literal *) b; + return put_handle(data, op, BinOp); +} + +uintptr_t visit_int(void *data, int32_t a) { + struct Literal *val = malloc(sizeof(struct Literal)); + val->type = i32; + val->value = (uintptr_t) a; + return put_handle(data, val, Literal); +} + +uintptr_t visit_and(void *data, uintptr_t len) { + struct Variadic* and = malloc(sizeof(struct Variadic)); + struct ExpressionRef* expr_lst = malloc(sizeof(struct ExpressionRef) * len); + and->len = 0; + and->max_len = len; + and->expr_list = expr_lst; + return put_handle(data, and, Variadic); +} + +void visit_variadic_item(void *data, uintptr_t variadic_id, uintptr_t sub_expr_id) { + struct ExpressionRef *sub_expr_ref = get_handle(data, sub_expr_id); + 
struct ExpressionRef *variadic_ref = get_handle(data, variadic_id); + if (sub_expr_ref == NULL || variadic_ref == NULL) { + abort(); + } + struct Variadic *variadic = variadic_ref->ref; + variadic->expr_list[variadic->len++] = *sub_expr_ref; +} + +// Print the schema of the snapshot +struct ExpressionRef construct_predicate(KernelPredicate* predicate) +{ + print_diag("Building schema\n"); + struct Data data = {0}; + EngineExpressionVisitor visitor = { + .data = &data, + .make_expr_list = NULL, + .visit_int = visit_int, + .visit_long = NULL, + .visit_short = NULL, + .visit_byte = NULL, + .visit_float = NULL, + .visit_double = NULL, + .visit_bool = NULL, + .visit_string = NULL, + .visit_and = visit_and, + .visit_or = NULL, + .visit_variadic_item = visit_variadic_item, + .visit_not = NULL, + .visit_is_null = NULL, + .visit_lt = NULL, + .visit_le = NULL, + .visit_gt = NULL, + .visit_ge = NULL, + .visit_eq = NULL, + .visit_ne = NULL, + .visit_distinct = NULL, + .visit_in = NULL, + .visit_not_in = NULL, + .visit_add = visit_add, + .visit_minus = NULL, + .visit_multiply = NULL, + .visit_divide = NULL, + .visit_column = NULL, + }; + uintptr_t schema_list_id = visit_expression(&predicate, &visitor); + return data.handles[schema_list_id]; +} + +void tab_helper(int n) { + if (n == 0) return; + printf(" "); + tab_helper(n-1); +} + +void print_tree(struct ExpressionRef ref, int depth) { + switch (ref.type) { + case BinOp: { + struct BinOp *op = ref.ref; + tab_helper(depth); + switch(op->op) { + case Add:{ + printf("ADD \n"); + break; + } + case Sub: { + printf("SUB \n"); + break; + } + break; + } + + struct ExpressionRef left = {.ref = op->left, .type = Literal}; + struct ExpressionRef right = {.ref = op->right, .type = Literal}; + print_tree(left, depth+1); + print_tree(right, depth+1); + break; + } + case Variadic: { + struct Variadic *var = ref.ref; + tab_helper(depth); + switch (var->op) { + case And: + printf("AND (\n"); + break; + case Or: + printf("OR (\n"); + break; + } 
+ for (size_t i = 0; i < var->len; i ++) { + print_tree(var->expr_list[i], depth +1); + } + tab_helper(depth); + printf(")\n"); + } + break; + case Literal: { + struct Literal *lit = ref.ref; + tab_helper(depth); + switch (lit->type) { + case i32: { + printf("i32("); + } + break; + case i16:{ + printf("i16("); + } + break; + case i8:{ + printf("i8("); + } + break; + } + printf("%lld)\n", lit->value); + } + break; + } + } + +void test_kernel_expr() { + KernelPredicate* pred = get_kernel_expression(); + struct ExpressionRef ref = construct_predicate(pred); + print_tree(ref, 0); +} diff --git a/ffi/examples/read-table/read_table.c b/ffi/examples/read-table/read_table.c index b9b2337e5..f7793c752 100644 --- a/ffi/examples/read-table/read_table.c +++ b/ffi/examples/read-table/read_table.c @@ -3,6 +3,7 @@ #include #include "arrow.h" +#include "expression.h" #include "read_table.h" #include "schema.h" @@ -105,7 +106,11 @@ void scan_row_callback( { (void)size; // not using this at the moment struct EngineContext* context = engine_context; - print_diag("Called back to read file: %.*s. (size: %" PRIu64 ", num records: ", (int)path.len, path.ptr, size); + print_diag( + "Called back to read file: %.*s. 
(size: %" PRIu64 ", num records: ", + (int)path.len, + path.ptr, + size); if (stats) { print_diag("%" PRId64 ")\n", stats->num_records); } else { @@ -195,6 +200,8 @@ PartitionList* get_partition_list(SharedGlobalScanState* state) int main(int argc, char* argv[]) { + test_kernel_expr(); + return -1; if (argc < 2) { printf("Usage: %s table/path\n", argv[0]); return -1; diff --git a/ffi/src/expressions.rs b/ffi/src/expressions.rs index 1f8b723aa..6ea8179bb 100644 --- a/ffi/src/expressions.rs +++ b/ffi/src/expressions.rs @@ -1,8 +1,8 @@ -use std::ffi::c_void; +use std::{ffi::c_void, io::Read, ops::Add, sync::Arc}; use crate::{ handle::Handle, AllocateErrorFn, EngineIterator, ExternResult, IntoExternResult, - KernelStringSlice, ReferenceSet, TryFromStringSlice, + KernelPredicate, KernelStringSlice, ReferenceSet, TryFromStringSlice, }; use delta_kernel::{ expressions::{BinaryOperator, Expression, Scalar, UnaryOperator, VariadicOperator}, @@ -246,6 +246,20 @@ pub extern "C" fn visit_expression_literal_bool( wrap_expression(state, Expression::literal(value)) } +#[no_mangle] +pub unsafe extern "C" fn get_kernel_expression() -> Handle { + use Expression as Expr; + Arc::new(Expr::and_from(vec![ + Expr::and_from(vec![ + Expr::literal(Scalar::Integer(5)), + Expr::literal(Scalar::Integer(20)), + ]), + Expr::literal(Scalar::Integer(10)), + Expr::literal(Scalar::Integer(10)), + ])) + .into() +} + /// Kernel Expression to Engine Expression /// #[repr(C)] @@ -262,9 +276,17 @@ pub struct EngineExpressionVisitor { pub visit_byte: extern "C" fn(data: *mut c_void, value: i8) -> usize, pub visit_float: extern "C" fn(data: *mut c_void, value: f32) -> usize, pub visit_double: extern "C" fn(data: *mut c_void, value: f64) -> usize, - pub visit_bool: extern "C" fn(data: *mut c_void, value: bool) -> usize, pub visit_string: extern "C" fn(data: *mut c_void, value: KernelStringSlice) -> usize, - + pub visit_bool: extern "C" fn(data: *mut c_void, value: bool) -> usize, + pub visit_timestamp: 
extern "C" fn(data: *mut c_void, value: i64) -> usize, + pub visit_timestamp_ntz: extern "C" fn(data: *mut c_void, value: i64) -> usize, + pub visit_date: extern "C" fn(data: *mut c_void, value: i32) -> usize, + pub visit_binary: extern "C" fn(data: *mut c_void, buf: *const u8, len: usize) -> usize, + // Scalar::Binary(_) => todo!(), TODO: Figure out how to pass over binary data + // Scalar::Decimal(_, _, _) => todo!(), + // Scalar::Null(_) => todo!(), + // Scalar::Struct(_) => todo!(), + // Scalar::Array(_) => todo!(), pub visit_and: extern "C" fn(data: *mut c_void, len: usize) -> usize, pub visit_or: extern "C" fn(data: *mut c_void, len: usize) -> usize, pub visit_variadic_item: @@ -277,13 +299,22 @@ pub struct EngineExpressionVisitor { pub visit_gt: extern "C" fn(data: *mut c_void, a: usize, b: usize) -> usize, pub visit_ge: extern "C" fn(data: *mut c_void, a: usize, b: usize) -> usize, pub visit_eq: extern "C" fn(data: *mut c_void, a: usize, b: usize) -> usize, + pub visit_ne: extern "C" fn(data: *mut c_void, a: usize, b: usize) -> usize, + pub visit_distinct: extern "C" fn(data: *mut c_void, a: usize, b: usize) -> usize, + pub visit_in: extern "C" fn(data: *mut c_void, a: usize, b: usize) -> usize, + pub visit_not_in: extern "C" fn(data: *mut c_void, a: usize, b: usize) -> usize, + + pub visit_add: extern "C" fn(data: *mut c_void, a: usize, b: usize) -> usize, + pub visit_minus: extern "C" fn(data: *mut c_void, a: usize, b: usize) -> usize, + pub visit_multiply: extern "C" fn(data: *mut c_void, a: usize, b: usize) -> usize, + pub visit_divide: extern "C" fn(data: *mut c_void, a: usize, b: usize) -> usize, pub visit_column: extern "C" fn(data: *mut c_void, name: KernelStringSlice) -> usize, } #[no_mangle] pub unsafe extern "C" fn visit_expression( - expression: &Expression, + expression: &Handle, // TODO: This will likely be some kind of Handle visitor: &mut EngineExpressionVisitor, ) -> usize { fn visit_variadic( @@ -301,30 +332,6 @@ pub unsafe extern "C" fn 
visit_expression( } variadic_id } - fn visit_binary_op( - visitor: &mut EngineExpressionVisitor, - op: &BinaryOperator, - a: &Expression, - b: &Expression, - ) -> usize { - let a_id = visit_expression(visitor, a); - let b_id = visit_expression(visitor, b); - match op { - BinaryOperator::Plus => todo!(), - BinaryOperator::Minus => todo!(), - BinaryOperator::Multiply => todo!(), - BinaryOperator::Divide => todo!(), - BinaryOperator::LessThan => todo!(), - BinaryOperator::LessThanOrEqual => todo!(), - BinaryOperator::GreaterThan => todo!(), - BinaryOperator::GreaterThanOrEqual => todo!(), - BinaryOperator::Equal => todo!(), - BinaryOperator::NotEqual => todo!(), - BinaryOperator::Distinct => todo!(), - BinaryOperator::In => todo!(), - BinaryOperator::NotIn => todo!(), - } - } fn visit_expression(visitor: &mut EngineExpressionVisitor, expression: &Expression) -> usize { macro_rules! call { ( $visitor_fn:ident $(, $extra_args:expr) *) => { @@ -341,10 +348,14 @@ pub unsafe extern "C" fn visit_expression( Scalar::Double(val) => call!(visit_double, *val), Scalar::String(val) => call!(visit_string, val.into()), Scalar::Boolean(val) => call!(visit_bool, *val), - Scalar::Timestamp(val) => todo!(), - Scalar::TimestampNtz(_) => todo!(), - Scalar::Date(_) => todo!(), - Scalar::Binary(_) => todo!(), + Scalar::Timestamp(val) => call!(visit_timestamp, *val), + Scalar::TimestampNtz(val) => call!(visit_timestamp_ntz, *val), + Scalar::Date(val) => call!(visit_date, *val), + Scalar::Binary(buf) => { + todo!() + // let len = buf.len(); + // call!(visit_binary, buf as *const, buf.len()); + } Scalar::Decimal(_, _, _) => todo!(), Scalar::Null(_) => todo!(), Scalar::Struct(_) => todo!(), @@ -352,10 +363,34 @@ pub unsafe extern "C" fn visit_expression( }, Expression::Column(name) => call!(visit_column, name.into()), Expression::Struct(_) => todo!(), - Expression::BinaryOperation { op, left, right } => todo!(), - Expression::UnaryOperation { op, expr } => todo!(), + 
Expression::BinaryOperation { op, left, right } => { + let left_id = visit_expression(visitor, left); + let right_id = visit_expression(visitor, right); + match op { + BinaryOperator::Plus => call!(visit_add, left_id, right_id), + BinaryOperator::Minus => call!(visit_minus, left_id, right_id), + BinaryOperator::Multiply => call!(visit_multiply, left_id, right_id), + BinaryOperator::Divide => call!(visit_divide, left_id, right_id), + BinaryOperator::LessThan => call!(visit_lt, left_id, right_id), + BinaryOperator::LessThanOrEqual => call!(visit_le, left_id, right_id), + BinaryOperator::GreaterThan => call!(visit_gt, left_id, right_id), + BinaryOperator::GreaterThanOrEqual => call!(visit_ge, left_id, right_id), + BinaryOperator::Equal => call!(visit_eq, left_id, right_id), + BinaryOperator::NotEqual => call!(visit_ne, left_id, right_id), + BinaryOperator::Distinct => call!(visit_distinct, left_id, right_id), + BinaryOperator::In => call!(visit_in, left_id, right_id), + BinaryOperator::NotIn => call!(visit_not_in, left_id, right_id), + } + } + Expression::UnaryOperation { op, expr } => { + let expr_id = visit_expression(visitor, expr); + match op { + UnaryOperator::Not => call!(visit_not, expr_id), + UnaryOperator::IsNull => call!(visit_is_null, expr_id), + } + } Expression::VariadicOperation { op, exprs } => visit_variadic(visitor, op, exprs), } } - visit_expression(visitor, expression) + visit_expression(visitor, expression.as_ref()) } diff --git a/ffi/src/lib.rs b/ffi/src/lib.rs index 923277e86..97ae53bfc 100644 --- a/ffi/src/lib.rs +++ b/ffi/src/lib.rs @@ -11,7 +11,7 @@ use tracing::debug; use url::Url; use delta_kernel::snapshot::Snapshot; -use delta_kernel::{DeltaResult, Engine, EngineData, Error, Table}; +use delta_kernel::{DeltaResult, Engine, EngineData, Error, Expression, Table}; use delta_kernel_ffi_macros::handle_descriptor; // cbindgen doesn't understand our use of feature flags here, and by default it parses `mod handle` @@ -692,6 +692,9 @@ pub unsafe 
extern "C" fn free_engine(engine: Handle) { #[handle_descriptor(target=Snapshot, mutable=false, sized=true)] pub struct SharedSnapshot; +#[handle_descriptor(target=Expression, mutable=false, sized=true)] +pub struct KernelPredicate; + /// Get the latest snapshot from the specified table /// /// # Safety From 8ce63381b0045a28145623c681ae31d87d36e58e Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Tue, 1 Oct 2024 15:06:01 -0700 Subject: [PATCH 05/82] initial type expansion --- .../clangd/index/arrow.c.593AB35726E16CF5.idx | Bin 0 -> 6730 bytes .../clangd/index/arrow.h.5C86D068362A9230.idx | Bin 0 -> 1184 bytes .../index/expression.h.2E6480F4E885C761.idx | Bin 0 -> 6454 bytes .../index/read_table.c.A243E3FFE9F1D262.idx | Bin 0 -> 6072 bytes .../index/read_table.h.B5A915C23F6DC678.idx | Bin 0 -> 1962 bytes .../index/schema.h.9F10114AF5265F91.idx | Bin 0 -> 6838 bytes ffi/examples/read-table/compile_commands.json | 14 + ffi/examples/read-table/expression.h | 280 +++++++++++++----- ffi/src/expressions.rs | 32 +- 9 files changed, 241 insertions(+), 85 deletions(-) create mode 100644 ffi/examples/read-table/.cache/clangd/index/arrow.c.593AB35726E16CF5.idx create mode 100644 ffi/examples/read-table/.cache/clangd/index/arrow.h.5C86D068362A9230.idx create mode 100644 ffi/examples/read-table/.cache/clangd/index/expression.h.2E6480F4E885C761.idx create mode 100644 ffi/examples/read-table/.cache/clangd/index/read_table.c.A243E3FFE9F1D262.idx create mode 100644 ffi/examples/read-table/.cache/clangd/index/read_table.h.B5A915C23F6DC678.idx create mode 100644 ffi/examples/read-table/.cache/clangd/index/schema.h.9F10114AF5265F91.idx create mode 100644 ffi/examples/read-table/compile_commands.json diff --git a/ffi/examples/read-table/.cache/clangd/index/arrow.c.593AB35726E16CF5.idx b/ffi/examples/read-table/.cache/clangd/index/arrow.c.593AB35726E16CF5.idx new file mode 100644 index 0000000000000000000000000000000000000000..2f130e517ab0571e0acb7fd574cff1a09dd7f767 GIT binary patch 
literal 6730 zcmZ8l3tWs@|9^g`X_{tE%`{!6%XFKXO6f^T(Y>VGs+MIJvE&*GE0$c7iYS+mQi@%x zNNcg%Dj|6nOS!Dd5?3JHBn~&cln$0pX z_g!xL?t#OvU7O42CE)9)*j7mR`P9mmKb|yfx#GP{nIFHbS}xAXyImHVZ+~rI)HT`S z2K3}oDt?f+Zd>U}_hA8p!v%e@&8J&dAJ1O-yxlm_FiQ93pE_H@OSN`{|Jk|yF?#_c7HP!QfS~Y(EZ;is=oA&MaiuD_PEAi!dr_iSv75nB3$4lP$Opd;jJ%#Tv zVG`dj*~Umu;az(=@o!%16^UX;%^p{K#XB+Iuin^xzsX<}%)v#wL z`rH@KZ%KW#7r4JXSO39@ruzPVtBfn6 zO3&pl+@4M}-Fz`MdRb4Aw&SbeuH$kSnI&{4{t*8q^bej!%Ik-em%jg1_pnn!;GZ`i z1eGZpH~sw6o}hhSmw4MwIug4sc46q4`4!@?1Ra9~`Df4Wo;g1Md2VFnm*I_}g)e@Z z&^ygyT=Sp2{~X-9cyyzSQ|xUVR~>a=s$y?jm0)?@=FS@pyZ7!}7F;^crsCN4-wa!~ z{o}9j^^McHy!qU*Pra47rSh)+Gnus~N4N&Ze&3XB?{!UL_h?_0G}!Oc6+ZSpt4!Cn zzH9zEOB~gb*js7a?tI?xb@bE0%u7Fv)TzDOd24*n_Fr@(pXa@f5$@bxE(zW5^Ki4p zORZzC#&uiBCpBvaJl!7q=<3FtYX`NRuj+?QsQ-1+T1%575y{2VDy}UJTT)z-oRT!V z4;Ur}=ew<(8Dor@(G0^@0!W*A|41KMvu^6^Dksxs!9oY}p01ZGAM7Kye*=4PgCCS!DYz>IMKHwZK`w|<-D z;EtJ*gh64fupW9)d5id4(p*Vhf}!s24?;ym=A8swq3 zE}V<=-Na{{3Ar0|xR%(;sJD`0BbSeK-!S?U(?pUNF zz^qUtlyYjNu2N5RA+R|h$^of5S_BWz0S^ujg5MyFRi}(iL*Z#ChQsN|IUTukI1{;M zB2S8ET>j>1=dK_1h;=d@nTUp`@phU`_mtscGJF7Z4uFJ~ch$AJn6fhg5<+MKaT7?X zHwg<9XYC)8FCjsy0^KSwQLh>HD(GGX6Ur!>`J_y6pv{nI$VFzk$c>sp1(5K6mKJ8) zU~C&qq#6c%4;%KKx*;bT+QFn9Bg3P%!))JV=khpAWj7{E@o*kHVvXV zoB{F-aNuwz=w*V8!;eA!7#!#{R|9r=dsV)+B=a}GXASTwn41MsJeCiUs! 
zk=pAWiN^E5I}iGtBb~tS1PNuNEsbdW(9bxH*=SfcS7)~*8%E9ds2@f=mLjQ?mUoUc zD%);nyz@{fQ3D96{JZ`F2hoXi7edHK{CtFImE;$Hb@XQLk${7H&0Z>w7C>IEadOJ0SHgyt3RY#}8 znB8Eu!H()W{qKgrZM`yYVzHI3l|E(SO>R6qVzzWN87>4~Az+$h>p-+kXipT-u98gfR zFxwB}evs2jXwpjA`G;OlAepuZctzk!?TPs0mDk<|98L%ec?-pK;zxaURbf`n5yT!} zJ-zZ_<|7b0=f$x)Ys{UcYo)oES^GqeFf+h=+IpivzdsWi9|G= z!yKfxf|40i3FBxqT5TU@*P^vz8X#?#?@wFb{CXOh<^`Y^+-me$SKD=0Hh(6O9wQhd zrS@WWonW21-?xlv2<&mz*}1>@Z*Q@0755q&Zxo($)MV?Kn_D5nPAyw_(EF^?!c z0Ez>kqy)^~2ibjYw@PyO+i{cf=XpHL&f?9|u)P#eDd=%xeSrEvkER#18Hk^O3}^`a zgJv$C7*n=@7?O$1GLao6?1|{N`}6uG6`>8R+JL3eO^BJLcm9jT2_mu}y`|&mlE7>n zKTe~?&Ee0{Knr*aGy;^!OVmK=z)RQMATxlMp^1xuUkpZ^LuG)3 zwgB-9kP*c+_u9gb=O}u#Q5C+RA21i0ez5BYM-FEpQ5F(YMP%V1{~Q!d@r8==mvNWf z7Lfoof_Ec?&;Zc`nwIe6v5cFs4`Tb^b8d!hNVE+}sm9<{MLIJ)%9at0b>LnHfwXH= z2GVvu9Oh{AD4#O!^(XTB6+}W|qi~~rD7og}-;8sM_?isMMRGCa-f5M$I5j9{H5sl3 zR1M}-KZ?!&*7~d1)uia^K(CG?VD>TSJ?4r|{&Ib*?zn9#4LL}hgB+=4Ma}hg>HWJ@ z8mb|(8e*wxRdB2uK}mx{8#QT_szWC=4YM2VH+s?~S7*7Xq;7HR8ZyljKu^GyN^kh( z#iRXuy*7|(lwQgJ>Mv&5AU1-krr{>>|8Q{SCV2y%Zy=P5|5ha4ima&<$^`!3=Yqb> z(8g>iFI1ndRm|=Lv=cD34zp(godwLfcoWb~z|=_0b^*VO+hdE(cDTVv_rqkOK9Ki8AXi-($UFo2)9NZXv}x^~5ynS| zhBRcJ#t|;oj`W`ROV|(r4Aa1sniQ58Q6sf4`i@A%2x6S5YtNUxy*ixz>!^?%eRNz* zX&y;d5o^dHFC8yaDuP*Wu>f^~xw_9418jh=yVYX0xW^wRZ@bt^9)E`KUX31ua-t7{ z$SESNv{8D}rO}endGA8I_Gu!00eBZcK#K>nPl5lGt4l}Sk~K$4Bxi`cuf&(^)*mjI z9+nqg|0NCnk;oe&Lq4}*ea`<}NSQ3x-(1#lpy5X|a+xz0nsH?yG8M@=WF|H<Yakb#KA?QC zpmh;4BdBfUiqC3E0}bW{>(Z2Avb~}n(4qS+W(R>c2)eWwF`J5XQ;|N!ccNZA-x(^c zBXtmB7osjsRp&9d!5!-q#V&oHCtOsq)j(hkgmC3{9&FEpJ;n3_L3U-;=MCx>HIFnD z)kh6FjM=SVu$8-{hHsKgk52fao_PBVWY4&hd;Q-DPQ{_3yM#~+jSZWs)r{{?~#LkjB|`1r)HMttS}Ch!pKqxrAi~7Rwqb65E0HB>z#@@vo<^$9>#3aFO5#utR`HQ#t+bQihi${b|FX=Py69@z`HQgB7C z05}2xy)#`yP1jK}eJRIAu?7z?iNsVuzo^Pkn7sEir>Z7)jY|bln5m zZ+QNfcySJ7=Rirb@QvwU_vOuxRfLTp8^dY1{CDp@%2Vh+C&Ps>tPohbI- zEY&zTZUf#n5K`|kdmMPjLCE=b0{ADm8$V`G0{^5YZU$a6x4&WbA@Cn+9yB*2{$}pe z3TF2s{(endgm^_rozx{xvXaUr?Smv%JRP2TUm@Q~6XR7*Coi9qJk+n$O^acM(`Ycj 
znqApZT$8e@M$2uuE90srFmsaTCR!3`ZDFY+5Q_Bl#Rd|rE3-F~ON~qnjm>RME%;W< F{{e|$m3IID literal 0 HcmV?d00001 diff --git a/ffi/examples/read-table/.cache/clangd/index/arrow.h.5C86D068362A9230.idx b/ffi/examples/read-table/.cache/clangd/index/arrow.h.5C86D068362A9230.idx new file mode 100644 index 0000000000000000000000000000000000000000..8ea41df84fe5ee822da09fbd91a8385e8181fd92 GIT binary patch literal 1184 zcmWIYbaR`*!oc91;#rZKT9OE4G6({3aY<2T5+eh{TxJG_iaBe84(1&);5obe%Yj6N zTMp$nt_Vxt%5>jSJZ0Ik-f3#FkC=pR?br8BT%40Ut0hEp+x^d-1T-UFf={7xw=i+_Yo|7*Nj7p3=HupS{ab@l6$T5mq zBi8IUF}EW@tm2?l-u)9l`W9gMG=RVF-@0-1C!WM>Bn>yaI zWd8NnCmrg$-pk}}eN&=xUBjom45f9NVX^KyZ{!aB*HYT9nxHNd9=GH*hsd?y8SH*4 zzRg>VOrMBtE8+Ai^sYSocYfz#yTX+ze$UHK>dXvW*_zdTc$w`d(SM3kH!_Rnt9|yL zyH0F*+>aaij1d7T?YmWiylu2!Zc97nzys8OjFI&i&UOb}->6s4vW&q4|>MlNtT!JMCwnUgTxa)tmS55Fj< zs2I#dSnvT=gTfA`+91&C>T@e|RiJ8d4smgqYFKEpa6v*59Ec1IJp958!aQ(K@=Hic zsKP^qUs6I+1s2LMnvoL{%1{GJx2>GcF^$;@Xq^~`m?X@)n1vIUs;kb601ERn@w3BH z0?d&>H-lpWYREFFl|iK!wiNP%OZ# z^76TOY<H)b`iN60VHpP23UOuZhykD95j?z+3HYSNPu69fAq zG%`DBS?=88*)|9vBmFNbE?iipKxmr^p=G-cPD~sBeX!xgo?4Y_VnR;x&9I+%;YIN2 ziOpXu%&#%c+)#Z#{@8?V{a^oo+c;Bt=V04?^dDK=Bv#xSWS3=A8Q|9mZI^hSw=E$T;uY4ZVobG+;6}>##c1qRJLD9V}wH`av zPky(3Yf0Vl4VyihdN7)8D3#ob>QmPtu?Mvo;6jHnooW=i<$-y}LtgZf|SW6*dOX zd*}A0>jmAHJ}cO?e}DVoLm8PJwtug2db&S3^H@vEvB#>bbq9ubci(*Z_1mh>DQEY# zP4wKdB&Yp&=91Clep%b38`qfqDrVfw@d^GDru}f_<;9y19~Y1I4Ib{_e^@iV8)Vn{ z#s1IhwwKQ{pGw|Z4`q{-}{dWIcdw1RTqj6VAVaU><8J7;P@?QVeZ+UjNdIm@5 zJ!vnyw=mZ0gvW?lb8YHsU%s~aEI)NlY}@dvX}hX=iKb}O#&xy%cKHXK7X5j3?5^0g zEe|&?TwPhyQ@y6--suy@mIzBq<&*99`}VE)@!OUsYiFc9GpS!CDRVw^T6?s)!+Y0- zj?QblN1Zy@Gg#wN_v6p~dixA@{GFrDsKx&$; z^zWwUJL7)7x!0GVD1?M4bCkOT5U=OZXkQe(;E>S$4GPww7)K;H+Bupf0H*_euJ8WN zeP_51B~Y@(&f+KmAkT1Hda^5HWh_C-lzBVm+Ofl5ABfjzQ2Ibrc>j~VDdDf^mQ=bW zNt0xf0OEBvXsjRloBqhf^i=_)2^vSqv6@(u1Q2gfisUc9YAd)ra!MLOBPlsl6>5|K z;!QkC^h0U0dy=C1bk5PE{*>&ZaIuj9;;G?5ekj!5+jehoK#mRtdn3Wu;@e*Wp#72h zLD}g6nZ-(Erl%cL4nzXdY09AM+K9KW3OU6D#Zht%=4!D1uMfm)`-sNn-3f?YI7_>f 
zpftKAlgqS70P%Kek(~ea|L%;5s9&c?UXQqJnNSa?*`o1 zMbLOkPSvEEB!GCMN_1=mSAM=Xq<)Vc*;6vF;5i8Z^S?b5F81si|A`(Ap=6yx$4NjU z2l%1b+_D*Ow@+-;q2biA1NZ?%0^r!lREuq3^ss|^$+7MVca-wi1BTU4F{jkI!=_btr`K4>oyby3fi7ZsLKvkfv1iVhxai^0LXuoRVA;SfASkWa-hun zDf)2|keJhi`k(VZK1;iyXrp(HYVL3Lcai{{UO74>dq`Sm8$pBU>1>B=UkM=I7JH2H zL7|8Ad`Z-tECLAFW0K@K9Jx{qh_ucy(>PN|L zoSR7kKwc4)plUwq_dto9D0!f2puGfu95y9s*rvSa52;(I-QM=zW(k1i9aVA0*qH~O zDWy#HQhDhG5y0u+@(oGXHmvE^BUj4YL*cuU z55yCNcojT~vM!$c{qXNfiGxvP)CnR00o7S&$Mk#c^^zu3UnB(aK|}(ep4aa0n!^i@ z{Gmh9lpO3B>?#2uPl&J8%yT*ShaL^1^7RV6jRYj-G+PVj&MW%WfY7vid)7aF(1C1- z5a$@@B0oJsvL)G7ey4=AzG-gq?Ge&F)BOO0r?!tqdSbyQB8*Xuv1JUt!ltjVBRn71 zzg_MAbc(-TLxflq%QLC5?y-JWSU{}6uq2ekGlpXgWBW2J6{Yf^5fM_|Q)T^#km;Qn zC_hR<=FrRphONhn_1G42G;G*8%|FYg+AHbRRSZzWjV6MR!bc6aghoLTq=w)SAryrw zY6vqd%O^|5pq|kqVljg!o(S>kc*6W!j|1!R zU{Ln+*5L8^_n(JR@kLyb4z!B7YuS9JToF!}7jcVBunaYkiVzDT6Oo`!&@vGXIH&=K zfrvkz9J*7|FC>EUD(A{&3rV+sOXS$A;_^yvr7WUP;ff(qXCDlw%SrA@J`j5-w^##3 z_AigX28HHLag`mFW6W^^y;qIh9RKB|nF*BXYRs+11|ZzZ3irJ7P)MYNd@f&J22&_g zu`d6`6A_^b8>_I)7V^PH^QJV*vJ-6+9e@WBQuq`Ts7ZuL{3H|Pa8B%6eb>F6nN<8* z%&o;n&?~F0$S}L+nn+l|t&n}@9kW3FjLe%wmrV*259J~@$9?gfb4O>>mU+S`(sNvgzqc%vCboptYb@i+u&W zpG3$|Wf;IRBFrMQ99W_@V@osc54`)tr#ydjw=tLUUXNYY;{dSR@K{|vz5J<2DCJ7+ zU>OpPiWGYl7LPdFI0JKZoPS&h!?t4WR%{0%;gi!qBO^y^FIMiw2G~G^kFoM&YrGk& zn;9b_T*r#*m_SB?jntiD7sPa1>bx`%4A|lN%y-qFz00UwtGHEiD9jUHukgyqs-Vjq z*r@{t0aNb<%9pJpW>izcJjFZ$Y@n;6_fKr8pf1v+a+~DCT)z+V``CLFb^FYqt3TbS zp}b1C68VZl3r6)8`!A-qS!LEjD*=rd-6VElaE1swv34i(QTqHTADT8+y-!6fFO`O-n>M>P@KK4?maa0YA7Ft$)HXflfJAItv< zstFpfhUUNfEKkS{LQ90l5$p{)23;aGN9+z+924CWy;%mohxzxg4i4SwKdIC0(XPV^ zB4qnzM?y}BsiWw?Mo67k{J#!en%ndRH7m8Op4%<%Z=}*b2i6E3Dj5ZiQ7CDy*=T z=9N~~t}?8$qE%x1N^GU+YHV7Kt!!L_&19yE?t@ZwDY+udr_UMV<2P7;zPGeMs zxhgD23FZT}Mw$ztvn4{fN4PhX8zRi~pBcn*Yb$oyirpbih)|Ec>ah>Q8?bi+_GNe@ z?%#;L7~YEATd^nLr(SoL{d%k91l4URx6}e@_EA#zy~!^=6qlE9OXL~@C5UFSIIgh1 zaI9)9VMXr>cDaH*K!iNUXx_{(a#X`?!}QF`Fz+xwhDGzyTIh<15bYW5!?5}MeEHFV zr;BPNjzCa4h%iz;Qp4_69X8fs6KDioAHDkGV8rh9MAbwc+-aDQ=q7PIg4c}*O<3E+ 
z-UpbJXr&RyB&!$(goiMDYO$`?x~kOU@Om5t3K5|JM>OCefS@1kesI$bOj6Hc{bYqMxd?&E> z1TzqJQIX=P#zfkw>~t_FeCN6+b?D0cyHwx;#e%+|COmaipg6~|v;Hc72k;#2wgW?l zf~&xRIwJV;zGi4YG;fAHId_8jU^5I$^uE}JTYv!(qEu1#EL+m~bTh=120DEh`qbGR(Vk#{t}5JdzFZ!y5ULuV+*cvtp#Bv=yM7?#W@8<^F}0m)$uYsK1D zY!5cWI7wRvanOf%kO+di;K|Z8!Xd&cd55?S@qpxo1g8wdp8~8H=mZDGAjUq%8IBNP z1J-T84iG%}#eim>_;UeVW+Gg`+6%1xk2ajyee{FU*K~H@gcX~xnz{ddta+cglL))8 zW|uX75Ni%vZAR(^>I_=E^2vF4;T{Vdj;wZ<=E%|&bc3RYfW=ZAZ2EuAs= z)7Ladr`t^DA(ac~E}kt$wy1DU(M<76qzK;pg~1nuKww2c z6i~r{5rq?T&JxUu5kB<@XF!j?7q55r|K@p^o$9x$x~jUWx|=cnLx=7+W*EPj{!1ce zE&5f!FpM7gi(V8p_aQROF%iQo$=NqoRS@<$XYsSriy3fazw4AL5a zL^Vm)^DnJg6j3}x%V9)Jc3Jg8`2l=C>ye45dQFdsX?_~3Tk4Dpt$auJ_C6o6bL7L) z`0U6VH`>f9w)r1u-!l5uhvvvBkC%4w zZ2To7vg*7;a`d-=O0$=F-F+U!&v~7Dv;0rTl!aCqnew@8Y9Ha9c&k%q>wJvdo{gKC z5_{|1TFsB&Bh9)MrcBa{_wsUJg%*~pV|(hq617M}8V&c4Y5!2>Uz9fB?e@#V`|2xP zokR`)-If{sS8&XwXQL+P$-exOuRA&7vLwpxz(v~$GdspTuFl);H#;u=qM=D_1Im4O zmZprGl%+9j{-BPowfQM?HBv%%T;95}-CnVMM(U2zNx$f^r^dSf^w$IThVgzo%VJ~K z)j!C2x^kdpSKB%7du5k5m!3aobuBrqH1o}lP`$MBAx;H$hL7h3t&2UsxbQ;4FY_*3 zXe=DLc7^=)lja!Tbcfr@Oi~9bH^MuEKD!h);lriW#OtLTeF=sOh!%YbK3O4 zaj9EEVzs_}#Nq(-xuPS8Ulp-U&~(f5%l@9ze=7{0@8M^CXUwF}HU_(|^t&0@tUWPa znjXD0V#Xd|n0M`G3_Y?=Yq88ghT(ESmCK2~S!XLr4S)rw{M5Ef&hvDDpB z=qP2hL;|}(PT&qkR}&=e!ex#u<3$*(gjN!FC!v#s(Gr4ZCqq&5N3)gB_qel6Kf*9W zIK!y(MJJf+`ukLe?JoUUraOUCL6{20|Nc-G^9i>2<(uU!2|Pe3tPhEJYwG0-Jw-D9btVz=BYz=sL^vCoB_%6J?z*3Nul& zW=GG{XPA)ZxobbXy3J^?T!3DHvC21YFPQ8FOVvzV8A!^&kkWA-?g7g^(gnbB&EVb) zKGbTO11SEVm1ntw(CZ)!r5akVdoR2iGFY2v*nr(OV1KG1+2B3c zf&dNR*#N#2nB9AEVTx<868H$tAE5^YE)3Yz%lk-+1+rYQaj=;xAGt`aNR}E#D@9Y> zStisema79n9Z0AymTLq-qdMIRf>w1p2L(B(#$Sw@#i&Q+L(}y7toqkZnZ_UUC$;_cw=Fho^FTs+3> zQMXubGvH>>qjgH_ow%ticHYf;5Y&S%)y;BEAZSvj+d$B!PAgELKsEjn)GR?&l9aiK zzwWd=Zq}{sq`tB(@vdaTfJ_LY9y1qnuN6lacoF#)FlYf)>e)C(abD|g^&&v* zpxBXAoz*~_Sto2a_t9jz0x&25BU)lCw-ZD=L7S&bK~xIb)HRkn2JA7A@bpPwPpZ?E zU|9(^6kmL#I75`K;nRiX{Js2#@~vR3`B)b|NhjlE166n7LNG*)nu#+FsNF1S0M5D4 
zmpAV&;9W4FaisAe;rh|0`GD<4|J^v5GETKxTvZ>u+?T}VGT>#d$?Bibw4r1;24gI!JrB;lRXiVdeFPa!L`fp5)Vzp^+mP@`8bpBmfcHU8 z^;4e(EElLBXhZdjJNFFU+I-N>2MgZK4)ExJpQsXnQIL0!fq#r6+CKt*1Xa11OQ%|H znbxcXicnI-+d~USL;U}?9hO@mSYbxJVY#h?9P=H#Gn zBg&mJQAny7Fb^cWnMJ@9feu}KmNb0atDYQt!C^0W@OTPh3g1VLd$`_vp>Z^X%=aDe zJ70-D@qfO#V?28rX>J|^J^cBu8zu_Vr)Jp?2orr|XHO^6vv8Ic9Zq|5q~7}q6H12T zed7Dk;gtig*R&Z8pGAh3881_XV7e$;Txya!hYV+Za`zoNLQ4kpmH1Mm$U|)YM>WotC=b}Kkw4q2r|(g(A@*ws6IvQ);=f4pNJ$t z7WU1;QB)$bXiRlQacmR;4kI2$LkcX9iFmE}YO;u!X4c)zjW&6f8z~tnqvk9wSv5t= zq<;*N-iWx7?`jy7Szz^_pd}qP_I?LxvQgS^5xqQ4GC)Ob}s zk&erW{x9JCh2Q%0tv%K+cWqcrfK!O4_i9~;mB{h|T8+3`oi0EuP^b5y=01MEB4-)IgFH^lh-9krz{rOu0Wbknrx`jO zi2vurSbxV;HY&V5nZ$iPTCPWH${8{A{Zno>ydnM2g*bZVU z@&0Y$uJ0-T*-Ex~xt-jRPhlHf8$G(6v)s>GKeIHUP0lSn-1NbP#G4D&3YOBo`fyN} z#j}IQZ6m`c5Kr*wsgZnC<5^FI64(T|2@L77O&i~BKAN*Ht4|IOQV_;q+hBWY2+J+dU7$Lm1mk~~&&m0&Ad@7Z za{~6H(&43wX+NKh&LhA((0j+Xa+(!nb$%Si`SnRb+Z6Pq8lFu1^`6ytEb7!?@($c7 zFsf{r#M4YhGLWl8tVA6O{B6F)Hu1R6J_2MRX7MHcdegI__mfs1Ag)BXMv%+WkB8;H zfzdZGr}Trwmo=3ygd~-?I6<6&G4~)odh?D#j0b0PU z1>C4Aaq2p^2@jrC5#S`^N&Y>ky@~ZYJvs1RHPM!ihUutEy$0B$4LWgW2+$6acHWgor#Cz-zN&SZ ztk6WmiMISSPXM3r2l-*Eq#=P}6Rr~Zy=c6bznBd64cV2My!jdd&H|nV6B-JtiDaen z{zrq&a!&!Dg6amaduqF#BI(f$BL4)up77h=-K0-}G8gtX0d}M1ZuFo&&PW&|>%Ok# zHnG^u-prZr03)?V>e8A&P&s@`$=)+}iF5}Tc7W<)(Y2dVQec6}eIlKMqjPW^Rm*Y; zv{&#qkj0nuBTe7O))5UG(Q+eSMV4ZtxXAP6b!1@*KvV!Mf2b(~QJH!duY#^s;7B8~ zAlqU_)7JG5h~OH~UITKz9FjphS)EQpVH&?}d(8{zx5Hffp)SiA+ZwC&JS(Y{Azv&3 z7|@0KPwvxJ0d_o92kv!zY+0@zr1hXW7bvfBj*a|2sl4wY8gzvaV|K2xgaQL?d66-3IRPouhxx;gmUwXw$ z0?Zc8)}aBrf5+i*b*cR;GMs>zfREGtcnVID z(bUXo8)2DMlHN#iVIAN)Q1xJz+XlFezb5M3yfiwz`}_|?z7|YtL3R7SCtI=cH?94x z1Xz!_p5HOwUz3=yE2gy()y3#rjB00<60|PCuGHdhTbpyg9NYUh(U1g~1cub+{DIL`IKUk2f=X=(;8s8YaqDBZ|5v`8}K%WXgHJ`2!2zilTeU^s_lP4H{;wR3tn`P z8cKvNi9r76{o^4w66~@S)#!5UQqI#+v%-H>az;mmMo&|I^i;lnJg)SEVS1=Ofm<}; zO;z*GQGzZ#d$_o|xqC2fDv)vKKRx(Q7ydIeVrIAnA#LQ=Vr?B=sh+;fK*E|inp+tf Pn;Mx|bd_6b*)snJfCark literal 0 HcmV?d00001 diff --git 
a/ffi/examples/read-table/.cache/clangd/index/read_table.h.B5A915C23F6DC678.idx b/ffi/examples/read-table/.cache/clangd/index/read_table.h.B5A915C23F6DC678.idx new file mode 100644 index 0000000000000000000000000000000000000000..1bc818940d9c93ed6af8e6d4b3e104ebb39b22bb GIT binary patch literal 1962 zcmYk63s4hR6ozjC3E5=bguE8mKv)8X5S0Qx1{4*wjED*bL8}FkM+8d%i6CON#o7+F zDxx4-=s*>$j|#015EO|@sZ|^S5#*t*mbT!iGlhc1YU$l2SDQ)x$^7S>d+s^=C5DjT z;G=c`EKCT=HYILeZwmmhhadCijO58&0Ait$U0M{eEUot){9aw-PS2dxpENaRwU``o zdI+1IucDieo9MF%i!=I;4t(_U!P0y_#m5&X7%CoatMD8deEw{B^y;=hl0628cT^9> zJn*aOp{q`M?jEi>N4IbJGpZ&2ub!;@;I0MqnXV9#MN}>Q4x)-zm$iDb}Q5`w6EMxHlh5f;vpI09{5k++$N*vRdAE_)!FR~$XA9M_y zwU=!@*HIsI(Uht4p0g)-0ajy_ZQbBr5;^mWf5#=70F4hPE$8D3nSbRPgO{zef~;$a z8anmE(>=Z42bse!`o<|-@&g(Vc-8*)*yaAx>}l~%L#NvMoNx2=r2Uf^epKC|o3qKt z`dgP)rHN<5V?LD{lP?91D(#evI298fx*P7E*}$C*u_wHZD$^4ew*KrI54 z)KTie3J^@BtLM>)^=<|J1aK7tO2`*tte~)JRr}wKlB_Ts$k6e6zFq+^I~xa`0`}KU zmePW<==Vf`fOo^WxR@1Y0{DgrSI(S%w)VRBMgmNR{bdpv#R_UL;@luV|D@l<2RN)( zi`8;gm}2;|g7raLO6{WkPNWmS4c05|m1|%V7K>tu=4opy_<>%Ku-dco4`%P z3aWQ0v(3G|G`xw=PA?D(R1_m`r;poZkn8h?+X>(V>uCk;#LAm`p)x0}`FuA4NVt6w zCYrPF|$VPlYn}G{0 zEVJfhvEYzR00*b^D*aT1*`hs<2BiE|3!zP8v>a^@g<(sUK5I)`W!TIq^A4_r*$S>g zj-E9_k}I4*(VC4@O0H6ja0>X)GH{nPu7kCtfRv(5zw~WsJJ21N1Z_vmQGw|4DO=Ww z5h;u<SxxG%<8Uz!b`B(3V@6`idgi-1 ZBi_uKDinc#Adz#I)phr*xWG|?{{Z+iJS6}C literal 0 HcmV?d00001 diff --git a/ffi/examples/read-table/.cache/clangd/index/schema.h.9F10114AF5265F91.idx b/ffi/examples/read-table/.cache/clangd/index/schema.h.9F10114AF5265F91.idx new file mode 100644 index 0000000000000000000000000000000000000000..b96d56679c3829075d89fb15fc2c85e83454d7ce GIT binary patch literal 6838 zcmdTIiC@jx_nvR>)mv`w)qAh))r%Hil_ioE(~!v4RD+4kD9ISRN=-G=WNc9+J0VQ? 
z4F+Qrk{G)XhK6EfNlK|KnPJBAyRY}X@AmT_{Q7*lx6?V_bIv{cy(5MW8njP~(15Rp zrp}m{G+u-dvLwHWNwcPmGC}AYIYOzM$|FLT+xy}_icieT6d#?InzFJGxGgJv)@@C5 zyWbkHwRhp*G0VJj{la$kf1l=FzO>)Xl3=bfP*Kv+z1$`E_~1Q_d-i+UghY*;pOJKK zlqTkoc4=JVk$?V~`n%ovZka1j=3UsaYu4Mt3m2R`lI2kMTZ{j-*xbObrCquWdY@d7 zKcvTl4!ZASrZ#ti%Omo79^N;+A))fx`XovGz3a`>mV926RC(_9rCSMwJ74bgwYu!_ zb-;JwTZfDfxtGu@Z%yx+BOOyZ&Z_r5Jx=3L)yb^*K;ai09JE#YNd2rlly-s=|`B!AIXOjjWYyPD!sRk(@4xvzC8b+EO+C zZo$mB{7*6}uK2%=>wHzzD|6&<#~*d&b;ta-T&u3_)uH@VKyK$*w=;iTc_pvDtop4^ z^{$v7x_+_w_n)S^g#WQZQIY@7jK5rQz{9cl)U=;>uS^fy(;A!nD)~)w6ReG%lY3(C z#j{IP5y7{!;?Z95J#po*luEP7i>HkXF7jDx9X$NcjS;_HvuT+Ua`Uhzt6%z!t$Ui6 z6)X>06PeUgbMt2zByD_H+gM_g;wh^sZ1V7M9{u8d$!xRN%O%~coi8pnuMTdFnwXW< zwWeu6bJJdj{kL{*^{gyh8*3@q^VPXEJEpI#3b1+DV|9n%3E2LjX!cj~QU5rPbI1J;DGrzj;T+6t^Ou$Y;t^QgYBlef~^mRBzOC2RYqvG{MCR{&$JsBgh>xN?Dyz0 z|FZh(^c%+$=i6m|$TjVH`Chu>>9%v-en<*B*!07u7N6c{tc&9PX21UI*{)T4yWIU{ zWKquX9vA9$1G^Mme44T1iRsj@w@qpuVrRcEV!rZ=edf!ajvi{eyS(-97gs7IE@PKU zZN^77I$!EO@b?k_tavu>?&*QQuP(_n{depOCTtIxE?go{q@Q(Okv1~lIN#@l}S;E-0`&c@$e zo7WWo*>mN(3)L%LdG(pDU0LSaG~qN%9w*KGTcKS74kCxm1rh*yG2*pSp(rYCXabgz4y%2#0INk}4O5q510 zl3I#gdUr%l2S-<}FH5tMBT^-JSI!l;EmVi>-kRo-0}8A#`jnuL06Ic+FllDUUJ_xnlW?}Sk>At?n^N=OVFB{4a^|F+E%bteVtYJvFf zrrj;t79=s)VsS$2#ap*=cLnk$XlIEtZd)jtU-)G1yU1_76sU_1@$L$DRog<*Cp-eI zhfnV6hfx5LUks?2$Tw_M{*HB#t!Huu4aCTika%Nno3;f>6(-&7JD^{GvD1Y#9QzET zfdoBCJjt?cK~fuen;(kDNLye3_L%~8BkFmoJZ;(*D&NJw_T&?%s4xX`CTJJzqG?+w zdclk(>(lE7L}27jn4bjkNw5?)BsJ&Uh!W2 zH71A0V$_R}RDigGa4~F1subB0E>ZrKGYXC@8jq1LAt?b=Lbw<<%BALe{}tc4C8S~$ zL`b%QcpF#?8&c?6!e2r%Ug{45|Tndg@nYgQ4%z3;x~zVR0v6|OOM_O ziAR9>JHXumE9&pD54U8tJid-hFrTJQvoqElJ`2=YU}v;7zY&xh!HSj)O2^3zHOY`? 
z2=n(qeGeQdcUq%3%hzC~YjX?D$!UW`K|*yv4t^RptsV3&_pO}z0Q0F_YCC8Fa0@`rJeLVvW_x%waH~O1jfwdJ;0i!a zQ&%Ju23j*jLP*hQ6A4jB15_j=FU@O_;6&1s(-vYW8aHoj#eRF+%4!zWoG@R3YPEV#`HgwO1o>W6> zQX)ubuSP*n!R9Fo^`yGE$D3S3T#2AEke7jqo{9EhQ<Lx~nshTm#}7hFGQ=w$x?$cn>i-A~#1S2qu}G z&ZW15vVqHncJNZ*mbQmC0k?_4aUP=^c12F~AkrR#_%V~FtFK?cGt(nX5vF2Lc*k&822HSF1<}!XSNcKWUMkoyY!apz_6^*bkT^eCw zC^y1}JEQ=T%KCBF`Z(K#k=XdXa8)qE!aQt*KkhmI+w7<}Nz@wdI82ehf%_X2)V&WM zYz#i}NEt~g=3Nlq1r0^eiG~!Wk4^CqG!I$i(+@yKZ$N^qOMk`2=;oh{BCH>R_#s$Q zN;<3%)*qj;0W=MaHT5oWkKuMiPx1d=R3?p#nPdI7_grF{IuO@^1vO1trZ(UDi!FM@ zpCJAdq!f|)`plCrhX1{Q3?*xUTMJT_3G0Ac$KXzrP2ZFRMlU3UH-Wnem~z1UOWEF_|8L0rpp7?sk@t3QlbL=a6NZUS4%pQ#h2R!6z zFJG(r7m6@{9!$@JjA?KQB$og)_zH-wfP}%K-Cky(L0ONeBK=Lo?F zWM)(Owx18(L%T7Tod?VFU`5rWS7_3_+MX-~7Bb$0^RQq>_2fJ)Xagsg8(VLlPFt7{ zmbJOQgG?D|fxdW7)f9?=M@MwW0hpZSO7YU=I~{T_k%5gU%}W>{mtRNK1* z@dU(A7-Hy#)n%`vYwi;d*h%c#-R0xBI5j)Q_uTjGpk=@<10yW^`B`~D-uuU-pSy8x zGWw2#`Tl18mDDhpKMT^cU`gHfuFBFo^0{q2QK%Zk)y$IgbK+0^8xR}geN;YH%xC@* ze>sEtJN0ijoCi4uIWf7hR0YsNv$p8QV literal 0 HcmV?d00001 diff --git a/ffi/examples/read-table/compile_commands.json b/ffi/examples/read-table/compile_commands.json new file mode 100644 index 000000000..df81ce112 --- /dev/null +++ b/ffi/examples/read-table/compile_commands.json @@ -0,0 +1,14 @@ +[ +{ + "directory": "/Users/oussama.saoudi/delta-kernel-rs/ffi/examples/read-table/build", + "command": "/Library/Developer/CommandLineTools/usr/bin/cc -DDEFINE_DEFAULT_ENGINE -DPRINT_ARROW_DATA -I/Users/oussama.saoudi/delta-kernel-rs/ffi/examples/read-table/../../../target/ffi-headers -I/opt/homebrew/Cellar/apache-arrow-glib/17.0.0/include -I/opt/homebrew/Cellar/glib/2.82.1/include -I/opt/homebrew/Cellar/glib/2.82.1/include/glib-2.0 -I/opt/homebrew/Cellar/glib/2.82.1/lib/glib-2.0/include -I/opt/homebrew/opt/gettext/include -I/opt/homebrew/Cellar/pcre2/10.44/include -I/opt/homebrew/Cellar/apache-arrow/17.0.0_6/include -I/Library/Developer/CommandLineTools/SDKs/MacOSX14.sdk/usr/include/ffi -arch arm64 -isysroot 
/Library/Developer/CommandLineTools/SDKs/MacOSX14.4.sdk -Wall -Wextra -Wpedantic -Werror -Wno-strict-prototypes -o CMakeFiles/read_table.dir/read_table.c.o -c /Users/oussama.saoudi/delta-kernel-rs/ffi/examples/read-table/read_table.c", + "file": "/Users/oussama.saoudi/delta-kernel-rs/ffi/examples/read-table/read_table.c", + "output": "CMakeFiles/read_table.dir/read_table.c.o" +}, +{ + "directory": "/Users/oussama.saoudi/delta-kernel-rs/ffi/examples/read-table/build", + "command": "/Library/Developer/CommandLineTools/usr/bin/cc -DDEFINE_DEFAULT_ENGINE -DPRINT_ARROW_DATA -I/Users/oussama.saoudi/delta-kernel-rs/ffi/examples/read-table/../../../target/ffi-headers -I/opt/homebrew/Cellar/apache-arrow-glib/17.0.0/include -I/opt/homebrew/Cellar/glib/2.82.1/include -I/opt/homebrew/Cellar/glib/2.82.1/include/glib-2.0 -I/opt/homebrew/Cellar/glib/2.82.1/lib/glib-2.0/include -I/opt/homebrew/opt/gettext/include -I/opt/homebrew/Cellar/pcre2/10.44/include -I/opt/homebrew/Cellar/apache-arrow/17.0.0_6/include -I/Library/Developer/CommandLineTools/SDKs/MacOSX14.sdk/usr/include/ffi -arch arm64 -isysroot /Library/Developer/CommandLineTools/SDKs/MacOSX14.4.sdk -Wall -Wextra -Wpedantic -Werror -Wno-strict-prototypes -o CMakeFiles/read_table.dir/arrow.c.o -c /Users/oussama.saoudi/delta-kernel-rs/ffi/examples/read-table/arrow.c", + "file": "/Users/oussama.saoudi/delta-kernel-rs/ffi/examples/read-table/arrow.c", + "output": "CMakeFiles/read_table.dir/arrow.c.o" +} +] \ No newline at end of file diff --git a/ffi/examples/read-table/expression.h b/ffi/examples/read-table/expression.h index fb6a57770..15c537917 100644 --- a/ffi/examples/read-table/expression.h +++ b/ffi/examples/read-table/expression.h @@ -1,99 +1,180 @@ -#include "delta_kernel_ffi.h" #include "assert.h" +#include "delta_kernel_ffi.h" #include "read_table.h" #include #include #include #include -enum OpType { +enum OpType +{ Add, - Sub + Sub, + Div, + Mul, + LT, + LE, + GT, + GE, + EQ, + NE, + In, + NotIn, }; -enum LitType { 
+enum LitType +{ i32, i16, i8 }; -struct Literal { +struct Literal +{ enum LitType type; int64_t value; }; -struct BinOp { +struct BinOp +{ enum OpType op; - struct Literal *left; - struct Literal *right; + struct Literal* left; + struct Literal* right; }; -enum VariadicType { +enum VariadicType +{ And, Or }; -enum ExpressionType { +enum ExpressionType +{ BinOp, Variadic, Literal }; -struct Variadic { +struct Variadic +{ enum VariadicType op; size_t len; size_t max_len; - struct ExpressionRef *expr_list; + struct ExpressionRef* expr_list; }; -struct ExpressionRef { +struct ExpressionRef +{ void* ref; enum ExpressionType type; }; -struct Data { +struct Data +{ size_t len; struct ExpressionRef handles[100]; }; -size_t put_handle(void *data, void *ref, enum ExpressionType type) { - struct Data * data_ptr = (struct Data *) data; - struct ExpressionRef expr = {.ref= ref, .type = type}; - data_ptr->handles[data_ptr->len] = expr; - return data_ptr->len++; -} -struct ExpressionRef *get_handle(void *data, size_t handle_index) { - struct Data * data_ptr = (struct Data *) data; +size_t put_handle(void* data, void* ref, enum ExpressionType type) +{ + struct Data* data_ptr = (struct Data*)data; + struct ExpressionRef expr = { .ref = ref, .type = type }; + data_ptr->handles[data_ptr->len] = expr; + return data_ptr->len++; +} +struct ExpressionRef* get_handle(void* data, size_t handle_index) +{ + struct Data* data_ptr = (struct Data*)data; if (handle_index > data_ptr->len) { return NULL; } return &data_ptr->handles[handle_index]; } -uintptr_t visit_add(void *data, uintptr_t a, uintptr_t b) { - struct BinOp *op = malloc(sizeof(struct BinOp)); - op->op = Add; - op->left = (struct Literal *) a; - op->right = (struct Literal *) b; - return put_handle(data, op, BinOp); +uintptr_t visit_binop(void* data, uintptr_t a, uintptr_t b, enum OpType op) +{ + struct BinOp* binop = malloc(sizeof(struct BinOp)); + binop->op = op; + binop->left = (struct Literal*)a; + binop->right = (struct 
Literal*)b; + return put_handle(data, binop, BinOp); +} +uintptr_t visit_minus(void* data, uintptr_t a, uintptr_t b) +{ + return visit_binop(data, a, b, Add); +} +uintptr_t visit_add(void* data, uintptr_t a, uintptr_t b) +{ + return visit_binop(data, a, b, Add); +} +uintptr_t visit_div(void* data, uintptr_t a, uintptr_t b) +{ + return visit_binop(data, a, b, Div); +} +uintptr_t visit_mul(void* data, uintptr_t a, uintptr_t b) +{ + return visit_binop(data, a, b, Mul); +} +uintptr_t visit_le(void* data, uintptr_t a, uintptr_t b) +{ + return visit_binop(data, a, b, LE); +} +uintptr_t visit_lt(void* data, uintptr_t a, uintptr_t b) +{ + return visit_binop(data, a, b, LT); +} +uintptr_t visit_gt(void* data, uintptr_t a, uintptr_t b) +{ + return visit_binop(data, a, b, GT); +} +uintptr_t visit_ge(void* data, uintptr_t a, uintptr_t b) +{ + return visit_binop(data, a, b, GE); +} +uintptr_t visit_eq(void* data, uintptr_t a, uintptr_t b) +{ + return visit_binop(data, a, b, EQ); +} +uintptr_t visit_ne(void* data, uintptr_t a, uintptr_t b) +{ + return visit_binop(data, a, b, NE); +} +uintptr_t visit_in(void* data, uintptr_t a, uintptr_t b) +{ + return visit_binop(data, a, b, In); +} +uintptr_t visit_not_in(void* data, uintptr_t a, uintptr_t b) +{ + return visit_binop(data, a, b, NotIn); } -uintptr_t visit_int(void *data, int32_t a) { - struct Literal *val = malloc(sizeof(struct Literal)); +uintptr_t visit_int(void* data, int32_t a) +{ + struct Literal* val = malloc(sizeof(struct Literal)); val->type = i32; - val->value = (uintptr_t) a; + val->value = (uintptr_t)a; return put_handle(data, val, Literal); } - -uintptr_t visit_and(void *data, uintptr_t len) { - struct Variadic* and = malloc(sizeof(struct Variadic)); +uintptr_t visit_variadic(void* data, uintptr_t len, enum VariadicType op) +{ + struct Variadic* var = malloc(sizeof(struct Variadic)); struct ExpressionRef* expr_lst = malloc(sizeof(struct ExpressionRef) * len); - and->len = 0; - and->max_len = len; - and->expr_list = 
expr_lst; - return put_handle(data, and, Variadic); + var->op = op; + var->len = 0; + var->max_len = len; + var->expr_list = expr_lst; + return put_handle(data, var, Variadic); +} +uintptr_t visit_and(void* data, uintptr_t len) +{ + return visit_variadic(data, len, And); +} +uintptr_t visit_or(void* data, uintptr_t len) +{ + return visit_variadic(data, len, Or); } -void visit_variadic_item(void *data, uintptr_t variadic_id, uintptr_t sub_expr_id) { - struct ExpressionRef *sub_expr_ref = get_handle(data, sub_expr_id); - struct ExpressionRef *variadic_ref = get_handle(data, variadic_id); +void visit_variadic_item(void* data, uintptr_t variadic_id, uintptr_t sub_expr_id) +{ + struct ExpressionRef* sub_expr_ref = get_handle(data, sub_expr_id); + struct ExpressionRef* variadic_ref = get_handle(data, variadic_id); if (sub_expr_ref == NULL || variadic_ref == NULL) { abort(); } - struct Variadic *variadic = variadic_ref->ref; + struct Variadic* variadic = variadic_ref->ref; variadic->expr_list[variadic->len++] = *sub_expr_ref; } @@ -101,7 +182,7 @@ void visit_variadic_item(void *data, uintptr_t variadic_id, uintptr_t sub_expr_i struct ExpressionRef construct_predicate(KernelPredicate* predicate) { print_diag("Building schema\n"); - struct Data data = {0}; + struct Data data = { 0 }; EngineExpressionVisitor visitor = { .data = &data, .make_expr_list = NULL, @@ -112,14 +193,19 @@ struct ExpressionRef construct_predicate(KernelPredicate* predicate) .visit_float = NULL, .visit_double = NULL, .visit_bool = NULL, + .visit_timestamp = NULL, + .visit_timestamp_ntz = NULL, + .visit_date = NULL, + .visit_binary = NULL, + .visit_decimal = NULL, .visit_string = NULL, .visit_and = visit_and, - .visit_or = NULL, + .visit_or = visit_or, .visit_variadic_item = visit_variadic_item, .visit_not = NULL, .visit_is_null = NULL, - .visit_lt = NULL, - .visit_le = NULL, + .visit_lt = visit_lt, + .visit_le = visit_le, .visit_gt = NULL, .visit_ge = NULL, .visit_eq = NULL, @@ -128,46 +214,90 @@ 
struct ExpressionRef construct_predicate(KernelPredicate* predicate) .visit_in = NULL, .visit_not_in = NULL, .visit_add = visit_add, - .visit_minus = NULL, + .visit_minus = visit_minus, .visit_multiply = NULL, .visit_divide = NULL, .visit_column = NULL, + .visit_expr_struct = NULL, + .visit_expr_struct_item = NULL, }; uintptr_t schema_list_id = visit_expression(&predicate, &visitor); return data.handles[schema_list_id]; } -void tab_helper(int n) { - if (n == 0) return; +void tab_helper(int n) +{ + if (n == 0) + return; printf(" "); - tab_helper(n-1); + tab_helper(n - 1); } -void print_tree(struct ExpressionRef ref, int depth) { +void print_tree(struct ExpressionRef ref, int depth) +{ switch (ref.type) { case BinOp: { - struct BinOp *op = ref.ref; + struct BinOp* op = ref.ref; tab_helper(depth); - switch(op->op) { - case Add:{ + switch (op->op) { + case Add: { printf("ADD \n"); break; } case Sub: { printf("SUB \n"); break; + }; + case Div: { + printf("DIV\n"); + break; + }; + case Mul: { + printf("MUL\n"); + break; + }; + case LT: { + printf("LT\n"); + break; + }; + case LE: { + printf("LE\n"); + break; } - break; + case GT: { + printf("GT\n"); + break; + }; + case GE: { + printf("GE\n"); + break; + }; + case EQ: { + printf("EQ\n"); + break; + }; + case NE: { + printf("NE\n"); + break; + }; + case In: { + printf("In\n"); + break; + }; + case NotIn: { + printf("NotIn\n"); + break; + }; break; } - struct ExpressionRef left = {.ref = op->left, .type = Literal}; - struct ExpressionRef right = {.ref = op->right, .type = Literal}; - print_tree(left, depth+1); - print_tree(right, depth+1); + struct ExpressionRef left = { .ref = op->left, .type = Literal }; + struct ExpressionRef right = { .ref = op->right, .type = Literal }; + print_tree(left, depth + 1); + print_tree(right, depth + 1); break; } case Variadic: { - struct Variadic *var = ref.ref; + struct Variadic* var = ref.ref; tab_helper(depth); switch (var->op) { case And: @@ -177,37 +307,33 @@ void print_tree(struct 
ExpressionRef ref, int depth) { printf("OR (\n"); break; } - for (size_t i = 0; i < var->len; i ++) { - print_tree(var->expr_list[i], depth +1); + for (size_t i = 0; i < var->len; i++) { + print_tree(var->expr_list[i], depth + 1); } tab_helper(depth); printf(")\n"); - } - break; + } break; case Literal: { - struct Literal *lit = ref.ref; + struct Literal* lit = ref.ref; tab_helper(depth); switch (lit->type) { case i32: { printf("i32("); - } - break; - case i16:{ + } break; + case i16: { printf("i16("); - } - break; - case i8:{ + } break; + case i8: { printf("i8("); - } - break; + } break; } printf("%lld)\n", lit->value); - } - break; - } + } break; } +} -void test_kernel_expr() { +void test_kernel_expr() +{ KernelPredicate* pred = get_kernel_expression(); struct ExpressionRef ref = construct_predicate(pred); print_tree(ref, 0); diff --git a/ffi/src/expressions.rs b/ffi/src/expressions.rs index 6ea8179bb..3ee7b6888 100644 --- a/ffi/src/expressions.rs +++ b/ffi/src/expressions.rs @@ -282,8 +282,13 @@ pub struct EngineExpressionVisitor { pub visit_timestamp_ntz: extern "C" fn(data: *mut c_void, value: i64) -> usize, pub visit_date: extern "C" fn(data: *mut c_void, value: i32) -> usize, pub visit_binary: extern "C" fn(data: *mut c_void, buf: *const u8, len: usize) -> usize, - // Scalar::Binary(_) => todo!(), TODO: Figure out how to pass over binary data - // Scalar::Decimal(_, _, _) => todo!(), + pub visit_decimal: extern "C" fn( + data: *mut c_void, + value_ms: u64, // Most significant half of decimal value + value_ls: u64, // Least significant half of decimal value + precision: u8, + scale: u8, + ) -> usize, // Scalar::Null(_) => todo!(), // Scalar::Struct(_) => todo!(), // Scalar::Array(_) => todo!(), @@ -310,6 +315,9 @@ pub struct EngineExpressionVisitor { pub visit_divide: extern "C" fn(data: *mut c_void, a: usize, b: usize) -> usize, pub visit_column: extern "C" fn(data: *mut c_void, name: KernelStringSlice) -> usize, + + pub visit_expr_struct: extern "C" 
fn(data: *mut c_void, len: usize) -> usize, + pub visit_expr_struct_item: extern "C" fn(data: *mut c_void, struct_id: usize, expr_id: usize), } #[no_mangle] @@ -317,6 +325,14 @@ pub unsafe extern "C" fn visit_expression( expression: &Handle, // TODO: This will likely be some kind of Handle visitor: &mut EngineExpressionVisitor, ) -> usize { + fn visit_expr_struct(visitor: &mut EngineExpressionVisitor, exprs: &Vec) -> usize { + let expr_struct_id = (visitor.visit_expr_struct)(visitor.data, exprs.len()); + for expr in exprs { + let expr_id = visit_expression(visitor, expr); + (visitor.visit_expr_struct_item)(visitor.data, expr_struct_id, expr_id) + } + expr_struct_id + } fn visit_variadic( visitor: &mut EngineExpressionVisitor, op: &VariadicOperator, @@ -351,18 +367,18 @@ pub unsafe extern "C" fn visit_expression( Scalar::Timestamp(val) => call!(visit_timestamp, *val), Scalar::TimestampNtz(val) => call!(visit_timestamp_ntz, *val), Scalar::Date(val) => call!(visit_date, *val), - Scalar::Binary(buf) => { - todo!() - // let len = buf.len(); - // call!(visit_binary, buf as *const, buf.len()); + Scalar::Binary(buf) => call!(visit_binary, buf.as_ptr(), buf.len()), + Scalar::Decimal(value, precision, scale) => { + let ms: u64 = (value >> 64) as u64; + let ls: u64 = *value as u64; + call!(visit_decimal, ms, ls, *precision, *scale) } - Scalar::Decimal(_, _, _) => todo!(), Scalar::Null(_) => todo!(), Scalar::Struct(_) => todo!(), Scalar::Array(_) => todo!(), }, Expression::Column(name) => call!(visit_column, name.into()), - Expression::Struct(_) => todo!(), + Expression::Struct(exprs) => visit_expr_struct(visitor, exprs), Expression::BinaryOperation { op, left, right } => { let left_id = visit_expression(visitor, left); let right_id = visit_expression(visitor, right); From 384056b88e70f215a9ca25a6c2dc598c2c407af2 Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Tue, 1 Oct 2024 15:50:42 -0700 Subject: [PATCH 06/82] Add new impls for visitor --- 
ffi/examples/read-table/expression.h | 229 ++++++++++++++++----------- 1 file changed, 134 insertions(+), 95 deletions(-) diff --git a/ffi/examples/read-table/expression.h b/ffi/examples/read-table/expression.h index 15c537917..aee1ce4a2 100644 --- a/ffi/examples/read-table/expression.h +++ b/ffi/examples/read-table/expression.h @@ -6,6 +6,21 @@ #include #include +#define DECL_BINOP(fun_name, op) \ + uintptr_t fun_name(void* data, uintptr_t a, uintptr_t b) \ + { \ + return visit_binop(data, a, b, op); \ + } +#define DECL_SIMPLE_SCALAR(fun_name, enum_member, c_type) \ + uintptr_t fun_name(void* data, c_type val) \ + { \ + struct Literal* lit = malloc(sizeof(struct Literal)); \ + lit->type = enum_member; \ + lit->value = (uintptr_t)val; \ + return put_handle(data, lit, Literal); \ + } \ + _Static_assert( \ + sizeof(c_type) <= sizeof(uintptr_t), "The provided type is not a valid simple scalar") enum OpType { Add, @@ -18,14 +33,28 @@ enum OpType GE, EQ, NE, + Distinct, In, NotIn, }; enum LitType { - i32, - i16, - i8 + Integer, + Long, + Short, + Byte, + Float, + Double, + String, + Boolean, + Timestamp, + TimestampNtz, + Date, + Binary, + Decimal, + Null, + Struct, + Array }; struct Literal { @@ -92,62 +121,31 @@ uintptr_t visit_binop(void* data, uintptr_t a, uintptr_t b, enum OpType op) binop->right = (struct Literal*)b; return put_handle(data, binop, BinOp); } -uintptr_t visit_minus(void* data, uintptr_t a, uintptr_t b) -{ - return visit_binop(data, a, b, Add); -} -uintptr_t visit_add(void* data, uintptr_t a, uintptr_t b) -{ - return visit_binop(data, a, b, Add); -} -uintptr_t visit_div(void* data, uintptr_t a, uintptr_t b) -{ - return visit_binop(data, a, b, Div); -} -uintptr_t visit_mul(void* data, uintptr_t a, uintptr_t b) -{ - return visit_binop(data, a, b, Mul); -} -uintptr_t visit_le(void* data, uintptr_t a, uintptr_t b) -{ - return visit_binop(data, a, b, LE); -} -uintptr_t visit_lt(void* data, uintptr_t a, uintptr_t b) -{ - return visit_binop(data, a, b, LT); 
-} -uintptr_t visit_gt(void* data, uintptr_t a, uintptr_t b) -{ - return visit_binop(data, a, b, GT); -} -uintptr_t visit_ge(void* data, uintptr_t a, uintptr_t b) -{ - return visit_binop(data, a, b, GE); -} -uintptr_t visit_eq(void* data, uintptr_t a, uintptr_t b) -{ - return visit_binop(data, a, b, EQ); -} -uintptr_t visit_ne(void* data, uintptr_t a, uintptr_t b) -{ - return visit_binop(data, a, b, NE); -} -uintptr_t visit_in(void* data, uintptr_t a, uintptr_t b) -{ - return visit_binop(data, a, b, In); -} -uintptr_t visit_not_in(void* data, uintptr_t a, uintptr_t b) -{ - return visit_binop(data, a, b, NotIn); -} +DECL_BINOP(visit_add, Add) +DECL_BINOP(visit_minus, Sub) +DECL_BINOP(visit_multiply, Mul) +DECL_BINOP(visit_divide, Div) +DECL_BINOP(visit_lt, LT) +DECL_BINOP(visit_le, LE) +DECL_BINOP(visit_gt, GT) +DECL_BINOP(visit_ge, GE) +DECL_BINOP(visit_eq, EQ) +DECL_BINOP(visit_ne, NE) +DECL_BINOP(visit_distinct, Distinct) +DECL_BINOP(visit_in, In) +DECL_BINOP(visit_not_in, NotIn) + +DECL_SIMPLE_SCALAR(visit_expr_int, Integer, int32_t); +DECL_SIMPLE_SCALAR(visit_expr_long, Long, int64_t); +DECL_SIMPLE_SCALAR(visit_expr_short, Long, int16_t); +DECL_SIMPLE_SCALAR(visit_expr_byte, Byte, int8_t); +DECL_SIMPLE_SCALAR(visit_expr_float, Float, float); +DECL_SIMPLE_SCALAR(visit_expr_double, Double, double); +DECL_SIMPLE_SCALAR(visit_expr_boolean, Boolean, _Bool); +DECL_SIMPLE_SCALAR(visit_expr_timestamp, Timestamp, int64_t); +DECL_SIMPLE_SCALAR(visit_expr_timestamp_ntz, TimestampNtz, int64_t); +DECL_SIMPLE_SCALAR(visit_expr_date, Date, int64_t); -uintptr_t visit_int(void* data, int32_t a) -{ - struct Literal* val = malloc(sizeof(struct Literal)); - val->type = i32; - val->value = (uintptr_t)a; - return put_handle(data, val, Literal); -} uintptr_t visit_variadic(void* data, uintptr_t len, enum VariadicType op) { struct Variadic* var = malloc(sizeof(struct Variadic)); @@ -158,15 +156,6 @@ uintptr_t visit_variadic(void* data, uintptr_t len, enum VariadicType op) 
var->expr_list = expr_lst; return put_handle(data, var, Variadic); } -uintptr_t visit_and(void* data, uintptr_t len) -{ - return visit_variadic(data, len, And); -} -uintptr_t visit_or(void* data, uintptr_t len) -{ - return visit_variadic(data, len, Or); -} - void visit_variadic_item(void* data, uintptr_t variadic_id, uintptr_t sub_expr_id) { struct ExpressionRef* sub_expr_ref = get_handle(data, sub_expr_id); @@ -177,6 +166,14 @@ void visit_variadic_item(void* data, uintptr_t variadic_id, uintptr_t sub_expr_i struct Variadic* variadic = variadic_ref->ref; variadic->expr_list[variadic->len++] = *sub_expr_ref; } +uintptr_t visit_and(void* data, uintptr_t len) +{ + return visit_variadic(data, len, And); +} +uintptr_t visit_or(void* data, uintptr_t len) +{ + return visit_variadic(data, len, Or); +} // Print the schema of the snapshot struct ExpressionRef construct_predicate(KernelPredicate* predicate) @@ -186,15 +183,15 @@ struct ExpressionRef construct_predicate(KernelPredicate* predicate) EngineExpressionVisitor visitor = { .data = &data, .make_expr_list = NULL, - .visit_int = visit_int, - .visit_long = NULL, - .visit_short = NULL, - .visit_byte = NULL, - .visit_float = NULL, - .visit_double = NULL, - .visit_bool = NULL, - .visit_timestamp = NULL, - .visit_timestamp_ntz = NULL, + .visit_int = visit_expr_int, + .visit_long = visit_expr_long, + .visit_short = visit_expr_short, + .visit_byte = visit_expr_byte, + .visit_float = visit_expr_float, + .visit_double = visit_expr_double, + .visit_bool = visit_expr_boolean, + .visit_timestamp = visit_expr_timestamp, + .visit_timestamp_ntz = visit_expr_timestamp_ntz, .visit_date = NULL, .visit_binary = NULL, .visit_decimal = NULL, @@ -206,17 +203,17 @@ struct ExpressionRef construct_predicate(KernelPredicate* predicate) .visit_is_null = NULL, .visit_lt = visit_lt, .visit_le = visit_le, - .visit_gt = NULL, - .visit_ge = NULL, - .visit_eq = NULL, - .visit_ne = NULL, - .visit_distinct = NULL, - .visit_in = NULL, - .visit_not_in = 
NULL, + .visit_gt = visit_gt, + .visit_ge = visit_ge, + .visit_eq = visit_eq, + .visit_ne = visit_ne, + .visit_distinct = visit_distinct, + .visit_in = visit_in, + .visit_not_in = visit_not_in, .visit_add = visit_add, .visit_minus = visit_minus, - .visit_multiply = NULL, - .visit_divide = NULL, + .visit_multiply = visit_multiply, + .visit_divide = visit_divide, .visit_column = NULL, .visit_expr_struct = NULL, .visit_expr_struct_item = NULL, @@ -288,6 +285,9 @@ void print_tree(struct ExpressionRef ref, int depth) printf("NotIn\n"); break; }; break; + case Distinct: + printf("Distinct"); + break; } struct ExpressionRef left = { .ref = op->left, .type = Literal }; @@ -317,17 +317,56 @@ void print_tree(struct ExpressionRef ref, int depth) struct Literal* lit = ref.ref; tab_helper(depth); switch (lit->type) { - case i32: { - printf("i32("); - } break; - case i16: { - printf("i16("); - } break; - case i8: { - printf("i8("); - } break; + case Integer: + printf("Integer"); + break; + case Short: + printf("Short"); + break; + case Byte: + printf("Byte"); + break; + case Float: + printf("Float"); + break; + case Double: + printf("Double"); + break; + case String: + printf("String"); + break; + case Boolean: + printf("Boolean"); + break; + case Timestamp: + printf("Timestamp"); + break; + case TimestampNtz: + printf("TimestampNtz"); + break; + case Date: + printf("Date"); + break; + case Binary: + printf("Binary"); + break; + case Decimal: + printf("Decimal"); + break; + case Null: + printf("Null"); + break; + case Struct: + printf("Struct"); + break; + case Array: + printf("Array"); + break; + case Long: + printf("Long"); + break; } - printf("%lld)\n", lit->value); + printf("(%lld)\n", lit->value); } break; } } From 10b6534966c93305419f3c23ba73411b03a68762 Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Tue, 1 Oct 2024 16:22:43 -0700 Subject: [PATCH 07/82] Add even more expression visitor stuff --- .../index/expression.h.2E6480F4E885C761.idx | Bin 6454 -> 564 bytes 
ffi/examples/read-table/expression.h | 71 ++++++++++++++---- ffi/src/expressions.rs | 2 - 3 files changed, 57 insertions(+), 16 deletions(-) diff --git a/ffi/examples/read-table/.cache/clangd/index/expression.h.2E6480F4E885C761.idx b/ffi/examples/read-table/.cache/clangd/index/expression.h.2E6480F4E885C761.idx index cba9111b888608bc2591ce0541930f884d13fa59..050990fd193e24aee52f049b3ad3077cc4bb6129 100644 GIT binary patch literal 564 zcmWIYbaT^TVqkDi@vO*AElFfyU|= zm2lBe*js(tst!x5N5#okk6gUVs;Icndinx|P%URw{%d=`*L^>()Af4UEydCqcAT|2 z4j1-){rr8K$>iqNHn%s&T}7YG*saeq&EQVhwjKMzHwwSrIKAx4>uTPRkD}SB^Jgs0 zT$Qx@{=ZKdZ*ykdvHkQ_IMc}|!tyy^&k;TTGX+hi&T=&+a+4i?8uNy%l5r}jXbhe| zL00ic(1RHP*~P0%f7>r&s9oIiU{mP588dXg9u8W);KaO=-BBDD|CBN)pDu4Uw6006j@wOIfF literal 6454 zcmYi~30PD|(%mC(X5P?qF))lEha3XRDeyQ{6a*9$1r!zWjCdl4fFKGAD(c1H)b`iN60VHpP23UOuZhykD95j?z+3HYSNPu69fAq zG%`DBS?=88*)|9vBmFNbE?iipKxmr^p=G-cPD~sBeX!xgo?4Y_VnR;x&9I+%;YIN2 ziOpXu%&#%c+)#Z#{@8?V{a^oo+c;Bt=V04?^dDK=Bv#xSWS3=A8Q|9mZI^hSw=E$T;uY4ZVobG+;6}>##c1qRJLD9V}wH`av zPky(3Yf0Vl4VyihdN7)8D3#ob>QmPtu?Mvo;6jHnooW=i<$-y}LtgZf|SW6*dOX zd*}A0>jmAHJ}cO?e}DVoLm8PJwtug2db&S3^H@vEvB#>bbq9ubci(*Z_1mh>DQEY# zP4wKdB&Yp&=91Clep%b38`qfqDrVfw@d^GDru}f_<;9y19~Y1I4Ib{_e^@iV8)Vn{ z#s1IhwwKQ{pGw|Z4`q{-}{dWIcdw1RTqj6VAVaU><8J7;P@?QVeZ+UjNdIm@5 zJ!vnyw=mZ0gvW?lb8YHsU%s~aEI)NlY}@dvX}hX=iKb}O#&xy%cKHXK7X5j3?5^0g zEe|&?TwPhyQ@y6--suy@mIzBq<&*99`}VE)@!OUsYiFc9GpS!CDRVw^T6?s)!+Y0- zj?QblN1Zy@Gg#wN_v6p~dixA@{GFrDsKx&$; z^zWwUJL7)7x!0GVD1?M4bCkOT5U=OZXkQe(;E>S$4GPww7)K;H+Bupf0H*_euJ8WN zeP_51B~Y@(&f+KmAkT1Hda^5HWh_C-lzBVm+Ofl5ABfjzQ2Ibrc>j~VDdDf^mQ=bW zNt0xf0OEBvXsjRloBqhf^i=_)2^vSqv6@(u1Q2gfisUc9YAd)ra!MLOBPlsl6>5|K z;!QkC^h0U0dy=C1bk5PE{*>&ZaIuj9;;G?5ekj!5+jehoK#mRtdn3Wu;@e*Wp#72h zLD}g6nZ-(Erl%cL4nzXdY09AM+K9KW3OU6D#Zht%=4!D1uMfm)`-sNn-3f?YI7_>f zpftKAlgqS70P%Kek(~ea|L%;5s9&c?UXQqJnNSa?*`o1 zMbLOkPSvEEB!GCMN_1=mSAM=Xq<)Vc*;6vF;5i8Z^S?b5F81si|A`(Ap=6yx$4NjU 
z2l%1b+_D*Ow@+-;q2biA1NZ?%0^r!lREuq3^ss|^$+7MVca-wi1BTU4F{jkI!=_btr`K4>oyby3fi7ZsLKvkfv1iVhxai^0LXuoRVA;SfASkWa-hun zDf)2|keJhi`k(VZK1;iyXrp(HYVL3Lcai{{UO74>dq`Sm8$pBU>1>B=UkM=I7JH2H zL7|8Ad`Z-tECLAFW0K@K9Jx{qh_ucy(>PN|L zoSR7kKwc4)plUwq_dto9D0!f2puGfu95y9s*rvSa52;(I-QM=zW(k1i9aVA0*qH~O zDWy#HQhDhG5y0u+@(oGXHmvE^BUj4YL*cuU z55yCNcojT~vM!$c{qXNfiGxvP)CnR00o7S&$Mk#c^^zu3UnB(aK|}(ep4aa0n!^i@ z{Gmh9lpO3B>?#2uPl&J8%yT*ShaL^1^7RV6jRYj-G+PVj&MW%WfY7vid)7aF(1C1- z5a$@@B0oJsvL)G7ey4=AzG-gq?Ge&F)BOO0r?!tqdSbyQB8*Xuv1JUt!ltjVBRn71 zzg_MAbc(-TLxflq%QLC5?y-JWSU{}6uq2ekGlpXgWBW2J6{Yf^5fM_|Q)T^#km;Qn zC_hR<=FrRphONhn_1G42G;G*8%|FYg+AHbRRSZzWjV6MR!bc6aghoLTq=w)SAryrw zY6vqd%O^|5pq|kqVljg!o(S>kc*6W!j|1!R zU{Ln+*5L8^_n(JR@kLyb4z!B7YuS9JToF!}7jcVBunaYkiVzDT6Oo`!&@vGXIH&=K zfrvkz9J*7|FC>EUD(A{&3rV+sOXS$A;_^yvr7WUP;ff(qXCDlw%SrA@J`j5-w^##3 z_AigX28HHLag`mFW6W^^y;qIh9RKB|nF*BXYRs+11|ZzZ3irJ7P)MYNd@f&J22&_g zu`d6`6A_^b8>_I)7V^PH^QJV*vJ-6+9e@WBQuq`Ts7ZuL{3H|Pa8B%6eb>F6nN<8* z%&o;n&?~F0$S}L+nn+l|t&n}@9kW3FjLe%wmrV*259J~@$9?gfb4O>>mU+S`(sNvgzqc%vCboptYb@i+u&W zpG3$|Wf;IRBFrMQ99W_@V@osc54`)tr#ydjw=tLUUXNYY;{dSR@K{|vz5J<2DCJ7+ zU>OpPiWGYl7LPdFI0JKZoPS&h!?t4WR%{0%;gi!qBO^y^FIMiw2G~G^kFoM&YrGk& zn;9b_T*r#*m_SB?jntiD7sPa1>bx`%4A|lN%y-qFz00UwtGHEiD9jUHukgyqs-Vjq z*r@{t0aNb<%9pJpW>izcJjFZ$Y@n;6_fKr8pf1v+a+~DCT)z+V``CLFb^FYqt3TbS zp}b1C68VZl3r6)8`!A-qS!LEjD*=rd-6VElaE1swv34i(QTqHTADT8+y-!6fFO`O-n>M>P@KK4?maa0YA7Ft$)HXflfJAItv< zstFpfhUUNfEKkS{LQ90l5$p{)23;aGN9+z+924CWy;%mohxzxg4i4SwKdIC0(XPV^ zB4qnzM?y}BsiWw?Mo67k{J#!en%ndRH7m8Op4%<%Z=}*b2i6E3Dj5ZiQ7CDy*=T z=9N~~t}?8$qE%x1N^GU+YHV7Kt!!L_&19yE?t@ZwDY+udr_UMV<2P7;zPGeMs zxhgD23FZT}Mw$ztvn4{fN4PhX8zRi~pBcn*Yb$oyirpbih)|Ec>ah>Q8?bi+_GNe@ z?%#;L7~YEATd^nLr(SoL{d%k91l4URx6}e@_EA#zy~!^=6qlE9OXL~@C5UFSIIgh1 zaI9)9VMXr>cDaH*K!iNUXx_{(a#X`?!}QF`Fz+xwhDGzyTIh<15bYW5!?5}MeEHFV zr;BPNjzCa4h%iz;Qp4_69X8fs6KDioAHDkGV8rh9MAbwc+-aDQ=q7PIg4c}*O<3E+ z-UpbJXr&RyB&!$(goiMDYO$`?x~kOU@Om5t3K5|JM>OCefS@1kesI$bOj6Hc{bYqMxd?&E> 
z1TzqJQIX=P#zfkw>~t_FeCN6+b?D0cyHwx;#e%+|COmaipg6~|v;Hc72k;#2wgW?l zf~&xRIwJV;zGi4YG;fAHId_8jU^5I$^uE}JTYv!(qEu1#EL+m~bTh=120DEh`qbGR(Vk#{t}5JdzFZ!y5ULuV+*cvtp#Bv=yM7?#W@8<^F}0m)$uYsK1D zY!5cWI7wRvanOf%kO+di;K|Z8!Xd&cd55?S@qpxo1g8wdp8~8H=mZDGAjUq%8IBNP z1J-T84iG%}#eim>_;UeVW+Gg`+6%1xk2ajyee{FU*K~H@gcX~xnz{ddta+cglL))8 zW|uX75Ni%vZAR(^>I_=E^2vF4;T{Vdj;wZ<=E%|&bc3RYfW=ZAZ2EuAs= z)7Ladr`t^DA(ac~E}kt$wy1DU(M<76qzK;pg~1nu #include #include +#include #define DECL_BINOP(fun_name, op) \ uintptr_t fun_name(void* data, uintptr_t a, uintptr_t b) \ @@ -21,6 +22,11 @@ } \ _Static_assert( \ sizeof(c_type) <= sizeof(uintptr_t), "The provided type is not a valid simple scalar") +#define DECL_VARIADIC(fun_name, enum_member) \ + uintptr_t fun_name(void* data, uintptr_t len) \ + { \ + return visit_variadic(data, len, enum_member); \ + } enum OpType { Add, @@ -71,13 +77,16 @@ struct BinOp enum VariadicType { And, - Or + Or, + StructConstructor }; enum ExpressionType { BinOp, Variadic, - Literal + Literal, + BinaryLiteral, + DecimalLiteral }; struct Variadic { @@ -86,6 +95,17 @@ struct Variadic size_t max_len; struct ExpressionRef* expr_list; }; +struct Binary +{ + uint8_t* buf; + uintptr_t len; +}; +struct Decimal +{ + uint64_t value[2]; + uint8_t precision; + uint8_t scale; +}; struct ExpressionRef { void* ref; @@ -144,7 +164,7 @@ DECL_SIMPLE_SCALAR(visit_expr_double, Double, double); DECL_SIMPLE_SCALAR(visit_expr_boolean, Boolean, _Bool); DECL_SIMPLE_SCALAR(visit_expr_timestamp, Timestamp, int64_t); DECL_SIMPLE_SCALAR(visit_expr_timestamp_ntz, TimestampNtz, int64_t); -DECL_SIMPLE_SCALAR(visit_expr_date, Date, int64_t); +DECL_SIMPLE_SCALAR(visit_expr_date, Date, int32_t); uintptr_t visit_variadic(void* data, uintptr_t len, enum VariadicType op) { @@ -166,13 +186,31 @@ void visit_variadic_item(void* data, uintptr_t variadic_id, uintptr_t sub_expr_i struct Variadic* variadic = variadic_ref->ref; variadic->expr_list[variadic->len++] = *sub_expr_ref; } -uintptr_t visit_and(void* data, 
uintptr_t len) +DECL_VARIADIC(visit_and, And) +DECL_VARIADIC(visit_or, Or) +DECL_VARIADIC(visit_expr_struct, StructConstructor) + +uintptr_t visit_expr_binary(void* data, const uint8_t* buf, uintptr_t len) { - return visit_variadic(data, len, And); + struct Binary* bin = malloc(sizeof(struct Binary)); + bin->buf = malloc(len); + memcpy(bin->buf, buf, len); + return put_handle(data, bin, BinaryLiteral); } -uintptr_t visit_or(void* data, uintptr_t len) + +uintptr_t visit_expr_decimal( + void* data, + uint64_t value_ms, + uint64_t value_ls, + uint8_t precision, + uint8_t scale) { - return visit_variadic(data, len, Or); + struct Decimal* dec = malloc(sizeof(struct Decimal)); + dec->value[0] = value_ms; + dec->value[1] = value_ls; + dec->precision = precision; + dec->scale = scale; + return put_handle(data, dec, DecimalLiteral); } // Print the schema of the snapshot @@ -192,9 +230,9 @@ struct ExpressionRef construct_predicate(KernelPredicate* predicate) .visit_bool = visit_expr_boolean, .visit_timestamp = visit_expr_timestamp, .visit_timestamp_ntz = visit_expr_timestamp_ntz, - .visit_date = NULL, - .visit_binary = NULL, - .visit_decimal = NULL, + .visit_date = visit_expr_date, + .visit_binary = visit_expr_binary, + .visit_decimal = visit_expr_decimal, .visit_string = NULL, .visit_and = visit_and, .visit_or = visit_or, @@ -215,8 +253,7 @@ struct ExpressionRef construct_predicate(KernelPredicate* predicate) .visit_multiply = visit_multiply, .visit_divide = visit_divide, .visit_column = NULL, - .visit_expr_struct = NULL, - .visit_expr_struct_item = NULL, + .visit_expr_struct = visit_expr_struct, }; uintptr_t schema_list_id = visit_expression(&predicate, &visitor); return data.handles[schema_list_id]; @@ -301,12 +338,15 @@ void print_tree(struct ExpressionRef ref, int depth) tab_helper(depth); switch (var->op) { case And: - printf("AND (\n"); + printf("AND\n"); break; case Or: - printf("OR (\n"); + printf("OR\n"); + break; + case StructConstructor: break; } + printf("("); 
for (size_t i = 0; i < var->len; i++) { print_tree(var->expr_list[i], depth + 1); } @@ -368,6 +408,9 @@ void print_tree(struct ExpressionRef ref, int depth) } printf("(%lld)\n", lit->value); } break; + case BinaryLiteral: + case DecimalLiteral: + break; } } diff --git a/ffi/src/expressions.rs b/ffi/src/expressions.rs index 3ee7b6888..b9c7c8caa 100644 --- a/ffi/src/expressions.rs +++ b/ffi/src/expressions.rs @@ -317,7 +317,6 @@ pub struct EngineExpressionVisitor { pub visit_column: extern "C" fn(data: *mut c_void, name: KernelStringSlice) -> usize, pub visit_expr_struct: extern "C" fn(data: *mut c_void, len: usize) -> usize, - pub visit_expr_struct_item: extern "C" fn(data: *mut c_void, struct_id: usize, expr_id: usize), } #[no_mangle] @@ -329,7 +328,6 @@ pub unsafe extern "C" fn visit_expression( let expr_struct_id = (visitor.visit_expr_struct)(visitor.data, exprs.len()); for expr in exprs { let expr_id = visit_expression(visitor, expr); - (visitor.visit_expr_struct_item)(visitor.data, expr_struct_id, expr_id) } expr_struct_id } From e7aa68770c843952dba97517f573cf66f91cff37 Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Wed, 2 Oct 2024 09:44:04 -0700 Subject: [PATCH 08/82] Patch expr_struct --- ffi/examples/read-table/expression.h | 1 + ffi/src/expressions.rs | 2 ++ 2 files changed, 3 insertions(+) diff --git a/ffi/examples/read-table/expression.h b/ffi/examples/read-table/expression.h index 50d46b19c..783bf957a 100644 --- a/ffi/examples/read-table/expression.h +++ b/ffi/examples/read-table/expression.h @@ -254,6 +254,7 @@ struct ExpressionRef construct_predicate(KernelPredicate* predicate) .visit_divide = visit_divide, .visit_column = NULL, .visit_expr_struct = visit_expr_struct, + .visit_expr_struct_item = visit_variadic_item, // treat expr struct like a variadic }; uintptr_t schema_list_id = visit_expression(&predicate, &visitor); return data.handles[schema_list_id]; diff --git a/ffi/src/expressions.rs b/ffi/src/expressions.rs index b9c7c8caa..3ee7b6888 
100644 --- a/ffi/src/expressions.rs +++ b/ffi/src/expressions.rs @@ -317,6 +317,7 @@ pub struct EngineExpressionVisitor { pub visit_column: extern "C" fn(data: *mut c_void, name: KernelStringSlice) -> usize, pub visit_expr_struct: extern "C" fn(data: *mut c_void, len: usize) -> usize, + pub visit_expr_struct_item: extern "C" fn(data: *mut c_void, struct_id: usize, expr_id: usize), } #[no_mangle] @@ -328,6 +329,7 @@ pub unsafe extern "C" fn visit_expression( let expr_struct_id = (visitor.visit_expr_struct)(visitor.data, exprs.len()); for expr in exprs { let expr_id = visit_expression(visitor, expr); + (visitor.visit_expr_struct_item)(visitor.data, expr_struct_id, expr_id) } expr_struct_id } From 57fdf77ded1557e4076d30693af8d9004eeb00ae Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Wed, 2 Oct 2024 14:55:41 -0700 Subject: [PATCH 09/82] Fix ffi test --- ffi/examples/read-table/expression.h | 2 +- ffi/src/expressions.rs | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/ffi/examples/read-table/expression.h b/ffi/examples/read-table/expression.h index 783bf957a..703f7bb0a 100644 --- a/ffi/examples/read-table/expression.h +++ b/ffi/examples/read-table/expression.h @@ -407,7 +407,7 @@ void print_tree(struct ExpressionRef ref, int depth) printf("Long"); break; } - printf("(%lld)\n", lit->value); + printf("(%ld)\n", lit->value); } break; case BinaryLiteral: case DecimalLiteral: diff --git a/ffi/src/expressions.rs b/ffi/src/expressions.rs index 3ee7b6888..43b63aa85 100644 --- a/ffi/src/expressions.rs +++ b/ffi/src/expressions.rs @@ -338,10 +338,11 @@ pub unsafe extern "C" fn visit_expression( op: &VariadicOperator, exprs: &Vec, ) -> usize { - let variadic_id = match op { - VariadicOperator::And => (visitor.visit_and)(visitor.data, exprs.len()), - VariadicOperator::Or => (visitor.visit_or)(visitor.data, exprs.len()), + let visit_fn = match op { + VariadicOperator::And => &visitor.visit_and, + VariadicOperator::Or => &visitor.visit_or, }; + let 
variadic_id = visit_fn(visitor.data, exprs.len()); for expr in exprs { let expr_id = visit_expression(visitor, expr); (visitor.visit_variadic_item)(visitor.data, variadic_id, expr_id) From 27736a0380021e122927da363d2f654955fed1e4 Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Wed, 2 Oct 2024 15:05:00 -0700 Subject: [PATCH 10/82] Fix clippy issues --- ffi/examples/read-table/expression.h | 2 +- ffi/src/expressions.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ffi/examples/read-table/expression.h b/ffi/examples/read-table/expression.h index 703f7bb0a..783bf957a 100644 --- a/ffi/examples/read-table/expression.h +++ b/ffi/examples/read-table/expression.h @@ -407,7 +407,7 @@ void print_tree(struct ExpressionRef ref, int depth) printf("Long"); break; } - printf("(%ld)\n", lit->value); + printf("(%lld)\n", lit->value); } break; case BinaryLiteral: case DecimalLiteral: diff --git a/ffi/src/expressions.rs b/ffi/src/expressions.rs index 43b63aa85..f1ec4e55d 100644 --- a/ffi/src/expressions.rs +++ b/ffi/src/expressions.rs @@ -1,4 +1,4 @@ -use std::{ffi::c_void, io::Read, ops::Add, sync::Arc}; +use std::{ffi::c_void, sync::Arc}; use crate::{ handle::Handle, AllocateErrorFn, EngineIterator, ExternResult, IntoExternResult, From 5ff43964ecffcad2684a835685a0bc629f31fee6 Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Thu, 3 Oct 2024 16:56:11 -0700 Subject: [PATCH 11/82] Add support for structs, arrays, null --- .../index/expression.h.2E6480F4E885C761.idx | Bin 564 -> 14760 bytes ffi/examples/read-table/expression.h | 198 ++++++++++++------ ffi/src/expressions.rs | 174 ++++++++++----- kernel/src/expressions/scalars.rs | 3 +- 4 files changed, 254 insertions(+), 121 deletions(-) diff --git a/ffi/examples/read-table/.cache/clangd/index/expression.h.2E6480F4E885C761.idx b/ffi/examples/read-table/.cache/clangd/index/expression.h.2E6480F4E885C761.idx index 050990fd193e24aee52f049b3ad3077cc4bb6129..0ed21f6efadab528715ed79477efd3bb50cebd86 100644 GIT 
binary patch literal 14760 zcmdryXINBMuxHNh0{3!v7g(AiRf>wBV8dP#H5xQ(G%*$oR@7^Wm>7H4U>9uI3mChx z8>7aCQHe2j>|*b|i~8o?-PyB$-@m-a_c3SZn^We_nKNg~Y0;=bgT+-D`>b=LpL=!b zpK4@`dEb&?C1Gwzq!!+LXy{lua18fvUOaiO1;_`hxPb+@WiFPPqZH# z@IJcPiGt|H)7xYwX8M149Xw%a@|Dc~-h2GVkMB0l_jdG~hY$YBdsCrD#dW+eMrG%|_PlX&fvA@{jD|cVy6p!RyBCsoC!LYCHNy zroIYv@7|APSk5<^5_jrWp7*l$?f%Lt%GjK`G3Z6iX-n{iO=r%XoA@xRc=ONy>g)UW z;8jCC1_T9XTn>1$yKLdEoIlSz2wC_0&b1%XYy4F5XvDX8?p(Vecwyz8^rV_vtRZ%39~_qz4=MWyBxhJ`IL$JO0E zDk>nU>B;^F)0?DDYua~oz0)gBT34+qHMU#3YokW(%XxmdTh_4DyoW`LCgvZ%d1%I! z69@0iD2kiw^L1CHNA)(fe{Is~%XJGc6})y`v9_xYv}`f zb!?1Qfq8G6HeewzE33ztT85V~>VQc|>XgFr5*sN=aYfw1M!*+Lt!J%g(*g59I{tl= zzV41K!N*`_)p2+Xk1^_ihIifC=xWh?U&+eqBe^6mY19Fce+=84-qb5fR@iJn`b_Yg z2{uQ=qzDTe2`TKChs7FnSnY0A*kY~I3XoSoprc_@l(*h#%#(%}U-s|q>&D`+PpW&W zSqJ(;$mXco*`?<0{wlz#ldJJ+9;ya(a>o?*&XS*htxrmz!p3S#j0br<_&FLTm9*+h z6jv9lD@cAHWM$>CPi0VVNwYXeTh4h&Uf9|Dk|K{zQjJIcqyX}NL=0{V$XOSJ{DXosuw((5uQPf_rl&d_o95-`pVCbvho)8Ls_PN;q!&R4rq?#br+kw`$ef}WzBJl z1fJlo0}@eu%&7%u!{$}8uz@(bkx!!l9nk1HSqpot$;hj0Vf~Q7&Pr#i4v74q&iMS* z)gzLutS(L-#iNWmpsycV?O7LFe7)*c_6?G&@G71wrlyHa0kgg^zo?YTKKMZLREEBq#ACiwnl3Y$!IOkpR6q76#ado?YZ~jqXOSDcK!L$)g zm%U+9w2wa1*Pn9VbSV9Jn3c7_K9zVS4;>J%uje)xT_>&LaD~m$=9mkfb5S$xjZ+OO zgr?apje9@B%9629ERS{9fj$RX@xZQK4;LJ7F;ZbuwK{ z^5|#_8wDuxG|x0I9gs|)MK$a3=1kFOg)P!1*#(we;O}Ud6zQc;(&z6Z^J-ODI?l>k zW1m*Mm8=7uk<=9Cn_m6%66NPlu(CQxj^@!u9T1-p4<}Sv9?@}>QMYF6Kjfnf=$7mI?%a6HF))`Zc)_7%vn~}3dvzS%%lTi_Kvx`zemaRY!hpP z;>=VjEO*)`CdY(T}asMsDQY$Nm7`bi@Piu-X&LIzi3O$2G7~3SoX%LSao|1Oqp=JPFZ)!!60Q|puqwp@sHG)xMS zb)C3cX49G(cSmouvNG7G7O&;51AQTM0@klLH0oW;yvfQcBDpHB>ZSuCuP!>C(&zI* zJ1p$ynoRBO(c4o8B#B3V231dJ-|!EGP1HD847SA(;b@o?>Zx;ZX+lWB#wqi$(`2pF zA&?J22}i@EaI@a&d}+5!5AsZZYUdDLuFvGptU92TPVT#WE4o|wUMp*YbJXB9+;l*E z9@qfw=TB*Vz{-LkC2jJ1cGaUkBq3y0Jtg|KR?Erw(+ort_uB_KmD^^Pq+G z$1A#_rJBBglgfXE*Pj%+-v+n-0RtTayEls7f$K%X2h z*(G^bpEN(s|IwUDlwlwLKE-uF!-sv5(7Af^Gyf>;dmLT>eg#m<(J;vmnhj~kc9w8! 
zd#aA|ubX~rban4)Q8hs9*FRh_cSC-QGYXrb2{;FAIS}J$oO&?TtM;;szkhaCVdJz; z+2EOtN^5VN8uj{I`F~FIyoa48X`PmXZ8`3zy>Z_4B}nM5MR<>%g1e-ni-O)N!w4Q8hqp zjBUIB%Mu?W-=ZC>z|?dpUC{x3>B1_RwXVJBS){N9+7ffXa}F9Ad*fUpa>3cnmHKY_ zU}1f6j{X7tLv%oM^y|_!eNhaaAxA`Iko8E4E*S;yY`?h>;JnY(UEI8jTglmIcWu%5 z$G1)}FGbCOzzhhXpu~tCCbH#2-lq{a88^g-6u^j41YXHL6k!R_nkLl()17d`qP zKI8-#(Mv(O`j7`>#JiI%10T9qjd)&I4*1YnGNK)2x!^-K)`+KoEe9Xx2~k%;u~iW2 zjMLQBP;xbt7Wh60*ayMR%U#_M=KWw3_%R4P1|eh|DdbdSJ<_C&{qKw6Hn-j80koeK z8KW9#VX;ty+2A)D0-U$6x`ms!2sNOH8HZ`%vKW>HepwLUj8oKHZq60MD3azfh0myn z+Ml962e0Q4NVYYsbK1F8Vnm;h?E*9T zgGO|#*lsYB%XRACQDKi;m$gPKYG>ZrlcG8W<7td!BvNY)j~EZJu6ViekVLo<;~{ic zWr~%Ic;YeclF1qv(ak{9CzA_e#Lx(1Ynj3(BSvrN{K+ISBl-+@E6C(e7*SB@EyyG} zBgVQI`pOgs8_^ZQw1i9>Wkhp;F1Spl!iYBpx+F5)CPqBp7(B{!zKv)o(O}7BMF0Ex zy8_q>kVp>{RD-A$5HIkJ5VaBFoiPo@F*PM%)lO~u?Ol4{1C81 zAdy*6)KQ#|;+~|h6>A=l3kJ6FbwiJoxA!1NRZ&xTiU;{AXytjj2N{1w9l!>-Ib&jV zSlqA*&ZLQYnR{O5-gJB^p@ip9%O*L_0v@M*AngOI;Bp_7-3N(;A1c^2Y*dqX{&>7@ zf$0|b61$Xv!UL*hp2#&&)Nuy$ID?IrCGEG!lzwua!v9~#fwE~RUoDj;foNjod*+Jpt|~z>AcH?n3Nl&$NmCD+F61l%#E;j2p6{rQ3wL zodbSzAb<|LqW;d!zYB9qnK&G#rQyV|=@2v>LdYg5>Jn~V!o7(55pCmlEK7=rM(ub7 zrdMDlHd^&d__|53XABaqg6XQT$`5|c_gd4eghqGH)AE0B^#{D55+a89~Q_iuwY)UWn_35`;Lv zmK~%-i=s{hX(E_Oy%qHcFL#7jAi+@L5a-vjhC&22R~KgjYgoTn5u+ zu#x;Ivxv)TX+^Q@B#o5OQBB2T{^Ab!ih0vAf zWp^YcAJYi0!Sq_JPq|54UrS61eV7S;Ga-QP7)4#n&1;1kQO**FX$ed5**Q=;2NKBx zQ`8UK{DJ!u+my@1VOk=S5)les#NoRkkuG9IJ#0x!ngBc6v!p^l zuYmUz2&JVdTZ%hiPnA-~;mK1!-*%`&ePs4MnBIdIjkwshQpx;_l>W`x~RD6X+4>C%W1F9D&>Y9D&>Y9Ky-b!1e68St8(e zf$s+hqxC39i|c6#TCvLuA#x#%Sz2jiSjcF#$ja^LW0=89H^KB)y1jJh3dIbkJhW(GfeGtU`r~_ ze+^96gpFVMWMbdFJ@YlfLohuQAJU~s-CZL$EboYf#ZYE3l&6r667%?lT2@{dfL#!{ z3yPCxuBd0Z`K&NEt@A=7JN7K>g!A16(_JCTm9<0vs9!a>GZNl{>8&78iXRu!^7}%R zOCV|ql%bGPQJ-@2Q(>nm^^e1}yuTQ>7$O%#Df$dWeZARB+T7@FTSXgJ5bYL@$N16kelta`OxB^zaOyH+g!maAhz2 z$Wc6a6ptf)_Iltkw*8RD8sQ0;o`4S#%4C-rd2`!^ACT|~OrOA?2z1TihiEknVSh3} z%5c5)G9WGk63Id+Y9=>liru0QGRairaGI9Cx!d2ueNoh--0i3hi}2gDN)aVI z744{4}&xW)6z&KA7$cu~E_uPlBT$=~zjN$h=+(^F?VNyaKi2 
zq`1r1g8N#KNxKwv2ZZf_l628v{Kz9N^C(gy3fOTydkvF5Oi_!#zepT;s%7GbYK2Up zADK`r6G~7_fPoY@FA(}cHBKC+)i#BECPVmSh^8Z|sMEPLom+@4QeR}tQSfw%;Td>6 z6Jj7A6Fb^#pMw9}5Of_=td-H$8P_F<=7eEpbw0n{-GkDWINySo}QLB`S zdot5t&NMh9@^{jW^@qBw*@!!M1x#0j38ZQ&ez8_c72}sfuQ5v8JzHke0?ASZa} zKYee^J-XQgy(91(1y=DPTfu)T6nDkrw*nS1?SEQE!2hx<{XY2Khd@Dp4Bn5yCE9xe zeovs7pnnA4kD~waYAr9s)&F@rH>wZ7^gw)=7EPMo37qP}9NiiGd zkD%=DP)_JyCJ)Z!VYJa&J|yZ0FD3AN9-Yri3;Z|_Kh7fsev^mY5)*i#f_d{`}WmBjc^l8 zH$@oZjEiKwT>XEiZj)^Y$u>AeN#%8vkXBn4iaW_*o+K2N%Iw%ptFeo2Qw`>+BA@Fl zx6Af2yF=WHI#3#D7TPf|VqmnuMu0Q|%#`6npBExVK(xRwfOJ7zw{+j*Nz>}}6ci}x z1dt|xr?4$ExX%ocNu?Ayet=fT7nXA!xUT~XSxzeHY2M%JVV!_D2Q+w?8rh;ffb>w?bF^7I(WPa0*;eH5_k(oD01N+j|Hh z!IMYgCEK3{IlFx!ec?rgD@AQ2HS(f*l2*40oCu-f5}gFaCP5hC|NTw7$;~%~y`@Sx zhD+KTDDqAeb#TbwaAEg`*oL@R!I2O;5+cYBQb`oW?RX#Ml;bP#dLk0J%rZG;_@t^1^B#*%X3}v zg4D}IT)UE+#3Zr8ZmHCOyDm+!jvf57DjuJ~gvKX%2Rarv1m z{*uctUGXcU^ZrKK>d}>F`7?6_q>qj)d+rYRDB(l@2Ul%o1Q`z)> zozi=1KYDAx{?~(kFxE}{?B)E))dAO^uN>Jxed*D)cds5jyQOt!`W*HiU7a0Sckwe# G{QMuwj5u!q literal 564 zcmWIYbaT^TVqkDi@vO*AElFfyU|= zm2lBe*js(tst!x5N5#okk6gUVs;Icndinx|P%URw{%d=`*L^>()Af4UEydCqcAT|2 z4j1-){rr8K$>iqNHn%s&T}7YG*saeq&EQVhwjKMzHwwSrIKAx4>uTPRkD}SB^Jgs0 zT$Qx@{=ZKdZ*ykdvHkQ_IMc}|!tyy^&k;TTGX+hi&T=&+a+4i?8uNy%l5r}jXbhe| zL00ic(1RHP*~P0%f7>r&s9oIiU{mP588dXg9u8W);KaO=-BBDD|CBN)pDu4Uw6006j@wOIfF diff --git a/ffi/examples/read-table/expression.h b/ffi/examples/read-table/expression.h index 783bf957a..162090505 100644 --- a/ffi/examples/read-table/expression.h +++ b/ffi/examples/read-table/expression.h @@ -73,12 +73,14 @@ struct BinOp struct Literal* left; struct Literal* right; }; +struct Null; enum VariadicType { And, Or, - StructConstructor + StructConstructor, + ArrayData }; enum ExpressionType { @@ -86,7 +88,9 @@ enum ExpressionType Variadic, Literal, BinaryLiteral, - DecimalLiteral + DecimalLiteral, + StructLiteral, + NullLiteral }; struct Variadic { @@ -116,6 +120,13 @@ struct Data size_t len; struct ExpressionRef handles[100]; }; +struct Struct +{ + 
KernelStringSlice* field_names; + struct ExpressionRef* expressions; + size_t len; + size_t max_len; +}; size_t put_handle(void* data, void* ref, enum ExpressionType type) { @@ -136,9 +147,15 @@ struct ExpressionRef* get_handle(void* data, size_t handle_index) uintptr_t visit_binop(void* data, uintptr_t a, uintptr_t b, enum OpType op) { struct BinOp* binop = malloc(sizeof(struct BinOp)); + struct ExpressionRef* left_handle = get_handle(data, a); + struct ExpressionRef* right_handle = get_handle(data, b); + assert(right_handle != NULL && left_handle != NULL); + + struct Literal* left = left_handle->ref; + struct Literal* right = right_handle->ref; binop->op = op; - binop->left = (struct Literal*)a; - binop->right = (struct Literal*)b; + binop->left = left; + binop->right = right; return put_handle(data, binop, BinOp); } DECL_BINOP(visit_add, Add) @@ -155,6 +172,20 @@ DECL_BINOP(visit_distinct, Distinct) DECL_BINOP(visit_in, In) DECL_BINOP(visit_not_in, NotIn) +uintptr_t visit_expr_decimal( + void* data, + uint64_t value_ms, + uint64_t value_ls, + uint8_t precision, + uint8_t scale) +{ + struct Decimal* dec = malloc(sizeof(struct Decimal)); + dec->value[0] = value_ms; + dec->value[1] = value_ls; + dec->precision = precision; + dec->scale = scale; + return put_handle(data, dec, DecimalLiteral); +} DECL_SIMPLE_SCALAR(visit_expr_int, Integer, int32_t); DECL_SIMPLE_SCALAR(visit_expr_long, Long, int64_t); DECL_SIMPLE_SCALAR(visit_expr_short, Long, int16_t); @@ -180,15 +211,16 @@ void visit_variadic_item(void* data, uintptr_t variadic_id, uintptr_t sub_expr_i { struct ExpressionRef* sub_expr_ref = get_handle(data, sub_expr_id); struct ExpressionRef* variadic_ref = get_handle(data, variadic_id); - if (sub_expr_ref == NULL || variadic_ref == NULL) { - abort(); - } + assert(sub_expr_ref != NULL && variadic_ref != NULL); + assert(variadic_ref->type == Variadic); + struct Variadic* variadic = variadic_ref->ref; variadic->expr_list[variadic->len++] = *sub_expr_ref; } 
DECL_VARIADIC(visit_and, And) DECL_VARIADIC(visit_or, Or) -DECL_VARIADIC(visit_expr_struct, StructConstructor) +DECL_VARIADIC(visit_struct_constructor, StructConstructor) +DECL_VARIADIC(visit_expr_array, ArrayData) uintptr_t visit_expr_binary(void* data, const uint8_t* buf, uintptr_t len) { @@ -198,19 +230,39 @@ uintptr_t visit_expr_binary(void* data, const uint8_t* buf, uintptr_t len) return put_handle(data, bin, BinaryLiteral); } -uintptr_t visit_expr_decimal( +uintptr_t visit_expr_struct(void* data, uintptr_t len) +{ + struct Struct* struct_data = malloc(sizeof(struct Struct)); + struct_data->len = 0; + struct_data->max_len = len; + struct_data->expressions = malloc(sizeof(struct ExpressionRef) * len); + struct_data->field_names = malloc(sizeof(KernelStringSlice) * len); + return put_handle(data, struct_data, StructLiteral); +} + +void visit_expr_struct_field( void* data, - uint64_t value_ms, - uint64_t value_ls, - uint8_t precision, - uint8_t scale) + uintptr_t struct_id, + KernelStringSlice field_name, + uintptr_t value_id) { - struct Decimal* dec = malloc(sizeof(struct Decimal)); - dec->value[0] = value_ms; - dec->value[1] = value_ls; - dec->precision = precision; - dec->scale = scale; - return put_handle(data, dec, DecimalLiteral); + struct ExpressionRef* value = get_handle(data, value_id); + struct ExpressionRef* struct_handle = get_handle(data, struct_id); + assert(struct_handle != NULL && value != NULL); + assert(struct_handle->type == StructLiteral); + + struct Struct* struct_ref = (struct Struct*)struct_handle->ref; + size_t len = struct_ref->len; + assert(len < struct_ref->max_len); + + struct_ref->expressions[len] = *value; + struct_ref->field_names[len] = field_name; + struct_ref->len++; +} + +uintptr_t visit_null(void* data) +{ + return put_handle(data, NULL, NullLiteral); } // Print the schema of the snapshot @@ -218,44 +270,47 @@ struct ExpressionRef construct_predicate(KernelPredicate* predicate) { print_diag("Building schema\n"); struct Data 
data = { 0 }; - EngineExpressionVisitor visitor = { - .data = &data, - .make_expr_list = NULL, - .visit_int = visit_expr_int, - .visit_long = visit_expr_long, - .visit_short = visit_expr_short, - .visit_byte = visit_expr_byte, - .visit_float = visit_expr_float, - .visit_double = visit_expr_double, - .visit_bool = visit_expr_boolean, - .visit_timestamp = visit_expr_timestamp, - .visit_timestamp_ntz = visit_expr_timestamp_ntz, - .visit_date = visit_expr_date, - .visit_binary = visit_expr_binary, - .visit_decimal = visit_expr_decimal, - .visit_string = NULL, - .visit_and = visit_and, - .visit_or = visit_or, - .visit_variadic_item = visit_variadic_item, - .visit_not = NULL, - .visit_is_null = NULL, - .visit_lt = visit_lt, - .visit_le = visit_le, - .visit_gt = visit_gt, - .visit_ge = visit_ge, - .visit_eq = visit_eq, - .visit_ne = visit_ne, - .visit_distinct = visit_distinct, - .visit_in = visit_in, - .visit_not_in = visit_not_in, - .visit_add = visit_add, - .visit_minus = visit_minus, - .visit_multiply = visit_multiply, - .visit_divide = visit_divide, - .visit_column = NULL, - .visit_expr_struct = visit_expr_struct, - .visit_expr_struct_item = visit_variadic_item, // treat expr struct like a variadic - }; + EngineExpressionVisitor visitor = { .data = &data, + .visit_int = visit_expr_int, + .visit_long = visit_expr_long, + .visit_short = visit_expr_short, + .visit_byte = visit_expr_byte, + .visit_float = visit_expr_float, + .visit_double = visit_expr_double, + .visit_bool = visit_expr_boolean, + .visit_timestamp = visit_expr_timestamp, + .visit_timestamp_ntz = visit_expr_timestamp_ntz, + .visit_date = visit_expr_date, + .visit_binary = visit_expr_binary, + .visit_decimal = visit_expr_decimal, + .visit_string = NULL, + .visit_and = visit_and, + .visit_or = visit_or, + .visit_variadic_item = visit_variadic_item, + .visit_not = NULL, + .visit_is_null = NULL, + .visit_lt = visit_lt, + .visit_le = visit_le, + .visit_gt = visit_gt, + .visit_ge = visit_ge, + .visit_eq = 
visit_eq, + .visit_ne = visit_ne, + .visit_distinct = visit_distinct, + .visit_in = visit_in, + .visit_not_in = visit_not_in, + .visit_add = visit_add, + .visit_minus = visit_minus, + .visit_multiply = visit_multiply, + .visit_divide = visit_divide, + .visit_column = NULL, + .visit_expr_struct = visit_struct_constructor, + .visit_expr_struct_item = + visit_variadic_item, // treating expr struct like a variadic + .visit_null = visit_null, + .visit_struct = visit_expr_struct, + .visit_struct_field = visit_expr_struct_field, + .visit_array = visit_expr_array, + .visit_array_item = visit_variadic_item }; uintptr_t schema_list_id = visit_expression(&predicate, &visitor); return data.handles[schema_list_id]; } @@ -339,20 +394,21 @@ void print_tree(struct ExpressionRef ref, int depth) tab_helper(depth); switch (var->op) { case And: - printf("AND\n"); + printf("And\n"); break; case Or: - printf("OR\n"); + printf("Or\n"); break; case StructConstructor: + printf("StructConstructor\n"); + break; + case ArrayData: + printf("ArrayData\n"); break; } - printf("("); for (size_t i = 0; i < var->len; i++) { print_tree(var->expr_list[i], depth + 1); } - tab_helper(depth); - printf(")\n"); } break; case Literal: { struct Literal* lit = ref.ref; @@ -410,7 +466,25 @@ void print_tree(struct ExpressionRef ref, int depth) printf("(%lld)\n", lit->value); } break; case BinaryLiteral: + tab_helper(depth); + printf("BinaryLiteral\n"); case DecimalLiteral: + tab_helper(depth); + printf("DecimalLiteral\n"); + break; + case StructLiteral: + tab_helper(depth); + struct Struct* struct_data = ref.ref; + printf("Struct\n"); + for (size_t i = 0; i < struct_data->len; i++) { + tab_helper(depth); + printf("Field: %s\n", struct_data->field_names[i].ptr); + print_tree(struct_data->expressions[i], depth + 1); + } + break; + case NullLiteral: + tab_helper(depth); + printf("Null\n"); break; } } diff --git a/ffi/src/expressions.rs b/ffi/src/expressions.rs index f1ec4e55d..f5d90a5c2 100644 --- 
a/ffi/src/expressions.rs +++ b/ffi/src/expressions.rs @@ -5,7 +5,10 @@ use crate::{ KernelPredicate, KernelStringSlice, ReferenceSet, TryFromStringSlice, }; use delta_kernel::{ - expressions::{BinaryOperator, Expression, Scalar, UnaryOperator, VariadicOperator}, + expressions::{ + ArrayData, BinaryOperator, Expression, Scalar, StructData, UnaryOperator, VariadicOperator, + }, + schema::{ArrayType, DataType, PrimitiveType, StructField, StructType}, DeltaResult, }; @@ -249,13 +252,35 @@ pub extern "C" fn visit_expression_literal_bool( #[no_mangle] pub unsafe extern "C" fn get_kernel_expression() -> Handle { use Expression as Expr; + + let array_type = ArrayType::new( + DataType::Primitive(delta_kernel::schema::PrimitiveType::Short), + false, + ); + let array_data = ArrayData::new(array_type.clone(), vec![Scalar::Short(5), Scalar::Short(0)]); + let nested_fields = vec![ + StructField::new("a", DataType::Primitive(PrimitiveType::Integer), false), + StructField::new("b", DataType::Array(Box::new(array_type)), false), + ]; + let nested_values = vec![Scalar::Integer(500), Scalar::Array(array_data)]; + let nested = StructData::try_new(nested_fields.clone(), nested_values).unwrap(); + let nested_type = StructType::new(nested_fields); + let top = StructData::try_new( + vec![StructField::new( + "top", + DataType::Struct(Box::new(nested_type)), + true, + )], + vec![Scalar::Struct(nested)], + ) + .unwrap(); Arc::new(Expr::and_from(vec![ Expr::and_from(vec![ Expr::literal(Scalar::Integer(5)), - Expr::literal(Scalar::Integer(20)), + Expr::literal(Scalar::Long(20)), ]), Expr::literal(Scalar::Integer(10)), - Expr::literal(Scalar::Integer(10)), + Expr::literal(Scalar::Struct(top)), ])) .into() } @@ -266,9 +291,6 @@ pub unsafe extern "C" fn get_kernel_expression() -> Handle { pub struct EngineExpressionVisitor { /// opaque state pointer pub data: *mut c_void, - /// Creates a new field list, optionally reserving capacity up front - pub make_expr_list: extern "C" fn(data: *mut c_void, 
reserve: usize) -> usize, - /// Visit an `integer` belonging to the list identified by `sibling_list_id`. pub visit_int: extern "C" fn(data: *mut c_void, value: i32) -> usize, pub visit_long: extern "C" fn(data: *mut c_void, value: i64) -> usize, @@ -284,14 +306,12 @@ pub struct EngineExpressionVisitor { pub visit_binary: extern "C" fn(data: *mut c_void, buf: *const u8, len: usize) -> usize, pub visit_decimal: extern "C" fn( data: *mut c_void, - value_ms: u64, // Most significant half of decimal value - value_ls: u64, // Least significant half of decimal value + value_ms: u64, // Most significant 64 bits of decimal value + value_ls: u64, // Least significant 64 bits of decimal value precision: u8, scale: u8, ) -> usize, - // Scalar::Null(_) => todo!(), - // Scalar::Struct(_) => todo!(), - // Scalar::Array(_) => todo!(), + pub visit_and: extern "C" fn(data: *mut c_void, len: usize) -> usize, pub visit_or: extern "C" fn(data: *mut c_void, len: usize) -> usize, pub visit_variadic_item: @@ -318,6 +338,17 @@ pub struct EngineExpressionVisitor { pub visit_expr_struct: extern "C" fn(data: *mut c_void, len: usize) -> usize, pub visit_expr_struct_item: extern "C" fn(data: *mut c_void, struct_id: usize, expr_id: usize), + + pub visit_struct: extern "C" fn(data: *mut c_void, num_fields: usize) -> usize, + pub visit_struct_field: extern "C" fn( + data: *mut c_void, + struct_id: usize, + field_name: KernelStringSlice, + field_value: usize, + ), + pub visit_null: extern "C" fn(data: *mut c_void) -> usize, + pub visit_array: extern "C" fn(data: *mut c_void, len: usize) -> usize, + pub visit_array_item: extern "C" fn(data: *mut c_void, array_id: usize, item_id: usize), } #[no_mangle] @@ -325,11 +356,40 @@ pub unsafe extern "C" fn visit_expression( expression: &Handle, // TODO: This will likely be some kind of Handle visitor: &mut EngineExpressionVisitor, ) -> usize { + macro_rules! 
call { + ( $visitor:ident, $visitor_fn:ident $(, $extra_args:expr) *) => { + ($visitor.$visitor_fn)($visitor.data $(, $extra_args) *) + }; + } + fn visit_array(visitor: &mut EngineExpressionVisitor, array: &ArrayData) -> usize { + #[allow(deprecated)] + let elements = array.array_elements(); + let array_id = call!(visitor, visit_array, elements.len()); + for scalar in elements { + let scalar_id = visit_scalar(visitor, scalar); + call!(visitor, visit_array_item, array_id, scalar_id); + } + array_id + } + fn visit_struct(visitor: &mut EngineExpressionVisitor, struct_data: &StructData) -> usize { + let struct_id = call!(visitor, visit_struct, struct_data.fields().len()); + for (field, value) in struct_data.fields().iter().zip(struct_data.values()) { + let value_id = visit_scalar(visitor, value); + call!( + visitor, + visit_struct_field, + struct_id, + field.name().into(), + value_id + ); + } + struct_id + } fn visit_expr_struct(visitor: &mut EngineExpressionVisitor, exprs: &Vec) -> usize { - let expr_struct_id = (visitor.visit_expr_struct)(visitor.data, exprs.len()); + let expr_struct_id = call!(visitor, visit_expr_struct, exprs.len()); for expr in exprs { let expr_id = visit_expression(visitor, expr); - (visitor.visit_expr_struct_item)(visitor.data, expr_struct_id, expr_id) + call!(visitor, visit_expr_struct_item, expr_struct_id, expr_id) } expr_struct_id } @@ -345,65 +405,65 @@ pub unsafe extern "C" fn visit_expression( let variadic_id = visit_fn(visitor.data, exprs.len()); for expr in exprs { let expr_id = visit_expression(visitor, expr); - (visitor.visit_variadic_item)(visitor.data, variadic_id, expr_id) + call!(visitor, visit_variadic_item, variadic_id, expr_id) } variadic_id } - fn visit_expression(visitor: &mut EngineExpressionVisitor, expression: &Expression) -> usize { - macro_rules! 
call { - ( $visitor_fn:ident $(, $extra_args:expr) *) => { - (visitor.$visitor_fn)(visitor.data $(, $extra_args) *) - }; + fn visit_scalar(visitor: &mut EngineExpressionVisitor, scalar: &Scalar) -> usize { + match scalar { + Scalar::Integer(val) => call!(visitor, visit_int, *val), + Scalar::Long(val) => call!(visitor, visit_long, *val), + Scalar::Short(val) => call!(visitor, visit_short, *val), + Scalar::Byte(val) => call!(visitor, visit_byte, *val), + Scalar::Float(val) => call!(visitor, visit_float, *val), + Scalar::Double(val) => call!(visitor, visit_double, *val), + Scalar::String(val) => call!(visitor, visit_string, val.into()), + Scalar::Boolean(val) => call!(visitor, visit_bool, *val), + Scalar::Timestamp(val) => call!(visitor, visit_timestamp, *val), + Scalar::TimestampNtz(val) => call!(visitor, visit_timestamp_ntz, *val), + Scalar::Date(val) => call!(visitor, visit_date, *val), + Scalar::Binary(buf) => call!(visitor, visit_binary, buf.as_ptr(), buf.len()), + Scalar::Decimal(value, precision, scale) => { + let ms: u64 = (value >> 64) as u64; + let ls: u64 = *value as u64; + call!(visitor, visit_decimal, ms, ls, *precision, *scale) + } + Scalar::Null(_) => call!(visitor, visit_null), + Scalar::Struct(struct_data) => visit_struct(visitor, struct_data), + Scalar::Array(array) => visit_array(visitor, array), } + } + fn visit_expression(visitor: &mut EngineExpressionVisitor, expression: &Expression) -> usize { match expression { - Expression::Literal(lit) => match lit { - Scalar::Integer(val) => call!(visit_int, *val), - Scalar::Long(val) => call!(visit_long, *val), - Scalar::Short(val) => call!(visit_short, *val), - Scalar::Byte(val) => call!(visit_byte, *val), - Scalar::Float(val) => call!(visit_float, *val), - Scalar::Double(val) => call!(visit_double, *val), - Scalar::String(val) => call!(visit_string, val.into()), - Scalar::Boolean(val) => call!(visit_bool, *val), - Scalar::Timestamp(val) => call!(visit_timestamp, *val), - Scalar::TimestampNtz(val) => 
call!(visit_timestamp_ntz, *val), - Scalar::Date(val) => call!(visit_date, *val), - Scalar::Binary(buf) => call!(visit_binary, buf.as_ptr(), buf.len()), - Scalar::Decimal(value, precision, scale) => { - let ms: u64 = (value >> 64) as u64; - let ls: u64 = *value as u64; - call!(visit_decimal, ms, ls, *precision, *scale) - } - Scalar::Null(_) => todo!(), - Scalar::Struct(_) => todo!(), - Scalar::Array(_) => todo!(), - }, - Expression::Column(name) => call!(visit_column, name.into()), + Expression::Literal(scalar) => visit_scalar(visitor, scalar), + Expression::Column(name) => call!(visitor, visit_column, name.into()), Expression::Struct(exprs) => visit_expr_struct(visitor, exprs), Expression::BinaryOperation { op, left, right } => { let left_id = visit_expression(visitor, left); let right_id = visit_expression(visitor, right); match op { - BinaryOperator::Plus => call!(visit_add, left_id, right_id), - BinaryOperator::Minus => call!(visit_minus, left_id, right_id), - BinaryOperator::Multiply => call!(visit_multiply, left_id, right_id), - BinaryOperator::Divide => call!(visit_divide, left_id, right_id), - BinaryOperator::LessThan => call!(visit_lt, left_id, right_id), - BinaryOperator::LessThanOrEqual => call!(visit_le, left_id, right_id), - BinaryOperator::GreaterThan => call!(visit_gt, left_id, right_id), - BinaryOperator::GreaterThanOrEqual => call!(visit_ge, left_id, right_id), - BinaryOperator::Equal => call!(visit_eq, left_id, right_id), - BinaryOperator::NotEqual => call!(visit_ne, left_id, right_id), - BinaryOperator::Distinct => call!(visit_distinct, left_id, right_id), - BinaryOperator::In => call!(visit_in, left_id, right_id), - BinaryOperator::NotIn => call!(visit_not_in, left_id, right_id), + BinaryOperator::Plus => call!(visitor, visit_add, left_id, right_id), + BinaryOperator::Minus => call!(visitor, visit_minus, left_id, right_id), + BinaryOperator::Multiply => call!(visitor, visit_multiply, left_id, right_id), + BinaryOperator::Divide => call!(visitor, 
visit_divide, left_id, right_id), + BinaryOperator::LessThan => call!(visitor, visit_lt, left_id, right_id), + BinaryOperator::LessThanOrEqual => call!(visitor, visit_le, left_id, right_id), + BinaryOperator::GreaterThan => call!(visitor, visit_gt, left_id, right_id), + BinaryOperator::GreaterThanOrEqual => { + call!(visitor, visit_ge, left_id, right_id) + } + BinaryOperator::Equal => call!(visitor, visit_eq, left_id, right_id), + BinaryOperator::NotEqual => call!(visitor, visit_ne, left_id, right_id), + BinaryOperator::Distinct => call!(visitor, visit_distinct, left_id, right_id), + BinaryOperator::In => call!(visitor, visit_in, left_id, right_id), + BinaryOperator::NotIn => call!(visitor, visit_not_in, left_id, right_id), } } Expression::UnaryOperation { op, expr } => { let expr_id = visit_expression(visitor, expr); match op { - UnaryOperator::Not => call!(visit_not, expr_id), - UnaryOperator::IsNull => call!(visit_is_null, expr_id), + UnaryOperator::Not => call!(visitor, visit_not, expr_id), + UnaryOperator::IsNull => call!(visitor, visit_is_null, expr_id), } } Expression::VariadicOperation { op, exprs } => visit_variadic(visitor, op, exprs), diff --git a/kernel/src/expressions/scalars.rs b/kernel/src/expressions/scalars.rs index 3c36dcfb4..53afd7d2b 100644 --- a/kernel/src/expressions/scalars.rs +++ b/kernel/src/expressions/scalars.rs @@ -21,8 +21,7 @@ pub struct ArrayData { } impl ArrayData { - #[cfg(test)] - pub(crate) fn new(tpe: ArrayType, elements: Vec) -> Self { + pub fn new(tpe: ArrayType, elements: Vec) -> Self { Self { tpe, elements } } pub fn array_type(&self) -> &ArrayType { From bacc7b23b42c5e8232d66114aa5b199b41bec4fe Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Fri, 4 Oct 2024 12:05:21 -0700 Subject: [PATCH 12/82] Improve literal type system --- ffi/examples/read-table/expression.h | 145 +++++++++++++++++---------- 1 file changed, 93 insertions(+), 52 deletions(-) diff --git a/ffi/examples/read-table/expression.h 
b/ffi/examples/read-table/expression.h index 162090505..a0f5b2914 100644 --- a/ffi/examples/read-table/expression.h +++ b/ffi/examples/read-table/expression.h @@ -17,7 +17,7 @@ { \ struct Literal* lit = malloc(sizeof(struct Literal)); \ lit->type = enum_member; \ - lit->value = (uintptr_t)val; \ + lit->value.simple = (uintptr_t)val; \ return put_handle(data, lit, Literal); \ } \ _Static_assert( \ @@ -62,11 +62,6 @@ enum LitType Struct, Array }; -struct Literal -{ - enum LitType type; - int64_t value; -}; struct BinOp { enum OpType op; @@ -87,10 +82,6 @@ enum ExpressionType BinOp, Variadic, Literal, - BinaryLiteral, - DecimalLiteral, - StructLiteral, - NullLiteral }; struct Variadic { @@ -128,6 +119,27 @@ struct Struct size_t max_len; }; +struct ArrayData +{ + size_t len; + size_t max_len; + struct ExpressionRef* expr_list; +}; + +struct Literal +{ + enum LitType type; + union LiteralValue + { + uint64_t simple; + struct KernelStringSlice string_data; + struct Struct struct_data; + struct ArrayData array_data; + struct Binary binary; + struct Decimal decimal; + } value; +}; + size_t put_handle(void* data, void* ref, enum ExpressionType type) { struct Data* data_ptr = (struct Data*)data; @@ -179,12 +191,14 @@ uintptr_t visit_expr_decimal( uint8_t precision, uint8_t scale) { - struct Decimal* dec = malloc(sizeof(struct Decimal)); + struct Literal* literal = malloc(sizeof(struct Literal)); + literal->type = Decimal; + struct Decimal* dec = &literal->value.decimal; dec->value[0] = value_ms; dec->value[1] = value_ls; dec->precision = precision; dec->scale = scale; - return put_handle(data, dec, DecimalLiteral); + return put_handle(data, dec, Literal); } DECL_SIMPLE_SCALAR(visit_expr_int, Integer, int32_t); DECL_SIMPLE_SCALAR(visit_expr_long, Long, int64_t); @@ -220,24 +234,49 @@ void visit_variadic_item(void* data, uintptr_t variadic_id, uintptr_t sub_expr_i DECL_VARIADIC(visit_and, And) DECL_VARIADIC(visit_or, Or) DECL_VARIADIC(visit_struct_constructor, 
StructConstructor) -DECL_VARIADIC(visit_expr_array, ArrayData) + +void visit_array_item(void* data, uintptr_t variadic_id, uintptr_t sub_expr_id) +{ + struct ExpressionRef* sub_expr_handle = get_handle(data, sub_expr_id); + struct ExpressionRef* array_handle = get_handle(data, variadic_id); + assert(sub_expr_handle != NULL && array_handle != NULL); + assert(array_handle->type == Literal); + struct Literal* literal = array_handle->ref; + assert(literal->type == Array); + struct ArrayData* array = &literal->value.array_data; + array->expr_list[array->len++] = *sub_expr_handle; +} +uintptr_t visit_expr_array(void* data, uintptr_t len) +{ + struct Literal* literal = malloc(sizeof(struct Literal)); + literal->type = Array; + struct ArrayData* arr = &(literal->value.array_data); + arr->len = 0; + arr->max_len = 0; + arr->expr_list = malloc(sizeof(struct ExpressionRef) * len); + return put_handle(data, literal, Literal); +} uintptr_t visit_expr_binary(void* data, const uint8_t* buf, uintptr_t len) { - struct Binary* bin = malloc(sizeof(struct Binary)); + struct Literal* literal = malloc(sizeof(struct Literal)); + literal->type = Binary; + struct Binary* bin = &literal->value.binary; bin->buf = malloc(len); memcpy(bin->buf, buf, len); - return put_handle(data, bin, BinaryLiteral); + return put_handle(data, literal, Literal); } uintptr_t visit_expr_struct(void* data, uintptr_t len) { - struct Struct* struct_data = malloc(sizeof(struct Struct)); + struct Literal* literal = malloc(sizeof(struct Literal)); + literal->type = Struct; + struct Struct* struct_data = &literal->value.struct_data; struct_data->len = 0; struct_data->max_len = len; struct_data->expressions = malloc(sizeof(struct ExpressionRef) * len); struct_data->field_names = malloc(sizeof(KernelStringSlice) * len); - return put_handle(data, struct_data, StructLiteral); + return put_handle(data, literal, Literal); } void visit_expr_struct_field( @@ -247,11 +286,13 @@ void visit_expr_struct_field( uintptr_t value_id) 
{ struct ExpressionRef* value = get_handle(data, value_id); - struct ExpressionRef* struct_handle = get_handle(data, struct_id); - assert(struct_handle != NULL && value != NULL); - assert(struct_handle->type == StructLiteral); + struct ExpressionRef* literal_handle = get_handle(data, struct_id); + assert(literal_handle != NULL && value != NULL); + assert(literal_handle->type == Literal); + struct Literal* literal = literal_handle->ref; + assert(literal->type == Struct); - struct Struct* struct_ref = (struct Struct*)struct_handle->ref; + struct Struct* struct_ref = &literal->value.struct_data; size_t len = struct_ref->len; assert(len < struct_ref->max_len); @@ -262,7 +303,9 @@ void visit_expr_struct_field( uintptr_t visit_null(void* data) { - return put_handle(data, NULL, NullLiteral); + struct Literal* literal = malloc(sizeof(struct Literal)); + literal->type = Null; + return put_handle(data, literal, Literal); } // Print the schema of the snapshot @@ -310,7 +353,7 @@ struct ExpressionRef construct_predicate(KernelPredicate* predicate) .visit_struct = visit_expr_struct, .visit_struct_field = visit_expr_struct_field, .visit_array = visit_expr_array, - .visit_array_item = visit_variadic_item }; + .visit_array_item = visit_array_item }; uintptr_t schema_list_id = visit_expression(&predicate, &visitor); return data.handles[schema_list_id]; } @@ -331,11 +374,11 @@ void print_tree(struct ExpressionRef ref, int depth) tab_helper(depth); switch (op->op) { case Add: { - printf("ADD \n"); + printf("ADD\n"); break; } case Sub: { - printf("SUB \n"); + printf("SUB\n"); break; }; case Div: { @@ -416,33 +459,46 @@ void print_tree(struct ExpressionRef ref, int depth) switch (lit->type) { case Integer: printf("Integer"); + printf("(%lld)\n", lit->value.simple); + break; + case Long: + printf("Long"); + printf("(%lld)\n", lit->value.simple); break; case Short: printf("Short"); + printf("(%lld)\n", lit->value.simple); break; case Byte: printf("Byte"); + printf("(%lld)\n", 
lit->value.simple); break; case Float: printf("Float"); + printf("(%lld)\n", lit->value.simple); break; case Double: printf("Double"); + printf("(%lld)\n", lit->value.simple); break; case String: printf("String"); break; case Boolean: printf("Boolean"); + printf("(%lld)\n", lit->value.simple); break; case Timestamp: printf("Timestamp"); + printf("(%lld)\n", lit->value.simple); break; case TimestampNtz: printf("TimestampNtz"); + printf("(%lld)\n", lit->value.simple); break; case Date: printf("Date"); + printf("(%lld)\n", lit->value.simple); break; case Binary: printf("Binary"); @@ -454,38 +510,23 @@ void print_tree(struct ExpressionRef ref, int depth) printf("Null"); break; case Struct: - printf("Struct"); + printf("Struct\n"); + struct Struct* struct_data = &lit->value.struct_data; + for (size_t i = 0; i < struct_data->len; i++) { + tab_helper(depth); + printf("Field: %s\n", struct_data->field_names[i].ptr); + print_tree(struct_data->expressions[i], depth + 1); + } break; case Array: - printf("Array"); - break; - case Long: - printf("Long"); + printf("Array\n"); + struct ArrayData* array = &lit->value.array_data; + for (size_t i = 0; i < array->len; i++) { + print_tree(array->expr_list[i], depth + 1); + } break; } - printf("(%lld)\n", lit->value); } break; - case BinaryLiteral: - tab_helper(depth); - printf("BinaryLiteral\n"); - case DecimalLiteral: - tab_helper(depth); - printf("DecimalLiteral\n"); - break; - case StructLiteral: - tab_helper(depth); - struct Struct* struct_data = ref.ref; - printf("Struct\n"); - for (size_t i = 0; i < struct_data->len; i++) { - tab_helper(depth); - printf("Field: %s\n", struct_data->field_names[i].ptr); - print_tree(struct_data->expressions[i], depth + 1); - } - break; - case NullLiteral: - tab_helper(depth); - printf("Null\n"); - break; } } From 318453e9e9420beffed7a56dad3d09fb546cf64b Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Mon, 7 Oct 2024 10:15:54 -0700 Subject: [PATCH 13/82] Remove cache files --- 
.../clangd/index/arrow.c.593AB35726E16CF5.idx | Bin 6730 -> 0 bytes .../clangd/index/arrow.h.5C86D068362A9230.idx | Bin 1184 -> 0 bytes .../index/expression.h.2E6480F4E885C761.idx | Bin 14760 -> 0 bytes .../index/read_table.c.A243E3FFE9F1D262.idx | Bin 6072 -> 0 bytes .../index/read_table.h.B5A915C23F6DC678.idx | Bin 1962 -> 0 bytes .../clangd/index/schema.h.9F10114AF5265F91.idx | Bin 6838 -> 0 bytes ffi/examples/read-table/compile_commands.json | 14 -------------- 7 files changed, 14 deletions(-) delete mode 100644 ffi/examples/read-table/.cache/clangd/index/arrow.c.593AB35726E16CF5.idx delete mode 100644 ffi/examples/read-table/.cache/clangd/index/arrow.h.5C86D068362A9230.idx delete mode 100644 ffi/examples/read-table/.cache/clangd/index/expression.h.2E6480F4E885C761.idx delete mode 100644 ffi/examples/read-table/.cache/clangd/index/read_table.c.A243E3FFE9F1D262.idx delete mode 100644 ffi/examples/read-table/.cache/clangd/index/read_table.h.B5A915C23F6DC678.idx delete mode 100644 ffi/examples/read-table/.cache/clangd/index/schema.h.9F10114AF5265F91.idx delete mode 100644 ffi/examples/read-table/compile_commands.json diff --git a/ffi/examples/read-table/.cache/clangd/index/arrow.c.593AB35726E16CF5.idx b/ffi/examples/read-table/.cache/clangd/index/arrow.c.593AB35726E16CF5.idx deleted file mode 100644 index 2f130e517ab0571e0acb7fd574cff1a09dd7f767..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6730 zcmZ8l3tWs@|9^g`X_{tE%`{!6%XFKXO6f^T(Y>VGs+MIJvE&*GE0$c7iYS+mQi@%x zNNcg%Dj|6nOS!Dd5?3JHBn~&cln$0pX z_g!xL?t#OvU7O42CE)9)*j7mR`P9mmKb|yfx#GP{nIFHbS}xAXyImHVZ+~rI)HT`S z2K3}oDt?f+Zd>U}_hA8p!v%e@&8J&dAJ1O-yxlm_FiQ93pE_H@OSN`{|Jk|yF?#_c7HP!QfS~Y(EZ;is=oA&MaiuD_PEAi!dr_iSv75nB3$4lP$Opd;jJ%#Tv zVG`dj*~Umu;az(=@o!%16^UX;%^p{K#XB+Iuin^xzsX<}%)v#wL z`rH@KZ%KW#7r4JXSO39@ruzPVtBfn6 zO3&pl+@4M}-Fz`MdRb4Aw&SbeuH$kSnI&{4{t*8q^bej!%Ik-em%jg1_pnn!;GZ`i z1eGZpH~sw6o}hhSmw4MwIug4sc46q4`4!@?1Ra9~`Df4Wo;g1Md2VFnm*I_}g)e@Z 
z&^ygyT=Sp2{~X-9cyyzSQ|xUVR~>a=s$y?jm0)?@=FS@pyZ7!}7F;^crsCN4-wa!~ z{o}9j^^McHy!qU*Pra47rSh)+Gnus~N4N&Ze&3XB?{!UL_h?_0G}!Oc6+ZSpt4!Cn zzH9zEOB~gb*js7a?tI?xb@bE0%u7Fv)TzDOd24*n_Fr@(pXa@f5$@bxE(zW5^Ki4p zORZzC#&uiBCpBvaJl!7q=<3FtYX`NRuj+?QsQ-1+T1%575y{2VDy}UJTT)z-oRT!V z4;Ur}=ew<(8Dor@(G0^@0!W*A|41KMvu^6^Dksxs!9oY}p01ZGAM7Kye*=4PgCCS!DYz>IMKHwZK`w|<-D z;EtJ*gh64fupW9)d5id4(p*Vhf}!s24?;ym=A8swq3 zE}V<=-Na{{3Ar0|xR%(;sJD`0BbSeK-!S?U(?pUNF zz^qUtlyYjNu2N5RA+R|h$^of5S_BWz0S^ujg5MyFRi}(iL*Z#ChQsN|IUTukI1{;M zB2S8ET>j>1=dK_1h;=d@nTUp`@phU`_mtscGJF7Z4uFJ~ch$AJn6fhg5<+MKaT7?X zHwg<9XYC)8FCjsy0^KSwQLh>HD(GGX6Ur!>`J_y6pv{nI$VFzk$c>sp1(5K6mKJ8) zU~C&qq#6c%4;%KKx*;bT+QFn9Bg3P%!))JV=khpAWj7{E@o*kHVvXV zoB{F-aNuwz=w*V8!;eA!7#!#{R|9r=dsV)+B=a}GXASTwn41MsJeCiUs! zk=pAWiN^E5I}iGtBb~tS1PNuNEsbdW(9bxH*=SfcS7)~*8%E9ds2@f=mLjQ?mUoUc zD%);nyz@{fQ3D96{JZ`F2hoXi7edHK{CtFImE;$Hb@XQLk${7H&0Z>w7C>IEadOJ0SHgyt3RY#}8 znB8Eu!H()W{qKgrZM`yYVzHI3l|E(SO>R6qVzzWN87>4~Az+$h>p-+kXipT-u98gfR zFxwB}evs2jXwpjA`G;OlAepuZctzk!?TPs0mDk<|98L%ec?-pK;zxaURbf`n5yT!} zJ-zZ_<|7b0=f$x)Ys{UcYo)oES^GqeFf+h=+IpivzdsWi9|G= z!yKfxf|40i3FBxqT5TU@*P^vz8X#?#?@wFb{CXOh<^`Y^+-me$SKD=0Hh(6O9wQhd zrS@WWonW21-?xlv2<&mz*}1>@Z*Q@0755q&Zxo($)MV?Kn_D5nPAyw_(EF^?!c z0Ez>kqy)^~2ibjYw@PyO+i{cf=XpHL&f?9|u)P#eDd=%xeSrEvkER#18Hk^O3}^`a zgJv$C7*n=@7?O$1GLao6?1|{N`}6uG6`>8R+JL3eO^BJLcm9jT2_mu}y`|&mlE7>n zKTe~?&Ee0{Knr*aGy;^!OVmK=z)RQMATxlMp^1xuUkpZ^LuG)3 zwgB-9kP*c+_u9gb=O}u#Q5C+RA21i0ez5BYM-FEpQ5F(YMP%V1{~Q!d@r8==mvNWf z7Lfoof_Ec?&;Zc`nwIe6v5cFs4`Tb^b8d!hNVE+}sm9<{MLIJ)%9at0b>LnHfwXH= z2GVvu9Oh{AD4#O!^(XTB6+}W|qi~~rD7og}-;8sM_?isMMRGCa-f5M$I5j9{H5sl3 zR1M}-KZ?!&*7~d1)uia^K(CG?VD>TSJ?4r|{&Ib*?zn9#4LL}hgB+=4Ma}hg>HWJ@ z8mb|(8e*wxRdB2uK}mx{8#QT_szWC=4YM2VH+s?~S7*7Xq;7HR8ZyljKu^GyN^kh( z#iRXuy*7|(lwQgJ>Mv&5AU1-krr{>>|8Q{SCV2y%Zy=P5|5ha4ima&<$^`!3=Yqb> z(8g>iFI1ndRm|=Lv=cD34zp(godwLfcoWb~z|=_0b^*VO+hdE(cDTVv_rqkOK9Ki8AXi-($UFo2)9NZXv}x^~5ynS| zhBRcJ#t|;oj`W`ROV|(r4Aa1sniQ58Q6sf4`i@A%2x6S5YtNUxy*ixz>!^?%eRNz* 
zX&y;d5o^dHFC8yaDuP*Wu>f^~xw_9418jh=yVYX0xW^wRZ@bt^9)E`KUX31ua-t7{ z$SESNv{8D}rO}endGA8I_Gu!00eBZcK#K>nPl5lGt4l}Sk~K$4Bxi`cuf&(^)*mjI z9+nqg|0NCnk;oe&Lq4}*ea`<}NSQ3x-(1#lpy5X|a+xz0nsH?yG8M@=WF|H<Yakb#KA?QC zpmh;4BdBfUiqC3E0}bW{>(Z2Avb~}n(4qS+W(R>c2)eWwF`J5XQ;|N!ccNZA-x(^c zBXtmB7osjsRp&9d!5!-q#V&oHCtOsq)j(hkgmC3{9&FEpJ;n3_L3U-;=MCx>HIFnD z)kh6FjM=SVu$8-{hHsKgk52fao_PBVWY4&hd;Q-DPQ{_3yM#~+jSZWs)r{{?~#LkjB|`1r)HMttS}Ch!pKqxrAi~7Rwqb65E0HB>z#@@vo<^$9>#3aFO5#utR`HQ#t+bQihi${b|FX=Py69@z`HQgB7C z05}2xy)#`yP1jK}eJRIAu?7z?iNsVuzo^Pkn7sEir>Z7)jY|bln5m zZ+QNfcySJ7=Rirb@QvwU_vOuxRfLTp8^dY1{CDp@%2Vh+C&Ps>tPohbI- zEY&zTZUf#n5K`|kdmMPjLCE=b0{ADm8$V`G0{^5YZU$a6x4&WbA@Cn+9yB*2{$}pe z3TF2s{(endgm^_rozx{xvXaUr?Smv%JRP2TUm@Q~6XR7*Coi9qJk+n$O^acM(`Ycj znqApZT$8e@M$2uuE90srFmsaTCR!3`ZDFY+5Q_Bl#Rd|rE3-F~ON~qnjm>RME%;W< F{{e|$m3IID diff --git a/ffi/examples/read-table/.cache/clangd/index/arrow.h.5C86D068362A9230.idx b/ffi/examples/read-table/.cache/clangd/index/arrow.h.5C86D068362A9230.idx deleted file mode 100644 index 8ea41df84fe5ee822da09fbd91a8385e8181fd92..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1184 zcmWIYbaR`*!oc91;#rZKT9OE4G6({3aY<2T5+eh{TxJG_iaBe84(1&);5obe%Yj6N zTMp$nt_Vxt%5>jSJZ0Ik-f3#FkC=pR?br8BT%40Ut0hEp+x^d-1T-UFf={7xw=i+_Yo|7*Nj7p3=HupS{ab@l6$T5mq zBi8IUF}EW@tm2?l-u)9l`W9gMG=RVF-@0-1C!WM>Bn>yaI zWd8NnCmrg$-pk}}eN&=xUBjom45f9NVX^KyZ{!aB*HYT9nxHNd9=GH*hsd?y8SH*4 zzRg>VOrMBtE8+Ai^sYSocYfz#yTX+ze$UHK>dXvW*_zdTc$w`d(SM3kH!_Rnt9|yL zyH0F*+>aaij1d7T?YmWiylu2!Zc97nzys8OjFI&i&UOb}->6s4vW&q4|>MlNtT!JMCwnUgTxa)tmS55Fj< zs2I#dSnvT=gTfA`+91&C>T@e|RiJ8d4smgqYFKEpa6v*59Ec1IJp958!aQ(K@=Hic zsKP^qUs6I+1s2LMnvoL{%1{GJx2>GcF^$;@Xq^~`m?X@)n1vIUs;kb601ERn@w3BH z0?d&>H-lpWYREFFl|iK!wiNP%OZ# z^76TOY<wBV8dP#H5xQ(G%*$oR@7^Wm>7H4U>9uI3mChx z8>7aCQHe2j>|*b|i~8o?-PyB$-@m-a_c3SZn^We_nKNg~Y0;=bgT+-D`>b=LpL=!b zpK4@`dEb&?C1Gwzq!!+LXy{lua18fvUOaiO1;_`hxPb+@WiFPPqZH# 
z@IJcPiGt|H)7xYwX8M149Xw%a@|Dc~-h2GVkMB0l_jdG~hY$YBdsCrD#dW+eMrG%|_PlX&fvA@{jD|cVy6p!RyBCsoC!LYCHNy zroIYv@7|APSk5<^5_jrWp7*l$?f%Lt%GjK`G3Z6iX-n{iO=r%XoA@xRc=ONy>g)UW z;8jCC1_T9XTn>1$yKLdEoIlSz2wC_0&b1%XYy4F5XvDX8?p(Vecwyz8^rV_vtRZ%39~_qz4=MWyBxhJ`IL$JO0E zDk>nU>B;^F)0?DDYua~oz0)gBT34+qHMU#3YokW(%XxmdTh_4DyoW`LCgvZ%d1%I! z69@0iD2kiw^L1CHNA)(fe{Is~%XJGc6})y`v9_xYv}`f zb!?1Qfq8G6HeewzE33ztT85V~>VQc|>XgFr5*sN=aYfw1M!*+Lt!J%g(*g59I{tl= zzV41K!N*`_)p2+Xk1^_ihIifC=xWh?U&+eqBe^6mY19Fce+=84-qb5fR@iJn`b_Yg z2{uQ=qzDTe2`TKChs7FnSnY0A*kY~I3XoSoprc_@l(*h#%#(%}U-s|q>&D`+PpW&W zSqJ(;$mXco*`?<0{wlz#ldJJ+9;ya(a>o?*&XS*htxrmz!p3S#j0br<_&FLTm9*+h z6jv9lD@cAHWM$>CPi0VVNwYXeTh4h&Uf9|Dk|K{zQjJIcqyX}NL=0{V$XOSJ{DXosuw((5uQPf_rl&d_o95-`pVCbvho)8Ls_PN;q!&R4rq?#br+kw`$ef}WzBJl z1fJlo0}@eu%&7%u!{$}8uz@(bkx!!l9nk1HSqpot$;hj0Vf~Q7&Pr#i4v74q&iMS* z)gzLutS(L-#iNWmpsycV?O7LFe7)*c_6?G&@G71wrlyHa0kgg^zo?YTKKMZLREEBq#ACiwnl3Y$!IOkpR6q76#ado?YZ~jqXOSDcK!L$)g zm%U+9w2wa1*Pn9VbSV9Jn3c7_K9zVS4;>J%uje)xT_>&LaD~m$=9mkfb5S$xjZ+OO zgr?apje9@B%9629ERS{9fj$RX@xZQK4;LJ7F;ZbuwK{ z^5|#_8wDuxG|x0I9gs|)MK$a3=1kFOg)P!1*#(we;O}Ud6zQc;(&z6Z^J-ODI?l>k zW1m*Mm8=7uk<=9Cn_m6%66NPlu(CQxj^@!u9T1-p4<}Sv9?@}>QMYF6Kjfnf=$7mI?%a6HF))`Zc)_7%vn~}3dvzS%%lTi_Kvx`zemaRY!hpP z;>=VjEO*)`CdY(T}asMsDQY$Nm7`bi@Piu-X&LIzi3O$2G7~3SoX%LSao|1Oqp=JPFZ)!!60Q|puqwp@sHG)xMS zb)C3cX49G(cSmouvNG7G7O&;51AQTM0@klLH0oW;yvfQcBDpHB>ZSuCuP!>C(&zI* zJ1p$ynoRBO(c4o8B#B3V231dJ-|!EGP1HD847SA(;b@o?>Zx;ZX+lWB#wqi$(`2pF zA&?J22}i@EaI@a&d}+5!5AsZZYUdDLuFvGptU92TPVT#WE4o|wUMp*YbJXB9+;l*E z9@qfw=TB*Vz{-LkC2jJ1cGaUkBq3y0Jtg|KR?Erw(+ort_uB_KmD^^Pq+G z$1A#_rJBBglgfXE*Pj%+-v+n-0RtTayEls7f$K%X2h z*(G^bpEN(s|IwUDlwlwLKE-uF!-sv5(7Af^Gyf>;dmLT>eg#m<(J;vmnhj~kc9w8! 
zd#aA|ubX~rban4)Q8hs9*FRh_cSC-QGYXrb2{;FAIS}J$oO&?TtM;;szkhaCVdJz; z+2EOtN^5VN8uj{I`F~FIyoa48X`PmXZ8`3zy>Z_4B}nM5MR<>%g1e-ni-O)N!w4Q8hqp zjBUIB%Mu?W-=ZC>z|?dpUC{x3>B1_RwXVJBS){N9+7ffXa}F9Ad*fUpa>3cnmHKY_ zU}1f6j{X7tLv%oM^y|_!eNhaaAxA`Iko8E4E*S;yY`?h>;JnY(UEI8jTglmIcWu%5 z$G1)}FGbCOzzhhXpu~tCCbH#2-lq{a88^g-6u^j41YXHL6k!R_nkLl()17d`qP zKI8-#(Mv(O`j7`>#JiI%10T9qjd)&I4*1YnGNK)2x!^-K)`+KoEe9Xx2~k%;u~iW2 zjMLQBP;xbt7Wh60*ayMR%U#_M=KWw3_%R4P1|eh|DdbdSJ<_C&{qKw6Hn-j80koeK z8KW9#VX;ty+2A)D0-U$6x`ms!2sNOH8HZ`%vKW>HepwLUj8oKHZq60MD3azfh0myn z+Ml962e0Q4NVYYsbK1F8Vnm;h?E*9T zgGO|#*lsYB%XRACQDKi;m$gPKYG>ZrlcG8W<7td!BvNY)j~EZJu6ViekVLo<;~{ic zWr~%Ic;YeclF1qv(ak{9CzA_e#Lx(1Ynj3(BSvrN{K+ISBl-+@E6C(e7*SB@EyyG} zBgVQI`pOgs8_^ZQw1i9>Wkhp;F1Spl!iYBpx+F5)CPqBp7(B{!zKv)o(O}7BMF0Ex zy8_q>kVp>{RD-A$5HIkJ5VaBFoiPo@F*PM%)lO~u?Ol4{1C81 zAdy*6)KQ#|;+~|h6>A=l3kJ6FbwiJoxA!1NRZ&xTiU;{AXytjj2N{1w9l!>-Ib&jV zSlqA*&ZLQYnR{O5-gJB^p@ip9%O*L_0v@M*AngOI;Bp_7-3N(;A1c^2Y*dqX{&>7@ zf$0|b61$Xv!UL*hp2#&&)Nuy$ID?IrCGEG!lzwua!v9~#fwE~RUoDj;foNjod*+Jpt|~z>AcH?n3Nl&$NmCD+F61l%#E;j2p6{rQ3wL zodbSzAb<|LqW;d!zYB9qnK&G#rQyV|=@2v>LdYg5>Jn~V!o7(55pCmlEK7=rM(ub7 zrdMDlHd^&d__|53XABaqg6XQT$`5|c_gd4eghqGH)AE0B^#{D55+a89~Q_iuwY)UWn_35`;Lv zmK~%-i=s{hX(E_Oy%qHcFL#7jAi+@L5a-vjhC&22R~KgjYgoTn5u+ zu#x;Ivxv)TX+^Q@B#o5OQBB2T{^Ab!ih0vAf zWp^YcAJYi0!Sq_JPq|54UrS61eV7S;Ga-QP7)4#n&1;1kQO**FX$ed5**Q=;2NKBx zQ`8UK{DJ!u+my@1VOk=S5)les#NoRkkuG9IJ#0x!ngBc6v!p^l zuYmUz2&JVdTZ%hiPnA-~;mK1!-*%`&ePs4MnBIdIjkwshQpx;_l>W`x~RD6X+4>C%W1F9D&>Y9D&>Y9Ky-b!1e68St8(e zf$s+hqxC39i|c6#TCvLuA#x#%Sz2jiSjcF#$ja^LW0=89H^KB)y1jJh3dIbkJhW(GfeGtU`r~_ ze+^96gpFVMWMbdFJ@YlfLohuQAJU~s-CZL$EboYf#ZYE3l&6r667%?lT2@{dfL#!{ z3yPCxuBd0Z`K&NEt@A=7JN7K>g!A16(_JCTm9<0vs9!a>GZNl{>8&78iXRu!^7}%R zOCV|ql%bGPQJ-@2Q(>nm^^e1}yuTQ>7$O%#Df$dWeZARB+T7@FTSXgJ5bYL@$N16kelta`OxB^zaOyH+g!maAhz2 z$Wc6a6ptf)_Iltkw*8RD8sQ0;o`4S#%4C-rd2`!^ACT|~OrOA?2z1TihiEknVSh3} z%5c5)G9WGk63Id+Y9=>liru0QGRairaGI9Cx!d2ueNoh--0i3hi}2gDN)aVI z744{4}&xW)6z&KA7$cu~E_uPlBT$=~zjN$h=+(^F?VNyaKi2 
zq`1r1g8N#KNxKwv2ZZf_l628v{Kz9N^C(gy3fOTydkvF5Oi_!#zepT;s%7GbYK2Up zADK`r6G~7_fPoY@FA(}cHBKC+)i#BECPVmSh^8Z|sMEPLom+@4QeR}tQSfw%;Td>6 z6Jj7A6Fb^#pMw9}5Of_=td-H$8P_F<=7eEpbw0n{-GkDWINySo}QLB`S zdot5t&NMh9@^{jW^@qBw*@!!M1x#0j38ZQ&ez8_c72}sfuQ5v8JzHke0?ASZa} zKYee^J-XQgy(91(1y=DPTfu)T6nDkrw*nS1?SEQE!2hx<{XY2Khd@Dp4Bn5yCE9xe zeovs7pnnA4kD~waYAr9s)&F@rH>wZ7^gw)=7EPMo37qP}9NiiGd zkD%=DP)_JyCJ)Z!VYJa&J|yZ0FD3AN9-Yri3;Z|_Kh7fsev^mY5)*i#f_d{`}WmBjc^l8 zH$@oZjEiKwT>XEiZj)^Y$u>AeN#%8vkXBn4iaW_*o+K2N%Iw%ptFeo2Qw`>+BA@Fl zx6Af2yF=WHI#3#D7TPf|VqmnuMu0Q|%#`6npBExVK(xRwfOJ7zw{+j*Nz>}}6ci}x z1dt|xr?4$ExX%ocNu?Ayet=fT7nXA!xUT~XSxzeHY2M%JVV!_D2Q+w?8rh;ffb>w?bF^7I(WPa0*;eH5_k(oD01N+j|Hh z!IMYgCEK3{IlFx!ec?rgD@AQ2HS(f*l2*40oCu-f5}gFaCP5hC|NTw7$;~%~y`@Sx zhD+KTDDqAeb#TbwaAEg`*oL@R!I2O;5+cYBQb`oW?RX#Ml;bP#dLk0J%rZG;_@t^1^B#*%X3}v zg4D}IT)UE+#3Zr8ZmHCOyDm+!jvf57DjuJ~gvKX%2Rarv1m z{*uctUGXcU^ZrKK>d}>F`7?6_q>qj)d+rYRDB(l@2Ul%o1Q`z)> zozi=1KYDAx{?~(kFxE}{?B)E))dAO^uN>Jxed*D)cds5jyQOt!`W*HiU7a0Sckwe# G{QMuwj5u!q diff --git a/ffi/examples/read-table/.cache/clangd/index/read_table.c.A243E3FFE9F1D262.idx b/ffi/examples/read-table/.cache/clangd/index/read_table.c.A243E3FFE9F1D262.idx deleted file mode 100644 index 9b263910458b143343155249ab4eddecbbb33118..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6072 zcmZ8l2UrwI)1E4Jaajg-fh8_kf}jXPQZaIv5IIcfsTfWSs33lzpd4mEQQ_1>Kww2c z6i~r{5rq?T&JxUu5kB<@XF!j?7q55r|K@p^o$9x$x~jUWx|=cnLx=7+W*EPj{!1ce zE&5f!FpM7gi(V8p_aQROF%iQo$=NqoRS@<$XYsSriy3fazw4AL5a zL^Vm)^DnJg6j3}x%V9)Jc3Jg8`2l=C>ye45dQFdsX?_~3Tk4Dpt$auJ_C6o6bL7L) z`0U6VH`>f9w)r1u-!l5uhvvvBkC%4w zZ2To7vg*7;a`d-=O0$=F-F+U!&v~7Dv;0rTl!aCqnew@8Y9Ha9c&k%q>wJvdo{gKC z5_{|1TFsB&Bh9)MrcBa{_wsUJg%*~pV|(hq617M}8V&c4Y5!2>Uz9fB?e@#V`|2xP zokR`)-If{sS8&XwXQL+P$-exOuRA&7vLwpxz(v~$GdspTuFl);H#;u=qM=D_1Im4O zmZprGl%+9j{-BPowfQM?HBv%%T;95}-CnVMM(U2zNx$f^r^dSf^w$IThVgzo%VJ~K z)j!C2x^kdpSKB%7du5k5m!3aobuBrqH1o}lP`$MBAx;H$hL7h3t&2UsxbQ;4FY_*3 
zXe=DLc7^=)lja!Tbcfr@Oi~9bH^MuEKD!h);lriW#OtLTeF=sOh!%YbK3O4 zaj9EEVzs_}#Nq(-xuPS8Ulp-U&~(f5%l@9ze=7{0@8M^CXUwF}HU_(|^t&0@tUWPa znjXD0V#Xd|n0M`G3_Y?=Yq88ghT(ESmCK2~S!XLr4S)rw{M5Ef&hvDDpB z=qP2hL;|}(PT&qkR}&=e!ex#u<3$*(gjN!FC!v#s(Gr4ZCqq&5N3)gB_qel6Kf*9W zIK!y(MJJf+`ukLe?JoUUraOUCL6{20|Nc-G^9i>2<(uU!2|Pe3tPhEJYwG0-Jw-D9btVz=BYz=sL^vCoB_%6J?z*3Nul& zW=GG{XPA)ZxobbXy3J^?T!3DHvC21YFPQ8FOVvzV8A!^&kkWA-?g7g^(gnbB&EVb) zKGbTO11SEVm1ntw(CZ)!r5akVdoR2iGFY2v*nr(OV1KG1+2B3c zf&dNR*#N#2nB9AEVTx<868H$tAE5^YE)3Yz%lk-+1+rYQaj=;xAGt`aNR}E#D@9Y> zStisema79n9Z0AymTLq-qdMIRf>w1p2L(B(#$Sw@#i&Q+L(}y7toqkZnZ_UUC$;_cw=Fho^FTs+3> zQMXubGvH>>qjgH_ow%ticHYf;5Y&S%)y;BEAZSvj+d$B!PAgELKsEjn)GR?&l9aiK zzwWd=Zq}{sq`tB(@vdaTfJ_LY9y1qnuN6lacoF#)FlYf)>e)C(abD|g^&&v* zpxBXAoz*~_Sto2a_t9jz0x&25BU)lCw-ZD=L7S&bK~xIb)HRkn2JA7A@bpPwPpZ?E zU|9(^6kmL#I75`K;nRiX{Js2#@~vR3`B)b|NhjlE166n7LNG*)nu#+FsNF1S0M5D4 zmpAV&;9W4FaisAe;rh|0`GD<4|J^v5GETKxTvZ>u+?T}VGT>#d$?Bibw4r1;24gI!JrB;lRXiVdeFPa!L`fp5)Vzp^+mP@`8bpBmfcHU8 z^;4e(EElLBXhZdjJNFFU+I-N>2MgZK4)ExJpQsXnQIL0!fq#r6+CKt*1Xa11OQ%|H znbxcXicnI-+d~USL;U}?9hO@mSYbxJVY#h?9P=H#Gn zBg&mJQAny7Fb^cWnMJ@9feu}KmNb0atDYQt!C^0W@OTPh3g1VLd$`_vp>Z^X%=aDe zJ70-D@qfO#V?28rX>J|^J^cBu8zu_Vr)Jp?2orr|XHO^6vv8Ic9Zq|5q~7}q6H12T zed7Dk;gtig*R&Z8pGAh3881_XV7e$;Txya!hYV+Za`zoNLQ4kpmH1Mm$U|)YM>WotC=b}Kkw4q2r|(g(A@*ws6IvQ);=f4pNJ$t z7WU1;QB)$bXiRlQacmR;4kI2$LkcX9iFmE}YO;u!X4c)zjW&6f8z~tnqvk9wSv5t= zq<;*N-iWx7?`jy7Szz^_pd}qP_I?LxvQgS^5xqQ4GC)Ob}s zk&erW{x9JCh2Q%0tv%K+cWqcrfK!O4_i9~;mB{h|T8+3`oi0EuP^b5y=01MEB4-)IgFH^lh-9krz{rOu0Wbknrx`jO zi2vurSbxV;HY&V5nZ$iPTCPWH${8{A{Zno>ydnM2g*bZVU z@&0Y$uJ0-T*-Ex~xt-jRPhlHf8$G(6v)s>GKeIHUP0lSn-1NbP#G4D&3YOBo`fyN} z#j}IQZ6m`c5Kr*wsgZnC<5^FI64(T|2@L77O&i~BKAN*Ht4|IOQV_;q+hBWY2+J+dU7$Lm1mk~~&&m0&Ad@7Z za{~6H(&43wX+NKh&LhA((0j+Xa+(!nb$%Si`SnRb+Z6Pq8lFu1^`6ytEb7!?@($c7 zFsf{r#M4YhGLWl8tVA6O{B6F)Hu1R6J_2MRX7MHcdegI__mfs1Ag)BXMv%+WkB8;H zfzdZGr}Trwmo=3ygd~-?I6<6&G4~)odh?D#j0b0PU z1>C4Aaq2p^2@jrC5#S`^N&Y>ky@~ZYJvs1RHPM!ihUutEy$0B$4LWgW2+$6acHWgor#Cz-zN&SZ 
ztk6WmiMISSPXM3r2l-*Eq#=P}6Rr~Zy=c6bznBd64cV2My!jdd&H|nV6B-JtiDaen z{zrq&a!&!Dg6amaduqF#BI(f$BL4)up77h=-K0-}G8gtX0d}M1ZuFo&&PW&|>%Ok# zHnG^u-prZr03)?V>e8A&P&s@`$=)+}iF5}Tc7W<)(Y2dVQec6}eIlKMqjPW^Rm*Y; zv{&#qkj0nuBTe7O))5UG(Q+eSMV4ZtxXAP6b!1@*KvV!Mf2b(~QJH!duY#^s;7B8~ zAlqU_)7JG5h~OH~UITKz9FjphS)EQpVH&?}d(8{zx5Hffp)SiA+ZwC&JS(Y{Azv&3 z7|@0KPwvxJ0d_o92kv!zY+0@zr1hXW7bvfBj*a|2sl4wY8gzvaV|K2xgaQL?d66-3IRPouhxx;gmUwXw$ z0?Zc8)}aBrf5+i*b*cR;GMs>zfREGtcnVID z(bUXo8)2DMlHN#iVIAN)Q1xJz+XlFezb5M3yfiwz`}_|?z7|YtL3R7SCtI=cH?94x z1Xz!_p5HOwUz3=yE2gy()y3#rjB00<60|PCuGHdhTbpyg9NYUh(U1g~1cub+{DIL`IKUk2f=X=(;8s8YaqDBZ|5v`8}K%WXgHJ`2!2zilTeU^s_lP4H{;wR3tn`P z8cKvNi9r76{o^4w66~@S)#!5UQqI#+v%-H>az;mmMo&|I^i;lnJg)SEVS1=Ofm<}; zO;z*GQGzZ#d$_o|xqC2fDv)vKKRx(Q7ydIeVrIAnA#LQ=Vr?B=sh+;fK*E|inp+tf Pn;Mx|bd_6b*)snJfCark diff --git a/ffi/examples/read-table/.cache/clangd/index/read_table.h.B5A915C23F6DC678.idx b/ffi/examples/read-table/.cache/clangd/index/read_table.h.B5A915C23F6DC678.idx deleted file mode 100644 index 1bc818940d9c93ed6af8e6d4b3e104ebb39b22bb..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1962 zcmYk63s4hR6ozjC3E5=bguE8mKv)8X5S0Qx1{4*wjED*bL8}FkM+8d%i6CON#o7+F zDxx4-=s*>$j|#015EO|@sZ|^S5#*t*mbT!iGlhc1YU$l2SDQ)x$^7S>d+s^=C5DjT z;G=c`EKCT=HYILeZwmmhhadCijO58&0Ait$U0M{eEUot){9aw-PS2dxpENaRwU``o zdI+1IucDieo9MF%i!=I;4t(_U!P0y_#m5&X7%CoatMD8deEw{B^y;=hl0628cT^9> zJn*aOp{q`M?jEi>N4IbJGpZ&2ub!;@;I0MqnXV9#MN}>Q4x)-zm$iDb}Q5`w6EMxHlh5f;vpI09{5k++$N*vRdAE_)!FR~$XA9M_y zwU=!@*HIsI(Uht4p0g)-0ajy_ZQbBr5;^mWf5#=70F4hPE$8D3nSbRPgO{zef~;$a z8anmE(>=Z42bse!`o<|-@&g(Vc-8*)*yaAx>}l~%L#NvMoNx2=r2Uf^epKC|o3qKt z`dgP)rHN<5V?LD{lP?91D(#evI298fx*P7E*}$C*u_wHZD$^4ew*KrI54 z)KTie3J^@BtLM>)^=<|J1aK7tO2`*tte~)JRr}wKlB_Ts$k6e6zFq+^I~xa`0`}KU zmePW<==Vf`fOo^WxR@1Y0{DgrSI(S%w)VRBMgmNR{bdpv#R_UL;@luV|D@l<2RN)( zi`8;gm}2;|g7raLO6{WkPNWmS4c05|m1|%V7K>tu=4opy_<>%Ku-dco4`%P z3aWQ0v(3G|G`xw=PA?D(R1_m`r;poZkn8h?+X>(V>uCk;#LAm`p)x0}`FuA4NVt6w zCYrPF|$VPlYn}G{0 
zEVJfhvEYzR00*b^D*aT1*`hs<2BiE|3!zP8v>a^@g<(sUK5I)`W!TIq^A4_r*$S>g zj-E9_k}I4*(VC4@O0H6ja0>X)GH{nPu7kCtfRv(5zw~WsJJ21N1Z_vmQGw|4DO=Ww z5h;u<SxxG%<8Uz!b`B(3V@6`idgi-1 ZBi_uKDinc#Adz#I)phr*xWG|?{{Z+iJS6}C diff --git a/ffi/examples/read-table/.cache/clangd/index/schema.h.9F10114AF5265F91.idx b/ffi/examples/read-table/.cache/clangd/index/schema.h.9F10114AF5265F91.idx deleted file mode 100644 index b96d56679c3829075d89fb15fc2c85e83454d7ce..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6838 zcmdTIiC@jx_nvR>)mv`w)qAh))r%Hil_ioE(~!v4RD+4kD9ISRN=-G=WNc9+J0VQ? z4F+Qrk{G)XhK6EfNlK|KnPJBAyRY}X@AmT_{Q7*lx6?V_bIv{cy(5MW8njP~(15Rp zrp}m{G+u-dvLwHWNwcPmGC}AYIYOzM$|FLT+xy}_icieT6d#?InzFJGxGgJv)@@C5 zyWbkHwRhp*G0VJj{la$kf1l=FzO>)Xl3=bfP*Kv+z1$`E_~1Q_d-i+UghY*;pOJKK zlqTkoc4=JVk$?V~`n%ovZka1j=3UsaYu4Mt3m2R`lI2kMTZ{j-*xbObrCquWdY@d7 zKcvTl4!ZASrZ#ti%Omo79^N;+A))fx`XovGz3a`>mV926RC(_9rCSMwJ74bgwYu!_ zb-;JwTZfDfxtGu@Z%yx+BOOyZ&Z_r5Jx=3L)yb^*K;ai09JE#YNd2rlly-s=|`B!AIXOjjWYyPD!sRk(@4xvzC8b+EO+C zZo$mB{7*6}uK2%=>wHzzD|6&<#~*d&b;ta-T&u3_)uH@VKyK$*w=;iTc_pvDtop4^ z^{$v7x_+_w_n)S^g#WQZQIY@7jK5rQz{9cl)U=;>uS^fy(;A!nD)~)w6ReG%lY3(C z#j{IP5y7{!;?Z95J#po*luEP7i>HkXF7jDx9X$NcjS;_HvuT+Ua`Uhzt6%z!t$Ui6 z6)X>06PeUgbMt2zByD_H+gM_g;wh^sZ1V7M9{u8d$!xRN%O%~coi8pnuMTdFnwXW< zwWeu6bJJdj{kL{*^{gyh8*3@q^VPXEJEpI#3b1+DV|9n%3E2LjX!cj~QU5rPbI1J;DGrzj;T+6t^Ou$Y;t^QgYBlef~^mRBzOC2RYqvG{MCR{&$JsBgh>xN?Dyz0 z|FZh(^c%+$=i6m|$TjVH`Chu>>9%v-en<*B*!07u7N6c{tc&9PX21UI*{)T4yWIU{ zWKquX9vA9$1G^Mme44T1iRsj@w@qpuVrRcEV!rZ=edf!ajvi{eyS(-97gs7IE@PKU zZN^77I$!EO@b?k_tavu>?&*QQuP(_n{depOCTtIxE?go{q@Q(Okv1~lIN#@l}S;E-0`&c@$e zo7WWo*>mN(3)L%LdG(pDU0LSaG~qN%9w*KGTcKS74kCxm1rh*yG2*pSp(rYCXabgz4y%2#0INk}4O5q510 zl3I#gdUr%l2S-<}FH5tMBT^-JSI!l;EmVi>-kRo-0}8A#`jnuL06Ic+FllDUUJ_xnlW?}Sk>At?n^N=OVFB{4a^|F+E%bteVtYJvFf zrrj;t79=s)VsS$2#ap*=cLnk$XlIEtZd)jtU-)G1yU1_76sU_1@$L$DRog<*Cp-eI zhfnV6hfx5LUks?2$Tw_M{*HB#t!Huu4aCTika%Nno3;f>6(-&7JD^{GvD1Y#9QzET 
zfdoBCJjt?cK~fuen;(kDNLye3_L%~8BkFmoJZ;(*D&NJw_T&?%s4xX`CTJJzqG?+w zdclk(>(lE7L}27jn4bjkNw5?)BsJ&Uh!W2 zH71A0V$_R}RDigGa4~F1subB0E>ZrKGYXC@8jq1LAt?b=Lbw<<%BALe{}tc4C8S~$ zL`b%QcpF#?8&c?6!e2r%Ug{45|Tndg@nYgQ4%z3;x~zVR0v6|OOM_O ziAR9>JHXumE9&pD54U8tJid-hFrTJQvoqElJ`2=YU}v;7zY&xh!HSj)O2^3zHOY`? z2=n(qeGeQdcUq%3%hzC~YjX?D$!UW`K|*yv4t^RptsV3&_pO}z0Q0F_YCC8Fa0@`rJeLVvW_x%waH~O1jfwdJ;0i!a zQ&%Ju23j*jLP*hQ6A4jB15_j=FU@O_;6&1s(-vYW8aHoj#eRF+%4!zWoG@R3YPEV#`HgwO1o>W6> zQX)ubuSP*n!R9Fo^`yGE$D3S3T#2AEke7jqo{9EhQ<Lx~nshTm#}7hFGQ=w$x?$cn>i-A~#1S2qu}G z&ZW15vVqHncJNZ*mbQmC0k?_4aUP=^c12F~AkrR#_%V~FtFK?cGt(nX5vF2Lc*k&822HSF1<}!XSNcKWUMkoyY!apz_6^*bkT^eCw zC^y1}JEQ=T%KCBF`Z(K#k=XdXa8)qE!aQt*KkhmI+w7<}Nz@wdI82ehf%_X2)V&WM zYz#i}NEt~g=3Nlq1r0^eiG~!Wk4^CqG!I$i(+@yKZ$N^qOMk`2=;oh{BCH>R_#s$Q zN;<3%)*qj;0W=MaHT5oWkKuMiPx1d=R3?p#nPdI7_grF{IuO@^1vO1trZ(UDi!FM@ zpCJAdq!f|)`plCrhX1{Q3?*xUTMJT_3G0Ac$KXzrP2ZFRMlU3UH-Wnem~z1UOWEF_|8L0rpp7?sk@t3QlbL=a6NZUS4%pQ#h2R!6z zFJG(r7m6@{9!$@JjA?KQB$og)_zH-wfP}%K-Cky(L0ONeBK=Lo?F zWM)(Owx18(L%T7Tod?VFU`5rWS7_3_+MX-~7Bb$0^RQq>_2fJ)Xagsg8(VLlPFt7{ zmbJOQgG?D|fxdW7)f9?=M@MwW0hpZSO7YU=I~{T_k%5gU%}W>{mtRNK1* z@dU(A7-Hy#)n%`vYwi;d*h%c#-R0xBI5j)Q_uTjGpk=@<10yW^`B`~D-uuU-pSy8x zGWw2#`Tl18mDDhpKMT^cU`gHfuFBFo^0{q2QK%Zk)y$IgbK+0^8xR}geN;YH%xC@* ze>sEtJN0ijoCi4uIWf7hR0YsNv$p8QV diff --git a/ffi/examples/read-table/compile_commands.json b/ffi/examples/read-table/compile_commands.json deleted file mode 100644 index df81ce112..000000000 --- a/ffi/examples/read-table/compile_commands.json +++ /dev/null @@ -1,14 +0,0 @@ -[ -{ - "directory": "/Users/oussama.saoudi/delta-kernel-rs/ffi/examples/read-table/build", - "command": "/Library/Developer/CommandLineTools/usr/bin/cc -DDEFINE_DEFAULT_ENGINE -DPRINT_ARROW_DATA -I/Users/oussama.saoudi/delta-kernel-rs/ffi/examples/read-table/../../../target/ffi-headers -I/opt/homebrew/Cellar/apache-arrow-glib/17.0.0/include -I/opt/homebrew/Cellar/glib/2.82.1/include -I/opt/homebrew/Cellar/glib/2.82.1/include/glib-2.0 
-I/opt/homebrew/Cellar/glib/2.82.1/lib/glib-2.0/include -I/opt/homebrew/opt/gettext/include -I/opt/homebrew/Cellar/pcre2/10.44/include -I/opt/homebrew/Cellar/apache-arrow/17.0.0_6/include -I/Library/Developer/CommandLineTools/SDKs/MacOSX14.sdk/usr/include/ffi -arch arm64 -isysroot /Library/Developer/CommandLineTools/SDKs/MacOSX14.4.sdk -Wall -Wextra -Wpedantic -Werror -Wno-strict-prototypes -o CMakeFiles/read_table.dir/read_table.c.o -c /Users/oussama.saoudi/delta-kernel-rs/ffi/examples/read-table/read_table.c", - "file": "/Users/oussama.saoudi/delta-kernel-rs/ffi/examples/read-table/read_table.c", - "output": "CMakeFiles/read_table.dir/read_table.c.o" -}, -{ - "directory": "/Users/oussama.saoudi/delta-kernel-rs/ffi/examples/read-table/build", - "command": "/Library/Developer/CommandLineTools/usr/bin/cc -DDEFINE_DEFAULT_ENGINE -DPRINT_ARROW_DATA -I/Users/oussama.saoudi/delta-kernel-rs/ffi/examples/read-table/../../../target/ffi-headers -I/opt/homebrew/Cellar/apache-arrow-glib/17.0.0/include -I/opt/homebrew/Cellar/glib/2.82.1/include -I/opt/homebrew/Cellar/glib/2.82.1/include/glib-2.0 -I/opt/homebrew/Cellar/glib/2.82.1/lib/glib-2.0/include -I/opt/homebrew/opt/gettext/include -I/opt/homebrew/Cellar/pcre2/10.44/include -I/opt/homebrew/Cellar/apache-arrow/17.0.0_6/include -I/Library/Developer/CommandLineTools/SDKs/MacOSX14.sdk/usr/include/ffi -arch arm64 -isysroot /Library/Developer/CommandLineTools/SDKs/MacOSX14.4.sdk -Wall -Wextra -Wpedantic -Werror -Wno-strict-prototypes -o CMakeFiles/read_table.dir/arrow.c.o -c /Users/oussama.saoudi/delta-kernel-rs/ffi/examples/read-table/arrow.c", - "file": "/Users/oussama.saoudi/delta-kernel-rs/ffi/examples/read-table/arrow.c", - "output": "CMakeFiles/read_table.dir/arrow.c.o" -} -] \ No newline at end of file From 5217dc6c1910380948334a9810c4932a47022835 Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Mon, 7 Oct 2024 15:18:52 -0700 Subject: [PATCH 14/82] Add Remaining expression visitor functionality --- 
ffi/examples/read-table/expression.h | 107 ++++++++++++++++++++++----- 1 file changed, 88 insertions(+), 19 deletions(-) diff --git a/ffi/examples/read-table/expression.h b/ffi/examples/read-table/expression.h index a0f5b2914..2d22c1b87 100644 --- a/ffi/examples/read-table/expression.h +++ b/ffi/examples/read-table/expression.h @@ -27,6 +27,11 @@ { \ return visit_variadic(data, len, enum_member); \ } +#define DECL_UNARY(fun_name, op) \ + uintptr_t fun_name(void* data, uintptr_t sub_expr) \ + { \ + return visit_unary(data, sub_expr, op); \ + } enum OpType { Add, @@ -62,6 +67,19 @@ enum LitType Struct, Array }; +enum ExpressionType +{ + BinOp, + Variadic, + Literal, + Unary, + Column +}; +struct ExpressionRef +{ + void* ref; + enum ExpressionType type; +}; struct BinOp { enum OpType op; @@ -77,11 +95,10 @@ enum VariadicType StructConstructor, ArrayData }; -enum ExpressionType +enum UnaryType { - BinOp, - Variadic, - Literal, + Not, + IsNull }; struct Variadic { @@ -90,7 +107,12 @@ struct Variadic size_t max_len; struct ExpressionRef* expr_list; }; -struct Binary +struct Unary +{ + enum UnaryType type; + struct ExpressionRef sub_expr; +}; +struct BinaryData { uint8_t* buf; uintptr_t len; @@ -101,11 +123,6 @@ struct Decimal uint8_t precision; uint8_t scale; }; -struct ExpressionRef -{ - void* ref; - enum ExpressionType type; -}; struct Data { size_t len; @@ -135,7 +152,7 @@ struct Literal struct KernelStringSlice string_data; struct Struct struct_data; struct ArrayData array_data; - struct Binary binary; + struct BinaryData binary; struct Decimal decimal; } value; }; @@ -155,6 +172,14 @@ struct ExpressionRef* get_handle(void* data, size_t handle_index) } return &data_ptr->handles[handle_index]; } +KernelStringSlice copy_kernel_string(KernelStringSlice string) +{ + char* contents = malloc(string.len); + size_t len = strlcpy(contents, string.ptr, string.len); + assert(len == string.len); + KernelStringSlice out = { .len = len, .ptr = contents }; + return out; +} 
uintptr_t visit_binop(void* data, uintptr_t a, uintptr_t b, enum OpType op) { @@ -184,6 +209,14 @@ DECL_BINOP(visit_distinct, Distinct) DECL_BINOP(visit_in, In) DECL_BINOP(visit_not_in, NotIn) +uintptr_t visit_expr_string(void* data, KernelStringSlice string) +{ + struct Literal* literal = malloc(sizeof(struct Literal)); + literal->type = String; + literal->value.string_data = copy_kernel_string(string); + return put_handle(data, literal, Literal); +} + uintptr_t visit_expr_decimal( void* data, uint64_t value_ms, @@ -261,7 +294,7 @@ uintptr_t visit_expr_binary(void* data, const uint8_t* buf, uintptr_t len) { struct Literal* literal = malloc(sizeof(struct Literal)); literal->type = Binary; - struct Binary* bin = &literal->value.binary; + struct BinaryData* bin = &literal->value.binary; bin->buf = malloc(len); memcpy(bin->buf, buf, len); return put_handle(data, literal, Literal); @@ -297,7 +330,7 @@ void visit_expr_struct_field( assert(len < struct_ref->max_len); struct_ref->expressions[len] = *value; - struct_ref->field_names[len] = field_name; + struct_ref->field_names[len] = copy_kernel_string(field_name); struct_ref->len++; } @@ -308,6 +341,24 @@ uintptr_t visit_null(void* data) return put_handle(data, literal, Literal); } +uintptr_t visit_unary(void* data, uintptr_t sub_expr_id, enum UnaryType type) +{ + struct Unary* unary = malloc(sizeof(struct Unary)); + unary->type = type; + struct ExpressionRef* sub_expr_handle = get_handle(data, sub_expr_id); + unary->sub_expr = *sub_expr_handle; + return put_handle(data, unary, Unary); +} +DECL_UNARY(visit_is_null, IsNull) +DECL_UNARY(visit_not, Not) + +uintptr_t visit_column(void* data, KernelStringSlice string) +{ + struct KernelStringSlice* heap_string = malloc(sizeof(KernelStringSlice)); + *heap_string = copy_kernel_string(string); + return put_handle(data, heap_string, Column); +} + // Print the schema of the snapshot struct ExpressionRef construct_predicate(KernelPredicate* predicate) { @@ -326,12 +377,12 @@ struct 
ExpressionRef construct_predicate(KernelPredicate* predicate) .visit_date = visit_expr_date, .visit_binary = visit_expr_binary, .visit_decimal = visit_expr_decimal, - .visit_string = NULL, + .visit_string = visit_expr_string, .visit_and = visit_and, .visit_or = visit_or, .visit_variadic_item = visit_variadic_item, - .visit_not = NULL, - .visit_is_null = NULL, + .visit_not = visit_not, + .visit_is_null = visit_is_null, .visit_lt = visit_lt, .visit_le = visit_le, .visit_gt = visit_gt, @@ -345,10 +396,10 @@ struct ExpressionRef construct_predicate(KernelPredicate* predicate) .visit_minus = visit_minus, .visit_multiply = visit_multiply, .visit_divide = visit_divide, - .visit_column = NULL, + .visit_column = visit_column, .visit_expr_struct = visit_struct_constructor, .visit_expr_struct_item = - visit_variadic_item, // treating expr struct like a variadic + visit_variadic_item, // We treat expr struct as a variadic .visit_null = visit_null, .visit_struct = visit_expr_struct, .visit_struct_field = visit_expr_struct_field, @@ -422,7 +473,7 @@ void print_tree(struct ExpressionRef ref, int depth) break; }; break; case Distinct: - printf("Distinct"); + printf("Distinct\n"); break; } @@ -527,6 +578,24 @@ void print_tree(struct ExpressionRef ref, int depth) break; } } break; + case Unary: { + tab_helper(depth); + struct Unary* unary = ref.ref; + switch (unary->type) { + case Not: + printf("Not\n"); + break; + case IsNull: + printf("IsNull\n"); + break; + } + print_tree(unary->sub_expr, depth + 1); + } + case Column: + tab_helper(depth); + KernelStringSlice* string = ref.ref; + printf("Column: %s", string->ptr); + break; } } From 778c84cc91d0a6a9b46ffaa4233d4fe3f9c07f0d Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Mon, 7 Oct 2024 15:50:17 -0700 Subject: [PATCH 15/82] Add deconstructor and make functions consistently named --- ffi/examples/read-table/expression.h | 229 +++++++++++++++++---------- ffi/src/expressions.rs | 16 +- 2 files changed, 151 insertions(+), 94 
deletions(-) diff --git a/ffi/examples/read-table/expression.h b/ffi/examples/read-table/expression.h index 2d22c1b87..a99331bcd 100644 --- a/ffi/examples/read-table/expression.h +++ b/ffi/examples/read-table/expression.h @@ -7,12 +7,12 @@ #include #include -#define DECL_BINOP(fun_name, op) \ +#define DEFINE_BINOP(fun_name, op) \ uintptr_t fun_name(void* data, uintptr_t a, uintptr_t b) \ { \ - return visit_binop(data, a, b, op); \ + return visit_expr_binop(data, a, b, op); \ } -#define DECL_SIMPLE_SCALAR(fun_name, enum_member, c_type) \ +#define DEFINE_SIMPLE_SCALAR(fun_name, enum_member, c_type) \ uintptr_t fun_name(void* data, c_type val) \ { \ struct Literal* lit = malloc(sizeof(struct Literal)); \ @@ -22,15 +22,15 @@ } \ _Static_assert( \ sizeof(c_type) <= sizeof(uintptr_t), "The provided type is not a valid simple scalar") -#define DECL_VARIADIC(fun_name, enum_member) \ +#define DEFINE_VARIADIC(fun_name, enum_member) \ uintptr_t fun_name(void* data, uintptr_t len) \ { \ - return visit_variadic(data, len, enum_member); \ + return visit_expr_variadic(data, len, enum_member); \ } -#define DECL_UNARY(fun_name, op) \ +#define DEFINE_UNARY(fun_name, op) \ uintptr_t fun_name(void* data, uintptr_t sub_expr) \ { \ - return visit_unary(data, sub_expr, op); \ + return visit_expr_unary(data, sub_expr, op); \ } enum OpType { @@ -181,7 +181,7 @@ KernelStringSlice copy_kernel_string(KernelStringSlice string) return out; } -uintptr_t visit_binop(void* data, uintptr_t a, uintptr_t b, enum OpType op) +uintptr_t visit_expr_binop(void* data, uintptr_t a, uintptr_t b, enum OpType op) { struct BinOp* binop = malloc(sizeof(struct BinOp)); struct ExpressionRef* left_handle = get_handle(data, a); @@ -195,19 +195,19 @@ uintptr_t visit_binop(void* data, uintptr_t a, uintptr_t b, enum OpType op) binop->right = right; return put_handle(data, binop, BinOp); } -DECL_BINOP(visit_add, Add) -DECL_BINOP(visit_minus, Sub) -DECL_BINOP(visit_multiply, Mul) -DECL_BINOP(visit_divide, Div) 
-DECL_BINOP(visit_lt, LT) -DECL_BINOP(visit_le, LE) -DECL_BINOP(visit_gt, GT) -DECL_BINOP(visit_ge, GE) -DECL_BINOP(visit_eq, EQ) -DECL_BINOP(visit_ne, NE) -DECL_BINOP(visit_distinct, Distinct) -DECL_BINOP(visit_in, In) -DECL_BINOP(visit_not_in, NotIn) +DEFINE_BINOP(visit_expr_add, Add) +DEFINE_BINOP(visit_expr_minus, Sub) +DEFINE_BINOP(visit_expr_multiply, Mul) +DEFINE_BINOP(visit_expr_divide, Div) +DEFINE_BINOP(visit_expr_lt, LT) +DEFINE_BINOP(visit_expr_le, LE) +DEFINE_BINOP(visit_expr_gt, GT) +DEFINE_BINOP(visit_expr_ge, GE) +DEFINE_BINOP(visit_expr_eq, EQ) +DEFINE_BINOP(visit_expr_ne, NE) +DEFINE_BINOP(visit_expr_distinct, Distinct) +DEFINE_BINOP(visit_expr_in, In) +DEFINE_BINOP(visit_expr_not_in, NotIn) uintptr_t visit_expr_string(void* data, KernelStringSlice string) { @@ -233,18 +233,18 @@ uintptr_t visit_expr_decimal( dec->scale = scale; return put_handle(data, dec, Literal); } -DECL_SIMPLE_SCALAR(visit_expr_int, Integer, int32_t); -DECL_SIMPLE_SCALAR(visit_expr_long, Long, int64_t); -DECL_SIMPLE_SCALAR(visit_expr_short, Long, int16_t); -DECL_SIMPLE_SCALAR(visit_expr_byte, Byte, int8_t); -DECL_SIMPLE_SCALAR(visit_expr_float, Float, float); -DECL_SIMPLE_SCALAR(visit_expr_double, Double, double); -DECL_SIMPLE_SCALAR(visit_expr_boolean, Boolean, _Bool); -DECL_SIMPLE_SCALAR(visit_expr_timestamp, Timestamp, int64_t); -DECL_SIMPLE_SCALAR(visit_expr_timestamp_ntz, TimestampNtz, int64_t); -DECL_SIMPLE_SCALAR(visit_expr_date, Date, int32_t); +DEFINE_SIMPLE_SCALAR(visit_expr_int, Integer, int32_t); +DEFINE_SIMPLE_SCALAR(visit_expr_long, Long, int64_t); +DEFINE_SIMPLE_SCALAR(visit_expr_short, Long, int16_t); +DEFINE_SIMPLE_SCALAR(visit_expr_byte, Byte, int8_t); +DEFINE_SIMPLE_SCALAR(visit_expr_float, Float, float); +DEFINE_SIMPLE_SCALAR(visit_expr_double, Double, double); +DEFINE_SIMPLE_SCALAR(visit_expr_boolean, Boolean, _Bool); +DEFINE_SIMPLE_SCALAR(visit_expr_timestamp, Timestamp, int64_t); +DEFINE_SIMPLE_SCALAR(visit_expr_timestamp_ntz, TimestampNtz, int64_t); 
+DEFINE_SIMPLE_SCALAR(visit_expr_date, Date, int32_t); -uintptr_t visit_variadic(void* data, uintptr_t len, enum VariadicType op) +uintptr_t visit_expr_variadic(void* data, uintptr_t len, enum VariadicType op) { struct Variadic* var = malloc(sizeof(struct Variadic)); struct ExpressionRef* expr_lst = malloc(sizeof(struct ExpressionRef) * len); @@ -254,7 +254,7 @@ uintptr_t visit_variadic(void* data, uintptr_t len, enum VariadicType op) var->expr_list = expr_lst; return put_handle(data, var, Variadic); } -void visit_variadic_item(void* data, uintptr_t variadic_id, uintptr_t sub_expr_id) +void visit_expr_variadic_item(void* data, uintptr_t variadic_id, uintptr_t sub_expr_id) { struct ExpressionRef* sub_expr_ref = get_handle(data, sub_expr_id); struct ExpressionRef* variadic_ref = get_handle(data, variadic_id); @@ -264,11 +264,11 @@ void visit_variadic_item(void* data, uintptr_t variadic_id, uintptr_t sub_expr_i struct Variadic* variadic = variadic_ref->ref; variadic->expr_list[variadic->len++] = *sub_expr_ref; } -DECL_VARIADIC(visit_and, And) -DECL_VARIADIC(visit_or, Or) -DECL_VARIADIC(visit_struct_constructor, StructConstructor) +DEFINE_VARIADIC(visit_expr_and, And) +DEFINE_VARIADIC(visit_expr_or, Or) +DEFINE_VARIADIC(visit_expr_struct, StructConstructor) -void visit_array_item(void* data, uintptr_t variadic_id, uintptr_t sub_expr_id) +void visit_expr_array_item(void* data, uintptr_t variadic_id, uintptr_t sub_expr_id) { struct ExpressionRef* sub_expr_handle = get_handle(data, sub_expr_id); struct ExpressionRef* array_handle = get_handle(data, variadic_id); @@ -300,7 +300,7 @@ uintptr_t visit_expr_binary(void* data, const uint8_t* buf, uintptr_t len) return put_handle(data, literal, Literal); } -uintptr_t visit_expr_struct(void* data, uintptr_t len) +uintptr_t visit_expr_struct_literal(void* data, uintptr_t len) { struct Literal* literal = malloc(sizeof(struct Literal)); literal->type = Struct; @@ -312,7 +312,7 @@ uintptr_t visit_expr_struct(void* data, uintptr_t 
len) return put_handle(data, literal, Literal); } -void visit_expr_struct_field( +void visit_expr_struct_literal_field( void* data, uintptr_t struct_id, KernelStringSlice field_name, @@ -334,14 +334,14 @@ void visit_expr_struct_field( struct_ref->len++; } -uintptr_t visit_null(void* data) +uintptr_t visit_expr_null(void* data) { struct Literal* literal = malloc(sizeof(struct Literal)); literal->type = Null; return put_handle(data, literal, Literal); } -uintptr_t visit_unary(void* data, uintptr_t sub_expr_id, enum UnaryType type) +uintptr_t visit_expr_unary(void* data, uintptr_t sub_expr_id, enum UnaryType type) { struct Unary* unary = malloc(sizeof(struct Unary)); unary->type = type; @@ -349,10 +349,10 @@ uintptr_t visit_unary(void* data, uintptr_t sub_expr_id, enum UnaryType type) unary->sub_expr = *sub_expr_handle; return put_handle(data, unary, Unary); } -DECL_UNARY(visit_is_null, IsNull) -DECL_UNARY(visit_not, Not) +DEFINE_UNARY(visit_expr_is_null, IsNull) +DEFINE_UNARY(visit_expr_not, Not) -uintptr_t visit_column(void* data, KernelStringSlice string) +uintptr_t visit_expr_column(void* data, KernelStringSlice string) { struct KernelStringSlice* heap_string = malloc(sizeof(KernelStringSlice)); *heap_string = copy_kernel_string(string); @@ -364,47 +364,48 @@ struct ExpressionRef construct_predicate(KernelPredicate* predicate) { print_diag("Building schema\n"); struct Data data = { 0 }; - EngineExpressionVisitor visitor = { .data = &data, - .visit_int = visit_expr_int, - .visit_long = visit_expr_long, - .visit_short = visit_expr_short, - .visit_byte = visit_expr_byte, - .visit_float = visit_expr_float, - .visit_double = visit_expr_double, - .visit_bool = visit_expr_boolean, - .visit_timestamp = visit_expr_timestamp, - .visit_timestamp_ntz = visit_expr_timestamp_ntz, - .visit_date = visit_expr_date, - .visit_binary = visit_expr_binary, - .visit_decimal = visit_expr_decimal, - .visit_string = visit_expr_string, - .visit_and = visit_and, - .visit_or = visit_or, - 
.visit_variadic_item = visit_variadic_item, - .visit_not = visit_not, - .visit_is_null = visit_is_null, - .visit_lt = visit_lt, - .visit_le = visit_le, - .visit_gt = visit_gt, - .visit_ge = visit_ge, - .visit_eq = visit_eq, - .visit_ne = visit_ne, - .visit_distinct = visit_distinct, - .visit_in = visit_in, - .visit_not_in = visit_not_in, - .visit_add = visit_add, - .visit_minus = visit_minus, - .visit_multiply = visit_multiply, - .visit_divide = visit_divide, - .visit_column = visit_column, - .visit_expr_struct = visit_struct_constructor, - .visit_expr_struct_item = - visit_variadic_item, // We treat expr struct as a variadic - .visit_null = visit_null, - .visit_struct = visit_expr_struct, - .visit_struct_field = visit_expr_struct_field, - .visit_array = visit_expr_array, - .visit_array_item = visit_array_item }; + EngineExpressionVisitor visitor = { + .data = &data, + .visit_int = visit_expr_int, + .visit_long = visit_expr_long, + .visit_short = visit_expr_short, + .visit_byte = visit_expr_byte, + .visit_float = visit_expr_float, + .visit_double = visit_expr_double, + .visit_bool = visit_expr_boolean, + .visit_timestamp = visit_expr_timestamp, + .visit_timestamp_ntz = visit_expr_timestamp_ntz, + .visit_date = visit_expr_date, + .visit_binary = visit_expr_binary, + .visit_decimal = visit_expr_decimal, + .visit_string = visit_expr_string, + .visit_and = visit_expr_and, + .visit_or = visit_expr_or, + .visit_variadic_item = visit_expr_variadic_item, + .visit_not = visit_expr_not, + .visit_is_null = visit_expr_is_null, + .visit_lt = visit_expr_lt, + .visit_le = visit_expr_le, + .visit_gt = visit_expr_gt, + .visit_ge = visit_expr_ge, + .visit_eq = visit_expr_eq, + .visit_ne = visit_expr_ne, + .visit_distinct = visit_expr_distinct, + .visit_in = visit_expr_in, + .visit_not_in = visit_expr_not_in, + .visit_add = visit_expr_add, + .visit_minus = visit_expr_minus, + .visit_multiply = visit_expr_multiply, + .visit_divide = visit_expr_divide, + .visit_column = 
visit_expr_column, + .visit_struct = visit_expr_struct, + .visit_struct_item = visit_expr_variadic_item, // We treat expr struct as a variadic + .visit_null = visit_expr_null, + .visit_struct_literal = visit_expr_struct_literal, + .visit_struct_literal_field = visit_expr_struct_literal_field, + .visit_array = visit_expr_array, + .visit_array_item = visit_expr_array_item + }; uintptr_t schema_list_id = visit_expression(&predicate, &visitor); return data.handles[schema_list_id]; } @@ -416,7 +417,62 @@ void tab_helper(int n) printf(" "); tab_helper(n - 1); } - +void free_expression(struct ExpressionRef ref) +{ + switch (ref.type) { + case BinOp: { + struct BinOp* op = ref.ref; + struct ExpressionRef left = { .ref = op->left, .type = Literal }; + struct ExpressionRef right = { .ref = op->right, .type = Literal }; + free_expression(left); + free_expression(right); + free(op); + break; + } + case Variadic: { + struct Variadic* var = ref.ref; + for (size_t i = 0; i < var->len; i++) { + free_expression(var->expr_list[i]); + } + free(var); + } break; + case Literal: { + struct Literal* lit = ref.ref; + switch (lit->type) { + case Struct: + printf("Struct\n"); + struct Struct* struct_data = &lit->value.struct_data; + for (size_t i = 0; i < struct_data->len; i++) { + free((void*)struct_data->field_names[i].ptr); + } + break; + case Array: + printf("Array\n"); + struct ArrayData* array = &lit->value.array_data; + for (size_t i = 0; i < array->len; i++) { + free_expression(array->expr_list[i]); + } + free(array->expr_list); + break; + default: + break; + } + free(lit); + } break; + case Unary: { + struct Unary* unary = ref.ref; + free_expression(unary->sub_expr); + free(unary); + break; + } + case Column: { + KernelStringSlice* string = ref.ref; + free((void*)string->ptr); + free(string); + break; + } + } +} void print_tree(struct ExpressionRef ref, int depth) { switch (ref.type) { @@ -604,4 +660,5 @@ void test_kernel_expr() KernelPredicate* pred = get_kernel_expression(); 
struct ExpressionRef ref = construct_predicate(pred); print_tree(ref, 0); + free_expression(ref); } diff --git a/ffi/src/expressions.rs b/ffi/src/expressions.rs index f5d90a5c2..0a240882b 100644 --- a/ffi/src/expressions.rs +++ b/ffi/src/expressions.rs @@ -336,11 +336,11 @@ pub struct EngineExpressionVisitor { pub visit_column: extern "C" fn(data: *mut c_void, name: KernelStringSlice) -> usize, - pub visit_expr_struct: extern "C" fn(data: *mut c_void, len: usize) -> usize, - pub visit_expr_struct_item: extern "C" fn(data: *mut c_void, struct_id: usize, expr_id: usize), + pub visit_struct: extern "C" fn(data: *mut c_void, len: usize) -> usize, + pub visit_struct_item: extern "C" fn(data: *mut c_void, struct_id: usize, expr_id: usize), - pub visit_struct: extern "C" fn(data: *mut c_void, num_fields: usize) -> usize, - pub visit_struct_field: extern "C" fn( + pub visit_struct_literal: extern "C" fn(data: *mut c_void, num_fields: usize) -> usize, + pub visit_struct_literal_field: extern "C" fn( data: *mut c_void, struct_id: usize, field_name: KernelStringSlice, @@ -372,12 +372,12 @@ pub unsafe extern "C" fn visit_expression( array_id } fn visit_struct(visitor: &mut EngineExpressionVisitor, struct_data: &StructData) -> usize { - let struct_id = call!(visitor, visit_struct, struct_data.fields().len()); + let struct_id = call!(visitor, visit_struct_literal, struct_data.fields().len()); for (field, value) in struct_data.fields().iter().zip(struct_data.values()) { let value_id = visit_scalar(visitor, value); call!( visitor, - visit_struct_field, + visit_struct_literal_field, struct_id, field.name().into(), value_id @@ -386,10 +386,10 @@ pub unsafe extern "C" fn visit_expression( struct_id } fn visit_expr_struct(visitor: &mut EngineExpressionVisitor, exprs: &Vec) -> usize { - let expr_struct_id = call!(visitor, visit_expr_struct, exprs.len()); + let expr_struct_id = call!(visitor, visit_struct, exprs.len()); for expr in exprs { let expr_id = visit_expression(visitor, expr); 
- call!(visitor, visit_expr_struct_item, expr_struct_id, expr_id) + call!(visitor, visit_struct_item, expr_struct_id, expr_id) } expr_struct_id } From 07c458b153564b1df364a30a481d7bd4bcb1054e Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Wed, 9 Oct 2024 14:08:35 -0700 Subject: [PATCH 16/82] More fixes to expression --- .../clangd/index/arrow.c.593AB35726E16CF5.idx | Bin 0 -> 6730 bytes .../clangd/index/arrow.h.5C86D068362A9230.idx | Bin 0 -> 1184 bytes .../index/expression.h.2E6480F4E885C761.idx | Bin 0 -> 23854 bytes .../index/read_table.c.A243E3FFE9F1D262.idx | Bin 0 -> 6072 bytes .../index/read_table.h.B5A915C23F6DC678.idx | Bin 0 -> 1962 bytes .../index/schema.h.9F10114AF5265F91.idx | Bin 0 -> 6838 bytes ffi/examples/read-table/CMakeLists.txt | 2 +- ffi/examples/read-table/compile_commands.json | 14 +++ ffi/examples/read-table/expression.h | 78 ++++++++----- ffi/src/expressions.rs | 108 ++++++++++++++++-- 10 files changed, 163 insertions(+), 39 deletions(-) create mode 100644 ffi/examples/read-table/.cache/clangd/index/arrow.c.593AB35726E16CF5.idx create mode 100644 ffi/examples/read-table/.cache/clangd/index/arrow.h.5C86D068362A9230.idx create mode 100644 ffi/examples/read-table/.cache/clangd/index/expression.h.2E6480F4E885C761.idx create mode 100644 ffi/examples/read-table/.cache/clangd/index/read_table.c.A243E3FFE9F1D262.idx create mode 100644 ffi/examples/read-table/.cache/clangd/index/read_table.h.B5A915C23F6DC678.idx create mode 100644 ffi/examples/read-table/.cache/clangd/index/schema.h.9F10114AF5265F91.idx create mode 100644 ffi/examples/read-table/compile_commands.json diff --git a/ffi/examples/read-table/.cache/clangd/index/arrow.c.593AB35726E16CF5.idx b/ffi/examples/read-table/.cache/clangd/index/arrow.c.593AB35726E16CF5.idx new file mode 100644 index 0000000000000000000000000000000000000000..2f130e517ab0571e0acb7fd574cff1a09dd7f767 GIT binary patch literal 6730 zcmZ8l3tWs@|9^g`X_{tE%`{!6%XFKXO6f^T(Y>VGs+MIJvE&*GE0$c7iYS+mQi@%x 
zNNcg%Dj|6nOS!Dd5?3JHBn~&cln$0pX z_g!xL?t#OvU7O42CE)9)*j7mR`P9mmKb|yfx#GP{nIFHbS}xAXyImHVZ+~rI)HT`S z2K3}oDt?f+Zd>U}_hA8p!v%e@&8J&dAJ1O-yxlm_FiQ93pE_H@OSN`{|Jk|yF?#_c7HP!QfS~Y(EZ;is=oA&MaiuD_PEAi!dr_iSv75nB3$4lP$Opd;jJ%#Tv zVG`dj*~Umu;az(=@o!%16^UX;%^p{K#XB+Iuin^xzsX<}%)v#wL z`rH@KZ%KW#7r4JXSO39@ruzPVtBfn6 zO3&pl+@4M}-Fz`MdRb4Aw&SbeuH$kSnI&{4{t*8q^bej!%Ik-em%jg1_pnn!;GZ`i z1eGZpH~sw6o}hhSmw4MwIug4sc46q4`4!@?1Ra9~`Df4Wo;g1Md2VFnm*I_}g)e@Z z&^ygyT=Sp2{~X-9cyyzSQ|xUVR~>a=s$y?jm0)?@=FS@pyZ7!}7F;^crsCN4-wa!~ z{o}9j^^McHy!qU*Pra47rSh)+Gnus~N4N&Ze&3XB?{!UL_h?_0G}!Oc6+ZSpt4!Cn zzH9zEOB~gb*js7a?tI?xb@bE0%u7Fv)TzDOd24*n_Fr@(pXa@f5$@bxE(zW5^Ki4p zORZzC#&uiBCpBvaJl!7q=<3FtYX`NRuj+?QsQ-1+T1%575y{2VDy}UJTT)z-oRT!V z4;Ur}=ew<(8Dor@(G0^@0!W*A|41KMvu^6^Dksxs!9oY}p01ZGAM7Kye*=4PgCCS!DYz>IMKHwZK`w|<-D z;EtJ*gh64fupW9)d5id4(p*Vhf}!s24?;ym=A8swq3 zE}V<=-Na{{3Ar0|xR%(;sJD`0BbSeK-!S?U(?pUNF zz^qUtlyYjNu2N5RA+R|h$^of5S_BWz0S^ujg5MyFRi}(iL*Z#ChQsN|IUTukI1{;M zB2S8ET>j>1=dK_1h;=d@nTUp`@phU`_mtscGJF7Z4uFJ~ch$AJn6fhg5<+MKaT7?X zHwg<9XYC)8FCjsy0^KSwQLh>HD(GGX6Ur!>`J_y6pv{nI$VFzk$c>sp1(5K6mKJ8) zU~C&qq#6c%4;%KKx*;bT+QFn9Bg3P%!))JV=khpAWj7{E@o*kHVvXV zoB{F-aNuwz=w*V8!;eA!7#!#{R|9r=dsV)+B=a}GXASTwn41MsJeCiUs! 
zk=pAWiN^E5I}iGtBb~tS1PNuNEsbdW(9bxH*=SfcS7)~*8%E9ds2@f=mLjQ?mUoUc zD%);nyz@{fQ3D96{JZ`F2hoXi7edHK{CtFImE;$Hb@XQLk${7H&0Z>w7C>IEadOJ0SHgyt3RY#}8 znB8Eu!H()W{qKgrZM`yYVzHI3l|E(SO>R6qVzzWN87>4~Az+$h>p-+kXipT-u98gfR zFxwB}evs2jXwpjA`G;OlAepuZctzk!?TPs0mDk<|98L%ec?-pK;zxaURbf`n5yT!} zJ-zZ_<|7b0=f$x)Ys{UcYo)oES^GqeFf+h=+IpivzdsWi9|G= z!yKfxf|40i3FBxqT5TU@*P^vz8X#?#?@wFb{CXOh<^`Y^+-me$SKD=0Hh(6O9wQhd zrS@WWonW21-?xlv2<&mz*}1>@Z*Q@0755q&Zxo($)MV?Kn_D5nPAyw_(EF^?!c z0Ez>kqy)^~2ibjYw@PyO+i{cf=XpHL&f?9|u)P#eDd=%xeSrEvkER#18Hk^O3}^`a zgJv$C7*n=@7?O$1GLao6?1|{N`}6uG6`>8R+JL3eO^BJLcm9jT2_mu}y`|&mlE7>n zKTe~?&Ee0{Knr*aGy;^!OVmK=z)RQMATxlMp^1xuUkpZ^LuG)3 zwgB-9kP*c+_u9gb=O}u#Q5C+RA21i0ez5BYM-FEpQ5F(YMP%V1{~Q!d@r8==mvNWf z7Lfoof_Ec?&;Zc`nwIe6v5cFs4`Tb^b8d!hNVE+}sm9<{MLIJ)%9at0b>LnHfwXH= z2GVvu9Oh{AD4#O!^(XTB6+}W|qi~~rD7og}-;8sM_?isMMRGCa-f5M$I5j9{H5sl3 zR1M}-KZ?!&*7~d1)uia^K(CG?VD>TSJ?4r|{&Ib*?zn9#4LL}hgB+=4Ma}hg>HWJ@ z8mb|(8e*wxRdB2uK}mx{8#QT_szWC=4YM2VH+s?~S7*7Xq;7HR8ZyljKu^GyN^kh( z#iRXuy*7|(lwQgJ>Mv&5AU1-krr{>>|8Q{SCV2y%Zy=P5|5ha4ima&<$^`!3=Yqb> z(8g>iFI1ndRm|=Lv=cD34zp(godwLfcoWb~z|=_0b^*VO+hdE(cDTVv_rqkOK9Ki8AXi-($UFo2)9NZXv}x^~5ynS| zhBRcJ#t|;oj`W`ROV|(r4Aa1sniQ58Q6sf4`i@A%2x6S5YtNUxy*ixz>!^?%eRNz* zX&y;d5o^dHFC8yaDuP*Wu>f^~xw_9418jh=yVYX0xW^wRZ@bt^9)E`KUX31ua-t7{ z$SESNv{8D}rO}endGA8I_Gu!00eBZcK#K>nPl5lGt4l}Sk~K$4Bxi`cuf&(^)*mjI z9+nqg|0NCnk;oe&Lq4}*ea`<}NSQ3x-(1#lpy5X|a+xz0nsH?yG8M@=WF|H<Yakb#KA?QC zpmh;4BdBfUiqC3E0}bW{>(Z2Avb~}n(4qS+W(R>c2)eWwF`J5XQ;|N!ccNZA-x(^c zBXtmB7osjsRp&9d!5!-q#V&oHCtOsq)j(hkgmC3{9&FEpJ;n3_L3U-;=MCx>HIFnD z)kh6FjM=SVu$8-{hHsKgk52fao_PBVWY4&hd;Q-DPQ{_3yM#~+jSZWs)r{{?~#LkjB|`1r)HMttS}Ch!pKqxrAi~7Rwqb65E0HB>z#@@vo<^$9>#3aFO5#utR`HQ#t+bQihi${b|FX=Py69@z`HQgB7C z05}2xy)#`yP1jK}eJRIAu?7z?iNsVuzo^Pkn7sEir>Z7)jY|bln5m zZ+QNfcySJ7=Rirb@QvwU_vOuxRfLTp8^dY1{CDp@%2Vh+C&Ps>tPohbI- zEY&zTZUf#n5K`|kdmMPjLCE=b0{ADm8$V`G0{^5YZU$a6x4&WbA@Cn+9yB*2{$}pe z3TF2s{(endgm^_rozx{xvXaUr?Smv%JRP2TUm@Q~6XR7*Coi9qJk+n$O^acM(`Ycj 
znqApZT$8e@M$2uuE90srFmsaTCR!3`ZDFY+5Q_Bl#Rd|rE3-F~ON~qnjm>RME%;W< F{{e|$m3IID literal 0 HcmV?d00001 diff --git a/ffi/examples/read-table/.cache/clangd/index/arrow.h.5C86D068362A9230.idx b/ffi/examples/read-table/.cache/clangd/index/arrow.h.5C86D068362A9230.idx new file mode 100644 index 0000000000000000000000000000000000000000..8ea41df84fe5ee822da09fbd91a8385e8181fd92 GIT binary patch literal 1184 zcmWIYbaR`*!oc91;#rZKT9OE4G6({3aY<2T5+eh{TxJG_iaBe84(1&);5obe%Yj6N zTMp$nt_Vxt%5>jSJZ0Ik-f3#FkC=pR?br8BT%40Ut0hEp+x^d-1T-UFf={7xw=i+_Yo|7*Nj7p3=HupS{ab@l6$T5mq zBi8IUF}EW@tm2?l-u)9l`W9gMG=RVF-@0-1C!WM>Bn>yaI zWd8NnCmrg$-pk}}eN&=xUBjom45f9NVX^KyZ{!aB*HYT9nxHNd9=GH*hsd?y8SH*4 zzRg>VOrMBtE8+Ai^sYSocYfz#yTX+ze$UHK>dXvW*_zdTc$w`d(SM3kH!_Rnt9|yL zyH0F*+>aaij1d7T?YmWiylu2!Zc97nzys8OjFI&i&UOb}->6s4vW&q4|>MlNtT!JMCwnUgTxa)tmS55Fj< zs2I#dSnvT=gTfA`+91&C>T@e|RiJ8d4smgqYFKEpa6v*59Ec1IJp958!aQ(K@=Hic zsKP^qUs6I+1s2LMnvoL{%1{GJx2>GcF^$;@Xq^~`m?X@)n1vIUs;kb601ERn@w3BH z0?d&>H-lpWYREFFl|iK!wiNP%OZ# z^76TOY<IioGM) zJ9beKdqqXXE*c90f+9A2ch9+d&z?{3^M1_h^JF*v?wr}#-PzgM*;%`^Yuh&PH$iCC zuidzz{YUo^1wn8m|BV_wa!>*JOQMq?j9a$lr~G00kq*l{UTIZhZ_7U#4EEg*#(otqALO0+%03@uHW?S z)$6_9cOG5xGU;v7l`g~GKMy^&^I5pxk~r^+1A6}tPw(HkWZ?G|>%RWgZv9&Q^v2(h z9=&gK&vD!SwF%wRdj5|QgI2g+UzoeR>E>}Q-~3>_^Sof_&Y%B%yXkY2;T_+-T=uGS z=fa>iNk+$N8zkpyhgV+Paww|il7h2Mqs;%BzFOYMHa=uQ-9%@%DY=hA1C|xtxo}si zcIRrTc}vfpUAvAS-Q!-=qWYm{zu9*?$z^qN{?%b6@!z)H{;0;$_)V*4^xw0z587Py z$uGX&{&3iDY)(<(m2$_AUC+(DPzzf9}*T#C$5GVZKk3Ilm^f+ES#OcCz#CzN30?S-rDHGym;t zgTKvMG5J*YsFvq`jJnfz!|HVp*R5WcQMtNcNZ0V=O`_sap253UD|BM?{D2h44GkJs z%zff_Jz>rK#(@`RR&hN2*H=-~Qg;s=)F*6WleQ%t-0P;6|B`p7m+s&zyQuc=6Ox)g z-1g_~bh_FH37%ipWnEwWI+A!AD7JT`naEcmAwZZlzQ3S^zOO3b4A;d%#5gt zagtZ@gYO=6%x#b~V*Rkti+x6%95G4C`)9<3-FtEtd>5Bdf~!@B1^jQp%mZ(1_6Og+ysY)fvs=WF>o!~5mVW&xH>aOX`%@FL zYTYTg6P~!(#mViuQ&IOHk^;xyx-jfb$L;M3Rt&tI=2Cypl^d;#y{G%8@pL@`teTx+)?A}=Vt$~@-ytC#4s1sqoZc-EhIPt%{@vqNEq*9+uICo8`<2(9<0jAkwrjgCUB`YO-)r^e 
zUw^(ixW&%5ZKr?!RycWn?zE4=C?z1_ubk%PZI3&-ZW^Nrd!S3^=-nTCO=+)i!?8_E8t@r6;LLJlc$9LKn(a7^^Z`-Ghos;y@ za|ZuZ`d~zx2c29FWEQr$InTCfT4tBMc^{$&NBMMFKglz3;-Sihsf+ygW>k#4TmC#E z-0|kFsFv3+ZF)L*xM5SS(^th0rUcYZEFLv}=pW0Sso~3<|IZ2}n1wEa5Rd9W9Z@L- ziTpSfO%U?W_U`aYyQat_#GpZhBnDNDL4K?j>3&@?-f>g^%*g%9D zu(#cVPs>~+v+y&e3Wi|9h?Z7Hv!4(Cex^&1$s{D9CWIs&N%6>m)l49h&bQi}ut_d9 z!a&Mr8nT^+jI5SA&L@x1z*`IrA!b_#QE-b6!7Y8r)mZvN0$0B z=r2RHc%;;Vj*Cp@K4kgUo9#bVHw!Ij zdrhG!5uKLS_D;TWJJUC8pr=VlLEjLP3Hk|StZB(QHmRWbY!_488YW?;JZm$M>kQ-_ zkCd8GP5I`@i6v+Ky$r%>S)f9 zf(^m~nNb>YOQUitwIJ1W5Z0jaf@jXZMZc%j3Ns07QBT4t6*;9MGLlNoy0_ZPxCIFbRv$ zSA=4WVT@TR8HH{8h*^`j|548%?3bH5kBsN(04TM!srFm%boemKP~R*Jpaakl8rmwQ zb!uw|U0a^#y|jTzSSAbelhaRC;t?%bQ-x#ktV6oj4b8&Wl*jk*JyAzm$~>k_5DHtw zp4-&aH_|M0rUao7N~VjJ)={3+q_n?t?XR1fg&~xp88ow3N~;+C#jW~(?@S~Ve^H8F z(2FP-EoF-7MCW(+jP#nkR|u{3^JafJyvRKkDv5kcV<-T!zhDrP$s*KjF%~y zQd`L?V(wg8Q?2wX8(|o2Z>a51gHp=vkwxe=vcX^PJN(enARLnUyhN@qsTopgK?*j? zs@ltI%+cH3CKIytGT8#1Zh_7<9x1gTRj)2wMH7Xx4vsgP_TSgqAZ(P$=77T-s1c8p z+PV_EwG5v#vs2f0bl$1$n~9{EhzO?CY(gG93~D=TVthM;a7AVo4Z3K$td&}j>Q|H7 zY~=W?&Dxh2zcUEuWU{TG-wLD$N^Na(ch05AsQvAKFbJtK*)ptKhRNV7wIJ2NGPkw^ zE1yo7F{_hVAf9YIY5)yvl+rqdnTIcqIMb?oXM?arX0#I7u0)RUNT~&>fxEDh%wyIx zr$z@(ru=9U#!}TDZ9m$il&pI_!ySgKE;4kr5gOB$<{;ZS)Q~DQn;xH;?>A;v%aWl=twCdA(AW( zT3UHrx_bV5iuM4j=*d)wA zBMFniRR{YhrIljr^R*X7ZyanEzN4))hQ?%}($d<>#-?uz*SxDa(jc6dr|1Ca4uF3= zQffhpaFgeH|G2|lZ(sgrq>bk3zIlx-3&w53)4+Px;&rb?Z*=xrX>@mGP&ZX3-7XH z48nYw(ORrqi(TT8QVWuoD38(b^JkYfZ?tBnS@@eu^eud=S4vqTVtrjkHErz?zHE+J z=tK!>LQSIQw6u!s`)W?3G~Yks48j(k6II!>Dc?My#OqKw;1R!KeZM9^CehE7qVl^d?&eE#>Cu zGJZ96d#x?*)e;TD4w=y-&_ANxQ))r-wUy8jEu?Du~cd{d4GJb+j0BcFN+MqBsvINk<(U0){#;Rk|zk=#tFTi zy!&6x1*;aBg|3vSAM_*YMoU@rX+-q9eH$*EcDi=5PUuT1e09F0bF^e*Szg-mQ$Xdy zWV0}s5(Gd1abRdE6HtA;^T(0qHwL^;p=N>loe8LF0wUc~Y9{Y9;~+OXzfp^I!YIn; zSKY5<(rC#VI_%cw(Xqax7aN34^5`rE{bD*BN^KpTLz{O!_D|}y#4LPE`P7EmL>p;o zl_zjMJB z48k#4m>guxq288KTQ#WV(DA+ITo%otcCB)&1Rt;6*~|DiP*HX ziXGXhs(tMyQCrN4S^NgRA;yE2G6h+xQ^QQ-JnUSy8-x>bOBa#xB3&6uZEdMzMs~qK 
zQ)5DQQ6?)xu4SlJJW^^w3NgtF{UCJL=0(q^?=%aMRG?PSiZn+{xjA}F*tQL~8`j*o z%Ph2^6wRSI*}iCLrTFsAnU$uLiM#0#QtgRDQXF+Sl$wo@v}^I^8_7-g8HBB}K$%FN zN##~*L8|R63v_wmxpmL~N#0M7K-7DQMN%x?xs{qR+w|#5ui>pH954w>Q3pcR(XJy= zdRj7yA38rMO53vGfI&DeH+KpdPtgfcYU?l+9j)Fp;8|89l-&H51L% zf79Ayqv{?t2$y7Ld7#gu%S5RKsa_SCS?=+BPXqt-IcBCCXgq2OEy=*s(mL?jJJ4^- z5;|v_m4o2d@HMdww6szzNglZ1R8FlN8=)2DG0Aok875lF6vWP+FI^Ni?84(5oiKtD z^wISpE)XplL3wc42>(V;a!f+9?AmuVbtSVwOV*wA5gjwexeq);XN<0(zofqi1uYpx zi)*eeU3|v{9;p9n^p9}J!=}fz7KY0J!l>-e+eS3JMunzM$8g8t#QUcu z6S~cr`s;U`>vF>&T#y-+BG*zB5|5PHx|-kJ3cq-D@QXZyuu~>`gIwQGZB}aQc_XJ? z!12kclWrS?Ju=y8WIRp9QfjMMon2k4yT0(vHwlxe;aiHFm(m5R)J(Es?C;-VNv!=?TSk9EQil%kEU z4cRJb$yl_l`CCoT9GfQw;ixQcA#yFG-BN06?}EEwgZ`0S~5QM zySIyqANKk=)n%%w16>A^Gczq&Zyb+Y-|=;3LZL|*L%X~Lxh*?UuojpA+%upL0$R$4Ma-VAqWmpf~EnOXRSO4<(E zkujkq?t}3@kex}XtuAd$ zj}zZ}RQkWC=D9U#ZHhF7Oc5$LWlxWM zF$kCC=E{-2oNlN}ElA<6ve^HAr}@|B0v!mijWK29SBg2gkt2v9CregAfbft zu$(>!@EQrPkqdKx2~diJQslyxE&tgAL2RiJ7W$jds?@#sDB zdyhgmU@3Sk1z)SD6^~Yd^D3yu0cqfo2EJB*JsxF%F$0`AU^{qh2Vd*aJ|67>&pqJB z0eiu7uLigZ9#_HF8ZwMWx1j1RsA1j8;!y#F7C<=jqgen}B$KZMUs=?W%;_F5?*VtN zxpQDT2i2?xOES2ke~DO!=>vdNpR!FG8gP~!OTtFKJ2^?SFs))$$W^; zd&DYiv{}5Hu3H-B=GN^695S%LI{`=t={z#9Kr{h}e{%sDm?jZ`9A7RX13UE) zfH=*Ukbxbz2tc-@%gDfX69SNk<`rbHI$zY$6mpTo*@%23v*SM<_?3 z<)|J9L_=sa)UyV7X-v0eG!1w>Yz1K}NSrm>3gKI! 
zF$Ww1I0W{bc{>7dL<3v~;W9{^y~_h34{wKR{Pi#R%Z?q9$X}8eV4g@=p9p^-y1~?6N+$iqA_B*g6wE1){McEOC5N-gc4N#R+$cLiwq3F!|#FUEe zFqUglb%R@oofcvr)(Bfml+Bi7dj5WX^TDh&(Q$8f>p3a?KQtGgbeH?R&Hl z%O$hg$PDZ-L-mi?r88}W-Z*oOoI<{*PypM9Nh3wrScDx|PojI&KCm{@H;_(PEY`(h zBNM#a$cP_1c-jZiUtWXmwdy$)ZaC+#xs!+d%XF-pu71Oeh@Qw{xz1KaK7axbXuOJp zDCi)HU|t1jW*P)dg9x_s5MVk4P1gYPA#^^}=eCgr5VQayI3NqWvmlgP_{SjR7}Vo{ zd!gVFSgXvp zLxdH*$7lB^%yFX9SZ?iAX=1QLjH*%W7N2Sqy~5{g#VZu_3Wc#HK*T#D8jpxhtnnQO z!#fUt{JK8Xo6n&83?|k?cJEJnNN@YO9v(;1ab)6pn1RDHa09M~sbYAl*nl;{5(FqK znis%=LZr>75cCvkaeHqrmU6L~V>MeR&DL2@2%n3#-5cu>(TK`B6YFNG+H5EbI}q9a zlKjgoteb_MSx;E{0UaHhb-=B+bI9Wys=?MS;rRg@Kd9%CF?%yo?2>U4dX$)fU1Fm74Ynr`suFhtz&U)lysTH*MG_QiwjZ##z6xC-z4AQ0RqVc-u#-@#> zT+lCRrUknrB7||^7YFq@7jlDW+#uFqU$SEYRb5N^g{r#Ku){P}b(w2VNi8WDD(P(O zFk2Km>S+YQIDcSlEXEZ&279RF8#QSoQ|B6Et$D8rS%Ai~H`T zR#cA{VcjCtvM|q`vbSV^sO&$3<7buql=kylXP)lehBAr6x;X5>RQ^tn27c$BH*8CP zc>}sPVEu);qtsJVvPQVAb{Cj;ffJ{Wr=j|3sKKe@1u$IzM-DiIYahZ9++e*DYrhgB zm?p68f$ESY{Y34}7pVRP>M@y#s0(nl1vr8M;~!K%<~As)9qq(ItXqh!(EuhP9V1J& zipn(+dnDopjL9!Qwkc_I;o)~wX;P4V3i4nX83ZT*sQ}#BT4ISVc7&@Xxkcr=8pCSr zz(~lq53tPxZ1uSauu`;HsXAc<*e%-Z)&NDKp-6ORLqjM&i#DG%6mxVob97dwETx7H z9?hrWmTn;mEJUF!%1Kxq5RC^^kDVPDXjILTUZZy91d>i5XU=@zMAA*<%$cR+y->;y;#DH)tN2XHw^%fX48o#WW?ICkgk$y?F!t-5GN=Zth-zQX2b zY7>&ME=e_cgImrp-+r|87y3&A)+MNinI0EMIrLeOEB_LMbup^Xz>>shw`pz|x4_C! 
zoifyv#q$ZzTcYunSdA^-p6!pF+n1Hxo3{1})qaH7XdBaRI|C;w6jx=OHm{{1_& zJUn$r+8^|rA{17H>av>+mf%KvNAuda{d+EW%>^HJjwN5N6OHTC!%X*+UjBa!e%P1t z`~bQSU_BUSeC2hpO=Hh~^p`nUH%A?1mMlkGqd9WiSQew8Vid;iSICz+qA^F^P+1Zk z{gUR+abFgpz#1$ zdB}s=JC=J#n{Pokpj_Jpc-t`^3KR1w!<7Z_1898v4Y!(=1fh*fl3B^wAxf2I+Yh{~Q zb(`qJbhMr&-H&#RX8Ca)D?z~}s5ZOcBCO7e#nFEygGY2T~XAV%} z&m5q{pE*E@KXZT*f93!s{>%YN{Fwuk_%jD6@n;TD;?E3Vju4$^OOB-FC(_GB*mn_b z$;OA}OwybYIaks`$j((%1LPD*mBD@_?bn>M_M=++QC)85-hho8G_LA>Y`l-Hr#q$z z)T6PaR;t_=i#CfzYiNPUyF|2Eq5-yvcH7itN+?Q1Ly3AmAiz7(@J<87>TF_lJkul{ zBMHYm}}tnk>q#vm40i2CB*|4QchFXuPPdGnP$Czobc|+{yF`^1gxsxOgc=y=QnChi)EV9MreL1XN#U7+b60mBH}$owog?cuKeRS_1Xo@pj}>o zbqlZ?o1EB+?B`uOznekhgQH=z#QX>H*9kB|Vi8F4*NHGuV%CuS^*)08>g5ap9w2z2 z0oFn7bn!h1(L&MzgUslJ?Nvr#l|R4r2DyT@x@dUAaMRi9|Ai^ghr z>ts#$w5LT2Me@sd7_Vomg#0xbiOI;o@_Y$!7r|YmXInM_?jg9R0_+$-rLp7`tAfwN zF7vR}sIiC%CAH)gtEBU>%Y2oTwNH<5N^UXpVE$9*v3+9jKCw14qGaq2iNS|7K$aMs zr2(?V;A{@@MxI&U2@xQbDF+-86L*XRDibPEJWz2W6n`KxdZuN4_gZ&CAhu z%^sjFWmVdo@lz4u{h>$C})ti>_GK*phg_96II!X+&N$u z^4x{I8NhPpsTed>o)hCF@;r&W+1&=={RkT$sS5BqI%`s&@n=@jai52E^VAa^-aE5q zzl%SwqQC4w<{ik*GJJwx+gD#Ebz8WO{_+WQpVadq(=$5emTY`=%;T_goI2*7-Inx^ zKbVwGnIvOfGInL#4ZC5dTPw}h=MF_5k?lvMXERK`oCV&qz>nKncVOQg8aHmM=(|-6 zU~0yK5tO%*=FiM36$8SOp&{Fd2(TP#E{6t8kqB@chabmHI3<27hQC#>8kw5V?pvM* zsNJ81-Dcqs)=BnUfX*0wCBV(tY2nz0Oxs}Kt3l> zAhQbO%QP&dVG}3sJuKbBCN>)EI+Mz4d1Ro<8;c!c)$N(ZC@87rp@B+T0o5ziNbVh< zpKf@)Shk(UTwg%Q3y5T$CP1PN5>+pXy-}dkL7ygYzH<_|CxI6?8Oy+F8C2mWBMTd| zG^d|3(O9PX$RDJFZKwNh-bod#1au|p-4=WIK*ydwci>umg96{6P}HaAp&C6CYnR32q$th9RycydFy0&G`+o*TjwIQRsv$w^o)2A7L9nSOXqx;Ua> z$%Wk#>F!2kzY&=&bFSplRXc<{H9+_wRG$N8K(!gbQ_O&f8PI^ESO^{q!CM2=UkK#A zit>jvdmO4C2QLi}b{xVPz;vC49W2jHEI$!9>Hpuvw<}G$=GSU7ZXea{xmY(BTOC`L zUPN_Ud04|;@azWjZgAs5-DjZs8SrIc4EFSf`keH^4a>SGjjTaFYmh&?S0KO<6nF%M zu;C}bQ51X>)#QL!2#bX}oM~PJp^Kmvr;gbWmJM}S>x9P!?70DZbLyCm1JZFY2V~&D z3>?A%+p*7f?9Tz2*gF&Zall3FeNh8k!-3Z{Jnm!9``CKYCvv2S0V!fITLlD26$4Yn z5Dr)&`m7NBIbfIQy-W1tfJ)K3QUlD@1mn@1)nv2J7@#7B7hs0w`n-4``@x~g5-qo2!H 
z$iI|>u3Qa6vqw0J)-Xt_H{xee*OxzUZ5;0q%>w_ccJFSgTO1 z%c=WYG4!pP)(_=%JG-wl9i=`>RlBNHEJ{eX4JAR91C#_+4p0(Q8Nfnx)Sy|O(OG^X z8ufq4?qu)m=(qHF9kSQIahH7mipGD{jh?-~qx&j-cE^3W1Dtk1Rc@kBg8NDE z<|cYSHtxsH+`PUJjW5)&pY`P);VnPdv-F1y&T1sBMn=xeu13DAQ6LATBPksjS?(73 zBpvytqd*RL1=1_9-Vd?ECmjO$@t@k*aNFSy?0p(xln1TypgkLIcGjfd(I@SeNb>+ZAAmOt7ZV^_XN*>3(Cl3y{gOT>WT#0&I0wAvfFDb^Ai!$TxLS>Xv-gJd zT9>{u)a-_})}RP(R$`z^3{>a#xVZ{78y7) z12^N;=q@(hRipbv;1sc8ihBAWK&lv-DmG)w^0@fY-EOmv z&m%2E&$b6TDazYO%MWzzF4B1}(uK1SD2pW1p_QLTs4(rj3ps_i2`1tFqZ5@fW4w|uX^{%4xRK%`8}uF zcqiLVu59g)?^dGPD^WAHR@h}Y725K~Q~mNaIK5WKldU&TLfU~o^;8K5OM~6m)hTIW zgfzm9UGLvwBZZK(^@#>mDBGIJm(P*ca}>zT(SOQ|PXw<-2;?^5)!?^UIjmKAP3HF9YxYn z4R9Sv*EQ$1>nP$nisV$_5t1IMn>T6n5sG+(BL9;*u?VC^;K1p`QjnHvZZ4KW#8Qal zj(TYzrGW#p6QuiT5RnFv9Izdv?cl&^&31^`4v`#?4N^8ZaB6fFq^sb-snJ#PG6Etw zAQKxiu?we0yF}wIbq8jbNYu1h{#gKZ)~90Esn~kdXOGgT0i*vo06Tz?uB=5h)*>Hn z<6DQE*J*ab4CIx8eA!OOc2&ws`8NbK%nWOhGskQ*a@mZkvu{ari?M038t8B0aD4fm z#s5^&e^+7^hE>>(ORznF^#?SI<`~u=(*TdL{;?VsCp`Ye`hPXRORRsX0gAD{SOb({ zeVGP$hxP9?z(=hAr~$rU{TB@|UDQuk*D8@CR@BF8fVrZ6t_GMV>gQ>IL{Xop0oI86 zH5y>Os9&!E(nWo`2G}I(H)()OQJ<*+_KEs^8sMO)Kd1o?iTXntfE?kEYJed$L{OQQag2DmEfuWEo?QJ<>;ZixCD z8X!;9=V^f3qW-o9$QSkb8sMI&zo!B2i~9Q-;E||*qye6Y`X?ITnW%rJ0bYvwml~j0 zG!&~B7es^Ji264gim5vNRP|6xC}!yNGc-V)&L&Q0U|R@t8|XT<{3{+7>p*t!|JOId zKww2c z6i~r{5rq?T&JxUu5kB<@XF!j?7q55r|K@p^o$9x$x~jUWx|=cnLx=7+W*EPj{!1ce zE&5f!FpM7gi(V8p_aQROF%iQo$=NqoRS@<$XYsSriy3fazw4AL5a zL^Vm)^DnJg6j3}x%V9)Jc3Jg8`2l=C>ye45dQFdsX?_~3Tk4Dpt$auJ_C6o6bL7L) z`0U6VH`>f9w)r1u-!l5uhvvvBkC%4w zZ2To7vg*7;a`d-=O0$=F-F+U!&v~7Dv;0rTl!aCqnew@8Y9Ha9c&k%q>wJvdo{gKC z5_{|1TFsB&Bh9)MrcBa{_wsUJg%*~pV|(hq617M}8V&c4Y5!2>Uz9fB?e@#V`|2xP zokR`)-If{sS8&XwXQL+P$-exOuRA&7vLwpxz(v~$GdspTuFl);H#;u=qM=D_1Im4O zmZprGl%+9j{-BPowfQM?HBv%%T;95}-CnVMM(U2zNx$f^r^dSf^w$IThVgzo%VJ~K z)j!C2x^kdpSKB%7du5k5m!3aobuBrqH1o}lP`$MBAx;H$hL7h3t&2UsxbQ;4FY_*3 zXe=DLc7^=)lja!Tbcfr@Oi~9bH^MuEKD!h);lriW#OtLTeF=sOh!%YbK3O4 zaj9EEVzs_}#Nq(-xuPS8Ulp-U&~(f5%l@9ze=7{0@8M^CXUwF}HU_(|^t&0@tUWPa 
znjXD0V#Xd|n0M`G3_Y?=Yq88ghT(ESmCK2~S!XLr4S)rw{M5Ef&hvDDpB z=qP2hL;|}(PT&qkR}&=e!ex#u<3$*(gjN!FC!v#s(Gr4ZCqq&5N3)gB_qel6Kf*9W zIK!y(MJJf+`ukLe?JoUUraOUCL6{20|Nc-G^9i>2<(uU!2|Pe3tPhEJYwG0-Jw-D9btVz=BYz=sL^vCoB_%6J?z*3Nul& zW=GG{XPA)ZxobbXy3J^?T!3DHvC21YFPQ8FOVvzV8A!^&kkWA-?g7g^(gnbB&EVb) zKGbTO11SEVm1ntw(CZ)!r5akVdoR2iGFY2v*nr(OV1KG1+2B3c zf&dNR*#N#2nB9AEVTx<868H$tAE5^YE)3Yz%lk-+1+rYQaj=;xAGt`aNR}E#D@9Y> zStisema79n9Z0AymTLq-qdMIRf>w1p2L(B(#$Sw@#i&Q+L(}y7toqkZnZ_UUC$;_cw=Fho^FTs+3> zQMXubGvH>>qjgH_ow%ticHYf;5Y&S%)y;BEAZSvj+d$B!PAgELKsEjn)GR?&l9aiK zzwWd=Zq}{sq`tB(@vdaTfJ_LY9y1qnuN6lacoF#)FlYf)>e)C(abD|g^&&v* zpxBXAoz*~_Sto2a_t9jz0x&25BU)lCw-ZD=L7S&bK~xIb)HRkn2JA7A@bpPwPpZ?E zU|9(^6kmL#I75`K;nRiX{Js2#@~vR3`B)b|NhjlE166n7LNG*)nu#+FsNF1S0M5D4 zmpAV&;9W4FaisAe;rh|0`GD<4|J^v5GETKxTvZ>u+?T}VGT>#d$?Bibw4r1;24gI!JrB;lRXiVdeFPa!L`fp5)Vzp^+mP@`8bpBmfcHU8 z^;4e(EElLBXhZdjJNFFU+I-N>2MgZK4)ExJpQsXnQIL0!fq#r6+CKt*1Xa11OQ%|H znbxcXicnI-+d~USL;U}?9hO@mSYbxJVY#h?9P=H#Gn zBg&mJQAny7Fb^cWnMJ@9feu}KmNb0atDYQt!C^0W@OTPh3g1VLd$`_vp>Z^X%=aDe zJ70-D@qfO#V?28rX>J|^J^cBu8zu_Vr)Jp?2orr|XHO^6vv8Ic9Zq|5q~7}q6H12T zed7Dk;gtig*R&Z8pGAh3881_XV7e$;Txya!hYV+Za`zoNLQ4kpmH1Mm$U|)YM>WotC=b}Kkw4q2r|(g(A@*ws6IvQ);=f4pNJ$t z7WU1;QB)$bXiRlQacmR;4kI2$LkcX9iFmE}YO;u!X4c)zjW&6f8z~tnqvk9wSv5t= zq<;*N-iWx7?`jy7Szz^_pd}qP_I?LxvQgS^5xqQ4GC)Ob}s zk&erW{x9JCh2Q%0tv%K+cWqcrfK!O4_i9~;mB{h|T8+3`oi0EuP^b5y=01MEB4-)IgFH^lh-9krz{rOu0Wbknrx`jO zi2vurSbxV;HY&V5nZ$iPTCPWH${8{A{Zno>ydnM2g*bZVU z@&0Y$uJ0-T*-Ex~xt-jRPhlHf8$G(6v)s>GKeIHUP0lSn-1NbP#G4D&3YOBo`fyN} z#j}IQZ6m`c5Kr*wsgZnC<5^FI64(T|2@L77O&i~BKAN*Ht4|IOQV_;q+hBWY2+J+dU7$Lm1mk~~&&m0&Ad@7Z za{~6H(&43wX+NKh&LhA((0j+Xa+(!nb$%Si`SnRb+Z6Pq8lFu1^`6ytEb7!?@($c7 zFsf{r#M4YhGLWl8tVA6O{B6F)Hu1R6J_2MRX7MHcdegI__mfs1Ag)BXMv%+WkB8;H zfzdZGr}Trwmo=3ygd~-?I6<6&G4~)odh?D#j0b0PU z1>C4Aaq2p^2@jrC5#S`^N&Y>ky@~ZYJvs1RHPM!ihUutEy$0B$4LWgW2+$6acHWgor#Cz-zN&SZ ztk6WmiMISSPXM3r2l-*Eq#=P}6Rr~Zy=c6bznBd64cV2My!jdd&H|nV6B-JtiDaen z{zrq&a!&!Dg6amaduqF#BI(f$BL4)up77h=-K0-}G8gtX0d}M1ZuFo&&PW&|>%Ok# 
zHnG^u-prZr03)?V>e8A&P&s@`$=)+}iF5}Tc7W<)(Y2dVQec6}eIlKMqjPW^Rm*Y; zv{&#qkj0nuBTe7O))5UG(Q+eSMV4ZtxXAP6b!1@*KvV!Mf2b(~QJH!duY#^s;7B8~ zAlqU_)7JG5h~OH~UITKz9FjphS)EQpVH&?}d(8{zx5Hffp)SiA+ZwC&JS(Y{Azv&3 z7|@0KPwvxJ0d_o92kv!zY+0@zr1hXW7bvfBj*a|2sl4wY8gzvaV|K2xgaQL?d66-3IRPouhxx;gmUwXw$ z0?Zc8)}aBrf5+i*b*cR;GMs>zfREGtcnVID z(bUXo8)2DMlHN#iVIAN)Q1xJz+XlFezb5M3yfiwz`}_|?z7|YtL3R7SCtI=cH?94x z1Xz!_p5HOwUz3=yE2gy()y3#rjB00<60|PCuGHdhTbpyg9NYUh(U1g~1cub+{DIL`IKUk2f=X=(;8s8YaqDBZ|5v`8}K%WXgHJ`2!2zilTeU^s_lP4H{;wR3tn`P z8cKvNi9r76{o^4w66~@S)#!5UQqI#+v%-H>az;mmMo&|I^i;lnJg)SEVS1=Ofm<}; zO;z*GQGzZ#d$_o|xqC2fDv)vKKRx(Q7ydIeVrIAnA#LQ=Vr?B=sh+;fK*E|inp+tf Pn;Mx|bd_6b*)snJfCark literal 0 HcmV?d00001 diff --git a/ffi/examples/read-table/.cache/clangd/index/read_table.h.B5A915C23F6DC678.idx b/ffi/examples/read-table/.cache/clangd/index/read_table.h.B5A915C23F6DC678.idx new file mode 100644 index 0000000000000000000000000000000000000000..1bc818940d9c93ed6af8e6d4b3e104ebb39b22bb GIT binary patch literal 1962 zcmYk63s4hR6ozjC3E5=bguE8mKv)8X5S0Qx1{4*wjED*bL8}FkM+8d%i6CON#o7+F zDxx4-=s*>$j|#015EO|@sZ|^S5#*t*mbT!iGlhc1YU$l2SDQ)x$^7S>d+s^=C5DjT z;G=c`EKCT=HYILeZwmmhhadCijO58&0Ait$U0M{eEUot){9aw-PS2dxpENaRwU``o zdI+1IucDieo9MF%i!=I;4t(_U!P0y_#m5&X7%CoatMD8deEw{B^y;=hl0628cT^9> zJn*aOp{q`M?jEi>N4IbJGpZ&2ub!;@;I0MqnXV9#MN}>Q4x)-zm$iDb}Q5`w6EMxHlh5f;vpI09{5k++$N*vRdAE_)!FR~$XA9M_y zwU=!@*HIsI(Uht4p0g)-0ajy_ZQbBr5;^mWf5#=70F4hPE$8D3nSbRPgO{zef~;$a z8anmE(>=Z42bse!`o<|-@&g(Vc-8*)*yaAx>}l~%L#NvMoNx2=r2Uf^epKC|o3qKt z`dgP)rHN<5V?LD{lP?91D(#evI298fx*P7E*}$C*u_wHZD$^4ew*KrI54 z)KTie3J^@BtLM>)^=<|J1aK7tO2`*tte~)JRr}wKlB_Ts$k6e6zFq+^I~xa`0`}KU zmePW<==Vf`fOo^WxR@1Y0{DgrSI(S%w)VRBMgmNR{bdpv#R_UL;@luV|D@l<2RN)( zi`8;gm}2;|g7raLO6{WkPNWmS4c05|m1|%V7K>tu=4opy_<>%Ku-dco4`%P z3aWQ0v(3G|G`xw=PA?D(R1_m`r;poZkn8h?+X>(V>uCk;#LAm`p)x0}`FuA4NVt6w zCYrPF|$VPlYn}G{0 zEVJfhvEYzR00*b^D*aT1*`hs<2BiE|3!zP8v>a^@g<(sUK5I)`W!TIq^A4_r*$S>g zj-E9_k}I4*(VC4@O0H6ja0>X)GH{nPu7kCtfRv(5zw~WsJJ21N1Z_vmQGw|4DO=Ww 
z5h;u<SxxG%<8Uz!b`B(3V@6`idgi-1 ZBi_uKDinc#Adz#I)phr*xWG|?{{Z+iJS6}C literal 0 HcmV?d00001 diff --git a/ffi/examples/read-table/.cache/clangd/index/schema.h.9F10114AF5265F91.idx b/ffi/examples/read-table/.cache/clangd/index/schema.h.9F10114AF5265F91.idx new file mode 100644 index 0000000000000000000000000000000000000000..b96d56679c3829075d89fb15fc2c85e83454d7ce GIT binary patch literal 6838 zcmdTIiC@jx_nvR>)mv`w)qAh))r%Hil_ioE(~!v4RD+4kD9ISRN=-G=WNc9+J0VQ? z4F+Qrk{G)XhK6EfNlK|KnPJBAyRY}X@AmT_{Q7*lx6?V_bIv{cy(5MW8njP~(15Rp zrp}m{G+u-dvLwHWNwcPmGC}AYIYOzM$|FLT+xy}_icieT6d#?InzFJGxGgJv)@@C5 zyWbkHwRhp*G0VJj{la$kf1l=FzO>)Xl3=bfP*Kv+z1$`E_~1Q_d-i+UghY*;pOJKK zlqTkoc4=JVk$?V~`n%ovZka1j=3UsaYu4Mt3m2R`lI2kMTZ{j-*xbObrCquWdY@d7 zKcvTl4!ZASrZ#ti%Omo79^N;+A))fx`XovGz3a`>mV926RC(_9rCSMwJ74bgwYu!_ zb-;JwTZfDfxtGu@Z%yx+BOOyZ&Z_r5Jx=3L)yb^*K;ai09JE#YNd2rlly-s=|`B!AIXOjjWYyPD!sRk(@4xvzC8b+EO+C zZo$mB{7*6}uK2%=>wHzzD|6&<#~*d&b;ta-T&u3_)uH@VKyK$*w=;iTc_pvDtop4^ z^{$v7x_+_w_n)S^g#WQZQIY@7jK5rQz{9cl)U=;>uS^fy(;A!nD)~)w6ReG%lY3(C z#j{IP5y7{!;?Z95J#po*luEP7i>HkXF7jDx9X$NcjS;_HvuT+Ua`Uhzt6%z!t$Ui6 z6)X>06PeUgbMt2zByD_H+gM_g;wh^sZ1V7M9{u8d$!xRN%O%~coi8pnuMTdFnwXW< zwWeu6bJJdj{kL{*^{gyh8*3@q^VPXEJEpI#3b1+DV|9n%3E2LjX!cj~QU5rPbI1J;DGrzj;T+6t^Ou$Y;t^QgYBlef~^mRBzOC2RYqvG{MCR{&$JsBgh>xN?Dyz0 z|FZh(^c%+$=i6m|$TjVH`Chu>>9%v-en<*B*!07u7N6c{tc&9PX21UI*{)T4yWIU{ zWKquX9vA9$1G^Mme44T1iRsj@w@qpuVrRcEV!rZ=edf!ajvi{eyS(-97gs7IE@PKU zZN^77I$!EO@b?k_tavu>?&*QQuP(_n{depOCTtIxE?go{q@Q(Okv1~lIN#@l}S;E-0`&c@$e zo7WWo*>mN(3)L%LdG(pDU0LSaG~qN%9w*KGTcKS74kCxm1rh*yG2*pSp(rYCXabgz4y%2#0INk}4O5q510 zl3I#gdUr%l2S-<}FH5tMBT^-JSI!l;EmVi>-kRo-0}8A#`jnuL06Ic+FllDUUJ_xnlW?}Sk>At?n^N=OVFB{4a^|F+E%bteVtYJvFf zrrj;t79=s)VsS$2#ap*=cLnk$XlIEtZd)jtU-)G1yU1_76sU_1@$L$DRog<*Cp-eI zhfnV6hfx5LUks?2$Tw_M{*HB#t!Huu4aCTika%Nno3;f>6(-&7JD^{GvD1Y#9QzET zfdoBCJjt?cK~fuen;(kDNLye3_L%~8BkFmoJZ;(*D&NJw_T&?%s4xX`CTJJzqG?+w zdclk(>(lE7L}27jn4bjkNw5?)BsJ&Uh!W2 zH71A0V$_R}RDigGa4~F1subB0E>ZrKGYXC@8jq1LAt?b=Lbw<<%BALe{}tc4C8S~$ 
zL`b%QcpF#?8&c?6!e2r%Ug{45|Tndg@nYgQ4%z3;x~zVR0v6|OOM_O ziAR9>JHXumE9&pD54U8tJid-hFrTJQvoqElJ`2=YU}v;7zY&xh!HSj)O2^3zHOY`? z2=n(qeGeQdcUq%3%hzC~YjX?D$!UW`K|*yv4t^RptsV3&_pO}z0Q0F_YCC8Fa0@`rJeLVvW_x%waH~O1jfwdJ;0i!a zQ&%Ju23j*jLP*hQ6A4jB15_j=FU@O_;6&1s(-vYW8aHoj#eRF+%4!zWoG@R3YPEV#`HgwO1o>W6> zQX)ubuSP*n!R9Fo^`yGE$D3S3T#2AEke7jqo{9EhQ<Lx~nshTm#}7hFGQ=w$x?$cn>i-A~#1S2qu}G z&ZW15vVqHncJNZ*mbQmC0k?_4aUP=^c12F~AkrR#_%V~FtFK?cGt(nX5vF2Lc*k&822HSF1<}!XSNcKWUMkoyY!apz_6^*bkT^eCw zC^y1}JEQ=T%KCBF`Z(K#k=XdXa8)qE!aQt*KkhmI+w7<}Nz@wdI82ehf%_X2)V&WM zYz#i}NEt~g=3Nlq1r0^eiG~!Wk4^CqG!I$i(+@yKZ$N^qOMk`2=;oh{BCH>R_#s$Q zN;<3%)*qj;0W=MaHT5oWkKuMiPx1d=R3?p#nPdI7_grF{IuO@^1vO1trZ(UDi!FM@ zpCJAdq!f|)`plCrhX1{Q3?*xUTMJT_3G0Ac$KXzrP2ZFRMlU3UH-Wnem~z1UOWEF_|8L0rpp7?sk@t3QlbL=a6NZUS4%pQ#h2R!6z zFJG(r7m6@{9!$@JjA?KQB$og)_zH-wfP}%K-Cky(L0ONeBK=Lo?F zWM)(Owx18(L%T7Tod?VFU`5rWS7_3_+MX-~7Bb$0^RQq>_2fJ)Xagsg8(VLlPFt7{ zmbJOQgG?D|fxdW7)f9?=M@MwW0hpZSO7YU=I~{T_k%5gU%}W>{mtRNK1* z@dU(A7-Hy#)n%`vYwi;d*h%c#-R0xBI5j)Q_uTjGpk=@<10yW^`B`~D-uuU-pSy8x zGWw2#`Tl18mDDhpKMT^cU`gHfuFBFo^0{q2QK%Zk)y$IgbK+0^8xR}geN;YH%xC@* ze>sEtJN0ijoCi4uIWf7hR0YsNv$p8QV literal 0 HcmV?d00001 diff --git a/ffi/examples/read-table/CMakeLists.txt b/ffi/examples/read-table/CMakeLists.txt index 2df2b38e4..edd1c00f7 100644 --- a/ffi/examples/read-table/CMakeLists.txt +++ b/ffi/examples/read-table/CMakeLists.txt @@ -25,7 +25,7 @@ if(MSVC) target_compile_options(read_table PRIVATE /W4 /WX) else() # no-strict-prototypes because arrow headers have fn defs without prototypes - target_compile_options(read_table PRIVATE -Wall -Wextra -Wpedantic -Werror -Wno-strict-prototypes) + target_compile_options(read_table PRIVATE -Wall -Wextra -Wpedantic -Werror -Wno-strict-prototypes -g) endif() if(PRINT_DATA) diff --git a/ffi/examples/read-table/compile_commands.json b/ffi/examples/read-table/compile_commands.json new file mode 100644 index 000000000..df81ce112 --- /dev/null +++ b/ffi/examples/read-table/compile_commands.json @@ -0,0 +1,14 @@ +[ +{ 
+ "directory": "/Users/oussama.saoudi/delta-kernel-rs/ffi/examples/read-table/build", + "command": "/Library/Developer/CommandLineTools/usr/bin/cc -DDEFINE_DEFAULT_ENGINE -DPRINT_ARROW_DATA -I/Users/oussama.saoudi/delta-kernel-rs/ffi/examples/read-table/../../../target/ffi-headers -I/opt/homebrew/Cellar/apache-arrow-glib/17.0.0/include -I/opt/homebrew/Cellar/glib/2.82.1/include -I/opt/homebrew/Cellar/glib/2.82.1/include/glib-2.0 -I/opt/homebrew/Cellar/glib/2.82.1/lib/glib-2.0/include -I/opt/homebrew/opt/gettext/include -I/opt/homebrew/Cellar/pcre2/10.44/include -I/opt/homebrew/Cellar/apache-arrow/17.0.0_6/include -I/Library/Developer/CommandLineTools/SDKs/MacOSX14.sdk/usr/include/ffi -arch arm64 -isysroot /Library/Developer/CommandLineTools/SDKs/MacOSX14.4.sdk -Wall -Wextra -Wpedantic -Werror -Wno-strict-prototypes -o CMakeFiles/read_table.dir/read_table.c.o -c /Users/oussama.saoudi/delta-kernel-rs/ffi/examples/read-table/read_table.c", + "file": "/Users/oussama.saoudi/delta-kernel-rs/ffi/examples/read-table/read_table.c", + "output": "CMakeFiles/read_table.dir/read_table.c.o" +}, +{ + "directory": "/Users/oussama.saoudi/delta-kernel-rs/ffi/examples/read-table/build", + "command": "/Library/Developer/CommandLineTools/usr/bin/cc -DDEFINE_DEFAULT_ENGINE -DPRINT_ARROW_DATA -I/Users/oussama.saoudi/delta-kernel-rs/ffi/examples/read-table/../../../target/ffi-headers -I/opt/homebrew/Cellar/apache-arrow-glib/17.0.0/include -I/opt/homebrew/Cellar/glib/2.82.1/include -I/opt/homebrew/Cellar/glib/2.82.1/include/glib-2.0 -I/opt/homebrew/Cellar/glib/2.82.1/lib/glib-2.0/include -I/opt/homebrew/opt/gettext/include -I/opt/homebrew/Cellar/pcre2/10.44/include -I/opt/homebrew/Cellar/apache-arrow/17.0.0_6/include -I/Library/Developer/CommandLineTools/SDKs/MacOSX14.sdk/usr/include/ffi -arch arm64 -isysroot /Library/Developer/CommandLineTools/SDKs/MacOSX14.4.sdk -Wall -Wextra -Wpedantic -Werror -Wno-strict-prototypes -o CMakeFiles/read_table.dir/arrow.c.o -c 
/Users/oussama.saoudi/delta-kernel-rs/ffi/examples/read-table/arrow.c", + "file": "/Users/oussama.saoudi/delta-kernel-rs/ffi/examples/read-table/arrow.c", + "output": "CMakeFiles/read_table.dir/arrow.c.o" +} +] \ No newline at end of file diff --git a/ffi/examples/read-table/expression.h b/ffi/examples/read-table/expression.h index a99331bcd..a871d1aa0 100644 --- a/ffi/examples/read-table/expression.h +++ b/ffi/examples/read-table/expression.h @@ -174,10 +174,10 @@ struct ExpressionRef* get_handle(void* data, size_t handle_index) } KernelStringSlice copy_kernel_string(KernelStringSlice string) { - char* contents = malloc(string.len); - size_t len = strlcpy(contents, string.ptr, string.len); - assert(len == string.len); - KernelStringSlice out = { .len = len, .ptr = contents }; + char* contents = malloc(string.len + 1); + strncpy(contents, string.ptr, string.len); + contents[string.len] = '\0'; + KernelStringSlice out = { .len = string.len, .ptr = contents }; return out; } @@ -231,11 +231,11 @@ uintptr_t visit_expr_decimal( dec->value[1] = value_ls; dec->precision = precision; dec->scale = scale; - return put_handle(data, dec, Literal); + return put_handle(data, literal, Literal); } DEFINE_SIMPLE_SCALAR(visit_expr_int, Integer, int32_t); DEFINE_SIMPLE_SCALAR(visit_expr_long, Long, int64_t); -DEFINE_SIMPLE_SCALAR(visit_expr_short, Long, int16_t); +DEFINE_SIMPLE_SCALAR(visit_expr_short, Short, int16_t); DEFINE_SIMPLE_SCALAR(visit_expr_byte, Byte, int8_t); DEFINE_SIMPLE_SCALAR(visit_expr_float, Float, float); DEFINE_SIMPLE_SCALAR(visit_expr_double, Double, double); @@ -356,6 +356,7 @@ uintptr_t visit_expr_column(void* data, KernelStringSlice string) { struct KernelStringSlice* heap_string = malloc(sizeof(KernelStringSlice)); *heap_string = copy_kernel_string(string); + printf("Creating column with len %lu: %s\n", string.len, heap_string->ptr); return put_handle(data, heap_string, Column); } @@ -410,12 +411,12 @@ struct ExpressionRef construct_predicate(KernelPredicate* 
predicate) return data.handles[schema_list_id]; } -void tab_helper(int n) +void print_n_spaces(int n) { if (n == 0) return; printf(" "); - tab_helper(n - 1); + print_n_spaces(n - 1); } void free_expression(struct ExpressionRef ref) { @@ -435,30 +436,49 @@ void free_expression(struct ExpressionRef ref) free_expression(var->expr_list[i]); } free(var); - } break; + break; + }; case Literal: { struct Literal* lit = ref.ref; switch (lit->type) { - case Struct: - printf("Struct\n"); + case Struct: { struct Struct* struct_data = &lit->value.struct_data; for (size_t i = 0; i < struct_data->len; i++) { free((void*)struct_data->field_names[i].ptr); } break; - case Array: - printf("Array\n"); + } + case Array: { struct ArrayData* array = &lit->value.array_data; for (size_t i = 0; i < array->len; i++) { free_expression(array->expr_list[i]); } free(array->expr_list); break; - default: + } + case String: { + struct KernelStringSlice* string = &lit->value.string_data; + free((void*)string->ptr); + break; + } + case Integer: + case Long: + case Short: + case Byte: + case Float: + case Double: + case Boolean: + case Timestamp: + case TimestampNtz: + case Date: + case Binary: + case Decimal: + case Null: break; } free(lit); - } break; + break; + }; case Unary: { struct Unary* unary = ref.ref; free_expression(unary->sub_expr); @@ -478,7 +498,7 @@ void print_tree(struct ExpressionRef ref, int depth) switch (ref.type) { case BinOp: { struct BinOp* op = ref.ref; - tab_helper(depth); + print_n_spaces(depth); switch (op->op) { case Add: { printf("ADD\n"); @@ -541,7 +561,7 @@ void print_tree(struct ExpressionRef ref, int depth) } case Variadic: { struct Variadic* var = ref.ref; - tab_helper(depth); + print_n_spaces(depth); switch (var->op) { case And: printf("And\n"); @@ -562,7 +582,7 @@ void print_tree(struct ExpressionRef ref, int depth) } break; case Literal: { struct Literal* lit = ref.ref; - tab_helper(depth); + print_n_spaces(depth); switch (lit->type) { case Integer: 
printf("Integer"); @@ -588,9 +608,10 @@ void print_tree(struct ExpressionRef ref, int depth) printf("Double"); printf("(%lld)\n", lit->value.simple); break; - case String: - printf("String"); + case String: { + printf("String(%s)\n", lit->value.string_data.ptr); break; + } case Boolean: printf("Boolean"); printf("(%lld)\n", lit->value.simple); @@ -608,21 +629,21 @@ void print_tree(struct ExpressionRef ref, int depth) printf("(%lld)\n", lit->value.simple); break; case Binary: - printf("Binary"); + printf("Binary\n"); break; case Decimal: - printf("Decimal"); + printf("Decimal\n"); break; case Null: - printf("Null"); + printf("Null\n"); break; case Struct: printf("Struct\n"); struct Struct* struct_data = &lit->value.struct_data; for (size_t i = 0; i < struct_data->len; i++) { - tab_helper(depth); + print_n_spaces(depth + 1); printf("Field: %s\n", struct_data->field_names[i].ptr); - print_tree(struct_data->expressions[i], depth + 1); + print_tree(struct_data->expressions[i], depth + 2); } break; case Array: @@ -635,7 +656,7 @@ void print_tree(struct ExpressionRef ref, int depth) } } break; case Unary: { - tab_helper(depth); + print_n_spaces(depth); struct Unary* unary = ref.ref; switch (unary->type) { case Not: @@ -646,11 +667,12 @@ void print_tree(struct ExpressionRef ref, int depth) break; } print_tree(unary->sub_expr, depth + 1); + break; } case Column: - tab_helper(depth); + print_n_spaces(depth); KernelStringSlice* string = ref.ref; - printf("Column: %s", string->ptr); + printf("Column(%s)\n", string->ptr); break; } } diff --git a/ffi/src/expressions.rs b/ffi/src/expressions.rs index 0a240882b..890b637a4 100644 --- a/ffi/src/expressions.rs +++ b/ffi/src/expressions.rs @@ -1,4 +1,4 @@ -use std::{ffi::c_void, sync::Arc}; +use std::{ffi::c_void, ops::Not, sync::Arc}; use crate::{ handle::Handle, AllocateErrorFn, EngineIterator, ExternResult, IntoExternResult, @@ -262,7 +262,7 @@ pub unsafe extern "C" fn get_kernel_expression() -> Handle { StructField::new("a", 
DataType::Primitive(PrimitiveType::Integer), false), StructField::new("b", DataType::Array(Box::new(array_type)), false), ]; - let nested_values = vec![Scalar::Integer(500), Scalar::Array(array_data)]; + let nested_values = vec![Scalar::Integer(500), Scalar::Array(array_data.clone())]; let nested = StructData::try_new(nested_fields.clone(), nested_values).unwrap(); let nested_type = StructType::new(nested_fields); let top = StructData::try_new( @@ -275,12 +275,97 @@ pub unsafe extern "C" fn get_kernel_expression() -> Handle { ) .unwrap(); Arc::new(Expr::and_from(vec![ - Expr::and_from(vec![ + Expr::literal(Scalar::Byte(i8::MAX)), + Expr::literal(Scalar::Byte(i8::MIN)), + Expr::literal(Scalar::Float(f32::MAX)), + Expr::literal(Scalar::Float(f32::MIN)), + Expr::literal(Scalar::Double(f64::MAX)), + Expr::literal(Scalar::Double(f64::MIN)), + Expr::literal(Scalar::Integer(i32::MAX)), + Expr::literal(Scalar::Integer(i32::MIN)), + Expr::literal(Scalar::Long(i64::MAX)), + Expr::literal(Scalar::Long(i64::MIN)), + Expr::literal(Scalar::String("hello expressions".into())), + Expr::literal(Scalar::Boolean(true)), + Expr::literal(Scalar::Boolean(false)), + Expr::literal(Scalar::Timestamp(50)), + Expr::literal(Scalar::TimestampNtz(100)), + Expr::literal(Scalar::Date(32)), + Expr::literal(Scalar::Binary(b"0xdeadbeefcafe".to_vec())), + Expr::literal(Scalar::Decimal(1, 2, 3)), + Expr::literal(Scalar::Null(DataType::Primitive(PrimitiveType::Short))), + Expr::literal(Scalar::Struct(top)), + Expr::literal(Scalar::Array(array_data)), + Expr::binary( + BinaryOperator::In, + Expr::literal(Scalar::Integer(0)), + Expr::literal(Scalar::Long(0)), + ), + Expr::binary( + BinaryOperator::Plus, + Expr::literal(Scalar::Integer(0)), + Expr::literal(Scalar::Long(0)), + ), + Expr::binary( + BinaryOperator::Minus, + Expr::literal(Scalar::Integer(0)), + Expr::literal(Scalar::Long(0)), + ), + Expr::binary( + BinaryOperator::Equal, + Expr::literal(Scalar::Integer(0)), + Expr::literal(Scalar::Long(0)), + 
), + Expr::binary( + BinaryOperator::NotEqual, + Expr::literal(Scalar::Integer(0)), + Expr::literal(Scalar::Long(0)), + ), + Expr::binary( + BinaryOperator::NotIn, + Expr::literal(Scalar::Integer(0)), + Expr::literal(Scalar::Long(0)), + ), + Expr::binary( + BinaryOperator::Divide, + Expr::literal(Scalar::Integer(0)), + Expr::literal(Scalar::Long(0)), + ), + Expr::binary( + BinaryOperator::Multiply, + Expr::literal(Scalar::Integer(0)), + Expr::literal(Scalar::Long(0)), + ), + Expr::binary( + BinaryOperator::LessThan, + Expr::literal(Scalar::Integer(0)), + Expr::literal(Scalar::Long(0)), + ), + Expr::binary( + BinaryOperator::LessThanOrEqual, + Expr::literal(Scalar::Integer(0)), + Expr::literal(Scalar::Long(0)), + ), + Expr::binary( + BinaryOperator::GreaterThan, + Expr::literal(Scalar::Integer(0)), + Expr::literal(Scalar::Long(0)), + ), + Expr::binary( + BinaryOperator::GreaterThanOrEqual, + Expr::literal(Scalar::Integer(0)), + Expr::literal(Scalar::Long(0)), + ), + Expr::binary( + BinaryOperator::Distinct, + Expr::literal(Scalar::Integer(0)), + Expr::literal(Scalar::Long(0)), + ), + Expr::struct_expr(vec![Expr::or_from(vec![ Expr::literal(Scalar::Integer(5)), Expr::literal(Scalar::Long(20)), - ]), - Expr::literal(Scalar::Integer(10)), - Expr::literal(Scalar::Struct(top)), + ])]), + Expr::not(Expr::is_null(Expr::column("col"))), ])) .into() } @@ -371,7 +456,10 @@ pub unsafe extern "C" fn visit_expression( } array_id } - fn visit_struct(visitor: &mut EngineExpressionVisitor, struct_data: &StructData) -> usize { + fn visit_struct_literal( + visitor: &mut EngineExpressionVisitor, + struct_data: &StructData, + ) -> usize { let struct_id = call!(visitor, visit_struct_literal, struct_data.fields().len()); for (field, value) in struct_data.fields().iter().zip(struct_data.values()) { let value_id = visit_scalar(visitor, value); @@ -385,7 +473,7 @@ pub unsafe extern "C" fn visit_expression( } struct_id } - fn visit_expr_struct(visitor: &mut EngineExpressionVisitor, exprs: 
&Vec) -> usize { + fn visit_struct(visitor: &mut EngineExpressionVisitor, exprs: &Vec) -> usize { let expr_struct_id = call!(visitor, visit_struct, exprs.len()); for expr in exprs { let expr_id = visit_expression(visitor, expr); @@ -429,7 +517,7 @@ pub unsafe extern "C" fn visit_expression( call!(visitor, visit_decimal, ms, ls, *precision, *scale) } Scalar::Null(_) => call!(visitor, visit_null), - Scalar::Struct(struct_data) => visit_struct(visitor, struct_data), + Scalar::Struct(struct_data) => visit_struct_literal(visitor, struct_data), Scalar::Array(array) => visit_array(visitor, array), } } @@ -437,7 +525,7 @@ pub unsafe extern "C" fn visit_expression( match expression { Expression::Literal(scalar) => visit_scalar(visitor, scalar), Expression::Column(name) => call!(visitor, visit_column, name.into()), - Expression::Struct(exprs) => visit_expr_struct(visitor, exprs), + Expression::Struct(exprs) => visit_struct(visitor, exprs), Expression::BinaryOperation { op, left, right } => { let left_id = visit_expression(visitor, left); let right_id = visit_expression(visitor, right); From 23d1ed27ba0c1d8fe1d515aeb090f4f75af1b638 Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Wed, 9 Oct 2024 16:36:51 -0700 Subject: [PATCH 17/82] Fix all memory leaks --- ffi/examples/read-table/expression.h | 11 ++++++++++- ffi/src/expressions.rs | 6 ++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/ffi/examples/read-table/expression.h b/ffi/examples/read-table/expression.h index a871d1aa0..1a7d2d2da 100644 --- a/ffi/examples/read-table/expression.h +++ b/ffi/examples/read-table/expression.h @@ -435,6 +435,7 @@ void free_expression(struct ExpressionRef ref) for (size_t i = 0; i < var->len; i++) { free_expression(var->expr_list[i]); } + free(var->expr_list); free(var); break; }; @@ -444,8 +445,11 @@ void free_expression(struct ExpressionRef ref) case Struct: { struct Struct* struct_data = &lit->value.struct_data; for (size_t i = 0; i < struct_data->len; i++) { + 
free_expression(struct_data->expressions[i]); free((void*)struct_data->field_names[i].ptr); } + free(struct_data->expressions); + free(struct_data->field_names); break; } case Array: { @@ -461,6 +465,11 @@ void free_expression(struct ExpressionRef ref) free((void*)string->ptr); break; } + case Binary: { + struct BinaryData* binary = &lit->value.binary; + free(binary->buf); + break; + } case Integer: case Long: case Short: @@ -471,7 +480,6 @@ void free_expression(struct ExpressionRef ref) case Timestamp: case TimestampNtz: case Date: - case Binary: case Decimal: case Null: break; @@ -683,4 +691,5 @@ void test_kernel_expr() struct ExpressionRef ref = construct_predicate(pred); print_tree(ref, 0); free_expression(ref); + free_kernel_predicate(pred); } diff --git a/ffi/src/expressions.rs b/ffi/src/expressions.rs index 890b637a4..fa4b1ccab 100644 --- a/ffi/src/expressions.rs +++ b/ffi/src/expressions.rs @@ -249,6 +249,12 @@ pub extern "C" fn visit_expression_literal_bool( wrap_expression(state, Expression::literal(value)) } +/// Free the memory from the passed KernelPredicate +#[no_mangle] +pub unsafe extern "C" fn free_kernel_predicate(data: Handle) { + data.drop_handle(); +} + #[no_mangle] pub unsafe extern "C" fn get_kernel_expression() -> Handle { use Expression as Expr; From c66fb3e4ec81d69a2eb4984cf283bc613288b2ea Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Wed, 9 Oct 2024 16:39:45 -0700 Subject: [PATCH 18/82] Remove clang cache --- .../clangd/index/arrow.c.593AB35726E16CF5.idx | Bin 6730 -> 0 bytes .../clangd/index/arrow.h.5C86D068362A9230.idx | Bin 1184 -> 0 bytes .../index/expression.h.2E6480F4E885C761.idx | Bin 23854 -> 0 bytes .../index/read_table.c.A243E3FFE9F1D262.idx | Bin 6072 -> 0 bytes .../index/read_table.h.B5A915C23F6DC678.idx | Bin 1962 -> 0 bytes .../clangd/index/schema.h.9F10114AF5265F91.idx | Bin 6838 -> 0 bytes 6 files changed, 0 insertions(+), 0 deletions(-) delete mode 100644 
ffi/examples/read-table/.cache/clangd/index/arrow.c.593AB35726E16CF5.idx delete mode 100644 ffi/examples/read-table/.cache/clangd/index/arrow.h.5C86D068362A9230.idx delete mode 100644 ffi/examples/read-table/.cache/clangd/index/expression.h.2E6480F4E885C761.idx delete mode 100644 ffi/examples/read-table/.cache/clangd/index/read_table.c.A243E3FFE9F1D262.idx delete mode 100644 ffi/examples/read-table/.cache/clangd/index/read_table.h.B5A915C23F6DC678.idx delete mode 100644 ffi/examples/read-table/.cache/clangd/index/schema.h.9F10114AF5265F91.idx diff --git a/ffi/examples/read-table/.cache/clangd/index/arrow.c.593AB35726E16CF5.idx b/ffi/examples/read-table/.cache/clangd/index/arrow.c.593AB35726E16CF5.idx deleted file mode 100644 index 2f130e517ab0571e0acb7fd574cff1a09dd7f767..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6730 zcmZ8l3tWs@|9^g`X_{tE%`{!6%XFKXO6f^T(Y>VGs+MIJvE&*GE0$c7iYS+mQi@%x zNNcg%Dj|6nOS!Dd5?3JHBn~&cln$0pX z_g!xL?t#OvU7O42CE)9)*j7mR`P9mmKb|yfx#GP{nIFHbS}xAXyImHVZ+~rI)HT`S z2K3}oDt?f+Zd>U}_hA8p!v%e@&8J&dAJ1O-yxlm_FiQ93pE_H@OSN`{|Jk|yF?#_c7HP!QfS~Y(EZ;is=oA&MaiuD_PEAi!dr_iSv75nB3$4lP$Opd;jJ%#Tv zVG`dj*~Umu;az(=@o!%16^UX;%^p{K#XB+Iuin^xzsX<}%)v#wL z`rH@KZ%KW#7r4JXSO39@ruzPVtBfn6 zO3&pl+@4M}-Fz`MdRb4Aw&SbeuH$kSnI&{4{t*8q^bej!%Ik-em%jg1_pnn!;GZ`i z1eGZpH~sw6o}hhSmw4MwIug4sc46q4`4!@?1Ra9~`Df4Wo;g1Md2VFnm*I_}g)e@Z z&^ygyT=Sp2{~X-9cyyzSQ|xUVR~>a=s$y?jm0)?@=FS@pyZ7!}7F;^crsCN4-wa!~ z{o}9j^^McHy!qU*Pra47rSh)+Gnus~N4N&Ze&3XB?{!UL_h?_0G}!Oc6+ZSpt4!Cn zzH9zEOB~gb*js7a?tI?xb@bE0%u7Fv)TzDOd24*n_Fr@(pXa@f5$@bxE(zW5^Ki4p zORZzC#&uiBCpBvaJl!7q=<3FtYX`NRuj+?QsQ-1+T1%575y{2VDy}UJTT)z-oRT!V z4;Ur}=ew<(8Dor@(G0^@0!W*A|41KMvu^6^Dksxs!9oY}p01ZGAM7Kye*=4PgCCS!DYz>IMKHwZK`w|<-D z;EtJ*gh64fupW9)d5id4(p*Vhf}!s24?;ym=A8swq3 zE}V<=-Na{{3Ar0|xR%(;sJD`0BbSeK-!S?U(?pUNF zz^qUtlyYjNu2N5RA+R|h$^of5S_BWz0S^ujg5MyFRi}(iL*Z#ChQsN|IUTukI1{;M zB2S8ET>j>1=dK_1h;=d@nTUp`@phU`_mtscGJF7Z4uFJ~ch$AJn6fhg5<+MKaT7?X 
zHwg<9XYC)8FCjsy0^KSwQLh>HD(GGX6Ur!>`J_y6pv{nI$VFzk$c>sp1(5K6mKJ8) zU~C&qq#6c%4;%KKx*;bT+QFn9Bg3P%!))JV=khpAWj7{E@o*kHVvXV zoB{F-aNuwz=w*V8!;eA!7#!#{R|9r=dsV)+B=a}GXASTwn41MsJeCiUs! zk=pAWiN^E5I}iGtBb~tS1PNuNEsbdW(9bxH*=SfcS7)~*8%E9ds2@f=mLjQ?mUoUc zD%);nyz@{fQ3D96{JZ`F2hoXi7edHK{CtFImE;$Hb@XQLk${7H&0Z>w7C>IEadOJ0SHgyt3RY#}8 znB8Eu!H()W{qKgrZM`yYVzHI3l|E(SO>R6qVzzWN87>4~Az+$h>p-+kXipT-u98gfR zFxwB}evs2jXwpjA`G;OlAepuZctzk!?TPs0mDk<|98L%ec?-pK;zxaURbf`n5yT!} zJ-zZ_<|7b0=f$x)Ys{UcYo)oES^GqeFf+h=+IpivzdsWi9|G= z!yKfxf|40i3FBxqT5TU@*P^vz8X#?#?@wFb{CXOh<^`Y^+-me$SKD=0Hh(6O9wQhd zrS@WWonW21-?xlv2<&mz*}1>@Z*Q@0755q&Zxo($)MV?Kn_D5nPAyw_(EF^?!c z0Ez>kqy)^~2ibjYw@PyO+i{cf=XpHL&f?9|u)P#eDd=%xeSrEvkER#18Hk^O3}^`a zgJv$C7*n=@7?O$1GLao6?1|{N`}6uG6`>8R+JL3eO^BJLcm9jT2_mu}y`|&mlE7>n zKTe~?&Ee0{Knr*aGy;^!OVmK=z)RQMATxlMp^1xuUkpZ^LuG)3 zwgB-9kP*c+_u9gb=O}u#Q5C+RA21i0ez5BYM-FEpQ5F(YMP%V1{~Q!d@r8==mvNWf z7Lfoof_Ec?&;Zc`nwIe6v5cFs4`Tb^b8d!hNVE+}sm9<{MLIJ)%9at0b>LnHfwXH= z2GVvu9Oh{AD4#O!^(XTB6+}W|qi~~rD7og}-;8sM_?isMMRGCa-f5M$I5j9{H5sl3 zR1M}-KZ?!&*7~d1)uia^K(CG?VD>TSJ?4r|{&Ib*?zn9#4LL}hgB+=4Ma}hg>HWJ@ z8mb|(8e*wxRdB2uK}mx{8#QT_szWC=4YM2VH+s?~S7*7Xq;7HR8ZyljKu^GyN^kh( z#iRXuy*7|(lwQgJ>Mv&5AU1-krr{>>|8Q{SCV2y%Zy=P5|5ha4ima&<$^`!3=Yqb> z(8g>iFI1ndRm|=Lv=cD34zp(godwLfcoWb~z|=_0b^*VO+hdE(cDTVv_rqkOK9Ki8AXi-($UFo2)9NZXv}x^~5ynS| zhBRcJ#t|;oj`W`ROV|(r4Aa1sniQ58Q6sf4`i@A%2x6S5YtNUxy*ixz>!^?%eRNz* zX&y;d5o^dHFC8yaDuP*Wu>f^~xw_9418jh=yVYX0xW^wRZ@bt^9)E`KUX31ua-t7{ z$SESNv{8D}rO}endGA8I_Gu!00eBZcK#K>nPl5lGt4l}Sk~K$4Bxi`cuf&(^)*mjI z9+nqg|0NCnk;oe&Lq4}*ea`<}NSQ3x-(1#lpy5X|a+xz0nsH?yG8M@=WF|H<Yakb#KA?QC zpmh;4BdBfUiqC3E0}bW{>(Z2Avb~}n(4qS+W(R>c2)eWwF`J5XQ;|N!ccNZA-x(^c zBXtmB7osjsRp&9d!5!-q#V&oHCtOsq)j(hkgmC3{9&FEpJ;n3_L3U-;=MCx>HIFnD z)kh6FjM=SVu$8-{hHsKgk52fao_PBVWY4&hd;Q-DPQ{_3yM#~+jSZWs)r{{?~#LkjB|`1r)HMttS}Ch!pKqxrAi~7Rwqb65E0HB>z#@@vo<^$9>#3aFO5#utR`HQ#t+bQihi${b|FX=Py69@z`HQgB7C z05}2xy)#`yP1jK}eJRIAu?7z?iNsVuzo^Pkn7sEir>Z7)jY|bln5m zZ+QNfcySJ7=Rirb@QvwU_vOuxRfLTp8^dY1{CDp@%2Vh+C&Ps>tPohbI- 
zEY&zTZUf#n5K`|kdmMPjLCE=b0{ADm8$V`G0{^5YZU$a6x4&WbA@Cn+9yB*2{$}pe z3TF2s{(endgm^_rozx{xvXaUr?Smv%JRP2TUm@Q~6XR7*Coi9qJk+n$O^acM(`Ycj znqApZT$8e@M$2uuE90srFmsaTCR!3`ZDFY+5Q_Bl#Rd|rE3-F~ON~qnjm>RME%;W< F{{e|$m3IID diff --git a/ffi/examples/read-table/.cache/clangd/index/arrow.h.5C86D068362A9230.idx b/ffi/examples/read-table/.cache/clangd/index/arrow.h.5C86D068362A9230.idx deleted file mode 100644 index 8ea41df84fe5ee822da09fbd91a8385e8181fd92..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1184 zcmWIYbaR`*!oc91;#rZKT9OE4G6({3aY<2T5+eh{TxJG_iaBe84(1&);5obe%Yj6N zTMp$nt_Vxt%5>jSJZ0Ik-f3#FkC=pR?br8BT%40Ut0hEp+x^d-1T-UFf={7xw=i+_Yo|7*Nj7p3=HupS{ab@l6$T5mq zBi8IUF}EW@tm2?l-u)9l`W9gMG=RVF-@0-1C!WM>Bn>yaI zWd8NnCmrg$-pk}}eN&=xUBjom45f9NVX^KyZ{!aB*HYT9nxHNd9=GH*hsd?y8SH*4 zzRg>VOrMBtE8+Ai^sYSocYfz#yTX+ze$UHK>dXvW*_zdTc$w`d(SM3kH!_Rnt9|yL zyH0F*+>aaij1d7T?YmWiylu2!Zc97nzys8OjFI&i&UOb}->6s4vW&q4|>MlNtT!JMCwnUgTxa)tmS55Fj< zs2I#dSnvT=gTfA`+91&C>T@e|RiJ8d4smgqYFKEpa6v*59Ec1IJp958!aQ(K@=Hic zsKP^qUs6I+1s2LMnvoL{%1{GJx2>GcF^$;@Xq^~`m?X@)n1vIUs;kb601ERn@w3BH z0?d&>H-lpWYREFFl|iK!wiNP%OZ# z^76TOY<IioGM) zJ9beKdqqXXE*c90f+9A2ch9+d&z?{3^M1_h^JF*v?wr}#-PzgM*;%`^Yuh&PH$iCC zuidzz{YUo^1wn8m|BV_wa!>*JOQMq?j9a$lr~G00kq*l{UTIZhZ_7U#4EEg*#(otqALO0+%03@uHW?S z)$6_9cOG5xGU;v7l`g~GKMy^&^I5pxk~r^+1A6}tPw(HkWZ?G|>%RWgZv9&Q^v2(h z9=&gK&vD!SwF%wRdj5|QgI2g+UzoeR>E>}Q-~3>_^Sof_&Y%B%yXkY2;T_+-T=uGS z=fa>iNk+$N8zkpyhgV+Paww|il7h2Mqs;%BzFOYMHa=uQ-9%@%DY=hA1C|xtxo}si zcIRrTc}vfpUAvAS-Q!-=qWYm{zu9*?$z^qN{?%b6@!z)H{;0;$_)V*4^xw0z587Py z$uGX&{&3iDY)(<(m2$_AUC+(DPzzf9}*T#C$5GVZKk3Ilm^f+ES#OcCz#CzN30?S-rDHGym;t zgTKvMG5J*YsFvq`jJnfz!|HVp*R5WcQMtNcNZ0V=O`_sap253UD|BM?{D2h44GkJs z%zff_Jz>rK#(@`RR&hN2*H=-~Qg;s=)F*6WleQ%t-0P;6|B`p7m+s&zyQuc=6Ox)g z-1g_~bh_FH37%ipWnEwWI+A!AD7JT`naEcmAwZZlzQ3S^zOO3b4A;d%#5gt zagtZ@gYO=6%x#b~V*Rkti+x6%95G4C`)9<3-FtEtd>5Bdf~!@B1^jQp%mZ(1_6Og+ysY)fvs=WF>o!~5mVW&xH>aOX`%@FL 
zYTYTg6P~!(#mViuQ&IOHk^;xyx-jfb$L;M3Rt&tI=2Cypl^d;#y{G%8@pL@`teTx+)?A}=Vt$~@-ytC#4s1sqoZc-EhIPt%{@vqNEq*9+uICo8`<2(9<0jAkwrjgCUB`YO-)r^e zUw^(ixW&%5ZKr?!RycWn?zE4=C?z1_ubk%PZI3&-ZW^Nrd!S3^=-nTCO=+)i!?8_E8t@r6;LLJlc$9LKn(a7^^Z`-Ghos;y@ za|ZuZ`d~zx2c29FWEQr$InTCfT4tBMc^{$&NBMMFKglz3;-Sihsf+ygW>k#4TmC#E z-0|kFsFv3+ZF)L*xM5SS(^th0rUcYZEFLv}=pW0Sso~3<|IZ2}n1wEa5Rd9W9Z@L- ziTpSfO%U?W_U`aYyQat_#GpZhBnDNDL4K?j>3&@?-f>g^%*g%9D zu(#cVPs>~+v+y&e3Wi|9h?Z7Hv!4(Cex^&1$s{D9CWIs&N%6>m)l49h&bQi}ut_d9 z!a&Mr8nT^+jI5SA&L@x1z*`IrA!b_#QE-b6!7Y8r)mZvN0$0B z=r2RHc%;;Vj*Cp@K4kgUo9#bVHw!Ij zdrhG!5uKLS_D;TWJJUC8pr=VlLEjLP3Hk|StZB(QHmRWbY!_488YW?;JZm$M>kQ-_ zkCd8GP5I`@i6v+Ky$r%>S)f9 zf(^m~nNb>YOQUitwIJ1W5Z0jaf@jXZMZc%j3Ns07QBT4t6*;9MGLlNoy0_ZPxCIFbRv$ zSA=4WVT@TR8HH{8h*^`j|548%?3bH5kBsN(04TM!srFm%boemKP~R*Jpaakl8rmwQ zb!uw|U0a^#y|jTzSSAbelhaRC;t?%bQ-x#ktV6oj4b8&Wl*jk*JyAzm$~>k_5DHtw zp4-&aH_|M0rUao7N~VjJ)={3+q_n?t?XR1fg&~xp88ow3N~;+C#jW~(?@S~Ve^H8F z(2FP-EoF-7MCW(+jP#nkR|u{3^JafJyvRKkDv5kcV<-T!zhDrP$s*KjF%~y zQd`L?V(wg8Q?2wX8(|o2Z>a51gHp=vkwxe=vcX^PJN(enARLnUyhN@qsTopgK?*j? 
zs@ltI%+cH3CKIytGT8#1Zh_7<9x1gTRj)2wMH7Xx4vsgP_TSgqAZ(P$=77T-s1c8p z+PV_EwG5v#vs2f0bl$1$n~9{EhzO?CY(gG93~D=TVthM;a7AVo4Z3K$td&}j>Q|H7 zY~=W?&Dxh2zcUEuWU{TG-wLD$N^Na(ch05AsQvAKFbJtK*)ptKhRNV7wIJ2NGPkw^ zE1yo7F{_hVAf9YIY5)yvl+rqdnTIcqIMb?oXM?arX0#I7u0)RUNT~&>fxEDh%wyIx zr$z@(ru=9U#!}TDZ9m$il&pI_!ySgKE;4kr5gOB$<{;ZS)Q~DQn;xH;?>A;v%aWl=twCdA(AW( zT3UHrx_bV5iuM4j=*d)wA zBMFniRR{YhrIljr^R*X7ZyanEzN4))hQ?%}($d<>#-?uz*SxDa(jc6dr|1Ca4uF3= zQffhpaFgeH|G2|lZ(sgrq>bk3zIlx-3&w53)4+Px;&rb?Z*=xrX>@mGP&ZX3-7XH z48nYw(ORrqi(TT8QVWuoD38(b^JkYfZ?tBnS@@eu^eud=S4vqTVtrjkHErz?zHE+J z=tK!>LQSIQw6u!s`)W?3G~Yks48j(k6II!>Dc?My#OqKw;1R!KeZM9^CehE7qVl^d?&eE#>Cu zGJZ96d#x?*)e;TD4w=y-&_ANxQ))r-wUy8jEu?Du~cd{d4GJb+j0BcFN+MqBsvINk<(U0){#;Rk|zk=#tFTi zy!&6x1*;aBg|3vSAM_*YMoU@rX+-q9eH$*EcDi=5PUuT1e09F0bF^e*Szg-mQ$Xdy zWV0}s5(Gd1abRdE6HtA;^T(0qHwL^;p=N>loe8LF0wUc~Y9{Y9;~+OXzfp^I!YIn; zSKY5<(rC#VI_%cw(Xqax7aN34^5`rE{bD*BN^KpTLz{O!_D|}y#4LPE`P7EmL>p;o zl_zjMJB z48k#4m>guxq288KTQ#WV(DA+ITo%otcCB)&1Rt;6*~|DiP*HX ziXGXhs(tMyQCrN4S^NgRA;yE2G6h+xQ^QQ-JnUSy8-x>bOBa#xB3&6uZEdMzMs~qK zQ)5DQQ6?)xu4SlJJW^^w3NgtF{UCJL=0(q^?=%aMRG?PSiZn+{xjA}F*tQL~8`j*o z%Ph2^6wRSI*}iCLrTFsAnU$uLiM#0#QtgRDQXF+Sl$wo@v}^I^8_7-g8HBB}K$%FN zN##~*L8|R63v_wmxpmL~N#0M7K-7DQMN%x?xs{qR+w|#5ui>pH954w>Q3pcR(XJy= zdRj7yA38rMO53vGfI&DeH+KpdPtgfcYU?l+9j)Fp;8|89l-&H51L% zf79Ayqv{?t2$y7Ld7#gu%S5RKsa_SCS?=+BPXqt-IcBCCXgq2OEy=*s(mL?jJJ4^- z5;|v_m4o2d@HMdww6szzNglZ1R8FlN8=)2DG0Aok875lF6vWP+FI^Ni?84(5oiKtD z^wISpE)XplL3wc42>(V;a!f+9?AmuVbtSVwOV*wA5gjwexeq);XN<0(zofqi1uYpx zi)*eeU3|v{9;p9n^p9}J!=}fz7KY0J!l>-e+eS3JMunzM$8g8t#QUcu z6S~cr`s;U`>vF>&T#y-+BG*zB5|5PHx|-kJ3cq-D@QXZyuu~>`gIwQGZB}aQc_XJ? 
z!12kclWrS?Ju=y8WIRp9QfjMMon2k4yT0(vHwlxe;aiHFm(m5R)J(Es?C;-VNv!=?TSk9EQil%kEU z4cRJb$yl_l`CCoT9GfQw;ixQcA#yFG-BN06?}EEwgZ`0S~5QM zySIyqANKk=)n%%w16>A^Gczq&Zyb+Y-|=;3LZL|*L%X~Lxh*?UuojpA+%upL0$R$4Ma-VAqWmpf~EnOXRSO4<(E zkujkq?t}3@kex}XtuAd$ zj}zZ}RQkWC=D9U#ZHhF7Oc5$LWlxWM zF$kCC=E{-2oNlN}ElA<6ve^HAr}@|B0v!mijWK29SBg2gkt2v9CregAfbft zu$(>!@EQrPkqdKx2~diJQslyxE&tgAL2RiJ7W$jds?@#sDB zdyhgmU@3Sk1z)SD6^~Yd^D3yu0cqfo2EJB*JsxF%F$0`AU^{qh2Vd*aJ|67>&pqJB z0eiu7uLigZ9#_HF8ZwMWx1j1RsA1j8;!y#F7C<=jqgen}B$KZMUs=?W%;_F5?*VtN zxpQDT2i2?xOES2ke~DO!=>vdNpR!FG8gP~!OTtFKJ2^?SFs))$$W^; zd&DYiv{}5Hu3H-B=GN^695S%LI{`=t={z#9Kr{h}e{%sDm?jZ`9A7RX13UE) zfH=*Ukbxbz2tc-@%gDfX69SNk<`rbHI$zY$6mpTo*@%23v*SM<_?3 z<)|J9L_=sa)UyV7X-v0eG!1w>Yz1K}NSrm>3gKI! zF$Ww1I0W{bc{>7dL<3v~;W9{^y~_h34{wKR{Pi#R%Z?q9$X}8eV4g@=p9p^-y1~?6N+$iqA_B*g6wE1){McEOC5N-gc4N#R+$cLiwq3F!|#FUEe zFqUglb%R@oofcvr)(Bfml+Bi7dj5WX^TDh&(Q$8f>p3a?KQtGgbeH?R&Hl z%O$hg$PDZ-L-mi?r88}W-Z*oOoI<{*PypM9Nh3wrScDx|PojI&KCm{@H;_(PEY`(h zBNM#a$cP_1c-jZiUtWXmwdy$)ZaC+#xs!+d%XF-pu71Oeh@Qw{xz1KaK7axbXuOJp zDCi)HU|t1jW*P)dg9x_s5MVk4P1gYPA#^^}=eCgr5VQayI3NqWvmlgP_{SjR7}Vo{ zd!gVFSgXvp zLxdH*$7lB^%yFX9SZ?iAX=1QLjH*%W7N2Sqy~5{g#VZu_3Wc#HK*T#D8jpxhtnnQO z!#fUt{JK8Xo6n&83?|k?cJEJnNN@YO9v(;1ab)6pn1RDHa09M~sbYAl*nl;{5(FqK znis%=LZr>75cCvkaeHqrmU6L~V>MeR&DL2@2%n3#-5cu>(TK`B6YFNG+H5EbI}q9a zlKjgoteb_MSx;E{0UaHhb-=B+bI9Wys=?MS;rRg@Kd9%CF?%yo?2>U4dX$)fU1Fm74Ynr`suFhtz&U)lysTH*MG_QiwjZ##z6xC-z4AQ0RqVc-u#-@#> zT+lCRrUknrB7||^7YFq@7jlDW+#uFqU$SEYRb5N^g{r#Ku){P}b(w2VNi8WDD(P(O zFk2Km>S+YQIDcSlEXEZ&279RF8#QSoQ|B6Et$D8rS%Ai~H`T zR#cA{VcjCtvM|q`vbSV^sO&$3<7buql=kylXP)lehBAr6x;X5>RQ^tn27c$BH*8CP zc>}sPVEu);qtsJVvPQVAb{Cj;ffJ{Wr=j|3sKKe@1u$IzM-DiIYahZ9++e*DYrhgB zm?p68f$ESY{Y34}7pVRP>M@y#s0(nl1vr8M;~!K%<~As)9qq(ItXqh!(EuhP9V1J& zipn(+dnDopjL9!Qwkc_I;o)~wX;P4V3i4nX83ZT*sQ}#BT4ISVc7&@Xxkcr=8pCSr zz(~lq53tPxZ1uSauu`;HsXAc<*e%-Z)&NDKp-6ORLqjM&i#DG%6mxVob97dwETx7H 
z9?hrWmTn;mEJUF!%1Kxq5RC^^kDVPDXjILTUZZy91d>i5XU=@zMAA*<%$cR+y->;y;#DH)tN2XHw^%fX48o#WW?ICkgk$y?F!t-5GN=Zth-zQX2b zY7>&ME=e_cgImrp-+r|87y3&A)+MNinI0EMIrLeOEB_LMbup^Xz>>shw`pz|x4_C! zoifyv#q$ZzTcYunSdA^-p6!pF+n1Hxo3{1})qaH7XdBaRI|C;w6jx=OHm{{1_& zJUn$r+8^|rA{17H>av>+mf%KvNAuda{d+EW%>^HJjwN5N6OHTC!%X*+UjBa!e%P1t z`~bQSU_BUSeC2hpO=Hh~^p`nUH%A?1mMlkGqd9WiSQew8Vid;iSICz+qA^F^P+1Zk z{gUR+abFgpz#1$ zdB}s=JC=J#n{Pokpj_Jpc-t`^3KR1w!<7Z_1898v4Y!(=1fh*fl3B^wAxf2I+Yh{~Q zb(`qJbhMr&-H&#RX8Ca)D?z~}s5ZOcBCO7e#nFEygGY2T~XAV%} z&m5q{pE*E@KXZT*f93!s{>%YN{Fwuk_%jD6@n;TD;?E3Vju4$^OOB-FC(_GB*mn_b z$;OA}OwybYIaks`$j((%1LPD*mBD@_?bn>M_M=++QC)85-hho8G_LA>Y`l-Hr#q$z z)T6PaR;t_=i#CfzYiNPUyF|2Eq5-yvcH7itN+?Q1Ly3AmAiz7(@J<87>TF_lJkul{ zBMHYm}}tnk>q#vm40i2CB*|4QchFXuPPdGnP$Czobc|+{yF`^1gxsxOgc=y=QnChi)EV9MreL1XN#U7+b60mBH}$owog?cuKeRS_1Xo@pj}>o zbqlZ?o1EB+?B`uOznekhgQH=z#QX>H*9kB|Vi8F4*NHGuV%CuS^*)08>g5ap9w2z2 z0oFn7bn!h1(L&MzgUslJ?Nvr#l|R4r2DyT@x@dUAaMRi9|Ai^ghr z>ts#$w5LT2Me@sd7_Vomg#0xbiOI;o@_Y$!7r|YmXInM_?jg9R0_+$-rLp7`tAfwN zF7vR}sIiC%CAH)gtEBU>%Y2oTwNH<5N^UXpVE$9*v3+9jKCw14qGaq2iNS|7K$aMs zr2(?V;A{@@MxI&U2@xQbDF+-86L*XRDibPEJWz2W6n`KxdZuN4_gZ&CAhu z%^sjFWmVdo@lz4u{h>$C})ti>_GK*phg_96II!X+&N$u z^4x{I8NhPpsTed>o)hCF@;r&W+1&=={RkT$sS5BqI%`s&@n=@jai52E^VAa^-aE5q zzl%SwqQC4w<{ik*GJJwx+gD#Ebz8WO{_+WQpVadq(=$5emTY`=%;T_goI2*7-Inx^ zKbVwGnIvOfGInL#4ZC5dTPw}h=MF_5k?lvMXERK`oCV&qz>nKncVOQg8aHmM=(|-6 zU~0yK5tO%*=FiM36$8SOp&{Fd2(TP#E{6t8kqB@chabmHI3<27hQC#>8kw5V?pvM* zsNJ81-Dcqs)=BnUfX*0wCBV(tY2nz0Oxs}Kt3l> zAhQbO%QP&dVG}3sJuKbBCN>)EI+Mz4d1Ro<8;c!c)$N(ZC@87rp@B+T0o5ziNbVh< zpKf@)Shk(UTwg%Q3y5T$CP1PN5>+pXy-}dkL7ygYzH<_|CxI6?8Oy+F8C2mWBMTd| zG^d|3(O9PX$RDJFZKwNh-bod#1au|p-4=WIK*ydwci>umg96{6P}HaAp&C6CYnR32q$th9RycydFy0&G`+o*TjwIQRsv$w^o)2A7L9nSOXqx;Ua> z$%Wk#>F!2kzY&=&bFSplRXc<{H9+_wRG$N8K(!gbQ_O&f8PI^ESO^{q!CM2=UkK#A zit>jvdmO4C2QLi}b{xVPz;vC49W2jHEI$!9>Hpuvw<}G$=GSU7ZXea{xmY(BTOC`L zUPN_Ud04|;@azWjZgAs5-DjZs8SrIc4EFSf`keH^4a>SGjjTaFYmh&?S0KO<6nF%M 
zu;C}bQ51X>)#QL!2#bX}oM~PJp^Kmvr;gbWmJM}S>x9P!?70DZbLyCm1JZFY2V~&D z3>?A%+p*7f?9Tz2*gF&Zall3FeNh8k!-3Z{Jnm!9``CKYCvv2S0V!fITLlD26$4Yn z5Dr)&`m7NBIbfIQy-W1tfJ)K3QUlD@1mn@1)nv2J7@#7B7hs0w`n-4``@x~g5-qo2!H z$iI|>u3Qa6vqw0J)-Xt_H{xee*OxzUZ5;0q%>w_ccJFSgTO1 z%c=WYG4!pP)(_=%JG-wl9i=`>RlBNHEJ{eX4JAR91C#_+4p0(Q8Nfnx)Sy|O(OG^X z8ufq4?qu)m=(qHF9kSQIahH7mipGD{jh?-~qx&j-cE^3W1Dtk1Rc@kBg8NDE z<|cYSHtxsH+`PUJjW5)&pY`P);VnPdv-F1y&T1sBMn=xeu13DAQ6LATBPksjS?(73 zBpvytqd*RL1=1_9-Vd?ECmjO$@t@k*aNFSy?0p(xln1TypgkLIcGjfd(I@SeNb>+ZAAmOt7ZV^_XN*>3(Cl3y{gOT>WT#0&I0wAvfFDb^Ai!$TxLS>Xv-gJd zT9>{u)a-_})}RP(R$`z^3{>a#xVZ{78y7) z12^N;=q@(hRipbv;1sc8ihBAWK&lv-DmG)w^0@fY-EOmv z&m%2E&$b6TDazYO%MWzzF4B1}(uK1SD2pW1p_QLTs4(rj3ps_i2`1tFqZ5@fW4w|uX^{%4xRK%`8}uF zcqiLVu59g)?^dGPD^WAHR@h}Y725K~Q~mNaIK5WKldU&TLfU~o^;8K5OM~6m)hTIW zgfzm9UGLvwBZZK(^@#>mDBGIJm(P*ca}>zT(SOQ|PXw<-2;?^5)!?^UIjmKAP3HF9YxYn z4R9Sv*EQ$1>nP$nisV$_5t1IMn>T6n5sG+(BL9;*u?VC^;K1p`QjnHvZZ4KW#8Qal zj(TYzrGW#p6QuiT5RnFv9Izdv?cl&^&31^`4v`#?4N^8ZaB6fFq^sb-snJ#PG6Etw zAQKxiu?we0yF}wIbq8jbNYu1h{#gKZ)~90Esn~kdXOGgT0i*vo06Tz?uB=5h)*>Hn z<6DQE*J*ab4CIx8eA!OOc2&ws`8NbK%nWOhGskQ*a@mZkvu{ari?M038t8B0aD4fm z#s5^&e^+7^hE>>(ORznF^#?SI<`~u=(*TdL{;?VsCp`Ye`hPXRORRsX0gAD{SOb({ zeVGP$hxP9?z(=hAr~$rU{TB@|UDQuk*D8@CR@BF8fVrZ6t_GMV>gQ>IL{Xop0oI86 zH5y>Os9&!E(nWo`2G}I(H)()OQJ<*+_KEs^8sMO)Kd1o?iTXntfE?kEYJed$L{OQQag2DmEfuWEo?QJ<>;ZixCD z8X!;9=V^f3qW-o9$QSkb8sMI&zo!B2i~9Q-;E||*qye6Y`X?ITnW%rJ0bYvwml~j0 zG!&~B7es^Ji264gim5vNRP|6xC}!yNGc-V)&L&Q0U|R@t8|XT<{3{+7>p*t!|JOId zKww2c z6i~r{5rq?T&JxUu5kB<@XF!j?7q55r|K@p^o$9x$x~jUWx|=cnLx=7+W*EPj{!1ce zE&5f!FpM7gi(V8p_aQROF%iQo$=NqoRS@<$XYsSriy3fazw4AL5a zL^Vm)^DnJg6j3}x%V9)Jc3Jg8`2l=C>ye45dQFdsX?_~3Tk4Dpt$auJ_C6o6bL7L) z`0U6VH`>f9w)r1u-!l5uhvvvBkC%4w zZ2To7vg*7;a`d-=O0$=F-F+U!&v~7Dv;0rTl!aCqnew@8Y9Ha9c&k%q>wJvdo{gKC z5_{|1TFsB&Bh9)MrcBa{_wsUJg%*~pV|(hq617M}8V&c4Y5!2>Uz9fB?e@#V`|2xP zokR`)-If{sS8&XwXQL+P$-exOuRA&7vLwpxz(v~$GdspTuFl);H#;u=qM=D_1Im4O zmZprGl%+9j{-BPowfQM?HBv%%T;95}-CnVMM(U2zNx$f^r^dSf^w$IThVgzo%VJ~K 
z)j!C2x^kdpSKB%7du5k5m!3aobuBrqH1o}lP`$MBAx;H$hL7h3t&2UsxbQ;4FY_*3 zXe=DLc7^=)lja!Tbcfr@Oi~9bH^MuEKD!h);lriW#OtLTeF=sOh!%YbK3O4 zaj9EEVzs_}#Nq(-xuPS8Ulp-U&~(f5%l@9ze=7{0@8M^CXUwF}HU_(|^t&0@tUWPa znjXD0V#Xd|n0M`G3_Y?=Yq88ghT(ESmCK2~S!XLr4S)rw{M5Ef&hvDDpB z=qP2hL;|}(PT&qkR}&=e!ex#u<3$*(gjN!FC!v#s(Gr4ZCqq&5N3)gB_qel6Kf*9W zIK!y(MJJf+`ukLe?JoUUraOUCL6{20|Nc-G^9i>2<(uU!2|Pe3tPhEJYwG0-Jw-D9btVz=BYz=sL^vCoB_%6J?z*3Nul& zW=GG{XPA)ZxobbXy3J^?T!3DHvC21YFPQ8FOVvzV8A!^&kkWA-?g7g^(gnbB&EVb) zKGbTO11SEVm1ntw(CZ)!r5akVdoR2iGFY2v*nr(OV1KG1+2B3c zf&dNR*#N#2nB9AEVTx<868H$tAE5^YE)3Yz%lk-+1+rYQaj=;xAGt`aNR}E#D@9Y> zStisema79n9Z0AymTLq-qdMIRf>w1p2L(B(#$Sw@#i&Q+L(}y7toqkZnZ_UUC$;_cw=Fho^FTs+3> zQMXubGvH>>qjgH_ow%ticHYf;5Y&S%)y;BEAZSvj+d$B!PAgELKsEjn)GR?&l9aiK zzwWd=Zq}{sq`tB(@vdaTfJ_LY9y1qnuN6lacoF#)FlYf)>e)C(abD|g^&&v* zpxBXAoz*~_Sto2a_t9jz0x&25BU)lCw-ZD=L7S&bK~xIb)HRkn2JA7A@bpPwPpZ?E zU|9(^6kmL#I75`K;nRiX{Js2#@~vR3`B)b|NhjlE166n7LNG*)nu#+FsNF1S0M5D4 zmpAV&;9W4FaisAe;rh|0`GD<4|J^v5GETKxTvZ>u+?T}VGT>#d$?Bibw4r1;24gI!JrB;lRXiVdeFPa!L`fp5)Vzp^+mP@`8bpBmfcHU8 z^;4e(EElLBXhZdjJNFFU+I-N>2MgZK4)ExJpQsXnQIL0!fq#r6+CKt*1Xa11OQ%|H znbxcXicnI-+d~USL;U}?9hO@mSYbxJVY#h?9P=H#Gn zBg&mJQAny7Fb^cWnMJ@9feu}KmNb0atDYQt!C^0W@OTPh3g1VLd$`_vp>Z^X%=aDe zJ70-D@qfO#V?28rX>J|^J^cBu8zu_Vr)Jp?2orr|XHO^6vv8Ic9Zq|5q~7}q6H12T zed7Dk;gtig*R&Z8pGAh3881_XV7e$;Txya!hYV+Za`zoNLQ4kpmH1Mm$U|)YM>WotC=b}Kkw4q2r|(g(A@*ws6IvQ);=f4pNJ$t z7WU1;QB)$bXiRlQacmR;4kI2$LkcX9iFmE}YO;u!X4c)zjW&6f8z~tnqvk9wSv5t= zq<;*N-iWx7?`jy7Szz^_pd}qP_I?LxvQgS^5xqQ4GC)Ob}s zk&erW{x9JCh2Q%0tv%K+cWqcrfK!O4_i9~;mB{h|T8+3`oi0EuP^b5y=01MEB4-)IgFH^lh-9krz{rOu0Wbknrx`jO zi2vurSbxV;HY&V5nZ$iPTCPWH${8{A{Zno>ydnM2g*bZVU z@&0Y$uJ0-T*-Ex~xt-jRPhlHf8$G(6v)s>GKeIHUP0lSn-1NbP#G4D&3YOBo`fyN} z#j}IQZ6m`c5Kr*wsgZnC<5^FI64(T|2@L77O&i~BKAN*Ht4|IOQV_;q+hBWY2+J+dU7$Lm1mk~~&&m0&Ad@7Z za{~6H(&43wX+NKh&LhA((0j+Xa+(!nb$%Si`SnRb+Z6Pq8lFu1^`6ytEb7!?@($c7 zFsf{r#M4YhGLWl8tVA6O{B6F)Hu1R6J_2MRX7MHcdegI__mfs1Ag)BXMv%+WkB8;H zfzdZGr}Trwmo=3ygd~-?I6<6&G4~)odh?D#j0b0PU 
z1>C4Aaq2p^2@jrC5#S`^N&Y>ky@~ZYJvs1RHPM!ihUutEy$0B$4LWgW2+$6acHWgor#Cz-zN&SZ ztk6WmiMISSPXM3r2l-*Eq#=P}6Rr~Zy=c6bznBd64cV2My!jdd&H|nV6B-JtiDaen z{zrq&a!&!Dg6amaduqF#BI(f$BL4)up77h=-K0-}G8gtX0d}M1ZuFo&&PW&|>%Ok# zHnG^u-prZr03)?V>e8A&P&s@`$=)+}iF5}Tc7W<)(Y2dVQec6}eIlKMqjPW^Rm*Y; zv{&#qkj0nuBTe7O))5UG(Q+eSMV4ZtxXAP6b!1@*KvV!Mf2b(~QJH!duY#^s;7B8~ zAlqU_)7JG5h~OH~UITKz9FjphS)EQpVH&?}d(8{zx5Hffp)SiA+ZwC&JS(Y{Azv&3 z7|@0KPwvxJ0d_o92kv!zY+0@zr1hXW7bvfBj*a|2sl4wY8gzvaV|K2xgaQL?d66-3IRPouhxx;gmUwXw$ z0?Zc8)}aBrf5+i*b*cR;GMs>zfREGtcnVID z(bUXo8)2DMlHN#iVIAN)Q1xJz+XlFezb5M3yfiwz`}_|?z7|YtL3R7SCtI=cH?94x z1Xz!_p5HOwUz3=yE2gy()y3#rjB00<60|PCuGHdhTbpyg9NYUh(U1g~1cub+{DIL`IKUk2f=X=(;8s8YaqDBZ|5v`8}K%WXgHJ`2!2zilTeU^s_lP4H{;wR3tn`P z8cKvNi9r76{o^4w66~@S)#!5UQqI#+v%-H>az;mmMo&|I^i;lnJg)SEVS1=Ofm<}; zO;z*GQGzZ#d$_o|xqC2fDv)vKKRx(Q7ydIeVrIAnA#LQ=Vr?B=sh+;fK*E|inp+tf Pn;Mx|bd_6b*)snJfCark diff --git a/ffi/examples/read-table/.cache/clangd/index/read_table.h.B5A915C23F6DC678.idx b/ffi/examples/read-table/.cache/clangd/index/read_table.h.B5A915C23F6DC678.idx deleted file mode 100644 index 1bc818940d9c93ed6af8e6d4b3e104ebb39b22bb..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1962 zcmYk63s4hR6ozjC3E5=bguE8mKv)8X5S0Qx1{4*wjED*bL8}FkM+8d%i6CON#o7+F zDxx4-=s*>$j|#015EO|@sZ|^S5#*t*mbT!iGlhc1YU$l2SDQ)x$^7S>d+s^=C5DjT z;G=c`EKCT=HYILeZwmmhhadCijO58&0Ait$U0M{eEUot){9aw-PS2dxpENaRwU``o zdI+1IucDieo9MF%i!=I;4t(_U!P0y_#m5&X7%CoatMD8deEw{B^y;=hl0628cT^9> zJn*aOp{q`M?jEi>N4IbJGpZ&2ub!;@;I0MqnXV9#MN}>Q4x)-zm$iDb}Q5`w6EMxHlh5f;vpI09{5k++$N*vRdAE_)!FR~$XA9M_y zwU=!@*HIsI(Uht4p0g)-0ajy_ZQbBr5;^mWf5#=70F4hPE$8D3nSbRPgO{zef~;$a z8anmE(>=Z42bse!`o<|-@&g(Vc-8*)*yaAx>}l~%L#NvMoNx2=r2Uf^epKC|o3qKt z`dgP)rHN<5V?LD{lP?91D(#evI298fx*P7E*}$C*u_wHZD$^4ew*KrI54 z)KTie3J^@BtLM>)^=<|J1aK7tO2`*tte~)JRr}wKlB_Ts$k6e6zFq+^I~xa`0`}KU zmePW<==Vf`fOo^WxR@1Y0{DgrSI(S%w)VRBMgmNR{bdpv#R_UL;@luV|D@l<2RN)( zi`8;gm}2;|g7raLO6{WkPNWmS4c05|m1|%V7K>tu=4opy_<>%Ku-dco4`%P 
z3aWQ0v(3G|G`xw=PA?D(R1_m`r;poZkn8h?+X>(V>uCk;#LAm`p)x0}`FuA4NVt6w zCYrPF|$VPlYn}G{0 zEVJfhvEYzR00*b^D*aT1*`hs<2BiE|3!zP8v>a^@g<(sUK5I)`W!TIq^A4_r*$S>g zj-E9_k}I4*(VC4@O0H6ja0>X)GH{nPu7kCtfRv(5zw~WsJJ21N1Z_vmQGw|4DO=Ww z5h;u<SxxG%<8Uz!b`B(3V@6`idgi-1 ZBi_uKDinc#Adz#I)phr*xWG|?{{Z+iJS6}C diff --git a/ffi/examples/read-table/.cache/clangd/index/schema.h.9F10114AF5265F91.idx b/ffi/examples/read-table/.cache/clangd/index/schema.h.9F10114AF5265F91.idx deleted file mode 100644 index b96d56679c3829075d89fb15fc2c85e83454d7ce..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6838 zcmdTIiC@jx_nvR>)mv`w)qAh))r%Hil_ioE(~!v4RD+4kD9ISRN=-G=WNc9+J0VQ? z4F+Qrk{G)XhK6EfNlK|KnPJBAyRY}X@AmT_{Q7*lx6?V_bIv{cy(5MW8njP~(15Rp zrp}m{G+u-dvLwHWNwcPmGC}AYIYOzM$|FLT+xy}_icieT6d#?InzFJGxGgJv)@@C5 zyWbkHwRhp*G0VJj{la$kf1l=FzO>)Xl3=bfP*Kv+z1$`E_~1Q_d-i+UghY*;pOJKK zlqTkoc4=JVk$?V~`n%ovZka1j=3UsaYu4Mt3m2R`lI2kMTZ{j-*xbObrCquWdY@d7 zKcvTl4!ZASrZ#ti%Omo79^N;+A))fx`XovGz3a`>mV926RC(_9rCSMwJ74bgwYu!_ zb-;JwTZfDfxtGu@Z%yx+BOOyZ&Z_r5Jx=3L)yb^*K;ai09JE#YNd2rlly-s=|`B!AIXOjjWYyPD!sRk(@4xvzC8b+EO+C zZo$mB{7*6}uK2%=>wHzzD|6&<#~*d&b;ta-T&u3_)uH@VKyK$*w=;iTc_pvDtop4^ z^{$v7x_+_w_n)S^g#WQZQIY@7jK5rQz{9cl)U=;>uS^fy(;A!nD)~)w6ReG%lY3(C z#j{IP5y7{!;?Z95J#po*luEP7i>HkXF7jDx9X$NcjS;_HvuT+Ua`Uhzt6%z!t$Ui6 z6)X>06PeUgbMt2zByD_H+gM_g;wh^sZ1V7M9{u8d$!xRN%O%~coi8pnuMTdFnwXW< zwWeu6bJJdj{kL{*^{gyh8*3@q^VPXEJEpI#3b1+DV|9n%3E2LjX!cj~QU5rPbI1J;DGrzj;T+6t^Ou$Y;t^QgYBlef~^mRBzOC2RYqvG{MCR{&$JsBgh>xN?Dyz0 z|FZh(^c%+$=i6m|$TjVH`Chu>>9%v-en<*B*!07u7N6c{tc&9PX21UI*{)T4yWIU{ zWKquX9vA9$1G^Mme44T1iRsj@w@qpuVrRcEV!rZ=edf!ajvi{eyS(-97gs7IE@PKU zZN^77I$!EO@b?k_tavu>?&*QQuP(_n{depOCTtIxE?go{q@Q(Okv1~lIN#@l}S;E-0`&c@$e zo7WWo*>mN(3)L%LdG(pDU0LSaG~qN%9w*KGTcKS74kCxm1rh*yG2*pSp(rYCXabgz4y%2#0INk}4O5q510 zl3I#gdUr%l2S-<}FH5tMBT^-JSI!l;EmVi>-kRo-0}8A#`jnuL06Ic+FllDUUJ_xnlW?}Sk>At?n^N=OVFB{4a^|F+E%bteVtYJvFf zrrj;t79=s)VsS$2#ap*=cLnk$XlIEtZd)jtU-)G1yU1_76sU_1@$L$DRog<*Cp-eI 
zhfnV6hfx5LUks?2$Tw_M{*HB#t!Huu4aCTika%Nno3;f>6(-&7JD^{GvD1Y#9QzET zfdoBCJjt?cK~fuen;(kDNLye3_L%~8BkFmoJZ;(*D&NJw_T&?%s4xX`CTJJzqG?+w zdclk(>(lE7L}27jn4bjkNw5?)BsJ&Uh!W2 zH71A0V$_R}RDigGa4~F1subB0E>ZrKGYXC@8jq1LAt?b=Lbw<<%BALe{}tc4C8S~$ zL`b%QcpF#?8&c?6!e2r%Ug{45|Tndg@nYgQ4%z3;x~zVR0v6|OOM_O ziAR9>JHXumE9&pD54U8tJid-hFrTJQvoqElJ`2=YU}v;7zY&xh!HSj)O2^3zHOY`? z2=n(qeGeQdcUq%3%hzC~YjX?D$!UW`K|*yv4t^RptsV3&_pO}z0Q0F_YCC8Fa0@`rJeLVvW_x%waH~O1jfwdJ;0i!a zQ&%Ju23j*jLP*hQ6A4jB15_j=FU@O_;6&1s(-vYW8aHoj#eRF+%4!zWoG@R3YPEV#`HgwO1o>W6> zQX)ubuSP*n!R9Fo^`yGE$D3S3T#2AEke7jqo{9EhQ<Lx~nshTm#}7hFGQ=w$x?$cn>i-A~#1S2qu}G z&ZW15vVqHncJNZ*mbQmC0k?_4aUP=^c12F~AkrR#_%V~FtFK?cGt(nX5vF2Lc*k&822HSF1<}!XSNcKWUMkoyY!apz_6^*bkT^eCw zC^y1}JEQ=T%KCBF`Z(K#k=XdXa8)qE!aQt*KkhmI+w7<}Nz@wdI82ehf%_X2)V&WM zYz#i}NEt~g=3Nlq1r0^eiG~!Wk4^CqG!I$i(+@yKZ$N^qOMk`2=;oh{BCH>R_#s$Q zN;<3%)*qj;0W=MaHT5oWkKuMiPx1d=R3?p#nPdI7_grF{IuO@^1vO1trZ(UDi!FM@ zpCJAdq!f|)`plCrhX1{Q3?*xUTMJT_3G0Ac$KXzrP2ZFRMlU3UH-Wnem~z1UOWEF_|8L0rpp7?sk@t3QlbL=a6NZUS4%pQ#h2R!6z zFJG(r7m6@{9!$@JjA?KQB$og)_zH-wfP}%K-Cky(L0ONeBK=Lo?F zWM)(Owx18(L%T7Tod?VFU`5rWS7_3_+MX-~7Bb$0^RQq>_2fJ)Xagsg8(VLlPFt7{ zmbJOQgG?D|fxdW7)f9?=M@MwW0hpZSO7YU=I~{T_k%5gU%}W>{mtRNK1* z@dU(A7-Hy#)n%`vYwi;d*h%c#-R0xBI5j)Q_uTjGpk=@<10yW^`B`~D-uuU-pSy8x zGWw2#`Tl18mDDhpKMT^cU`gHfuFBFo^0{q2QK%Zk)y$IgbK+0^8xR}geN;YH%xC@* ze>sEtJN0ijoCi4uIWf7hR0YsNv$p8QV From 45c2a99274bf2c92524cdedec948d933a4d8f330 Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Wed, 9 Oct 2024 16:47:58 -0700 Subject: [PATCH 19/82] Fix safety, remove unnecessary diffs --- ffi/examples/read-table/CMakeLists.txt | 2 +- ffi/examples/read-table/compile_commands.json | 14 -------------- ffi/src/expressions.rs | 16 ++++++++++++++-- ffi/src/lib.rs | 3 --- 4 files changed, 15 insertions(+), 20 deletions(-) delete mode 100644 ffi/examples/read-table/compile_commands.json diff --git a/ffi/examples/read-table/CMakeLists.txt b/ffi/examples/read-table/CMakeLists.txt index edd1c00f7..2df2b38e4 100644 --- 
a/ffi/examples/read-table/CMakeLists.txt +++ b/ffi/examples/read-table/CMakeLists.txt @@ -25,7 +25,7 @@ if(MSVC) target_compile_options(read_table PRIVATE /W4 /WX) else() # no-strict-prototypes because arrow headers have fn defs without prototypes - target_compile_options(read_table PRIVATE -Wall -Wextra -Wpedantic -Werror -Wno-strict-prototypes -g) + target_compile_options(read_table PRIVATE -Wall -Wextra -Wpedantic -Werror -Wno-strict-prototypes) endif() if(PRINT_DATA) diff --git a/ffi/examples/read-table/compile_commands.json b/ffi/examples/read-table/compile_commands.json deleted file mode 100644 index df81ce112..000000000 --- a/ffi/examples/read-table/compile_commands.json +++ /dev/null @@ -1,14 +0,0 @@ -[ -{ - "directory": "/Users/oussama.saoudi/delta-kernel-rs/ffi/examples/read-table/build", - "command": "/Library/Developer/CommandLineTools/usr/bin/cc -DDEFINE_DEFAULT_ENGINE -DPRINT_ARROW_DATA -I/Users/oussama.saoudi/delta-kernel-rs/ffi/examples/read-table/../../../target/ffi-headers -I/opt/homebrew/Cellar/apache-arrow-glib/17.0.0/include -I/opt/homebrew/Cellar/glib/2.82.1/include -I/opt/homebrew/Cellar/glib/2.82.1/include/glib-2.0 -I/opt/homebrew/Cellar/glib/2.82.1/lib/glib-2.0/include -I/opt/homebrew/opt/gettext/include -I/opt/homebrew/Cellar/pcre2/10.44/include -I/opt/homebrew/Cellar/apache-arrow/17.0.0_6/include -I/Library/Developer/CommandLineTools/SDKs/MacOSX14.sdk/usr/include/ffi -arch arm64 -isysroot /Library/Developer/CommandLineTools/SDKs/MacOSX14.4.sdk -Wall -Wextra -Wpedantic -Werror -Wno-strict-prototypes -o CMakeFiles/read_table.dir/read_table.c.o -c /Users/oussama.saoudi/delta-kernel-rs/ffi/examples/read-table/read_table.c", - "file": "/Users/oussama.saoudi/delta-kernel-rs/ffi/examples/read-table/read_table.c", - "output": "CMakeFiles/read_table.dir/read_table.c.o" -}, -{ - "directory": "/Users/oussama.saoudi/delta-kernel-rs/ffi/examples/read-table/build", - "command": "/Library/Developer/CommandLineTools/usr/bin/cc -DDEFINE_DEFAULT_ENGINE 
-DPRINT_ARROW_DATA -I/Users/oussama.saoudi/delta-kernel-rs/ffi/examples/read-table/../../../target/ffi-headers -I/opt/homebrew/Cellar/apache-arrow-glib/17.0.0/include -I/opt/homebrew/Cellar/glib/2.82.1/include -I/opt/homebrew/Cellar/glib/2.82.1/include/glib-2.0 -I/opt/homebrew/Cellar/glib/2.82.1/lib/glib-2.0/include -I/opt/homebrew/opt/gettext/include -I/opt/homebrew/Cellar/pcre2/10.44/include -I/opt/homebrew/Cellar/apache-arrow/17.0.0_6/include -I/Library/Developer/CommandLineTools/SDKs/MacOSX14.sdk/usr/include/ffi -arch arm64 -isysroot /Library/Developer/CommandLineTools/SDKs/MacOSX14.4.sdk -Wall -Wextra -Wpedantic -Werror -Wno-strict-prototypes -o CMakeFiles/read_table.dir/arrow.c.o -c /Users/oussama.saoudi/delta-kernel-rs/ffi/examples/read-table/arrow.c", - "file": "/Users/oussama.saoudi/delta-kernel-rs/ffi/examples/read-table/arrow.c", - "output": "CMakeFiles/read_table.dir/arrow.c.o" -} -] \ No newline at end of file diff --git a/ffi/src/expressions.rs b/ffi/src/expressions.rs index fa4b1ccab..194e5084f 100644 --- a/ffi/src/expressions.rs +++ b/ffi/src/expressions.rs @@ -2,7 +2,7 @@ use std::{ffi::c_void, ops::Not, sync::Arc}; use crate::{ handle::Handle, AllocateErrorFn, EngineIterator, ExternResult, IntoExternResult, - KernelPredicate, KernelStringSlice, ReferenceSet, TryFromStringSlice, + KernelStringSlice, ReferenceSet, TryFromStringSlice, }; use delta_kernel::{ expressions::{ @@ -11,6 +11,7 @@ use delta_kernel::{ schema::{ArrayType, DataType, PrimitiveType, StructField, StructType}, DeltaResult, }; +use delta_kernel_ffi_macros::handle_descriptor; #[derive(Default)] pub struct KernelExpressionVisitorState { @@ -249,12 +250,23 @@ pub extern "C" fn visit_expression_literal_bool( wrap_expression(state, Expression::literal(value)) } -/// Free the memory from the passed KernelPredicate +#[handle_descriptor(target=Expression, mutable=false, sized=true)] +pub struct KernelPredicate; + +/// Free the memory the passed KernelPredicate +/// +/// # Safety +/// Engine 
is responsible for passing a valid KernelPredicate #[no_mangle] pub unsafe extern "C" fn free_kernel_predicate(data: Handle) { data.drop_handle(); } +/// Constructs a kernel expression that is passed back as a KernelPredicate handle +/// +/// # Safety +/// The caller is responsible for freeing the retured memory, either by calling +/// [`free_kernel_predicate`], or [`Handle::drop_handle`] #[no_mangle] pub unsafe extern "C" fn get_kernel_expression() -> Handle { use Expression as Expr; diff --git a/ffi/src/lib.rs b/ffi/src/lib.rs index 04479dae1..2a267f131 100644 --- a/ffi/src/lib.rs +++ b/ffi/src/lib.rs @@ -692,9 +692,6 @@ pub unsafe extern "C" fn free_engine(engine: Handle) { #[handle_descriptor(target=Snapshot, mutable=false, sized=true)] pub struct SharedSnapshot; -#[handle_descriptor(target=Expression, mutable=false, sized=true)] -pub struct KernelPredicate; - /// Get the latest snapshot from the specified table /// /// # Safety From 097fe8b507d4b08527175a41d5307b9cd40be27f Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Wed, 9 Oct 2024 17:21:24 -0700 Subject: [PATCH 20/82] Add docs to visit_expression --- ffi/src/expressions.rs | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/ffi/src/expressions.rs b/ffi/src/expressions.rs index 194e5084f..0b9aff6aa 100644 --- a/ffi/src/expressions.rs +++ b/ffi/src/expressions.rs @@ -454,9 +454,25 @@ pub struct EngineExpressionVisitor { pub visit_array_item: extern "C" fn(data: *mut c_void, array_id: usize, item_id: usize), } +/// The [`EngineExpressionVisitor`] defines a visitor system to allow engines to build their own +/// representation of an expression from a particular expression within the kernel. +/// +/// Visit operations where the engine allocates an expression must return an associated `id`, which is an integer +/// identifier ([`usize`]). This identifier can be passed back to the engine to identify the expression. 
+/// The [`EngineExpressionVisitor`] handles both simple and complex types. +/// 1. For simple types, the engine is expected to allocate that data and return its identifier. +/// 2. For complex types such as structs, arrays, and variadic expressions, there will be a call to +/// construct the expression, and populate sub-expressions. For instance, [`visit_and`] recieves +/// the expected number of sub-expressions and must return an identifier. The kernel will +/// subsequently call [`visit_variadic_item`] with the identifier of the And expression, and the +/// identifier for a sub-expression. +/// +/// WARNING: The visitor MUST NOT retain internal references to string slices or binary data passed +/// to visitor methods +/// TODO: Add type information in struct field and null. This will likely involve using the schema visitor. #[no_mangle] -pub unsafe extern "C" fn visit_expression( - expression: &Handle, // TODO: This will likely be some kind of Handle +pub extern "C" fn visit_expression( + expression: &Handle, visitor: &mut EngineExpressionVisitor, ) -> usize { macro_rules! 
call { From 4b0ab65bc6df23a297caa1633c42ba2561eb8955 Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Wed, 9 Oct 2024 17:57:37 -0700 Subject: [PATCH 21/82] Add expressions test --- ffi/examples/read-table/expression.h | 196 ++++++++++-------- .../read-table/expression_test_results.txt | 78 +++++++ ffi/src/expressions.rs | 15 +- 3 files changed, 199 insertions(+), 90 deletions(-) create mode 100644 ffi/examples/read-table/expression_test_results.txt diff --git a/ffi/examples/read-table/expression.h b/ffi/examples/read-table/expression.h index 1a7d2d2da..4168b0882 100644 --- a/ffi/examples/read-table/expression.h +++ b/ffi/examples/read-table/expression.h @@ -12,12 +12,12 @@ { \ return visit_expr_binop(data, a, b, op); \ } -#define DEFINE_SIMPLE_SCALAR(fun_name, enum_member, c_type) \ +#define DEFINE_SIMPLE_SCALAR(fun_name, enum_member, c_type, literal_field) \ uintptr_t fun_name(void* data, c_type val) \ { \ struct Literal* lit = malloc(sizeof(struct Literal)); \ lit->type = enum_member; \ - lit->value.simple = (uintptr_t)val; \ + lit->value.literal_field = val; \ return put_handle(data, lit, Literal); \ } \ _Static_assert( \ @@ -148,7 +148,13 @@ struct Literal enum LitType type; union LiteralValue { - uint64_t simple; + int32_t integer_data; + int64_t long_data; + int16_t short_data; + int8_t byte_data; + float float_data; + double double_data; + bool boolean_data; struct KernelStringSlice string_data; struct Struct struct_data; struct ArrayData array_data; @@ -233,16 +239,16 @@ uintptr_t visit_expr_decimal( dec->scale = scale; return put_handle(data, literal, Literal); } -DEFINE_SIMPLE_SCALAR(visit_expr_int, Integer, int32_t); -DEFINE_SIMPLE_SCALAR(visit_expr_long, Long, int64_t); -DEFINE_SIMPLE_SCALAR(visit_expr_short, Short, int16_t); -DEFINE_SIMPLE_SCALAR(visit_expr_byte, Byte, int8_t); -DEFINE_SIMPLE_SCALAR(visit_expr_float, Float, float); -DEFINE_SIMPLE_SCALAR(visit_expr_double, Double, double); -DEFINE_SIMPLE_SCALAR(visit_expr_boolean, Boolean, _Bool); 
-DEFINE_SIMPLE_SCALAR(visit_expr_timestamp, Timestamp, int64_t); -DEFINE_SIMPLE_SCALAR(visit_expr_timestamp_ntz, TimestampNtz, int64_t); -DEFINE_SIMPLE_SCALAR(visit_expr_date, Date, int32_t); +DEFINE_SIMPLE_SCALAR(visit_expr_int, Integer, int32_t, integer_data); +DEFINE_SIMPLE_SCALAR(visit_expr_long, Long, int64_t, long_data); +DEFINE_SIMPLE_SCALAR(visit_expr_short, Short, int16_t, short_data); +DEFINE_SIMPLE_SCALAR(visit_expr_byte, Byte, int8_t, byte_data); +DEFINE_SIMPLE_SCALAR(visit_expr_float, Float, float, float_data); +DEFINE_SIMPLE_SCALAR(visit_expr_double, Double, double, double_data); +DEFINE_SIMPLE_SCALAR(visit_expr_boolean, Boolean, _Bool, boolean_data); +DEFINE_SIMPLE_SCALAR(visit_expr_timestamp, Timestamp, int64_t, long_data); +DEFINE_SIMPLE_SCALAR(visit_expr_timestamp_ntz, TimestampNtz, int64_t, long_data); +DEFINE_SIMPLE_SCALAR(visit_expr_date, Date, int32_t, integer_data); uintptr_t visit_expr_variadic(void* data, uintptr_t len, enum VariadicType op) { @@ -356,7 +362,6 @@ uintptr_t visit_expr_column(void* data, KernelStringSlice string) { struct KernelStringSlice* heap_string = malloc(sizeof(KernelStringSlice)); *heap_string = copy_kernel_string(string); - printf("Creating column with len %lu: %s\n", string.len, heap_string->ptr); return put_handle(data, heap_string, Column); } @@ -411,13 +416,6 @@ struct ExpressionRef construct_predicate(KernelPredicate* predicate) return data.handles[schema_list_id]; } -void print_n_spaces(int n) -{ - if (n == 0) - return; - printf(" "); - print_n_spaces(n - 1); -} void free_expression(struct ExpressionRef ref) { switch (ref.type) { @@ -501,195 +499,225 @@ void free_expression(struct ExpressionRef ref) } } } -void print_tree(struct ExpressionRef ref, int depth) + +void print_n_spaces(FILE* to, int n) +{ + if (n == 0) + return; + fprintf(to, " "); + print_n_spaces(to, n - 1); +} +void print_tree(FILE* to, struct ExpressionRef ref, int depth) { switch (ref.type) { case BinOp: { struct BinOp* op = ref.ref; - 
print_n_spaces(depth); + print_n_spaces(to, depth); switch (op->op) { case Add: { - printf("ADD\n"); + fprintf(to, "ADD\n"); break; } case Sub: { - printf("SUB\n"); + fprintf(to, "SUB\n"); break; }; case Div: { - printf("DIV\n"); + fprintf(to, "DIV\n"); break; }; case Mul: { - printf("MUL\n"); + fprintf(to, "MUL\n"); break; }; case LT: { - printf("LT\n"); + fprintf(to, "LT\n"); break; }; case LE: { - printf("LE\n"); + fprintf(to, "LE\n"); break; } case GT: { - printf("GT\n"); + fprintf(to, "GT\n"); break; }; case GE: { - printf("GE\n"); + fprintf(to, "GE\n"); break; }; case EQ: { - printf("EQ\n"); + fprintf(to, "EQ\n"); break; }; case NE: { - printf("NE\n"); + fprintf(to, "NE\n"); break; }; case In: { - printf("In\n"); + fprintf(to, "In\n"); break; }; case NotIn: { - printf("NotIn\n"); + fprintf(to, "NotIn\n"); break; }; break; case Distinct: - printf("Distinct\n"); + fprintf(to, "Distinct\n"); break; } struct ExpressionRef left = { .ref = op->left, .type = Literal }; struct ExpressionRef right = { .ref = op->right, .type = Literal }; - print_tree(left, depth + 1); - print_tree(right, depth + 1); + print_tree(to, left, depth + 1); + print_tree(to, right, depth + 1); break; } case Variadic: { struct Variadic* var = ref.ref; - print_n_spaces(depth); + print_n_spaces(to, depth); switch (var->op) { case And: - printf("And\n"); + fprintf(to, "And\n"); break; case Or: - printf("Or\n"); + fprintf(to, "Or\n"); break; case StructConstructor: - printf("StructConstructor\n"); + fprintf(to, "StructConstructor\n"); break; case ArrayData: - printf("ArrayData\n"); + fprintf(to, "ArrayData\n"); break; } for (size_t i = 0; i < var->len; i++) { - print_tree(var->expr_list[i], depth + 1); + print_tree(to, var->expr_list[i], depth + 1); } } break; case Literal: { struct Literal* lit = ref.ref; - print_n_spaces(depth); + print_n_spaces(to, depth); switch (lit->type) { case Integer: - printf("Integer"); - printf("(%lld)\n", lit->value.simple); + fprintf(to, "Integer"); + fprintf(to, 
"(%d)\n", lit->value.integer_data); break; case Long: - printf("Long"); - printf("(%lld)\n", lit->value.simple); + fprintf(to, "Long"); + fprintf(to, "(%lld)\n", lit->value.long_data); break; case Short: - printf("Short"); - printf("(%lld)\n", lit->value.simple); + fprintf(to, "Short"); + fprintf(to, "(%hd)\n", lit->value.short_data); break; case Byte: - printf("Byte"); - printf("(%lld)\n", lit->value.simple); + fprintf(to, "Byte"); + fprintf(to, "(%hhd)\n", lit->value.byte_data); break; case Float: - printf("Float"); - printf("(%lld)\n", lit->value.simple); + fprintf(to, "Float"); + fprintf(to, "(%f)\n", (float)lit->value.float_data); break; case Double: - printf("Double"); - printf("(%lld)\n", lit->value.simple); + fprintf(to, "Double"); + fprintf(to, "(%f)\n", lit->value.double_data); break; case String: { - printf("String(%s)\n", lit->value.string_data.ptr); + fprintf(to, "String(%s)\n", lit->value.string_data.ptr); break; } case Boolean: - printf("Boolean"); - printf("(%lld)\n", lit->value.simple); + fprintf(to, "Boolean"); + fprintf(to, "(%d)\n", lit->value.boolean_data); break; case Timestamp: - printf("Timestamp"); - printf("(%lld)\n", lit->value.simple); + fprintf(to, "Timestamp"); + fprintf(to, "(%lld)\n", lit->value.long_data); break; case TimestampNtz: - printf("TimestampNtz"); - printf("(%lld)\n", lit->value.simple); + fprintf(to, "TimestampNtz"); + fprintf(to, "(%lld)\n", lit->value.long_data); break; case Date: - printf("Date"); - printf("(%lld)\n", lit->value.simple); + fprintf(to, "Date"); + fprintf(to, "(%d)\n", lit->value.integer_data); break; case Binary: - printf("Binary\n"); - break; - case Decimal: - printf("Decimal\n"); + fprintf(to, "Binary\n"); + break; + case Decimal: { + struct Decimal* dec = &lit->value.decimal; + fprintf( + to, + "Decimal(%lld,%lld, %d, %d)\n", + dec->value[0], + dec->value[1], + dec->scale, + dec->precision); break; + } case Null: - printf("Null\n"); + fprintf(to, "Null\n"); break; case Struct: - printf("Struct\n"); + 
fprintf(to, "Struct\n"); struct Struct* struct_data = &lit->value.struct_data; for (size_t i = 0; i < struct_data->len; i++) { - print_n_spaces(depth + 1); - printf("Field: %s\n", struct_data->field_names[i].ptr); - print_tree(struct_data->expressions[i], depth + 2); + print_n_spaces(to, depth + 1); + fprintf(to, "Field: %s\n", struct_data->field_names[i].ptr); + print_tree(to, struct_data->expressions[i], depth + 2); } break; case Array: - printf("Array\n"); + fprintf(to, "Array\n"); struct ArrayData* array = &lit->value.array_data; for (size_t i = 0; i < array->len; i++) { - print_tree(array->expr_list[i], depth + 1); + print_tree(to, array->expr_list[i], depth + 1); } break; } } break; case Unary: { - print_n_spaces(depth); + print_n_spaces(to, depth); struct Unary* unary = ref.ref; switch (unary->type) { case Not: - printf("Not\n"); + fprintf(to, "Not\n"); break; case IsNull: - printf("IsNull\n"); + fprintf(to, "IsNull\n"); break; } - print_tree(unary->sub_expr, depth + 1); + print_tree(to, unary->sub_expr, depth + 1); break; } case Column: - print_n_spaces(depth); + print_n_spaces(to, depth); KernelStringSlice* string = ref.ref; - printf("Column(%s)\n", string->ptr); + fprintf(to, "Column(%s)\n", string->ptr); break; } } +#define TEST_BUF_SIZE 4096 + void test_kernel_expr() { KernelPredicate* pred = get_kernel_expression(); struct ExpressionRef ref = construct_predicate(pred); - print_tree(ref, 0); + + char out_buf[TEST_BUF_SIZE] = { 0 }; + char expected_buf[TEST_BUF_SIZE] = { 0 }; + FILE* out_file = fmemopen(out_buf, sizeof(out_buf), "w"); + FILE* expected_file = fopen("expression_test_results.txt", "r"); + int x = fread(expected_buf, sizeof(char), TEST_BUF_SIZE, expected_file); + assert(x > 0); + + print_tree(out_file, ref, 0); + + for (int i = 0; i < TEST_BUF_SIZE; i++) { + assert(out_buf[i] == expected_buf[i]); + } free_expression(ref); free_kernel_predicate(pred); } diff --git a/ffi/examples/read-table/expression_test_results.txt 
b/ffi/examples/read-table/expression_test_results.txt new file mode 100644 index 000000000..e966d0446 --- /dev/null +++ b/ffi/examples/read-table/expression_test_results.txt @@ -0,0 +1,78 @@ +And + Byte(127) + Byte(-128) + Float(340282346638528859811704183484516925440.000000) + Float(-340282346638528859811704183484516925440.000000) + Double(179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000) + Double(-179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000) + Integer(2147483647) + Integer(-2147483648) + Long(9223372036854775807) + Long(-9223372036854775808) + String(hello expressions) + Boolean(1) + Boolean(0) + Timestamp(50) + TimestampNtz(100) + Date(32) + Binary + Decimal(1,1, 3, 2) + Null + Struct + Field: top + Struct + Field: a + Integer(500) + Field: b + Array + Short(5) + Short(0) + Array + Short(5) + Short(0) + In + Integer(0) + Long(0) + ADD + Integer(0) + Long(0) + SUB + Integer(0) + Long(0) + EQ + Integer(0) + Long(0) + NE + Integer(0) + Long(0) + NotIn + Integer(0) + Long(0) + DIV + Integer(0) + Long(0) + MUL + Integer(0) + Long(0) + LT + Integer(0) + Long(0) + LE + Integer(0) + Long(0) + GT + Integer(0) + Long(0) + GE + Integer(0) + Long(0) + Distinct + Integer(0) + Long(0) + StructConstructor + Or + Integer(5) + Long(20) + Not + IsNull + Column(col) diff --git a/ffi/src/expressions.rs b/ffi/src/expressions.rs index 0b9aff6aa..71d7e6ab3 100644 --- a/ffi/src/expressions.rs +++ b/ffi/src/expressions.rs @@ -310,7 +310,7 @@ pub unsafe 
extern "C" fn get_kernel_expression() -> Handle { Expr::literal(Scalar::TimestampNtz(100)), Expr::literal(Scalar::Date(32)), Expr::literal(Scalar::Binary(b"0xdeadbeefcafe".to_vec())), - Expr::literal(Scalar::Decimal(1, 2, 3)), + Expr::literal(Scalar::Decimal((1 << 64) + 1, 2, 3)), // Both the most and least significant u64 of value are 1 Expr::literal(Scalar::Null(DataType::Primitive(PrimitiveType::Short))), Expr::literal(Scalar::Struct(top)), Expr::literal(Scalar::Array(array_data)), @@ -462,16 +462,19 @@ pub struct EngineExpressionVisitor { /// The [`EngineExpressionVisitor`] handles both simple and complex types. /// 1. For simple types, the engine is expected to allocate that data and return its identifier. /// 2. For complex types such as structs, arrays, and variadic expressions, there will be a call to -/// construct the expression, and populate sub-expressions. For instance, [`visit_and`] recieves -/// the expected number of sub-expressions and must return an identifier. The kernel will -/// subsequently call [`visit_variadic_item`] with the identifier of the And expression, and the -/// identifier for a sub-expression. +/// construct the expression, and populate sub-expressions. For instance, [`visit_and`] recieves +/// the expected number of sub-expressions and must return an identifier. The kernel will +/// subsequently call [`visit_variadic_item`] with the identifier of the And expression, and the +/// identifier for a sub-expression. /// /// WARNING: The visitor MUST NOT retain internal references to string slices or binary data passed /// to visitor methods /// TODO: Add type information in struct field and null. This will likely involve using the schema visitor. 
+/// +/// # Safety +/// The caller must pass a valid KernelPredicate Handle to the expression field #[no_mangle] -pub extern "C" fn visit_expression( +pub unsafe extern "C" fn visit_expression( expression: &Handle, visitor: &mut EngineExpressionVisitor, ) -> usize { From 1af29397a2d2b8b7c38691cdea793cedd947a610 Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Thu, 10 Oct 2024 15:03:05 -0700 Subject: [PATCH 22/82] Add docs, improve style --- ffi/examples/read-table/expression.h | 110 +++++++++------- ffi/src/{expressions.rs => expression.rs} | 149 ++++++++++++++++------ ffi/src/lib.rs | 4 +- ffi/src/scan.rs | 2 +- 4 files changed, 175 insertions(+), 90 deletions(-) rename ffi/src/{expressions.rs => expression.rs} (77%) diff --git a/ffi/examples/read-table/expression.h b/ffi/examples/read-table/expression.h index 4168b0882..41077e9f1 100644 --- a/ffi/examples/read-table/expression.h +++ b/ffi/examples/read-table/expression.h @@ -6,7 +6,14 @@ #include #include #include - +/** + * This module defines a very simple model of an expression, used only to be able to print the + * provided expression. It consists of an "ExpressionBuilder" which is our user data that gets + * passed into each visit_x call. This simply keeps track of all the expressions we are asked to + * allocate. + * + * Each expression is an "ExpressionRef", which tracks the type and pointer to the expression. 
+ */ #define DEFINE_BINOP(fun_name, op) \ uintptr_t fun_name(void* data, uintptr_t a, uintptr_t b) \ { \ @@ -75,11 +82,11 @@ enum ExpressionType Unary, Column }; -struct ExpressionRef +typedef struct { void* ref; enum ExpressionType type; -}; +} ExpressionRef; struct BinOp { enum OpType op; @@ -105,12 +112,12 @@ struct Variadic enum VariadicType op; size_t len; size_t max_len; - struct ExpressionRef* expr_list; + ExpressionRef* expr_list; }; struct Unary { enum UnaryType type; - struct ExpressionRef sub_expr; + ExpressionRef sub_expr; }; struct BinaryData { @@ -123,15 +130,15 @@ struct Decimal uint8_t precision; uint8_t scale; }; -struct Data +typedef struct { size_t len; - struct ExpressionRef handles[100]; -}; + ExpressionRef handles[100]; +} ExpressionBuilder; struct Struct { KernelStringSlice* field_names; - struct ExpressionRef* expressions; + ExpressionRef* expressions; size_t len; size_t max_len; }; @@ -140,7 +147,7 @@ struct ArrayData { size_t len; size_t max_len; - struct ExpressionRef* expr_list; + ExpressionRef* expr_list; }; struct Literal @@ -165,14 +172,14 @@ struct Literal size_t put_handle(void* data, void* ref, enum ExpressionType type) { - struct Data* data_ptr = (struct Data*)data; - struct ExpressionRef expr = { .ref = ref, .type = type }; + ExpressionBuilder* data_ptr = (ExpressionBuilder*)data; + ExpressionRef expr = { .ref = ref, .type = type }; data_ptr->handles[data_ptr->len] = expr; return data_ptr->len++; } -struct ExpressionRef* get_handle(void* data, size_t handle_index) +ExpressionRef* get_handle(void* data, size_t handle_index) { - struct Data* data_ptr = (struct Data*)data; + ExpressionBuilder* data_ptr = (ExpressionBuilder*)data; if (handle_index > data_ptr->len) { return NULL; } @@ -190,8 +197,8 @@ KernelStringSlice copy_kernel_string(KernelStringSlice string) uintptr_t visit_expr_binop(void* data, uintptr_t a, uintptr_t b, enum OpType op) { struct BinOp* binop = malloc(sizeof(struct BinOp)); - struct ExpressionRef* left_handle = 
get_handle(data, a); - struct ExpressionRef* right_handle = get_handle(data, b); + ExpressionRef* left_handle = get_handle(data, a); + ExpressionRef* right_handle = get_handle(data, b); assert(right_handle != NULL && left_handle != NULL); struct Literal* left = left_handle->ref; @@ -253,17 +260,17 @@ DEFINE_SIMPLE_SCALAR(visit_expr_date, Date, int32_t, integer_data); uintptr_t visit_expr_variadic(void* data, uintptr_t len, enum VariadicType op) { struct Variadic* var = malloc(sizeof(struct Variadic)); - struct ExpressionRef* expr_lst = malloc(sizeof(struct ExpressionRef) * len); + ExpressionRef* expr_lst = malloc(sizeof(ExpressionRef) * len); var->op = op; var->len = 0; var->max_len = len; var->expr_list = expr_lst; return put_handle(data, var, Variadic); } -void visit_expr_variadic_item(void* data, uintptr_t variadic_id, uintptr_t sub_expr_id) +void visit_expr_variadic_sub_expr(void* data, uintptr_t variadic_id, uintptr_t sub_expr_id) { - struct ExpressionRef* sub_expr_ref = get_handle(data, sub_expr_id); - struct ExpressionRef* variadic_ref = get_handle(data, variadic_id); + ExpressionRef* sub_expr_ref = get_handle(data, sub_expr_id); + ExpressionRef* variadic_ref = get_handle(data, variadic_id); assert(sub_expr_ref != NULL && variadic_ref != NULL); assert(variadic_ref->type == Variadic); @@ -274,10 +281,10 @@ DEFINE_VARIADIC(visit_expr_and, And) DEFINE_VARIADIC(visit_expr_or, Or) DEFINE_VARIADIC(visit_expr_struct, StructConstructor) -void visit_expr_array_item(void* data, uintptr_t variadic_id, uintptr_t sub_expr_id) +void visit_expr_array_sub_expr(void* data, uintptr_t variadic_id, uintptr_t sub_expr_id) { - struct ExpressionRef* sub_expr_handle = get_handle(data, sub_expr_id); - struct ExpressionRef* array_handle = get_handle(data, variadic_id); + ExpressionRef* sub_expr_handle = get_handle(data, sub_expr_id); + ExpressionRef* array_handle = get_handle(data, variadic_id); assert(sub_expr_handle != NULL && array_handle != NULL); assert(array_handle->type == 
Literal); struct Literal* literal = array_handle->ref; @@ -292,7 +299,7 @@ uintptr_t visit_expr_array(void* data, uintptr_t len) struct ArrayData* arr = &(literal->value.array_data); arr->len = 0; arr->max_len = 0; - arr->expr_list = malloc(sizeof(struct ExpressionRef) * len); + arr->expr_list = malloc(sizeof(ExpressionRef) * len); return put_handle(data, literal, Literal); } @@ -313,7 +320,7 @@ uintptr_t visit_expr_struct_literal(void* data, uintptr_t len) struct Struct* struct_data = &literal->value.struct_data; struct_data->len = 0; struct_data->max_len = len; - struct_data->expressions = malloc(sizeof(struct ExpressionRef) * len); + struct_data->expressions = malloc(sizeof(ExpressionRef) * len); struct_data->field_names = malloc(sizeof(KernelStringSlice) * len); return put_handle(data, literal, Literal); } @@ -324,8 +331,8 @@ void visit_expr_struct_literal_field( KernelStringSlice field_name, uintptr_t value_id) { - struct ExpressionRef* value = get_handle(data, value_id); - struct ExpressionRef* literal_handle = get_handle(data, struct_id); + ExpressionRef* value = get_handle(data, value_id); + ExpressionRef* literal_handle = get_handle(data, struct_id); assert(literal_handle != NULL && value != NULL); assert(literal_handle->type == Literal); struct Literal* literal = literal_handle->ref; @@ -351,7 +358,7 @@ uintptr_t visit_expr_unary(void* data, uintptr_t sub_expr_id, enum UnaryType typ { struct Unary* unary = malloc(sizeof(struct Unary)); unary->type = type; - struct ExpressionRef* sub_expr_handle = get_handle(data, sub_expr_id); + ExpressionRef* sub_expr_handle = get_handle(data, sub_expr_id); unary->sub_expr = *sub_expr_handle; return put_handle(data, unary, Unary); } @@ -366,10 +373,10 @@ uintptr_t visit_expr_column(void* data, KernelStringSlice string) } // Print the schema of the snapshot -struct ExpressionRef construct_predicate(KernelPredicate* predicate) +ExpressionRef construct_predicate(SharedExpression* predicate) { print_diag("Building 
schema\n"); - struct Data data = { 0 }; + ExpressionBuilder data = { 0 }; EngineExpressionVisitor visitor = { .data = &data, .visit_int = visit_expr_int, @@ -387,7 +394,7 @@ struct ExpressionRef construct_predicate(KernelPredicate* predicate) .visit_string = visit_expr_string, .visit_and = visit_expr_and, .visit_or = visit_expr_or, - .visit_variadic_item = visit_expr_variadic_item, + .visit_variadic_sub_expr = visit_expr_variadic_sub_expr, .visit_not = visit_expr_not, .visit_is_null = visit_expr_is_null, .visit_lt = visit_expr_lt, @@ -405,24 +412,24 @@ struct ExpressionRef construct_predicate(KernelPredicate* predicate) .visit_divide = visit_expr_divide, .visit_column = visit_expr_column, .visit_struct = visit_expr_struct, - .visit_struct_item = visit_expr_variadic_item, // We treat expr struct as a variadic + .visit_struct_sub_expr = visit_expr_variadic_sub_expr, // We treat expr struct as a variadic .visit_null = visit_expr_null, .visit_struct_literal = visit_expr_struct_literal, .visit_struct_literal_field = visit_expr_struct_literal_field, .visit_array = visit_expr_array, - .visit_array_item = visit_expr_array_item + .visit_array_element = visit_expr_array_sub_expr }; uintptr_t schema_list_id = visit_expression(&predicate, &visitor); return data.handles[schema_list_id]; } -void free_expression(struct ExpressionRef ref) +void free_expression(ExpressionRef ref) { switch (ref.type) { case BinOp: { struct BinOp* op = ref.ref; - struct ExpressionRef left = { .ref = op->left, .type = Literal }; - struct ExpressionRef right = { .ref = op->right, .type = Literal }; + ExpressionRef left = { .ref = op->left, .type = Literal }; + ExpressionRef right = { .ref = op->right, .type = Literal }; free_expression(left); free_expression(right); free(op); @@ -507,7 +514,7 @@ void print_n_spaces(FILE* to, int n) fprintf(to, " "); print_n_spaces(to, n - 1); } -void print_tree(FILE* to, struct ExpressionRef ref, int depth) +void print_tree(FILE* to, ExpressionRef ref, int depth) { 
switch (ref.type) { case BinOp: { @@ -567,8 +574,8 @@ void print_tree(FILE* to, struct ExpressionRef ref, int depth) break; } - struct ExpressionRef left = { .ref = op->left, .type = Literal }; - struct ExpressionRef right = { .ref = op->right, .type = Literal }; + ExpressionRef left = { .ref = op->left, .type = Literal }; + ExpressionRef right = { .ref = op->right, .type = Literal }; print_tree(to, left, depth + 1); print_tree(to, right, depth + 1); break; @@ -701,23 +708,34 @@ void print_tree(FILE* to, struct ExpressionRef ref, int depth) #define TEST_BUF_SIZE 4096 +void read_expected_expression_tree(char* expected_buf) +{ + FILE* data_file = fopen("expression_test_results.txt", "r"); + int x = fread(expected_buf, sizeof(char), TEST_BUF_SIZE, data_file); + assert(x > 0); +} +void get_expression_tree(ExpressionRef ref, char* out_buf, size_t buf_len) +{ + + FILE* out_file = fmemopen(out_buf, buf_len, "w"); + print_tree(out_file, ref, 0); + fclose(out_file); +} void test_kernel_expr() { - KernelPredicate* pred = get_kernel_expression(); - struct ExpressionRef ref = construct_predicate(pred); + SharedExpression* pred = get_kernel_expression(); + ExpressionRef ref = construct_predicate(pred); char out_buf[TEST_BUF_SIZE] = { 0 }; char expected_buf[TEST_BUF_SIZE] = { 0 }; - FILE* out_file = fmemopen(out_buf, sizeof(out_buf), "w"); - FILE* expected_file = fopen("expression_test_results.txt", "r"); - int x = fread(expected_buf, sizeof(char), TEST_BUF_SIZE, expected_file); - assert(x > 0); - print_tree(out_file, ref, 0); + read_expected_expression_tree(expected_buf); + get_expression_tree(ref, out_buf, TEST_BUF_SIZE); for (int i = 0; i < TEST_BUF_SIZE; i++) { assert(out_buf[i] == expected_buf[i]); } + free_expression(ref); free_kernel_predicate(pred); } diff --git a/ffi/src/expressions.rs b/ffi/src/expression.rs similarity index 77% rename from ffi/src/expressions.rs rename to ffi/src/expression.rs index 71d7e6ab3..247cfa15e 100644 --- a/ffi/src/expressions.rs +++ 
b/ffi/src/expression.rs @@ -251,24 +251,24 @@ pub extern "C" fn visit_expression_literal_bool( } #[handle_descriptor(target=Expression, mutable=false, sized=true)] -pub struct KernelPredicate; +pub struct SharedExpression; -/// Free the memory the passed KernelPredicate +/// Free the memory the passed SharedExpression /// /// # Safety -/// Engine is responsible for passing a valid KernelPredicate +/// Engine is responsible for passing a valid SharedExpression #[no_mangle] -pub unsafe extern "C" fn free_kernel_predicate(data: Handle) { +pub unsafe extern "C" fn free_kernel_predicate(data: Handle) { data.drop_handle(); } -/// Constructs a kernel expression that is passed back as a KernelPredicate handle +/// Constructs a kernel expression that is passed back as a SharedExpression handle /// /// # Safety /// The caller is responsible for freeing the retured memory, either by calling /// [`free_kernel_predicate`], or [`Handle::drop_handle`] #[no_mangle] -pub unsafe extern "C" fn get_kernel_expression() -> Handle { +pub unsafe extern "C" fn get_kernel_expression() -> Handle { use Expression as Expr; let array_type = ArrayType::new( @@ -310,7 +310,8 @@ pub unsafe extern "C" fn get_kernel_expression() -> Handle { Expr::literal(Scalar::TimestampNtz(100)), Expr::literal(Scalar::Date(32)), Expr::literal(Scalar::Binary(b"0xdeadbeefcafe".to_vec())), - Expr::literal(Scalar::Decimal((1 << 64) + 1, 2, 3)), // Both the most and least significant u64 of value are 1 + // Both the most and least significant u64 of the Decimal value will be 1 + Expr::literal(Scalar::Decimal((1 << 64) + 1, 2, 3)), Expr::literal(Scalar::Null(DataType::Primitive(PrimitiveType::Short))), Expr::literal(Scalar::Struct(top)), Expr::literal(Scalar::Array(array_data)), @@ -388,25 +389,53 @@ pub unsafe extern "C" fn get_kernel_expression() -> Handle { .into() } -/// Kernel Expression to Engine Expression +/// The [`EngineExpressionVisitor`] defines a visitor system to allow engines to build their own +/// 
representation of an expression from a particular expression within the kernel. /// +/// Visit operations where the engine allocates an expression must return an associated `id`, which is an integer +/// identifier ([`usize`]). This identifier can be passed back to the engine to identify the expression. +/// The [`EngineExpressionVisitor`] handles both simple and complex types. +/// 1. For simple types, the engine is expected to allocate that data and return its identifier. +/// 2. For complex types such as structs, arrays, and variadic expressions, there will be a call to +/// construct the expression, and populate sub-expressions. For instance, [`visit_and`] recieves +/// the expected number of sub-expressions and must return an identifier. The kernel will +/// subsequently call [`visit_variadic_sub_expr`] with the identifier of the And expression, and the +/// identifier for a sub-expression. +/// +/// WARNING: The visitor MUST NOT retain internal references to string slices or binary data passed +/// to visitor methods +/// TODO: Add type information in struct field and null. This will likely involve using the schema visitor. #[repr(C)] pub struct EngineExpressionVisitor { - /// opaque state pointer + /// An opaque state pointer pub data: *mut c_void, - /// Visit an `integer` belonging to the list identified by `sibling_list_id`. + /// Visit a 32bit `integer` pub visit_int: extern "C" fn(data: *mut c_void, value: i32) -> usize, + /// Visit a 64bit `long`. pub visit_long: extern "C" fn(data: *mut c_void, value: i64) -> usize, + /// Visit a 16bit `short`. pub visit_short: extern "C" fn(data: *mut c_void, value: i16) -> usize, + /// Visit an 8bit `byte`. pub visit_byte: extern "C" fn(data: *mut c_void, value: i8) -> usize, + /// Visit a 32bit `float`. pub visit_float: extern "C" fn(data: *mut c_void, value: f32) -> usize, + /// Visit a 64bit `double`. pub visit_double: extern "C" fn(data: *mut c_void, value: f64) -> usize, + /// Visit a `string`. 
pub visit_string: extern "C" fn(data: *mut c_void, value: KernelStringSlice) -> usize, + /// Visit a `boolean`. pub visit_bool: extern "C" fn(data: *mut c_void, value: bool) -> usize, + /// Visit a 64bit timestamp. The timestamp is microsecond precision and adjusted to UTC. pub visit_timestamp: extern "C" fn(data: *mut c_void, value: i64) -> usize, + /// Visit a 64bit timestamp. The timestamp is microsecond precision with no timezone. pub visit_timestamp_ntz: extern "C" fn(data: *mut c_void, value: i64) -> usize, + /// Visit a 32bit int date representing days since UNIX epoch 1970-01-01. pub visit_date: extern "C" fn(data: *mut c_void, value: i32) -> usize, - pub visit_binary: extern "C" fn(data: *mut c_void, buf: *const u8, len: usize) -> usize, + /// Visit binary data at the `buffer` with length `len`. + pub visit_binary: extern "C" fn(data: *mut c_void, buffer: *const u8, len: usize) -> usize, + /// Visit a 128bit `decimal` value with the given precision and scale. The 128bit integer + /// is split into the most significant 64 bits in `value_ms`, and the least significant 64 + /// bits in `value_ls`. pub visit_decimal: extern "C" fn( data: *mut c_void, value_ms: u64, // Most significant 64 bits of decimal value @@ -414,68 +443,106 @@ pub struct EngineExpressionVisitor { precision: u8, scale: u8, ) -> usize, - + /// Visits a null value. + pub visit_null: extern "C" fn(data: *mut c_void) -> usize, + /// Visits an `and` expression which is made of a list of sub-expressions. This declares the + /// number of sub-expressions that the `and` expression will be made of. The visitor will populate + /// the list of expressions using the [`visit_variadic_sub_expr`] method. pub visit_and: extern "C" fn(data: *mut c_void, len: usize) -> usize, + /// Visits an `or` expression which is made of a list of sub-expressions. This declares the + /// number of sub-expressions that the `or` expression will be made of. 
The visitor will populate + /// the list of expressions using the [`visit_variadic_sub_expr`] method. pub visit_or: extern "C" fn(data: *mut c_void, len: usize) -> usize, - pub visit_variadic_item: + /// Visits a variadic sub-expression. This appends a sub-expression to a variadic expression + /// constructed in either [`visit_and`] or [`visit_or`]. The variadic expression is identified + /// by `variadic_id`, and the sub-expression is identified by `sub_expr_id`. + pub visit_variadic_sub_expr: extern "C" fn(data: *mut c_void, variadic_id: usize, sub_expr_id: usize), + ///Visits a `not` expression, bulit using the sub-expression `inner_expr`. pub visit_not: extern "C" fn(data: *mut c_void, inner_expr: usize) -> usize, + ///Visits an `is_null` expression, built using the sub-expression `inner_expr`. pub visit_is_null: extern "C" fn(data: *mut c_void, inner_expr: usize) -> usize, - + /// Visit the `less than` binary operation, which takes the left sub expression id `a` and the + /// right sub-expression id `b`. pub visit_lt: extern "C" fn(data: *mut c_void, a: usize, b: usize) -> usize, + /// Visit the `less than or equal` binary operation, which takes the left sub expression id `a` + /// and the right sub-expression id `b`. pub visit_le: extern "C" fn(data: *mut c_void, a: usize, b: usize) -> usize, + /// Visit the `greater than` binary operation, which takes the left sub expression id `a` + /// and the right sub-expression id `b`. pub visit_gt: extern "C" fn(data: *mut c_void, a: usize, b: usize) -> usize, + /// Visit the `greater than or equal` binary operation, which takes the left sub expression id `a` + /// and the right sub-expression id `b`. pub visit_ge: extern "C" fn(data: *mut c_void, a: usize, b: usize) -> usize, + /// Visit the `equal` binary operation, which takes the left sub expression id `a` + /// and the right sub-expression id `b`. 
pub visit_eq: extern "C" fn(data: *mut c_void, a: usize, b: usize) -> usize, + /// Visit the `not equal` binary operation, which takes the left sub expression id `a` + /// and the right sub-expression id `b`. pub visit_ne: extern "C" fn(data: *mut c_void, a: usize, b: usize) -> usize, + /// Visit the `distinct` binary operation, which takes the left sub expression id `a` + /// and the right sub-expression id `b`. pub visit_distinct: extern "C" fn(data: *mut c_void, a: usize, b: usize) -> usize, + /// Visit the `in` binary operation, which takes the left sub expression id `a` + /// and the right sub-expression id `b`. pub visit_in: extern "C" fn(data: *mut c_void, a: usize, b: usize) -> usize, + /// Visit the `not in` binary operation, which takes the left sub expression id `a` + /// and the right sub-expression id `b`. pub visit_not_in: extern "C" fn(data: *mut c_void, a: usize, b: usize) -> usize, - + /// Visit the `add` binary operation, which takes the left sub expression id `a` + /// and the right sub-expression id `b`. pub visit_add: extern "C" fn(data: *mut c_void, a: usize, b: usize) -> usize, + /// Visit the `minus` binary operation, which takes the left sub expression id `a` + /// and the right sub-expression id `b`. pub visit_minus: extern "C" fn(data: *mut c_void, a: usize, b: usize) -> usize, + /// Visit the `multiply` binary operation, which takes the left sub expression id `a` + /// and the right sub-expression id `b`. pub visit_multiply: extern "C" fn(data: *mut c_void, a: usize, b: usize) -> usize, + /// Visit the `divide` binary operation, which takes the left sub expression id `a` + /// and the right sub-expression id `b`. pub visit_divide: extern "C" fn(data: *mut c_void, a: usize, b: usize) -> usize, - + /// Visit the `colmun` identified by the `name` string. pub visit_column: extern "C" fn(data: *mut c_void, name: KernelStringSlice) -> usize, - + /// Visit a `struct` which is constructed from an ordered list of expressions. 
This declares + /// the number of expressions that the struct will be made of. The visitor will populate the + /// list of expressions using the [`visit_struct_sub_expr`] method. pub visit_struct: extern "C" fn(data: *mut c_void, len: usize) -> usize, - pub visit_struct_item: extern "C" fn(data: *mut c_void, struct_id: usize, expr_id: usize), - + /// Visits a `struct` sub expression. This appends a sub-expression to a struct constructed by + /// [`visit_struct`]. The struct is identified by `struct_id`, and the sub-expression is identified + /// by `expr_id`. + pub visit_struct_sub_expr: extern "C" fn(data: *mut c_void, struct_id: usize, expr_id: usize), + /// Visit a struct literal which is made up of a list of field names and values. This declares + /// the number of fields that the struct will have. The visitor will populate the struct fields + /// using the [`visit_struct_literal_field`] method. pub visit_struct_literal: extern "C" fn(data: *mut c_void, num_fields: usize) -> usize, + /// Visit a struct literal field. This adds a field to the struct declared by [`visit_struct_literal`]. + /// The struct literal is identified by `struct_id`. The sub-expression is identified by `expr_id`. pub visit_struct_literal_field: extern "C" fn( data: *mut c_void, struct_id: usize, field_name: KernelStringSlice, field_value: usize, ), - pub visit_null: extern "C" fn(data: *mut c_void) -> usize, + /// Visit an `arary`, declaring the length `len`. The visitor will populate the array + /// elements using the [`visit_array_element`] method. pub visit_array: extern "C" fn(data: *mut c_void, len: usize) -> usize, - pub visit_array_item: extern "C" fn(data: *mut c_void, array_id: usize, item_id: usize), + /// Visit an array element. This adds the element to the array declared in [`visit_array`]. 
The + /// array is identified by `array_id`, and the element identified by `element_id` + pub visit_array_element: extern "C" fn(data: *mut c_void, array_id: usize, element_id: usize), } -/// The [`EngineExpressionVisitor`] defines a visitor system to allow engines to build their own -/// representation of an expression from a particular expression within the kernel. -/// -/// Visit operations where the engine allocates an expression must return an associated `id`, which is an integer -/// identifier ([`usize`]). This identifier can be passed back to the engine to identify the expression. -/// The [`EngineExpressionVisitor`] handles both simple and complex types. -/// 1. For simple types, the engine is expected to allocate that data and return its identifier. -/// 2. For complex types such as structs, arrays, and variadic expressions, there will be a call to -/// construct the expression, and populate sub-expressions. For instance, [`visit_and`] recieves -/// the expected number of sub-expressions and must return an identifier. The kernel will -/// subsequently call [`visit_variadic_item`] with the identifier of the And expression, and the -/// identifier for a sub-expression. +/// Visit the expression of the passed [`SharedExpression`] Handle using the provided `visitor`. +/// See the documentation of [`EngineExpressionVisitor`] for a description of how this visitor +/// works. /// -/// WARNING: The visitor MUST NOT retain internal references to string slices or binary data passed -/// to visitor methods -/// TODO: Add type information in struct field and null. This will likely involve using the schema visitor. 
+/// This method returns the id that the engine generated for the top level expression /// /// # Safety -/// The caller must pass a valid KernelPredicate Handle to the expression field +/// +/// The caller must pass a valid SharedExpression Handle and expression visitor #[no_mangle] pub unsafe extern "C" fn visit_expression( - expression: &Handle, + expression: &Handle, visitor: &mut EngineExpressionVisitor, ) -> usize { macro_rules! call { @@ -489,7 +556,7 @@ pub unsafe extern "C" fn visit_expression( let array_id = call!(visitor, visit_array, elements.len()); for scalar in elements { let scalar_id = visit_scalar(visitor, scalar); - call!(visitor, visit_array_item, array_id, scalar_id); + call!(visitor, visit_array_element, array_id, scalar_id); } array_id } @@ -514,7 +581,7 @@ pub unsafe extern "C" fn visit_expression( let expr_struct_id = call!(visitor, visit_struct, exprs.len()); for expr in exprs { let expr_id = visit_expression(visitor, expr); - call!(visitor, visit_struct_item, expr_struct_id, expr_id) + call!(visitor, visit_struct_sub_expr, expr_struct_id, expr_id) } expr_struct_id } @@ -530,7 +597,7 @@ pub unsafe extern "C" fn visit_expression( let variadic_id = visit_fn(visitor.data, exprs.len()); for expr in exprs { let expr_id = visit_expression(visitor, expr); - call!(visitor, visit_variadic_item, variadic_id, expr_id) + call!(visitor, visit_variadic_sub_expr, variadic_id, expr_id) } variadic_id } diff --git a/ffi/src/lib.rs b/ffi/src/lib.rs index 2a267f131..2b5cb885e 100644 --- a/ffi/src/lib.rs +++ b/ffi/src/lib.rs @@ -11,7 +11,7 @@ use tracing::debug; use url::Url; use delta_kernel::snapshot::Snapshot; -use delta_kernel::{DeltaResult, Engine, EngineData, Error, Expression, Table}; +use delta_kernel::{DeltaResult, Engine, EngineData, Error, Table}; use delta_kernel_ffi_macros::handle_descriptor; // cbindgen doesn't understand our use of feature flags here, and by default it parses `mod handle` @@ -30,7 +30,7 @@ use handle::Handle; extern crate self as 
delta_kernel_ffi; pub mod engine_funcs; -pub mod expressions; +pub mod expression; pub mod scan; pub mod schema; diff --git a/ffi/src/scan.rs b/ffi/src/scan.rs index 92d067621..ce6c45e8e 100644 --- a/ffi/src/scan.rs +++ b/ffi/src/scan.rs @@ -13,7 +13,7 @@ use delta_kernel_ffi_macros::handle_descriptor; use tracing::debug; use url::Url; -use crate::expressions::{unwrap_kernel_expression, EnginePredicate, KernelExpressionVisitorState}; +use crate::expression::{unwrap_kernel_expression, EnginePredicate, KernelExpressionVisitorState}; use crate::{ AllocateStringFn, ExclusiveEngineData, ExternEngine, ExternResult, IntoExternResult, KernelBoolSlice, KernelRowIndexArray, KernelStringSlice, NullableCvoid, SharedExternEngine, From 4df19e3abeab2d96e4bfa2e4a807689ab5ef2253 Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Thu, 10 Oct 2024 15:03:29 -0700 Subject: [PATCH 23/82] Fix spacing --- ffi/examples/read-table/expression.h | 1 + 1 file changed, 1 insertion(+) diff --git a/ffi/examples/read-table/expression.h b/ffi/examples/read-table/expression.h index 41077e9f1..6ae1b5d25 100644 --- a/ffi/examples/read-table/expression.h +++ b/ffi/examples/read-table/expression.h @@ -14,6 +14,7 @@ * * Each expression is an "ExpressionRef", which tracks the type and pointer to the expression. 
*/ + #define DEFINE_BINOP(fun_name, op) \ uintptr_t fun_name(void* data, uintptr_t a, uintptr_t b) \ { \ From 283594bf447ea92ae90ccfd2c00c8b8474ef3475 Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Thu, 10 Oct 2024 15:06:52 -0700 Subject: [PATCH 24/82] Fix diff, move test to beginning of read_table --- ffi/examples/read-table/read_table.c | 2 +- ffi/src/{expression.rs => expressions.rs} | 0 ffi/src/lib.rs | 2 +- ffi/src/scan.rs | 2 +- 4 files changed, 3 insertions(+), 3 deletions(-) rename ffi/src/{expression.rs => expressions.rs} (100%) diff --git a/ffi/examples/read-table/read_table.c b/ffi/examples/read-table/read_table.c index f7793c752..79dbebdd7 100644 --- a/ffi/examples/read-table/read_table.c +++ b/ffi/examples/read-table/read_table.c @@ -201,7 +201,7 @@ PartitionList* get_partition_list(SharedGlobalScanState* state) int main(int argc, char* argv[]) { test_kernel_expr(); - return -1; + if (argc < 2) { printf("Usage: %s table/path\n", argv[0]); return -1; diff --git a/ffi/src/expression.rs b/ffi/src/expressions.rs similarity index 100% rename from ffi/src/expression.rs rename to ffi/src/expressions.rs diff --git a/ffi/src/lib.rs b/ffi/src/lib.rs index 2b5cb885e..75a5f34a3 100644 --- a/ffi/src/lib.rs +++ b/ffi/src/lib.rs @@ -30,7 +30,7 @@ use handle::Handle; extern crate self as delta_kernel_ffi; pub mod engine_funcs; -pub mod expression; +pub mod expressions; pub mod scan; pub mod schema; diff --git a/ffi/src/scan.rs b/ffi/src/scan.rs index ce6c45e8e..92d067621 100644 --- a/ffi/src/scan.rs +++ b/ffi/src/scan.rs @@ -13,7 +13,7 @@ use delta_kernel_ffi_macros::handle_descriptor; use tracing::debug; use url::Url; -use crate::expression::{unwrap_kernel_expression, EnginePredicate, KernelExpressionVisitorState}; +use crate::expressions::{unwrap_kernel_expression, EnginePredicate, KernelExpressionVisitorState}; use crate::{ AllocateStringFn, ExclusiveEngineData, ExternEngine, ExternResult, IntoExternResult, KernelBoolSlice, KernelRowIndexArray, 
KernelStringSlice, NullableCvoid, SharedExternEngine, From 57271fb1eb74fd9120f150701706efe644b21972 Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Thu, 10 Oct 2024 15:18:51 -0700 Subject: [PATCH 25/82] Fix cross-platform printing --- ffi/examples/read-table/expression.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/ffi/examples/read-table/expression.h b/ffi/examples/read-table/expression.h index 6ae1b5d25..4ec25675f 100644 --- a/ffi/examples/read-table/expression.h +++ b/ffi/examples/read-table/expression.h @@ -376,7 +376,6 @@ uintptr_t visit_expr_column(void* data, KernelStringSlice string) // Print the schema of the snapshot ExpressionRef construct_predicate(SharedExpression* predicate) { - print_diag("Building schema\n"); ExpressionBuilder data = { 0 }; EngineExpressionVisitor visitor = { .data = &data, @@ -612,7 +611,7 @@ void print_tree(FILE* to, ExpressionRef ref, int depth) break; case Long: fprintf(to, "Long"); - fprintf(to, "(%lld)\n", lit->value.long_data); + fprintf(to, "(%lld)\n", (long long)lit->value.long_data); break; case Short: fprintf(to, "Short"); @@ -640,11 +639,11 @@ void print_tree(FILE* to, ExpressionRef ref, int depth) break; case Timestamp: fprintf(to, "Timestamp"); - fprintf(to, "(%lld)\n", lit->value.long_data); + fprintf(to, "(%lld)\n", (long long)lit->value.long_data); break; case TimestampNtz: fprintf(to, "TimestampNtz"); - fprintf(to, "(%lld)\n", lit->value.long_data); + fprintf(to, "(%lld)\n", (long long)lit->value.long_data); break; case Date: fprintf(to, "Date"); @@ -658,8 +657,8 @@ void print_tree(FILE* to, ExpressionRef ref, int depth) fprintf( to, "Decimal(%lld,%lld, %d, %d)\n", - dec->value[0], - dec->value[1], + (long long)dec->value[0], + (long long)dec->value[1], dec->scale, dec->precision); break; From a24419dd060fda37049ac720655f4cc8c94d0ec2 Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Thu, 10 Oct 2024 16:03:29 -0700 Subject: [PATCH 26/82] Improve C error handling, remove kernel 
expression from main --- ffi/examples/read-table/expression.h | 20 +++++++++++++++----- ffi/examples/read-table/read_table.c | 4 ++-- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/ffi/examples/read-table/expression.h b/ffi/examples/read-table/expression.h index 4ec25675f..b613a8cdf 100644 --- a/ffi/examples/read-table/expression.h +++ b/ffi/examples/read-table/expression.h @@ -711,15 +711,25 @@ void print_tree(FILE* to, ExpressionRef ref, int depth) void read_expected_expression_tree(char* expected_buf) { FILE* data_file = fopen("expression_test_results.txt", "r"); - int x = fread(expected_buf, sizeof(char), TEST_BUF_SIZE, data_file); - assert(x > 0); + if (NULL == data_file) { + abort(); + } + if (NULL == fgets(expected_buf, TEST_BUF_SIZE - 1, data_file)) { + abort(); + } + if (0 != fclose(data_file)) { + abort(); + } } void get_expression_tree(ExpressionRef ref, char* out_buf, size_t buf_len) { - FILE* out_file = fmemopen(out_buf, buf_len, "w"); - print_tree(out_file, ref, 0); - fclose(out_file); + FILE* buf_stream = fmemopen(out_buf, buf_len, "w"); + if (NULL == buf_stream) { + abort(); + } + print_tree(buf_stream, ref, 0); + fclose(buf_stream); } void test_kernel_expr() { diff --git a/ffi/examples/read-table/read_table.c b/ffi/examples/read-table/read_table.c index 79dbebdd7..5b90912f9 100644 --- a/ffi/examples/read-table/read_table.c +++ b/ffi/examples/read-table/read_table.c @@ -200,10 +200,10 @@ PartitionList* get_partition_list(SharedGlobalScanState* state) int main(int argc, char* argv[]) { - test_kernel_expr(); + // test_kernel_expr(); if (argc < 2) { - printf("Usage: %s table/path\n", argv[0]); + printf("Usage: %s read_table table/path\n", argv[0]); return -1; } From 11fc68a1970a7b8c5c0f47b7ec7e2b0c1810f465 Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Thu, 10 Oct 2024 16:09:50 -0700 Subject: [PATCH 27/82] Remove changes from read_table.c --- ffi/examples/read-table/read_table.c | 1 + 1 file changed, 1 insertion(+) diff --git 
a/ffi/examples/read-table/read_table.c b/ffi/examples/read-table/read_table.c index 5b90912f9..51e10d99f 100644 --- a/ffi/examples/read-table/read_table.c +++ b/ffi/examples/read-table/read_table.c @@ -6,6 +6,7 @@ #include "expression.h" #include "read_table.h" #include "schema.h" +#include "expression.h" // some diagnostic functions void print_diag(char* fmt, ...) From c3160adb73ea68f25bca685a31e06ae3b04489cd Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Thu, 10 Oct 2024 16:12:11 -0700 Subject: [PATCH 28/82] remove unnecessary changes --- ffi/examples/read-table/read_table.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/ffi/examples/read-table/read_table.c b/ffi/examples/read-table/read_table.c index 51e10d99f..dbb26b012 100644 --- a/ffi/examples/read-table/read_table.c +++ b/ffi/examples/read-table/read_table.c @@ -6,7 +6,6 @@ #include "expression.h" #include "read_table.h" #include "schema.h" -#include "expression.h" // some diagnostic functions void print_diag(char* fmt, ...) @@ -107,11 +106,7 @@ void scan_row_callback( { (void)size; // not using this at the moment struct EngineContext* context = engine_context; - print_diag( - "Called back to read file: %.*s. (size: %" PRIu64 ", num records: ", - (int)path.len, - path.ptr, - size); + print_diag("Called back to read file: %.*s. 
(size: %" PRIu64 ", num records: ", (int)path.len, path.ptr, size); if (stats) { print_diag("%" PRId64 ")\n", stats->num_records); } else { @@ -201,10 +196,8 @@ PartitionList* get_partition_list(SharedGlobalScanState* state) int main(int argc, char* argv[]) { - // test_kernel_expr(); - if (argc < 2) { - printf("Usage: %s read_table table/path\n", argv[0]); + printf("Usage: %s table/path\n", argv[0]); return -1; } From 295e8e81de4a4cc545527d29dacb97534f1cc9e4 Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Thu, 10 Oct 2024 16:49:20 -0700 Subject: [PATCH 29/82] Move testing code into separate executable --- ffi/examples/read-table/CMakeLists.txt | 10 +++ ffi/examples/read-table/expression.h | 44 ------------- ffi/examples/read-table/read_table.c | 7 ++- ffi/examples/read-table/test_expr.c | 61 +++++++++++++++++++ .../expression_test_results.txt | 0 5 files changed, 76 insertions(+), 46 deletions(-) create mode 100644 ffi/examples/read-table/test_expr.c rename ffi/{examples/read-table => tests/test_expression_visitor}/expression_test_results.txt (100%) diff --git a/ffi/examples/read-table/CMakeLists.txt b/ffi/examples/read-table/CMakeLists.txt index 2df2b38e4..5ac89954d 100644 --- a/ffi/examples/read-table/CMakeLists.txt +++ b/ffi/examples/read-table/CMakeLists.txt @@ -1,6 +1,8 @@ cmake_minimum_required(VERSION 3.12) project(read_table) option(PRINT_DATA "Print out the table data. 
Requires arrow-glib" ON) + +# Configuration for the read_table executable add_executable(read_table read_table.c arrow.c) target_compile_definitions(read_table PUBLIC DEFINE_DEFAULT_ENGINE) target_include_directories(read_table PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/../../../target/ffi-headers") @@ -8,6 +10,14 @@ target_link_directories(read_table PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/../../../ target_link_libraries(read_table PUBLIC delta_kernel_ffi) target_compile_options(read_table PUBLIC) +# Configuration for the `test_expr executable +add_executable(test_expr test_expr.c) +target_compile_definitions(test_expr PUBLIC DEFINE_DEFAULT_ENGINE) +target_include_directories(test_expr PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/../../../target/ffi-headers") +target_link_directories(test_expr PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/../../../target/debug") +target_link_libraries(test_expr PUBLIC delta_kernel_ffi) +target_compile_options(test_expr PUBLIC) + # Add the test include(CTest) set(TestRunner "../../../tests/read-table-testing/run_test.sh") diff --git a/ffi/examples/read-table/expression.h b/ffi/examples/read-table/expression.h index b613a8cdf..34be114de 100644 --- a/ffi/examples/read-table/expression.h +++ b/ffi/examples/read-table/expression.h @@ -705,47 +705,3 @@ void print_tree(FILE* to, ExpressionRef ref, int depth) break; } } - -#define TEST_BUF_SIZE 4096 - -void read_expected_expression_tree(char* expected_buf) -{ - FILE* data_file = fopen("expression_test_results.txt", "r"); - if (NULL == data_file) { - abort(); - } - if (NULL == fgets(expected_buf, TEST_BUF_SIZE - 1, data_file)) { - abort(); - } - if (0 != fclose(data_file)) { - abort(); - } -} -void get_expression_tree(ExpressionRef ref, char* out_buf, size_t buf_len) -{ - - FILE* buf_stream = fmemopen(out_buf, buf_len, "w"); - if (NULL == buf_stream) { - abort(); - } - print_tree(buf_stream, ref, 0); - fclose(buf_stream); -} -void test_kernel_expr() -{ - SharedExpression* pred = get_kernel_expression(); - ExpressionRef 
ref = construct_predicate(pred); - - char out_buf[TEST_BUF_SIZE] = { 0 }; - char expected_buf[TEST_BUF_SIZE] = { 0 }; - - read_expected_expression_tree(expected_buf); - get_expression_tree(ref, out_buf, TEST_BUF_SIZE); - - for (int i = 0; i < TEST_BUF_SIZE; i++) { - assert(out_buf[i] == expected_buf[i]); - } - - free_expression(ref); - free_kernel_predicate(pred); -} diff --git a/ffi/examples/read-table/read_table.c b/ffi/examples/read-table/read_table.c index dbb26b012..4c769dd31 100644 --- a/ffi/examples/read-table/read_table.c +++ b/ffi/examples/read-table/read_table.c @@ -3,7 +3,6 @@ #include #include "arrow.h" -#include "expression.h" #include "read_table.h" #include "schema.h" @@ -106,7 +105,11 @@ void scan_row_callback( { (void)size; // not using this at the moment struct EngineContext* context = engine_context; - print_diag("Called back to read file: %.*s. (size: %" PRIu64 ", num records: ", (int)path.len, path.ptr, size); + print_diag( + "Called back to read file: %.*s. (size: %" PRIu64 ", num records: ", + (int)path.len, + path.ptr, + size); if (stats) { print_diag("%" PRId64 ")\n", stats->num_records); } else { diff --git a/ffi/examples/read-table/test_expr.c b/ffi/examples/read-table/test_expr.c new file mode 100644 index 000000000..1f074139a --- /dev/null +++ b/ffi/examples/read-table/test_expr.c @@ -0,0 +1,61 @@ +#include "expression.h" + +#define TEST_BUF_SIZE 4096 + +void read_expected_expression_tree(char* expected_buf, char* expected_path) +{ + FILE* data_file = fopen(expected_path, "r"); + if (NULL == data_file) { + printf("Failed to open file\n"); + abort(); + } + size_t offset = fread(expected_buf, sizeof(char), TEST_BUF_SIZE, data_file); + if (0 == offset) { + printf("Failed to read file\n"); + abort(); + } + if (0 != fclose(data_file)) { + printf("Error while closing file\n"); + abort(); + } +} +void get_expression_tree(ExpressionRef ref, char* out_buf, size_t buf_len) +{ + + FILE* buf_stream = fmemopen(out_buf, buf_len, "w"); + if (NULL == 
buf_stream) { + abort(); + } + print_tree(buf_stream, ref, 0); + fclose(buf_stream); +} +void test_kernel_expr(char* expected_path) +{ + SharedExpression* pred = get_kernel_expression(); + ExpressionRef ref = construct_predicate(pred); + + char out_buf[TEST_BUF_SIZE] = { 0 }; + char expected_buf[TEST_BUF_SIZE] = { 0 }; + + read_expected_expression_tree(expected_buf, expected_path); + get_expression_tree(ref, out_buf, TEST_BUF_SIZE); + + for (int i = 0; i < TEST_BUF_SIZE; i++) { + assert(out_buf[i] == expected_buf[i]); + } + + free_expression(ref); + free_kernel_predicate(pred); +} + +int main(int argc, char* argv[]) +{ + if (argc < 2) { + printf("Usage: %s expected results path\n", argv[0]); + return -1; + } + + char* expected_path = argv[1]; + test_kernel_expr(expected_path); + printf("Success!\n"); +} diff --git a/ffi/examples/read-table/expression_test_results.txt b/ffi/tests/test_expression_visitor/expression_test_results.txt similarity index 100% rename from ffi/examples/read-table/expression_test_results.txt rename to ffi/tests/test_expression_visitor/expression_test_results.txt From b069cf1704ed98ac91c414158c1fcc8e23604be6 Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Thu, 10 Oct 2024 17:11:38 -0700 Subject: [PATCH 30/82] Revamp testing --- ffi/examples/read-table/CMakeLists.txt | 24 ++-- ffi/examples/read-table/expression.h | 127 +++++++++--------- ffi/examples/read-table/test_expr.c | 58 +------- ...pression_test_results.txt => expected.txt} | 0 ffi/tests/test_expression_visitor/run_test.sh | 13 ++ 5 files changed, 93 insertions(+), 129 deletions(-) rename ffi/tests/test_expression_visitor/{expression_test_results.txt => expected.txt} (100%) create mode 100644 ffi/tests/test_expression_visitor/run_test.sh diff --git a/ffi/examples/read-table/CMakeLists.txt b/ffi/examples/read-table/CMakeLists.txt index 5ac89954d..bf4da567a 100644 --- a/ffi/examples/read-table/CMakeLists.txt +++ b/ffi/examples/read-table/CMakeLists.txt @@ -10,15 +10,7 @@ 
target_link_directories(read_table PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/../../../ target_link_libraries(read_table PUBLIC delta_kernel_ffi) target_compile_options(read_table PUBLIC) -# Configuration for the `test_expr executable -add_executable(test_expr test_expr.c) -target_compile_definitions(test_expr PUBLIC DEFINE_DEFAULT_ENGINE) -target_include_directories(test_expr PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/../../../target/ffi-headers") -target_link_directories(test_expr PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/../../../target/debug") -target_link_libraries(test_expr PUBLIC delta_kernel_ffi) -target_compile_options(test_expr PUBLIC) - -# Add the test +# Add the read_table test include(CTest) set(TestRunner "../../../tests/read-table-testing/run_test.sh") set(DatPath "../../../../acceptance/tests/dat/out/reader_tests/generated") @@ -48,3 +40,17 @@ if(PRINT_DATA) target_compile_options(read_table PUBLIC ${ARROW_GLIB_CFLAGS_OTHER}) target_compile_definitions(read_table PUBLIC PRINT_ARROW_DATA) endif(PRINT_DATA) + +# Configuration for the `test_expr executable +add_executable(test_expr test_expr.c) +target_compile_definitions(test_expr PUBLIC DEFINE_DEFAULT_ENGINE) +target_include_directories(test_expr PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/../../../target/ffi-headers") +target_link_directories(test_expr PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/../../../target/debug") +target_link_libraries(test_expr PUBLIC delta_kernel_ffi) +target_compile_options(test_expr PUBLIC) + +# Add the read_table test +include(CTest) +set(TestRunner "../../../tests/test_expression_visitor/run_test.sh") +set(ExpectedPath "../../../tests/test_expression_visitor/expected.txt") +add_test(NAME test_expression_visitor COMMAND ./test_expr ${DatPath}) diff --git a/ffi/examples/read-table/expression.h b/ffi/examples/read-table/expression.h index 34be114de..c99014b37 100644 --- a/ffi/examples/read-table/expression.h +++ b/ffi/examples/read-table/expression.h @@ -507,155 +507,154 @@ void free_expression(ExpressionRef 
ref) } } -void print_n_spaces(FILE* to, int n) +void print_n_spaces(int n) { if (n == 0) return; - fprintf(to, " "); - print_n_spaces(to, n - 1); + printf(" "); + print_n_spaces(n - 1); } -void print_tree(FILE* to, ExpressionRef ref, int depth) +void print_tree(ExpressionRef ref, int depth) { switch (ref.type) { case BinOp: { struct BinOp* op = ref.ref; - print_n_spaces(to, depth); + print_n_spaces(depth); switch (op->op) { case Add: { - fprintf(to, "ADD\n"); + printf("ADD\n"); break; } case Sub: { - fprintf(to, "SUB\n"); + printf("SUB\n"); break; }; case Div: { - fprintf(to, "DIV\n"); + printf("DIV\n"); break; }; case Mul: { - fprintf(to, "MUL\n"); + printf("MUL\n"); break; }; case LT: { - fprintf(to, "LT\n"); + printf("LT\n"); break; }; case LE: { - fprintf(to, "LE\n"); + printf("LE\n"); break; } case GT: { - fprintf(to, "GT\n"); + printf("GT\n"); break; }; case GE: { - fprintf(to, "GE\n"); + printf("GE\n"); break; }; case EQ: { - fprintf(to, "EQ\n"); + printf("EQ\n"); break; }; case NE: { - fprintf(to, "NE\n"); + printf("NE\n"); break; }; case In: { - fprintf(to, "In\n"); + printf("In\n"); break; }; case NotIn: { - fprintf(to, "NotIn\n"); + printf("NotIn\n"); break; }; break; case Distinct: - fprintf(to, "Distinct\n"); + printf("Distinct\n"); break; } ExpressionRef left = { .ref = op->left, .type = Literal }; ExpressionRef right = { .ref = op->right, .type = Literal }; - print_tree(to, left, depth + 1); - print_tree(to, right, depth + 1); + print_tree(left, depth + 1); + print_tree(right, depth + 1); break; } case Variadic: { struct Variadic* var = ref.ref; - print_n_spaces(to, depth); + print_n_spaces(depth); switch (var->op) { case And: - fprintf(to, "And\n"); + printf("And\n"); break; case Or: - fprintf(to, "Or\n"); + printf("Or\n"); break; case StructConstructor: - fprintf(to, "StructConstructor\n"); + printf("StructConstructor\n"); break; case ArrayData: - fprintf(to, "ArrayData\n"); + printf("ArrayData\n"); break; } for (size_t i = 0; i < var->len; i++) { 
- print_tree(to, var->expr_list[i], depth + 1); + print_tree(var->expr_list[i], depth + 1); } } break; case Literal: { struct Literal* lit = ref.ref; - print_n_spaces(to, depth); + print_n_spaces(depth); switch (lit->type) { case Integer: - fprintf(to, "Integer"); - fprintf(to, "(%d)\n", lit->value.integer_data); + printf("Integer"); + printf("(%d)\n", lit->value.integer_data); break; case Long: - fprintf(to, "Long"); - fprintf(to, "(%lld)\n", (long long)lit->value.long_data); + printf("Long"); + printf("(%lld)\n", (long long)lit->value.long_data); break; case Short: - fprintf(to, "Short"); - fprintf(to, "(%hd)\n", lit->value.short_data); + printf("Short"); + printf("(%hd)\n", lit->value.short_data); break; case Byte: - fprintf(to, "Byte"); - fprintf(to, "(%hhd)\n", lit->value.byte_data); + printf("Byte"); + printf("(%hhd)\n", lit->value.byte_data); break; case Float: - fprintf(to, "Float"); - fprintf(to, "(%f)\n", (float)lit->value.float_data); + printf("Float"); + printf("(%f)\n", (float)lit->value.float_data); break; case Double: - fprintf(to, "Double"); - fprintf(to, "(%f)\n", lit->value.double_data); + printf("Double"); + printf("(%f)\n", lit->value.double_data); break; case String: { - fprintf(to, "String(%s)\n", lit->value.string_data.ptr); + printf("String(%s)\n", lit->value.string_data.ptr); break; } case Boolean: - fprintf(to, "Boolean"); - fprintf(to, "(%d)\n", lit->value.boolean_data); + printf("Boolean"); + printf("(%d)\n", lit->value.boolean_data); break; case Timestamp: - fprintf(to, "Timestamp"); - fprintf(to, "(%lld)\n", (long long)lit->value.long_data); + printf("Timestamp"); + printf("(%lld)\n", (long long)lit->value.long_data); break; case TimestampNtz: - fprintf(to, "TimestampNtz"); - fprintf(to, "(%lld)\n", (long long)lit->value.long_data); + printf("TimestampNtz"); + printf("(%lld)\n", (long long)lit->value.long_data); break; case Date: - fprintf(to, "Date"); - fprintf(to, "(%d)\n", lit->value.integer_data); + printf("Date"); + 
printf("(%d)\n", lit->value.integer_data); break; case Binary: - fprintf(to, "Binary\n"); + printf("Binary\n"); break; case Decimal: { struct Decimal* dec = &lit->value.decimal; - fprintf( - to, + printf( "Decimal(%lld,%lld, %d, %d)\n", (long long)dec->value[0], (long long)dec->value[1], @@ -664,44 +663,44 @@ void print_tree(FILE* to, ExpressionRef ref, int depth) break; } case Null: - fprintf(to, "Null\n"); + printf("Null\n"); break; case Struct: - fprintf(to, "Struct\n"); + printf("Struct\n"); struct Struct* struct_data = &lit->value.struct_data; for (size_t i = 0; i < struct_data->len; i++) { - print_n_spaces(to, depth + 1); - fprintf(to, "Field: %s\n", struct_data->field_names[i].ptr); - print_tree(to, struct_data->expressions[i], depth + 2); + print_n_spaces(depth + 1); + printf("Field: %s\n", struct_data->field_names[i].ptr); + print_tree(struct_data->expressions[i], depth + 2); } break; case Array: - fprintf(to, "Array\n"); + printf("Array\n"); struct ArrayData* array = &lit->value.array_data; for (size_t i = 0; i < array->len; i++) { - print_tree(to, array->expr_list[i], depth + 1); + print_tree(array->expr_list[i], depth + 1); } break; } } break; case Unary: { - print_n_spaces(to, depth); + print_n_spaces(depth); struct Unary* unary = ref.ref; switch (unary->type) { case Not: - fprintf(to, "Not\n"); + printf("Not\n"); break; case IsNull: - fprintf(to, "IsNull\n"); + printf("IsNull\n"); break; } - print_tree(to, unary->sub_expr, depth + 1); + print_tree(unary->sub_expr, depth + 1); break; } case Column: - print_n_spaces(to, depth); + print_n_spaces(depth); KernelStringSlice* string = ref.ref; - fprintf(to, "Column(%s)\n", string->ptr); + printf("Column(%s)\n", string->ptr); break; } } diff --git a/ffi/examples/read-table/test_expr.c b/ffi/examples/read-table/test_expr.c index 1f074139a..53d2c40bc 100644 --- a/ffi/examples/read-table/test_expr.c +++ b/ffi/examples/read-table/test_expr.c @@ -1,61 +1,7 @@ #include "expression.h" - -#define TEST_BUF_SIZE 4096 - 
-void read_expected_expression_tree(char* expected_buf, char* expected_path) -{ - FILE* data_file = fopen(expected_path, "r"); - if (NULL == data_file) { - printf("Failed to open file\n"); - abort(); - } - size_t offset = fread(expected_buf, sizeof(char), TEST_BUF_SIZE, data_file); - if (0 == offset) { - printf("Failed to read file\n"); - abort(); - } - if (0 != fclose(data_file)) { - printf("Error while closing file\n"); - abort(); - } -} -void get_expression_tree(ExpressionRef ref, char* out_buf, size_t buf_len) -{ - - FILE* buf_stream = fmemopen(out_buf, buf_len, "w"); - if (NULL == buf_stream) { - abort(); - } - print_tree(buf_stream, ref, 0); - fclose(buf_stream); -} -void test_kernel_expr(char* expected_path) +int main() { SharedExpression* pred = get_kernel_expression(); ExpressionRef ref = construct_predicate(pred); - - char out_buf[TEST_BUF_SIZE] = { 0 }; - char expected_buf[TEST_BUF_SIZE] = { 0 }; - - read_expected_expression_tree(expected_buf, expected_path); - get_expression_tree(ref, out_buf, TEST_BUF_SIZE); - - for (int i = 0; i < TEST_BUF_SIZE; i++) { - assert(out_buf[i] == expected_buf[i]); - } - - free_expression(ref); - free_kernel_predicate(pred); -} - -int main(int argc, char* argv[]) -{ - if (argc < 2) { - printf("Usage: %s expected results path\n", argv[0]); - return -1; - } - - char* expected_path = argv[1]; - test_kernel_expr(expected_path); - printf("Success!\n"); + print_tree(ref, 0); } diff --git a/ffi/tests/test_expression_visitor/expression_test_results.txt b/ffi/tests/test_expression_visitor/expected.txt similarity index 100% rename from ffi/tests/test_expression_visitor/expression_test_results.txt rename to ffi/tests/test_expression_visitor/expected.txt diff --git a/ffi/tests/test_expression_visitor/run_test.sh b/ffi/tests/test_expression_visitor/run_test.sh new file mode 100644 index 000000000..1e8fd7f47 --- /dev/null +++ b/ffi/tests/test_expression_visitor/run_test.sh @@ -0,0 +1,13 @@ + +#!/bin/bash + +set -euxo pipefail + 
+OUT_FILE=$(mktemp) +./read_table "$1" | tee "$OUT_FILE" +diff -s "$OUT_FILE" "$2" +DIFF_EXIT_CODE=$? +echo "Diff exited with $DIFF_EXIT_CODE" +rm "$OUT_FILE" +exit "$DIFF_EXIT_CODE" + From 69e92f413482fe5c72aa156d6cdbf22717ebaf76 Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Thu, 10 Oct 2024 18:57:29 -0700 Subject: [PATCH 31/82] Move to sibling-list_based approach --- ffi/examples/read-table/CMakeLists.txt | 7 + ffi/examples/read-table/expression.h | 280 +++++++++++------------ ffi/examples/read-table/test_expr.c | 2 +- ffi/src/expressions.rs | 294 ++++++++++++++----------- 4 files changed, 299 insertions(+), 284 deletions(-) diff --git a/ffi/examples/read-table/CMakeLists.txt b/ffi/examples/read-table/CMakeLists.txt index bf4da567a..88b3a17bf 100644 --- a/ffi/examples/read-table/CMakeLists.txt +++ b/ffi/examples/read-table/CMakeLists.txt @@ -49,6 +49,13 @@ target_link_directories(test_expr PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/../../../t target_link_libraries(test_expr PUBLIC delta_kernel_ffi) target_compile_options(test_expr PUBLIC) +if(MSVC) + target_compile_options(test_expr PRIVATE /W4 /WX) +else() + # no-strict-prototypes because arrow headers have fn defs without prototypes + target_compile_options(test_expr PRIVATE -Wall -Wextra -Wpedantic -Werror -Wno-strict-prototypes -g) +endif() + # Add the read_table test include(CTest) set(TestRunner "../../../tests/test_expression_visitor/run_test.sh") diff --git a/ffi/examples/read-table/expression.h b/ffi/examples/read-table/expression.h index c99014b37..361f4b437 100644 --- a/ffi/examples/read-table/expression.h +++ b/ffi/examples/read-table/expression.h @@ -12,33 +12,33 @@ * passed into each visit_x call. This simply keeps track of all the expressions we are asked to * allocate. * - * Each expression is an "ExpressionRef", which tracks the type and pointer to the expression. + * Each expression is an "ExpressionItem", which tracks the type and pointer to the expression. 
*/ #define DEFINE_BINOP(fun_name, op) \ - uintptr_t fun_name(void* data, uintptr_t a, uintptr_t b) \ + void fun_name(void* data, uintptr_t child_list_id, uintptr_t sibling_list_id) \ { \ - return visit_expr_binop(data, a, b, op); \ + visit_expr_binop(data, op, child_list_id, sibling_list_id); \ } #define DEFINE_SIMPLE_SCALAR(fun_name, enum_member, c_type, literal_field) \ - uintptr_t fun_name(void* data, c_type val) \ + void fun_name(void* data, c_type val, uintptr_t sibling_list_id) \ { \ struct Literal* lit = malloc(sizeof(struct Literal)); \ lit->type = enum_member; \ lit->value.literal_field = val; \ - return put_handle(data, lit, Literal); \ + put_handle(data, lit, Literal, sibling_list_id); \ } \ _Static_assert( \ sizeof(c_type) <= sizeof(uintptr_t), "The provided type is not a valid simple scalar") #define DEFINE_VARIADIC(fun_name, enum_member) \ - uintptr_t fun_name(void* data, uintptr_t len) \ + void fun_name(void* data, uintptr_t child_list_id, uintptr_t sibling_list_id) \ { \ - return visit_expr_variadic(data, len, enum_member); \ + visit_expr_variadic(data, enum_member, child_list_id, sibling_list_id); \ } #define DEFINE_UNARY(fun_name, op) \ - uintptr_t fun_name(void* data, uintptr_t sub_expr) \ + void fun_name(void* data, uintptr_t child_list_id, uintptr_t sibling_list_id) \ { \ - return visit_expr_unary(data, sub_expr, op); \ + visit_expr_unary(data, op, child_list_id, sibling_list_id); \ } enum OpType { @@ -87,12 +87,17 @@ typedef struct { void* ref; enum ExpressionType type; -} ExpressionRef; +} ExpressionItem; + +typedef struct +{ + uint32_t len; + ExpressionItem* exprList; +} ExpressionItemList; struct BinOp { enum OpType op; - struct Literal* left; - struct Literal* right; + ExpressionItemList exprs; }; struct Null; @@ -111,14 +116,12 @@ enum UnaryType struct Variadic { enum VariadicType op; - size_t len; - size_t max_len; - ExpressionRef* expr_list; + ExpressionItemList expr_list; }; struct Unary { enum UnaryType type; - ExpressionRef sub_expr; 
+ ExpressionItemList sub_expr; }; struct BinaryData { @@ -133,22 +136,18 @@ struct Decimal }; typedef struct { - size_t len; - ExpressionRef handles[100]; + size_t list_count; + ExpressionItemList* lists; } ExpressionBuilder; struct Struct { - KernelStringSlice* field_names; - ExpressionRef* expressions; - size_t len; - size_t max_len; + ExpressionItemList fields; + ExpressionItemList values; }; struct ArrayData { - size_t len; - size_t max_len; - ExpressionRef* expr_list; + ExpressionItemList expr_list; }; struct Literal @@ -171,20 +170,20 @@ struct Literal } value; }; -size_t put_handle(void* data, void* ref, enum ExpressionType type) +void put_handle(void* data, void* ref, enum ExpressionType type, size_t sibling_list_id) { ExpressionBuilder* data_ptr = (ExpressionBuilder*)data; - ExpressionRef expr = { .ref = ref, .type = type }; - data_ptr->handles[data_ptr->len] = expr; - return data_ptr->len++; + ExpressionItem expr = { .ref = ref, .type = type }; + ExpressionItemList* list = &data_ptr->lists[sibling_list_id]; + list->exprList[list->len++] = expr; } -ExpressionRef* get_handle(void* data, size_t handle_index) +ExpressionItemList get_handle(void* data, size_t list_id) { ExpressionBuilder* data_ptr = (ExpressionBuilder*)data; - if (handle_index > data_ptr->len) { - return NULL; + if (list_id >= data_ptr->list_count) { + abort(); } - return &data_ptr->handles[handle_index]; + return data_ptr->lists[list_id]; } KernelStringSlice copy_kernel_string(KernelStringSlice string) { @@ -195,19 +194,16 @@ KernelStringSlice copy_kernel_string(KernelStringSlice string) return out; } -uintptr_t visit_expr_binop(void* data, uintptr_t a, uintptr_t b, enum OpType op) +void visit_expr_binop( + void* data, + enum OpType op, + uintptr_t child_id_list, + uintptr_t sibling_id_list) { struct BinOp* binop = malloc(sizeof(struct BinOp)); - ExpressionRef* left_handle = get_handle(data, a); - ExpressionRef* right_handle = get_handle(data, b); - assert(right_handle != NULL && left_handle 
!= NULL); - - struct Literal* left = left_handle->ref; - struct Literal* right = right_handle->ref; binop->op = op; - binop->left = left; - binop->right = right; - return put_handle(data, binop, BinOp); + binop->exprs = get_handle(data, child_id_list); + put_handle(data, binop, BinOp, sibling_id_list); } DEFINE_BINOP(visit_expr_add, Add) DEFINE_BINOP(visit_expr_minus, Sub) @@ -223,20 +219,21 @@ DEFINE_BINOP(visit_expr_distinct, Distinct) DEFINE_BINOP(visit_expr_in, In) DEFINE_BINOP(visit_expr_not_in, NotIn) -uintptr_t visit_expr_string(void* data, KernelStringSlice string) +void visit_expr_string(void* data, KernelStringSlice string, uintptr_t sibling_list_id) { struct Literal* literal = malloc(sizeof(struct Literal)); literal->type = String; literal->value.string_data = copy_kernel_string(string); - return put_handle(data, literal, Literal); + put_handle(data, literal, Literal, sibling_list_id); } -uintptr_t visit_expr_decimal( +void visit_expr_decimal( void* data, uint64_t value_ms, uint64_t value_ls, uint8_t precision, - uint8_t scale) + uint8_t scale, + uintptr_t sibling_list_id) { struct Literal* literal = malloc(sizeof(struct Literal)); literal->type = Decimal; @@ -245,7 +242,7 @@ uintptr_t visit_expr_decimal( dec->value[1] = value_ls; dec->precision = precision; dec->scale = scale; - return put_handle(data, literal, Literal); + put_handle(data, literal, Literal, sibling_list_id); } DEFINE_SIMPLE_SCALAR(visit_expr_int, Integer, int32_t, integer_data); DEFINE_SIMPLE_SCALAR(visit_expr_long, Long, int64_t, long_data); @@ -258,127 +255,102 @@ DEFINE_SIMPLE_SCALAR(visit_expr_timestamp, Timestamp, int64_t, long_data); DEFINE_SIMPLE_SCALAR(visit_expr_timestamp_ntz, TimestampNtz, int64_t, long_data); DEFINE_SIMPLE_SCALAR(visit_expr_date, Date, int32_t, integer_data); -uintptr_t visit_expr_variadic(void* data, uintptr_t len, enum VariadicType op) +void visit_expr_variadic( + void* data, + enum VariadicType op, + uintptr_t child_list_id, + uintptr_t sibling_list_id) { 
struct Variadic* var = malloc(sizeof(struct Variadic)); - ExpressionRef* expr_lst = malloc(sizeof(ExpressionRef) * len); var->op = op; - var->len = 0; - var->max_len = len; - var->expr_list = expr_lst; - return put_handle(data, var, Variadic); -} -void visit_expr_variadic_sub_expr(void* data, uintptr_t variadic_id, uintptr_t sub_expr_id) -{ - ExpressionRef* sub_expr_ref = get_handle(data, sub_expr_id); - ExpressionRef* variadic_ref = get_handle(data, variadic_id); - assert(sub_expr_ref != NULL && variadic_ref != NULL); - assert(variadic_ref->type == Variadic); - - struct Variadic* variadic = variadic_ref->ref; - variadic->expr_list[variadic->len++] = *sub_expr_ref; + var->expr_list = get_handle(data, child_list_id); + put_handle(data, var, Variadic, sibling_list_id); } DEFINE_VARIADIC(visit_expr_and, And) DEFINE_VARIADIC(visit_expr_or, Or) DEFINE_VARIADIC(visit_expr_struct, StructConstructor) -void visit_expr_array_sub_expr(void* data, uintptr_t variadic_id, uintptr_t sub_expr_id) -{ - ExpressionRef* sub_expr_handle = get_handle(data, sub_expr_id); - ExpressionRef* array_handle = get_handle(data, variadic_id); - assert(sub_expr_handle != NULL && array_handle != NULL); - assert(array_handle->type == Literal); - struct Literal* literal = array_handle->ref; - assert(literal->type == Array); - struct ArrayData* array = &literal->value.array_data; - array->expr_list[array->len++] = *sub_expr_handle; -} -uintptr_t visit_expr_array(void* data, uintptr_t len) +void visit_expr_array(void* data, uintptr_t child_list_id, uintptr_t sibling_list_id) { struct Literal* literal = malloc(sizeof(struct Literal)); literal->type = Array; struct ArrayData* arr = &(literal->value.array_data); - arr->len = 0; - arr->max_len = 0; - arr->expr_list = malloc(sizeof(ExpressionRef) * len); - return put_handle(data, literal, Literal); + arr->expr_list = get_handle(data, child_list_id); + put_handle(data, literal, Literal, sibling_list_id); } -uintptr_t visit_expr_binary(void* data, const 
uint8_t* buf, uintptr_t len) +void visit_expr_binary(void* data, const uint8_t* buf, uintptr_t len, uintptr_t sibling_list_id) { struct Literal* literal = malloc(sizeof(struct Literal)); literal->type = Binary; struct BinaryData* bin = &literal->value.binary; bin->buf = malloc(len); memcpy(bin->buf, buf, len); - return put_handle(data, literal, Literal); + put_handle(data, literal, Literal, sibling_list_id); } -uintptr_t visit_expr_struct_literal(void* data, uintptr_t len) +void visit_expr_struct_literal( + void* data, + uintptr_t child_field_list_id, + uintptr_t child_value_list_id, + uintptr_t sibling_list_id) { struct Literal* literal = malloc(sizeof(struct Literal)); literal->type = Struct; struct Struct* struct_data = &literal->value.struct_data; - struct_data->len = 0; - struct_data->max_len = len; - struct_data->expressions = malloc(sizeof(ExpressionRef) * len); - struct_data->field_names = malloc(sizeof(KernelStringSlice) * len); - return put_handle(data, literal, Literal); -} - -void visit_expr_struct_literal_field( - void* data, - uintptr_t struct_id, - KernelStringSlice field_name, - uintptr_t value_id) -{ - ExpressionRef* value = get_handle(data, value_id); - ExpressionRef* literal_handle = get_handle(data, struct_id); - assert(literal_handle != NULL && value != NULL); - assert(literal_handle->type == Literal); - struct Literal* literal = literal_handle->ref; - assert(literal->type == Struct); - - struct Struct* struct_ref = &literal->value.struct_data; - size_t len = struct_ref->len; - assert(len < struct_ref->max_len); - - struct_ref->expressions[len] = *value; - struct_ref->field_names[len] = copy_kernel_string(field_name); - struct_ref->len++; + struct_data->fields = get_handle(data, child_field_list_id); + struct_data->values = get_handle(data, child_value_list_id); + put_handle(data, literal, Literal, sibling_list_id); } -uintptr_t visit_expr_null(void* data) +void visit_expr_null(void* data, uintptr_t sibling_id_list) { struct Literal* literal = 
malloc(sizeof(struct Literal)); literal->type = Null; - return put_handle(data, literal, Literal); + put_handle(data, literal, Literal, sibling_id_list); } -uintptr_t visit_expr_unary(void* data, uintptr_t sub_expr_id, enum UnaryType type) +void visit_expr_unary( + void* data, + enum UnaryType type, + uintptr_t child_list_id, + uintptr_t sibling_list_id) { struct Unary* unary = malloc(sizeof(struct Unary)); unary->type = type; - ExpressionRef* sub_expr_handle = get_handle(data, sub_expr_id); - unary->sub_expr = *sub_expr_handle; - return put_handle(data, unary, Unary); + unary->sub_expr = get_handle(data, child_list_id); + put_handle(data, unary, Unary, sibling_list_id); } DEFINE_UNARY(visit_expr_is_null, IsNull) DEFINE_UNARY(visit_expr_not, Not) -uintptr_t visit_expr_column(void* data, KernelStringSlice string) +void visit_expr_column(void* data, KernelStringSlice string, uintptr_t sibling_id_list) { struct KernelStringSlice* heap_string = malloc(sizeof(KernelStringSlice)); *heap_string = copy_kernel_string(string); - return put_handle(data, heap_string, Column); + put_handle(data, heap_string, Column, sibling_id_list); +} + +uintptr_t make_field_list(void* data, uintptr_t reserve) +{ + ExpressionBuilder* builder = data; + uintptr_t id = builder->list_count; + builder->list_count++; + builder->lists = realloc(builder->lists, sizeof(ExpressionItemList) * builder->list_count); + ExpressionItem* list = calloc(reserve, sizeof(ExpressionItem)); + builder->lists[id].len = 0; + builder->lists[id].exprList = list; + return id; } // Print the schema of the snapshot -ExpressionRef construct_predicate(SharedExpression* predicate) +ExpressionItem construct_predicate(SharedExpression* predicate) { ExpressionBuilder data = { 0 }; + data.lists = malloc(sizeof(ExpressionItemList) * 100); EngineExpressionVisitor visitor = { .data = &data, + .make_field_list = make_field_list, .visit_int = visit_expr_int, .visit_long = visit_expr_long, .visit_short = visit_expr_short, @@ -394,7 +366,6 @@ 
ExpressionRef construct_predicate(SharedExpression* predicate) .visit_string = visit_expr_string, .visit_and = visit_expr_and, .visit_or = visit_expr_or, - .visit_variadic_sub_expr = visit_expr_variadic_sub_expr, .visit_not = visit_expr_not, .visit_is_null = visit_expr_is_null, .visit_lt = visit_expr_lt, @@ -412,35 +383,29 @@ ExpressionRef construct_predicate(SharedExpression* predicate) .visit_divide = visit_expr_divide, .visit_column = visit_expr_column, .visit_struct = visit_expr_struct, - .visit_struct_sub_expr = visit_expr_variadic_sub_expr, // We treat expr struct as a variadic .visit_null = visit_expr_null, .visit_struct_literal = visit_expr_struct_literal, - .visit_struct_literal_field = visit_expr_struct_literal_field, .visit_array = visit_expr_array, - .visit_array_element = visit_expr_array_sub_expr }; - uintptr_t schema_list_id = visit_expression(&predicate, &visitor); - return data.handles[schema_list_id]; + uintptr_t top_level_id = visit_expression(&predicate, &visitor); + ExpressionItem ret = data.lists[top_level_id].exprList[0]; + return ret; } -void free_expression(ExpressionRef ref) +void free_expression_item_list(ExpressionItemList list); + +void free_expression_item(ExpressionItem ref) { switch (ref.type) { case BinOp: { struct BinOp* op = ref.ref; - ExpressionRef left = { .ref = op->left, .type = Literal }; - ExpressionRef right = { .ref = op->right, .type = Literal }; - free_expression(left); - free_expression(right); + free_expression_item_list(op->exprs); free(op); break; } case Variadic: { struct Variadic* var = ref.ref; - for (size_t i = 0; i < var->len; i++) { - free_expression(var->expr_list[i]); - } - free(var->expr_list); + free_expression_item_list(var->expr_list); free(var); break; }; @@ -449,20 +414,15 @@ void free_expression(ExpressionRef ref) switch (lit->type) { case Struct: { struct Struct* struct_data = &lit->value.struct_data; - for (size_t i = 0; i < struct_data->len; i++) { - free_expression(struct_data->expressions[i]); - 
free((void*)struct_data->field_names[i].ptr); - } - free(struct_data->expressions); - free(struct_data->field_names); + free_expression_item_list(struct_data->values); + free_expression_item_list(struct_data->fields); + /* struct_data points into *lit (not a separate allocation); it must not be passed to free() */ break; } case Array: { struct ArrayData* array = &lit->value.array_data; - for (size_t i = 0; i < array->len; i++) { - free_expression(array->expr_list[i]); - } - free(array->expr_list); + free_expression_item_list(array->expr_list); + /* array points into *lit (not a separate allocation); it must not be passed to free() */ break; } case String: { @@ -494,7 +454,7 @@ void free_expression(ExpressionRef ref) }; case Unary: { struct Unary* unary = ref.ref; - free_expression(unary->sub_expr); + free_expression_item_list(unary->sub_expr); free(unary); break; } @@ -507,6 +467,13 @@ void free_expression(ExpressionRef ref) } } +void free_expression_item_list(ExpressionItemList list) +{ + for (size_t i = 0; i < list.len; i++) { + free_expression_item(list.exprList[i]); + } + free(list.exprList); +} void print_n_spaces(int n) { if (n == 0) @@ -514,7 +481,7 @@ void print_n_spaces(int n) printf(" "); print_n_spaces(n - 1); } -void print_tree(ExpressionRef ref, int depth) +void print_tree(ExpressionItem ref, int depth) { switch (ref.type) { case BinOp: { @@ -574,8 +541,8 @@ void print_tree(ExpressionRef ref, int depth) break; } - ExpressionRef left = { .ref = op->left, .type = Literal }; - ExpressionRef right = { .ref = op->right, .type = Literal }; + ExpressionItem left = op->exprs.exprList[0]; + ExpressionItem right = op->exprs.exprList[1]; print_tree(left, depth + 1); print_tree(right, depth + 1); break; @@ -597,8 +564,8 @@ void print_tree(ExpressionRef ref, int depth) printf("ArrayData\n"); break; } - for (size_t i = 0; i < var->len; i++) { - print_tree(var->expr_list[i], depth + 1); + for (size_t i = 0; i < var->expr_list.len; i++) { + print_tree(var->expr_list.exprList[i], depth + 1); } } break; case Literal: { @@ -668,17 +635,18 @@ void print_tree(ExpressionRef ref, int depth) case Struct: printf("Struct\n"); 
struct Struct* struct_data = &lit->value.struct_data; - for (size_t i = 0; i < struct_data->len; i++) { + for (size_t i = 0; i < struct_data->values.len; i++) { print_n_spaces(depth + 1); - printf("Field: %s\n", struct_data->field_names[i].ptr); - print_tree(struct_data->expressions[i], depth + 2); + printf("Field\n"); + print_tree(struct_data->fields.exprList[i], depth + 2); + print_tree(struct_data->values.exprList[i], depth + 2); } break; case Array: printf("Array\n"); struct ArrayData* array = &lit->value.array_data; - for (size_t i = 0; i < array->len; i++) { - print_tree(array->expr_list[i], depth + 1); + for (size_t i = 0; i < array->expr_list.len; i++) { + print_tree(array->expr_list.exprList[i], depth + 1); } break; } @@ -694,7 +662,7 @@ void print_tree(ExpressionRef ref, int depth) printf("IsNull\n"); break; } - print_tree(unary->sub_expr, depth + 1); + print_tree(unary->sub_expr.exprList[0], depth + 1); break; } case Column: diff --git a/ffi/examples/read-table/test_expr.c b/ffi/examples/read-table/test_expr.c index 53d2c40bc..8146a0611 100644 --- a/ffi/examples/read-table/test_expr.c +++ b/ffi/examples/read-table/test_expr.c @@ -2,6 +2,6 @@ int main() { SharedExpression* pred = get_kernel_expression(); - ExpressionRef ref = construct_predicate(pred); + ExpressionItem ref = construct_predicate(pred); print_tree(ref, 0); } diff --git a/ffi/src/expressions.rs b/ffi/src/expressions.rs index 247cfa15e..8226a8a40 100644 --- a/ffi/src/expressions.rs +++ b/ffi/src/expressions.rs @@ -409,30 +409,34 @@ pub unsafe extern "C" fn get_kernel_expression() -> Handle { pub struct EngineExpressionVisitor { /// An opaque state pointer pub data: *mut c_void, - /// Visit a 32bit `integer` - pub visit_int: extern "C" fn(data: *mut c_void, value: i32) -> usize, + /// Creates a new expression list, optionally reserving capacity up front + pub make_field_list: extern "C" fn(data: *mut c_void, reserve: usize) -> usize, + /// Visit a 32bit `integer` + pub visit_int: extern "C" 
fn(data: *mut c_void, value: i32, sibling_list_id: usize), /// Visit a 64bit `long`. - pub visit_long: extern "C" fn(data: *mut c_void, value: i64) -> usize, + pub visit_long: extern "C" fn(data: *mut c_void, value: i64, sibling_list_id: usize), /// Visit a 16bit `short`. - pub visit_short: extern "C" fn(data: *mut c_void, value: i16) -> usize, + pub visit_short: extern "C" fn(data: *mut c_void, value: i16, sibling_list_id: usize), /// Visit an 8bit `byte`. - pub visit_byte: extern "C" fn(data: *mut c_void, value: i8) -> usize, + pub visit_byte: extern "C" fn(data: *mut c_void, value: i8, sibling_list_id: usize), /// Visit a 32bit `float`. - pub visit_float: extern "C" fn(data: *mut c_void, value: f32) -> usize, + pub visit_float: extern "C" fn(data: *mut c_void, value: f32, sibling_list_id: usize), /// Visit a 64bit `double`. - pub visit_double: extern "C" fn(data: *mut c_void, value: f64) -> usize, + pub visit_double: extern "C" fn(data: *mut c_void, value: f64, sibling_list_id: usize), /// Visit a `string`. - pub visit_string: extern "C" fn(data: *mut c_void, value: KernelStringSlice) -> usize, + pub visit_string: + extern "C" fn(data: *mut c_void, value: KernelStringSlice, sibling_list_id: usize), /// Visit a `boolean`. - pub visit_bool: extern "C" fn(data: *mut c_void, value: bool) -> usize, + pub visit_bool: extern "C" fn(data: *mut c_void, value: bool, sibling_list_id: usize), /// Visit a 64bit timestamp. The timestamp is microsecond precision and adjusted to UTC. - pub visit_timestamp: extern "C" fn(data: *mut c_void, value: i64) -> usize, + pub visit_timestamp: extern "C" fn(data: *mut c_void, value: i64, sibling_list_id: usize), /// Visit a 64bit timestamp. The timestamp is microsecond precision with no timezone. 
- pub visit_timestamp_ntz: extern "C" fn(data: *mut c_void, value: i64) -> usize, + pub visit_timestamp_ntz: extern "C" fn(data: *mut c_void, value: i64, sibling_list_id: usize), /// Visit a 32bit int date representing days since UNIX epoch 1970-01-01. - pub visit_date: extern "C" fn(data: *mut c_void, value: i32) -> usize, + pub visit_date: extern "C" fn(data: *mut c_void, value: i32, sibling_list_id: usize), /// Visit binary data at the `buffer` with length `len`. - pub visit_binary: extern "C" fn(data: *mut c_void, buffer: *const u8, len: usize) -> usize, + pub visit_binary: + extern "C" fn(data: *mut c_void, buffer: *const u8, len: usize, sibling_list_id: usize), /// Visit a 128bit `decimal` value with the given precision and scale. The 128bit integer /// is split into the most significant 64 bits in `value_ms`, and the least significant 64 /// bits in `value_ls`. @@ -442,93 +446,86 @@ pub struct EngineExpressionVisitor { value_ls: u64, // Least significant 64 bits of decimal value precision: u8, scale: u8, - ) -> usize, + sibling_list_id: usize, + ), /// Visits a null value. - pub visit_null: extern "C" fn(data: *mut c_void) -> usize, + pub visit_null: extern "C" fn(data: *mut c_void, sibling_list_id: usize), /// Visits an `and` expression which is made of a list of sub-expressions. This declares the /// number of sub-expressions that the `and` expression will be made of. The visitor will populate /// the list of expressions using the [`visit_variadic_sub_expr`] method. - pub visit_and: extern "C" fn(data: *mut c_void, len: usize) -> usize, + pub visit_and: extern "C" fn(data: *mut c_void, child_list_id: usize, sibling_list_id: usize), /// Visits an `or` expression which is made of a list of sub-expressions. This declares the /// number of sub-expressions that the `or` expression will be made of. The visitor will populate /// the list of expressions using the [`visit_variadic_sub_expr`] method. 
- pub visit_or: extern "C" fn(data: *mut c_void, len: usize) -> usize, - /// Visits a variadic sub-expression. This appends a sub-expression to a variadic expression - /// constructed in either [`visit_and`] or [`visit_or`]. The variadic expression is identified - /// by `variadic_id`, and the sub-expression is identified by `sub_expr_id`. - pub visit_variadic_sub_expr: - extern "C" fn(data: *mut c_void, variadic_id: usize, sub_expr_id: usize), + pub visit_or: extern "C" fn(data: *mut c_void, child_list_id: usize, sibling_list_id: usize), ///Visits a `not` expression, bulit using the sub-expression `inner_expr`. - pub visit_not: extern "C" fn(data: *mut c_void, inner_expr: usize) -> usize, + pub visit_not: extern "C" fn(data: *mut c_void, child_list_id: usize, sibling_list_id: usize), ///Visits an `is_null` expression, built using the sub-expression `inner_expr`. - pub visit_is_null: extern "C" fn(data: *mut c_void, inner_expr: usize) -> usize, + pub visit_is_null: + extern "C" fn(data: *mut c_void, child_list_id: usize, sibling_list_id: usize), /// Visit the `less than` binary operation, which takes the left sub expression id `a` and the /// right sub-expression id `b`. - pub visit_lt: extern "C" fn(data: *mut c_void, a: usize, b: usize) -> usize, + pub visit_lt: extern "C" fn(data: *mut c_void, child_list_id: usize, sibling_list_id: usize), /// Visit the `less than or equal` binary operation, which takes the left sub expression id `a` /// and the right sub-expression id `b`. - pub visit_le: extern "C" fn(data: *mut c_void, a: usize, b: usize) -> usize, + pub visit_le: extern "C" fn(data: *mut c_void, child_list_id: usize, sibling_list_id: usize), /// Visit the `greater than` binary operation, which takes the left sub expression id `a` /// and the right sub-expression id `b`. 
- pub visit_gt: extern "C" fn(data: *mut c_void, a: usize, b: usize) -> usize, + pub visit_gt: extern "C" fn(data: *mut c_void, child_list_id: usize, sibling_list_id: usize), /// Visit the `greater than or equal` binary operation, which takes the left sub expression id `a` /// and the right sub-expression id `b`. - pub visit_ge: extern "C" fn(data: *mut c_void, a: usize, b: usize) -> usize, + pub visit_ge: extern "C" fn(data: *mut c_void, child_list_id: usize, sibling_list_id: usize), /// Visit the `equal` binary operation, which takes the left sub expression id `a` /// and the right sub-expression id `b`. - pub visit_eq: extern "C" fn(data: *mut c_void, a: usize, b: usize) -> usize, + pub visit_eq: extern "C" fn(data: *mut c_void, child_list_id: usize, sibling_list_id: usize), /// Visit the `not equal` binary operation, which takes the left sub expression id `a` /// and the right sub-expression id `b`. - pub visit_ne: extern "C" fn(data: *mut c_void, a: usize, b: usize) -> usize, + pub visit_ne: extern "C" fn(data: *mut c_void, child_list_id: usize, sibling_list_id: usize), /// Visit the `distinct` binary operation, which takes the left sub expression id `a` /// and the right sub-expression id `b`. - pub visit_distinct: extern "C" fn(data: *mut c_void, a: usize, b: usize) -> usize, + pub visit_distinct: + extern "C" fn(data: *mut c_void, child_list_id: usize, sibling_list_id: usize), /// Visit the `in` binary operation, which takes the left sub expression id `a` /// and the right sub-expression id `b`. - pub visit_in: extern "C" fn(data: *mut c_void, a: usize, b: usize) -> usize, + pub visit_in: extern "C" fn(data: *mut c_void, child_list_id: usize, sibling_list_id: usize), /// Visit the `not in` binary operation, which takes the left sub expression id `a` /// and the right sub-expression id `b`. 
- pub visit_not_in: extern "C" fn(data: *mut c_void, a: usize, b: usize) -> usize, + pub visit_not_in: + extern "C" fn(data: *mut c_void, child_list_id: usize, sibling_list_id: usize), /// Visit the `add` binary operation, which takes the left sub expression id `a` /// and the right sub-expression id `b`. - pub visit_add: extern "C" fn(data: *mut c_void, a: usize, b: usize) -> usize, + pub visit_add: extern "C" fn(data: *mut c_void, child_list_id: usize, sibling_list_id: usize), /// Visit the `minus` binary operation, which takes the left sub expression id `a` /// and the right sub-expression id `b`. - pub visit_minus: extern "C" fn(data: *mut c_void, a: usize, b: usize) -> usize, + pub visit_minus: extern "C" fn(data: *mut c_void, child_list_id: usize, sibling_list_id: usize), /// Visit the `multiply` binary operation, which takes the left sub expression id `a` /// and the right sub-expression id `b`. - pub visit_multiply: extern "C" fn(data: *mut c_void, a: usize, b: usize) -> usize, + pub visit_multiply: + extern "C" fn(data: *mut c_void, child_list_id: usize, sibling_list_id: usize), /// Visit the `divide` binary operation, which takes the left sub expression id `a` /// and the right sub-expression id `b`. - pub visit_divide: extern "C" fn(data: *mut c_void, a: usize, b: usize) -> usize, + pub visit_divide: + extern "C" fn(data: *mut c_void, child_list_id: usize, sibling_list_id: usize), /// Visit the `colmun` identified by the `name` string. - pub visit_column: extern "C" fn(data: *mut c_void, name: KernelStringSlice) -> usize, + pub visit_column: + extern "C" fn(data: *mut c_void, name: KernelStringSlice, sibling_list_id: usize), /// Visit a `struct` which is constructed from an ordered list of expressions. This declares /// the number of expressions that the struct will be made of. The visitor will populate the /// list of expressions using the [`visit_struct_sub_expr`] method. 
- pub visit_struct: extern "C" fn(data: *mut c_void, len: usize) -> usize, - /// Visits a `struct` sub expression. This appends a sub-expression to a struct constructed by - /// [`visit_struct`]. The struct is identified by `struct_id`, and the sub-expression is identified - /// by `expr_id`. - pub visit_struct_sub_expr: extern "C" fn(data: *mut c_void, struct_id: usize, expr_id: usize), + pub visit_struct: + extern "C" fn(data: *mut c_void, child_list_id: usize, sibling_list_id: usize), /// Visit a struct literal which is made up of a list of field names and values. This declares /// the number of fields that the struct will have. The visitor will populate the struct fields /// using the [`visit_struct_literal_field`] method. - pub visit_struct_literal: extern "C" fn(data: *mut c_void, num_fields: usize) -> usize, - /// Visit a struct literal field. This adds a field to the struct declared by [`visit_struct_literal`]. - /// The struct literal is identified by `struct_id`. The sub-expression is identified by `expr_id`. - pub visit_struct_literal_field: extern "C" fn( + pub visit_struct_literal: extern "C" fn( data: *mut c_void, - struct_id: usize, - field_name: KernelStringSlice, - field_value: usize, + child_field_list_id: usize, + child_value_list_id: usize, + sibling_list_id: usize, ), /// Visit an `arary`, declaring the length `len`. The visitor will populate the array /// elements using the [`visit_array_element`] method. - pub visit_array: extern "C" fn(data: *mut c_void, len: usize) -> usize, - /// Visit an array element. This adds the element to the array declared in [`visit_array`]. 
The - /// array is identified by `array_id`, and the element identified by `element_id` - pub visit_array_element: extern "C" fn(data: *mut c_void, array_id: usize, element_id: usize), + pub visit_array: extern "C" fn(data: *mut c_void, child_list_id: usize, sibling_list_id: usize), } /// Visit the expression of the passed [`SharedExpression`] Handle using the provided `visitor`. @@ -550,116 +547,159 @@ pub unsafe extern "C" fn visit_expression( ($visitor.$visitor_fn)($visitor.data $(, $extra_args) *) }; } - fn visit_array(visitor: &mut EngineExpressionVisitor, array: &ArrayData) -> usize { + fn visit_array( + visitor: &mut EngineExpressionVisitor, + array: &ArrayData, + sibling_list_id: usize, + ) { #[allow(deprecated)] let elements = array.array_elements(); - let array_id = call!(visitor, visit_array, elements.len()); + let child_list_id = call!(visitor, make_field_list, elements.len()); for scalar in elements { - let scalar_id = visit_scalar(visitor, scalar); - call!(visitor, visit_array_element, array_id, scalar_id); + visit_scalar(visitor, scalar, child_list_id); } - array_id + call!(visitor, visit_array, child_list_id, sibling_list_id); } fn visit_struct_literal( visitor: &mut EngineExpressionVisitor, struct_data: &StructData, - ) -> usize { - let struct_id = call!(visitor, visit_struct_literal, struct_data.fields().len()); + sibling_list_id: usize, + ) { + let child_value_list_id = call!(visitor, make_field_list, struct_data.fields().len()); + let child_field_list_id = call!(visitor, make_field_list, struct_data.fields().len()); for (field, value) in struct_data.fields().iter().zip(struct_data.values()) { - let value_id = visit_scalar(visitor, value); - call!( + visit_scalar( visitor, - visit_struct_literal_field, - struct_id, - field.name().into(), - value_id + &Scalar::String(field.name.clone()), + child_field_list_id, ); + visit_scalar(visitor, value, child_value_list_id); } - struct_id + call!( + visitor, + visit_struct_literal, + child_field_list_id, + 
child_value_list_id, + sibling_list_id + ) } - fn visit_struct(visitor: &mut EngineExpressionVisitor, exprs: &Vec) -> usize { - let expr_struct_id = call!(visitor, visit_struct, exprs.len()); + fn visit_struct( + visitor: &mut EngineExpressionVisitor, + exprs: &Vec, + sibling_list_id: usize, + ) { + let child_list_id = call!(visitor, make_field_list, exprs.len()); for expr in exprs { - let expr_id = visit_expression(visitor, expr); - call!(visitor, visit_struct_sub_expr, expr_struct_id, expr_id) + visit_expression_impl(visitor, expr, child_list_id); } - expr_struct_id + call!(visitor, visit_struct, child_list_id, sibling_list_id) } fn visit_variadic( visitor: &mut EngineExpressionVisitor, op: &VariadicOperator, exprs: &Vec, - ) -> usize { + sibling_list_id: usize, + ) { + let child_list_id = call!(visitor, make_field_list, exprs.len()); + for expr in exprs { + visit_expression_impl(visitor, expr, child_list_id); + } + let visit_fn = match op { VariadicOperator::And => &visitor.visit_and, VariadicOperator::Or => &visitor.visit_or, }; - let variadic_id = visit_fn(visitor.data, exprs.len()); - for expr in exprs { - let expr_id = visit_expression(visitor, expr); - call!(visitor, visit_variadic_sub_expr, variadic_id, expr_id) - } - variadic_id + visit_fn(visitor.data, child_list_id, sibling_list_id); } - fn visit_scalar(visitor: &mut EngineExpressionVisitor, scalar: &Scalar) -> usize { + fn visit_scalar( + visitor: &mut EngineExpressionVisitor, + scalar: &Scalar, + sibling_list_id: usize, + ) { match scalar { - Scalar::Integer(val) => call!(visitor, visit_int, *val), - Scalar::Long(val) => call!(visitor, visit_long, *val), - Scalar::Short(val) => call!(visitor, visit_short, *val), - Scalar::Byte(val) => call!(visitor, visit_byte, *val), - Scalar::Float(val) => call!(visitor, visit_float, *val), - Scalar::Double(val) => call!(visitor, visit_double, *val), - Scalar::String(val) => call!(visitor, visit_string, val.into()), - Scalar::Boolean(val) => call!(visitor, 
visit_bool, *val), - Scalar::Timestamp(val) => call!(visitor, visit_timestamp, *val), - Scalar::TimestampNtz(val) => call!(visitor, visit_timestamp_ntz, *val), - Scalar::Date(val) => call!(visitor, visit_date, *val), - Scalar::Binary(buf) => call!(visitor, visit_binary, buf.as_ptr(), buf.len()), + Scalar::Integer(val) => call!(visitor, visit_int, *val, sibling_list_id), + Scalar::Long(val) => call!(visitor, visit_long, *val, sibling_list_id), + Scalar::Short(val) => call!(visitor, visit_short, *val, sibling_list_id), + Scalar::Byte(val) => call!(visitor, visit_byte, *val, sibling_list_id), + Scalar::Float(val) => call!(visitor, visit_float, *val, sibling_list_id), + Scalar::Double(val) => call!(visitor, visit_double, *val, sibling_list_id), + Scalar::String(val) => call!(visitor, visit_string, val.into(), sibling_list_id), + Scalar::Boolean(val) => call!(visitor, visit_bool, *val, sibling_list_id), + Scalar::Timestamp(val) => call!(visitor, visit_timestamp, *val, sibling_list_id), + Scalar::TimestampNtz(val) => call!(visitor, visit_timestamp_ntz, *val, sibling_list_id), + Scalar::Date(val) => call!(visitor, visit_date, *val, sibling_list_id), + Scalar::Binary(buf) => call!( + visitor, + visit_binary, + buf.as_ptr(), + buf.len(), + sibling_list_id + ), Scalar::Decimal(value, precision, scale) => { let ms: u64 = (value >> 64) as u64; let ls: u64 = *value as u64; - call!(visitor, visit_decimal, ms, ls, *precision, *scale) + call!( + visitor, + visit_decimal, + ms, + ls, + *precision, + *scale, + sibling_list_id + ) } - Scalar::Null(_) => call!(visitor, visit_null), - Scalar::Struct(struct_data) => visit_struct_literal(visitor, struct_data), - Scalar::Array(array) => visit_array(visitor, array), + Scalar::Null(_) => call!(visitor, visit_null, sibling_list_id), + Scalar::Struct(struct_data) => { + visit_struct_literal(visitor, struct_data, sibling_list_id) + } + Scalar::Array(array) => visit_array(visitor, array, sibling_list_id), } } - fn visit_expression(visitor: &mut 
EngineExpressionVisitor, expression: &Expression) -> usize { + fn visit_expression_impl( + visitor: &mut EngineExpressionVisitor, + expression: &Expression, + sibling_list_id: usize, + ) { match expression { - Expression::Literal(scalar) => visit_scalar(visitor, scalar), - Expression::Column(name) => call!(visitor, visit_column, name.into()), - Expression::Struct(exprs) => visit_struct(visitor, exprs), + Expression::Literal(scalar) => visit_scalar(visitor, scalar, sibling_list_id), + Expression::Column(name) => call!(visitor, visit_column, name.into(), sibling_list_id), + Expression::Struct(exprs) => visit_struct(visitor, exprs, sibling_list_id), Expression::BinaryOperation { op, left, right } => { - let left_id = visit_expression(visitor, left); - let right_id = visit_expression(visitor, right); - match op { - BinaryOperator::Plus => call!(visitor, visit_add, left_id, right_id), - BinaryOperator::Minus => call!(visitor, visit_minus, left_id, right_id), - BinaryOperator::Multiply => call!(visitor, visit_multiply, left_id, right_id), - BinaryOperator::Divide => call!(visitor, visit_divide, left_id, right_id), - BinaryOperator::LessThan => call!(visitor, visit_lt, left_id, right_id), - BinaryOperator::LessThanOrEqual => call!(visitor, visit_le, left_id, right_id), - BinaryOperator::GreaterThan => call!(visitor, visit_gt, left_id, right_id), - BinaryOperator::GreaterThanOrEqual => { - call!(visitor, visit_ge, left_id, right_id) - } - BinaryOperator::Equal => call!(visitor, visit_eq, left_id, right_id), - BinaryOperator::NotEqual => call!(visitor, visit_ne, left_id, right_id), - BinaryOperator::Distinct => call!(visitor, visit_distinct, left_id, right_id), - BinaryOperator::In => call!(visitor, visit_in, left_id, right_id), - BinaryOperator::NotIn => call!(visitor, visit_not_in, left_id, right_id), - } + let child_list_id = call!(visitor, make_field_list, 2); + visit_expression_impl(visitor, left, child_list_id); + visit_expression_impl(visitor, right, child_list_id); 
+ let op = match op { + BinaryOperator::Plus => visitor.visit_add, + BinaryOperator::Minus => visitor.visit_minus, + BinaryOperator::Multiply => visitor.visit_multiply, + BinaryOperator::Divide => visitor.visit_divide, + BinaryOperator::LessThan => visitor.visit_lt, + BinaryOperator::LessThanOrEqual => visitor.visit_le, + BinaryOperator::GreaterThan => visitor.visit_gt, + BinaryOperator::GreaterThanOrEqual => visitor.visit_ge, + BinaryOperator::Equal => visitor.visit_eq, + BinaryOperator::NotEqual => visitor.visit_ne, + BinaryOperator::Distinct => visitor.visit_distinct, + BinaryOperator::In => visitor.visit_in, + BinaryOperator::NotIn => visitor.visit_not_in, + }; + op(visitor.data, child_list_id, sibling_list_id); } Expression::UnaryOperation { op, expr } => { - let expr_id = visit_expression(visitor, expr); - match op { - UnaryOperator::Not => call!(visitor, visit_not, expr_id), - UnaryOperator::IsNull => call!(visitor, visit_is_null, expr_id), - } + let child_id_list = call!(visitor, make_field_list, 1); + visit_expression_impl(visitor, expr, child_id_list); + let op = match op { + UnaryOperator::Not => visitor.visit_not, + UnaryOperator::IsNull => visitor.visit_is_null, + }; + op(visitor.data, child_id_list, sibling_list_id); + } + Expression::VariadicOperation { op, exprs } => { + visit_variadic(visitor, op, exprs, sibling_list_id) } - Expression::VariadicOperation { op, exprs } => visit_variadic(visitor, op, exprs), } } - visit_expression(visitor, expression.as_ref()) + let top_level = call!(visitor, make_field_list, 1); + visit_expression_impl(visitor, expression.as_ref(), top_level); + top_level } From c8f80a399a8e1640b1eb83bb2146d815a9a34297 Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Thu, 10 Oct 2024 19:01:45 -0700 Subject: [PATCH 32/82] Fix free --- ffi/examples/read-table/expression.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/ffi/examples/read-table/expression.h b/ffi/examples/read-table/expression.h index 361f4b437..c85cd1800 100644 
--- a/ffi/examples/read-table/expression.h +++ b/ffi/examples/read-table/expression.h @@ -416,13 +416,11 @@ void free_expression_item(ExpressionItem ref) struct Struct* struct_data = &lit->value.struct_data; free_expression_item_list(struct_data->values); free_expression_item_list(struct_data->fields); - free(struct_data); break; } case Array: { struct ArrayData* array = &lit->value.array_data; free_expression_item_list(array->expr_list); - free(array); break; } case String: { From 006adfbcdee9f731f4b1fe77e729b9a06e86e31b Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Fri, 11 Oct 2024 09:35:53 -0700 Subject: [PATCH 33/82] fix visit_struct naming --- ffi/examples/read-table/expression.h | 4 ++-- ffi/src/expressions.rs | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/ffi/examples/read-table/expression.h b/ffi/examples/read-table/expression.h index c85cd1800..5bfe5205f 100644 --- a/ffi/examples/read-table/expression.h +++ b/ffi/examples/read-table/expression.h @@ -268,7 +268,7 @@ void visit_expr_variadic( } DEFINE_VARIADIC(visit_expr_and, And) DEFINE_VARIADIC(visit_expr_or, Or) -DEFINE_VARIADIC(visit_expr_struct, StructConstructor) +DEFINE_VARIADIC(visit_expr_struct_expr, StructConstructor) void visit_expr_array(void* data, uintptr_t child_list_id, uintptr_t sibling_list_id) { @@ -382,7 +382,7 @@ ExpressionItem construct_predicate(SharedExpression* predicate) .visit_multiply = visit_expr_multiply, .visit_divide = visit_expr_divide, .visit_column = visit_expr_column, - .visit_struct = visit_expr_struct, + .visit_struct_expr = visit_expr_struct_expr, .visit_null = visit_expr_null, .visit_struct_literal = visit_expr_struct_literal, .visit_array = visit_expr_array, diff --git a/ffi/src/expressions.rs b/ffi/src/expressions.rs index 8226a8a40..325a39d31 100644 --- a/ffi/src/expressions.rs +++ b/ffi/src/expressions.rs @@ -512,7 +512,7 @@ pub struct EngineExpressionVisitor { /// Visit a `struct` which is constructed from an ordered list of 
expressions. This declares /// the number of expressions that the struct will be made of. The visitor will populate the /// list of expressions using the [`visit_struct_sub_expr`] method. - pub visit_struct: + pub visit_struct_expr: extern "C" fn(data: *mut c_void, child_list_id: usize, sibling_list_id: usize), /// Visit a struct literal which is made up of a list of field names and values. This declares /// the number of fields that the struct will have. The visitor will populate the struct fields @@ -583,7 +583,7 @@ pub unsafe extern "C" fn visit_expression( sibling_list_id ) } - fn visit_struct( + fn visit_struct_expr( visitor: &mut EngineExpressionVisitor, exprs: &Vec, sibling_list_id: usize, @@ -592,7 +592,7 @@ pub unsafe extern "C" fn visit_expression( for expr in exprs { visit_expression_impl(visitor, expr, child_list_id); } - call!(visitor, visit_struct, child_list_id, sibling_list_id) + call!(visitor, visit_struct_expr, child_list_id, sibling_list_id) } fn visit_variadic( visitor: &mut EngineExpressionVisitor, @@ -663,7 +663,7 @@ pub unsafe extern "C" fn visit_expression( match expression { Expression::Literal(scalar) => visit_scalar(visitor, scalar, sibling_list_id), Expression::Column(name) => call!(visitor, visit_column, name.into(), sibling_list_id), - Expression::Struct(exprs) => visit_struct(visitor, exprs, sibling_list_id), + Expression::Struct(exprs) => visit_struct_expr(visitor, exprs, sibling_list_id), Expression::BinaryOperation { op, left, right } => { let child_list_id = call!(visitor, make_field_list, 2); visit_expression_impl(visitor, left, child_list_id); From 30e0596dc3d8cade680cf44d5b327fc59f72d432 Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Fri, 11 Oct 2024 09:43:01 -0700 Subject: [PATCH 34/82] Fix formtting and style in test expr --- ffi/src/expressions.rs | 96 +++++++++++++----------------------------- 1 file changed, 30 insertions(+), 66 deletions(-) diff --git a/ffi/src/expressions.rs b/ffi/src/expressions.rs index 
325a39d31..3366856b7 100644 --- a/ffi/src/expressions.rs +++ b/ffi/src/expressions.rs @@ -276,23 +276,26 @@ pub unsafe extern "C" fn get_kernel_expression() -> Handle { false, ); let array_data = ArrayData::new(array_type.clone(), vec![Scalar::Short(5), Scalar::Short(0)]); + let nested_fields = vec![ StructField::new("a", DataType::Primitive(PrimitiveType::Integer), false), StructField::new("b", DataType::Array(Box::new(array_type)), false), ]; let nested_values = vec![Scalar::Integer(500), Scalar::Array(array_data.clone())]; - let nested = StructData::try_new(nested_fields.clone(), nested_values).unwrap(); - let nested_type = StructType::new(nested_fields); - let top = StructData::try_new( + let nested_struct = StructData::try_new(nested_fields.clone(), nested_values).unwrap(); + let nested_struct_type = StructType::new(nested_fields); + + let top_level_struct = StructData::try_new( vec![StructField::new( "top", - DataType::Struct(Box::new(nested_type)), + DataType::Struct(Box::new(nested_struct_type)), true, )], - vec![Scalar::Struct(nested)], + vec![Scalar::Struct(nested_struct)], ) .unwrap(); - Arc::new(Expr::and_from(vec![ + + let mut sub_exprs = vec![ Expr::literal(Scalar::Byte(i8::MAX)), Expr::literal(Scalar::Byte(i8::MIN)), Expr::literal(Scalar::Float(f32::MAX)), @@ -313,80 +316,41 @@ pub unsafe extern "C" fn get_kernel_expression() -> Handle { // Both the most and least significant u64 of the Decimal value will be 1 Expr::literal(Scalar::Decimal((1 << 64) + 1, 2, 3)), Expr::literal(Scalar::Null(DataType::Primitive(PrimitiveType::Short))), - Expr::literal(Scalar::Struct(top)), + Expr::literal(Scalar::Struct(top_level_struct)), Expr::literal(Scalar::Array(array_data)), - Expr::binary( + Expr::struct_expr(vec![Expr::or_from(vec![ + Expr::literal(Scalar::Integer(5)), + Expr::literal(Scalar::Long(20)), + ])]), + Expr::not(Expr::is_null(Expr::column("col"))), + ]; + sub_exprs.extend( + [ BinaryOperator::In, - Expr::literal(Scalar::Integer(0)), - 
Expr::literal(Scalar::Long(0)), - ), - Expr::binary( BinaryOperator::Plus, - Expr::literal(Scalar::Integer(0)), - Expr::literal(Scalar::Long(0)), - ), - Expr::binary( BinaryOperator::Minus, - Expr::literal(Scalar::Integer(0)), - Expr::literal(Scalar::Long(0)), - ), - Expr::binary( BinaryOperator::Equal, - Expr::literal(Scalar::Integer(0)), - Expr::literal(Scalar::Long(0)), - ), - Expr::binary( BinaryOperator::NotEqual, - Expr::literal(Scalar::Integer(0)), - Expr::literal(Scalar::Long(0)), - ), - Expr::binary( BinaryOperator::NotIn, - Expr::literal(Scalar::Integer(0)), - Expr::literal(Scalar::Long(0)), - ), - Expr::binary( BinaryOperator::Divide, - Expr::literal(Scalar::Integer(0)), - Expr::literal(Scalar::Long(0)), - ), - Expr::binary( BinaryOperator::Multiply, - Expr::literal(Scalar::Integer(0)), - Expr::literal(Scalar::Long(0)), - ), - Expr::binary( BinaryOperator::LessThan, - Expr::literal(Scalar::Integer(0)), - Expr::literal(Scalar::Long(0)), - ), - Expr::binary( BinaryOperator::LessThanOrEqual, - Expr::literal(Scalar::Integer(0)), - Expr::literal(Scalar::Long(0)), - ), - Expr::binary( BinaryOperator::GreaterThan, - Expr::literal(Scalar::Integer(0)), - Expr::literal(Scalar::Long(0)), - ), - Expr::binary( BinaryOperator::GreaterThanOrEqual, - Expr::literal(Scalar::Integer(0)), - Expr::literal(Scalar::Long(0)), - ), - Expr::binary( BinaryOperator::Distinct, - Expr::literal(Scalar::Integer(0)), - Expr::literal(Scalar::Long(0)), - ), - Expr::struct_expr(vec![Expr::or_from(vec![ - Expr::literal(Scalar::Integer(5)), - Expr::literal(Scalar::Long(20)), - ])]), - Expr::not(Expr::is_null(Expr::column("col"))), - ])) - .into() + ] + .iter() + .map(|op| { + Expr::binary( + *op, + Expr::literal(Scalar::Integer(0)), + Expr::literal(Scalar::Long(0)), + ) + }), + ); + + Arc::new(Expr::and_from(sub_exprs)).into() } /// The [`EngineExpressionVisitor`] defines a visitor system to allow engines to build their own From aef2928b475f57dd240bec3091cffe51057f15f9 Mon Sep 17 00:00:00 
2001 From: Oussama Saoudi Date: Fri, 11 Oct 2024 09:52:12 -0700 Subject: [PATCH 35/82] Remove KernelStringSlice from c side code --- ffi/examples/read-table/expression.h | 36 +++++++++------------------- ffi/examples/read-table/test_expr.c | 3 ++- 2 files changed, 13 insertions(+), 26 deletions(-) diff --git a/ffi/examples/read-table/expression.h b/ffi/examples/read-table/expression.h index 5bfe5205f..aba19caf3 100644 --- a/ffi/examples/read-table/expression.h +++ b/ffi/examples/read-table/expression.h @@ -162,7 +162,7 @@ struct Literal float float_data; double double_data; bool boolean_data; - struct KernelStringSlice string_data; + char* string_data; struct Struct struct_data; struct ArrayData array_data; struct BinaryData binary; @@ -185,14 +185,6 @@ ExpressionItemList get_handle(void* data, size_t list_id) } return data_ptr->lists[list_id]; } -KernelStringSlice copy_kernel_string(KernelStringSlice string) -{ - char* contents = malloc(string.len + 1); - strncpy(contents, string.ptr, string.len); - contents[string.len] = '\0'; - KernelStringSlice out = { .len = string.len, .ptr = contents }; - return out; -} void visit_expr_binop( void* data, @@ -223,7 +215,7 @@ void visit_expr_string(void* data, KernelStringSlice string, uintptr_t sibling_l { struct Literal* literal = malloc(sizeof(struct Literal)); literal->type = String; - literal->value.string_data = copy_kernel_string(string); + literal->value.string_data = strndup(string.ptr, string.len); put_handle(data, literal, Literal, sibling_list_id); } @@ -326,9 +318,8 @@ DEFINE_UNARY(visit_expr_not, Not) void visit_expr_column(void* data, KernelStringSlice string, uintptr_t sibling_id_list) { - struct KernelStringSlice* heap_string = malloc(sizeof(KernelStringSlice)); - *heap_string = copy_kernel_string(string); - put_handle(data, heap_string, Column, sibling_id_list); + char* column_name = strndup(string.ptr, string.len); + put_handle(data, column_name, Column, sibling_id_list); } uintptr_t make_field_list(void* 
data, uintptr_t reserve) @@ -343,8 +334,7 @@ uintptr_t make_field_list(void* data, uintptr_t reserve) return id; } -// Print the schema of the snapshot -ExpressionItem construct_predicate(SharedExpression* predicate) +ExpressionItemList construct_predicate(SharedExpression* predicate) { ExpressionBuilder data = { 0 }; data.lists = malloc(sizeof(ExpressionItem) * 100); @@ -388,8 +378,7 @@ ExpressionItem construct_predicate(SharedExpression* predicate) .visit_array = visit_expr_array, }; uintptr_t top_level_id = visit_expression(&predicate, &visitor); - ExpressionItem ret = data.lists[top_level_id].exprList[0]; - return ret; + return data.lists[top_level_id]; } void free_expression_item_list(ExpressionItemList list); @@ -424,8 +413,7 @@ void free_expression_item(ExpressionItem ref) break; } case String: { - struct KernelStringSlice* string = &lit->value.string_data; - free((void*)string->ptr); + free(lit->value.string_data); break; } case Binary: { @@ -457,9 +445,7 @@ void free_expression_item(ExpressionItem ref) break; } case Column: { - KernelStringSlice* string = ref.ref; - free((void*)string->ptr); - free(string); + free((void*)ref.ref); break; } } @@ -595,7 +581,7 @@ void print_tree(ExpressionItem ref, int depth) printf("(%f)\n", lit->value.double_data); break; case String: { - printf("String(%s)\n", lit->value.string_data.ptr); + printf("String(%s)\n", lit->value.string_data); break; } case Boolean: @@ -665,8 +651,8 @@ void print_tree(ExpressionItem ref, int depth) } case Column: print_n_spaces(depth); - KernelStringSlice* string = ref.ref; - printf("Column(%s)\n", string->ptr); + char* column_name = ref.ref; + printf("Column(%s)\n", column_name); break; } } diff --git a/ffi/examples/read-table/test_expr.c b/ffi/examples/read-table/test_expr.c index 8146a0611..1918ddf5b 100644 --- a/ffi/examples/read-table/test_expr.c +++ b/ffi/examples/read-table/test_expr.c @@ -2,6 +2,7 @@ int main() { SharedExpression* pred = get_kernel_expression(); - ExpressionItem ref = 
construct_predicate(pred); + ExpressionItemList list = construct_predicate(pred); + ExpressionItem ref = list.exprList[0]; print_tree(ref, 0); } From 3cfeb2af0475de13216e138fc45132155c7c6f3b Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Fri, 11 Oct 2024 10:02:27 -0700 Subject: [PATCH 36/82] Move to using allocate_string --- ffi/examples/read-table/CMakeLists.txt | 2 +- ffi/examples/read-table/expression.h | 10 ++++++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/ffi/examples/read-table/CMakeLists.txt b/ffi/examples/read-table/CMakeLists.txt index 88b3a17bf..50f82418b 100644 --- a/ffi/examples/read-table/CMakeLists.txt +++ b/ffi/examples/read-table/CMakeLists.txt @@ -56,7 +56,7 @@ else() target_compile_options(test_expr PRIVATE -Wall -Wextra -Wpedantic -Werror -Wno-strict-prototypes -g) endif() -# Add the read_table test +# Add the kernel expresion -> engine expression test include(CTest) set(TestRunner "../../../tests/test_expression_visitor/run_test.sh") set(ExpectedPath "../../../tests/test_expression_visitor/expected.txt") diff --git a/ffi/examples/read-table/expression.h b/ffi/examples/read-table/expression.h index aba19caf3..4bac8912f 100644 --- a/ffi/examples/read-table/expression.h +++ b/ffi/examples/read-table/expression.h @@ -186,6 +186,12 @@ ExpressionItemList get_handle(void* data, size_t list_id) return data_ptr->lists[list_id]; } +// utility to turn a slice into a char* +void* allocate_string(const KernelStringSlice slice) +{ + return strndup(slice.ptr, slice.len); +} + void visit_expr_binop( void* data, enum OpType op, @@ -215,7 +221,7 @@ void visit_expr_string(void* data, KernelStringSlice string, uintptr_t sibling_l { struct Literal* literal = malloc(sizeof(struct Literal)); literal->type = String; - literal->value.string_data = strndup(string.ptr, string.len); + literal->value.string_data = allocate_string(string); put_handle(data, literal, Literal, sibling_list_id); } @@ -318,7 +324,7 @@ DEFINE_UNARY(visit_expr_not, Not) 
void visit_expr_column(void* data, KernelStringSlice string, uintptr_t sibling_id_list) { - char* column_name = strndup(string.ptr, string.len); + char* column_name = allocate_string(string); put_handle(data, column_name, Column, sibling_id_list); } From f2d4cf4ee5963e1507f017eeb688ec66eae0aac7 Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Fri, 11 Oct 2024 10:04:08 -0700 Subject: [PATCH 37/82] Remove read_table dependency in expresison --- ffi/examples/read-table/expression.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ffi/examples/read-table/expression.h b/ffi/examples/read-table/expression.h index 4bac8912f..44b2972d0 100644 --- a/ffi/examples/read-table/expression.h +++ b/ffi/examples/read-table/expression.h @@ -1,6 +1,5 @@ #include "assert.h" #include "delta_kernel_ffi.h" -#include "read_table.h" #include #include #include @@ -187,7 +186,7 @@ ExpressionItemList get_handle(void* data, size_t list_id) } // utility to turn a slice into a char* -void* allocate_string(const KernelStringSlice slice) +char* allocate_string(const KernelStringSlice slice) { return strndup(slice.ptr, slice.len); } From 15e52eb551cd2bc4efaa1ba9a3726428b8ec64ef Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Fri, 11 Oct 2024 13:25:15 -0700 Subject: [PATCH 38/82] Fix tests, change enum naming to be clearer --- ffi/examples/read-table/CMakeLists.txt | 6 +- ffi/examples/read-table/expression.h | 138 +++++++++--------- .../test_expression_visitor/expected.txt | 34 ++--- ffi/tests/test_expression_visitor/run_test.sh | 5 +- 4 files changed, 92 insertions(+), 91 deletions(-) mode change 100644 => 100755 ffi/tests/test_expression_visitor/run_test.sh diff --git a/ffi/examples/read-table/CMakeLists.txt b/ffi/examples/read-table/CMakeLists.txt index 50f82418b..87b174ec2 100644 --- a/ffi/examples/read-table/CMakeLists.txt +++ b/ffi/examples/read-table/CMakeLists.txt @@ -58,6 +58,6 @@ endif() # Add the kernel expresion -> engine expression test include(CTest) 
-set(TestRunner "../../../tests/test_expression_visitor/run_test.sh") -set(ExpectedPath "../../../tests/test_expression_visitor/expected.txt") -add_test(NAME test_expression_visitor COMMAND ./test_expr ${DatPath}) +set(ExprTestRunner "../../../tests/test_expression_visitor/run_test.sh") +set(ExprExpectedPath "../../../tests/test_expression_visitor/expected.txt") +add_test(NAME test_expression_visitor COMMAND ${ExprTestRunner} ${ExprExpectedPath}) diff --git a/ffi/examples/read-table/expression.h b/ffi/examples/read-table/expression.h index 44b2972d0..adf0a4939 100644 --- a/ffi/examples/read-table/expression.h +++ b/ffi/examples/read-table/expression.h @@ -14,12 +14,12 @@ * Each expression is an "ExpressionItem", which tracks the type and pointer to the expression. */ -#define DEFINE_BINOP(fun_name, op) \ +#define DEFINotEqual_BINOP(fun_name, op) \ void fun_name(void* data, uintptr_t child_list_id, uintptr_t sibling_list_id) \ { \ visit_expr_binop(data, op, child_list_id, sibling_list_id); \ } -#define DEFINE_SIMPLE_SCALAR(fun_name, enum_member, c_type, literal_field) \ +#define DEFINotEqual_SIMPLessThanOrEqual_SCALAR(fun_name, enum_member, c_type, literal_field) \ void fun_name(void* data, c_type val, uintptr_t sibling_list_id) \ { \ struct Literal* lit = malloc(sizeof(struct Literal)); \ @@ -29,12 +29,12 @@ } \ _Static_assert( \ sizeof(c_type) <= sizeof(uintptr_t), "The provided type is not a valid simple scalar") -#define DEFINE_VARIADIC(fun_name, enum_member) \ +#define DEFINotEqual_VARIADIC(fun_name, enum_member) \ void fun_name(void* data, uintptr_t child_list_id, uintptr_t sibling_list_id) \ { \ visit_expr_variadic(data, enum_member, child_list_id, sibling_list_id); \ } -#define DEFINE_UNARY(fun_name, op) \ +#define DEFINotEqual_UNARY(fun_name, op) \ void fun_name(void* data, uintptr_t child_list_id, uintptr_t sibling_list_id) \ { \ visit_expr_unary(data, op, child_list_id, sibling_list_id); \ @@ -42,15 +42,15 @@ enum OpType { Add, - Sub, - Div, - Mul, - LT, 
- LE, - GT, - GE, - EQ, - NE, + Minus, + Divide, + Multiply, + LessThan, + LessThanOrEqual, + GreaterThan, + GreaterThaneOrEqual, + Equal, + NotEqual, Distinct, In, NotIn, @@ -104,7 +104,7 @@ enum VariadicType { And, Or, - StructConstructor, + StructExpression, ArrayData }; enum UnaryType @@ -202,21 +202,19 @@ void visit_expr_binop( binop->exprs = get_handle(data, child_id_list); put_handle(data, binop, BinOp, sibling_id_list); } -DEFINE_BINOP(visit_expr_add, Add) -DEFINE_BINOP(visit_expr_minus, Sub) -DEFINE_BINOP(visit_expr_multiply, Mul) -DEFINE_BINOP(visit_expr_divide, Div) -DEFINE_BINOP(visit_expr_lt, LT) -DEFINE_BINOP(visit_expr_le, LE) -DEFINE_BINOP(visit_expr_gt, GT) -DEFINE_BINOP(visit_expr_ge, GE) -DEFINE_BINOP(visit_expr_eq, EQ) -DEFINE_BINOP(visit_expr_ne, NE) -DEFINE_BINOP(visit_expr_distinct, Distinct) -DEFINE_BINOP(visit_expr_in, In) -DEFINE_BINOP(visit_expr_not_in, NotIn) +DEFINotEqual_BINOP(visit_expr_add, Add) DEFINotEqual_BINOP(visit_expr_minus, Minus) + DEFINotEqual_BINOP(visit_expr_multiply, Multiply) DEFINotEqual_BINOP(visit_expr_divide, Divide) + DEFINotEqual_BINOP(visit_expr_lt, LessThan) DEFINotEqual_BINOP(visit_expr_le, LessThanOrEqual) + DEFINotEqual_BINOP(visit_expr_gt, GreaterThan) + DEFINotEqual_BINOP(visit_expr_ge, GreaterThaneOrEqual) + DEFINotEqual_BINOP(visit_expr_eq, Equal) DEFINotEqual_BINOP(visit_expr_ne, NotEqual) + DEFINotEqual_BINOP(visit_expr_distinct, Distinct) DEFINotEqual_BINOP(visit_expr_in, In) + DEFINotEqual_BINOP(visit_expr_not_in, NotIn) -void visit_expr_string(void* data, KernelStringSlice string, uintptr_t sibling_list_id) + void visit_expr_string( + void* data, + KernelStringSlice string, + uintptr_t sibling_list_id) { struct Literal* literal = malloc(sizeof(struct Literal)); literal->type = String; @@ -241,16 +239,16 @@ void visit_expr_decimal( dec->scale = scale; put_handle(data, literal, Literal, sibling_list_id); } -DEFINE_SIMPLE_SCALAR(visit_expr_int, Integer, int32_t, integer_data); 
-DEFINE_SIMPLE_SCALAR(visit_expr_long, Long, int64_t, long_data); -DEFINE_SIMPLE_SCALAR(visit_expr_short, Short, int16_t, short_data); -DEFINE_SIMPLE_SCALAR(visit_expr_byte, Byte, int8_t, byte_data); -DEFINE_SIMPLE_SCALAR(visit_expr_float, Float, float, float_data); -DEFINE_SIMPLE_SCALAR(visit_expr_double, Double, double, double_data); -DEFINE_SIMPLE_SCALAR(visit_expr_boolean, Boolean, _Bool, boolean_data); -DEFINE_SIMPLE_SCALAR(visit_expr_timestamp, Timestamp, int64_t, long_data); -DEFINE_SIMPLE_SCALAR(visit_expr_timestamp_ntz, TimestampNtz, int64_t, long_data); -DEFINE_SIMPLE_SCALAR(visit_expr_date, Date, int32_t, integer_data); +DEFINotEqual_SIMPLessThanOrEqual_SCALAR(visit_expr_int, Integer, int32_t, integer_data); +DEFINotEqual_SIMPLessThanOrEqual_SCALAR(visit_expr_long, Long, int64_t, long_data); +DEFINotEqual_SIMPLessThanOrEqual_SCALAR(visit_expr_short, Short, int16_t, short_data); +DEFINotEqual_SIMPLessThanOrEqual_SCALAR(visit_expr_byte, Byte, int8_t, byte_data); +DEFINotEqual_SIMPLessThanOrEqual_SCALAR(visit_expr_float, Float, float, float_data); +DEFINotEqual_SIMPLessThanOrEqual_SCALAR(visit_expr_double, Double, double, double_data); +DEFINotEqual_SIMPLessThanOrEqual_SCALAR(visit_expr_boolean, Boolean, _Bool, boolean_data); +DEFINotEqual_SIMPLessThanOrEqual_SCALAR(visit_expr_timestamp, Timestamp, int64_t, long_data); +DEFINotEqual_SIMPLessThanOrEqual_SCALAR(visit_expr_timestamp_ntz, TimestampNtz, int64_t, long_data); +DEFINotEqual_SIMPLessThanOrEqual_SCALAR(visit_expr_date, Date, int32_t, integer_data); void visit_expr_variadic( void* data, @@ -263,11 +261,10 @@ void visit_expr_variadic( var->expr_list = get_handle(data, child_list_id); put_handle(data, var, Variadic, sibling_list_id); } -DEFINE_VARIADIC(visit_expr_and, And) -DEFINE_VARIADIC(visit_expr_or, Or) -DEFINE_VARIADIC(visit_expr_struct_expr, StructConstructor) +DEFINotEqual_VARIADIC(visit_expr_and, And) DEFINotEqual_VARIADIC(visit_expr_or, Or) + DEFINotEqual_VARIADIC(visit_expr_struct_expr, 
StructExpression) -void visit_expr_array(void* data, uintptr_t child_list_id, uintptr_t sibling_list_id) + void visit_expr_array(void* data, uintptr_t child_list_id, uintptr_t sibling_list_id) { struct Literal* literal = malloc(sizeof(struct Literal)); literal->type = Array; @@ -318,10 +315,9 @@ void visit_expr_unary( unary->sub_expr = get_handle(data, child_list_id); put_handle(data, unary, Unary, sibling_list_id); } -DEFINE_UNARY(visit_expr_is_null, IsNull) -DEFINE_UNARY(visit_expr_not, Not) +DEFINotEqual_UNARY(visit_expr_is_null, IsNull) DEFINotEqual_UNARY(visit_expr_not, Not) -void visit_expr_column(void* data, KernelStringSlice string, uintptr_t sibling_id_list) + void visit_expr_column(void* data, KernelStringSlice string, uintptr_t sibling_id_list) { char* column_name = allocate_string(string); put_handle(data, column_name, Column, sibling_id_list); @@ -478,43 +474,43 @@ void print_tree(ExpressionItem ref, int depth) print_n_spaces(depth); switch (op->op) { case Add: { - printf("ADD\n"); + printf("Add\n"); break; } - case Sub: { - printf("SUB\n"); + case Minus: { + printf("Minus\n"); break; }; - case Div: { - printf("DIV\n"); + case Divide: { + printf("Divide\n"); break; }; - case Mul: { - printf("MUL\n"); + case Multiply: { + printf("Multiply\n"); break; }; - case LT: { - printf("LT\n"); + case LessThan: { + printf("LessThan\n"); break; }; - case LE: { - printf("LE\n"); + case LessThanOrEqual: { + printf("LessThanOrEqual\n"); break; } - case GT: { - printf("GT\n"); + case GreaterThan: { + printf("GreaterThan\n"); break; }; - case GE: { - printf("GE\n"); + case GreaterThaneOrEqual: { + printf("GreaterThanOrEqual\n"); break; }; - case EQ: { - printf("EQ\n"); + case Equal: { + printf("Equal\n"); break; }; - case NE: { - printf("NE\n"); + case NotEqual: { + printf("NotEqual\n"); break; }; case In: { @@ -546,8 +542,8 @@ void print_tree(ExpressionItem ref, int depth) case Or: printf("Or\n"); break; - case StructConstructor: - printf("StructConstructor\n"); + case 
StructExpression: + printf("StructExpression\n"); break; case ArrayData: printf("ArrayData\n"); @@ -626,8 +622,14 @@ void print_tree(ExpressionItem ref, int depth) struct Struct* struct_data = &lit->value.struct_data; for (size_t i = 0; i < struct_data->values.len; i++) { print_n_spaces(depth + 1); - printf("Field\n"); - print_tree(struct_data->fields.exprList[i], depth + 2); + + // Extract field name from field + ExpressionItem item = struct_data->fields.exprList[i]; + assert(item.type == Literal); + struct Literal* lit = item.ref; + assert(lit->type == String); + + printf("Field: %s\n", lit->value.string_data); print_tree(struct_data->values.exprList[i], depth + 2); } break; diff --git a/ffi/tests/test_expression_visitor/expected.txt b/ffi/tests/test_expression_visitor/expected.txt index e966d0446..eda433cdc 100644 --- a/ffi/tests/test_expression_visitor/expected.txt +++ b/ffi/tests/test_expression_visitor/expected.txt @@ -30,49 +30,49 @@ And Array Short(5) Short(0) + StructExpression + Or + Integer(5) + Long(20) + Not + IsNull + Column(col) In Integer(0) Long(0) - ADD + Add Integer(0) Long(0) - SUB + Minus Integer(0) Long(0) - EQ + Equal Integer(0) Long(0) - NE + NotEqual Integer(0) Long(0) NotIn Integer(0) Long(0) - DIV + Divide Integer(0) Long(0) - MUL + Multiply Integer(0) Long(0) - LT + LessThan Integer(0) Long(0) - LE + LessThanOrEqual Integer(0) Long(0) - GT + GreaterThan Integer(0) Long(0) - GE + GreaterThanOrEqual Integer(0) Long(0) Distinct Integer(0) Long(0) - StructConstructor - Or - Integer(5) - Long(20) - Not - IsNull - Column(col) diff --git a/ffi/tests/test_expression_visitor/run_test.sh b/ffi/tests/test_expression_visitor/run_test.sh old mode 100644 new mode 100755 index 1e8fd7f47..06a061627 --- a/ffi/tests/test_expression_visitor/run_test.sh +++ b/ffi/tests/test_expression_visitor/run_test.sh @@ -1,11 +1,10 @@ - #!/bin/bash set -euxo pipefail OUT_FILE=$(mktemp) -./read_table "$1" | tee "$OUT_FILE" -diff -s "$OUT_FILE" "$2" +./test_expr | tee 
"$OUT_FILE" +diff -s "$OUT_FILE" "$1" DIFF_EXIT_CODE=$? echo "Diff exited with $DIFF_EXIT_CODE" rm "$OUT_FILE" From 595280f57f290c09549f52966a883455cafe2201 Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Fri, 11 Oct 2024 13:30:22 -0700 Subject: [PATCH 39/82] Use system header for assert --- ffi/examples/read-table/expression.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ffi/examples/read-table/expression.h b/ffi/examples/read-table/expression.h index adf0a4939..603a0f124 100644 --- a/ffi/examples/read-table/expression.h +++ b/ffi/examples/read-table/expression.h @@ -1,5 +1,5 @@ -#include "assert.h" #include "delta_kernel_ffi.h" +#include #include #include #include From 7e7c847709fd6318c02a11a885a6f9ab9581d0f3 Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Fri, 11 Oct 2024 13:55:28 -0700 Subject: [PATCH 40/82] rename literals --- ffi/examples/read-table/expression.h | 108 ++++++++++++----------- ffi/src/expressions.rs | 91 ++++++++++--------- ffi/tests/read-table-testing/run_test.sh | 2 +- 3 files changed, 109 insertions(+), 92 deletions(-) diff --git a/ffi/examples/read-table/expression.h b/ffi/examples/read-table/expression.h index 603a0f124..b40d23027 100644 --- a/ffi/examples/read-table/expression.h +++ b/ffi/examples/read-table/expression.h @@ -14,12 +14,12 @@ * Each expression is an "ExpressionItem", which tracks the type and pointer to the expression. 
*/ -#define DEFINotEqual_BINOP(fun_name, op) \ +#define DEFINE_BINOP(fun_name, op) \ void fun_name(void* data, uintptr_t child_list_id, uintptr_t sibling_list_id) \ { \ visit_expr_binop(data, op, child_list_id, sibling_list_id); \ } -#define DEFINotEqual_SIMPLessThanOrEqual_SCALAR(fun_name, enum_member, c_type, literal_field) \ +#define DEFINE_SIMPLE_SCALAR(fun_name, enum_member, c_type, literal_field) \ void fun_name(void* data, c_type val, uintptr_t sibling_list_id) \ { \ struct Literal* lit = malloc(sizeof(struct Literal)); \ @@ -29,12 +29,12 @@ } \ _Static_assert( \ sizeof(c_type) <= sizeof(uintptr_t), "The provided type is not a valid simple scalar") -#define DEFINotEqual_VARIADIC(fun_name, enum_member) \ +#define DEFINE_VARIADIC(fun_name, enum_member) \ void fun_name(void* data, uintptr_t child_list_id, uintptr_t sibling_list_id) \ { \ visit_expr_variadic(data, enum_member, child_list_id, sibling_list_id); \ } -#define DEFINotEqual_UNARY(fun_name, op) \ +#define DEFINE_UNARY(fun_name, op) \ void fun_name(void* data, uintptr_t child_list_id, uintptr_t sibling_list_id) \ { \ visit_expr_unary(data, op, child_list_id, sibling_list_id); \ @@ -202,19 +202,21 @@ void visit_expr_binop( binop->exprs = get_handle(data, child_id_list); put_handle(data, binop, BinOp, sibling_id_list); } -DEFINotEqual_BINOP(visit_expr_add, Add) DEFINotEqual_BINOP(visit_expr_minus, Minus) - DEFINotEqual_BINOP(visit_expr_multiply, Multiply) DEFINotEqual_BINOP(visit_expr_divide, Divide) - DEFINotEqual_BINOP(visit_expr_lt, LessThan) DEFINotEqual_BINOP(visit_expr_le, LessThanOrEqual) - DEFINotEqual_BINOP(visit_expr_gt, GreaterThan) - DEFINotEqual_BINOP(visit_expr_ge, GreaterThaneOrEqual) - DEFINotEqual_BINOP(visit_expr_eq, Equal) DEFINotEqual_BINOP(visit_expr_ne, NotEqual) - DEFINotEqual_BINOP(visit_expr_distinct, Distinct) DEFINotEqual_BINOP(visit_expr_in, In) - DEFINotEqual_BINOP(visit_expr_not_in, NotIn) +DEFINE_BINOP(visit_expr_add, Add) +DEFINE_BINOP(visit_expr_minus, Minus) 
+DEFINE_BINOP(visit_expr_multiply, Multiply) +DEFINE_BINOP(visit_expr_divide, Divide) +DEFINE_BINOP(visit_expr_lt, LessThan) +DEFINE_BINOP(visit_expr_le, LessThanOrEqual) +DEFINE_BINOP(visit_expr_gt, GreaterThan) +DEFINE_BINOP(visit_expr_ge, GreaterThaneOrEqual) +DEFINE_BINOP(visit_expr_eq, Equal) +DEFINE_BINOP(visit_expr_ne, NotEqual) +DEFINE_BINOP(visit_expr_distinct, Distinct) +DEFINE_BINOP(visit_expr_in, In) +DEFINE_BINOP(visit_expr_not_in, NotIn) - void visit_expr_string( - void* data, - KernelStringSlice string, - uintptr_t sibling_list_id) +void visit_expr_string_literal(void* data, KernelStringSlice string, uintptr_t sibling_list_id) { struct Literal* literal = malloc(sizeof(struct Literal)); literal->type = String; @@ -222,7 +224,7 @@ DEFINotEqual_BINOP(visit_expr_add, Add) DEFINotEqual_BINOP(visit_expr_minus, Min put_handle(data, literal, Literal, sibling_list_id); } -void visit_expr_decimal( +void visit_expr_decimal_literal( void* data, uint64_t value_ms, uint64_t value_ls, @@ -239,16 +241,16 @@ void visit_expr_decimal( dec->scale = scale; put_handle(data, literal, Literal, sibling_list_id); } -DEFINotEqual_SIMPLessThanOrEqual_SCALAR(visit_expr_int, Integer, int32_t, integer_data); -DEFINotEqual_SIMPLessThanOrEqual_SCALAR(visit_expr_long, Long, int64_t, long_data); -DEFINotEqual_SIMPLessThanOrEqual_SCALAR(visit_expr_short, Short, int16_t, short_data); -DEFINotEqual_SIMPLessThanOrEqual_SCALAR(visit_expr_byte, Byte, int8_t, byte_data); -DEFINotEqual_SIMPLessThanOrEqual_SCALAR(visit_expr_float, Float, float, float_data); -DEFINotEqual_SIMPLessThanOrEqual_SCALAR(visit_expr_double, Double, double, double_data); -DEFINotEqual_SIMPLessThanOrEqual_SCALAR(visit_expr_boolean, Boolean, _Bool, boolean_data); -DEFINotEqual_SIMPLessThanOrEqual_SCALAR(visit_expr_timestamp, Timestamp, int64_t, long_data); -DEFINotEqual_SIMPLessThanOrEqual_SCALAR(visit_expr_timestamp_ntz, TimestampNtz, int64_t, long_data); -DEFINotEqual_SIMPLessThanOrEqual_SCALAR(visit_expr_date, Date, 
int32_t, integer_data); +DEFINE_SIMPLE_SCALAR(visit_expr_int_literal, Integer, int32_t, integer_data); +DEFINE_SIMPLE_SCALAR(visit_expr_long_literal, Long, int64_t, long_data); +DEFINE_SIMPLE_SCALAR(visit_expr_short_literal, Short, int16_t, short_data); +DEFINE_SIMPLE_SCALAR(visit_expr_byte_literal, Byte, int8_t, byte_data); +DEFINE_SIMPLE_SCALAR(visit_expr_float_literal, Float, float, float_data); +DEFINE_SIMPLE_SCALAR(visit_expr_double_literal, Double, double, double_data); +DEFINE_SIMPLE_SCALAR(visit_expr_boolean_literal, Boolean, _Bool, boolean_data); +DEFINE_SIMPLE_SCALAR(visit_expr_timestamp_literal, Timestamp, int64_t, long_data); +DEFINE_SIMPLE_SCALAR(visit_expr_timestamp_ntz_literal, TimestampNtz, int64_t, long_data); +DEFINE_SIMPLE_SCALAR(visit_expr_date_literal, Date, int32_t, integer_data); void visit_expr_variadic( void* data, @@ -261,10 +263,11 @@ void visit_expr_variadic( var->expr_list = get_handle(data, child_list_id); put_handle(data, var, Variadic, sibling_list_id); } -DEFINotEqual_VARIADIC(visit_expr_and, And) DEFINotEqual_VARIADIC(visit_expr_or, Or) - DEFINotEqual_VARIADIC(visit_expr_struct_expr, StructExpression) +DEFINE_VARIADIC(visit_expr_and, And) +DEFINE_VARIADIC(visit_expr_or, Or) +DEFINE_VARIADIC(visit_expr_struct_expr, StructExpression) - void visit_expr_array(void* data, uintptr_t child_list_id, uintptr_t sibling_list_id) +void visit_expr_array_literal(void* data, uintptr_t child_list_id, uintptr_t sibling_list_id) { struct Literal* literal = malloc(sizeof(struct Literal)); literal->type = Array; @@ -273,7 +276,11 @@ DEFINotEqual_VARIADIC(visit_expr_and, And) DEFINotEqual_VARIADIC(visit_expr_or, put_handle(data, literal, Literal, sibling_list_id); } -void visit_expr_binary(void* data, const uint8_t* buf, uintptr_t len, uintptr_t sibling_list_id) +void visit_expr_binary_literal( + void* data, + const uint8_t* buf, + uintptr_t len, + uintptr_t sibling_list_id) { struct Literal* literal = malloc(sizeof(struct Literal)); literal->type = 
Binary; @@ -297,7 +304,7 @@ void visit_expr_struct_literal( put_handle(data, literal, Literal, sibling_list_id); } -void visit_expr_null(void* data, uintptr_t sibling_id_list) +void visit_expr_null_literal(void* data, uintptr_t sibling_id_list) { struct Literal* literal = malloc(sizeof(struct Literal)); literal->type = Null; @@ -315,9 +322,10 @@ void visit_expr_unary( unary->sub_expr = get_handle(data, child_list_id); put_handle(data, unary, Unary, sibling_list_id); } -DEFINotEqual_UNARY(visit_expr_is_null, IsNull) DEFINotEqual_UNARY(visit_expr_not, Not) +DEFINE_UNARY(visit_expr_is_null, IsNull) +DEFINE_UNARY(visit_expr_not, Not) - void visit_expr_column(void* data, KernelStringSlice string, uintptr_t sibling_id_list) +void visit_expr_column(void* data, KernelStringSlice string, uintptr_t sibling_id_list) { char* column_name = allocate_string(string); put_handle(data, column_name, Column, sibling_id_list); @@ -342,19 +350,22 @@ ExpressionItemList construct_predicate(SharedExpression* predicate) EngineExpressionVisitor visitor = { .data = &data, .make_field_list = make_field_list, - .visit_int = visit_expr_int, - .visit_long = visit_expr_long, - .visit_short = visit_expr_short, - .visit_byte = visit_expr_byte, - .visit_float = visit_expr_float, - .visit_double = visit_expr_double, - .visit_bool = visit_expr_boolean, - .visit_timestamp = visit_expr_timestamp, - .visit_timestamp_ntz = visit_expr_timestamp_ntz, - .visit_date = visit_expr_date, - .visit_binary = visit_expr_binary, - .visit_decimal = visit_expr_decimal, - .visit_string = visit_expr_string, + .visit_int_literal = visit_expr_int_literal, + .visit_long_literal = visit_expr_long_literal, + .visit_short_literal = visit_expr_short_literal, + .visit_byte_literal = visit_expr_byte_literal, + .visit_float_literal = visit_expr_float_literal, + .visit_double_literal = visit_expr_double_literal, + .visit_bool_literal = visit_expr_boolean_literal, + .visit_timestamp_literal = visit_expr_timestamp_literal, + 
.visit_timestamp_ntz_literal = visit_expr_timestamp_ntz_literal, + .visit_date_literal = visit_expr_date_literal, + .visit_binary_literal = visit_expr_binary_literal, + .visit_null_literal = visit_expr_null_literal, + .visit_decimal_literal = visit_expr_decimal_literal, + .visit_string_literal = visit_expr_string_literal, + .visit_struct_literal = visit_expr_struct_literal, + .visit_array_literal = visit_expr_array_literal, .visit_and = visit_expr_and, .visit_or = visit_expr_or, .visit_not = visit_expr_not, @@ -374,9 +385,6 @@ ExpressionItemList construct_predicate(SharedExpression* predicate) .visit_divide = visit_expr_divide, .visit_column = visit_expr_column, .visit_struct_expr = visit_expr_struct_expr, - .visit_null = visit_expr_null, - .visit_struct_literal = visit_expr_struct_literal, - .visit_array = visit_expr_array, }; uintptr_t top_level_id = visit_expression(&predicate, &visitor); return data.lists[top_level_id]; diff --git a/ffi/src/expressions.rs b/ffi/src/expressions.rs index 3366856b7..d30b371e5 100644 --- a/ffi/src/expressions.rs +++ b/ffi/src/expressions.rs @@ -376,35 +376,37 @@ pub struct EngineExpressionVisitor { /// Creates a new expression list, optionally reserving capacity up front pub make_field_list: extern "C" fn(data: *mut c_void, reserve: usize) -> usize, /// Visit a 32bit `integer - pub visit_int: extern "C" fn(data: *mut c_void, value: i32, sibling_list_id: usize), + pub visit_int_literal: extern "C" fn(data: *mut c_void, value: i32, sibling_list_id: usize), /// Visit a 64bit `long`. - pub visit_long: extern "C" fn(data: *mut c_void, value: i64, sibling_list_id: usize), + pub visit_long_literal: extern "C" fn(data: *mut c_void, value: i64, sibling_list_id: usize), /// Visit a 16bit `short`. - pub visit_short: extern "C" fn(data: *mut c_void, value: i16, sibling_list_id: usize), + pub visit_short_literal: extern "C" fn(data: *mut c_void, value: i16, sibling_list_id: usize), /// Visit an 8bit `byte`. 
- pub visit_byte: extern "C" fn(data: *mut c_void, value: i8, sibling_list_id: usize), + pub visit_byte_literal: extern "C" fn(data: *mut c_void, value: i8, sibling_list_id: usize), /// Visit a 32bit `float`. - pub visit_float: extern "C" fn(data: *mut c_void, value: f32, sibling_list_id: usize), + pub visit_float_literal: extern "C" fn(data: *mut c_void, value: f32, sibling_list_id: usize), /// Visit a 64bit `double`. - pub visit_double: extern "C" fn(data: *mut c_void, value: f64, sibling_list_id: usize), + pub visit_double_literal: extern "C" fn(data: *mut c_void, value: f64, sibling_list_id: usize), /// Visit a `string`. - pub visit_string: + pub visit_string_literal: extern "C" fn(data: *mut c_void, value: KernelStringSlice, sibling_list_id: usize), /// Visit a `boolean`. - pub visit_bool: extern "C" fn(data: *mut c_void, value: bool, sibling_list_id: usize), + pub visit_bool_literal: extern "C" fn(data: *mut c_void, value: bool, sibling_list_id: usize), /// Visit a 64bit timestamp. The timestamp is microsecond precision and adjusted to UTC. - pub visit_timestamp: extern "C" fn(data: *mut c_void, value: i64, sibling_list_id: usize), + pub visit_timestamp_literal: + extern "C" fn(data: *mut c_void, value: i64, sibling_list_id: usize), /// Visit a 64bit timestamp. The timestamp is microsecond precision with no timezone. - pub visit_timestamp_ntz: extern "C" fn(data: *mut c_void, value: i64, sibling_list_id: usize), + pub visit_timestamp_ntz_literal: + extern "C" fn(data: *mut c_void, value: i64, sibling_list_id: usize), /// Visit a 32bit int date representing days since UNIX epoch 1970-01-01. - pub visit_date: extern "C" fn(data: *mut c_void, value: i32, sibling_list_id: usize), + pub visit_date_literal: extern "C" fn(data: *mut c_void, value: i32, sibling_list_id: usize), /// Visit binary data at the `buffer` with length `len`. 
- pub visit_binary: + pub visit_binary_literal: extern "C" fn(data: *mut c_void, buffer: *const u8, len: usize, sibling_list_id: usize), /// Visit a 128bit `decimal` value with the given precision and scale. The 128bit integer /// is split into the most significant 64 bits in `value_ms`, and the least significant 64 /// bits in `value_ls`. - pub visit_decimal: extern "C" fn( + pub visit_decimal_literal: extern "C" fn( data: *mut c_void, value_ms: u64, // Most significant 64 bits of decimal value value_ls: u64, // Least significant 64 bits of decimal value @@ -412,8 +414,21 @@ pub struct EngineExpressionVisitor { scale: u8, sibling_list_id: usize, ), + /// Visit a struct literal which is made up of a list of field names and values. This declares + /// the number of fields that the struct will have. The visitor will populate the struct fields + /// using the [`visit_struct_literal_field`] method. + pub visit_struct_literal: extern "C" fn( + data: *mut c_void, + child_field_list_value: usize, + child_value_list_id: usize, + sibling_list_id: usize, + ), + /// Visit an `arary`, declaring the length `len`. The visitor will populate the array + /// elements using the [`visit_array_element`] method. + pub visit_array_literal: + extern "C" fn(data: *mut c_void, child_list_id: usize, sibling_list_id: usize), /// Visits a null value. - pub visit_null: extern "C" fn(data: *mut c_void, sibling_list_id: usize), + pub visit_null_literal: extern "C" fn(data: *mut c_void, sibling_list_id: usize), /// Visits an `and` expression which is made of a list of sub-expressions. This declares the /// number of sub-expressions that the `and` expression will be made of. The visitor will populate /// the list of expressions using the [`visit_variadic_sub_expr`] method. @@ -478,18 +493,6 @@ pub struct EngineExpressionVisitor { /// list of expressions using the [`visit_struct_sub_expr`] method. 
pub visit_struct_expr: extern "C" fn(data: *mut c_void, child_list_id: usize, sibling_list_id: usize), - /// Visit a struct literal which is made up of a list of field names and values. This declares - /// the number of fields that the struct will have. The visitor will populate the struct fields - /// using the [`visit_struct_literal_field`] method. - pub visit_struct_literal: extern "C" fn( - data: *mut c_void, - child_field_list_value: usize, - child_value_list_id: usize, - sibling_list_id: usize, - ), - /// Visit an `arary`, declaring the length `len`. The visitor will populate the array - /// elements using the [`visit_array_element`] method. - pub visit_array: extern "C" fn(data: *mut c_void, child_list_id: usize, sibling_list_id: usize), } /// Visit the expression of the passed [`SharedExpression`] Handle using the provided `visitor`. @@ -522,7 +525,7 @@ pub unsafe extern "C" fn visit_expression( for scalar in elements { visit_scalar(visitor, scalar, child_list_id); } - call!(visitor, visit_array, child_list_id, sibling_list_id); + call!(visitor, visit_array_literal, child_list_id, sibling_list_id); } fn visit_struct_literal( visitor: &mut EngineExpressionVisitor, @@ -581,20 +584,26 @@ pub unsafe extern "C" fn visit_expression( sibling_list_id: usize, ) { match scalar { - Scalar::Integer(val) => call!(visitor, visit_int, *val, sibling_list_id), - Scalar::Long(val) => call!(visitor, visit_long, *val, sibling_list_id), - Scalar::Short(val) => call!(visitor, visit_short, *val, sibling_list_id), - Scalar::Byte(val) => call!(visitor, visit_byte, *val, sibling_list_id), - Scalar::Float(val) => call!(visitor, visit_float, *val, sibling_list_id), - Scalar::Double(val) => call!(visitor, visit_double, *val, sibling_list_id), - Scalar::String(val) => call!(visitor, visit_string, val.into(), sibling_list_id), - Scalar::Boolean(val) => call!(visitor, visit_bool, *val, sibling_list_id), - Scalar::Timestamp(val) => call!(visitor, visit_timestamp, *val, sibling_list_id), - 
Scalar::TimestampNtz(val) => call!(visitor, visit_timestamp_ntz, *val, sibling_list_id), - Scalar::Date(val) => call!(visitor, visit_date, *val, sibling_list_id), + Scalar::Integer(val) => call!(visitor, visit_int_literal, *val, sibling_list_id), + Scalar::Long(val) => call!(visitor, visit_long_literal, *val, sibling_list_id), + Scalar::Short(val) => call!(visitor, visit_short_literal, *val, sibling_list_id), + Scalar::Byte(val) => call!(visitor, visit_byte_literal, *val, sibling_list_id), + Scalar::Float(val) => call!(visitor, visit_float_literal, *val, sibling_list_id), + Scalar::Double(val) => call!(visitor, visit_double_literal, *val, sibling_list_id), + Scalar::String(val) => { + call!(visitor, visit_string_literal, val.into(), sibling_list_id) + } + Scalar::Boolean(val) => call!(visitor, visit_bool_literal, *val, sibling_list_id), + Scalar::Timestamp(val) => { + call!(visitor, visit_timestamp_literal, *val, sibling_list_id) + } + Scalar::TimestampNtz(val) => { + call!(visitor, visit_timestamp_ntz_literal, *val, sibling_list_id) + } + Scalar::Date(val) => call!(visitor, visit_date_literal, *val, sibling_list_id), Scalar::Binary(buf) => call!( visitor, - visit_binary, + visit_binary_literal, buf.as_ptr(), buf.len(), sibling_list_id @@ -604,7 +613,7 @@ pub unsafe extern "C" fn visit_expression( let ls: u64 = *value as u64; call!( visitor, - visit_decimal, + visit_decimal_literal, ms, ls, *precision, @@ -612,7 +621,7 @@ pub unsafe extern "C" fn visit_expression( sibling_list_id ) } - Scalar::Null(_) => call!(visitor, visit_null, sibling_list_id), + Scalar::Null(_) => call!(visitor, visit_null_literal, sibling_list_id), Scalar::Struct(struct_data) => { visit_struct_literal(visitor, struct_data, sibling_list_id) } diff --git a/ffi/tests/read-table-testing/run_test.sh b/ffi/tests/read-table-testing/run_test.sh index 970a444b1..57b5410e2 100755 --- a/ffi/tests/read-table-testing/run_test.sh +++ b/ffi/tests/read-table-testing/run_test.sh @@ -7,6 +7,6 @@ 
OUT_FILE=$(mktemp) diff -s "$OUT_FILE" "$2" DIFF_EXIT_CODE=$? echo "Diff exited with $DIFF_EXIT_CODE" -rm "$OUT_FILE" +# rm "$OUT_FILE" exit "$DIFF_EXIT_CODE" From a6ddd40edbfaa0e723ec349df1c5349f3d18e3a3 Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Fri, 11 Oct 2024 14:32:45 -0700 Subject: [PATCH 41/82] fix memory leaks --- ffi/examples/read-table/CMakeLists.txt | 5 +- ffi/examples/read-table/expression.h | 161 +++++++++++++++---------- ffi/examples/read-table/test_expr.c | 6 +- ffi/src/expressions.rs | 2 +- 4 files changed, 105 insertions(+), 69 deletions(-) diff --git a/ffi/examples/read-table/CMakeLists.txt b/ffi/examples/read-table/CMakeLists.txt index 87b174ec2..244785800 100644 --- a/ffi/examples/read-table/CMakeLists.txt +++ b/ffi/examples/read-table/CMakeLists.txt @@ -41,7 +41,7 @@ if(PRINT_DATA) target_compile_definitions(read_table PUBLIC PRINT_ARROW_DATA) endif(PRINT_DATA) -# Configuration for the `test_expr executable +# Configuration for the `test_expr` executable add_executable(test_expr test_expr.c) target_compile_definitions(test_expr PUBLIC DEFINE_DEFAULT_ENGINE) target_include_directories(test_expr PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/../../../target/ffi-headers") @@ -52,8 +52,7 @@ target_compile_options(test_expr PUBLIC) if(MSVC) target_compile_options(test_expr PRIVATE /W4 /WX) else() - # no-strict-prototypes because arrow headers have fn defs without prototypes - target_compile_options(test_expr PRIVATE -Wall -Wextra -Wpedantic -Werror -Wno-strict-prototypes -g) + target_compile_options(test_expr PRIVATE -Wall -Wextra -Wpedantic -Werror -Wno-strict-prototypes) endif() # Add the kernel expresion -> engine expression test diff --git a/ffi/examples/read-table/expression.h b/ffi/examples/read-table/expression.h index b40d23027..98a8bbd79 100644 --- a/ffi/examples/read-table/expression.h +++ b/ffi/examples/read-table/expression.h @@ -8,37 +8,18 @@ /** * This module defines a very simple model of an expression, used only to be able to print the 
* provided expression. It consists of an "ExpressionBuilder" which is our user data that gets - * passed into each visit_x call. This simply keeps track of all the expressions we are asked to - * allocate. + * passed into each visit_x call. This simply keeps track of all the lists we are asked to allocate. * - * Each expression is an "ExpressionItem", which tracks the type and pointer to the expression. + * Each item "ExpressionItem", which tracks the type and pointer to the expression. + * + * Each complex type is made of an "ExpressionItemList", which tracks its length and an array of + * "ExpressionItems". The top level expression is in a length 1 "ExpressionItemList". */ -#define DEFINE_BINOP(fun_name, op) \ - void fun_name(void* data, uintptr_t child_list_id, uintptr_t sibling_list_id) \ - { \ - visit_expr_binop(data, op, child_list_id, sibling_list_id); \ - } -#define DEFINE_SIMPLE_SCALAR(fun_name, enum_member, c_type, literal_field) \ - void fun_name(void* data, c_type val, uintptr_t sibling_list_id) \ - { \ - struct Literal* lit = malloc(sizeof(struct Literal)); \ - lit->type = enum_member; \ - lit->value.literal_field = val; \ - put_handle(data, lit, Literal, sibling_list_id); \ - } \ - _Static_assert( \ - sizeof(c_type) <= sizeof(uintptr_t), "The provided type is not a valid simple scalar") -#define DEFINE_VARIADIC(fun_name, enum_member) \ - void fun_name(void* data, uintptr_t child_list_id, uintptr_t sibling_list_id) \ - { \ - visit_expr_variadic(data, enum_member, child_list_id, sibling_list_id); \ - } -#define DEFINE_UNARY(fun_name, op) \ - void fun_name(void* data, uintptr_t child_list_id, uintptr_t sibling_list_id) \ - { \ - visit_expr_unary(data, op, child_list_id, sibling_list_id); \ - } +/************************************************************* + * Data Types + ************************************************************/ + enum OpType { Add, @@ -169,6 +150,10 @@ struct Literal } value; }; 
+/************************************************************* + * Utilitiy functions + ************************************************************/ + void put_handle(void* data, void* ref, enum ExpressionType type, size_t sibling_list_id) { ExpressionBuilder* data_ptr = (ExpressionBuilder*)data; @@ -191,6 +176,15 @@ char* allocate_string(const KernelStringSlice slice) return strndup(slice.ptr, slice.len); } +/************************************************************* + * Binary Operations + ************************************************************/ + +#define DEFINE_BINOP(fun_name, op) \ + void fun_name(void* data, uintptr_t child_list_id, uintptr_t sibling_list_id) \ + { \ + visit_expr_binop(data, op, child_list_id, sibling_list_id); \ + } void visit_expr_binop( void* data, enum OpType op, @@ -216,6 +210,31 @@ DEFINE_BINOP(visit_expr_distinct, Distinct) DEFINE_BINOP(visit_expr_in, In) DEFINE_BINOP(visit_expr_not_in, NotIn) +/************************************************************* + * Literal Values + ************************************************************/ + +#define DEFINE_SIMPLE_SCALAR(fun_name, enum_member, c_type, literal_field) \ + void fun_name(void* data, c_type val, uintptr_t sibling_list_id) \ + { \ + struct Literal* lit = malloc(sizeof(struct Literal)); \ + lit->type = enum_member; \ + lit->value.literal_field = val; \ + put_handle(data, lit, Literal, sibling_list_id); \ + } \ + _Static_assert( \ + sizeof(c_type) <= sizeof(uintptr_t), "The provided type is not a valid simple scalar") +DEFINE_SIMPLE_SCALAR(visit_expr_int_literal, Integer, int32_t, integer_data); +DEFINE_SIMPLE_SCALAR(visit_expr_long_literal, Long, int64_t, long_data); +DEFINE_SIMPLE_SCALAR(visit_expr_short_literal, Short, int16_t, short_data); +DEFINE_SIMPLE_SCALAR(visit_expr_byte_literal, Byte, int8_t, byte_data); +DEFINE_SIMPLE_SCALAR(visit_expr_float_literal, Float, float, float_data); +DEFINE_SIMPLE_SCALAR(visit_expr_double_literal, Double, double, double_data); 
+DEFINE_SIMPLE_SCALAR(visit_expr_boolean_literal, Boolean, _Bool, boolean_data); +DEFINE_SIMPLE_SCALAR(visit_expr_timestamp_literal, Timestamp, int64_t, long_data); +DEFINE_SIMPLE_SCALAR(visit_expr_timestamp_ntz_literal, TimestampNtz, int64_t, long_data); +DEFINE_SIMPLE_SCALAR(visit_expr_date_literal, Date, int32_t, integer_data); + void visit_expr_string_literal(void* data, KernelStringSlice string, uintptr_t sibling_list_id) { struct Literal* literal = malloc(sizeof(struct Literal)); @@ -241,40 +260,6 @@ void visit_expr_decimal_literal( dec->scale = scale; put_handle(data, literal, Literal, sibling_list_id); } -DEFINE_SIMPLE_SCALAR(visit_expr_int_literal, Integer, int32_t, integer_data); -DEFINE_SIMPLE_SCALAR(visit_expr_long_literal, Long, int64_t, long_data); -DEFINE_SIMPLE_SCALAR(visit_expr_short_literal, Short, int16_t, short_data); -DEFINE_SIMPLE_SCALAR(visit_expr_byte_literal, Byte, int8_t, byte_data); -DEFINE_SIMPLE_SCALAR(visit_expr_float_literal, Float, float, float_data); -DEFINE_SIMPLE_SCALAR(visit_expr_double_literal, Double, double, double_data); -DEFINE_SIMPLE_SCALAR(visit_expr_boolean_literal, Boolean, _Bool, boolean_data); -DEFINE_SIMPLE_SCALAR(visit_expr_timestamp_literal, Timestamp, int64_t, long_data); -DEFINE_SIMPLE_SCALAR(visit_expr_timestamp_ntz_literal, TimestampNtz, int64_t, long_data); -DEFINE_SIMPLE_SCALAR(visit_expr_date_literal, Date, int32_t, integer_data); - -void visit_expr_variadic( - void* data, - enum VariadicType op, - uintptr_t child_list_id, - uintptr_t sibling_list_id) -{ - struct Variadic* var = malloc(sizeof(struct Variadic)); - var->op = op; - var->expr_list = get_handle(data, child_list_id); - put_handle(data, var, Variadic, sibling_list_id); -} -DEFINE_VARIADIC(visit_expr_and, And) -DEFINE_VARIADIC(visit_expr_or, Or) -DEFINE_VARIADIC(visit_expr_struct_expr, StructExpression) - -void visit_expr_array_literal(void* data, uintptr_t child_list_id, uintptr_t sibling_list_id) -{ - struct Literal* literal = malloc(sizeof(struct 
Literal)); - literal->type = Array; - struct ArrayData* arr = &(literal->value.array_data); - arr->expr_list = get_handle(data, child_list_id); - put_handle(data, literal, Literal, sibling_list_id); -} void visit_expr_binary_literal( void* data, @@ -311,6 +296,49 @@ void visit_expr_null_literal(void* data, uintptr_t sibling_id_list) put_handle(data, literal, Literal, sibling_id_list); } +/************************************************************* + * Variadic Expressions + ************************************************************/ + +#define DEFINE_VARIADIC(fun_name, enum_member) \ + void fun_name(void* data, uintptr_t child_list_id, uintptr_t sibling_list_id) \ + { \ + visit_expr_variadic(data, enum_member, child_list_id, sibling_list_id); \ + } + +void visit_expr_variadic( + void* data, + enum VariadicType op, + uintptr_t child_list_id, + uintptr_t sibling_list_id) +{ + struct Variadic* var = malloc(sizeof(struct Variadic)); + var->op = op; + var->expr_list = get_handle(data, child_list_id); + put_handle(data, var, Variadic, sibling_list_id); +} +DEFINE_VARIADIC(visit_expr_and, And) +DEFINE_VARIADIC(visit_expr_or, Or) +DEFINE_VARIADIC(visit_expr_struct_expr, StructExpression) + +void visit_expr_array_literal(void* data, uintptr_t child_list_id, uintptr_t sibling_list_id) +{ + struct Literal* literal = malloc(sizeof(struct Literal)); + literal->type = Array; + struct ArrayData* arr = &(literal->value.array_data); + arr->expr_list = get_handle(data, child_list_id); + put_handle(data, literal, Literal, sibling_list_id); +} + +/************************************************************* + * Unary Expressions + ************************************************************/ +#define DEFINE_UNARY(fun_name, op) \ + void fun_name(void* data, uintptr_t child_list_id, uintptr_t sibling_list_id) \ + { \ + visit_expr_unary(data, op, child_list_id, sibling_list_id); \ + } + void visit_expr_unary( void* data, enum UnaryType type, @@ -325,6 +353,10 @@ void 
visit_expr_unary( DEFINE_UNARY(visit_expr_is_null, IsNull) DEFINE_UNARY(visit_expr_not, Not) +/************************************************************* + * Column Expression + ************************************************************/ + void visit_expr_column(void* data, KernelStringSlice string, uintptr_t sibling_id_list) { char* column_name = allocate_string(string); @@ -346,7 +378,6 @@ uintptr_t make_field_list(void* data, uintptr_t reserve) ExpressionItemList construct_predicate(SharedExpression* predicate) { ExpressionBuilder data = { 0 }; - data.lists = malloc(sizeof(ExpressionItem) * 100); EngineExpressionVisitor visitor = { .data = &data, .make_field_list = make_field_list, @@ -387,7 +418,9 @@ ExpressionItemList construct_predicate(SharedExpression* predicate) .visit_struct_expr = visit_expr_struct_expr, }; uintptr_t top_level_id = visit_expression(&predicate, &visitor); - return data.lists[top_level_id]; + ExpressionItemList top_level_expr = data.lists[top_level_id]; + free(data.lists); + return top_level_expr; } void free_expression_item_list(ExpressionItemList list); diff --git a/ffi/examples/read-table/test_expr.c b/ffi/examples/read-table/test_expr.c index 1918ddf5b..b1136a7ed 100644 --- a/ffi/examples/read-table/test_expr.c +++ b/ffi/examples/read-table/test_expr.c @@ -1,8 +1,12 @@ +#include "delta_kernel_ffi.h" #include "expression.h" int main() { - SharedExpression* pred = get_kernel_expression(); + SharedExpression* pred = get_testing_kernel_expression(); ExpressionItemList list = construct_predicate(pred); ExpressionItem ref = list.exprList[0]; print_tree(ref, 0); + free_expression_item_list(list); + free_kernel_predicate(pred); + return 0; } diff --git a/ffi/src/expressions.rs b/ffi/src/expressions.rs index d30b371e5..3bf9ad550 100644 --- a/ffi/src/expressions.rs +++ b/ffi/src/expressions.rs @@ -268,7 +268,7 @@ pub unsafe extern "C" fn free_kernel_predicate(data: Handle) { /// The caller is responsible for freeing the retured memory, 
either by calling /// [`free_kernel_predicate`], or [`Handle::drop_handle`] #[no_mangle] -pub unsafe extern "C" fn get_kernel_expression() -> Handle { +pub unsafe extern "C" fn get_testing_kernel_expression() -> Handle { use Expression as Expr; let array_type = ArrayType::new( From 24dbce41d2ac501353cb76a2d35d9c9fdd24fea4 Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Fri, 11 Oct 2024 16:13:45 -0700 Subject: [PATCH 42/82] Revert read_table --- ffi/examples/read-table/read_table.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/ffi/examples/read-table/read_table.c b/ffi/examples/read-table/read_table.c index 4c769dd31..b9b2337e5 100644 --- a/ffi/examples/read-table/read_table.c +++ b/ffi/examples/read-table/read_table.c @@ -105,11 +105,7 @@ void scan_row_callback( { (void)size; // not using this at the moment struct EngineContext* context = engine_context; - print_diag( - "Called back to read file: %.*s. (size: %" PRIu64 ", num records: ", - (int)path.len, - path.ptr, - size); + print_diag("Called back to read file: %.*s. (size: %" PRIu64 ", num records: ", (int)path.len, path.ptr, size); if (stats) { print_diag("%" PRId64 ")\n", stats->num_records); } else { From 9019da110cd6be4e921097ed2b8a2e56b4b091a0 Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Fri, 11 Oct 2024 16:20:13 -0700 Subject: [PATCH 43/82] Improve naming --- ffi/examples/read-table/expression.h | 89 ++++++++++++++-------------- ffi/examples/read-table/test_expr.c | 4 +- 2 files changed, 47 insertions(+), 46 deletions(-) diff --git a/ffi/examples/read-table/expression.h b/ffi/examples/read-table/expression.h index 98a8bbd79..2a1104a8a 100644 --- a/ffi/examples/read-table/expression.h +++ b/ffi/examples/read-table/expression.h @@ -13,7 +13,8 @@ * Each item "ExpressionItem", which tracks the type and pointer to the expression. * * Each complex type is made of an "ExpressionItemList", which tracks its length and an array of - * "ExpressionItems". 
The top level expression is in a length 1 "ExpressionItemList". + * "ExpressionItems" that make up the complex type. The top level expression is in a length 1 + * "ExpressionItemList". */ /************************************************************* @@ -72,7 +73,7 @@ typedef struct typedef struct { uint32_t len; - ExpressionItem* exprList; + ExpressionItem* list; } ExpressionItemList; struct BinOp { @@ -96,7 +97,7 @@ enum UnaryType struct Variadic { enum VariadicType op; - ExpressionItemList expr_list; + ExpressionItemList exprs; }; struct Unary { @@ -127,7 +128,7 @@ struct Struct struct ArrayData { - ExpressionItemList expr_list; + ExpressionItemList exprs; }; struct Literal @@ -154,14 +155,14 @@ struct Literal * Utilitiy functions ************************************************************/ -void put_handle(void* data, void* ref, enum ExpressionType type, size_t sibling_list_id) +void put_expr_item(void* data, void* ref, enum ExpressionType type, size_t sibling_list_id) { ExpressionBuilder* data_ptr = (ExpressionBuilder*)data; ExpressionItem expr = { .ref = ref, .type = type }; ExpressionItemList* list = &data_ptr->lists[sibling_list_id]; - list->exprList[list->len++] = expr; + list->list[list->len++] = expr; } -ExpressionItemList get_handle(void* data, size_t list_id) +ExpressionItemList get_expr_list(void* data, size_t list_id) { ExpressionBuilder* data_ptr = (ExpressionBuilder*)data; if (list_id > data_ptr->list_count) { @@ -193,8 +194,8 @@ void visit_expr_binop( { struct BinOp* binop = malloc(sizeof(struct BinOp)); binop->op = op; - binop->exprs = get_handle(data, child_id_list); - put_handle(data, binop, BinOp, sibling_id_list); + binop->exprs = get_expr_list(data, child_id_list); + put_expr_item(data, binop, BinOp, sibling_id_list); } DEFINE_BINOP(visit_expr_add, Add) DEFINE_BINOP(visit_expr_minus, Minus) @@ -220,7 +221,7 @@ DEFINE_BINOP(visit_expr_not_in, NotIn) struct Literal* lit = malloc(sizeof(struct Literal)); \ lit->type = enum_member; \ 
lit->value.literal_field = val; \ - put_handle(data, lit, Literal, sibling_list_id); \ + put_expr_item(data, lit, Literal, sibling_list_id); \ } \ _Static_assert( \ sizeof(c_type) <= sizeof(uintptr_t), "The provided type is not a valid simple scalar") @@ -240,7 +241,7 @@ void visit_expr_string_literal(void* data, KernelStringSlice string, uintptr_t s struct Literal* literal = malloc(sizeof(struct Literal)); literal->type = String; literal->value.string_data = allocate_string(string); - put_handle(data, literal, Literal, sibling_list_id); + put_expr_item(data, literal, Literal, sibling_list_id); } void visit_expr_decimal_literal( @@ -258,7 +259,7 @@ void visit_expr_decimal_literal( dec->value[1] = value_ls; dec->precision = precision; dec->scale = scale; - put_handle(data, literal, Literal, sibling_list_id); + put_expr_item(data, literal, Literal, sibling_list_id); } void visit_expr_binary_literal( @@ -272,7 +273,7 @@ void visit_expr_binary_literal( struct BinaryData* bin = &literal->value.binary; bin->buf = malloc(len); memcpy(bin->buf, buf, len); - put_handle(data, literal, Literal, sibling_list_id); + put_expr_item(data, literal, Literal, sibling_list_id); } void visit_expr_struct_literal( @@ -284,16 +285,16 @@ void visit_expr_struct_literal( struct Literal* literal = malloc(sizeof(struct Literal)); literal->type = Struct; struct Struct* struct_data = &literal->value.struct_data; - struct_data->fields = get_handle(data, child_field_list_id); - struct_data->values = get_handle(data, child_value_list_id); - put_handle(data, literal, Literal, sibling_list_id); + struct_data->fields = get_expr_list(data, child_field_list_id); + struct_data->values = get_expr_list(data, child_value_list_id); + put_expr_item(data, literal, Literal, sibling_list_id); } void visit_expr_null_literal(void* data, uintptr_t sibling_id_list) { struct Literal* literal = malloc(sizeof(struct Literal)); literal->type = Null; - put_handle(data, literal, Literal, sibling_id_list); + 
put_expr_item(data, literal, Literal, sibling_id_list); } /************************************************************* @@ -314,8 +315,8 @@ void visit_expr_variadic( { struct Variadic* var = malloc(sizeof(struct Variadic)); var->op = op; - var->expr_list = get_handle(data, child_list_id); - put_handle(data, var, Variadic, sibling_list_id); + var->exprs = get_expr_list(data, child_list_id); + put_expr_item(data, var, Variadic, sibling_list_id); } DEFINE_VARIADIC(visit_expr_and, And) DEFINE_VARIADIC(visit_expr_or, Or) @@ -326,8 +327,8 @@ void visit_expr_array_literal(void* data, uintptr_t child_list_id, uintptr_t sib struct Literal* literal = malloc(sizeof(struct Literal)); literal->type = Array; struct ArrayData* arr = &(literal->value.array_data); - arr->expr_list = get_handle(data, child_list_id); - put_handle(data, literal, Literal, sibling_list_id); + arr->exprs = get_expr_list(data, child_list_id); + put_expr_item(data, literal, Literal, sibling_list_id); } /************************************************************* @@ -347,8 +348,8 @@ void visit_expr_unary( { struct Unary* unary = malloc(sizeof(struct Unary)); unary->type = type; - unary->sub_expr = get_handle(data, child_list_id); - put_handle(data, unary, Unary, sibling_list_id); + unary->sub_expr = get_expr_list(data, child_list_id); + put_expr_item(data, unary, Unary, sibling_list_id); } DEFINE_UNARY(visit_expr_is_null, IsNull) DEFINE_UNARY(visit_expr_not, Not) @@ -360,7 +361,7 @@ DEFINE_UNARY(visit_expr_not, Not) void visit_expr_column(void* data, KernelStringSlice string, uintptr_t sibling_id_list) { char* column_name = allocate_string(string); - put_handle(data, column_name, Column, sibling_id_list); + put_expr_item(data, column_name, Column, sibling_id_list); } uintptr_t make_field_list(void* data, uintptr_t reserve) @@ -371,7 +372,7 @@ uintptr_t make_field_list(void* data, uintptr_t reserve) builder->lists = realloc(builder->lists, sizeof(ExpressionItemList) * builder->list_count); ExpressionItem* 
list = calloc(reserve, sizeof(ExpressionItem)); builder->lists[id].len = 0; - builder->lists[id].exprList = list; + builder->lists[id].list = list; return id; } @@ -423,20 +424,20 @@ ExpressionItemList construct_predicate(SharedExpression* predicate) return top_level_expr; } -void free_expression_item_list(ExpressionItemList list); +void free_expression_list(ExpressionItemList list); void free_expression_item(ExpressionItem ref) { switch (ref.type) { case BinOp: { struct BinOp* op = ref.ref; - free_expression_item_list(op->exprs); + free_expression_list(op->exprs); free(op); break; } case Variadic: { struct Variadic* var = ref.ref; - free_expression_item_list(var->expr_list); + free_expression_list(var->exprs); free(var); break; }; @@ -445,13 +446,13 @@ void free_expression_item(ExpressionItem ref) switch (lit->type) { case Struct: { struct Struct* struct_data = &lit->value.struct_data; - free_expression_item_list(struct_data->values); - free_expression_item_list(struct_data->fields); + free_expression_list(struct_data->values); + free_expression_list(struct_data->fields); break; } case Array: { struct ArrayData* array = &lit->value.array_data; - free_expression_item_list(array->expr_list); + free_expression_list(array->exprs); break; } case String: { @@ -482,7 +483,7 @@ void free_expression_item(ExpressionItem ref) }; case Unary: { struct Unary* unary = ref.ref; - free_expression_item_list(unary->sub_expr); + free_expression_list(unary->sub_expr); free(unary); break; } @@ -493,12 +494,12 @@ void free_expression_item(ExpressionItem ref) } } -void free_expression_item_list(ExpressionItemList list) +void free_expression_list(ExpressionItemList list) { for (size_t i = 0; i < list.len; i++) { - free_expression_item(list.exprList[i]); + free_expression_item(list.list[i]); } - free(list.exprList); + free(list.list); } void print_n_spaces(int n) { @@ -567,8 +568,8 @@ void print_tree(ExpressionItem ref, int depth) break; } - ExpressionItem left = op->exprs.exprList[0]; - 
ExpressionItem right = op->exprs.exprList[1]; + ExpressionItem left = op->exprs.list[0]; + ExpressionItem right = op->exprs.list[1]; print_tree(left, depth + 1); print_tree(right, depth + 1); break; @@ -590,8 +591,8 @@ void print_tree(ExpressionItem ref, int depth) printf("ArrayData\n"); break; } - for (size_t i = 0; i < var->expr_list.len; i++) { - print_tree(var->expr_list.exprList[i], depth + 1); + for (size_t i = 0; i < var->exprs.len; i++) { + print_tree(var->exprs.list[i], depth + 1); } } break; case Literal: { @@ -665,20 +666,20 @@ void print_tree(ExpressionItem ref, int depth) print_n_spaces(depth + 1); // Extract field name from field - ExpressionItem item = struct_data->fields.exprList[i]; + ExpressionItem item = struct_data->fields.list[i]; assert(item.type == Literal); struct Literal* lit = item.ref; assert(lit->type == String); printf("Field: %s\n", lit->value.string_data); - print_tree(struct_data->values.exprList[i], depth + 2); + print_tree(struct_data->values.list[i], depth + 2); } break; case Array: printf("Array\n"); struct ArrayData* array = &lit->value.array_data; - for (size_t i = 0; i < array->expr_list.len; i++) { - print_tree(array->expr_list.exprList[i], depth + 1); + for (size_t i = 0; i < array->exprs.len; i++) { + print_tree(array->exprs.list[i], depth + 1); } break; } @@ -694,7 +695,7 @@ void print_tree(ExpressionItem ref, int depth) printf("IsNull\n"); break; } - print_tree(unary->sub_expr.exprList[0], depth + 1); + print_tree(unary->sub_expr.list[0], depth + 1); break; } case Column: diff --git a/ffi/examples/read-table/test_expr.c b/ffi/examples/read-table/test_expr.c index b1136a7ed..d6a4a1b70 100644 --- a/ffi/examples/read-table/test_expr.c +++ b/ffi/examples/read-table/test_expr.c @@ -4,9 +4,9 @@ int main() { SharedExpression* pred = get_testing_kernel_expression(); ExpressionItemList list = construct_predicate(pred); - ExpressionItem ref = list.exprList[0]; + ExpressionItem ref = list.list[0]; print_tree(ref, 0); - 
free_expression_item_list(list); + free_expression_list(list); free_kernel_predicate(pred); return 0; } From 406cfe7974e8eaeef15ab8f13cf8d792c449f450 Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Fri, 11 Oct 2024 17:00:28 -0700 Subject: [PATCH 44/82] Put in documentation --- ffi/examples/read-table/expression.h | 10 +- ffi/src/expressions.rs | 151 +++++++++++++++------------ 2 files changed, 93 insertions(+), 68 deletions(-) diff --git a/ffi/examples/read-table/expression.h b/ffi/examples/read-table/expression.h index 2a1104a8a..a1889f9b7 100644 --- a/ffi/examples/read-table/expression.h +++ b/ffi/examples/read-table/expression.h @@ -364,6 +364,10 @@ void visit_expr_column(void* data, KernelStringSlice string, uintptr_t sibling_i put_expr_item(data, column_name, Column, sibling_id_list); } +/************************************************************* + * EngineExpressionVisitor Implementation + ************************************************************/ + uintptr_t make_field_list(void* data, uintptr_t reserve) { ExpressionBuilder* builder = data; @@ -425,7 +429,6 @@ ExpressionItemList construct_predicate(SharedExpression* predicate) } void free_expression_list(ExpressionItemList list); - void free_expression_item(ExpressionItem ref) { switch (ref.type) { @@ -501,6 +504,11 @@ void free_expression_list(ExpressionItemList list) } free(list.list); } + +/************************************************************* + * Expression Printing + ************************************************************/ + void print_n_spaces(int n) { if (n == 0) diff --git a/ffi/src/expressions.rs b/ffi/src/expressions.rs index 3bf9ad550..3dc34bad4 100644 --- a/ffi/src/expressions.rs +++ b/ffi/src/expressions.rs @@ -356,15 +356,27 @@ pub unsafe extern "C" fn get_testing_kernel_expression() -> Handle usize, - /// Visit a 32bit `integer + /// Visit a 32bit `integer belonging to the list identified by `sibling_list_id`. 
pub visit_int_literal: extern "C" fn(data: *mut c_void, value: i32, sibling_list_id: usize), - /// Visit a 64bit `long`. + /// Visit a 64bit `long` belonging to the list identified by `sibling_list_id`. pub visit_long_literal: extern "C" fn(data: *mut c_void, value: i64, sibling_list_id: usize), - /// Visit a 16bit `short`. + /// Visit a 16bit `short` belonging to the list identified by `sibling_list_id`. pub visit_short_literal: extern "C" fn(data: *mut c_void, value: i16, sibling_list_id: usize), - /// Visit an 8bit `byte`. + /// Visit an 8bit `byte` belonging to the list identified by `sibling_list_id`. pub visit_byte_literal: extern "C" fn(data: *mut c_void, value: i8, sibling_list_id: usize), - /// Visit a 32bit `float`. + /// Visit a 32bit `float` belonging to the list identified by `sibling_list_id`. pub visit_float_literal: extern "C" fn(data: *mut c_void, value: f32, sibling_list_id: usize), - /// Visit a 64bit `double`. + /// Visit a 64bit `double` belonging to the list identified by `sibling_list_id`. pub visit_double_literal: extern "C" fn(data: *mut c_void, value: f64, sibling_list_id: usize), - /// Visit a `string`. + /// Visit a `string` belonging to the list identified by `sibling_list_id`. pub visit_string_literal: extern "C" fn(data: *mut c_void, value: KernelStringSlice, sibling_list_id: usize), - /// Visit a `boolean`. + /// Visit a `boolean` belonging to the list identified by `sibling_list_id`. pub visit_bool_literal: extern "C" fn(data: *mut c_void, value: bool, sibling_list_id: usize), - /// Visit a 64bit timestamp. The timestamp is microsecond precision and adjusted to UTC. + /// Visit a 64bit timestamp belonging to the list identified by `sibling_list_id`. + /// The timestamp is microsecond precision and adjusted to UTC. pub visit_timestamp_literal: extern "C" fn(data: *mut c_void, value: i64, sibling_list_id: usize), - /// Visit a 64bit timestamp. The timestamp is microsecond precision with no timezone. 
+ /// Visit a 64bit timestamp belonging to the list identified by `sibling_list_id`. + /// The timestamp is microsecond precision with no timezone. pub visit_timestamp_ntz_literal: extern "C" fn(data: *mut c_void, value: i64, sibling_list_id: usize), - /// Visit a 32bit int date representing days since UNIX epoch 1970-01-01. + /// Visit a 32bit intger `date` representing days since UNIX epoch 1970-01-01. The `date` belongs + /// to the list identified by `sibling_list_id`. pub visit_date_literal: extern "C" fn(data: *mut c_void, value: i32, sibling_list_id: usize), - /// Visit binary data at the `buffer` with length `len`. + /// Visit binary data at the `buffer` with length `len` belonging to the list identified by + /// `sibling_list_id`. pub visit_binary_literal: extern "C" fn(data: *mut c_void, buffer: *const u8, len: usize, sibling_list_id: usize), /// Visit a 128bit `decimal` value with the given precision and scale. The 128bit integer /// is split into the most significant 64 bits in `value_ms`, and the least significant 64 - /// bits in `value_ls`. + /// bits in `value_ls`. The `decimal` belongs to the list identified by `sibling_list_id`. pub visit_decimal_literal: extern "C" fn( data: *mut c_void, value_ms: u64, // Most significant 64 bits of decimal value @@ -414,83 +430,84 @@ pub struct EngineExpressionVisitor { scale: u8, sibling_list_id: usize, ), - /// Visit a struct literal which is made up of a list of field names and values. This declares - /// the number of fields that the struct will have. The visitor will populate the struct fields - /// using the [`visit_struct_literal_field`] method. + /// Visit a struct literal belonging to the list identified by `sibling_list_id`. + /// The field names of the struct are in a list identified by `child_field_list_id`. + /// The values of the struct are in a list identified by `child_value_list_id`. 
+ /// + /// TODO: Change `child_field_list_values` to take a list of `StructField` pub visit_struct_literal: extern "C" fn( data: *mut c_void, child_field_list_value: usize, child_value_list_id: usize, sibling_list_id: usize, ), - /// Visit an `arary`, declaring the length `len`. The visitor will populate the array - /// elements using the [`visit_array_element`] method. + /// Visit an array literal belonging to the list identified by `sibling_list_id`. + /// The values of the array are in a list identified by `child_list_id`. pub visit_array_literal: extern "C" fn(data: *mut c_void, child_list_id: usize, sibling_list_id: usize), - /// Visits a null value. + /// Visits a null value belonging to the list identified by `sibling_list_id. pub visit_null_literal: extern "C" fn(data: *mut c_void, sibling_list_id: usize), - /// Visits an `and` expression which is made of a list of sub-expressions. This declares the - /// number of sub-expressions that the `and` expression will be made of. The visitor will populate - /// the list of expressions using the [`visit_variadic_sub_expr`] method. + /// Visits an `and` expression belonging to the list identified by `sibling_list_id`. + /// The sub-expressions of the array are in a list identified by `child_list_id` pub visit_and: extern "C" fn(data: *mut c_void, child_list_id: usize, sibling_list_id: usize), - /// Visits an `or` expression which is made of a list of sub-expressions. This declares the - /// number of sub-expressions that the `or` expression will be made of. The visitor will populate - /// the list of expressions using the [`visit_variadic_sub_expr`] method. + /// Visits an `or` expression belonging to the list identified by `sibling_list_id`. + /// The sub-expressions of the array are in a list identified by `child_list_id` pub visit_or: extern "C" fn(data: *mut c_void, child_list_id: usize, sibling_list_id: usize), - ///Visits a `not` expression, bulit using the sub-expression `inner_expr`. 
- pub visit_not: extern "C" fn(data: *mut c_void, chilrd_list_id: usize, sibling_list_id: usize), - ///Visits an `is_null` expression, built using the sub-expression `inner_expr`. + /// Visits a `not` expression belonging to the list identified by `sibling_list_id`. + /// The sub-expression will be in a _one_ item list identified by `child_list_id` + pub visit_not: extern "C" fn(data: *mut c_void, child_list_id: usize, sibling_list_id: usize), + /// Visits a `is_null` expression belonging to the list identified by `sibling_list_id`. + /// The sub-expression will be in a _one_ item list identified by `child_list_id` pub visit_is_null: extern "C" fn(data: *mut c_void, child_list_id: usize, sibling_list_id: usize), - /// Visit the `less than` binary operation, which takes the left sub expression id `a` and the - /// right sub-expression id `b`. + /// Visits the `LessThan` binary operator belonging to the list identified by `sibling_list_id`. + /// The operands will be in a _two_ item list identified by `child_list_id` pub visit_lt: extern "C" fn(data: *mut c_void, child_list_id: usize, sibling_list_id: usize), - /// Visit the `less than or equal` binary operation, which takes the left sub expression id `a` - /// and the right sub-expression id `b`. + /// Visits the `LessThanOrEqual` binary operator belonging to the list identified by `sibling_list_id`. + /// The operands will be in a _two_ item list identified by `child_list_id` pub visit_le: extern "C" fn(data: *mut c_void, child_list_id: usize, sibling_list_id: usize), - /// Visit the `greater than` binary operation, which takes the left sub expression id `a` - /// and the right sub-expression id `b`. + /// Visits the `GreaterThan` binary operator belonging to the list identified by `sibling_list_id`. 
+ /// The operands will be in a _two_ item list identified by `child_list_id` pub visit_gt: extern "C" fn(data: *mut c_void, child_list_id: usize, sibling_list_id: usize), - /// Visit the `greater than or equal` binary operation, which takes the left sub expression id `a` - /// and the right sub-expression id `b`. + /// Visits the `GreaterThanOrEqual` binary operator belonging to the list identified by `sibling_list_id`. + /// The operands will be in a _two_ item list identified by `child_list_id` pub visit_ge: extern "C" fn(data: *mut c_void, child_list_id: usize, sibling_list_id: usize), - /// Visit the `equal` binary operation, which takes the left sub expression id `a` - /// and the right sub-expression id `b`. + /// Visits the `Equal` binary operator belonging to the list identified by `sibling_list_id`. + /// The operands will be in a _two_ item list identified by `child_list_id` pub visit_eq: extern "C" fn(data: *mut c_void, child_list_id: usize, sibling_list_id: usize), - /// Visit the `not equal` binary operation, which takes the left sub expression id `a` - /// and the right sub-expression id `b`. + /// Visits the `NotEqual` binary operator belonging to the list identified by `sibling_list_id`. + /// The operands will be in a _two_ item list identified by `child_list_id` pub visit_ne: extern "C" fn(data: *mut c_void, child_list_id: usize, sibling_list_id: usize), - /// Visit the `distinct` binary operation, which takes the left sub expression id `a` - /// and the right sub-expression id `b`. + /// Visits the `Distinct` binary operator belonging to the list identified by `sibling_list_id`. + /// The operands will be in a _two_ item list identified by `child_list_id` pub visit_distinct: extern "C" fn(data: *mut c_void, child_list_id: usize, sibling_list_id: usize), - /// Visit the `in` binary operation, which takes the left sub expression id `a` - /// and the right sub-expression id `b`. 
+ /// Visits the `In` binary operator belonging to the list identified by `sibling_list_id`. + /// The operands will be in a _two_ item list identified by `child_list_id` pub visit_in: extern "C" fn(data: *mut c_void, child_list_id: usize, sibling_list_id: usize), - /// Visit the `not in` binary operation, which takes the left sub expression id `a` - /// and the right sub-expression id `b`. + /// Visits the `NotIn` binary operator belonging to the list identified by `sibling_list_id`. + /// The operands will be in a _two_ item list identified by `child_list_id` pub visit_not_in: extern "C" fn(data: *mut c_void, child_list_id: usize, sibling_list_id: usize), - /// Visit the `add` binary operation, which takes the left sub expression id `a` - /// and the right sub-expression id `b`. + /// Visits the `Add` binary operator belonging to the list identified by `sibling_list_id`. + /// The operands will be in a _two_ item list identified by `child_list_id` pub visit_add: extern "C" fn(data: *mut c_void, child_list_id: usize, sibling_list_id: usize), - /// Visit the `minus` binary operation, which takes the left sub expression id `a` - /// and the right sub-expression id `b`. + /// Visits the `Minus` binary operator belonging to the list identified by `sibling_list_id`. + /// The operands will be in a _two_ item list identified by `child_list_id` pub visit_minus: extern "C" fn(data: *mut c_void, child_list_id: usize, sibling_list_id: usize), - /// Visit the `multiply` binary operation, which takes the left sub expression id `a` - /// and the right sub-expression id `b`. + /// Visits the `Multiply` binary operator belonging to the list identified by `sibling_list_id`. + /// The operands will be in a _two_ item list identified by `child_list_id` pub visit_multiply: extern "C" fn(data: *mut c_void, child_list_id: usize, sibling_list_id: usize), - /// Visit the `divide` binary operation, which takes the left sub expression id `a` - /// and the right sub-expression id `b`. 
+ /// Visits the `Divide` binary operator belonging to the list identified by `sibling_list_id`. + /// The operands will be in a _two_ item list identified by `child_list_id` pub visit_divide: extern "C" fn(data: *mut c_void, child_list_id: usize, sibling_list_id: usize), - /// Visit the `colmun` identified by the `name` string. + /// Visits the `column` belonging to the list identified by `sibling_list_id`. pub visit_column: extern "C" fn(data: *mut c_void, name: KernelStringSlice, sibling_list_id: usize), - /// Visit a `struct` which is constructed from an ordered list of expressions. This declares - /// the number of expressions that the struct will be made of. The visitor will populate the - /// list of expressions using the [`visit_struct_sub_expr`] method. + /// Visits a `StructExpression` belonging to the list identified by `sibling_list_id`. + /// The sub-expressions of the `StructExpression` are in a list identified by `child_list_id` pub visit_struct_expr: extern "C" fn(data: *mut c_void, child_list_id: usize, sibling_list_id: usize), } From cfb1ed72edf9aaa5c303aba74f6b61f3442b3ca6 Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Fri, 11 Oct 2024 17:17:37 -0700 Subject: [PATCH 45/82] Change arg order --- ffi/examples/read-table/expression.h | 44 +++++------ ffi/src/expressions.rs | 114 +++++++++++++-------------- 2 files changed, 79 insertions(+), 79 deletions(-) diff --git a/ffi/examples/read-table/expression.h b/ffi/examples/read-table/expression.h index a1889f9b7..dc6c13fe4 100644 --- a/ffi/examples/read-table/expression.h +++ b/ffi/examples/read-table/expression.h @@ -182,15 +182,15 @@ char* allocate_string(const KernelStringSlice slice) ************************************************************/ #define DEFINE_BINOP(fun_name, op) \ - void fun_name(void* data, uintptr_t child_list_id, uintptr_t sibling_list_id) \ + void fun_name(void* data, uintptr_t sibling_list_id, uintptr_t child_list_id) \ { \ - visit_expr_binop(data, op, child_list_id, 
sibling_list_id); \ + visit_expr_binop(data, sibling_list_id, op, child_list_id); \ } void visit_expr_binop( void* data, + uintptr_t sibling_id_list, enum OpType op, - uintptr_t child_id_list, - uintptr_t sibling_id_list) + uintptr_t child_id_list) { struct BinOp* binop = malloc(sizeof(struct BinOp)); binop->op = op; @@ -216,7 +216,7 @@ DEFINE_BINOP(visit_expr_not_in, NotIn) ************************************************************/ #define DEFINE_SIMPLE_SCALAR(fun_name, enum_member, c_type, literal_field) \ - void fun_name(void* data, c_type val, uintptr_t sibling_list_id) \ + void fun_name(void* data, uintptr_t sibling_list_id, c_type val) \ { \ struct Literal* lit = malloc(sizeof(struct Literal)); \ lit->type = enum_member; \ @@ -236,7 +236,7 @@ DEFINE_SIMPLE_SCALAR(visit_expr_timestamp_literal, Timestamp, int64_t, long_data DEFINE_SIMPLE_SCALAR(visit_expr_timestamp_ntz_literal, TimestampNtz, int64_t, long_data); DEFINE_SIMPLE_SCALAR(visit_expr_date_literal, Date, int32_t, integer_data); -void visit_expr_string_literal(void* data, KernelStringSlice string, uintptr_t sibling_list_id) +void visit_expr_string_literal(void* data, uintptr_t sibling_list_id, KernelStringSlice string) { struct Literal* literal = malloc(sizeof(struct Literal)); literal->type = String; @@ -246,11 +246,11 @@ void visit_expr_string_literal(void* data, KernelStringSlice string, uintptr_t s void visit_expr_decimal_literal( void* data, + uintptr_t sibling_list_id, uint64_t value_ms, uint64_t value_ls, uint8_t precision, - uint8_t scale, - uintptr_t sibling_list_id) + uint8_t scale) { struct Literal* literal = malloc(sizeof(struct Literal)); literal->type = Decimal; @@ -264,9 +264,9 @@ void visit_expr_decimal_literal( void visit_expr_binary_literal( void* data, + uintptr_t sibling_list_id, const uint8_t* buf, - uintptr_t len, - uintptr_t sibling_list_id) + uintptr_t len) { struct Literal* literal = malloc(sizeof(struct Literal)); literal->type = Binary; @@ -278,9 +278,9 @@ void 
visit_expr_binary_literal( void visit_expr_struct_literal( void* data, + uintptr_t sibling_list_id, uintptr_t child_field_list_id, - uintptr_t child_value_list_id, - uintptr_t sibling_list_id) + uintptr_t child_value_list_id) { struct Literal* literal = malloc(sizeof(struct Literal)); literal->type = Struct; @@ -302,16 +302,16 @@ void visit_expr_null_literal(void* data, uintptr_t sibling_id_list) ************************************************************/ #define DEFINE_VARIADIC(fun_name, enum_member) \ - void fun_name(void* data, uintptr_t child_list_id, uintptr_t sibling_list_id) \ + void fun_name(void* data, uintptr_t sibling_list_id, uintptr_t child_list_id) \ { \ - visit_expr_variadic(data, enum_member, child_list_id, sibling_list_id); \ + visit_expr_variadic(data, sibling_list_id, enum_member, child_list_id); \ } void visit_expr_variadic( void* data, + uintptr_t sibling_list_id, enum VariadicType op, - uintptr_t child_list_id, - uintptr_t sibling_list_id) + uintptr_t child_list_id) { struct Variadic* var = malloc(sizeof(struct Variadic)); var->op = op; @@ -322,7 +322,7 @@ DEFINE_VARIADIC(visit_expr_and, And) DEFINE_VARIADIC(visit_expr_or, Or) DEFINE_VARIADIC(visit_expr_struct_expr, StructExpression) -void visit_expr_array_literal(void* data, uintptr_t child_list_id, uintptr_t sibling_list_id) +void visit_expr_array_literal(void* data, uintptr_t sibling_list_id, uintptr_t child_list_id) { struct Literal* literal = malloc(sizeof(struct Literal)); literal->type = Array; @@ -335,16 +335,16 @@ void visit_expr_array_literal(void* data, uintptr_t child_list_id, uintptr_t sib * Unary Expressions ************************************************************/ #define DEFINE_UNARY(fun_name, op) \ - void fun_name(void* data, uintptr_t child_list_id, uintptr_t sibling_list_id) \ + void fun_name(void* data, uintptr_t sibling_list_id, uintptr_t child_list_id) \ { \ - visit_expr_unary(data, op, child_list_id, sibling_list_id); \ + visit_expr_unary(data, sibling_list_id, op, 
child_list_id); \ } void visit_expr_unary( void* data, + uintptr_t sibling_list_id, enum UnaryType type, - uintptr_t child_list_id, - uintptr_t sibling_list_id) + uintptr_t child_list_id) { struct Unary* unary = malloc(sizeof(struct Unary)); unary->type = type; @@ -358,7 +358,7 @@ DEFINE_UNARY(visit_expr_not, Not) * Column Expression ************************************************************/ -void visit_expr_column(void* data, KernelStringSlice string, uintptr_t sibling_id_list) +void visit_expr_column(void* data, uintptr_t sibling_id_list, KernelStringSlice string) { char* column_name = allocate_string(string); put_expr_item(data, column_name, Column, sibling_id_list); diff --git a/ffi/src/expressions.rs b/ffi/src/expressions.rs index 3dc34bad4..eb351256f 100644 --- a/ffi/src/expressions.rs +++ b/ffi/src/expressions.rs @@ -388,47 +388,47 @@ pub struct EngineExpressionVisitor { /// Creates a new expression list, optionally reserving capacity up front pub make_field_list: extern "C" fn(data: *mut c_void, reserve: usize) -> usize, /// Visit a 32bit `integer belonging to the list identified by `sibling_list_id`. - pub visit_int_literal: extern "C" fn(data: *mut c_void, value: i32, sibling_list_id: usize), + pub visit_int_literal: extern "C" fn(data: *mut c_void, sibling_list_id: usize, value: i32), /// Visit a 64bit `long` belonging to the list identified by `sibling_list_id`. - pub visit_long_literal: extern "C" fn(data: *mut c_void, value: i64, sibling_list_id: usize), + pub visit_long_literal: extern "C" fn(data: *mut c_void, sibling_list_id: usize, value: i64), /// Visit a 16bit `short` belonging to the list identified by `sibling_list_id`. - pub visit_short_literal: extern "C" fn(data: *mut c_void, value: i16, sibling_list_id: usize), + pub visit_short_literal: extern "C" fn(data: *mut c_void, sibling_list_id: usize, value: i16), /// Visit an 8bit `byte` belonging to the list identified by `sibling_list_id`. 
- pub visit_byte_literal: extern "C" fn(data: *mut c_void, value: i8, sibling_list_id: usize), + pub visit_byte_literal: extern "C" fn(data: *mut c_void, sibling_list_id: usize, value: i8), /// Visit a 32bit `float` belonging to the list identified by `sibling_list_id`. - pub visit_float_literal: extern "C" fn(data: *mut c_void, value: f32, sibling_list_id: usize), + pub visit_float_literal: extern "C" fn(data: *mut c_void, sibling_list_id: usize, value: f32), /// Visit a 64bit `double` belonging to the list identified by `sibling_list_id`. - pub visit_double_literal: extern "C" fn(data: *mut c_void, value: f64, sibling_list_id: usize), + pub visit_double_literal: extern "C" fn(data: *mut c_void, sibling_list_id: usize, value: f64), /// Visit a `string` belonging to the list identified by `sibling_list_id`. pub visit_string_literal: - extern "C" fn(data: *mut c_void, value: KernelStringSlice, sibling_list_id: usize), + extern "C" fn(data: *mut c_void, sibling_list_id: usize, value: KernelStringSlice), /// Visit a `boolean` belonging to the list identified by `sibling_list_id`. - pub visit_bool_literal: extern "C" fn(data: *mut c_void, value: bool, sibling_list_id: usize), + pub visit_bool_literal: extern "C" fn(data: *mut c_void, sibling_list_id: usize, value: bool), /// Visit a 64bit timestamp belonging to the list identified by `sibling_list_id`. /// The timestamp is microsecond precision and adjusted to UTC. pub visit_timestamp_literal: - extern "C" fn(data: *mut c_void, value: i64, sibling_list_id: usize), + extern "C" fn(data: *mut c_void, sibling_list_id: usize, value: i64), /// Visit a 64bit timestamp belonging to the list identified by `sibling_list_id`. /// The timestamp is microsecond precision with no timezone. 
pub visit_timestamp_ntz_literal: - extern "C" fn(data: *mut c_void, value: i64, sibling_list_id: usize), + extern "C" fn(data: *mut c_void, sibling_list_id: usize, value: i64), /// Visit a 32bit intger `date` representing days since UNIX epoch 1970-01-01. The `date` belongs /// to the list identified by `sibling_list_id`. - pub visit_date_literal: extern "C" fn(data: *mut c_void, value: i32, sibling_list_id: usize), + pub visit_date_literal: extern "C" fn(data: *mut c_void, sibling_list_id: usize, value: i32), /// Visit binary data at the `buffer` with length `len` belonging to the list identified by /// `sibling_list_id`. pub visit_binary_literal: - extern "C" fn(data: *mut c_void, buffer: *const u8, len: usize, sibling_list_id: usize), + extern "C" fn(data: *mut c_void, sibling_list_id: usize, buffer: *const u8, len: usize), /// Visit a 128bit `decimal` value with the given precision and scale. The 128bit integer /// is split into the most significant 64 bits in `value_ms`, and the least significant 64 /// bits in `value_ls`. The `decimal` belongs to the list identified by `sibling_list_id`. pub visit_decimal_literal: extern "C" fn( data: *mut c_void, + sibling_list_id: usize, value_ms: u64, // Most significant 64 bits of decimal value value_ls: u64, // Least significant 64 bits of decimal value precision: u8, scale: u8, - sibling_list_id: usize, ), /// Visit a struct literal belonging to the list identified by `sibling_list_id`. /// The field names of the struct are in a list identified by `child_field_list_id`. @@ -437,79 +437,79 @@ pub struct EngineExpressionVisitor { /// TODO: Change `child_field_list_values` to take a list of `StructField` pub visit_struct_literal: extern "C" fn( data: *mut c_void, + sibling_list_id: usize, child_field_list_value: usize, child_value_list_id: usize, - sibling_list_id: usize, ), /// Visit an array literal belonging to the list identified by `sibling_list_id`. 
/// The values of the array are in a list identified by `child_list_id`. pub visit_array_literal: - extern "C" fn(data: *mut c_void, child_list_id: usize, sibling_list_id: usize), + extern "C" fn(data: *mut c_void, sibling_list_id: usize, child_list_id: usize), /// Visits a null value belonging to the list identified by `sibling_list_id. pub visit_null_literal: extern "C" fn(data: *mut c_void, sibling_list_id: usize), /// Visits an `and` expression belonging to the list identified by `sibling_list_id`. /// The sub-expressions of the array are in a list identified by `child_list_id` - pub visit_and: extern "C" fn(data: *mut c_void, child_list_id: usize, sibling_list_id: usize), + pub visit_and: extern "C" fn(data: *mut c_void, sibling_list_id: usize, child_list_id: usize), /// Visits an `or` expression belonging to the list identified by `sibling_list_id`. /// The sub-expressions of the array are in a list identified by `child_list_id` - pub visit_or: extern "C" fn(data: *mut c_void, child_list_id: usize, sibling_list_id: usize), + pub visit_or: extern "C" fn(data: *mut c_void, sibling_list_id: usize, child_list_id: usize), /// Visits a `not` expression belonging to the list identified by `sibling_list_id`. /// The sub-expression will be in a _one_ item list identified by `child_list_id` - pub visit_not: extern "C" fn(data: *mut c_void, child_list_id: usize, sibling_list_id: usize), + pub visit_not: extern "C" fn(data: *mut c_void, sibling_list_id: usize, child_list_id: usize), /// Visits a `is_null` expression belonging to the list identified by `sibling_list_id`. /// The sub-expression will be in a _one_ item list identified by `child_list_id` pub visit_is_null: - extern "C" fn(data: *mut c_void, child_list_id: usize, sibling_list_id: usize), + extern "C" fn(data: *mut c_void, sibling_list_id: usize, child_list_id: usize), /// Visits the `LessThan` binary operator belonging to the list identified by `sibling_list_id`. 
/// The operands will be in a _two_ item list identified by `child_list_id` - pub visit_lt: extern "C" fn(data: *mut c_void, child_list_id: usize, sibling_list_id: usize), + pub visit_lt: extern "C" fn(data: *mut c_void, sibling_list_id: usize, child_list_id: usize), /// Visits the `LessThanOrEqual` binary operator belonging to the list identified by `sibling_list_id`. /// The operands will be in a _two_ item list identified by `child_list_id` - pub visit_le: extern "C" fn(data: *mut c_void, child_list_id: usize, sibling_list_id: usize), + pub visit_le: extern "C" fn(data: *mut c_void, sibling_list_id: usize, child_list_id: usize), /// Visits the `GreaterThan` binary operator belonging to the list identified by `sibling_list_id`. /// The operands will be in a _two_ item list identified by `child_list_id` - pub visit_gt: extern "C" fn(data: *mut c_void, child_list_id: usize, sibling_list_id: usize), + pub visit_gt: extern "C" fn(data: *mut c_void, sibling_list_id: usize, child_list_id: usize), /// Visits the `GreaterThanOrEqual` binary operator belonging to the list identified by `sibling_list_id`. /// The operands will be in a _two_ item list identified by `child_list_id` - pub visit_ge: extern "C" fn(data: *mut c_void, child_list_id: usize, sibling_list_id: usize), + pub visit_ge: extern "C" fn(data: *mut c_void, sibling_list_id: usize, child_list_id: usize), /// Visits the `Equal` binary operator belonging to the list identified by `sibling_list_id`. /// The operands will be in a _two_ item list identified by `child_list_id` - pub visit_eq: extern "C" fn(data: *mut c_void, child_list_id: usize, sibling_list_id: usize), + pub visit_eq: extern "C" fn(data: *mut c_void, sibling_list_id: usize, child_list_id: usize), /// Visits the `NotEqual` binary operator belonging to the list identified by `sibling_list_id`. 
/// The operands will be in a _two_ item list identified by `child_list_id` - pub visit_ne: extern "C" fn(data: *mut c_void, child_list_id: usize, sibling_list_id: usize), + pub visit_ne: extern "C" fn(data: *mut c_void, sibling_list_id: usize, child_list_id: usize), /// Visits the `Distinct` binary operator belonging to the list identified by `sibling_list_id`. /// The operands will be in a _two_ item list identified by `child_list_id` pub visit_distinct: - extern "C" fn(data: *mut c_void, child_list_id: usize, sibling_list_id: usize), + extern "C" fn(data: *mut c_void, sibling_list_id: usize, child_list_id: usize), /// Visits the `In` binary operator belonging to the list identified by `sibling_list_id`. /// The operands will be in a _two_ item list identified by `child_list_id` - pub visit_in: extern "C" fn(data: *mut c_void, child_list_id: usize, sibling_list_id: usize), + pub visit_in: extern "C" fn(data: *mut c_void, sibling_list_id: usize, child_list_id: usize), /// Visits the `NotIn` binary operator belonging to the list identified by `sibling_list_id`. /// The operands will be in a _two_ item list identified by `child_list_id` pub visit_not_in: - extern "C" fn(data: *mut c_void, child_list_id: usize, sibling_list_id: usize), + extern "C" fn(data: *mut c_void, sibling_list_id: usize, child_list_id: usize), /// Visits the `Add` binary operator belonging to the list identified by `sibling_list_id`. /// The operands will be in a _two_ item list identified by `child_list_id` - pub visit_add: extern "C" fn(data: *mut c_void, child_list_id: usize, sibling_list_id: usize), + pub visit_add: extern "C" fn(data: *mut c_void, sibling_list_id: usize, child_list_id: usize), /// Visits the `Minus` binary operator belonging to the list identified by `sibling_list_id`. 
/// The operands will be in a _two_ item list identified by `child_list_id` - pub visit_minus: extern "C" fn(data: *mut c_void, child_list_id: usize, sibling_list_id: usize), + pub visit_minus: extern "C" fn(data: *mut c_void, sibling_list_id: usize, child_list_id: usize), /// Visits the `Multiply` binary operator belonging to the list identified by `sibling_list_id`. /// The operands will be in a _two_ item list identified by `child_list_id` pub visit_multiply: - extern "C" fn(data: *mut c_void, child_list_id: usize, sibling_list_id: usize), + extern "C" fn(data: *mut c_void, sibling_list_id: usize, child_list_id: usize), /// Visits the `Divide` binary operator belonging to the list identified by `sibling_list_id`. /// The operands will be in a _two_ item list identified by `child_list_id` pub visit_divide: - extern "C" fn(data: *mut c_void, child_list_id: usize, sibling_list_id: usize), + extern "C" fn(data: *mut c_void, sibling_list_id: usize, child_list_id: usize), /// Visits the `column` belonging to the list identified by `sibling_list_id`. pub visit_column: - extern "C" fn(data: *mut c_void, name: KernelStringSlice, sibling_list_id: usize), + extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), /// Visits a `StructExpression` belonging to the list identified by `sibling_list_id`. /// The sub-expressions of the `StructExpression` are in a list identified by `child_list_id` pub visit_struct_expr: - extern "C" fn(data: *mut c_void, child_list_id: usize, sibling_list_id: usize), + extern "C" fn(data: *mut c_void, sibling_list_id: usize, child_list_id: usize), } /// Visit the expression of the passed [`SharedExpression`] Handle using the provided `visitor`. 
@@ -542,7 +542,7 @@ pub unsafe extern "C" fn visit_expression( for scalar in elements { visit_scalar(visitor, scalar, child_list_id); } - call!(visitor, visit_array_literal, child_list_id, sibling_list_id); + call!(visitor, visit_array_literal, sibling_list_id, child_list_id); } fn visit_struct_literal( visitor: &mut EngineExpressionVisitor, @@ -562,9 +562,9 @@ pub unsafe extern "C" fn visit_expression( call!( visitor, visit_struct_literal, + sibling_list_id, child_field_list_id, - child_value_list_id, - sibling_list_id + child_value_list_id ) } fn visit_struct_expr( @@ -576,7 +576,7 @@ pub unsafe extern "C" fn visit_expression( for expr in exprs { visit_expression_impl(visitor, expr, child_list_id); } - call!(visitor, visit_struct_expr, child_list_id, sibling_list_id) + call!(visitor, visit_struct_expr, sibling_list_id, child_list_id) } fn visit_variadic( visitor: &mut EngineExpressionVisitor, @@ -593,7 +593,7 @@ pub unsafe extern "C" fn visit_expression( VariadicOperator::And => &visitor.visit_and, VariadicOperator::Or => &visitor.visit_or, }; - visit_fn(visitor.data, child_list_id, sibling_list_id); + visit_fn(visitor.data, sibling_list_id, child_list_id); } fn visit_scalar( visitor: &mut EngineExpressionVisitor, @@ -601,29 +601,29 @@ pub unsafe extern "C" fn visit_expression( sibling_list_id: usize, ) { match scalar { - Scalar::Integer(val) => call!(visitor, visit_int_literal, *val, sibling_list_id), - Scalar::Long(val) => call!(visitor, visit_long_literal, *val, sibling_list_id), - Scalar::Short(val) => call!(visitor, visit_short_literal, *val, sibling_list_id), - Scalar::Byte(val) => call!(visitor, visit_byte_literal, *val, sibling_list_id), - Scalar::Float(val) => call!(visitor, visit_float_literal, *val, sibling_list_id), - Scalar::Double(val) => call!(visitor, visit_double_literal, *val, sibling_list_id), + Scalar::Integer(val) => call!(visitor, visit_int_literal, sibling_list_id, *val), + Scalar::Long(val) => call!(visitor, visit_long_literal, 
sibling_list_id, *val), + Scalar::Short(val) => call!(visitor, visit_short_literal, sibling_list_id, *val), + Scalar::Byte(val) => call!(visitor, visit_byte_literal, sibling_list_id, *val), + Scalar::Float(val) => call!(visitor, visit_float_literal, sibling_list_id, *val), + Scalar::Double(val) => call!(visitor, visit_double_literal, sibling_list_id, *val), Scalar::String(val) => { - call!(visitor, visit_string_literal, val.into(), sibling_list_id) + call!(visitor, visit_string_literal, sibling_list_id, val.into()) } - Scalar::Boolean(val) => call!(visitor, visit_bool_literal, *val, sibling_list_id), + Scalar::Boolean(val) => call!(visitor, visit_bool_literal, sibling_list_id, *val), Scalar::Timestamp(val) => { - call!(visitor, visit_timestamp_literal, *val, sibling_list_id) + call!(visitor, visit_timestamp_literal, sibling_list_id, *val) } Scalar::TimestampNtz(val) => { - call!(visitor, visit_timestamp_ntz_literal, *val, sibling_list_id) + call!(visitor, visit_timestamp_ntz_literal, sibling_list_id, *val) } - Scalar::Date(val) => call!(visitor, visit_date_literal, *val, sibling_list_id), + Scalar::Date(val) => call!(visitor, visit_date_literal, sibling_list_id, *val), Scalar::Binary(buf) => call!( visitor, visit_binary_literal, + sibling_list_id, buf.as_ptr(), - buf.len(), - sibling_list_id + buf.len() ), Scalar::Decimal(value, precision, scale) => { let ms: u64 = (value >> 64) as u64; @@ -631,11 +631,11 @@ pub unsafe extern "C" fn visit_expression( call!( visitor, visit_decimal_literal, + sibling_list_id, ms, ls, *precision, - *scale, - sibling_list_id + *scale ) } Scalar::Null(_) => call!(visitor, visit_null_literal, sibling_list_id), @@ -652,7 +652,7 @@ pub unsafe extern "C" fn visit_expression( ) { match expression { Expression::Literal(scalar) => visit_scalar(visitor, scalar, sibling_list_id), - Expression::Column(name) => call!(visitor, visit_column, name.into(), sibling_list_id), + Expression::Column(name) => call!(visitor, visit_column, sibling_list_id, 
name.into()), Expression::Struct(exprs) => visit_struct_expr(visitor, exprs, sibling_list_id), Expression::BinaryOperation { op, left, right } => { let child_list_id = call!(visitor, make_field_list, 2); @@ -673,7 +673,7 @@ pub unsafe extern "C" fn visit_expression( BinaryOperator::In => visitor.visit_in, BinaryOperator::NotIn => visitor.visit_not_in, }; - op(visitor.data, child_list_id, sibling_list_id); + op(visitor.data, sibling_list_id, child_list_id); } Expression::UnaryOperation { op, expr } => { let child_id_list = call!(visitor, make_field_list, 1); @@ -682,7 +682,7 @@ pub unsafe extern "C" fn visit_expression( UnaryOperator::Not => visitor.visit_not, UnaryOperator::IsNull => visitor.visit_is_null, }; - op(visitor.data, child_id_list, sibling_list_id); + op(visitor.data, sibling_list_id, child_id_list); } Expression::VariadicOperation { op, exprs } => { visit_variadic(visitor, op, exprs, sibling_list_id) From fe339d13904d04bf84b34b2234871ee2ba6e437f Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Fri, 11 Oct 2024 17:18:18 -0700 Subject: [PATCH 46/82] Remove unnecessary change --- ffi/tests/read-table-testing/run_test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ffi/tests/read-table-testing/run_test.sh b/ffi/tests/read-table-testing/run_test.sh index 57b5410e2..970a444b1 100755 --- a/ffi/tests/read-table-testing/run_test.sh +++ b/ffi/tests/read-table-testing/run_test.sh @@ -7,6 +7,6 @@ OUT_FILE=$(mktemp) diff -s "$OUT_FILE" "$2" DIFF_EXIT_CODE=$? 
echo "Diff exited with $DIFF_EXIT_CODE" -# rm "$OUT_FILE" +rm "$OUT_FILE" exit "$DIFF_EXIT_CODE" From ba3153a63d08542a4d570cab019d1bc889cf2900 Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Fri, 11 Oct 2024 17:19:27 -0700 Subject: [PATCH 47/82] Fix grammar --- ffi/examples/read-table/expression.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ffi/examples/read-table/expression.h b/ffi/examples/read-table/expression.h index dc6c13fe4..09759108f 100644 --- a/ffi/examples/read-table/expression.h +++ b/ffi/examples/read-table/expression.h @@ -10,7 +10,7 @@ * provided expression. It consists of an "ExpressionBuilder" which is our user data that gets * passed into each visit_x call. This simply keeps track of all the lists we are asked to allocate. * - * Each item "ExpressionItem", which tracks the type and pointer to the expression. + * Each "ExpressionItem" tracks the type and pointer to the expression. * * Each complex type is made of an "ExpressionItemList", which tracks its length and an array of * "ExpressionItems" that make up the complex type. 
The top level expression is in a length 1 From 1b8575c3fe0e3b02555ad80211287c7371acc6cf Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Mon, 14 Oct 2024 09:55:46 -0700 Subject: [PATCH 48/82] Move test function to separate module --- ffi/Cargo.toml | 1 + ffi/src/expressions.rs | 94 +------------------------------------- ffi/src/lib.rs | 2 + ffi/src/test_ffi.rs | 100 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 104 insertions(+), 93 deletions(-) create mode 100644 ffi/src/test_ffi.rs diff --git a/ffi/Cargo.toml b/ffi/Cargo.toml index cf7aa6a34..d924bc9c0 100644 --- a/ffi/Cargo.toml +++ b/ffi/Cargo.toml @@ -47,3 +47,4 @@ default-engine = [ ] sync-engine = ["delta_kernel/sync-engine"] developer-visibility = [] +test_ffi = [] diff --git a/ffi/src/expressions.rs b/ffi/src/expressions.rs index eb351256f..b7b3fb6b7 100644 --- a/ffi/src/expressions.rs +++ b/ffi/src/expressions.rs @@ -1,4 +1,4 @@ -use std::{ffi::c_void, ops::Not, sync::Arc}; +use std::ffi::c_void; use crate::{ handle::Handle, AllocateErrorFn, EngineIterator, ExternResult, IntoExternResult, @@ -8,7 +8,6 @@ use delta_kernel::{ expressions::{ ArrayData, BinaryOperator, Expression, Scalar, StructData, UnaryOperator, VariadicOperator, }, - schema::{ArrayType, DataType, PrimitiveType, StructField, StructType}, DeltaResult, }; use delta_kernel_ffi_macros::handle_descriptor; @@ -262,97 +261,6 @@ pub unsafe extern "C" fn free_kernel_predicate(data: Handle) { data.drop_handle(); } -/// Constructs a kernel expression that is passed back as a SharedExpression handle -/// -/// # Safety -/// The caller is responsible for freeing the retured memory, either by calling -/// [`free_kernel_predicate`], or [`Handle::drop_handle`] -#[no_mangle] -pub unsafe extern "C" fn get_testing_kernel_expression() -> Handle { - use Expression as Expr; - - let array_type = ArrayType::new( - DataType::Primitive(delta_kernel::schema::PrimitiveType::Short), - false, - ); - let array_data = ArrayData::new(array_type.clone(), 
vec![Scalar::Short(5), Scalar::Short(0)]); - - let nested_fields = vec![ - StructField::new("a", DataType::Primitive(PrimitiveType::Integer), false), - StructField::new("b", DataType::Array(Box::new(array_type)), false), - ]; - let nested_values = vec![Scalar::Integer(500), Scalar::Array(array_data.clone())]; - let nested_struct = StructData::try_new(nested_fields.clone(), nested_values).unwrap(); - let nested_struct_type = StructType::new(nested_fields); - - let top_level_struct = StructData::try_new( - vec![StructField::new( - "top", - DataType::Struct(Box::new(nested_struct_type)), - true, - )], - vec![Scalar::Struct(nested_struct)], - ) - .unwrap(); - - let mut sub_exprs = vec![ - Expr::literal(Scalar::Byte(i8::MAX)), - Expr::literal(Scalar::Byte(i8::MIN)), - Expr::literal(Scalar::Float(f32::MAX)), - Expr::literal(Scalar::Float(f32::MIN)), - Expr::literal(Scalar::Double(f64::MAX)), - Expr::literal(Scalar::Double(f64::MIN)), - Expr::literal(Scalar::Integer(i32::MAX)), - Expr::literal(Scalar::Integer(i32::MIN)), - Expr::literal(Scalar::Long(i64::MAX)), - Expr::literal(Scalar::Long(i64::MIN)), - Expr::literal(Scalar::String("hello expressions".into())), - Expr::literal(Scalar::Boolean(true)), - Expr::literal(Scalar::Boolean(false)), - Expr::literal(Scalar::Timestamp(50)), - Expr::literal(Scalar::TimestampNtz(100)), - Expr::literal(Scalar::Date(32)), - Expr::literal(Scalar::Binary(b"0xdeadbeefcafe".to_vec())), - // Both the most and least significant u64 of the Decimal value will be 1 - Expr::literal(Scalar::Decimal((1 << 64) + 1, 2, 3)), - Expr::literal(Scalar::Null(DataType::Primitive(PrimitiveType::Short))), - Expr::literal(Scalar::Struct(top_level_struct)), - Expr::literal(Scalar::Array(array_data)), - Expr::struct_expr(vec![Expr::or_from(vec![ - Expr::literal(Scalar::Integer(5)), - Expr::literal(Scalar::Long(20)), - ])]), - Expr::not(Expr::is_null(Expr::column("col"))), - ]; - sub_exprs.extend( - [ - BinaryOperator::In, - BinaryOperator::Plus, - 
BinaryOperator::Minus, - BinaryOperator::Equal, - BinaryOperator::NotEqual, - BinaryOperator::NotIn, - BinaryOperator::Divide, - BinaryOperator::Multiply, - BinaryOperator::LessThan, - BinaryOperator::LessThanOrEqual, - BinaryOperator::GreaterThan, - BinaryOperator::GreaterThanOrEqual, - BinaryOperator::Distinct, - ] - .iter() - .map(|op| { - Expr::binary( - *op, - Expr::literal(Scalar::Integer(0)), - Expr::literal(Scalar::Long(0)), - ) - }), - ); - - Arc::new(Expr::and_from(sub_exprs)).into() -} - /// The [`EngineExpressionVisitor`] defines a visitor system to allow engines to build their own /// representation of an expression from a particular expression within the kernel. /// diff --git a/ffi/src/lib.rs b/ffi/src/lib.rs index 75a5f34a3..ae40c2abc 100644 --- a/ffi/src/lib.rs +++ b/ffi/src/lib.rs @@ -33,6 +33,8 @@ pub mod engine_funcs; pub mod expressions; pub mod scan; pub mod schema; +#[cfg(feature = "test_ffi")] +pub mod test_ffi; pub(crate) type NullableCvoid = Option>; diff --git a/ffi/src/test_ffi.rs b/ffi/src/test_ffi.rs new file mode 100644 index 000000000..a22a4a8b8 --- /dev/null +++ b/ffi/src/test_ffi.rs @@ -0,0 +1,100 @@ +//! 
Utility functions used for testing ffi code + +use std::{ops::Not, sync::Arc}; + +use crate::{expressions::SharedExpression, handle::Handle}; +use delta_kernel::{ + expressions::{ArrayData, BinaryOperator, Expression, Scalar, StructData}, + schema::{ArrayType, DataType, PrimitiveType, StructField, StructType}, +}; + +/// Constructs a kernel expression that is passed back as a SharedExpression handle +/// +/// # Safety +/// The caller is responsible for freeing the retured memory, either by calling +/// [`free_kernel_predicate`], or [`Handle::drop_handle`] +#[no_mangle] +pub unsafe extern "C" fn get_testing_kernel_expression() -> Handle { + use Expression as Expr; + + let array_type = ArrayType::new( + DataType::Primitive(delta_kernel::schema::PrimitiveType::Short), + false, + ); + let array_data = ArrayData::new(array_type.clone(), vec![Scalar::Short(5), Scalar::Short(0)]); + + let nested_fields = vec![ + StructField::new("a", DataType::Primitive(PrimitiveType::Integer), false), + StructField::new("b", DataType::Array(Box::new(array_type)), false), + ]; + let nested_values = vec![Scalar::Integer(500), Scalar::Array(array_data.clone())]; + let nested_struct = StructData::try_new(nested_fields.clone(), nested_values).unwrap(); + let nested_struct_type = StructType::new(nested_fields); + + let top_level_struct = StructData::try_new( + vec![StructField::new( + "top", + DataType::Struct(Box::new(nested_struct_type)), + true, + )], + vec![Scalar::Struct(nested_struct)], + ) + .unwrap(); + + let mut sub_exprs = vec![ + Expr::literal(Scalar::Byte(i8::MAX)), + Expr::literal(Scalar::Byte(i8::MIN)), + Expr::literal(Scalar::Float(f32::MAX)), + Expr::literal(Scalar::Float(f32::MIN)), + Expr::literal(Scalar::Double(f64::MAX)), + Expr::literal(Scalar::Double(f64::MIN)), + Expr::literal(Scalar::Integer(i32::MAX)), + Expr::literal(Scalar::Integer(i32::MIN)), + Expr::literal(Scalar::Long(i64::MAX)), + Expr::literal(Scalar::Long(i64::MIN)), + Expr::literal(Scalar::String("hello 
expressions".into())), + Expr::literal(Scalar::Boolean(true)), + Expr::literal(Scalar::Boolean(false)), + Expr::literal(Scalar::Timestamp(50)), + Expr::literal(Scalar::TimestampNtz(100)), + Expr::literal(Scalar::Date(32)), + Expr::literal(Scalar::Binary(b"0xdeadbeefcafe".to_vec())), + // Both the most and least significant u64 of the Decimal value will be 1 + Expr::literal(Scalar::Decimal((1 << 64) + 1, 2, 3)), + Expr::literal(Scalar::Null(DataType::Primitive(PrimitiveType::Short))), + Expr::literal(Scalar::Struct(top_level_struct)), + Expr::literal(Scalar::Array(array_data)), + Expr::struct_expr(vec![Expr::or_from(vec![ + Expr::literal(Scalar::Integer(5)), + Expr::literal(Scalar::Long(20)), + ])]), + Expr::not(Expr::is_null(Expr::column("col"))), + ]; + sub_exprs.extend( + [ + BinaryOperator::In, + BinaryOperator::Plus, + BinaryOperator::Minus, + BinaryOperator::Equal, + BinaryOperator::NotEqual, + BinaryOperator::NotIn, + BinaryOperator::Divide, + BinaryOperator::Multiply, + BinaryOperator::LessThan, + BinaryOperator::LessThanOrEqual, + BinaryOperator::GreaterThan, + BinaryOperator::GreaterThanOrEqual, + BinaryOperator::Distinct, + ] + .iter() + .map(|op| { + Expr::binary( + *op, + Expr::literal(Scalar::Integer(0)), + Expr::literal(Scalar::Long(0)), + ) + }), + ); + + Arc::new(Expr::and_from(sub_exprs)).into() +} From 6cfe39f4d04ff27d3ccbd3652ed7f6d8ed6ad6f5 Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Mon, 14 Oct 2024 09:58:26 -0700 Subject: [PATCH 49/82] Add feature flag to test --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index c977fde7a..582893d3f 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -108,7 +108,7 @@ jobs: cargo build popd pushd ffi - cargo b --features default-engine,sync-engine + cargo b --features default-engine,sync-engine,test_ffi popd - name: build and run test run: | From 
bd2332c4298c2dc042ca34919d73e8c9177d94ab Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Mon, 14 Oct 2024 10:06:56 -0700 Subject: [PATCH 50/82] Make sibling id second arg always --- ffi/examples/read-table/expression.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/ffi/examples/read-table/expression.h b/ffi/examples/read-table/expression.h index 09759108f..31dfd9f24 100644 --- a/ffi/examples/read-table/expression.h +++ b/ffi/examples/read-table/expression.h @@ -155,7 +155,7 @@ struct Literal * Utilitiy functions ************************************************************/ -void put_expr_item(void* data, void* ref, enum ExpressionType type, size_t sibling_list_id) +void put_expr_item(void* data, size_t sibling_list_id, void* ref, enum ExpressionType type) { ExpressionBuilder* data_ptr = (ExpressionBuilder*)data; ExpressionItem expr = { .ref = ref, .type = type }; @@ -195,7 +195,7 @@ void visit_expr_binop( struct BinOp* binop = malloc(sizeof(struct BinOp)); binop->op = op; binop->exprs = get_expr_list(data, child_id_list); - put_expr_item(data, binop, BinOp, sibling_id_list); + put_expr_item(data, sibling_id_list, binop, BinOp); } DEFINE_BINOP(visit_expr_add, Add) DEFINE_BINOP(visit_expr_minus, Minus) @@ -221,7 +221,7 @@ DEFINE_BINOP(visit_expr_not_in, NotIn) struct Literal* lit = malloc(sizeof(struct Literal)); \ lit->type = enum_member; \ lit->value.literal_field = val; \ - put_expr_item(data, lit, Literal, sibling_list_id); \ + put_expr_item(data, sibling_list_id, lit, Literal); \ } \ _Static_assert( \ sizeof(c_type) <= sizeof(uintptr_t), "The provided type is not a valid simple scalar") @@ -241,7 +241,7 @@ void visit_expr_string_literal(void* data, uintptr_t sibling_list_id, KernelStri struct Literal* literal = malloc(sizeof(struct Literal)); literal->type = String; literal->value.string_data = allocate_string(string); - put_expr_item(data, literal, Literal, sibling_list_id); + put_expr_item(data, 
sibling_list_id, literal, Literal); } void visit_expr_decimal_literal( @@ -259,7 +259,7 @@ void visit_expr_decimal_literal( dec->value[1] = value_ls; dec->precision = precision; dec->scale = scale; - put_expr_item(data, literal, Literal, sibling_list_id); + put_expr_item(data, sibling_list_id, literal, Literal); } void visit_expr_binary_literal( @@ -273,7 +273,7 @@ void visit_expr_binary_literal( struct BinaryData* bin = &literal->value.binary; bin->buf = malloc(len); memcpy(bin->buf, buf, len); - put_expr_item(data, literal, Literal, sibling_list_id); + put_expr_item(data, sibling_list_id, literal, Literal); } void visit_expr_struct_literal( @@ -287,14 +287,14 @@ void visit_expr_struct_literal( struct Struct* struct_data = &literal->value.struct_data; struct_data->fields = get_expr_list(data, child_field_list_id); struct_data->values = get_expr_list(data, child_value_list_id); - put_expr_item(data, literal, Literal, sibling_list_id); + put_expr_item(data, sibling_list_id, literal, Literal); } void visit_expr_null_literal(void* data, uintptr_t sibling_id_list) { struct Literal* literal = malloc(sizeof(struct Literal)); literal->type = Null; - put_expr_item(data, literal, Literal, sibling_id_list); + put_expr_item(data, sibling_id_list, literal, Literal); } /************************************************************* @@ -316,7 +316,7 @@ void visit_expr_variadic( struct Variadic* var = malloc(sizeof(struct Variadic)); var->op = op; var->exprs = get_expr_list(data, child_list_id); - put_expr_item(data, var, Variadic, sibling_list_id); + put_expr_item(data, sibling_list_id, var, Variadic); } DEFINE_VARIADIC(visit_expr_and, And) DEFINE_VARIADIC(visit_expr_or, Or) @@ -328,7 +328,7 @@ void visit_expr_array_literal(void* data, uintptr_t sibling_list_id, uintptr_t c literal->type = Array; struct ArrayData* arr = &(literal->value.array_data); arr->exprs = get_expr_list(data, child_list_id); - put_expr_item(data, literal, Literal, sibling_list_id); + put_expr_item(data, 
sibling_list_id, literal, Literal); } /************************************************************* @@ -349,7 +349,7 @@ void visit_expr_unary( struct Unary* unary = malloc(sizeof(struct Unary)); unary->type = type; unary->sub_expr = get_expr_list(data, child_list_id); - put_expr_item(data, unary, Unary, sibling_list_id); + put_expr_item(data, sibling_list_id, unary, Unary); } DEFINE_UNARY(visit_expr_is_null, IsNull) DEFINE_UNARY(visit_expr_not, Not) @@ -361,7 +361,7 @@ DEFINE_UNARY(visit_expr_not, Not) void visit_expr_column(void* data, uintptr_t sibling_id_list, KernelStringSlice string) { char* column_name = allocate_string(string); - put_expr_item(data, column_name, Column, sibling_id_list); + put_expr_item(data, sibling_id_list, column_name, Column); } /************************************************************* From 046147d4d5046ff706d274372139660405cf128e Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Mon, 14 Oct 2024 10:29:26 -0700 Subject: [PATCH 51/82] Test binary data --- ffi/examples/read-table/expression.h | 38 +++++++++---------- ffi/src/test_ffi.rs | 2 +- .../test_expression_visitor/expected.txt | 2 +- 3 files changed, 21 insertions(+), 21 deletions(-) diff --git a/ffi/examples/read-table/expression.h b/ffi/examples/read-table/expression.h index 31dfd9f24..e5b5e5c2d 100644 --- a/ffi/examples/read-table/expression.h +++ b/ffi/examples/read-table/expression.h @@ -64,6 +64,17 @@ enum ExpressionType Unary, Column }; +enum VariadicType +{ + And, + Or, + StructExpression, +}; +enum UnaryType +{ + Not, + IsNull +}; typedef struct { void* ref; @@ -82,18 +93,6 @@ struct BinOp }; struct Null; -enum VariadicType -{ - And, - Or, - StructExpression, - ArrayData -}; -enum UnaryType -{ - Not, - IsNull -}; struct Variadic { enum VariadicType op; @@ -125,12 +124,10 @@ struct Struct ExpressionItemList fields; ExpressionItemList values; }; - struct ArrayData { ExpressionItemList exprs; }; - struct Literal { enum LitType type; @@ -272,6 +269,7 @@ void 
visit_expr_binary_literal( literal->type = Binary; struct BinaryData* bin = &literal->value.binary; bin->buf = malloc(len); + bin->len = len; memcpy(bin->buf, buf, len); put_expr_item(data, sibling_list_id, literal, Literal); } @@ -595,9 +593,6 @@ void print_tree(ExpressionItem ref, int depth) case StructExpression: printf("StructExpression\n"); break; - case ArrayData: - printf("ArrayData\n"); - break; } for (size_t i = 0; i < var->exprs.len; i++) { print_tree(var->exprs.list[i], depth + 1); @@ -651,9 +646,14 @@ void print_tree(ExpressionItem ref, int depth) printf("Date"); printf("(%d)\n", lit->value.integer_data); break; - case Binary: - printf("Binary\n"); + case Binary: { + printf("Binary("); + for (size_t i = 0; i < lit->value.binary.len; i++) { + printf("%02x", lit->value.binary.buf[i]); + } + printf(")\n"); break; + } case Decimal: { struct Decimal* dec = &lit->value.decimal; printf( diff --git a/ffi/src/test_ffi.rs b/ffi/src/test_ffi.rs index a22a4a8b8..8a3cedc0a 100644 --- a/ffi/src/test_ffi.rs +++ b/ffi/src/test_ffi.rs @@ -58,7 +58,7 @@ pub unsafe extern "C" fn get_testing_kernel_expression() -> Handle Date: Mon, 14 Oct 2024 11:21:58 -0700 Subject: [PATCH 52/82] move abort to assert --- ffi/examples/read-table/expression.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/ffi/examples/read-table/expression.h b/ffi/examples/read-table/expression.h index e5b5e5c2d..ed877a676 100644 --- a/ffi/examples/read-table/expression.h +++ b/ffi/examples/read-table/expression.h @@ -162,9 +162,7 @@ void put_expr_item(void* data, size_t sibling_list_id, void* ref, enum Expressio ExpressionItemList get_expr_list(void* data, size_t list_id) { ExpressionBuilder* data_ptr = (ExpressionBuilder*)data; - if (list_id > data_ptr->list_count) { - abort(); - } + assert(list_id < data_ptr->list_count); return data_ptr->lists[list_id]; } From d46b4e314d936b5002b354b82fde24649e27b476 Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Mon, 14 Oct 2024 13:18:00 
-0700 Subject: [PATCH 53/82] Small cosmetic changes --- ffi/src/expressions.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/ffi/src/expressions.rs b/ffi/src/expressions.rs index b7b3fb6b7..0bbbc48b6 100644 --- a/ffi/src/expressions.rs +++ b/ffi/src/expressions.rs @@ -333,16 +333,14 @@ pub struct EngineExpressionVisitor { pub visit_decimal_literal: extern "C" fn( data: *mut c_void, sibling_list_id: usize, - value_ms: u64, // Most significant 64 bits of decimal value - value_ls: u64, // Least significant 64 bits of decimal value + value_ms: u64, + value_ls: u64, precision: u8, scale: u8, ), /// Visit a struct literal belonging to the list identified by `sibling_list_id`. /// The field names of the struct are in a list identified by `child_field_list_id`. /// The values of the struct are in a list identified by `child_value_list_id`. - /// - /// TODO: Change `child_field_list_values` to take a list of `StructField` pub visit_struct_literal: extern "C" fn( data: *mut c_void, sibling_list_id: usize, From 75a84a32cabdc131fdf19512417243c410dee451 Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Tue, 15 Oct 2024 09:15:36 -0700 Subject: [PATCH 54/82] Address PR comments --- .github/workflows/build.yml | 12 +- ffi/Cargo.toml | 2 +- ffi/examples/read-table/CMakeLists.txt | 19 - ffi/examples/visit-expression/CMakeLists.txt | 22 + .../expression.h | 386 +++--------------- .../visit-expression/expression_print.h | 211 ++++++++++ .../visit_expr.c} | 5 +- ffi/src/lib.rs | 2 +- ffi/tests/test_expression_visitor/run_test.sh | 2 +- 9 files changed, 311 insertions(+), 350 deletions(-) create mode 100644 ffi/examples/visit-expression/CMakeLists.txt rename ffi/examples/{read-table => visit-expression}/expression.h (62%) create mode 100644 ffi/examples/visit-expression/expression_print.h rename ffi/examples/{read-table/test_expr.c => visit-expression/visit_expr.c} (87%) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 
582893d3f..c1edc2dd2 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -108,9 +108,9 @@ jobs: cargo build popd pushd ffi - cargo b --features default-engine,sync-engine,test_ffi + cargo b --features default-engine,sync-engine,test-ffi popd - - name: build and run test + - name: build and run read-table test run: | pushd ffi/examples/read-table mkdir build @@ -118,6 +118,14 @@ jobs: cmake .. make make test + - name: build and run visit-expression test + run: | + pushd ffi/examples/visit-expression + mkdir build + pushd build + cmake .. + make + make test coverage: runs-on: ubuntu-latest diff --git a/ffi/Cargo.toml b/ffi/Cargo.toml index d924bc9c0..e0aa53a64 100644 --- a/ffi/Cargo.toml +++ b/ffi/Cargo.toml @@ -47,4 +47,4 @@ default-engine = [ ] sync-engine = ["delta_kernel/sync-engine"] developer-visibility = [] -test_ffi = [] +test-ffi = [] diff --git a/ffi/examples/read-table/CMakeLists.txt b/ffi/examples/read-table/CMakeLists.txt index 244785800..4e5da2102 100644 --- a/ffi/examples/read-table/CMakeLists.txt +++ b/ffi/examples/read-table/CMakeLists.txt @@ -41,22 +41,3 @@ if(PRINT_DATA) target_compile_definitions(read_table PUBLIC PRINT_ARROW_DATA) endif(PRINT_DATA) -# Configuration for the `test_expr` executable -add_executable(test_expr test_expr.c) -target_compile_definitions(test_expr PUBLIC DEFINE_DEFAULT_ENGINE) -target_include_directories(test_expr PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/../../../target/ffi-headers") -target_link_directories(test_expr PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/../../../target/debug") -target_link_libraries(test_expr PUBLIC delta_kernel_ffi) -target_compile_options(test_expr PUBLIC) - -if(MSVC) - target_compile_options(test_expr PRIVATE /W4 /WX) -else() - target_compile_options(test_expr PRIVATE -Wall -Wextra -Wpedantic -Werror -Wno-strict-prototypes) -endif() - -# Add the kernel expresion -> engine expression test -include(CTest) -set(ExprTestRunner "../../../tests/test_expression_visitor/run_test.sh") 
-set(ExprExpectedPath "../../../tests/test_expression_visitor/expected.txt") -add_test(NAME test_expression_visitor COMMAND ${ExprTestRunner} ${ExprExpectedPath}) diff --git a/ffi/examples/visit-expression/CMakeLists.txt b/ffi/examples/visit-expression/CMakeLists.txt new file mode 100644 index 000000000..dd42254b2 --- /dev/null +++ b/ffi/examples/visit-expression/CMakeLists.txt @@ -0,0 +1,22 @@ +cmake_minimum_required(VERSION 3.12) +project(visit_expressions) + +add_executable(visit_expression visit_expr.c) +target_compile_definitions(visit_expression PUBLIC DEFINE_DEFAULT_ENGINE) +target_include_directories(visit_expression PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/../../../target/ffi-headers") +target_link_directories(visit_expression PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/../../../target/debug") +target_link_libraries(visit_expression PUBLIC delta_kernel_ffi) +target_compile_options(visit_expression PUBLIC) + +if(MSVC) + target_compile_options(visit_expression PRIVATE /W4 /WX) +else() + target_compile_options(visit_expression PRIVATE -Wall -Wextra -Wpedantic -Werror -Wno-strict-prototypes) +endif() + +# Add the kernel expresion -> engine expression test +include(CTest) +set(ExprTestRunner "../../../tests/test_expression_visitor/run_test.sh") +set(ExprExpectedPath "../../../tests/test_expression_visitor/expected.txt") +add_test(NAME test_expression_visitor COMMAND ${ExprTestRunner} ${ExprExpectedPath}) +add_test(NAME check_mem_leak COMMAND leaks -atExit -- ./visit_expression) diff --git a/ffi/examples/read-table/expression.h b/ffi/examples/visit-expression/expression.h similarity index 62% rename from ffi/examples/read-table/expression.h rename to ffi/examples/visit-expression/expression.h index ed877a676..f9432cf32 100644 --- a/ffi/examples/read-table/expression.h +++ b/ffi/examples/visit-expression/expression.h @@ -1,3 +1,5 @@ +#pragma once + #include "delta_kernel_ffi.h" #include #include @@ -21,8 +23,7 @@ * Data Types 
************************************************************/ -enum OpType -{ +enum OpType { Add, Minus, Divide, @@ -37,8 +38,7 @@ enum OpType In, NotIn, }; -enum LitType -{ +enum LitType { Integer, Long, Short, @@ -56,83 +56,59 @@ enum LitType Struct, Array }; -enum ExpressionType -{ - BinOp, - Variadic, - Literal, - Unary, - Column -}; -enum VariadicType -{ +enum ExpressionType { BinOp, Variadic, Literal, Unary, Column }; +enum VariadicType { And, Or, StructExpression, }; -enum UnaryType -{ - Not, - IsNull -}; -typedef struct -{ +enum UnaryType { Not, IsNull }; +typedef struct { void* ref; enum ExpressionType type; } ExpressionItem; -typedef struct -{ +typedef struct { uint32_t len; ExpressionItem* list; } ExpressionItemList; -struct BinOp -{ +struct BinOp { enum OpType op; ExpressionItemList exprs; }; struct Null; -struct Variadic -{ +struct Variadic { enum VariadicType op; ExpressionItemList exprs; }; -struct Unary -{ +struct Unary { enum UnaryType type; ExpressionItemList sub_expr; }; -struct BinaryData -{ +struct BinaryData { uint8_t* buf; uintptr_t len; }; -struct Decimal -{ +struct Decimal { uint64_t value[2]; uint8_t precision; uint8_t scale; }; -typedef struct -{ +typedef struct { size_t list_count; ExpressionItemList* lists; } ExpressionBuilder; -struct Struct -{ +struct Struct { ExpressionItemList fields; ExpressionItemList values; }; -struct ArrayData -{ +struct ArrayData { ExpressionItemList exprs; }; -struct Literal -{ +struct Literal { enum LitType type; - union LiteralValue - { + union LiteralValue { int32_t integer_data; int64_t long_data; int16_t short_data; @@ -152,23 +128,20 @@ struct Literal * Utilitiy functions ************************************************************/ -void put_expr_item(void* data, size_t sibling_list_id, void* ref, enum ExpressionType type) -{ +void put_expr_item(void* data, size_t sibling_list_id, void* ref, enum ExpressionType type) { ExpressionBuilder* data_ptr = (ExpressionBuilder*)data; ExpressionItem expr = { .ref 
= ref, .type = type }; ExpressionItemList* list = &data_ptr->lists[sibling_list_id]; list->list[list->len++] = expr; } -ExpressionItemList get_expr_list(void* data, size_t list_id) -{ +ExpressionItemList get_expr_list(void* data, size_t list_id) { ExpressionBuilder* data_ptr = (ExpressionBuilder*)data; assert(list_id < data_ptr->list_count); return data_ptr->lists[list_id]; } // utility to turn a slice into a char* -char* allocate_string(const KernelStringSlice slice) -{ +char* allocate_string(const KernelStringSlice slice) { return strndup(slice.ptr, slice.len); } @@ -177,16 +150,13 @@ char* allocate_string(const KernelStringSlice slice) ************************************************************/ #define DEFINE_BINOP(fun_name, op) \ - void fun_name(void* data, uintptr_t sibling_list_id, uintptr_t child_list_id) \ - { \ + void fun_name(void* data, uintptr_t sibling_list_id, uintptr_t child_list_id) { \ visit_expr_binop(data, sibling_list_id, op, child_list_id); \ } -void visit_expr_binop( - void* data, - uintptr_t sibling_id_list, - enum OpType op, - uintptr_t child_id_list) -{ +void visit_expr_binop(void* data, + uintptr_t sibling_id_list, + enum OpType op, + uintptr_t child_id_list) { struct BinOp* binop = malloc(sizeof(struct BinOp)); binop->op = op; binop->exprs = get_expr_list(data, child_id_list); @@ -211,15 +181,14 @@ DEFINE_BINOP(visit_expr_not_in, NotIn) ************************************************************/ #define DEFINE_SIMPLE_SCALAR(fun_name, enum_member, c_type, literal_field) \ - void fun_name(void* data, uintptr_t sibling_list_id, c_type val) \ - { \ + void fun_name(void* data, uintptr_t sibling_list_id, c_type val) { \ struct Literal* lit = malloc(sizeof(struct Literal)); \ lit->type = enum_member; \ lit->value.literal_field = val; \ put_expr_item(data, sibling_list_id, lit, Literal); \ } \ - _Static_assert( \ - sizeof(c_type) <= sizeof(uintptr_t), "The provided type is not a valid simple scalar") + _Static_assert(sizeof(c_type) <= 
sizeof(uintptr_t), \ + "The provided type is not a valid simple scalar") DEFINE_SIMPLE_SCALAR(visit_expr_int_literal, Integer, int32_t, integer_data); DEFINE_SIMPLE_SCALAR(visit_expr_long_literal, Long, int64_t, long_data); DEFINE_SIMPLE_SCALAR(visit_expr_short_literal, Short, int16_t, short_data); @@ -231,22 +200,19 @@ DEFINE_SIMPLE_SCALAR(visit_expr_timestamp_literal, Timestamp, int64_t, long_data DEFINE_SIMPLE_SCALAR(visit_expr_timestamp_ntz_literal, TimestampNtz, int64_t, long_data); DEFINE_SIMPLE_SCALAR(visit_expr_date_literal, Date, int32_t, integer_data); -void visit_expr_string_literal(void* data, uintptr_t sibling_list_id, KernelStringSlice string) -{ +void visit_expr_string_literal(void* data, uintptr_t sibling_list_id, KernelStringSlice string) { struct Literal* literal = malloc(sizeof(struct Literal)); literal->type = String; literal->value.string_data = allocate_string(string); put_expr_item(data, sibling_list_id, literal, Literal); } -void visit_expr_decimal_literal( - void* data, - uintptr_t sibling_list_id, - uint64_t value_ms, - uint64_t value_ls, - uint8_t precision, - uint8_t scale) -{ +void visit_expr_decimal_literal(void* data, + uintptr_t sibling_list_id, + uint64_t value_ms, + uint64_t value_ls, + uint8_t precision, + uint8_t scale) { struct Literal* literal = malloc(sizeof(struct Literal)); literal->type = Decimal; struct Decimal* dec = &literal->value.decimal; @@ -257,12 +223,10 @@ void visit_expr_decimal_literal( put_expr_item(data, sibling_list_id, literal, Literal); } -void visit_expr_binary_literal( - void* data, - uintptr_t sibling_list_id, - const uint8_t* buf, - uintptr_t len) -{ +void visit_expr_binary_literal(void* data, + uintptr_t sibling_list_id, + const uint8_t* buf, + uintptr_t len) { struct Literal* literal = malloc(sizeof(struct Literal)); literal->type = Binary; struct BinaryData* bin = &literal->value.binary; @@ -272,12 +236,10 @@ void visit_expr_binary_literal( put_expr_item(data, sibling_list_id, literal, Literal); } 
-void visit_expr_struct_literal( - void* data, - uintptr_t sibling_list_id, - uintptr_t child_field_list_id, - uintptr_t child_value_list_id) -{ +void visit_expr_struct_literal(void* data, + uintptr_t sibling_list_id, + uintptr_t child_field_list_id, + uintptr_t child_value_list_id) { struct Literal* literal = malloc(sizeof(struct Literal)); literal->type = Struct; struct Struct* struct_data = &literal->value.struct_data; @@ -286,8 +248,7 @@ void visit_expr_struct_literal( put_expr_item(data, sibling_list_id, literal, Literal); } -void visit_expr_null_literal(void* data, uintptr_t sibling_id_list) -{ +void visit_expr_null_literal(void* data, uintptr_t sibling_id_list) { struct Literal* literal = malloc(sizeof(struct Literal)); literal->type = Null; put_expr_item(data, sibling_id_list, literal, Literal); @@ -298,17 +259,14 @@ void visit_expr_null_literal(void* data, uintptr_t sibling_id_list) ************************************************************/ #define DEFINE_VARIADIC(fun_name, enum_member) \ - void fun_name(void* data, uintptr_t sibling_list_id, uintptr_t child_list_id) \ - { \ + void fun_name(void* data, uintptr_t sibling_list_id, uintptr_t child_list_id) { \ visit_expr_variadic(data, sibling_list_id, enum_member, child_list_id); \ } -void visit_expr_variadic( - void* data, - uintptr_t sibling_list_id, - enum VariadicType op, - uintptr_t child_list_id) -{ +void visit_expr_variadic(void* data, + uintptr_t sibling_list_id, + enum VariadicType op, + uintptr_t child_list_id) { struct Variadic* var = malloc(sizeof(struct Variadic)); var->op = op; var->exprs = get_expr_list(data, child_list_id); @@ -318,8 +276,7 @@ DEFINE_VARIADIC(visit_expr_and, And) DEFINE_VARIADIC(visit_expr_or, Or) DEFINE_VARIADIC(visit_expr_struct_expr, StructExpression) -void visit_expr_array_literal(void* data, uintptr_t sibling_list_id, uintptr_t child_list_id) -{ +void visit_expr_array_literal(void* data, uintptr_t sibling_list_id, uintptr_t child_list_id) { struct Literal* literal = 
malloc(sizeof(struct Literal)); literal->type = Array; struct ArrayData* arr = &(literal->value.array_data); @@ -331,17 +288,14 @@ void visit_expr_array_literal(void* data, uintptr_t sibling_list_id, uintptr_t c * Unary Expressions ************************************************************/ #define DEFINE_UNARY(fun_name, op) \ - void fun_name(void* data, uintptr_t sibling_list_id, uintptr_t child_list_id) \ - { \ + void fun_name(void* data, uintptr_t sibling_list_id, uintptr_t child_list_id) { \ visit_expr_unary(data, sibling_list_id, op, child_list_id); \ } -void visit_expr_unary( - void* data, - uintptr_t sibling_list_id, - enum UnaryType type, - uintptr_t child_list_id) -{ +void visit_expr_unary(void* data, + uintptr_t sibling_list_id, + enum UnaryType type, + uintptr_t child_list_id) { struct Unary* unary = malloc(sizeof(struct Unary)); unary->type = type; unary->sub_expr = get_expr_list(data, child_list_id); @@ -354,8 +308,7 @@ DEFINE_UNARY(visit_expr_not, Not) * Column Expression ************************************************************/ -void visit_expr_column(void* data, uintptr_t sibling_id_list, KernelStringSlice string) -{ +void visit_expr_column(void* data, uintptr_t sibling_id_list, KernelStringSlice string) { char* column_name = allocate_string(string); put_expr_item(data, sibling_id_list, column_name, Column); } @@ -364,8 +317,7 @@ void visit_expr_column(void* data, uintptr_t sibling_id_list, KernelStringSlice * EngineExpressionVisitor Implementation ************************************************************/ -uintptr_t make_field_list(void* data, uintptr_t reserve) -{ +uintptr_t make_field_list(void* data, uintptr_t reserve) { ExpressionBuilder* builder = data; int id = builder->list_count; builder->list_count++; @@ -376,8 +328,7 @@ uintptr_t make_field_list(void* data, uintptr_t reserve) return id; } -ExpressionItemList construct_predicate(SharedExpression* predicate) -{ +ExpressionItemList construct_predicate(SharedExpression* predicate) { 
ExpressionBuilder data = { 0 }; EngineExpressionVisitor visitor = { .data = &data, @@ -425,8 +376,7 @@ ExpressionItemList construct_predicate(SharedExpression* predicate) } void free_expression_list(ExpressionItemList list); -void free_expression_item(ExpressionItem ref) -{ +void free_expression_item(ExpressionItem ref) { switch (ref.type) { case BinOp: { struct BinOp* op = ref.ref; @@ -493,221 +443,9 @@ void free_expression_item(ExpressionItem ref) } } -void free_expression_list(ExpressionItemList list) -{ +void free_expression_list(ExpressionItemList list) { for (size_t i = 0; i < list.len; i++) { free_expression_item(list.list[i]); } free(list.list); } - -/************************************************************* - * Expression Printing - ************************************************************/ - -void print_n_spaces(int n) -{ - if (n == 0) - return; - printf(" "); - print_n_spaces(n - 1); -} -void print_tree(ExpressionItem ref, int depth) -{ - switch (ref.type) { - case BinOp: { - struct BinOp* op = ref.ref; - print_n_spaces(depth); - switch (op->op) { - case Add: { - printf("Add\n"); - break; - } - case Minus: { - printf("Minus\n"); - break; - }; - case Divide: { - printf("Divide\n"); - break; - }; - case Multiply: { - printf("Multiply\n"); - break; - }; - case LessThan: { - printf("LessThan\n"); - break; - }; - case LessThanOrEqual: { - printf("LessThanOrEqual\n"); - break; - } - case GreaterThan: { - printf("GreaterThan\n"); - break; - }; - case GreaterThaneOrEqual: { - printf("GreaterThanOrEqual\n"); - break; - }; - case Equal: { - printf("Equal\n"); - break; - }; - case NotEqual: { - printf("NotEqual\n"); - break; - }; - case In: { - printf("In\n"); - break; - }; - case NotIn: { - printf("NotIn\n"); - break; - }; break; - case Distinct: - printf("Distinct\n"); - break; - } - - ExpressionItem left = op->exprs.list[0]; - ExpressionItem right = op->exprs.list[1]; - print_tree(left, depth + 1); - print_tree(right, depth + 1); - break; - } - case 
Variadic: { - struct Variadic* var = ref.ref; - print_n_spaces(depth); - switch (var->op) { - case And: - printf("And\n"); - break; - case Or: - printf("Or\n"); - break; - case StructExpression: - printf("StructExpression\n"); - break; - } - for (size_t i = 0; i < var->exprs.len; i++) { - print_tree(var->exprs.list[i], depth + 1); - } - } break; - case Literal: { - struct Literal* lit = ref.ref; - print_n_spaces(depth); - switch (lit->type) { - case Integer: - printf("Integer"); - printf("(%d)\n", lit->value.integer_data); - break; - case Long: - printf("Long"); - printf("(%lld)\n", (long long)lit->value.long_data); - break; - case Short: - printf("Short"); - printf("(%hd)\n", lit->value.short_data); - break; - case Byte: - printf("Byte"); - printf("(%hhd)\n", lit->value.byte_data); - break; - case Float: - printf("Float"); - printf("(%f)\n", (float)lit->value.float_data); - break; - case Double: - printf("Double"); - printf("(%f)\n", lit->value.double_data); - break; - case String: { - printf("String(%s)\n", lit->value.string_data); - break; - } - case Boolean: - printf("Boolean"); - printf("(%d)\n", lit->value.boolean_data); - break; - case Timestamp: - printf("Timestamp"); - printf("(%lld)\n", (long long)lit->value.long_data); - break; - case TimestampNtz: - printf("TimestampNtz"); - printf("(%lld)\n", (long long)lit->value.long_data); - break; - case Date: - printf("Date"); - printf("(%d)\n", lit->value.integer_data); - break; - case Binary: { - printf("Binary("); - for (size_t i = 0; i < lit->value.binary.len; i++) { - printf("%02x", lit->value.binary.buf[i]); - } - printf(")\n"); - break; - } - case Decimal: { - struct Decimal* dec = &lit->value.decimal; - printf( - "Decimal(%lld,%lld, %d, %d)\n", - (long long)dec->value[0], - (long long)dec->value[1], - dec->scale, - dec->precision); - break; - } - case Null: - printf("Null\n"); - break; - case Struct: - printf("Struct\n"); - struct Struct* struct_data = &lit->value.struct_data; - for (size_t i = 0; i < 
struct_data->values.len; i++) { - print_n_spaces(depth + 1); - - // Extract field name from field - ExpressionItem item = struct_data->fields.list[i]; - assert(item.type == Literal); - struct Literal* lit = item.ref; - assert(lit->type == String); - - printf("Field: %s\n", lit->value.string_data); - print_tree(struct_data->values.list[i], depth + 2); - } - break; - case Array: - printf("Array\n"); - struct ArrayData* array = &lit->value.array_data; - for (size_t i = 0; i < array->exprs.len; i++) { - print_tree(array->exprs.list[i], depth + 1); - } - break; - } - } break; - case Unary: { - print_n_spaces(depth); - struct Unary* unary = ref.ref; - switch (unary->type) { - case Not: - printf("Not\n"); - break; - case IsNull: - printf("IsNull\n"); - break; - } - print_tree(unary->sub_expr.list[0], depth + 1); - break; - } - case Column: - print_n_spaces(depth); - char* column_name = ref.ref; - printf("Column(%s)\n", column_name); - break; - } -} diff --git a/ffi/examples/visit-expression/expression_print.h b/ffi/examples/visit-expression/expression_print.h new file mode 100644 index 000000000..fd8e749f9 --- /dev/null +++ b/ffi/examples/visit-expression/expression_print.h @@ -0,0 +1,211 @@ + +#pragma once + +#include "expression.h" +/************************************************************* + * Expression Printing + ************************************************************/ + +void print_n_spaces(int n) { + if (n == 0) + return; + printf(" "); + print_n_spaces(n - 1); +} +void print_tree(ExpressionItem ref, int depth) { + switch (ref.type) { + case BinOp: { + struct BinOp* op = ref.ref; + print_n_spaces(depth); + switch (op->op) { + case Add: { + printf("Add\n"); + break; + } + case Minus: { + printf("Minus\n"); + break; + }; + case Divide: { + printf("Divide\n"); + break; + }; + case Multiply: { + printf("Multiply\n"); + break; + }; + case LessThan: { + printf("LessThan\n"); + break; + }; + case LessThanOrEqual: { + printf("LessThanOrEqual\n"); + break; + } + 
case GreaterThan: { + printf("GreaterThan\n"); + break; + }; + case GreaterThaneOrEqual: { + printf("GreaterThanOrEqual\n"); + break; + }; + case Equal: { + printf("Equal\n"); + break; + }; + case NotEqual: { + printf("NotEqual\n"); + break; + }; + case In: { + printf("In\n"); + break; + }; + case NotIn: { + printf("NotIn\n"); + break; + }; break; + case Distinct: + printf("Distinct\n"); + break; + } + + ExpressionItem left = op->exprs.list[0]; + ExpressionItem right = op->exprs.list[1]; + print_tree(left, depth + 1); + print_tree(right, depth + 1); + break; + } + case Variadic: { + struct Variadic* var = ref.ref; + print_n_spaces(depth); + switch (var->op) { + case And: + printf("And\n"); + break; + case Or: + printf("Or\n"); + break; + case StructExpression: + printf("StructExpression\n"); + break; + } + for (size_t i = 0; i < var->exprs.len; i++) { + print_tree(var->exprs.list[i], depth + 1); + } + } break; + case Literal: { + struct Literal* lit = ref.ref; + print_n_spaces(depth); + switch (lit->type) { + case Integer: + printf("Integer"); + printf("(%d)\n", lit->value.integer_data); + break; + case Long: + printf("Long"); + printf("(%lld)\n", (long long)lit->value.long_data); + break; + case Short: + printf("Short"); + printf("(%hd)\n", lit->value.short_data); + break; + case Byte: + printf("Byte"); + printf("(%hhd)\n", lit->value.byte_data); + break; + case Float: + printf("Float"); + printf("(%f)\n", (float)lit->value.float_data); + break; + case Double: + printf("Double"); + printf("(%f)\n", lit->value.double_data); + break; + case String: { + printf("String(%s)\n", lit->value.string_data); + break; + } + case Boolean: + printf("Boolean"); + printf("(%d)\n", lit->value.boolean_data); + break; + case Timestamp: + printf("Timestamp"); + printf("(%lld)\n", (long long)lit->value.long_data); + break; + case TimestampNtz: + printf("TimestampNtz"); + printf("(%lld)\n", (long long)lit->value.long_data); + break; + case Date: + printf("Date"); + printf("(%d)\n", 
lit->value.integer_data); + break; + case Binary: { + printf("Binary("); + for (size_t i = 0; i < lit->value.binary.len; i++) { + printf("%02x", lit->value.binary.buf[i]); + } + printf(")\n"); + break; + } + case Decimal: { + struct Decimal* dec = &lit->value.decimal; + printf("Decimal(%lld,%lld, %d, %d)\n", + (long long)dec->value[0], + (long long)dec->value[1], + dec->scale, + dec->precision); + break; + } + case Null: + printf("Null\n"); + break; + case Struct: + printf("Struct\n"); + struct Struct* struct_data = &lit->value.struct_data; + for (size_t i = 0; i < struct_data->values.len; i++) { + print_n_spaces(depth + 1); + + // Extract field name from field + ExpressionItem item = struct_data->fields.list[i]; + assert(item.type == Literal); + struct Literal* lit = item.ref; + assert(lit->type == String); + + printf("Field: %s\n", lit->value.string_data); + print_tree(struct_data->values.list[i], depth + 2); + } + break; + case Array: + printf("Array\n"); + struct ArrayData* array = &lit->value.array_data; + for (size_t i = 0; i < array->exprs.len; i++) { + print_tree(array->exprs.list[i], depth + 1); + } + break; + } + } break; + case Unary: { + print_n_spaces(depth); + struct Unary* unary = ref.ref; + switch (unary->type) { + case Not: + printf("Not\n"); + break; + case IsNull: + printf("IsNull\n"); + break; + } + print_tree(unary->sub_expr.list[0], depth + 1); + break; + } + case Column: + print_n_spaces(depth); + char* column_name = ref.ref; + printf("Column(%s)\n", column_name); + break; + } +} diff --git a/ffi/examples/read-table/test_expr.c b/ffi/examples/visit-expression/visit_expr.c similarity index 87% rename from ffi/examples/read-table/test_expr.c rename to ffi/examples/visit-expression/visit_expr.c index d6a4a1b70..a8fe919ab 100644 --- a/ffi/examples/read-table/test_expr.c +++ b/ffi/examples/visit-expression/visit_expr.c @@ -1,7 +1,8 @@ + #include "delta_kernel_ffi.h" #include "expression.h" -int main() -{ +#include "expression_print.h" +int main() 
{ SharedExpression* pred = get_testing_kernel_expression(); ExpressionItemList list = construct_predicate(pred); ExpressionItem ref = list.list[0]; diff --git a/ffi/src/lib.rs b/ffi/src/lib.rs index ae40c2abc..0db8e1017 100644 --- a/ffi/src/lib.rs +++ b/ffi/src/lib.rs @@ -33,7 +33,7 @@ pub mod engine_funcs; pub mod expressions; pub mod scan; pub mod schema; -#[cfg(feature = "test_ffi")] +#[cfg(feature = "test-ffi")] pub mod test_ffi; pub(crate) type NullableCvoid = Option>; diff --git a/ffi/tests/test_expression_visitor/run_test.sh b/ffi/tests/test_expression_visitor/run_test.sh index 06a061627..2c00c9ea4 100755 --- a/ffi/tests/test_expression_visitor/run_test.sh +++ b/ffi/tests/test_expression_visitor/run_test.sh @@ -3,7 +3,7 @@ set -euxo pipefail OUT_FILE=$(mktemp) -./test_expr | tee "$OUT_FILE" +./visit_expression | tee "$OUT_FILE" diff -s "$OUT_FILE" "$1" DIFF_EXIT_CODE=$? echo "Diff exited with $DIFF_EXIT_CODE" From d8250acc36024c5de69846ae44d414e04721d58b Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Tue, 15 Oct 2024 09:20:01 -0700 Subject: [PATCH 55/82] Remove unneccessary changes --- ffi/examples/read-table/CMakeLists.txt | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/ffi/examples/read-table/CMakeLists.txt b/ffi/examples/read-table/CMakeLists.txt index 4e5da2102..2df2b38e4 100644 --- a/ffi/examples/read-table/CMakeLists.txt +++ b/ffi/examples/read-table/CMakeLists.txt @@ -1,8 +1,6 @@ cmake_minimum_required(VERSION 3.12) project(read_table) option(PRINT_DATA "Print out the table data. 
Requires arrow-glib" ON) - -# Configuration for the read_table executable add_executable(read_table read_table.c arrow.c) target_compile_definitions(read_table PUBLIC DEFINE_DEFAULT_ENGINE) target_include_directories(read_table PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/../../../target/ffi-headers") @@ -10,7 +8,7 @@ target_link_directories(read_table PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/../../../ target_link_libraries(read_table PUBLIC delta_kernel_ffi) target_compile_options(read_table PUBLIC) -# Add the read_table test +# Add the test include(CTest) set(TestRunner "../../../tests/read-table-testing/run_test.sh") set(DatPath "../../../../acceptance/tests/dat/out/reader_tests/generated") @@ -40,4 +38,3 @@ if(PRINT_DATA) target_compile_options(read_table PUBLIC ${ARROW_GLIB_CFLAGS_OTHER}) target_compile_definitions(read_table PUBLIC PRINT_ARROW_DATA) endif(PRINT_DATA) - From 748c45a4435589c1253fe6810f0270e2217b404f Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Tue, 15 Oct 2024 09:23:20 -0700 Subject: [PATCH 56/82] better naming --- ffi/examples/visit-expression/CMakeLists.txt | 2 +- .../visit-expression/{visit_expr.c => visit_expression.c} | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) rename ffi/examples/visit-expression/{visit_expr.c => visit_expression.c} (99%) diff --git a/ffi/examples/visit-expression/CMakeLists.txt b/ffi/examples/visit-expression/CMakeLists.txt index dd42254b2..d9e94cc23 100644 --- a/ffi/examples/visit-expression/CMakeLists.txt +++ b/ffi/examples/visit-expression/CMakeLists.txt @@ -1,7 +1,7 @@ cmake_minimum_required(VERSION 3.12) project(visit_expressions) -add_executable(visit_expression visit_expr.c) +add_executable(visit_expression visit_expression.c) target_compile_definitions(visit_expression PUBLIC DEFINE_DEFAULT_ENGINE) target_include_directories(visit_expression PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/../../../target/ffi-headers") target_link_directories(visit_expression PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/../../../target/debug") diff 
--git a/ffi/examples/visit-expression/visit_expr.c b/ffi/examples/visit-expression/visit_expression.c similarity index 99% rename from ffi/examples/visit-expression/visit_expr.c rename to ffi/examples/visit-expression/visit_expression.c index a8fe919ab..536151d58 100644 --- a/ffi/examples/visit-expression/visit_expr.c +++ b/ffi/examples/visit-expression/visit_expression.c @@ -1,4 +1,3 @@ - #include "delta_kernel_ffi.h" #include "expression.h" #include "expression_print.h" From 8f1b2eb8405cd241ad8efda146b8e551d07e6c23 Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Tue, 15 Oct 2024 09:30:17 -0700 Subject: [PATCH 57/82] Improve doc comment --- ffi/examples/visit-expression/expression_print.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/ffi/examples/visit-expression/expression_print.h b/ffi/examples/visit-expression/expression_print.h index fd8e749f9..1a3d68f36 100644 --- a/ffi/examples/visit-expression/expression_print.h +++ b/ffi/examples/visit-expression/expression_print.h @@ -1,10 +1,9 @@ - #pragma once - #include "expression.h" -/************************************************************* - * Expression Printing - ************************************************************/ + +/** + * This module defines a function `print_tree` to recursively print an ExpressionItem. 
+ */ void print_n_spaces(int n) { if (n == 0) From d67d7d1853bd3e5dd69a0ccf3f073d4cd6b92024 Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Tue, 15 Oct 2024 09:31:06 -0700 Subject: [PATCH 58/82] Spacing --- ffi/examples/visit-expression/expression.h | 1 + ffi/examples/visit-expression/expression_print.h | 1 + 2 files changed, 2 insertions(+) diff --git a/ffi/examples/visit-expression/expression.h b/ffi/examples/visit-expression/expression.h index f9432cf32..cff6fc4b5 100644 --- a/ffi/examples/visit-expression/expression.h +++ b/ffi/examples/visit-expression/expression.h @@ -7,6 +7,7 @@ #include #include #include + /** * This module defines a very simple model of an expression, used only to be able to print the * provided expression. It consists of an "ExpressionBuilder" which is our user data that gets diff --git a/ffi/examples/visit-expression/expression_print.h b/ffi/examples/visit-expression/expression_print.h index 1a3d68f36..7d9685840 100644 --- a/ffi/examples/visit-expression/expression_print.h +++ b/ffi/examples/visit-expression/expression_print.h @@ -1,4 +1,5 @@ #pragma once + #include "expression.h" /** From 05e95dc34c1d9f347c4064652e80211f53739fe3 Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Tue, 15 Oct 2024 09:35:06 -0700 Subject: [PATCH 59/82] Remove leaks test --- ffi/examples/visit-expression/CMakeLists.txt | 1 - ffi/examples/visit-expression/visit_expression.c | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/ffi/examples/visit-expression/CMakeLists.txt b/ffi/examples/visit-expression/CMakeLists.txt index d9e94cc23..bcc2743ca 100644 --- a/ffi/examples/visit-expression/CMakeLists.txt +++ b/ffi/examples/visit-expression/CMakeLists.txt @@ -19,4 +19,3 @@ include(CTest) set(ExprTestRunner "../../../tests/test_expression_visitor/run_test.sh") set(ExprExpectedPath "../../../tests/test_expression_visitor/expected.txt") add_test(NAME test_expression_visitor COMMAND ${ExprTestRunner} ${ExprExpectedPath}) -add_test(NAME check_mem_leak 
COMMAND leaks -atExit -- ./visit_expression) diff --git a/ffi/examples/visit-expression/visit_expression.c b/ffi/examples/visit-expression/visit_expression.c index 536151d58..1fbc7e258 100644 --- a/ffi/examples/visit-expression/visit_expression.c +++ b/ffi/examples/visit-expression/visit_expression.c @@ -1,6 +1,7 @@ #include "delta_kernel_ffi.h" #include "expression.h" #include "expression_print.h" + int main() { SharedExpression* pred = get_testing_kernel_expression(); ExpressionItemList list = construct_predicate(pred); From 22b88967edca95d083f840ed0dcc49d52624818e Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Tue, 15 Oct 2024 09:46:11 -0700 Subject: [PATCH 60/82] Rename print_tree, take out implementation detail of lists from examples --- .../visit-expression/expression_print.h | 31 ++++++++++--------- .../visit-expression/visit_expression.c | 7 ++--- 2 files changed, 20 insertions(+), 18 deletions(-) diff --git a/ffi/examples/visit-expression/expression_print.h b/ffi/examples/visit-expression/expression_print.h index 7d9685840..fb69b5e08 100644 --- a/ffi/examples/visit-expression/expression_print.h +++ b/ffi/examples/visit-expression/expression_print.h @@ -6,13 +6,19 @@ * This module defines a function `print_tree` to recursively print an ExpressionItem. 
*/ +void print_tree_helper(ExpressionItem ref, int depth); void print_n_spaces(int n) { if (n == 0) return; printf(" "); print_n_spaces(n - 1); } -void print_tree(ExpressionItem ref, int depth) { +void print_expression_item_list(ExpressionItemList list, int depth) { + for (size_t i = 0; i < list.len; i++) { + print_tree_helper(list.list[i], depth); + } +} +void print_tree_helper(ExpressionItem ref, int depth) { switch (ref.type) { case BinOp: { struct BinOp* op = ref.ref; @@ -70,11 +76,7 @@ void print_tree(ExpressionItem ref, int depth) { printf("Distinct\n"); break; } - - ExpressionItem left = op->exprs.list[0]; - ExpressionItem right = op->exprs.list[1]; - print_tree(left, depth + 1); - print_tree(right, depth + 1); + print_expression_item_list(op->exprs, depth + 1); break; } case Variadic: { @@ -91,9 +93,7 @@ void print_tree(ExpressionItem ref, int depth) { printf("StructExpression\n"); break; } - for (size_t i = 0; i < var->exprs.len; i++) { - print_tree(var->exprs.list[i], depth + 1); - } + print_expression_item_list(var->exprs, depth + 1); } break; case Literal: { struct Literal* lit = ref.ref; @@ -176,15 +176,13 @@ void print_tree(ExpressionItem ref, int depth) { assert(lit->type == String); printf("Field: %s\n", lit->value.string_data); - print_tree(struct_data->values.list[i], depth + 2); + print_tree_helper(struct_data->values.list[i], depth + 2); } break; case Array: printf("Array\n"); struct ArrayData* array = &lit->value.array_data; - for (size_t i = 0; i < array->exprs.len; i++) { - print_tree(array->exprs.list[i], depth + 1); - } + print_expression_item_list(array->exprs, depth + 1); break; } } break; @@ -199,7 +197,8 @@ void print_tree(ExpressionItem ref, int depth) { printf("IsNull\n"); break; } - print_tree(unary->sub_expr.list[0], depth + 1); + + print_expression_item_list(unary->sub_expr, depth + 1); break; } case Column: @@ -209,3 +208,7 @@ void print_tree(ExpressionItem ref, int depth) { break; } } + +void print_expression(ExpressionItemList 
expression) { + print_expression_item_list(expression, 0); +} diff --git a/ffi/examples/visit-expression/visit_expression.c b/ffi/examples/visit-expression/visit_expression.c index 1fbc7e258..2fec1b662 100644 --- a/ffi/examples/visit-expression/visit_expression.c +++ b/ffi/examples/visit-expression/visit_expression.c @@ -4,10 +4,9 @@ int main() { SharedExpression* pred = get_testing_kernel_expression(); - ExpressionItemList list = construct_predicate(pred); - ExpressionItem ref = list.list[0]; - print_tree(ref, 0); - free_expression_list(list); + ExpressionItemList expr = construct_predicate(pred); + print_expression(expr); + free_expression_list(expr); free_kernel_predicate(pred); return 0; } From 539d9c6e4d2eea83a624e62199ec101038f85ab8 Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Tue, 15 Oct 2024 15:28:39 -0700 Subject: [PATCH 61/82] Address PR comments --- ffi/examples/visit-expression/expression.h | 10 +- .../visit-expression/expression_print.h | 36 +- ffi/src/expressions/engine_expr_visitor.rs | 247 ++++++++++++++ .../kernel_expr_visitor.rs} | 317 ++---------------- ffi/src/expressions/mod.rs | 10 + ffi/src/scan.rs | 4 +- ffi/src/test_ffi.rs | 26 +- kernel/src/expressions/scalars.rs | 3 +- 8 files changed, 332 insertions(+), 321 deletions(-) create mode 100644 ffi/src/expressions/engine_expr_visitor.rs rename ffi/src/{expressions.rs => expressions/kernel_expr_visitor.rs} (62%) create mode 100644 ffi/src/expressions/mod.rs diff --git a/ffi/examples/visit-expression/expression.h b/ffi/examples/visit-expression/expression.h index cff6fc4b5..0d0111ead 100644 --- a/ffi/examples/visit-expression/expression.h +++ b/ffi/examples/visit-expression/expression.h @@ -177,6 +177,8 @@ DEFINE_BINOP(visit_expr_distinct, Distinct) DEFINE_BINOP(visit_expr_in, In) DEFINE_BINOP(visit_expr_not_in, NotIn) +#undef DEFINE_BINOP + /************************************************************* * Literal Values ************************************************************/ @@ 
-201,6 +203,8 @@ DEFINE_SIMPLE_SCALAR(visit_expr_timestamp_literal, Timestamp, int64_t, long_data DEFINE_SIMPLE_SCALAR(visit_expr_timestamp_ntz_literal, TimestampNtz, int64_t, long_data); DEFINE_SIMPLE_SCALAR(visit_expr_date_literal, Date, int32_t, integer_data); +#undef DEFINE_SIMPLE_SCALAR + void visit_expr_string_literal(void* data, uintptr_t sibling_list_id, KernelStringSlice string) { struct Literal* literal = malloc(sizeof(struct Literal)); literal->type = String; @@ -277,6 +281,8 @@ DEFINE_VARIADIC(visit_expr_and, And) DEFINE_VARIADIC(visit_expr_or, Or) DEFINE_VARIADIC(visit_expr_struct_expr, StructExpression) +#undef DEFINE_VARIADIC + void visit_expr_array_literal(void* data, uintptr_t sibling_list_id, uintptr_t child_list_id) { struct Literal* literal = malloc(sizeof(struct Literal)); literal->type = Array; @@ -305,6 +311,8 @@ void visit_expr_unary(void* data, DEFINE_UNARY(visit_expr_is_null, IsNull) DEFINE_UNARY(visit_expr_not, Not) +#undef DEFINE_UNARY + /************************************************************* * Column Expression ************************************************************/ @@ -438,7 +446,7 @@ void free_expression_item(ExpressionItem ref) { break; } case Column: { - free((void*)ref.ref); + free(ref.ref); break; } } diff --git a/ffi/examples/visit-expression/expression_print.h b/ffi/examples/visit-expression/expression_print.h index fb69b5e08..23c1dcb7b 100644 --- a/ffi/examples/visit-expression/expression_print.h +++ b/ffi/examples/visit-expression/expression_print.h @@ -94,54 +94,45 @@ void print_tree_helper(ExpressionItem ref, int depth) { break; } print_expression_item_list(var->exprs, depth + 1); - } break; + break; + } case Literal: { struct Literal* lit = ref.ref; print_n_spaces(depth); switch (lit->type) { case Integer: - printf("Integer"); - printf("(%d)\n", lit->value.integer_data); + printf("Integer(%d)\n", lit->value.integer_data); break; case Long: - printf("Long"); - printf("(%lld)\n", (long long)lit->value.long_data); 
+ printf("Long(%lld)\n", (long long)lit->value.long_data); break; case Short: - printf("Short"); - printf("(%hd)\n", lit->value.short_data); + printf("Short(%hd)\n", lit->value.short_data); break; case Byte: - printf("Byte"); - printf("(%hhd)\n", lit->value.byte_data); + printf("Byte(%hhd)\n", lit->value.byte_data); break; case Float: - printf("Float"); - printf("(%f)\n", (float)lit->value.float_data); + printf("Float(%f)\n", (float)lit->value.float_data); break; case Double: - printf("Double"); - printf("(%f)\n", lit->value.double_data); + printf("Double(%f)\n", lit->value.double_data); break; case String: { printf("String(%s)\n", lit->value.string_data); break; } case Boolean: - printf("Boolean"); - printf("(%d)\n", lit->value.boolean_data); + printf("Boolean(%d)\n", lit->value.boolean_data); break; case Timestamp: - printf("Timestamp"); - printf("(%lld)\n", (long long)lit->value.long_data); + printf("Timestamp(%lld)\n", (long long)lit->value.long_data); break; case TimestampNtz: - printf("TimestampNtz"); - printf("(%lld)\n", (long long)lit->value.long_data); + printf("TimestampNtz(%lld)\n", (long long)lit->value.long_data); break; case Date: - printf("Date"); - printf("(%d)\n", lit->value.integer_data); + printf("Date(%d)\n", lit->value.integer_data); break; case Binary: { printf("Binary("); @@ -185,7 +176,8 @@ void print_tree_helper(ExpressionItem ref, int depth) { print_expression_item_list(array->exprs, depth + 1); break; } - } break; + break; + } case Unary: { print_n_spaces(depth); struct Unary* unary = ref.ref; diff --git a/ffi/src/expressions/engine_expr_visitor.rs b/ffi/src/expressions/engine_expr_visitor.rs new file mode 100644 index 000000000..9bd1a01cb --- /dev/null +++ b/ffi/src/expressions/engine_expr_visitor.rs @@ -0,0 +1,247 @@ +use std::ffi::c_void; + +use crate::{ + AllocateErrorFn, EngineIterator, ExternResult, IntoExternResult, KernelStringSlice, + ReferenceSet, TryFromStringSlice, +}; +use delta_kernel::{ + expressions::{BinaryOperator, 
Expression, Scalar, UnaryOperator}, + DeltaResult, +}; + +#[derive(Default)] +pub struct KernelExpressionVisitorState { + // TODO: ReferenceSet> instead? + inflight_expressions: ReferenceSet, +} +impl KernelExpressionVisitorState { + pub fn new() -> Self { + Self { + inflight_expressions: Default::default(), + } + } +} + +/// A predicate that can be used to skip data when scanning. +/// +/// When invoking [`scan::scan`], The engine provides a pointer to the (engine's native) predicate, +/// along with a visitor function that can be invoked to recursively visit the predicate. This +/// engine state must be valid until the call to `scan::scan` returns. Inside that method, the +/// kernel allocates visitor state, which becomes the second argument to the predicate visitor +/// invocation along with the engine-provided predicate pointer. The visitor state is valid for the +/// lifetime of the predicate visitor invocation. Thanks to this double indirection, engine and +/// kernel each retain ownership of their respective objects, with no need to coordinate memory +/// lifetimes with the other. 
+#[repr(C)] +pub struct EnginePredicate { + pub predicate: *mut c_void, + pub visitor: + extern "C" fn(predicate: *mut c_void, state: &mut KernelExpressionVisitorState) -> usize, +} + +fn wrap_expression(state: &mut KernelExpressionVisitorState, expr: Expression) -> usize { + state.inflight_expressions.insert(expr) +} + +pub fn unwrap_kernel_expression( + state: &mut KernelExpressionVisitorState, + exprid: usize, +) -> Option { + state.inflight_expressions.take(exprid) +} + +fn visit_expression_binary( + state: &mut KernelExpressionVisitorState, + op: BinaryOperator, + a: usize, + b: usize, +) -> usize { + let left = unwrap_kernel_expression(state, a).map(Box::new); + let right = unwrap_kernel_expression(state, b).map(Box::new); + match left.zip(right) { + Some((left, right)) => { + wrap_expression(state, Expression::BinaryOperation { op, left, right }) + } + None => 0, // invalid child => invalid node + } +} + +fn visit_expression_unary( + state: &mut KernelExpressionVisitorState, + op: UnaryOperator, + inner_expr: usize, +) -> usize { + unwrap_kernel_expression(state, inner_expr).map_or(0, |expr| { + wrap_expression(state, Expression::unary(op, expr)) + }) +} + +// The EngineIterator is not thread safe, not reentrant, not owned by callee, not freed by callee. 
+#[no_mangle] +pub extern "C" fn visit_expression_and( + state: &mut KernelExpressionVisitorState, + children: &mut EngineIterator, +) -> usize { + let result = Expression::and_from( + children.flat_map(|child| unwrap_kernel_expression(state, child as usize)), + ); + wrap_expression(state, result) +} + +#[no_mangle] +pub extern "C" fn visit_expression_lt( + state: &mut KernelExpressionVisitorState, + a: usize, + b: usize, +) -> usize { + visit_expression_binary(state, BinaryOperator::LessThan, a, b) +} + +#[no_mangle] +pub extern "C" fn visit_expression_le( + state: &mut KernelExpressionVisitorState, + a: usize, + b: usize, +) -> usize { + visit_expression_binary(state, BinaryOperator::LessThanOrEqual, a, b) +} + +#[no_mangle] +pub extern "C" fn visit_expression_gt( + state: &mut KernelExpressionVisitorState, + a: usize, + b: usize, +) -> usize { + visit_expression_binary(state, BinaryOperator::GreaterThan, a, b) +} + +#[no_mangle] +pub extern "C" fn visit_expression_ge( + state: &mut KernelExpressionVisitorState, + a: usize, + b: usize, +) -> usize { + visit_expression_binary(state, BinaryOperator::GreaterThanOrEqual, a, b) +} + +#[no_mangle] +pub extern "C" fn visit_expression_eq( + state: &mut KernelExpressionVisitorState, + a: usize, + b: usize, +) -> usize { + visit_expression_binary(state, BinaryOperator::Equal, a, b) +} + +/// # Safety +/// The string slice must be valid +#[no_mangle] +pub unsafe extern "C" fn visit_expression_column( + state: &mut KernelExpressionVisitorState, + name: KernelStringSlice, + allocate_error: AllocateErrorFn, +) -> ExternResult { + let name = unsafe { String::try_from_slice(&name) }; + visit_expression_column_impl(state, name).into_extern_result(&allocate_error) +} +fn visit_expression_column_impl( + state: &mut KernelExpressionVisitorState, + name: DeltaResult, +) -> DeltaResult { + Ok(wrap_expression(state, Expression::Column(name?))) +} + +#[no_mangle] +pub extern "C" fn visit_expression_not( + state: &mut 
KernelExpressionVisitorState, + inner_expr: usize, +) -> usize { + visit_expression_unary(state, UnaryOperator::Not, inner_expr) +} + +#[no_mangle] +pub extern "C" fn visit_expression_is_null( + state: &mut KernelExpressionVisitorState, + inner_expr: usize, +) -> usize { + visit_expression_unary(state, UnaryOperator::IsNull, inner_expr) +} + +/// # Safety +/// The string slice must be valid +#[no_mangle] +pub unsafe extern "C" fn visit_expression_literal_string( + state: &mut KernelExpressionVisitorState, + value: KernelStringSlice, + allocate_error: AllocateErrorFn, +) -> ExternResult { + let value = unsafe { String::try_from_slice(&value) }; + visit_expression_literal_string_impl(state, value).into_extern_result(&allocate_error) +} +fn visit_expression_literal_string_impl( + state: &mut KernelExpressionVisitorState, + value: DeltaResult, +) -> DeltaResult { + Ok(wrap_expression( + state, + Expression::Literal(Scalar::from(value?)), + )) +} + +// We need to get parse.expand working to be able to macro everything below, see issue #255 + +#[no_mangle] +pub extern "C" fn visit_expression_literal_int( + state: &mut KernelExpressionVisitorState, + value: i32, +) -> usize { + wrap_expression(state, Expression::literal(value)) +} + +#[no_mangle] +pub extern "C" fn visit_expression_literal_long( + state: &mut KernelExpressionVisitorState, + value: i64, +) -> usize { + wrap_expression(state, Expression::literal(value)) +} + +#[no_mangle] +pub extern "C" fn visit_expression_literal_short( + state: &mut KernelExpressionVisitorState, + value: i16, +) -> usize { + wrap_expression(state, Expression::literal(value)) +} + +#[no_mangle] +pub extern "C" fn visit_expression_literal_byte( + state: &mut KernelExpressionVisitorState, + value: i8, +) -> usize { + wrap_expression(state, Expression::literal(value)) +} + +#[no_mangle] +pub extern "C" fn visit_expression_literal_float( + state: &mut KernelExpressionVisitorState, + value: f32, +) -> usize { + wrap_expression(state, 
Expression::literal(value)) +} + +#[no_mangle] +pub extern "C" fn visit_expression_literal_double( + state: &mut KernelExpressionVisitorState, + value: f64, +) -> usize { + wrap_expression(state, Expression::literal(value)) +} + +#[no_mangle] +pub extern "C" fn visit_expression_literal_bool( + state: &mut KernelExpressionVisitorState, + value: bool, +) -> usize { + wrap_expression(state, Expression::literal(value)) +} diff --git a/ffi/src/expressions.rs b/ffi/src/expressions/kernel_expr_visitor.rs similarity index 62% rename from ffi/src/expressions.rs rename to ffi/src/expressions/kernel_expr_visitor.rs index 0bbbc48b6..545ad0a92 100644 --- a/ffi/src/expressions.rs +++ b/ffi/src/expressions/kernel_expr_visitor.rs @@ -1,256 +1,10 @@ +use crate::expressions::SharedExpression; use std::ffi::c_void; -use crate::{ - handle::Handle, AllocateErrorFn, EngineIterator, ExternResult, IntoExternResult, - KernelStringSlice, ReferenceSet, TryFromStringSlice, +use crate::{handle::Handle, KernelStringSlice}; +use delta_kernel::expressions::{ + ArrayData, BinaryOperator, Expression, Scalar, StructData, UnaryOperator, VariadicOperator, }; -use delta_kernel::{ - expressions::{ - ArrayData, BinaryOperator, Expression, Scalar, StructData, UnaryOperator, VariadicOperator, - }, - DeltaResult, -}; -use delta_kernel_ffi_macros::handle_descriptor; - -#[derive(Default)] -pub struct KernelExpressionVisitorState { - // TODO: ReferenceSet> instead? - inflight_expressions: ReferenceSet, -} -impl KernelExpressionVisitorState { - pub fn new() -> Self { - Self { - inflight_expressions: Default::default(), - } - } -} - -/// A predicate that can be used to skip data when scanning. -/// -/// When invoking [`scan::scan`], The engine provides a pointer to the (engine's native) predicate, -/// along with a visitor function that can be invoked to recursively visit the predicate. This -/// engine state must be valid until the call to `scan::scan` returns. 
Inside that method, the -/// kernel allocates visitor state, which becomes the second argument to the predicate visitor -/// invocation along with the engine-provided predicate pointer. The visitor state is valid for the -/// lifetime of the predicate visitor invocation. Thanks to this double indirection, engine and -/// kernel each retain ownership of their respective objects, with no need to coordinate memory -/// lifetimes with the other. -#[repr(C)] -pub struct EnginePredicate { - pub predicate: *mut c_void, - pub visitor: - extern "C" fn(predicate: *mut c_void, state: &mut KernelExpressionVisitorState) -> usize, -} - -fn wrap_expression(state: &mut KernelExpressionVisitorState, expr: Expression) -> usize { - state.inflight_expressions.insert(expr) -} - -pub fn unwrap_kernel_expression( - state: &mut KernelExpressionVisitorState, - exprid: usize, -) -> Option { - state.inflight_expressions.take(exprid) -} - -fn visit_expression_binary( - state: &mut KernelExpressionVisitorState, - op: BinaryOperator, - a: usize, - b: usize, -) -> usize { - let left = unwrap_kernel_expression(state, a).map(Box::new); - let right = unwrap_kernel_expression(state, b).map(Box::new); - match left.zip(right) { - Some((left, right)) => { - wrap_expression(state, Expression::BinaryOperation { op, left, right }) - } - None => 0, // invalid child => invalid node - } -} - -fn visit_expression_unary( - state: &mut KernelExpressionVisitorState, - op: UnaryOperator, - inner_expr: usize, -) -> usize { - unwrap_kernel_expression(state, inner_expr).map_or(0, |expr| { - wrap_expression(state, Expression::unary(op, expr)) - }) -} - -// The EngineIterator is not thread safe, not reentrant, not owned by callee, not freed by callee. 
-#[no_mangle] -pub extern "C" fn visit_expression_and( - state: &mut KernelExpressionVisitorState, - children: &mut EngineIterator, -) -> usize { - let result = Expression::and_from( - children.flat_map(|child| unwrap_kernel_expression(state, child as usize)), - ); - wrap_expression(state, result) -} - -#[no_mangle] -pub extern "C" fn visit_expression_lt( - state: &mut KernelExpressionVisitorState, - a: usize, - b: usize, -) -> usize { - visit_expression_binary(state, BinaryOperator::LessThan, a, b) -} - -#[no_mangle] -pub extern "C" fn visit_expression_le( - state: &mut KernelExpressionVisitorState, - a: usize, - b: usize, -) -> usize { - visit_expression_binary(state, BinaryOperator::LessThanOrEqual, a, b) -} - -#[no_mangle] -pub extern "C" fn visit_expression_gt( - state: &mut KernelExpressionVisitorState, - a: usize, - b: usize, -) -> usize { - visit_expression_binary(state, BinaryOperator::GreaterThan, a, b) -} - -#[no_mangle] -pub extern "C" fn visit_expression_ge( - state: &mut KernelExpressionVisitorState, - a: usize, - b: usize, -) -> usize { - visit_expression_binary(state, BinaryOperator::GreaterThanOrEqual, a, b) -} - -#[no_mangle] -pub extern "C" fn visit_expression_eq( - state: &mut KernelExpressionVisitorState, - a: usize, - b: usize, -) -> usize { - visit_expression_binary(state, BinaryOperator::Equal, a, b) -} - -/// # Safety -/// The string slice must be valid -#[no_mangle] -pub unsafe extern "C" fn visit_expression_column( - state: &mut KernelExpressionVisitorState, - name: KernelStringSlice, - allocate_error: AllocateErrorFn, -) -> ExternResult { - let name = unsafe { String::try_from_slice(&name) }; - visit_expression_column_impl(state, name).into_extern_result(&allocate_error) -} -fn visit_expression_column_impl( - state: &mut KernelExpressionVisitorState, - name: DeltaResult, -) -> DeltaResult { - Ok(wrap_expression(state, Expression::Column(name?))) -} - -#[no_mangle] -pub extern "C" fn visit_expression_not( - state: &mut 
KernelExpressionVisitorState, - inner_expr: usize, -) -> usize { - visit_expression_unary(state, UnaryOperator::Not, inner_expr) -} - -#[no_mangle] -pub extern "C" fn visit_expression_is_null( - state: &mut KernelExpressionVisitorState, - inner_expr: usize, -) -> usize { - visit_expression_unary(state, UnaryOperator::IsNull, inner_expr) -} - -/// # Safety -/// The string slice must be valid -#[no_mangle] -pub unsafe extern "C" fn visit_expression_literal_string( - state: &mut KernelExpressionVisitorState, - value: KernelStringSlice, - allocate_error: AllocateErrorFn, -) -> ExternResult { - let value = unsafe { String::try_from_slice(&value) }; - visit_expression_literal_string_impl(state, value).into_extern_result(&allocate_error) -} -fn visit_expression_literal_string_impl( - state: &mut KernelExpressionVisitorState, - value: DeltaResult, -) -> DeltaResult { - Ok(wrap_expression( - state, - Expression::Literal(Scalar::from(value?)), - )) -} - -// We need to get parse.expand working to be able to macro everything below, see issue #255 - -#[no_mangle] -pub extern "C" fn visit_expression_literal_int( - state: &mut KernelExpressionVisitorState, - value: i32, -) -> usize { - wrap_expression(state, Expression::literal(value)) -} - -#[no_mangle] -pub extern "C" fn visit_expression_literal_long( - state: &mut KernelExpressionVisitorState, - value: i64, -) -> usize { - wrap_expression(state, Expression::literal(value)) -} - -#[no_mangle] -pub extern "C" fn visit_expression_literal_short( - state: &mut KernelExpressionVisitorState, - value: i16, -) -> usize { - wrap_expression(state, Expression::literal(value)) -} - -#[no_mangle] -pub extern "C" fn visit_expression_literal_byte( - state: &mut KernelExpressionVisitorState, - value: i8, -) -> usize { - wrap_expression(state, Expression::literal(value)) -} - -#[no_mangle] -pub extern "C" fn visit_expression_literal_float( - state: &mut KernelExpressionVisitorState, - value: f32, -) -> usize { - wrap_expression(state, 
Expression::literal(value)) -} - -#[no_mangle] -pub extern "C" fn visit_expression_literal_double( - state: &mut KernelExpressionVisitorState, - value: f64, -) -> usize { - wrap_expression(state, Expression::literal(value)) -} - -#[no_mangle] -pub extern "C" fn visit_expression_literal_bool( - state: &mut KernelExpressionVisitorState, - value: bool, -) -> usize { - wrap_expression(state, Expression::literal(value)) -} - -#[handle_descriptor(target=Expression, mutable=false, sized=true)] -pub struct SharedExpression; /// Free the memory the passed SharedExpression /// @@ -261,8 +15,12 @@ pub unsafe extern "C" fn free_kernel_predicate(data: Handle) { data.drop_handle(); } +type VisitLiteralFn = extern "C" fn(data: *mut c_void, sibling_list_id: usize, value: T); +type VisitBinaryOpFn = + extern "C" fn(data: *mut c_void, sibling_list_id: usize, child_list_id: usize); + /// The [`EngineExpressionVisitor`] defines a visitor system to allow engines to build their own -/// representation of an expression from a particular expression within the kernel. +/// representation of a kernel expression. /// /// The model is list based. When the kernel needs a list, it will ask engine to allocate one of a /// particular size. Once allocated the engine returns an `id`, which can be any integer identifier @@ -291,38 +49,35 @@ pub unsafe extern "C" fn free_kernel_predicate(data: Handle) { /// TODO: Add type information in struct field and null. This will likely involve using the schema visitor. #[repr(C)] pub struct EngineExpressionVisitor { - /// An opaque state pointer + /// An opaque engine state pointer pub data: *mut c_void, /// Creates a new expression list, optionally reserving capacity up front pub make_field_list: extern "C" fn(data: *mut c_void, reserve: usize) -> usize, /// Visit a 32bit `integer belonging to the list identified by `sibling_list_id`. 
- pub visit_int_literal: extern "C" fn(data: *mut c_void, sibling_list_id: usize, value: i32), + pub visit_int_literal: VisitLiteralFn, /// Visit a 64bit `long` belonging to the list identified by `sibling_list_id`. - pub visit_long_literal: extern "C" fn(data: *mut c_void, sibling_list_id: usize, value: i64), + pub visit_long_literal: VisitLiteralFn, /// Visit a 16bit `short` belonging to the list identified by `sibling_list_id`. - pub visit_short_literal: extern "C" fn(data: *mut c_void, sibling_list_id: usize, value: i16), + pub visit_short_literal: VisitLiteralFn, /// Visit an 8bit `byte` belonging to the list identified by `sibling_list_id`. - pub visit_byte_literal: extern "C" fn(data: *mut c_void, sibling_list_id: usize, value: i8), + pub visit_byte_literal: VisitLiteralFn, /// Visit a 32bit `float` belonging to the list identified by `sibling_list_id`. - pub visit_float_literal: extern "C" fn(data: *mut c_void, sibling_list_id: usize, value: f32), + pub visit_float_literal: VisitLiteralFn, /// Visit a 64bit `double` belonging to the list identified by `sibling_list_id`. - pub visit_double_literal: extern "C" fn(data: *mut c_void, sibling_list_id: usize, value: f64), + pub visit_double_literal: VisitLiteralFn, /// Visit a `string` belonging to the list identified by `sibling_list_id`. - pub visit_string_literal: - extern "C" fn(data: *mut c_void, sibling_list_id: usize, value: KernelStringSlice), + pub visit_string_literal: VisitLiteralFn, /// Visit a `boolean` belonging to the list identified by `sibling_list_id`. - pub visit_bool_literal: extern "C" fn(data: *mut c_void, sibling_list_id: usize, value: bool), + pub visit_bool_literal: VisitLiteralFn, /// Visit a 64bit timestamp belonging to the list identified by `sibling_list_id`. /// The timestamp is microsecond precision and adjusted to UTC. 
- pub visit_timestamp_literal: - extern "C" fn(data: *mut c_void, sibling_list_id: usize, value: i64), + pub visit_timestamp_literal: VisitLiteralFn, /// Visit a 64bit timestamp belonging to the list identified by `sibling_list_id`. /// The timestamp is microsecond precision with no timezone. - pub visit_timestamp_ntz_literal: - extern "C" fn(data: *mut c_void, sibling_list_id: usize, value: i64), + pub visit_timestamp_ntz_literal: VisitLiteralFn, /// Visit a 32bit intger `date` representing days since UNIX epoch 1970-01-01. The `date` belongs /// to the list identified by `sibling_list_id`. - pub visit_date_literal: extern "C" fn(data: *mut c_void, sibling_list_id: usize, value: i32), + pub visit_date_literal: VisitLiteralFn, /// Visit binary data at the `buffer` with length `len` belonging to the list identified by /// `sibling_list_id`. pub visit_binary_literal: @@ -368,47 +123,43 @@ pub struct EngineExpressionVisitor { extern "C" fn(data: *mut c_void, sibling_list_id: usize, child_list_id: usize), /// Visits the `LessThan` binary operator belonging to the list identified by `sibling_list_id`. /// The operands will be in a _two_ item list identified by `child_list_id` - pub visit_lt: extern "C" fn(data: *mut c_void, sibling_list_id: usize, child_list_id: usize), + pub visit_lt: VisitBinaryOpFn, /// Visits the `LessThanOrEqual` binary operator belonging to the list identified by `sibling_list_id`. /// The operands will be in a _two_ item list identified by `child_list_id` - pub visit_le: extern "C" fn(data: *mut c_void, sibling_list_id: usize, child_list_id: usize), + pub visit_le: VisitBinaryOpFn, /// Visits the `GreaterThan` binary operator belonging to the list identified by `sibling_list_id`. 
/// The operands will be in a _two_ item list identified by `child_list_id` - pub visit_gt: extern "C" fn(data: *mut c_void, sibling_list_id: usize, child_list_id: usize), + pub visit_gt: VisitBinaryOpFn, /// Visits the `GreaterThanOrEqual` binary operator belonging to the list identified by `sibling_list_id`. /// The operands will be in a _two_ item list identified by `child_list_id` - pub visit_ge: extern "C" fn(data: *mut c_void, sibling_list_id: usize, child_list_id: usize), + pub visit_ge: VisitBinaryOpFn, /// Visits the `Equal` binary operator belonging to the list identified by `sibling_list_id`. /// The operands will be in a _two_ item list identified by `child_list_id` - pub visit_eq: extern "C" fn(data: *mut c_void, sibling_list_id: usize, child_list_id: usize), + pub visit_eq: VisitBinaryOpFn, /// Visits the `NotEqual` binary operator belonging to the list identified by `sibling_list_id`. /// The operands will be in a _two_ item list identified by `child_list_id` - pub visit_ne: extern "C" fn(data: *mut c_void, sibling_list_id: usize, child_list_id: usize), + pub visit_ne: VisitBinaryOpFn, /// Visits the `Distinct` binary operator belonging to the list identified by `sibling_list_id`. /// The operands will be in a _two_ item list identified by `child_list_id` - pub visit_distinct: - extern "C" fn(data: *mut c_void, sibling_list_id: usize, child_list_id: usize), + pub visit_distinct: VisitBinaryOpFn, /// Visits the `In` binary operator belonging to the list identified by `sibling_list_id`. /// The operands will be in a _two_ item list identified by `child_list_id` - pub visit_in: extern "C" fn(data: *mut c_void, sibling_list_id: usize, child_list_id: usize), + pub visit_in: VisitBinaryOpFn, /// Visits the `NotIn` binary operator belonging to the list identified by `sibling_list_id`. 
/// The operands will be in a _two_ item list identified by `child_list_id` - pub visit_not_in: - extern "C" fn(data: *mut c_void, sibling_list_id: usize, child_list_id: usize), + pub visit_not_in: VisitBinaryOpFn, /// Visits the `Add` binary operator belonging to the list identified by `sibling_list_id`. /// The operands will be in a _two_ item list identified by `child_list_id` - pub visit_add: extern "C" fn(data: *mut c_void, sibling_list_id: usize, child_list_id: usize), + pub visit_add: VisitBinaryOpFn, /// Visits the `Minus` binary operator belonging to the list identified by `sibling_list_id`. /// The operands will be in a _two_ item list identified by `child_list_id` - pub visit_minus: extern "C" fn(data: *mut c_void, sibling_list_id: usize, child_list_id: usize), + pub visit_minus: VisitBinaryOpFn, /// Visits the `Multiply` binary operator belonging to the list identified by `sibling_list_id`. /// The operands will be in a _two_ item list identified by `child_list_id` - pub visit_multiply: - extern "C" fn(data: *mut c_void, sibling_list_id: usize, child_list_id: usize), + pub visit_multiply: VisitBinaryOpFn, /// Visits the `Divide` binary operator belonging to the list identified by `sibling_list_id`. /// The operands will be in a _two_ item list identified by `child_list_id` - pub visit_divide: - extern "C" fn(data: *mut c_void, sibling_list_id: usize, child_list_id: usize), + pub visit_divide: VisitBinaryOpFn, /// Visits the `column` belonging to the list identified by `sibling_list_id`. pub visit_column: extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), diff --git a/ffi/src/expressions/mod.rs b/ffi/src/expressions/mod.rs new file mode 100644 index 000000000..91f15f166 --- /dev/null +++ b/ffi/src/expressions/mod.rs @@ -0,0 +1,10 @@ +//! This module holds functionality for moving expressions across the FFI boundary, both from +//! engine to kernel, and from kernel to engine. 
+use delta_kernel::Expression; +use delta_kernel_ffi_macros::handle_descriptor; + +pub mod engine_expr_visitor; +pub mod kernel_expr_visitor; + +#[handle_descriptor(target=Expression, mutable=false, sized=true)] +pub struct SharedExpression; diff --git a/ffi/src/scan.rs b/ffi/src/scan.rs index 92d067621..600cd2602 100644 --- a/ffi/src/scan.rs +++ b/ffi/src/scan.rs @@ -13,7 +13,9 @@ use delta_kernel_ffi_macros::handle_descriptor; use tracing::debug; use url::Url; -use crate::expressions::{unwrap_kernel_expression, EnginePredicate, KernelExpressionVisitorState}; +use crate::expressions::engine_visitor::{ + unwrap_kernel_expression, EnginePredicate, KernelExpressionVisitorState, +}; use crate::{ AllocateStringFn, ExclusiveEngineData, ExternEngine, ExternResult, IntoExternResult, KernelBoolSlice, KernelRowIndexArray, KernelStringSlice, NullableCvoid, SharedExternEngine, diff --git a/ffi/src/test_ffi.rs b/ffi/src/test_ffi.rs index 8a3cedc0a..3b4ed666c 100644 --- a/ffi/src/test_ffi.rs +++ b/ffi/src/test_ffi.rs @@ -42,26 +42,26 @@ pub unsafe extern "C" fn get_testing_kernel_expression() -> Handle) -> Self { + pub fn new(tpe: ArrayType, elements: impl IntoIterator>) -> Self { + let elements = elements.into_iter().map(Into::into).collect(); Self { tpe, elements } } pub fn array_type(&self) -> &ArrayType { From 456d7270dc213d6f9080eb2e9bfcb2f330f4c386 Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Tue, 15 Oct 2024 15:47:01 -0700 Subject: [PATCH 62/82] More pr addressing --- ffi/src/scan.rs | 2 +- ffi/src/test_ffi.rs | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ffi/src/scan.rs b/ffi/src/scan.rs index 600cd2602..4bb2de9b6 100644 --- a/ffi/src/scan.rs +++ b/ffi/src/scan.rs @@ -13,7 +13,7 @@ use delta_kernel_ffi_macros::handle_descriptor; use tracing::debug; use url::Url; -use crate::expressions::engine_visitor::{ +use crate::expressions::engine_expr_visitor::{ unwrap_kernel_expression, EnginePredicate, KernelExpressionVisitorState, }; use crate::{ 
diff --git a/ffi/src/test_ffi.rs b/ffi/src/test_ffi.rs index 3b4ed666c..3f5df08a1 100644 --- a/ffi/src/test_ffi.rs +++ b/ffi/src/test_ffi.rs @@ -24,7 +24,7 @@ pub unsafe extern "C" fn get_testing_kernel_expression() -> Handle Handle Date: Tue, 15 Oct 2024 16:02:05 -0700 Subject: [PATCH 63/82] Simplify Variadic and Unary --- ffi/src/expressions/kernel_expr_visitor.rs | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/ffi/src/expressions/kernel_expr_visitor.rs b/ffi/src/expressions/kernel_expr_visitor.rs index 545ad0a92..4b28fb1b1 100644 --- a/ffi/src/expressions/kernel_expr_visitor.rs +++ b/ffi/src/expressions/kernel_expr_visitor.rs @@ -18,6 +18,9 @@ pub unsafe extern "C" fn free_kernel_predicate(data: Handle) { type VisitLiteralFn = extern "C" fn(data: *mut c_void, sibling_list_id: usize, value: T); type VisitBinaryOpFn = extern "C" fn(data: *mut c_void, sibling_list_id: usize, child_list_id: usize); +type VisitVariadicFn = + extern "C" fn(data: *mut c_void, sibling_list_id: usize, child_list_id: usize); +type VisitUnaryFn = extern "C" fn(data: *mut c_void, sibling_list_id: usize, child_list_id: usize); /// The [`EngineExpressionVisitor`] defines a visitor system to allow engines to build their own /// representation of a kernel expression. @@ -110,17 +113,16 @@ pub struct EngineExpressionVisitor { pub visit_null_literal: extern "C" fn(data: *mut c_void, sibling_list_id: usize), /// Visits an `and` expression belonging to the list identified by `sibling_list_id`. /// The sub-expressions of the array are in a list identified by `child_list_id` - pub visit_and: extern "C" fn(data: *mut c_void, sibling_list_id: usize, child_list_id: usize), + pub visit_and: VisitVariadicFn, /// Visits an `or` expression belonging to the list identified by `sibling_list_id`. 
/// The sub-expressions of the array are in a list identified by `child_list_id` - pub visit_or: extern "C" fn(data: *mut c_void, sibling_list_id: usize, child_list_id: usize), + pub visit_or: VisitVariadicFn, /// Visits a `not` expression belonging to the list identified by `sibling_list_id`. /// The sub-expression will be in a _one_ item list identified by `child_list_id` - pub visit_not: extern "C" fn(data: *mut c_void, sibling_list_id: usize, child_list_id: usize), + pub visit_not: VisitUnaryFn, /// Visits a `is_null` expression belonging to the list identified by `sibling_list_id`. /// The sub-expression will be in a _one_ item list identified by `child_list_id` - pub visit_is_null: - extern "C" fn(data: *mut c_void, sibling_list_id: usize, child_list_id: usize), + pub visit_is_null: VisitUnaryFn, /// Visits the `LessThan` binary operator belonging to the list identified by `sibling_list_id`. /// The operands will be in a _two_ item list identified by `child_list_id` pub visit_lt: VisitBinaryOpFn, From ffc3d51ac879f3153790db6774003c2eed6d0f9a Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Tue, 15 Oct 2024 16:06:54 -0700 Subject: [PATCH 64/82] Fix build error --- ffi/src/test_ffi.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ffi/src/test_ffi.rs b/ffi/src/test_ffi.rs index 3f5df08a1..b82ef2082 100644 --- a/ffi/src/test_ffi.rs +++ b/ffi/src/test_ffi.rs @@ -5,7 +5,7 @@ use std::{ops::Not, sync::Arc}; use crate::{expressions::SharedExpression, handle::Handle}; use delta_kernel::{ expressions::{ArrayData, BinaryOperator, Expression, Scalar, StructData}, - schema::{ArrayType, DataType, PrimitiveType, StructField, StructType}, + schema::{ArrayType, DataType, StructField, StructType}, }; /// Constructs a kernel expression that is passed back as a SharedExpression handle @@ -24,10 +24,10 @@ pub unsafe extern "C" fn get_testing_kernel_expression() -> Handle Date: Tue, 15 Oct 2024 16:13:50 -0700 Subject: [PATCH 65/82] fix build --- 
ffi/src/test_ffi.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ffi/src/test_ffi.rs b/ffi/src/test_ffi.rs index b82ef2082..ae873dc5c 100644 --- a/ffi/src/test_ffi.rs +++ b/ffi/src/test_ffi.rs @@ -27,7 +27,7 @@ pub unsafe extern "C" fn get_testing_kernel_expression() -> Handle Date: Tue, 15 Oct 2024 16:22:20 -0700 Subject: [PATCH 66/82] Fix kernel issue --- kernel/src/engine/parquet_stats_skipping/tests.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/src/engine/parquet_stats_skipping/tests.rs b/kernel/src/engine/parquet_stats_skipping/tests.rs index fc7f05eef..c877f7893 100644 --- a/kernel/src/engine/parquet_stats_skipping/tests.rs +++ b/kernel/src/engine/parquet_stats_skipping/tests.rs @@ -164,7 +164,7 @@ fn test_binary_scalars() { Struct(StructData::try_new(vec![], vec![]).unwrap()), Array(ArrayData::new( ArrayType::new(DataType::LONG, false), - vec![], + Vec::::new(), )), ]; let larger_values = &[ @@ -185,7 +185,7 @@ fn test_binary_scalars() { Struct(StructData::try_new(vec![], vec![]).unwrap()), Array(ArrayData::new( ArrayType::new(DataType::LONG, false), - vec![], + Vec::::new(), )), ]; From 3683a85b894f3b0e99763bf7f15b5cddaff772d4 Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Tue, 15 Oct 2024 18:15:47 -0700 Subject: [PATCH 67/82] Follow PR recommendations --- ffi/src/test_ffi.rs | 2 +- kernel/src/schema.rs | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/ffi/src/test_ffi.rs b/ffi/src/test_ffi.rs index ae873dc5c..bd4b287de 100644 --- a/ffi/src/test_ffi.rs +++ b/ffi/src/test_ffi.rs @@ -25,7 +25,7 @@ pub unsafe extern "C" fn get_testing_kernel_expression() -> Handle None, } } + pub fn array(array_type: ArrayType) -> Self { + DataType::Array(Box::new(array_type)) + } } impl Display for DataType { From 3f37450f6d6473bed2f3e6f62747b06e050d2af5 Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Fri, 18 Oct 2024 11:47:46 -0700 Subject: [PATCH 68/82] Fix typo --- 
ffi/examples/visit-expression/expression.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ffi/examples/visit-expression/expression.h b/ffi/examples/visit-expression/expression.h index 0d0111ead..e7a3e505d 100644 --- a/ffi/examples/visit-expression/expression.h +++ b/ffi/examples/visit-expression/expression.h @@ -126,7 +126,7 @@ struct Literal { }; /************************************************************* - * Utilitiy functions + * Utility functions ************************************************************/ void put_expr_item(void* data, size_t sibling_list_id, void* ref, enum ExpressionType type) { From 03c1440cd35ce50d9de10763dd09e9c618e5aadb Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Wed, 23 Oct 2024 12:37:03 -0700 Subject: [PATCH 69/82] Fix rebase issue --- ffi/src/test_ffi.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ffi/src/test_ffi.rs b/ffi/src/test_ffi.rs index bd4b287de..ee221e3b7 100644 --- a/ffi/src/test_ffi.rs +++ b/ffi/src/test_ffi.rs @@ -64,7 +64,7 @@ pub unsafe extern "C" fn get_testing_kernel_expression() -> Handle Date: Thu, 24 Oct 2024 10:43:05 -0700 Subject: [PATCH 70/82] Rename the expressions modules --- ffi/src/expressions/{engine_expr_visitor.rs => engine.rs} | 0 ffi/src/expressions/{kernel_expr_visitor.rs => kernel.rs} | 0 ffi/src/expressions/mod.rs | 4 ++-- ffi/src/scan.rs | 2 +- 4 files changed, 3 insertions(+), 3 deletions(-) rename ffi/src/expressions/{engine_expr_visitor.rs => engine.rs} (100%) rename ffi/src/expressions/{kernel_expr_visitor.rs => kernel.rs} (100%) diff --git a/ffi/src/expressions/engine_expr_visitor.rs b/ffi/src/expressions/engine.rs similarity index 100% rename from ffi/src/expressions/engine_expr_visitor.rs rename to ffi/src/expressions/engine.rs diff --git a/ffi/src/expressions/kernel_expr_visitor.rs b/ffi/src/expressions/kernel.rs similarity index 100% rename from ffi/src/expressions/kernel_expr_visitor.rs rename to ffi/src/expressions/kernel.rs diff --git 
a/ffi/src/expressions/mod.rs b/ffi/src/expressions/mod.rs index 91f15f166..a6756f972 100644 --- a/ffi/src/expressions/mod.rs +++ b/ffi/src/expressions/mod.rs @@ -3,8 +3,8 @@ use delta_kernel::Expression; use delta_kernel_ffi_macros::handle_descriptor; -pub mod engine_expr_visitor; -pub mod kernel_expr_visitor; +pub mod engine; +pub mod kernel; #[handle_descriptor(target=Expression, mutable=false, sized=true)] pub struct SharedExpression; diff --git a/ffi/src/scan.rs b/ffi/src/scan.rs index 6111e412f..d388a6e0c 100644 --- a/ffi/src/scan.rs +++ b/ffi/src/scan.rs @@ -13,7 +13,7 @@ use delta_kernel_ffi_macros::handle_descriptor; use tracing::debug; use url::Url; -use crate::expressions::engine_expr_visitor::{ +use crate::expressions::engine::{ unwrap_kernel_expression, EnginePredicate, KernelExpressionVisitorState, }; use crate::{ From 14a5c156a07c0511431f0d5a619788eeacf99987 Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Thu, 24 Oct 2024 13:38:01 -0700 Subject: [PATCH 71/82] Apply review comments --- ffi/src/expressions/engine.rs | 2 ++ ffi/src/expressions/kernel.rs | 31 ++++++++++++++++++------------- 2 files changed, 20 insertions(+), 13 deletions(-) diff --git a/ffi/src/expressions/engine.rs b/ffi/src/expressions/engine.rs index 087cae163..9d6391571 100644 --- a/ffi/src/expressions/engine.rs +++ b/ffi/src/expressions/engine.rs @@ -1,3 +1,5 @@ +//! Defines [`KernelExpressionVisitorState`]. This is a visitor that can be used by an [`EnginePredicate`] +//! to convert engine expressions into kernel expressions. use std::ffi::c_void; use crate::{ diff --git a/ffi/src/expressions/kernel.rs b/ffi/src/expressions/kernel.rs index 4b28fb1b1..e5610b9d1 100644 --- a/ffi/src/expressions/kernel.rs +++ b/ffi/src/expressions/kernel.rs @@ -1,3 +1,4 @@ +//! Defines [`EngineExpressionVisitor`]. This is a visitor that can be used to convert the kernel's [`Expression`] to an engine's expression format. 
use crate::expressions::SharedExpression; use std::ffi::c_void; @@ -190,7 +191,7 @@ pub unsafe extern "C" fn visit_expression( ($visitor.$visitor_fn)($visitor.data $(, $extra_args) *) }; } - fn visit_array( + fn visit_expression_array( visitor: &mut EngineExpressionVisitor, array: &ArrayData, sibling_list_id: usize, @@ -199,11 +200,11 @@ pub unsafe extern "C" fn visit_expression( let elements = array.array_elements(); let child_list_id = call!(visitor, make_field_list, elements.len()); for scalar in elements { - visit_scalar(visitor, scalar, child_list_id); + visit_expression_scalar(visitor, scalar, child_list_id); } call!(visitor, visit_array_literal, sibling_list_id, child_list_id); } - fn visit_struct_literal( + fn visit_expression_struct_literal( visitor: &mut EngineExpressionVisitor, struct_data: &StructData, sibling_list_id: usize, @@ -211,12 +212,12 @@ pub unsafe extern "C" fn visit_expression( let child_value_list_id = call!(visitor, make_field_list, struct_data.fields().len()); let child_field_list_id = call!(visitor, make_field_list, struct_data.fields().len()); for (field, value) in struct_data.fields().iter().zip(struct_data.values()) { - visit_scalar( + visit_expression_scalar( visitor, &Scalar::String(field.name.clone()), child_field_list_id, ); - visit_scalar(visitor, value, child_value_list_id); + visit_expression_scalar(visitor, value, child_value_list_id); } call!( visitor, @@ -226,7 +227,7 @@ pub unsafe extern "C" fn visit_expression( child_value_list_id ) } - fn visit_struct_expr( + fn visit_expression_struct_expr( visitor: &mut EngineExpressionVisitor, exprs: &Vec, sibling_list_id: usize, @@ -237,7 +238,7 @@ pub unsafe extern "C" fn visit_expression( } call!(visitor, visit_struct_expr, sibling_list_id, child_list_id) } - fn visit_variadic( + fn visit_expression_variadic( visitor: &mut EngineExpressionVisitor, op: &VariadicOperator, exprs: &Vec, @@ -254,7 +255,7 @@ pub unsafe extern "C" fn visit_expression( }; visit_fn(visitor.data, 
sibling_list_id, child_list_id); } - fn visit_scalar( + fn visit_expression_scalar( visitor: &mut EngineExpressionVisitor, scalar: &Scalar, sibling_list_id: usize, @@ -299,9 +300,9 @@ pub unsafe extern "C" fn visit_expression( } Scalar::Null(_) => call!(visitor, visit_null_literal, sibling_list_id), Scalar::Struct(struct_data) => { - visit_struct_literal(visitor, struct_data, sibling_list_id) + visit_expression_struct_literal(visitor, struct_data, sibling_list_id) } - Scalar::Array(array) => visit_array(visitor, array, sibling_list_id), + Scalar::Array(array) => visit_expression_array(visitor, array, sibling_list_id), } } fn visit_expression_impl( @@ -310,9 +311,13 @@ pub unsafe extern "C" fn visit_expression( sibling_list_id: usize, ) { match expression { - Expression::Literal(scalar) => visit_scalar(visitor, scalar, sibling_list_id), + Expression::Literal(scalar) => { + visit_expression_scalar(visitor, scalar, sibling_list_id) + } Expression::Column(name) => call!(visitor, visit_column, sibling_list_id, name.into()), - Expression::Struct(exprs) => visit_struct_expr(visitor, exprs, sibling_list_id), + Expression::Struct(exprs) => { + visit_expression_struct_expr(visitor, exprs, sibling_list_id) + } Expression::BinaryOperation { op, left, right } => { let child_list_id = call!(visitor, make_field_list, 2); visit_expression_impl(visitor, left, child_list_id); @@ -344,7 +349,7 @@ pub unsafe extern "C" fn visit_expression( op(visitor.data, sibling_list_id, child_id_list); } Expression::VariadicOperation { op, exprs } => { - visit_variadic(visitor, op, exprs, sibling_list_id) + visit_expression_variadic(visitor, op, exprs, sibling_list_id) } } } From c22e31119d2be1a903dd846c7db12a48a61fb41b Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Thu, 24 Oct 2024 14:32:24 -0700 Subject: [PATCH 72/82] Address comments --- ffi/examples/visit-expression/expression.h | 7 ++----- ffi/src/expressions/kernel.rs | 6 ++++-- ffi/src/test_ffi.rs | 4 +++- 3 files changed, 9 
insertions(+), 8 deletions(-) diff --git a/ffi/examples/visit-expression/expression.h b/ffi/examples/visit-expression/expression.h index e7a3e505d..0b3737d09 100644 --- a/ffi/examples/visit-expression/expression.h +++ b/ffi/examples/visit-expression/expression.h @@ -68,7 +68,6 @@ typedef struct { void* ref; enum ExpressionType type; } ExpressionItem; - typedef struct { uint32_t len; ExpressionItem* list; @@ -77,8 +76,6 @@ struct BinOp { enum OpType op; ExpressionItemList exprs; }; -struct Null; - struct Variadic { enum VariadicType op; ExpressionItemList exprs; @@ -317,8 +314,8 @@ DEFINE_UNARY(visit_expr_not, Not) * Column Expression ************************************************************/ -void visit_expr_column(void* data, uintptr_t sibling_id_list, KernelStringSlice string) { - char* column_name = allocate_string(string); +void visit_expr_column(void* data, uintptr_t sibling_id_list, KernelStringSlice col_name) { + char* column_name = allocate_string(col_name); put_expr_item(data, sibling_id_list, column_name, Column); } diff --git a/ffi/src/expressions/kernel.rs b/ffi/src/expressions/kernel.rs index e5610b9d1..36353607d 100644 --- a/ffi/src/expressions/kernel.rs +++ b/ffi/src/expressions/kernel.rs @@ -50,14 +50,16 @@ type VisitUnaryFn = extern "C" fn(data: *mut c_void, sibling_list_id: usize, chi /// /// WARNING: The visitor MUST NOT retain internal references to string slices or binary data passed /// to visitor methods -/// TODO: Add type information in struct field and null. This will likely involve using the schema visitor. +/// TODO: Visit type information in struct field and null. This will likely involve using the schema +/// visitor. Note that struct literals are currently in flux, and may change significantly. 
Here is the relevant +/// issue: https://github.com/delta-incubator/delta-kernel-rs/issues/412 #[repr(C)] pub struct EngineExpressionVisitor { /// An opaque engine state pointer pub data: *mut c_void, /// Creates a new expression list, optionally reserving capacity up front pub make_field_list: extern "C" fn(data: *mut c_void, reserve: usize) -> usize, - /// Visit a 32bit `integer belonging to the list identified by `sibling_list_id`. + /// Visit a 32bit `integer` belonging to the list identified by `sibling_list_id`. pub visit_int_literal: VisitLiteralFn, /// Visit a 64bit `long` belonging to the list identified by `sibling_list_id`. pub visit_long_literal: VisitLiteralFn, diff --git a/ffi/src/test_ffi.rs b/ffi/src/test_ffi.rs index ee221e3b7..b2382b865 100644 --- a/ffi/src/test_ffi.rs +++ b/ffi/src/test_ffi.rs @@ -8,7 +8,9 @@ use delta_kernel::{ schema::{ArrayType, DataType, StructField, StructType}, }; -/// Constructs a kernel expression that is passed back as a SharedExpression handle +/// Constructs a kernel expression that is passed back as a SharedExpression handle. The expected +/// output expression can be found in `ffi/tests/test_expression_visitor/expected.txt` as a printed +/// tree. 
/// /// # Safety /// The caller is responsible for freeing the retured memory, either by calling From 49f61984f2006d9ee66cb198d966146401b5d118 Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Thu, 24 Oct 2024 14:39:21 -0700 Subject: [PATCH 73/82] Try windows without MSVC --- ffi/examples/visit-expression/CMakeLists.txt | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/ffi/examples/visit-expression/CMakeLists.txt b/ffi/examples/visit-expression/CMakeLists.txt index bcc2743ca..c9d24eb48 100644 --- a/ffi/examples/visit-expression/CMakeLists.txt +++ b/ffi/examples/visit-expression/CMakeLists.txt @@ -8,11 +8,7 @@ target_link_directories(visit_expression PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/../ target_link_libraries(visit_expression PUBLIC delta_kernel_ffi) target_compile_options(visit_expression PUBLIC) -if(MSVC) - target_compile_options(visit_expression PRIVATE /W4 /WX) -else() - target_compile_options(visit_expression PRIVATE -Wall -Wextra -Wpedantic -Werror -Wno-strict-prototypes) -endif() +target_compile_options(visit_expression PRIVATE -Wall -Wextra -Wpedantic -Werror -Wno-strict-prototypes) # Add the kernel expresion -> engine expression test include(CTest) From 633a4baf09513574cec7b676602e65bc1b723de0 Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Thu, 24 Oct 2024 14:40:58 -0700 Subject: [PATCH 74/82] Fix wording --- ffi/src/test_ffi.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ffi/src/test_ffi.rs b/ffi/src/test_ffi.rs index b2382b865..440990a02 100644 --- a/ffi/src/test_ffi.rs +++ b/ffi/src/test_ffi.rs @@ -9,8 +9,7 @@ use delta_kernel::{ }; /// Constructs a kernel expression that is passed back as a SharedExpression handle. The expected -/// output expression can be found in `ffi/tests/test_expression_visitor/expected.txt` as a printed -/// tree. +/// output expression can be found in `ffi/tests/test_expression_visitor/expected.txt`. 
/// /// # Safety /// The caller is responsible for freeing the retured memory, either by calling From 4faff29afb5141743fb6c13f25a476dec61f3ba8 Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Thu, 24 Oct 2024 14:50:32 -0700 Subject: [PATCH 75/82] Add dev visibility --- kernel/src/expressions/scalars.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/src/expressions/scalars.rs b/kernel/src/expressions/scalars.rs index 0a352cc3c..7922b04a7 100644 --- a/kernel/src/expressions/scalars.rs +++ b/kernel/src/expressions/scalars.rs @@ -20,6 +20,7 @@ pub struct ArrayData { } impl ArrayData { + #[cfg_attr(feature = "developer-visibility", visibility::make(pub))] pub fn new(tpe: ArrayType, elements: impl IntoIterator>) -> Self { let elements = elements.into_iter().map(Into::into).collect(); Self { tpe, elements } From 984b615f61c317c2cd657c984f8fc603648e93e0 Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Thu, 24 Oct 2024 15:46:49 -0700 Subject: [PATCH 76/82] Flushing changes --- ffi/examples/visit-expression/CMakeLists.txt | 19 +++++++++++++ ffi/src/test_ffi.rs | 30 ++++++++------------ kernel/src/schema.rs | 3 -- 3 files changed, 31 insertions(+), 21 deletions(-) diff --git a/ffi/examples/visit-expression/CMakeLists.txt b/ffi/examples/visit-expression/CMakeLists.txt index c9d24eb48..a30439593 100644 --- a/ffi/examples/visit-expression/CMakeLists.txt +++ b/ffi/examples/visit-expression/CMakeLists.txt @@ -10,8 +10,27 @@ target_compile_options(visit_expression PUBLIC) target_compile_options(visit_expression PRIVATE -Wall -Wextra -Wpedantic -Werror -Wno-strict-prototypes) +# Get info on the OS and platform +if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin") + set(MACOSX TRUE) +endif() +if(UNIX AND NOT APPLE) + set(LINUX TRUE) +endif() + # Add the kernel expresion -> engine expression test include(CTest) set(ExprTestRunner "../../../tests/test_expression_visitor/run_test.sh") set(ExprExpectedPath "../../../tests/test_expression_visitor/expected.txt") add_test(NAME 
test_expression_visitor COMMAND ${ExprTestRunner} ${ExprExpectedPath}) + +if(LINUX) + add_test(NAME test_expression_visitor_leaks COMMAND valgrind + --error-exitcode=1 + --tool=memcheck + --leak-check=full + --errors-for-leak-kinds=definite + --show-leak-kinds=definite ./visit_expression) +elseif(MACOSX) + add_test(NAME test_expression_visitor_leaks COMMAND leaks --atExit -- ./visit_expression) +endif() diff --git a/ffi/src/test_ffi.rs b/ffi/src/test_ffi.rs index 440990a02..8aee68a57 100644 --- a/ffi/src/test_ffi.rs +++ b/ffi/src/test_ffi.rs @@ -26,7 +26,7 @@ pub unsafe extern "C" fn get_testing_kernel_expression() -> Handle Handle Handle None, } } - pub fn array(array_type: ArrayType) -> Self { - DataType::Array(Box::new(array_type)) - } } impl Display for DataType { From 8530b8de2c33649c46b27bc6412bf34c82a5f0d5 Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Thu, 24 Oct 2024 15:58:42 -0700 Subject: [PATCH 77/82] Address pr comments --- ffi/src/expressions/kernel.rs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/ffi/src/expressions/kernel.rs b/ffi/src/expressions/kernel.rs index 36353607d..6521dba2f 100644 --- a/ffi/src/expressions/kernel.rs +++ b/ffi/src/expressions/kernel.rs @@ -214,10 +214,11 @@ pub unsafe extern "C" fn visit_expression( let child_value_list_id = call!(visitor, make_field_list, struct_data.fields().len()); let child_field_list_id = call!(visitor, make_field_list, struct_data.fields().len()); for (field, value) in struct_data.fields().iter().zip(struct_data.values()) { - visit_expression_scalar( + call!( visitor, - &Scalar::String(field.name.clone()), + visit_string_literal, child_field_list_id, + field.name().into() ); visit_expression_scalar(visitor, value, child_value_list_id); } @@ -231,7 +232,7 @@ pub unsafe extern "C" fn visit_expression( } fn visit_expression_struct_expr( visitor: &mut EngineExpressionVisitor, - exprs: &Vec, + exprs: &[Expression], sibling_list_id: usize, ) { let child_list_id = 
call!(visitor, make_field_list, exprs.len()); @@ -243,7 +244,7 @@ pub unsafe extern "C" fn visit_expression( fn visit_expression_variadic( visitor: &mut EngineExpressionVisitor, op: &VariadicOperator, - exprs: &Vec, + exprs: &[Expression], sibling_list_id: usize, ) { let child_list_id = call!(visitor, make_field_list, exprs.len()); From bbac1bbafa819dd9debba0bacf3f1d4593208ea9 Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Thu, 24 Oct 2024 16:28:27 -0700 Subject: [PATCH 78/82] Fix rebase issue --- ffi/src/expressions/kernel.rs | 4 +++- ffi/src/test_ffi.rs | 4 ++-- kernel/src/expressions/column_names.rs | 2 +- kernel/src/lib.rs | 1 + 4 files changed, 7 insertions(+), 4 deletions(-) diff --git a/ffi/src/expressions/kernel.rs b/ffi/src/expressions/kernel.rs index 6521dba2f..27aff3096 100644 --- a/ffi/src/expressions/kernel.rs +++ b/ffi/src/expressions/kernel.rs @@ -317,7 +317,9 @@ pub unsafe extern "C" fn visit_expression( Expression::Literal(scalar) => { visit_expression_scalar(visitor, scalar, sibling_list_id) } - Expression::Column(name) => call!(visitor, visit_column, sibling_list_id, name.into()), + Expression::Column(name) => { + call!(visitor, visit_column, sibling_list_id, name.as_str().into()) + } Expression::Struct(exprs) => { visit_expression_struct_expr(visitor, exprs, sibling_list_id) } diff --git a/ffi/src/test_ffi.rs b/ffi/src/test_ffi.rs index 8aee68a57..0bd4bf43a 100644 --- a/ffi/src/test_ffi.rs +++ b/ffi/src/test_ffi.rs @@ -4,7 +4,7 @@ use std::{ops::Not, sync::Arc}; use crate::{expressions::SharedExpression, handle::Handle}; use delta_kernel::{ - expressions::{ArrayData, BinaryOperator, Expression, Scalar, StructData}, + expressions::{column_expr, ArrayData, BinaryOperator, Expression, Scalar, StructData}, schema::{ArrayType, DataType, StructField, StructType}, }; @@ -69,7 +69,7 @@ pub unsafe extern "C" fn get_testing_kernel_expression() -> Handle { - $crate::expressions::ColumnName::new(delta_kernel_derive::parse_column_name!($($name)*)) + 
$crate::expressions::ColumnName::new($crate::delta_kernel_derive::parse_column_name!($($name)*)) }; } #[doc(inline)] diff --git a/kernel/src/lib.rs b/kernel/src/lib.rs index 6829c8eca..3079b04af 100644 --- a/kernel/src/lib.rs +++ b/kernel/src/lib.rs @@ -75,6 +75,7 @@ pub mod snapshot; pub mod table; pub(crate) mod utils; +pub use delta_kernel_derive; pub use engine_data::{DataVisitor, EngineData}; pub use error::{DeltaResult, Error}; pub use expressions::{Expression, ExpressionRef}; From 00d0354340614403e9237ba19972f7f06b51f771 Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Thu, 24 Oct 2024 16:42:07 -0700 Subject: [PATCH 79/82] hopefully fix linux leak test --- .github/workflows/build.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index c1edc2dd2..9795be5dd 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -86,6 +86,7 @@ jobs: sudo apt update sudo apt install -y -V libarrow-dev # For C++ sudo apt install -y -V libarrow-glib-dev # For GLib (C) + sudo apt install -y -V valgrind # For memory leak test elif [ "$RUNNER_OS" == "macOS" ]; then brew install apache-arrow brew install apache-arrow-glib From d28206300e3005c29cd8937ebf9f3a9db6c67891 Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Thu, 24 Oct 2024 17:28:02 -0700 Subject: [PATCH 80/82] Remove failing test --- ffi/examples/visit-expression/CMakeLists.txt | 2 -- ffi/examples/visit-expression/expression.h | 17 ----------------- ffi/src/expressions/kernel.rs | 3 ++- ffi/src/test_ffi.rs | 2 +- 4 files changed, 3 insertions(+), 21 deletions(-) diff --git a/ffi/examples/visit-expression/CMakeLists.txt b/ffi/examples/visit-expression/CMakeLists.txt index a30439593..5ea2e58b4 100644 --- a/ffi/examples/visit-expression/CMakeLists.txt +++ b/ffi/examples/visit-expression/CMakeLists.txt @@ -31,6 +31,4 @@ if(LINUX) --leak-check=full --errors-for-leak-kinds=definite --show-leak-kinds=definite ./visit_expression) 
-elseif(MACOSX) - add_test(NAME test_expression_visitor_leaks COMMAND leaks --atExit -- ./visit_expression) endif() diff --git a/ffi/examples/visit-expression/expression.h b/ffi/examples/visit-expression/expression.h index 0b3737d09..26299f4f0 100644 --- a/ffi/examples/visit-expression/expression.h +++ b/ffi/examples/visit-expression/expression.h @@ -23,7 +23,6 @@ /************************************************************* * Data Types ************************************************************/ - enum OpType { Add, Minus, @@ -125,7 +124,6 @@ struct Literal { /************************************************************* * Utility functions ************************************************************/ - void put_expr_item(void* data, size_t sibling_list_id, void* ref, enum ExpressionType type) { ExpressionBuilder* data_ptr = (ExpressionBuilder*)data; ExpressionItem expr = { .ref = ref, .type = type }; @@ -137,7 +135,6 @@ ExpressionItemList get_expr_list(void* data, size_t list_id) { assert(list_id < data_ptr->list_count); return data_ptr->lists[list_id]; } - // utility to turn a slice into a char* char* allocate_string(const KernelStringSlice slice) { return strndup(slice.ptr, slice.len); @@ -146,7 +143,6 @@ char* allocate_string(const KernelStringSlice slice) { /************************************************************* * Binary Operations ************************************************************/ - #define DEFINE_BINOP(fun_name, op) \ void fun_name(void* data, uintptr_t sibling_list_id, uintptr_t child_list_id) { \ visit_expr_binop(data, sibling_list_id, op, child_list_id); \ @@ -173,13 +169,11 @@ DEFINE_BINOP(visit_expr_ne, NotEqual) DEFINE_BINOP(visit_expr_distinct, Distinct) DEFINE_BINOP(visit_expr_in, In) DEFINE_BINOP(visit_expr_not_in, NotIn) - #undef DEFINE_BINOP /************************************************************* * Literal Values ************************************************************/ - #define DEFINE_SIMPLE_SCALAR(fun_name, 
enum_member, c_type, literal_field) \ void fun_name(void* data, uintptr_t sibling_list_id, c_type val) { \ struct Literal* lit = malloc(sizeof(struct Literal)); \ @@ -199,7 +193,6 @@ DEFINE_SIMPLE_SCALAR(visit_expr_boolean_literal, Boolean, _Bool, boolean_data); DEFINE_SIMPLE_SCALAR(visit_expr_timestamp_literal, Timestamp, int64_t, long_data); DEFINE_SIMPLE_SCALAR(visit_expr_timestamp_ntz_literal, TimestampNtz, int64_t, long_data); DEFINE_SIMPLE_SCALAR(visit_expr_date_literal, Date, int32_t, integer_data); - #undef DEFINE_SIMPLE_SCALAR void visit_expr_string_literal(void* data, uintptr_t sibling_list_id, KernelStringSlice string) { @@ -208,7 +201,6 @@ void visit_expr_string_literal(void* data, uintptr_t sibling_list_id, KernelStri literal->value.string_data = allocate_string(string); put_expr_item(data, sibling_list_id, literal, Literal); } - void visit_expr_decimal_literal(void* data, uintptr_t sibling_list_id, uint64_t value_ms, @@ -224,7 +216,6 @@ void visit_expr_decimal_literal(void* data, dec->scale = scale; put_expr_item(data, sibling_list_id, literal, Literal); } - void visit_expr_binary_literal(void* data, uintptr_t sibling_list_id, const uint8_t* buf, @@ -237,7 +228,6 @@ void visit_expr_binary_literal(void* data, memcpy(bin->buf, buf, len); put_expr_item(data, sibling_list_id, literal, Literal); } - void visit_expr_struct_literal(void* data, uintptr_t sibling_list_id, uintptr_t child_field_list_id, @@ -249,7 +239,6 @@ void visit_expr_struct_literal(void* data, struct_data->values = get_expr_list(data, child_value_list_id); put_expr_item(data, sibling_list_id, literal, Literal); } - void visit_expr_null_literal(void* data, uintptr_t sibling_id_list) { struct Literal* literal = malloc(sizeof(struct Literal)); literal->type = Null; @@ -259,7 +248,6 @@ void visit_expr_null_literal(void* data, uintptr_t sibling_id_list) { /************************************************************* * Variadic Expressions 
************************************************************/ - #define DEFINE_VARIADIC(fun_name, enum_member) \ void fun_name(void* data, uintptr_t sibling_list_id, uintptr_t child_list_id) { \ visit_expr_variadic(data, sibling_list_id, enum_member, child_list_id); \ @@ -277,7 +265,6 @@ void visit_expr_variadic(void* data, DEFINE_VARIADIC(visit_expr_and, And) DEFINE_VARIADIC(visit_expr_or, Or) DEFINE_VARIADIC(visit_expr_struct_expr, StructExpression) - #undef DEFINE_VARIADIC void visit_expr_array_literal(void* data, uintptr_t sibling_list_id, uintptr_t child_list_id) { @@ -307,13 +294,11 @@ void visit_expr_unary(void* data, } DEFINE_UNARY(visit_expr_is_null, IsNull) DEFINE_UNARY(visit_expr_not, Not) - #undef DEFINE_UNARY /************************************************************* * Column Expression ************************************************************/ - void visit_expr_column(void* data, uintptr_t sibling_id_list, KernelStringSlice col_name) { char* column_name = allocate_string(col_name); put_expr_item(data, sibling_id_list, column_name, Column); @@ -322,7 +307,6 @@ void visit_expr_column(void* data, uintptr_t sibling_id_list, KernelStringSlice /************************************************************* * EngineExpressionVisitor Implementation ************************************************************/ - uintptr_t make_field_list(void* data, uintptr_t reserve) { ExpressionBuilder* builder = data; int id = builder->list_count; @@ -448,7 +432,6 @@ void free_expression_item(ExpressionItem ref) { } } } - void free_expression_list(ExpressionItemList list) { for (size_t i = 0; i < list.len; i++) { free_expression_item(list.list[i]); diff --git a/ffi/src/expressions/kernel.rs b/ffi/src/expressions/kernel.rs index 27aff3096..b1da8c8b0 100644 --- a/ffi/src/expressions/kernel.rs +++ b/ffi/src/expressions/kernel.rs @@ -1,4 +1,5 @@ -//! Defines [`EngineExpressionVisitor`]. 
This is a visitor that can be used to convert the kernel's [`Expression`] to an engine's expression format. +//! Defines [`EngineExpressionVisitor`]. This is a visitor that can be used to convert the kernel's +//! [`Expression`] to an engine's expression format. use crate::expressions::SharedExpression; use std::ffi::c_void; diff --git a/ffi/src/test_ffi.rs b/ffi/src/test_ffi.rs index 0bd4bf43a..fa7e2ef05 100644 --- a/ffi/src/test_ffi.rs +++ b/ffi/src/test_ffi.rs @@ -69,7 +69,7 @@ pub unsafe extern "C" fn get_testing_kernel_expression() -> Handle Date: Thu, 24 Oct 2024 18:03:41 -0700 Subject: [PATCH 81/82] Renaming --- ffi/examples/visit-expression/CMakeLists.txt | 4 +- ffi/examples/visit-expression/expression.h | 32 ++++----- ffi/src/expressions/kernel.rs | 68 ++++++++++--------- .../expected.txt | 0 .../run_test.sh | 0 5 files changed, 53 insertions(+), 51 deletions(-) rename ffi/tests/{test_expression_visitor => test-expression-visitor}/expected.txt (100%) rename ffi/tests/{test_expression_visitor => test-expression-visitor}/run_test.sh (100%) diff --git a/ffi/examples/visit-expression/CMakeLists.txt b/ffi/examples/visit-expression/CMakeLists.txt index 5ea2e58b4..7f1c5aee8 100644 --- a/ffi/examples/visit-expression/CMakeLists.txt +++ b/ffi/examples/visit-expression/CMakeLists.txt @@ -20,8 +20,8 @@ endif() # Add the kernel expresion -> engine expression test include(CTest) -set(ExprTestRunner "../../../tests/test_expression_visitor/run_test.sh") -set(ExprExpectedPath "../../../tests/test_expression_visitor/expected.txt") +set(ExprTestRunner "../../../tests/test-expression-visitor/run_test.sh") +set(ExprExpectedPath "../../../tests/test-expression-visitor/expected.txt") add_test(NAME test_expression_visitor COMMAND ${ExprTestRunner} ${ExprExpectedPath}) if(LINUX) diff --git a/ffi/examples/visit-expression/expression.h b/ffi/examples/visit-expression/expression.h index 26299f4f0..302566d10 100644 --- a/ffi/examples/visit-expression/expression.h +++ 
b/ffi/examples/visit-expression/expression.h @@ -323,22 +323,22 @@ ExpressionItemList construct_predicate(SharedExpression* predicate) { EngineExpressionVisitor visitor = { .data = &data, .make_field_list = make_field_list, - .visit_int_literal = visit_expr_int_literal, - .visit_long_literal = visit_expr_long_literal, - .visit_short_literal = visit_expr_short_literal, - .visit_byte_literal = visit_expr_byte_literal, - .visit_float_literal = visit_expr_float_literal, - .visit_double_literal = visit_expr_double_literal, - .visit_bool_literal = visit_expr_boolean_literal, - .visit_timestamp_literal = visit_expr_timestamp_literal, - .visit_timestamp_ntz_literal = visit_expr_timestamp_ntz_literal, - .visit_date_literal = visit_expr_date_literal, - .visit_binary_literal = visit_expr_binary_literal, - .visit_null_literal = visit_expr_null_literal, - .visit_decimal_literal = visit_expr_decimal_literal, - .visit_string_literal = visit_expr_string_literal, - .visit_struct_literal = visit_expr_struct_literal, - .visit_array_literal = visit_expr_array_literal, + .visit_literal_int = visit_expr_int_literal, + .visit_literal_long = visit_expr_long_literal, + .visit_literal_short = visit_expr_short_literal, + .visit_literal_byte = visit_expr_byte_literal, + .visit_literal_float = visit_expr_float_literal, + .visit_literal_double = visit_expr_double_literal, + .visit_literal_bool = visit_expr_boolean_literal, + .visit_literal_timestamp = visit_expr_timestamp_literal, + .visit_literal_timestamp_ntz = visit_expr_timestamp_ntz_literal, + .visit_literal_date = visit_expr_date_literal, + .visit_literal_binary = visit_expr_binary_literal, + .visit_literal_null = visit_expr_null_literal, + .visit_literal_decimal = visit_expr_decimal_literal, + .visit_literal_string = visit_expr_string_literal, + .visit_literal_struct = visit_expr_struct_literal, + .visit_literal_array = visit_expr_array_literal, .visit_and = visit_expr_and, .visit_or = visit_expr_or, .visit_not = visit_expr_not, diff 
--git a/ffi/src/expressions/kernel.rs b/ffi/src/expressions/kernel.rs index b1da8c8b0..47e3e6dea 100644 --- a/ffi/src/expressions/kernel.rs +++ b/ffi/src/expressions/kernel.rs @@ -61,38 +61,38 @@ pub struct EngineExpressionVisitor { /// Creates a new expression list, optionally reserving capacity up front pub make_field_list: extern "C" fn(data: *mut c_void, reserve: usize) -> usize, /// Visit a 32bit `integer` belonging to the list identified by `sibling_list_id`. - pub visit_int_literal: VisitLiteralFn, + pub visit_literal_int: VisitLiteralFn, /// Visit a 64bit `long` belonging to the list identified by `sibling_list_id`. - pub visit_long_literal: VisitLiteralFn, + pub visit_literal_long: VisitLiteralFn, /// Visit a 16bit `short` belonging to the list identified by `sibling_list_id`. - pub visit_short_literal: VisitLiteralFn, + pub visit_literal_short: VisitLiteralFn, /// Visit an 8bit `byte` belonging to the list identified by `sibling_list_id`. - pub visit_byte_literal: VisitLiteralFn, + pub visit_literal_byte: VisitLiteralFn, /// Visit a 32bit `float` belonging to the list identified by `sibling_list_id`. - pub visit_float_literal: VisitLiteralFn, + pub visit_literal_float: VisitLiteralFn, /// Visit a 64bit `double` belonging to the list identified by `sibling_list_id`. - pub visit_double_literal: VisitLiteralFn, + pub visit_literal_double: VisitLiteralFn, /// Visit a `string` belonging to the list identified by `sibling_list_id`. - pub visit_string_literal: VisitLiteralFn, + pub visit_literal_string: VisitLiteralFn, /// Visit a `boolean` belonging to the list identified by `sibling_list_id`. - pub visit_bool_literal: VisitLiteralFn, + pub visit_literal_bool: VisitLiteralFn, /// Visit a 64bit timestamp belonging to the list identified by `sibling_list_id`. /// The timestamp is microsecond precision and adjusted to UTC. 
- pub visit_timestamp_literal: VisitLiteralFn, + pub visit_literal_timestamp: VisitLiteralFn, /// Visit a 64bit timestamp belonging to the list identified by `sibling_list_id`. /// The timestamp is microsecond precision with no timezone. - pub visit_timestamp_ntz_literal: VisitLiteralFn, + pub visit_literal_timestamp_ntz: VisitLiteralFn, /// Visit a 32bit intger `date` representing days since UNIX epoch 1970-01-01. The `date` belongs /// to the list identified by `sibling_list_id`. - pub visit_date_literal: VisitLiteralFn, + pub visit_literal_date: VisitLiteralFn, /// Visit binary data at the `buffer` with length `len` belonging to the list identified by /// `sibling_list_id`. - pub visit_binary_literal: + pub visit_literal_binary: extern "C" fn(data: *mut c_void, sibling_list_id: usize, buffer: *const u8, len: usize), /// Visit a 128bit `decimal` value with the given precision and scale. The 128bit integer /// is split into the most significant 64 bits in `value_ms`, and the least significant 64 /// bits in `value_ls`. The `decimal` belongs to the list identified by `sibling_list_id`. - pub visit_decimal_literal: extern "C" fn( + pub visit_literal_decimal: extern "C" fn( data: *mut c_void, sibling_list_id: usize, value_ms: u64, @@ -103,7 +103,7 @@ pub struct EngineExpressionVisitor { /// Visit a struct literal belonging to the list identified by `sibling_list_id`. /// The field names of the struct are in a list identified by `child_field_list_id`. /// The values of the struct are in a list identified by `child_value_list_id`. - pub visit_struct_literal: extern "C" fn( + pub visit_literal_struct: extern "C" fn( data: *mut c_void, sibling_list_id: usize, child_field_list_value: usize, @@ -111,10 +111,10 @@ pub struct EngineExpressionVisitor { ), /// Visit an array literal belonging to the list identified by `sibling_list_id`. /// The values of the array are in a list identified by `child_list_id`. 
- pub visit_array_literal: + pub visit_literal_array: extern "C" fn(data: *mut c_void, sibling_list_id: usize, child_list_id: usize), /// Visits a null value belonging to the list identified by `sibling_list_id. - pub visit_null_literal: extern "C" fn(data: *mut c_void, sibling_list_id: usize), + pub visit_literal_null: extern "C" fn(data: *mut c_void, sibling_list_id: usize), /// Visits an `and` expression belonging to the list identified by `sibling_list_id`. /// The sub-expressions of the array are in a list identified by `child_list_id` pub visit_and: VisitVariadicFn, @@ -205,7 +205,7 @@ pub unsafe extern "C" fn visit_expression( for scalar in elements { visit_expression_scalar(visitor, scalar, child_list_id); } - call!(visitor, visit_array_literal, sibling_list_id, child_list_id); + call!(visitor, visit_literal_array, sibling_list_id, child_list_id); } fn visit_expression_struct_literal( visitor: &mut EngineExpressionVisitor, @@ -217,7 +217,7 @@ pub unsafe extern "C" fn visit_expression( for (field, value) in struct_data.fields().iter().zip(struct_data.values()) { call!( visitor, - visit_string_literal, + visit_literal_string, child_field_list_id, field.name().into() ); @@ -225,7 +225,7 @@ pub unsafe extern "C" fn visit_expression( } call!( visitor, - visit_struct_literal, + visit_literal_struct, sibling_list_id, child_field_list_id, child_value_list_id @@ -265,26 +265,28 @@ pub unsafe extern "C" fn visit_expression( sibling_list_id: usize, ) { match scalar { - Scalar::Integer(val) => call!(visitor, visit_int_literal, sibling_list_id, *val), - Scalar::Long(val) => call!(visitor, visit_long_literal, sibling_list_id, *val), - Scalar::Short(val) => call!(visitor, visit_short_literal, sibling_list_id, *val), - Scalar::Byte(val) => call!(visitor, visit_byte_literal, sibling_list_id, *val), - Scalar::Float(val) => call!(visitor, visit_float_literal, sibling_list_id, *val), - Scalar::Double(val) => call!(visitor, visit_double_literal, sibling_list_id, *val), + 
Scalar::Integer(val) => call!(visitor, visit_literal_int, sibling_list_id, *val), + Scalar::Long(val) => call!(visitor, visit_literal_long, sibling_list_id, *val), + Scalar::Short(val) => call!(visitor, visit_literal_short, sibling_list_id, *val), + Scalar::Byte(val) => call!(visitor, visit_literal_byte, sibling_list_id, *val), + Scalar::Float(val) => call!(visitor, visit_literal_float, sibling_list_id, *val), + Scalar::Double(val) => { + call!(visitor, visit_literal_double, sibling_list_id, *val) + } Scalar::String(val) => { - call!(visitor, visit_string_literal, sibling_list_id, val.into()) + call!(visitor, visit_literal_string, sibling_list_id, val.into()) } - Scalar::Boolean(val) => call!(visitor, visit_bool_literal, sibling_list_id, *val), + Scalar::Boolean(val) => call!(visitor, visit_literal_bool, sibling_list_id, *val), Scalar::Timestamp(val) => { - call!(visitor, visit_timestamp_literal, sibling_list_id, *val) + call!(visitor, visit_literal_timestamp, sibling_list_id, *val) } Scalar::TimestampNtz(val) => { - call!(visitor, visit_timestamp_ntz_literal, sibling_list_id, *val) + call!(visitor, visit_literal_timestamp_ntz, sibling_list_id, *val) } - Scalar::Date(val) => call!(visitor, visit_date_literal, sibling_list_id, *val), + Scalar::Date(val) => call!(visitor, visit_literal_date, sibling_list_id, *val), Scalar::Binary(buf) => call!( visitor, - visit_binary_literal, + visit_literal_binary, sibling_list_id, buf.as_ptr(), buf.len() @@ -294,7 +296,7 @@ pub unsafe extern "C" fn visit_expression( let ls: u64 = *value as u64; call!( visitor, - visit_decimal_literal, + visit_literal_decimal, sibling_list_id, ms, ls, @@ -302,7 +304,7 @@ pub unsafe extern "C" fn visit_expression( *scale ) } - Scalar::Null(_) => call!(visitor, visit_null_literal, sibling_list_id), + Scalar::Null(_) => call!(visitor, visit_literal_null, sibling_list_id), Scalar::Struct(struct_data) => { visit_expression_struct_literal(visitor, struct_data, sibling_list_id) } diff --git 
a/ffi/tests/test_expression_visitor/expected.txt b/ffi/tests/test-expression-visitor/expected.txt similarity index 100% rename from ffi/tests/test_expression_visitor/expected.txt rename to ffi/tests/test-expression-visitor/expected.txt diff --git a/ffi/tests/test_expression_visitor/run_test.sh b/ffi/tests/test-expression-visitor/run_test.sh similarity index 100% rename from ffi/tests/test_expression_visitor/run_test.sh rename to ffi/tests/test-expression-visitor/run_test.sh From 60049ab11013d80685e23a42c1618ecbb0bc6cf8 Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Thu, 24 Oct 2024 18:08:53 -0700 Subject: [PATCH 82/82] Small nit --- ffi/examples/visit-expression/CMakeLists.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/ffi/examples/visit-expression/CMakeLists.txt b/ffi/examples/visit-expression/CMakeLists.txt index 7f1c5aee8..1b89deec6 100644 --- a/ffi/examples/visit-expression/CMakeLists.txt +++ b/ffi/examples/visit-expression/CMakeLists.txt @@ -23,7 +23,6 @@ include(CTest) set(ExprTestRunner "../../../tests/test-expression-visitor/run_test.sh") set(ExprExpectedPath "../../../tests/test-expression-visitor/expected.txt") add_test(NAME test_expression_visitor COMMAND ${ExprTestRunner} ${ExprExpectedPath}) - if(LINUX) add_test(NAME test_expression_visitor_leaks COMMAND valgrind --error-exitcode=1