Skip to content

Commit ed681b5

Browse files
(feat) Optimizer & Memo Full Logical -> Logical (#101)
Implements the in-memory memo for logical -> logical, as well as the optimizer task graph, and the support of `*` stored expressions in the compiler. All features are tested accordingly, except the optimizer itself which will be postponed in a future PR. --------- Co-authored-by: Connor Tsui <[email protected]>
1 parent ad02315 commit ed681b5

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

42 files changed

+4810
-149
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,4 @@
22

33
# optd
44

5-
Query Optimizer Service
5+
Query Optimizer Service

optd-cli/examples/higher_order.opt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
data Logical =
22
| Operation =
33
| Binary =
4-
| Arithmetic =
4+
| Arith =
55
| Add(left: Logical, right: Logical)
66
| Mul(left: Logical, right: Logical)
77
| Div(left: Logical, right: Logical)

optd-cli/examples/logical_rules.opt

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -138,9 +138,10 @@ fn build_calculator_expr() =
138138
const3 = Const(3),
139139
const4 = Const(4),
140140
addition = Add(const2, const3),
141-
multiplication = Mult(addition, const4)
141+
multiplication = Mult(addition, const4),
142+
pow = Pow(multiplication, Const(10)),
142143
in
143-
multiplication.evaluate()
144+
pow.evaluate()
144145

145146
[run]
146147
fn run_mult_commute() =

optd-cli/examples/tutorial.opt

Lines changed: 29 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -227,7 +227,7 @@ fn (catalog: Catalog) get_table_schema(name: String): Schema
227227

228228
fn (logical: Logical*) properties(): LogicalProperties
229229

230-
fn (costed: Physical$) statistics(): CostedProperties
230+
fn (costed: Physical$) statistics(): Statistics
231231

232232
// -------------------------
233233
// 9. Transformation Rules
@@ -239,11 +239,11 @@ fn (costed: Physical$) statistics(): CostedProperties
239239
// Helper function for scalar rewrites.
240240
fn (expr: Scalar) remap(bindings: {I64 : I64}): Scalar =
241241
match expr
242-
| ColumnRef(idx) ->
243-
if bindings(idx) != none then // TODO: Fix none bug... Should not be a type! Rather Option<Nothing>.
244-
ColumnRef(0) // TODO: Add ! once we have the `!` syntax.
245-
else
246-
ColumnRef(idx)
242+
| ColumnRef(idx) -> ColumnRef(0)
243+
// if bindings(idx) != none then // TODO: Fix EQ inference: just require EqHash & adapt engine.
244+
// ColumnRef(0) // TODO: Add ! once we have the `!` syntax.
245+
// else
246+
// ColumnRef(idx)
247247
| IntLiteral(value) -> IntLiteral(value)
248248
| StringLiteral(value) -> StringLiteral(value)
249249
| BoolLiteral(value) -> BoolLiteral(value)
@@ -277,12 +277,12 @@ fn (expr: Logical*) join_commute(): Logical? = match expr
277277
right_indices = 0..right_len,
278278
left_indices = 0..left_len,
279279

280-
remapping = (left_indices.map(i -> (i, i + right_len)) ++
281-
right_indices.map(i -> (i + left_len, i))).to_map(),
280+
remapping = (left_indices.map((i: I64) -> (i, i + right_len)) ++
281+
right_indices.map((i: I64) -> (i + left_len, i))).to_map(),
282282
in
283283
Project(
284284
Join(right, left, Inner, predicate.remap(remapping)),
285-
(right_indices ++ left_indices).map(i -> ColumnRef(i))
285+
(right_indices ++ left_indices).map((i: I64) -> ColumnRef(i))
286286
)
287287
\ _ -> none
288288

@@ -306,8 +306,8 @@ fn (plan: Logical*) join_associativity(): Logical? = match plan
306306
b_indices = 0..b_len,
307307
c_indices = 0..c_len,
308308

309-
pred_bc_remapping = (b_indices.map(i -> (a_len + i, i)) ++
310-
c_indices.map(i -> (a_len + b_len + i, b_len + i))).to_map(),
309+
pred_bc_remapping = (b_indices.map((i: I64) -> (a_len + i, i)) ++
310+
c_indices.map((i: I64) -> (a_len + b_len + i, b_len + i))).to_map(),
311311

312312
remapped_pred_bc = pred_bc.remap(pred_bc_remapping),
313313

@@ -316,8 +316,8 @@ fn (plan: Logical*) join_associativity(): Logical? = match plan
316316
b_indices_after = 0..b_len,
317317
c_indices_after = 0..c_len,
318318

319-
pred_ab_remapping = (b_indices_after.map(i -> (a_len + i, a_len + i)) ++
320-
c_indices_after.map(i -> (a_len + b_len + i, a_len + b_len + i))).to_map(),
319+
pred_ab_remapping = (b_indices_after.map((i: I64) -> (a_len + i, a_len + i)) ++
320+
c_indices_after.map((i: I64) -> (a_len + b_len + i, a_len + b_len + i))).to_map(),
321321

322322
remapped_pred_ab = pred_ab.remap(pred_ab_remapping)
323323
in
@@ -353,10 +353,11 @@ fn (expr: Logical*) impl_filter_enforce(props: PhysicalProperties?): Physical? =
353353
let
354354
result = PhysFilter(child.optimize(none), predicate)
355355
in
356-
if props != none then
357-
PhysSort(result, props#order_by)
358-
else
359-
result
356+
result
357+
// if props != none then
358+
// PhysSort(result, props#order_by) // TODO: same ! problem
359+
// else
360+
// result
360361
\ _ -> none
361362

362363
[implementation]
@@ -365,14 +366,14 @@ fn (expr: Logical*) impl_filter_passthrough(props: PhysicalProperties?): Physica
365366
| Filter(child, predicate) -> PhysFilter(child.optimize(props), predicate)
366367
\ _ -> none
367368

368-
[implementation]
369-
fn (expr: Logical*) impl_sort(props: PhysicalProperties?): Physical? = match expr
370-
| Sort(child, order_by) ->
371-
if props == none then // TODO: The incoming `?` syntax will make this cleaner
372-
PhysSort(child.optimize(none), order_by)
373-
else
374-
none
375-
\ _ -> none
369+
// [implementation]
370+
// fn (expr: Logical*) impl_sort(props: PhysicalProperties?): Physical? = match expr
371+
// | Sort(child, order_by) ->
372+
// if props == none then // TODO: The incoming `?` syntax will make this cleaner. Also == problem for now.
373+
// PhysSort(child.optimize(none), order_by)
374+
// else
375+
// none
376+
// \ _ -> none
376377

377378
// -------------------------
378379
// 11. Other Required Functions
@@ -381,14 +382,14 @@ fn (expr: Logical*) impl_sort(props: PhysicalProperties?): Physical? = match exp
381382
// Cost function for physical operators - used by the cost-based optimizer to evaluate
382383
// different physical implementation alternatives.
383384

384-
fn (expr: Physical*) cost(): F64 = 0
385+
fn (expr: Physical*) cost(): F64 = 0.0
385386

386387
// Derive logical properties from a logical plan - propagates schema information and other
387388
// logical properties (like cardinality estimates, uniqueness, or functional dependencies)
388389
// through the logical plan.
389390

390-
fn (log: Logical*) derive(): LogicalProperties = match log
391-
| Get(table_name) -> catalog.get_table_schema(table_name)
391+
fn (log: Logical*) derive(): LogicalProperties? = match log
392+
| Get(table_name) -> LogicalProperties(Catalog.get_table_schema(table_name))
392393
| Filter(child, _) -> child.properties()
393394
| Join(left, right, join_type, _) ->
394395
let

optd-cli/src/main.rs

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -40,9 +40,10 @@ use optd::dsl::analyzer::hir::{CoreData, HIR, Udf, Value};
4040
use optd::dsl::compile::{Config, compile_hir};
4141
use optd::dsl::engine::{Continuation, Engine, EngineResponse};
4242
use optd::dsl::utils::errors::{CompileError, Diagnose};
43+
use optd::dsl::utils::retriever::{MockRetriever, Retriever};
4344
use std::collections::HashMap;
4445
use std::sync::Arc;
45-
use tokio::runtime::Runtime;
46+
use tokio::runtime::Builder;
4647
use tokio::task::JoinSet;
4748

4849
#[derive(Parser)]
@@ -66,7 +67,11 @@ enum Commands {
6667
}
6768

6869
/// A unimplemented user-defined function.
69-
pub fn unimplemented_udf(_args: &[Value], _catalog: &dyn Catalog) -> Value {
70+
pub fn unimplemented_udf(
71+
_args: &[Value],
72+
_catalog: &dyn Catalog,
73+
_retriever: &dyn Retriever,
74+
) -> Value {
7075
println!("This user-defined function is unimplemented!");
7176
Value::new(CoreData::<Value>::None)
7277
}
@@ -122,7 +127,13 @@ fn run_all_functions(hir: &HIR) -> Result<(), Vec<CompileError>> {
122127
println!("Found {} functions to run", functions.len());
123128

124129
// Create a multi-threaded runtime for parallel execution.
125-
let runtime = Runtime::new().unwrap();
130+
// TODO: We increase the stack size by x64 to avoid stack overflow
131+
// given the lack of tail recursion in the engine (yet...)
132+
let runtime = Builder::new_multi_thread()
133+
.thread_stack_size(128 * 1024 * 1024)
134+
.enable_all()
135+
.build()
136+
.unwrap();
126137
let function_results = runtime.block_on(run_functions_in_parallel(hir, functions));
127138

128139
// Process and display function results.
@@ -139,10 +150,11 @@ fn run_all_functions(hir: &HIR) -> Result<(), Vec<CompileError>> {
139150

140151
async fn run_functions_in_parallel(hir: &HIR, functions: Vec<String>) -> Vec<FunctionResult> {
141152
let catalog = Arc::new(memory_catalog());
153+
let retriever = Arc::new(MockRetriever::new());
142154
let mut set = JoinSet::new();
143155

144156
for function_name in functions {
145-
let engine = Engine::new(hir.context.clone(), catalog.clone());
157+
let engine = Engine::new(hir.context.clone(), catalog.clone(), retriever.clone());
146158
let name = function_name.clone();
147159

148160
set.spawn(async move {
@@ -151,7 +163,7 @@ async fn run_functions_in_parallel(hir: &HIR, functions: Vec<String>) -> Vec<Fun
151163
Arc::new(|value| Box::pin(async move { value }));
152164

153165
// Launch the function with an empty vector of arguments.
154-
let result = engine.launch_rule(&name, vec![], result_handler).await;
166+
let result = engine.launch(&name, vec![], result_handler).await;
155167
FunctionResult { name, result }
156168
});
157169
}

optd/src/dsl/analyzer/from_ast/converter.rs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,7 @@ mod converter_tests {
182182
use crate::dsl::analyzer::hir::{CoreData, FunKind};
183183
use crate::dsl::analyzer::type_checks::registry::{Generic, TypeKind};
184184
use crate::dsl::parser::ast::{self, Adt, Function, Item, Module, Type as AstType};
185+
use crate::dsl::utils::retriever::Retriever;
185186
use crate::dsl::utils::span::{Span, Spanned};
186187

187188
// Helper functions to create test items
@@ -381,7 +382,11 @@ mod converter_tests {
381382
let ext_func = create_simple_function("external_function", false);
382383
let module = create_module_with_functions(vec![ext_func]);
383384

384-
pub fn external_function(_args: &[Value], _catalog: &dyn Catalog) -> Value {
385+
pub fn external_function(
386+
_args: &[Value],
387+
_catalog: &dyn Catalog,
388+
_retriever: &dyn Retriever,
389+
) -> Value {
385390
println!("Hello from UDF!");
386391
Value::new(CoreData::<Value>::None)
387392
}

optd/src/dsl/analyzer/hir/mod.rs

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
1818
use super::type_checks::registry::Type;
1919
use crate::catalog::Catalog;
20+
use crate::dsl::utils::retriever::Retriever;
2021
use crate::dsl::utils::span::Span;
2122
use context::Context;
2223
use map::Map;
@@ -76,12 +77,17 @@ pub struct Udf {
7677
/// The function pointer to the user-defined function.
7778
///
7879
/// Note that [`Value`]s passed to and returned from this UDF do not have associated metadata.
79-
pub func: fn(&[Value], &dyn Catalog) -> Value,
80+
pub func: fn(&[Value], &dyn Catalog, &dyn Retriever) -> Value,
8081
}
8182

8283
impl Udf {
83-
pub fn call(&self, values: &[Value], catalog: &dyn Catalog) -> Value {
84-
(self.func)(values, catalog)
84+
pub fn call(
85+
&self,
86+
values: &[Value],
87+
catalog: &dyn Catalog,
88+
retriever: &dyn Retriever,
89+
) -> Value {
90+
(self.func)(values, catalog, retriever)
8591
}
8692
}
8793

optd/src/dsl/analyzer/into_hir/converter.rs

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -234,7 +234,12 @@ fn convert_field_access(
234234
) -> ExprKind {
235235
use ExprKind::*;
236236

237-
let expr_type = registry.resolve_type(&expr.metadata.ty);
237+
let mut expr_type = registry.resolve_type(&expr.metadata.ty);
238+
239+
// Unwrap stored type if necessary.
240+
while let TypeKind::Stored(inner_type) = *expr_type.value {
241+
expr_type = inner_type;
242+
}
238243

239244
match &*expr_type.value {
240245
TypeKind::Tuple(_) => {
@@ -262,7 +267,10 @@ fn convert_field_access(
262267
)
263268
}
264269

265-
_ => panic!("Field access on non-struct, non-tuple type: error in type inference"),
270+
_ => panic!(
271+
"Field access on non-struct, non-tuple type: {:?}",
272+
expr_type
273+
),
266274
}
267275
}
268276

optd/src/dsl/analyzer/type_checks/glb.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -221,13 +221,13 @@ impl TypeRegistry {
221221
let result = glb_kind.into();
222222

223223
// Verify post-condition in debug mode only
224-
debug_assert!(
224+
assert!(
225225
self.is_subtype(&result, type1),
226226
"GLB post-condition failed: {:?} is not a subtype of {:?}",
227227
result,
228228
type1
229229
);
230-
debug_assert!(
230+
assert!(
231231
self.is_subtype(&result, type2),
232232
"GLB post-condition failed: {:?} is not a subtype of {:?}",
233233
result,

optd/src/dsl/analyzer/type_checks/lub.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -200,13 +200,13 @@ impl TypeRegistry {
200200
let result = lub_kind.into();
201201

202202
// Verify post-condition in debug mode only
203-
debug_assert!(
203+
assert!(
204204
self.is_subtype(type1, &result),
205205
"LUB post-condition failed: {:?} is not a subtype of {:?}",
206206
type1,
207207
result
208208
);
209-
debug_assert!(
209+
assert!(
210210
self.is_subtype(type2, &result),
211211
"LUB post-condition failed: {:?} is not a subtype of {:?}",
212212
type2,

0 commit comments

Comments
 (0)