From ab752172d6aecda80fb4cf67fe1ebb91e8d4cc09 Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Fri, 14 Nov 2025 11:47:44 -0300 Subject: [PATCH 01/10] create a common crate --- Cargo.lock | 14 + Cargo.toml | 4 +- common/Cargo.toml | 25 ++ common/lib.rs | 4 + common/numeric/mod.rs | 789 +++++++++++++++++++++++++++++++++ common/numeric/nonnan.rs | 105 +++++ common/schema/affinity.rs | 618 ++++++++++++++++++++++++++ common/schema/collation.rs | 61 +++ common/schema/column.rs | 238 ++++++++++ common/schema/mod.rs | 3 + common/table_reference.rs | 247 +++++++++++ common/value.rs | 874 +++++++++++++++++++++++++++++++++++++ 12 files changed, 2981 insertions(+), 1 deletion(-) create mode 100644 common/Cargo.toml create mode 100644 common/lib.rs create mode 100644 common/numeric/mod.rs create mode 100644 common/numeric/nonnan.rs create mode 100644 common/schema/affinity.rs create mode 100644 common/schema/collation.rs create mode 100644 common/schema/column.rs create mode 100644 common/schema/mod.rs create mode 100644 common/table_reference.rs create mode 100644 common/value.rs diff --git a/Cargo.lock b/Cargo.lock index 50f3f8047e..6d03e71b21 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4894,6 +4894,20 @@ dependencies = [ "validator", ] +[[package]] +name = "turso_common" +version = "0.4.0-pre.1" +dependencies = [ + "either", + "miette", + "serde", + "strum", + "strum_macros", + "thiserror 2.0.16", + "turso_parser", + "uncased", +] + [[package]] name = "turso_core" version = "0.4.0-pre.1" diff --git a/Cargo.toml b/Cargo.toml index f397e4863c..c0a7f57662 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -32,7 +32,8 @@ members = [ "whopper", "perf/throughput/turso", "perf/throughput/rusqlite", - "perf/encryption" + "perf/encryption", + "common", ] exclude = [ "perf/latency/limbo", @@ -62,6 +63,7 @@ limbo_regexp = { path = "extensions/regexp", version = "0.4.0-pre.1" } limbo_uuid = { path = "extensions/uuid", version = "0.4.0-pre.1" } turso_parser = { path = "parser", version = 
"0.4.0-pre.1" } limbo_fuzzy = { path = "extensions/fuzzy", version = "0.4.0-pre.1" } +turso_common = { path = "common", version = "0.4.0-pre.1" } sql_generation = { path = "sql_generation" } strum = { version = "0.26", features = ["derive"] } strum_macros = "0.26" diff --git a/common/Cargo.toml b/common/Cargo.toml new file mode 100644 index 0000000000..4162d95aef --- /dev/null +++ b/common/Cargo.toml @@ -0,0 +1,25 @@ +[package] +name = "turso_common" +version.workspace = true +authors.workspace = true +edition.workspace = true +license.workspace = true +repository.workspace = true + +[lib] +path = "lib.rs" + +[features] +default = ["json"] +serde = ["dep:serde"] +json = [] + +[dependencies] +either.workspace = true +miette.workspace = true +serde = { workspace = true, optional = true } +strum.workspace = true +strum_macros.workspace = true +thiserror.workspace = true +turso_parser.workspace = true +uncased = "0.9.10" diff --git a/common/lib.rs b/common/lib.rs new file mode 100644 index 0000000000..2024dbc57d --- /dev/null +++ b/common/lib.rs @@ -0,0 +1,4 @@ +pub mod numeric; +pub mod schema; +pub mod table_reference; +pub mod value; diff --git a/common/numeric/mod.rs b/common/numeric/mod.rs new file mode 100644 index 0000000000..ac3ed6d7ba --- /dev/null +++ b/common/numeric/mod.rs @@ -0,0 +1,789 @@ +use crate::value::Value; + +pub mod nonnan; + +use nonnan::NonNan; + +// TODO: Remove when https://github.com/rust-lang/libs-team/issues/230 is available +trait SaturatingShl { + fn saturating_shl(self, rhs: u32) -> Self; +} + +impl SaturatingShl for i64 { + fn saturating_shl(self, rhs: u32) -> Self { + if rhs >= Self::BITS { + 0 + } else { + self << rhs + } + } +} + +// TODO: Remove when https://github.com/rust-lang/libs-team/issues/230 is available +trait SaturatingShr { + fn saturating_shr(self, rhs: u32) -> Self; +} + +impl SaturatingShr for i64 { + fn saturating_shr(self, rhs: u32) -> Self { + if rhs >= Self::BITS { + if self >= 0 { + 0 + } else { + -1 + } + } else 
{ + self >> rhs + } + } +} + +#[derive(Debug, Clone, Copy)] +pub enum Numeric { + Null, + Integer(i64), + Float(NonNan), +} + +impl Numeric { + pub fn from_value_strict(value: &Value) -> Numeric { + match value { + Value::Null | Value::Blob(_) => Self::Null, + Value::Integer(v) => Self::Integer(*v), + Value::Float(v) => match NonNan::new(*v) { + Some(v) => Self::Float(v), + None => Self::Null, + }, + Value::Text(text) => { + let s = text.as_str(); + + match str_to_f64(s) { + None + | Some(StrToF64::FractionalPrefix(_)) + | Some(StrToF64::DecimalPrefix(_)) => Self::Null, + Some(StrToF64::Fractional(value)) => Self::Float(value), + Some(StrToF64::Decimal(real)) => { + let integer = str_to_i64(s).unwrap_or(0); + + if real == integer as f64 { + Self::Integer(integer) + } else { + Self::Float(real) + } + } + } + } + } + } + + pub fn try_into_f64(&self) -> Option { + match self { + Numeric::Null => None, + Numeric::Integer(v) => Some(*v as _), + Numeric::Float(v) => Some((*v).into()), + } + } + + pub fn try_into_bool(&self) -> Option { + match self { + Numeric::Null => None, + Numeric::Integer(0) => Some(false), + Numeric::Float(non_nan) if *non_nan == 0.0 => Some(false), + _ => Some(true), + } + } +} + +impl From for NullableInteger { + fn from(value: Numeric) -> Self { + match value { + Numeric::Null => NullableInteger::Null, + Numeric::Integer(v) => NullableInteger::Integer(v), + Numeric::Float(v) => NullableInteger::Integer(f64::from(v) as i64), + } + } +} + +impl From for Value { + fn from(value: Numeric) -> Self { + match value { + Numeric::Null => Value::Null, + Numeric::Integer(v) => Value::Integer(v), + Numeric::Float(v) => Value::Float(v.into()), + } + } +} + +impl> From for Numeric { + fn from(value: T) -> Self { + let text = value.as_ref(); + + match str_to_f64(text) { + None => Self::Integer(0), + Some(StrToF64::Fractional(value) | StrToF64::FractionalPrefix(value)) => { + Self::Float(value) + } + Some(StrToF64::Decimal(real) | StrToF64::DecimalPrefix(real)) 
=> { + let integer = str_to_i64(text).unwrap_or(0); + + if real == integer as f64 { + Self::Integer(integer) + } else { + Self::Float(real) + } + } + } + } +} + +impl From for Numeric { + fn from(value: Value) -> Self { + Self::from(&value) + } +} +impl From<&Value> for Numeric { + fn from(value: &Value) -> Self { + match value { + Value::Null => Self::Null, + Value::Integer(v) => Self::Integer(*v), + Value::Float(v) => match NonNan::new(*v) { + Some(v) => Self::Float(v), + None => Self::Null, + }, + Value::Text(text) => Numeric::from(text.as_str()), + Value::Blob(blob) => { + let text = String::from_utf8_lossy(blob.as_slice()); + Numeric::from(&text) + } + } + } +} + +impl std::ops::Add for Numeric { + type Output = Self; + + fn add(self, rhs: Self) -> Self::Output { + match (self, rhs) { + (Numeric::Null, _) | (_, Numeric::Null) => Numeric::Null, + (Numeric::Integer(lhs), Numeric::Integer(rhs)) => match lhs.checked_add(rhs) { + None => Numeric::Float(lhs.into()) + Numeric::Float(rhs.into()), + Some(i) => Numeric::Integer(i), + }, + (Numeric::Float(lhs), Numeric::Float(rhs)) => match lhs + rhs { + Some(v) => Numeric::Float(v), + None => Numeric::Null, + }, + (f @ Numeric::Float(_), Numeric::Integer(i)) + | (Numeric::Integer(i), f @ Numeric::Float(_)) => f + Numeric::Float(i.into()), + } + } +} + +impl std::ops::Sub for Numeric { + type Output = Self; + + fn sub(self, rhs: Self) -> Self::Output { + match (self, rhs) { + (Numeric::Null, _) | (_, Numeric::Null) => Numeric::Null, + (Numeric::Float(lhs), Numeric::Float(rhs)) => match lhs - rhs { + Some(v) => Numeric::Float(v), + None => Numeric::Null, + }, + (Numeric::Integer(lhs), Numeric::Integer(rhs)) => match lhs.checked_sub(rhs) { + None => Numeric::Float(lhs.into()) - Numeric::Float(rhs.into()), + Some(i) => Numeric::Integer(i), + }, + (f @ Numeric::Float(_), Numeric::Integer(i)) => f - Numeric::Float(i.into()), + (Numeric::Integer(i), f @ Numeric::Float(_)) => Numeric::Float(i.into()) - f, + } + } +} + +impl 
std::ops::Mul for Numeric { + type Output = Self; + + fn mul(self, rhs: Self) -> Self::Output { + match (self, rhs) { + (Numeric::Null, _) | (_, Numeric::Null) => Numeric::Null, + (Numeric::Float(lhs), Numeric::Float(rhs)) => match lhs * rhs { + Some(v) => Numeric::Float(v), + None => Numeric::Null, + }, + (Numeric::Integer(lhs), Numeric::Integer(rhs)) => match lhs.checked_mul(rhs) { + None => Numeric::Float(lhs.into()) * Numeric::Float(rhs.into()), + Some(i) => Numeric::Integer(i), + }, + (f @ Numeric::Float(_), Numeric::Integer(i)) + | (Numeric::Integer(i), f @ Numeric::Float(_)) => f * Numeric::Float(i.into()), + } + } +} + +impl std::ops::Div for Numeric { + type Output = Self; + + fn div(self, rhs: Self) -> Self::Output { + match (self, rhs) { + (Numeric::Null, _) | (_, Numeric::Null) => Numeric::Null, + (Numeric::Float(lhs), Numeric::Float(rhs)) => match lhs / rhs { + Some(v) if rhs != 0.0 => Numeric::Float(v), + _ => Numeric::Null, + }, + (Numeric::Integer(lhs), Numeric::Integer(rhs)) => match lhs.checked_div(rhs) { + None => Numeric::Float(lhs.into()) / Numeric::Float(rhs.into()), + Some(v) => Numeric::Integer(v), + }, + (f @ Numeric::Float(_), Numeric::Integer(i)) => f / Numeric::Float(i.into()), + (Numeric::Integer(i), f @ Numeric::Float(_)) => Numeric::Float(i.into()) / f, + } + } +} + +impl std::ops::Neg for Numeric { + type Output = Self; + + fn neg(self) -> Self::Output { + match self { + Numeric::Null => Numeric::Null, + Numeric::Integer(v) => match v.checked_neg() { + None => -Numeric::Float(v.into()), + Some(i) => Numeric::Integer(i), + }, + Numeric::Float(v) => Numeric::Float(-v), + } + } +} + +#[derive(Debug)] +pub enum NullableInteger { + Null, + Integer(i64), +} + +impl From for Value { + fn from(value: NullableInteger) -> Self { + match value { + NullableInteger::Null => Value::Null, + NullableInteger::Integer(v) => Value::Integer(v), + } + } +} + +impl> From for NullableInteger { + fn from(value: T) -> Self { + 
Self::Integer(str_to_i64(value.as_ref()).unwrap_or(0)) + } +} + +impl From for NullableInteger { + fn from(value: Value) -> Self { + Self::from(&value) + } +} + +impl From<&Value> for NullableInteger { + fn from(value: &Value) -> Self { + match value { + Value::Null => Self::Null, + Value::Integer(v) => Self::Integer(*v), + Value::Float(v) => Self::Integer(*v as i64), + Value::Text(text) => Self::from(text.as_str()), + Value::Blob(blob) => { + let text = String::from_utf8_lossy(blob.as_slice()); + Self::from(text) + } + } + } +} + +impl std::ops::Not for NullableInteger { + type Output = Self; + + fn not(self) -> Self::Output { + match self { + NullableInteger::Null => NullableInteger::Null, + NullableInteger::Integer(lhs) => NullableInteger::Integer(!lhs), + } + } +} + +impl std::ops::BitAnd for NullableInteger { + type Output = Self; + + fn bitand(self, rhs: Self) -> Self::Output { + match (self, rhs) { + (NullableInteger::Null, _) | (_, NullableInteger::Null) => NullableInteger::Null, + (NullableInteger::Integer(lhs), NullableInteger::Integer(rhs)) => { + NullableInteger::Integer(lhs & rhs) + } + } + } +} + +impl std::ops::BitOr for NullableInteger { + type Output = Self; + + fn bitor(self, rhs: Self) -> Self::Output { + match (self, rhs) { + (NullableInteger::Null, _) | (_, NullableInteger::Null) => NullableInteger::Null, + (NullableInteger::Integer(lhs), NullableInteger::Integer(rhs)) => { + NullableInteger::Integer(lhs | rhs) + } + } + } +} + +impl std::ops::Shl for NullableInteger { + type Output = Self; + + fn shl(self, rhs: Self) -> Self::Output { + match (self, rhs) { + (NullableInteger::Null, _) | (_, NullableInteger::Null) => NullableInteger::Null, + (NullableInteger::Integer(lhs), NullableInteger::Integer(rhs)) => { + NullableInteger::Integer(if rhs.is_positive() { + lhs.saturating_shl(rhs.try_into().unwrap_or(u32::MAX)) + } else { + lhs.saturating_shr(rhs.saturating_abs().try_into().unwrap_or(u32::MAX)) + }) + } + } + } +} + +impl std::ops::Shr for 
NullableInteger { + type Output = Self; + + fn shr(self, rhs: Self) -> Self::Output { + match (self, rhs) { + (NullableInteger::Null, _) | (_, NullableInteger::Null) => NullableInteger::Null, + (NullableInteger::Integer(lhs), NullableInteger::Integer(rhs)) => { + NullableInteger::Integer(if rhs.is_positive() { + lhs.saturating_shr(rhs.try_into().unwrap_or(u32::MAX)) + } else { + lhs.saturating_shl(rhs.saturating_abs().try_into().unwrap_or(u32::MAX)) + }) + } + } + } +} + +impl std::ops::Rem for NullableInteger { + type Output = Self; + + fn rem(self, rhs: Self) -> Self::Output { + match (self, rhs) { + (NullableInteger::Null, _) | (_, NullableInteger::Null) => NullableInteger::Null, + (_, NullableInteger::Integer(0)) => NullableInteger::Null, + (lhs, NullableInteger::Integer(-1)) => lhs % NullableInteger::Integer(1), + (NullableInteger::Integer(lhs), NullableInteger::Integer(rhs)) => { + NullableInteger::Integer(lhs % rhs) + } + } + } +} + +// Maximum u64 that can survive a f64 round trip +const MAX_EXACT: u64 = u64::MAX << 11; + +const VERTICAL_TAB: char = '\u{b}'; + +/// Encapsulates Dekker's arithmetic for higher precision. This is spiritually the same as using a +/// f128 for arithmetic, but cross platform and compatible with sqlite. 
+#[derive(Debug, Clone, Copy)] +struct DoubleDouble(f64, f64); + +impl DoubleDouble { + pub const E100: Self = DoubleDouble(1.0e+100, -1.590_289_110_975_991_8e83); + pub const E10: Self = DoubleDouble(1.0e+10, 0.0); + pub const E1: Self = DoubleDouble(1.0e+01, 0.0); + + pub const NEG_E100: Self = DoubleDouble(1.0e-100, -1.999_189_980_260_288_3e-117); + pub const NEG_E10: Self = DoubleDouble(1.0e-10, -3.643_219_731_549_774e-27); + pub const NEG_E1: Self = DoubleDouble(1.0e-01, -5.551_115_123_125_783e-18); +} + +impl From for DoubleDouble { + fn from(value: u64) -> Self { + let r = value as f64; + + // If the value is smaller than MAX_EXACT, the error isn't significant + let rr = if r <= MAX_EXACT as f64 { + let round_tripped = value as f64 as u64; + let sign = if value >= round_tripped { 1.0 } else { -1.0 }; + + // Error term is the signed distance of the round tripped value and itself + sign * value.abs_diff(round_tripped) as f64 + } else { + 0.0 + }; + + DoubleDouble(r, rr) + } +} + +impl From for u64 { + fn from(value: DoubleDouble) -> Self { + if value.1 < 0.0 { + value.0 as u64 - value.1.abs() as u64 + } else { + value.0 as u64 + value.1 as u64 + } + } +} + +impl From for f64 { + fn from(DoubleDouble(a, aa): DoubleDouble) -> Self { + a + aa + } +} + +impl std::ops::Mul for DoubleDouble { + type Output = Self; + + /// Double-Double multiplication. (self.0, self.1) *= (rhs.0, rhs.1) + /// + /// Reference: + /// T. J. Dekker, "A Floating-Point Technique for Extending the Available Precision". + /// 1971-07-26. 
+ /// + fn mul(self, rhs: Self) -> Self::Output { + // TODO: Better variable naming + + let mask = u64::MAX << 26; + + let hx = f64::from_bits(self.0.to_bits() & mask); + let tx = self.0 - hx; + + let hy = f64::from_bits(rhs.0.to_bits() & mask); + let ty = rhs.0 - hy; + + let p = hx * hy; + let q = hx * ty + tx * hy; + + let c = p + q; + let cc = p - c + q + tx * ty; + let cc = self.0 * rhs.1 + self.1 * rhs.0 + cc; + + let r = c + cc; + let rr = (c - r) + cc; + + DoubleDouble(r, rr) + } +} + +impl std::ops::MulAssign for DoubleDouble { + fn mul_assign(&mut self, rhs: Self) { + *self = *self * rhs; + } +} + +pub fn str_to_i64(input: impl AsRef) -> Option { + let input = input + .as_ref() + .trim_matches(|ch: char| ch.is_ascii_whitespace() || ch == VERTICAL_TAB); + + let mut iter = input.chars().enumerate().peekable(); + + iter.next_if(|(_, ch)| matches!(ch, '+' | '-')); + let Some((end, _)) = iter.take_while(|(_, ch)| ch.is_ascii_digit()).last() else { + return Some(0); + }; + + input[0..=end].parse::().map_or_else( + |err| match err.kind() { + std::num::IntErrorKind::PosOverflow => Some(i64::MAX), + std::num::IntErrorKind::NegOverflow => Some(i64::MIN), + std::num::IntErrorKind::Empty => unreachable!(), + _ => Some(0), + }, + Some, + ) +} + +#[derive(Debug, Clone, Copy)] +pub enum StrToF64 { + Fractional(NonNan), + Decimal(NonNan), + FractionalPrefix(NonNan), + DecimalPrefix(NonNan), +} + +impl From for f64 { + fn from(value: StrToF64) -> Self { + match value { + StrToF64::Fractional(non_nan) => non_nan.into(), + StrToF64::Decimal(non_nan) => non_nan.into(), + StrToF64::FractionalPrefix(non_nan) => non_nan.into(), + StrToF64::DecimalPrefix(non_nan) => non_nan.into(), + } + } +} + +pub fn str_to_f64(input: impl AsRef) -> Option { + let mut input = input + .as_ref() + .trim_matches(|ch: char| ch.is_ascii_whitespace() || ch == VERTICAL_TAB) + .chars() + .peekable(); + + let sign = match input.next_if(|ch| matches!(ch, '-' | '+')) { + Some('-') => -1.0, + _ => 1.0, + 
}; + + let mut had_digits = false; + let mut is_fractional = false; + + let mut significant: u64 = 0; + + // Copy as many significant digits as we can + while let Some(digit) = input.peek().and_then(|ch| ch.to_digit(10)) { + had_digits = true; + + match significant + .checked_mul(10) + .and_then(|v| v.checked_add(digit as u64)) + { + Some(new) => significant = new, + None => break, + } + + input.next(); + } + + let mut exponent = 0; + + // Increment the exponent for every non significant digit we skipped + while input.next_if(char::is_ascii_digit).is_some() { + exponent += 1 + } + + if input.next_if(|ch| matches!(ch, '.')).is_some() { + if had_digits { + is_fractional = true; + } + + if input.peek().is_some_and(char::is_ascii_digit) { + is_fractional = true; + } + + while let Some(digit) = input.peek().and_then(|ch| ch.to_digit(10)) { + if significant < (u64::MAX - 9) / 10 { + significant = significant * 10 + digit as u64; + exponent -= 1; + } + + input.next(); + } + }; + + let mut valid_exponent = true; + + if (had_digits || is_fractional) && input.next_if(|ch| matches!(ch, 'e' | 'E')).is_some() { + let sign = match input.next_if(|ch| matches!(ch, '-' | '+')) { + Some('-') => -1, + _ => 1, + }; + + if input.peek().is_some_and(char::is_ascii_digit) { + is_fractional = true; + let mut e = 0; + + while let Some(ch) = input.next_if(char::is_ascii_digit) { + e = (e * 10 + ch.to_digit(10).unwrap() as i32).min(1000); + } + + exponent += sign * e; + } else { + valid_exponent = false; + } + }; + + if !(had_digits || is_fractional) { + return None; + } + + while exponent.is_positive() && significant < MAX_EXACT / 10 { + significant *= 10; + exponent -= 1; + } + + while exponent.is_negative() && significant % 10 == 0 { + significant /= 10; + exponent += 1; + } + + let mut result = DoubleDouble::from(significant); + + if exponent > 0 { + while exponent >= 100 { + exponent -= 100; + result *= DoubleDouble::E100; + } + while exponent >= 10 { + exponent -= 10; + result *= 
DoubleDouble::E10; + } + while exponent >= 1 { + exponent -= 1; + result *= DoubleDouble::E1; + } + } else { + while exponent <= -100 { + exponent += 100; + result *= DoubleDouble::NEG_E100; + } + while exponent <= -10 { + exponent += 10; + result *= DoubleDouble::NEG_E10; + } + while exponent <= -1 { + exponent += 1; + result *= DoubleDouble::NEG_E1; + } + } + + let result = NonNan::new(f64::from(result) * sign) + .unwrap_or_else(|| NonNan::new(sign * f64::INFINITY).unwrap()); + + if !valid_exponent || input.count() > 0 { + if is_fractional { + return Some(StrToF64::FractionalPrefix(result)); + } else { + return Some(StrToF64::DecimalPrefix(result)); + } + } + + Some(if is_fractional { + StrToF64::Fractional(result) + } else { + StrToF64::Decimal(result) + }) +} + +pub fn format_float(v: f64) -> String { + if v.is_nan() { + return "".to_string(); + } + + if v.is_infinite() { + return if v.is_sign_negative() { "-Inf" } else { "Inf" }.to_string(); + } + + if v == 0.0 { + return "0.0".to_string(); + } + + let negative = v < 0.0; + let mut d = DoubleDouble(v.abs(), 0.0); + let mut exp = 0; + + if d.0 > 9.223_372_036_854_775e18 { + while d.0 > 9.223_372_036_854_774e118 { + exp += 100; + d *= DoubleDouble::NEG_E100; + } + while d.0 > 9.223_372_036_854_774e28 { + exp += 10; + d *= DoubleDouble::NEG_E10; + } + while d.0 > 9.223_372_036_854_775e18 { + exp += 1; + d *= DoubleDouble::NEG_E1; + } + } else { + while d.0 < 9.223_372_036_854_775e-83 { + exp -= 100; + d *= DoubleDouble::E100; + } + while d.0 < 9.223_372_036_854_775e7 { + exp -= 10; + d *= DoubleDouble::E10; + } + while d.0 < 9.223_372_036_854_775e17 { + exp -= 1; + d *= DoubleDouble::E1; + } + } + + let v = u64::from(d); + + let mut digits = v.to_string().into_bytes(); + + let precision = 15; + + let mut decimal_pos = digits.len() as i32 + exp; + + 'out: { + if digits.len() > precision { + let round_up = digits[precision] >= b'5'; + digits.truncate(precision); + + if round_up { + for i in (0..precision).rev() { + 
if digits[i] < b'9' { + digits[i] += 1; + break 'out; + } + digits[i] = b'0'; + } + + digits.insert(0, b'1'); + decimal_pos += 1; + } + } + } + + while digits.len() > 1 && digits[digits.len() - 1] == b'0' { + digits.pop(); + } + + let exp = decimal_pos - 1; + + if (-4..=14).contains(&exp) { + format!( + "{}{}.{}{}", + if negative { "-" } else { Default::default() }, + if decimal_pos > 0 { + let zeroes = (decimal_pos - digits.len() as i32).max(0) as usize; + let digits = digits + .get(0..(decimal_pos.min(digits.len() as i32) as usize)) + .unwrap(); + (unsafe { str::from_utf8_unchecked(digits) }).to_owned() + &"0".repeat(zeroes) + } else { + "0".to_string() + }, + "0".repeat(decimal_pos.min(0).unsigned_abs() as usize), + digits + .get((decimal_pos.max(0) as usize)..) + .filter(|v| !v.is_empty()) + .map(|v| unsafe { str::from_utf8_unchecked(v) }) + .unwrap_or("0") + ) + } else { + format!( + "{}{}.{}e{}{:0width$}", + if negative { "-" } else { "" }, + digits.first().cloned().unwrap_or(b'0') as char, + digits + .get(1..) 
+ .filter(|v| !v.is_empty()) + .map(|v| unsafe { str::from_utf8_unchecked(v) }) + .unwrap_or("0"), + if exp.is_positive() { "+" } else { "-" }, + exp.abs(), + width = if exp > 100 { 3 } else { 2 } + ) + } +} + +#[test] +fn test_decode_float() { + assert_eq!(format_float(9.93e-322), "9.93071948140905e-322"); + assert_eq!(format_float(9.93), "9.93"); + assert_eq!(format_float(0.093), "0.093"); + assert_eq!(format_float(-0.093), "-0.093"); + assert_eq!(format_float(0.0), "0.0"); + assert_eq!(format_float(4.94e-322), "4.94065645841247e-322"); + assert_eq!(format_float(-20228007.0), "-20228007.0"); +} diff --git a/common/numeric/nonnan.rs b/common/numeric/nonnan.rs new file mode 100644 index 0000000000..5ae6a1f34a --- /dev/null +++ b/common/numeric/nonnan.rs @@ -0,0 +1,105 @@ +#[repr(transparent)] +#[derive(Debug, Clone, Copy, PartialEq)] +pub struct NonNan(f64); + +impl NonNan { + pub fn new(value: f64) -> Option { + if value.is_nan() { + return None; + } + + Some(NonNan(value)) + } +} + +impl PartialEq for f64 { + fn eq(&self, other: &NonNan) -> bool { + *self == other.0 + } +} + +impl PartialEq for NonNan { + fn eq(&self, other: &f64) -> bool { + self.0 == *other + } +} + +impl PartialOrd for NonNan { + fn partial_cmp(&self, other: &f64) -> Option { + self.0.partial_cmp(other) + } +} + +impl PartialOrd for f64 { + fn partial_cmp(&self, other: &NonNan) -> Option { + self.partial_cmp(&other.0) + } +} + +impl From for NonNan { + fn from(value: i64) -> Self { + NonNan(value as f64) + } +} + +impl From for f64 { + fn from(value: NonNan) -> Self { + value.0 + } +} + +impl std::ops::Deref for NonNan { + type Target = f64; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl std::ops::Add for NonNan { + type Output = Option; + + fn add(self, rhs: Self) -> Self::Output { + Self::new(self.0 + rhs.0) + } +} + +impl std::ops::Sub for NonNan { + type Output = Option; + + fn sub(self, rhs: Self) -> Self::Output { + Self::new(self.0 - rhs.0) + } +} + +impl std::ops::Mul 
for NonNan { + type Output = Option; + + fn mul(self, rhs: Self) -> Self::Output { + Self::new(self.0 * rhs.0) + } +} + +impl std::ops::Div for NonNan { + type Output = Option; + + fn div(self, rhs: Self) -> Self::Output { + Self::new(self.0 / rhs.0) + } +} + +impl std::ops::Rem for NonNan { + type Output = Option; + + fn rem(self, rhs: Self) -> Self::Output { + Self::new(self.0 % rhs.0) + } +} + +impl std::ops::Neg for NonNan { + type Output = Self; + + fn neg(self) -> Self::Output { + Self(-self.0) + } +} diff --git a/common/schema/affinity.rs b/common/schema/affinity.rs new file mode 100644 index 0000000000..0064b503ff --- /dev/null +++ b/common/schema/affinity.rs @@ -0,0 +1,618 @@ +use either::Either; +use turso_parser::ast::{Expr, Literal}; + +use crate::value::{AsValueRef, Value, ValueRef}; + +/// # SQLite Column Type Affinities +/// +/// Each column in an SQLite 3 database is assigned one of the following type affinities: +/// +/// - **TEXT** +/// - **NUMERIC** +/// - **INTEGER** +/// - **REAL** +/// - **BLOB** +/// +/// > **Note:** Historically, the "BLOB" type affinity was called "NONE". However, this term was renamed to avoid confusion with "no affinity". +/// +/// ## Affinity Descriptions +/// +/// ### **TEXT** +/// - Stores data using the NULL, TEXT, or BLOB storage classes. +/// - Numerical data inserted into a column with TEXT affinity is converted into text form before being stored. +/// - **Example:** +/// ```sql +/// CREATE TABLE example (col TEXT); +/// INSERT INTO example (col) VALUES (123); -- Stored as '123' (text) +/// SELECT typeof(col) FROM example; -- Returns 'text' +/// ``` +/// +/// ### **NUMERIC** +/// - Can store values using all five storage classes. +/// - Text data is converted to INTEGER or REAL (in that order of preference) if it is a well-formed integer or real literal. +/// - If the text represents an integer too large for a 64-bit signed integer, it is converted to REAL. 
+/// - If the text is not a well-formed literal, it is stored as TEXT. +/// - Hexadecimal integer literals are stored as TEXT for historical compatibility. +/// - Floating-point values that can be exactly represented as integers are converted to integers. +/// - **Example:** +/// ```sql +/// CREATE TABLE example (col NUMERIC); +/// INSERT INTO example (col) VALUES ('3.0e+5'); -- Stored as 300000 (integer) +/// SELECT typeof(col) FROM example; -- Returns 'integer' +/// ``` +/// +/// ### **INTEGER** +/// - Behaves like NUMERIC affinity but differs in `CAST` expressions. +/// - **Example:** +/// ```sql +/// CREATE TABLE example (col INTEGER); +/// INSERT INTO example (col) VALUES (4.0); -- Stored as 4 (integer) +/// SELECT typeof(col) FROM example; -- Returns 'integer' +/// ``` +/// +/// ### **REAL** +/// - Similar to NUMERIC affinity but forces integer values into floating-point representation. +/// - **Optimization:** Small floating-point values with no fractional component may be stored as integers on disk to save space. This is invisible at the SQL level. +/// - **Example:** +/// ```sql +/// CREATE TABLE example (col REAL); +/// INSERT INTO example (col) VALUES (4); -- Stored as 4.0 (real) +/// SELECT typeof(col) FROM example; -- Returns 'real' +/// ``` +/// +/// ### **BLOB** +/// - Does not prefer any storage class. +/// - No coercion is performed between storage classes. 
+/// - **Example:** +/// ```sql +/// CREATE TABLE example (col BLOB); +/// INSERT INTO example (col) VALUES (x'1234'); -- Stored as a binary blob +/// SELECT typeof(col) FROM example; -- Returns 'blob' +/// ``` +#[derive(Debug, Clone, Copy, PartialEq)] +pub enum Affinity { + Blob = 0, + Text = 1, + Numeric = 2, + Integer = 3, + Real = 4, +} + +pub const SQLITE_AFF_NONE: char = 'A'; // Historically called NONE, but it's the same as BLOB +pub const SQLITE_AFF_TEXT: char = 'B'; +pub const SQLITE_AFF_NUMERIC: char = 'C'; +pub const SQLITE_AFF_INTEGER: char = 'D'; +pub const SQLITE_AFF_REAL: char = 'E'; + +impl Affinity { + /// This is meant to be used in opcodes like Eq, which state: + /// + /// "The SQLITE_AFF_MASK portion of P5 must be an affinity character - SQLITE_AFF_TEXT, SQLITE_AFF_INTEGER, and so forth. + /// An attempt is made to coerce both inputs according to this affinity before the comparison is made. + /// If the SQLITE_AFF_MASK is 0x00, then numeric affinity is used. + /// Note that the affinity conversions are stored back into the input registers P1 and P3. 
+ /// So this opcode can cause persistent changes to registers P1 and P3."" + pub fn aff_mask(&self) -> char { + match self { + Affinity::Integer => SQLITE_AFF_INTEGER, + Affinity::Text => SQLITE_AFF_TEXT, + Affinity::Blob => SQLITE_AFF_NONE, + Affinity::Real => SQLITE_AFF_REAL, + Affinity::Numeric => SQLITE_AFF_NUMERIC, + } + } + + pub fn from_char(char: char) -> Self { + match char { + SQLITE_AFF_INTEGER => Affinity::Integer, + SQLITE_AFF_TEXT => Affinity::Text, + SQLITE_AFF_NONE => Affinity::Blob, + SQLITE_AFF_REAL => Affinity::Real, + SQLITE_AFF_NUMERIC => Affinity::Numeric, + _ => Affinity::Blob, + } + } + + pub fn as_char_code(&self) -> u8 { + self.aff_mask() as u8 + } + + pub fn from_char_code(code: u8) -> Self { + Self::from_char(code as char) + } + + pub fn is_numeric(&self) -> bool { + matches!(self, Affinity::Integer | Affinity::Real | Affinity::Numeric) + } + + pub fn has_affinity(&self) -> bool { + !matches!(self, Affinity::Blob) + } + + /// 3.1. Determination Of Column Affinity + /// For tables not declared as STRICT, the affinity of a column is determined by the declared type of the column, according to the following rules in the order shown: + /// + /// If the declared type contains the string "INT" then it is assigned INTEGER affinity. + /// + /// If the declared type of the column contains any of the strings "CHAR", "CLOB", or "TEXT" then that column has TEXT affinity. Notice that the type VARCHAR contains the string "CHAR" and is thus assigned TEXT affinity. + /// + /// If the declared type for a column contains the string "BLOB" or if no type is specified then the column has affinity BLOB. + /// + /// If the declared type for a column contains any of the strings "REAL", "FLOA", or "DOUB" then the column has REAL affinity. + /// + /// Otherwise, the affinity is NUMERIC. + /// + /// Note that the order of the rules for determining column affinity is important. 
A column whose declared type is "CHARINT" will match both rules 1 and 2 but the first rule takes precedence and so the column affinity will be INTEGER. + #[expect(clippy::self_named_constructors)] + pub fn affinity(datatype: &str) -> Self { + let datatype = datatype.to_ascii_uppercase(); + + // Rule 1: INT -> INTEGER affinity + if datatype.contains("INT") { + return Affinity::Integer; + } + + // Rule 2: CHAR/CLOB/TEXT -> TEXT affinity + if datatype.contains("CHAR") || datatype.contains("CLOB") || datatype.contains("TEXT") { + return Affinity::Text; + } + + // Rule 3: BLOB or empty -> BLOB affinity (historically called NONE) + if datatype.contains("BLOB") || datatype.is_empty() || datatype.contains("ANY") { + return Affinity::Blob; + } + + // Rule 4: REAL/FLOA/DOUB -> REAL affinity + if datatype.contains("REAL") || datatype.contains("FLOA") || datatype.contains("DOUB") { + return Affinity::Real; + } + + // Rule 5: Otherwise -> NUMERIC affinity + Affinity::Numeric + } + + pub fn convert<'a>(&self, val: &'a impl AsValueRef) -> Option, Value>> { + let val = val.as_value_ref(); + let is_text = matches!(val, ValueRef::Text(_)); + // Apply affinity conversions + match self { + Affinity::Numeric | Affinity::Integer => is_text + .then(|| apply_numeric_affinity(val, false)) + .flatten() + .map(Either::Left), + + Affinity::Text => { + if is_text { + is_numeric_value(val) + .then(|| stringify_register(val)) + .flatten() + .map(Either::Right) + } else { + None + } + } + + Affinity::Real => { + let mut left = is_text + .then(|| apply_numeric_affinity(val, false)) + .flatten(); + + if let ValueRef::Integer(i) = left.unwrap_or(val) { + left = Some(ValueRef::Float(i as f64)); + } + + left.map(Either::Left) + } + + Affinity::Blob => None, // Do nothing for blob affinity. + } + } + + /// Return TRUE if the given expression is a constant which would be + /// unchanged by OP_Affinity with the affinity given in the second + /// argument. 
/// How much of the input `try_for_float` recognised as a number.
#[derive(Debug, PartialEq)]
pub enum NumericParseResult {
    NotNumeric,      // not a valid number
    PureInteger,     // pure integer (entire string)
    HasDecimalOrExp, // has decimal point or exponent (entire string)
    ValidPrefixOnly, // valid prefix but not entire string
}

/// The numeric value produced by `try_for_float`.
#[derive(Debug)]
pub enum ParsedNumber {
    None,
    Integer(i64),
    Float(f64),
}

impl ParsedNumber {
    /// Returns the payload when this is `ParsedNumber::Integer`.
    fn as_integer(&self) -> Option<i64> {
        match self {
            ParsedNumber::Integer(i) => Some(*i),
            _ => None,
        }
    }

    /// Returns the payload when this is `ParsedNumber::Float`.
    fn as_float(&self) -> Option<f64> {
        match self {
            ParsedNumber::Float(f) => Some(*f),
            _ => None,
        }
    }
}

/// Parses the longest numeric prefix of `text`.
///
/// Accepted shape: optional leading whitespace, optional sign, digits with an
/// optional `.` fraction, optional exponent (`e`/`E`, optional sign, digits),
/// optional trailing whitespace.
///
/// The first tuple element says whether the whole string was a number, only a
/// prefix was, or nothing was. The second is the value of the recognised part
/// (`ParsedNumber::None` only together with `NotNumeric`).
///
/// An exponent marker that is not followed by digits is not part of the
/// number: `"123e"` and `"123e+"` both yield the prefix `123`.
pub fn try_for_float(text: &str) -> (NumericParseResult, ParsedNumber) {
    // Largest significand that can still take one more decimal digit.
    const MAX_BEFORE_DIGIT: u64 = (u64::MAX - 9) / 10;

    let bytes = text.as_bytes();
    let len = bytes.len();
    let mut pos = 0;

    while pos < len && is_space(bytes[pos]) {
        pos += 1;
    }
    if pos >= len {
        // Empty or all-whitespace input.
        return (NumericParseResult::NotNumeric, ParsedNumber::None);
    }

    let mut sign = 1i64;
    match bytes[pos] {
        b'-' => {
            sign = -1;
            pos += 1;
        }
        b'+' => pos += 1,
        _ => {}
    }
    if pos >= len {
        return (NumericParseResult::NotNumeric, ParsedNumber::None);
    }

    let mut significand = 0u64;
    let mut decimal_adjust = 0i32;
    let mut has_digits = false;

    // Integer part.
    while pos < len && bytes[pos].is_ascii_digit() {
        has_digits = true;
        let digit = u64::from(bytes[pos] - b'0');
        if significand <= MAX_BEFORE_DIGIT {
            significand = significand * 10 + digit;
        } else {
            // The digit no longer fits: drop it but remember its magnitude.
            decimal_adjust += 1;
        }
        pos += 1;
    }

    // Fractional part.
    let mut has_decimal = false;
    if pos < len && bytes[pos] == b'.' {
        has_decimal = true;
        pos += 1;

        while pos < len && bytes[pos].is_ascii_digit() {
            has_digits = true;
            let digit = u64::from(bytes[pos] - b'0');
            // Fraction digits that do not fit are below the precision we keep.
            if significand <= MAX_BEFORE_DIGIT {
                significand = significand * 10 + digit;
                decimal_adjust -= 1;
            }
            pos += 1;
        }
    }

    if !has_digits {
        return (NumericParseResult::NotNumeric, ParsedNumber::None);
    }

    // Exponent.
    let mut has_exponent = false;
    let mut exponent = 0i32;
    if pos < len && matches!(bytes[pos], b'e' | b'E') {
        pos += 1;

        let mut exp_sign = 1i32;
        if pos < len {
            match bytes[pos] {
                b'-' => {
                    exp_sign = -1;
                    pos += 1;
                }
                b'+' => pos += 1,
                _ => {}
            }
        }

        if pos >= len || !bytes[pos].is_ascii_digit() {
            // Dangling exponent marker: only the digits before it count, and
            // they must not be reported as having an exponent.
            return create_result_from_significand(
                significand,
                sign,
                decimal_adjust,
                has_decimal,
                false,
                NumericParseResult::ValidPrefixOnly,
            );
        }

        has_exponent = true;
        while pos < len && bytes[pos].is_ascii_digit() {
            let digit = i32::from(bytes[pos] - b'0');
            if exponent < 10000 {
                exponent = exponent * 10 + digit;
            } else {
                exponent = 10000; // Cap at large value
            }
            pos += 1;
        }
        exponent *= exp_sign;
    }

    while pos < len && is_space(bytes[pos]) {
        pos += 1;
    }

    let parse_result = if pos < len {
        NumericParseResult::ValidPrefixOnly
    } else if has_decimal || has_exponent {
        NumericParseResult::HasDecimalOrExp
    } else {
        NumericParseResult::PureInteger
    };

    create_result_from_significand(
        significand,
        sign,
        decimal_adjust + exponent,
        has_decimal,
        has_exponent,
        parse_result,
    )
}

/// Builds the final value from the pieces collected by `try_for_float`.
///
/// `exponent` is the power of ten still to be applied to `significand`.
/// Digit strings without a fraction or exponent are returned as integers when
/// they fit in an `i64` (including exactly `i64::MIN`); everything else
/// becomes a float.
fn create_result_from_significand(
    significand: u64,
    sign: i64,
    exponent: i32,
    has_decimal: bool,
    has_exponent: bool,
    parse_result: NumericParseResult,
) -> (NumericParseResult, ParsedNumber) {
    if significand == 0 {
        let zero = match parse_result {
            NumericParseResult::PureInteger => ParsedNumber::Integer(0),
            _ => ParsedNumber::Float(0.0),
        };
        return (parse_result, zero);
    }

    if !has_decimal && !has_exponent && exponent == 0 {
        if let Ok(magnitude) = i64::try_from(significand) {
            return (parse_result, ParsedNumber::Integer(magnitude.wrapping_mul(sign)));
        }
        // |i64::MIN| is one larger than i64::MAX, so it fails the conversion
        // above even though the negative value is representable.
        if sign < 0 && significand == i64::MIN.unsigned_abs() {
            return (parse_result, ParsedNumber::Integer(i64::MIN));
        }
    }

    // Scale in coarse steps first so large exponents need few multiplications.
    let mut result = significand as f64;
    let mut exp = exponent;
    match exp.cmp(&0) {
        std::cmp::Ordering::Greater => {
            while exp >= 100 {
                result *= 1e100;
                exp -= 100;
            }
            while exp >= 10 {
                result *= 1e10;
                exp -= 10;
            }
            while exp >= 1 {
                result *= 10.0;
                exp -= 1;
            }
        }
        std::cmp::Ordering::Less => {
            while exp <= -100 {
                result *= 1e-100;
                exp += 100;
            }
            while exp <= -10 {
                result *= 1e-10;
                exp += 10;
            }
            while exp <= -1 {
                result *= 0.1;
                exp += 1;
            }
        }
        std::cmp::Ordering::Equal => {}
    }

    if sign < 0 {
        result = -result;
    }

    (parse_result, ParsedNumber::Float(result))
}

/// Returns `true` for the bytes treated as whitespace around a number:
/// space, tab, line feed, carriage return and form feed.
pub fn is_space(byte: u8) -> bool {
    matches!(byte, b' ' | b'\t' | b'\n' | b'\r' | b'\x0c')
}

/// Converts a float to an `i64`, clamping values beyond the thresholds below
/// to `i64::MIN` / `i64::MAX`.
fn real_to_i64(r: f64) -> i64 {
    if r < -9223372036854774784.0 {
        i64::MIN
    } else if r > 9223372036854774784.0 {
        i64::MAX
    } else {
        r as i64
    }
}
ValueRef) -> bool { + matches!(val, ValueRef::Integer(_) | ValueRef::Float(_)) +} + +fn stringify_register(val: ValueRef) -> Option { + match val { + ValueRef::Integer(i) => Some(Value::build_text(i.to_string())), + ValueRef::Float(f) => Some(Value::build_text(f.to_string())), + ValueRef::Text(_) | ValueRef::Null | ValueRef::Blob(_) => None, + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_apply_numeric_affinity_partial_numbers() { + let val = Value::Text("123abc".into()); + let res = apply_numeric_affinity(val.as_value_ref(), false); + assert!(res.is_none()); + + let val = Value::Text("-53093015420544-15062897".into()); + let res = apply_numeric_affinity(val.as_value_ref(), false); + assert!(res.is_none()); + + let val = Value::Text("123.45xyz".into()); + let res = apply_numeric_affinity(val.as_value_ref(), false); + assert!(res.is_none()); + } + + #[test] + fn test_apply_numeric_affinity_complete_numbers() { + let val = Value::Text("123".into()); + let res = apply_numeric_affinity(val.as_value_ref(), false); + assert_eq!(res, Some(ValueRef::Integer(123))); + + let val = Value::Text("123.45".into()); + let res = apply_numeric_affinity(val.as_value_ref(), false); + assert_eq!(res, Some(ValueRef::Float(123.45))); + + let val = Value::Text(" -456 ".into()); + let res = apply_numeric_affinity(val.as_value_ref(), false); + assert_eq!(res, Some(ValueRef::Integer(-456))); + + let val = Value::Text("0".into()); + let res = apply_numeric_affinity(val.as_value_ref(), false); + assert_eq!(res, Some(ValueRef::Integer(0))); + } +} diff --git a/common/schema/collation.rs b/common/schema/collation.rs new file mode 100644 index 0000000000..aa45c1455d --- /dev/null +++ b/common/schema/collation.rs @@ -0,0 +1,61 @@ +use std::{cmp::Ordering, str::FromStr}; + +// TODO: in the future allow user to define collation sequences +// Will have to meddle with ffi for this +#[derive( + Debug, Clone, Copy, Eq, PartialEq, strum_macros::Display, strum_macros::EnumString, 
Default, +)] +#[strum(ascii_case_insensitive)] +/// **Pre defined collation sequences**\ +/// Collating functions only matter when comparing string values. +/// Numeric values are always compared numerically, and BLOBs are always compared byte-by-byte using memcmp(). +#[repr(u8)] +pub enum CollationSeq { + Unset = 0, + #[default] + Binary = 1, + NoCase = 2, + Rtrim = 3, +} + +impl CollationSeq { + pub fn new(collation: &str) -> Result { + CollationSeq::from_str(collation) + .map_err(|_| format!("no such collation sequence: {collation}")) + } + #[inline] + /// Returns the collation, defaulting to BINARY if unset + pub const fn from_bits(bits: u8) -> Self { + match bits { + 2 => CollationSeq::NoCase, + 3 => CollationSeq::Rtrim, + _ => CollationSeq::Binary, + } + } + + #[inline(always)] + pub fn compare_strings(&self, lhs: &str, rhs: &str) -> Ordering { + match self { + CollationSeq::Unset | CollationSeq::Binary => Self::binary_cmp(lhs, rhs), + CollationSeq::NoCase => Self::nocase_cmp(lhs, rhs), + CollationSeq::Rtrim => Self::rtrim_cmp(lhs, rhs), + } + } + + #[inline(always)] + fn binary_cmp(lhs: &str, rhs: &str) -> Ordering { + lhs.cmp(rhs) + } + + #[inline(always)] + fn nocase_cmp(lhs: &str, rhs: &str) -> Ordering { + let nocase_lhs = uncased::UncasedStr::new(lhs); + let nocase_rhs = uncased::UncasedStr::new(rhs); + nocase_lhs.cmp(nocase_rhs) + } + + #[inline(always)] + fn rtrim_cmp(lhs: &str, rhs: &str) -> Ordering { + lhs.trim_end().cmp(rhs.trim_end()) + } +} diff --git a/common/schema/column.rs b/common/schema/column.rs new file mode 100644 index 0000000000..4290b81fa1 --- /dev/null +++ b/common/schema/column.rs @@ -0,0 +1,238 @@ +use core::fmt; + +use turso_parser::ast::Expr; + +use crate::schema::{affinity::Affinity, collation::CollationSeq}; + +#[derive(Debug, Clone)] +pub struct Column { + pub name: Option, + pub ty_str: String, + pub default: Option>, + raw: u16, +} + +// flags +const F_PRIMARY_KEY: u16 = 1; +const F_ROWID_ALIAS: u16 = 2; +const F_NOTNULL: 
u16 = 4; +const F_UNIQUE: u16 = 8; +const F_HIDDEN: u16 = 16; + +// pack Type and Collation in the remaining bits +const TYPE_SHIFT: u16 = 5; +const TYPE_MASK: u16 = 0b111 << TYPE_SHIFT; +const COLL_SHIFT: u16 = TYPE_SHIFT + 3; +const COLL_MASK: u16 = 0b11 << COLL_SHIFT; + +impl Column { + pub fn affinity(&self) -> Affinity { + Affinity::affinity(&self.ty_str) + } + pub const fn new_default_text( + name: Option, + ty_str: String, + default: Option>, + ) -> Self { + Self::new( + name, + ty_str, + default, + Type::Text, + None, + false, + false, + false, + false, + false, + ) + } + pub const fn new_default_integer( + name: Option, + ty_str: String, + default: Option>, + ) -> Self { + Self::new( + name, + ty_str, + default, + Type::Integer, + None, + false, + false, + false, + false, + false, + ) + } + #[inline] + #[allow(clippy::too_many_arguments)] + pub const fn new( + name: Option, + ty_str: String, + default: Option>, + ty: Type, + col: Option, + primary_key: bool, + rowid_alias: bool, + notnull: bool, + unique: bool, + hidden: bool, + ) -> Self { + let mut raw = 0u16; + raw |= (ty as u16) << TYPE_SHIFT; + if let Some(c) = col { + raw |= (c as u16) << COLL_SHIFT; + } + if primary_key { + raw |= F_PRIMARY_KEY + } + if rowid_alias { + raw |= F_ROWID_ALIAS + } + if notnull { + raw |= F_NOTNULL + } + if unique { + raw |= F_UNIQUE + } + if hidden { + raw |= F_HIDDEN + } + Self { + name, + ty_str, + default, + raw, + } + } + #[inline] + pub const fn ty(&self) -> Type { + let v = ((self.raw & TYPE_MASK) >> TYPE_SHIFT) as u8; + Type::from_bits(v) + } + + #[inline] + pub const fn set_ty(&mut self, ty: Type) { + self.raw = (self.raw & !TYPE_MASK) | (((ty as u16) << TYPE_SHIFT) & TYPE_MASK); + } + + #[inline] + pub const fn collation_opt(&self) -> Option { + if self.has_explicit_collation() { + Some(self.collation()) + } else { + None + } + } + + #[inline] + pub const fn collation(&self) -> CollationSeq { + let v = ((self.raw & COLL_MASK) >> COLL_SHIFT) as u8; + 
/// Declared column type; the discriminant is the bit pattern stored in a
/// column's packed flags.
#[repr(u8)]
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum Type {
    Null = 0,
    Text = 1,
    Numeric = 2,
    Integer = 3,
    Real = 4,
    Blob = 5,
}

impl Type {
    /// Decodes a stored bit pattern; zero and any unknown pattern give `Null`.
    #[inline]
    const fn from_bits(bits: u8) -> Self {
        match bits {
            1 => Self::Text,
            2 => Self::Numeric,
            3 => Self::Integer,
            4 => Self::Real,
            5 => Self::Blob,
            _ => Self::Null,
        }
    }
}

impl fmt::Display for Type {
    /// Writes the SQL spelling of the type; `Null` is written as nothing.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let label = match *self {
            Type::Null => "",
            Type::Text => "TEXT",
            Type::Numeric => "NUMERIC",
            Type::Integer => "INTEGER",
            Type::Real => "REAL",
            Type::Blob => "BLOB",
        };
        f.write_str(label)
    }
}
use std::sync::Arc;

/// A fully resolved path to a table of the form "catalog.schema.table"
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct ResolvedTableReference {
    /// The catalog (aka database) containing the table
    pub catalog: Arc<str>,
    /// The schema containing the table
    pub schema: Arc<str>,
    /// The table name
    pub table: Arc<str>,
}

impl std::fmt::Display for ResolvedTableReference {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}.{}.{}", self.catalog, self.schema, self.table)
    }
}

/// A multi part identifier (path) to a table that may require further
/// resolution (e.g. `foo.bar`).
///
/// [`TableReference`]s are cheap to `clone()` as they are implemented with
/// `Arc`.
///
/// See [`ResolvedTableReference`] for a fully resolved table reference.
///
/// # Creating [`TableReference`]
///
/// Use [`TableReference::bare`], [`TableReference::partial`] or
/// [`TableReference::full`]. These constructors store every name exactly as
/// given: nothing is parsed, split on `.`, unquoted or lower-cased.
///
/// # Examples
/// ```
/// use turso_common::table_reference::TableReference;
///
/// // An unqualified reference keeps the name exactly as given.
/// let table_reference = TableReference::bare("MyTable");
/// assert_eq!(table_reference.table(), "MyTable");
/// assert_eq!(table_reference.schema(), None);
///
/// // A dot inside the name is not a separator.
/// let table_reference = TableReference::bare("Foo.Bar");
/// assert_eq!(table_reference.table(), "Foo.Bar");
///
/// // A schema-qualified reference.
/// let table_reference = TableReference::partial("myschema", "mytable");
/// assert_eq!(table_reference.to_string(), "myschema.mytable");
///
/// // References with different qualification can still be compared.
/// assert!(table_reference.resolved_eq(&TableReference::bare("mytable")));
/// ```
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub enum TableReference {
    /// An unqualified table reference, e.g. "table"
    Bare {
        /// The table name
        table: Arc<str>,
    },
    /// A partially resolved table reference, e.g. "schema.table"
    Partial {
        /// The schema containing the table
        schema: Arc<str>,
        /// The table name
        table: Arc<str>,
    },
    /// A fully resolved table reference, e.g. "catalog.schema.table"
    Full {
        /// The catalog (aka database) containing the table
        catalog: Arc<str>,
        /// The schema containing the table
        schema: Arc<str>,
        /// The table name
        table: Arc<str>,
    },
}

impl std::fmt::Display for TableReference {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            TableReference::Bare { table } => write!(f, "{table}"),
            TableReference::Partial { schema, table } => {
                write!(f, "{schema}.{table}")
            }
            TableReference::Full {
                catalog,
                schema,
                table,
            } => write!(f, "{catalog}.{schema}.{table}"),
        }
    }
}

impl TableReference {
    /// Convenience method for creating a typed `None`
    pub fn none() -> Option<TableReference> {
        None
    }

    /// Convenience method for creating a [`TableReference::Bare`]
    ///
    /// As described on [`TableReference`] this does *NO* normalization at
    /// all, so "Foo.Bar" stays as a reference to the table named
    /// "Foo.Bar" (rather than "foo"."bar")
    pub fn bare(table: impl Into<Arc<str>>) -> TableReference {
        TableReference::Bare {
            table: table.into(),
        }
    }

    /// Convenience method for creating a [`TableReference::Partial`].
    ///
    /// Note: *NO* normalization is applied to the schema or table name.
    pub fn partial(schema: impl Into<Arc<str>>, table: impl Into<Arc<str>>) -> TableReference {
        TableReference::Partial {
            schema: schema.into(),
            table: table.into(),
        }
    }

    /// Convenience method for creating a [`TableReference::Full`]
    ///
    /// Note: *NO* normalization is applied to the catalog, schema or table
    /// name.
    pub fn full(
        catalog: impl Into<Arc<str>>,
        schema: impl Into<Arc<str>>,
        table: impl Into<Arc<str>>,
    ) -> TableReference {
        TableReference::Full {
            catalog: catalog.into(),
            schema: schema.into(),
            table: table.into(),
        }
    }

    /// Retrieve the table name, regardless of qualification.
    pub fn table(&self) -> &str {
        match self {
            Self::Full { table, .. } | Self::Partial { table, .. } | Self::Bare { table } => table,
        }
    }

    /// Retrieve the schema name if [`Self::Partial`] or [`Self::Full`],
    /// `None` otherwise.
    pub fn schema(&self) -> Option<&str> {
        match self {
            Self::Full { schema, .. } | Self::Partial { schema, .. } => Some(schema),
            Self::Bare { .. } => None,
        }
    }

    /// Retrieve the catalog name if [`Self::Full`], `None` otherwise.
    pub fn catalog(&self) -> Option<&str> {
        match self {
            Self::Full { catalog, .. } => Some(catalog),
            Self::Partial { .. } | Self::Bare { .. } => None,
        }
    }

    /// Compare with another [`TableReference`] as if both are resolved.
    /// This allows comparing across variants. If a field is not present
    /// in both variants being compared then it is ignored in the comparison.
    ///
    /// e.g. this allows a [`TableReference::Bare`] to be considered equal to a
    /// fully qualified [`TableReference::Full`] if the table names match.
    pub fn resolved_eq(&self, other: &Self) -> bool {
        self.table() == other.table()
            && parts_agree(self.schema(), other.schema())
            && parts_agree(self.catalog(), other.catalog())
    }

    /// Given a default catalog and schema, ensure this table reference is fully
    /// resolved
    pub fn resolve(self, default_catalog: &str, default_schema: &str) -> ResolvedTableReference {
        match self {
            Self::Full {
                catalog,
                schema,
                table,
            } => ResolvedTableReference {
                catalog,
                schema,
                table,
            },
            Self::Partial { schema, table } => ResolvedTableReference {
                catalog: default_catalog.into(),
                schema,
                table,
            },
            Self::Bare { table } => ResolvedTableReference {
                catalog: default_catalog.into(),
                schema: default_schema.into(),
                table,
            },
        }
    }

    /// Decompose a [`TableReference`] to separate parts. The result vector contains
    /// at most three elements in the following sequence:
    /// ```text
    /// [catalog, schema, table]
    /// ```
    /// Parts the reference does not carry are left out.
    pub fn to_vec(&self) -> Vec<String> {
        match self {
            TableReference::Bare { table } => vec![table.to_string()],
            TableReference::Partial { schema, table } => {
                vec![schema.to_string(), table.to_string()]
            }
            TableReference::Full {
                catalog,
                schema,
                table,
            } => vec![catalog.to_string(), schema.to_string(), table.to_string()],
        }
    }
}

/// Two optional name parts agree unless both are present and differ.
fn parts_agree(left: Option<&str>, right: Option<&str>) -> bool {
    match (left, right) {
        (Some(left), Some(right)) => left == right,
        _ => true,
    }
}
subtype: TextSubtype::Json, + } + } + + pub fn as_str(&self) -> &str { + &self.value + } +} + +#[derive(Debug, Clone, Copy)] +pub struct TextRef<'a> { + pub value: &'a str, + pub subtype: TextSubtype, +} + +impl<'a> TextRef<'a> { + pub fn new(value: &'a str, subtype: TextSubtype) -> Self { + Self { value, subtype } + } + + #[inline] + pub fn as_str(&self) -> &'a str { + self.value + } +} + +impl<'a> Borrow for TextRef<'a> { + #[inline] + fn borrow(&self) -> &str { + self.as_str() + } +} + +impl<'a> Deref for TextRef<'a> { + type Target = str; + + #[inline] + fn deref(&self) -> &Self::Target { + self.as_str() + } +} + +pub trait Extendable { + fn do_extend(&mut self, other: &T); +} + +impl Extendable for Text { + #[inline(always)] + fn do_extend(&mut self, other: &T) { + let value = self.value.to_mut(); + value.clear(); + value.push_str(other.as_ref()); + self.subtype = other.subtype(); + } +} + +impl Extendable for Vec { + #[inline(always)] + fn do_extend(&mut self, other: &T) { + self.clear(); + self.extend_from_slice(other.as_slice()); + } +} + +pub trait AnyText: AsRef { + fn subtype(&self) -> TextSubtype; +} + +impl AnyText for Text { + fn subtype(&self) -> TextSubtype { + self.subtype + } +} + +impl AnyText for &str { + fn subtype(&self) -> TextSubtype { + TextSubtype::Text + } +} + +pub trait AnyBlob { + fn as_slice(&self) -> &[u8]; +} + +impl AnyBlob for Vec { + fn as_slice(&self) -> &[u8] { + self.as_slice() + } +} + +impl AnyBlob for &[u8] { + fn as_slice(&self) -> &[u8] { + self + } +} + +impl AsRef for Text { + fn as_ref(&self) -> &str { + self.as_str() + } +} + +impl From<&str> for Text { + fn from(value: &str) -> Self { + Text { + value: value.to_owned().into(), + subtype: TextSubtype::Text, + } + } +} + +impl From for Text { + fn from(value: String) -> Self { + Text { + value: Cow::from(value), + subtype: TextSubtype::Text, + } + } +} + +impl From for String { + fn from(value: Text) -> Self { + value.value.into_owned() + } +} + +#[cfg(feature = 
"serde")] +fn float_to_string(float: &f64, serializer: S) -> Result +where + S: serde::Serializer, +{ + serializer.serialize_str(&format!("{float}")) +} + +#[cfg(feature = "serde")] +fn string_to_float<'de, D>(deserializer: D) -> Result +where + D: serde::Deserializer<'de>, +{ + let s = String::deserialize(deserializer)?; + match crate::numeric::str_to_f64(s) { + Some(result) => Ok(result.into()), + None => Err(serde::de::Error::custom("")), + } +} + +#[derive(Debug, Clone)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub enum Value { + Null, + Integer(i64), + // we use custom serialization to preserve float precision + #[cfg_attr( + feature = "serde", + serde( + serialize_with = "float_to_string", + deserialize_with = "string_to_float" + ) + )] + Float(f64), + Text(Text), + Blob(Vec), +} + +/// Please use Display trait for all limbo output so we have single origin of truth +/// When you need value as string: +/// ---GOOD--- +/// format!("{}", value); +/// ---BAD--- +/// match value { +/// Value::Integer(i) => *i.as_str(), +/// Value::Float(f) => *f.as_str(), +/// .... 
+/// } +impl Display for Value { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Null => write!(f, ""), + Self::Integer(i) => { + write!(f, "{i}") + } + Self::Float(fl) => f.write_str(&format_float(*fl)), + Self::Text(s) => { + write!(f, "{}", s.as_str()) + } + Self::Blob(b) => write!(f, "{}", String::from_utf8_lossy(b)), + } + } +} + +impl PartialEq for Value { + fn eq(&self, other: &Value) -> bool { + let (left, right) = (self.as_value_ref(), other.as_value_ref()); + left.eq(&right) + } +} + +impl PartialOrd for Value { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Eq for Value {} + +impl Ord for Value { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + let (left, right) = (self.as_value_ref(), other.as_value_ref()); + left.cmp(&right) + } +} + +impl std::ops::Add for Value { + type Output = Value; + + fn add(mut self, rhs: Self) -> Self::Output { + self += rhs; + self + } +} + +impl std::ops::Add for Value { + type Output = Value; + + fn add(mut self, rhs: f64) -> Self::Output { + self += rhs; + self + } +} + +impl std::ops::Add for Value { + type Output = Value; + + fn add(mut self, rhs: i64) -> Self::Output { + self += rhs; + self + } +} + +impl std::ops::AddAssign for Value { + fn add_assign(mut self: &mut Self, rhs: Self) { + match (&mut self, rhs) { + (Self::Integer(int_left), Self::Integer(int_right)) => *int_left += int_right, + (Self::Integer(int_left), Self::Float(float_right)) => { + *self = Self::Float(*int_left as f64 + float_right) + } + (Self::Float(float_left), Self::Integer(int_right)) => { + *self = Self::Float(*float_left + int_right as f64) + } + (Self::Float(float_left), Self::Float(float_right)) => { + *float_left += float_right; + } + (Self::Text(string_left), Self::Text(string_right)) => { + string_left.value.to_mut().push_str(&string_right.value); + string_left.subtype = TextSubtype::Text; + } + (Self::Text(string_left), 
Self::Integer(int_right)) => { + let string_right = int_right.to_string(); + string_left.value.to_mut().push_str(&string_right); + string_left.subtype = TextSubtype::Text; + } + (Self::Integer(int_left), Self::Text(string_right)) => { + let string_left = int_left.to_string(); + *self = Self::build_text(string_left + string_right.as_str()); + } + (Self::Text(string_left), Self::Float(float_right)) => { + let string_right = Self::Float(float_right).to_string(); + string_left.value.to_mut().push_str(&string_right); + string_left.subtype = TextSubtype::Text; + } + (Self::Float(float_left), Self::Text(string_right)) => { + let string_left = Self::Float(*float_left).to_string(); + *self = Self::build_text(string_left + string_right.as_str()); + } + (_, Self::Null) => {} + (Self::Null, rhs) => *self = rhs, + _ => *self = Self::Float(0.0), + } + } +} + +impl std::ops::AddAssign for Value { + fn add_assign(&mut self, rhs: i64) { + match self { + Self::Integer(int_left) => *int_left += rhs, + Self::Float(float_left) => *float_left += rhs as f64, + _ => unreachable!(), + } + } +} + +impl std::ops::AddAssign for Value { + fn add_assign(&mut self, rhs: f64) { + match self { + Self::Integer(int_left) => *self = Self::Float(*int_left as f64 + rhs), + Self::Float(float_left) => *float_left += rhs, + _ => unreachable!(), + } + } +} + +impl std::ops::Div for Value { + type Output = Value; + + fn div(self, rhs: Value) -> Self::Output { + match (self, rhs) { + (Self::Integer(int_left), Self::Integer(int_right)) => { + Self::Integer(int_left / int_right) + } + (Self::Integer(int_left), Self::Float(float_right)) => { + Self::Float(int_left as f64 / float_right) + } + (Self::Float(float_left), Self::Integer(int_right)) => { + Self::Float(float_left / int_right as f64) + } + (Self::Float(float_left), Self::Float(float_right)) => { + Self::Float(float_left / float_right) + } + _ => Self::Float(0.0), + } + } +} + +impl std::ops::DivAssign for Value { + fn div_assign(&mut self, rhs: Value) { 
+ *self = self.clone() / rhs; + } +} + +impl Value { + pub fn as_ref<'a>(&'a self) -> ValueRef<'a> { + match self { + Value::Null => ValueRef::Null, + Value::Integer(v) => ValueRef::Integer(*v), + Value::Float(v) => ValueRef::Float(*v), + Value::Text(v) => ValueRef::Text(TextRef { + value: &v.value, + subtype: v.subtype, + }), + Value::Blob(v) => ValueRef::Blob(v.as_slice()), + } + } + + // A helper function that makes building a text Value easier. + pub fn build_text(text: impl Into>) -> Self { + Self::Text(Text::new(text)) + } + + pub fn to_blob(&self) -> Option<&[u8]> { + match self { + Self::Blob(blob) => Some(blob), + _ => None, + } + } + + pub fn from_blob(data: Vec) -> Self { + Value::Blob(data) + } + + pub fn to_text(&self) -> Option<&str> { + match self { + Value::Text(t) => Some(t.as_str()), + _ => None, + } + } + + pub fn as_blob(&self) -> &Vec { + match self { + Value::Blob(b) => b, + _ => panic!("as_blob must be called only for Value::Blob"), + } + } + + pub fn as_blob_mut(&mut self) -> &mut Vec { + match self { + Value::Blob(b) => b, + _ => panic!("as_blob must be called only for Value::Blob"), + } + } + pub fn as_float(&self) -> f64 { + match self { + Value::Float(f) => *f, + Value::Integer(i) => *i as f64, + _ => panic!("as_float must be called only for Value::Float or Value::Integer"), + } + } + + pub fn as_int(&self) -> Option { + match self { + Value::Integer(i) => Some(*i), + _ => None, + } + } + + pub fn as_uint(&self) -> u64 { + match self { + Value::Integer(i) => (*i).cast_unsigned(), + _ => 0, + } + } + + pub fn from_text(text: impl Into>) -> Self { + Value::Text(Text::new(text)) + } + + pub fn value_type(&self) -> ValueType { + match self { + Value::Null => ValueType::Null, + Value::Integer(_) => ValueType::Integer, + Value::Float(_) => ValueType::Float, + Value::Text(_) => ValueType::Text, + Value::Blob(_) => ValueType::Blob, + } + } + + /// Cast Value to String, if Value is NULL returns None + pub fn cast_text(&self) -> Option { + 
Some(match self { + Value::Null => return None, + v => v.to_string(), + }) + } +} + +#[derive(Clone, Copy)] +pub enum ValueRef<'a> { + Null, + Integer(i64), + Float(f64), + Text(TextRef<'a>), + Blob(&'a [u8]), +} + +impl<'a> ValueRef<'a> { + pub fn to_blob(&self) -> Option<&'a [u8]> { + match self { + Self::Blob(blob) => Some(*blob), + _ => None, + } + } + + pub fn to_text(&self) -> Option<&'a str> { + match self { + Self::Text(t) => Some(t.as_str()), + _ => None, + } + } + + pub fn as_blob(&self) -> &'a [u8] { + match self { + Self::Blob(b) => b, + _ => panic!("as_blob must be called only for Value::Blob"), + } + } + + pub fn as_float(&self) -> f64 { + match self { + Self::Float(f) => *f, + Self::Integer(i) => *i as f64, + _ => panic!("as_float must be called only for Value::Float or Value::Integer"), + } + } + + pub fn as_int(&self) -> Option { + match self { + Self::Integer(i) => Some(*i), + _ => None, + } + } + + pub fn as_uint(&self) -> u64 { + match self { + Self::Integer(i) => (*i).cast_unsigned(), + _ => 0, + } + } + + pub fn to_owned(&self) -> Value { + match self { + ValueRef::Null => Value::Null, + ValueRef::Integer(i) => Value::Integer(*i), + ValueRef::Float(f) => Value::Float(*f), + ValueRef::Text(text) => Value::Text(Text { + value: text.value.to_string().into(), + subtype: text.subtype, + }), + ValueRef::Blob(b) => Value::Blob(b.to_vec()), + } + } + + pub fn value_type(&self) -> ValueType { + match self { + Self::Null => ValueType::Null, + Self::Integer(_) => ValueType::Integer, + Self::Float(_) => ValueType::Float, + Self::Text(_) => ValueType::Text, + Self::Blob(_) => ValueType::Blob, + } + } +} + +impl Display for ValueRef<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Null => write!(f, "NULL"), + Self::Integer(i) => write!(f, "{i}"), + Self::Float(fl) => write!(f, "{fl:?}"), + Self::Text(s) => write!(f, "{}", s.as_str()), + Self::Blob(b) => write!(f, "{}", String::from_utf8_lossy(b)), + } + } 
+} + +impl Debug for ValueRef<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + ValueRef::Null => write!(f, "Null"), + ValueRef::Integer(i) => f.debug_tuple("Integer").field(i).finish(), + ValueRef::Float(float) => f.debug_tuple("Float").field(float).finish(), + ValueRef::Text(text_ref) => { + // truncate string to at most 256 chars + let text = text_ref.as_str(); + let max_len = text.len().min(256); + f.debug_struct("Text") + .field("data", &&text[0..max_len]) + // Indicates to the developer debugging that the data is truncated for printing + .field("truncated", &(text.len() > max_len)) + .finish() + } + ValueRef::Blob(blob) => { + // truncate blob_slice to at most 32 bytes + let max_len = blob.len().min(32); + f.debug_struct("Blob") + .field("data", &&blob[0..max_len]) + // Indicates to the developer debugging that the data is truncated for printing + .field("truncated", &(blob.len() > max_len)) + .finish() + } + } + } +} + +impl<'a> PartialEq> for ValueRef<'a> { + fn eq(&self, other: &ValueRef<'a>) -> bool { + match (self, other) { + (Self::Integer(int_left), Self::Integer(int_right)) => int_left == int_right, + (Self::Integer(int), Self::Float(float)) | (Self::Float(float), Self::Integer(int)) => { + sqlite_int_float_compare(*int, *float).is_eq() + } + (Self::Float(float_left), Self::Float(float_right)) => float_left == float_right, + (Self::Integer(_) | Self::Float(_), Self::Text(_) | Self::Blob(_)) => false, + (Self::Text(_) | Self::Blob(_), Self::Integer(_) | Self::Float(_)) => false, + (Self::Text(text_left), Self::Text(text_right)) => { + text_left.value.as_bytes() == text_right.value.as_bytes() + } + (Self::Blob(blob_left), Self::Blob(blob_right)) => blob_left.eq(blob_right), + (Self::Null, Self::Null) => true, + _ => false, + } + } +} + +impl<'a> PartialEq for ValueRef<'a> { + fn eq(&self, other: &Value) -> bool { + let other = other.as_value_ref(); + self.eq(&other) + } +} + +impl<'a> Eq for ValueRef<'a> {} + 
+#[expect(clippy::non_canonical_partial_ord_impl)] +impl<'a> PartialOrd> for ValueRef<'a> { + fn partial_cmp(&self, other: &Self) -> Option { + match (self, other) { + (Self::Integer(int_left), Self::Integer(int_right)) => int_left.partial_cmp(int_right), + (Self::Integer(int_left), Self::Float(float_right)) => { + (*int_left as f64).partial_cmp(float_right) + } + (Self::Float(float_left), Self::Integer(int_right)) => { + float_left.partial_cmp(&(*int_right as f64)) + } + (Self::Float(float_left), Self::Float(float_right)) => { + float_left.partial_cmp(float_right) + } + // Numeric vs Text/Blob + (Self::Integer(_) | Self::Float(_), Self::Text(_) | Self::Blob(_)) => { + Some(std::cmp::Ordering::Less) + } + (Self::Text(_) | Self::Blob(_), Self::Integer(_) | Self::Float(_)) => { + Some(std::cmp::Ordering::Greater) + } + + (Self::Text(text_left), Self::Text(text_right)) => text_left + .value + .as_bytes() + .partial_cmp(text_right.value.as_bytes()), + // Text vs Blob + (Self::Text(_), Self::Blob(_)) => Some(std::cmp::Ordering::Less), + (Self::Blob(_), Self::Text(_)) => Some(std::cmp::Ordering::Greater), + + (Self::Blob(blob_left), Self::Blob(blob_right)) => blob_left.partial_cmp(blob_right), + (Self::Null, Self::Null) => Some(std::cmp::Ordering::Equal), + (Self::Null, _) => Some(std::cmp::Ordering::Less), + (_, Self::Null) => Some(std::cmp::Ordering::Greater), + } + } +} + +impl<'a> Ord for ValueRef<'a> { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + self.partial_cmp(other).unwrap() + } +} + +pub fn sqlite_int_float_compare(int_val: i64, float_val: f64) -> std::cmp::Ordering { + if float_val.is_nan() { + return std::cmp::Ordering::Greater; + } + + if float_val < -9223372036854775808.0 { + return std::cmp::Ordering::Greater; + } + if float_val >= 9223372036854775808.0 { + return std::cmp::Ordering::Less; + } + + let float_as_int = float_val as i64; + match int_val.cmp(&float_as_int) { + std::cmp::Ordering::Equal => { + let int_as_float = int_val as f64; + 
int_as_float + .partial_cmp(&float_val) + .unwrap_or(std::cmp::Ordering::Equal) + } + other => other, + } +} + +pub trait AsValueRef { + fn as_value_ref<'a>(&'a self) -> ValueRef<'a>; +} + +impl<'b> AsValueRef for ValueRef<'b> { + #[inline] + fn as_value_ref<'a>(&'a self) -> ValueRef<'a> { + *self + } +} + +impl AsValueRef for Value { + #[inline] + fn as_value_ref<'a>(&'a self) -> ValueRef<'a> { + self.as_ref() + } +} + +impl AsValueRef for &mut Value { + #[inline] + fn as_value_ref<'a>(&'a self) -> ValueRef<'a> { + self.as_ref() + } +} + +impl AsValueRef for Either +where + V1: AsValueRef, + V2: AsValueRef, +{ + #[inline] + fn as_value_ref<'a>(&'a self) -> ValueRef<'a> { + match self { + Either::Left(left) => left.as_value_ref(), + Either::Right(right) => right.as_value_ref(), + } + } +} + +impl AsValueRef for &V { + fn as_value_ref<'a>(&'a self) -> ValueRef<'a> { + (*self).as_value_ref() + } +} + +#[derive(Debug, thiserror::Error)] +pub enum FromSqlError { + #[error("Null value")] + NullValue, + #[error("invalid column type")] + InvalidColumnType, + #[error("Invalid blob size, expected {0}")] + InvalidBlobSize(usize), +} + +/// Convert a `Value` into the implementors type. +pub trait FromValue: Sealed { + fn from_sql(val: Value) -> Result + where + Self: Sized; +} + +impl FromValue for Value { + fn from_sql(val: Value) -> Result { + Ok(val) + } +} +impl Sealed for Value {} + +macro_rules! 
impl_int_from_value { + ($ty:ty, $cast:expr) => { + impl FromValue for $ty { + fn from_sql(val: Value) -> Result { + match val { + Value::Null => Err(FromSqlError::NullValue), + Value::Integer(i) => Ok($cast(i)), + _ => unreachable!("invalid value type"), + } + } + } + + impl Sealed for $ty {} + }; +} + +impl_int_from_value!(i32, |i| i as i32); +impl_int_from_value!(u32, |i| i as u32); +impl_int_from_value!(i64, |i| i); +impl_int_from_value!(u64, |i| i as u64); + +impl FromValue for f64 { + fn from_sql(val: Value) -> Result { + match val { + Value::Null => Err(FromSqlError::NullValue), + Value::Float(f) => Ok(f), + _ => unreachable!("invalid value type"), + } + } +} +impl Sealed for f64 {} + +impl FromValue for Vec { + fn from_sql(val: Value) -> Result { + match val { + Value::Null => Err(FromSqlError::NullValue), + Value::Blob(blob) => Ok(blob), + _ => unreachable!("invalid value type"), + } + } +} +impl Sealed for Vec {} + +impl FromValue for [u8; N] { + fn from_sql(val: Value) -> Result { + match val { + Value::Null => Err(FromSqlError::NullValue), + Value::Blob(blob) => blob + .try_into() + .map_err(|_| FromSqlError::InvalidBlobSize(N)), + _ => unreachable!("invalid value type"), + } + } +} +impl Sealed for [u8; N] {} + +impl FromValue for String { + fn from_sql(val: Value) -> Result { + match val { + Value::Null => Err(FromSqlError::NullValue), + Value::Text(s) => Ok(s.to_string()), + _ => unreachable!("invalid value type"), + } + } +} +impl Sealed for String {} + +impl FromValue for bool { + fn from_sql(val: Value) -> Result { + match val { + Value::Null => Err(FromSqlError::NullValue), + Value::Integer(i) => match i { + 0 => Ok(false), + 1 => Ok(true), + _ => Err(FromSqlError::InvalidColumnType), + }, + _ => unreachable!("invalid value type"), + } + } +} +impl Sealed for bool {} + +impl FromValue for Option +where + T: FromValue, +{ + fn from_sql(val: Value) -> Result { + match val { + Value::Null => Ok(None), + _ => T::from_sql(val).map(Some), + } + } +} 
+impl Sealed for Option {} + +mod sealed { + pub trait Sealed {} +} +use sealed::Sealed; From 41acf76bdf56b785e060c561962bb05f3164cf21 Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Fri, 14 Nov 2025 11:49:59 -0300 Subject: [PATCH 02/10] add common to core --- Cargo.lock | 1 + core/Cargo.toml | 1 + 2 files changed, 2 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index 6d03e71b21..03ff56b172 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4962,6 +4962,7 @@ dependencies = [ "test-log", "thiserror 2.0.16", "tracing", + "turso_common", "turso_ext", "turso_macros", "turso_parser", diff --git a/core/Cargo.toml b/core/Cargo.toml index 348a8f107b..a6e32c0c3a 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -86,6 +86,7 @@ simsimd = "6.5.3" arc-swap = "1.7" rustc-hash = "2.0" either = { workspace = true } +turso_common = { workspace = true, features = ["serde"] } [build-dependencies] chrono = { workspace = true, default-features = false } From ad701aeee7de2d570c22f5d88412a4ca61fce887 Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Fri, 14 Nov 2025 11:51:43 -0300 Subject: [PATCH 03/10] change numeric module to be reexported by turso_common --- core/lib.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/lib.rs b/core/lib.rs index a32e5497b5..7fe4fc6886 100644 --- a/core/lib.rs +++ b/core/lib.rs @@ -35,10 +35,10 @@ pub mod vector; mod vtab; #[cfg(feature = "fuzz")] -pub mod numeric; +pub use turso_common::numeric; #[cfg(not(feature = "fuzz"))] -mod numeric; +use turso_common::numeric; use crate::index_method::IndexMethod; use crate::storage::checksum::CHECKSUM_REQUIRED_RESERVED_BYTES; From 3e5490bfbdfc6103e3e40448f1176c6d7069e828 Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Fri, 14 Nov 2025 11:52:59 -0300 Subject: [PATCH 04/10] remove affinity code from core --- core/schema.rs | 2 +- core/translate/expr.rs | 3 +- core/translate/insert.rs | 3 +- core/translate/main_loop.rs | 19 +- core/translate/optimizer/constraints.rs | 2 +- 
core/translate/optimizer/mod.rs | 6 +- core/translate/plan.rs | 2 +- core/vdbe/affinity.rs | 618 ------------------------ core/vdbe/execute.rs | 4 +- core/vdbe/insn.rs | 2 +- core/vdbe/mod.rs | 1 - core/vdbe/value.rs | 5 +- 12 files changed, 23 insertions(+), 644 deletions(-) delete mode 100644 core/vdbe/affinity.rs diff --git a/core/schema.rs b/core/schema.rs index 4990b4b3c8..08bfafbc8b 100644 --- a/core/schema.rs +++ b/core/schema.rs @@ -4,8 +4,8 @@ use crate::index_method::{IndexMethodAttachment, IndexMethodConfiguration}; use crate::translate::expr::{bind_and_rewrite_expr, walk_expr, BindingBehavior, WalkControl}; use crate::translate::index::{resolve_index_method_parameters, resolve_sorted_columns}; use crate::translate::planner::ROWID_STRS; -use crate::vdbe::affinity::Affinity; use parking_lot::RwLock; +use turso_common::schema::affinity::Affinity; use turso_macros::AtomicEnum; #[derive(Debug, Clone, AtomicEnum)] diff --git a/core/translate/expr.rs b/core/translate/expr.rs index fbda8897d7..9e4244fe0a 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -1,6 +1,7 @@ use std::sync::Arc; use tracing::{instrument, Level}; +use turso_common::schema::affinity::Affinity; use turso_parser::ast::{self, Expr, SubqueryType, UnaryOperator}; use super::emitter::Resolver; @@ -15,7 +16,7 @@ use crate::translate::optimizer::TakeOwnership; use crate::translate::plan::{Operation, ResultSetColumn}; use crate::translate::planner::parse_row_id; use crate::util::{exprs_are_equivalent, normalize_ident, parse_numeric_literal}; -use crate::vdbe::affinity::Affinity; + use crate::vdbe::builder::CursorKey; use crate::vdbe::{ builder::ProgramBuilder, diff --git a/core/translate/insert.rs b/core/translate/insert.rs index 234323cd7b..0f76713caa 100644 --- a/core/translate/insert.rs +++ b/core/translate/insert.rs @@ -1,5 +1,6 @@ use std::num::NonZeroUsize; use std::sync::Arc; +use turso_common::schema::affinity::Affinity; use turso_parser::ast::{ self, Expr, InsertBody, 
OneSelect, QualifiedName, ResolveType, ResultColumn, Upsert, UpsertDo, }; @@ -27,7 +28,7 @@ use crate::translate::upsert::{ collect_set_clauses_for_upsert, emit_upsert, resolve_upsert_target, ResolvedUpsertTarget, }; use crate::util::normalize_ident; -use crate::vdbe::affinity::Affinity; + use crate::vdbe::builder::ProgramBuilderOpts; use crate::vdbe::insn::{CmpInsFlags, IdxInsertFlags, InsertFlags, RegisterOrLiteral}; use crate::vdbe::BranchOffset; diff --git a/core/translate/main_loop.rs b/core/translate/main_loop.rs index 14424ea7fb..178b58a0dc 100644 --- a/core/translate/main_loop.rs +++ b/core/translate/main_loop.rs @@ -1,3 +1,4 @@ +use turso_common::schema::affinity::{self, Affinity}; use turso_parser::ast::{fmt::ToTokens, SortOrder}; use std::sync::Arc; @@ -18,6 +19,13 @@ use super::{ Search, SeekDef, SelectPlan, TableReferences, WhereTerm, }, }; +use crate::translate::{ + collate::get_collseq_from_expr, + emitter::UpdateRowSource, + plan::{EvalAt, NonFromClauseSubquery}, + subquery::emit_non_from_clause_subquery, + window::emit_window_loop_source, +}; use crate::{ schema::{Index, IndexColumn, Table}, translate::{ @@ -27,23 +35,12 @@ use crate::{ }, types::SeekOp, vdbe::{ - affinity, builder::{CursorKey, CursorType, ProgramBuilder}, insn::{CmpInsFlags, IdxInsertFlags, Insn}, BranchOffset, CursorID, }, Result, }; -use crate::{ - translate::{ - collate::get_collseq_from_expr, - emitter::UpdateRowSource, - plan::{EvalAt, NonFromClauseSubquery}, - subquery::emit_non_from_clause_subquery, - window::emit_window_loop_source, - }, - vdbe::affinity::Affinity, -}; // Metadata for handling LEFT JOIN operations #[derive(Debug)] diff --git a/core/translate/optimizer/constraints.rs b/core/translate/optimizer/constraints.rs index 4e6b6bbd04..133b7b6ab1 100644 --- a/core/translate/optimizer/constraints.rs +++ b/core/translate/optimizer/constraints.rs @@ -13,9 +13,9 @@ use crate::{ planner::{table_mask_from_expr, TableMask}, }, util::exprs_are_equivalent, - 
vdbe::affinity::Affinity, Result, }; +use turso_common::schema::affinity::Affinity; use turso_ext::{ConstraintInfo, ConstraintOp}; use turso_parser::ast::{self, SortOrder, TableInternalId}; diff --git a/core/translate/optimizer/mod.rs b/core/translate/optimizer/mod.rs index cc706b898d..51b40edb86 100644 --- a/core/translate/optimizer/mod.rs +++ b/core/translate/optimizer/mod.rs @@ -12,6 +12,7 @@ use cost::Cost; use join::{compute_best_join_order, BestJoinOrderResult}; use lift_common_subexpressions::lift_common_subexpressions_from_binary_or_terms; use order::{compute_order_target, plan_satisfies_order_target, EliminatesSortBy}; +use turso_common::schema::affinity::Affinity; use turso_ext::{ConstraintInfo, ConstraintUsage}; use turso_parser::ast::{self, Expr, SortOrder}; @@ -32,10 +33,7 @@ use crate::{ util::{ exprs_are_equivalent, simple_bind_expr, try_capture_parameters, try_substitute_parameters, }, - vdbe::{ - affinity::Affinity, - builder::{CursorKey, CursorType, ProgramBuilder}, - }, + vdbe::builder::{CursorKey, CursorType, ProgramBuilder}, LimboError, Result, }; diff --git a/core/translate/plan.rs b/core/translate/plan.rs index 775bdfa674..753d2ab896 100644 --- a/core/translate/plan.rs +++ b/core/translate/plan.rs @@ -1,4 +1,5 @@ use std::{cmp::Ordering, collections::HashMap, marker::PhantomData, sync::Arc}; +use turso_common::schema::affinity::Affinity; use turso_parser::ast::{ self, FrameBound, FrameClause, FrameExclude, FrameMode, SortOrder, SubqueryType, }; @@ -11,7 +12,6 @@ use crate::{ optimizer::constraints::SeekRangeConstraint, }, vdbe::{ - affinity::Affinity, builder::{CursorKey, CursorType, ProgramBuilder}, insn::{IdxInsertFlags, Insn}, BranchOffset, CursorID, diff --git a/core/vdbe/affinity.rs b/core/vdbe/affinity.rs deleted file mode 100644 index 7a40a22d77..0000000000 --- a/core/vdbe/affinity.rs +++ /dev/null @@ -1,618 +0,0 @@ -use either::Either; -use turso_parser::ast::{Expr, Literal}; - -use crate::{types::AsValueRef, Value, ValueRef}; - -/// 
# SQLite Column Type Affinities -/// -/// Each column in an SQLite 3 database is assigned one of the following type affinities: -/// -/// - **TEXT** -/// - **NUMERIC** -/// - **INTEGER** -/// - **REAL** -/// - **BLOB** -/// -/// > **Note:** Historically, the "BLOB" type affinity was called "NONE". However, this term was renamed to avoid confusion with "no affinity". -/// -/// ## Affinity Descriptions -/// -/// ### **TEXT** -/// - Stores data using the NULL, TEXT, or BLOB storage classes. -/// - Numerical data inserted into a column with TEXT affinity is converted into text form before being stored. -/// - **Example:** -/// ```sql -/// CREATE TABLE example (col TEXT); -/// INSERT INTO example (col) VALUES (123); -- Stored as '123' (text) -/// SELECT typeof(col) FROM example; -- Returns 'text' -/// ``` -/// -/// ### **NUMERIC** -/// - Can store values using all five storage classes. -/// - Text data is converted to INTEGER or REAL (in that order of preference) if it is a well-formed integer or real literal. -/// - If the text represents an integer too large for a 64-bit signed integer, it is converted to REAL. -/// - If the text is not a well-formed literal, it is stored as TEXT. -/// - Hexadecimal integer literals are stored as TEXT for historical compatibility. -/// - Floating-point values that can be exactly represented as integers are converted to integers. -/// - **Example:** -/// ```sql -/// CREATE TABLE example (col NUMERIC); -/// INSERT INTO example (col) VALUES ('3.0e+5'); -- Stored as 300000 (integer) -/// SELECT typeof(col) FROM example; -- Returns 'integer' -/// ``` -/// -/// ### **INTEGER** -/// - Behaves like NUMERIC affinity but differs in `CAST` expressions. 
-/// - **Example:** -/// ```sql -/// CREATE TABLE example (col INTEGER); -/// INSERT INTO example (col) VALUES (4.0); -- Stored as 4 (integer) -/// SELECT typeof(col) FROM example; -- Returns 'integer' -/// ``` -/// -/// ### **REAL** -/// - Similar to NUMERIC affinity but forces integer values into floating-point representation. -/// - **Optimization:** Small floating-point values with no fractional component may be stored as integers on disk to save space. This is invisible at the SQL level. -/// - **Example:** -/// ```sql -/// CREATE TABLE example (col REAL); -/// INSERT INTO example (col) VALUES (4); -- Stored as 4.0 (real) -/// SELECT typeof(col) FROM example; -- Returns 'real' -/// ``` -/// -/// ### **BLOB** -/// - Does not prefer any storage class. -/// - No coercion is performed between storage classes. -/// - **Example:** -/// ```sql -/// CREATE TABLE example (col BLOB); -/// INSERT INTO example (col) VALUES (x'1234'); -- Stored as a binary blob -/// SELECT typeof(col) FROM example; -- Returns 'blob' -/// ``` -#[derive(Debug, Clone, Copy, PartialEq)] -pub enum Affinity { - Blob = 0, - Text = 1, - Numeric = 2, - Integer = 3, - Real = 4, -} - -pub const SQLITE_AFF_NONE: char = 'A'; // Historically called NONE, but it's the same as BLOB -pub const SQLITE_AFF_TEXT: char = 'B'; -pub const SQLITE_AFF_NUMERIC: char = 'C'; -pub const SQLITE_AFF_INTEGER: char = 'D'; -pub const SQLITE_AFF_REAL: char = 'E'; - -impl Affinity { - /// This is meant to be used in opcodes like Eq, which state: - /// - /// "The SQLITE_AFF_MASK portion of P5 must be an affinity character - SQLITE_AFF_TEXT, SQLITE_AFF_INTEGER, and so forth. - /// An attempt is made to coerce both inputs according to this affinity before the comparison is made. - /// If the SQLITE_AFF_MASK is 0x00, then numeric affinity is used. - /// Note that the affinity conversions are stored back into the input registers P1 and P3. 
- /// So this opcode can cause persistent changes to registers P1 and P3."" - pub fn aff_mask(&self) -> char { - match self { - Affinity::Integer => SQLITE_AFF_INTEGER, - Affinity::Text => SQLITE_AFF_TEXT, - Affinity::Blob => SQLITE_AFF_NONE, - Affinity::Real => SQLITE_AFF_REAL, - Affinity::Numeric => SQLITE_AFF_NUMERIC, - } - } - - pub fn from_char(char: char) -> Self { - match char { - SQLITE_AFF_INTEGER => Affinity::Integer, - SQLITE_AFF_TEXT => Affinity::Text, - SQLITE_AFF_NONE => Affinity::Blob, - SQLITE_AFF_REAL => Affinity::Real, - SQLITE_AFF_NUMERIC => Affinity::Numeric, - _ => Affinity::Blob, - } - } - - pub fn as_char_code(&self) -> u8 { - self.aff_mask() as u8 - } - - pub fn from_char_code(code: u8) -> Self { - Self::from_char(code as char) - } - - pub fn is_numeric(&self) -> bool { - matches!(self, Affinity::Integer | Affinity::Real | Affinity::Numeric) - } - - pub fn has_affinity(&self) -> bool { - !matches!(self, Affinity::Blob) - } - - /// 3.1. Determination Of Column Affinity - /// For tables not declared as STRICT, the affinity of a column is determined by the declared type of the column, according to the following rules in the order shown: - /// - /// If the declared type contains the string "INT" then it is assigned INTEGER affinity. - /// - /// If the declared type of the column contains any of the strings "CHAR", "CLOB", or "TEXT" then that column has TEXT affinity. Notice that the type VARCHAR contains the string "CHAR" and is thus assigned TEXT affinity. - /// - /// If the declared type for a column contains the string "BLOB" or if no type is specified then the column has affinity BLOB. - /// - /// If the declared type for a column contains any of the strings "REAL", "FLOA", or "DOUB" then the column has REAL affinity. - /// - /// Otherwise, the affinity is NUMERIC. - /// - /// Note that the order of the rules for determining column affinity is important. 
A column whose declared type is "CHARINT" will match both rules 1 and 2 but the first rule takes precedence and so the column affinity will be INTEGER. - #[expect(clippy::self_named_constructors)] - pub fn affinity(datatype: &str) -> Self { - let datatype = datatype.to_ascii_uppercase(); - - // Rule 1: INT -> INTEGER affinity - if datatype.contains("INT") { - return Affinity::Integer; - } - - // Rule 2: CHAR/CLOB/TEXT -> TEXT affinity - if datatype.contains("CHAR") || datatype.contains("CLOB") || datatype.contains("TEXT") { - return Affinity::Text; - } - - // Rule 3: BLOB or empty -> BLOB affinity (historically called NONE) - if datatype.contains("BLOB") || datatype.is_empty() || datatype.contains("ANY") { - return Affinity::Blob; - } - - // Rule 4: REAL/FLOA/DOUB -> REAL affinity - if datatype.contains("REAL") || datatype.contains("FLOA") || datatype.contains("DOUB") { - return Affinity::Real; - } - - // Rule 5: Otherwise -> NUMERIC affinity - Affinity::Numeric - } - - pub fn convert<'a>(&self, val: &'a impl AsValueRef) -> Option, Value>> { - let val = val.as_value_ref(); - let is_text = matches!(val, ValueRef::Text(_)); - // Apply affinity conversions - match self { - Affinity::Numeric | Affinity::Integer => is_text - .then(|| apply_numeric_affinity(val, false)) - .flatten() - .map(Either::Left), - - Affinity::Text => { - if is_text { - is_numeric_value(val) - .then(|| stringify_register(val)) - .flatten() - .map(Either::Right) - } else { - None - } - } - - Affinity::Real => { - let mut left = is_text - .then(|| apply_numeric_affinity(val, false)) - .flatten(); - - if let ValueRef::Integer(i) = left.unwrap_or(val) { - left = Some(ValueRef::Float(i as f64)); - } - - left.map(Either::Left) - } - - Affinity::Blob => None, // Do nothing for blob affinity. - } - } - - /// Return TRUE if the given expression is a constant which would be - /// unchanged by OP_Affinity with the affinity given in the second - /// argument. 
- /// - /// This routine is used to determine if the OP_Affinity operation - /// can be omitted. When in doubt return FALSE. A false negative - /// is harmless. A false positive, however, can result in the wrong - /// answer. - /// - /// reference https://github.com/sqlite/sqlite/blob/master/src/expr.c#L3000 - pub fn expr_needs_no_affinity_change(&self, expr: &Expr) -> bool { - if !self.has_affinity() { - return true; - } - // TODO: check for unary minus in the expr, as it may be an additional optimization. - // This involves mostly likely walking the expression - match expr { - Expr::Literal(literal) => match literal { - Literal::Numeric(_) => self.is_numeric(), - Literal::String(_) => matches!(self, Affinity::Text), - Literal::Blob(_) => true, - _ => false, - }, - Expr::Column { - is_rowid_alias: true, - .. - } => self.is_numeric(), - _ => false, - } - } -} - -#[derive(Debug, PartialEq)] -pub enum NumericParseResult { - NotNumeric, // not a valid number - PureInteger, // pure integer (entire string) - HasDecimalOrExp, // has decimal point or exponent (entire string) - ValidPrefixOnly, // valid prefix but not entire string -} - -#[derive(Debug)] -pub enum ParsedNumber { - None, - Integer(i64), - Float(f64), -} - -impl ParsedNumber { - fn as_integer(&self) -> Option { - match self { - ParsedNumber::Integer(i) => Some(*i), - _ => None, - } - } - - fn as_float(&self) -> Option { - match self { - ParsedNumber::Float(f) => Some(*f), - _ => None, - } - } -} - -pub fn try_for_float(text: &str) -> (NumericParseResult, ParsedNumber) { - let bytes = text.as_bytes(); - if bytes.is_empty() { - return (NumericParseResult::NotNumeric, ParsedNumber::None); - } - - let mut pos = 0; - let len = bytes.len(); - - while pos < len && is_space(bytes[pos]) { - pos += 1; - } - - if pos >= len { - return (NumericParseResult::NotNumeric, ParsedNumber::None); - } - - let mut sign = 1i64; - - if bytes[pos] == b'-' { - sign = -1; - pos += 1; - } else if bytes[pos] == b'+' { - pos += 1; - } - 
- if pos >= len { - return (NumericParseResult::NotNumeric, ParsedNumber::None); - } - - let mut significand = 0u64; - let mut decimal_adjust = 0i32; - let mut has_digits = false; - - // Parse digits before decimal point - while pos < len && bytes[pos].is_ascii_digit() { - has_digits = true; - let digit = (bytes[pos] - b'0') as u64; - - if significand <= (u64::MAX - 9) / 10 { - significand = significand * 10 + digit; - } else { - // Skip overflow digits but adjust exponent - decimal_adjust += 1; - } - pos += 1; - } - - let mut has_decimal = false; - let mut has_exponent = false; - - // Check for decimal point - if pos < len && bytes[pos] == b'.' { - has_decimal = true; - pos += 1; - - // Parse fractional digits - while pos < len && bytes[pos].is_ascii_digit() { - has_digits = true; - let digit = (bytes[pos] - b'0') as u64; - - if significand <= (u64::MAX - 9) / 10 { - significand = significand * 10 + digit; - decimal_adjust -= 1; - } - pos += 1; - } - } - - if !has_digits { - return (NumericParseResult::NotNumeric, ParsedNumber::None); - } - - // Check for exponent - let mut exponent = 0i32; - if pos < len && (bytes[pos] == b'e' || bytes[pos] == b'E') { - has_exponent = true; - pos += 1; - - if pos >= len { - // Incomplete exponent, but we have valid digits before - return create_result_from_significand( - significand, - sign, - decimal_adjust, - has_decimal, - has_exponent, - NumericParseResult::ValidPrefixOnly, - ); - } - - let mut exp_sign = 1i32; - if bytes[pos] == b'-' { - exp_sign = -1; - pos += 1; - } else if bytes[pos] == b'+' { - pos += 1; - } - - if pos >= len || !bytes[pos].is_ascii_digit() { - // Incomplete exponent - return create_result_from_significand( - significand, - sign, - decimal_adjust, - has_decimal, - false, - NumericParseResult::ValidPrefixOnly, - ); - } - - // Parse exponent digits - while pos < len && bytes[pos].is_ascii_digit() { - let digit = (bytes[pos] - b'0') as i32; - if exponent < 10000 { - exponent = exponent * 10 + digit; - } 
else { - exponent = 10000; // Cap at large value - } - pos += 1; - } - exponent *= exp_sign; - } - - // Skip trailing whitespace - while pos < len && is_space(bytes[pos]) { - pos += 1; - } - - // Determine if we consumed the entire string - let consumed_all = pos >= len; - let final_exponent = decimal_adjust + exponent; - - let parse_result = if !consumed_all { - NumericParseResult::ValidPrefixOnly - } else if has_decimal || has_exponent { - NumericParseResult::HasDecimalOrExp - } else { - NumericParseResult::PureInteger - }; - - create_result_from_significand( - significand, - sign, - final_exponent, - has_decimal, - has_exponent, - parse_result, - ) -} - -fn create_result_from_significand( - significand: u64, - sign: i64, - exponent: i32, - has_decimal: bool, - has_exponent: bool, - parse_result: NumericParseResult, -) -> (NumericParseResult, ParsedNumber) { - if significand == 0 { - match parse_result { - NumericParseResult::PureInteger => { - return (parse_result, ParsedNumber::Integer(0)); - } - _ => { - return (parse_result, ParsedNumber::Float(0.0)); - } - } - } - - // For pure integers without exponent, try to return as integer - if !has_decimal && !has_exponent && exponent == 0 && significand <= i64::MAX as u64 { - let signed_val = (significand as i64).wrapping_mul(sign); - return (parse_result, ParsedNumber::Integer(signed_val)); - } - - // Convert to float - let mut result = significand as f64; - - let mut exp = exponent; - match exp.cmp(&0) { - std::cmp::Ordering::Greater => { - while exp >= 100 { - result *= 1e100; - exp -= 100; - } - while exp >= 10 { - result *= 1e10; - exp -= 10; - } - while exp >= 1 { - result *= 10.0; - exp -= 1; - } - } - std::cmp::Ordering::Less => { - while exp <= -100 { - result *= 1e-100; - exp += 100; - } - while exp <= -10 { - result *= 1e-10; - exp += 10; - } - while exp <= -1 { - result *= 0.1; - exp += 1; - } - } - std::cmp::Ordering::Equal => {} - } - - if sign < 0 { - result = -result; - } - - (parse_result, 
ParsedNumber::Float(result)) -} - -pub fn is_space(byte: u8) -> bool { - matches!(byte, b' ' | b'\t' | b'\n' | b'\r' | b'\x0c') -} - -fn real_to_i64(r: f64) -> i64 { - if r < -9223372036854774784.0 { - i64::MIN - } else if r > 9223372036854774784.0 { - i64::MAX - } else { - r as i64 - } -} - -fn apply_integer_affinity(val: ValueRef) -> Option { - let ValueRef::Float(f) = val else { - return None; - }; - - let ix = real_to_i64(f); - - // Only convert if round-trip is exact and not at extreme values - if f == (ix as f64) && ix > i64::MIN && ix < i64::MAX { - Some(ValueRef::Integer(ix)) - } else { - None - } -} - -/// Try to convert a value into a numeric representation if we can -/// do so without loss of information. In other words, if the string -/// looks like a number, convert it into a number. If it does not -/// look like a number, leave it alone. -pub fn apply_numeric_affinity(val: ValueRef, try_for_int: bool) -> Option { - let ValueRef::Text(text) = val else { - return None; // Only apply to text values - }; - - let text_str = text.as_str(); - let (parse_result, parsed_value) = try_for_float(text_str); - - // Only convert if we have a complete valid number (not just a prefix) - match parse_result { - NumericParseResult::NotNumeric | NumericParseResult::ValidPrefixOnly => { - None // Leave as text - } - NumericParseResult::PureInteger => { - if let Some(int_val) = parsed_value.as_integer() { - Some(ValueRef::Integer(int_val)) - } else if let Some(float_val) = parsed_value.as_float() { - let res = ValueRef::Float(float_val); - if try_for_int { - apply_integer_affinity(res) - } else { - Some(res) - } - } else { - None - } - } - NumericParseResult::HasDecimalOrExp => { - if let Some(float_val) = parsed_value.as_float() { - let res = ValueRef::Float(float_val); - // If try_for_int is true, try to convert float to int if exact - if try_for_int { - apply_integer_affinity(res) - } else { - Some(res) - } - } else { - None - } - } - } -} - -fn is_numeric_value(val: 
ValueRef) -> bool { - matches!(val, ValueRef::Integer(_) | ValueRef::Float(_)) -} - -fn stringify_register(val: ValueRef) -> Option { - match val { - ValueRef::Integer(i) => Some(Value::build_text(i.to_string())), - ValueRef::Float(f) => Some(Value::build_text(f.to_string())), - ValueRef::Text(_) | ValueRef::Null | ValueRef::Blob(_) => None, - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_apply_numeric_affinity_partial_numbers() { - let val = Value::Text("123abc".into()); - let res = apply_numeric_affinity(val.as_value_ref(), false); - assert!(res.is_none()); - - let val = Value::Text("-53093015420544-15062897".into()); - let res = apply_numeric_affinity(val.as_value_ref(), false); - assert!(res.is_none()); - - let val = Value::Text("123.45xyz".into()); - let res = apply_numeric_affinity(val.as_value_ref(), false); - assert!(res.is_none()); - } - - #[test] - fn test_apply_numeric_affinity_complete_numbers() { - let val = Value::Text("123".into()); - let res = apply_numeric_affinity(val.as_value_ref(), false); - assert_eq!(res, Some(ValueRef::Integer(123))); - - let val = Value::Text("123.45".into()); - let res = apply_numeric_affinity(val.as_value_ref(), false); - assert_eq!(res, Some(ValueRef::Float(123.45))); - - let val = Value::Text(" -456 ".into()); - let res = apply_numeric_affinity(val.as_value_ref(), false); - assert_eq!(res, Some(ValueRef::Integer(-456))); - - let val = Value::Text("0".into()); - let res = apply_numeric_affinity(val.as_value_ref(), false); - assert_eq!(res, Some(ValueRef::Integer(0))); - } -} diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index bba8b9cc98..35088b8dc7 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -20,7 +20,6 @@ use crate::util::{ normalize_ident, rewrite_column_references_if_needed, rewrite_fk_parent_cols_if_self_ref, rewrite_fk_parent_table_if_needed, rewrite_inline_col_fk_target_if_needed, }; -use crate::vdbe::affinity::{apply_numeric_affinity, try_for_float, Affinity, 
ParsedNumber}; use crate::vdbe::insn::InsertFlags; use crate::vdbe::value::ComparisonOp; use crate::vdbe::{registers_to_ref_values, EndStatement, TxnCleanup}; @@ -48,6 +47,9 @@ use std::{ borrow::BorrowMut, sync::{atomic::Ordering, Arc, Mutex}, }; +use turso_common::schema::affinity::{ + apply_numeric_affinity, try_for_float, Affinity, ParsedNumber, +}; use turso_macros::match_ignore_ascii_case; use crate::pseudo::PseudoCursor; diff --git a/core/vdbe/insn.rs b/core/vdbe/insn.rs index df70784d13..31d10f7574 100644 --- a/core/vdbe/insn.rs +++ b/core/vdbe/insn.rs @@ -9,11 +9,11 @@ use crate::{ storage::{pager::CreateBTreeFlags, wal::CheckpointMode}, translate::{collate::CollationSeq, emitter::TransactionMode}, types::KeyInfo, - vdbe::affinity::Affinity, Value, }; use strum::EnumCount; use strum_macros::{EnumDiscriminants, FromRepr, VariantArray}; +use turso_common::schema::affinity::Affinity; use turso_macros::Description; use turso_parser::ast::SortOrder; diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index bebb41aaa7..d574970807 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -17,7 +17,6 @@ //! //! 
https://www.sqlite.org/opcode.html -pub mod affinity; pub mod builder; pub mod execute; pub mod explain; diff --git a/core/vdbe/value.rs b/core/vdbe/value.rs index 62dfccd27f..7339907d16 100644 --- a/core/vdbe/value.rs +++ b/core/vdbe/value.rs @@ -1,13 +1,12 @@ -use std::collections::HashMap; - use regex::{Regex, RegexBuilder}; +use std::collections::HashMap; +use turso_common::schema::affinity::Affinity; use crate::{ function::MathFunc, numeric::{NullableInteger, Numeric}, translate::collate::CollationSeq, types::{compare_immutable_single, AsValueRef, SeekOp}, - vdbe::affinity::Affinity, LimboError, Result, Value, ValueRef, }; From 530b31cbd036eba5c0117754b7d47aa5ca535fe7 Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Fri, 14 Nov 2025 12:27:16 -0300 Subject: [PATCH 05/10] remove `Value` from core --- core/Cargo.toml | 4 +- core/types.rs | 930 +----------------------------------------------- 2 files changed, 9 insertions(+), 925 deletions(-) diff --git a/core/Cargo.toml b/core/Cargo.toml index a6e32c0c3a..100bba6fb0 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -26,7 +26,7 @@ time = [] fuzz = [] omit_autovacuum = [] simulator = ["fuzz", "serde"] -serde = ["dep:serde"] +serde = ["dep:serde", "turso_common/serde"] series = [] encryption = [] checksum = [] @@ -86,7 +86,7 @@ simsimd = "6.5.3" arc-swap = "1.7" rustc-hash = "2.0" either = { workspace = true } -turso_common = { workspace = true, features = ["serde"] } +turso_common = { workspace = true } [build-dependencies] chrono = { workspace = true, default-features = false } diff --git a/core/types.rs b/core/types.rs index f4c3434085..5f52adb00b 100644 --- a/core/types.rs +++ b/core/types.rs @@ -1,13 +1,13 @@ -use either::Either; -#[cfg(feature = "serde")] -use serde::Deserialize; +pub use turso_common::value::{ + sqlite_int_float_compare, AsValueRef, Extendable, Text, TextRef, TextSubtype, Value, ValueRef, + ValueType, +}; use turso_ext::{AggCtx, FinalizeFunction, StepFunction}; use 
turso_parser::ast::SortOrder; use crate::error::LimboError; use crate::ext::{ExtValue, ExtValueType}; use crate::index_method::IndexMethodCursor; -use crate::numeric::format_float; use crate::pseudo::PseudoCursor; use crate::schema::Index; use crate::storage::btree::CursorTrait; @@ -18,10 +18,8 @@ use crate::vdbe::sorter::Sorter; use crate::vdbe::Register; use crate::vtab::VirtualTableCursor; use crate::{Completion, CompletionError, Result, IO}; -use std::borrow::{Borrow, Cow}; -use std::fmt::{Debug, Display}; +use std::fmt::Debug; use std::iter::Peekable; -use std::ops::Deref; use std::task::Waker; /// SQLite by default uses 2000 as maximum numbers in a row. @@ -29,424 +27,6 @@ use std::task::Waker; /// But the hard limit of number of columns is 32,767 columns i16::MAX const MAX_COLUMN: usize = 2000; -#[derive(Debug, Clone, Copy, PartialEq)] -pub enum ValueType { - Null, - Integer, - Float, - Text, - Blob, - Error, -} - -impl Display for ValueType { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let value = match self { - Self::Null => "NULL", - Self::Integer => "INT", - Self::Float => "REAL", - Self::Blob => "BLOB", - Self::Text => "TEXT", - Self::Error => "ERROR", - }; - write!(f, "{value}") - } -} - -#[derive(Debug, Clone, Copy, PartialEq)] -#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] -pub enum TextSubtype { - Text, - #[cfg(feature = "json")] - Json, -} - -#[derive(Debug, Clone)] -#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] -pub struct Text { - pub value: Cow<'static, str>, - pub subtype: TextSubtype, -} - -impl Display for Text { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", self.as_str()) - } -} - -impl Text { - pub fn new(value: impl Into>) -> Self { - Self { - value: value.into(), - subtype: TextSubtype::Text, - } - } - #[cfg(feature = "json")] - pub fn json(value: String) -> Self { - Self { - value: value.into(), - subtype: 
TextSubtype::Json, - } - } - - pub fn as_str(&self) -> &str { - &self.value - } -} - -#[derive(Debug, Clone, Copy)] -pub struct TextRef<'a> { - pub value: &'a str, - pub subtype: TextSubtype, -} - -impl<'a> TextRef<'a> { - pub fn new(value: &'a str, subtype: TextSubtype) -> Self { - Self { value, subtype } - } - - #[inline] - pub fn as_str(&self) -> &'a str { - self.value - } -} - -impl<'a> Borrow for TextRef<'a> { - #[inline] - fn borrow(&self) -> &str { - self.as_str() - } -} - -impl<'a> Deref for TextRef<'a> { - type Target = str; - - #[inline] - fn deref(&self) -> &Self::Target { - self.as_str() - } -} - -pub trait Extendable { - fn do_extend(&mut self, other: &T); -} - -impl Extendable for Text { - #[inline(always)] - fn do_extend(&mut self, other: &T) { - let value = self.value.to_mut(); - value.clear(); - value.push_str(other.as_ref()); - self.subtype = other.subtype(); - } -} - -impl Extendable for Vec { - #[inline(always)] - fn do_extend(&mut self, other: &T) { - self.clear(); - self.extend_from_slice(other.as_slice()); - } -} - -pub trait AnyText: AsRef { - fn subtype(&self) -> TextSubtype; -} - -impl AnyText for Text { - fn subtype(&self) -> TextSubtype { - self.subtype - } -} - -impl AnyText for &str { - fn subtype(&self) -> TextSubtype { - TextSubtype::Text - } -} - -pub trait AnyBlob { - fn as_slice(&self) -> &[u8]; -} - -impl AnyBlob for Vec { - fn as_slice(&self) -> &[u8] { - self.as_slice() - } -} - -impl AnyBlob for &[u8] { - fn as_slice(&self) -> &[u8] { - self - } -} - -impl AsRef for Text { - fn as_ref(&self) -> &str { - self.as_str() - } -} - -impl From<&str> for Text { - fn from(value: &str) -> Self { - Text { - value: value.to_owned().into(), - subtype: TextSubtype::Text, - } - } -} - -impl From for Text { - fn from(value: String) -> Self { - Text { - value: Cow::from(value), - subtype: TextSubtype::Text, - } - } -} - -impl From for String { - fn from(value: Text) -> Self { - value.value.into_owned() - } -} - -#[cfg(feature = "serde")] -fn 
float_to_string(float: &f64, serializer: S) -> Result -where - S: serde::Serializer, -{ - serializer.serialize_str(&format!("{float}")) -} - -#[cfg(feature = "serde")] -fn string_to_float<'de, D>(deserializer: D) -> Result -where - D: serde::Deserializer<'de>, -{ - let s = String::deserialize(deserializer)?; - match crate::numeric::str_to_f64(s) { - Some(result) => Ok(result.into()), - None => Err(serde::de::Error::custom("")), - } -} - -#[derive(Debug, Clone)] -#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] -pub enum Value { - Null, - Integer(i64), - // we use custom serialization to preserve float precision - #[cfg_attr( - feature = "serde", - serde( - serialize_with = "float_to_string", - deserialize_with = "string_to_float" - ) - )] - Float(f64), - Text(Text), - Blob(Vec), -} - -#[derive(Clone, Copy)] -pub enum ValueRef<'a> { - Null, - Integer(i64), - Float(f64), - Text(TextRef<'a>), - Blob(&'a [u8]), -} - -impl Debug for ValueRef<'_> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - ValueRef::Null => write!(f, "Null"), - ValueRef::Integer(i) => f.debug_tuple("Integer").field(i).finish(), - ValueRef::Float(float) => f.debug_tuple("Float").field(float).finish(), - ValueRef::Text(text_ref) => { - // truncate string to at most 256 chars - let text = text_ref.as_str(); - let max_len = text.len().min(256); - f.debug_struct("Text") - .field("data", &&text[0..max_len]) - // Indicates to the developer debugging that the data is truncated for printing - .field("truncated", &(text.len() > max_len)) - .finish() - } - ValueRef::Blob(blob) => { - // truncate blob_slice to at most 32 bytes - let max_len = blob.len().min(32); - f.debug_struct("Blob") - .field("data", &&blob[0..max_len]) - // Indicates to the developer debugging that the data is truncated for printing - .field("truncated", &(blob.len() > max_len)) - .finish() - } - } - } -} - -pub trait AsValueRef { - fn as_value_ref<'a>(&'a self) -> 
ValueRef<'a>; -} - -impl<'b> AsValueRef for ValueRef<'b> { - #[inline] - fn as_value_ref<'a>(&'a self) -> ValueRef<'a> { - *self - } -} - -impl AsValueRef for Value { - #[inline] - fn as_value_ref<'a>(&'a self) -> ValueRef<'a> { - self.as_ref() - } -} - -impl AsValueRef for &mut Value { - #[inline] - fn as_value_ref<'a>(&'a self) -> ValueRef<'a> { - self.as_ref() - } -} - -impl AsValueRef for Either -where - V1: AsValueRef, - V2: AsValueRef, -{ - #[inline] - fn as_value_ref<'a>(&'a self) -> ValueRef<'a> { - match self { - Either::Left(left) => left.as_value_ref(), - Either::Right(right) => right.as_value_ref(), - } - } -} - -impl AsValueRef for &V { - fn as_value_ref<'a>(&'a self) -> ValueRef<'a> { - (*self).as_value_ref() - } -} - -impl Value { - pub fn as_ref<'a>(&'a self) -> ValueRef<'a> { - match self { - Value::Null => ValueRef::Null, - Value::Integer(v) => ValueRef::Integer(*v), - Value::Float(v) => ValueRef::Float(*v), - Value::Text(v) => ValueRef::Text(TextRef { - value: &v.value, - subtype: v.subtype, - }), - Value::Blob(v) => ValueRef::Blob(v.as_slice()), - } - } - - // A helper function that makes building a text Value easier. 
- pub fn build_text(text: impl Into>) -> Self { - Self::Text(Text::new(text)) - } - - pub fn to_blob(&self) -> Option<&[u8]> { - match self { - Self::Blob(blob) => Some(blob), - _ => None, - } - } - - pub fn from_blob(data: Vec) -> Self { - Value::Blob(data) - } - - pub fn to_text(&self) -> Option<&str> { - match self { - Value::Text(t) => Some(t.as_str()), - _ => None, - } - } - - pub fn as_blob(&self) -> &Vec { - match self { - Value::Blob(b) => b, - _ => panic!("as_blob must be called only for Value::Blob"), - } - } - - pub fn as_blob_mut(&mut self) -> &mut Vec { - match self { - Value::Blob(b) => b, - _ => panic!("as_blob must be called only for Value::Blob"), - } - } - pub fn as_float(&self) -> f64 { - match self { - Value::Float(f) => *f, - Value::Integer(i) => *i as f64, - _ => panic!("as_float must be called only for Value::Float or Value::Integer"), - } - } - - pub fn as_int(&self) -> Option { - match self { - Value::Integer(i) => Some(*i), - _ => None, - } - } - - pub fn as_uint(&self) -> u64 { - match self { - Value::Integer(i) => (*i).cast_unsigned(), - _ => 0, - } - } - - pub fn from_text(text: impl Into>) -> Self { - Value::Text(Text::new(text)) - } - - pub fn value_type(&self) -> ValueType { - match self { - Value::Null => ValueType::Null, - Value::Integer(_) => ValueType::Integer, - Value::Float(_) => ValueType::Float, - Value::Text(_) => ValueType::Text, - Value::Blob(_) => ValueType::Blob, - } - } - pub fn serialize_serial(&self, out: &mut Vec) { - match self { - Value::Null => {} - Value::Integer(i) => { - let serial_type = SerialType::from(self); - match serial_type.kind() { - SerialTypeKind::I8 => out.extend_from_slice(&(*i as i8).to_be_bytes()), - SerialTypeKind::I16 => out.extend_from_slice(&(*i as i16).to_be_bytes()), - SerialTypeKind::I24 => out.extend_from_slice(&(*i as i32).to_be_bytes()[1..]), // remove most significant byte - SerialTypeKind::I32 => out.extend_from_slice(&(*i as i32).to_be_bytes()), - SerialTypeKind::I48 => 
out.extend_from_slice(&i.to_be_bytes()[2..]), // remove 2 most significant bytes - SerialTypeKind::I64 => out.extend_from_slice(&i.to_be_bytes()), - _ => unreachable!(), - } - } - Value::Float(f) => out.extend_from_slice(&f.to_be_bytes()), - Value::Text(t) => out.extend_from_slice(t.value.as_bytes()), - Value::Blob(b) => out.extend_from_slice(b), - }; - } - - /// Cast Value to String, if Value is NULL returns None - pub fn cast_text(&self) -> Option { - Some(match self { - Value::Null => return None, - v => v.to_string(), - }) - } -} - #[derive(Debug, Clone, PartialEq)] pub struct ExternalAggState { pub state: *mut AggCtx, @@ -455,32 +35,6 @@ pub struct ExternalAggState { pub finalize_fn: FinalizeFunction, } -/// Please use Display trait for all limbo output so we have single origin of truth -/// When you need value as string: -/// ---GOOD--- -/// format!("{}", value); -/// ---BAD--- -/// match value { -/// Value::Integer(i) => *i.as_str(), -/// Value::Float(f) => *f.as_str(), -/// .... -/// } -impl Display for Value { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Self::Null => write!(f, ""), - Self::Integer(i) => { - write!(f, "{i}") - } - Self::Float(fl) => f.write_str(&format_float(*fl)), - Self::Text(s) => { - write!(f, "{}", s.as_str()) - } - Self::Blob(b) => write!(f, "{}", String::from_utf8_lossy(b)), - } - } -} - impl Value { pub fn to_ffi(&self) -> ExtValue { match self { @@ -538,118 +92,6 @@ impl Value { } } -/// Convert a `Value` into the implementors type. -pub trait FromValue: Sealed { - fn from_sql(val: Value) -> Result - where - Self: Sized; -} - -impl FromValue for Value { - fn from_sql(val: Value) -> Result { - Ok(val) - } -} -impl Sealed for crate::Value {} - -macro_rules! 
impl_int_from_value { - ($ty:ty, $cast:expr) => { - impl FromValue for $ty { - fn from_sql(val: Value) -> Result { - match val { - Value::Null => Err(LimboError::NullValue), - Value::Integer(i) => Ok($cast(i)), - _ => unreachable!("invalid value type"), - } - } - } - - impl Sealed for $ty {} - }; -} - -impl_int_from_value!(i32, |i| i as i32); -impl_int_from_value!(u32, |i| i as u32); -impl_int_from_value!(i64, |i| i); -impl_int_from_value!(u64, |i| i as u64); - -impl FromValue for f64 { - fn from_sql(val: Value) -> Result { - match val { - Value::Null => Err(LimboError::NullValue), - Value::Float(f) => Ok(f), - _ => unreachable!("invalid value type"), - } - } -} -impl Sealed for f64 {} - -impl FromValue for Vec { - fn from_sql(val: Value) -> Result { - match val { - Value::Null => Err(LimboError::NullValue), - Value::Blob(blob) => Ok(blob), - _ => unreachable!("invalid value type"), - } - } -} -impl Sealed for Vec {} - -impl FromValue for [u8; N] { - fn from_sql(val: Value) -> Result { - match val { - Value::Null => Err(LimboError::NullValue), - Value::Blob(blob) => blob.try_into().map_err(|_| LimboError::InvalidBlobSize(N)), - _ => unreachable!("invalid value type"), - } - } -} -impl Sealed for [u8; N] {} - -impl FromValue for String { - fn from_sql(val: Value) -> Result { - match val { - Value::Null => Err(LimboError::NullValue), - Value::Text(s) => Ok(s.to_string()), - _ => unreachable!("invalid value type"), - } - } -} -impl Sealed for String {} - -impl FromValue for bool { - fn from_sql(val: Value) -> Result { - match val { - Value::Null => Err(LimboError::NullValue), - Value::Integer(i) => match i { - 0 => Ok(false), - 1 => Ok(true), - _ => Err(LimboError::InvalidColumnType), - }, - _ => unreachable!("invalid value type"), - } - } -} -impl Sealed for bool {} - -impl FromValue for Option -where - T: FromValue, -{ - fn from_sql(val: Value) -> Result { - match val { - Value::Null => Ok(None), - _ => T::from_sql(val).map(Some), - } - } -} -impl Sealed for Option 
{} - -mod sealed { - pub trait Sealed {} -} -use sealed::Sealed; - #[derive(Debug, Clone, PartialEq)] pub struct SumAggState { pub r_err: f64, // Error term for Kahan-Babushka-Neumaier summation @@ -688,191 +130,6 @@ impl AggContext { } } -impl PartialEq for Value { - fn eq(&self, other: &Value) -> bool { - let (left, right) = (self.as_value_ref(), other.as_value_ref()); - left.eq(&right) - } -} - -impl PartialOrd for Value { - fn partial_cmp(&self, other: &Self) -> Option { - Some(self.cmp(other)) - } -} - -impl PartialOrd for AggContext { - fn partial_cmp(&self, other: &AggContext) -> Option { - match (self, other) { - (Self::Avg(a, _), Self::Avg(b, _)) => a.partial_cmp(b), - (Self::Sum(a, _), Self::Sum(b, _)) => a.partial_cmp(b), - (Self::Count(a), Self::Count(b)) => a.partial_cmp(b), - (Self::Max(a), Self::Max(b)) => a.partial_cmp(b), - (Self::Min(a), Self::Min(b)) => a.partial_cmp(b), - (Self::GroupConcat(a), Self::GroupConcat(b)) => a.partial_cmp(b), - _ => None, - } - } -} - -impl Eq for Value {} - -impl Ord for Value { - fn cmp(&self, other: &Self) -> std::cmp::Ordering { - let (left, right) = (self.as_value_ref(), other.as_value_ref()); - left.cmp(&right) - } -} - -impl std::ops::Add for Value { - type Output = Value; - - fn add(mut self, rhs: Self) -> Self::Output { - self += rhs; - self - } -} - -impl std::ops::Add for Value { - type Output = Value; - - fn add(mut self, rhs: f64) -> Self::Output { - self += rhs; - self - } -} - -impl std::ops::Add for Value { - type Output = Value; - - fn add(mut self, rhs: i64) -> Self::Output { - self += rhs; - self - } -} - -impl std::ops::AddAssign for Value { - fn add_assign(mut self: &mut Self, rhs: Self) { - match (&mut self, rhs) { - (Self::Integer(int_left), Self::Integer(int_right)) => *int_left += int_right, - (Self::Integer(int_left), Self::Float(float_right)) => { - *self = Self::Float(*int_left as f64 + float_right) - } - (Self::Float(float_left), Self::Integer(int_right)) => { - *self = 
Self::Float(*float_left + int_right as f64) - } - (Self::Float(float_left), Self::Float(float_right)) => { - *float_left += float_right; - } - (Self::Text(string_left), Self::Text(string_right)) => { - string_left.value.to_mut().push_str(&string_right.value); - string_left.subtype = TextSubtype::Text; - } - (Self::Text(string_left), Self::Integer(int_right)) => { - let string_right = int_right.to_string(); - string_left.value.to_mut().push_str(&string_right); - string_left.subtype = TextSubtype::Text; - } - (Self::Integer(int_left), Self::Text(string_right)) => { - let string_left = int_left.to_string(); - *self = Self::build_text(string_left + string_right.as_str()); - } - (Self::Text(string_left), Self::Float(float_right)) => { - let string_right = Self::Float(float_right).to_string(); - string_left.value.to_mut().push_str(&string_right); - string_left.subtype = TextSubtype::Text; - } - (Self::Float(float_left), Self::Text(string_right)) => { - let string_left = Self::Float(*float_left).to_string(); - *self = Self::build_text(string_left + string_right.as_str()); - } - (_, Self::Null) => {} - (Self::Null, rhs) => *self = rhs, - _ => *self = Self::Float(0.0), - } - } -} - -impl std::ops::AddAssign for Value { - fn add_assign(&mut self, rhs: i64) { - match self { - Self::Integer(int_left) => *int_left += rhs, - Self::Float(float_left) => *float_left += rhs as f64, - _ => unreachable!(), - } - } -} - -impl std::ops::AddAssign for Value { - fn add_assign(&mut self, rhs: f64) { - match self { - Self::Integer(int_left) => *self = Self::Float(*int_left as f64 + rhs), - Self::Float(float_left) => *float_left += rhs, - _ => unreachable!(), - } - } -} - -impl std::ops::Div for Value { - type Output = Value; - - fn div(self, rhs: Value) -> Self::Output { - match (self, rhs) { - (Self::Integer(int_left), Self::Integer(int_right)) => { - Self::Integer(int_left / int_right) - } - (Self::Integer(int_left), Self::Float(float_right)) => { - Self::Float(int_left as f64 / 
float_right) - } - (Self::Float(float_left), Self::Integer(int_right)) => { - Self::Float(float_left / int_right as f64) - } - (Self::Float(float_left), Self::Float(float_right)) => { - Self::Float(float_left / float_right) - } - _ => Self::Float(0.0), - } - } -} - -impl std::ops::DivAssign for Value { - fn div_assign(&mut self, rhs: Value) { - *self = self.clone() / rhs; - } -} - -impl TryFrom> for i64 { - type Error = LimboError; - - fn try_from(value: ValueRef<'_>) -> Result { - match value { - ValueRef::Integer(i) => Ok(i), - _ => Err(LimboError::ConversionError("Expected integer value".into())), - } - } -} - -impl TryFrom> for String { - type Error = LimboError; - - #[inline] - fn try_from(value: ValueRef<'_>) -> Result { - Ok(<&str>::try_from(value)?.to_string()) - } -} - -impl<'a> TryFrom> for &'a str { - type Error = LimboError; - - #[inline] - fn try_from(value: ValueRef<'a>) -> Result { - match value { - ValueRef::Text(s) => Ok(s.as_str()), - _ => Err(LimboError::ConversionError("Expected text value".into())), - } - } -} - /// This struct serves the purpose of not allocating multiple vectors of bytes if not needed. /// A value in a record that has already been serialized can stay serialized and what this struct offsers /// is easy acces to each value which point to the payload. 
@@ -1502,181 +759,6 @@ impl<'a> ValueRef<'a> { Self::Blob(blob) => ExtValue::from_blob(blob.to_vec()), } } - - pub fn to_blob(&self) -> Option<&'a [u8]> { - match self { - Self::Blob(blob) => Some(*blob), - _ => None, - } - } - - pub fn to_text(&self) -> Option<&'a str> { - match self { - Self::Text(t) => Some(t.as_str()), - _ => None, - } - } - - pub fn as_blob(&self) -> &'a [u8] { - match self { - Self::Blob(b) => b, - _ => panic!("as_blob must be called only for Value::Blob"), - } - } - - pub fn as_float(&self) -> f64 { - match self { - Self::Float(f) => *f, - Self::Integer(i) => *i as f64, - _ => panic!("as_float must be called only for Value::Float or Value::Integer"), - } - } - - pub fn as_int(&self) -> Option { - match self { - Self::Integer(i) => Some(*i), - _ => None, - } - } - - pub fn as_uint(&self) -> u64 { - match self { - Self::Integer(i) => (*i).cast_unsigned(), - _ => 0, - } - } - - pub fn to_owned(&self) -> Value { - match self { - ValueRef::Null => Value::Null, - ValueRef::Integer(i) => Value::Integer(*i), - ValueRef::Float(f) => Value::Float(*f), - ValueRef::Text(text) => Value::Text(Text { - value: text.value.to_string().into(), - subtype: text.subtype, - }), - ValueRef::Blob(b) => Value::Blob(b.to_vec()), - } - } - - pub fn value_type(&self) -> ValueType { - match self { - Self::Null => ValueType::Null, - Self::Integer(_) => ValueType::Integer, - Self::Float(_) => ValueType::Float, - Self::Text(_) => ValueType::Text, - Self::Blob(_) => ValueType::Blob, - } - } -} - -impl Display for ValueRef<'_> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Self::Null => write!(f, "NULL"), - Self::Integer(i) => write!(f, "{i}"), - Self::Float(fl) => write!(f, "{fl:?}"), - Self::Text(s) => write!(f, "{}", s.as_str()), - Self::Blob(b) => write!(f, "{}", String::from_utf8_lossy(b)), - } - } -} - -impl<'a> PartialEq> for ValueRef<'a> { - fn eq(&self, other: &ValueRef<'a>) -> bool { - match (self, other) { - 
(Self::Integer(int_left), Self::Integer(int_right)) => int_left == int_right, - (Self::Integer(int), Self::Float(float)) | (Self::Float(float), Self::Integer(int)) => { - sqlite_int_float_compare(*int, *float).is_eq() - } - (Self::Float(float_left), Self::Float(float_right)) => float_left == float_right, - (Self::Integer(_) | Self::Float(_), Self::Text(_) | Self::Blob(_)) => false, - (Self::Text(_) | Self::Blob(_), Self::Integer(_) | Self::Float(_)) => false, - (Self::Text(text_left), Self::Text(text_right)) => { - text_left.value.as_bytes() == text_right.value.as_bytes() - } - (Self::Blob(blob_left), Self::Blob(blob_right)) => blob_left.eq(blob_right), - (Self::Null, Self::Null) => true, - _ => false, - } - } -} - -impl<'a> PartialEq for ValueRef<'a> { - fn eq(&self, other: &Value) -> bool { - let other = other.as_value_ref(); - self.eq(&other) - } -} - -impl<'a> Eq for ValueRef<'a> {} - -#[expect(clippy::non_canonical_partial_ord_impl)] -impl<'a> PartialOrd> for ValueRef<'a> { - fn partial_cmp(&self, other: &Self) -> Option { - match (self, other) { - (Self::Integer(int_left), Self::Integer(int_right)) => int_left.partial_cmp(int_right), - (Self::Integer(int_left), Self::Float(float_right)) => { - (*int_left as f64).partial_cmp(float_right) - } - (Self::Float(float_left), Self::Integer(int_right)) => { - float_left.partial_cmp(&(*int_right as f64)) - } - (Self::Float(float_left), Self::Float(float_right)) => { - float_left.partial_cmp(float_right) - } - // Numeric vs Text/Blob - (Self::Integer(_) | Self::Float(_), Self::Text(_) | Self::Blob(_)) => { - Some(std::cmp::Ordering::Less) - } - (Self::Text(_) | Self::Blob(_), Self::Integer(_) | Self::Float(_)) => { - Some(std::cmp::Ordering::Greater) - } - - (Self::Text(text_left), Self::Text(text_right)) => text_left - .value - .as_bytes() - .partial_cmp(text_right.value.as_bytes()), - // Text vs Blob - (Self::Text(_), Self::Blob(_)) => Some(std::cmp::Ordering::Less), - (Self::Blob(_), Self::Text(_)) => 
Some(std::cmp::Ordering::Greater), - - (Self::Blob(blob_left), Self::Blob(blob_right)) => blob_left.partial_cmp(blob_right), - (Self::Null, Self::Null) => Some(std::cmp::Ordering::Equal), - (Self::Null, _) => Some(std::cmp::Ordering::Less), - (_, Self::Null) => Some(std::cmp::Ordering::Greater), - } - } -} - -impl<'a> Ord for ValueRef<'a> { - fn cmp(&self, other: &Self) -> std::cmp::Ordering { - self.partial_cmp(other).unwrap() - } -} - -fn sqlite_int_float_compare(int_val: i64, float_val: f64) -> std::cmp::Ordering { - if float_val.is_nan() { - return std::cmp::Ordering::Greater; - } - - if float_val < -9223372036854775808.0 { - return std::cmp::Ordering::Greater; - } - if float_val >= 9223372036854775808.0 { - return std::cmp::Ordering::Less; - } - - let float_as_int = float_val as i64; - match int_val.cmp(&float_as_int) { - std::cmp::Ordering::Equal => { - let int_as_float = int_val as f64; - int_as_float - .partial_cmp(&float_val) - .unwrap_or(std::cmp::Ordering::Equal) - } - other => other, - } } #[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -2752,6 +1834,8 @@ impl WalFrameInfo { #[cfg(test)] mod tests { + use turso_common::value::{Text, TextRef, TextSubtype}; + use super::*; use crate::translate::collate::CollationSeq; From 648f31c9c441fd4ddfe6234ca95e09973a5745cf Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Fri, 14 Nov 2025 12:36:27 -0300 Subject: [PATCH 06/10] create `ExecValue` trait to describe the execution of opcode related operations --- core/lib.rs | 3 +- core/vdbe/execute.rs | 2 +- core/vdbe/value.rs | 222 ++++++++++++++++++++++++++-------- sql_generation/model/table.rs | 2 +- 4 files changed, 173 insertions(+), 56 deletions(-) diff --git a/core/lib.rs b/core/lib.rs index 7fe4fc6886..c97b04ca17 100644 --- a/core/lib.rs +++ b/core/lib.rs @@ -109,7 +109,8 @@ pub use types::ValueRef; use util::parse_schema_rows; pub use util::IOExt; pub use vdbe::{ - builder::QueryMode, explain::EXPLAIN_COLUMNS, explain::EXPLAIN_QUERY_PLAN_COLUMNS, Register, + 
builder::QueryMode, explain::EXPLAIN_COLUMNS, explain::EXPLAIN_QUERY_PLAN_COLUMNS, + value::ExecValue, Register, }; /// Configuration for database features diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index 35088b8dc7..837156b3b7 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -38,7 +38,7 @@ use crate::{ }, translate::emitter::TransactionMode, }; -use crate::{get_cursor, CheckpointMode, Connection, DatabaseStorage, MvCursor}; +use crate::{get_cursor, CheckpointMode, Connection, DatabaseStorage, ExecValue, MvCursor}; use either::Either; use std::any::Any; use std::env::temp_dir; diff --git a/core/vdbe/value.rs b/core/vdbe/value.rs index 7339907d16..5bf018d9e3 100644 --- a/core/vdbe/value.rs +++ b/core/vdbe/value.rs @@ -115,13 +115,132 @@ impl TrimType { } } -impl Value { - pub fn exec_lower(&self) -> Option { +pub trait ExecValue: Sized { + fn exec_lower(&self) -> Option; + + fn exec_length(&self) -> Self; + + fn exec_octet_length(&self) -> Self; + + fn exec_upper(&self) -> Option; + + fn exec_sign(&self) -> Option; + + /// Generates the Soundex code for a given word + fn exec_soundex(&self) -> Value; + + fn exec_abs(&self) -> Result; + + fn exec_random(generate_random_number: F) -> Self + where + F: Fn() -> i64; + + fn exec_randomblob(&self, fill_bytes: F) -> Value + where + F: Fn(&mut [u8]); + + fn exec_quote(&self) -> Self; + + fn exec_nullif(&self, second_value: &Self) -> Self; + + fn exec_substring(value: &Value, start_value: &Value, length_value: Option<&Value>) -> Value; + + fn exec_instr(&self, pattern: &Value) -> Value; + + fn exec_typeof(&self) -> Value; + + fn exec_hex(&self) -> Value; + + fn exec_unhex(&self, ignored_chars: Option<&Value>) -> Value; + + fn exec_unicode(&self) -> Value; + + fn exec_round(&self, precision: Option<&Value>) -> Value; + + fn _exec_trim(&self, pattern: Option<&Value>, trim_type: TrimType) -> Value; + + // Implements TRIM pattern matching. 
+ fn exec_trim(&self, pattern: Option<&Value>) -> Value; + // Implements RTRIM pattern matching. + fn exec_rtrim(&self, pattern: Option<&Value>) -> Value; + + // Implements LTRIM pattern matching. + fn exec_ltrim(&self, pattern: Option<&Value>) -> Value; + + fn exec_zeroblob(&self) -> Value; + + // exec_if returns whether you should jump + fn exec_if(&self, jump_if_null: bool, not: bool) -> bool; + + fn exec_cast(&self, datatype: &str) -> Value; + + fn exec_replace(source: &Value, pattern: &Value, replacement: &Value) -> Value; + + fn exec_math_unary(&self, function: &MathFunc) -> Value; + + fn exec_math_binary(&self, rhs: &Value, function: &MathFunc) -> Value; + + fn exec_math_log(&self, base: Option<&Value>) -> Value; + + fn exec_add(&self, rhs: &Value) -> Value; + + fn exec_subtract(&self, rhs: &Value) -> Value; + + fn exec_multiply(&self, rhs: &Value) -> Value; + + fn exec_divide(&self, rhs: &Value) -> Value; + + fn exec_bit_and(&self, rhs: &Value) -> Value; + + fn exec_bit_or(&self, rhs: &Value) -> Value; + + fn exec_remainder(&self, rhs: &Value) -> Value; + + fn exec_bit_not(&self) -> Value; + + fn exec_shift_left(&self, rhs: &Value) -> Value; + + fn exec_shift_right(&self, rhs: &Value) -> Value; + + fn exec_boolean_not(&self) -> Value; + + fn exec_concat(&self, rhs: &Value) -> Value; + + fn exec_and(&self, rhs: &Value) -> Value; + + fn exec_or(&self, rhs: &Value) -> Value; + + // Implements LIKE pattern matching. 
Caches the constructed regex if a cache is provided + fn exec_like( + regex_cache: Option<&mut HashMap>, + pattern: &str, + text: &str, + ) -> bool; + + fn exec_min<'a, T: Iterator>(regs: T) -> Value; + + fn exec_max<'a, T: Iterator>(regs: T) -> Value; + + fn exec_concat_strings<'a, T: Iterator>(registers: T) -> Self + where + Self: 'a; + + fn exec_concat_ws<'a, T: ExactSizeIterator>(registers: T) -> Self + where + Self: 'a; + + fn exec_char<'a, T: Iterator>(values: T) -> Self + where + Self: 'a; +} + +impl ExecValue for Value { + fn exec_lower(&self) -> Option { self.cast_text() .map(|s| Value::build_text(s.to_ascii_lowercase())) } - pub fn exec_length(&self) -> Self { + fn exec_length(&self) -> Self { match self { Value::Text(t) => { let s = t.as_str(); @@ -140,7 +259,7 @@ impl Value { } } - pub fn exec_octet_length(&self) -> Self { + fn exec_octet_length(&self) -> Self { match self { Value::Text(_) | Value::Integer(_) | Value::Float(_) => { Value::Integer(self.to_string().into_bytes().len() as i64) @@ -150,12 +269,12 @@ impl Value { } } - pub fn exec_upper(&self) -> Option { + fn exec_upper(&self) -> Option { self.cast_text() .map(|s| Value::build_text(s.to_ascii_uppercase())) } - pub fn exec_sign(&self) -> Option { + fn exec_sign(&self) -> Option { let v = Numeric::from_value_strict(self).try_into_f64()?; Some(Value::Integer(if v > 0.0 { @@ -168,7 +287,7 @@ impl Value { } /// Generates the Soundex code for a given word - pub fn exec_soundex(&self) -> Value { + fn exec_soundex(&self) -> Value { let s = match self { Value::Null => return Value::build_text("?000"), Value::Text(s) => { @@ -258,7 +377,7 @@ impl Value { Value::build_text(result.to_uppercase()) } - pub fn exec_abs(&self) -> Result { + fn exec_abs(&self) -> Result { Ok(match self { Value::Null => Value::Null, Value::Integer(v) => { @@ -279,14 +398,14 @@ impl Value { }) } - pub fn exec_random(generate_random_number: F) -> Self + fn exec_random(generate_random_number: F) -> Self where F: Fn() -> i64, { 
Value::Integer(generate_random_number()) } - pub fn exec_randomblob(&self, fill_bytes: F) -> Value + fn exec_randomblob(&self, fill_bytes: F) -> Value where F: Fn(&mut [u8]), { @@ -303,7 +422,7 @@ impl Value { Value::Blob(blob) } - pub fn exec_quote(&self) -> Self { + fn exec_quote(&self) -> Self { match self { Value::Null => Value::build_text("NULL"), Value::Integer(_) | Value::Float(_) => self.to_owned(), @@ -327,7 +446,7 @@ impl Value { } } - pub fn exec_nullif(&self, second_value: &Self) -> Self { + fn exec_nullif(&self, second_value: &Self) -> Self { if self != second_value { self.clone() } else { @@ -335,11 +454,7 @@ impl Value { } } - pub fn exec_substring( - value: &Value, - start_value: &Value, - length_value: Option<&Value>, - ) -> Value { + fn exec_substring(value: &Value, start_value: &Value, length_value: Option<&Value>) -> Value { /// Function is stabilized but not released for version 1.88 \ /// https://doc.rust-lang.org/src/core/str/mod.rs.html#453 const fn ceil_char_boundary(s: &str, index: usize) -> usize { @@ -433,7 +548,7 @@ impl Value { } } - pub fn exec_instr(&self, pattern: &Value) -> Value { + fn exec_instr(&self, pattern: &Value) -> Value { if self == &Value::Null || pattern == &Value::Null { return Value::Null; } @@ -470,7 +585,7 @@ impl Value { } } - pub fn exec_typeof(&self) -> Value { + fn exec_typeof(&self) -> Value { match self { Value::Null => Value::build_text("null"), Value::Integer(_) => Value::build_text("integer"), @@ -480,7 +595,7 @@ impl Value { } } - pub fn exec_hex(&self) -> Value { + fn exec_hex(&self) -> Value { match self { Value::Text(_) | Value::Integer(_) | Value::Float(_) => { let text = self.to_string(); @@ -491,7 +606,7 @@ impl Value { } } - pub fn exec_unhex(&self, ignored_chars: Option<&Value>) -> Value { + fn exec_unhex(&self, ignored_chars: Option<&Value>) -> Value { match self { Value::Null => Value::Null, _ => match ignored_chars { @@ -521,7 +636,7 @@ impl Value { } } - pub fn exec_unicode(&self) -> Value { + 
fn exec_unicode(&self) -> Value { match self { Value::Text(_) | Value::Integer(_) | Value::Float(_) | Value::Blob(_) => { let text = self.to_string(); @@ -535,7 +650,7 @@ impl Value { } } - pub fn exec_round(&self, precision: Option<&Value>) -> Value { + fn exec_round(&self, precision: Option<&Value>) -> Value { let Some(f) = Numeric::from(self).try_into_f64() else { return Value::Null; }; @@ -579,20 +694,20 @@ impl Value { } // Implements TRIM pattern matching. - pub fn exec_trim(&self, pattern: Option<&Value>) -> Value { + fn exec_trim(&self, pattern: Option<&Value>) -> Value { self._exec_trim(pattern, TrimType::All) } // Implements RTRIM pattern matching. - pub fn exec_rtrim(&self, pattern: Option<&Value>) -> Value { + fn exec_rtrim(&self, pattern: Option<&Value>) -> Value { self._exec_trim(pattern, TrimType::Right) } // Implements LTRIM pattern matching. - pub fn exec_ltrim(&self, pattern: Option<&Value>) -> Value { + fn exec_ltrim(&self, pattern: Option<&Value>) -> Value { self._exec_trim(pattern, TrimType::Left) } - pub fn exec_zeroblob(&self) -> Value { + fn exec_zeroblob(&self) -> Value { let length: i64 = match self { Value::Integer(i) => *i, Value::Float(f) => *f as i64, @@ -603,14 +718,14 @@ impl Value { } // exec_if returns whether you should jump - pub fn exec_if(&self, jump_if_null: bool, not: bool) -> bool { + fn exec_if(&self, jump_if_null: bool, not: bool) -> bool { Numeric::from(self) .try_into_bool() .map(|jump| if not { !jump } else { jump }) .unwrap_or(jump_if_null) } - pub fn exec_cast(&self, datatype: &str) -> Value { + fn exec_cast(&self, datatype: &str) -> Value { if matches!(self, Value::Null) { return Value::Null; } @@ -700,7 +815,7 @@ impl Value { } } - pub fn exec_replace(source: &Value, pattern: &Value, replacement: &Value) -> Value { + fn exec_replace(source: &Value, pattern: &Value, replacement: &Value) -> Value { // The replace(X,Y,Z) function returns a string formed by substituting string Z for every occurrence of // string Y in 
string X. The BINARY collating sequence is used for comparisons. If Y is an empty string // then return X unchanged. If Z is not initially a string, it is cast to a UTF-8 string prior to processing. @@ -733,7 +848,7 @@ impl Value { } } - pub fn exec_math_unary(&self, function: &MathFunc) -> Value { + fn exec_math_unary(&self, function: &MathFunc) -> Value { let v = Numeric::from_value_strict(self); // In case of some functions and integer input, return the input as is @@ -785,7 +900,7 @@ impl Value { } } - pub fn exec_math_binary(&self, rhs: &Value, function: &MathFunc) -> Value { + fn exec_math_binary(&self, rhs: &Value, function: &MathFunc) -> Value { let Some(lhs) = Numeric::from_value_strict(self).try_into_f64() else { return Value::Null; }; @@ -808,7 +923,7 @@ impl Value { } } - pub fn exec_math_log(&self, base: Option<&Value>) -> Value { + fn exec_math_log(&self, base: Option<&Value>) -> Value { let Some(f) = Numeric::from_value_strict(self).try_into_f64() else { return Value::Null; }; @@ -840,31 +955,31 @@ impl Value { Value::Float(result) } - pub fn exec_add(&self, rhs: &Value) -> Value { + fn exec_add(&self, rhs: &Value) -> Value { (Numeric::from(self) + Numeric::from(rhs)).into() } - pub fn exec_subtract(&self, rhs: &Value) -> Value { + fn exec_subtract(&self, rhs: &Value) -> Value { (Numeric::from(self) - Numeric::from(rhs)).into() } - pub fn exec_multiply(&self, rhs: &Value) -> Value { + fn exec_multiply(&self, rhs: &Value) -> Value { (Numeric::from(self) * Numeric::from(rhs)).into() } - pub fn exec_divide(&self, rhs: &Value) -> Value { + fn exec_divide(&self, rhs: &Value) -> Value { (Numeric::from(self) / Numeric::from(rhs)).into() } - pub fn exec_bit_and(&self, rhs: &Value) -> Value { + fn exec_bit_and(&self, rhs: &Value) -> Value { (NullableInteger::from(self) & NullableInteger::from(rhs)).into() } - pub fn exec_bit_or(&self, rhs: &Value) -> Value { + fn exec_bit_or(&self, rhs: &Value) -> Value { (NullableInteger::from(self) | 
NullableInteger::from(rhs)).into() } - pub fn exec_remainder(&self, rhs: &Value) -> Value { + fn exec_remainder(&self, rhs: &Value) -> Value { let convert_to_float = matches!(Numeric::from(self), Numeric::Float(_)) || matches!(Numeric::from(rhs), Numeric::Float(_)); @@ -880,26 +995,26 @@ impl Value { } } - pub fn exec_bit_not(&self) -> Value { + fn exec_bit_not(&self) -> Value { (!NullableInteger::from(self)).into() } - pub fn exec_shift_left(&self, rhs: &Value) -> Value { + fn exec_shift_left(&self, rhs: &Value) -> Value { (NullableInteger::from(self) << NullableInteger::from(rhs)).into() } - pub fn exec_shift_right(&self, rhs: &Value) -> Value { + fn exec_shift_right(&self, rhs: &Value) -> Value { (NullableInteger::from(self) >> NullableInteger::from(rhs)).into() } - pub fn exec_boolean_not(&self) -> Value { + fn exec_boolean_not(&self) -> Value { match Numeric::from(self).try_into_bool() { None => Value::Null, Some(v) => Value::Integer(!v as i64), } } - pub fn exec_concat(&self, rhs: &Value) -> Value { + fn exec_concat(&self, rhs: &Value) -> Value { if let (Value::Blob(lhs), Value::Blob(rhs)) = (self, rhs) { return Value::build_text( String::from_utf8_lossy(&[lhs.as_slice(), rhs.as_slice()].concat()).into_owned(), @@ -917,7 +1032,7 @@ impl Value { Value::build_text(lhs + &rhs) } - pub fn exec_and(&self, rhs: &Value) -> Value { + fn exec_and(&self, rhs: &Value) -> Value { match ( Numeric::from(self).try_into_bool(), Numeric::from(rhs).try_into_bool(), @@ -928,7 +1043,7 @@ impl Value { } } - pub fn exec_or(&self, rhs: &Value) -> Value { + fn exec_or(&self, rhs: &Value) -> Value { match ( Numeric::from(self).try_into_bool(), Numeric::from(rhs).try_into_bool(), @@ -940,7 +1055,7 @@ impl Value { } // Implements LIKE pattern matching. 
Caches the constructed regex if a cache is provided - pub fn exec_like( + fn exec_like( regex_cache: Option<&mut HashMap>, pattern: &str, text: &str, @@ -961,15 +1076,15 @@ impl Value { } } - pub fn exec_min<'a, T: Iterator>(regs: T) -> Value { + fn exec_min<'a, T: Iterator>(regs: T) -> Value { regs.min().map(|v| v.to_owned()).unwrap_or(Value::Null) } - pub fn exec_max<'a, T: Iterator>(regs: T) -> Value { + fn exec_max<'a, T: Iterator>(regs: T) -> Value { regs.max().map(|v| v.to_owned()).unwrap_or(Value::Null) } - pub fn exec_concat_strings<'a, T: Iterator>(registers: T) -> Self { + fn exec_concat_strings<'a, T: Iterator>(registers: T) -> Self { let mut result = String::new(); for val in registers { match val { @@ -981,7 +1096,7 @@ impl Value { Value::build_text(result) } - pub fn exec_concat_ws<'a, T: ExactSizeIterator>(mut registers: T) -> Self { + fn exec_concat_ws<'a, T: ExactSizeIterator>(mut registers: T) -> Self { if registers.len() == 0 { return Value::Null; } @@ -1000,7 +1115,7 @@ impl Value { Value::build_text(result) } - pub fn exec_char<'a, T: Iterator>(values: T) -> Self { + fn exec_char<'a, T: Iterator>(values: T) -> Self { let result: String = values .filter_map(|x| { if let Value::Integer(i) = x { @@ -1045,6 +1160,7 @@ pub fn construct_like_regex(pattern: &str) -> Regex { #[cfg(test)] mod tests { use crate::types::Value; + use crate::vdbe::value::ExecValue; use crate::vdbe::{Bitfield, Register}; use rand::{Rng, RngCore}; diff --git a/sql_generation/model/table.rs b/sql_generation/model/table.rs index dce2fdddf2..40b78471ea 100644 --- a/sql_generation/model/table.rs +++ b/sql_generation/model/table.rs @@ -2,7 +2,7 @@ use std::{fmt::Display, hash::Hash, ops::Deref}; use itertools::Itertools; use serde::{Deserialize, Serialize}; -use turso_core::{numeric::Numeric, types}; +use turso_core::{numeric::Numeric, types, ExecValue}; use turso_parser::ast::{self, ColumnConstraint, SortOrder}; use crate::model::query::predicate::Predicate; From 
7fab13597f69a6b7d380f10e6276f8b2964b83ac Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Fri, 14 Nov 2025 12:50:50 -0300 Subject: [PATCH 07/10] add `ToExtValue` and `FromExtValue` for ffi conversion --- core/ext/vtab_xconnect.rs | 5 ++++- core/types.rs | 39 ++++++++++++++++++--------------------- core/vdbe/execute.rs | 4 ++-- core/vtab.rs | 3 ++- 4 files changed, 26 insertions(+), 25 deletions(-) diff --git a/core/ext/vtab_xconnect.rs b/core/ext/vtab_xconnect.rs index 7cd1321cf2..823aad36a4 100644 --- a/core/ext/vtab_xconnect.rs +++ b/core/ext/vtab_xconnect.rs @@ -1,4 +1,7 @@ -use crate::{types::Value, Connection, Statement, StepResult}; +use crate::{ + types::{FromExtValue, ToExtValue, Value}, + Connection, Statement, StepResult, +}; use std::{ boxed::Box, ffi::{c_char, c_void, CStr, CString}, diff --git a/core/types.rs b/core/types.rs index 5f52adb00b..b6daf557b1 100644 --- a/core/types.rs +++ b/core/types.rs @@ -35,18 +35,27 @@ pub struct ExternalAggState { pub finalize_fn: FinalizeFunction, } -impl Value { - pub fn to_ffi(&self) -> ExtValue { - match self { - Self::Null => ExtValue::null(), - Self::Integer(i) => ExtValue::from_integer(*i), - Self::Float(fl) => ExtValue::from_float(*fl), - Self::Text(text) => ExtValue::from_text(text.as_str().to_string()), - Self::Blob(blob) => ExtValue::from_blob(blob.to_vec()), +pub trait ToExtValue: AsValueRef { + fn to_ffi(&self) -> ExtValue { + let value = self.as_value_ref(); + match value { + ValueRef::Null => ExtValue::null(), + ValueRef::Integer(i) => ExtValue::from_integer(i), + ValueRef::Float(fl) => ExtValue::from_float(fl), + ValueRef::Text(text) => ExtValue::from_text(text.as_str().to_string()), + ValueRef::Blob(blob) => ExtValue::from_blob(blob.to_vec()), } } +} + +impl ToExtValue for V {} + +pub trait FromExtValue: Sized { + fn from_ffi(v: ExtValue) -> Result; +} - pub fn from_ffi(v: ExtValue) -> Result { +impl FromExtValue for Value { + fn from_ffi(v: ExtValue) -> Result { let res = match v.value_type() { 
ExtValueType::Null => Ok(Value::Null), ExtValueType::Integer => { @@ -749,18 +758,6 @@ impl RecordCursor { } } -impl<'a> ValueRef<'a> { - pub fn to_ffi(&self) -> ExtValue { - match self { - Self::Null => ExtValue::null(), - Self::Integer(i) => ExtValue::from_integer(*i), - Self::Float(fl) => ExtValue::from_float(*fl), - Self::Text(text) => ExtValue::from_text(text.as_str().to_string()), - Self::Blob(blob) => ExtValue::from_blob(blob.to_vec()), - } - } -} - #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub struct KeyInfo { pub sort_order: SortOrder, diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index 837156b3b7..5deb6747e9 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -13,8 +13,8 @@ use crate::storage::pager::{AtomicDbState, CreateBTreeFlags, DbState}; use crate::storage::sqlite3_ondisk::{read_varint_fast, DatabaseHeader, PageSize}; use crate::translate::collate::CollationSeq; use crate::types::{ - compare_immutable, compare_records_generic, AsValueRef, Extendable, IOCompletions, - ImmutableRecord, SeekResult, Text, + compare_immutable, compare_records_generic, AsValueRef, Extendable, FromExtValue, + IOCompletions, ImmutableRecord, SeekResult, Text, ToExtValue, }; use crate::util::{ normalize_ident, rewrite_column_references_if_needed, rewrite_fk_parent_cols_if_self_ref, diff --git a/core/vtab.rs b/core/vtab.rs index 36e05e9bb7..695f697e6b 100644 --- a/core/vtab.rs +++ b/core/vtab.rs @@ -1,5 +1,6 @@ use crate::pragma::{PragmaVirtualTable, PragmaVirtualTableCursor}; use crate::schema::Column; +use crate::types::{FromExtValue, ToExtValue}; use crate::util::columns_from_create_table_body; use crate::{Connection, LimboError, SymbolTable, Value}; use parking_lot::RwLock; @@ -525,7 +526,7 @@ impl ExtVirtualTableCursor { fn column(&self, column: usize) -> crate::Result { let val = unsafe { (self.implementation.column)(self.cursor.as_ptr(), column as u32) }; - Value::from_ffi(val) + crate::Value::from_ffi(val) } fn next(&self) -> crate::Result { 
From a424f7197b9db5b33ff6eadabd4055c463e7b633 Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Fri, 14 Nov 2025 12:58:02 -0300 Subject: [PATCH 08/10] add `SerialType::from_value` --- core/types.rs | 16 +++++++--------- core/vdbe/value.rs | 2 +- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/core/types.rs b/core/types.rs index b6daf557b1..1ab59b6905 100644 --- a/core/types.rs +++ b/core/types.rs @@ -1,6 +1,6 @@ pub use turso_common::value::{ - sqlite_int_float_compare, AsValueRef, Extendable, Text, TextRef, TextSubtype, Value, ValueRef, - ValueType, + sqlite_int_float_compare, AsValueRef, Extendable, FromValue, Text, TextRef, TextSubtype, Value, + ValueRef, ValueType, }; use turso_ext::{AggCtx, FinalizeFunction, StepFunction}; use turso_parser::ast::SortOrder; @@ -292,7 +292,7 @@ impl ImmutableRecord { let mut serial_type_buf = [0; 9]; // write serial types for value in values.clone() { - let serial_type = SerialType::from(value.as_value_ref()); + let serial_type = SerialType::from_value(value); let n = write_varint(&mut serial_type_buf[0..], serial_type.into()); serials.push((serial_type_buf, n)); @@ -325,7 +325,7 @@ impl ImmutableRecord { match value { ValueRef::Null => {} ValueRef::Integer(i) => { - let serial_type = SerialType::from(value); + let serial_type = SerialType::from_value(value); match serial_type.kind() { SerialTypeKind::ConstInt0 | SerialTypeKind::ConstInt1 => {} SerialTypeKind::I8 => writer.extend_from_slice(&(i as i8).to_be_bytes()), @@ -1417,10 +1417,8 @@ impl SerialType { SerialTypeKind::Blob => (self.0 as usize - 12) / 2, } } -} -impl From for SerialType { - fn from(value: T) -> Self { + pub fn from_value(value: impl AsValueRef) -> Self { let value = value.as_value_ref(); match value { ValueRef::Null => SerialType::null(), @@ -1499,7 +1497,7 @@ impl Record { // write serial types for value in &self.values { - let serial_type = SerialType::from(value); + let serial_type = SerialType::from_value(value); buf.resize(buf.len() + 9, 
0); // Ensure space for varint (1-9 bytes in length) let len = buf.len(); let n = write_varint(&mut buf[len - 9..], serial_type.into()); @@ -1512,7 +1510,7 @@ impl Record { match value { Value::Null => {} Value::Integer(i) => { - let serial_type = SerialType::from(value); + let serial_type = SerialType::from_value(value); match serial_type.kind() { SerialTypeKind::ConstInt0 | SerialTypeKind::ConstInt1 => {} SerialTypeKind::I8 => buf.extend_from_slice(&(*i as i8).to_be_bytes()), diff --git a/core/vdbe/value.rs b/core/vdbe/value.rs index 5bf018d9e3..1e9be6b350 100644 --- a/core/vdbe/value.rs +++ b/core/vdbe/value.rs @@ -99,7 +99,7 @@ impl From for ComparisonOp { } } -enum TrimType { +pub enum TrimType { All, Left, Right, From 24b108e08731c21e7889784148a3d06960a17868 Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Fri, 14 Nov 2025 15:13:06 -0300 Subject: [PATCH 09/10] remove Column and Type code from core --- Cargo.lock | 1 + common/Cargo.toml | 1 + common/lib.rs | 1 + common/schema/column.rs | 104 +++++++++++++++++- common/schema/mod.rs | 52 +++++++++ common/utils.rs | 5 + core/schema.rs | 236 +--------------------------------------- 7 files changed, 163 insertions(+), 237 deletions(-) create mode 100644 common/utils.rs diff --git a/Cargo.lock b/Cargo.lock index 03ff56b172..9ed0477a71 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4904,6 +4904,7 @@ dependencies = [ "strum", "strum_macros", "thiserror 2.0.16", + "turso_macros", "turso_parser", "uncased", ] diff --git a/common/Cargo.toml b/common/Cargo.toml index 4162d95aef..89a258fcb8 100644 --- a/common/Cargo.toml +++ b/common/Cargo.toml @@ -21,5 +21,6 @@ serde = { workspace = true, optional = true } strum.workspace = true strum_macros.workspace = true thiserror.workspace = true +turso_macros.workspace = true turso_parser.workspace = true uncased = "0.9.10" diff --git a/common/lib.rs b/common/lib.rs index 2024dbc57d..49e323c7be 100644 --- a/common/lib.rs +++ b/common/lib.rs @@ -1,4 +1,5 @@ pub mod numeric; pub mod 
schema; pub mod table_reference; +pub mod utils; pub mod value; diff --git a/common/schema/column.rs b/common/schema/column.rs index 4290b81fa1..fb9dcd8b13 100644 --- a/common/schema/column.rs +++ b/common/schema/column.rs @@ -1,8 +1,13 @@ use core::fmt; -use turso_parser::ast::Expr; +use turso_macros::match_ignore_ascii_case; +use turso_parser::ast::{self, ColumnDefinition, Expr, Literal}; -use crate::schema::{affinity::Affinity, collation::CollationSeq}; +use crate::{ + contains_ignore_ascii_case, eq_ignore_ascii_case, + schema::{affinity::Affinity, collation::CollationSeq}, + utils::normalize_ident, +}; #[derive(Debug, Clone)] pub struct Column { @@ -197,6 +202,64 @@ impl Column { } } +// TODO: This might replace some of util::columns_from_create_table_body +impl From<&ColumnDefinition> for Column { + fn from(value: &ColumnDefinition) -> Self { + let name = value.col_name.as_str(); + + let mut default = None; + let mut notnull = false; + let mut primary_key = false; + let mut unique = false; + let mut collation = None; + + for ast::NamedColumnConstraint { constraint, .. } in &value.constraints { + match constraint { + ast::ColumnConstraint::PrimaryKey { .. } => primary_key = true, + ast::ColumnConstraint::NotNull { .. } => notnull = true, + ast::ColumnConstraint::Unique(..) 
=> unique = true, + ast::ColumnConstraint::Default(expr) => { + default + .replace(translate_ident_to_string_literal(expr).unwrap_or(expr.clone())); + } + ast::ColumnConstraint::Collate { collation_name } => { + collation.replace( + CollationSeq::new(collation_name.as_str()) + .expect("collation should have been set correctly in create table"), + ); + } + _ => {} + }; + } + + let ty = match value.col_type { + Some(ref data_type) => type_from_name(&data_type.name).0, + None => Type::Null, + }; + + let ty_str = value + .col_type + .as_ref() + .map(|t| t.name.to_string()) + .unwrap_or_default(); + + let hidden = ty_str.contains("HIDDEN"); + + Column::new( + Some(normalize_ident(name)), + ty_str, + default, + ty, + collation, + primary_key, + primary_key && matches!(ty, Type::Integer), + notnull, + unique, + hidden, + ) + } +} + #[repr(u8)] #[derive(Debug, Clone, Copy, PartialEq)] pub enum Type { @@ -236,3 +299,40 @@ impl fmt::Display for Type { write!(f, "{s}") } } + +pub fn translate_ident_to_string_literal(expr: &Expr) -> Option> { + match expr { + Expr::Name(name) => Some(Box::new(Expr::Literal(Literal::String(name.as_literal())))), + _ => None, + } +} + +// this function returns the affinity type and whether the type name was exactly "INTEGER" +// https://www.sqlite.org/datatype3.html +pub fn type_from_name(type_name: &str) -> (Type, bool) { + let type_name = type_name.as_bytes(); + if type_name.is_empty() { + return (Type::Blob, false); + } + + if eq_ignore_ascii_case!(type_name, b"INTEGER") { + return (Type::Integer, true); + } + + if contains_ignore_ascii_case!(type_name, b"INT") { + return (Type::Integer, false); + } + + if let Some(ty) = type_name.windows(4).find_map(|s| { + match_ignore_ascii_case!(match s { + b"CHAR" | b"CLOB" | b"TEXT" => Some(Type::Text), + b"BLOB" => Some(Type::Blob), + b"REAL" | b"FLOA" | b"DOUB" => Some(Type::Real), + _ => None, + }) + }) { + return (ty, false); + } + + (Type::Numeric, false) +} diff --git a/common/schema/mod.rs 
b/common/schema/mod.rs index bbbfbb191a..5a598c883c 100644 --- a/common/schema/mod.rs +++ b/common/schema/mod.rs @@ -1,3 +1,55 @@ pub mod affinity; pub mod collation; pub mod column; + +#[macro_export] +macro_rules! eq_ignore_ascii_case { + ( $var:expr, $value:literal ) => {{ + ::turso_macros::match_ignore_ascii_case!(match $var { + $value => true, + _ => false, + }) + }}; +} + +#[macro_export] +macro_rules! contains_ignore_ascii_case { + ( $var:expr, $value:literal ) => {{ + let compare_to_idx = $var.len().saturating_sub($value.len()); + if $var.len() < $value.len() { + false + } else { + let mut result = false; + for i in 0..=compare_to_idx { + if eq_ignore_ascii_case!(&$var[i..i + $value.len()], $value) { + result = true; + break; + } + } + + result + } + }}; +} + +#[macro_export] +macro_rules! starts_with_ignore_ascii_case { + ( $var:expr, $value:literal ) => {{ + if $var.len() < $value.len() { + false + } else { + eq_ignore_ascii_case!(&$var[..$value.len()], $value) + } + }}; +} + +#[macro_export] +macro_rules! 
ends_with_ignore_ascii_case { + ( $var:expr, $value:literal ) => {{ + if $var.len() < $value.len() { + false + } else { + eq_ignore_ascii_case!(&$var[$var.len() - $value.len()..], $value) + } + }}; +} diff --git a/common/utils.rs b/common/utils.rs new file mode 100644 index 0000000000..5c99d35638 --- /dev/null +++ b/common/utils.rs @@ -0,0 +1,5 @@ +pub fn normalize_ident(identifier: &str) -> String { + // quotes normalization already happened in the parser layer (see Name ast node implementation) + // so, we only need to convert identifier string to lowercase + identifier.to_lowercase() +} diff --git a/core/schema.rs b/core/schema.rs index 08bfafbc8b..7e89cecda4 100644 --- a/core/schema.rs +++ b/core/schema.rs @@ -5,7 +5,7 @@ use crate::translate::expr::{bind_and_rewrite_expr, walk_expr, BindingBehavior, use crate::translate::index::{resolve_index_method_parameters, resolve_sorted_columns}; use crate::translate::planner::ROWID_STRS; use parking_lot::RwLock; -use turso_common::schema::affinity::Affinity; +pub use turso_common::schema::column::{Column, Type}; use turso_macros::AtomicEnum; #[derive(Debug, Clone, AtomicEnum)] @@ -85,7 +85,6 @@ use crate::{ Connection, LimboError, MvCursor, MvStore, Pager, SymbolTable, ValueRef, VirtualTable, }; use crate::{util::normalize_ident, Result}; -use core::fmt; use std::collections::{HashMap, HashSet, VecDeque}; use std::ops::Deref; use std::sync::Arc; @@ -2065,199 +2064,6 @@ impl ResolvedFkRef { } } -#[derive(Debug, Clone)] -pub struct Column { - pub name: Option, - pub ty_str: String, - pub default: Option>, - raw: u16, -} - -// flags -const F_PRIMARY_KEY: u16 = 1; -const F_ROWID_ALIAS: u16 = 2; -const F_NOTNULL: u16 = 4; -const F_UNIQUE: u16 = 8; -const F_HIDDEN: u16 = 16; - -// pack Type and Collation in the remaining bits -const TYPE_SHIFT: u16 = 5; -const TYPE_MASK: u16 = 0b111 << TYPE_SHIFT; -const COLL_SHIFT: u16 = TYPE_SHIFT + 3; -const COLL_MASK: u16 = 0b11 << COLL_SHIFT; - -impl Column { - pub fn affinity(&self) -> 
Affinity { - Affinity::affinity(&self.ty_str) - } - pub const fn new_default_text( - name: Option, - ty_str: String, - default: Option>, - ) -> Self { - Self::new( - name, - ty_str, - default, - Type::Text, - None, - false, - false, - false, - false, - false, - ) - } - pub const fn new_default_integer( - name: Option, - ty_str: String, - default: Option>, - ) -> Self { - Self::new( - name, - ty_str, - default, - Type::Integer, - None, - false, - false, - false, - false, - false, - ) - } - #[inline] - #[allow(clippy::too_many_arguments)] - pub const fn new( - name: Option, - ty_str: String, - default: Option>, - ty: Type, - col: Option, - primary_key: bool, - rowid_alias: bool, - notnull: bool, - unique: bool, - hidden: bool, - ) -> Self { - let mut raw = 0u16; - raw |= (ty as u16) << TYPE_SHIFT; - if let Some(c) = col { - raw |= (c as u16) << COLL_SHIFT; - } - if primary_key { - raw |= F_PRIMARY_KEY - } - if rowid_alias { - raw |= F_ROWID_ALIAS - } - if notnull { - raw |= F_NOTNULL - } - if unique { - raw |= F_UNIQUE - } - if hidden { - raw |= F_HIDDEN - } - Self { - name, - ty_str, - default, - raw, - } - } - #[inline] - pub const fn ty(&self) -> Type { - let v = ((self.raw & TYPE_MASK) >> TYPE_SHIFT) as u8; - Type::from_bits(v) - } - - #[inline] - pub const fn set_ty(&mut self, ty: Type) { - self.raw = (self.raw & !TYPE_MASK) | (((ty as u16) << TYPE_SHIFT) & TYPE_MASK); - } - - #[inline] - pub const fn collation_opt(&self) -> Option { - if self.has_explicit_collation() { - Some(self.collation()) - } else { - None - } - } - - #[inline] - pub const fn collation(&self) -> CollationSeq { - let v = ((self.raw & COLL_MASK) >> COLL_SHIFT) as u8; - CollationSeq::from_bits(v) - } - - #[inline] - pub const fn has_explicit_collation(&self) -> bool { - let v = ((self.raw & COLL_MASK) >> COLL_SHIFT) as u8; - v != CollationSeq::Unset as u8 - } - - #[inline] - pub const fn set_collation(&mut self, c: Option) { - if let Some(c) = c { - self.raw = (self.raw & !COLL_MASK) | (((c 
as u16) << COLL_SHIFT) & COLL_MASK); - } - } - - #[inline] - pub fn primary_key(&self) -> bool { - self.raw & F_PRIMARY_KEY != 0 - } - #[inline] - pub const fn is_rowid_alias(&self) -> bool { - self.raw & F_ROWID_ALIAS != 0 - } - #[inline] - pub const fn notnull(&self) -> bool { - self.raw & F_NOTNULL != 0 - } - #[inline] - pub const fn unique(&self) -> bool { - self.raw & F_UNIQUE != 0 - } - #[inline] - pub const fn hidden(&self) -> bool { - self.raw & F_HIDDEN != 0 - } - - #[inline] - pub const fn set_primary_key(&mut self, v: bool) { - self.set_flag(F_PRIMARY_KEY, v); - } - #[inline] - pub const fn set_rowid_alias(&mut self, v: bool) { - self.set_flag(F_ROWID_ALIAS, v); - } - #[inline] - pub const fn set_notnull(&mut self, v: bool) { - self.set_flag(F_NOTNULL, v); - } - #[inline] - pub const fn set_unique(&mut self, v: bool) { - self.set_flag(F_UNIQUE, v); - } - #[inline] - pub const fn set_hidden(&mut self, v: bool) { - self.set_flag(F_HIDDEN, v); - } - - #[inline] - const fn set_flag(&mut self, mask: u16, val: bool) { - if val { - self.raw |= mask - } else { - self.raw &= !mask - } - } -} - // TODO: This might replace some of util::columns_from_create_table_body impl From<&ColumnDefinition> for Column { fn from(value: &ColumnDefinition) -> Self { @@ -2316,46 +2122,6 @@ impl From<&ColumnDefinition> for Column { } } -#[repr(u8)] -#[derive(Debug, Clone, Copy, PartialEq)] -pub enum Type { - Null = 0, - Text = 1, - Numeric = 2, - Integer = 3, - Real = 4, - Blob = 5, -} - -impl Type { - #[inline] - const fn from_bits(bits: u8) -> Self { - match bits { - 0 => Type::Null, - 1 => Type::Text, - 2 => Type::Numeric, - 3 => Type::Integer, - 4 => Type::Real, - 5 => Type::Blob, - _ => Type::Null, - } - } -} - -impl fmt::Display for Type { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let s = match self { - Self::Null => "", - Self::Text => "TEXT", - Self::Numeric => "NUMERIC", - Self::Integer => "INTEGER", - Self::Real => "REAL", - Self::Blob => "BLOB", - }; 
- write!(f, "{s}") - } -} - pub fn sqlite_schema_table() -> BTreeTable { BTreeTable { root_page: 1, From d4ca8d4c5eb757f3ef58017665ac13486d951f67 Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Fri, 14 Nov 2025 15:31:47 -0300 Subject: [PATCH 10/10] remove CollateCode from core --- common/schema/{collation.rs => collate.rs} | 9 ++- common/schema/column.rs | 2 +- common/schema/mod.rs | 2 +- core/error.rs | 9 +++ core/schema.rs | 62 +-------------------- core/translate/collate.rs | 64 +--------------------- 6 files changed, 20 insertions(+), 128 deletions(-) rename common/schema/{collation.rs => collate.rs} (88%) diff --git a/common/schema/collation.rs b/common/schema/collate.rs similarity index 88% rename from common/schema/collation.rs rename to common/schema/collate.rs index aa45c1455d..626e74b463 100644 --- a/common/schema/collation.rs +++ b/common/schema/collate.rs @@ -18,10 +18,15 @@ pub enum CollationSeq { Rtrim = 3, } +#[derive(Debug)] +pub enum CollateError { + NotFound(String), +} + impl CollationSeq { - pub fn new(collation: &str) -> Result { + pub fn new(collation: &str) -> Result { CollationSeq::from_str(collation) - .map_err(|_| format!("no such collation sequence: {collation}")) + .map_err(|_| CollateError::NotFound(format!("no such collation sequence: {collation}"))) } #[inline] /// Returns the collation, defaulting to BINARY if unset diff --git a/common/schema/column.rs b/common/schema/column.rs index fb9dcd8b13..c8c98b116f 100644 --- a/common/schema/column.rs +++ b/common/schema/column.rs @@ -5,7 +5,7 @@ use turso_parser::ast::{self, ColumnDefinition, Expr, Literal}; use crate::{ contains_ignore_ascii_case, eq_ignore_ascii_case, - schema::{affinity::Affinity, collation::CollationSeq}, + schema::{affinity::Affinity, collate::CollationSeq}, utils::normalize_ident, }; diff --git a/common/schema/mod.rs b/common/schema/mod.rs index 5a598c883c..2b9ce0146e 100644 --- a/common/schema/mod.rs +++ b/common/schema/mod.rs @@ -1,5 +1,5 @@ pub mod affinity; -pub 
mod collation; +pub mod collate; pub mod column; #[macro_export] diff --git a/core/error.rs b/core/error.rs index c5bb811db2..a294672508 100644 --- a/core/error.rs +++ b/core/error.rs @@ -1,4 +1,5 @@ use thiserror::Error; +use turso_common::schema::collate::CollateError; use crate::storage::page_cache::CacheError; @@ -79,6 +80,14 @@ pub enum LimboError { PlanningError(String), } +impl From for LimboError { + fn from(value: CollateError) -> Self { + match value { + CollateError::NotFound(msg) => LimboError::ParseError(msg), + } + } +} + // We only propagate the error kind so we can avoid string allocation in hot path and copying/cloning enums is cheaper impl From for LimboError { fn from(value: std::io::Error) -> Self { diff --git a/core/schema.rs b/core/schema.rs index 7e89cecda4..ebc70677b8 100644 --- a/core/schema.rs +++ b/core/schema.rs @@ -90,9 +90,7 @@ use std::ops::Deref; use std::sync::Arc; use std::sync::Mutex; use tracing::trace; -use turso_parser::ast::{ - self, ColumnDefinition, Expr, InitDeferredPred, Literal, RefAct, SortOrder, TableOptions, -}; +use turso_parser::ast::{self, Expr, InitDeferredPred, Literal, RefAct, SortOrder, TableOptions}; use turso_parser::{ ast::{Cmd, CreateTableBody, ResultColumn, Stmt}, parser::Parser, @@ -2064,64 +2062,6 @@ impl ResolvedFkRef { } } -// TODO: This might replace some of util::columns_from_create_table_body -impl From<&ColumnDefinition> for Column { - fn from(value: &ColumnDefinition) -> Self { - let name = value.col_name.as_str(); - - let mut default = None; - let mut notnull = false; - let mut primary_key = false; - let mut unique = false; - let mut collation = None; - - for ast::NamedColumnConstraint { constraint, .. } in &value.constraints { - match constraint { - ast::ColumnConstraint::PrimaryKey { .. } => primary_key = true, - ast::ColumnConstraint::NotNull { .. } => notnull = true, - ast::ColumnConstraint::Unique(..) 
=> unique = true, - ast::ColumnConstraint::Default(expr) => { - default - .replace(translate_ident_to_string_literal(expr).unwrap_or(expr.clone())); - } - ast::ColumnConstraint::Collate { collation_name } => { - collation.replace( - CollationSeq::new(collation_name.as_str()) - .expect("collation should have been set correctly in create table"), - ); - } - _ => {} - }; - } - - let ty = match value.col_type { - Some(ref data_type) => type_from_name(&data_type.name).0, - None => Type::Null, - }; - - let ty_str = value - .col_type - .as_ref() - .map(|t| t.name.to_string()) - .unwrap_or_default(); - - let hidden = ty_str.contains("HIDDEN"); - - Column::new( - Some(normalize_ident(name)), - ty_str, - default, - ty, - collation, - primary_key, - primary_key && matches!(ty, Type::Integer), - notnull, - unique, - hidden, - ) - } -} - pub fn sqlite_schema_table() -> BTreeTable { BTreeTable { root_page: 1, diff --git a/core/translate/collate.rs b/core/translate/collate.rs index 6c5048d509..a7d9cc5cac 100644 --- a/core/translate/collate.rs +++ b/core/translate/collate.rs @@ -1,5 +1,4 @@ -use std::{cmp::Ordering, str::FromStr as _}; - +pub use turso_common::schema::collate::*; use turso_parser::ast::Expr; use crate::{ @@ -10,67 +9,6 @@ use crate::{ Result, }; -// TODO: in the future allow user to define collation sequences -// Will have to meddle with ffi for this -#[derive( - Debug, Clone, Copy, Eq, PartialEq, strum_macros::Display, strum_macros::EnumString, Default, -)] -#[strum(ascii_case_insensitive)] -/// **Pre defined collation sequences**\ -/// Collating functions only matter when comparing string values. -/// Numeric values are always compared numerically, and BLOBs are always compared byte-by-byte using memcmp(). 
-#[repr(u8)] -pub enum CollationSeq { - Unset = 0, - #[default] - Binary = 1, - NoCase = 2, - Rtrim = 3, -} - -impl CollationSeq { - pub fn new(collation: &str) -> crate::Result { - CollationSeq::from_str(collation).map_err(|_| { - crate::LimboError::ParseError(format!("no such collation sequence: {collation}")) - }) - } - #[inline] - /// Returns the collation, defaulting to BINARY if unset - pub const fn from_bits(bits: u8) -> Self { - match bits { - 2 => CollationSeq::NoCase, - 3 => CollationSeq::Rtrim, - _ => CollationSeq::Binary, - } - } - - #[inline(always)] - pub fn compare_strings(&self, lhs: &str, rhs: &str) -> Ordering { - match self { - CollationSeq::Unset | CollationSeq::Binary => Self::binary_cmp(lhs, rhs), - CollationSeq::NoCase => Self::nocase_cmp(lhs, rhs), - CollationSeq::Rtrim => Self::rtrim_cmp(lhs, rhs), - } - } - - #[inline(always)] - fn binary_cmp(lhs: &str, rhs: &str) -> Ordering { - lhs.cmp(rhs) - } - - #[inline(always)] - fn nocase_cmp(lhs: &str, rhs: &str) -> Ordering { - let nocase_lhs = uncased::UncasedStr::new(lhs); - let nocase_rhs = uncased::UncasedStr::new(rhs); - nocase_lhs.cmp(nocase_rhs) - } - - #[inline(always)] - fn rtrim_cmp(lhs: &str, rhs: &str) -> Ordering { - lhs.trim_end().cmp(rhs.trim_end()) - } -} - /// Every column of every table has an associated collating function. If no collating function is explicitly defined, /// then the collating function defaults to BINARY. /// The COLLATE clause of the column definition is used to define alternative collating functions for a column.