diff --git a/Cargo.lock b/Cargo.lock index 50f3f8047e..9ed0477a71 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4894,6 +4894,21 @@ dependencies = [ "validator", ] +[[package]] +name = "turso_common" +version = "0.4.0-pre.1" +dependencies = [ + "either", + "miette", + "serde", + "strum", + "strum_macros", + "thiserror 2.0.16", + "turso_macros", + "turso_parser", + "uncased", +] + [[package]] name = "turso_core" version = "0.4.0-pre.1" @@ -4948,6 +4963,7 @@ dependencies = [ "test-log", "thiserror 2.0.16", "tracing", + "turso_common", "turso_ext", "turso_macros", "turso_parser", diff --git a/Cargo.toml b/Cargo.toml index f397e4863c..c0a7f57662 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -32,7 +32,8 @@ members = [ "whopper", "perf/throughput/turso", "perf/throughput/rusqlite", - "perf/encryption" + "perf/encryption", + "common", ] exclude = [ "perf/latency/limbo", @@ -62,6 +63,7 @@ limbo_regexp = { path = "extensions/regexp", version = "0.4.0-pre.1" } limbo_uuid = { path = "extensions/uuid", version = "0.4.0-pre.1" } turso_parser = { path = "parser", version = "0.4.0-pre.1" } limbo_fuzzy = { path = "extensions/fuzzy", version = "0.4.0-pre.1" } +turso_common = { path = "common", version = "0.4.0-pre.1" } sql_generation = { path = "sql_generation" } strum = { version = "0.26", features = ["derive"] } strum_macros = "0.26" diff --git a/common/Cargo.toml b/common/Cargo.toml new file mode 100644 index 0000000000..89a258fcb8 --- /dev/null +++ b/common/Cargo.toml @@ -0,0 +1,26 @@ +[package] +name = "turso_common" +version.workspace = true +authors.workspace = true +edition.workspace = true +license.workspace = true +repository.workspace = true + +[lib] +path = "lib.rs" + +[features] +default = ["json"] +serde = ["dep:serde"] +json = [] + +[dependencies] +either.workspace = true +miette.workspace = true +serde = { workspace = true, optional = true } +strum.workspace = true +strum_macros.workspace = true +thiserror.workspace = true +turso_macros.workspace = true +turso_parser.workspace = true +uncased = "0.9.10" diff --git a/common/lib.rs b/common/lib.rs new file mode 100644 index 0000000000..49e323c7be --- /dev/null +++ b/common/lib.rs @@ -0,0 +1,5 @@ +pub mod numeric; +pub mod schema; +pub mod table_reference; +pub mod utils; +pub mod value; diff --git a/common/numeric/mod.rs b/common/numeric/mod.rs new file mode 100644 index 0000000000..ac3ed6d7ba --- /dev/null +++ b/common/numeric/mod.rs @@ -0,0 +1,789 @@ +use crate::value::Value; + +pub mod nonnan; + +use nonnan::NonNan; + +// TODO: Remove when https://github.com/rust-lang/libs-team/issues/230 is available +trait SaturatingShl { + fn saturating_shl(self, rhs: u32) -> Self; +} + +impl SaturatingShl for i64 { + fn saturating_shl(self, rhs: u32) -> Self { + if rhs >= Self::BITS { + 0 + } else { + self << rhs + } + } +} + +// TODO: Remove when https://github.com/rust-lang/libs-team/issues/230 is available +trait SaturatingShr { + fn saturating_shr(self, rhs: u32) -> Self; +} + +impl SaturatingShr for i64 { + fn saturating_shr(self, rhs: u32) -> Self { + if rhs >= Self::BITS { + if self >= 0 { + 0 + } else { + -1 + } + } else { + self >> rhs + } + } +} + +#[derive(Debug, Clone, Copy)] +pub enum Numeric { + Null, + Integer(i64), + Float(NonNan), +} + +impl Numeric { + pub fn from_value_strict(value: &Value) -> Numeric { + match value { + Value::Null | Value::Blob(_) => Self::Null, + Value::Integer(v) => Self::Integer(*v), + Value::Float(v) => match NonNan::new(*v) { + Some(v) => Self::Float(v), + None => Self::Null, + }, + Value::Text(text) => { + let s = text.as_str(); + + match str_to_f64(s) { + None + | Some(StrToF64::FractionalPrefix(_)) + | Some(StrToF64::DecimalPrefix(_)) => Self::Null, + Some(StrToF64::Fractional(value)) => Self::Float(value), + Some(StrToF64::Decimal(real)) => { + let integer = str_to_i64(s).unwrap_or(0); + + if real == integer as f64 { + Self::Integer(integer) + } else { + Self::Float(real) + } + } + } + } + } + } + + pub fn try_into_f64(&self) -> Option { + match self { + Numeric::Null => None, + Numeric::Integer(v) => Some(*v as _), + Numeric::Float(v) => Some((*v).into()), + } + } + + pub fn try_into_bool(&self) -> Option { + match self { + Numeric::Null => None, + Numeric::Integer(0) => Some(false), + Numeric::Float(non_nan) if *non_nan == 0.0 => Some(false), + _ => Some(true), + } + } +} + +impl From for NullableInteger { + fn from(value: Numeric) -> Self { + match value { + Numeric::Null => NullableInteger::Null, + Numeric::Integer(v) => NullableInteger::Integer(v), + Numeric::Float(v) => NullableInteger::Integer(f64::from(v) as i64), + } + } +} + +impl From for Value { + fn from(value: Numeric) -> Self { + match value { + Numeric::Null => Value::Null, + Numeric::Integer(v) => Value::Integer(v), + Numeric::Float(v) => Value::Float(v.into()), + } + } +} + +impl> From for Numeric { + fn from(value: T) -> Self { + let text = value.as_ref(); + + match str_to_f64(text) { + None => Self::Integer(0), + Some(StrToF64::Fractional(value) | StrToF64::FractionalPrefix(value)) => { + Self::Float(value) + } + Some(StrToF64::Decimal(real) | StrToF64::DecimalPrefix(real)) => { + let integer = str_to_i64(text).unwrap_or(0); + + if real == integer as f64 { + Self::Integer(integer) + } else { + Self::Float(real) + } + } + } + } +} + +impl From for Numeric { + fn from(value: Value) -> Self { + Self::from(&value) + } +} +impl From<&Value> for Numeric { + fn from(value: &Value) -> Self { + match value { + Value::Null => Self::Null, + Value::Integer(v) => Self::Integer(*v), + Value::Float(v) => match NonNan::new(*v) { + Some(v) => Self::Float(v), + None => Self::Null, + }, + Value::Text(text) => Numeric::from(text.as_str()), + Value::Blob(blob) => { + let text = String::from_utf8_lossy(blob.as_slice()); + Numeric::from(&text) + } + } + } +} + +impl std::ops::Add for Numeric { + type Output = Self; + + fn add(self, rhs: Self) -> Self::Output { + match (self, rhs) { + (Numeric::Null, _) | (_, Numeric::Null) => Numeric::Null, + (Numeric::Integer(lhs), Numeric::Integer(rhs)) => match lhs.checked_add(rhs) { + None => Numeric::Float(lhs.into()) + Numeric::Float(rhs.into()), + Some(i) => Numeric::Integer(i), + }, + (Numeric::Float(lhs), Numeric::Float(rhs)) => match lhs + rhs { + Some(v) => Numeric::Float(v), + None => Numeric::Null, + }, + (f @ Numeric::Float(_), Numeric::Integer(i)) + | (Numeric::Integer(i), f @ Numeric::Float(_)) => f + Numeric::Float(i.into()), + } + } +} + +impl std::ops::Sub for Numeric { + type Output = Self; + + fn sub(self, rhs: Self) -> Self::Output { + match (self, rhs) { + (Numeric::Null, _) | (_, Numeric::Null) => Numeric::Null, + (Numeric::Float(lhs), Numeric::Float(rhs)) => match lhs - rhs { + Some(v) => Numeric::Float(v), + None => Numeric::Null, + }, + (Numeric::Integer(lhs), Numeric::Integer(rhs)) => match lhs.checked_sub(rhs) { + None => Numeric::Float(lhs.into()) - Numeric::Float(rhs.into()), + Some(i) => Numeric::Integer(i), + }, + (f @ Numeric::Float(_), Numeric::Integer(i)) => f - Numeric::Float(i.into()), + (Numeric::Integer(i), f @ Numeric::Float(_)) => Numeric::Float(i.into()) - f, + } + } +} + +impl std::ops::Mul for Numeric { + type Output = Self; + + fn mul(self, rhs: Self) -> Self::Output { + match (self, rhs) { + (Numeric::Null, _) | (_, Numeric::Null) => Numeric::Null, + (Numeric::Float(lhs), Numeric::Float(rhs)) => match lhs * rhs { + Some(v) => Numeric::Float(v), + None => Numeric::Null, + }, + (Numeric::Integer(lhs), Numeric::Integer(rhs)) => match lhs.checked_mul(rhs) { + None => Numeric::Float(lhs.into()) * Numeric::Float(rhs.into()), + Some(i) => Numeric::Integer(i), + }, + (f @ Numeric::Float(_), Numeric::Integer(i)) + | (Numeric::Integer(i), f @ Numeric::Float(_)) => f * Numeric::Float(i.into()), + } + } +} + +impl std::ops::Div for Numeric { + type Output = Self; + + fn div(self, rhs: Self) -> Self::Output { + match (self, rhs) { + (Numeric::Null, _) | (_, Numeric::Null) => Numeric::Null, + (Numeric::Float(lhs), Numeric::Float(rhs)) => match lhs / rhs { + Some(v) if rhs != 0.0 => Numeric::Float(v), + _ => Numeric::Null, + }, + (Numeric::Integer(lhs), Numeric::Integer(rhs)) => match lhs.checked_div(rhs) { + None => Numeric::Float(lhs.into()) / Numeric::Float(rhs.into()), + Some(v) => Numeric::Integer(v), + }, + (f @ Numeric::Float(_), Numeric::Integer(i)) => f / Numeric::Float(i.into()), + (Numeric::Integer(i), f @ Numeric::Float(_)) => Numeric::Float(i.into()) / f, + } + } +} + +impl std::ops::Neg for Numeric { + type Output = Self; + + fn neg(self) -> Self::Output { + match self { + Numeric::Null => Numeric::Null, + Numeric::Integer(v) => match v.checked_neg() { + None => -Numeric::Float(v.into()), + Some(i) => Numeric::Integer(i), + }, + Numeric::Float(v) => Numeric::Float(-v), + } + } +} + +#[derive(Debug)] +pub enum NullableInteger { + Null, + Integer(i64), +} + +impl From for Value { + fn from(value: NullableInteger) -> Self { + match value { + NullableInteger::Null => Value::Null, + NullableInteger::Integer(v) => Value::Integer(v), + } + } +} + +impl> From for NullableInteger { + fn from(value: T) -> Self { + Self::Integer(str_to_i64(value.as_ref()).unwrap_or(0)) + } +} + +impl From for NullableInteger { + fn from(value: Value) -> Self { + Self::from(&value) + } +} + +impl From<&Value> for NullableInteger { + fn from(value: &Value) -> Self { + match value { + Value::Null => Self::Null, + Value::Integer(v) => Self::Integer(*v), + Value::Float(v) => Self::Integer(*v as i64), + Value::Text(text) => Self::from(text.as_str()), + Value::Blob(blob) => { + let text = String::from_utf8_lossy(blob.as_slice()); + Self::from(text) + } + } + } +} + +impl std::ops::Not for NullableInteger { + type Output = Self; + + fn not(self) -> Self::Output { + match self { + NullableInteger::Null => NullableInteger::Null, + NullableInteger::Integer(lhs) => NullableInteger::Integer(!lhs), + } + } +} + +impl std::ops::BitAnd for NullableInteger { + type Output = Self; + + fn bitand(self, rhs: Self) -> Self::Output { + match (self, rhs) { + (NullableInteger::Null, _) | (_, NullableInteger::Null) => NullableInteger::Null, + (NullableInteger::Integer(lhs), NullableInteger::Integer(rhs)) => { + NullableInteger::Integer(lhs & rhs) + } + } + } +} + +impl std::ops::BitOr for NullableInteger { + type Output = Self; + + fn bitor(self, rhs: Self) -> Self::Output { + match (self, rhs) { + (NullableInteger::Null, _) | (_, NullableInteger::Null) => NullableInteger::Null, + (NullableInteger::Integer(lhs), NullableInteger::Integer(rhs)) => { + NullableInteger::Integer(lhs | rhs) + } + } + } +} + +impl std::ops::Shl for NullableInteger { + type Output = Self; + + fn shl(self, rhs: Self) -> Self::Output { + match (self, rhs) { + (NullableInteger::Null, _) | (_, NullableInteger::Null) => NullableInteger::Null, + (NullableInteger::Integer(lhs), NullableInteger::Integer(rhs)) => { + NullableInteger::Integer(if rhs.is_positive() { + lhs.saturating_shl(rhs.try_into().unwrap_or(u32::MAX)) + } else { + lhs.saturating_shr(rhs.saturating_abs().try_into().unwrap_or(u32::MAX)) + }) + } + } + } +} + +impl std::ops::Shr for NullableInteger { + type Output = Self; + + fn shr(self, rhs: Self) -> Self::Output { + match (self, rhs) { + (NullableInteger::Null, _) | (_, NullableInteger::Null) => NullableInteger::Null, + (NullableInteger::Integer(lhs), NullableInteger::Integer(rhs)) => { + NullableInteger::Integer(if rhs.is_positive() { + lhs.saturating_shr(rhs.try_into().unwrap_or(u32::MAX)) + } else { + lhs.saturating_shl(rhs.saturating_abs().try_into().unwrap_or(u32::MAX)) + }) + } + } + } +} + +impl std::ops::Rem for NullableInteger { + type Output = Self; + + fn rem(self, rhs: Self) -> Self::Output { + match (self, rhs) { + (NullableInteger::Null, _) | (_, NullableInteger::Null) => NullableInteger::Null, + (_, NullableInteger::Integer(0)) => NullableInteger::Null, + (lhs, NullableInteger::Integer(-1)) => lhs % NullableInteger::Integer(1), + (NullableInteger::Integer(lhs), NullableInteger::Integer(rhs)) => { + NullableInteger::Integer(lhs % rhs) + } + } + } +} + +// Maximum u64 that can survive a f64 round trip +const MAX_EXACT: u64 = u64::MAX << 11; + +const VERTICAL_TAB: char = '\u{b}'; + +/// Encapsulates Dekker's arithmetic for higher precision. This is spiritually the same as using a +/// f128 for arithmetic, but cross platform and compatible with sqlite. +#[derive(Debug, Clone, Copy)] +struct DoubleDouble(f64, f64); + +impl DoubleDouble { + pub const E100: Self = DoubleDouble(1.0e+100, -1.590_289_110_975_991_8e83); + pub const E10: Self = DoubleDouble(1.0e+10, 0.0); + pub const E1: Self = DoubleDouble(1.0e+01, 0.0); + + pub const NEG_E100: Self = DoubleDouble(1.0e-100, -1.999_189_980_260_288_3e-117); + pub const NEG_E10: Self = DoubleDouble(1.0e-10, -3.643_219_731_549_774e-27); + pub const NEG_E1: Self = DoubleDouble(1.0e-01, -5.551_115_123_125_783e-18); +} + +impl From for DoubleDouble { + fn from(value: u64) -> Self { + let r = value as f64; + + // If the value is smaller than MAX_EXACT, the error isn't significant + let rr = if r <= MAX_EXACT as f64 { + let round_tripped = value as f64 as u64; + let sign = if value >= round_tripped { 1.0 } else { -1.0 }; + + // Error term is the signed distance of the round tripped value and itself + sign * value.abs_diff(round_tripped) as f64 + } else { + 0.0 + }; + + DoubleDouble(r, rr) + } +} + +impl From for u64 { + fn from(value: DoubleDouble) -> Self { + if value.1 < 0.0 { + value.0 as u64 - value.1.abs() as u64 + } else { + value.0 as u64 + value.1 as u64 + } + } +} + +impl From for f64 { + fn from(DoubleDouble(a, aa): DoubleDouble) -> Self { + a + aa + } +} + +impl std::ops::Mul for DoubleDouble { + type Output = Self; + + /// Double-Double multiplication. (self.0, self.1) *= (rhs.0, rhs.1) + /// + /// Reference: + /// T. J. Dekker, "A Floating-Point Technique for Extending the Available Precision". + /// 1971-07-26. + /// + fn mul(self, rhs: Self) -> Self::Output { + // TODO: Better variable naming + + let mask = u64::MAX << 26; + + let hx = f64::from_bits(self.0.to_bits() & mask); + let tx = self.0 - hx; + + let hy = f64::from_bits(rhs.0.to_bits() & mask); + let ty = rhs.0 - hy; + + let p = hx * hy; + let q = hx * ty + tx * hy; + + let c = p + q; + let cc = p - c + q + tx * ty; + let cc = self.0 * rhs.1 + self.1 * rhs.0 + cc; + + let r = c + cc; + let rr = (c - r) + cc; + + DoubleDouble(r, rr) + } +} + +impl std::ops::MulAssign for DoubleDouble { + fn mul_assign(&mut self, rhs: Self) { + *self = *self * rhs; + } +} + +pub fn str_to_i64(input: impl AsRef) -> Option { + let input = input + .as_ref() + .trim_matches(|ch: char| ch.is_ascii_whitespace() || ch == VERTICAL_TAB); + + let mut iter = input.chars().enumerate().peekable(); + + iter.next_if(|(_, ch)| matches!(ch, '+' | '-')); + let Some((end, _)) = iter.take_while(|(_, ch)| ch.is_ascii_digit()).last() else { + return Some(0); + }; + + input[0..=end].parse::().map_or_else( + |err| match err.kind() { + std::num::IntErrorKind::PosOverflow => Some(i64::MAX), + std::num::IntErrorKind::NegOverflow => Some(i64::MIN), + std::num::IntErrorKind::Empty => unreachable!(), + _ => Some(0), + }, + Some, + ) +} + +#[derive(Debug, Clone, Copy)] +pub enum StrToF64 { + Fractional(NonNan), + Decimal(NonNan), + FractionalPrefix(NonNan), + DecimalPrefix(NonNan), +} + +impl From for f64 { + fn from(value: StrToF64) -> Self { + match value { + StrToF64::Fractional(non_nan) => non_nan.into(), + StrToF64::Decimal(non_nan) => non_nan.into(), + StrToF64::FractionalPrefix(non_nan) => non_nan.into(), + StrToF64::DecimalPrefix(non_nan) => non_nan.into(), + } + } +} + +pub fn str_to_f64(input: impl AsRef) -> Option { + let mut input = input + .as_ref() + .trim_matches(|ch: char| ch.is_ascii_whitespace() || ch == VERTICAL_TAB) + .chars() + .peekable(); + + let sign = match input.next_if(|ch| matches!(ch, '-' | '+')) { + Some('-') => -1.0, + _ => 1.0, + }; + + let mut had_digits = false; + let mut is_fractional = false; + + let mut significant: u64 = 0; + + // Copy as many significant digits as we can + while let Some(digit) = input.peek().and_then(|ch| ch.to_digit(10)) { + had_digits = true; + + match significant + .checked_mul(10) + .and_then(|v| v.checked_add(digit as u64)) + { + Some(new) => significant = new, + None => break, + } + + input.next(); + } + + let mut exponent = 0; + + // Increment the exponent for every non significant digit we skipped + while input.next_if(char::is_ascii_digit).is_some() { + exponent += 1 + } + + if input.next_if(|ch| matches!(ch, '.')).is_some() { + if had_digits { + is_fractional = true; + } + + if input.peek().is_some_and(char::is_ascii_digit) { + is_fractional = true; + } + + while let Some(digit) = input.peek().and_then(|ch| ch.to_digit(10)) { + if significant < (u64::MAX - 9) / 10 { + significant = significant * 10 + digit as u64; + exponent -= 1; + } + + input.next(); + } + }; + + let mut valid_exponent = true; + + if (had_digits || is_fractional) && input.next_if(|ch| matches!(ch, 'e' | 'E')).is_some() { + let sign = match input.next_if(|ch| matches!(ch, '-' | '+')) { + Some('-') => -1, + _ => 1, + }; + + if input.peek().is_some_and(char::is_ascii_digit) { + is_fractional = true; + let mut e = 0; + + while let Some(ch) = input.next_if(char::is_ascii_digit) { + e = (e * 10 + ch.to_digit(10).unwrap() as i32).min(1000); + } + + exponent += sign * e; + } else { + valid_exponent = false; + } + }; + + if !(had_digits || is_fractional) { + return None; + } + + while exponent.is_positive() && significant < MAX_EXACT / 10 { + significant *= 10; + exponent -= 1; + } + + while exponent.is_negative() && significant % 10 == 0 { + significant /= 10; + exponent += 1; + } + + let mut result = DoubleDouble::from(significant); + + if exponent > 0 { + while exponent >= 100 { + exponent -= 100; + result *= DoubleDouble::E100; + } + while exponent >= 10 { + exponent -= 10; + result *= DoubleDouble::E10; + } + while exponent >= 1 { + exponent -= 1; + result *= DoubleDouble::E1; + } + } else { + while exponent <= -100 { + exponent += 100; + result *= DoubleDouble::NEG_E100; + } + while exponent <= -10 { + exponent += 10; + result *= DoubleDouble::NEG_E10; + } + while exponent <= -1 { + exponent += 1; + result *= DoubleDouble::NEG_E1; + } + } + + let result = NonNan::new(f64::from(result) * sign) + .unwrap_or_else(|| NonNan::new(sign * f64::INFINITY).unwrap()); + + if !valid_exponent || input.count() > 0 { + if is_fractional { + return Some(StrToF64::FractionalPrefix(result)); + } else { + return Some(StrToF64::DecimalPrefix(result)); + } + } + + Some(if is_fractional { + StrToF64::Fractional(result) + } else { + StrToF64::Decimal(result) + }) +} + +pub fn format_float(v: f64) -> String { + if v.is_nan() { + return "".to_string(); + } + + if v.is_infinite() { + return if v.is_sign_negative() { "-Inf" } else { "Inf" }.to_string(); + } + + if v == 0.0 { + return "0.0".to_string(); + } + + let negative = v < 0.0; + let mut d = DoubleDouble(v.abs(), 0.0); + let mut exp = 0; + + if d.0 > 9.223_372_036_854_775e18 { + while d.0 > 9.223_372_036_854_774e118 { + exp += 100; + d *= DoubleDouble::NEG_E100; + } + while d.0 > 9.223_372_036_854_774e28 { + exp += 10; + d *= DoubleDouble::NEG_E10; + } + while d.0 > 9.223_372_036_854_775e18 { + exp += 1; + d *= DoubleDouble::NEG_E1; + } + } else { + while d.0 < 9.223_372_036_854_775e-83 { + exp -= 100; + d *= DoubleDouble::E100; + } + while d.0 < 9.223_372_036_854_775e7 { + exp -= 10; + d *= DoubleDouble::E10; + } + while d.0 < 9.223_372_036_854_775e17 { + exp -= 1; + d *= DoubleDouble::E1; + } + } + + let v = u64::from(d); + + let mut digits = v.to_string().into_bytes(); + + let precision = 15; + + let mut decimal_pos = digits.len() as i32 + exp; + + 'out: { + if digits.len() > precision { + let round_up = digits[precision] >= b'5'; + digits.truncate(precision); + + if round_up { + for i in (0..precision).rev() { + if digits[i] < b'9' { + digits[i] += 1; + break 'out; + } + digits[i] = b'0'; + } + + digits.insert(0, b'1'); + decimal_pos += 1; + } + } + } + + while digits.len() > 1 && digits[digits.len() - 1] == b'0' { + digits.pop(); + } + + let exp = decimal_pos - 1; + + if (-4..=14).contains(&exp) { + format!( + "{}{}.{}{}", + if negative { "-" } else { Default::default() }, + if decimal_pos > 0 { + let zeroes = (decimal_pos - digits.len() as i32).max(0) as usize; + let digits = digits + .get(0..(decimal_pos.min(digits.len() as i32) as usize)) + .unwrap(); + (unsafe { str::from_utf8_unchecked(digits) }).to_owned() + &"0".repeat(zeroes) + } else { + "0".to_string() + }, + "0".repeat(decimal_pos.min(0).unsigned_abs() as usize), + digits + .get((decimal_pos.max(0) as usize)..) + .filter(|v| !v.is_empty()) + .map(|v| unsafe { str::from_utf8_unchecked(v) }) + .unwrap_or("0") + ) + } else { + format!( + "{}{}.{}e{}{:0width$}", + if negative { "-" } else { "" }, + digits.first().cloned().unwrap_or(b'0') as char, + digits + .get(1..) + .filter(|v| !v.is_empty()) + .map(|v| unsafe { str::from_utf8_unchecked(v) }) + .unwrap_or("0"), + if exp.is_positive() { "+" } else { "-" }, + exp.abs(), + width = if exp > 100 { 3 } else { 2 } + ) + } +} + +#[test] +fn test_decode_float() { + assert_eq!(format_float(9.93e-322), "9.93071948140905e-322"); + assert_eq!(format_float(9.93), "9.93"); + assert_eq!(format_float(0.093), "0.093"); + assert_eq!(format_float(-0.093), "-0.093"); + assert_eq!(format_float(0.0), "0.0"); + assert_eq!(format_float(4.94e-322), "4.94065645841247e-322"); + assert_eq!(format_float(-20228007.0), "-20228007.0"); +} diff --git a/common/numeric/nonnan.rs b/common/numeric/nonnan.rs new file mode 100644 index 0000000000..5ae6a1f34a --- /dev/null +++ b/common/numeric/nonnan.rs @@ -0,0 +1,105 @@ +#[repr(transparent)] +#[derive(Debug, Clone, Copy, PartialEq)] +pub struct NonNan(f64); + +impl NonNan { + pub fn new(value: f64) -> Option { + if value.is_nan() { + return None; + } + + Some(NonNan(value)) + } +} + +impl PartialEq for f64 { + fn eq(&self, other: &NonNan) -> bool { + *self == other.0 + } +} + +impl PartialEq for NonNan { + fn eq(&self, other: &f64) -> bool { + self.0 == *other + } +} + +impl PartialOrd for NonNan { + fn partial_cmp(&self, other: &f64) -> Option { + self.0.partial_cmp(other) + } +} + +impl PartialOrd for f64 { + fn partial_cmp(&self, other: &NonNan) -> Option { + self.partial_cmp(&other.0) + } +} + +impl From for NonNan { + fn from(value: i64) -> Self { + NonNan(value as f64) + } +} + +impl From for f64 { + fn from(value: NonNan) -> Self { + value.0 + } +} + +impl std::ops::Deref for NonNan { + type Target = f64; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl std::ops::Add for NonNan { + type Output = Option; + + fn add(self, rhs: Self) -> Self::Output { + Self::new(self.0 + rhs.0) + } +} + +impl std::ops::Sub for NonNan { + type Output = Option; + + fn sub(self, rhs: Self) -> Self::Output { + Self::new(self.0 - rhs.0) + } +} + +impl std::ops::Mul for NonNan { + type Output = Option; + + fn mul(self, rhs: Self) -> Self::Output { + Self::new(self.0 * rhs.0) + } +} + +impl std::ops::Div for NonNan { + type Output = Option; + + fn div(self, rhs: Self) -> Self::Output { + Self::new(self.0 / rhs.0) + } +} + +impl std::ops::Rem for NonNan { + type Output = Option; + + fn rem(self, rhs: Self) -> Self::Output { + Self::new(self.0 % rhs.0) + } +} + +impl std::ops::Neg for NonNan { + type Output = Self; + + fn neg(self) -> Self::Output { + Self(-self.0) + } +} diff --git a/core/vdbe/affinity.rs b/common/schema/affinity.rs similarity index 99% rename from core/vdbe/affinity.rs rename to common/schema/affinity.rs index 7a40a22d77..0064b503ff 100644 --- a/core/vdbe/affinity.rs +++ b/common/schema/affinity.rs @@ -1,7 +1,7 @@ use either::Either; use turso_parser::ast::{Expr, Literal}; -use crate::{types::AsValueRef, Value, ValueRef}; +use crate::value::{AsValueRef, Value, ValueRef}; /// # SQLite Column Type Affinities /// diff --git a/common/schema/collate.rs b/common/schema/collate.rs new file mode 100644 index 0000000000..626e74b463 --- /dev/null +++ b/common/schema/collate.rs @@ -0,0 +1,66 @@ +use std::{cmp::Ordering, str::FromStr}; + +// TODO: in the future allow user to define collation sequences +// Will have to meddle with ffi for this +#[derive( + Debug, Clone, Copy, Eq, PartialEq, strum_macros::Display, strum_macros::EnumString, Default, +)] +#[strum(ascii_case_insensitive)] +/// **Pre defined collation sequences**\ +/// Collating functions only matter when comparing string values. +/// Numeric values are always compared numerically, and BLOBs are always compared byte-by-byte using memcmp(). +#[repr(u8)] +pub enum CollationSeq { + Unset = 0, + #[default] + Binary = 1, + NoCase = 2, + Rtrim = 3, +} + +#[derive(Debug)] +pub enum CollateError { + NotFound(String), +} + +impl CollationSeq { + pub fn new(collation: &str) -> Result { + CollationSeq::from_str(collation) + .map_err(|_| CollateError::NotFound(format!("no such collation sequence: {collation}"))) + } + #[inline] + /// Returns the collation, defaulting to BINARY if unset + pub const fn from_bits(bits: u8) -> Self { + match bits { + 2 => CollationSeq::NoCase, + 3 => CollationSeq::Rtrim, + _ => CollationSeq::Binary, + } + } + + #[inline(always)] + pub fn compare_strings(&self, lhs: &str, rhs: &str) -> Ordering { + match self { + CollationSeq::Unset | CollationSeq::Binary => Self::binary_cmp(lhs, rhs), + CollationSeq::NoCase => Self::nocase_cmp(lhs, rhs), + CollationSeq::Rtrim => Self::rtrim_cmp(lhs, rhs), + } + } + + #[inline(always)] + fn binary_cmp(lhs: &str, rhs: &str) -> Ordering { + lhs.cmp(rhs) + } + + #[inline(always)] + fn nocase_cmp(lhs: &str, rhs: &str) -> Ordering { + let nocase_lhs = uncased::UncasedStr::new(lhs); + let nocase_rhs = uncased::UncasedStr::new(rhs); + nocase_lhs.cmp(nocase_rhs) + } + + #[inline(always)] + fn rtrim_cmp(lhs: &str, rhs: &str) -> Ordering { + lhs.trim_end().cmp(rhs.trim_end()) + } +} diff --git a/common/schema/column.rs b/common/schema/column.rs new file mode 100644 index 0000000000..c8c98b116f --- /dev/null +++ b/common/schema/column.rs @@ -0,0 +1,338 @@ +use core::fmt; + +use turso_macros::match_ignore_ascii_case; +use turso_parser::ast::{self, ColumnDefinition, Expr, Literal}; + +use crate::{ + contains_ignore_ascii_case, eq_ignore_ascii_case, + schema::{affinity::Affinity, collate::CollationSeq}, + utils::normalize_ident, +}; + +#[derive(Debug, Clone)] +pub struct Column { + pub name: Option, + pub ty_str: String, + pub default: Option>, + raw: u16, +} + +// flags +const F_PRIMARY_KEY: u16 = 1; +const F_ROWID_ALIAS: u16 = 2; +const F_NOTNULL: u16 = 4; +const F_UNIQUE: u16 = 8; +const F_HIDDEN: u16 = 16; + +// pack Type and Collation in the remaining bits +const TYPE_SHIFT: u16 = 5; +const TYPE_MASK: u16 = 0b111 << TYPE_SHIFT; +const COLL_SHIFT: u16 = TYPE_SHIFT + 3; +const COLL_MASK: u16 = 0b11 << COLL_SHIFT; + +impl Column { + pub fn affinity(&self) -> Affinity { + Affinity::affinity(&self.ty_str) + } + pub const fn new_default_text( + name: Option, + ty_str: String, + default: Option>, + ) -> Self { + Self::new( + name, + ty_str, + default, + Type::Text, + None, + false, + false, + false, + false, + false, + ) + } + pub const fn new_default_integer( + name: Option, + ty_str: String, + default: Option>, + ) -> Self { + Self::new( + name, + ty_str, + default, + Type::Integer, + None, + false, + false, + false, + false, + false, + ) + } + #[inline] + #[allow(clippy::too_many_arguments)] + pub const fn new( + name: Option, + ty_str: String, + default: Option>, + ty: Type, + col: Option, + primary_key: bool, + rowid_alias: bool, + notnull: bool, + unique: bool, + hidden: bool, + ) -> Self { + let mut raw = 0u16; + raw |= (ty as u16) << TYPE_SHIFT; + if let Some(c) = col { + raw |= (c as u16) << COLL_SHIFT; + } + if primary_key { + raw |= F_PRIMARY_KEY + } + if rowid_alias { + raw |= F_ROWID_ALIAS + } + if notnull { + raw |= F_NOTNULL + } + if unique { + raw |= F_UNIQUE + } + if hidden { + raw |= F_HIDDEN + } + Self { + name, + ty_str, + default, + raw, + } + } + #[inline] + pub const fn ty(&self) -> Type { + let v = ((self.raw & TYPE_MASK) >> TYPE_SHIFT) as u8; + Type::from_bits(v) + } + + #[inline] + pub const fn set_ty(&mut self, ty: Type) { + self.raw = (self.raw & !TYPE_MASK) | (((ty as u16) << TYPE_SHIFT) & TYPE_MASK); + } + + #[inline] + pub const fn collation_opt(&self) -> Option { + if self.has_explicit_collation() { + Some(self.collation()) + } else { + None + } + } + + #[inline] + pub const fn collation(&self) -> CollationSeq { + let v = ((self.raw & COLL_MASK) >> COLL_SHIFT) as u8; + CollationSeq::from_bits(v) + } + + #[inline] + pub const fn has_explicit_collation(&self) -> bool { + let v = ((self.raw & COLL_MASK) >> COLL_SHIFT) as u8; + v != CollationSeq::Unset as u8 + } + + #[inline] + pub const fn set_collation(&mut self, c: Option) { + if let Some(c) = c { + self.raw = (self.raw & !COLL_MASK) | (((c as u16) << COLL_SHIFT) & COLL_MASK); + } + } + + #[inline] + pub fn primary_key(&self) -> bool { + self.raw & F_PRIMARY_KEY != 0 + } + #[inline] + pub const fn is_rowid_alias(&self) -> bool { + self.raw & F_ROWID_ALIAS != 0 + } + #[inline] + pub const fn notnull(&self) -> bool { + self.raw & F_NOTNULL != 0 + } + #[inline] + pub const fn unique(&self) -> bool { + self.raw & F_UNIQUE != 0 + } + #[inline] + pub const fn hidden(&self) -> bool { + self.raw & F_HIDDEN != 0 + } + + #[inline] + pub const fn set_primary_key(&mut self, v: bool) { + self.set_flag(F_PRIMARY_KEY, v); + } + #[inline] + pub const fn set_rowid_alias(&mut self, v: bool) { + self.set_flag(F_ROWID_ALIAS, v); + } + #[inline] + pub const fn set_notnull(&mut self, v: bool) { + self.set_flag(F_NOTNULL, v); + } + #[inline] + pub const fn set_unique(&mut self, v: bool) { + self.set_flag(F_UNIQUE, v); + } + #[inline] + pub const fn set_hidden(&mut self, v: bool) { + self.set_flag(F_HIDDEN, v); + } + + #[inline] + const fn set_flag(&mut self, mask: u16, val: bool) { + if val { + self.raw |= mask + } else { + self.raw &= !mask + } + } +} + +// TODO: This might replace some of util::columns_from_create_table_body +impl From<&ColumnDefinition> for Column { + fn from(value: &ColumnDefinition) -> Self { + let name = value.col_name.as_str(); + + let mut default = None; + let mut notnull = false; + let mut primary_key = false; + let mut unique = false; + let mut collation = None; + + for ast::NamedColumnConstraint { constraint, .. } in &value.constraints { + match constraint { + ast::ColumnConstraint::PrimaryKey { .. } => primary_key = true, + ast::ColumnConstraint::NotNull { .. } => notnull = true, + ast::ColumnConstraint::Unique(..) => unique = true, + ast::ColumnConstraint::Default(expr) => { + default + .replace(translate_ident_to_string_literal(expr).unwrap_or(expr.clone())); + } + ast::ColumnConstraint::Collate { collation_name } => { + collation.replace( + CollationSeq::new(collation_name.as_str()) + .expect("collation should have been set correctly in create table"), + ); + } + _ => {} + }; + } + + let ty = match value.col_type { + Some(ref data_type) => type_from_name(&data_type.name).0, + None => Type::Null, + }; + + let ty_str = value + .col_type + .as_ref() + .map(|t| t.name.to_string()) + .unwrap_or_default(); + + let hidden = ty_str.contains("HIDDEN"); + + Column::new( + Some(normalize_ident(name)), + ty_str, + default, + ty, + collation, + primary_key, + primary_key && matches!(ty, Type::Integer), + notnull, + unique, + hidden, + ) + } +} + +#[repr(u8)] +#[derive(Debug, Clone, Copy, PartialEq)] +pub enum Type { + Null = 0, + Text = 1, + Numeric = 2, + Integer = 3, + Real = 4, + Blob = 5, +} + +impl Type { + #[inline] + const fn from_bits(bits: u8) -> Self { + match bits { + 0 => Type::Null, + 1 => Type::Text, + 2 => Type::Numeric, + 3 => Type::Integer, + 4 => Type::Real, + 5 => Type::Blob, + _ => Type::Null, + } + } +} + +impl fmt::Display for Type { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let s = match self { + Self::Null => "", + Self::Text => "TEXT", + Self::Numeric => "NUMERIC", + Self::Integer => "INTEGER", + Self::Real => "REAL", + Self::Blob => "BLOB", + }; + write!(f, "{s}") + } +} + +pub fn translate_ident_to_string_literal(expr: &Expr) -> Option> { + match expr { + Expr::Name(name) => Some(Box::new(Expr::Literal(Literal::String(name.as_literal())))), + _ => None, + } +} + +// this function returns the affinity type and whether the type name was exactly "INTEGER" +// https://www.sqlite.org/datatype3.html +pub fn type_from_name(type_name: &str) -> (Type, bool) { + let type_name = type_name.as_bytes(); + if type_name.is_empty() { + return (Type::Blob, false); + } + + if eq_ignore_ascii_case!(type_name, b"INTEGER") { + return (Type::Integer, true); + } + + if contains_ignore_ascii_case!(type_name, b"INT") { + return (Type::Integer, false); + } + + if let Some(ty) = type_name.windows(4).find_map(|s| { + match_ignore_ascii_case!(match s { + b"CHAR" | b"CLOB" | b"TEXT" => Some(Type::Text), + b"BLOB" => Some(Type::Blob), + b"REAL" | b"FLOA" | b"DOUB" => Some(Type::Real), + _ => None, + }) + }) { + return (ty, false); + } + + (Type::Numeric, false) +} diff --git a/common/schema/mod.rs b/common/schema/mod.rs new file mode 100644 index 0000000000..2b9ce0146e --- /dev/null +++ b/common/schema/mod.rs @@ -0,0 +1,55 @@ +pub mod affinity; +pub mod collate; +pub mod column; + +#[macro_export] +macro_rules! eq_ignore_ascii_case { + ( $var:expr, $value:literal ) => {{ + ::turso_macros::match_ignore_ascii_case!(match $var { + $value => true, + _ => false, + }) + }}; +} + +#[macro_export] +macro_rules! contains_ignore_ascii_case { + ( $var:expr, $value:literal ) => {{ + let compare_to_idx = $var.len().saturating_sub($value.len()); + if $var.len() < $value.len() { + false + } else { + let mut result = false; + for i in 0..=compare_to_idx { + if eq_ignore_ascii_case!(&$var[i..i + $value.len()], $value) { + result = true; + break; + } + } + + result + } + }}; +} + +#[macro_export] +macro_rules! starts_with_ignore_ascii_case { + ( $var:expr, $value:literal ) => {{ + if $var.len() < $value.len() { + false + } else { + eq_ignore_ascii_case!(&$var[..$value.len()], $value) + } + }}; +} + +#[macro_export] +macro_rules! ends_with_ignore_ascii_case { + ( $var:expr, $value:literal ) => {{ + if $var.len() < $value.len() { + false + } else { + eq_ignore_ascii_case!(&$var[$var.len() - $value.len()..], $value) + } + }}; +} diff --git a/common/table_reference.rs b/common/table_reference.rs new file mode 100644 index 0000000000..7df5646e54 --- /dev/null +++ b/common/table_reference.rs @@ -0,0 +1,247 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::sync::Arc; + +/// A fully resolved path to a table of the form "catalog.schema.table" +#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub struct ResolvedTableReference { + /// The catalog (aka database) containing the table + pub catalog: Arc, + /// The schema containing the table + pub schema: Arc, + /// The table name + pub table: Arc, +} + +impl std::fmt::Display for ResolvedTableReference { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}.{}.{}", self.catalog, self.schema, self.table) + } +} + +/// A multi part identifier (path) to a table that may require further +/// resolution (e.g. `foo.bar`). +/// +/// [`TableReference`]s are cheap to `clone()` as they are implemented with +/// `Arc`. +/// +/// See [`ResolvedTableReference`] for a fully resolved table reference. +/// +/// # Creating [`TableReference`] +/// +/// When converting strings to [`TableReference`]s, the string is parsed as +/// though it were a SQL identifier, normalizing (convert to lowercase) any +/// unquoted identifiers. [`TableReference::bare`] creates references without +/// applying normalization semantics. +/// +/// # Examples +/// ``` +/// // Get a table reference to 'mytable' +/// let table_reference = TableReference::from("mytable"); +/// assert_eq!(table_reference, TableReference::bare("mytable")); +/// +/// // Get a table reference to 'mytable' (note the capitalization) +/// let table_reference = TableReference::from("MyTable"); +/// assert_eq!(table_reference, TableReference::bare("mytable")); +/// +/// // Get a table reference to 'MyTable' (note the capitalization) using double quotes +/// // (programmatically it is better to use `TableReference::bare` for this) +/// let table_reference = TableReference::from(r#""MyTable""#); +/// assert_eq!(table_reference, TableReference::bare("MyTable")); +/// +/// // Get a table reference to 'myschema.mytable' (note the capitalization) +/// let table_reference = TableReference::from("MySchema.MyTable"); +/// assert_eq!(table_reference, TableReference::partial("myschema", "mytable")); +///``` +#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub enum TableReference { + /// An unqualified table reference, e.g. "table" + Bare { + /// The table name + table: Arc, + }, + /// A partially resolved table reference, e.g. "schema.table" + Partial { + /// The schema containing the table + schema: Arc, + /// The table name + table: Arc, + }, + /// A fully resolved table reference, e.g. "catalog.schema.table" + Full { + /// The catalog (aka database) containing the table + catalog: Arc, + /// The schema containing the table + schema: Arc, + /// The table name + table: Arc, + }, +} + +impl std::fmt::Display for TableReference { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + TableReference::Bare { table } => write!(f, "{table}"), + TableReference::Partial { schema, table } => { + write!(f, "{schema}.{table}") + } + TableReference::Full { + catalog, + schema, + table, + } => write!(f, "{catalog}.{schema}.{table}"), + } + } +} + +impl TableReference { + /// Convenience method for creating a typed none `None` + pub fn none() -> Option { + None + } + + /// Convenience method for creating a [`TableReference::Bare`] + /// + /// As described on [`TableReference`] this does *NO* normalization at + /// all, so "Foo.Bar" stays as a reference to the table named + /// "Foo.Bar" (rather than "foo"."bar") + pub fn bare(table: impl Into>) -> TableReference { + TableReference::Bare { + table: table.into(), + } + } + + /// Convenience method for creating a [`TableReference::Partial`]. + /// + /// Note: *NO* normalization is applied to the schema or table name. + pub fn partial(schema: impl Into>, table: impl Into>) -> TableReference { + TableReference::Partial { + schema: schema.into(), + table: table.into(), + } + } + + /// Convenience method for creating a [`TableReference::Full`] + /// + /// Note: *NO* normalization is applied to the catalog, schema or table + /// name. + pub fn full( + catalog: impl Into>, + schema: impl Into>, + table: impl Into>, + ) -> TableReference { + TableReference::Full { + catalog: catalog.into(), + schema: schema.into(), + table: table.into(), + } + } + + /// Retrieve the table name, regardless of qualification. + pub fn table(&self) -> &str { + match self { + Self::Full { table, .. } | Self::Partial { table, .. } | Self::Bare { table } => table, + } + } + + /// Retrieve the schema name if [`Self::Partial]` or [`Self::`Full`], + /// `None` otherwise. + pub fn schema(&self) -> Option<&str> { + match self { + Self::Full { schema, .. } | Self::Partial { schema, .. } => Some(schema), + _ => None, + } + } + + /// Retrieve the catalog name if [`Self::Full`], `None` otherwise. + pub fn catalog(&self) -> Option<&str> { + match self { + Self::Full { catalog, .. } => Some(catalog), + _ => None, + } + } + + /// Compare with another [`TableReference`] as if both are resolved. + /// This allows comparing across variants. If a field is not present + /// in both variants being compared then it is ignored in the comparison. + /// + /// e.g. this allows a [`TableReference::Bare`] to be considered equal to a + /// fully qualified [`TableReference::Full`] if the table names match. + pub fn resolved_eq(&self, other: &Self) -> bool { + match self { + TableReference::Bare { table } => **table == *other.table(), + TableReference::Partial { schema, table } => { + **table == *other.table() && other.schema().is_none_or(|s| *s == **schema) + } + TableReference::Full { + catalog, + schema, + table, + } => { + **table == *other.table() + && other.schema().is_none_or(|s| *s == **schema) + && other.catalog().is_none_or(|c| *c == **catalog) + } + } + } + + /// Given a default catalog and schema, ensure this table reference is fully + /// resolved + pub fn resolve(self, default_catalog: &str, default_schema: &str) -> ResolvedTableReference { + match self { + Self::Full { + catalog, + schema, + table, + } => ResolvedTableReference { + catalog, + schema, + table, + }, + Self::Partial { schema, table } => ResolvedTableReference { + catalog: default_catalog.into(), + schema, + table, + }, + Self::Bare { table } => ResolvedTableReference { + catalog: default_catalog.into(), + schema: default_schema.into(), + table, + }, + } + } + + /// Decompose a [`TableReference`] to separate parts. The result vector contains + /// at most three elements in the following sequence: + /// ```no_rust + /// [, , table] + /// ``` + pub fn to_vec(&self) -> Vec { + match self { + TableReference::Bare { table } => vec![table.to_string()], + TableReference::Partial { schema, table } => { + vec![schema.to_string(), table.to_string()] + } + TableReference::Full { + catalog, + schema, + table, + } => vec![catalog.to_string(), schema.to_string(), table.to_string()], + } + } +} diff --git a/common/utils.rs b/common/utils.rs new file mode 100644 index 0000000000..5c99d35638 --- /dev/null +++ b/common/utils.rs @@ -0,0 +1,5 @@ +pub fn normalize_ident(identifier: &str) -> String { + // quotes normalization already happened in the parser layer (see Name ast node implementation) + // so, we only need to convert identifier string to lowercase + identifier.to_lowercase() +} diff --git a/common/value.rs b/common/value.rs new file mode 100644 index 0000000000..637dffe316 --- /dev/null +++ b/common/value.rs @@ -0,0 +1,874 @@ +use either::Either; +#[cfg(feature = "serde")] +use serde::Deserialize; +use std::{ + borrow::{Borrow, Cow}, + fmt::{Debug, Display}, + ops::Deref, +}; + +use crate::numeric::format_float; + +#[derive(Debug, Clone, Copy, PartialEq)] +pub enum ValueType { + Null, + Integer, + Float, + Text, + Blob, + Error, +} + +impl Display for ValueType { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let value = match self { + Self::Null => "NULL", + Self::Integer => "INT", + Self::Float => "REAL", + Self::Blob => "BLOB", + Self::Text => "TEXT", + Self::Error => "ERROR", + }; + write!(f, "{value}") + } +} + +#[derive(Debug, Clone, Copy, PartialEq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub enum TextSubtype { + Text, + #[cfg(feature = "json")] + Json, +} + +#[derive(Debug, Clone)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub struct Text { + pub value: Cow<'static, str>, + pub subtype: TextSubtype, +} + +impl Display for Text { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.as_str()) + } +} + +impl Text { + pub fn new(value: impl Into>) -> Self { + Self { + value: value.into(), + subtype: TextSubtype::Text, + } + } + #[cfg(feature = "json")] + pub fn json(value: String) -> Self { + Self { + value: value.into(), + subtype: TextSubtype::Json, + } + } + + pub fn as_str(&self) -> &str { + &self.value + } +} + +#[derive(Debug, Clone, Copy)] +pub struct TextRef<'a> { + pub value: &'a str, + pub subtype: TextSubtype, +} + +impl<'a> TextRef<'a> { + pub fn new(value: &'a str, subtype: TextSubtype) -> Self { + Self { value, subtype } + } + + #[inline] + pub fn as_str(&self) -> &'a str { + self.value + } +} + +impl<'a> Borrow for TextRef<'a> { + #[inline] + fn borrow(&self) -> &str { + self.as_str() + } +} + +impl<'a> Deref for TextRef<'a> { + type Target = str; + + #[inline] + fn deref(&self) -> &Self::Target { + self.as_str() + } +} + +pub trait Extendable { + fn do_extend(&mut self, other: &T); +} + +impl Extendable for Text { + #[inline(always)] + fn do_extend(&mut self, other: &T) { + let value = self.value.to_mut(); + value.clear(); + value.push_str(other.as_ref()); + self.subtype = other.subtype(); + } +} + +impl Extendable for Vec { + #[inline(always)] + fn do_extend(&mut self, other: &T) { + self.clear(); + self.extend_from_slice(other.as_slice()); + } +} + +pub trait AnyText: AsRef { + fn subtype(&self) -> TextSubtype; +} + +impl AnyText for Text { + fn subtype(&self) -> TextSubtype { + self.subtype + } +} + +impl AnyText for &str { + fn subtype(&self) -> TextSubtype { + TextSubtype::Text + } +} + +pub trait AnyBlob { + fn as_slice(&self) -> &[u8]; +} + +impl AnyBlob for Vec { + fn as_slice(&self) -> &[u8] { + self.as_slice() + } +} + +impl AnyBlob for &[u8] { + fn as_slice(&self) -> &[u8] { + self + } +} + +impl AsRef for Text { + fn as_ref(&self) -> &str { + self.as_str() + } +} + +impl From<&str> for Text { + fn from(value: &str) -> Self { + Text { + value: value.to_owned().into(), + subtype: TextSubtype::Text, + } + } +} + +impl From for Text { + fn from(value: String) -> Self { + Text { + value: Cow::from(value), + subtype: TextSubtype::Text, + } + } +} + +impl From for String { + fn from(value: Text) -> Self { + value.value.into_owned() + } +} + +#[cfg(feature = "serde")] +fn float_to_string(float: &f64, serializer: S) -> Result +where + S: serde::Serializer, +{ + serializer.serialize_str(&format!("{float}")) +} + +#[cfg(feature = "serde")] +fn string_to_float<'de, D>(deserializer: D) -> Result +where + D: serde::Deserializer<'de>, +{ + let s = String::deserialize(deserializer)?; + match crate::numeric::str_to_f64(s) { + Some(result) => Ok(result.into()), + None => Err(serde::de::Error::custom("")), + } +} + +#[derive(Debug, Clone)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub enum Value { + Null, + Integer(i64), + // we use custom serialization to preserve float precision + #[cfg_attr( + feature = "serde", + serde( + serialize_with = "float_to_string", + deserialize_with = "string_to_float" + ) + )] + Float(f64), + Text(Text), + Blob(Vec), +} + +/// Please use Display trait for all limbo output so we have single origin of truth +/// When you need value as string: +/// ---GOOD--- +/// format!("{}", value); +/// ---BAD--- +/// match value { +/// Value::Integer(i) => *i.as_str(), +/// Value::Float(f) => *f.as_str(), +/// .... +/// } +impl Display for Value { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Null => write!(f, ""), + Self::Integer(i) => { + write!(f, "{i}") + } + Self::Float(fl) => f.write_str(&format_float(*fl)), + Self::Text(s) => { + write!(f, "{}", s.as_str()) + } + Self::Blob(b) => write!(f, "{}", String::from_utf8_lossy(b)), + } + } +} + +impl PartialEq for Value { + fn eq(&self, other: &Value) -> bool { + let (left, right) = (self.as_value_ref(), other.as_value_ref()); + left.eq(&right) + } +} + +impl PartialOrd for Value { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Eq for Value {} + +impl Ord for Value { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + let (left, right) = (self.as_value_ref(), other.as_value_ref()); + left.cmp(&right) + } +} + +impl std::ops::Add for Value { + type Output = Value; + + fn add(mut self, rhs: Self) -> Self::Output { + self += rhs; + self + } +} + +impl std::ops::Add for Value { + type Output = Value; + + fn add(mut self, rhs: f64) -> Self::Output { + self += rhs; + self + } +} + +impl std::ops::Add for Value { + type Output = Value; + + fn add(mut self, rhs: i64) -> Self::Output { + self += rhs; + self + } +} + +impl std::ops::AddAssign for Value { + fn add_assign(mut self: &mut Self, rhs: Self) { + match (&mut self, rhs) { + (Self::Integer(int_left), Self::Integer(int_right)) => *int_left += int_right, + (Self::Integer(int_left), Self::Float(float_right)) => { + *self = Self::Float(*int_left as f64 + float_right) + } + (Self::Float(float_left), Self::Integer(int_right)) => { + *self = Self::Float(*float_left + int_right as f64) + } + (Self::Float(float_left), Self::Float(float_right)) => { + *float_left += float_right; + } + (Self::Text(string_left), Self::Text(string_right)) => { + string_left.value.to_mut().push_str(&string_right.value); + string_left.subtype = TextSubtype::Text; + } + (Self::Text(string_left), Self::Integer(int_right)) => { + let string_right = int_right.to_string(); + string_left.value.to_mut().push_str(&string_right); + string_left.subtype = TextSubtype::Text; + } + (Self::Integer(int_left), Self::Text(string_right)) => { + let string_left = int_left.to_string(); + *self = Self::build_text(string_left + string_right.as_str()); + } + (Self::Text(string_left), Self::Float(float_right)) => { + let string_right = Self::Float(float_right).to_string(); + string_left.value.to_mut().push_str(&string_right); + string_left.subtype = TextSubtype::Text; + } + (Self::Float(float_left), Self::Text(string_right)) => { + let string_left = Self::Float(*float_left).to_string(); + *self = Self::build_text(string_left + string_right.as_str()); + } + (_, Self::Null) => {} + (Self::Null, rhs) => *self = rhs, + _ => *self = Self::Float(0.0), + } + } +} + +impl std::ops::AddAssign for Value { + fn add_assign(&mut self, rhs: i64) { + match self { + Self::Integer(int_left) => *int_left += rhs, + Self::Float(float_left) => *float_left += rhs as f64, + _ => unreachable!(), + } + } +} + +impl std::ops::AddAssign for Value { + fn add_assign(&mut self, rhs: f64) { + match self { + Self::Integer(int_left) => *self = Self::Float(*int_left as f64 + rhs), + Self::Float(float_left) => *float_left += rhs, + _ => unreachable!(), + } + } +} + +impl std::ops::Div for Value { + type Output = Value; + + fn div(self, rhs: Value) -> Self::Output { + match (self, rhs) { + (Self::Integer(int_left), Self::Integer(int_right)) => { + Self::Integer(int_left / int_right) + } + (Self::Integer(int_left), Self::Float(float_right)) => { + Self::Float(int_left as f64 / float_right) + } + (Self::Float(float_left), Self::Integer(int_right)) => { + Self::Float(float_left / int_right as f64) + } + (Self::Float(float_left), Self::Float(float_right)) => { + Self::Float(float_left / float_right) + } + _ => Self::Float(0.0), + } + } +} + +impl std::ops::DivAssign for Value { + fn div_assign(&mut self, rhs: Value) { + *self = self.clone() / rhs; + } +} + +impl Value { + pub fn as_ref<'a>(&'a self) -> ValueRef<'a> { + match self { + Value::Null => ValueRef::Null, + Value::Integer(v) => ValueRef::Integer(*v), + Value::Float(v) => ValueRef::Float(*v), + Value::Text(v) => ValueRef::Text(TextRef { + value: &v.value, + subtype: v.subtype, + }), + Value::Blob(v) => ValueRef::Blob(v.as_slice()), + } + } + + // A helper function that makes building a text Value easier. + pub fn build_text(text: impl Into>) -> Self { + Self::Text(Text::new(text)) + } + + pub fn to_blob(&self) -> Option<&[u8]> { + match self { + Self::Blob(blob) => Some(blob), + _ => None, + } + } + + pub fn from_blob(data: Vec) -> Self { + Value::Blob(data) + } + + pub fn to_text(&self) -> Option<&str> { + match self { + Value::Text(t) => Some(t.as_str()), + _ => None, + } + } + + pub fn as_blob(&self) -> &Vec { + match self { + Value::Blob(b) => b, + _ => panic!("as_blob must be called only for Value::Blob"), + } + } + + pub fn as_blob_mut(&mut self) -> &mut Vec { + match self { + Value::Blob(b) => b, + _ => panic!("as_blob must be called only for Value::Blob"), + } + } + pub fn as_float(&self) -> f64 { + match self { + Value::Float(f) => *f, + Value::Integer(i) => *i as f64, + _ => panic!("as_float must be called only for Value::Float or Value::Integer"), + } + } + + pub fn as_int(&self) -> Option { + match self { + Value::Integer(i) => Some(*i), + _ => None, + } + } + + pub fn as_uint(&self) -> u64 { + match self { + Value::Integer(i) => (*i).cast_unsigned(), + _ => 0, + } + } + + pub fn from_text(text: impl Into>) -> Self { + Value::Text(Text::new(text)) + } + + pub fn value_type(&self) -> ValueType { + match self { + Value::Null => ValueType::Null, + Value::Integer(_) => ValueType::Integer, + Value::Float(_) => ValueType::Float, + Value::Text(_) => ValueType::Text, + Value::Blob(_) => ValueType::Blob, + } + } + + /// Cast Value to String, if Value is NULL returns None + pub fn cast_text(&self) -> Option { + Some(match self { + Value::Null => return None, + v => v.to_string(), + }) + } +} + +#[derive(Clone, Copy)] +pub enum ValueRef<'a> { + Null, + Integer(i64), + Float(f64), + Text(TextRef<'a>), + Blob(&'a [u8]), +} + +impl<'a> ValueRef<'a> { + pub fn to_blob(&self) -> Option<&'a [u8]> { + match self { + Self::Blob(blob) => Some(*blob), + _ => None, + } + } + + pub fn to_text(&self) -> Option<&'a str> { + match self { + Self::Text(t) => Some(t.as_str()), + _ => None, + } + } + + pub fn as_blob(&self) -> &'a [u8] { + match self { + Self::Blob(b) => b, + _ => panic!("as_blob must be called only for Value::Blob"), + } + } + + pub fn as_float(&self) -> f64 { + match self { + Self::Float(f) => *f, + Self::Integer(i) => *i as f64, + _ => panic!("as_float must be called only for Value::Float or Value::Integer"), + } + } + + pub fn as_int(&self) -> Option { + match self { + Self::Integer(i) => Some(*i), + _ => None, + } + } + + pub fn as_uint(&self) -> u64 { + match self { + Self::Integer(i) => (*i).cast_unsigned(), + _ => 0, + } + } + + pub fn to_owned(&self) -> Value { + match self { + ValueRef::Null => Value::Null, + ValueRef::Integer(i) => Value::Integer(*i), + ValueRef::Float(f) => Value::Float(*f), + ValueRef::Text(text) => Value::Text(Text { + value: text.value.to_string().into(), + subtype: text.subtype, + }), + ValueRef::Blob(b) => Value::Blob(b.to_vec()), + } + } + + pub fn value_type(&self) -> ValueType { + match self { + Self::Null => ValueType::Null, + Self::Integer(_) => ValueType::Integer, + Self::Float(_) => ValueType::Float, + Self::Text(_) => ValueType::Text, + Self::Blob(_) => ValueType::Blob, + } + } +} + +impl Display for ValueRef<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Null => write!(f, "NULL"), + Self::Integer(i) => write!(f, "{i}"), + Self::Float(fl) => write!(f, "{fl:?}"), + Self::Text(s) => write!(f, "{}", s.as_str()), + Self::Blob(b) => write!(f, "{}", String::from_utf8_lossy(b)), + } + } +} + +impl Debug for ValueRef<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + ValueRef::Null => write!(f, "Null"), + ValueRef::Integer(i) => f.debug_tuple("Integer").field(i).finish(), + ValueRef::Float(float) => f.debug_tuple("Float").field(float).finish(), + ValueRef::Text(text_ref) => { + // truncate string to at most 256 chars + let text = text_ref.as_str(); + let max_len = text.len().min(256); + f.debug_struct("Text") + .field("data", &&text[0..max_len]) + // Indicates to the developer debugging that the data is truncated for printing + .field("truncated", &(text.len() > max_len)) + .finish() + } + ValueRef::Blob(blob) => { + // truncate blob_slice to at most 32 bytes + let max_len = blob.len().min(32); + f.debug_struct("Blob") + .field("data", &&blob[0..max_len]) + // Indicates to the developer debugging that the data is truncated for printing + .field("truncated", &(blob.len() > max_len)) + .finish() + } + } + } +} + +impl<'a> PartialEq> for ValueRef<'a> { + fn eq(&self, other: &ValueRef<'a>) -> bool { + match (self, other) { + (Self::Integer(int_left), Self::Integer(int_right)) => int_left == int_right, + (Self::Integer(int), Self::Float(float)) | (Self::Float(float), Self::Integer(int)) => { + sqlite_int_float_compare(*int, *float).is_eq() + } + (Self::Float(float_left), Self::Float(float_right)) => float_left == float_right, + (Self::Integer(_) | Self::Float(_), Self::Text(_) | Self::Blob(_)) => false, + (Self::Text(_) | Self::Blob(_), Self::Integer(_) | Self::Float(_)) => false, + (Self::Text(text_left), Self::Text(text_right)) => { + text_left.value.as_bytes() == text_right.value.as_bytes() + } + (Self::Blob(blob_left), Self::Blob(blob_right)) => blob_left.eq(blob_right), + (Self::Null, Self::Null) => true, + _ => false, + } + } +} + +impl<'a> PartialEq for ValueRef<'a> { + fn eq(&self, other: &Value) -> bool { + let other = other.as_value_ref(); + self.eq(&other) + } +} + +impl<'a> Eq for ValueRef<'a> {} + +#[expect(clippy::non_canonical_partial_ord_impl)] +impl<'a> PartialOrd> for ValueRef<'a> { + fn partial_cmp(&self, other: &Self) -> Option { + match (self, other) { + (Self::Integer(int_left), Self::Integer(int_right)) => int_left.partial_cmp(int_right), + (Self::Integer(int_left), Self::Float(float_right)) => { + (*int_left as f64).partial_cmp(float_right) + } + (Self::Float(float_left), Self::Integer(int_right)) => { + float_left.partial_cmp(&(*int_right as f64)) + } + (Self::Float(float_left), Self::Float(float_right)) => { + float_left.partial_cmp(float_right) + } + // Numeric vs Text/Blob + (Self::Integer(_) | Self::Float(_), Self::Text(_) | Self::Blob(_)) => { + Some(std::cmp::Ordering::Less) + } + (Self::Text(_) | Self::Blob(_), Self::Integer(_) | Self::Float(_)) => { + Some(std::cmp::Ordering::Greater) + } + + (Self::Text(text_left), Self::Text(text_right)) => text_left + .value + .as_bytes() + .partial_cmp(text_right.value.as_bytes()), + // Text vs Blob + (Self::Text(_), Self::Blob(_)) => Some(std::cmp::Ordering::Less), + (Self::Blob(_), Self::Text(_)) => Some(std::cmp::Ordering::Greater), + + (Self::Blob(blob_left), Self::Blob(blob_right)) => blob_left.partial_cmp(blob_right), + (Self::Null, Self::Null) => Some(std::cmp::Ordering::Equal), + (Self::Null, _) => Some(std::cmp::Ordering::Less), + (_, Self::Null) => Some(std::cmp::Ordering::Greater), + } + } +} + +impl<'a> Ord for ValueRef<'a> { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + self.partial_cmp(other).unwrap() + } +} + +pub fn sqlite_int_float_compare(int_val: i64, float_val: f64) -> std::cmp::Ordering { + if float_val.is_nan() { + return std::cmp::Ordering::Greater; + } + + if float_val < -9223372036854775808.0 { + return std::cmp::Ordering::Greater; + } + if float_val >= 9223372036854775808.0 { + return std::cmp::Ordering::Less; + } + + let float_as_int = float_val as i64; + match int_val.cmp(&float_as_int) { + std::cmp::Ordering::Equal => { + let int_as_float = int_val as f64; + int_as_float + .partial_cmp(&float_val) + .unwrap_or(std::cmp::Ordering::Equal) + } + other => other, + } +} + +pub trait AsValueRef { + fn as_value_ref<'a>(&'a self) -> ValueRef<'a>; +} + +impl<'b> AsValueRef for ValueRef<'b> { + #[inline] + fn as_value_ref<'a>(&'a self) -> ValueRef<'a> { + *self + } +} + +impl AsValueRef for Value { + #[inline] + fn as_value_ref<'a>(&'a self) -> ValueRef<'a> { + self.as_ref() + } +} + +impl AsValueRef for &mut Value { + #[inline] + fn as_value_ref<'a>(&'a self) -> ValueRef<'a> { + self.as_ref() + } +} + +impl AsValueRef for Either +where + V1: AsValueRef, + V2: AsValueRef, +{ + #[inline] + fn as_value_ref<'a>(&'a self) -> ValueRef<'a> { + match self { + Either::Left(left) => left.as_value_ref(), + Either::Right(right) => right.as_value_ref(), + } + } +} + +impl AsValueRef for &V { + fn as_value_ref<'a>(&'a self) -> ValueRef<'a> { + (*self).as_value_ref() + } +} + +#[derive(Debug, thiserror::Error)] +pub enum FromSqlError { + #[error("Null value")] + NullValue, + #[error("invalid column type")] + InvalidColumnType, + #[error("Invalid blob size, expected {0}")] + InvalidBlobSize(usize), +} + +/// Convert a `Value` into the implementors type. +pub trait FromValue: Sealed { + fn from_sql(val: Value) -> Result + where + Self: Sized; +} + +impl FromValue for Value { + fn from_sql(val: Value) -> Result { + Ok(val) + } +} +impl Sealed for Value {} + +macro_rules! impl_int_from_value { + ($ty:ty, $cast:expr) => { + impl FromValue for $ty { + fn from_sql(val: Value) -> Result { + match val { + Value::Null => Err(FromSqlError::NullValue), + Value::Integer(i) => Ok($cast(i)), + _ => unreachable!("invalid value type"), + } + } + } + + impl Sealed for $ty {} + }; +} + +impl_int_from_value!(i32, |i| i as i32); +impl_int_from_value!(u32, |i| i as u32); +impl_int_from_value!(i64, |i| i); +impl_int_from_value!(u64, |i| i as u64); + +impl FromValue for f64 { + fn from_sql(val: Value) -> Result { + match val { + Value::Null => Err(FromSqlError::NullValue), + Value::Float(f) => Ok(f), + _ => unreachable!("invalid value type"), + } + } +} +impl Sealed for f64 {} + +impl FromValue for Vec { + fn from_sql(val: Value) -> Result { + match val { + Value::Null => Err(FromSqlError::NullValue), + Value::Blob(blob) => Ok(blob), + _ => unreachable!("invalid value type"), + } + } +} +impl Sealed for Vec {} + +impl FromValue for [u8; N] { + fn from_sql(val: Value) -> Result { + match val { + Value::Null => Err(FromSqlError::NullValue), + Value::Blob(blob) => blob + .try_into() + .map_err(|_| FromSqlError::InvalidBlobSize(N)), + _ => unreachable!("invalid value type"), + } + } +} +impl Sealed for [u8; N] {} + +impl FromValue for String { + fn from_sql(val: Value) -> Result { + match val { + Value::Null => Err(FromSqlError::NullValue), + Value::Text(s) => Ok(s.to_string()), + _ => unreachable!("invalid value type"), + } + } +} +impl Sealed for String {} + +impl FromValue for bool { + fn from_sql(val: Value) -> Result { + match val { + Value::Null => Err(FromSqlError::NullValue), + Value::Integer(i) => match i { + 0 => Ok(false), + 1 => Ok(true), + _ => Err(FromSqlError::InvalidColumnType), + }, + _ => unreachable!("invalid value type"), + } + } +} +impl Sealed for bool {} + +impl FromValue for Option +where + T: FromValue, +{ + fn from_sql(val: Value) -> Result { + match val { + Value::Null => Ok(None), + _ => T::from_sql(val).map(Some), + } + } +} +impl Sealed for Option {} + +mod sealed { + pub trait Sealed {} +} +use sealed::Sealed; diff --git a/core/Cargo.toml b/core/Cargo.toml index 348a8f107b..100bba6fb0 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -26,7 +26,7 @@ time = [] fuzz = [] omit_autovacuum = [] simulator = ["fuzz", "serde"] -serde = ["dep:serde"] +serde = ["dep:serde", "turso_common/serde"] series = [] encryption = [] checksum = [] @@ -86,6 +86,7 @@ simsimd = "6.5.3" arc-swap = "1.7" rustc-hash = "2.0" either = { workspace = true } +turso_common = { workspace = true } [build-dependencies] chrono = { workspace = true, default-features = false } diff --git a/core/error.rs b/core/error.rs index c5bb811db2..a294672508 100644 --- a/core/error.rs +++ b/core/error.rs @@ -1,4 +1,5 @@ use thiserror::Error; +use turso_common::schema::collate::CollateError; use crate::storage::page_cache::CacheError; @@ -79,6 +80,14 @@ pub enum LimboError { PlanningError(String), } +impl From for LimboError { + fn from(value: CollateError) -> Self { + match value { + CollateError::NotFound(msg) => LimboError::ParseError(msg), + } + } +} + // We only propagate the error kind so we can avoid string allocation in hot path and copying/cloning enums is cheaper impl From for LimboError { fn from(value: std::io::Error) -> Self { diff --git a/core/ext/vtab_xconnect.rs b/core/ext/vtab_xconnect.rs index 7cd1321cf2..823aad36a4 100644 --- a/core/ext/vtab_xconnect.rs +++ b/core/ext/vtab_xconnect.rs @@ -1,4 +1,7 @@ -use crate::{types::Value, Connection, Statement, StepResult}; +use crate::{ + types::{FromExtValue, ToExtValue, Value}, + Connection, Statement, StepResult, +}; use std::{ boxed::Box, ffi::{c_char, c_void, CStr, CString}, diff --git a/core/lib.rs b/core/lib.rs index a32e5497b5..c97b04ca17 100644 --- a/core/lib.rs +++ b/core/lib.rs @@ -35,10 +35,10 @@ pub mod vector; mod vtab; #[cfg(feature = "fuzz")] -pub mod numeric; +pub use turso_common::numeric; #[cfg(not(feature = "fuzz"))] -mod numeric; +use turso_common::numeric; use crate::index_method::IndexMethod; use crate::storage::checksum::CHECKSUM_REQUIRED_RESERVED_BYTES; @@ -109,7 +109,8 @@ pub use types::ValueRef; use util::parse_schema_rows; pub use util::IOExt; pub use vdbe::{ - builder::QueryMode, explain::EXPLAIN_COLUMNS, explain::EXPLAIN_QUERY_PLAN_COLUMNS, Register, + builder::QueryMode, explain::EXPLAIN_COLUMNS, explain::EXPLAIN_QUERY_PLAN_COLUMNS, + value::ExecValue, Register, }; /// Configuration for database features diff --git a/core/schema.rs b/core/schema.rs index 4990b4b3c8..ebc70677b8 100644 --- a/core/schema.rs +++ b/core/schema.rs @@ -4,8 +4,8 @@ use crate::index_method::{IndexMethodAttachment, IndexMethodConfiguration}; use crate::translate::expr::{bind_and_rewrite_expr, walk_expr, BindingBehavior, WalkControl}; use crate::translate::index::{resolve_index_method_parameters, resolve_sorted_columns}; use crate::translate::planner::ROWID_STRS; -use crate::vdbe::affinity::Affinity; use parking_lot::RwLock; +pub use turso_common::schema::column::{Column, Type}; use turso_macros::AtomicEnum; #[derive(Debug, Clone, AtomicEnum)] @@ -85,15 +85,12 @@ use crate::{ Connection, LimboError, MvCursor, MvStore, Pager, SymbolTable, ValueRef, VirtualTable, }; use crate::{util::normalize_ident, Result}; -use core::fmt; use std::collections::{HashMap, HashSet, VecDeque}; use std::ops::Deref; use std::sync::Arc; use std::sync::Mutex; use tracing::trace; -use turso_parser::ast::{ - self, ColumnDefinition, Expr, InitDeferredPred, Literal, RefAct, SortOrder, TableOptions, -}; +use turso_parser::ast::{self, Expr, InitDeferredPred, Literal, RefAct, SortOrder, TableOptions}; use turso_parser::{ ast::{Cmd, CreateTableBody, ResultColumn, Stmt}, parser::Parser, @@ -2065,297 +2062,6 @@ impl ResolvedFkRef { } } -#[derive(Debug, Clone)] -pub struct Column { - pub name: Option, - pub ty_str: String, - pub default: Option>, - raw: u16, -} - -// flags -const F_PRIMARY_KEY: u16 = 1; -const F_ROWID_ALIAS: u16 = 2; -const F_NOTNULL: u16 = 4; -const F_UNIQUE: u16 = 8; -const F_HIDDEN: u16 = 16; - -// pack Type and Collation in the remaining bits -const TYPE_SHIFT: u16 = 5; -const TYPE_MASK: u16 = 0b111 << TYPE_SHIFT; -const COLL_SHIFT: u16 = TYPE_SHIFT + 3; -const COLL_MASK: u16 = 0b11 << COLL_SHIFT; - -impl Column { - pub fn affinity(&self) -> Affinity { - Affinity::affinity(&self.ty_str) - } - pub const fn new_default_text( - name: Option, - ty_str: String, - default: Option>, - ) -> Self { - Self::new( - name, - ty_str, - default, - Type::Text, - None, - false, - false, - false, - false, - false, - ) - } - pub const fn new_default_integer( - name: Option, - ty_str: String, - default: Option>, - ) -> Self { - Self::new( - name, - ty_str, - default, - Type::Integer, - None, - false, - false, - false, - false, - false, - ) - } - #[inline] - #[allow(clippy::too_many_arguments)] - pub const fn new( - name: Option, - ty_str: String, - default: Option>, - ty: Type, - col: Option, - primary_key: bool, - rowid_alias: bool, - notnull: bool, - unique: bool, - hidden: bool, - ) -> Self { - let mut raw = 0u16; - raw |= (ty as u16) << TYPE_SHIFT; - if let Some(c) = col { - raw |= (c as u16) << COLL_SHIFT; - } - if primary_key { - raw |= F_PRIMARY_KEY - } - if rowid_alias { - raw |= F_ROWID_ALIAS - } - if notnull { - raw |= F_NOTNULL - } - if unique { - raw |= F_UNIQUE - } - if hidden { - raw |= F_HIDDEN - } - Self { - name, - ty_str, - default, - raw, - } - } - #[inline] - pub const fn ty(&self) -> Type { - let v = ((self.raw & TYPE_MASK) >> TYPE_SHIFT) as u8; - Type::from_bits(v) - } - - #[inline] - pub const fn set_ty(&mut self, ty: Type) { - self.raw = (self.raw & !TYPE_MASK) | (((ty as u16) << TYPE_SHIFT) & TYPE_MASK); - } - - #[inline] - pub const fn collation_opt(&self) -> Option { - if self.has_explicit_collation() { - Some(self.collation()) - } else { - None - } - } - - #[inline] - pub const fn collation(&self) -> CollationSeq { - let v = ((self.raw & COLL_MASK) >> COLL_SHIFT) as u8; - CollationSeq::from_bits(v) - } - - #[inline] - pub const fn has_explicit_collation(&self) -> bool { - let v = ((self.raw & COLL_MASK) >> COLL_SHIFT) as u8; - v != CollationSeq::Unset as u8 - } - - #[inline] - pub const fn set_collation(&mut self, c: Option) { - if let Some(c) = c { - self.raw = (self.raw & !COLL_MASK) | (((c as u16) << COLL_SHIFT) & COLL_MASK); - } - } - - #[inline] - pub fn primary_key(&self) -> bool { - self.raw & F_PRIMARY_KEY != 0 - } - #[inline] - pub const fn is_rowid_alias(&self) -> bool { - self.raw & F_ROWID_ALIAS != 0 - } - #[inline] - pub const fn notnull(&self) -> bool { - self.raw & F_NOTNULL != 0 - } - #[inline] - pub const fn unique(&self) -> bool { - self.raw & F_UNIQUE != 0 - } - #[inline] - pub const fn hidden(&self) -> bool { - self.raw & F_HIDDEN != 0 - } - - #[inline] - pub const fn set_primary_key(&mut self, v: bool) { - self.set_flag(F_PRIMARY_KEY, v); - } - #[inline] - pub const fn set_rowid_alias(&mut self, v: bool) { - self.set_flag(F_ROWID_ALIAS, v); - } - #[inline] - pub const fn set_notnull(&mut self, v: bool) { - self.set_flag(F_NOTNULL, v); - } - #[inline] - pub const fn set_unique(&mut self, v: bool) { - self.set_flag(F_UNIQUE, v); - } - #[inline] - pub const fn set_hidden(&mut self, v: bool) { - self.set_flag(F_HIDDEN, v); - } - - #[inline] - const fn set_flag(&mut self, mask: u16, val: bool) { - if val { - self.raw |= mask - } else { - self.raw &= !mask - } - } -} - -// TODO: This might replace some of util::columns_from_create_table_body -impl From<&ColumnDefinition> for Column { - fn from(value: &ColumnDefinition) -> Self { - let name = value.col_name.as_str(); - - let mut default = None; - let mut notnull = false; - let mut primary_key = false; - let mut unique = false; - let mut collation = None; - - for ast::NamedColumnConstraint { constraint, .. } in &value.constraints { - match constraint { - ast::ColumnConstraint::PrimaryKey { .. } => primary_key = true, - ast::ColumnConstraint::NotNull { .. } => notnull = true, - ast::ColumnConstraint::Unique(..) => unique = true, - ast::ColumnConstraint::Default(expr) => { - default - .replace(translate_ident_to_string_literal(expr).unwrap_or(expr.clone())); - } - ast::ColumnConstraint::Collate { collation_name } => { - collation.replace( - CollationSeq::new(collation_name.as_str()) - .expect("collation should have been set correctly in create table"), - ); - } - _ => {} - }; - } - - let ty = match value.col_type { - Some(ref data_type) => type_from_name(&data_type.name).0, - None => Type::Null, - }; - - let ty_str = value - .col_type - .as_ref() - .map(|t| t.name.to_string()) - .unwrap_or_default(); - - let hidden = ty_str.contains("HIDDEN"); - - Column::new( - Some(normalize_ident(name)), - ty_str, - default, - ty, - collation, - primary_key, - primary_key && matches!(ty, Type::Integer), - notnull, - unique, - hidden, - ) - } -} - -#[repr(u8)] -#[derive(Debug, Clone, Copy, PartialEq)] -pub enum Type { - Null = 0, - Text = 1, - Numeric = 2, - Integer = 3, - Real = 4, - Blob = 5, -} - -impl Type { - #[inline] - const fn from_bits(bits: u8) -> Self { - match bits { - 0 => Type::Null, - 1 => Type::Text, - 2 => Type::Numeric, - 3 => Type::Integer, - 4 => Type::Real, - 5 => Type::Blob, - _ => Type::Null, - } - } -} - -impl fmt::Display for Type { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let s = match self { - Self::Null => "", - Self::Text => "TEXT", - Self::Numeric => "NUMERIC", - Self::Integer => "INTEGER", - Self::Real => "REAL", - Self::Blob => "BLOB", - }; - write!(f, "{s}") - } -} - pub fn sqlite_schema_table() -> BTreeTable { BTreeTable { root_page: 1, diff --git a/core/translate/collate.rs b/core/translate/collate.rs index 6c5048d509..a7d9cc5cac 100644 --- a/core/translate/collate.rs +++ b/core/translate/collate.rs @@ -1,5 +1,4 @@ -use std::{cmp::Ordering, str::FromStr as _}; - +pub use turso_common::schema::collate::*; use turso_parser::ast::Expr; use crate::{ @@ -10,67 +9,6 @@ use crate::{ Result, }; -// TODO: in the future allow user to define collation sequences -// Will have to meddle with ffi for this -#[derive( - Debug, Clone, Copy, Eq, PartialEq, strum_macros::Display, strum_macros::EnumString, Default, -)] -#[strum(ascii_case_insensitive)] -/// **Pre defined collation sequences**\ -/// Collating functions only matter when comparing string values. -/// Numeric values are always compared numerically, and BLOBs are always compared byte-by-byte using memcmp(). -#[repr(u8)] -pub enum CollationSeq { - Unset = 0, - #[default] - Binary = 1, - NoCase = 2, - Rtrim = 3, -} - -impl CollationSeq { - pub fn new(collation: &str) -> crate::Result { - CollationSeq::from_str(collation).map_err(|_| { - crate::LimboError::ParseError(format!("no such collation sequence: {collation}")) - }) - } - #[inline] - /// Returns the collation, defaulting to BINARY if unset - pub const fn from_bits(bits: u8) -> Self { - match bits { - 2 => CollationSeq::NoCase, - 3 => CollationSeq::Rtrim, - _ => CollationSeq::Binary, - } - } - - #[inline(always)] - pub fn compare_strings(&self, lhs: &str, rhs: &str) -> Ordering { - match self { - CollationSeq::Unset | CollationSeq::Binary => Self::binary_cmp(lhs, rhs), - CollationSeq::NoCase => Self::nocase_cmp(lhs, rhs), - CollationSeq::Rtrim => Self::rtrim_cmp(lhs, rhs), - } - } - - #[inline(always)] - fn binary_cmp(lhs: &str, rhs: &str) -> Ordering { - lhs.cmp(rhs) - } - - #[inline(always)] - fn nocase_cmp(lhs: &str, rhs: &str) -> Ordering { - let nocase_lhs = uncased::UncasedStr::new(lhs); - let nocase_rhs = uncased::UncasedStr::new(rhs); - nocase_lhs.cmp(nocase_rhs) - } - - #[inline(always)] - fn rtrim_cmp(lhs: &str, rhs: &str) -> Ordering { - lhs.trim_end().cmp(rhs.trim_end()) - } -} - /// Every column of every table has an associated collating function. If no collating function is explicitly defined, /// then the collating function defaults to BINARY. /// The COLLATE clause of the column definition is used to define alternative collating functions for a column. diff --git a/core/translate/expr.rs b/core/translate/expr.rs index fbda8897d7..9e4244fe0a 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -1,6 +1,7 @@ use std::sync::Arc; use tracing::{instrument, Level}; +use turso_common::schema::affinity::Affinity; use turso_parser::ast::{self, Expr, SubqueryType, UnaryOperator}; use super::emitter::Resolver; @@ -15,7 +16,7 @@ use crate::translate::optimizer::TakeOwnership; use crate::translate::plan::{Operation, ResultSetColumn}; use crate::translate::planner::parse_row_id; use crate::util::{exprs_are_equivalent, normalize_ident, parse_numeric_literal}; -use crate::vdbe::affinity::Affinity; + use crate::vdbe::builder::CursorKey; use crate::vdbe::{ builder::ProgramBuilder, diff --git a/core/translate/insert.rs b/core/translate/insert.rs index 234323cd7b..0f76713caa 100644 --- a/core/translate/insert.rs +++ b/core/translate/insert.rs @@ -1,5 +1,6 @@ use std::num::NonZeroUsize; use std::sync::Arc; +use turso_common::schema::affinity::Affinity; use turso_parser::ast::{ self, Expr, InsertBody, OneSelect, QualifiedName, ResolveType, ResultColumn, Upsert, UpsertDo, }; @@ -27,7 +28,7 @@ use crate::translate::upsert::{ collect_set_clauses_for_upsert, emit_upsert, resolve_upsert_target, ResolvedUpsertTarget, }; use crate::util::normalize_ident; -use crate::vdbe::affinity::Affinity; + use crate::vdbe::builder::ProgramBuilderOpts; use crate::vdbe::insn::{CmpInsFlags, IdxInsertFlags, InsertFlags, RegisterOrLiteral}; use crate::vdbe::BranchOffset; diff --git a/core/translate/main_loop.rs b/core/translate/main_loop.rs index 14424ea7fb..178b58a0dc 100644 --- a/core/translate/main_loop.rs +++ b/core/translate/main_loop.rs @@ -1,3 +1,4 @@ +use turso_common::schema::affinity::{self, Affinity}; use turso_parser::ast::{fmt::ToTokens, SortOrder}; use std::sync::Arc; @@ -18,6 +19,13 @@ use super::{ Search, SeekDef, SelectPlan, TableReferences, WhereTerm, }, }; +use crate::translate::{ + collate::get_collseq_from_expr, + emitter::UpdateRowSource, + plan::{EvalAt, NonFromClauseSubquery}, + subquery::emit_non_from_clause_subquery, + window::emit_window_loop_source, +}; use crate::{ schema::{Index, IndexColumn, Table}, translate::{ @@ -27,23 +35,12 @@ use crate::{ }, types::SeekOp, vdbe::{ - affinity, builder::{CursorKey, CursorType, ProgramBuilder}, insn::{CmpInsFlags, IdxInsertFlags, Insn}, BranchOffset, CursorID, }, Result, }; -use crate::{ - translate::{ - collate::get_collseq_from_expr, - emitter::UpdateRowSource, - plan::{EvalAt, NonFromClauseSubquery}, - subquery::emit_non_from_clause_subquery, - window::emit_window_loop_source, - }, - vdbe::affinity::Affinity, -}; // Metadata for handling LEFT JOIN operations #[derive(Debug)] diff --git a/core/translate/optimizer/constraints.rs b/core/translate/optimizer/constraints.rs index 4e6b6bbd04..133b7b6ab1 100644 --- a/core/translate/optimizer/constraints.rs +++ b/core/translate/optimizer/constraints.rs @@ -13,9 +13,9 @@ use crate::{ planner::{table_mask_from_expr, TableMask}, }, util::exprs_are_equivalent, - vdbe::affinity::Affinity, Result, }; +use turso_common::schema::affinity::Affinity; use turso_ext::{ConstraintInfo, ConstraintOp}; use turso_parser::ast::{self, SortOrder, TableInternalId}; diff --git a/core/translate/optimizer/mod.rs b/core/translate/optimizer/mod.rs index cc706b898d..51b40edb86 100644 --- a/core/translate/optimizer/mod.rs +++ b/core/translate/optimizer/mod.rs @@ -12,6 +12,7 @@ use cost::Cost; use join::{compute_best_join_order, BestJoinOrderResult}; use lift_common_subexpressions::lift_common_subexpressions_from_binary_or_terms; use order::{compute_order_target, plan_satisfies_order_target, EliminatesSortBy}; +use turso_common::schema::affinity::Affinity; use turso_ext::{ConstraintInfo, ConstraintUsage}; use turso_parser::ast::{self, Expr, SortOrder}; @@ -32,10 +33,7 @@ use crate::{ util::{ exprs_are_equivalent, simple_bind_expr, try_capture_parameters, try_substitute_parameters, }, - vdbe::{ - affinity::Affinity, - builder::{CursorKey, CursorType, ProgramBuilder}, - }, + vdbe::builder::{CursorKey, CursorType, ProgramBuilder}, LimboError, Result, }; diff --git a/core/translate/plan.rs b/core/translate/plan.rs index 775bdfa674..753d2ab896 100644 --- a/core/translate/plan.rs +++ b/core/translate/plan.rs @@ -1,4 +1,5 @@ use std::{cmp::Ordering, collections::HashMap, marker::PhantomData, sync::Arc}; +use turso_common::schema::affinity::Affinity; use turso_parser::ast::{ self, FrameBound, FrameClause, FrameExclude, FrameMode, SortOrder, SubqueryType, }; @@ -11,7 +12,6 @@ use crate::{ optimizer::constraints::SeekRangeConstraint, }, vdbe::{ - affinity::Affinity, builder::{CursorKey, CursorType, ProgramBuilder}, insn::{IdxInsertFlags, Insn}, BranchOffset, CursorID, diff --git a/core/types.rs b/core/types.rs index f4c3434085..1ab59b6905 100644 --- a/core/types.rs +++ b/core/types.rs @@ -1,13 +1,13 @@ -use either::Either; -#[cfg(feature = "serde")] -use serde::Deserialize; +pub use turso_common::value::{ + sqlite_int_float_compare, AsValueRef, Extendable, FromValue, Text, TextRef, TextSubtype, Value, + ValueRef, ValueType, +}; use turso_ext::{AggCtx, FinalizeFunction, StepFunction}; use turso_parser::ast::SortOrder; use crate::error::LimboError; use crate::ext::{ExtValue, ExtValueType}; use crate::index_method::IndexMethodCursor; -use crate::numeric::format_float; use crate::pseudo::PseudoCursor; use crate::schema::Index; use crate::storage::btree::CursorTrait; @@ -18,10 +18,8 @@ use crate::vdbe::sorter::Sorter; use crate::vdbe::Register; use crate::vtab::VirtualTableCursor; use crate::{Completion, CompletionError, Result, IO}; -use std::borrow::{Borrow, Cow}; -use std::fmt::{Debug, Display}; +use std::fmt::Debug; use std::iter::Peekable; -use std::ops::Deref; use std::task::Waker; /// SQLite by default uses 2000 as maximum numbers in a row. @@ -29,424 +27,6 @@ use std::task::Waker; /// But the hard limit of number of columns is 32,767 columns i16::MAX const MAX_COLUMN: usize = 2000; -#[derive(Debug, Clone, Copy, PartialEq)] -pub enum ValueType { - Null, - Integer, - Float, - Text, - Blob, - Error, -} - -impl Display for ValueType { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let value = match self { - Self::Null => "NULL", - Self::Integer => "INT", - Self::Float => "REAL", - Self::Blob => "BLOB", - Self::Text => "TEXT", - Self::Error => "ERROR", - }; - write!(f, "{value}") - } -} - -#[derive(Debug, Clone, Copy, PartialEq)] -#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] -pub enum TextSubtype { - Text, - #[cfg(feature = "json")] - Json, -} - -#[derive(Debug, Clone)] -#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] -pub struct Text { - pub value: Cow<'static, str>, - pub subtype: TextSubtype, -} - -impl Display for Text { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", self.as_str()) - } -} - -impl Text { - pub fn new(value: impl Into>) -> Self { - Self { - value: value.into(), - subtype: TextSubtype::Text, - } - } - #[cfg(feature = "json")] - pub fn json(value: String) -> Self { - Self { - value: value.into(), - subtype: TextSubtype::Json, - } - } - - pub fn as_str(&self) -> &str { - &self.value - } -} - -#[derive(Debug, Clone, Copy)] -pub struct TextRef<'a> { - pub value: &'a str, - pub subtype: TextSubtype, -} - -impl<'a> TextRef<'a> { - pub fn new(value: &'a str, subtype: TextSubtype) -> Self { - Self { value, subtype } - } - - #[inline] - pub fn as_str(&self) -> &'a str { - self.value - } -} - -impl<'a> Borrow for TextRef<'a> { - #[inline] - fn borrow(&self) -> &str { - self.as_str() - } -} - -impl<'a> Deref for TextRef<'a> { - type Target = str; - - #[inline] - fn deref(&self) -> &Self::Target { - self.as_str() - } -} - -pub trait Extendable { - fn do_extend(&mut self, other: &T); -} - -impl Extendable for Text { - #[inline(always)] - fn do_extend(&mut self, other: &T) { - let value = self.value.to_mut(); - value.clear(); - value.push_str(other.as_ref()); - self.subtype = other.subtype(); - } -} - -impl Extendable for Vec { - #[inline(always)] - fn do_extend(&mut self, other: &T) { - self.clear(); - self.extend_from_slice(other.as_slice()); - } -} - -pub trait AnyText: AsRef { - fn subtype(&self) -> TextSubtype; -} - -impl AnyText for Text { - fn subtype(&self) -> TextSubtype { - self.subtype - } -} - -impl AnyText for &str { - fn subtype(&self) -> TextSubtype { - TextSubtype::Text - } -} - -pub trait AnyBlob { - fn as_slice(&self) -> &[u8]; -} - -impl AnyBlob for Vec { - fn as_slice(&self) -> &[u8] { - self.as_slice() - } -} - -impl AnyBlob for &[u8] { - fn as_slice(&self) -> &[u8] { - self - } -} - -impl AsRef for Text { - fn as_ref(&self) -> &str { - self.as_str() - } -} - -impl From<&str> for Text { - fn from(value: &str) -> Self { - Text { - value: value.to_owned().into(), - subtype: TextSubtype::Text, - } - } -} - -impl From for Text { - fn from(value: String) -> Self { - Text { - value: Cow::from(value), - subtype: TextSubtype::Text, - } - } -} - -impl From for String { - fn from(value: Text) -> Self { - value.value.into_owned() - } -} - -#[cfg(feature = "serde")] -fn float_to_string(float: &f64, serializer: S) -> Result -where - S: serde::Serializer, -{ - serializer.serialize_str(&format!("{float}")) -} - -#[cfg(feature = "serde")] -fn string_to_float<'de, D>(deserializer: D) -> Result -where - D: serde::Deserializer<'de>, -{ - let s = String::deserialize(deserializer)?; - match crate::numeric::str_to_f64(s) { - Some(result) => Ok(result.into()), - None => Err(serde::de::Error::custom("")), - } -} - -#[derive(Debug, Clone)] -#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] -pub enum Value { - Null, - Integer(i64), - // we use custom serialization to preserve float precision - #[cfg_attr( - feature = "serde", - serde( - serialize_with = "float_to_string", - deserialize_with = "string_to_float" - ) - )] - Float(f64), - Text(Text), - Blob(Vec), -} - -#[derive(Clone, Copy)] -pub enum ValueRef<'a> { - Null, - Integer(i64), - Float(f64), - Text(TextRef<'a>), - Blob(&'a [u8]), -} - -impl Debug for ValueRef<'_> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - ValueRef::Null => write!(f, "Null"), - ValueRef::Integer(i) => f.debug_tuple("Integer").field(i).finish(), - ValueRef::Float(float) => f.debug_tuple("Float").field(float).finish(), - ValueRef::Text(text_ref) => { - // truncate string to at most 256 chars - let text = text_ref.as_str(); - let max_len = text.len().min(256); - f.debug_struct("Text") - .field("data", &&text[0..max_len]) - // Indicates to the developer debugging that the data is truncated for printing - .field("truncated", &(text.len() > max_len)) - .finish() - } - ValueRef::Blob(blob) => { - // truncate blob_slice to at most 32 bytes - let max_len = blob.len().min(32); - f.debug_struct("Blob") - .field("data", &&blob[0..max_len]) - // Indicates to the developer debugging that the data is truncated for printing - .field("truncated", &(blob.len() > max_len)) - .finish() - } - } - } -} - -pub trait AsValueRef { - fn as_value_ref<'a>(&'a self) -> ValueRef<'a>; -} - -impl<'b> AsValueRef for ValueRef<'b> { - #[inline] - fn as_value_ref<'a>(&'a self) -> ValueRef<'a> { - *self - } -} - -impl AsValueRef for Value { - #[inline] - fn as_value_ref<'a>(&'a self) -> ValueRef<'a> { - self.as_ref() - } -} - -impl AsValueRef for &mut Value { - #[inline] - fn as_value_ref<'a>(&'a self) -> ValueRef<'a> { - self.as_ref() - } -} - -impl AsValueRef for Either -where - V1: AsValueRef, - V2: AsValueRef, -{ - #[inline] - fn as_value_ref<'a>(&'a self) -> ValueRef<'a> { - match self { - Either::Left(left) => left.as_value_ref(), - Either::Right(right) => right.as_value_ref(), - } - } -} - -impl AsValueRef for &V { - fn as_value_ref<'a>(&'a self) -> ValueRef<'a> { - (*self).as_value_ref() - } -} - -impl Value { - pub fn as_ref<'a>(&'a self) -> ValueRef<'a> { - match self { - Value::Null => ValueRef::Null, - Value::Integer(v) => ValueRef::Integer(*v), - Value::Float(v) => ValueRef::Float(*v), - Value::Text(v) => ValueRef::Text(TextRef { - value: &v.value, - subtype: v.subtype, - }), - Value::Blob(v) => ValueRef::Blob(v.as_slice()), - } - } - - // A helper function that makes building a text Value easier. - pub fn build_text(text: impl Into>) -> Self { - Self::Text(Text::new(text)) - } - - pub fn to_blob(&self) -> Option<&[u8]> { - match self { - Self::Blob(blob) => Some(blob), - _ => None, - } - } - - pub fn from_blob(data: Vec) -> Self { - Value::Blob(data) - } - - pub fn to_text(&self) -> Option<&str> { - match self { - Value::Text(t) => Some(t.as_str()), - _ => None, - } - } - - pub fn as_blob(&self) -> &Vec { - match self { - Value::Blob(b) => b, - _ => panic!("as_blob must be called only for Value::Blob"), - } - } - - pub fn as_blob_mut(&mut self) -> &mut Vec { - match self { - Value::Blob(b) => b, - _ => panic!("as_blob must be called only for Value::Blob"), - } - } - pub fn as_float(&self) -> f64 { - match self { - Value::Float(f) => *f, - Value::Integer(i) => *i as f64, - _ => panic!("as_float must be called only for Value::Float or Value::Integer"), - } - } - - pub fn as_int(&self) -> Option { - match self { - Value::Integer(i) => Some(*i), - _ => None, - } - } - - pub fn as_uint(&self) -> u64 { - match self { - Value::Integer(i) => (*i).cast_unsigned(), - _ => 0, - } - } - - pub fn from_text(text: impl Into>) -> Self { - Value::Text(Text::new(text)) - } - - pub fn value_type(&self) -> ValueType { - match self { - Value::Null => ValueType::Null, - Value::Integer(_) => ValueType::Integer, - Value::Float(_) => ValueType::Float, - Value::Text(_) => ValueType::Text, - Value::Blob(_) => ValueType::Blob, - } - } - pub fn serialize_serial(&self, out: &mut Vec) { - match self { - Value::Null => {} - Value::Integer(i) => { - let serial_type = SerialType::from(self); - match serial_type.kind() { - SerialTypeKind::I8 => out.extend_from_slice(&(*i as i8).to_be_bytes()), - SerialTypeKind::I16 => out.extend_from_slice(&(*i as i16).to_be_bytes()), - SerialTypeKind::I24 => out.extend_from_slice(&(*i as i32).to_be_bytes()[1..]), // remove most significant byte - SerialTypeKind::I32 => out.extend_from_slice(&(*i as i32).to_be_bytes()), - SerialTypeKind::I48 => out.extend_from_slice(&i.to_be_bytes()[2..]), // remove 2 most significant bytes - SerialTypeKind::I64 => out.extend_from_slice(&i.to_be_bytes()), - _ => unreachable!(), - } - } - Value::Float(f) => out.extend_from_slice(&f.to_be_bytes()), - Value::Text(t) => out.extend_from_slice(t.value.as_bytes()), - Value::Blob(b) => out.extend_from_slice(b), - }; - } - - /// Cast Value to String, if Value is NULL returns None - pub fn cast_text(&self) -> Option { - Some(match self { - Value::Null => return None, - v => v.to_string(), - }) - } -} - #[derive(Debug, Clone, PartialEq)] pub struct ExternalAggState { pub state: *mut AggCtx, @@ -455,44 +35,27 @@ pub struct ExternalAggState { pub finalize_fn: FinalizeFunction, } -/// Please use Display trait for all limbo output so we have single origin of truth -/// When you need value as string: -/// ---GOOD--- -/// format!("{}", value); -/// ---BAD--- -/// match value { -/// Value::Integer(i) => *i.as_str(), -/// Value::Float(f) => *f.as_str(), -/// .... -/// } -impl Display for Value { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Self::Null => write!(f, ""), - Self::Integer(i) => { - write!(f, "{i}") - } - Self::Float(fl) => f.write_str(&format_float(*fl)), - Self::Text(s) => { - write!(f, "{}", s.as_str()) - } - Self::Blob(b) => write!(f, "{}", String::from_utf8_lossy(b)), +pub trait ToExtValue: AsValueRef { + fn to_ffi(&self) -> ExtValue { + let value = self.as_value_ref(); + match value { + ValueRef::Null => ExtValue::null(), + ValueRef::Integer(i) => ExtValue::from_integer(i), + ValueRef::Float(fl) => ExtValue::from_float(fl), + ValueRef::Text(text) => ExtValue::from_text(text.as_str().to_string()), + ValueRef::Blob(blob) => ExtValue::from_blob(blob.to_vec()), } } } -impl Value { - pub fn to_ffi(&self) -> ExtValue { - match self { - Self::Null => ExtValue::null(), - Self::Integer(i) => ExtValue::from_integer(*i), - Self::Float(fl) => ExtValue::from_float(*fl), - Self::Text(text) => ExtValue::from_text(text.as_str().to_string()), - Self::Blob(blob) => ExtValue::from_blob(blob.to_vec()), - } - } +impl ToExtValue for V {} + +pub trait FromExtValue: Sized { + fn from_ffi(v: ExtValue) -> Result; +} - pub fn from_ffi(v: ExtValue) -> Result { +impl FromExtValue for Value { + fn from_ffi(v: ExtValue) -> Result { let res = match v.value_type() { ExtValueType::Null => Ok(Value::Null), ExtValueType::Integer => { @@ -538,118 +101,6 @@ impl Value { } } -/// Convert a `Value` into the implementors type. -pub trait FromValue: Sealed { - fn from_sql(val: Value) -> Result - where - Self: Sized; -} - -impl FromValue for Value { - fn from_sql(val: Value) -> Result { - Ok(val) - } -} -impl Sealed for crate::Value {} - -macro_rules! impl_int_from_value { - ($ty:ty, $cast:expr) => { - impl FromValue for $ty { - fn from_sql(val: Value) -> Result { - match val { - Value::Null => Err(LimboError::NullValue), - Value::Integer(i) => Ok($cast(i)), - _ => unreachable!("invalid value type"), - } - } - } - - impl Sealed for $ty {} - }; -} - -impl_int_from_value!(i32, |i| i as i32); -impl_int_from_value!(u32, |i| i as u32); -impl_int_from_value!(i64, |i| i); -impl_int_from_value!(u64, |i| i as u64); - -impl FromValue for f64 { - fn from_sql(val: Value) -> Result { - match val { - Value::Null => Err(LimboError::NullValue), - Value::Float(f) => Ok(f), - _ => unreachable!("invalid value type"), - } - } -} -impl Sealed for f64 {} - -impl FromValue for Vec { - fn from_sql(val: Value) -> Result { - match val { - Value::Null => Err(LimboError::NullValue), - Value::Blob(blob) => Ok(blob), - _ => unreachable!("invalid value type"), - } - } -} -impl Sealed for Vec {} - -impl FromValue for [u8; N] { - fn from_sql(val: Value) -> Result { - match val { - Value::Null => Err(LimboError::NullValue), - Value::Blob(blob) => blob.try_into().map_err(|_| LimboError::InvalidBlobSize(N)), - _ => unreachable!("invalid value type"), - } - } -} -impl Sealed for [u8; N] {} - -impl FromValue for String { - fn from_sql(val: Value) -> Result { - match val { - Value::Null => Err(LimboError::NullValue), - Value::Text(s) => Ok(s.to_string()), - _ => unreachable!("invalid value type"), - } - } -} -impl Sealed for String {} - -impl FromValue for bool { - fn from_sql(val: Value) -> Result { - match val { - Value::Null => Err(LimboError::NullValue), - Value::Integer(i) => match i { - 0 => Ok(false), - 1 => Ok(true), - _ => Err(LimboError::InvalidColumnType), - }, - _ => unreachable!("invalid value type"), - } - } -} -impl Sealed for bool {} - -impl FromValue for Option -where - T: FromValue, -{ - fn from_sql(val: Value) -> Result { - match val { - Value::Null => Ok(None), - _ => T::from_sql(val).map(Some), - } - } -} -impl Sealed for Option {} - -mod sealed { - pub trait Sealed {} -} -use sealed::Sealed; - #[derive(Debug, Clone, PartialEq)] pub struct SumAggState { pub r_err: f64, // Error term for Kahan-Babushka-Neumaier summation @@ -688,191 +139,6 @@ impl AggContext { } } -impl PartialEq for Value { - fn eq(&self, other: &Value) -> bool { - let (left, right) = (self.as_value_ref(), other.as_value_ref()); - left.eq(&right) - } -} - -impl PartialOrd for Value { - fn partial_cmp(&self, other: &Self) -> Option { - Some(self.cmp(other)) - } -} - -impl PartialOrd for AggContext { - fn partial_cmp(&self, other: &AggContext) -> Option { - match (self, other) { - (Self::Avg(a, _), Self::Avg(b, _)) => a.partial_cmp(b), - (Self::Sum(a, _), Self::Sum(b, _)) => a.partial_cmp(b), - (Self::Count(a), Self::Count(b)) => a.partial_cmp(b), - (Self::Max(a), Self::Max(b)) => a.partial_cmp(b), - (Self::Min(a), Self::Min(b)) => a.partial_cmp(b), - (Self::GroupConcat(a), Self::GroupConcat(b)) => a.partial_cmp(b), - _ => None, - } - } -} - -impl Eq for Value {} - -impl Ord for Value { - fn cmp(&self, other: &Self) -> std::cmp::Ordering { - let (left, right) = (self.as_value_ref(), other.as_value_ref()); - left.cmp(&right) - } -} - -impl std::ops::Add for Value { - type Output = Value; - - fn add(mut self, rhs: Self) -> Self::Output { - self += rhs; - self - } -} - -impl std::ops::Add for Value { - type Output = Value; - - fn add(mut self, rhs: f64) -> Self::Output { - self += rhs; - self - } -} - -impl std::ops::Add for Value { - type Output = Value; - - fn add(mut self, rhs: i64) -> Self::Output { - self += rhs; - self - } -} - -impl std::ops::AddAssign for Value { - fn add_assign(mut self: &mut Self, rhs: Self) { - match (&mut self, rhs) { - (Self::Integer(int_left), Self::Integer(int_right)) => *int_left += int_right, - (Self::Integer(int_left), Self::Float(float_right)) => { - *self = Self::Float(*int_left as f64 + float_right) - } - (Self::Float(float_left), Self::Integer(int_right)) => { - *self = Self::Float(*float_left + int_right as f64) - } - (Self::Float(float_left), Self::Float(float_right)) => { - *float_left += float_right; - } - (Self::Text(string_left), Self::Text(string_right)) => { - string_left.value.to_mut().push_str(&string_right.value); - string_left.subtype = TextSubtype::Text; - } - (Self::Text(string_left), Self::Integer(int_right)) => { - let string_right = int_right.to_string(); - string_left.value.to_mut().push_str(&string_right); - string_left.subtype = TextSubtype::Text; - } - (Self::Integer(int_left), Self::Text(string_right)) => { - let string_left = int_left.to_string(); - *self = Self::build_text(string_left + string_right.as_str()); - } - (Self::Text(string_left), Self::Float(float_right)) => { - let string_right = Self::Float(float_right).to_string(); - string_left.value.to_mut().push_str(&string_right); - string_left.subtype = TextSubtype::Text; - } - (Self::Float(float_left), Self::Text(string_right)) => { - let string_left = Self::Float(*float_left).to_string(); - *self = Self::build_text(string_left + string_right.as_str()); - } - (_, Self::Null) => {} - (Self::Null, rhs) => *self = rhs, - _ => *self = Self::Float(0.0), - } - } -} - -impl std::ops::AddAssign for Value { - fn add_assign(&mut self, rhs: i64) { - match self { - Self::Integer(int_left) => *int_left += rhs, - Self::Float(float_left) => *float_left += rhs as f64, - _ => unreachable!(), - } - } -} - -impl std::ops::AddAssign for Value { - fn add_assign(&mut self, rhs: f64) { - match self { - Self::Integer(int_left) => *self = Self::Float(*int_left as f64 + rhs), - Self::Float(float_left) => *float_left += rhs, - _ => unreachable!(), - } - } -} - -impl std::ops::Div for Value { - type Output = Value; - - fn div(self, rhs: Value) -> Self::Output { - match (self, rhs) { - (Self::Integer(int_left), Self::Integer(int_right)) => { - Self::Integer(int_left / int_right) - } - (Self::Integer(int_left), Self::Float(float_right)) => { - Self::Float(int_left as f64 / float_right) - } - (Self::Float(float_left), Self::Integer(int_right)) => { - Self::Float(float_left / int_right as f64) - } - (Self::Float(float_left), Self::Float(float_right)) => { - Self::Float(float_left / float_right) - } - _ => Self::Float(0.0), - } - } -} - -impl std::ops::DivAssign for Value { - fn div_assign(&mut self, rhs: Value) { - *self = self.clone() / rhs; - } -} - -impl TryFrom> for i64 { - type Error = LimboError; - - fn try_from(value: ValueRef<'_>) -> Result { - match value { - ValueRef::Integer(i) => Ok(i), - _ => Err(LimboError::ConversionError("Expected integer value".into())), - } - } -} - -impl TryFrom> for String { - type Error = LimboError; - - #[inline] - fn try_from(value: ValueRef<'_>) -> Result { - Ok(<&str>::try_from(value)?.to_string()) - } -} - -impl<'a> TryFrom> for &'a str { - type Error = LimboError; - - #[inline] - fn try_from(value: ValueRef<'a>) -> Result { - match value { - ValueRef::Text(s) => Ok(s.as_str()), - _ => Err(LimboError::ConversionError("Expected text value".into())), - } - } -} - /// This struct serves the purpose of not allocating multiple vectors of bytes if not needed. /// A value in a record that has already been serialized can stay serialized and what this struct offsers /// is easy acces to each value which point to the payload. @@ -1026,7 +292,7 @@ impl ImmutableRecord { let mut serial_type_buf = [0; 9]; // write serial types for value in values.clone() { - let serial_type = SerialType::from(value.as_value_ref()); + let serial_type = SerialType::from_value(value); let n = write_varint(&mut serial_type_buf[0..], serial_type.into()); serials.push((serial_type_buf, n)); @@ -1059,7 +325,7 @@ impl ImmutableRecord { match value { ValueRef::Null => {} ValueRef::Integer(i) => { - let serial_type = SerialType::from(value); + let serial_type = SerialType::from_value(value); match serial_type.kind() { SerialTypeKind::ConstInt0 | SerialTypeKind::ConstInt1 => {} SerialTypeKind::I8 => writer.extend_from_slice(&(i as i8).to_be_bytes()), @@ -1492,193 +758,6 @@ impl RecordCursor { } } -impl<'a> ValueRef<'a> { - pub fn to_ffi(&self) -> ExtValue { - match self { - Self::Null => ExtValue::null(), - Self::Integer(i) => ExtValue::from_integer(*i), - Self::Float(fl) => ExtValue::from_float(*fl), - Self::Text(text) => ExtValue::from_text(text.as_str().to_string()), - Self::Blob(blob) => ExtValue::from_blob(blob.to_vec()), - } - } - - pub fn to_blob(&self) -> Option<&'a [u8]> { - match self { - Self::Blob(blob) => Some(*blob), - _ => None, - } - } - - pub fn to_text(&self) -> Option<&'a str> { - match self { - Self::Text(t) => Some(t.as_str()), - _ => None, - } - } - - pub fn as_blob(&self) -> &'a [u8] { - match self { - Self::Blob(b) => b, - _ => panic!("as_blob must be called only for Value::Blob"), - } - } - - pub fn as_float(&self) -> f64 { - match self { - Self::Float(f) => *f, - Self::Integer(i) => *i as f64, - _ => panic!("as_float must be called only for Value::Float or Value::Integer"), - } - } - - pub fn as_int(&self) -> Option { - match self { - Self::Integer(i) => Some(*i), - _ => None, - } - } - - pub fn as_uint(&self) -> u64 { - match self { - Self::Integer(i) => (*i).cast_unsigned(), - _ => 0, - } - } - - pub fn to_owned(&self) -> Value { - match self { - ValueRef::Null => Value::Null, - ValueRef::Integer(i) => Value::Integer(*i), - ValueRef::Float(f) => Value::Float(*f), - ValueRef::Text(text) => Value::Text(Text { - value: text.value.to_string().into(), - subtype: text.subtype, - }), - ValueRef::Blob(b) => Value::Blob(b.to_vec()), - } - } - - pub fn value_type(&self) -> ValueType { - match self { - Self::Null => ValueType::Null, - Self::Integer(_) => ValueType::Integer, - Self::Float(_) => ValueType::Float, - Self::Text(_) => ValueType::Text, - Self::Blob(_) => ValueType::Blob, - } - } -} - -impl Display for ValueRef<'_> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Self::Null => write!(f, "NULL"), - Self::Integer(i) => write!(f, "{i}"), - Self::Float(fl) => write!(f, "{fl:?}"), - Self::Text(s) => write!(f, "{}", s.as_str()), - Self::Blob(b) => write!(f, "{}", String::from_utf8_lossy(b)), - } - } -} - -impl<'a> PartialEq> for ValueRef<'a> { - fn eq(&self, other: &ValueRef<'a>) -> bool { - match (self, other) { - (Self::Integer(int_left), Self::Integer(int_right)) => int_left == int_right, - (Self::Integer(int), Self::Float(float)) | (Self::Float(float), Self::Integer(int)) => { - sqlite_int_float_compare(*int, *float).is_eq() - } - (Self::Float(float_left), Self::Float(float_right)) => float_left == float_right, - (Self::Integer(_) | Self::Float(_), Self::Text(_) | Self::Blob(_)) => false, - (Self::Text(_) | Self::Blob(_), Self::Integer(_) | Self::Float(_)) => false, - (Self::Text(text_left), Self::Text(text_right)) => { - text_left.value.as_bytes() == text_right.value.as_bytes() - } - (Self::Blob(blob_left), Self::Blob(blob_right)) => blob_left.eq(blob_right), - (Self::Null, Self::Null) => true, - _ => false, - } - } -} - -impl<'a> PartialEq for ValueRef<'a> { - fn eq(&self, other: &Value) -> bool { - let other = other.as_value_ref(); - self.eq(&other) - } -} - -impl<'a> Eq for ValueRef<'a> {} - -#[expect(clippy::non_canonical_partial_ord_impl)] -impl<'a> PartialOrd> for ValueRef<'a> { - fn partial_cmp(&self, other: &Self) -> Option { - match (self, other) { - (Self::Integer(int_left), Self::Integer(int_right)) => int_left.partial_cmp(int_right), - (Self::Integer(int_left), Self::Float(float_right)) => { - (*int_left as f64).partial_cmp(float_right) - } - (Self::Float(float_left), Self::Integer(int_right)) => { - float_left.partial_cmp(&(*int_right as f64)) - } - (Self::Float(float_left), Self::Float(float_right)) => { - float_left.partial_cmp(float_right) - } - // Numeric vs Text/Blob - (Self::Integer(_) | Self::Float(_), Self::Text(_) | Self::Blob(_)) => { - Some(std::cmp::Ordering::Less) - } - (Self::Text(_) | Self::Blob(_), Self::Integer(_) | Self::Float(_)) => { - Some(std::cmp::Ordering::Greater) - } - - (Self::Text(text_left), Self::Text(text_right)) => text_left - .value - .as_bytes() - .partial_cmp(text_right.value.as_bytes()), - // Text vs Blob - (Self::Text(_), Self::Blob(_)) => Some(std::cmp::Ordering::Less), - (Self::Blob(_), Self::Text(_)) => Some(std::cmp::Ordering::Greater), - - (Self::Blob(blob_left), Self::Blob(blob_right)) => blob_left.partial_cmp(blob_right), - (Self::Null, Self::Null) => Some(std::cmp::Ordering::Equal), - (Self::Null, _) => Some(std::cmp::Ordering::Less), - (_, Self::Null) => Some(std::cmp::Ordering::Greater), - } - } -} - -impl<'a> Ord for ValueRef<'a> { - fn cmp(&self, other: &Self) -> std::cmp::Ordering { - self.partial_cmp(other).unwrap() - } -} - -fn sqlite_int_float_compare(int_val: i64, float_val: f64) -> std::cmp::Ordering { - if float_val.is_nan() { - return std::cmp::Ordering::Greater; - } - - if float_val < -9223372036854775808.0 { - return std::cmp::Ordering::Greater; - } - if float_val >= 9223372036854775808.0 { - return std::cmp::Ordering::Less; - } - - let float_as_int = float_val as i64; - match int_val.cmp(&float_as_int) { - std::cmp::Ordering::Equal => { - let int_as_float = int_val as f64; - int_as_float - .partial_cmp(&float_val) - .unwrap_or(std::cmp::Ordering::Equal) - } - other => other, - } -} - #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub struct KeyInfo { pub sort_order: SortOrder, @@ -2338,10 +1417,8 @@ impl SerialType { SerialTypeKind::Blob => (self.0 as usize - 12) / 2, } } -} -impl From for SerialType { - fn from(value: T) -> Self { + pub fn from_value(value: impl AsValueRef) -> Self { let value = value.as_value_ref(); match value { ValueRef::Null => SerialType::null(), @@ -2420,7 +1497,7 @@ impl Record { // write serial types for value in &self.values { - let serial_type = SerialType::from(value); + let serial_type = SerialType::from_value(value); buf.resize(buf.len() + 9, 0); // Ensure space for varint (1-9 bytes in length) let len = buf.len(); let n = write_varint(&mut buf[len - 9..], serial_type.into()); @@ -2433,7 +1510,7 @@ impl Record { match value { Value::Null => {} Value::Integer(i) => { - let serial_type = SerialType::from(value); + let serial_type = SerialType::from_value(value); match serial_type.kind() { SerialTypeKind::ConstInt0 | SerialTypeKind::ConstInt1 => {} SerialTypeKind::I8 => buf.extend_from_slice(&(*i as i8).to_be_bytes()), @@ -2752,6 +1829,8 @@ impl WalFrameInfo { #[cfg(test)] mod tests { + use turso_common::value::{Text, TextRef, TextSubtype}; + use super::*; use crate::translate::collate::CollationSeq; diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index bba8b9cc98..5deb6747e9 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -13,14 +13,13 @@ use crate::storage::pager::{AtomicDbState, CreateBTreeFlags, DbState}; use crate::storage::sqlite3_ondisk::{read_varint_fast, DatabaseHeader, PageSize}; use crate::translate::collate::CollationSeq; use crate::types::{ - compare_immutable, compare_records_generic, AsValueRef, Extendable, IOCompletions, - ImmutableRecord, SeekResult, Text, + compare_immutable, compare_records_generic, AsValueRef, Extendable, FromExtValue, + IOCompletions, ImmutableRecord, SeekResult, Text, ToExtValue, }; use crate::util::{ normalize_ident, rewrite_column_references_if_needed, rewrite_fk_parent_cols_if_self_ref, rewrite_fk_parent_table_if_needed, rewrite_inline_col_fk_target_if_needed, }; -use crate::vdbe::affinity::{apply_numeric_affinity, try_for_float, Affinity, ParsedNumber}; use crate::vdbe::insn::InsertFlags; use crate::vdbe::value::ComparisonOp; use crate::vdbe::{registers_to_ref_values, EndStatement, TxnCleanup}; @@ -39,7 +38,7 @@ use crate::{ }, translate::emitter::TransactionMode, }; -use crate::{get_cursor, CheckpointMode, Connection, DatabaseStorage, MvCursor}; +use crate::{get_cursor, CheckpointMode, Connection, DatabaseStorage, ExecValue, MvCursor}; use either::Either; use std::any::Any; use std::env::temp_dir; @@ -48,6 +47,9 @@ use std::{ borrow::BorrowMut, sync::{atomic::Ordering, Arc, Mutex}, }; +use turso_common::schema::affinity::{ + apply_numeric_affinity, try_for_float, Affinity, ParsedNumber, +}; use turso_macros::match_ignore_ascii_case; use crate::pseudo::PseudoCursor; diff --git a/core/vdbe/insn.rs b/core/vdbe/insn.rs index df70784d13..31d10f7574 100644 --- a/core/vdbe/insn.rs +++ b/core/vdbe/insn.rs @@ -9,11 +9,11 @@ use crate::{ storage::{pager::CreateBTreeFlags, wal::CheckpointMode}, translate::{collate::CollationSeq, emitter::TransactionMode}, types::KeyInfo, - vdbe::affinity::Affinity, Value, }; use strum::EnumCount; use strum_macros::{EnumDiscriminants, FromRepr, VariantArray}; +use turso_common::schema::affinity::Affinity; use turso_macros::Description; use turso_parser::ast::SortOrder; diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index bebb41aaa7..d574970807 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -17,7 +17,6 @@ //! //! https://www.sqlite.org/opcode.html -pub mod affinity; pub mod builder; pub mod execute; pub mod explain; diff --git a/core/vdbe/value.rs b/core/vdbe/value.rs index 62dfccd27f..1e9be6b350 100644 --- a/core/vdbe/value.rs +++ b/core/vdbe/value.rs @@ -1,13 +1,12 @@ -use std::collections::HashMap; - use regex::{Regex, RegexBuilder}; +use std::collections::HashMap; +use turso_common::schema::affinity::Affinity; use crate::{ function::MathFunc, numeric::{NullableInteger, Numeric}, translate::collate::CollationSeq, types::{compare_immutable_single, AsValueRef, SeekOp}, - vdbe::affinity::Affinity, LimboError, Result, Value, ValueRef, }; @@ -100,7 +99,7 @@ impl From for ComparisonOp { } } -enum TrimType { +pub enum TrimType { All, Left, Right, @@ -116,13 +115,132 @@ impl TrimType { } } -impl Value { - pub fn exec_lower(&self) -> Option { +pub trait ExecValue: Sized { + fn exec_lower(&self) -> Option; + + fn exec_length(&self) -> Self; + + fn exec_octet_length(&self) -> Self; + + fn exec_upper(&self) -> Option; + + fn exec_sign(&self) -> Option; + + /// Generates the Soundex code for a given word + fn exec_soundex(&self) -> Value; + + fn exec_abs(&self) -> Result; + + fn exec_random(generate_random_number: F) -> Self + where + F: Fn() -> i64; + + fn exec_randomblob(&self, fill_bytes: F) -> Value + where + F: Fn(&mut [u8]); + + fn exec_quote(&self) -> Self; + + fn exec_nullif(&self, second_value: &Self) -> Self; + + fn exec_substring(value: &Value, start_value: &Value, length_value: Option<&Value>) -> Value; + + fn exec_instr(&self, pattern: &Value) -> Value; + + fn exec_typeof(&self) -> Value; + + fn exec_hex(&self) -> Value; + + fn exec_unhex(&self, ignored_chars: Option<&Value>) -> Value; + + fn exec_unicode(&self) -> Value; + + fn exec_round(&self, precision: Option<&Value>) -> Value; + + fn _exec_trim(&self, pattern: Option<&Value>, trim_type: TrimType) -> Value; + + // Implements TRIM pattern matching. + fn exec_trim(&self, pattern: Option<&Value>) -> Value; + // Implements RTRIM pattern matching. + fn exec_rtrim(&self, pattern: Option<&Value>) -> Value; + + // Implements LTRIM pattern matching. + fn exec_ltrim(&self, pattern: Option<&Value>) -> Value; + + fn exec_zeroblob(&self) -> Value; + + // exec_if returns whether you should jump + fn exec_if(&self, jump_if_null: bool, not: bool) -> bool; + + fn exec_cast(&self, datatype: &str) -> Value; + + fn exec_replace(source: &Value, pattern: &Value, replacement: &Value) -> Value; + + fn exec_math_unary(&self, function: &MathFunc) -> Value; + + fn exec_math_binary(&self, rhs: &Value, function: &MathFunc) -> Value; + + fn exec_math_log(&self, base: Option<&Value>) -> Value; + + fn exec_add(&self, rhs: &Value) -> Value; + + fn exec_subtract(&self, rhs: &Value) -> Value; + + fn exec_multiply(&self, rhs: &Value) -> Value; + + fn exec_divide(&self, rhs: &Value) -> Value; + + fn exec_bit_and(&self, rhs: &Value) -> Value; + + fn exec_bit_or(&self, rhs: &Value) -> Value; + + fn exec_remainder(&self, rhs: &Value) -> Value; + + fn exec_bit_not(&self) -> Value; + + fn exec_shift_left(&self, rhs: &Value) -> Value; + + fn exec_shift_right(&self, rhs: &Value) -> Value; + + fn exec_boolean_not(&self) -> Value; + + fn exec_concat(&self, rhs: &Value) -> Value; + + fn exec_and(&self, rhs: &Value) -> Value; + + fn exec_or(&self, rhs: &Value) -> Value; + + // Implements LIKE pattern matching. Caches the constructed regex if a cache is provided + fn exec_like( + regex_cache: Option<&mut HashMap>, + pattern: &str, + text: &str, + ) -> bool; + + fn exec_min<'a, T: Iterator>(regs: T) -> Value; + + fn exec_max<'a, T: Iterator>(regs: T) -> Value; + + fn exec_concat_strings<'a, T: Iterator>(registers: T) -> Self + where + Self: 'a; + + fn exec_concat_ws<'a, T: ExactSizeIterator>(registers: T) -> Self + where + Self: 'a; + + fn exec_char<'a, T: Iterator>(values: T) -> Self + where + Self: 'a; +} + +impl ExecValue for Value { + fn exec_lower(&self) -> Option { self.cast_text() .map(|s| Value::build_text(s.to_ascii_lowercase())) } - pub fn exec_length(&self) -> Self { + fn exec_length(&self) -> Self { match self { Value::Text(t) => { let s = t.as_str(); @@ -141,7 +259,7 @@ impl Value { } } - pub fn exec_octet_length(&self) -> Self { + fn exec_octet_length(&self) -> Self { match self { Value::Text(_) | Value::Integer(_) | Value::Float(_) => { Value::Integer(self.to_string().into_bytes().len() as i64) @@ -151,12 +269,12 @@ impl Value { } } - pub fn exec_upper(&self) -> Option { + fn exec_upper(&self) -> Option { self.cast_text() .map(|s| Value::build_text(s.to_ascii_uppercase())) } - pub fn exec_sign(&self) -> Option { + fn exec_sign(&self) -> Option { let v = Numeric::from_value_strict(self).try_into_f64()?; Some(Value::Integer(if v > 0.0 { @@ -169,7 +287,7 @@ impl Value { } /// Generates the Soundex code for a given word - pub fn exec_soundex(&self) -> Value { + fn exec_soundex(&self) -> Value { let s = match self { Value::Null => return Value::build_text("?000"), Value::Text(s) => { @@ -259,7 +377,7 @@ impl Value { Value::build_text(result.to_uppercase()) } - pub fn exec_abs(&self) -> Result { + fn exec_abs(&self) -> Result { Ok(match self { Value::Null => Value::Null, Value::Integer(v) => { @@ -280,14 +398,14 @@ impl Value { }) } - pub fn exec_random(generate_random_number: F) -> Self + fn exec_random(generate_random_number: F) -> Self where F: Fn() -> i64, { Value::Integer(generate_random_number()) } - pub fn exec_randomblob(&self, fill_bytes: F) -> Value + fn exec_randomblob(&self, fill_bytes: F) -> Value where F: Fn(&mut [u8]), { @@ -304,7 +422,7 @@ impl Value { Value::Blob(blob) } - pub fn exec_quote(&self) -> Self { + fn exec_quote(&self) -> Self { match self { Value::Null => Value::build_text("NULL"), Value::Integer(_) | Value::Float(_) => self.to_owned(), @@ -328,7 +446,7 @@ impl Value { } } - pub fn exec_nullif(&self, second_value: &Self) -> Self { + fn exec_nullif(&self, second_value: &Self) -> Self { if self != second_value { self.clone() } else { @@ -336,11 +454,7 @@ impl Value { } } - pub fn exec_substring( - value: &Value, - start_value: &Value, - length_value: Option<&Value>, - ) -> Value { + fn exec_substring(value: &Value, start_value: &Value, length_value: Option<&Value>) -> Value { /// Function is stabilized but not released for version 1.88 \ /// https://doc.rust-lang.org/src/core/str/mod.rs.html#453 const fn ceil_char_boundary(s: &str, index: usize) -> usize { @@ -434,7 +548,7 @@ impl Value { } } - pub fn exec_instr(&self, pattern: &Value) -> Value { + fn exec_instr(&self, pattern: &Value) -> Value { if self == &Value::Null || pattern == &Value::Null { return Value::Null; } @@ -471,7 +585,7 @@ impl Value { } } - pub fn exec_typeof(&self) -> Value { + fn exec_typeof(&self) -> Value { match self { Value::Null => Value::build_text("null"), Value::Integer(_) => Value::build_text("integer"), @@ -481,7 +595,7 @@ impl Value { } } - pub fn exec_hex(&self) -> Value { + fn exec_hex(&self) -> Value { match self { Value::Text(_) | Value::Integer(_) | Value::Float(_) => { let text = self.to_string(); @@ -492,7 +606,7 @@ impl Value { } } - pub fn exec_unhex(&self, ignored_chars: Option<&Value>) -> Value { + fn exec_unhex(&self, ignored_chars: Option<&Value>) -> Value { match self { Value::Null => Value::Null, _ => match ignored_chars { @@ -522,7 +636,7 @@ impl Value { } } - pub fn exec_unicode(&self) -> Value { + fn exec_unicode(&self) -> Value { match self { Value::Text(_) | Value::Integer(_) | Value::Float(_) | Value::Blob(_) => { let text = self.to_string(); @@ -536,7 +650,7 @@ impl Value { } } - pub fn exec_round(&self, precision: Option<&Value>) -> Value { + fn exec_round(&self, precision: Option<&Value>) -> Value { let Some(f) = Numeric::from(self).try_into_f64() else { return Value::Null; }; @@ -580,20 +694,20 @@ impl Value { } // Implements TRIM pattern matching. - pub fn exec_trim(&self, pattern: Option<&Value>) -> Value { + fn exec_trim(&self, pattern: Option<&Value>) -> Value { self._exec_trim(pattern, TrimType::All) } // Implements RTRIM pattern matching. - pub fn exec_rtrim(&self, pattern: Option<&Value>) -> Value { + fn exec_rtrim(&self, pattern: Option<&Value>) -> Value { self._exec_trim(pattern, TrimType::Right) } // Implements LTRIM pattern matching. - pub fn exec_ltrim(&self, pattern: Option<&Value>) -> Value { + fn exec_ltrim(&self, pattern: Option<&Value>) -> Value { self._exec_trim(pattern, TrimType::Left) } - pub fn exec_zeroblob(&self) -> Value { + fn exec_zeroblob(&self) -> Value { let length: i64 = match self { Value::Integer(i) => *i, Value::Float(f) => *f as i64, @@ -604,14 +718,14 @@ impl Value { } // exec_if returns whether you should jump - pub fn exec_if(&self, jump_if_null: bool, not: bool) -> bool { + fn exec_if(&self, jump_if_null: bool, not: bool) -> bool { Numeric::from(self) .try_into_bool() .map(|jump| if not { !jump } else { jump }) .unwrap_or(jump_if_null) } - pub fn exec_cast(&self, datatype: &str) -> Value { + fn exec_cast(&self, datatype: &str) -> Value { if matches!(self, Value::Null) { return Value::Null; } @@ -701,7 +815,7 @@ impl Value { } } - pub fn exec_replace(source: &Value, pattern: &Value, replacement: &Value) -> Value { + fn exec_replace(source: &Value, pattern: &Value, replacement: &Value) -> Value { // The replace(X,Y,Z) function returns a string formed by substituting string Z for every occurrence of // string Y in string X. The BINARY collating sequence is used for comparisons. If Y is an empty string // then return X unchanged. If Z is not initially a string, it is cast to a UTF-8 string prior to processing. @@ -734,7 +848,7 @@ impl Value { } } - pub fn exec_math_unary(&self, function: &MathFunc) -> Value { + fn exec_math_unary(&self, function: &MathFunc) -> Value { let v = Numeric::from_value_strict(self); // In case of some functions and integer input, return the input as is @@ -786,7 +900,7 @@ impl Value { } } - pub fn exec_math_binary(&self, rhs: &Value, function: &MathFunc) -> Value { + fn exec_math_binary(&self, rhs: &Value, function: &MathFunc) -> Value { let Some(lhs) = Numeric::from_value_strict(self).try_into_f64() else { return Value::Null; }; @@ -809,7 +923,7 @@ impl Value { } } - pub fn exec_math_log(&self, base: Option<&Value>) -> Value { + fn exec_math_log(&self, base: Option<&Value>) -> Value { let Some(f) = Numeric::from_value_strict(self).try_into_f64() else { return Value::Null; }; @@ -841,31 +955,31 @@ impl Value { Value::Float(result) } - pub fn exec_add(&self, rhs: &Value) -> Value { + fn exec_add(&self, rhs: &Value) -> Value { (Numeric::from(self) + Numeric::from(rhs)).into() } - pub fn exec_subtract(&self, rhs: &Value) -> Value { + fn exec_subtract(&self, rhs: &Value) -> Value { (Numeric::from(self) - Numeric::from(rhs)).into() } - pub fn exec_multiply(&self, rhs: &Value) -> Value { + fn exec_multiply(&self, rhs: &Value) -> Value { (Numeric::from(self) * Numeric::from(rhs)).into() } - pub fn exec_divide(&self, rhs: &Value) -> Value { + fn exec_divide(&self, rhs: &Value) -> Value { (Numeric::from(self) / Numeric::from(rhs)).into() } - pub fn exec_bit_and(&self, rhs: &Value) -> Value { + fn exec_bit_and(&self, rhs: &Value) -> Value { (NullableInteger::from(self) & NullableInteger::from(rhs)).into() } - pub fn exec_bit_or(&self, rhs: &Value) -> Value { + fn exec_bit_or(&self, rhs: &Value) -> Value { (NullableInteger::from(self) | NullableInteger::from(rhs)).into() } - pub fn exec_remainder(&self, rhs: &Value) -> Value { + fn exec_remainder(&self, rhs: &Value) -> Value { let convert_to_float = matches!(Numeric::from(self), Numeric::Float(_)) || matches!(Numeric::from(rhs), Numeric::Float(_)); @@ -881,26 +995,26 @@ impl Value { } } - pub fn exec_bit_not(&self) -> Value { + fn exec_bit_not(&self) -> Value { (!NullableInteger::from(self)).into() } - pub fn exec_shift_left(&self, rhs: &Value) -> Value { + fn exec_shift_left(&self, rhs: &Value) -> Value { (NullableInteger::from(self) << NullableInteger::from(rhs)).into() } - pub fn exec_shift_right(&self, rhs: &Value) -> Value { + fn exec_shift_right(&self, rhs: &Value) -> Value { (NullableInteger::from(self) >> NullableInteger::from(rhs)).into() } - pub fn exec_boolean_not(&self) -> Value { + fn exec_boolean_not(&self) -> Value { match Numeric::from(self).try_into_bool() { None => Value::Null, Some(v) => Value::Integer(!v as i64), } } - pub fn exec_concat(&self, rhs: &Value) -> Value { + fn exec_concat(&self, rhs: &Value) -> Value { if let (Value::Blob(lhs), Value::Blob(rhs)) = (self, rhs) { return Value::build_text( String::from_utf8_lossy(&[lhs.as_slice(), rhs.as_slice()].concat()).into_owned(), @@ -918,7 +1032,7 @@ impl Value { Value::build_text(lhs + &rhs) } - pub fn exec_and(&self, rhs: &Value) -> Value { + fn exec_and(&self, rhs: &Value) -> Value { match ( Numeric::from(self).try_into_bool(), Numeric::from(rhs).try_into_bool(), @@ -929,7 +1043,7 @@ impl Value { } } - pub fn exec_or(&self, rhs: &Value) -> Value { + fn exec_or(&self, rhs: &Value) -> Value { match ( Numeric::from(self).try_into_bool(), Numeric::from(rhs).try_into_bool(), @@ -941,7 +1055,7 @@ impl Value { } // Implements LIKE pattern matching. Caches the constructed regex if a cache is provided - pub fn exec_like( + fn exec_like( regex_cache: Option<&mut HashMap>, pattern: &str, text: &str, @@ -962,15 +1076,15 @@ impl Value { } } - pub fn exec_min<'a, T: Iterator>(regs: T) -> Value { + fn exec_min<'a, T: Iterator>(regs: T) -> Value { regs.min().map(|v| v.to_owned()).unwrap_or(Value::Null) } - pub fn exec_max<'a, T: Iterator>(regs: T) -> Value { + fn exec_max<'a, T: Iterator>(regs: T) -> Value { regs.max().map(|v| v.to_owned()).unwrap_or(Value::Null) } - pub fn exec_concat_strings<'a, T: Iterator>(registers: T) -> Self { + fn exec_concat_strings<'a, T: Iterator>(registers: T) -> Self { let mut result = String::new(); for val in registers { match val { @@ -982,7 +1096,7 @@ impl Value { Value::build_text(result) } - pub fn exec_concat_ws<'a, T: ExactSizeIterator>(mut registers: T) -> Self { + fn exec_concat_ws<'a, T: ExactSizeIterator>(mut registers: T) -> Self { if registers.len() == 0 { return Value::Null; } @@ -1001,7 +1115,7 @@ impl Value { Value::build_text(result) } - pub fn exec_char<'a, T: Iterator>(values: T) -> Self { + fn exec_char<'a, T: Iterator>(values: T) -> Self { let result: String = values .filter_map(|x| { if let Value::Integer(i) = x { @@ -1046,6 +1160,7 @@ pub fn construct_like_regex(pattern: &str) -> Regex { #[cfg(test)] mod tests { use crate::types::Value; + use crate::vdbe::value::ExecValue; use crate::vdbe::{Bitfield, Register}; use rand::{Rng, RngCore}; diff --git a/core/vtab.rs b/core/vtab.rs index 36e05e9bb7..695f697e6b 100644 --- a/core/vtab.rs +++ b/core/vtab.rs @@ -1,5 +1,6 @@ use crate::pragma::{PragmaVirtualTable, PragmaVirtualTableCursor}; use crate::schema::Column; +use crate::types::{FromExtValue, ToExtValue}; use crate::util::columns_from_create_table_body; use crate::{Connection, LimboError, SymbolTable, Value}; use parking_lot::RwLock; @@ -525,7 +526,7 @@ impl ExtVirtualTableCursor { fn column(&self, column: usize) -> crate::Result { let val = unsafe { (self.implementation.column)(self.cursor.as_ptr(), column as u32) }; - Value::from_ffi(val) + crate::Value::from_ffi(val) } fn next(&self) -> crate::Result { diff --git a/sql_generation/model/table.rs b/sql_generation/model/table.rs index dce2fdddf2..40b78471ea 100644 --- a/sql_generation/model/table.rs +++ b/sql_generation/model/table.rs @@ -2,7 +2,7 @@ use std::{fmt::Display, hash::Hash, ops::Deref}; use itertools::Itertools; use serde::{Deserialize, Serialize}; -use turso_core::{numeric::Numeric, types}; +use turso_core::{numeric::Numeric, types, ExecValue}; use turso_parser::ast::{self, ColumnConstraint, SortOrder}; use crate::model::query::predicate::Predicate;