diff --git a/core/util.rs b/core/util.rs
index 745eb7a0b8..38eab7f4ee 100644
--- a/core/util.rs
+++ b/core/util.rs
@@ -951,6 +951,19 @@ pub fn decode_percent(uri: &str) -> String {
     String::from_utf8_lossy(&decoded).to_string()
 }

+/// Trims leading and trailing ASCII whitespace from `s`, leaving every other
+/// character (including Unicode whitespace such as U+00A0) in place.
+///
+/// Unlike [`str::trim`], which strips anything with the Unicode `White_Space`
+/// property, only the characters accepted by [`char::is_ascii_whitespace`] are
+/// removed: space, tab, line feed, form feed and carriage return.
+///
+/// NOTE(review): vertical tab (0x0B) is not ASCII whitespace by Rust's definition
+/// and is therefore kept; confirm this matches the intended SQLite behaviour.
+pub fn trim_ascii_whitespace(s: &str) -> &str {
+    s.trim_matches(|c: char| c.is_ascii_whitespace())
+}
+
 /// When casting a TEXT value to INTEGER, the longest possible prefix of the value that can be interpreted as an integer number
 /// is extracted from the TEXT value and the remainder ignored. Any leading spaces in the TEXT value when converting from TEXT to INTEGER are ignored.
 /// If there is no prefix that can be interpreted as an integer number, the result of the conversion is 0.
@@ -2577,4 +2590,23 @@ pub mod tests {
             Ok(Float(-3.22))
         );
     }
+
+    #[test]
+    fn test_trim_ascii_whitespace_helper() {
+        assert_eq!(trim_ascii_whitespace(" hello "), "hello");
+        assert_eq!(trim_ascii_whitespace("\t\nhello\r\n"), "hello");
+        assert_eq!(trim_ascii_whitespace("hello"), "hello");
+        assert_eq!(trim_ascii_whitespace(" "), "");
+        assert_eq!(trim_ascii_whitespace(""), "");
+
+        // non-breaking space should NOT be trimmed
+        assert_eq!(
+            trim_ascii_whitespace("\u{00A0}hello\u{00A0}"),
+            "\u{00A0}hello\u{00A0}"
+        );
+        assert_eq!(
+            trim_ascii_whitespace(" \u{00A0}hello\u{00A0} "),
+            "\u{00A0}hello\u{00A0}"
+        );
+    }
 }
diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs
index 176d19ef91..47c850d102 100644
--- a/core/vdbe/execute.rs
+++ b/core/vdbe/execute.rs
@@ -19,6 +19,7 @@ use crate::types::{
 };
 use crate::util::{
     normalize_ident, rewrite_column_references_if_needed, rewrite_fk_parent_cols_if_self_ref,
+    trim_ascii_whitespace,
 };
 use crate::vdbe::insn::InsertFlags;
 use crate::vdbe::{registers_to_ref_values, EndStatement, TxnCleanup};
@@ -9882,7 +9883,7 @@ fn apply_affinity_char(target: &mut Register, affinity: Affinity) -> bool {
                 }

                 if let Value::Text(t) = value {
-                    let text = t.as_str().trim();
+                    let text = trim_ascii_whitespace(t.as_str());

                     // Handle hex numbers - they shouldn't be converted
                     if text.starts_with("0x") {
@@ -11784,4 +11785,62 @@ mod tests {
             assert!(!bitfield.get(i));
         }
     }
+
+    #[test]
+    fn test_ascii_whitespace_is_trimmed() {
+        // Regular ASCII whitespace SHOULD be trimmed
+        let ascii_whitespace_cases = vec![
+            (" 12", 12i64), // space
+            ("12 ", 12i64), // trailing space
+            (" 12 ", 12i64), // both sides
+            ("\t42\t", 42i64), // tab
+            ("\n99\n", 99i64), // newline
+            (" \t\n123\r\n ", 123i64), // mixed ASCII whitespace
+        ];
+
+        for (input, expected_int) in ascii_whitespace_cases {
+            let mut register = Register::Value(Value::Text(input.into()));
+            apply_affinity_char(&mut register, Affinity::Integer);
+
+            match register {
+                Register::Value(Value::Integer(i)) => {
+                    assert_eq!(
+                        i, expected_int,
+                        "String '{input}' should convert to {expected_int}, got {i}"
+                    );
+                }
+                other => {
+                    panic!("String '{input}' should be converted to integer {expected_int}, got {other:?}");
+                }
+            }
+        }
+    }
+
+    #[test]
+    fn test_non_breaking_space_not_trimmed() {
+        let test_strings = vec![
+            ("12\u{00A0}", "text", 3), // '12' + non-breaking space (3 chars, 4 bytes)
+            ("\u{00A0}12", "text", 3), // non-breaking space + '12' (3 chars, 4 bytes)
+            ("12\u{00A0}34", "text", 5), // '12' + nbsp + '34' (5 chars, 6 bytes)
+        ];
+
+        for (input, _expected_type, expected_len) in test_strings {
+            let mut register = Register::Value(Value::Text(input.into()));
+            apply_affinity_char(&mut register, Affinity::Integer);
+
+            match register {
+                Register::Value(Value::Text(t)) => {
+                    assert_eq!(
+                        t.as_str().chars().count(),
+                        expected_len,
+                        "String '{input}' should have {expected_len} characters",
+                    );
+                }
+                Register::Value(Value::Integer(_)) => {
+                    panic!("String '{input}' should NOT be converted to integer");
+                }
+                other => panic!("Unexpected value type: {other:?}"),
+            }
+        }
+    }
 }
diff --git a/testing/affinity.test b/testing/affinity.test
index 1c32d4e4dd..d580856c26 100755
--- a/testing/affinity.test
+++ b/testing/affinity.test
@@ -20,3 +20,9 @@ do_execsql_test_on_specific_db {:memory:} affinity-rowid {
   select * from t where a = '1';
 } {1
 1}
+
+do_execsql_test_on_specific_db {:memory:} affinity-ascii-whitespace-1.1 {
+  CREATE TABLE nb1(i INTEGER);
+  INSERT INTO nb1 VALUES ('12' || CHAR(160));
+  SELECT TYPEOF(i), LENGTH(i) FROM nb1;
+} {text|3}