Trim spaces in SimpleTypeDeserializer before deserializing numbers, booleans and chars

Mingun · Mingun · commit 3ac943ae96e8 · 2025-11-08T18:00:02.000+05:00
diff --git a/Changelog.md b/Changelog.md
@@ -22,6 +22,10 @@
 
 ### Bug Fixes
 
+- [#912]: Fix deserialization of numbers, booleans and characters that are space-wrapped, for example
+  `<int>  42  </int>`. Those space characters are usually indentation added during serialization, and
+  other XML serialization libraries trim them
+
 ### Misc Changes
 
 - [#901]: Fix running tests on 32-bit architecture
@@ -30,6 +34,7 @@
 [#353]: https://github.com/tafia/quick-xml/issues/353
 [#901]: https://github.com/tafia/quick-xml/pull/901
 [#909]: https://github.com/tafia/quick-xml/pull/909
+[#912]: https://github.com/tafia/quick-xml/pull/912
 [`Serializer::text_format()`]: https://docs.rs/quick-xml/0.38.4/quick_xml/se/struct.Serializer.html#method.text_format
 
 
diff --git a/src/de/simple_type.rs b/src/de/simple_type.rs
@@ -7,7 +7,7 @@ use crate::de::Text;
 use crate::encoding::Decoder;
 use crate::errors::serialize::DeError;
 use crate::escape::unescape;
-use crate::utils::CowRef;
+use crate::utils::{trim_xml_spaces, CowRef};
 use memchr::memchr;
 use serde::de::value::UnitDeserializer;
 use serde::de::{
@@ -25,9 +25,9 @@ macro_rules! deserialize_num {
             V: Visitor<'de>,
         {
             let text: &str = self.content.as_ref();
-            match text.parse() {
+            match trim_xml_spaces(text).parse() {
                 Ok(number) => visitor.$visit(number),
-                Err(_) => self.content.deserialize_str(visitor),
+                Err(_) => self.deserialize_str(visitor),
             }
         }
     };
@@ -146,7 +146,20 @@ impl<'de, 'a> Deserializer<'de> for AtomicDeserializer<'de, 'a> {
     where
         V: Visitor<'de>,
     {
-        self.content.deserialize_bool(visitor)
+        let text = self.content.as_ref();
+        let text = if self.escaped {
+            unescape(text)?
+        } else {
+            Cow::Borrowed(text)
+        };
+        match trim_xml_spaces(&text) {
+            "1" | "true" => visitor.visit_bool(true),
+            "0" | "false" => visitor.visit_bool(false),
+            _ => match text {
+                Cow::Borrowed(_) => self.content.deserialize_str(visitor),
+                Cow::Owned(s) => visitor.visit_string(s),
+            },
+        }
     }
 
     deserialize_num!(deserialize_i8  => visit_i8);
@@ -172,7 +185,24 @@ impl<'de, 'a> Deserializer<'de> for AtomicDeserializer<'de, 'a> {
     where
         V: Visitor<'de>,
     {
-        self.deserialize_str(visitor)
+        let text: &str = self.content.as_ref();
+        let text = if self.escaped {
+            unescape(text)?
+        } else {
+            Cow::Borrowed(text)
+        };
+        let trimmed = trim_xml_spaces(&text);
+        // If the string is empty or contains only XML space characters (probably only one),
+        // deserialize it as an ordinary string and let the visitor accept or reject it.
+        // Otherwise trim the spaces and let the visitor accept or reject the rest.
+        if trimmed.is_empty() {
+            match text {
+                Cow::Borrowed(_) => self.content.deserialize_str(visitor),
+                Cow::Owned(s) => visitor.visit_string(s),
+            }
+        } else {
+            visitor.visit_str(trimmed)
+        }
     }
 
     /// Supply to the visitor borrowed string, string slice, or owned string
diff --git a/src/utils.rs b/src/utils.rs
@@ -375,6 +375,19 @@ pub const fn trim_xml_end(mut bytes: &[u8]) -> &[u8] {
     bytes
 }
 
+/// Returns a string slice with leading and trailing XML whitespace characters removed.
+///
+/// 'Whitespace' refers to the definition used by [`is_whitespace`].
+#[inline]
+pub const fn trim_xml_spaces(text: &str) -> &str {
+    let bytes = trim_xml_end(trim_xml_start(text.as_bytes()));
+    match core::str::from_utf8(bytes) {
+        Ok(s) => s,
+        // Unreachable: trimming XML space characters (a subset of ASCII) off the ends of a `&str` keeps it valid UTF-8.
+        _ => unreachable!(),
+    }
+}
+
 ////////////////////////////////////////////////////////////////////////////////////////////////////
 
 /// Splits string into pieces which can be part of a single `CDATA` section.