Skip to content

Commit 8188346

Browse files
authored
Merge pull request #912 from Mingun/trim-spaces-for-primitives
Trim spaces when deserialize numbers, booleans and characters
2 parents 44cdd48 + 10d0646 commit 8188346

File tree

7 files changed

+383
-118
lines changed

7 files changed

+383
-118
lines changed

Changelog.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,10 @@
2222

2323
### Bug Fixes
2424

25+
- [#912]: Fix deserialization of numbers, booleans and characters that are space-wrapped, for example
26+
`<int> 42 </int>`. Those space characters are usually indentation added during serialization, and
27+
other XML serialization libraries trim them
28+
2529
### Misc Changes
2630

2731
- [#901]: Fix running tests on 32-bit architecture
@@ -30,6 +34,7 @@
3034
[#353]: https://github.com/tafia/quick-xml/issues/353
3135
[#901]: https://github.com/tafia/quick-xml/pull/901
3236
[#909]: https://github.com/tafia/quick-xml/pull/909
37+
[#912]: https://github.com/tafia/quick-xml/pull/912
3338
[`Serializer::text_format()`]: https://docs.rs/quick-xml/0.38.4/quick_xml/se/struct.Serializer.html#method.text_format
3439

3540

src/de/map.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@ use crate::{
1111
events::attributes::IterState,
1212
events::BytesStart,
1313
name::QName,
14-
utils::CowRef,
1514
};
1615
use serde::de::value::BorrowedStrDeserializer;
1716
use serde::de::{self, DeserializeSeed, Deserializer as _, MapAccess, SeqAccess, Visitor};

src/de/mod.rs

Lines changed: 30 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -1977,21 +1977,14 @@
19771977
// Also, macros should be imported before using them
19781978
use serde::serde_if_integer128;
19791979

1980-
macro_rules! deserialize_num {
1981-
($deserialize:ident => $visit:ident, $($mut:tt)?) => {
1980+
macro_rules! forward_to_simple_type {
1981+
($deserialize:ident, $($mut:tt)?) => {
1982+
#[inline]
19821983
fn $deserialize<V>($($mut)? self, visitor: V) -> Result<V::Value, DeError>
19831984
where
19841985
V: Visitor<'de>,
19851986
{
1986-
// No need to unescape because valid integer representations cannot be escaped
1987-
let text = self.read_string()?;
1988-
match text.parse() {
1989-
Ok(number) => visitor.$visit(number),
1990-
Err(_) => match text {
1991-
Cow::Borrowed(t) => visitor.visit_str(t),
1992-
Cow::Owned(t) => visitor.visit_string(t),
1993-
}
1994-
}
1987+
SimpleTypeDeserializer::from_text(self.read_string()?).$deserialize(visitor)
19951988
}
19961989
};
19971990
}
@@ -2000,63 +1993,29 @@ macro_rules! deserialize_num {
20001993
/// byte arrays, booleans and identifiers.
20011994
macro_rules! deserialize_primitives {
20021995
($($mut:tt)?) => {
2003-
deserialize_num!(deserialize_i8 => visit_i8, $($mut)?);
2004-
deserialize_num!(deserialize_i16 => visit_i16, $($mut)?);
2005-
deserialize_num!(deserialize_i32 => visit_i32, $($mut)?);
2006-
deserialize_num!(deserialize_i64 => visit_i64, $($mut)?);
1996+
forward_to_simple_type!(deserialize_i8, $($mut)?);
1997+
forward_to_simple_type!(deserialize_i16, $($mut)?);
1998+
forward_to_simple_type!(deserialize_i32, $($mut)?);
1999+
forward_to_simple_type!(deserialize_i64, $($mut)?);
20072000

2008-
deserialize_num!(deserialize_u8 => visit_u8, $($mut)?);
2009-
deserialize_num!(deserialize_u16 => visit_u16, $($mut)?);
2010-
deserialize_num!(deserialize_u32 => visit_u32, $($mut)?);
2011-
deserialize_num!(deserialize_u64 => visit_u64, $($mut)?);
2001+
forward_to_simple_type!(deserialize_u8, $($mut)?);
2002+
forward_to_simple_type!(deserialize_u16, $($mut)?);
2003+
forward_to_simple_type!(deserialize_u32, $($mut)?);
2004+
forward_to_simple_type!(deserialize_u64, $($mut)?);
20122005

20132006
serde_if_integer128! {
2014-
deserialize_num!(deserialize_i128 => visit_i128, $($mut)?);
2015-
deserialize_num!(deserialize_u128 => visit_u128, $($mut)?);
2007+
forward_to_simple_type!(deserialize_i128, $($mut)?);
2008+
forward_to_simple_type!(deserialize_u128, $($mut)?);
20162009
}
20172010

2018-
deserialize_num!(deserialize_f32 => visit_f32, $($mut)?);
2019-
deserialize_num!(deserialize_f64 => visit_f64, $($mut)?);
2011+
forward_to_simple_type!(deserialize_f32, $($mut)?);
2012+
forward_to_simple_type!(deserialize_f64, $($mut)?);
20202013

2021-
fn deserialize_bool<V>($($mut)? self, visitor: V) -> Result<V::Value, DeError>
2022-
where
2023-
V: Visitor<'de>,
2024-
{
2025-
let text = match self.read_string()? {
2026-
Cow::Borrowed(s) => CowRef::Input(s),
2027-
Cow::Owned(s) => CowRef::Owned(s),
2028-
};
2029-
text.deserialize_bool(visitor)
2030-
}
2014+
forward_to_simple_type!(deserialize_bool, $($mut)?);
2015+
forward_to_simple_type!(deserialize_char, $($mut)?);
20312016

2032-
/// Character represented as [strings](#method.deserialize_str).
2033-
#[inline]
2034-
fn deserialize_char<V>(self, visitor: V) -> Result<V::Value, DeError>
2035-
where
2036-
V: Visitor<'de>,
2037-
{
2038-
self.deserialize_str(visitor)
2039-
}
2040-
2041-
fn deserialize_str<V>($($mut)? self, visitor: V) -> Result<V::Value, DeError>
2042-
where
2043-
V: Visitor<'de>,
2044-
{
2045-
let text = self.read_string()?;
2046-
match text {
2047-
Cow::Borrowed(string) => visitor.visit_borrowed_str(string),
2048-
Cow::Owned(string) => visitor.visit_string(string),
2049-
}
2050-
}
2051-
2052-
/// Representation of owned strings the same as [non-owned](#method.deserialize_str).
2053-
#[inline]
2054-
fn deserialize_string<V>(self, visitor: V) -> Result<V::Value, DeError>
2055-
where
2056-
V: Visitor<'de>,
2057-
{
2058-
self.deserialize_str(visitor)
2059-
}
2017+
forward_to_simple_type!(deserialize_str, $($mut)?);
2018+
forward_to_simple_type!(deserialize_string, $($mut)?);
20602019

20612020
/// Forwards deserialization to the [`deserialize_any`](#method.deserialize_any).
20622021
#[inline]
@@ -2163,7 +2122,6 @@ use crate::{
21632122
events::{BytesCData, BytesEnd, BytesRef, BytesStart, BytesText, Event},
21642123
name::QName,
21652124
reader::NsReader,
2166-
utils::CowRef,
21672125
};
21682126
use serde::de::{
21692127
self, Deserialize, DeserializeOwned, DeserializeSeed, IntoDeserializer, SeqAccess, Visitor,
@@ -2921,13 +2879,17 @@ where
29212879
/// [`CData`]: Event::CData
29222880
fn read_string_impl(&mut self, allow_start: bool) -> Result<Cow<'de, str>, DeError> {
29232881
match self.next()? {
2882+
// Reached by doc tests only: this file, lines 979 and 996
29242883
DeEvent::Text(e) => Ok(e.text),
29252884
// allow one nested level
2885+
// Reached by trivial::{...}::{field, field_nested, field_tag_after, field_tag_before, nested, tag_after, tag_before, wrapped}
29262886
DeEvent::Start(e) if allow_start => self.read_text(e.name()),
2887+
// TODO: not reached by any tests
29272888
DeEvent::Start(e) => Err(DeError::UnexpectedStart(e.name().as_ref().to_owned())),
29282889
// SAFETY: The reader guarantees that we don't have unmatched tags
29292890
// If we are here, then our deserializer has a bug
29302891
DeEvent::End(e) => unreachable!("{:?}", e),
2892+
// Reached by trivial::{empty_doc, only_comment}
29312893
DeEvent::Eof => Err(DeError::UnexpectedEof),
29322894
}
29332895
}
@@ -2941,17 +2903,23 @@ where
29412903
match self.next()? {
29422904
DeEvent::Text(e) => match self.next()? {
29432905
// The matching tag name is guaranteed by the reader
2906+
// Reached by trivial::{...}::{field, wrapped}
29442907
DeEvent::End(_) => Ok(e.text),
29452908
// SAFETY: There cannot be two consecutive Text events; they would be merged into one
29462909
DeEvent::Text(_) => unreachable!(),
2910+
// Reached by trivial::{...}::{field_tag_after, tag_after}
29472911
DeEvent::Start(e) => Err(DeError::UnexpectedStart(e.name().as_ref().to_owned())),
2912+
// Reached by struct_::non_closed::elements_child
29482913
DeEvent::Eof => Err(Error::missed_end(name, self.reader.decoder()).into()),
29492914
},
29502915
// We can get End event in case of `<tag></tag>` or `<tag/>` input
29512916
// Return empty text in that case
29522917
// The matching tag name is guaranteed by the reader
2918+
// Reached by {...}::xs_list::empty
29532919
DeEvent::End(_) => Ok("".into()),
2920+
// Reached by trivial::{...}::{field_nested, field_tag_before, nested, tag_before}
29542921
DeEvent::Start(s) => Err(DeError::UnexpectedStart(s.name().as_ref().to_owned())),
2922+
// Reached by struct_::non_closed::elements_child
29552923
DeEvent::Eof => Err(Error::missed_end(name, self.reader.decoder()).into()),
29562924
}
29572925
}

src/de/simple_type.rs

Lines changed: 39 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ use crate::de::Text;
77
use crate::encoding::Decoder;
88
use crate::errors::serialize::DeError;
99
use crate::escape::unescape;
10-
use crate::utils::CowRef;
10+
use crate::utils::{trim_xml_spaces, CowRef};
1111
use memchr::memchr;
1212
use serde::de::value::UnitDeserializer;
1313
use serde::de::{
@@ -25,9 +25,9 @@ macro_rules! deserialize_num {
2525
V: Visitor<'de>,
2626
{
2727
let text: &str = self.content.as_ref();
28-
match text.parse() {
28+
match trim_xml_spaces(text).parse() {
2929
Ok(number) => visitor.$visit(number),
30-
Err(_) => self.content.deserialize_str(visitor),
30+
Err(_) => self.deserialize_str(visitor),
3131
}
3232
}
3333
};
@@ -146,7 +146,20 @@ impl<'de, 'a> Deserializer<'de> for AtomicDeserializer<'de, 'a> {
146146
where
147147
V: Visitor<'de>,
148148
{
149-
self.content.deserialize_bool(visitor)
149+
let text = self.content.as_ref();
150+
let text = if self.escaped {
151+
unescape(text)?
152+
} else {
153+
Cow::Borrowed(text)
154+
};
155+
match trim_xml_spaces(&text) {
156+
"1" | "true" => visitor.visit_bool(true),
157+
"0" | "false" => visitor.visit_bool(false),
158+
_ => match text {
159+
Cow::Borrowed(_) => self.content.deserialize_str(visitor),
160+
Cow::Owned(s) => visitor.visit_string(s),
161+
},
162+
}
150163
}
151164

152165
deserialize_num!(deserialize_i8 => visit_i8);
@@ -172,7 +185,24 @@ impl<'de, 'a> Deserializer<'de> for AtomicDeserializer<'de, 'a> {
172185
where
173186
V: Visitor<'de>,
174187
{
175-
self.deserialize_str(visitor)
188+
let text: &str = self.content.as_ref();
189+
let text = if self.escaped {
190+
unescape(text)?
191+
} else {
192+
Cow::Borrowed(text)
193+
};
194+
let trimmed = trim_xml_spaces(&text);
195+
// If string is empty or contains only XML space characters (probably only one),
196+
// deserialize as usual string and allow visitor to accept or reject it.
197+
// Otherwise trim spaces and allow visitor to accept or reject the rest.
198+
if trimmed.is_empty() {
199+
match text {
200+
Cow::Borrowed(_) => self.content.deserialize_str(visitor),
201+
Cow::Owned(s) => visitor.visit_string(s),
202+
}
203+
} else {
204+
visitor.visit_str(trimmed)
205+
}
176206
}
177207

178208
/// Supply to the visitor borrowed string, string slice, or owned string
@@ -611,43 +641,11 @@ impl<'de, 'a> Deserializer<'de> for SimpleTypeDeserializer<'de, 'a> {
611641
deserialize_primitive!(deserialize_f32);
612642
deserialize_primitive!(deserialize_f64);
613643

644+
deserialize_primitive!(deserialize_char);
614645
deserialize_primitive!(deserialize_str);
615-
616-
/// Forwards deserialization to the [`Self::deserialize_str`]
617-
#[inline]
618-
fn deserialize_char<V>(self, visitor: V) -> Result<V::Value, Self::Error>
619-
where
620-
V: Visitor<'de>,
621-
{
622-
self.deserialize_str(visitor)
623-
}
624-
625-
/// Forwards deserialization to the [`Self::deserialize_str`]
626-
#[inline]
627-
fn deserialize_string<V>(self, visitor: V) -> Result<V::Value, Self::Error>
628-
where
629-
V: Visitor<'de>,
630-
{
631-
self.deserialize_str(visitor)
632-
}
633-
634-
/// Forwards deserialization to the [`Self::deserialize_str`]
635-
#[inline]
636-
fn deserialize_bytes<V>(self, visitor: V) -> Result<V::Value, Self::Error>
637-
where
638-
V: Visitor<'de>,
639-
{
640-
self.deserialize_str(visitor)
641-
}
642-
643-
/// Forwards deserialization to the [`Self::deserialize_str`]
644-
#[inline]
645-
fn deserialize_byte_buf<V>(self, visitor: V) -> Result<V::Value, Self::Error>
646-
where
647-
V: Visitor<'de>,
648-
{
649-
self.deserialize_bytes(visitor)
650-
}
646+
deserialize_primitive!(deserialize_string);
647+
deserialize_primitive!(deserialize_bytes);
648+
deserialize_primitive!(deserialize_byte_buf);
651649

652650
fn deserialize_option<V>(self, visitor: V) -> Result<V::Value, Self::Error>
653651
where

src/de/text.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@ use crate::{
22
de::simple_type::SimpleTypeDeserializer,
33
de::{Text, TEXT_KEY},
44
errors::serialize::DeError,
5-
utils::CowRef,
65
};
76
use serde::de::value::BorrowedStrDeserializer;
87
use serde::de::{DeserializeSeed, Deserializer, EnumAccess, VariantAccess, Visitor};

src/utils.rs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -375,6 +375,19 @@ pub const fn trim_xml_end(mut bytes: &[u8]) -> &[u8] {
375375
bytes
376376
}
377377

378+
/// Returns a string slice with XML whitespace characters removed from both sides.
379+
///
380+
/// 'Whitespace' refers to the definition used by [`is_whitespace`].
381+
#[inline]
382+
pub fn trim_xml_spaces(text: &str) -> &str {
383+
let bytes = trim_xml_end(trim_xml_start(text.as_bytes()));
384+
match core::str::from_utf8(bytes) {
385+
Ok(s) => s,
386+
// SAFETY: Removing XML space characters (subset of ASCII) from a `&str` does not invalidate UTF-8.
387+
_ => unreachable!(),
388+
}
389+
}
390+
378391
////////////////////////////////////////////////////////////////////////////////////////////////////
379392

380393
/// Splits string into pieces which can be part of a single `CDATA` section.

0 commit comments

Comments
 (0)