Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions Changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,10 @@

### Bug Fixes

- [#912]: Fix deserialization of numbers, booleans and characters that is space-wrapped, for example
`<int> 42 </int>`. That space characters are usually indent added during serialization and
other XML serialization libraries trims them

### Misc Changes

- [#901]: Fix running tests on 32-bit architecture
Expand All @@ -30,6 +34,7 @@
[#353]: https://github.com/tafia/quick-xml/issues/353
[#901]: https://github.com/tafia/quick-xml/pull/901
[#909]: https://github.com/tafia/quick-xml/pull/909
[#912]: https://github.com/tafia/quick-xml/pull/912
[`Serializer::text_format()`]: https://docs.rs/quick-xml/0.38.4/quick_xml/se/struct.Serializer.html#method.text_format


Expand Down
1 change: 0 additions & 1 deletion src/de/map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ use crate::{
events::attributes::IterState,
events::BytesStart,
name::QName,
utils::CowRef,
};
use serde::de::value::BorrowedStrDeserializer;
use serde::de::{self, DeserializeSeed, Deserializer as _, MapAccess, SeqAccess, Visitor};
Expand Down
92 changes: 30 additions & 62 deletions src/de/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1977,21 +1977,14 @@
// Also, macros should be imported before using them
use serde::serde_if_integer128;

macro_rules! deserialize_num {
($deserialize:ident => $visit:ident, $($mut:tt)?) => {
macro_rules! forward_to_simple_type {
($deserialize:ident, $($mut:tt)?) => {
#[inline]
fn $deserialize<V>($($mut)? self, visitor: V) -> Result<V::Value, DeError>
where
V: Visitor<'de>,
{
// No need to unescape because valid integer representations cannot be escaped
let text = self.read_string()?;
match text.parse() {
Ok(number) => visitor.$visit(number),
Err(_) => match text {
Cow::Borrowed(t) => visitor.visit_str(t),
Cow::Owned(t) => visitor.visit_string(t),
}
}
SimpleTypeDeserializer::from_text(self.read_string()?).$deserialize(visitor)
}
};
}
Expand All @@ -2000,63 +1993,29 @@ macro_rules! deserialize_num {
/// byte arrays, booleans and identifiers.
macro_rules! deserialize_primitives {
($($mut:tt)?) => {
deserialize_num!(deserialize_i8 => visit_i8, $($mut)?);
deserialize_num!(deserialize_i16 => visit_i16, $($mut)?);
deserialize_num!(deserialize_i32 => visit_i32, $($mut)?);
deserialize_num!(deserialize_i64 => visit_i64, $($mut)?);
forward_to_simple_type!(deserialize_i8, $($mut)?);
forward_to_simple_type!(deserialize_i16, $($mut)?);
forward_to_simple_type!(deserialize_i32, $($mut)?);
forward_to_simple_type!(deserialize_i64, $($mut)?);

deserialize_num!(deserialize_u8 => visit_u8, $($mut)?);
deserialize_num!(deserialize_u16 => visit_u16, $($mut)?);
deserialize_num!(deserialize_u32 => visit_u32, $($mut)?);
deserialize_num!(deserialize_u64 => visit_u64, $($mut)?);
forward_to_simple_type!(deserialize_u8, $($mut)?);
forward_to_simple_type!(deserialize_u16, $($mut)?);
forward_to_simple_type!(deserialize_u32, $($mut)?);
forward_to_simple_type!(deserialize_u64, $($mut)?);

serde_if_integer128! {
deserialize_num!(deserialize_i128 => visit_i128, $($mut)?);
deserialize_num!(deserialize_u128 => visit_u128, $($mut)?);
forward_to_simple_type!(deserialize_i128, $($mut)?);
forward_to_simple_type!(deserialize_u128, $($mut)?);
}

deserialize_num!(deserialize_f32 => visit_f32, $($mut)?);
deserialize_num!(deserialize_f64 => visit_f64, $($mut)?);
forward_to_simple_type!(deserialize_f32, $($mut)?);
forward_to_simple_type!(deserialize_f64, $($mut)?);

fn deserialize_bool<V>($($mut)? self, visitor: V) -> Result<V::Value, DeError>
where
V: Visitor<'de>,
{
let text = match self.read_string()? {
Cow::Borrowed(s) => CowRef::Input(s),
Cow::Owned(s) => CowRef::Owned(s),
};
text.deserialize_bool(visitor)
}
forward_to_simple_type!(deserialize_bool, $($mut)?);
forward_to_simple_type!(deserialize_char, $($mut)?);

/// Character represented as [strings](#method.deserialize_str).
#[inline]
fn deserialize_char<V>(self, visitor: V) -> Result<V::Value, DeError>
where
V: Visitor<'de>,
{
self.deserialize_str(visitor)
}

fn deserialize_str<V>($($mut)? self, visitor: V) -> Result<V::Value, DeError>
where
V: Visitor<'de>,
{
let text = self.read_string()?;
match text {
Cow::Borrowed(string) => visitor.visit_borrowed_str(string),
Cow::Owned(string) => visitor.visit_string(string),
}
}

/// Representation of owned strings the same as [non-owned](#method.deserialize_str).
#[inline]
fn deserialize_string<V>(self, visitor: V) -> Result<V::Value, DeError>
where
V: Visitor<'de>,
{
self.deserialize_str(visitor)
}
forward_to_simple_type!(deserialize_str, $($mut)?);
forward_to_simple_type!(deserialize_string, $($mut)?);

/// Forwards deserialization to the [`deserialize_any`](#method.deserialize_any).
#[inline]
Expand Down Expand Up @@ -2163,7 +2122,6 @@ use crate::{
events::{BytesCData, BytesEnd, BytesRef, BytesStart, BytesText, Event},
name::QName,
reader::NsReader,
utils::CowRef,
};
use serde::de::{
self, Deserialize, DeserializeOwned, DeserializeSeed, IntoDeserializer, SeqAccess, Visitor,
Expand Down Expand Up @@ -2921,13 +2879,17 @@ where
/// [`CData`]: Event::CData
fn read_string_impl(&mut self, allow_start: bool) -> Result<Cow<'de, str>, DeError> {
match self.next()? {
// Reached by doc tests only: this file, lines 979 and 996
DeEvent::Text(e) => Ok(e.text),
// allow one nested level
// Reached by trivial::{...}::{field, field_nested, field_tag_after, field_tag_before, nested, tag_after, tag_before, wrapped}
DeEvent::Start(e) if allow_start => self.read_text(e.name()),
// TODO: not reached by any tests
DeEvent::Start(e) => Err(DeError::UnexpectedStart(e.name().as_ref().to_owned())),
// SAFETY: The reader is guaranteed that we don't have unmatched tags
// If we here, then our deserializer has a bug
DeEvent::End(e) => unreachable!("{:?}", e),
// Reached by trivial::{empty_doc, only_comment}
DeEvent::Eof => Err(DeError::UnexpectedEof),
}
}
Expand All @@ -2941,17 +2903,23 @@ where
match self.next()? {
DeEvent::Text(e) => match self.next()? {
// The matching tag name is guaranteed by the reader
// Reached by trivial::{...}::{field, wrapped}
DeEvent::End(_) => Ok(e.text),
// SAFETY: Cannot be two consequent Text events, they would be merged into one
DeEvent::Text(_) => unreachable!(),
// Reached by trivial::{...}::{field_tag_after, tag_after}
DeEvent::Start(e) => Err(DeError::UnexpectedStart(e.name().as_ref().to_owned())),
// Reached by struct_::non_closed::elements_child
DeEvent::Eof => Err(Error::missed_end(name, self.reader.decoder()).into()),
},
// We can get End event in case of `<tag></tag>` or `<tag/>` input
// Return empty text in that case
// The matching tag name is guaranteed by the reader
// Reached by {...}::xs_list::empty
DeEvent::End(_) => Ok("".into()),
// Reached by trivial::{...}::{field_nested, field_tag_before, nested, tag_before}
DeEvent::Start(s) => Err(DeError::UnexpectedStart(s.name().as_ref().to_owned())),
// Reached by struct_::non_closed::elements_child
DeEvent::Eof => Err(Error::missed_end(name, self.reader.decoder()).into()),
}
}
Expand Down
80 changes: 39 additions & 41 deletions src/de/simple_type.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use crate::de::Text;
use crate::encoding::Decoder;
use crate::errors::serialize::DeError;
use crate::escape::unescape;
use crate::utils::CowRef;
use crate::utils::{trim_xml_spaces, CowRef};
use memchr::memchr;
use serde::de::value::UnitDeserializer;
use serde::de::{
Expand All @@ -25,9 +25,9 @@ macro_rules! deserialize_num {
V: Visitor<'de>,
{
let text: &str = self.content.as_ref();
match text.parse() {
match trim_xml_spaces(text).parse() {
Ok(number) => visitor.$visit(number),
Err(_) => self.content.deserialize_str(visitor),
Err(_) => self.deserialize_str(visitor),
}
}
};
Expand Down Expand Up @@ -146,7 +146,20 @@ impl<'de, 'a> Deserializer<'de> for AtomicDeserializer<'de, 'a> {
where
V: Visitor<'de>,
{
self.content.deserialize_bool(visitor)
let text = self.content.as_ref();
let text = if self.escaped {
unescape(text)?
} else {
Cow::Borrowed(text)
};
match trim_xml_spaces(&text) {
"1" | "true" => visitor.visit_bool(true),
"0" | "false" => visitor.visit_bool(false),
_ => match text {
Cow::Borrowed(_) => self.content.deserialize_str(visitor),
Cow::Owned(s) => visitor.visit_string(s),
},
}
}

deserialize_num!(deserialize_i8 => visit_i8);
Expand All @@ -172,7 +185,24 @@ impl<'de, 'a> Deserializer<'de> for AtomicDeserializer<'de, 'a> {
where
V: Visitor<'de>,
{
self.deserialize_str(visitor)
let text: &str = self.content.as_ref();
let text = if self.escaped {
unescape(text)?
} else {
Cow::Borrowed(text)
};
let trimmed = trim_xml_spaces(&text);
// If string is empty or contains only XML space characters (probably only one),
// deserialize as usual string and allow visitor to accept or reject it.
// Otherwise trim spaces and allow visitor to accept or reject the rest.
if trimmed.is_empty() {
match text {
Cow::Borrowed(_) => self.content.deserialize_str(visitor),
Cow::Owned(s) => visitor.visit_string(s),
}
} else {
visitor.visit_str(trimmed)
}
}

/// Supply to the visitor borrowed string, string slice, or owned string
Expand Down Expand Up @@ -611,43 +641,11 @@ impl<'de, 'a> Deserializer<'de> for SimpleTypeDeserializer<'de, 'a> {
deserialize_primitive!(deserialize_f32);
deserialize_primitive!(deserialize_f64);

deserialize_primitive!(deserialize_char);
deserialize_primitive!(deserialize_str);

/// Forwards deserialization to the [`Self::deserialize_str`]
#[inline]
fn deserialize_char<V>(self, visitor: V) -> Result<V::Value, Self::Error>
where
V: Visitor<'de>,
{
self.deserialize_str(visitor)
}

/// Forwards deserialization to the [`Self::deserialize_str`]
#[inline]
fn deserialize_string<V>(self, visitor: V) -> Result<V::Value, Self::Error>
where
V: Visitor<'de>,
{
self.deserialize_str(visitor)
}

/// Forwards deserialization to the [`Self::deserialize_str`]
#[inline]
fn deserialize_bytes<V>(self, visitor: V) -> Result<V::Value, Self::Error>
where
V: Visitor<'de>,
{
self.deserialize_str(visitor)
}

/// Forwards deserialization to the [`Self::deserialize_str`]
#[inline]
fn deserialize_byte_buf<V>(self, visitor: V) -> Result<V::Value, Self::Error>
where
V: Visitor<'de>,
{
self.deserialize_bytes(visitor)
}
deserialize_primitive!(deserialize_string);
deserialize_primitive!(deserialize_bytes);
deserialize_primitive!(deserialize_byte_buf);

fn deserialize_option<V>(self, visitor: V) -> Result<V::Value, Self::Error>
where
Expand Down
1 change: 0 additions & 1 deletion src/de/text.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ use crate::{
de::simple_type::SimpleTypeDeserializer,
de::{Text, TEXT_KEY},
errors::serialize::DeError,
utils::CowRef,
};
use serde::de::value::BorrowedStrDeserializer;
use serde::de::{DeserializeSeed, Deserializer, EnumAccess, VariantAccess, Visitor};
Expand Down
13 changes: 13 additions & 0 deletions src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -375,6 +375,19 @@ pub const fn trim_xml_end(mut bytes: &[u8]) -> &[u8] {
bytes
}

/// Returns a string slice with XML whitespace characters removed from both sides.
///
/// 'Whitespace' refers to the definition used by [`is_whitespace`].
#[inline]
pub fn trim_xml_spaces(text: &str) -> &str {
let bytes = trim_xml_end(trim_xml_start(text.as_bytes()));
match core::str::from_utf8(bytes) {
Ok(s) => s,
// SAFETY: Removing XML space characters (subset of ASCII) from a `&str` does not invalidate UTF-8.
_ => unreachable!(),
}
}

////////////////////////////////////////////////////////////////////////////////////////////////////

/// Splits string into pieces which can be part of a single `CDATA` section.
Expand Down
Loading