Skip to content

Commit 3ac943a

Browse files
committed
Trim spaces in SimpleTypeDeserializer before deserializing numbers, booleans and chars
1 parent 1b07a6a commit 3ac943a

File tree

3 files changed

+53
-5
lines changed

3 files changed

+53
-5
lines changed

Changelog.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,10 @@
2222

2323
### Bug Fixes
2424

25+
- [#912]: Fix deserialization of numbers, booleans and characters that are space-wrapped, for example
26+
`<int> 42 </int>`. Those space characters are usually indentation added during serialization, and
27+
other XML serialization libraries trim them.
28+
2529
### Misc Changes
2630

2731
- [#901]: Fix running tests on 32-bit architecture
@@ -30,6 +34,7 @@
3034
[#353]: https://github.com/tafia/quick-xml/issues/353
3135
[#901]: https://github.com/tafia/quick-xml/pull/901
3236
[#909]: https://github.com/tafia/quick-xml/pull/909
37+
[#912]: https://github.com/tafia/quick-xml/pull/912
3338
[`Serializer::text_format()`]: https://docs.rs/quick-xml/0.38.4/quick_xml/se/struct.Serializer.html#method.text_format
3439

3540

src/de/simple_type.rs

Lines changed: 35 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ use crate::de::Text;
77
use crate::encoding::Decoder;
88
use crate::errors::serialize::DeError;
99
use crate::escape::unescape;
10-
use crate::utils::CowRef;
10+
use crate::utils::{trim_xml_spaces, CowRef};
1111
use memchr::memchr;
1212
use serde::de::value::UnitDeserializer;
1313
use serde::de::{
@@ -25,9 +25,9 @@ macro_rules! deserialize_num {
2525
V: Visitor<'de>,
2626
{
2727
let text: &str = self.content.as_ref();
28-
match text.parse() {
28+
match trim_xml_spaces(text).parse() {
2929
Ok(number) => visitor.$visit(number),
30-
Err(_) => self.content.deserialize_str(visitor),
30+
Err(_) => self.deserialize_str(visitor),
3131
}
3232
}
3333
};
@@ -146,7 +146,20 @@ impl<'de, 'a> Deserializer<'de> for AtomicDeserializer<'de, 'a> {
146146
where
147147
V: Visitor<'de>,
148148
{
149-
self.content.deserialize_bool(visitor)
149+
let text = self.content.as_ref();
150+
let text = if self.escaped {
151+
unescape(text)?
152+
} else {
153+
Cow::Borrowed(text)
154+
};
155+
match trim_xml_spaces(&text) {
156+
"1" | "true" => visitor.visit_bool(true),
157+
"0" | "false" => visitor.visit_bool(false),
158+
_ => match text {
159+
Cow::Borrowed(_) => self.content.deserialize_str(visitor),
160+
Cow::Owned(s) => visitor.visit_string(s),
161+
},
162+
}
150163
}
151164

152165
deserialize_num!(deserialize_i8 => visit_i8);
@@ -172,7 +185,24 @@ impl<'de, 'a> Deserializer<'de> for AtomicDeserializer<'de, 'a> {
172185
where
173186
V: Visitor<'de>,
174187
{
175-
self.deserialize_str(visitor)
188+
let text: &str = self.content.as_ref();
189+
let text = if self.escaped {
190+
unescape(text)?
191+
} else {
192+
Cow::Borrowed(text)
193+
};
194+
let trimmed = trim_xml_spaces(&text);
195+
// If the string is empty or contains only XML space characters (probably only one),
196+
// deserialize it as a regular string and allow the visitor to accept or reject it.
197+
// Otherwise, trim the spaces and allow the visitor to accept or reject the rest.
198+
if trimmed.is_empty() {
199+
match text {
200+
Cow::Borrowed(_) => self.content.deserialize_str(visitor),
201+
Cow::Owned(s) => visitor.visit_string(s),
202+
}
203+
} else {
204+
visitor.visit_str(trimmed)
205+
}
176206
}
177207

178208
/// Supply to the visitor borrowed string, string slice, or owned string

src/utils.rs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -375,6 +375,19 @@ pub const fn trim_xml_end(mut bytes: &[u8]) -> &[u8] {
375375
bytes
376376
}
377377

378+
/// Returns a string slice with leading and trailing XML whitespace characters removed.
379+
///
380+
/// 'Whitespace' refers to the definition used by [`is_whitespace`].
381+
#[inline]
382+
pub const fn trim_xml_spaces(text: &str) -> &str {
383+
let bytes = trim_xml_end(trim_xml_start(text.as_bytes()));
384+
match core::str::from_utf8(bytes) {
385+
Ok(s) => s,
386+
// SAFETY: Removing XML space characters (subset of ASCII) from a `&str` does not invalidate UTF-8.
387+
_ => unreachable!(),
388+
}
389+
}
390+
378391
////////////////////////////////////////////////////////////////////////////////////////////////////
379392

380393
/// Splits string into pieces which can be part of a single `CDATA` section.

0 commit comments

Comments
 (0)