@@ -2005,7 +2005,7 @@ use crate::{
20052005 errors:: Error ,
20062006 events:: { BytesCData , BytesEnd , BytesStart , BytesText , Event } ,
20072007 name:: QName ,
2008- reader:: Reader ,
2008+ reader:: { Config , Reader } ,
20092009} ;
20102010use serde:: de:: { self , Deserialize , DeserializeOwned , DeserializeSeed , SeqAccess , Visitor } ;
20112011use std:: borrow:: Cow ;
@@ -2169,6 +2169,31 @@ struct XmlReader<'i, R: XmlRead<'i>, E: EntityResolver = PredefinedEntityResolve
21692169 entity_resolver : E ,
21702170}
21712171
/// Applies `trim` to the string held by `value`, keeping the `Cow` variant.
///
/// A borrowed input is trimmed by re-slicing and stays borrowed, so no
/// allocation happens. An owned input stays owned: it is returned untouched
/// when `trim` did not change its length, otherwise the trimmed slice is
/// copied into a new `String`.
fn trim_cow<'a, F>(value: Cow<'a, str>, trim: F) -> Cow<'a, str>
where
    F: FnOnce(&str) -> &str,
{
    let owned = match value {
        // Borrowed data can simply be narrowed to the trimmed sub-slice.
        Cow::Borrowed(text) => return Cow::Borrowed(trim(text)),
        Cow::Owned(text) => text,
    };

    let kept = trim(&owned);
    if kept.len() == owned.len() {
        // Nothing was removed, reuse the existing allocation.
        Cow::Owned(owned)
    } else {
        Cow::Owned(kept.to_string())
    }
}
2187+
2188+ /// Removes trailing XML whitespace bytes from text content.
2189+ ///
2190+ /// Returns `true` if content is empty after that
2191+ fn inplace_trim_end ( mut s : & mut Cow < str > ) -> bool {
2192+ let c: Cow < str > = replace ( & mut s, Cow :: Borrowed ( "" ) ) ;
2193+ * s = trim_cow ( c, str:: trim_end) ;
2194+ s. is_empty ( )
2195+ }
2196+
21722197impl < ' i , R : XmlRead < ' i > , E : EntityResolver > XmlReader < ' i , R , E > {
21732198 fn new ( mut reader : R , entity_resolver : E ) -> Self {
21742199 // Lookahead by one event immediately, so we do not need to check in the
@@ -2206,20 +2231,23 @@ impl<'i, R: XmlRead<'i>, E: EntityResolver> XmlReader<'i, R, E> {
22062231 /// Read all consecutive [`Text`] and [`CData`] events until a non-text event
22072232 /// occurs. The content of all events is appended to `result` and returned
22082233 /// as [`DeEvent::Text`].
2234+ ///
2235+ /// If the resulting text is empty, this function returns `None` to avoid creating an empty event.
22092236 ///
22102237 /// [`Text`]: PayloadEvent::Text
22112238 /// [`CData`]: PayloadEvent::CData
2212- fn drain_text ( & mut self , mut result : Cow < ' i , str > ) -> Result < DeEvent < ' i > , DeError > {
2239+ fn drain_text ( & mut self , mut result : Cow < ' i , str > ) -> Result < Option < DeEvent < ' i > > , DeError > {
22132240 loop {
22142241 if self . current_event_is_last_text ( ) {
22152242 break ;
22162243 }
2217-
22182244 match self . next_impl ( ) ? {
22192245 PayloadEvent :: Text ( mut e) => {
22202246 if self . current_event_is_last_text ( ) {
22212247 // FIXME: Actually, we should trim after decoding text, but now we trim before
2222- e. inplace_trim_end ( ) ;
2248+ if self . reader . config ( ) . trim_text_end {
2249+ e. inplace_trim_end ( ) ;
2250+ }
22232251 }
22242252 result
22252253 . to_mut ( )
@@ -2228,10 +2256,12 @@ impl<'i, R: XmlRead<'i>, E: EntityResolver> XmlReader<'i, R, E> {
22282256 PayloadEvent :: CData ( e) => result. to_mut ( ) . push_str ( & e. decode ( ) ?) ,
22292257
22302258 // SAFETY: current_event_is_last_text checks that event is Text or CData
2231- _ => unreachable ! ( "Only `Text` and `CData` events can come here" ) ,
2259+ e => {
2260+ unreachable ! ( "Only `Text` and `CData` events can come here: {:?}" , & e) ;
2261+ }
22322262 }
22332263 }
2234- Ok ( DeEvent :: Text ( Text { text : result } ) )
2264+ Ok ( Some ( DeEvent :: Text ( Text { text : result } ) ) )
22352265 }
22362266
22372267 /// Return an input-borrowing event.
@@ -2241,17 +2271,24 @@ impl<'i, R: XmlRead<'i>, E: EntityResolver> XmlReader<'i, R, E> {
22412271 PayloadEvent :: Start ( e) => Ok ( DeEvent :: Start ( e) ) ,
22422272 PayloadEvent :: End ( e) => Ok ( DeEvent :: End ( e) ) ,
22432273 PayloadEvent :: Text ( mut e) => {
2244- if self . current_event_is_last_text ( ) && e. inplace_trim_end ( ) {
2245- // FIXME: Actually, we should trim after decoding text, but now we trim before
2246- continue ;
2274+ if self . current_event_is_last_text ( ) {
2275+ if self . reader . config ( ) . trim_text_end && e. inplace_trim_end ( ) {
2276+ continue ;
2277+ }
22472278 }
2279+
22482280 match e. unescape_with ( |entity| self . entity_resolver . resolve ( entity) ) . map ( |res| self . drain_text ( res) ) {
2249- Ok ( x) => x,
2281+ Ok ( Ok ( None ) ) => continue ,
2282+ Ok ( Ok ( Some ( x) ) ) => Ok ( x) ,
2283+ Ok ( Err ( x) ) => Err ( x) ,
22502284 // failed to escape treat as binary blob.
22512285 Err ( _) => Ok ( DeEvent :: Binary ( Binary { text : e. into_inner ( ) } ) ) ,
22522286 }
22532287 }
2254- PayloadEvent :: CData ( e) => self . drain_text ( e. decode ( ) ?) ,
2288+ PayloadEvent :: CData ( e) => match self . drain_text ( e. decode ( ) ?) . transpose ( ) {
2289+ None => continue ,
2290+ Some ( x) => x,
2291+ } ,
22552292 PayloadEvent :: DocType ( e) => {
22562293 self . entity_resolver
22572294 . capture ( e)
@@ -2834,6 +2871,8 @@ where
28342871 pub fn from_str_with_resolver ( source : & ' de str , entity_resolver : E ) -> Self {
28352872 let mut reader = Reader :: from_str ( source) ;
28362873 let config = reader. config_mut ( ) ;
2874+ config. trim_text_start = true ;
2875+ config. trim_text_end = true ;
28372876 config. expand_empty_elements = true ;
28382877
28392878 Self :: new (
@@ -3135,6 +3174,9 @@ pub trait XmlRead<'i> {
31353174
31363175 /// A copy of the reader's decoder used to decode strings.
31373176 fn decoder ( & self ) -> Decoder ;
3177+
3178+ /// Returns a reference to the reader's [`Config`] (e.g. the `trim_text_end` option read by the deserializer).
3179+ fn config ( & self ) -> & Config ;
31383180}
31393181
31403182/// XML input source that reads from a std::io input stream.
@@ -3204,6 +3246,10 @@ impl<'i, R: BufRead> XmlRead<'i> for IoReader<R> {
32043246 fn decoder ( & self ) -> Decoder {
32053247 self . reader . decoder ( )
32063248 }
3249+
3250+ fn config ( & self ) -> & Config {
3251+ self . reader . config ( )
3252+ }
32073253}
32083254
32093255/// XML input source that reads from a slice of bytes and can borrow from it.
@@ -3269,6 +3315,10 @@ impl<'de> XmlRead<'de> for SliceReader<'de> {
32693315 fn decoder ( & self ) -> Decoder {
32703316 self . reader . decoder ( )
32713317 }
3318+
3319+ fn config ( & self ) -> & Config {
3320+ self . reader . config ( )
3321+ }
32723322}
32733323
32743324#[ cfg( test) ]
0 commit comments