@@ -27,24 +27,30 @@ pub enum EscapeError {
2727 InvalidDecimal ( char ) ,
2828 /// Not a valid unicode codepoint
2929 InvalidCodepoint ( u32 ) ,
30+ /// The recursion limit was reached while attempting to unescape XML entities,
31+ /// or normalize an attribute value. This could indicate an entity loop.
32+ ///
33+ /// Limiting recursion prevents Denial-of-Service type attacks
34+ /// such as the "billion laughs" [attack](https://en.wikipedia.org/wiki/Billion_laughs_attack).
35+ ReachedRecursionLimit ( Range < usize > ) ,
3036}
3137
3238impl std:: fmt:: Display for EscapeError {
3339 fn fmt ( & self , f : & mut std:: fmt:: Formatter ) -> std:: fmt:: Result {
3440 match self {
3541 EscapeError :: EntityWithNull ( e) => write ! (
3642 f,
37- "Error while escaping character at range {:?}: Null character entity not allowed" ,
43+ "Error while unescaping character at range {:?}: Null character entity not allowed" ,
3844 e
3945 ) ,
4046 EscapeError :: UnrecognizedSymbol ( rge, res) => write ! (
4147 f,
42- "Error while escaping character at range {:?}: Unrecognized escape symbol: {:?}" ,
48+ "Error while unescaping character at range {:?}: Unrecognized escape symbol: {:?}" ,
4349 rge, res
4450 ) ,
4551 EscapeError :: UnterminatedEntity ( e) => write ! (
4652 f,
47- "Error while escaping character at range {:?}: Cannot find ';' after '&'" ,
53+ "Error while unescaping character at range {:?}: Cannot find ';' after '&'" ,
4854 e
4955 ) ,
5056 EscapeError :: TooLongHexadecimal => write ! ( f, "Cannot convert hexadecimal to utf8" ) ,
@@ -54,6 +60,10 @@ impl std::fmt::Display for EscapeError {
5460 EscapeError :: TooLongDecimal => write ! ( f, "Cannot convert decimal to utf8" ) ,
5561 EscapeError :: InvalidDecimal ( e) => write ! ( f, "'{}' is not a valid decimal character" , e) ,
5662 EscapeError :: InvalidCodepoint ( n) => write ! ( f, "'{}' is not a valid codepoint" , n) ,
63+ EscapeError :: ReachedRecursionLimit ( e) => write ! (
64+ f,
65+ "Error while unescaping entity at range {:?}: Recursion limit reached"
66+ ) ,
5767 }
5868 }
5969}
@@ -310,7 +320,7 @@ where
310320 let mut iter = bytes. iter ( ) ;
311321
312322 if let Some ( i) = iter. position ( is_normalization_char) {
313- let mut normalized = String :: new ( ) ;
323+ let mut normalized = String :: with_capacity ( value . len ( ) ) ;
314324 let pos = normalize_step (
315325 & mut normalized,
316326 & mut iter,
@@ -405,7 +415,7 @@ where
405415 let pat = & input[ start..end] ;
406416 // 1. For a character reference, append the referenced character
407417 // to the normalized value.
408- if pat. starts_with ( '#' ) {
418+ if let Some ( entity ) = pat. strip_prefix ( '#' ) {
409419 let entity = & pat[ 1 ..] ; // starts after the #
410420 let codepoint = parse_number ( entity, start..end) ?;
411421 normalized. push_str ( codepoint. encode_utf8 ( & mut [ 0u8 ; 4 ] ) ) ;
@@ -432,6 +442,7 @@ where
432442 Ok ( index + 1 ) // +1 - skip character
433443 }
434444
445+ // SAFETY: We call normalize_step only when is_normalization_char() return true
435446 _ => unreachable ! ( "Only '\\ t', '\\ n', '\\ r', ' ', and '&' are possible here" ) ,
436447 }
437448}
@@ -2160,14 +2171,14 @@ mod normalization {
21602171 fn unclosed_entity ( ) {
21612172 assert_eq ! (
21622173 normalize_attribute_value( "string with unclosed &entity reference" , |_| {
2163- // 0 ^ = 21 ^ = 38
2174+ // 0 ^ = 21 ^ = 38
21642175 Some ( "replacement" )
21652176 } ) ,
21662177 Err ( EscapeError :: UnterminatedEntity ( 21 ..38 ) )
21672178 ) ;
21682179 assert_eq ! (
21692180 normalize_attribute_value( "string with unclosed   (character) reference" , |_| {
2170- // 0 ^ = 21 ^ = 47
2181+ // 0 ^ = 21 ^ = 47
21712182 None
21722183 } ) ,
21732184 Err ( EscapeError :: UnterminatedEntity ( 21 ..47 ) )
@@ -2178,7 +2189,7 @@ mod normalization {
21782189 fn unknown_entity ( ) {
21792190 assert_eq ! (
21802191 normalize_attribute_value( "string with unknown &entity; reference" , |_| { None } ) ,
2181- // 0 ^ ^ = 21.. 27
2192+ // 0 ^ 21 ^ 27
21822193 Err ( EscapeError :: UnrecognizedSymbol (
21832194 21 ..27 ,
21842195 "entity" . to_string( )
0 commit comments