Skip to content

Commit

Permalink
Tag Processor: throw when supplied unacceptible attribute names.
Browse files Browse the repository at this point in the history
The `WP_HTML_Tag_Processor` allows setting new HTML attributes with
a given name and value. Previously this has allowed any string input
for the attribute name, but we have to be careful not to print output
that might break the HTML we're modifying.

In this patch we're adding a check against the given attribute name
and rejecting invalid or unacceptible names. WordPress here is more
restrictive than HTML5.
  • Loading branch information
dmsnell committed Sep 23, 2022
1 parent 8b9df32 commit c284114
Show file tree
Hide file tree
Showing 2 changed files with 83 additions and 0 deletions.
32 changes: 32 additions & 0 deletions lib/experimental/html/class-wp-html-tag-processor.php
Original file line number Diff line number Diff line change
Expand Up @@ -948,6 +948,38 @@ public function set_attribute( $name, $value ) {
return;
}

/*
* Verify that the attribute name is allowable. In WP_DEBUG
* environments we want to crash to quickly alert developers
* of typos and issues; but in production we don't want to
* interrupt a normal page view, so we'll silently avoid
* updating the attribute in those cases.
*
* Of note, we're disallowing more characters than are strictly
* forbidden in HTML5. This is to prevent additional security
* risks deeper in the WordPress and plugin stack. Specifically
* we reject the less-than (<) and ampersand (&) characters.
*
* The use of a PCRE match allows us to look for specific Unicode
* code points without writing a UTF-8 decoder. Whereas scanning
* for one-byte characters is trivial, scanning for the longer
* byte sequences would be more complicated, and this shouldn't
* be in the hot path for execution so we can compromise on the
* efficiency at this point.
*
* @see https://html.spec.whatwg.org/#attributes-2
*/
if ( preg_match(
'~[ "\'>&</=\x{00}-\x{1F}\x{FDD0}-\x{FDEF}\x{FFFE}\x{FFFF}\x{1FFFE}\x{1FFFF}\x{2FFFE}\x{2FFFF}\x{3FFFE}\x{3FFFF}\x{4FFFE}\x{4FFFF}\x{5FFFE}\x{5FFFF}\x{6FFFE}\x{6FFFF}\x{7FFFE}\x{7FFFF}\x{8FFFE}\x{8FFFF}\x{9FFFE}\x{9FFFF}\x{AFFFE}\x{AFFFF}\x{BFFFE}\x{BFFFF}\x{CFFFE}\x{CFFFF}\x{DFFFE}\x{DFFFF}\x{EFFFE}\x{EFFFF}\x{FFFFE}\x{FFFFF}\x{10FFFE}\x{10FFFF}]~Ssu',
$name
) ) {
if ( defined( 'WP_DEBUG' ) && WP_DEBUG ) {
throw new Exception( 'Invalid attribute name' );
}

return;
}

/*
* > The values "true" and "false" are not allowed on boolean attributes.
* > To represent a false value, the attribute has to be omitted altogether.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,57 @@ public function test_set_attribute_with_a_non_existing_attribute_adds_a_new_attr
$this->assertSame( '<div test-attribute="test-value" id="first"><span id="second">Text</span></div>', (string) $p );
}

/**
* Attribute names with invalid characters should be rejected.
*
* > Attributes have a name and a value. Attribute names must
* > consist of one or more characters other than controls,
* > U+0020 SPACE, U+0022 ("), U+0027 ('), U+003E (>),
* > U+002F (/), U+003D (=), and noncharacters.
*
* @see https://html.spec.whatwg.org/#attributes-2
*
* @dataProvider data_invalid_attribute_names
* @covers set_attribute
*/
public function test_set_attribute_rejects_invalid_attribute_names( $attribute_name ) {
$p = new WP_HTML_Tag_Processor( '<span></span>' );

$this->expectException( Exception::class );

$p->next_tag();
$p->set_attribute( $attribute_name, "test" );

$this->assertEquals( '<span></span>', (string) $p );
}

/**
* Data provider with invalid HTML attribute names.
*
* @return array {
* @type string $attribute_name Text considered invalid for HTML attribute names.
* }
*/
public function data_invalid_attribute_names() {
return array(
'controls_null' => array( "i\x00d" ),
'controls_newline' => array( "\nbroken-expectations" ),
'space' => array( "aria label" ),
'double-quote' => array( '"id"' ),
'single-quote' => array( "'id'" ),
'greater-than' => array( 'sneaky>script' ),
'solidus' => array( 'data/test-id' ),
'equals' => array( 'checked=checked' ),
'noncharacters_1' => array( html_entity_decode( 'anything&#xFDD0;' ) ),
'noncharacters_2' => array( html_entity_decode( 'te&#xFFFF;st' ) ),
'noncharacters_3' => array( html_entity_decode( 'te&#x2FFFE;st' ) ),
'noncharacters_4' => array( html_entity_decode( 'te&#xDFFFF;st' ) ),
'noncharacters_5' => array( html_entity_decode( '&#x10FFFE;' ) ),
'wp_no_lt' => array( 'id<script'),
'wp_no_amp' => array( 'class&lt;script'),
);
}

/**
* According to HTML spec, only the first instance of an attribute counts.
* The other ones are ignored.
Expand Down

0 comments on commit c284114

Please sign in to comment.