Skip to content

Commit c284114

Browse files
committed
Tag Processor: throw when supplied unacceptable attribute names.
The `WP_HTML_Tag_Processor` allows setting new HTML attributes with a given name and value. Previously this allowed any string input for the attribute name, but we have to be careful not to print output that might break the HTML we're modifying. In this patch we're adding a check against the given attribute name and rejecting invalid or unacceptable names. WordPress here is more restrictive than HTML5.
1 parent 8b9df32 commit c284114

File tree

2 files changed

+83
-0
lines changed

2 files changed

+83
-0
lines changed

lib/experimental/html/class-wp-html-tag-processor.php

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -948,6 +948,38 @@ public function set_attribute( $name, $value ) {
948948
return;
949949
}
950950

951+
/*
952+
* Verify that the attribute name is allowable. In WP_DEBUG
953+
* environments we want to crash to quickly alert developers
954+
* of typos and issues; but in production we don't want to
955+
* interrupt a normal page view, so we'll silently avoid
956+
* updating the attribute in those cases.
957+
*
958+
* Of note, we're disallowing more characters than are strictly
959+
* forbidden in HTML5. This is to prevent additional security
960+
* risks deeper in the WordPress and plugin stack. Specifically
961+
* we reject the less-than (<) and ampersand (&) characters.
962+
*
963+
* The use of a PCRE match allows us to look for specific Unicode
964+
* code points without writing a UTF-8 decoder. Whereas scanning
965+
* for one-byte characters is trivial, scanning for the longer
966+
* byte sequences would be more complicated, and this shouldn't
967+
* be in the hot path for execution so we can compromise on the
968+
* efficiency at this point.
969+
*
970+
* @see https://html.spec.whatwg.org/#attributes-2
971+
*/
972+
if ( preg_match(
973+
'~[ "\'>&</=\x{00}-\x{1F}\x{FDD0}-\x{FDEF}\x{FFFE}\x{FFFF}\x{1FFFE}\x{1FFFF}\x{2FFFE}\x{2FFFF}\x{3FFFE}\x{3FFFF}\x{4FFFE}\x{4FFFF}\x{5FFFE}\x{5FFFF}\x{6FFFE}\x{6FFFF}\x{7FFFE}\x{7FFFF}\x{8FFFE}\x{8FFFF}\x{9FFFE}\x{9FFFF}\x{AFFFE}\x{AFFFF}\x{BFFFE}\x{BFFFF}\x{CFFFE}\x{CFFFF}\x{DFFFE}\x{DFFFF}\x{EFFFE}\x{EFFFF}\x{FFFFE}\x{FFFFF}\x{10FFFE}\x{10FFFF}]~Ssu',
974+
$name
975+
) ) {
976+
if ( defined( 'WP_DEBUG' ) && WP_DEBUG ) {
977+
throw new Exception( 'Invalid attribute name' );
978+
}
979+
980+
return;
981+
}
982+
951983
/*
952984
* > The values "true" and "false" are not allowed on boolean attributes.
953985
* > To represent a false value, the attribute has to be omitted altogether.

phpunit/html/wp-html-tag-processor-test.php renamed to phpunit/html/WP_HTML_Tag_Processor_Test.php

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -248,6 +248,57 @@ public function test_set_attribute_with_a_non_existing_attribute_adds_a_new_attr
248248
$this->assertSame( '<div test-attribute="test-value" id="first"><span id="second">Text</span></div>', (string) $p );
249249
}
250250

251+
/**
252+
* Attribute names with invalid characters should be rejected.
253+
*
254+
* > Attributes have a name and a value. Attribute names must
255+
* > consist of one or more characters other than controls,
256+
* > U+0020 SPACE, U+0022 ("), U+0027 ('), U+003E (>),
257+
* > U+002F (/), U+003D (=), and noncharacters.
258+
*
259+
* @see https://html.spec.whatwg.org/#attributes-2
260+
*
261+
* @dataProvider data_invalid_attribute_names
262+
* @covers set_attribute
263+
*/
264+
public function test_set_attribute_rejects_invalid_attribute_names( $attribute_name ) {
265+
$p = new WP_HTML_Tag_Processor( '<span></span>' );
266+
267+
$this->expectException( Exception::class );
268+
269+
$p->next_tag();
270+
$p->set_attribute( $attribute_name, "test" );
271+
272+
$this->assertEquals( '<span></span>', (string) $p );
273+
}
274+
275+
/**
276+
* Data provider with invalid HTML attribute names.
277+
*
278+
* @return array {
279+
* @type string $attribute_name Text considered invalid for HTML attribute names.
280+
* }
281+
*/
282+
public function data_invalid_attribute_names() {
283+
return array(
284+
'controls_null' => array( "i\x00d" ),
285+
'controls_newline' => array( "\nbroken-expectations" ),
286+
'space' => array( "aria label" ),
287+
'double-quote' => array( '"id"' ),
288+
'single-quote' => array( "'id'" ),
289+
'greater-than' => array( 'sneaky>script' ),
290+
'solidus' => array( 'data/test-id' ),
291+
'equals' => array( 'checked=checked' ),
292+
'noncharacters_1' => array( html_entity_decode( 'anything&#xFDD0;' ) ),
293+
'noncharacters_2' => array( html_entity_decode( 'te&#xFFFF;st' ) ),
294+
'noncharacters_3' => array( html_entity_decode( 'te&#x2FFFE;st' ) ),
295+
'noncharacters_4' => array( html_entity_decode( 'te&#xDFFFF;st' ) ),
296+
'noncharacters_5' => array( html_entity_decode( '&#x10FFFE;' ) ),
297+
'wp_no_lt' => array( 'id<script'),
298+
'wp_no_amp' => array( 'class&lt;script'),
299+
);
300+
}
301+
251302
/**
252303
* According to HTML spec, only the first instance of an attribute counts.
253304
* The other ones are ignored.

0 commit comments

Comments
 (0)