diff --git a/lib/experimental/html/class-wp-html-tag-processor.php b/lib/experimental/html/class-wp-html-tag-processor.php index be6179c963571..3f4ff354d6acd 100644 --- a/lib/experimental/html/class-wp-html-tag-processor.php +++ b/lib/experimental/html/class-wp-html-tag-processor.php @@ -948,6 +948,38 @@ public function set_attribute( $name, $value ) { return; } + /* + * Verify that the attribute name is allowable. In WP_DEBUG + * environments we want to crash to quickly alert developers + * of typos and issues; but in production we don't want to + * interrupt a normal page view, so we'll silently avoid + * updating the attribute in those cases. + * + * Of note, we're disallowing more characters than are strictly + * forbidden in HTML5. This is to prevent additional security + * risks deeper in the WordPress and plugin stack. Specifically + * we reject the less-than (<) and ampersand (&) characters. + * + * The use of a PCRE match allows us to look for specific Unicode + * code points without writing a UTF-8 decoder. Whereas scanning + * for one-byte characters is trivial, scanning for the longer + * byte sequences would be more complicated, and this shouldn't + * be in the hot path for execution so we can compromise on the + * efficiency at this point. + * + * @see https://html.spec.whatwg.org/#attributes-2 + */ + if ( preg_match( + '~[ "\'>& The values "true" and "false" are not allowed on boolean attributes. * > To represent a false value, the attribute has to be omitted altogether. diff --git a/phpunit/html/wp-html-tag-processor-test.php b/phpunit/html/WP_HTML_Tag_Processor_Test.php similarity index 95% rename from phpunit/html/wp-html-tag-processor-test.php rename to phpunit/html/WP_HTML_Tag_Processor_Test.php index 41bf04a138abc..eb5badd9fef48 100644 --- a/phpunit/html/wp-html-tag-processor-test.php +++ b/phpunit/html/WP_HTML_Tag_Processor_Test.php @@ -248,6 +248,57 @@ public function test_set_attribute_with_a_non_existing_attribute_adds_a_new_attr $this->assertSame( '
Text
', (string) $p ); } + /** + * Attribute names with invalid characters should be rejected. + * + * > Attributes have a name and a value. Attribute names must + * > consist of one or more characters other than controls, + * > U+0020 SPACE, U+0022 ("), U+0027 ('), U+003E (>), + * > U+002F (/), U+003D (=), and noncharacters. + * + * @see https://html.spec.whatwg.org/#attributes-2 + * + * @dataProvider data_invalid_attribute_names + * @covers set_attribute + */ + public function test_set_attribute_rejects_invalid_attribute_names( $attribute_name ) { + $p = new WP_HTML_Tag_Processor( '' ); + + $this->expectException( Exception::class ); + + $p->next_tag(); + $p->set_attribute( $attribute_name, "test" ); + + $this->assertEquals( '', (string) $p ); + } + + /** + * Data provider with invalid HTML attribute names. + * + * @return array { + * @type string $attribute_name Text considered invalid for HTML attribute names. + * } + */ + public function data_invalid_attribute_names() { + return array( + 'controls_null' => array( "i\x00d" ), + 'controls_newline' => array( "\nbroken-expectations" ), + 'space' => array( "aria label" ), + 'double-quote' => array( '"id"' ), + 'single-quote' => array( "'id'" ), + 'greater-than' => array( 'sneaky>script' ), + 'solidus' => array( 'data/test-id' ), + 'equals' => array( 'checked=checked' ), + 'noncharacters_1' => array( html_entity_decode( 'anything﷐' ) ), + 'noncharacters_2' => array( html_entity_decode( 'te￿st' ) ), + 'noncharacters_3' => array( html_entity_decode( 'te𯿾st' ) ), + 'noncharacters_4' => array( html_entity_decode( 'te󟿿st' ) ), + 'noncharacters_5' => array( html_entity_decode( '􏿾' ) ), + 'wp_no_lt' => array( 'id array( 'class<script'), + ); + } + /** * According to HTML spec, only the first instance of an attribute counts. * The other ones are ignored.