From c2841141f150abab859938a6a7a71f9a3136f3b3 Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Fri, 23 Sep 2022 15:40:17 -0700 Subject: [PATCH] Tag Processor: throw when supplied unacceptible attribute names. The `WP_HTML_Tag_Processor` allows setting new HTML attributes with a given name and value. Previously this has allowed any string input for the attribute name, but we have to be careful not to print output that might break the HTML we're modifying. In this patch we're adding a check against the given attribute name and rejecting invalid or unacceptible names. WordPress here is more restrictive than HTML5. --- .../html/class-wp-html-tag-processor.php | 32 ++++++++++++ ...est.php => WP_HTML_Tag_Processor_Test.php} | 51 +++++++++++++++++++ 2 files changed, 83 insertions(+) rename phpunit/html/{wp-html-tag-processor-test.php => WP_HTML_Tag_Processor_Test.php} (95%) diff --git a/lib/experimental/html/class-wp-html-tag-processor.php b/lib/experimental/html/class-wp-html-tag-processor.php index be6179c963571..3f4ff354d6acd 100644 --- a/lib/experimental/html/class-wp-html-tag-processor.php +++ b/lib/experimental/html/class-wp-html-tag-processor.php @@ -948,6 +948,38 @@ public function set_attribute( $name, $value ) { return; } + /* + * Verify that the attribute name is allowable. In WP_DEBUG + * environments we want to crash to quickly alert developers + * of typos and issues; but in production we don't want to + * interrupt a normal page view, so we'll silently avoid + * updating the attribute in those cases. + * + * Of note, we're disallowing more characters than are strictly + * forbidden in HTML5. This is to prevent additional security + * risks deeper in the WordPress and plugin stack. Specifically + * we reject the less-than (<) and ampersand (&) characters. + * + * The use of a PCRE match allows us to look for specific Unicode + * code points without writing a UTF-8 decoder. Whereas scanning + * for one-byte characters is trivial, scanning for the longer + * byte sequences would be more complicated, and this shouldn't + * be in the hot path for execution so we can compromise on the + * efficiency at this point. + * + * @see https://html.spec.whatwg.org/#attributes-2 + */ + if ( preg_match( + '~[ "\'>& The values "true" and "false" are not allowed on boolean attributes. * > To represent a false value, the attribute has to be omitted altogether. diff --git a/phpunit/html/wp-html-tag-processor-test.php b/phpunit/html/WP_HTML_Tag_Processor_Test.php similarity index 95% rename from phpunit/html/wp-html-tag-processor-test.php rename to phpunit/html/WP_HTML_Tag_Processor_Test.php index 41bf04a138abc..eb5badd9fef48 100644 --- a/phpunit/html/wp-html-tag-processor-test.php +++ b/phpunit/html/WP_HTML_Tag_Processor_Test.php @@ -248,6 +248,57 @@ public function test_set_attribute_with_a_non_existing_attribute_adds_a_new_attr $this->assertSame( '
Text
', (string) $p ); } + /** + * Attribute names with invalid characters should be rejected. + * + * > Attributes have a name and a value. Attribute names must + * > consist of one or more characters other than controls, + * > U+0020 SPACE, U+0022 ("), U+0027 ('), U+003E (>), + * > U+002F (/), U+003D (=), and noncharacters. + * + * @see https://html.spec.whatwg.org/#attributes-2 + * + * @dataProvider data_invalid_attribute_names + * @covers set_attribute + */ + public function test_set_attribute_rejects_invalid_attribute_names( $attribute_name ) { + $p = new WP_HTML_Tag_Processor( '' ); + + $this->expectException( Exception::class ); + + $p->next_tag(); + $p->set_attribute( $attribute_name, "test" ); + + $this->assertEquals( '', (string) $p ); + } + + /** + * Data provider with invalid HTML attribute names. + * + * @return array { + * @type string $attribute_name Text considered invalid for HTML attribute names. + * } + */ + public function data_invalid_attribute_names() { + return array( + 'controls_null' => array( "i\x00d" ), + 'controls_newline' => array( "\nbroken-expectations" ), + 'space' => array( "aria label" ), + 'double-quote' => array( '"id"' ), + 'single-quote' => array( "'id'" ), + 'greater-than' => array( 'sneaky>script' ), + 'solidus' => array( 'data/test-id' ), + 'equals' => array( 'checked=checked' ), + 'noncharacters_1' => array( html_entity_decode( 'anything﷐' ) ), + 'noncharacters_2' => array( html_entity_decode( 'te￿st' ) ), + 'noncharacters_3' => array( html_entity_decode( 'te𯿾st' ) ), + 'noncharacters_4' => array( html_entity_decode( 'te󟿿st' ) ), + 'noncharacters_5' => array( html_entity_decode( '􏿾' ) ), + 'wp_no_lt' => array( 'id array( 'class<script'), + ); + } + /** * According to HTML spec, only the first instance of an attribute counts. * The other ones are ignored.