From d7c850bcbc66b6c2c1299efc3158e3af8d33e65b Mon Sep 17 00:00:00 2001 From: Bernie Reiter Date: Tue, 10 Jan 2023 14:37:18 +0100 Subject: [PATCH 01/15] Add minimal test case for set_content_inside_balanced_tags --- phpunit/html/wp-html-processor-test.php | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/phpunit/html/wp-html-processor-test.php b/phpunit/html/wp-html-processor-test.php index ae9089d761f4f..da252ed4b1d26 100644 --- a/phpunit/html/wp-html-processor-test.php +++ b/phpunit/html/wp-html-processor-test.php @@ -222,4 +222,12 @@ public function test_can_scan_through_tags_at_a_given_depth() { // Did we only visit the tags inside section > * > p? $this->assertEquals( 1, $p2_count ); } + + public function test_set_content_inside_balanced_tags_sets_content_correctly() { + $tags = new WP_HTML_Processor( '
<div>outside</div><section><div><img>inside</div></section>
' ); + + $tags->next_tag( 'section' ); + $tags->set_content_inside_balanced_tags( 'This is the new section content.' ); + $this->assertSame( '
<div>outside</div><section>This is the new section content.</section>
', $tags->get_updated_html() ); + } } From 3a99ab277ecc467f6e758e35321d4688f41f7af8 Mon Sep 17 00:00:00 2001 From: Bernie Reiter Date: Tue, 10 Jan 2023 16:07:51 +0100 Subject: [PATCH 02/15] Add test to verify that bookmarks are kept intact --- phpunit/html/wp-html-processor-test.php | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/phpunit/html/wp-html-processor-test.php b/phpunit/html/wp-html-processor-test.php index da252ed4b1d26..127045249cd6d 100644 --- a/phpunit/html/wp-html-processor-test.php +++ b/phpunit/html/wp-html-processor-test.php @@ -230,4 +230,20 @@ public function test_set_content_inside_balanced_tags_sets_content_correctly() { $tags->set_content_inside_balanced_tags( 'This is the new section content.' ); $this->assertSame( '
<div>outside</div><section>This is the new section content.</section>
', $tags->get_updated_html() ); } + + public function test_set_content_inside_balanced_tags_updates_bookmarks_correctly() { + $tags = new WP_HTML_Processor( '
<div>outside</div><section><div><img>inside</div></section>
' ); + + $tags->next_tag( 'div' ); + $tags->set_bookmark( 'start' ); + $tags->next_tag( 'img' ); + $this->assertSame( 'IMG', $tags->get_tag() ); + $tags->set_bookmark( 'after' ); + $tags->seek( 'start' ); + + $tags->set_content_inside_balanced_tags( 'This is the new div content.' ); + $this->assertSame( '
<div>This is the new div content.</div><section><div><img>inside</div></section>
', $tags->get_updated_html() ); + $tags->seek( 'after' ); + $this->assertSame( 'IMG', $tags->get_tag() ); + } } From a8d798b6ad6ccb48ea21d5b65aa52c339c39801a Mon Sep 17 00:00:00 2001 From: Bernie Reiter Date: Tue, 10 Jan 2023 14:56:30 +0100 Subject: [PATCH 03/15] Basic set_content_inside_balanced_tags implementation --- .../html/class-wp-html-processor.php | 78 ++++++++++++++----- 1 file changed, 60 insertions(+), 18 deletions(-) diff --git a/lib/experimental/html/class-wp-html-processor.php b/lib/experimental/html/class-wp-html-processor.php index 6fa236aa94a33..ca6691d17f9a7 100644 --- a/lib/experimental/html/class-wp-html-processor.php +++ b/lib/experimental/html/class-wp-html-processor.php @@ -135,28 +135,30 @@ public function balanced_next( WP_HTML_Processor_Scan_State $state, $query = nul } public function get_content_inside_balanced_tags() { - static $start_name = null; - static $end_name = null; + list( $start_name, $end_name ) = $this->get_balanced_tags(); + $content = $this->get_content_inside_bookmarks( $start_name, $end_name ); + $this->seek( $start_name ); - if ( null === $start_name || array_key_exists( $start_name, $this->bookmarks ) ) { - $rand_id = rand( 1, PHP_INT_MAX ); - $start_name = "start_{$rand_id}"; - } + $this->release_bookmark( $start_name ); + $this->release_bookmark( $end_name ); - if ( null === $end_name || array_key_exists( $end_name, $this->bookmarks ) ) { - $rand_id = rand( 1, PHP_INT_MAX ); - $end_name = "start_{$rand_id}"; + return $content; + } + + private function get_content_inside_bookmarks( $start_bookmark, $end_bookmark ) { + if ( ! 
isset( $this->bookmarks[ $start_bookmark ], $this->bookmarks[ $end_bookmark ] ) ) { + return null; } - $this->set_bookmark( $start_name ); + $start = $this->bookmarks[ $start_bookmark ]; + $end = $this->bookmarks[ $end_bookmark ]; - $state = self::new_state(); - while ( $this->balanced_next( $state ) ) { - continue; - } + return substr( $this->get_updated_html(), $start->end + 1, $end->start - $start->end - 2 ); + } - $this->set_bookmark( $end_name ); - $content = $this->content_inside_bookmarks( $start_name, $end_name ); + public function set_content_inside_balanced_tags( $content ) { + list( $start_name, $end_name ) = $this->get_balanced_tags(); + $this->set_content_inside_bookmarks( $start_name, $end_name, $content ); $this->seek( $start_name ); $this->release_bookmark( $start_name ); @@ -165,15 +167,55 @@ public function get_content_inside_balanced_tags() { return $content; } - private function content_inside_bookmarks( $start_bookmark, $end_bookmark ) { + private function set_content_inside_bookmarks( $start_bookmark, $end_bookmark, $content ) { if ( ! isset( $this->bookmarks[ $start_bookmark ], $this->bookmarks[ $end_bookmark ] ) ) { return null; } + $this->get_updated_html(); + $start = $this->bookmarks[ $start_bookmark ]; $end = $this->bookmarks[ $end_bookmark ]; - return substr( $this->get_updated_html(), $start->end + 1, $end->start - $start->end - 2 ); + $this->add_lexical_update( $start->end + 1, $end->start - 1, $content ); + } + + /** + * If on an opening tag, return a pair of bookmarks for it, and for the matching closing tag. + * + * @return array A pair of bookmarks for the current opening and matching closing tags. 
+ */ + public function get_balanced_tags() { + static $start_name = null; + static $end_name = null; + + if ( null === $start_name || array_key_exists( $start_name, $this->bookmarks ) ) { + $rand_id = rand( 1, PHP_INT_MAX ); + $start_name = "start_{$rand_id}"; + } + + if ( null === $end_name || array_key_exists( $end_name, $this->bookmarks ) ) { + $rand_id = rand( 1, PHP_INT_MAX ); + $end_name = "start_{$rand_id}"; + } + + $this->set_bookmark( $start_name ); + $this->find_matching_closing_tag(); + $this->set_bookmark( $end_name ); + + return array( $start_name, $end_name ); + } + + /** + * If on an opening tag, navigate to the matching closing tag. + * + * @return void + */ + public function find_matching_closing_tag() { + $state = self::new_state(); + while ( $this->balanced_next( $state ) ) { + continue; + } } /* From 435987b995fb2104e27ab9a95c192d6544761c2a Mon Sep 17 00:00:00 2001 From: Bernie Reiter Date: Wed, 11 Jan 2023 11:01:03 +0100 Subject: [PATCH 04/15] WP_HTML_Tag_Processor: Rename attribute_updates to lexical_updates --- .../html/class-wp-html-tag-processor.php | 52 +++++++++---------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/lib/experimental/html/class-wp-html-tag-processor.php b/lib/experimental/html/class-wp-html-tag-processor.php index 72c342dbd02a7..73183edb7bbd2 100644 --- a/lib/experimental/html/class-wp-html-tag-processor.php +++ b/lib/experimental/html/class-wp-html-tag-processor.php @@ -423,7 +423,7 @@ class WP_HTML_Tag_Processor { * @since 6.2.0 * @var WP_HTML_Text_Replacement[] */ - private $attribute_updates = array(); + private $lexical_updates = array(); /** * Tracks how many times we've performed a `seek()` @@ -1097,15 +1097,15 @@ private function skip_whitespace() { } /** - * Applies attribute updates and cleans up once a tag is fully parsed. + * Applies lexical updates and cleans up once a tag is fully parsed. 
* * @since 6.2.0 * * @return void */ private function after_tag() { - $this->class_name_updates_to_attributes_updates(); - $this->apply_attributes_updates(); + $this->class_name_updates_to_lexical_updates(); + $this->apply_lexical_updates(); $this->tag_name_starts_at = null; $this->tag_name_length = null; $this->tag_ends_at = null; @@ -1114,20 +1114,20 @@ private function after_tag() { } /** - * Converts class name updates into tag attributes updates + * Converts class name updates into tag lexical updates * (they are accumulated in different data formats for performance). * - * This method is only meant to run right before the attribute updates are applied. + * This method is only meant to run right before the lexical updates are applied. * The behavior in all other cases is undefined. * * @return void * @since 6.2.0 * * @see $classname_updates - * @see $attribute_updates + * @see $lexical_updates */ - private function class_name_updates_to_attributes_updates() { - if ( count( $this->classname_updates ) === 0 || isset( $this->attribute_updates['class'] ) ) { + private function class_name_updates_to_lexical_updates() { + if ( count( $this->classname_updates ) === 0 || isset( $this->lexical_updates['class'] ) ) { $this->classname_updates = array(); return; } @@ -1247,13 +1247,13 @@ private function class_name_updates_to_attributes_updates() { * * @since 6.2.0 */ - private function apply_attributes_updates() { - if ( ! count( $this->attribute_updates ) ) { + private function apply_lexical_updates() { + if ( ! count( $this->lexical_updates ) ) { return; } /** - * Attribute updates can be enqueued in any order but as we + * Lexical updates can be enqueued in any order but as we * progress through the document to replace them we have to * make our replacements in the order in which they are found * in that document. 
@@ -1262,9 +1262,9 @@ private function apply_attributes_updates() { * out of order, which could otherwise lead to mangled output, * partially-duplicate attributes, and overwritten attributes. */ - usort( $this->attribute_updates, array( self::class, 'sort_start_ascending' ) ); + usort( $this->lexical_updates, array( self::class, 'sort_start_ascending' ) ); - foreach ( $this->attribute_updates as $diff ) { + foreach ( $this->lexical_updates as $diff ) { $this->updated_html .= substr( $this->html, $this->updated_bytes, $diff->start - $this->updated_bytes ); $this->updated_html .= $diff->text; $this->updated_bytes = $diff->end; @@ -1272,7 +1272,7 @@ private function apply_attributes_updates() { foreach ( $this->bookmarks as $bookmark ) { /** - * As we loop through $this->attribute_updates, we keep comparing + * As we loop through $this->lexical_updates, we keep comparing * $bookmark->start and $bookmark->end to $diff->start. We can't * change it and still expect the correct result, so let's accumulate * the deltas separately and apply them all at once after the loop. @@ -1280,7 +1280,7 @@ private function apply_attributes_updates() { $head_delta = 0; $tail_delta = 0; - foreach ( $this->attribute_updates as $diff ) { + foreach ( $this->lexical_updates as $diff ) { $update_head = $bookmark->start >= $diff->start; $update_tail = $bookmark->end >= $diff->start; @@ -1303,7 +1303,7 @@ private function apply_attributes_updates() { $bookmark->end += $tail_delta; } - $this->attribute_updates = array(); + $this->lexical_updates = array(); } /** @@ -1346,8 +1346,8 @@ public function seek( $bookmark_name ) { * * @since 6.2.0 * - * @param WP_HTML_Text_Replacement $a First attribute update. - * @param WP_HTML_Text_Replacement $b Second attribute update. + * @param WP_HTML_Text_Replacement $a First lexical update. + * @param WP_HTML_Text_Replacement $b Second lexical update. 
* @return integer */ private static function sort_start_ascending( $a, $b ) { @@ -1582,8 +1582,8 @@ public function set_attribute( $name, $value ) { * * Result:
*/ - $existing_attribute = $this->attributes[ $comparable_name ]; - $this->attribute_updates[ $name ] = new WP_HTML_Text_Replacement( + $existing_attribute = $this->attributes[ $comparable_name ]; + $this->lexical_updates[ $name ] = new WP_HTML_Text_Replacement( $existing_attribute->start, $existing_attribute->end, $updated_attribute @@ -1600,7 +1600,7 @@ public function set_attribute( $name, $value ) { * * Result:
*/ - $this->attribute_updates[ $comparable_name ] = new WP_HTML_Text_Replacement( + $this->lexical_updates[ $comparable_name ] = new WP_HTML_Text_Replacement( $this->tag_name_starts_at + $this->tag_name_length, $this->tag_name_starts_at + $this->tag_name_length, ' ' . $updated_attribute @@ -1640,7 +1640,7 @@ public function remove_attribute( $name ) { * * Result:
*/ - $this->attribute_updates[ $name ] = new WP_HTML_Text_Replacement( + $this->lexical_updates[ $name ] = new WP_HTML_Text_Replacement( $this->attributes[ $name ]->start, $this->attributes[ $name ]->end, '' @@ -1702,7 +1702,7 @@ public function __toString() { */ public function get_updated_html() { // Short-circuit if there are no new updates to apply. - if ( ! count( $this->classname_updates ) && ! count( $this->attribute_updates ) ) { + if ( ! count( $this->classname_updates ) && ! count( $this->lexical_updates ) ) { return $this->updated_html . substr( $this->html, $this->updated_bytes ); } @@ -1715,8 +1715,8 @@ public function get_updated_html() { $updated_html_up_to_current_tag_name_end = $this->updated_html . $delta_between_updated_html_end_and_current_tag_end; // 1. Apply the attributes updates to the original HTML - $this->class_name_updates_to_attributes_updates(); - $this->apply_attributes_updates(); + $this->class_name_updates_to_lexical_updates(); + $this->apply_lexical_updates(); // 2. Replace the original HTML with the updated HTML $this->html = $this->updated_html . substr( $this->html, $this->updated_bytes ); From bc92001ce529f31c931deda92443b35e5e0ffda9 Mon Sep 17 00:00:00 2001 From: Bernie Reiter Date: Wed, 11 Jan 2023 14:42:12 +0100 Subject: [PATCH 05/15] WP_HTML_Tag_Processor: Add add_lexical_update() method --- .../html/class-wp-html-tag-processor.php | 74 ++++++++++++++++--- 1 file changed, 62 insertions(+), 12 deletions(-) diff --git a/lib/experimental/html/class-wp-html-tag-processor.php b/lib/experimental/html/class-wp-html-tag-processor.php index 73183edb7bbd2..af77aa2f11895 100644 --- a/lib/experimental/html/class-wp-html-tag-processor.php +++ b/lib/experimental/html/class-wp-html-tag-processor.php @@ -425,6 +425,18 @@ class WP_HTML_Tag_Processor { */ private $lexical_updates = array(); + /** + * Attribute replacements to apply to input HTML document. 
+ * + * Unlike more generic lexical updates, attribute updates are stored + * in an associative array, where the keys are (lowercase-normalized) + * attribute names, in order to avoid duplication. + * + * @since 6.2.0 + * @var WP_HTML_Text_Replacement[] + */ + private $attribute_updates = array(); + /** * Tracks how many times we've performed a `seek()` * so that we can prevent accidental infinite loops. @@ -1104,7 +1116,8 @@ private function skip_whitespace() { * @return void */ private function after_tag() { - $this->class_name_updates_to_lexical_updates(); + $this->class_name_updates_to_attribute_updates(); + $this->attribute_updates_to_lexical_updates(); $this->apply_lexical_updates(); $this->tag_name_starts_at = null; $this->tag_name_length = null; @@ -1114,20 +1127,20 @@ private function after_tag() { } /** - * Converts class name updates into tag lexical updates + * Converts class name updates into tag attribute updates * (they are accumulated in different data formats for performance). * - * This method is only meant to run right before the lexical updates are applied. + * This method is only meant to run right before the attribute updates are applied. * The behavior in all other cases is undefined. * * @return void * @since 6.2.0 * * @see $classname_updates - * @see $lexical_updates + * @see $attribute_updates */ - private function class_name_updates_to_lexical_updates() { - if ( count( $this->classname_updates ) === 0 || isset( $this->lexical_updates['class'] ) ) { + private function class_name_updates_to_attribute_updates() { + if ( count( $this->classname_updates ) === 0 || isset( $this->attribute_updates['class'] ) ) { $this->classname_updates = array(); return; } @@ -1242,6 +1255,26 @@ private function class_name_updates_to_lexical_updates() { } } + /** + * Converts attribute updates into lexical updates. + * + * This method is only meant to run right before the attribute updates are applied. + * The behavior in all other cases is undefined. 
+ * + * @return void + * @since 6.2.0 + * + * @see $attribute_updates + * @see $lexical_updates + */ + private function attribute_updates_to_lexical_updates() { + $this->lexical_updates = array_merge( + $this->lexical_updates, + array_values( $this->attribute_updates ) + ); + $this->attribute_updates = array(); + } + /** * Applies updates to attributes. * @@ -1479,6 +1512,18 @@ public function is_tag_closer() { return $this->is_closing_tag; } + /** + * Add a lexical update, i.e. a replacement of HTML at a given position. + * + * @param int $start The start offset of the replacement. + * @param int $end The end offset of the replacement. + * @param string $text The replacement. + * @return void + */ + protected function add_lexical_update( $start, $end, $text ) { + $this->lexical_updates[] = new WP_HTML_Text_Replacement( $start, $end, $text ); + } + /** * Updates or creates a new attribute on the currently matched tag with the value passed. * @@ -1582,8 +1627,8 @@ public function set_attribute( $name, $value ) { * * Result:
*/ - $existing_attribute = $this->attributes[ $comparable_name ]; - $this->lexical_updates[ $name ] = new WP_HTML_Text_Replacement( + $existing_attribute = $this->attributes[ $comparable_name ]; + $this->attribute_updates[ $name ] = new WP_HTML_Text_Replacement( $existing_attribute->start, $existing_attribute->end, $updated_attribute @@ -1600,7 +1645,7 @@ public function set_attribute( $name, $value ) { * * Result:
*/ - $this->lexical_updates[ $comparable_name ] = new WP_HTML_Text_Replacement( + $this->attribute_updates[ $comparable_name ] = new WP_HTML_Text_Replacement( $this->tag_name_starts_at + $this->tag_name_length, $this->tag_name_starts_at + $this->tag_name_length, ' ' . $updated_attribute @@ -1640,7 +1685,7 @@ public function remove_attribute( $name ) { * * Result:
*/ - $this->lexical_updates[ $name ] = new WP_HTML_Text_Replacement( + $this->attribute_updates[ $name ] = new WP_HTML_Text_Replacement( $this->attributes[ $name ]->start, $this->attributes[ $name ]->end, '' @@ -1702,7 +1747,11 @@ public function __toString() { */ public function get_updated_html() { // Short-circuit if there are no new updates to apply. - if ( ! count( $this->classname_updates ) && ! count( $this->lexical_updates ) ) { + if ( + ! count( $this->classname_updates ) && + ! count( $this->attribute_updates ) && + ! count( $this->lexical_updates ) + ) { return $this->updated_html . substr( $this->html, $this->updated_bytes ); } @@ -1715,7 +1764,8 @@ public function get_updated_html() { $updated_html_up_to_current_tag_name_end = $this->updated_html . $delta_between_updated_html_end_and_current_tag_end; // 1. Apply the attributes updates to the original HTML - $this->class_name_updates_to_lexical_updates(); + $this->class_name_updates_to_attribute_updates(); + $this->attribute_updates_to_lexical_updates(); $this->apply_lexical_updates(); // 2. Replace the original HTML with the updated HTML From 88a3961882cd58f1491e46bd4a2aba4f1e3903fb Mon Sep 17 00:00:00 2001 From: Bernie Reiter Date: Thu, 12 Jan 2023 14:17:39 +0100 Subject: [PATCH 06/15] Introduce const in test --- phpunit/html/wp-html-processor-test.php | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/phpunit/html/wp-html-processor-test.php b/phpunit/html/wp-html-processor-test.php index 127045249cd6d..554070b08bbf8 100644 --- a/phpunit/html/wp-html-processor-test.php +++ b/phpunit/html/wp-html-processor-test.php @@ -23,8 +23,10 @@ class WP_UnitTestCase extends PHPUnit\Framework\TestCase {} * @coversDefaultClass WP_HTML_Processor */ class WP_HTML_Processor_Test extends WP_UnitTestCase { + const HTML = '
<div>outside</div><section><div><img>inside</div></section>
'; + public function test_find_descendant_tag() { - $tags = new WP_HTML_Processor( '
<div>outside</div><section><div><img>inside</div></section>
' ); + $tags = new WP_HTML_Processor( self::HTML ); $tags->next_tag( 'div' ); $state = $tags->new_state(); @@ -224,7 +226,7 @@ public function test_can_scan_through_tags_at_a_given_depth() { } public function test_set_content_inside_balanced_tags_sets_content_correctly() { - $tags = new WP_HTML_Processor( '
<div>outside</div><section><div><img>inside</div></section>
' ); + $tags = new WP_HTML_Processor( self::HTML ); $tags->next_tag( 'section' ); $tags->set_content_inside_balanced_tags( 'This is the new section content.' ); @@ -232,7 +234,7 @@ public function test_set_content_inside_balanced_tags_sets_content_correctly() { } public function test_set_content_inside_balanced_tags_updates_bookmarks_correctly() { - $tags = new WP_HTML_Processor( '
<div>outside</div><section><div><img>inside</div></section>
' ); + $tags = new WP_HTML_Processor( self::HTML ); $tags->next_tag( 'div' ); $tags->set_bookmark( 'start' ); From c9d44b076c2488eaa629d83b922edfb2f5a1da08 Mon Sep 17 00:00:00 2001 From: Bernie Reiter Date: Thu, 12 Jan 2023 14:21:24 +0100 Subject: [PATCH 07/15] Test subsequent updates on the same tag --- phpunit/html/wp-html-processor-test.php | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/phpunit/html/wp-html-processor-test.php b/phpunit/html/wp-html-processor-test.php index 554070b08bbf8..8b35419d054dd 100644 --- a/phpunit/html/wp-html-processor-test.php +++ b/phpunit/html/wp-html-processor-test.php @@ -248,4 +248,13 @@ public function test_set_content_inside_balanced_tags_updates_bookmarks_correctl $tags->seek( 'after' ); $this->assertSame( 'IMG', $tags->get_tag() ); } + + public function test_set_content_inside_balanced_tags_subsequent_updates_on_the_same_tag_work() { + $tags = new WP_HTML_Processor( self::HTML ); + + $tags->next_tag( 'section' ); + $tags->set_content_inside_balanced_tags( 'This is the new section content.' ); + $tags->set_content_inside_balanced_tags( 'This is the even newer section content.' ); + $this->assertSame( '
<div>outside</div><section>This is the even newer section content.</section>
', $tags->get_updated_html() ); + } } From e3f89fd1c69f7ab1da5e115ef35c1bfa46ceb345 Mon Sep 17 00:00:00 2001 From: Bernie Reiter Date: Thu, 12 Jan 2023 14:24:25 +0100 Subject: [PATCH 08/15] Test set_content_inside_balanced_tags followed by set_attribute works --- phpunit/html/wp-html-processor-test.php | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/phpunit/html/wp-html-processor-test.php b/phpunit/html/wp-html-processor-test.php index 8b35419d054dd..c27e6095c8801 100644 --- a/phpunit/html/wp-html-processor-test.php +++ b/phpunit/html/wp-html-processor-test.php @@ -257,4 +257,13 @@ public function test_set_content_inside_balanced_tags_subsequent_updates_on_the_ $tags->set_content_inside_balanced_tags( 'This is the even newer section content.' ); $this->assertSame( '
<div>outside</div><section>This is the even newer section content.</section>
', $tags->get_updated_html() ); } + + public function test_set_content_inside_balanced_tags_followed_by_set_attribute_works() { + $tags = new WP_HTML_Processor( self::HTML ); + + $tags->next_tag( 'section' ); + $tags->set_content_inside_balanced_tags( 'This is the new section content.' ); + $tags->set_attribute( 'id', 'thesection' ); + $this->assertSame( '
<div>outside</div><section id="thesection">This is the new section content.</section>
', $tags->get_updated_html() ); + } } From a989f71934997cd65d3b49d6fc385cf6d6c85698 Mon Sep 17 00:00:00 2001 From: Bernie Reiter Date: Thu, 12 Jan 2023 14:29:29 +0100 Subject: [PATCH 09/15] Test set_content_inside_balanced_tags preceded by set_attribute works --- phpunit/html/wp-html-processor-test.php | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/phpunit/html/wp-html-processor-test.php b/phpunit/html/wp-html-processor-test.php index c27e6095c8801..68ad115e8140f 100644 --- a/phpunit/html/wp-html-processor-test.php +++ b/phpunit/html/wp-html-processor-test.php @@ -266,4 +266,13 @@ public function test_set_content_inside_balanced_tags_followed_by_set_attribute_ $tags->set_attribute( 'id', 'thesection' ); $this->assertSame( '
<div>outside</div><section id="thesection">This is the new section content.</section>
', $tags->get_updated_html() ); } + + public function test_set_content_inside_balanced_tags_preceded_by_set_attribute_works() { + $tags = new WP_HTML_Processor( self::HTML ); + + $tags->next_tag( 'section' ); + $tags->set_attribute( 'id', 'thesection' ); + $tags->set_content_inside_balanced_tags( 'This is the new section content.' ); + $this->assertSame( '
<div>outside</div><section id="thesection">This is the new section content.</section>
', $tags->get_updated_html() ); + } } From 6850a6542d2be02f9db36d80ef6767973d0b6816 Mon Sep 17 00:00:00 2001 From: Bernie Reiter Date: Mon, 16 Jan 2023 19:50:10 +0100 Subject: [PATCH 10/15] Fix test --- lib/experimental/html/class-wp-html-processor.php | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/experimental/html/class-wp-html-processor.php b/lib/experimental/html/class-wp-html-processor.php index ca6691d17f9a7..2ca0f0e4df3ca 100644 --- a/lib/experimental/html/class-wp-html-processor.php +++ b/lib/experimental/html/class-wp-html-processor.php @@ -157,6 +157,7 @@ private function get_content_inside_bookmarks( $start_bookmark, $end_bookmark ) } public function set_content_inside_balanced_tags( $content ) { + $this->get_updated_html(); list( $start_name, $end_name ) = $this->get_balanced_tags(); $this->set_content_inside_bookmarks( $start_name, $end_name, $content ); $this->seek( $start_name ); @@ -172,8 +173,6 @@ private function set_content_inside_bookmarks( $start_bookmark, $end_bookmark, $ return null; } - $this->get_updated_html(); - $start = $this->bookmarks[ $start_bookmark ]; $end = $this->bookmarks[ $end_bookmark ]; From f3725de6632993ca7089d45199eaccd0086662e5 Mon Sep 17 00:00:00 2001 From: Bernie Reiter Date: Tue, 17 Jan 2023 13:27:22 +0100 Subject: [PATCH 11/15] Return false on void element --- lib/experimental/html/class-wp-html-processor.php | 4 ++++ phpunit/html/wp-html-processor-test.php | 9 +++++++++ 2 files changed, 13 insertions(+) diff --git a/lib/experimental/html/class-wp-html-processor.php b/lib/experimental/html/class-wp-html-processor.php index 2ca0f0e4df3ca..d6d937562ca97 100644 --- a/lib/experimental/html/class-wp-html-processor.php +++ b/lib/experimental/html/class-wp-html-processor.php @@ -157,6 +157,10 @@ private function get_content_inside_bookmarks( $start_bookmark, $end_bookmark ) } public function set_content_inside_balanced_tags( $content ) { + if ( self::is_html_void_element( $this->get_tag() ) ) { + 
return false; + } + $this->get_updated_html(); list( $start_name, $end_name ) = $this->get_balanced_tags(); $this->set_content_inside_bookmarks( $start_name, $end_name, $content ); diff --git a/phpunit/html/wp-html-processor-test.php b/phpunit/html/wp-html-processor-test.php index 68ad115e8140f..857b120b25591 100644 --- a/phpunit/html/wp-html-processor-test.php +++ b/phpunit/html/wp-html-processor-test.php @@ -225,6 +225,15 @@ public function test_can_scan_through_tags_at_a_given_depth() { $this->assertEquals( 1, $p2_count ); } + public function test_set_content_inside_balanced_tags_on_void_element_has_no_effect() { + $tags = new WP_HTML_Processor( self::HTML ); + + $tags->next_tag( 'img' ); + $content = $tags->set_content_inside_balanced_tags( 'This is the new img content' ); + $this->assertFalse( $content ); + $this->assertSame( self::HTML, $tags->get_updated_html() ); + } + public function test_set_content_inside_balanced_tags_sets_content_correctly() { $tags = new WP_HTML_Processor( self::HTML ); From ebfbb1ef58758cbc4f05f8381e2fe63c91330466 Mon Sep 17 00:00:00 2001 From: Bernie Reiter Date: Wed, 18 Jan 2023 16:00:44 +0100 Subject: [PATCH 12/15] Rename update_head and update_tail vars --- lib/experimental/html/class-wp-html-tag-processor.php | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/lib/experimental/html/class-wp-html-tag-processor.php b/lib/experimental/html/class-wp-html-tag-processor.php index af77aa2f11895..44b592625613f 100644 --- a/lib/experimental/html/class-wp-html-tag-processor.php +++ b/lib/experimental/html/class-wp-html-tag-processor.php @@ -1314,20 +1314,20 @@ private function apply_lexical_updates() { $tail_delta = 0; foreach ( $this->lexical_updates as $diff ) { - $update_head = $bookmark->start >= $diff->start; - $update_tail = $bookmark->end >= $diff->start; + $bookmark_start_is_after_diff_start = $bookmark->start >= $diff->start; + $bookmark_end_is_after_diff_end = $bookmark->end >= $diff->start; - if ( ! 
$update_head && ! $update_tail ) { + if ( ! $bookmark_start_is_after_diff_start && ! $bookmark_end_is_after_diff_end ) { break; } $delta = strlen( $diff->text ) - ( $diff->end - $diff->start ); - if ( $update_head ) { + if ( $bookmark_start_is_after_diff_start ) { $head_delta += $delta; } - if ( $update_tail ) { + if ( $bookmark_end_is_after_diff_end ) { $tail_delta += $delta; } } From 7fa19f8ab3295970e9c6afd316d69d60a7f64695 Mon Sep 17 00:00:00 2001 From: Bernie Reiter Date: Wed, 18 Jan 2023 16:13:48 +0100 Subject: [PATCH 13/15] Add test coverage to check that bookmarks are invalidated --- phpunit/html/wp-html-processor-test.php | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/phpunit/html/wp-html-processor-test.php b/phpunit/html/wp-html-processor-test.php index 857b120b25591..942e248a30a79 100644 --- a/phpunit/html/wp-html-processor-test.php +++ b/phpunit/html/wp-html-processor-test.php @@ -284,4 +284,21 @@ public function test_set_content_inside_balanced_tags_preceded_by_set_attribute_ $tags->set_content_inside_balanced_tags( 'This is the new section content.' ); $this->assertSame( '
<div>outside</div><section id="thesection">This is the new section content.</section>
', $tags->get_updated_html() ); } + + public function test_set_content_inside_balanced_tags_invalidates_bookmarks_that_point_to_replaced_content() { + $tags = new WP_HTML_Processor( self::HTML ); + + $tags->next_tag( 'section' ); + $tags->set_bookmark( 'start' ); + $tags->next_tag( 'img' ); + $tags->set_bookmark( 'replaced' ); + $tags->seek( 'start' ); + + $tags->set_content_inside_balanced_tags( 'This is the new section content.' ); + $this->assertSame( '
<div>outside</div><section>This is the new section content.</section>
', $tags->get_updated_html() ); + + $this->expectExceptionMessage( 'Invalid bookmark name' ); + $successful_seek = $tags->seek( 'replaced' ); + $this->assertFalse( $successful_seek ); + } } From 7d2cacf46bc4fda5234bacf003ab127cc6d92a19 Mon Sep 17 00:00:00 2001 From: Bernie Reiter Date: Wed, 18 Jan 2023 16:14:37 +0100 Subject: [PATCH 14/15] Implement logic to fix the previous test --- .../html/class-wp-html-tag-processor.php | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/lib/experimental/html/class-wp-html-tag-processor.php b/lib/experimental/html/class-wp-html-tag-processor.php index 44b592625613f..467861bc3b736 100644 --- a/lib/experimental/html/class-wp-html-tag-processor.php +++ b/lib/experimental/html/class-wp-html-tag-processor.php @@ -1303,7 +1303,7 @@ private function apply_lexical_updates() { $this->updated_bytes = $diff->end; } - foreach ( $this->bookmarks as $bookmark ) { + foreach ( $this->bookmarks as $bookmark_name => $bookmark ) { /** * As we loop through $this->lexical_updates, we keep comparing * $bookmark->start and $bookmark->end to $diff->start. We can't @@ -1317,6 +1317,14 @@ private function apply_lexical_updates() { $bookmark_start_is_after_diff_start = $bookmark->start >= $diff->start; $bookmark_end_is_after_diff_end = $bookmark->end >= $diff->start; + if ( $bookmark_start_is_after_diff_start ) { + $bookmark_end_is_before_diff_end = $bookmark->end < $diff->end; + if ( $bookmark_end_is_before_diff_end ) { + // The bookmark is fully contained within the diff. We need to invalidate it. + $this->release_bookmark( $bookmark_name ); + } + } + if ( ! $bookmark_start_is_after_diff_start && ! $bookmark_end_is_after_diff_end ) { break; } @@ -1332,6 +1340,11 @@ private function apply_lexical_updates() { } } + // Did we end up invalidating the bookmark? + if ( ! 
isset( $this->bookmarks[ $bookmark_name ] ) ) { + break; + } + $bookmark->start += $head_delta; $bookmark->end += $tail_delta; } From 4cce0fd94b67d06ecaec46bf531e41e654736405 Mon Sep 17 00:00:00 2001 From: Bernie Reiter Date: Mon, 23 Jan 2023 15:20:08 +0100 Subject: [PATCH 15/15] Don't break, continue --- lib/experimental/html/class-wp-html-tag-processor.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/experimental/html/class-wp-html-tag-processor.php b/lib/experimental/html/class-wp-html-tag-processor.php index 467861bc3b736..3016a2fea79b6 100644 --- a/lib/experimental/html/class-wp-html-tag-processor.php +++ b/lib/experimental/html/class-wp-html-tag-processor.php @@ -1342,7 +1342,7 @@ private function apply_lexical_updates() { // Did we end up invalidating the bookmark? if ( ! isset( $this->bookmarks[ $bookmark_name ] ) ) { - break; + continue; } $bookmark->start += $head_delta;