diff --git a/lib/experimental/html/class-wp-html-attribute-token.php b/lib/experimental/html/class-wp-html-attribute-token.php
new file mode 100644
index 00000000000000..32adfe02e0fb8c
--- /dev/null
+++ b/lib/experimental/html/class-wp-html-attribute-token.php
@@ -0,0 +1,89 @@
+name = $name;
+ $this->value_starts_at = $value_start;
+ $this->value_length = $value_length;
+ $this->start = $start;
+ $this->end = $end;
+ $this->is_true = $is_true;
+ }
+}
diff --git a/lib/experimental/html/class-wp-html-tag-processor.php b/lib/experimental/html/class-wp-html-tag-processor.php
new file mode 100644
index 00000000000000..be6179c963571f
--- /dev/null
+++ b/lib/experimental/html/class-wp-html-tag-processor.php
@@ -0,0 +1,1265 @@
+ "c" not " c"
+ * @TODO: Skip over `/` in attributes area, split attribute names by `/`
+ * @TODO: Decode HTML references/entities in class names when matching.
+ * E.g. match having class `1<"2` needs to recognize `class="1&lt;&quot;2"`.
+ * @TODO: Decode character references in `get_attribute()`
+ * @TODO: Properly escape attribute value in `set_attribute()`
+ *
+ * @package WordPress
+ * @subpackage HTML
+ * @since 6.1.0
+ */
+
+/**
+ * Processes an input HTML document by applying a specified set
+ * of patches to that input. Tokenizes HTML but does not fully
+ * parse the input document.
+ *
+ * @since 6.1.0
+ */
+class WP_HTML_Tag_Processor {
+
+ /**
+ * The HTML document to parse.
+ *
+ * @since 6.1.0
+ * @var string
+ */
+ private $html;
+
+ /**
+ * The last query passed to next_tag().
+ *
+ * Kept so that parse_query() can return early when the same
+ * non-null query is passed again.
+ *
+ * @since 6.1.0
+ * @var array|string|null
+ */
+ private $last_query;
+
+ /**
+ * The tag name this processor currently scans for.
+ *
+ * @since 6.1.0
+ * @var string|null
+ */
+ private $sought_tag_name;
+
+ /**
+ * The CSS class name this processor currently scans for.
+ *
+ * @since 6.1.0
+ * @var string|null
+ */
+ private $sought_class_name;
+
+ /**
+ * How many matching tags next_tag() has to find before it stops.
+ *
+ * parse_query() resets this to 1 and only accepts positive integers
+ * from the `match_offset` query key, so 1 means "the first match".
+ *
+ * @since 6.1.0
+ * @var int|null
+ */
+ private $sought_match_offset;
+
+ /**
+ * The updated HTML document.
+ *
+ * @since 6.1.0
+ * @var string
+ */
+ private $updated_html = '';
+
+ /**
+ * How many bytes from the original HTML document were already read.
+ *
+ * @since 6.1.0
+ * @var int
+ */
+ private $parsed_bytes = 0;
+
+ /**
+ * How many bytes from the original HTML document were already treated
+ * with the requested replacements.
+ *
+ * @since 6.1.0
+ * @var int
+ */
+ private $updated_bytes = 0;
+
+	/**
+	 * Byte offset in the input document where the current tag name starts.
+	 *
+	 * Null while no tag is matched.
+	 *
+	 * @since 6.1.0
+	 * @var int|null
+	 */
+	private $tag_name_starts_at;
+
+	/**
+	 * Byte offset after the name of current tag.
+	 *
+	 * Example:
+	 *
+	 *     <div id="test">...
+	 *     01234
+	 *      ^ tag_name_starts_at = 1
+	 *         ^ tag_name_ends_at = 4
+	 *
+	 * Null while no tag is matched. Declared explicitly so that reading it
+	 * before the first next_tag() call does not touch an undefined property.
+	 *
+	 * @since 6.1.0
+	 * @var int|null
+	 */
+	private $tag_name_ends_at;
+
+	/**
+	 * Index of the attributes found within the current tag, keyed by attribute name.
+	 *
+	 * Example:
+	 *
+	 *     // supposing the parser is working through this content
+	 *     // and stops after recognizing the `id` attribute
+	 *     // <div id="test-4" class=outline title="data:text/plain;base64=asdk3nk1j3fo8">
+	 *     //                 ^ parsing will continue from this point
+	 *     $this->attributes = array(
+	 *         'id' => new WP_HTML_Attribute_Token( 'id', 9, 6, 5, 16, false )
+	 *     );
+	 *
+	 *     // when picking up parsing again we will continue and add to this array
+	 *     $this->attributes = array(
+	 *         'id'    => new WP_HTML_Attribute_Token( 'id', 9, 6, 5, 16, false ),
+	 *         'class' => new WP_HTML_Attribute_Token( 'class', 23, 7, 17, 30, false )
+	 *     );
+	 *
+	 *     // Note that tokens hold byte offsets into the document
+	 *     // (value start, value length, attribute start, attribute end)
+	 *     // rather than copies of the attribute values.
+	 *
+	 * @since 6.1.0
+	 * @var WP_HTML_Attribute_Token[]
+	 */
+	private $attributes = array();
+
+ /**
+ * Which class names to add or remove from a tag.
+ *
+ * These are tracked separately from attribute updates because they are
+ * semantically distinct, whereas this interface exists for the common
+ * case of adding and removing class names while other attributes are
+ * generally modified as with DOM `setAttribute` calls.
+ *
+ * When modifying an HTML document these will eventually be collapsed
+ * into a single lexical update to replace the `class` attribute.
+ *
+ * Example:
+ *
+ * // Add the `wp-block-group` class, remove the `wp-group` class.
+ * $classname_updates = array(
+ * // Indexed by the class name exactly as it was passed in.
+ * 'wp-block-group' => WP_HTML_Tag_Processor::ADD_CLASS,
+ * 'wp-group' => WP_HTML_Tag_Processor::REMOVE_CLASS
+ * );
+ *
+ *
+ * @since 6.1.0
+ * @var (bool|null)[]
+ */
+ private $classname_updates = array();
+
+ // Operation markers stored as the values of $classname_updates.
+ // ADD_CLASS: append the class unless it is already present.
+ const ADD_CLASS = true;
+ // REMOVE_CLASS: drop the class from the existing attribute value.
+ const REMOVE_CLASS = false;
+ // SKIP_CLASS: the class was already seen in the existing value, so never append it again.
+ const SKIP_CLASS = null;
+
+ /**
+ * Lexical replacements to apply to input HTML document.
+ *
+ * HTML modifications collapse into lexical replacements in order to
+ * provide an efficient mechanism to update documents lazily and in
+ * order to support a variety of semantic modifications without
+ * building a complicated parsing machinery. That is, it's up to
+ * the calling class to generate the lexical modification from the
+ * semantic change requested.
+ *
+ * set_attribute() and remove_attribute() key their entries by attribute
+ * name, so a later change to the same attribute replaces the earlier one.
+ *
+ * Example:
+ *
+ * // Replace an attribute stored with a new value, indices
+ * // sourced from the lazily-parsed HTML recognizer.
+ * $start = $attributes['src']->start;
+ * $end = $attributes['src']->end;
+ * $modifications[] = new WP_HTML_Text_Replacement( $start, $end, get_the_post_thumbnail_url() );
+ *
+ * // Correspondingly, something like this
+ * // will appear in the replacements array.
+ * $replacements = array(
+ * new WP_HTML_Text_Replacement( 14, 28, 'https://my-site.my-domain/wp-content/uploads/2014/08/kittens.jpg' )
+ * );
+ *
+ *
+ * @since 6.1.0
+ * @var WP_HTML_Text_Replacement[]
+ */
+ private $attribute_updates = array();
+
+ /**
+ * Constructor.
+ *
+ * Only stores the document; no scanning happens here.
+ *
+ * @since 6.1.0
+ *
+ * @param string $html HTML to process.
+ */
+ public function __construct( $html ) {
+ $this->html = $html;
+ }
+
+ /**
+ * Finds the next tag matching the $query.
+ *
+ * Every tag opener is visited in document order. Each one that satisfies
+ * the query is counted, and scanning stops once `match_offset` matches
+ * have been counted. The processor is then positioned on that tag.
+ *
+ * @since 6.1.0
+ *
+ * @param array|string $query {
+ * Which tag name to find, having which class, etc.
+ *
+ * @type string|null $tag_name Which tag to find, or `null` for "any tag."
+ * @type int|null $match_offset Find the Nth tag matching all search criteria.
+ * 1 for "first" tag, 3 for "third," etc.
+ * Values that are not positive integers are ignored.
+ * Defaults to first tag.
+ * @type string|null $class_name Tag must contain this whole class name to match.
+ * }
+ * @return boolean Whether a tag was matched.
+ */
+ public function next_tag( $query = null ) {
+ $this->parse_query( $query );
+ $already_found = 0;
+
+ do {
+ /*
+ * Unfortunately we can't try to search for only the tag name we want because that might
+ * lead us to skip over other tags and lose track of our place. So we need to search for
+ * _every_ tag and then check after we find one if it's the one we are looking for.
+ */
+ if ( false === $this->parse_next_tag() ) {
+ // No more tags: mark the whole document as consumed.
+ $this->parsed_bytes = strlen( $this->html );
+
+ return false;
+ }
+
+ $this->parse_tag_opener_attributes();
+
+ if ( $this->matches() ) {
+ $already_found++;
+ }
+
+ /*
+ * SCRIPT, TEXTAREA and TITLE contents are skipped so that markup-like
+ * text inside them is not visited as tags. Peeking at the first byte
+ * avoids copying the tag name string when possible.
+ */
+ $t = $this->html[ $this->tag_name_starts_at ];
+ if ( 's' === $t || 'S' === $t || 't' === $t || 'T' === $t ) {
+ $tag_name = $this->get_tag();
+
+ if ( 'script' === $tag_name ) {
+ $this->skip_script_data();
+ } elseif ( 'textarea' === $tag_name || 'title' === $tag_name ) {
+ $this->skip_rcdata( $tag_name );
+ }
+ }
+ } while ( $already_found < $this->sought_match_offset );
+
+ return true;
+ }
+
+	/**
+	 * Skips the contents of the title and textarea tags until an appropriate
+	 * tag closer is found.
+	 *
+	 * When no closer exists the rest of the document is treated as consumed.
+	 *
+	 * @see https://html.spec.whatwg.org/multipage/parsing.html#rcdata-state
+	 * @since 6.1.0
+	 *
+	 * @param string $tag_name The lowercase tag name which will close the RCDATA region.
+	 * @return void
+	 */
+	private function skip_rcdata( $tag_name ) {
+		$html       = $this->html;
+		$doc_length = strlen( $html );
+		$tag_length = strlen( $tag_name );
+
+		$at = $this->parsed_bytes;
+
+		while ( true ) {
+			// Never hand strpos() an offset beyond the end of the document.
+			$at = $at < $doc_length ? strpos( $html, '</', $at ) : false;
+
+			/*
+			 * If we have no possible tag closer then fail. A closer needs the
+			 * two bytes of "</", the tag name, and at least one more byte
+			 * which terminates the name, all inside of the document.
+			 */
+			if ( false === $at || ( $at + 2 + $tag_length ) >= $doc_length ) {
+				$this->parsed_bytes = $doc_length;
+				return;
+			}
+
+			$at += 2;
+
+			/*
+			 * We have to find a case-insensitive match to the tag name.
+			 * Note also that since tag names are limited to US-ASCII
+			 * characters we can ignore any kind of Unicode normalizing
+			 * forms when comparing. If we get a non-ASCII character it
+			 * will never be a match.
+			 */
+			for ( $i = 0; $i < $tag_length; $i++ ) {
+				$tag_char  = $tag_name[ $i ];
+				$html_char = $html[ $at + $i ];
+
+				if ( $html_char !== $tag_char && strtolower( $html_char ) !== $tag_char ) {
+					$at += $i;
+					continue 2;
+				}
+			}
+
+			$at                += $tag_length;
+			$this->parsed_bytes = $at;
+
+			/*
+			 * Ensure we terminate the tag name, otherwise we might,
+			 * for example, accidentally match the sequence
+			 * "</textarearug" for "</textarea".
+			 */
+			$c = $html[ $at ];
+			if ( ' ' !== $c && "\t" !== $c && "\f" !== $c && "\r" !== $c && "\n" !== $c && '/' !== $c && '>' !== $c ) {
+				continue;
+			}
+
+			$this->skip_tag_closer_attributes();
+			$at = $this->parsed_bytes;
+
+			// The closer was never terminated: nothing is left to scan.
+			if ( $at >= $doc_length ) {
+				$this->parsed_bytes = $doc_length;
+				return;
+			}
+
+			if ( '>' === $html[ $at ] || '/' === $html[ $at ] ) {
+				$this->parsed_bytes++;
+				return;
+			}
+		}
+	}
+
+ /**
+ * Skips the contents of tags, so if we're not seeing the
+ * start of one of these tokens we can proceed to the next
+ * potential match in the text.
+ */
+ if ( ! (
+ $at + 6 < $doc_length &&
+ ( 's' === $html[ $at ] || 'S' === $html[ $at ] ) &&
+ ( 'c' === $html[ $at + 1 ] || 'C' === $html[ $at + 1 ] ) &&
+ ( 'r' === $html[ $at + 2 ] || 'R' === $html[ $at + 2 ] ) &&
+ ( 'i' === $html[ $at + 3 ] || 'I' === $html[ $at + 3 ] ) &&
+ ( 'p' === $html[ $at + 4 ] || 'P' === $html[ $at + 4 ] ) &&
+ ( 't' === $html[ $at + 5 ] || 'T' === $html[ $at + 5 ] )
+ ) ) {
+ $at++;
+ continue;
+ }
+
+ /*
+ * We also have to make sure we terminate the script tag opener/closer
+ * to avoid making partial matches on strings like `
' !== $c ) {
+ $at++;
+ continue;
+ }
+
+ if ( 'escaped' === $state && ! $is_closing ) {
+ $state = 'double-escaped';
+ continue;
+ }
+
+ if ( 'double-escaped' === $state && $is_closing ) {
+ $state = 'escaped';
+ continue;
+ }
+
+ if ( $is_closing ) {
+ $this->parsed_bytes = $at;
+ $this->skip_tag_closer_attributes();
+
+ if ( '>' === $html[ $this->parsed_bytes ] ) {
+ $this->parsed_bytes++;
+ return;
+ }
+ }
+
+ $at++;
+ }
+ }
+
+	/**
+	 * Parses the next tag.
+	 *
+	 * Flushes pending updates for the previous tag, then scans forward for the
+	 * next tag opener while stepping over comments, CDATA sections, DOCTYPE
+	 * declarations and other bogus comments. On success the tag name offsets
+	 * and the parsing cursor are set to the end of the tag name.
+	 *
+	 * @since 6.1.0
+	 *
+	 * @return bool Whether a tag opener was found.
+	 */
+	private function parse_next_tag() {
+		$this->after_tag();
+
+		$html       = $this->html;
+		$doc_length = strlen( $html );
+		$at         = $this->parsed_bytes;
+
+		while ( $at < $doc_length ) {
+			$at = strpos( $html, '<', $at );
+			if ( false === $at ) {
+				return false;
+			}
+
+			// A "<" as the very last byte cannot open anything.
+			if ( $at + 1 >= $doc_length ) {
+				return false;
+			}
+
+			/*
+			 * HTML tag names must start with [a-zA-Z] otherwise they are not tags.
+			 * For example, "<3" is rendered as text, not a tag opener. This means
+			 * if we have at least one letter following the "<" then we _do_ have
+			 * a tag opener and can process it as such. This is more common than
+			 * HTML comments, DOCTYPE tags, and other structure starting with "<"
+			 * so it's good to check first for the presence of the tag.
+			 *
+			 * Reference:
+			 * * https://html.spec.whatwg.org/multipage/parsing.html#data-state
+			 * * https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state
+			 */
+			$tag_name_prefix_length = strspn( $html, 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ', $at + 1 );
+			if ( $tag_name_prefix_length > 0 ) {
+				$at++;
+				$tag_name_length          = $tag_name_prefix_length + strcspn( $html, " \t\f\r\n/>", $at + $tag_name_prefix_length );
+				$this->tag_name_starts_at = $at;
+				$this->tag_name_ends_at   = $at + $tag_name_length;
+				$this->parsed_bytes       = $at + $tag_name_length;
+				return true;
+			}
+
+			// "<!" opens a comment, a CDATA section, a DOCTYPE or a bogus comment.
+			// https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state
+			if ( '!' === $html[ $at + 1 ] ) {
+				/*
+				 * In every branch below an unterminated structure swallows the
+				 * rest of the document, so a failed search ends the scan instead
+				 * of being added to an offset, which would rewind the cursor.
+				 */
+
+				// "<!--" starts a comment - we can skip to the nearest "-->".
+				if (
+					$doc_length > $at + 3 &&
+					'-' === $html[ $at + 2 ] &&
+					'-' === $html[ $at + 3 ]
+				) {
+					$closer_at = strpos( $html, '-->', $at + 4 );
+					if ( false === $closer_at ) {
+						return false;
+					}
+
+					$at = $closer_at + 3;
+					continue;
+				}
+
+				// "<![CDATA[" runs until the nearest "]]>".
+				// The CDATA is case-sensitive.
+				if (
+					$doc_length > $at + 8 &&
+					'[' === $html[ $at + 2 ] &&
+					'C' === $html[ $at + 3 ] &&
+					'D' === $html[ $at + 4 ] &&
+					'A' === $html[ $at + 5 ] &&
+					'T' === $html[ $at + 6 ] &&
+					'A' === $html[ $at + 7 ] &&
+					'[' === $html[ $at + 8 ]
+				) {
+					$closer_at = strpos( $html, ']]>', $at + 9 );
+					if ( false === $closer_at ) {
+						return false;
+					}
+
+					$at = $closer_at + 3;
+					continue;
+				}
+
+				/*
+				 * "<!DOCTYPE" runs until the nearest ">".
+				 * These are ASCII-case-insensitive.
+				 */
+				if (
+					$doc_length > $at + 8 &&
+					'D' === strtoupper( $html[ $at + 2 ] ) &&
+					'O' === strtoupper( $html[ $at + 3 ] ) &&
+					'C' === strtoupper( $html[ $at + 4 ] ) &&
+					'T' === strtoupper( $html[ $at + 5 ] ) &&
+					'Y' === strtoupper( $html[ $at + 6 ] ) &&
+					'P' === strtoupper( $html[ $at + 7 ] ) &&
+					'E' === strtoupper( $html[ $at + 8 ] )
+				) {
+					$closer_at = strpos( $html, '>', $at + 9 );
+					if ( false === $closer_at ) {
+						return false;
+					}
+
+					$at = $closer_at + 1;
+					continue;
+				}
+
+				/*
+				 * Anything else here is an incorrectly-opened comment and transitions
+				 * to the bogus comment state - we can skip to the nearest >.
+				 */
+				$closer_at = strpos( $html, '>', $at + 1 );
+				if ( false === $closer_at ) {
+					return false;
+				}
+
+				$at = $closer_at + 1;
+				continue;
+			}
+
+			/*
+			 * "<?" transitions to a bogus comment state - we can skip to the nearest >
+			 * https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state
+			 */
+			if ( '?' === $html[ $at + 1 ] ) {
+				$closer_at = strpos( $html, '>', $at + 2 );
+				if ( false === $closer_at ) {
+					return false;
+				}
+
+				$at = $closer_at + 1;
+				continue;
+			}
+
+			$at++;
+		}
+
+		return false;
+	}
+
+	/**
+	 * Consumes every attribute of the tag opener the cursor is in,
+	 * one parse_next_attribute() call at a time.
+	 *
+	 * @since 6.1.0
+	 */
+	private function parse_tag_opener_attributes() {
+		do {
+			$found_attribute = $this->parse_next_attribute();
+		} while ( $found_attribute );
+	}
+
+	/**
+	 * Steps over anything that looks like an attribute inside a tag closer.
+	 *
+	 * Runs parse_next_attribute() in the `tag-closer` context
+	 * until it reports that nothing more was found.
+	 *
+	 * @since 6.1.0
+	 */
+	private function skip_tag_closer_attributes() {
+		do {
+			$found_attribute = $this->parse_next_attribute( 'tag-closer' );
+		} while ( $found_attribute );
+	}
+
+	/**
+	 * Parses the next attribute.
+	 *
+	 * Advances the parsing cursor past one attribute. In the `tag-opener`
+	 * context the attribute is also recorded in `$this->attributes`, indexed
+	 * by its lowercase name because HTML attribute names are matched
+	 * case-insensitively and get_attribute() looks names up in lowercase.
+	 *
+	 * @since 6.1.0
+	 *
+	 * @param string $context tag-opener or tag-closer.
+	 * @return WP_HTML_Attribute_Token|bool False when no attribute is left, `true` in the
+	 *                                      tag-closer context, otherwise the recorded token.
+	 */
+	private function parse_next_attribute( $context = 'tag-opener' ) {
+		$doc_length = strlen( $this->html );
+
+		// An unterminated tag may leave the cursor at or past the end of the document.
+		if ( $this->parsed_bytes >= $doc_length ) {
+			$this->parsed_bytes = $doc_length;
+			return false;
+		}
+
+		// Skip whitespace and slashes.
+		$this->parsed_bytes += strspn( $this->html, " \t\f\r\n/", $this->parsed_bytes );
+		if ( $this->parsed_bytes >= $doc_length ) {
+			return false;
+		}
+
+		/*
+		 * Treat the equal sign ("=") as a part of the attribute name if it is the
+		 * first encountered byte:
+		 * https://html.spec.whatwg.org/multipage/parsing.html#before-attribute-name-state
+		 */
+		$name_length = '=' === $this->html[ $this->parsed_bytes ]
+			? 1 + strcspn( $this->html, "=/> \t\f\r\n", $this->parsed_bytes + 1 )
+			: strcspn( $this->html, "=/> \t\f\r\n", $this->parsed_bytes );
+
+		// No attribute, just tag closer.
+		if ( 0 === $name_length ) {
+			return false;
+		}
+
+		$attribute_start     = $this->parsed_bytes;
+		$attribute_name      = substr( $this->html, $attribute_start, $name_length );
+		$this->parsed_bytes += $name_length;
+
+		$this->skip_whitespace();
+
+		$has_value = $this->parsed_bytes < $doc_length && '=' === $this->html[ $this->parsed_bytes ];
+		if ( $has_value ) {
+			$this->parsed_bytes++;
+			$this->skip_whitespace();
+
+			// At the end of the document there is no byte left to open a value.
+			$value_opener = $this->parsed_bytes < $doc_length ? $this->html[ $this->parsed_bytes ] : '';
+
+			switch ( $value_opener ) {
+				case "'":
+				case '"':
+					$value_start  = $this->parsed_bytes + 1;
+					$value_length = strcspn( $this->html, $value_opener, $value_start );
+					// An unclosed quote must not push the cursor past the document.
+					$attribute_end      = min( $value_start + $value_length + 1, $doc_length );
+					$this->parsed_bytes = $attribute_end;
+					break;
+
+				default:
+					$value_start        = $this->parsed_bytes;
+					$value_length       = strcspn( $this->html, "> \t\f\r\n", $value_start );
+					$attribute_end      = $value_start + $value_length;
+					$this->parsed_bytes = $attribute_end;
+			}
+		} else {
+			$value_start   = $this->parsed_bytes;
+			$value_length  = 0;
+			$attribute_end = $attribute_start + $name_length;
+		}
+
+		if ( 'tag-opener' !== $context ) {
+			return true;
+		}
+
+		$comparable_name = strtolower( $attribute_name );
+
+		// If an attribute is listed many times, only use the first declaration and ignore the rest.
+		if ( ! array_key_exists( $comparable_name, $this->attributes ) ) {
+			$this->attributes[ $comparable_name ] = new WP_HTML_Attribute_Token(
+				$attribute_name,
+				$value_start,
+				$value_length,
+				$attribute_start,
+				$attribute_end,
+				! $has_value
+			);
+		}
+
+		return $this->attributes[ $comparable_name ];
+	}
+
+	/**
+	 * Advances the parsing cursor over the run of whitespace bytes
+	 * (space, tab, form feed, carriage return, line feed) it points at.
+	 *
+	 * @since 6.1.0
+	 *
+	 * @return void
+	 */
+	private function skip_whitespace() {
+		$whitespace_length  = strspn( $this->html, " \t\f\r\n", $this->parsed_bytes );
+		$this->parsed_bytes = $this->parsed_bytes + $whitespace_length;
+	}
+
+	/**
+	 * Finishes work on the current tag.
+	 *
+	 * Pending class name changes are first turned into attribute updates,
+	 * the attribute updates are then written out, and finally the
+	 * per-tag state is cleared.
+	 *
+	 * @since 6.1.0
+	 *
+	 * @return void
+	 */
+	private function after_tag() {
+		// Flush: class changes must become attribute updates before those are applied.
+		$this->class_name_updates_to_attributes_updates();
+		$this->apply_attributes_updates();
+
+		// Forget the tag we were on.
+		$this->attributes         = array();
+		$this->tag_name_ends_at   = null;
+		$this->tag_name_starts_at = null;
+	}
+
+ /**
+ * Converts class name updates into tag attributes updates
+ * (they are accumulated in different data formats for performance).
+ *
+ * This method is only meant to run right before the attribute updates are applied.
+ * The behavior in all other cases is undefined.
+ *
+ * The pending class name updates are always cleared, whether or not
+ * they resulted in an attribute update.
+ *
+ * @return void
+ * @since 6.1.0
+ *
+ * @see $classname_updates
+ * @see $attribute_updates
+ */
+ private function class_name_updates_to_attributes_updates() {
+ // Nothing to do, or an explicit update to `class` is already enqueued and takes precedence.
+ if ( count( $this->classname_updates ) === 0 || isset( $this->attribute_updates['class'] ) ) {
+ $this->classname_updates = array();
+ return;
+ }
+
+ $existing_class = isset( $this->attributes['class'] )
+ ? substr( $this->html, $this->attributes['class']->value_starts_at, $this->attributes['class']->value_length )
+ : '';
+
+ /**
+ * Updated "class" attribute value.
+ *
+ * This is incrementally built as we scan through the existing class
+ * attribute, omitting removed classes as we do so, and then appending
+ * added classes at the end. Only when we're done processing will the
+ * value contain the final new value.
+ *
+ * @var string
+ */
+ $class = '';
+
+ /**
+ * Tracks the cursor position in the existing class
+ * attribute value where we're currently parsing.
+ *
+ * @var integer
+ */
+ $at = 0;
+
+ /**
+ * Indicates if we have made any actual modifications to the existing
+ * class attribute value, used to short-circuit string copying.
+ *
+ * It's possible that we are intending to remove certain classes and
+ * add others in such a way that we don't modify the existing value
+ * because calls to `add_class()` and `remove_class()` occur
+ * independent of the input values sent to the WP_HTML_Tag_Processor. That is, we
+ * might call `remove_class()` for a class that isn't already present
+ * and we might call `add_class()` for one that is, in which case we
+ * wouldn't need to break apart the string and rebuild it.
+ *
+ * This flag is set upon the first change that requires a string update.
+ *
+ * @var boolean
+ */
+ $modified = false;
+
+ // Remove unwanted classes by only copying the new ones.
+ while ( $at < strlen( $existing_class ) ) {
+ // Skip to the first non-whitespace character.
+ $ws_at = $at;
+ $ws_length = strspn( $existing_class, " \t\f\r\n", $ws_at );
+ $at += $ws_length;
+
+ // Capture the class name – it's everything until the next whitespace.
+ $name_length = strcspn( $existing_class, " \t\f\r\n", $at );
+ if ( 0 === $name_length ) {
+ // We're done, no more class names.
+ break;
+ }
+
+ $name = substr( $existing_class, $at, $name_length );
+ $at += $name_length;
+
+ // If this class is marked for removal, start processing the next one.
+ $remove_class = (
+ isset( $this->classname_updates[ $name ] ) &&
+ self::REMOVE_CLASS === $this->classname_updates[ $name ]
+ );
+
+ // Once we've seen a class, we should never add it again.
+ if ( ! $remove_class ) {
+ $this->classname_updates[ $name ] = self::SKIP_CLASS;
+ }
+
+ if ( $remove_class ) {
+ $modified = true;
+ continue;
+ }
+
+ /*
+ * Otherwise, append it to the new "class" attribute value.
+ *
+ * By preserving the existing whitespace instead of only adding a single
+ * space (which is a valid transformation we can make) we'll introduce
+ * fewer changes to the HTML content and hopefully make comparing
+ * before/after easier for people trying to debug the modified output.
+ */
+ $class .= substr( $existing_class, $ws_at, $ws_length );
+ $class .= $name;
+ }
+
+ // Add new classes by appending the ones we haven't already seen.
+ foreach ( $this->classname_updates as $name => $operation ) {
+ if ( self::ADD_CLASS === $operation ) {
+ $modified = true;
+
+ $class .= strlen( $class ) > 0 ? ' ' : '';
+ $class .= $name;
+ }
+ }
+
+ $this->classname_updates = array();
+ if ( ! $modified ) {
+ return;
+ }
+
+ // An empty result means every class was removed, so the attribute itself goes away.
+ if ( strlen( $class ) > 0 ) {
+ $this->set_attribute( 'class', $class );
+ } else {
+ $this->remove_attribute( 'class' );
+ }
+ }
+
+	/**
+	 * Applies updates to attributes.
+	 *
+	 * Copies the untouched parts of the input document into the updated
+	 * document, splices in the text of every enqueued replacement, and
+	 * then clears the queue.
+	 *
+	 * @since 6.1.0
+	 *
+	 * @return void
+	 */
+	private function apply_attributes_updates() {
+		if ( ! count( $this->attribute_updates ) ) {
+			return;
+		}
+
+		/**
+		 * Attribute updates can be enqueued in any order but as we
+		 * progress through the document to replace them we have to
+		 * make our replacements in the order in which they are found
+		 * in that document.
+		 *
+		 * Sorting the updates ensures we don't make our replacements
+		 * out of order, which could otherwise lead to mangled output,
+		 * partially-duplicate attributes, and overwritten attributes.
+		 *
+		 * The callable names the class through `__CLASS__` because the
+		 * `'self'` string form of a callable is deprecated as of PHP 8.2.
+		 */
+		usort( $this->attribute_updates, array( __CLASS__, 'sort_start_ascending' ) );
+
+		foreach ( $this->attribute_updates as $diff ) {
+			$this->updated_html .= substr( $this->html, $this->updated_bytes, $diff->start - $this->updated_bytes );
+			$this->updated_html .= $diff->text;
+			$this->updated_bytes = $diff->end;
+		}
+
+		$this->attribute_updates = array();
+	}
+
+	/**
+	 * Comparison callback ordering two objects by their `start` property, lowest first.
+	 *
+	 * @since 6.1.0
+	 *
+	 * @param object $a First attribute update.
+	 * @param object $b Second attribute update.
+	 * @return integer Negative, zero or positive when $a starts before, at, or after $b.
+	 */
+	private static function sort_start_ascending( $a, $b ) {
+		$first_start  = $a->start;
+		$second_start = $b->start;
+
+		return $first_start - $second_start;
+	}
+
+	/**
+	 * Reads an attribute of the tag the processor is currently on.
+	 *
+	 * The requested name is lowercased before it is looked up.
+	 *
+	 * Example:
+	 *
+	 *     $p = new WP_HTML_Tag_Processor( '<div enabled class="test" data-test-id="14">Test</div>' );
+	 *     $p->next_tag( [ 'class_name' => 'test' ] ) === true;
+	 *     $p->get_attribute( 'data-test-id' ) === '14';
+	 *     $p->get_attribute( 'enabled' ) === true;
+	 *     $p->get_attribute( 'aria-label' ) === null;
+	 *
+	 *     $p->next_tag( [] ) === false;
+	 *     $p->get_attribute( 'class' ) === null;
+	 *
+	 * @since 6.1.0
+	 *
+	 * @param string $name Name of attribute whose value is requested.
+	 * @return string|true|null Value of attribute or `null` if not available.
+	 *                          Boolean attributes return `true`.
+	 */
+	public function get_attribute( $name ) {
+		// Without a matched tag there is nothing to read from.
+		if ( null === $this->tag_name_starts_at ) {
+			return null;
+		}
+
+		$lookup_key = strtolower( $name );
+
+		if ( isset( $this->attributes[ $lookup_key ] ) ) {
+			$token = $this->attributes[ $lookup_key ];
+
+			// Attributes written without a value are reported as boolean true.
+			return true === $token->is_true
+				? true
+				: substr( $this->html, $token->value_starts_at, $token->value_length );
+		}
+
+		return null;
+	}
+
+	/**
+	 * Gives the name of the tag the processor is currently on, in lowercase.
+	 *
+	 * Example:
+	 *
+	 *     $p = new WP_HTML_Tag_Processor( '<DIV CLASS="test">Test</DIV>' );
+	 *     $p->next_tag( [] ) === true;
+	 *     $p->get_tag() === 'div';
+	 *
+	 *     $p->next_tag( [] ) === false;
+	 *     $p->get_tag() === null;
+	 *
+	 * @since 6.1.0
+	 *
+	 * @return string|null Lowercase name of current tag in input HTML, or `null` if none currently open.
+	 */
+	public function get_tag() {
+		if ( null === $this->tag_name_starts_at ) {
+			return null;
+		}
+
+		return strtolower(
+			substr(
+				$this->html,
+				$this->tag_name_starts_at,
+				$this->tag_name_ends_at - $this->tag_name_starts_at
+			)
+		);
+	}
+
+	/**
+	 * Updates or creates a new attribute on the currently matched tag with the value passed.
+	 *
+	 * For boolean attributes special handling is provided:
+	 *  - When `true` is passed as the value, then only the attribute name is added to the tag.
+	 *  - When `false` is passed, the attribute gets removed if it existed before.
+	 *
+	 * The change is only enqueued here. It is written out when the processor
+	 * moves on to the next tag or is converted to a string.
+	 *
+	 * @since 6.1.0
+	 *
+	 * @param string         $name  The attribute name to target.
+	 * @param string|boolean $value The new attribute value.
+	 */
+	public function set_attribute( $name, $value ) {
+		if ( null === $this->tag_name_starts_at ) {
+			return;
+		}
+
+		/*
+		 * > The values "true" and "false" are not allowed on boolean attributes.
+		 * > To represent a false value, the attribute has to be omitted altogether.
+		 *     - HTML5 spec, https://html.spec.whatwg.org/#boolean-attributes
+		 */
+		if ( false === $value ) {
+			$this->remove_attribute( $name );
+			return;
+		}
+
+		if ( true === $value ) {
+			$updated_attribute = $name;
+		} else {
+			/*
+			 * The value is written between double quotes, so a literal double
+			 * quote inside of it has to become a character reference or it
+			 * would end the value early and let the rest leak into the tag.
+			 *
+			 * @TODO: What other escaping and sanitization do we need here?
+			 */
+			$escaped_new_value = str_replace( '"', '&quot;', $value );
+			$updated_attribute = "{$name}=\"{$escaped_new_value}\"";
+		}
+
+		if ( isset( $this->attributes[ $name ] ) ) {
+			/*
+			 * Update an existing attribute.
+			 *
+			 * Example – set attribute id to "new" in <div id="initial_id" />:
+			 *     <div id="initial_id"/>
+			 *          ^-------------^
+			 *          start         end
+			 *     replacement: `id="new"`
+			 *
+			 *     Result: <div id="new"/>
+			 */
+			$existing_attribute               = $this->attributes[ $name ];
+			$this->attribute_updates[ $name ] = new WP_HTML_Text_Replacement(
+				$existing_attribute->start,
+				$existing_attribute->end,
+				$updated_attribute
+			);
+		} else {
+			/*
+			 * Create a new attribute at the tag's name end.
+			 *
+			 * Example – add attribute id="new" to <div />:
+			 *     <div/>
+			 *         ^
+			 *         start and end
+			 *     replacement: ` id="new"`
+			 *
+			 *     Result: <div id="new"/>
+			 */
+			$this->attribute_updates[ $name ] = new WP_HTML_Text_Replacement(
+				$this->tag_name_ends_at,
+				$this->tag_name_ends_at,
+				' ' . $updated_attribute
+			);
+		}
+	}
+
+	/**
+	 * Removes an attribute of the currently matched tag.
+	 *
+	 * If the attribute does not exist in the document but an earlier
+	 * set_attribute() call enqueued its creation, that pending creation
+	 * is cancelled so the attribute does not appear in the output.
+	 *
+	 * @since 6.1.0
+	 *
+	 * @param string $name The attribute name to remove.
+	 */
+	public function remove_attribute( $name ) {
+		if ( ! isset( $this->attributes[ $name ] ) ) {
+			// Nothing to remove from the document; drop a not-yet-applied creation, if any.
+			unset( $this->attribute_updates[ $name ] );
+			return;
+		}
+
+		/*
+		 * Removes an existing tag attribute.
+		 *
+		 * Example – remove the attribute id from <div id="initial_id" />:
+		 *     <div id="initial_id"/>
+		 *          ^-------------^
+		 *          start         end
+		 *     replacement: ``
+		 *
+		 *     Result: <div />
+		 */
+		$this->attribute_updates[ $name ] = new WP_HTML_Text_Replacement(
+			$this->attributes[ $name ]->start,
+			$this->attributes[ $name ]->end,
+			''
+		);
+	}
+
+	/**
+	 * Enqueues the addition of a class name on the currently matched tag.
+	 *
+	 * Does nothing while no tag is matched.
+	 *
+	 * @since 6.1.0
+	 *
+	 * @param string $class_name The class name to add.
+	 */
+	public function add_class( $class_name ) {
+		if ( null === $this->tag_name_starts_at ) {
+			return;
+		}
+
+		$this->classname_updates[ $class_name ] = self::ADD_CLASS;
+	}
+
+	/**
+	 * Enqueues the removal of a class name from the currently matched tag.
+	 *
+	 * Does nothing while no tag is matched.
+	 *
+	 * @since 6.1.0
+	 *
+	 * @param string $class_name The class name to remove.
+	 */
+	public function remove_class( $class_name ) {
+		if ( null === $this->tag_name_starts_at ) {
+			return;
+		}
+
+		$this->classname_updates[ $class_name ] = self::REMOVE_CLASS;
+	}
+
+ /**
+ * Returns the string representation of the HTML Tag Processor.
+ *
+ * When no tag is currently matched this only joins the already updated
+ * markup with the rest of the input. When a tag is matched, the pending
+ * updates for that tag are applied, the processor's document is replaced
+ * with the updated markup, and the attributes of the current tag are
+ * parsed again from that updated markup.
+ *
+ * @since 6.1.0
+ *
+ * @return string The processed HTML.
+ */
+ public function __toString() {
+ // Parsing either already finished or not started yet.
+ if ( null === $this->tag_name_ends_at ) {
+ return $this->updated_html . substr( $this->html, $this->updated_bytes );
+ }
+
+ /*
+ * Parsing is in progress – let's apply the attribute updates without moving on to the next tag.
+ *
+ * In practice, it means:
+ * 1. Applying the attributes updates to the original HTML
+ * 2. Replacing the original HTML with the updated HTML
+ * 3. Pointing this tag processor to the current tag name's end in that updated HTML
+ */
+
+ // Find tag name's end in the updated markup.
+ $markup_updated_up_to_a_tag_name_end = $this->updated_html . substr( $this->html, $this->updated_bytes, $this->tag_name_ends_at - $this->updated_bytes );
+ $updated_tag_name_ends_at = strlen( $markup_updated_up_to_a_tag_name_end );
+ $tag_name_length = $this->tag_name_ends_at - $this->tag_name_starts_at;
+ $updated_tag_name_starts_at = $updated_tag_name_ends_at - $tag_name_length;
+
+ // Apply attributes updates.
+ $this->updated_html = $markup_updated_up_to_a_tag_name_end;
+ $this->updated_bytes = $this->tag_name_ends_at;
+ $this->class_name_updates_to_attributes_updates();
+ $this->apply_attributes_updates();
+
+ // Replace $this->html with the updated markup.
+ $this->html = $this->updated_html . substr( $this->html, $this->updated_bytes );
+
+ // Rewind this processor to the tag name's end.
+ $this->tag_name_starts_at = $updated_tag_name_starts_at;
+ $this->tag_name_ends_at = $updated_tag_name_ends_at;
+ $this->parsed_bytes = $this->tag_name_ends_at;
+
+ // Restore the previous version of the updated_html as we are not finished with the current_tag yet.
+ $this->updated_html = $markup_updated_up_to_a_tag_name_end;
+ $this->updated_bytes = $updated_tag_name_ends_at;
+
+ // Parse the attributes in the updated markup.
+ $this->attributes = array();
+ $this->parse_tag_opener_attributes();
+
+ return $this->html;
+ }
+
+	/**
+	 * Translates the query given to next_tag() into the sought-after
+	 * tag name, class name and match offset.
+	 *
+	 * A non-null query identical to the previous one is not parsed again.
+	 *
+	 * @since 6.1.0
+	 *
+	 * @param array|string $query {
+	 *     Which tag name to find, having which class.
+	 *
+	 *     @type string|null $tag_name     Which tag to find, or `null` for "any tag."
+	 *     @type string|null $class_name   Tag must contain this class name to match.
+	 *     @type int|null    $match_offset Positive count of matches to find; defaults to 1.
+	 * }
+	 */
+	private function parse_query( $query ) {
+		$is_repeated_query = null !== $query && $query === $this->last_query;
+		if ( $is_repeated_query ) {
+			return;
+		}
+
+		// Start from the defaults; a bare string is shorthand for "find the tag of this name".
+		$this->last_query          = $query;
+		$this->sought_match_offset = 1;
+		$this->sought_class_name   = null;
+		$this->sought_tag_name     = is_string( $query ) ? $query : null;
+
+		// Anything that is not an associative array has nothing more to offer.
+		if ( ! is_array( $query ) ) {
+			return;
+		}
+
+		$has_tag_name = isset( $query['tag_name'] ) && is_string( $query['tag_name'] );
+		if ( $has_tag_name ) {
+			$this->sought_tag_name = $query['tag_name'];
+		}
+
+		$has_class_name = isset( $query['class_name'] ) && is_string( $query['class_name'] );
+		if ( $has_class_name ) {
+			$this->sought_class_name = $query['class_name'];
+		}
+
+		$has_match_offset = isset( $query['match_offset'] ) && is_int( $query['match_offset'] ) && 0 < $query['match_offset'];
+		if ( $has_match_offset ) {
+			$this->sought_match_offset = $query['match_offset'];
+		}
+	}
+
+
+	/**
+	 * Checks whether a given tag and its attributes match the search criteria.
+	 *
+	 * The tag name is compared ASCII-case-insensitively. The class name has to
+	 * appear byte-for-byte as one of the whitespace-separated tokens inside of
+	 * the `class` attribute value.
+	 *
+	 * @since 6.1.0
+	 *
+	 * @return boolean
+	 */
+	private function matches() {
+		// Do we match a case-insensitive HTML tag name?
+		if ( null !== $this->sought_tag_name ) {
+			/*
+			 * String (byte) length lookup is fast. If they aren't the
+			 * same length then they can't be the same string values.
+			 */
+			$length = $this->tag_name_ends_at - $this->tag_name_starts_at;
+			if ( strlen( $this->sought_tag_name ) !== $length ) {
+				return false;
+			}
+
+			/*
+			 * Otherwise we have to check for each character if they
+			 * are the same, and only `strtolower()` if we have to.
+			 * Presuming that most people will supply lowercase tag
+			 * names and most HTML will contain lowercase tag names,
+			 * most of the time this runs we shouldn't expect to
+			 * actually run the case-folding comparison. Both sides are
+			 * folded so that a sought name such as "DIV" still matches.
+			 */
+			for ( $i = 0; $i < $length; $i++ ) {
+				$html_char = $this->html[ $this->tag_name_starts_at + $i ];
+				$tag_char  = $this->sought_tag_name[ $i ];
+
+				if ( $html_char !== $tag_char && strtolower( $html_char ) !== strtolower( $tag_char ) ) {
+					return false;
+				}
+			}
+		}
+
+		// Without a class name constraint the tag name check was all there is.
+		if ( null === $this->sought_class_name ) {
+			return true;
+		}
+
+		if ( ! isset( $this->attributes['class'] ) ) {
+			return false;
+		}
+
+		/*
+		 * An empty string is never one of the space-separated tokens. It also
+		 * must not reach strpos(), which finds an empty needle at every
+		 * offset and would keep the loop below from ever advancing.
+		 */
+		$needle_length = strlen( $this->sought_class_name );
+		if ( 0 === $needle_length ) {
+			return false;
+		}
+
+		// Do we match a byte-for-byte (case-sensitive and encoding-form-sensitive) class name?
+		$class_start = $this->attributes['class']->value_starts_at;
+		$class_end   = $class_start + $this->attributes['class']->value_length;
+		$class_at    = $class_start;
+
+		/*
+		 * We're going to have to jump through potential matches here because
+		 * it's possible that we have classes containing the class name we're
+		 * looking for. For instance, if we are looking for "even" we don't
+		 * want to be confused when we come to the class "not-even." This is
+		 * secured by ensuring that we find our sought-after class and that
+		 * it's surrounded on both sides by proper boundaries.
+		 *
+		 * See https://html.spec.whatwg.org/#attributes-3
+		 * See https://html.spec.whatwg.org/#space-separated-tokens
+		 */
+		while ( $class_at + $needle_length <= $class_end ) {
+			$class_at = strpos( $this->html, $this->sought_class_name, $class_at );
+
+			// The candidate has to lie entirely inside of the attribute value.
+			if ( false === $class_at || $class_at + $needle_length > $class_end ) {
+				return false;
+			}
+
+			/*
+			 * Verify this class starts at a boundary. At the start of the value
+			 * that is fine, otherwise the preceding byte has to be whitespace.
+			 */
+			if ( $class_at > $class_start && 1 !== strspn( $this->html, " \t\f\r\n", $class_at - 1, 1 ) ) {
+				$class_at += $needle_length;
+				continue;
+			}
+
+			/*
+			 * Similarly, verify this class ends at a boundary as well. Here we
+			 * can end at the very end of the value, otherwise the next byte
+			 * has to be whitespace.
+			 */
+			$after_at = $class_at + $needle_length;
+			if ( $after_at < $class_end && 1 !== strspn( $this->html, " \t\f\r\n", $after_at, 1 ) ) {
+				$class_at += $needle_length;
+				continue;
+			}
+
+			return true;
+		}
+
+		return false;
+	}
+}
diff --git a/lib/experimental/html/class-wp-html-text-replacement.php b/lib/experimental/html/class-wp-html-text-replacement.php
new file mode 100644
index 00000000000000..cbddd483538004
--- /dev/null
+++ b/lib/experimental/html/class-wp-html-text-replacement.php
@@ -0,0 +1,59 @@
+start = $start;
+ $this->end = $end;
+ $this->text = $text;
+ }
+}
diff --git a/lib/experimental/html/index.php b/lib/experimental/html/index.php
new file mode 100644
index 00000000000000..e7d41f8cdf4863
--- /dev/null
+++ b/lib/experimental/html/index.php
@@ -0,0 +1,11 @@
+Text';
+ const HTML_WITH_CLASSES = '
Text
';
+ const HTML_MALFORMED = '
Back to notifications
';
+
+	/**
+	 * A processor that has not yet been advanced with next_tag() has no
+	 * current tag, so get_tag() is expected to return null.
+	 *
+	 * @ticket 56299
+	 *
+	 * @covers get_tag
+	 */
+	public function test_get_tag_returns_null_before_finding_tags() {
+		$processor = new WP_HTML_Tag_Processor( '
Test
' );
+		$tag_name  = $processor->get_tag();
+
+		$this->assertNull( $tag_name );
+	}
+
+	/**
+	 * When the search for a `p` tag fails, the processor is not positioned
+	 * on any tag and get_tag() is expected to return null.
+	 *
+	 * @ticket 56299
+	 *
+	 * @covers next_tag
+	 * @covers get_tag
+	 */
+	public function test_get_tag_returns_null_when_not_in_open_tag() {
+		$processor = new WP_HTML_Tag_Processor( '
Test
' );
+
+		$found = $processor->next_tag( 'p' );
+		$this->assertFalse( $found, 'Querying a non-existing tag did not return false' );
+
+		$tag_name = $processor->get_tag();
+		$this->assertNull( $tag_name, 'Accessing a non-existing tag did not return null' );
+	}
+
+	/**
+	 * Once next_tag( 'div' ) has matched, get_tag() is expected to report
+	 * the name of the matched tag.
+	 *
+	 * @ticket 56299
+	 *
+	 * @covers next_tag
+	 * @covers get_tag
+	 */
+	public function test_get_tag_returns_open_tag_name() {
+		$processor = new WP_HTML_Tag_Processor( '
Test
' );
+
+		$found = $processor->next_tag( 'div' );
+		$this->assertTrue( $found, 'Querying an existing tag did not return true' );
+
+		$tag_name = $processor->get_tag();
+		$this->assertSame( 'div', $tag_name, 'Accessing an existing tag name did not return "div"' );
+	}
+
+	/**
+	 * Reading an attribute before any call to next_tag() is expected to
+	 * yield null, because no tag has been selected yet.
+	 *
+	 * @ticket 56299
+	 *
+	 * @covers get_attribute
+	 */
+	public function test_get_attribute_returns_null_before_finding_tags() {
+		$processor   = new WP_HTML_Tag_Processor( '
Test
' );
+		$class_value = $processor->get_attribute( 'class' );
+
+		$this->assertNull( $class_value );
+	}
+
+	/**
+	 * After a failed search for a `p` tag, reading the `class` attribute
+	 * is expected to yield null.
+	 *
+	 * @ticket 56299
+	 *
+	 * @covers next_tag
+	 * @covers get_attribute
+	 */
+	public function test_get_attribute_returns_null_when_not_in_open_tag() {
+		$processor = new WP_HTML_Tag_Processor( '
Test
' );
+
+		$found = $processor->next_tag( 'p' );
+		$this->assertFalse( $found, 'Querying a non-existing tag did not return false' );
+
+		$class_value = $processor->get_attribute( 'class' );
+		$this->assertNull( $class_value, 'Accessing an attribute of a non-existing tag did not return null' );
+	}
+
+	/**
+	 * With the processor positioned on a matched `div`, asking for the
+	 * `test-id` attribute is expected to yield null.
+	 *
+	 * @ticket 56299
+	 *
+	 * @covers next_tag
+	 * @covers get_attribute
+	 */
+	public function test_get_attribute_returns_null_when_attribute_missing() {
+		$processor = new WP_HTML_Tag_Processor( '
Test
' );
+
+		$found = $processor->next_tag( 'div' );
+		$this->assertTrue( $found, 'Querying an existing tag did not return true' );
+
+		$missing_value = $processor->get_attribute( 'test-id' );
+		$this->assertNull( $missing_value, 'Accessing a non-existing attribute did not return null' );
+	}
+
+	/**
+	 * With the processor positioned on a matched `div`, the `class`
+	 * attribute is expected to be returned as the string "test".
+	 *
+	 * @ticket 56299
+	 *
+	 * @covers next_tag
+	 * @covers get_attribute
+	 */
+	public function test_get_attribute_returns_attribute_value() {
+		$processor = new WP_HTML_Tag_Processor( '
Test
' );
+
+		$found = $processor->next_tag( 'div' );
+		$this->assertTrue( $found, 'Querying an existing tag did not return true' );
+
+		$class_value = $processor->get_attribute( 'class' );
+		$this->assertSame( 'test', $class_value, 'Accessing a class="test" attribute value did not return "test"' );
+	}
+
+	/**
+	 * After matching a tag by the class name "test", the `enabled`
+	 * attribute is expected to be reported as boolean true.
+	 *
+	 * @ticket 56299
+	 *
+	 * @covers next_tag
+	 * @covers get_attribute
+	 */
+	public function test_get_attribute_returns_true_for_boolean_attribute() {
+		$processor = new WP_HTML_Tag_Processor( '
Test
' );
+
+		$query = array( 'class_name' => 'test' );
+		$found = $processor->next_tag( $query );
+		$this->assertTrue( $found, 'Querying an existing tag did not return true' );
+
+		$enabled = $processor->get_attribute( 'enabled' );
+		$this->assertTrue( $enabled, 'Accessing a boolean "enabled" attribute value did not return true' );
+	}
+
+	/**
+	 * The attributes queried here are expected to come back as their
+	 * string values ('enabled', '1' and 'true') rather than as boolean
+	 * true.
+	 *
+	 * @ticket 56299
+	 *
+	 * @covers next_tag
+	 * @covers get_attribute
+	 */
+	public function test_get_attribute_returns_string_for_truthy_attributes() {
+		$p = new WP_HTML_Tag_Processor( '
Test
' );
+		$this->assertTrue( $p->next_tag( array() ), 'Querying an existing tag did not return true' );
+		// The failure message states the value actually asserted (the string "enabled", not boolean true).
+		$this->assertSame( 'enabled', $p->get_attribute( 'enabled' ), 'Accessing an "enabled" attribute with a string value did not return "enabled"' );
+		$this->assertSame( '1', $p->get_attribute( 'checked' ), 'Accessing a checked=1 attribute value did not return "1"' );
+		$this->assertSame( 'true', $p->get_attribute( 'hidden' ), 'Accessing a hidden="true" attribute value did not return "true"' );
+	}
+
+	/**
+	 * On the first tag found, the attributes `a`, `b`, `c` and `d` are each
+	 * expected to be reported as boolean true, and `e` as the string "test".
+	 *
+	 * @ticket 56299
+	 *
+	 * @covers next_tag
+	 * @covers get_attribute
+	 */
+	public function test_attributes_parser_treats_slash_as_attribute_separator() {
+		$processor = new WP_HTML_Tag_Processor( '
Test
' );
+		$this->assertTrue( $processor->next_tag( array() ), 'Querying an existing tag did not return true' );
+
+		// Same assertion and message for each of these, checked in the same order.
+		foreach ( array( 'a', 'b', 'c', 'd' ) as $attribute_name ) {
+			$this->assertTrue( $processor->get_attribute( $attribute_name ), 'Accessing an existing attribute did not return true' );
+		}
+
+		$e_value = $processor->get_attribute( 'e' );
+		$this->assertSame( 'test', $e_value, 'Accessing an existing e="test" did not return "test"' );
+	}
+
+	/**
+	 * Serializes the processor three times while edits are still being
+	 * made: after updating the second tag, after updating that same tag
+	 * again, and after moving on to the third tag. Each serialization is
+	 * compared against the expected markup.
+	 *
+	 * @ticket 56299
+	 *
+	 * @covers __toString
+	 */
+	public function test_tostring_applies_the_updates_so_far_and_keeps_the_processor_on_the_current_tag() {
+		$processor = new WP_HTML_Tag_Processor( '
Test
' );
+
+		// First tag: drop its id.
+		$processor->next_tag();
+		$processor->remove_attribute( 'id' );
+
+		// Second tag: first round of updates, then serialize.
+		$processor->next_tag();
+		$processor->set_attribute( 'id', 'div-id-1' );
+		$processor->add_class( 'new_class_1' );
+		$first_snapshot = (string) $processor;
+		$this->assertSame(
+			'
Test
',
+			$first_snapshot,
+			'Calling __toString after updating the attributes of the second tag returned different HTML than expected'
+		);
+
+		// Second round of updates without calling next_tag() in between, then serialize again.
+		$processor->set_attribute( 'id', 'div-id-2' );
+		$processor->add_class( 'new_class_2' );
+		$second_snapshot = (string) $processor;
+		$this->assertSame(
+			'
Test
',
+			$second_snapshot,
+			'Calling __toString after updating the attributes of the second tag for the second time returned different HTML than expected'
+		);
+
+		// Third tag: drop its id and serialize one last time.
+		$processor->next_tag();
+		$processor->remove_attribute( 'id' );
+		$third_snapshot = (string) $processor;
+		$this->assertSame(
+			'
Test
',
+			$third_snapshot,
+			'Calling __toString after removing the id attribute of the third tag returned different HTML than expected'
+		);
+	}
+
+	/**
+	 * Serializing a processor on which no edits were made is expected to
+	 * return the input markup unchanged.
+	 *
+	 * @ticket 56299
+	 *
+	 * @covers __toString
+	 */
+	public function test_tostring_without_updating_any_attributes_returns_the_original_html() {
+		$processor = new WP_HTML_Tag_Processor( self::HTML_SIMPLE );
+		$output    = (string) $processor;
+
+		$this->assertSame( self::HTML_SIMPLE, $output );
+	}
+
+	/**
+	 * Calling next_tag() without a query on the simple fixture is expected
+	 * to return true.
+	 *
+	 * @ticket 56299
+	 *
+	 * @covers next_tag
+	 */
+	public function test_next_tag_with_no_arguments_should_find_the_next_existing_tag() {
+		$processor = new WP_HTML_Tag_Processor( self::HTML_SIMPLE );
+		$found     = $processor->next_tag();
+
+		$this->assertTrue( $found, 'Querying an existing tag did not return true' );
+	}
+
+	/**
+	 * Searching the simple fixture for a `p` tag is expected to return
+	 * false.
+	 *
+	 * @ticket 56299
+	 *
+	 * @covers next_tag
+	 */
+	public function test_next_tag_should_return_false_for_a_non_existing_tag() {
+		$processor = new WP_HTML_Tag_Processor( self::HTML_SIMPLE );
+		$found     = $processor->next_tag( 'p' );
+
+		$this->assertFalse( $found, 'Querying a non-existing tag did not return false' );
+	}
+
+	/**
+	 * Calling set_attribute() after next_tag() has returned false is
+	 * expected to leave the serialized markup identical to the input.
+	 *
+	 * Two consecutive searches are asserted to fail before the update is
+	 * attempted.
+	 *
+	 * @ticket 56299
+	 *
+	 * @covers next_tag
+	 * @covers set_attribute
+	 * @covers __toString
+	 */
+	public function test_set_attribute_on_a_non_existing_tag_does_not_change_the_markup() {
+		$p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE );
+		$this->assertFalse( $p->next_tag( 'p' ), 'Querying a non-existing tag did not return false' );
+		$this->assertFalse( $p->next_tag( 'div' ), 'Querying a non-existing tag did not return false' );
+
+		// No tag is selected at this point, so this update must not alter the output.
+		$p->set_attribute( 'id', 'primary' );
+		$this->assertSame(
+			self::HTML_SIMPLE,
+			(string) $p,
+			'Calling __toString after updating a non-existing tag returned an HTML that was different from the original HTML'
+		);
+	}
+
+	/**
+	 * Sets `test-attribute` to "test-value" on the first tag of the simple
+	 * fixture and compares the serialized result with the expected markup.
+	 *
+	 * @ticket 56299
+	 *
+	 * @covers set_attribute
+	 * @covers __toString
+	 */
+	public function test_set_attribute_with_a_non_existing_attribute_adds_a_new_attribute_to_the_markup() {
+		$processor = new WP_HTML_Tag_Processor( self::HTML_SIMPLE );
+		$processor->next_tag();
+		$processor->set_attribute( 'test-attribute', 'test-value' );
+
+		$updated_html = (string) $processor;
+		$this->assertSame( '
Text
', $updated_html );
+	}
+
+	/**
+	 * The HTML spec only honours the first occurrence of a repeated
+	 * attribute; later occurrences are ignored. This test sets `id` on the
+	 * first tag and compares the serialized result with the expected markup.
+	 *
+	 * @ticket 56299
+	 *
+	 * @covers set_attribute
+	 * @covers __toString
+	 */
+	public function test_update_first_when_duplicated_attribute() {
+		$processor = new WP_HTML_Tag_Processor( '
Text
' );
+		$processor->next_tag();
+		$processor->set_attribute( 'id', 'updated-id' );
+
+		$updated_html = (string) $processor;
+		$this->assertSame( '
Text
', $updated_html );
+	}
+
+	/**
+	 * Sets `id` to "new-id" on the first tag of the simple fixture and
+	 * compares the serialized result with the expected markup.
+	 *
+	 * @ticket 56299
+	 *
+	 * @covers set_attribute
+	 * @covers __toString
+	 */
+	public function test_set_attribute_with_an_existing_attribute_name_updates_its_value_in_the_markup() {
+		$processor = new WP_HTML_Tag_Processor( self::HTML_SIMPLE );
+		$processor->next_tag();
+		$processor->set_attribute( 'id', 'new-id' );
+
+		$updated_html = (string) $processor;
+		$this->assertSame( '
Text
', $updated_html );
+	}
+
+	/**
+	 * Walks every tag that next_tag() yields, setting `data-foo="bar"` on
+	 * each one, then compares the serialized result with the expected
+	 * markup.
+	 *
+	 * @ticket 56299
+	 *
+	 * @covers set_attribute
+	 * @covers __toString
+	 */
+	public function test_next_tag_and_set_attribute_in_a_loop_update_all_tags_in_the_markup() {
+		$processor = new WP_HTML_Tag_Processor( self::HTML_SIMPLE );
+
+		// Keep going until the processor reports that no further tag was found.
+		while ( $processor->next_tag() ) {
+			$processor->set_attribute( 'data-foo', 'bar' );
+		}
+
+		$updated_html = (string) $processor;
+		$this->assertSame( '
Text
', $updated_html );
+	}
+
+	/**
+	 * Documents the current handling of duplicated attributes on removal.
+	 *
+	 * Ideally, removing an attribute that is listed several times on the
+	 * same tag (for example, two `id` attributes on one element) would
+	 * strip every occurrence. Today, however, WP_HTML_Tag_Processor only
+	 * removes the first such attribute. This looks like a corner case, and
+	 * the additional complexity needed to handle it correctly does not seem
+	 * worth it; it can be revisited if and when it becomes a problem.
+	 *
+	 * This test confirms that the behavior, while incorrect, is well-defined.
+	 *
+	 * @ticket 56299
+	 *
+	 * @covers remove_attribute
+	 * @covers __toString
+	 */
+	public function test_remove_first_when_duplicated_attribute() {
+		$processor = new WP_HTML_Tag_Processor( '
Text
' );
+		$processor->next_tag();
+		$processor->remove_attribute( 'id' );
+
+		$updated_html = (string) $processor;
+		$this->assertSame( '
Text
', $updated_html );
+	}
+
+	/**
+	 * Removes the `id` attribute from the first tag of the simple fixture
+	 * and compares the serialized result with the expected markup.
+	 *
+	 * @ticket 56299
+	 *
+	 * @covers remove_attribute
+	 * @covers __toString
+	 */
+	public function test_remove_attribute_with_an_existing_attribute_name_removes_it_from_the_markup() {
+		$processor = new WP_HTML_Tag_Processor( self::HTML_SIMPLE );
+		$processor->next_tag();
+		$processor->remove_attribute( 'id' );
+
+		$updated_html = (string) $processor;
+		$this->assertSame( '
Text
', $updated_html );
+	}
+
+	/**
+	 * Removing an attribute named `no-such-attribute` from the first tag is
+	 * expected to leave the serialized markup identical to the input.
+	 *
+	 * @ticket 56299
+	 *
+	 * @covers remove_attribute
+	 * @covers __toString
+	 */
+	public function test_remove_attribute_with_a_non_existing_attribute_name_does_not_change_the_markup() {
+		$processor = new WP_HTML_Tag_Processor( self::HTML_SIMPLE );
+		$processor->next_tag();
+		$processor->remove_attribute( 'no-such-attribute' );
+
+		$output = (string) $processor;
+		$this->assertSame( self::HTML_SIMPLE, $output );
+	}
+
+	/**
+	 * Adds the class `foo-class` to the first tag of the simple fixture and
+	 * compares the serialized result with the expected markup.
+	 *
+	 * @ticket 56299
+	 *
+	 * @covers add_class
+	 * @covers __toString
+	 */
+	public function test_add_class_creates_a_class_attribute_when_there_is_none() {
+		$processor = new WP_HTML_Tag_Processor( self::HTML_SIMPLE );
+		$processor->next_tag();
+		$processor->add_class( 'foo-class' );
+
+		$updated_html = (string) $processor;
+		$this->assertSame( '
Text
', $updated_html );
+	}
+
+	/**
+	 * Adds `foo-class` and then `bar-class` to the first tag of the simple
+	 * fixture and compares the serialized result with the expected markup.
+	 *
+	 * @ticket 56299
+	 *
+	 * @covers add_class
+	 * @covers __toString
+	 */
+	public function test_calling_add_class_twice_creates_a_class_attribute_with_both_class_names_when_there_is_no_class_attribute() {
+		$processor = new WP_HTML_Tag_Processor( self::HTML_SIMPLE );
+		$processor->next_tag();
+
+		// Order matters: foo-class is added before bar-class.
+		foreach ( array( 'foo-class', 'bar-class' ) as $class_name ) {
+			$processor->add_class( $class_name );
+		}
+
+		$updated_html = (string) $processor;
+		$this->assertSame( '
Text
', $updated_html );
+	}
+
+	/**
+	 * Removing the class `foo-class` from the first tag of the simple
+	 * fixture is expected to leave the serialized markup identical to the
+	 * input.
+	 *
+	 * @ticket 56299
+	 *
+	 * @covers remove_class
+	 * @covers __toString
+	 */
+	public function test_remove_class_does_not_change_the_markup_when_there_is_no_class_attribute() {
+		$processor = new WP_HTML_Tag_Processor( self::HTML_SIMPLE );
+		$processor->next_tag();
+		$processor->remove_class( 'foo-class' );
+
+		$output = (string) $processor;
+		$this->assertSame( self::HTML_SIMPLE, $output );
+	}
+
+	/**
+	 * Adds `foo-class` and then `bar-class` to the first tag of the
+	 * with-classes fixture and compares the serialized result with the
+	 * expected markup.
+	 *
+	 * @ticket 56299
+	 *
+	 * @covers add_class
+	 * @covers __toString
+	 */
+	public function test_add_class_appends_class_names_to_the_existing_class_attribute_when_one_already_exists() {
+		$processor = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES );
+		$processor->next_tag();
+		$processor->add_class( 'foo-class' );
+		$processor->add_class( 'bar-class' );
+
+		$updated_html = (string) $processor;
+		$this->assertSame(
+			'
Text
',
+			$updated_html
+		);
+	}
+
+	/**
+	 * Removes the class `main` from the first tag of the with-classes
+	 * fixture and compares the serialized result with the expected markup.
+	 *
+	 * @ticket 56299
+	 *
+	 * @covers remove_class
+	 * @covers __toString
+	 */
+	public function test_remove_class_removes_a_single_class_from_the_class_attribute_when_one_exists() {
+		$processor = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES );
+		$processor->next_tag();
+		$processor->remove_class( 'main' );
+
+		$updated_html = (string) $processor;
+		$this->assertSame(
+			'
Text
',
+			$updated_html
+		);
+	}
+
+	/**
+	 * Removes `main` and then `with-border` from the first tag of the
+	 * with-classes fixture and compares the serialized result with the
+	 * expected markup.
+	 *
+	 * @ticket 56299
+	 *
+	 * @covers remove_class
+	 * @covers __toString
+	 */
+	public function test_calling_remove_class_with_all_listed_class_names_removes_the_existing_class_attribute_from_the_markup() {
+		$processor = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES );
+		$processor->next_tag();
+		$processor->remove_class( 'main' );
+		$processor->remove_class( 'with-border' );
+
+		$updated_html = (string) $processor;
+		$this->assertSame(
+			'
Text
',
+			$updated_html
+		);
+	}
+
+	/**
+	 * Adds the class `with-border` to the first tag of the with-classes
+	 * fixture and compares the serialized result with the expected markup.
+	 *
+	 * @ticket 56299
+	 *
+	 * @covers add_class
+	 * @covers __toString
+	 */
+	public function test_add_class_does_not_add_duplicate_class_names() {
+		$processor = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES );
+		$processor->next_tag();
+		$processor->add_class( 'with-border' );
+
+		$updated_html = (string) $processor;
+		$this->assertSame(
+			'
Text
',
+			$updated_html
+		);
+	}
+
+	/**
+	 * Adds the class `main` to the first tag of the with-classes fixture
+	 * and compares the serialized result with the expected markup.
+	 *
+	 * @ticket 56299
+	 *
+	 * @covers add_class
+	 * @covers __toString
+	 */
+	public function test_add_class_preserves_class_name_order_when_a_duplicate_class_name_is_added() {
+		$processor = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES );
+		$processor->next_tag();
+		$processor->add_class( 'main' );
+
+		$updated_html = (string) $processor;
+		$this->assertSame(
+			'
Text
',
+			$updated_html
+		);
+	}
+
+	/**
+	 * Adds the class `foo-class` to the first tag of the input and compares
+	 * the serialized result with the expected markup.
+	 *
+	 * @ticket 56299
+	 *
+	 * @covers add_class
+	 * @covers __toString
+	 */
+	public function test_add_class_when_there_is_a_class_attribute_with_excessive_whitespaces() {
+		$processor = new WP_HTML_Tag_Processor(
+			'
Text
'
+		);
+		$processor->next_tag();
+		$processor->add_class( 'foo-class' );
+
+		$updated_html = (string) $processor;
+		$this->assertSame(
+			'
Text
',
+			$updated_html
+		);
+	}
+
+	/**
+	 * Removes the class `with-border` from the first tag of the input and
+	 * compares the serialized result with the expected markup.
+	 *
+	 * @ticket 56299
+	 *
+	 * @covers remove_class
+	 * @covers __toString
+	 */
+	public function test_remove_class_preserves_whitespaces_when_there_is_a_class_attribute_with_excessive_whitespaces() {
+		$processor = new WP_HTML_Tag_Processor(
+			'
Text
'
+		);
+		$processor->next_tag();
+		$processor->remove_class( 'with-border' );
+
+		$updated_html = (string) $processor;
+		$this->assertSame(
+			'
Text
',
+			$updated_html
+		);
+	}
+
+	/**
+	 * Removes `main` and then `with-border` from the first tag of the input
+	 * and compares the serialized result with the expected markup.
+	 *
+	 * @ticket 56299
+	 *
+	 * @covers remove_class
+	 * @covers __toString
+	 */
+	public function test_removing_all_classes_removes_the_existing_class_attribute_from_the_markup_even_when_excessive_whitespaces_are_present() {
+		$processor = new WP_HTML_Tag_Processor(
+			'
Text
'
+		);
+		$processor->next_tag();
+		$processor->remove_class( 'main' );
+		$processor->remove_class( 'with-border' );
+
+		$updated_html = (string) $processor;
+		$this->assertSame(
+			'
Text
',
+			$updated_html
+		);
+	}
+
+	/**
+	 * If a tag receives both set_attribute( 'class', $value ) and
+	 * add_class( $different_value ), the resulting class attribute should
+	 * be $value: the set_attribute() call wins and the add_class() call is
+	 * ignored, whichever of the two is made first.
+	 *
+	 * Both call orders are exercised, each on a fresh processor.
+	 *
+	 * @ticket 56299
+	 *
+	 * @covers add_class
+	 * @covers set_attribute
+	 * @covers __toString
+	 */
+	public function test_set_attribute_takes_priority_over_add_class() {
+		// Order 1: add_class() first, set_attribute() second.
+		$processor = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES );
+		$processor->next_tag();
+		$processor->add_class( 'add_class' );
+		$processor->set_attribute( 'class', 'set_attribute' );
+		$add_then_set = (string) $processor;
+		$this->assertSame(
+			'
Text
',
+			$add_then_set,
+			'Calling __toString after updating first tag\'s attributes did not return the expected HTML'
+		);
+
+		// Order 2: set_attribute() first, add_class() second.
+		$processor = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES );
+		$processor->next_tag();
+		$processor->set_attribute( 'class', 'set_attribute' );
+		$processor->add_class( 'add_class' );
+		$set_then_add = (string) $processor;
+		$this->assertSame(
+			'
Text
',
+			$set_then_add,
+			'Calling __toString after updating second tag\'s attributes did not return the expected HTML'
+		);
+	}
+
+ /**
+ * Chains several queries and edits on one document and compares the
+ * serialized result with the expected output.
+ *
+ * The sequence mixes tag-name queries, combined tag-name/class-name
+ * queries, a query with a match offset, attribute and class edits, and
+ * an edit attempted after a failed query.
+ *
+ * @ticket 56299
+ *
+ * @covers set_attribute
+ * @covers remove_attribute
+ * @covers add_class
+ * @covers remove_class
+ * @covers __toString
+ */
+ public function test_advanced_use_case() {
+ $input = <<
+
+
+HTML;
+
+ $expected_output = <<
+
+
+HTML;
+
+ $p = new WP_HTML_Tag_Processor( $input );
+ // Step 1: the first div gets a data attribute and an extra class.
+ $this->assertTrue( $p->next_tag( 'div' ), 'Querying an existing tag did not return true' );
+ $p->set_attribute( 'data-details', '{ "key": "value" }' );
+ $p->add_class( 'is-processed' );
+ // Step 2: the next div carrying the BtnGroup class has that class swapped for two new ones.
+ $this->assertTrue(
+ $p->next_tag(
+ array(
+ 'tag_name' => 'div',
+ 'class_name' => 'BtnGroup',
+ )
+ ),
+ 'Querying an existing tag did not return true'
+ );
+ $p->remove_class( 'BtnGroup' );
+ $p->add_class( 'button-group' );
+ $p->add_class( 'Another-Mixed-Case' );
+ // Step 3: the same query and the same edits are repeated for the following match.
+ $this->assertTrue(
+ $p->next_tag(
+ array(
+ 'tag_name' => 'div',
+ 'class_name' => 'BtnGroup',
+ )
+ ),
+ 'Querying an existing tag did not return true'
+ );
+ $p->remove_class( 'BtnGroup' );
+ $p->add_class( 'button-group' );
+ $p->add_class( 'Another-Mixed-Case' );
+ // Step 4: a button with class "btn" is selected using a match offset of 3
+ // (presumably the third such match from the current position; confirm against next_tag()).
+ $this->assertTrue(
+ $p->next_tag(
+ array(
+ 'tag_name' => 'button',
+ 'class_name' => 'btn',
+ 'match_offset' => 3,
+ )
+ ),
+ 'Querying an existing tag did not return true'
+ );
+ $p->remove_attribute( 'class' );
+ // Step 5: this query is asserted to fail; the set_attribute() call after it is
+ // presumably expected to have no effect on the output (confirm against $expected_output).
+ $this->assertFalse( $p->next_tag( 'non-existent' ), 'Querying a non-existing tag did not return false' );
+ $p->set_attribute( 'class', 'test' );
+ $this->assertSame( $expected_output, (string) $p, 'Calling __toString after updating the attributes did not return the expected HTML' );
+ }
+
+ /**
+ * @ticket 56299
+ *
+ * @covers remove_attribute
+ * @covers set_attribute
+ * @covers __toString
+ */
+ public function test_correctly_parses_html_attributes_wrapped_in_single_quotation_marks() {
+ $p = new WP_HTML_Tag_Processor(
+ '