From c6c5dbba30cdf90310e4f6767e6c31f760c68b70 Mon Sep 17 00:00:00 2001
From: Adam Zielinski <adam@adamziel.com>
Date: Fri, 23 Sep 2022 16:36:52 +1000
Subject: [PATCH] WP_HTML_Tag_Processor: Inject dynamic data to block HTML
 markup in PHP (#42485)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Introduce WP_HTML_Tag_Processor for reliably modifying HTML attributes.

Dynamic blocks often need to inject a CSS class name or set <img src /> in the rendered block HTML markup but lack the means to do so. WP_HTML_Tag_Processor solves this problem.

It scans through an HTML document to find specific tags, then transforms those tags by adding, removing, or updating the values of the HTML attributes within that tag (opener).

Importantly, it does not fully parse HTML or _recurse_ into the HTML structure. Instead WP_HTML_Tag_Processor scans linearly through a document and only parses the HTML tag openers.

Example:

```
$p = new WP_HTML_Tag_Processor('<div id="first"><img /></div>');
$p->next_tag('img')->set_attribute('src', '/wp-content/logo.png');
echo $p;
// <div id="first"><img src="/wp-content/logo.png" /></div>
```

For more details and context, see the original GitHub Pull Request at https://github.com/WordPress/gutenberg/pull/42485 and the overview issue at https://github.com/WordPress/gutenberg/issues/44410.

Co-authored-by: Adam Zieliński <adam@adamziel.com>
Co-authored-by: Dennis Snell <dennis.snell@automattic.com>
Co-authored-by: Grzegorz Ziółkowski <grzegorz.ziolkowski@automattic.com>
Co-authored-by: Sören Wrede <soerenwrede@gmail.com>
Co-authored-by: Colin Stewart <79332690+costdev@users.noreply.github.com>
---
 .../html/class-wp-html-attribute-token.php    |   89 ++
 .../html/class-wp-html-tag-processor.php      | 1265 +++++++++++++++++
 .../html/class-wp-html-text-replacement.php   |   59 +
 lib/experimental/html/index.php               |   11 +
 lib/load.php                                  |    3 +
 phpunit/html/wp-html-tag-processor-test.php   | 1187 ++++++++++++++++
 6 files changed, 2614 insertions(+)
 create mode 100644 lib/experimental/html/class-wp-html-attribute-token.php
 create mode 100644 lib/experimental/html/class-wp-html-tag-processor.php
 create mode 100644 lib/experimental/html/class-wp-html-text-replacement.php
 create mode 100644 lib/experimental/html/index.php
 create mode 100644 phpunit/html/wp-html-tag-processor-test.php
diff --git a/lib/experimental/html/class-wp-html-attribute-token.php b/lib/experimental/html/class-wp-html-attribute-token.php
new file mode 100644
index 00000000000000..32adfe02e0fb8c
--- /dev/null
+++ b/lib/experimental/html/class-wp-html-attribute-token.php
@@ -0,0 +1,89 @@
+<?php
+/**
+ * HTML Tag Processor: Attribute token structure class.
+ *
+ * @package WordPress
+ * @subpackage HTML
+ * @since 6.1.0
+ */
+
+/**
+ * Records where a single attribute sits inside the input HTML, as byte offsets and lengths.
+ *
+ * This class is for internal usage of the WP_HTML_Tag_Processor class.
+ *
+ * @access private
+ * @since 6.1.0
+ *
+ * @see WP_HTML_Tag_Processor
+ */
+class WP_HTML_Attribute_Token {
+	/**
+	 * Attribute name.
+	 *
+	 * @since 6.1.0
+	 * @var string
+	 */
+	public $name;
+
+	/**
+	 * Byte offset in the input HTML where the attribute value starts.
+	 *
+	 * @since 6.1.0
+	 * @var int
+	 */
+	public $value_starts_at;
+
+	/**
+	 * How many bytes the value occupies in the input HTML.
+	 *
+	 * @since 6.1.0
+	 * @var int
+	 */
+	public $value_length;
+
+	/**
+	 * The string offset where the attribute name starts.
+	 *
+	 * @since 6.1.0
+	 * @var int
+	 */
+	public $start;
+
+	/**
+	 * The string offset after the attribute value or its name.
+	 *
+	 * @since 6.1.0
+	 * @var int
+	 */
+	public $end;
+
+	/**
+	 * Whether the attribute is a boolean attribute with value `true`.
+	 *
+	 * @since 6.1.0
+	 * @var bool
+	 */
+	public $is_true;
+
+	/**
+	 * Constructor. Stores the given values on the token unchanged.
+	 *
+	 * @since 6.1.0
+	 *
+	 * @param string $name         Attribute name.
+	 * @param int    $value_start  Byte offset where the attribute value starts.
+	 * @param int    $value_length Number of bytes attribute value spans.
+	 * @param int    $start        The string offset where the attribute name starts.
+	 * @param int    $end          The string offset after the attribute value or its name.
+	 * @param bool   $is_true      Whether the attribute is a boolean attribute with true value.
+	 */
+	public function __construct( $name, $value_start, $value_length, $start, $end, $is_true ) {
+		$this->name            = $name;
+		$this->value_starts_at = $value_start;
+		$this->value_length    = $value_length;
+		$this->start           = $start;
+		$this->end             = $end;
+		$this->is_true         = $is_true;
+	}
+}
diff --git a/lib/experimental/html/class-wp-html-tag-processor.php b/lib/experimental/html/class-wp-html-tag-processor.php
new file mode 100644
index 00000000000000..be6179c963571f
--- /dev/null
+++ b/lib/experimental/html/class-wp-html-tag-processor.php
@@ -0,0 +1,1265 @@
+<?php
+/**
+ * Scans through an HTML document to find specific tags, then
+ * transforms those tags by adding, removing, or updating the
+ * values of the HTML attributes within that tag (opener).
+ *
+ * Does not fully parse HTML or _recurse_ into the HTML structure.
+ * Instead this scans linearly through a document and only parses
+ * the HTML tag openers.
+ *
+ * @TODO: Unify language around "currently-opened tag."
+ * @TODO: Organize unit test cases into normative tests, edge-case tests, regression tests.
+ * @TODO: Clean up attribute token class after is_true addition
+ * @TODO: Review (start,end) vs. (start,length) pairs for consistency and ease.
+ * @TODO: Prune whitespace when removing classes/attributes: e.g. "a b c" -> "c" not " c"
+ * @TODO: Skip over `/` in attributes area, split attribute names by `/`
+ * @TODO: Decode HTML references/entities in class names when matching.
+ *        E.g. match having class `1<"2` needs to recognize `class="1&lt;&quot;2"`.
+ * @TODO: Decode character references in `get_attribute()`
+ * @TODO: Properly escape attribute value in `set_attribute()`
+ *
+ * @package WordPress
+ * @subpackage HTML
+ * @since 6.1.0
+ */
+
+/**
+ * Processes an input HTML document by applying a specified set
+ * of patches to that input. Tokenizes HTML but does not fully
+ * parse the input document.
+ *
+ * @since 6.1.0
+ */
+class WP_HTML_Tag_Processor {
+
+	/**
+	 * The HTML document to parse.
+	 *
+	 * @since 6.1.0
+	 * @var string
+	 */
+	private $html;
+
+	/**
+	 * The last query passed to next_tag().
+	 *
+	 * @since 6.1.0
+	 * @var array|null
+	 */
+	private $last_query;
+
+	/**
+	 * The tag name this processor currently scans for.
+	 *
+	 * @since 6.1.0
+	 * @var string|null
+	 */
+	private $sought_tag_name;
+
+	/**
+	 * The CSS class name this processor currently scans for.
+	 *
+	 * @since 6.1.0
+	 * @var string|null
+	 */
+	private $sought_class_name;
+
+	/**
+	 * The match offset this processor currently scans for.
+	 *
+	 * @since 6.1.0
+	 * @var int|null
+	 */
+	private $sought_match_offset;
+
+	/**
+	 * The updated HTML document.
+	 *
+	 * @since 6.1.0
+	 * @var string
+	 */
+	private $updated_html = '';
+
+	/**
+	 * How many bytes from the original HTML document were already read.
+	 *
+	 * @since 6.1.0
+	 * @var int
+	 */
+	private $parsed_bytes = 0;
+
+	/**
+	 * How many bytes from the original HTML document were already treated
+	 * with the requested replacements.
+	 *
+	 * @since 6.1.0
+	 * @var int
+	 */
+	private $updated_bytes = 0;
+
+	/**
+	 * Byte offset where the name of the currently matched tag starts, or null if no tag is matched.
+	 *
+	 * @since 6.1.0
+	 * @var int|null
+	 */
+	private $tag_name_starts_at;
+
+	/**
+	 * Byte offset after the name of current tag.
+	 * Example:
+	 *   <div
+	 *   01234
+	 *       ^ tag_name_ends_at = 4
+	 *
+	 * @since 6.1.0
+	 * @var int|null
+	 */
+	private $tag_name_ends_at;
+
+	/**
+	 * Lazily-built index of attributes found within an HTML tag, keyed by the attribute name.
+	 *
+	 * Example:
+	 * <code>
+	 *     // supposing the parser is working through this content
+	 *     // and stops after recognizing the `id` attribute
+	 *     // <div id="test-4" class=outline title="data:text/plain;base64=asdk3nk1j3fo8">
+	 *     //                 ^ parsing will continue from this point
+	 *     $this->attributes = array(
+	 *         'id' => new WP_HTML_Attribute_Token( 'id', 9, 6, 5, 16, false )
+	 *     );
+	 *
+	 *     // when picking up parsing again, or when asking to find the
+	 *     // `class` attribute we will continue and add to this array
+	 *     $this->attributes = array(
+	 *         'id'    => new WP_HTML_Attribute_Token( 'id', 9, 6, 5, 16, false ),
+	 *         'class' => new WP_HTML_Attribute_Token( 'class', 23, 7, 17, 30, false )
+	 *     );
+	 *
+	 *     // Note that attribute values are not copied into the index; only the
+	 *     // byte offsets and lengths needed to read them from the input are stored.
+	 * </code>
+	 *
+	 * @since 6.1.0
+	 * @var WP_HTML_Attribute_Token[]
+	 */
+	private $attributes = array();
+
+	/**
+	 * Which class names to add or remove from a tag.
+	 *
+	 * These are tracked separately from attribute updates because they are
+	 * semantically distinct, whereas this interface exists for the common
+	 * case of adding and removing class names while other attributes are
+	 * generally modified as with DOM `setAttribute` calls.
+	 *
+	 * When modifying an HTML document these will eventually be collapsed
+	 * into a single lexical update to replace the `class` attribute.
+	 *
+	 * Example:
+	 * <code>
+	 *     // Add the `wp-block-group` class, remove the `wp-group` class.
+	 *     $classname_updates = array(
+	 *         // Indexed by the class name
+	 *         'wp-block-group' => WP_HTML_Tag_Processor::ADD_CLASS,
+	 *         'wp-group'       => WP_HTML_Tag_Processor::REMOVE_CLASS
+	 *     );
+	 * </code>
+	 *
+	 * @since 6.1.0
+	 * @var bool[]
+	 */
+	private $classname_updates = array();
+
+	const ADD_CLASS    = true;
+	const REMOVE_CLASS = false;
+	const SKIP_CLASS   = null;
+
+	/**
+	 * Lexical replacements to apply to input HTML document.
+	 *
+	 * HTML modifications collapse into lexical replacements in order to
+	 * provide an efficient mechanism to update documents lazily and in
+	 * order to support a variety of semantic modifications without
+	 * building a complicated parsing machinery. That is, it's up to
+	 * the calling class to generate the lexical modification from the
+	 * semantic change requested.
+	 *
+	 * Example:
+	 * <code>
+	 *     // Replace an attribute stored with a new value, indices
+	 *     // sourced from the lazily-parsed HTML recognizer.
+	 *     $start = $attributes['src']->start;
+	 *     $end   = $attributes['src']->end;
+	 *     $modifications[] = new WP_HTML_Text_Replacement( $start, $end, get_the_post_thumbnail_url() );
+	 *
+	 *     // Correspondingly, something like this
+	 *     // will appear in the replacements array.
+	 *     $replacements = array(
+	 *         WP_HTML_Text_Replacement( 14, 28, 'https://my-site.my-domain/wp-content/uploads/2014/08/kittens.jpg' )
+	 *     );
+	 * </code>
+	 *
+	 * @since 6.1.0
+	 * @var WP_HTML_Text_Replacement[]
+	 */
+	private $attribute_updates = array();
+
+	/**
+	 * Constructor. Only stores the document; nothing is scanned at this point.
+	 *
+	 * @since 6.1.0
+	 *
+	 * @param string $html HTML to process.
+	 */
+	public function __construct( $html ) {
+		$this->html = $html;
+	}
+
+	/**
+	 * Finds the next tag matching the $query.
+	 *
+	 * @since 6.1.0
+	 *
+	 * @param array|string $query {
+	 *     Which tag name to find, having which class, etc.
+	 *
+	 *     @type string|null $tag_name     Which tag to find, or `null` for "any tag."
+	 *     @type int|null    $match_offset Find the Nth tag matching all search criteria.
+	 *                                     Defaults to first tag. NOTE(review): the scan loop stops after
+	 *                                     this many matches, so 1 (not 0) looks like "first" – confirm.
+	 *     @type string|null $class_name   Tag must contain this whole class name to match.
+	 * }
+	 * @return boolean Whether a tag was matched; false once no further tag can be parsed.
+	 */
+	public function next_tag( $query = null ) {
+		$this->parse_query( $query );
+		$already_found = 0;
+
+		do {
+			/*
+			 * Unfortunately we can't try to search for only the tag name we want because that might
+			 * lead us to skip over other tags and lose track of our place. So we need to search for
+			 * _every_ tag and then check after we find one if it's the one we are looking for.
+			 */
+			if ( false === $this->parse_next_tag() ) {
+				$this->parsed_bytes = strlen( $this->html );
+
+				return false;
+			}
+
+			$this->parse_tag_opener_attributes();
+
+			if ( $this->matches() ) {
+				$already_found++;
+			}
+
+			// Peek at the first byte so the tag name is only copied for names starting with s/S/t/T.
+			$t = $this->html[ $this->tag_name_starts_at ];
+			if ( 's' === $t || 'S' === $t || 't' === $t || 'T' === $t ) {
+				$tag_name = $this->get_tag();
+
+				if ( 'script' === $tag_name ) {
+					$this->skip_script_data();
+				} elseif ( 'textarea' === $tag_name || 'title' === $tag_name ) {
+					$this->skip_rcdata( $tag_name );
+				}
+			}
+		} while ( $already_found < $this->sought_match_offset );
+
+		return true;
+	}
+
+	/**
+	 * Skips the contents of the title and textarea tags until an appropriate
+	 * tag closer is found, or to the end of the document if there is none.
+	 *
+	 * @see https://html.spec.whatwg.org/multipage/parsing.html#rcdata-state
+	 * @param string $tag_name – the lowercase tag name which will close the RCDATA region.
+	 * @since 6.1.0
+	 */
+	private function skip_rcdata( $tag_name ) {
+		$html       = $this->html;
+		$doc_length = strlen( $html );
+		$tag_length = strlen( $tag_name );
+		$at         = $this->parsed_bytes;
+
+		while ( true ) {
+			$at = strpos( $html, '</', $at );
+
+			// A closer needs "</", the tag name, and one terminating byte; otherwise fail.
+			if ( false === $at || ( $at + 2 + $tag_length ) >= $doc_length ) {
+				$this->parsed_bytes = $doc_length;
+				return;
+			}
+
+			$at += 2;
+
+			/*
+			 * Find a case-insensitive match to the tag name. Tag names are
+			 * limited to US-ASCII so Unicode normalization can be ignored:
+			 * a non-ASCII byte will never be a match.
+			 */
+			for ( $i = 0; $i < $tag_length; $i++ ) {
+				$tag_char  = $tag_name[ $i ];
+				$html_char = $html[ $at + $i ];
+
+				if ( $html_char !== $tag_char && strtolower( $html_char ) !== $tag_char ) {
+					$at += $i;
+					continue 2;
+				}
+			}
+
+			$at                += $tag_length;
+			$this->parsed_bytes = $at;
+
+			// Ensure the tag name is terminated, otherwise we might, for
+			// example, accidentally match "</textarearug>" for "</textarea>".
+			$c = $html[ $at ];
+			if ( ' ' !== $c && "\t" !== $c && "\r" !== $c && "\n" !== $c && '/' !== $c && '>' !== $c ) {
+				continue;
+			}
+
+			$this->skip_tag_closer_attributes();
+			$at = $this->parsed_bytes;
+
+			// The closer may be cut off by the end of the document: stop instead of reading past it.
+			if ( $at >= $doc_length ) {
+				$this->parsed_bytes = $doc_length;
+				return;
+			}
+
+			if ( '>' === $html[ $at ] || '/' === $html[ $at ] ) {
+				$this->parsed_bytes++;
+				return;
+			}
+		}
+	}
+
+	/**
+	 * Skips the contents of <script> tags, moving the cursor past the matching
+	 * </script> closer. Returns early when the document ends before one is found.
+	 *
+	 * @since 6.1.0
+	 */
+	private function skip_script_data() {
+		$state      = 'unescaped';
+		$html       = $this->html;
+		$doc_length = strlen( $html );
+		$at         = $this->parsed_bytes;
+
+		while ( $at < $doc_length ) {
+			$at += strcspn( $html, '-<', $at );
+
+			/*
+			 * Regardless of the state we're in, a "-->"
+			 * will break out of it and bring us back
+			 * into the normal unescaped script mode.
+			 */
+			if (
+				$at + 2 < $doc_length &&
+				'-' === $html[ $at ] &&
+				'-' === $html[ $at + 1 ] &&
+				'>' === $html[ $at + 2 ]
+			) {
+				$at   += 3;
+				$state = 'unescaped';
+				continue;
+			}
+
+			// Nothing of interest fits in the last byte; bail out instead of spinning on it forever.
+			if ( $at + 1 >= $doc_length ) {
+				return;
+			}
+
+			// Everything past here has to start with "<".
+			if ( '<' !== $html[ $at++ ] ) {
+				continue;
+			}
+
+			/*
+			 * On the other hand, "<!--" only enters the escaped mode if we
+			 * aren't already there. Inside the escaped modes it's ignored
+			 * and shouldn't ever pull us out of double-escaped and back
+			 * into escaped. We continue parsing past it regardless of our
+			 * state to avoid backtracking once we recognize the snippet.
+			 */
+			if (
+				$at + 2 < $doc_length &&
+				'!' === $html[ $at ] &&
+				'-' === $html[ $at + 1 ] &&
+				'-' === $html[ $at + 2 ]
+			) {
+				$at   += 3;
+				$state = 'unescaped' === $state ? 'escaped' : $state;
+				continue;
+			}
+
+			if ( '/' === $html[ $at ] ) {
+				$is_closing = true;
+				$at++;
+			} else {
+				$is_closing = false;
+			}
+
+			/*
+			 * At this point we're only examining state-changes based off of the
+			 * <script> or </script> tags, so if we're not seeing the start of one
+			 * of these tokens we can proceed to the next potential match in the text.
+			 */
+			if ( ! (
+				$at + 6 < $doc_length &&
+				( 's' === $html[ $at ] || 'S' === $html[ $at ] ) &&
+				( 'c' === $html[ $at + 1 ] || 'C' === $html[ $at + 1 ] ) &&
+				( 'r' === $html[ $at + 2 ] || 'R' === $html[ $at + 2 ] ) &&
+				( 'i' === $html[ $at + 3 ] || 'I' === $html[ $at + 3 ] ) &&
+				( 'p' === $html[ $at + 4 ] || 'P' === $html[ $at + 4 ] ) &&
+				( 't' === $html[ $at + 5 ] || 'T' === $html[ $at + 5 ] )
+			) ) {
+				$at++;
+				continue;
+			}
+
+			/*
+			 * We also have to make sure we terminate the script tag opener/closer
+			 * to avoid making partial matches on strings like `<script123`.
+			 */
+			$at += 6;
+			$c   = $html[ $at ];
+			if ( ' ' !== $c && "\t" !== $c && "\r" !== $c && "\n" !== $c && '/' !== $c && '>' !== $c ) {
+				$at++;
+				continue;
+			}
+
+			if ( 'escaped' === $state && ! $is_closing ) {
+				$state = 'double-escaped';
+				continue;
+			}
+
+			if ( 'double-escaped' === $state && $is_closing ) {
+				$state = 'escaped';
+				continue;
+			}
+
+			if ( $is_closing ) {
+				$this->parsed_bytes = $at;
+				$this->skip_tag_closer_attributes();
+
+				if ( $this->parsed_bytes < $doc_length && '>' === $html[ $this->parsed_bytes ] ) {
+					$this->parsed_bytes++;
+					return;
+				}
+			}
+
+			$at++;
+		}
+	}
+
+	/**
+	 * Parses the next tag.
+	 *
+	 * Comments, CDATA sections, DOCTYPE declarations and other "<"-prefixed syntax
+	 * that is not a tag opener are skipped; an unterminated one ends the scan.
+	 * Tag closers such as `</div>` are passed over as well.
+	 *
+	 * @since 6.1.0
+	 *
+	 * @return bool Whether a tag opener was found.
+	 */
+	private function parse_next_tag() {
+		$this->after_tag();
+
+		$html       = $this->html;
+		$doc_length = strlen( $html );
+		$at         = $this->parsed_bytes;
+
+		while ( true ) {
+			$at = strpos( $html, '<', $at );
+			if ( false === $at ) {
+				return false;
+			}
+
+			/*
+			 * HTML tag names must start with [a-zA-Z] otherwise they are not tags.
+			 * For example, "<3" is rendered as text, not a tag opener. This means
+			 * if we have at least one letter following the "<" then we _do_ have
+			 * a tag opener and can process it as such. This is more common than
+			 * HTML comments, DOCTYPE tags, and other structure starting with "<"
+			 * so it's good to check first for the presence of the tag.
+			 *
+			 * Reference:
+			 * * https://html.spec.whatwg.org/multipage/parsing.html#data-state
+			 * * https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state
+			 */
+			$tag_name_prefix_length = strspn( $html, 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ', $at + 1 );
+			if ( $tag_name_prefix_length > 0 ) {
+				$at++;
+				$tag_name_length          = $tag_name_prefix_length + strcspn( $html, " \t\f\r\n/>", $at + $tag_name_prefix_length );
+				$this->tag_name_starts_at = $at;
+				$this->tag_name_ends_at   = $at + $tag_name_length;
+				$this->parsed_bytes       = $at + $tag_name_length;
+				return true;
+			}
+
+			// A "<" that ends the document opens nothing; stop before reading past the last byte.
+			if ( $at + 1 >= $doc_length ) {
+				return false;
+			}
+
+			// Only "<!" and "<?" start syntax that has to be skipped as a whole.
+			if ( '!' !== $html[ $at + 1 ] && '?' !== $html[ $at + 1 ] ) {
+				$at++;
+				continue;
+			}
+
+			/*
+			 * Work out which byte sequence terminates the syntax starting here so
+			 * that every case shares the single, checked search for it below.
+			 *
+			 * https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state
+			 * https://html.spec.whatwg.org/multipage/parsing.html#markup-declaration-open-state
+			 */
+			if ( '?' === $html[ $at + 1 ] ) {
+				// <? transitions to a bogus comment state – we can skip to the nearest >
+				$closer    = '>';
+				$search_at = $at + 2;
+			} elseif (
+				$doc_length > $at + 3 &&
+				'-' === $html[ $at + 2 ] &&
+				'-' === $html[ $at + 3 ]
+			) {
+				// <!-- opens a comment – we can skip to the nearest -->
+				$closer    = '-->';
+				$search_at = $at + 4;
+			} elseif (
+				$doc_length > $at + 8 &&
+				'[' === $html[ $at + 2 ] &&
+				'C' === $html[ $at + 3 ] &&
+				'D' === $html[ $at + 4 ] &&
+				'A' === $html[ $at + 5 ] &&
+				'T' === $html[ $at + 6 ] &&
+				'A' === $html[ $at + 7 ] &&
+				'[' === $html[ $at + 8 ]
+			) {
+				// <![CDATA[ (case-sensitive) opens a CDATA section – we can skip to the nearest ]]>
+				$closer    = ']]>';
+				$search_at = $at + 9;
+			} else {
+				/*
+				 * <!DOCTYPE (ASCII-case-insensitive) transitions to the DOCTYPE state and
+				 * anything else is an incorrectly-opened comment which transitions to the
+				 * bogus comment state – either way we can skip to the nearest >.
+				 */
+				$closer    = '>';
+				$search_at = $at + 2;
+			}
+
+			$closer_at = strpos( $html, $closer, $search_at );
+			if ( false === $closer_at ) {
+				// Never terminated: the rest of the document belongs to this syntax.
+				return false;
+			}
+
+			// Resume scanning right after the terminator.
+			$at = $closer_at + strlen( $closer );
+		}
+	}
+
+	/**
+	 * Consumes every attribute of the current tag opener, one after another.
+	 *
+	 * @since 6.1.0
+	 */
+	private function parse_tag_opener_attributes() {
+		do {
+			$parsed = $this->parse_next_attribute();
+		} while ( $parsed );
+	}
+
+	/**
+	 * Moves past anything attribute-like written inside a tag closer.
+	 *
+	 * @since 6.1.0
+	 */
+	private function skip_tag_closer_attributes() {
+		do {
+			$skipped = $this->parse_next_attribute( 'tag-closer' );
+		} while ( $skipped );
+	}
+
+	/**
+	 * Parses the next attribute.
+	 *
+	 * @since 6.1.0
+	 * @param string $context tag-opener or tag-closer.
+	 * @return WP_HTML_Attribute_Token|bool Parsed token, `true` in a tag closer, `false` if none is left.
+	 */
+	private function parse_next_attribute( $context = 'tag-opener' ) {
+		// Skip whitespace and slashes.
+		$this->parsed_bytes += strspn( $this->html, " \t\f\r\n/", $this->parsed_bytes );
+
+		/*
+		 * Treat the equal sign ("=") as a part of the attribute name if it is the
+		 * first encountered byte. substr() peeks safely at the end of the document.
+		 * https://html.spec.whatwg.org/multipage/parsing.html#before-attribute-name-state
+		 */
+		$name_length = '=' === substr( $this->html, $this->parsed_bytes, 1 )
+			? 1 + strcspn( $this->html, "=/> \t\f\r\n", $this->parsed_bytes + 1 )
+			: strcspn( $this->html, "=/> \t\f\r\n", $this->parsed_bytes );
+
+		// No attribute, just tag closer.
+		if ( 0 === $name_length ) {
+			return false;
+		}
+
+		$attribute_start     = $this->parsed_bytes;
+		$attribute_name      = substr( $this->html, $attribute_start, $name_length );
+		$this->parsed_bytes += $name_length;
+
+		$this->skip_whitespace();
+
+		$has_value = '=' === substr( $this->html, $this->parsed_bytes, 1 );
+		if ( $has_value ) {
+			$this->parsed_bytes++;
+			$this->skip_whitespace();
+
+			$quote = substr( $this->html, $this->parsed_bytes, 1 );
+			if ( "'" === $quote || '"' === $quote ) {
+				$value_start  = $this->parsed_bytes + 1;
+				$value_length = strcspn( $this->html, $quote, $value_start );
+				// An unterminated quoted value ends with the document, not one byte past it.
+				$attribute_end = min( $value_start + $value_length + 1, strlen( $this->html ) );
+			} else {
+				$value_start   = $this->parsed_bytes;
+				$value_length  = strcspn( $this->html, "> \t\f\r\n", $value_start );
+				$attribute_end = $value_start + $value_length;
+			}
+
+			$this->parsed_bytes = $attribute_end;
+		} else {
+			$value_start   = $this->parsed_bytes;
+			$value_length  = 0;
+			$attribute_end = $attribute_start + $name_length;
+		}
+
+		if ( 'tag-opener' !== $context ) {
+			return true;
+		}
+
+		// Index by lowercase name, as get_attribute() looks names up in lowercase.
+		// If an attribute is listed many times, only the first declaration is used.
+		$comparable_name = strtolower( $attribute_name );
+		if ( ! array_key_exists( $comparable_name, $this->attributes ) ) {
+			$this->attributes[ $comparable_name ] = new WP_HTML_Attribute_Token(
+				$attribute_name,
+				$value_start,
+				$value_length,
+				$attribute_start,
+				$attribute_end,
+				! $has_value
+			);
+		}
+
+		return $this->attributes[ $comparable_name ];
+	}
+
+	/**
+	 * Advances the cursor over the run of whitespace bytes found at its position.
+	 *
+	 * @since 6.1.0
+	 * @return void
+	 */
+	private function skip_whitespace() {
+		$whitespace_length   = strspn( $this->html, " \t\f\r\n", $this->parsed_bytes );
+		$this->parsed_bytes += $whitespace_length;
+	}
+
+	/**
+	 * Flushes the pending changes of the tag being left, then forgets that tag.
+	 *
+	 * @since 6.1.0
+	 *
+	 * @return void
+	 */
+	private function after_tag() {
+		$this->class_name_updates_to_attributes_updates();
+		$this->apply_attributes_updates();
+		$this->attributes         = array();
+		$this->tag_name_ends_at   = null;
+		$this->tag_name_starts_at = null;
+	}
+
+	/**
+	 * Converts class name updates into tag attributes updates
+	 * (they are accumulated in different data formats for performance).
+	 *
+	 * Only meant to run right before the attribute updates are applied; the behavior in
+	 * all other cases is undefined. A `class` update that is already queued takes priority.
+	 *
+	 * @return void
+	 * @since 6.1.0
+	 *
+	 * @see $classname_updates
+	 * @see $attribute_updates
+	 */
+	private function class_name_updates_to_attributes_updates() {
+		if ( count( $this->classname_updates ) === 0 || isset( $this->attribute_updates['class'] ) ) {
+			$this->classname_updates = array();
+			return;
+		}
+
+		$existing_class = isset( $this->attributes['class'] )
+			? substr( $this->html, $this->attributes['class']->value_starts_at, $this->attributes['class']->value_length )
+			: '';
+
+		/**
+		 * Updated "class" attribute value.
+		 *
+		 * This is incrementally built as we scan through the existing class
+		 * attribute, omitting removed classes as we do so, and then appending
+		 * added classes at the end. Only when we're done processing will the
+		 * value contain the final new value.
+		 *
+		 * @var string
+		 */
+		$class = '';
+
+		/**
+		 * Tracks the cursor position in the existing class
+		 * attribute value where we're currently parsing.
+		 *
+		 * @var int
+		 */
+		$at = 0;
+
+		/**
+		 * Indicates if we have made any actual modifications to the existing
+		 * class attribute value, used to short-circuit string copying.
+		 *
+		 * It's possible that we are intending to remove certain classes and
+		 * add others in such a way that we don't modify the existing value
+		 * because calls to `add_class()` and `remove_class()` occur
+		 * independent of the input values sent to the WP_HTML_Tag_Processor. That is, we
+		 * might call `remove_class()` for a class that isn't already present
+		 * and we might call `add_class()` for one that is, in which case we
+		 * wouldn't need to break apart the string and rebuild it.
+		 *
+		 * This flag is set upon the first change that requires a string update.
+		 *
+		 * @var bool
+		 */
+		$modified = false;
+
+		// Remove unwanted classes by only copying the ones we keep.
+		while ( $at < strlen( $existing_class ) ) {
+			// Skip to the first non-whitespace character, remembering the whitespace run.
+			$ws_at     = $at;
+			$ws_length = strspn( $existing_class, " \t\f\r\n", $ws_at );
+			$at       += $ws_length;
+
+			// Capture the class name – it's everything until the next whitespace.
+			$name_length = strcspn( $existing_class, " \t\f\r\n", $at );
+			if ( 0 === $name_length ) {
+				// We're done, no more class names.
+				break;
+			}
+
+			$name = substr( $existing_class, $at, $name_length );
+			$at  += $name_length;
+
+			// Is this existing class one that was explicitly marked for removal?
+			$remove_class = (
+				isset( $this->classname_updates[ $name ] ) &&
+				self::REMOVE_CLASS === $this->classname_updates[ $name ]
+			);
+
+			// Once we've seen a class, we should never add it again.
+			if ( ! $remove_class ) {
+				$this->classname_updates[ $name ] = self::SKIP_CLASS;
+			}
+
+			if ( $remove_class ) {
+				$modified = true;
+				continue;
+			}
+
+			/*
+			 * Otherwise, append it to the new "class" attribute value.
+			 *
+			 * By preserving the existing whitespace instead of only adding a single
+			 * space (which is a valid transformation we can make) we'll introduce
+			 * fewer changes to the HTML content and hopefully make comparing
+			 * before/after easier for people trying to debug the modified output.
+			 */
+			$class .= substr( $existing_class, $ws_at, $ws_length );
+			$class .= $name;
+		}
+
+		// Add new classes by appending the ones we haven't already seen.
+		foreach ( $this->classname_updates as $name => $operation ) {
+			if ( self::ADD_CLASS === $operation ) {
+				$modified = true;
+
+				$class .= strlen( $class ) > 0 ? ' ' : '';
+				$class .= $name;
+			}
+		}
+
+		$this->classname_updates = array();
+		if ( ! $modified ) {
+			return;
+		}
+
+		if ( strlen( $class ) > 0 ) {
+			$this->set_attribute( 'class', $class );
+		} else {
+			$this->remove_attribute( 'class' );
+		}
+	}
+
+	/**
+	 * Flushes queued attribute updates into $updated_html, copying untouched input in between.
+	 *
+	 * @since 6.1.0
+	 */
+	private function apply_attributes_updates() {
+		if ( ! count( $this->attribute_updates ) ) {
+			return;
+		}
+
+		/*
+		 * Attribute updates can be enqueued in any order but as we progress
+		 * through the document we have to make our replacements in the order
+		 * in which they are found in that document. Sorting the updates avoids
+		 * mangled output, partially-duplicate attributes, and overwritten
+		 * attributes.
+		 *
+		 * The callback names the class via __CLASS__ because the "self" string
+		 * form of a callable is deprecated as of PHP 8.2.
+		 */
+		usort( $this->attribute_updates, array( __CLASS__, 'sort_start_ascending' ) );
+
+		foreach ( $this->attribute_updates as $diff ) {
+			$this->updated_html .= substr( $this->html, $this->updated_bytes, $diff->start - $this->updated_bytes );
+			$this->updated_html .= $diff->text;
+			$this->updated_bytes = $diff->end;
+		}
+
+		$this->attribute_updates = array();
+	}
+
+	/**
+	 * Comparison callback ordering objects by their `start` property, lowest first.
+	 *
+	 * @since 6.1.0
+	 * @param object $a First attribute update.
+	 * @param object $b Second attribute update.
+	 * @return integer Negative, zero or positive when $a starts before, at, or after $b.
+	 */
+	private static function sort_start_ascending( $a, $b ) {
+		$offset_difference = $a->start - $b->start;
+		return $offset_difference;
+	}
+
+	/**
+	 * Reads an attribute of the currently matched tag. The requested name is
+	 * lowercased before lookup and the value is returned as written in the input.
+	 *
+	 * Example:
+	 * <code>
+	 *     $p = new WP_HTML_Tag_Processor( '<div enabled class="test" data-test-id="14">Test</div>' );
+	 *     $p->next_tag( [ 'class_name' => 'test' ] ) === true;
+	 *     $p->get_attribute( 'data-test-id' ) === '14';
+	 *     $p->get_attribute( 'enabled' ) === true;
+	 *     $p->get_attribute( 'aria-label' ) === null;
+	 *
+	 *     $p->next_tag( [] ) === false;
+	 *     $p->get_attribute( 'class' ) === null;
+	 * </code>
+	 *
+	 * @since 6.1.0
+	 * @param string $name Name of attribute whose value is requested.
+	 * @return string|true|null Value of attribute or `null` if not available.
+	 *                          Boolean attributes return `true`.
+	 */
+	public function get_attribute( $name ) {
+		// Without a matched tag there is nothing to read from.
+		if ( null === $this->tag_name_starts_at ) {
+			return null;
+		}
+
+		$key = strtolower( $name );
+		if ( ! isset( $this->attributes[ $key ] ) ) {
+			return null;
+		}
+
+		$token = $this->attributes[ $key ];
+
+		// Boolean attributes carry no value text; report them as `true`.
+		return true === $token->is_true
+			? true
+			: substr( $this->html, $token->value_starts_at, $token->value_length );
+	}
+
+	/**
+	 * Returns the name of the currently matched tag, converted to lowercase.
+	 *
+	 * Example:
+	 * <code>
+	 *     $p = new WP_HTML_Tag_Processor( '<DIV CLASS="test">Test</DIV>' );
+	 *     $p->next_tag( [] ) === true;
+	 *     $p->get_tag() === 'div';
+	 *
+	 *     $p->next_tag( [] ) === false;
+	 *     $p->get_tag() === null;
+	 * </code>
+	 *
+	 * @since 6.1.0
+	 *
+	 * @return string|null Lowercased name of the current tag, or `null` if none currently open.
+	 */
+	public function get_tag() {
+		$starts_at = $this->tag_name_starts_at;
+		if ( null === $starts_at ) {
+			return null;
+		}
+
+		// The name is read straight from the input and normalized to lowercase.
+		$raw_name = substr( $this->html, $starts_at, $this->tag_name_ends_at - $starts_at );
+		return strtolower( $raw_name );
+	}
+
+	/**
+	 * Updates or creates a new attribute on the currently matched tag with the value passed.
+	 * The name is matched case-insensitively against the attributes already on the tag.
+	 *
+	 * For boolean attributes special handling is provided:
+	 *  - When `true` is passed as the value, then only the attribute name is added to the tag.
+	 *  - When `false` is passed, the attribute gets removed if it existed before.
+	 *
+	 * @since 6.1.0
+	 *
+	 * @param string         $name  The attribute name to target.
+	 * @param string|boolean $value The new attribute value.
+	 */
+	public function set_attribute( $name, $value ) {
+		if ( null === $this->tag_name_starts_at ) {
+			return;
+		}
+
+		$comparable_name = strtolower( $name );
+
+		/*
+		 * > The values "true" and "false" are not allowed on boolean attributes.
+		 * > To represent a false value, the attribute has to be omitted altogether.
+		 *     - HTML5 spec, https://html.spec.whatwg.org/#boolean-attributes
+		 */
+		if ( false === $value ) {
+			$this->remove_attribute( $comparable_name );
+			return;
+		}
+
+		if ( true === $value ) {
+			$updated_attribute = $name;
+		} else {
+			// @TODO: What escaping and sanitization do we need here?
+			$escaped_new_value = str_replace( '"', '&quot;', $value );
+			$updated_attribute = "{$name}=\"{$escaped_new_value}\"";
+		}
+
+		if ( isset( $this->attributes[ $comparable_name ] ) ) {
+			/*
+			 * Update an existing attribute, found by its lowercase name.
+			 *
+			 * Example – set attribute id to "new" in <div id="initial_id" />:
+			 *    <div id="initial_id"/>
+			 *         ^-------------^
+			 *         start         end
+			 *    replacement: `id="new"`
+			 *    Result: <div id="new"/>
+			 */
+			$this->attribute_updates[ $comparable_name ] = new WP_HTML_Text_Replacement(
+				$this->attributes[ $comparable_name ]->start,
+				$this->attributes[ $comparable_name ]->end,
+				$updated_attribute
+			);
+		} else {
+			/*
+			 * Create a new attribute at the tag's name end.
+			 *
+			 * Example – add attribute id="new" to <div />:
+			 *    <div/>
+			 *        ^
+			 *        start and end
+			 *    replacement: ` id="new"`
+			 *    Result: <div id="new"/>
+			 */
+			$this->attribute_updates[ $comparable_name ] = new WP_HTML_Text_Replacement(
+				$this->tag_name_ends_at,
+				$this->tag_name_ends_at,
+				' ' . $updated_attribute
+			);
+		}
+	}
+
+	/**
+	 * Removes an attribute of the currently matched tag, matching its name case-insensitively.
+	 *
+	 * @since 6.1.0
+	 *
+	 * @param string $name The attribute name to remove.
+	 */
+	public function remove_attribute( $name ) {
+		$comparable_name = strtolower( $name );
+		if ( ! isset( $this->attributes[ $comparable_name ] ) ) {
+			return;
+		}
+
+		/*
+		 * Removes an existing tag attribute, found by its lowercase name.
+		 *
+		 * Example – remove the attribute id from <div id="initial_id"/>:
+		 *    <div id="initial_id"/>
+		 *         ^-------------^
+		 *         start         end
+		 *    replacement: ``
+		 *    Result: <div />
+		 */
+		$this->attribute_updates[ $comparable_name ] = new WP_HTML_Text_Replacement(
+			$this->attributes[ $comparable_name ]->start,
+			$this->attributes[ $comparable_name ]->end,
+			''
+		);
+	}
+
+	/**
+	 * Queues a class name to be added to the currently matched tag; ignored when no tag is matched.
+	 *
+	 * @since 6.1.0
+	 *
+	 * @param string $class_name The class name to add.
+	 */
+	public function add_class( $class_name ) {
+		if ( null !== $this->tag_name_starts_at ) {
+			$this->classname_updates[ $class_name ] = self::ADD_CLASS;
+		}
+	}
+
+	/**
+	 * Queues a class name to be removed from the currently matched tag; ignored when no tag is matched.
+	 *
+	 * @since 6.1.0
+	 *
+	 * @param string $class_name The class name to remove.
+	 */
+	public function remove_class( $class_name ) {
+		if ( null !== $this->tag_name_starts_at ) {
+			$this->classname_updates[ $class_name ] = self::REMOVE_CLASS;
+		}
+	}
+
+	/**
+	 * Returns the string representation of the HTML Tag Processor.
+	 * Pending updates are applied, and the processor stays on the current tag for further changes.
+	 *
+	 * @since 6.1.0
+	 *
+	 * @return string The processed HTML.
+	 */
+	public function __toString() {
+		// Parsing either already finished or not started yet.
+		if ( null === $this->tag_name_ends_at ) {
+			return $this->updated_html . substr( $this->html, $this->updated_bytes );
+		}
+
+		/*
+		 * Parsing is in progress – let's apply the attribute updates without moving on to the next tag.
+		 *
+		 * In practice, it means:
+		 * 1. Applying the attributes updates to the original HTML
+		 * 2. Replacing the original HTML with the updated HTML
+		 * 3. Pointing this tag processor to the current tag name's end in that updated HTML
+		 */
+
+		// Find tag name's end in the updated markup.
+		$markup_updated_up_to_a_tag_name_end = $this->updated_html . substr( $this->html, $this->updated_bytes, $this->tag_name_ends_at - $this->updated_bytes );
+		$updated_tag_name_ends_at            = strlen( $markup_updated_up_to_a_tag_name_end );
+		$tag_name_length                     = $this->tag_name_ends_at - $this->tag_name_starts_at;
+		$updated_tag_name_starts_at          = $updated_tag_name_ends_at - $tag_name_length;
+
+		// Apply attributes updates.
+		$this->updated_html  = $markup_updated_up_to_a_tag_name_end;
+		$this->updated_bytes = $this->tag_name_ends_at;
+		$this->class_name_updates_to_attributes_updates();
+		$this->apply_attributes_updates();
+
+		// Replace $this->html with the updated markup.
+		$this->html = $this->updated_html . substr( $this->html, $this->updated_bytes );
+
+		// Rewind this processor to the tag name's end.
+		$this->tag_name_starts_at = $updated_tag_name_starts_at;
+		$this->tag_name_ends_at   = $updated_tag_name_ends_at;
+		$this->parsed_bytes       = $this->tag_name_ends_at;
+
+		// Restore the previous version of the updated_html as we are not finished with the current_tag yet.
+		$this->updated_html  = $markup_updated_up_to_a_tag_name_end;
+		$this->updated_bytes = $updated_tag_name_ends_at;
+
+		// Parse the attributes in the updated markup.
+		$this->attributes = array();
+		$this->parse_tag_opener_attributes();
+
+		return $this->html;
+	}
+
+	/**
+	 * Prepares tag search criteria from input interface.
+	 *
+	 * @since 6.1.0
+	 *
+	 * @param array|string|null $query {
+	 *     Which tag name to find, having which class, and which match to stop at.
+	 *     A plain string is shorthand for the tag name alone.
+	 *
+	 *     @type string|null $tag_name     Which tag to find, or `null` for "any tag."
+	 *     @type string|null $class_name   Tag must contain this class name to match.
+	 *     @type int|null    $match_offset Which match to find, counting from 1 (the default).
+	 * }
+	 */
+	private function parse_query( $query ) {
+		// The criteria already in place were prepared from this very query.
+		if ( null !== $query && $query === $this->last_query ) {
+			return;
+		}
+
+		$this->last_query          = $query;
+		$this->sought_class_name   = null;
+		$this->sought_match_offset = 1;
+
+		// A single string value means "find the tag of this name".
+		$this->sought_tag_name = is_string( $query ) ? $query : null;
+
+		// Any other criteria can only arrive through the associative array form.
+		if ( ! is_array( $query ) ) {
+			return;
+		}
+
+		// Criteria of an unexpected type are skipped, leaving the defaults set above.
+		if ( isset( $query['tag_name'] ) && is_string( $query['tag_name'] ) ) {
+			$this->sought_tag_name = $query['tag_name'];
+		}
+
+		if ( isset( $query['class_name'] ) && is_string( $query['class_name'] ) ) {
+			$this->sought_class_name = $query['class_name'];
+		}
+
+		if ( isset( $query['match_offset'] ) && is_int( $query['match_offset'] ) && 0 < $query['match_offset'] ) {
+			$this->sought_match_offset = $query['match_offset'];
+		}
+	}
+
+
+	/**
+	 * Checks whether a given tag and its attributes match the search criteria.
+	 *
+	 * @since 6.1.0
+	 *
+	 * @return bool Whether the current tag satisfies the sought tag name and class name.
+	 */
+	private function matches() {
+		// Do we match a case-insensitive HTML tag name?
+		if ( null !== $this->sought_tag_name ) {
+			/*
+			 * String (byte) length lookup is fast. If they aren't the
+			 * same length then they can't be the same string values.
+			 */
+			$length = $this->tag_name_ends_at - $this->tag_name_starts_at;
+			if ( strlen( $this->sought_tag_name ) !== $length ) {
+				return false;
+			}
+
+			/*
+			 * Otherwise compare character by character, and only case-fold
+			 * when the raw bytes differ. Most queries and most HTML use
+			 * lowercase tag names, so the `strtolower()` calls should
+			 * rarely run. Both sides are folded so that a query for `DIV`
+			 * finds `<div>` just as a query for `div` finds `<DIV>`.
+			 */
+			for ( $i = 0; $i < $length; $i++ ) {
+				$html_char = $this->html[ $this->tag_name_starts_at + $i ];
+				$tag_char  = $this->sought_tag_name[ $i ];
+
+				if ( $html_char !== $tag_char && strtolower( $html_char ) !== strtolower( $tag_char ) ) {
+					return false;
+				}
+			}
+		}
+
+		$needs_class_name = null !== $this->sought_class_name;
+
+		// An empty class name matches nothing; searching for it below would never advance `$class_at`.
+		if ( $needs_class_name && ( '' === $this->sought_class_name || ! isset( $this->attributes['class'] ) ) ) {
+			return false;
+		}
+
+		// Do we match a byte-for-byte (case-sensitive and encoding-form-sensitive) class name?
+		if ( $needs_class_name ) {
+			$class_start = $this->attributes['class']->value_starts_at;
+			$class_end   = $class_start + $this->attributes['class']->value_length;
+			$class_at    = $class_start;
+
+			/*
+			 * We're going to have to jump through potential matches here because
+			 * it's possible that we have classes containing the class name we're
+			 * looking for. For instance, if we are looking for "even" we don't
+			 * want to be confused when we come to the class "not-even." This is
+			 * secured by ensuring that we find our sought-after class and that
+			 * it's surrounded on both sides by proper boundaries.
+			 *
+			 * See https://html.spec.whatwg.org/#attributes-3
+			 * See https://html.spec.whatwg.org/#space-separated-tokens
+			 */
+			while (
+				// phpcs:ignore WordPress.CodeAnalysis.AssignmentInCondition.FoundInWhileCondition
+				false !== ( $class_at = strpos( $this->html, $this->sought_class_name, $class_at ) ) &&
+				$class_at < $class_end
+			) {
+				/*
+				 * Verify this class starts at a boundary. If it were at 0 we'd be at
+				 * the start of the string and that would be fine, otherwise we have
+				 * to start at a place where the preceding character is whitespace.
+				 */
+				if ( $class_at > $class_start ) {
+					$character = $this->html[ $class_at - 1 ];
+
+					if ( ' ' !== $character && "\t" !== $character && "\f" !== $character && "\r" !== $character && "\n" !== $character ) {
+						$class_at += strlen( $this->sought_class_name );
+						continue;
+					}
+				}
+
+				/*
+				 * Similarly, verify this class ends at a boundary as well. Here we
+				 * can end at the very end of the string value, otherwise we have
+				 * to end at a place where the next character is whitespace.
+				 */
+				if ( $class_at + strlen( $this->sought_class_name ) < $class_end ) {
+					$character = $this->html[ $class_at + strlen( $this->sought_class_name ) ];
+
+					if ( ' ' !== $character && "\t" !== $character && "\f" !== $character && "\r" !== $character && "\n" !== $character ) {
+						$class_at += strlen( $this->sought_class_name );
+						continue;
+					}
+				}
+
+				return true;
+			}
+
+			return false;
+		}
+
+		return true;
+	}
+}
diff --git a/lib/experimental/html/class-wp-html-text-replacement.php b/lib/experimental/html/class-wp-html-text-replacement.php
new file mode 100644
index 00000000000000..cbddd483538004
--- /dev/null
+++ b/lib/experimental/html/class-wp-html-text-replacement.php
@@ -0,0 +1,59 @@
+<?php
+/**
+ * HTML Tag Processor: Text replacement class.
+ *
+ * @package WordPress
+ * @subpackage HTML
+ * @since 6.1.0
+ */
+
+/**
+ * Describes one pending edit to a document: the byte span to cut out and the text to put in its place.
+ *
+ * This class is for internal usage of the WP_HTML_Tag_Processor class.
+ *
+ * @access private
+ * @since 6.1.0
+ *
+ * @see WP_HTML_Tag_Processor
+ */
+class WP_HTML_Text_Replacement {
+	/**
+	 * Byte offset in the document of the first byte that gets replaced.
+	 *
+	 * @since 6.1.0
+	 * @var int
+	 */
+	public $start;
+
+	/**
+	 * Byte offset in the document where the original content resumes (equal to start for an insertion).
+	 *
+	 * @since 6.1.0
+	 * @var int
+	 */
+	public $end;
+
+	/**
+	 * Text written into the document in place of the content between start and end.
+	 *
+	 * @since 6.1.0
+	 * @var string
+	 */
+	public $text;
+
+	/**
+	 * Stores the span boundaries and the replacement text as given.
+	 *
+	 * @since 6.1.0
+	 *
+	 * @param int    $start Byte offset of the first byte that gets replaced.
+	 * @param int    $end   Byte offset where the original content resumes.
+	 * @param string $text  Text to write in place of the content between start and end.
+	 */
+	public function __construct( $start, $end, $text ) {
+		$this->text  = $text;
+		$this->end   = $end;
+		$this->start = $start;
+	}
+}
diff --git a/lib/experimental/html/index.php b/lib/experimental/html/index.php
new file mode 100644
index 00000000000000..e7d41f8cdf4863
--- /dev/null
+++ b/lib/experimental/html/index.php
@@ -0,0 +1,11 @@
+<?php
+/**
+ * Load all files for the HTML Tag Processor.
+ *
+ * @package gutenberg
+ */
+
+// All class files necessary for the HTML Tag Processor.
+require_once __DIR__ . '/class-wp-html-attribute-token.php';
+require_once __DIR__ . '/class-wp-html-text-replacement.php';
+require_once __DIR__ . '/class-wp-html-tag-processor.php';
diff --git a/lib/load.php b/lib/load.php
index 9792ef48709375..bf7483a375d900 100644
--- a/lib/load.php
+++ b/lib/load.php
@@ -99,6 +99,9 @@ function gutenberg_is_experiment_enabled( $name ) {
 remove_action( 'plugins_loaded', '_wp_theme_json_webfonts_handler' ); // Turns off WP 6.0's stopgap handler for Webfonts API.
 require __DIR__ . '/experimental/block-editor-settings-mobile.php';
 require __DIR__ . '/experimental/register-webfonts-from-theme-json.php';
+if ( ! class_exists( 'WP_HTML_Tag_Processor' ) ) {
+	require __DIR__ . '/experimental/html/index.php';
+}
 require __DIR__ . '/experimental/class-wp-theme-json-gutenberg.php';
 require __DIR__ . '/experimental/class-wp-theme-json-resolver-gutenberg.php';
 require __DIR__ . '/experimental/class-wp-webfonts.php';
diff --git a/phpunit/html/wp-html-tag-processor-test.php b/phpunit/html/wp-html-tag-processor-test.php
new file mode 100644
index 00000000000000..41bf04a138abc8
--- /dev/null
+++ b/phpunit/html/wp-html-tag-processor-test.php
@@ -0,0 +1,1187 @@
+<?php
+/**
+ * Unit tests covering WP_HTML_Tag_Processor functionality.
+ *
+ * @package WordPress
+ * @subpackage HTML
+ */
+
+if ( ! function_exists( 'esc_attr' ) ) {
+	function esc_attr( $s ) {
+		return str_replace( '"', '&quot;', $s );
+	}
+}
+
+if ( ! class_exists( 'WP_UnitTestCase' ) ) {
+	abstract class WP_UnitTestCase extends \PHPUnit\Framework\TestCase {}
+}
+
+require_once __DIR__ . '/../../lib/experimental/html/index.php';
+
+/**
+ * @group html
+ *
+ * @coversDefaultClass WP_HTML_Tag_Processor
+ */
+class WP_HTML_Tag_Processor_Test extends WP_UnitTestCase {
+	const HTML_SIMPLE       = '<div id="first"><span id="second">Text</span></div>';
+	const HTML_WITH_CLASSES = '<div class="main with-border" id="first"><span class="not-main bold with-border" id="second">Text</span></div>';
+	const HTML_MALFORMED    = '<div><span class="d-md-none" Notifications</span><span class="d-none d-md-inline">Back to notifications</span></div>';
+
+	/**
+	 * @ticket 56299
+	 *
+	 * @covers get_tag
+	 */
+	public function test_get_tag_returns_null_before_finding_tags() {
+		$p = new WP_HTML_Tag_Processor( '<div>Test</div>' );
+		$this->assertNull( $p->get_tag() );
+	}
+
+	/**
+	 * @ticket 56299
+	 *
+	 * @covers next_tag
+	 * @covers get_tag
+	 */
+	public function test_get_tag_returns_null_when_not_in_open_tag() {
+		$p = new WP_HTML_Tag_Processor( '<div>Test</div>' );
+		$this->assertFalse( $p->next_tag( 'p' ), 'Querying a non-existing tag did not return false' );
+		$this->assertNull( $p->get_tag(), 'Accessing a non-existing tag did not return null' );
+	}
+
+	/**
+	 * @ticket 56299
+	 *
+	 * @covers next_tag
+	 * @covers get_tag
+	 */
+	public function test_get_tag_returns_open_tag_name() {
+		$p = new WP_HTML_Tag_Processor( '<div>Test</div>' );
+		$this->assertTrue( $p->next_tag( 'div' ), 'Querying an existing tag did not return true' );
+		$this->assertSame( 'div', $p->get_tag(), 'Accessing an existing tag name did not return "div"' );
+	}
+
+	/**
+	 * @ticket 56299
+	 *
+	 * @covers get_attribute
+	 */
+	public function test_get_attribute_returns_null_before_finding_tags() {
+		$p = new WP_HTML_Tag_Processor( '<div class="test">Test</div>' );
+		$this->assertNull( $p->get_attribute( 'class' ) );
+	}
+
+	/**
+	 * @ticket 56299
+	 *
+	 * @covers next_tag
+	 * @covers get_attribute
+	 */
+	public function test_get_attribute_returns_null_when_not_in_open_tag() {
+		$p = new WP_HTML_Tag_Processor( '<div class="test">Test</div>' );
+		$this->assertFalse( $p->next_tag( 'p' ), 'Querying a non-existing tag did not return false' );
+		$this->assertNull( $p->get_attribute( 'class' ), 'Accessing an attribute of a non-existing tag did not return null' );
+	}
+
+	/**
+	 * @ticket 56299
+	 *
+	 * @covers next_tag
+	 * @covers get_attribute
+	 */
+	public function test_get_attribute_returns_null_when_attribute_missing() {
+		$p = new WP_HTML_Tag_Processor( '<div class="test">Test</div>' );
+		$this->assertTrue( $p->next_tag( 'div' ), 'Querying an existing tag did not return true' );
+		$this->assertNull( $p->get_attribute( 'test-id' ), 'Accessing a non-existing attribute did not return null' );
+	}
+
+	/**
+	 * @ticket 56299
+	 *
+	 * @covers next_tag
+	 * @covers get_attribute
+	 */
+	public function test_get_attribute_returns_attribute_value() {
+		$p = new WP_HTML_Tag_Processor( '<div class="test">Test</div>' );
+		$this->assertTrue( $p->next_tag( 'div' ), 'Querying an existing tag did not return true' );
+		$this->assertSame( 'test', $p->get_attribute( 'class' ), 'Accessing a class="test" attribute value did not return "test"' );
+	}
+
+	/**
+	 * @ticket 56299
+	 *
+	 * @covers next_tag
+	 * @covers get_attribute
+	 */
+	public function test_get_attribute_returns_true_for_boolean_attribute() {
+		$p = new WP_HTML_Tag_Processor( '<div enabled class="test">Test</div>' );
+		$this->assertTrue( $p->next_tag( array( 'class_name' => 'test' ) ), 'Querying an existing tag did not return true' );
+		$this->assertTrue( $p->get_attribute( 'enabled' ), 'Accessing a boolean "enabled" attribute value did not return true' );
+	}
+
+	/**
+	 * @ticket 56299
+	 *
+	 * @covers next_tag
+	 * @covers get_attribute
+	 */
+	public function test_get_attribute_returns_string_for_truthy_attributes() {
+		$p = new WP_HTML_Tag_Processor( '<div enabled=enabled checked=1 hidden="true" class="test">Test</div>' );
+		$this->assertTrue( $p->next_tag( array() ), 'Querying an existing tag did not return true' );
+		$this->assertSame( 'enabled', $p->get_attribute( 'enabled' ), 'Accessing an enabled=enabled attribute value did not return "enabled"' );
+		$this->assertSame( '1', $p->get_attribute( 'checked' ), 'Accessing a checked=1 attribute value did not return "1"' );
+		$this->assertSame( 'true', $p->get_attribute( 'hidden' ), 'Accessing a hidden="true" attribute value did not return "true"' );
+	}
+
+	/**
+	 * @ticket 56299
+	 *
+	 * @covers next_tag
+	 * @covers get_attribute
+	 */
+	public function test_attributes_parser_treats_slash_as_attribute_separator() {
+		$p = new WP_HTML_Tag_Processor( '<div a/b/c/d/e="test">Test</div>' );
+		$this->assertTrue( $p->next_tag( array() ), 'Querying an existing tag did not return true' );
+		$this->assertTrue( $p->get_attribute( 'a' ), 'Accessing an existing attribute did not return true' );
+		$this->assertTrue( $p->get_attribute( 'b' ), 'Accessing an existing attribute did not return true' );
+		$this->assertTrue( $p->get_attribute( 'c' ), 'Accessing an existing attribute did not return true' );
+		$this->assertTrue( $p->get_attribute( 'd' ), 'Accessing an existing attribute did not return true' );
+		$this->assertSame( 'test', $p->get_attribute( 'e' ), 'Accessing an existing e="test" did not return "test"' );
+	}
+
+	/**
+	 * @ticket 56299
+	 *
+	 * @covers __toString
+	 */
+	public function test_tostring_applies_the_updates_so_far_and_keeps_the_processor_on_the_current_tag() {
+		$p = new WP_HTML_Tag_Processor( '<hr id="remove" /><div enabled class="test">Test</div><span id="span-id"></span>' );
+		$p->next_tag();
+		$p->remove_attribute( 'id' );
+
+		$p->next_tag();
+		$p->set_attribute( 'id', 'div-id-1' );
+		$p->add_class( 'new_class_1' );
+		$this->assertSame(
+			'<hr  /><div id="div-id-1" enabled class="test new_class_1">Test</div><span id="span-id"></span>',
+			(string) $p,
+			'Calling __toString after updating the attributes of the second tag returned different HTML than expected'
+		);
+
+		$p->set_attribute( 'id', 'div-id-2' );
+		$p->add_class( 'new_class_2' );
+		$this->assertSame(
+			'<hr  /><div id="div-id-2" enabled class="test new_class_1 new_class_2">Test</div><span id="span-id"></span>',
+			(string) $p,
+			'Calling __toString after updating the attributes of the second tag for the second time returned different HTML than expected'
+		);
+
+		$p->next_tag();
+		$p->remove_attribute( 'id' );
+		$this->assertSame(
+			'<hr  /><div id="div-id-2" enabled class="test new_class_1 new_class_2">Test</div><span ></span>',
+			(string) $p,
+			'Calling __toString after removing the id attribute of the third tag returned different HTML than expected'
+		);
+
+	}
+
+	/**
+	 * @ticket 56299
+	 *
+	 * @covers __toString
+	 */
+	public function test_tostring_without_updating_any_attributes_returns_the_original_html() {
+		$p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE );
+		$this->assertSame( self::HTML_SIMPLE, (string) $p );
+	}
+
+	/**
+	 * @ticket 56299
+	 *
+	 * @covers next_tag
+	 */
+	public function test_next_tag_with_no_arguments_should_find_the_next_existing_tag() {
+		$p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE );
+		$this->assertTrue( $p->next_tag(), 'Querying an existing tag did not return true' );
+	}
+
+	/**
+	 * @ticket 56299
+	 *
+	 * @covers next_tag
+	 */
+	public function test_next_tag_should_return_false_for_a_non_existing_tag() {
+		$p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE );
+		$this->assertFalse( $p->next_tag( 'p' ), 'Querying a non-existing tag did not return false' );
+	}
+
+	/**
+	 * @ticket 56299
+	 *
+	 * @covers next_tag
+	 * @covers __toString
+	 */
+	public function test_set_attribute_on_a_non_existing_tag_does_not_change_the_markup() {
+		$p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE );
+		$this->assertFalse( $p->next_tag( 'p' ), 'Querying a non-existing tag did not return false' );
+		$this->assertFalse( $p->next_tag( 'div' ), 'Querying a non-existing tag did not return false' );
+		$p->set_attribute( 'id', 'primary' );
+		$this->assertSame(
+			self::HTML_SIMPLE,
+			(string) $p,
+			'Calling __toString after updating a non-existing tag returned an HTML that was different from the original HTML'
+		);
+	}
+
+	/**
+	 * @ticket 56299
+	 *
+	 * @covers set_attribute
+	 * @covers __toString
+	 */
+	public function test_set_attribute_with_a_non_existing_attribute_adds_a_new_attribute_to_the_markup() {
+		$p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE );
+		$p->next_tag();
+		$p->set_attribute( 'test-attribute', 'test-value' );
+		$this->assertSame( '<div test-attribute="test-value" id="first"><span id="second">Text</span></div>', (string) $p );
+	}
+
+	/**
+	 * According to HTML spec, only the first instance of an attribute counts.
+	 * The other ones are ignored.
+	 *
+	 * @ticket 56299
+	 *
+	 * @covers set_attribute
+	 * @covers __toString
+	 */
+	public function test_update_first_when_duplicated_attribute() {
+		$p = new WP_HTML_Tag_Processor( '<div id="update-me" id="ignored-id"><span id="second">Text</span></div>' );
+		$p->next_tag();
+		$p->set_attribute( 'id', 'updated-id' );
+		$this->assertSame( '<div id="updated-id" id="ignored-id"><span id="second">Text</span></div>', (string) $p );
+	}
+
+	/**
+	 * @ticket 56299
+	 *
+	 * @covers set_attribute
+	 * @covers __toString
+	 */
+	public function test_set_attribute_with_an_existing_attribute_name_updates_its_value_in_the_markup() {
+		$p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE );
+		$p->next_tag();
+		$p->set_attribute( 'id', 'new-id' );
+		$this->assertSame( '<div id="new-id"><span id="second">Text</span></div>', (string) $p );
+	}
+
+	/**
+	 * @ticket 56299
+	 *
+	 * @covers set_attribute
+	 * @covers __toString
+	 */
+	public function test_next_tag_and_set_attribute_in_a_loop_update_all_tags_in_the_markup() {
+		$p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE );
+		while ( $p->next_tag() ) {
+			$p->set_attribute( 'data-foo', 'bar' );
+		}
+
+		$this->assertSame( '<div data-foo="bar" id="first"><span data-foo="bar" id="second">Text</span></div>', (string) $p );
+	}
+
+	/**
+	 * Removing an attribute that's listed many times, e.g. `<div id="a" id="b" />` should remove
+	 * all its instances and output just `<div />`.
+	 *
+	 * Today, however, WP_HTML_Tag_Processor only removes the first such attribute. It seems like a corner case
+	 * and introducing additional complexity to correctly handle this scenario doesn't seem to be worth it.
+	 * Let's revisit if and when this becomes a problem.
+	 *
+	 * This test is in place to confirm this behavior, while incorrect, is well-defined.
+	 *
+	 * @ticket 56299
+	 *
+	 * @covers remove_attribute
+	 * @covers __toString
+	 */
+	public function test_remove_first_when_duplicated_attribute() {
+		$p = new WP_HTML_Tag_Processor( '<div id="update-me" id="ignored-id"><span id="second">Text</span></div>' );
+		$p->next_tag();
+		$p->remove_attribute( 'id' );
+		$this->assertSame( '<div  id="ignored-id"><span id="second">Text</span></div>', (string) $p );
+	}
+
+	/**
+	 * @ticket 56299
+	 *
+	 * @covers remove_attribute
+	 * @covers __toString
+	 */
+	public function test_remove_attribute_with_an_existing_attribute_name_removes_it_from_the_markup() {
+		$p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE );
+		$p->next_tag();
+		$p->remove_attribute( 'id' );
+		$this->assertSame( '<div ><span id="second">Text</span></div>', (string) $p );
+	}
+
+	/**
+	 * @ticket 56299
+	 *
+	 * @covers remove_attribute
+	 * @covers __toString
+	 */
+	public function test_remove_attribute_with_a_non_existing_attribute_name_does_not_change_the_markup() {
+		$p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE );
+		$p->next_tag();
+		$p->remove_attribute( 'no-such-attribute' );
+		$this->assertSame( self::HTML_SIMPLE, (string) $p );
+	}
+
+	/**
+	 * @ticket 56299
+	 *
+	 * @covers add_class
+	 * @covers __toString
+	 */
+	public function test_add_class_creates_a_class_attribute_when_there_is_none() {
+		$p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE );
+		$p->next_tag();
+		$p->add_class( 'foo-class' );
+		$this->assertSame( '<div class="foo-class" id="first"><span id="second">Text</span></div>', (string) $p );
+	}
+
+	/**
+	 * @ticket 56299
+	 *
+	 * @covers add_class
+	 * @covers __toString
+	 */
+	public function test_calling_add_class_twice_creates_a_class_attribute_with_both_class_names_when_there_is_no_class_attribute() {
+		$p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE );
+		$p->next_tag();
+		$p->add_class( 'foo-class' );
+		$p->add_class( 'bar-class' );
+		$this->assertSame( '<div class="foo-class bar-class" id="first"><span id="second">Text</span></div>', (string) $p );
+	}
+
+	/**
+	 * @ticket 56299
+	 *
+	 * @covers remove_class
+	 * @covers __toString
+	 */
+	public function test_remove_class_does_not_change_the_markup_when_there_is_no_class_attribute() {
+		$p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE );
+		$p->next_tag();
+		$p->remove_class( 'foo-class' );
+		$this->assertSame( self::HTML_SIMPLE, (string) $p );
+	}
+
+	/**
+	 * @ticket 56299
+	 *
+	 * @covers add_class
+	 * @covers __toString
+	 */
+	public function test_add_class_appends_class_names_to_the_existing_class_attribute_when_one_already_exists() {
+		$p = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES );
+		$p->next_tag();
+		$p->add_class( 'foo-class' );
+		$p->add_class( 'bar-class' );
+		$this->assertSame(
+			'<div class="main with-border foo-class bar-class" id="first"><span class="not-main bold with-border" id="second">Text</span></div>',
+			(string) $p
+		);
+	}
+
+	/**
+	 * @ticket 56299
+	 *
+	 * @covers remove_class
+	 * @covers __toString
+	 */
+	public function test_remove_class_removes_a_single_class_from_the_class_attribute_when_one_exists() {
+		$p = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES );
+		$p->next_tag();
+		$p->remove_class( 'main' );
+		$this->assertSame(
+			'<div class=" with-border" id="first"><span class="not-main bold with-border" id="second">Text</span></div>',
+			(string) $p
+		);
+	}
+
+	/**
+	 * @ticket 56299
+	 *
+	 * @covers remove_class
+	 * @covers __toString
+	 */
+	public function test_calling_remove_class_with_all_listed_class_names_removes_the_existing_class_attribute_from_the_markup() {
+		$p = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES );
+		$p->next_tag();
+		$p->remove_class( 'main' );
+		$p->remove_class( 'with-border' );
+		$this->assertSame(
+			'<div  id="first"><span class="not-main bold with-border" id="second">Text</span></div>',
+			(string) $p
+		);
+	}
+
+	/**
+	 * @ticket 56299
+	 *
+	 * @covers add_class
+	 * @covers __toString
+	 */
+	public function test_add_class_does_not_add_duplicate_class_names() {
+		$p = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES );
+		$p->next_tag();
+		$p->add_class( 'with-border' );
+		$this->assertSame(
+			'<div class="main with-border" id="first"><span class="not-main bold with-border" id="second">Text</span></div>',
+			(string) $p
+		);
+	}
+
+	/**
+	 * @ticket 56299
+	 *
+	 * @covers add_class
+	 * @covers __toString
+	 */
+	public function test_add_class_preserves_class_name_order_when_a_duplicate_class_name_is_added() {
+		$p = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES );
+		$p->next_tag();
+		$p->add_class( 'main' );
+		$this->assertSame(
+			'<div class="main with-border" id="first"><span class="not-main bold with-border" id="second">Text</span></div>',
+			(string) $p
+		);
+	}
+
+	/**
+	 * @ticket 56299
+	 *
+	 * @covers add_class
+	 * @covers __toString
+	 */
+	public function test_add_class_when_there_is_a_class_attribute_with_excessive_whitespaces() {
+		$p = new WP_HTML_Tag_Processor(
+			'<div class="   main   with-border   " id="first"><span class="not-main bold with-border" id="second">Text</span></div>'
+		);
+		$p->next_tag();
+		$p->add_class( 'foo-class' );
+		$this->assertSame(
+			'<div class="   main   with-border foo-class" id="first"><span class="not-main bold with-border" id="second">Text</span></div>',
+			(string) $p
+		);
+	}
+
+	/**
+	 * @ticket 56299
+	 *
+	 * @covers remove_class
+	 * @covers __toString
+	 */
+	public function test_remove_class_preserves_whitespaces_when_there_is_a_class_attribute_with_excessive_whitespaces() {
+		$p = new WP_HTML_Tag_Processor(
+			'<div class="   main   with-border   " id="first"><span class="not-main bold with-border" id="second">Text</span></div>'
+		);
+		$p->next_tag();
+		$p->remove_class( 'with-border' );
+		$this->assertSame(
+			'<div class="   main" id="first"><span class="not-main bold with-border" id="second">Text</span></div>',
+			(string) $p
+		);
+	}
+
+	/**
+	 * @ticket 56299
+	 *
+	 * @covers remove_class
+	 * @covers __toString
+	 */
+	public function test_removing_all_classes_removes_the_existing_class_attribute_from_the_markup_even_when_excessive_whitespaces_are_present() {
+		$padded_html = '<div class="   main   with-border   " id="first"><span class="not-main bold with-border" id="second">Text</span></div>';
+
+		// With every class name gone the whole attribute is expected to disappear,
+		// leaving the two spaces that used to surround it.
+		$expected_html = '<div  id="first"><span class="not-main bold with-border" id="second">Text</span></div>';
+
+		$processor = new WP_HTML_Tag_Processor( $padded_html );
+		$processor->next_tag();
+		foreach ( array( 'main', 'with-border' ) as $class_name ) {
+			$processor->remove_class( $class_name );
+		}
+
+		$this->assertSame( $expected_html, (string) $processor );
+	}
+
+	/**
+	 * When both set_attribute('class', $value) and add_class( $different_value ) are called,
+	 * the final class name should be $value. In other words, the `add_class` call should be ignored,
+	 * and the `set_attribute` call should win. This holds regardless of the order in which these methods
+	 * are called.
+	 *
+	 * @ticket 56299
+	 *
+	 * @covers add_class
+	 * @covers set_attribute
+	 * @covers __toString
+	 */
+	public function test_set_attribute_takes_priority_over_add_class() {
+		// Both scenarios operate on the first tag only and must produce the same
+		// markup: the value given to set_attribute() replaces the class attribute.
+		$expected_html = '<div class="set_attribute" id="first"><span class="not-main bold with-border" id="second">Text</span></div>';
+
+		// Scenario 1: add_class() is called before set_attribute().
+		$p = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES );
+		$p->next_tag();
+		$p->add_class( 'add_class' );
+		$p->set_attribute( 'class', 'set_attribute' );
+		$this->assertSame(
+			$expected_html,
+			(string) $p,
+			'Calling set_attribute() after add_class() did not make the set_attribute() value win'
+		);
+
+		// Scenario 2: set_attribute() is called before add_class().
+		$p = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES );
+		$p->next_tag();
+		$p->set_attribute( 'class', 'set_attribute' );
+		$p->add_class( 'add_class' );
+		$this->assertSame(
+			$expected_html,
+			(string) $p,
+			'Calling add_class() after set_attribute() did not keep the set_attribute() value'
+		);
+	}
+
+	/**
+	 * @ticket 56299
+	 *
+	 * @covers set_attribute
+	 * @covers remove_attribute
+	 * @covers add_class
+	 * @covers remove_class
+	 * @covers __toString
+	 */
+	public function test_advanced_use_case() {
+		// Walks one processor through several tags of a larger document, queuing a
+		// mix of attribute and class updates, and compares the final markup once.
+		$input = <<<HTML
+<div selected class="merge-message" checked>
+	<div class="select-menu d-inline-block">
+		<div checked class="BtnGroup MixedCaseHTML position-relative" />
+		<div checked class="BtnGroup MixedCaseHTML position-relative">
+			<button type="button" class="merge-box-button btn-group-merge rounded-left-2 btn  BtnGroup-item js-details-target hx_create-pr-button" aria-expanded="false" data-details-container=".js-merge-pr" disabled="">
+			  Merge pull request
+			</button>
+
+			<button type="button" class="merge-box-button btn-group-squash rounded-left-2 btn  BtnGroup-item js-details-target hx_create-pr-button" aria-expanded="false" data-details-container=".js-merge-pr" disabled="">
+			  Squash and merge
+			</button>
+
+			<button type="button" class="merge-box-button btn-group-rebase rounded-left-2 btn  BtnGroup-item js-details-target hx_create-pr-button" aria-expanded="false" data-details-container=".js-merge-pr" disabled="">
+			  Rebase and merge
+			</button>
+
+			<button aria-label="Select merge method" disabled="disabled" type="button" data-view-component="true" class="select-menu-button btn BtnGroup-item"></button>
+		</div>
+	</div>
+</div>
+HTML;
+
+		$expected_output = <<<HTML
+<div data-details="{ &quot;key&quot;: &quot;value&quot; }" selected class="merge-message is-processed" checked>
+	<div class="select-menu d-inline-block">
+		<div checked class=" MixedCaseHTML position-relative button-group Another-Mixed-Case" />
+		<div checked class=" MixedCaseHTML position-relative button-group Another-Mixed-Case">
+			<button type="button" class="merge-box-button btn-group-merge rounded-left-2 btn  BtnGroup-item js-details-target hx_create-pr-button" aria-expanded="false" data-details-container=".js-merge-pr" disabled="">
+			  Merge pull request
+			</button>
+
+			<button type="button" class="merge-box-button btn-group-squash rounded-left-2 btn  BtnGroup-item js-details-target hx_create-pr-button" aria-expanded="false" data-details-container=".js-merge-pr" disabled="">
+			  Squash and merge
+			</button>
+
+			<button type="button"  aria-expanded="false" data-details-container=".js-merge-pr" disabled="">
+			  Rebase and merge
+			</button>
+
+			<button aria-label="Select merge method" disabled="disabled" type="button" data-view-component="true" class="select-menu-button btn BtnGroup-item"></button>
+		</div>
+	</div>
+</div>
+HTML;
+
+		$p = new WP_HTML_Tag_Processor( $input );
+
+		// Step 1: outermost DIV. The new attribute is expected at the front of the
+		// tag with its double quotes written as &quot;, and the new class appended.
+		$this->assertTrue( $p->next_tag( 'div' ), 'Querying an existing tag did not return true' );
+		$p->set_attribute( 'data-details', '{ "key": "value" }' );
+		$p->add_class( 'is-processed' );
+
+		// Step 2: first DIV carrying the BtnGroup class. Removing the leading class
+		// is expected to leave its trailing space in place (class=" MixedCaseHTML ...").
+		$this->assertTrue(
+			$p->next_tag(
+				array(
+					'tag_name'   => 'div',
+					'class_name' => 'BtnGroup',
+				)
+			),
+			'Querying an existing tag did not return true'
+		);
+		$p->remove_class( 'BtnGroup' );
+		$p->add_class( 'button-group' );
+		$p->add_class( 'Another-Mixed-Case' );
+
+		// Step 3: the same updates on the next DIV carrying the BtnGroup class.
+		$this->assertTrue(
+			$p->next_tag(
+				array(
+					'tag_name'   => 'div',
+					'class_name' => 'BtnGroup',
+				)
+			),
+			'Querying an existing tag did not return true'
+		);
+		$p->remove_class( 'BtnGroup' );
+		$p->add_class( 'button-group' );
+		$p->add_class( 'Another-Mixed-Case' );
+
+		// Step 4: with match_offset 3 the expected output shows the third BUTTON
+		// ("Rebase and merge") losing its class attribute.
+		$this->assertTrue(
+			$p->next_tag(
+				array(
+					'tag_name'     => 'button',
+					'class_name'   => 'btn',
+					'match_offset' => 3,
+				)
+			),
+			'Querying an existing tag did not return true'
+		);
+		$p->remove_attribute( 'class' );
+
+		// Step 5: after a failed query the following set_attribute() call is expected
+		// to have no effect; class="test" appears nowhere in the expected output.
+		$this->assertFalse( $p->next_tag( 'non-existent' ), 'Querying a non-existing tag did not return false' );
+		$p->set_attribute( 'class', 'test' );
+		$this->assertSame( $expected_output, (string) $p, 'Calling __toString after updating the attributes did not return the expected HTML' );
+	}
+
+	/**
+	 * @ticket 56299
+	 *
+	 * @covers remove_attribute
+	 * @covers set_attribute
+	 * @covers __toString
+	 */
+	public function test_correctly_parses_html_attributes_wrapped_in_single_quotation_marks() {
+		$div_query  = array(
+			'tag_name' => 'div',
+			'id'       => 'first',
+		);
+		$span_query = array(
+			'tag_name' => 'span',
+			'id'       => 'second',
+		);
+
+		$processor = new WP_HTML_Tag_Processor( '<div id=\'first\'><span id=\'second\'>Text</span></div>' );
+
+		// Drop the single-quoted id from the DIV.
+		$processor->next_tag( $div_query );
+		$processor->remove_attribute( 'id' );
+
+		// Overwrite the single-quoted id on the SPAN; the expected output shows
+		// the new value written with double quotes.
+		$processor->next_tag( $span_query );
+		$processor->set_attribute( 'id', 'single-quote' );
+
+		$this->assertSame( '<div ><span id="single-quote">Text</span></div>', (string) $processor );
+	}
+
+	/**
+	 * @ticket 56299
+	 *
+	 * @covers set_attribute
+	 * @covers __toString
+	 */
+	public function test_set_attribute_with_value_equals_to_true_adds_a_boolean_html_attribute_with_implicit_value() {
+		$form_html = '<form action="/action_page.php"><input type="checkbox" name="vehicle" value="Bike"><label for="vehicle">I have a bike</label></form>';
+
+		// A value of true is expected to produce a bare `checked` with no `="..."` part.
+		$expected_html = '<form action="/action_page.php"><input checked type="checkbox" name="vehicle" value="Bike"><label for="vehicle">I have a bike</label></form>';
+
+		$processor = new WP_HTML_Tag_Processor( $form_html );
+		$processor->next_tag( 'input' );
+		$processor->set_attribute( 'checked', true );
+
+		$this->assertSame( $expected_html, (string) $processor );
+	}
+
+	/**
+	 * @ticket 56299
+	 *
+	 * @covers set_attribute
+	 * @covers __toString
+	 */
+	public function test_setting_a_boolean_attribute_to_false_removes_it_from_the_markup() {
+		$form_html = '<form action="/action_page.php"><input checked type="checkbox" name="vehicle" value="Bike"><label for="vehicle">I have a bike</label></form>';
+
+		// The attribute is expected to vanish while the spaces around it remain.
+		$expected_html = '<form action="/action_page.php"><input  type="checkbox" name="vehicle" value="Bike"><label for="vehicle">I have a bike</label></form>';
+
+		$processor = new WP_HTML_Tag_Processor( $form_html );
+		$processor->next_tag( 'input' );
+		$processor->set_attribute( 'checked', false );
+
+		$this->assertSame( $expected_html, (string) $processor );
+	}
+
+	/**
+	 * @ticket 56299
+	 *
+	 * @covers set_attribute
+	 * @covers __toString
+	 */
+	public function test_setting_a_missing_attribute_to_false_does_not_change_the_markup() {
+		// The INPUT has no `checked` attribute, so setting it to false should be a no-op.
+		$original_html = '<form action="/action_page.php"><input type="checkbox" name="vehicle" value="Bike"><label for="vehicle">I have a bike</label></form>';
+
+		$processor = new WP_HTML_Tag_Processor( $original_html );
+		$processor->next_tag( 'input' );
+		$processor->set_attribute( 'checked', false );
+
+		$serialized_html = (string) $processor;
+		$this->assertSame( $original_html, $serialized_html );
+	}
+
+	/**
+	 * @ticket 56299
+	 *
+	 * @covers set_attribute
+	 * @covers __toString
+	 */
+	public function test_setting_a_boolean_attribute_to_a_string_value_adds_explicit_value_to_the_markup() {
+		$form_html = '<form action="/action_page.php"><input checked type="checkbox" name="vehicle" value="Bike"><label for="vehicle">I have a bike</label></form>';
+
+		// The bare `checked` is expected to gain an explicit, double-quoted value.
+		$expected_html = '<form action="/action_page.php"><input checked="checked" type="checkbox" name="vehicle" value="Bike"><label for="vehicle">I have a bike</label></form>';
+
+		$processor = new WP_HTML_Tag_Processor( $form_html );
+		$processor->next_tag( 'input' );
+		$processor->set_attribute( 'checked', 'checked' );
+
+		$this->assertSame( $expected_html, (string) $processor );
+	}
+
+	/**
+	 * @ticket 56299
+	 *
+	 * @covers get_tag
+	 * @covers next_tag
+	 */
+	public function test_unclosed_script_tag_should_not_cause_an_infinite_loop() {
+		$processor = new WP_HTML_Tag_Processor( '<script>' );
+
+		// The opener itself must be found.
+		$processor->next_tag();
+		$found_tag = $processor->get_tag();
+		$this->assertSame( 'script', $found_tag );
+
+		// No assertion on the second call: the test passes as long as it returns.
+		$processor->next_tag();
+	}
+
+	/**
+	 * @ticket 56299
+	 *
+	 * @covers next_tag
+	 *
+	 * @dataProvider data_script_state
+	 */
+	public function test_next_tag_ignores_the_contents_of_a_script_tag( $script_then_div ) {
+		// Every dataset starts with a SCRIPT element whose contents contain tag-like
+		// text; none of that text may be reported as a tag.
+		$p = new WP_HTML_Tag_Processor( $script_then_div );
+		$p->next_tag();
+		$this->assertSame( 'script', $p->get_tag(), 'The first found tag was not "script"' );
+
+		// The next reported tag must be the DIV that follows the SCRIPT element.
+		$p->next_tag();
+		$this->assertSame( 'div', $p->get_tag(), 'The second found tag was not "div"' );
+	}
+
+	/**
+	 * Data provider for test_next_tag_ignores_the_contents_of_a_script_tag().
+	 *
+	 * @return array {
+	 *     @type array {
+	 *         @type string $script_then_div The HTML snippet containing script and div tags.
+	 *     }
+	 * }
+	 */
+	public function data_script_state() {
+		// Each dataset holds one string: markup that opens with a SCRIPT element and
+		// contains a DIV which the consuming test expects to be the second tag found.
+		return array(
+			'Simple script tag' => array(
+				'<script><span class="d-none d-md-inline">Back to notifications</span></script><div></div>',
+			),
+
+			'Simple uppercase script tag' => array(
+				'<script><span class="d-none d-md-inline">Back to notifications</span></SCRIPT><div></div>',
+			),
+
+			'Script with a comment opener inside should end at the next script tag closer (dash dash escaped state)' => array(
+				'<script class="d-md-none"><!--</script><div></div>-->',
+			),
+
+			'Script with a comment opener and a script tag opener inside should end two script tag closer later (double escaped state)' => array(
+				'<script class="d-md-none"><!--<script><span1></script><span2></span2></script><div></div>-->',
+			),
+
+			'Double escaped script with a tricky opener' => array(
+				'<script class="d-md-none"><!--<script attr="</script>"></script>"><div></div>',
+			),
+
+			'Double escaped script with a tricky closer' => array(
+				'<script class="d-md-none"><!--<script><span></script attr="</script>"><div></div>',
+			),
+
+			'Double escaped, then escaped, then double escaped' => array(
+				'<script class="d-md-none"><!--<script></script><script></script><span></span></script><div></div>',
+			),
+
+			'Script with a commented a script tag opener inside should at the next tag closer (dash dash escaped state)' => array(
+				'<script class="d-md-none"><!--<script>--><span></script><div></div>-->',
+			),
+
+			'Script closer with another script tag in closer attributes' => array(
+				'<script><span class="d-none d-md-inline">Back to notifications</title</span></script <script><div></div>',
+			),
+
+			'Script closer with attributes' => array(
+				'<script class="d-md-none"><span class="d-none d-md-inline">Back to notifications</span></script id="test"><div></div>',
+			),
+
+			'Script opener with title closer inside' => array(
+				'<script class="d-md-none"></title></script><div></div>',
+			),
+
+			'Complex script with many parsing states' => array(
+				'<script class="d-md-none"><!--<script>--><scRipt><span><!--<span><Script</script>--></scripT><div></div>-->',
+			),
+		);
+	}
+
+	/**
+	 * @ticket 56299
+	 *
+	 * @covers next_tag
+	 *
+	 * @dataProvider data_rcdata_state
+	 */
+	public function test_next_tag_ignores_the_contents_of_a_rcdata_tag( $rcdata_then_div, $rcdata_tag ) {
+		$processor = new WP_HTML_Tag_Processor( $rcdata_then_div );
+
+		// First stop: the RCDATA element named by the dataset.
+		$processor->next_tag();
+		$first_tag = $processor->get_tag();
+		$this->assertSame( $rcdata_tag, $first_tag, "The first found tag was not '{$rcdata_tag}'" );
+
+		// Second stop: the DIV after it; nothing inside the RCDATA element counts.
+		$processor->next_tag();
+		$second_tag = $processor->get_tag();
+		$this->assertSame( 'div', $second_tag, "The second found tag was not 'div'" );
+	}
+
+	/**
+	 * Data provider for test_next_tag_ignores_the_contents_of_a_rcdata_tag().
+	 *
+	 * @return array {
+	 *     @type array {
+	 *         @type string $rcdata_then_div The HTML snippet containing RCDATA and div tags.
+	 *         @type string $rcdata_tag      The RCDATA tag.
+	 *     }
+	 * }
+	 */
+	public function data_rcdata_state() {
+		// Each dataset is array( markup, tag name ): the markup opens with the named
+		// RCDATA element and contains a DIV the consuming test expects to find next.
+		return array(
+			'Simple textarea' => array(
+				'<textarea><span class="d-none d-md-inline">Back to notifications</span></textarea><div></div>',
+				'textarea',
+			),
+
+			'Simple title' => array(
+				'<title><span class="d-none d-md-inline">Back to notifications</title</span></title><div></div>',
+				'title',
+			),
+
+			'Comment opener inside a textarea tag should be ignored' => array(
+				'<textarea class="d-md-none"><!--</textarea><div></div>-->',
+				'textarea',
+			),
+
+			'Textarea closer with another textarea tag in closer attributes' => array(
+				'<textarea><span class="d-none d-md-inline">Back to notifications</title</span></textarea <textarea><div></div>',
+				'textarea',
+			),
+
+			'Textarea closer with attributes' => array(
+				'<textarea class="d-md-none"><span class="d-none d-md-inline">Back to notifications</span></textarea id="test"><div></div>',
+				'textarea',
+			),
+
+			'Textarea opener with title closer inside' => array(
+				'<textarea class="d-md-none"></title></textarea><div></div>',
+				'textarea',
+			),
+		);
+	}
+
+	/**
+	 * @ticket 56299
+	 *
+	 * @covers next_tag
+	 * @covers set_attribute
+	 * @covers __toString
+	 */
+	public function test_can_query_and_update_wrongly_nested_tags() {
+		// The SPAN closes while the P is still open; both openers must still be updatable.
+		$processor = new WP_HTML_Tag_Processor( '<span>123<p>456</span>789</p>' );
+
+		$class_by_tag = array(
+			'span' => 'span-class',
+			'p'    => 'p-class',
+		);
+		foreach ( $class_by_tag as $tag_name => $class_value ) {
+			$processor->next_tag( $tag_name );
+			$processor->set_attribute( 'class', $class_value );
+		}
+
+		$expected_html = '<span class="span-class">123<p class="p-class">456</span>789</p>';
+		$this->assertSame( $expected_html, (string) $processor );
+	}
+
+	/**
+	 * @ticket 56299
+	 *
+	 * @covers next_tag
+	 * @covers remove_attribute
+	 * @covers __toString
+	 */
+	public function test_removing_attributes_works_even_in_malformed_html() {
+		// In the expected markup the text `Notifications<` has been removed from
+		// inside the first SPAN opener, i.e. it was treated as an attribute name.
+		$expected_html = '<div><span class="d-md-none" /span><span class="d-none d-md-inline">Back to notifications</span></div>';
+
+		$processor = new WP_HTML_Tag_Processor( self::HTML_MALFORMED );
+		$processor->next_tag( 'span' );
+		$processor->remove_attribute( 'Notifications<' );
+
+		$this->assertSame( $expected_html, (string) $processor );
+	}
+
+	/**
+	 * @ticket 56299
+	 *
+	 * @covers next_tag
+	 * @covers set_attribute
+	 * @covers __toString
+	 */
+	public function test_updating_attributes_works_even_in_malformed_html_1() {
+		$processor = new WP_HTML_Tag_Processor( self::HTML_MALFORMED );
+
+		// Give each of the two SPAN openers its own id, in document order.
+		foreach ( array( 'first', 'second' ) as $id_value ) {
+			$processor->next_tag( 'span' );
+			$processor->set_attribute( 'id', $id_value );
+		}
+
+		$expected_html = '<div><span id="first" class="d-md-none" Notifications</span><span id="second" class="d-none d-md-inline">Back to notifications</span></div>';
+		$this->assertSame( $expected_html, (string) $processor );
+	}
+
+	/**
+	 * @ticket 56299
+	 *
+	 * @covers next_tag
+	 * @covers set_attribute
+	 * @covers add_class
+	 * @covers __toString
+	 *
+	 * @dataProvider data_malformed_tag
+	 */
+	public function test_updating_attributes_works_even_in_malformed_html_2( $html_input, $html_expected ) {
+		$processor = new WP_HTML_Tag_Processor( $html_input );
+
+		// First tag found: gets a new attribute and a new class.
+		$processor->next_tag();
+		$processor->set_attribute( 'foo', 'bar' );
+		$processor->add_class( 'firstTag' );
+
+		// Second tag found: gets only a new class.
+		$processor->next_tag();
+		$processor->add_class( 'secondTag' );
+
+		$actual_html = (string) $processor;
+		$this->assertSame( $html_expected, $actual_html );
+	}
+
+	/**
+	 * Data provider for test_updating_attributes_works_even_in_malformed_html_2().
+	 *
+	 * @return array {
+	 *     @type array {
+	 *         @type string $html_input    The input HTML snippet.
+	 *         @type string $html_expected The expected HTML snippet after processing.
+	 *     }
+	 * }
+	 */
+	public function data_malformed_tag() {
+		// Each dataset is array( input HTML, expected HTML ). The expected strings
+		// show foo="bar" and the firstTag class on the first tag found, and the
+		// secondTag class on the tag found after it (when there is one).
+
+		// A literal NUL character; dataset '7' embeds it in an attribute name.
+		$null_byte = chr( 0 );
+		$examples  = array();
+		$examples['Invalid entity inside attribute value'] = array(
+			'<img src="https://s0.wp.com/i/atat.png" title="&; First &lt;title&gt; is &notit;" TITLE="second title" title="An Imperial &imperial; AT-AT"><span>test</span>',
+			'<img foo="bar" class="firstTag" src="https://s0.wp.com/i/atat.png" title="&; First &lt;title&gt; is &notit;" TITLE="second title" title="An Imperial &imperial; AT-AT"><span class="secondTag">test</span>',
+		);
+
+		// The PRE already has a class attribute, so firstTag is expected to be
+		// appended to its existing value instead of creating a new attribute.
+		$examples['HTML tag opening inside attribute value'] = array(
+			'<pre id="<code" class="wp-block-code <code is poetry&gt;"><code>This &lt;is> a &lt;strong is="true">thing.</code></pre><span>test</span>',
+			'<pre foo="bar" id="<code" class="wp-block-code <code is poetry&gt; firstTag"><code class="secondTag">This &lt;is> a &lt;strong is="true">thing.</code></pre><span>test</span>',
+		);
+
+		$examples['HTML tag brackets in attribute values and data markup'] = array(
+			'<pre id="<code-&gt;-block-&gt;" class="wp-block-code <code is poetry&gt;"><code>This &lt;is> a &lt;strong is="true">thing.</code></pre><span>test</span>',
+			'<pre foo="bar" id="<code-&gt;-block-&gt;" class="wp-block-code <code is poetry&gt; firstTag"><code class="secondTag">This &lt;is> a &lt;strong is="true">thing.</code></pre><span>test</span>',
+		);
+
+		$examples['Single and double quotes in attribute value'] = array(
+			'<p title="Demonstrating how to use single quote (\') and double quote (&quot;)"><span>test</span>',
+			'<p foo="bar" class="firstTag" title="Demonstrating how to use single quote (\') and double quote (&quot;)"><span class="secondTag">test</span>',
+		);
+
+		$examples['Unquoted attribute values'] = array(
+			'<hr a=1 a=2 a=3 a=5 /><span>test</span>',
+			'<hr foo="bar" class="firstTag" a=1 a=2 a=3 a=5 /><span class="secondTag">test</span>',
+		);
+
+		$examples['Double-quotes escaped in double-quote attribute value'] = array(
+			'<hr title="This is a &quot;double-quote&quot;"><span>test</span>',
+			'<hr foo="bar" class="firstTag" title="This is a &quot;double-quote&quot;"><span class="secondTag">test</span>',
+		);
+
+		$examples['Unquoted attribute value'] = array(
+			'<hr id=code><span>test</span>',
+			'<hr foo="bar" class="firstTag" id=code><span class="secondTag">test</span>',
+		);
+
+		// The whitespace between `id=` and `<code>` is a space followed by a tab.
+		$examples['Unquoted attribute value with tag-like value'] = array(
+			'<hr id= 	<code> ><span>test</span>',
+			'<hr foo="bar" class="firstTag" id= 	<code> ><span class="secondTag">test</span>',
+		);
+
+		$examples['Unquoted attribute value with tag-like value followed by tag-like data'] = array(
+			'<hr id=code>><span>test</span>',
+			'<hr foo="bar" class="firstTag" id=code>><span class="secondTag">test</span>',
+		);
+
+		$examples['1'] = array(
+			'<hr id=&quo;code><span>test</span>',
+			'<hr foo="bar" class="firstTag" id=&quo;code><span class="secondTag">test</span>',
+		);
+
+		$examples['2'] = array(
+			'<hr id/test=5><span>test</span>',
+			'<hr foo="bar" class="firstTag" id/test=5><span class="secondTag">test</span>',
+		);
+
+		$examples['4'] = array(
+			'<hr title="<hr>"><span>test</span>',
+			'<hr foo="bar" class="firstTag" title="<hr>"><span class="secondTag">test</span>',
+		);
+
+		$examples['5'] = array(
+			'<hr id=>code><span>test</span>',
+			'<hr foo="bar" class="firstTag" id=>code><span class="secondTag">test</span>',
+		);
+
+		$examples['6'] = array(
+			'<hr id"quo="test"><span>test</span>',
+			'<hr foo="bar" class="firstTag" id"quo="test"><span class="secondTag">test</span>',
+		);
+
+		$examples['7'] = array(
+			'<hr id' . $null_byte . 'zero="test"><span>test</span>',
+			'<hr foo="bar" class="firstTag" id' . $null_byte . 'zero="test"><span class="secondTag">test</span>',
+		);
+
+		$examples['8'] = array(
+			'<hr >id="test"><span>test</span>',
+			'<hr foo="bar" class="firstTag" >id="test"><span class="secondTag">test</span>',
+		);
+
+		$examples['9'] = array(
+			'<hr =id="test"><span>test</span>',
+			'<hr foo="bar" class="firstTag" =id="test"><span class="secondTag">test</span>',
+		);
+
+		// In datasets '10' through '15' the input starts with something that is not
+		// expected to be matched as a tag. For '10', '13', '14' and '15' the SPAN is
+		// the only tag found, so it receives the first-tag updates and no secondTag
+		// class appears. For '11' and '12' the `<*` and `<3` sequences are passed
+		// over and the first real tag that follows receives the first-tag updates.
+		$examples['10'] = array(
+			'</><span>test</span>',
+			'</><span foo="bar" class="firstTag">test</span>',
+		);
+
+		$examples['11'] = array(
+			'The applicative operator <* works well in Haskell; <data-tag> is what?<span>test</span>',
+			'The applicative operator <* works well in Haskell; <data-tag foo="bar" class="firstTag"> is what?<span class="secondTag">test</span>',
+		);
+
+		$examples['12'] = array(
+			'<3 is a heart but <t3> is a tag.<span>test</span>',
+			'<3 is a heart but <t3 foo="bar" class="firstTag"> is a tag.<span class="secondTag">test</span>',
+		);
+
+		$examples['13'] = array(
+			'<?comment --><span>test</span>',
+			'<?comment --><span foo="bar" class="firstTag">test</span>',
+		);
+
+		$examples['14'] = array(
+			'<!-- this is a comment. no <strong>tags</strong> allowed --><span>test</span>',
+			'<!-- this is a comment. no <strong>tags</strong> allowed --><span foo="bar" class="firstTag">test</span>',
+		);
+
+		$examples['15'] = array(
+			'<![CDATA[This <is> a <strong id="yes">HTML Tag</strong>]]><span>test</span>',
+			'<![CDATA[This <is> a <strong id="yes">HTML Tag</strong>]]><span foo="bar" class="firstTag">test</span>',
+		);
+
+		// Datasets '16' through '25' place stray `=` and `"` characters where
+		// attribute names are expected; the original attribute text is expected to
+		// be left as-is behind the inserted attributes.
+		$examples['16'] = array(
+			'<hr ===name="value"><span>test</span>',
+			'<hr foo="bar" class="firstTag" ===name="value"><span class="secondTag">test</span>',
+		);
+
+		$examples['17'] = array(
+			'<hr asdf="test"><span>test</span>',
+			'<hr foo="bar" class="firstTag" asdf="test"><span class="secondTag">test</span>',
+		);
+
+		$examples['18'] = array(
+			'<hr =asdf="tes"><span>test</span>',
+			'<hr foo="bar" class="firstTag" =asdf="tes"><span class="secondTag">test</span>',
+		);
+
+		$examples['19'] = array(
+			'<hr ==="test"><span>test</span>',
+			'<hr foo="bar" class="firstTag" ==="test"><span class="secondTag">test</span>',
+		);
+
+		$examples['20'] = array(
+			'<hr =><span>test</span>',
+			'<hr foo="bar" class="firstTag" =><span class="secondTag">test</span>',
+		);
+
+		$examples['21'] = array(
+			'<hr =5><span>test</span>',
+			'<hr foo="bar" class="firstTag" =5><span class="secondTag">test</span>',
+		);
+
+		$examples['22'] = array(
+			'<hr ==><span>test</span>',
+			'<hr foo="bar" class="firstTag" ==><span class="secondTag">test</span>',
+		);
+
+		$examples['23'] = array(
+			'<hr ===><span>test</span>',
+			'<hr foo="bar" class="firstTag" ===><span class="secondTag">test</span>',
+		);
+
+		$examples['24'] = array(
+			'<hr disabled><span>test</span>',
+			'<hr foo="bar" class="firstTag" disabled><span class="secondTag">test</span>',
+		);
+
+		$examples['25'] = array(
+			'<hr a"sdf="test"><span>test</span>',
+			'<hr foo="bar" class="firstTag" a"sdf="test"><span class="secondTag">test</span>',
+		);
+
+		// Multi-line fixture: the line breaks inside the string literals are part of
+		// the data. Only the first `<hr` receives the first-tag updates and the SPAN
+		// receives secondTag, i.e. the four `<hr` lines count as one tag.
+		$examples['Multiple unclosed tags treated as a single tag'] = array(
+			'<hr id=">"code
+<hr id="value>"code
+<hr id="/>"code
+<hr id="value/>"code
+/>
+<span>test</span>',
+			'<hr foo="bar" class="firstTag" id=">"code
+<hr id="value>"code
+<hr id="/>"code
+<hr id="value/>"code
+/>
+<span class="secondTag">test</span>',
+		);
+
+		$examples['27'] = array(
+			'<hr id   =5><span>test</span>',
+			'<hr foo="bar" class="firstTag" id   =5><span class="secondTag">test</span>',
+		);
+
+		$examples['28'] = array(
+			'<hr id a  =5><span>test</span>',
+			'<hr foo="bar" class="firstTag" id a  =5><span class="secondTag">test</span>',
+		);
+
+		return $examples;
+	}
+}