diff --git a/src/wp-includes/formatting.php b/src/wp-includes/formatting.php
index 7c03c484ddb70..f3ee41537cd9e 100644
--- a/src/wp-includes/formatting.php
+++ b/src/wp-includes/formatting.php
@@ -604,16 +604,116 @@ function wpautop( $text, $br = true ) {
 	return $text;
 }
 
+/**
+ * Returns a Tag Processor exposing the raw matched tokens.
+ *
+ * @since 6.6.0
+ *
+ * @param string $html Passed into the Tag Processor.
+ * @return WP_HTML_Tag_Processor Anonymous subclass which adds `get_raw_token()`.
+ */
+function wp_get_internal_tag_processor( $html ) {
+	return new class( $html ) extends WP_HTML_Tag_Processor {
+		/**
+		 * Returns the raw token from the input string at the current location.
+		 *
+		 * @return string|false Token text, or false when not paused at a token.
+		 */
+		public function get_raw_token() {
+			$not_at_token = in_array(
+				$this->parser_state,
+				array( self::STATE_READY, self::STATE_INCOMPLETE_INPUT, self::STATE_COMPLETE ),
+				true
+			);
+			if ( $not_at_token || ! $this->set_bookmark( 'here' ) ) {
+				return false;
+			}
+
+			// Copy the span out, then free the bookmark slot rather than leaving it set.
+			$here = $this->bookmarks['here'];
+			$this->release_bookmark( 'here' );
+			return substr( $this->html, $here->start, $here->length );
+		}
+	};
+}
+
 /**
  * Separates HTML elements and comments from the text.
  *
+ * Tokenizes an HTML document into its components and returns them in order:
+ * text at even indices, every other kind of token at odd indices.
+ *
  * @since 4.2.4
+ * @since 6.6.0 Relies on the HTML API for parsing.
  *
- * @param string $input The text which has to be formatted.
- * @return string[] Array of the formatted text.
+ * @param string $input_html Raw HTML potentially containing a mixture of tags,
+ *                           comments, text nodes, and other syntax.
+ * @return string[] Raw tokens of the document, in document order.
  */
-function wp_html_split( $input ) {
-	return preg_split( get_html_split_regex(), $input, -1, PREG_SPLIT_DELIM_CAPTURE );
+function wp_html_split( $input_html ) {
+	$chunks    = array();
+	$processor = wp_get_internal_tag_processor( $input_html );
+
+	while ( $processor->next_token() ) {
+		/*
+		 * There's a legacy behavior where text nodes are always stored in even
+		 * indices and "elements" are stored in odd indices. To preserve this,
+		 * empty text nodes are inserted when there's none between other syntax
+		 * tokens.
+		 */
+		if ( 0 === count( $chunks ) % 2 && '#text' !== $processor->get_token_name() ) {
+			$chunks[] = '';
+		}
+
+		/*
+		 * Only an opener spans a whole element. An unexpected closer, such as
+		 * a stray `</script>`, is a token of its own and is emitted unchanged.
+		 */
+		$is_special_atomic_element = ! $processor->is_tag_closer() && in_array(
+			$processor->get_tag(),
+			array( 'SCRIPT', 'STYLE', 'XMP', 'NOEMBED', 'NOFRAMES', 'TITLE', 'TEXTAREA' ),
+			true
+		);
+
+		if ( ! $is_special_atomic_element ) {
+			$chunks[] = $processor->get_raw_token();
+			continue;
+		}
+
+		/*
+		 * For special atomic tags, it's necessary to redo some work to find
+		 * the opening and closing tag, because the Tag Processor consumes
+		 * them all in one go.
+		 *
+		 * Replacing the first character of the tag name tricks the Tag
+		 * Processor into treating it as non-special content. The end tag of
+		 * a special element is matched even when unexpected, so the final
+		 * closer is still found. "Y" replaces an "X" so that XMP is renamed too.
+		 */
+		$raw_html    = $processor->get_raw_token();
+		$first_char  = $raw_html[1];
+		$raw_html[1] = 'X' === strtoupper( $first_char ) ? 'Y' : 'X';
+		$special     = wp_get_internal_tag_processor( $raw_html );
+
+		// The first tag is the modified tag; restore its name before storing it.
+		$special->next_tag();
+		$opening_tag    = $special->get_raw_token();
+		$opening_tag[1] = $first_char;
+		$chunks[]       = $opening_tag;
+
+		// Walk to the last tag in the element, which is expected to be its closing tag.
+		$special->set_bookmark( 'last' );
+		while ( $special->next_tag( array( 'tag_closers' => 'visit' ) ) ) {
+			$special->set_bookmark( 'last' );
+		}
+		$special->seek( 'last' );
+		$closing_tag = $special->get_raw_token();
+
+		$chunks[] = substr( $raw_html, strlen( $opening_tag ), -strlen( $closing_tag ) );
+		$chunks[] = $closing_tag;
+	}
+
+	return $chunks;
 }
 
 /**
diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php
index 4597a888b5efe..26d22c072e48e 100644
--- a/src/wp-includes/html-api/class-wp-html-tag-processor.php
+++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php
@@ -926,8 +926,8 @@ private function base_class_next_token() {
 			return false;
 		}
 		$this->parser_state         = self::STATE_MATCHED_TAG;
-		$this->token_length         = $tag_ends_at - $this->token_starts_at;
 		$this->bytes_already_parsed = $tag_ends_at + 1;
+		$this->token_length         = $this->bytes_already_parsed - $this->token_starts_at;
 
 		/*
 		 * For non-DATA sections which might contain text that looks like HTML tags but
@@ -1013,7 +1013,7 @@ private function base_class_next_token() {
 		 */
 		$this->token_starts_at      = $was_at;
 		$this->token_length         = $this->bytes_already_parsed - $this->token_starts_at;
-		$this->text_starts_at       = $tag_ends_at + 1;
+		$this->text_starts_at       = $tag_ends_at;
 		$this->text_length          = $this->tag_name_starts_at - $this->text_starts_at;
 		$this->tag_name_starts_at   = $tag_name_starts_at;
 		$this->tag_name_length      = $tag_name_length;
@@ -2687,7 +2687,7 @@ public function has_self_closing_flag() {
 		 *     <figure />
 		 *             ^ this appears one character before the end of the closing ">".
 		 */
-		return '/' === $this->html[ $this->token_starts_at + $this->token_length - 1 ];
+		return '/' === $this->html[ $this->token_starts_at + $this->token_length - 2 ];
 	}
 
 	/**
diff --git a/src/wp-includes/interactivity-api/class-wp-interactivity-api-directives-processor.php b/src/wp-includes/interactivity-api/class-wp-interactivity-api-directives-processor.php
index 3b2dcb1237971..b12dcb4b3b158 100644
--- a/src/wp-includes/interactivity-api/class-wp-interactivity-api-directives-processor.php
+++ b/src/wp-includes/interactivity-api/class-wp-interactivity-api-directives-processor.php
@@ -107,7 +107,7 @@ public function append_content_after_template_tag_closer( string $new_content ):
 
 		$bookmark = 'append_content_after_template_tag_closer';
 		$this->set_bookmark( $bookmark );
-		$after_closing_tag = $this->bookmarks[ $bookmark ]->start + $this->bookmarks[ $bookmark ]->length + 1;
+		$after_closing_tag = $this->bookmarks[ $bookmark ]->start + $this->bookmarks[ $bookmark ]->length;
 		$this->release_bookmark( $bookmark );
 
 		// Appends the new content.
@@ -140,7 +140,7 @@ private function get_after_opener_tag_and_before_closer_tag_positions( bool $rew
 		}
 		list( $opener_tag, $closer_tag ) = $bookmarks;
 
-		$after_opener_tag  = $this->bookmarks[ $opener_tag ]->start + $this->bookmarks[ $opener_tag ]->length + 1;
+		$after_opener_tag  = $this->bookmarks[ $opener_tag ]->start + $this->bookmarks[ $opener_tag ]->length;
 		$before_closer_tag = $this->bookmarks[ $closer_tag ]->start;
 
 		if ( $rewind ) {
diff --git a/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php b/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php
index 824630b33516a..ddebb7d98b4fb 100644
--- a/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php
+++ b/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php
@@ -2746,7 +2746,7 @@ public function test_applies_updates_before_proceeding() {
 			public function insert_after( $new_html ) {
 				$this->set_bookmark( 'here' );
 				$this->lexical_updates[] = new WP_HTML_Text_Replacement(
-					$this->bookmarks['here']->start + $this->bookmarks['here']->length + 1,
+					$this->bookmarks['here']->start + $this->bookmarks['here']->length,
 					0,
 					$new_html
 				);