From 726bcfdc12cec8f3d897779923975410be831df9 Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Fri, 11 Jul 2025 17:25:14 -0500 Subject: [PATCH 1/8] HTML API: Refactor `wp_kses_hair()` (#9248) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Trac ticket: Core-63694 `wp_kses_hair()` is built around an impressive state machine for parsing the `$attr` of an HTML tag, that is, the span of text after the tag name and before the closing `>`. Unfortunately, that parsing code doesn’t fully implement the HTML specification and may be prone to mis-parsing. This patch replaces the existing state machine with a straightforward use of the HTML API to parse the attributes for us, constructing a shell tag for the `$attr` string and reading the attributes structurally. This shell is necessary because a previous stage of the pipeline has already separated what it thinks is the so-called “attribute list” from a tag. Props: dmsnell --- src/wp-includes/kses.php | 188 +++++++++------------------------- tests/phpunit/tests/media.php | 2 +- 2 files changed, 51 insertions(+), 139 deletions(-) diff --git a/src/wp-includes/kses.php b/src/wp-includes/kses.php index 35327e1a01cce..38d6e2a437309 100644 --- a/src/wp-includes/kses.php +++ b/src/wp-includes/kses.php @@ -1585,160 +1585,72 @@ function wp_kses_attr_check( &$name, &$value, &$whole, $vless, $element, $allowe } /** - * Builds an attribute list from string containing attributes. - * - * This function does a lot of work. It parses an attribute list into an array - * with attribute data, and tries to do the right thing even if it gets weird - * input. It will add quotes around attribute values that don't have any quotes - * or apostrophes around them, to make it easier to produce HTML code that will - * conform to W3C's HTML specification. It will also remove bad URL protocols - * from attribute values. 
It also reduces duplicate attributes by using the - * attribute defined first (`foo='bar' foo='baz'` will result in `foo='bar'`). + * Given a string of HTML attributes and values, parse into a structured attribute list. + * + * This function performs a number of transformations while parsing attribute strings: + * - It normalizes attribute values and surrounds them with double quotes. + * - It normalizes HTML character references inside attribute values. + * - It removes “bad” URL protocols from attribute values. + * + * Otherwise this reads the attributes as if they were part of an HTML tag. It performs + * these transformations to lower the risk of mis-parsing down the line and to perform + * URL sanitization in line with the rest of the `kses` subsystem. Importantly, it does + * not decode the attribute values, meaning that special HTML syntax characters will + * be left with character references in the `value` property. + * + * Example: + * + * $attrs = wp_kses_hair( 'class="is-wide" inert data-lazy=\'<img>\' =/🐮=/' ); + * $attrs === array( + * 'class' => array( 'name' => 'class', 'value' => 'is-wide', 'whole' => 'class="is-wide"', 'vless' => 'n' ), + * 'inert' => array( 'name' => 'inert', 'value' => '', 'whole' => 'inert', 'vless' => 'y' ), + * 'data-lazy' => array( 'name' => 'data-lazy', 'value' => '&lt;img&gt;', 'whole' => 'data-lazy="&lt;img&gt;"', 'vless' => 'n' ), + * '=' => array( 'name' => '=', 'value' => '', 'whole' => '=', 'vless' => 'y' ), + * '🐮' => array( 'name' => '🐮', 'value' => '/', 'whole' => '🐮="/"', 'vless' => 'n' ), + * ); * * @since 1.0.0 + * @since 6.9.0 Rebuilt on HTML API * * @param string $attr Attribute list from HTML element to closing HTML element tag. * @param string[] $allowed_protocols Array of allowed URL protocols. * @return array[] Array of attribute information after parsing. 
*/ function wp_kses_hair( $attr, $allowed_protocols ) { - $attrarr = array(); - $mode = 0; - $attrname = ''; - $uris = wp_kses_uri_attributes(); - - // Loop through the whole attribute list. - - while ( strlen( $attr ) !== 0 ) { - $working = 0; // Was the last operation successful? - - switch ( $mode ) { - case 0: - if ( preg_match( '/^([_a-zA-Z][-_a-zA-Z0-9:.]*)/', $attr, $match ) ) { - $attrname = $match[1]; - $working = 1; - $mode = 1; - $attr = preg_replace( '/^[_a-zA-Z][-_a-zA-Z0-9:.]*/', '', $attr ); - } - - break; - - case 1: - if ( preg_match( '/^\s*=\s*/', $attr ) ) { // Equals sign. - $working = 1; - $mode = 2; - $attr = preg_replace( '/^\s*=\s*/', '', $attr ); - break; - } - - if ( preg_match( '/^\s+/', $attr ) ) { // Valueless. - $working = 1; - $mode = 0; - - if ( false === array_key_exists( $attrname, $attrarr ) ) { - $attrarr[ $attrname ] = array( - 'name' => $attrname, - 'value' => '', - 'whole' => $attrname, - 'vless' => 'y', - ); - } - - $attr = preg_replace( '/^\s+/', '', $attr ); - } - - break; - - case 2: - if ( preg_match( '%^"([^"]*)"(\s+|/?$)%', $attr, $match ) ) { - // "value" - $thisval = $match[1]; - if ( in_array( strtolower( $attrname ), $uris, true ) ) { - $thisval = wp_kses_bad_protocol( $thisval, $allowed_protocols ); - } - - if ( false === array_key_exists( $attrname, $attrarr ) ) { - $attrarr[ $attrname ] = array( - 'name' => $attrname, - 'value' => $thisval, - 'whole' => "$attrname=\"$thisval\"", - 'vless' => 'n', - ); - } - - $working = 1; - $mode = 0; - $attr = preg_replace( '/^"[^"]*"(\s+|$)/', '', $attr ); - break; - } + $attributes = array(); + $uris = wp_kses_uri_attributes(); - if ( preg_match( "%^'([^']*)'(\s+|/?$)%", $attr, $match ) ) { - // 'value' - $thisval = $match[1]; - if ( in_array( strtolower( $attrname ), $uris, true ) ) { - $thisval = wp_kses_bad_protocol( $thisval, $allowed_protocols ); - } - - if ( false === array_key_exists( $attrname, $attrarr ) ) { - $attrarr[ $attrname ] = array( - 'name' => $attrname, - 
'value' => $thisval, - 'whole' => "$attrname='$thisval'", - 'vless' => 'n', - ); - } - - $working = 1; - $mode = 0; - $attr = preg_replace( "/^'[^']*'(\s+|$)/", '', $attr ); - break; - } + $processor = new WP_HTML_Tag_Processor( "<wp {$attr}>" ); + $processor->next_token(); - if ( preg_match( "%^([^\s\"']+)(\s+|/?$)%", $attr, $match ) ) { - // value - $thisval = $match[1]; - if ( in_array( strtolower( $attrname ), $uris, true ) ) { - $thisval = wp_kses_bad_protocol( $thisval, $allowed_protocols ); - } - - if ( false === array_key_exists( $attrname, $attrarr ) ) { - $attrarr[ $attrname ] = array( - 'name' => $attrname, - 'value' => $thisval, - 'whole' => "$attrname=\"$thisval\"", - 'vless' => 'n', - ); - } - - // We add quotes to conform to W3C's HTML spec. - $working = 1; - $mode = 0; - $attr = preg_replace( "%^[^\s\"']+(\s+|$)%", '', $attr ); - } + foreach ( $processor->get_attribute_names_with_prefix( '' ) as $name ) { + $value = $processor->get_attribute( $name ); + $is_bool = true === $value; + if ( is_string( $value ) && in_array( $name, $uris, true ) ) { + $value = wp_kses_bad_protocol( $value, $allowed_protocols ); + } - break; - } // End switch. + // Reconstruct and normalize the attribute value. + $syntax_characters = array( + '&' => '&amp;', + '<' => '&lt;', + '>' => '&gt;', + "'" => '&apos;', + '"' => '&quot;', + ); - if ( 0 === $working ) { // Not well-formed, remove and try again. - $attr = wp_kses_html_error( $attr ); - $mode = 0; - } - } // End while. + $recoded = $is_bool ? '' : strtr( $value, $syntax_characters ); + $whole = $is_bool ? $name : "{$name}=\"{$recoded}\""; - if ( 1 === $mode && false === array_key_exists( $attrname, $attrarr ) ) { - /* - * Special case, for when the attribute list ends with a valueless - * attribute like "selected". - */ - $attrarr[ $attrname ] = array( - 'name' => $attrname, - 'value' => '', - 'whole' => $attrname, - 'vless' => 'y', + $attributes[ $name ] = array( + 'name' => $name, + 'value' => $recoded, + 'whole' => $whole, + 'vless' => $is_bool ? 
'y' : 'n', ); } - return $attrarr; + return $attributes; } /** diff --git a/tests/phpunit/tests/media.php b/tests/phpunit/tests/media.php index 1ab836fe856d8..84cf96c082f1c 100644 --- a/tests/phpunit/tests/media.php +++ b/tests/phpunit/tests/media.php @@ -227,7 +227,7 @@ public function test_new_img_caption_shortcode_with_html_caption() { $this->assertStringNotContainsString( self::HTML_CONTENT, $mark, - 'Test caption content should not contain the mark surround it: check test setup.' + 'Test caption content should not contain the mark surrounding it: check test setup.' ); $result = img_caption_shortcode( From e5c90d335f0583be164743d09d53e4d15ab2d2a3 Mon Sep 17 00:00:00 2001 From: mayor Date: Fri, 9 Jan 2026 17:46:43 +0100 Subject: [PATCH 2/8] Tests: Add comprehensive test suite for wp_kses_hair() Related: https://github.com/WordPress/wordpress-develop/pull/9248 Co-Authored-By: Claude Sonnet 4.5 --- tests/phpunit/tests/kses/wpKsesHair.php | 1001 +++++++++++++++++++++++ 1 file changed, 1001 insertions(+) create mode 100644 tests/phpunit/tests/kses/wpKsesHair.php diff --git a/tests/phpunit/tests/kses/wpKsesHair.php b/tests/phpunit/tests/kses/wpKsesHair.php new file mode 100644 index 0000000000000..74dcca98a02ed --- /dev/null +++ b/tests/phpunit/tests/kses/wpKsesHair.php @@ -0,0 +1,1001 @@ +allowed_protocols = wp_allowed_protocols(); + } + + /** + * Test wp_kses_hair() with various attribute patterns. + * + * @ticket 63724 + * @dataProvider data_attribute_parsing + * @covers ::wp_kses_hair + */ + public function test_attribute_parsing( $input, $expected ) { + $result = wp_kses_hair( $input, $this->allowed_protocols ); + $this->assertSame( $expected, $result ); + } + + /** + * Data provider for attribute parsing tests. + * + * @return Generator + */ + public function data_attribute_parsing() { + // Basic Attribute Parsing. 
+ yield 'single attribute with double quotes' => array( + 'class="test-class"', + array( + 'class' => array( + 'name' => 'class', + 'value' => 'test-class', + 'whole' => 'class="test-class"', + 'vless' => 'n', + ), + ), + ); + + yield 'single attribute with single quotes' => array( + "title='My Title'", + array( + 'title' => array( + 'name' => 'title', + 'value' => 'My Title', + 'whole' => "title='My Title'", + 'vless' => 'n', + ), + ), + ); + + yield 'unquoted attribute value' => array( + 'id=test123', + array( + 'id' => array( + 'name' => 'id', + 'value' => 'test123', + 'whole' => 'id="test123"', + 'vless' => 'n', + ), + ), + ); + + yield 'multiple attributes' => array( + 'class="btn" id="submit-btn" data-value="123"', + array( + 'class' => array( + 'name' => 'class', + 'value' => 'btn', + 'whole' => 'class="btn"', + 'vless' => 'n', + ), + 'id' => array( + 'name' => 'id', + 'value' => 'submit-btn', + 'whole' => 'id="submit-btn"', + 'vless' => 'n', + ), + 'data-value' => array( + 'name' => 'data-value', + 'value' => '123', + 'whole' => 'data-value="123"', + 'vless' => 'n', + ), + ), + ); + + yield 'valueless attributes' => array( + 'disabled required checked', + array( + 'disabled' => array( + 'name' => 'disabled', + 'value' => '', + 'whole' => 'disabled', + 'vless' => 'y', + ), + 'required' => array( + 'name' => 'required', + 'value' => '', + 'whole' => 'required', + 'vless' => 'y', + ), + 'checked' => array( + 'name' => 'checked', + 'value' => '', + 'whole' => 'checked', + 'vless' => 'y', + ), + ), + ); + + yield 'valueless attribute at end' => array( + 'type="checkbox" checked', + array( + 'type' => array( + 'name' => 'type', + 'value' => 'checkbox', + 'whole' => 'type="checkbox"', + 'vless' => 'n', + ), + 'checked' => array( + 'name' => 'checked', + 'value' => '', + 'whole' => 'checked', + 'vless' => 'y', + ), + ), + ); + + yield 'mixed valued and valueless' => array( + 'disabled class="form-control" readonly id=input1', + array( + 'disabled' => array( + 
'name' => 'disabled', + 'value' => '', + 'whole' => 'disabled', + 'vless' => 'y', + ), + 'class' => array( + 'name' => 'class', + 'value' => 'form-control', + 'whole' => 'class="form-control"', + 'vless' => 'n', + ), + 'readonly' => array( + 'name' => 'readonly', + 'value' => '', + 'whole' => 'readonly', + 'vless' => 'y', + ), + 'id' => array( + 'name' => 'id', + 'value' => 'input1', + 'whole' => 'id="input1"', + 'vless' => 'n', + ), + ), + ); + + // Character Reference Handling. + yield 'named entities' => array( + 'title="<Hello> & "World""', + array( + 'title' => array( + 'name' => 'title', + 'value' => '<Hello> & "World"', + 'whole' => 'title="<Hello> & "World""', + 'vless' => 'n', + ), + ), + ); + + yield 'numeric decimal entities' => array( + 'title="<test>"', + array( + 'title' => array( + 'name' => 'title', + 'value' => '<test>', + 'whole' => 'title="<test>"', + 'vless' => 'n', + ), + ), + ); + + yield 'numeric hex entities lowercase' => array( + 'title="<hex>"', + array( + 'title' => array( + 'name' => 'title', + 'value' => '<hex>', + 'whole' => 'title="<hex>"', + 'vless' => 'n', + ), + ), + ); + + yield 'numeric hex entities uppercase' => array( + 'title="<HEX>"', + array( + 'title' => array( + 'name' => 'title', + 'value' => '<HEX>', + 'whole' => 'title="<HEX>"', + 'vless' => 'n', + ), + ), + ); + + yield 'invalid character references' => array( + 'title="&invalid; &#; &#x;"', + array( + 'title' => array( + 'name' => 'title', + 'value' => '&invalid; &#; &#x;', + 'whole' => 'title="&invalid; &#; &#x;"', + 'vless' => 'n', + ), + ), + ); + + // Quote Handling. 
+ yield 'double quotes' => array( + 'data-text="Double quoted value"', + array( + 'data-text' => array( + 'name' => 'data-text', + 'value' => 'Double quoted value', + 'whole' => 'data-text="Double quoted value"', + 'vless' => 'n', + ), + ), + ); + + yield 'single quotes' => array( + "data-text='Single quoted value'", + array( + 'data-text' => array( + 'name' => 'data-text', + 'value' => 'Single quoted value', + 'whole' => "data-text='Single quoted value'", + 'vless' => 'n', + ), + ), + ); + + yield 'mixed quotes' => array( + 'title="double" alt=\'single\' id=unquoted', + array( + 'title' => array( + 'name' => 'title', + 'value' => 'double', + 'whole' => 'title="double"', + 'vless' => 'n', + ), + 'alt' => array( + 'name' => 'alt', + 'value' => 'single', + 'whole' => "alt='single'", + 'vless' => 'n', + ), + 'id' => array( + 'name' => 'id', + 'value' => 'unquoted', + 'whole' => 'id="unquoted"', + 'vless' => 'n', + ), + ), + ); + + yield 'single quotes in double quoted value' => array( + 'title="It\'s working"', + array( + 'title' => array( + 'name' => 'title', + 'value' => "It's working", + 'whole' => 'title="It\'s working"', + 'vless' => 'n', + ), + ), + ); + + yield 'double quotes in single quoted value' => array( + 'title=\'He said "hello"\'', + array( + 'title' => array( + 'name' => 'title', + 'value' => 'He said "hello"', + 'whole' => 'title=\'He said "hello"\'', + 'vless' => 'n', + ), + ), + ); + + yield 'unquoted with special chars' => array( + 'data-value=test-123_value', + array( + 'data-value' => array( + 'name' => 'data-value', + 'value' => 'test-123_value', + 'whole' => 'data-value="test-123_value"', + 'vless' => 'n', + ), + ), + ); + + // Edge Cases. 
+ yield 'empty string' => array( + '', + array(), + ); + + yield 'whitespace only' => array( + ' ', + array(), + ); + + yield 'invalid attribute name starting with number' => array( + '1invalid="value"', + array(), + ); + + yield 'invalid attribute name special chars' => array( + '@invalid="value" $bad="value"', + array(), + ); + + yield 'duplicate attributes first wins' => array( + 'id="first" class="test" id="second"', + array( + 'id' => array( + 'name' => 'id', + 'value' => 'first', + 'whole' => 'id="first"', + 'vless' => 'n', + ), + 'class' => array( + 'name' => 'class', + 'value' => 'test', + 'whole' => 'class="test"', + 'vless' => 'n', + ), + ), + ); + + yield 'malformed unclosed double quote' => array( + 'title="unclosed class="test"', + array(), + ); + + yield 'very long attribute value' => array( + 'data-long="' . str_repeat( 'a', 10000 ) . '"', + array( + 'data-long' => array( + 'name' => 'data-long', + 'value' => str_repeat( 'a', 10000 ), + 'whole' => 'data-long="' . str_repeat( 'a', 10000 ) . 
'"', + 'vless' => 'n', + ), + ), + ); + + yield 'attribute names with colons and dots' => array( + 'xml:lang="en" data.value="test" xlink:href="#anchor"', + array( + 'xml:lang' => array( + 'name' => 'xml:lang', + 'value' => 'en', + 'whole' => 'xml:lang="en"', + 'vless' => 'n', + ), + 'data.value' => array( + 'name' => 'data.value', + 'value' => 'test', + 'whole' => 'data.value="test"', + 'vless' => 'n', + ), + 'xlink:href' => array( + 'name' => 'xlink:href', + 'value' => '#anchor', + 'whole' => 'xlink:href="#anchor"', + 'vless' => 'n', + ), + ), + ); + + yield 'multiple spaces between attributes' => array( + 'class="test" id="value" title="spaced"', + array( + 'class' => array( + 'name' => 'class', + 'value' => 'test', + 'whole' => 'class="test"', + 'vless' => 'n', + ), + 'id' => array( + 'name' => 'id', + 'value' => 'value', + 'whole' => 'id="value"', + 'vless' => 'n', + ), + 'title' => array( + 'name' => 'title', + 'value' => 'spaced', + 'whole' => 'title="spaced"', + 'vless' => 'n', + ), + ), + ); + + yield 'spaces around equals' => array( + 'id = "spaced" class ="left" title= "right"', + array( + 'id' => array( + 'name' => 'id', + 'value' => 'spaced', + 'whole' => 'id="spaced"', + 'vless' => 'n', + ), + 'class' => array( + 'name' => 'class', + 'value' => 'left', + 'whole' => 'class="left"', + 'vless' => 'n', + ), + 'title' => array( + 'name' => 'title', + 'value' => 'right', + 'whole' => 'title="right"', + 'vless' => 'n', + ), + ), + ); + + // WordPress-specific Cases. 
+ yield 'common wordpress attributes' => array( + 'class="wp-block" id="post-123" style="color: red;"', + array( + 'class' => array( + 'name' => 'class', + 'value' => 'wp-block', + 'whole' => 'class="wp-block"', + 'vless' => 'n', + ), + 'id' => array( + 'name' => 'id', + 'value' => 'post-123', + 'whole' => 'id="post-123"', + 'vless' => 'n', + ), + 'style' => array( + 'name' => 'style', + 'value' => 'color: red;', + 'whole' => 'style="color: red;"', + 'vless' => 'n', + ), + ), + ); + + yield 'data attributes' => array( + 'data-post-id="123" data-action="delete" data-confirm="true"', + array( + 'data-post-id' => array( + 'name' => 'data-post-id', + 'value' => '123', + 'whole' => 'data-post-id="123"', + 'vless' => 'n', + ), + 'data-action' => array( + 'name' => 'data-action', + 'value' => 'delete', + 'whole' => 'data-action="delete"', + 'vless' => 'n', + ), + 'data-confirm' => array( + 'name' => 'data-confirm', + 'value' => 'true', + 'whole' => 'data-confirm="true"', + 'vless' => 'n', + ), + ), + ); + + yield 'aria attributes' => array( + 'aria-label="Close" aria-hidden="true" aria-describedby="help-text"', + array( + 'aria-label' => array( + 'name' => 'aria-label', + 'value' => 'Close', + 'whole' => 'aria-label="Close"', + 'vless' => 'n', + ), + 'aria-hidden' => array( + 'name' => 'aria-hidden', + 'value' => 'true', + 'whole' => 'aria-hidden="true"', + 'vless' => 'n', + ), + 'aria-describedby' => array( + 'name' => 'aria-describedby', + 'value' => 'help-text', + 'whole' => 'aria-describedby="help-text"', + 'vless' => 'n', + ), + ), + ); + + yield 'role attribute' => array( + 'role="navigation"', + array( + 'role' => array( + 'name' => 'role', + 'value' => 'navigation', + 'whole' => 'role="navigation"', + 'vless' => 'n', + ), + ), + ); + + yield 'tabindex attribute' => array( + 'tabindex="0"', + array( + 'tabindex' => array( + 'name' => 'tabindex', + 'value' => '0', + 'whole' => 'tabindex="0"', + 'vless' => 'n', + ), + ), + ); + + yield 'complex wordpress attributes' 
=> array( + 'class="wp-block-button__link" href="https://wordpress.org" target="_blank" rel="noopener" aria-label="Visit WordPress" data-track="click"', + array( + 'class' => array( + 'name' => 'class', + 'value' => 'wp-block-button__link', + 'whole' => 'class="wp-block-button__link"', + 'vless' => 'n', + ), + 'href' => array( + 'name' => 'href', + 'value' => 'https://wordpress.org', + 'whole' => 'href="https://wordpress.org"', + 'vless' => 'n', + ), + 'target' => array( + 'name' => 'target', + 'value' => '_blank', + 'whole' => 'target="_blank"', + 'vless' => 'n', + ), + 'rel' => array( + 'name' => 'rel', + 'value' => 'noopener', + 'whole' => 'rel="noopener"', + 'vless' => 'n', + ), + 'aria-label' => array( + 'name' => 'aria-label', + 'value' => 'Visit WordPress', + 'whole' => 'aria-label="Visit WordPress"', + 'vless' => 'n', + ), + 'data-track' => array( + 'name' => 'data-track', + 'value' => 'click', + 'whole' => 'data-track="click"', + 'vless' => 'n', + ), + ), + ); + + yield 'underscore in attribute name' => array( + '_custom="value" data_value="test"', + array( + '_custom' => array( + 'name' => '_custom', + 'value' => 'value', + 'whole' => '_custom="value"', + 'vless' => 'n', + ), + 'data_value' => array( + 'name' => 'data_value', + 'value' => 'test', + 'whole' => 'data_value="test"', + 'vless' => 'n', + ), + ), + ); + + yield 'empty attribute value' => array( + 'title="" alt=\'\' class=""', + array( + 'title' => array( + 'name' => 'title', + 'value' => '', + 'whole' => 'title=""', + 'vless' => 'n', + ), + 'alt' => array( + 'name' => 'alt', + 'value' => '', + 'whole' => "alt=''", + 'vless' => 'n', + ), + 'class' => array( + 'name' => 'class', + 'value' => '', + 'whole' => 'class=""', + 'vless' => 'n', + ), + ), + ); + + // Whitespace Handling. 
+ yield 'forward slashes between attributes' => array( + 'att="val" / att-2="val-2" / / ///// att-3="val-3"', + array( + 'att' => array( + 'name' => 'att', + 'value' => 'val', + 'whole' => 'att="val"', + 'vless' => 'n', + ), + 'att-2' => array( + 'name' => 'att-2', + 'value' => 'val-2', + 'whole' => 'att-2="val-2"', + 'vless' => 'n', + ), + 'att-3' => array( + 'name' => 'att-3', + 'value' => 'val-3', + 'whole' => 'att-3="val-3"', + 'vless' => 'n', + ), + ), + ); + + yield 'tab whitespace' => array( + "att=\"val\"\tatt2=\"val2\"", + array( + 'att' => array( + 'name' => 'att', + 'value' => 'val', + 'whole' => 'att="val"', + 'vless' => 'n', + ), + 'att2' => array( + 'name' => 'att2', + 'value' => 'val2', + 'whole' => 'att2="val2"', + 'vless' => 'n', + ), + ), + ); + + yield 'form feed whitespace' => array( + "att=\"val\"\fatt2=\"val2\"", + array( + 'att' => array( + 'name' => 'att', + 'value' => 'val', + 'whole' => 'att="val"', + 'vless' => 'n', + ), + 'att2' => array( + 'name' => 'att2', + 'value' => 'val2', + 'whole' => 'att2="val2"', + 'vless' => 'n', + ), + ), + ); + + yield 'carriage return whitespace' => array( + "att=\"val\"\ratt2=\"val2\"", + array( + 'att' => array( + 'name' => 'att', + 'value' => 'val', + 'whole' => 'att="val"', + 'vless' => 'n', + ), + 'att2' => array( + 'name' => 'att2', + 'value' => 'val2', + 'whole' => 'att2="val2"', + 'vless' => 'n', + ), + ), + ); + + yield 'newline whitespace' => array( + "att=\"val\"\natt2=\"val2\"", + array( + 'att' => array( + 'name' => 'att', + 'value' => 'val', + 'whole' => 'att="val"', + 'vless' => 'n', + ), + 'att2' => array( + 'name' => 'att2', + 'value' => 'val2', + 'whole' => 'att2="val2"', + 'vless' => 'n', + ), + ), + ); + + yield 'mixed whitespace types' => array( + "att=\"val\"\t\r\n\f att2=\"val2\"", + array( + 'att' => array( + 'name' => 'att', + 'value' => 'val', + 'whole' => 'att="val"', + 'vless' => 'n', + ), + 'att2' => array( + 'name' => 'att2', + 'value' => 'val2', + 'whole' => 'att2="val2"', + 
'vless' => 'n', + ), + ), + ); + + // Malformed Equals Patterns. + yield 'multiple equals signs' => array( + 'att=="val"', + array(), + ); + + yield 'equals with strange spacing' => array( + 'att= ="val"', + array(), + ); + + yield 'triple equals signs' => array( + 'att==="val"', + array(), + ); + + yield 'equals echo pattern' => array( + "att==echo 'something'", + array( + 'att' => array( + 'name' => 'att', + 'value' => '=echo', + 'whole' => 'att="=echo"', + 'vless' => 'n', + ), + ), + ); + + yield 'attribute starting with equals' => array( + '= bool k=v', + array( + 'bool' => array( + 'name' => 'bool', + 'value' => '', + 'whole' => 'bool', + 'vless' => 'y', + ), + 'k' => array( + 'name' => 'k', + 'value' => 'v', + 'whole' => 'k="v"', + 'vless' => 'n', + ), + ), + ); + + yield 'mixed quotes and equals chaos' => array( + 'k=v ="' . "' j=w", + array( + 'k' => array( + 'name' => 'k', + 'value' => 'v', + 'whole' => 'k="v"', + 'vless' => 'n', + ), + ), + ); + + yield 'triple equals quoted whitespace' => array( + '===" "', + array(), + ); + + yield 'boolean with contradictory value' => array( + 'disabled=enabled checked', + array( + 'disabled' => array( + 'name' => 'disabled', + 'value' => 'enabled', + 'whole' => 'disabled="enabled"', + 'vless' => 'n', + ), + 'checked' => array( + 'name' => 'checked', + 'value' => '', + 'whole' => 'checked', + 'vless' => 'y', + ), + ), + ); + + yield 'empty attribute name with value' => array( + '="value" class="test"', + array( + 'class' => array( + 'name' => 'class', + 'value' => 'test', + 'whole' => 'class="test"', + 'vless' => 'n', + ), + ), + ); + } + + /** + * Test wp_kses_hair() with URL protocol filtering. 
+ * + * @ticket 63724 + * @dataProvider data_protocol_filtering + * @covers ::wp_kses_hair + */ + public function test_protocol_filtering( $input, $expected ) { + $result = wp_kses_hair( $input, $this->allowed_protocols ); + $this->assertSame( $expected, $result ); + } + + /** + * Data provider for URL protocol filtering tests. + * + * @return Generator + */ + public function data_protocol_filtering() { + yield 'href allowed protocol http' => array( + 'href="http://example.com"', + array( + 'href' => array( + 'name' => 'href', + 'value' => 'http://example.com', + 'whole' => 'href="http://example.com"', + 'vless' => 'n', + ), + ), + ); + + yield 'href allowed protocol https' => array( + 'href="https://secure.example.com"', + array( + 'href' => array( + 'name' => 'href', + 'value' => 'https://secure.example.com', + 'whole' => 'href="https://secure.example.com"', + 'vless' => 'n', + ), + ), + ); + + yield 'href disallowed protocol javascript' => array( + 'href="javascript:alert(1)"', + array( + 'href' => array( + 'name' => 'href', + 'value' => 'alert(1)', + 'whole' => 'href="alert(1)"', + 'vless' => 'n', + ), + ), + ); + + yield 'href disallowed protocol javascript single quotes' => array( + "href='javascript:alert(1)'", + array( + 'href' => array( + 'name' => 'href', + 'value' => 'alert(1)', + 'whole' => "href='alert(1)'", + 'vless' => 'n', + ), + ), + ); + + yield 'href disallowed protocol javascript unquoted' => array( + 'href=javascript:alert(1)', + array( + 'href' => array( + 'name' => 'href', + 'value' => 'alert(1)', + 'whole' => 'href="alert(1)"', + 'vless' => 'n', + ), + ), + ); + + yield 'src allowed protocol' => array( + 'src="https://example.com/image.jpg"', + array( + 'src' => array( + 'name' => 'src', + 'value' => 'https://example.com/image.jpg', + 'whole' => 'src="https://example.com/image.jpg"', + 'vless' => 'n', + ), + ), + ); + + yield 'src data protocol' => array( + 'src="data:text/html,"', + array( + 'src' => array( + 'name' => 'src', + 'value' => 
'text/html,', + 'whole' => 'src="text/html,"', + 'vless' => 'n', + ), + ), + ); + + yield 'protocol filtering only uri attributes' => array( + 'data-url="javascript:alert(1)"', + array( + 'data-url' => array( + 'name' => 'data-url', + 'value' => 'javascript:alert(1)', + 'whole' => 'data-url="javascript:alert(1)"', + 'vless' => 'n', + ), + ), + ); + + yield 'href relative url' => array( + 'href="/path/to/page"', + array( + 'href' => array( + 'name' => 'href', + 'value' => '/path/to/page', + 'whole' => 'href="/path/to/page"', + 'vless' => 'n', + ), + ), + ); + + yield 'href anchor link' => array( + 'href="#section"', + array( + 'href' => array( + 'name' => 'href', + 'value' => '#section', + 'whole' => 'href="#section"', + 'vless' => 'n', + ), + ), + ); + } + + /** + * Test wp_kses_hair() with custom allowed protocols. + * + * @ticket 63724 + * @covers ::wp_kses_hair + */ + public function test_custom_allowed_protocols() { + $custom_protocols = array( 'http', 'https', 'ftp' ); + $attr = 'href="ftp://ftp.example.com"'; + $result = wp_kses_hair( $attr, $custom_protocols ); + + $expected = array( + 'href' => array( + 'name' => 'href', + 'value' => 'ftp://ftp.example.com', + 'whole' => 'href="ftp://ftp.example.com"', + 'vless' => 'n', + ), + ); + + $this->assertSame( $expected, $result ); + } +} From a8f8a527fd41faa1e3183d210215f50b73e6e1a3 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Fri, 9 Jan 2026 19:06:26 +0100 Subject: [PATCH 3/8] Correct covers annotations --- tests/phpunit/tests/kses/wpKsesHair.php | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tests/phpunit/tests/kses/wpKsesHair.php b/tests/phpunit/tests/kses/wpKsesHair.php index 74dcca98a02ed..af2595d461759 100644 --- a/tests/phpunit/tests/kses/wpKsesHair.php +++ b/tests/phpunit/tests/kses/wpKsesHair.php @@ -3,7 +3,6 @@ * Test wp_kses_hair() function. 
* * @group kses - * @covers wp_kses_hair */ class Tests_Kses_WpKsesHair extends WP_UnitTestCase { @@ -27,7 +26,7 @@ public function set_up() { * * @ticket 63724 * @dataProvider data_attribute_parsing - * @covers ::wp_kses_hair + * @covers wp_kses_hair */ public function test_attribute_parsing( $input, $expected ) { $result = wp_kses_hair( $input, $this->allowed_protocols ); @@ -842,7 +841,7 @@ public function data_attribute_parsing() { * * @ticket 63724 * @dataProvider data_protocol_filtering - * @covers ::wp_kses_hair + * @covers wp_kses_hair */ public function test_protocol_filtering( $input, $expected ) { $result = wp_kses_hair( $input, $this->allowed_protocols ); @@ -980,7 +979,7 @@ public function data_protocol_filtering() { * Test wp_kses_hair() with custom allowed protocols. * * @ticket 63724 - * @covers ::wp_kses_hair + * @covers wp_kses_hair */ public function test_custom_allowed_protocols() { $custom_protocols = array( 'http', 'https', 'ftp' ); From 0c1ee53588111656cadd332dcd9f5466b726a0b8 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Fri, 9 Jan 2026 19:10:13 +0100 Subject: [PATCH 4/8] Add parameter types --- tests/phpunit/tests/kses/wpKsesHair.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/phpunit/tests/kses/wpKsesHair.php b/tests/phpunit/tests/kses/wpKsesHair.php index af2595d461759..1d3abe60e803d 100644 --- a/tests/phpunit/tests/kses/wpKsesHair.php +++ b/tests/phpunit/tests/kses/wpKsesHair.php @@ -28,7 +28,7 @@ public function set_up() { * @dataProvider data_attribute_parsing * @covers wp_kses_hair */ - public function test_attribute_parsing( $input, $expected ) { + public function test_attribute_parsing( string $input, array $expected ) { $result = wp_kses_hair( $input, $this->allowed_protocols ); $this->assertSame( $expected, $result ); } @@ -843,7 +843,7 @@ public function data_attribute_parsing() { * @dataProvider data_protocol_filtering * @covers wp_kses_hair */ - public function test_protocol_filtering( $input, 
$expected ) { + public function test_protocol_filtering( string $input, array $expected ) { $result = wp_kses_hair( $input, $this->allowed_protocols ); $this->assertSame( $expected, $result ); } From e3fb5ed0151d41d0665baa2c2a6a2c32c70a400c Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Fri, 9 Jan 2026 19:13:24 +0100 Subject: [PATCH 5/8] use unusual protocol for protocol test --- tests/phpunit/tests/kses/wpKsesHair.php | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/phpunit/tests/kses/wpKsesHair.php b/tests/phpunit/tests/kses/wpKsesHair.php index 1d3abe60e803d..53ec5293d5f8b 100644 --- a/tests/phpunit/tests/kses/wpKsesHair.php +++ b/tests/phpunit/tests/kses/wpKsesHair.php @@ -982,15 +982,15 @@ public function data_protocol_filtering() { * @covers wp_kses_hair */ public function test_custom_allowed_protocols() { - $custom_protocols = array( 'http', 'https', 'ftp' ); - $attr = 'href="ftp://ftp.example.com"'; + $custom_protocols = array( 'gopher' ); + $attr = 'href="gopher://gopher.example.org"'; $result = wp_kses_hair( $attr, $custom_protocols ); $expected = array( 'href' => array( 'name' => 'href', - 'value' => 'ftp://ftp.example.com', - 'whole' => 'href="ftp://ftp.example.com"', + 'value' => 'gopher://gopher.example.org', + 'whole' => 'href="gopher://gopher.example.org"', 'vless' => 'n', ), ); From 93551e3870a37395429c7a0ceec9de73d8ade662 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Fri, 9 Jan 2026 19:14:40 +0100 Subject: [PATCH 6/8] character references, not entities --- tests/phpunit/tests/kses/wpKsesHair.php | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/phpunit/tests/kses/wpKsesHair.php b/tests/phpunit/tests/kses/wpKsesHair.php index 53ec5293d5f8b..3a4e8ef218d97 100644 --- a/tests/phpunit/tests/kses/wpKsesHair.php +++ b/tests/phpunit/tests/kses/wpKsesHair.php @@ -173,7 +173,7 @@ public function data_attribute_parsing() { ); // Character Reference Handling. 
- yield 'named entities' => array( + yield 'named character references' => array( 'title="<Hello> & "World""', array( 'title' => array( @@ -185,7 +185,7 @@ public function data_attribute_parsing() { ), ); - yield 'numeric decimal entities' => array( + yield 'numeric decimal character references' => array( 'title="<test>"', array( 'title' => array( @@ -197,7 +197,7 @@ public function data_attribute_parsing() { ), ); - yield 'numeric hex entities lowercase' => array( + yield 'numeric hex character references lowercase' => array( 'title="<hex>"', array( 'title' => array( @@ -209,7 +209,7 @@ public function data_attribute_parsing() { ), ); - yield 'numeric hex entities uppercase' => array( + yield 'numeric hex character references uppercase' => array( 'title="<HEX>"', array( 'title' => array( From 5db3dc109bf4d8bd07923af6e6f73a311c5371f9 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Fri, 9 Jan 2026 19:24:48 +0100 Subject: [PATCH 7/8] test cleanup --- tests/phpunit/tests/kses/wpKsesHair.php | 58 +++++++++++-------------- 1 file changed, 26 insertions(+), 32 deletions(-) diff --git a/tests/phpunit/tests/kses/wpKsesHair.php b/tests/phpunit/tests/kses/wpKsesHair.php index 3a4e8ef218d97..2ed83679f2e3d 100644 --- a/tests/phpunit/tests/kses/wpKsesHair.php +++ b/tests/phpunit/tests/kses/wpKsesHair.php @@ -39,7 +39,6 @@ public function test_attribute_parsing( string $input, array $expected ) { * @return Generator */ public function data_attribute_parsing() { - // Basic Attribute Parsing. yield 'single attribute with double quotes' => array( 'class="test-class"', array( @@ -172,7 +171,6 @@ public function data_attribute_parsing() { ), ); - // Character Reference Handling. yield 'named character references' => array( 'title="<Hello> & "World""', array( @@ -233,7 +231,6 @@ public function data_attribute_parsing() { ), ); - // Quote Handling. 
yield 'double quotes' => array( 'data-text="Double quoted value"', array( @@ -318,7 +315,6 @@ public function data_attribute_parsing() { ), ); - // Edge Cases. yield 'empty string' => array( '', array(), @@ -446,8 +442,7 @@ public function data_attribute_parsing() { ), ); - // WordPress-specific Cases. - yield 'common wordpress attributes' => array( + yield 'common WordPress attributes' => array( 'class="wp-block" id="post-123" style="color: red;"', array( 'class' => array( @@ -543,7 +538,7 @@ public function data_attribute_parsing() { ), ); - yield 'complex wordpress attributes' => array( + yield 'complex WordPress attributes' => array( 'class="wp-block-button__link" href="https://wordpress.org" target="_blank" rel="noopener" aria-label="Visit WordPress" data-track="click"', array( 'class' => array( @@ -627,98 +622,97 @@ public function data_attribute_parsing() { ), ); - // Whitespace Handling. yield 'forward slashes between attributes' => array( - 'att="val" / att-2="val-2" / / ///// att-3="val-3"', + 'att / att2=2 /// att3="3"', array( 'att' => array( 'name' => 'att', - 'value' => 'val', - 'whole' => 'att="val"', - 'vless' => 'n', + 'value' => '', + 'whole' => 'att', + 'vless' => 'y', ), - 'att-2' => array( - 'name' => 'att-2', - 'value' => 'val-2', - 'whole' => 'att-2="val-2"', + 'att2' => array( + 'name' => 'att2', + 'value' => '2', + 'whole' => 'att2="2"', 'vless' => 'n', ), - 'att-3' => array( - 'name' => 'att-3', - 'value' => 'val-3', - 'whole' => 'att-3="val-3"', + 'att3' => array( + 'name' => 'att3', + 'value' => '3', + 'whole' => 'att3="3"', 'vless' => 'n', ), ), ); yield 'tab whitespace' => array( - "att=\"val\"\tatt2=\"val2\"", + "att='val'\tatt2='val2'", array( 'att' => array( 'name' => 'att', 'value' => 'val', - 'whole' => 'att="val"', + 'whole' => "att='val'", 'vless' => 'n', ), 'att2' => array( 'name' => 'att2', 'value' => 'val2', - 'whole' => 'att2="val2"', + 'whole' => "att2='val2'", 'vless' => 'n', ), ), ); yield 'form feed whitespace' => array( 
- "att=\"val\"\fatt2=\"val2\"", + "att='val'\fatt2='val2'", array( 'att' => array( 'name' => 'att', 'value' => 'val', - 'whole' => 'att="val"', + 'whole' => "att='val'", 'vless' => 'n', ), 'att2' => array( 'name' => 'att2', 'value' => 'val2', - 'whole' => 'att2="val2"', + 'whole' => "att2='val2'", 'vless' => 'n', ), ), ); yield 'carriage return whitespace' => array( - "att=\"val\"\ratt2=\"val2\"", + "att='val'\ratt2='val2'", array( 'att' => array( 'name' => 'att', 'value' => 'val', - 'whole' => 'att="val"', + 'whole' => "att='val'", 'vless' => 'n', ), 'att2' => array( 'name' => 'att2', 'value' => 'val2', - 'whole' => 'att2="val2"', + 'whole' => "att2='val2'", 'vless' => 'n', ), ), ); yield 'newline whitespace' => array( - "att=\"val\"\natt2=\"val2\"", + "att='val'\natt2='val2'", array( 'att' => array( 'name' => 'att', 'value' => 'val', - 'whole' => 'att="val"', + 'whole' => "att='val'", 'vless' => 'n', ), 'att2' => array( 'name' => 'att2', 'value' => 'val2', - 'whole' => 'att2="val2"', + 'whole' => "att2='val2'", 'vless' => 'n', ), ), From f815a62b5b23689f19eec1f7f824b7eb295c80b9 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Fri, 9 Jan 2026 19:59:42 +0100 Subject: [PATCH 8/8] Update to fix tests --- tests/phpunit/tests/kses/wpKsesHair.php | 173 ++++++++++++++++++------ 1 file changed, 135 insertions(+), 38 deletions(-) diff --git a/tests/phpunit/tests/kses/wpKsesHair.php b/tests/phpunit/tests/kses/wpKsesHair.php index 2ed83679f2e3d..05d573bc070bc 100644 --- a/tests/phpunit/tests/kses/wpKsesHair.php +++ b/tests/phpunit/tests/kses/wpKsesHair.php @@ -57,7 +57,7 @@ public function data_attribute_parsing() { 'title' => array( 'name' => 'title', 'value' => 'My Title', - 'whole' => "title='My Title'", + 'whole' => 'title="My Title"', 'vless' => 'n', ), ), @@ -188,8 +188,8 @@ public function data_attribute_parsing() { array( 'title' => array( 'name' => 'title', - 'value' => '<test>', - 'whole' => 'title="<test>"', + 'value' => '<test>', + 'whole' => 'title="<test>"', 
'vless' => 'n', ), ), @@ -200,8 +200,8 @@ public function data_attribute_parsing() { array( 'title' => array( 'name' => 'title', - 'value' => '<hex>', - 'whole' => 'title="<hex>"', + 'value' => '<hex>', + 'whole' => 'title="<hex>"', 'vless' => 'n', ), ), @@ -212,8 +212,8 @@ public function data_attribute_parsing() { array( 'title' => array( 'name' => 'title', - 'value' => '<HEX>', - 'whole' => 'title="<HEX>"', + 'value' => '<HEX>', + 'whole' => 'title="<HEX>"', 'vless' => 'n', ), ), @@ -224,8 +224,8 @@ public function data_attribute_parsing() { array( 'title' => array( 'name' => 'title', - 'value' => '&invalid; &#; &#x;', - 'whole' => 'title="&invalid; &#; &#x;"', + 'value' => '&invalid; &#; &#x;', + 'whole' => 'title="&invalid; &#; &#x;"', 'vless' => 'n', ), ), @@ -249,7 +249,7 @@ public function data_attribute_parsing() { 'data-text' => array( 'name' => 'data-text', 'value' => 'Single quoted value', - 'whole' => "data-text='Single quoted value'", + 'whole' => 'data-text="Single quoted value"', 'vless' => 'n', ), ), @@ -267,7 +267,7 @@ public function data_attribute_parsing() { 'alt' => array( 'name' => 'alt', 'value' => 'single', - 'whole' => "alt='single'", + 'whole' => 'alt="single"', 'vless' => 'n', ), 'id' => array( @@ -284,8 +284,8 @@ public function data_attribute_parsing() { array( 'title' => array( 'name' => 'title', - 'value' => "It's working", - 'whole' => 'title="It\'s working"', + 'value' => 'It's working', + 'whole' => 'title="It's working"', 'vless' => 'n', ), ), @@ -296,8 +296,8 @@ public function data_attribute_parsing() { array( 'title' => array( 'name' => 'title', - 'value' => 'He said "hello"', - 'whole' => 'title=\'He said "hello"\'', + 'value' => 'He said "hello"', + 'whole' => 'title="He said "hello""', 'vless' => 'n', ), ), @@ -327,12 +327,32 @@ public function data_attribute_parsing() { yield 'invalid attribute name starting with number' => array( '1invalid="value"', - array(), + array( + '1invalid' => array( + 'name' => '1invalid', + 
'value' => 'value', + 'whole' => '1invalid="value"', + 'vless' => 'n', + ), + ), ); yield 'invalid attribute name special chars' => array( '@invalid="value" $bad="value"', - array(), + array( + '@invalid' => array( + 'name' => '@invalid', + 'value' => 'value', + 'whole' => '@invalid="value"', + 'vless' => 'n', + ), + '$bad' => array( + 'name' => '$bad', + 'value' => 'value', + 'whole' => '$bad="value"', + 'vless' => 'n', + ), + ), ); yield 'duplicate attributes first wins' => array( @@ -355,7 +375,20 @@ public function data_attribute_parsing() { yield 'malformed unclosed double quote' => array( 'title="unclosed class="test"', - array(), + array( + 'title' => array( + 'name' => 'title', + 'value' => 'unclosed class=', + 'whole' => 'title="unclosed class="', + 'vless' => 'n', + ), + 'test"' => array( + 'name' => 'test"', + 'value' => '', + 'whole' => 'test"', + 'vless' => 'y', + ), + ), ); yield 'very long attribute value' => array( @@ -610,7 +643,7 @@ public function data_attribute_parsing() { 'alt' => array( 'name' => 'alt', 'value' => '', - 'whole' => "alt=''", + 'whole' => 'alt=""', 'vless' => 'n', ), 'class' => array( @@ -625,7 +658,7 @@ public function data_attribute_parsing() { yield 'forward slashes between attributes' => array( 'att / att2=2 /// att3="3"', array( - 'att' => array( + 'att' => array( 'name' => 'att', 'value' => '', 'whole' => 'att', @@ -652,13 +685,13 @@ public function data_attribute_parsing() { 'att' => array( 'name' => 'att', 'value' => 'val', - 'whole' => "att='val'", + 'whole' => 'att="val"', 'vless' => 'n', ), 'att2' => array( 'name' => 'att2', 'value' => 'val2', - 'whole' => "att2='val2'", + 'whole' => 'att2="val2"', 'vless' => 'n', ), ), @@ -670,13 +703,13 @@ public function data_attribute_parsing() { 'att' => array( 'name' => 'att', 'value' => 'val', - 'whole' => "att='val'", + 'whole' => 'att="val"', 'vless' => 'n', ), 'att2' => array( 'name' => 'att2', 'value' => 'val2', - 'whole' => "att2='val2'", + 'whole' => 'att2="val2"', 
'vless' => 'n', ), ), @@ -688,13 +721,13 @@ public function data_attribute_parsing() { 'att' => array( 'name' => 'att', 'value' => 'val', - 'whole' => "att='val'", + 'whole' => 'att="val"', 'vless' => 'n', ), 'att2' => array( 'name' => 'att2', 'value' => 'val2', - 'whole' => "att2='val2'", + 'whole' => 'att2="val2"', 'vless' => 'n', ), ), @@ -706,13 +739,13 @@ public function data_attribute_parsing() { 'att' => array( 'name' => 'att', 'value' => 'val', - 'whole' => "att='val'", + 'whole' => 'att="val"', 'vless' => 'n', ), 'att2' => array( 'name' => 'att2', 'value' => 'val2', - 'whole' => "att2='val2'", + 'whole' => 'att2="val2"', 'vless' => 'n', ), ), @@ -739,34 +772,67 @@ public function data_attribute_parsing() { // Malformed Equals Patterns. yield 'multiple equals signs' => array( 'att=="val"', - array(), + array( + 'att' => array( + 'name' => 'att', + 'value' => '="val"', + 'whole' => 'att="="val""', + 'vless' => 'n', + ), + ), ); yield 'equals with strange spacing' => array( 'att= ="val"', - array(), + array( + 'att' => array( + 'name' => 'att', + 'value' => '="val"', + 'whole' => 'att="="val""', + 'vless' => 'n', + ), + ), ); yield 'triple equals signs' => array( 'att==="val"', - array(), + array( + 'att' => array( + 'name' => 'att', + 'value' => '=="val"', + 'whole' => 'att="=="val""', + 'vless' => 'n', + ), + ), ); yield 'equals echo pattern' => array( "att==echo 'something'", array( - 'att' => array( + 'att' => array( 'name' => 'att', 'value' => '=echo', 'whole' => 'att="=echo"', 'vless' => 'n', ), + "'something'" => array( + 'name' => "'something'", + 'value' => '', + 'whole' => "'something'", + 'vless' => 'y', + ), ), ); yield 'attribute starting with equals' => array( '= bool k=v', array( + '=' => array( + 'name' => '=', + 'value' => '', + 'whole' => '=', + 'vless' => 'y', + ), 'bool' => array( 'name' => 'bool', 'value' => '', @@ -785,18 +851,43 @@ public function data_attribute_parsing() { yield 'mixed quotes and equals chaos' => array( 'k=v ="' . 
"' j=w", array( - 'k' => array( + 'k' => array( 'name' => 'k', 'value' => 'v', 'whole' => 'k="v"', 'vless' => 'n', ), + '="' . "'" => array( + 'name' => '="' . "'", + 'value' => '', + 'whole' => '="' . "'", + 'vless' => 'y', + ), + 'j' => array( + 'name' => 'j', + 'value' => 'w', + 'whole' => 'j="w"', + 'vless' => 'n', + ), ), ); yield 'triple equals quoted whitespace' => array( '===" "', - array(), + array( + '=' => array( + 'name' => '=', + 'value' => '="', + 'whole' => '=="=""', + 'vless' => 'n', + ), + '"' => array( + 'name' => '"', + 'value' => '', + 'whole' => '"', + 'vless' => 'y', + ), + ), ); yield 'boolean with contradictory value' => array( @@ -820,7 +911,13 @@ public function data_attribute_parsing() { yield 'empty attribute name with value' => array( '="value" class="test"', array( - 'class' => array( + '="value"' => array( + 'name' => '="value"', + 'value' => '', + 'whole' => '="value"', + 'vless' => 'y', + ), + 'class' => array( 'name' => 'class', 'value' => 'test', 'whole' => 'class="test"', @@ -890,7 +987,7 @@ public function data_protocol_filtering() { 'href' => array( 'name' => 'href', 'value' => 'alert(1)', - 'whole' => "href='alert(1)'", + 'whole' => 'href="alert(1)"', 'vless' => 'n', ), ), @@ -925,8 +1022,8 @@ public function data_protocol_filtering() { array( 'src' => array( 'name' => 'src', - 'value' => 'text/html,', - 'whole' => 'src="text/html,"', + 'value' => 'text/html,<script>alert(1)</script>', + 'whole' => 'src="text/html,<script>alert(1)</script>"', 'vless' => 'n', ), ),