From 805535352cb3c380297a68babc71317e889a3221 Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Wed, 13 Aug 2025 17:30:30 -0500 Subject: [PATCH 1/3] Refactor get_post_galleries to use Block_Processor --- src/wp-includes/media.php | 192 ++++++++++++++++++++++---------------- 1 file changed, 110 insertions(+), 82 deletions(-) diff --git a/src/wp-includes/media.php b/src/wp-includes/media.php index f35da615b5344..4b096d3e6738b 100644 --- a/src/wp-includes/media.php +++ b/src/wp-includes/media.php @@ -5248,15 +5248,16 @@ function get_media_embedded_in_content( $content, $types = null ) { * * @since 3.6.0 * - * @param int|WP_Post $post Post ID or object. - * @param bool $html Optional. Whether to return HTML or data in the array. Default true. - * @return array A list of arrays, each containing gallery data and srcs parsed - * from the expanded shortcode. + * @param int|WP_Post $post Post ID or object. + * @param bool|null $html Optional. Whether to return HTML or data in the array. Default true. + * @param int|null $max_galleries Optional. Only collect up to this many galleries. Default unlimited. + * @return array{ids?: string, src: string[]} A list of arrays, each containing gallery data and 'src' + * attributes parsed from the expanded shortcode. */ -function get_post_galleries( $post, $html = true ) { +function get_post_galleries( $post, $html = true, $max_galleries = PHP_INT_MAX ): array { $post = get_post( $post ); - if ( ! $post ) { + if ( ! ( $post instanceof WP_Post ) ) { return array(); } @@ -5268,7 +5269,7 @@ function get_post_galleries( $post, $html = true ) { if ( preg_match_all( '/' . get_shortcode_regex() . '/s', $post->post_content, $matches, PREG_SET_ORDER ) ) { foreach ( $matches as $shortcode ) { if ( 'gallery' === $shortcode[2] ) { - $srcs = array(); + $sources = array(); $shortcode_attrs = shortcode_parse_atts( $shortcode[3] ); @@ -5284,14 +5285,14 @@ function get_post_galleries( $post, $html = true ) { preg_match_all( '#src=([\'"])(.+?)\1#is', $gallery, $src, PREG_SET_ORDER ); if ( ! empty( $src ) ) { foreach ( $src as $s ) { - $srcs[] = $s[2]; + $sources[] = $s[2]; } } $galleries[] = array_merge( $shortcode_attrs, array( - 'src' => array_values( array_unique( $srcs ) ), + 'src' => array_values( array_unique( $sources ) ), ) ); } @@ -5299,96 +5300,123 @@ function get_post_galleries( $post, $html = true ) { } } - if ( has_block( 'gallery', $post->post_content ) ) { - $post_blocks = parse_blocks( $post->post_content ); - - while ( $block = array_shift( $post_blocks ) ) { - $has_inner_blocks = ! empty( $block['innerBlocks'] ); - - // Skip blocks with no blockName and no innerHTML. - if ( ! $block['blockName'] ) { - continue; - } - - // Skip non-Gallery blocks. - if ( 'core/gallery' !== $block['blockName'] ) { - // Move inner blocks into the root array before skipping. - if ( $has_inner_blocks ) { - array_push( $post_blocks, ...$block['innerBlocks'] ); - } - continue; + $processor = new WP_Block_Processor( $post->post_content ); + while ( count( $galleries ) < $max_galleries && $processor->next_block( 'gallery' ) ) { + /* + * It’s not clear yet whether this will have inner blocks. + * Until then, start computing for both paths, then bail + * once the block vintage is known. + */ + $gallery_depth = $processor->get_depth(); + $has_inner_blocks = false; + $gallery_ids = $processor->allocate_and_return_parsed_attributes()['ids'] ?? array(); + $ids = array(); + $sources = array(); + $inner_html = ''; + + while ( $processor->next_token() && $processor->get_depth() > $gallery_depth ) { + if ( ! $processor->is_html() ) { + $has_inner_blocks = true; + break; } - // New Gallery block format as HTML. - if ( $has_inner_blocks && $html ) { - $block_html = wp_list_pluck( $block['innerBlocks'], 'innerHTML' ); - $galleries[] = '
' . implode( ' ', $block_html ) . '
'; - continue; + if ( $processor->get_depth() === $gallery_depth + 1 ) { + $inner_html .= $processor->get_html_content(); } + } - $srcs = array(); + // New Gallery block format, returning HTML. + if ( $has_inner_blocks && $html ) { + // Reset this, because the new format stores everything inside the inner blocks. + $nested_inner_html = ''; - // New Gallery block format as an array. - if ( $has_inner_blocks ) { - $attrs = wp_list_pluck( $block['innerBlocks'], 'attrs' ); - $ids = wp_list_pluck( $attrs, 'id' ); + // Get the rest of the innerHTML of the Gallery’s direct children. + while ( $processor->next_token() && $processor->get_depth() > $gallery_depth ) { + if ( $processor->is_html() && $processor->get_depth() === $gallery_depth + 2 ) { + $nested_inner_html .= $processor->get_html_content(); + } + } - foreach ( $ids as $id ) { - $url = wp_get_attachment_url( $id ); + $galleries[] = "
{$nested_inner_html}
"; + continue; + } - if ( is_string( $url ) && ! in_array( $url, $srcs, true ) ) { - $srcs[] = $url; + // New Gallery block format, returning an array. + if ( $has_inner_blocks ) { + /** + * There are inner blocks and this is the first one; + * the loop above aborted at its opening, so initially the processor + * is paused on the opening delimiter of the first inner block. + * + * @todo Could avoid computation by tracking seen ids and only looking up the + * attachment url if the id hasn’t already been resolved. + */ + do { + // Examine only the direct children of the gallery block. + if ( $processor->get_depth() === $gallery_depth + 1 ) { + /** @todo Perfect use-case for lazy parsing here — only the `id` is wanted. */ + $id = $processor->allocate_and_return_parsed_attributes()['id'] ?? null; + if ( isset( $id ) ) { + $ids[] = $id; + $url = wp_get_attachment_url( $id ); + + if ( is_string( $url ) && ! in_array( $url, $sources, true ) ) { + $sources[] = $url; + } } } + } while ( $processor->next_block() && $processor->get_depth() > $gallery_depth ); - $galleries[] = array( - 'ids' => implode( ',', $ids ), - 'src' => $srcs, - ); + $galleries[] = array( + 'ids' => implode( ',', $ids ), + 'src' => $sources, + ); - continue; - } + continue; + } - // Old Gallery block format as HTML. - if ( $html ) { - $galleries[] = $block['innerHTML']; - continue; - } + // Old Gallery block format, returning HTML. + if ( $html ) { + $galleries[] = $inner_html; + continue; + } - // Old Gallery block format as an array. - $ids = ! empty( $block['attrs']['ids'] ) ? $block['attrs']['ids'] : array(); + // Old Gallery block format, returning an array. - // If present, use the image IDs from the JSON blob as canonical. - if ( ! empty( $ids ) ) { - foreach ( $ids as $id ) { - $url = wp_get_attachment_url( $id ); + /* + * If present, use the image IDs from the JSON blob as canonical. + * This is sourced above when the Block Processor first reaches + * the outer `gallery` block and saved for now; this is because + * once the Processor moves on to the inner text or inner blocks, + * the attributes are no longer accessible. + */ + if ( ! empty( $gallery_ids ) ) { + foreach ( $gallery_ids as $id ) { + $url = wp_get_attachment_url( $id ); - if ( is_string( $url ) && ! in_array( $url, $srcs, true ) ) { - $srcs[] = $url; - } + if ( is_string( $url ) && ! in_array( $url, $sources, true ) ) { + $sources[] = $url; } - - $galleries[] = array( - 'ids' => implode( ',', $ids ), - 'src' => $srcs, - ); - - continue; } - // Otherwise, extract srcs from the innerHTML. - preg_match_all( '#src=([\'"])(.+?)\1#is', $block['innerHTML'], $found_srcs, PREG_SET_ORDER ); + $galleries[] = array( + 'ids' => implode( ',', $gallery_ids ), + 'src' => $sources, + ); - if ( ! empty( $found_srcs[0] ) ) { - foreach ( $found_srcs as $src ) { - if ( isset( $src[2] ) && ! in_array( $src[2], $srcs, true ) ) { - $srcs[] = $src[2]; - } - } - } + continue; + } - $galleries[] = array( 'src' => $srcs ); + // Otherwise, extract srcs from the innerHTML. + $src_finder = new WP_HTML_Tag_Processor( $inner_html ); + while ( $src_finder->next_tag() ) { + $src = $src_finder->get_attribute( 'src' ); + if ( is_string( $src ) && ! in_array( $src, $sources, true ) ) { + $sources[] = $src; + } } + + $galleries[] = array( 'src' => $sources ); } /** @@ -5396,8 +5424,8 @@ function get_post_galleries( $post, $html = true ) { * * @since 3.6.0 * - * @param array $galleries Associative array of all found post galleries. - * @param WP_Post $post Post object. + * @param array{ids?: string, src: string[]} $galleries Associative array of all found post galleries. + * @param WP_Post $post Post object. */ return apply_filters( 'get_post_galleries', $galleries, $post ); } @@ -5412,8 +5440,8 @@ function get_post_galleries( $post, $html = true ) { * @return string|array Gallery data and srcs parsed from the expanded shortcode. */ function get_post_gallery( $post = 0, $html = true ) { - $galleries = get_post_galleries( $post, $html ); - $gallery = reset( $galleries ); + $galleries = get_post_galleries( $post, $html, 1 ); + $gallery = $galleries[0] ?? false; /** * Filters the first-found post gallery. From 07da8bada1541c34f8718dfddcd7e6c0a720cf42 Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Fri, 9 Jan 2026 14:33:24 -0700 Subject: [PATCH 2/3] Galleries: tests and fixes. --- src/wp-includes/media.php | 8 ++++ .../phpunit/tests/media/getPostGalleries.php | 40 +++++++++++++++++++ 2 files changed, 48 insertions(+) diff --git a/src/wp-includes/media.php b/src/wp-includes/media.php index 4b096d3e6738b..06e299bd65e1f 100644 --- a/src/wp-includes/media.php +++ b/src/wp-includes/media.php @@ -5268,6 +5268,10 @@ function get_post_galleries( $post, $html = true, $max_galleries = PHP_INT_MAX ) $galleries = array(); if ( preg_match_all( '/' . get_shortcode_regex() . '/s', $post->post_content, $matches, PREG_SET_ORDER ) ) { foreach ( $matches as $shortcode ) { + if ( count( $galleries ) >= $max_galleries ) { + return $galleries; + } + if ( 'gallery' === $shortcode[2] ) { $sources = array(); @@ -5300,6 +5304,10 @@ function get_post_galleries( $post, $html = true, $max_galleries = PHP_INT_MAX ) } } + if ( count( $galleries ) > $max_galleries ) { + return array_slice( $galleries, 0, $max_galleries ); + } + $processor = new WP_Block_Processor( $post->post_content ); while ( count( $galleries ) < $max_galleries && $processor->next_block( 'gallery' ) ) { /* diff --git a/tests/phpunit/tests/media/getPostGalleries.php b/tests/phpunit/tests/media/getPostGalleries.php index 1ba21013e19a9..3cc71f872ba9d 100644 --- a/tests/phpunit/tests/media/getPostGalleries.php +++ b/tests/phpunit/tests/media/getPostGalleries.php @@ -221,6 +221,46 @@ public function test_returns_no_srcs_with_block_in_post_with_no_attached_images( ); } + /** + * Ensures that the function only returns up to the requested count of galleries. + * + * @dataProvider data_unique_gallery_type_content + * + * @param $gallery + */ + public function tests_returns_requested_max_number_of_galleries( $gallery ) { + // @todo Why is this necessary? + + // add_shortcode( 'gallery', 'gallery_shortcode' ); + + $post_id = self::factory()->post->create( + array( 'post_content' => str_repeat( "{$gallery}\n", 5 ) ) + ); + + // Test negative counts, the zero count, and a max count above the total contained galleries. + foreach ( range( -5, 10 ) as $max_count ) { + $this->assertCount( + max( 0, min( 5, $max_count ) ), + get_post_galleries( $post_id, false, $max_count ), + 'Failed to fetch up to the max requested number of galleries.' + ); + } + } + + /** + * Data provider. + * + * @return array[] + */ + public static function data_unique_gallery_type_content() { + return array( + 'Shortcode with ids' => array( '[gallery ids="11,12,13"]' ), + 'Shortcode without ids' => array( '[gallery]
[/gallery]' ), + 'Block with ids' => array( '' ), + 'Block with inner blocks' => array( '' ), + ); + } + /** * Tests that no srcs are returned for a gallery block v2 * in a post with no attached images. From 6e041af38ea216e1e3744f5c02fac3dc5182512f Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Fri, 9 Jan 2026 14:58:58 -0700 Subject: [PATCH 3/3] Test with more counts; does the failure count also go closer? --- tests/phpunit/tests/media/getPostGalleries.php | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/tests/phpunit/tests/media/getPostGalleries.php b/tests/phpunit/tests/media/getPostGalleries.php index 3cc71f872ba9d..0c745588cb703 100644 --- a/tests/phpunit/tests/media/getPostGalleries.php +++ b/tests/phpunit/tests/media/getPostGalleries.php @@ -229,18 +229,14 @@ public function test_returns_no_srcs_with_block_in_post_with_no_attached_images( * @param $gallery */ public function tests_returns_requested_max_number_of_galleries( $gallery ) { - // @todo Why is this necessary? - - // add_shortcode( 'gallery', 'gallery_shortcode' ); - $post_id = self::factory()->post->create( - array( 'post_content' => str_repeat( "{$gallery}\n", 5 ) ) + array( 'post_content' => str_repeat( "{$gallery}\n", 15 ) ) ); // Test negative counts, the zero count, and a max count above the total contained galleries. - foreach ( range( -5, 10 ) as $max_count ) { + foreach ( range( -5, 25 ) as $max_count ) { $this->assertCount( - max( 0, min( 5, $max_count ) ), + max( 0, min( 15, $max_count ) ), get_post_galleries( $post_id, false, $max_count ), 'Failed to fetch up to the max requested number of galleries.' );