Skip to content

Commit d9de381

Browse files
committed
feat: add PREG_REPLACE_COUNT_CHANGES flag for counting actual replacements
Add a new flag, PREG_REPLACE_COUNT_CHANGES, to preg_replace() and preg_replace_callback() that counts only effective changes rather than every matched replacement. When this flag is set, the replacement count is incremented only when the replacement text differs from the original matched text, allowing callers to distinguish identity replacements from actual content changes.

- Add the PREG_REPLACE_COUNT_CHANGES constant definition
- Extend the php_pcre_replace() and php_pcre_replace_impl() signatures with a flags parameter
- Implement change-detection logic that compares the replacement with the original match
- Update function signatures throughout the codebase (php_pcre_replace_array, php_replace_in_subject, _preg_replace_common)
- Add test coverage for identity replacements, backreferences, and callbacks
- Update stub definitions and internal function signatures
1 parent 46e55dd commit d9de381

File tree

8 files changed

+158
-33
lines changed

8 files changed

+158
-33
lines changed

ext/fileinfo/libmagic.patch

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2414,7 +2414,7 @@ diff -u libmagic.orig/funcs.c libmagic/funcs.c
24142414
+ }
24152415
+
24162416
+ repl = zend_string_init(rep, strlen(rep), 0);
2417-
+ res = php_pcre_replace_impl(pce, NULL, ms->o.buf, strlen(ms->o.buf), repl, -1, &rep_cnt);
2417+
+ res = php_pcre_replace_impl(pce, NULL, ms->o.buf, strlen(ms->o.buf), repl, -1, &rep_cnt, 0);
24182418
+
24192419
+ zend_string_release_ex(repl, 0);
24202420
+ if (NULL == res) {

ext/fileinfo/libmagic/funcs.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -671,7 +671,7 @@ file_replace(struct magic_set *ms, const char *pat, const char *rep)
671671
}
672672

673673
repl = zend_string_init(rep, strlen(rep), 0);
674-
res = php_pcre_replace_impl(pce, NULL, ms->o.buf, strlen(ms->o.buf), repl, -1, &rep_cnt);
674+
res = php_pcre_replace_impl(pce, NULL, ms->o.buf, strlen(ms->o.buf), repl, -1, &rep_cnt, 0);
675675

676676
zend_string_release_ex(repl, 0);
677677
if (NULL == res) {

ext/pcre/php_pcre.c

Lines changed: 63 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@
2727
#define PREG_OFFSET_CAPTURE (1<<8)
2828
#define PREG_UNMATCHED_AS_NULL (1<<9)
2929

30+
#define PREG_REPLACE_COUNT_CHANGES (1<<0)
31+
3032
#define PREG_SPLIT_NO_EMPTY (1<<0)
3133
#define PREG_SPLIT_DELIM_CAPTURE (1<<1)
3234
#define PREG_SPLIT_OFFSET_CAPTURE (1<<2)
@@ -1571,7 +1573,8 @@ PHPAPI zend_string *php_pcre_replace(zend_string *regex,
15711573
zend_string *subject_str,
15721574
const char *subject, size_t subject_len,
15731575
zend_string *replace_str,
1574-
size_t limit, size_t *replace_count)
1576+
size_t limit, size_t *replace_count,
1577+
zend_long flags)
15751578
{
15761579
pcre_cache_entry *pce; /* Compiled regular expression */
15771580
zend_string *result; /* Function result */
@@ -1587,15 +1590,15 @@ PHPAPI zend_string *php_pcre_replace(zend_string *regex,
15871590
}
15881591
pce->refcount++;
15891592
result = php_pcre_replace_impl(pce, subject_str, subject, subject_len, replace_str,
1590-
limit, replace_count);
1593+
limit, replace_count, flags);
15911594
pce->refcount--;
15921595

15931596
return result;
15941597
}
15951598
/* }}} */
15961599

15971600
/* {{{ php_pcre_replace_impl() */
1598-
PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *subject_str, const char *subject, size_t subject_len, zend_string *replace_str, size_t limit, size_t *replace_count)
1601+
PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *subject_str, const char *subject, size_t subject_len, zend_string *replace_str, size_t limit, size_t *replace_count, zend_long flags)
15991602
{
16001603
uint32_t options; /* Execution options */
16011604
int count; /* Count of matched subpatterns */
@@ -1658,6 +1661,9 @@ PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *su
16581661

16591662
if (count >= 0 && limit > 0) {
16601663
bool simple_string;
1664+
const char *rep_ptr = NULL;
1665+
size_t rep_len = 0;
1666+
size_t match_len_local = 0;
16611667

16621668
/* Check for too many substrings condition. */
16631669
if (UNEXPECTED(count == 0)) {
@@ -1675,12 +1681,9 @@ PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *su
16751681
break;
16761682
}
16771683

1678-
if (replace_count) {
1679-
++*replace_count;
1680-
}
1681-
16821684
/* Set the match location in subject */
16831685
match = subject + offsets[0];
1686+
match_len_local = (size_t)(offsets[1] - offsets[0]);
16841687

16851688
new_len = result_len + offsets[0] - last_end_offset; /* part before the match */
16861689

@@ -1723,10 +1726,15 @@ PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *su
17231726
}
17241727

17251728
if (simple_string) {
1726-
/* copy replacement */
1727-
memcpy(&ZSTR_VAL(result)[result_len], ZSTR_VAL(replace_str), ZSTR_LEN(replace_str)+1);
1728-
result_len += ZSTR_LEN(replace_str);
1729+
rep_ptr = ZSTR_VAL(replace_str);
1730+
rep_len = ZSTR_LEN(replace_str);
1731+
1732+
/* copy replacement */
1733+
memcpy(&ZSTR_VAL(result)[result_len], ZSTR_VAL(replace_str), ZSTR_LEN(replace_str)+1);
1734+
result_len += ZSTR_LEN(replace_str);
17291735
} else {
1736+
char *rep_start = ZSTR_VAL(result) + result_len;
1737+
17301738
/* copy replacement and backrefs */
17311739
walkbuf = ZSTR_VAL(result) + result_len;
17321740

@@ -1753,10 +1761,26 @@ PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *su
17531761
walk_last = walk[-1];
17541762
}
17551763
*walkbuf = '\0';
1764+
1765+
rep_ptr = rep_start;
1766+
rep_len = (size_t)(walkbuf - rep_start);
1767+
17561768
/* increment the result length by how much we've added to the string */
17571769
result_len += (walkbuf - (ZSTR_VAL(result) + result_len));
17581770
}
17591771

1772+
if (replace_count) {
1773+
bool count_changes = flags & PREG_REPLACE_COUNT_CHANGES;
1774+
if (!count_changes) {
1775+
++*replace_count;
1776+
} else {
1777+
if (rep_len != match_len_local ||
1778+
(match_len_local && memcmp(rep_ptr, match, match_len_local) != 0)) {
1779+
++*replace_count;
1780+
}
1781+
}
1782+
}
1783+
17601784
limit--;
17611785

17621786
/* Advance to the next piece. */
@@ -1922,10 +1946,6 @@ static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_strin
19221946
break;
19231947
}
19241948

1925-
if (replace_count) {
1926-
++*replace_count;
1927-
}
1928-
19291949
/* Set the match location in subject */
19301950
match = ZSTR_VAL(subject_str) + offsets[0];
19311951

@@ -1940,6 +1960,20 @@ static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_strin
19401960
if (UNEXPECTED(eval_result == NULL)) {
19411961
goto error;
19421962
}
1963+
1964+
if (replace_count) {
1965+
zend_long count_changes = flags & PREG_REPLACE_COUNT_CHANGES;
1966+
if (!count_changes) {
1967+
++*replace_count;
1968+
} else {
1969+
size_t match_len = (size_t)(offsets[1] - offsets[0]);
1970+
if (ZSTR_LEN(eval_result) != match_len ||
1971+
(match_len && memcmp(ZSTR_VAL(eval_result), match, match_len) != 0)) {
1972+
++*replace_count;
1973+
}
1974+
}
1975+
}
1976+
19431977
new_len = zend_safe_address_guarded(1, ZSTR_LEN(eval_result) + ZSTR_MAX_OVERHEAD, new_len) -ZSTR_MAX_OVERHEAD;
19441978
if (new_len >= alloc_len) {
19451979
alloc_len = zend_safe_address_guarded(2, new_len, ZSTR_MAX_OVERHEAD) - ZSTR_MAX_OVERHEAD;
@@ -2057,7 +2091,7 @@ static zend_always_inline zend_string *php_pcre_replace_func(zend_string *regex,
20572091
/* {{{ php_pcre_replace_array */
20582092
static zend_string *php_pcre_replace_array(HashTable *regex,
20592093
zend_string *replace_str, HashTable *replace_ht,
2060-
zend_string *subject_str, size_t limit, size_t *replace_count)
2094+
zend_string *subject_str, size_t limit, size_t *replace_count, zend_long flags)
20612095
{
20622096
zval *regex_entry;
20632097
zend_string *result;
@@ -2093,7 +2127,7 @@ static zend_string *php_pcre_replace_array(HashTable *regex,
20932127
/* Do the actual replacement and put the result back into subject_str
20942128
for further replacements. */
20952129
result = php_pcre_replace(regex_str, subject_str, ZSTR_VAL(subject_str),
2096-
ZSTR_LEN(subject_str), replace_entry_str, limit, replace_count);
2130+
ZSTR_LEN(subject_str), replace_entry_str, limit, replace_count, flags);
20972131
zend_tmp_string_release(tmp_replace_entry_str);
20982132
zend_tmp_string_release(tmp_regex_str);
20992133
zend_string_release_ex(subject_str, 0);
@@ -2115,7 +2149,7 @@ static zend_string *php_pcre_replace_array(HashTable *regex,
21152149
/* Do the actual replacement and put the result back into subject_str
21162150
for further replacements. */
21172151
result = php_pcre_replace(regex_str, subject_str, ZSTR_VAL(subject_str),
2118-
ZSTR_LEN(subject_str), replace_str, limit, replace_count);
2152+
ZSTR_LEN(subject_str), replace_str, limit, replace_count, flags);
21192153
zend_tmp_string_release(tmp_regex_str);
21202154
zend_string_release_ex(subject_str, 0);
21212155
subject_str = result;
@@ -2134,18 +2168,18 @@ static zend_string *php_pcre_replace_array(HashTable *regex,
21342168
static zend_always_inline zend_string *php_replace_in_subject(
21352169
zend_string *regex_str, HashTable *regex_ht,
21362170
zend_string *replace_str, HashTable *replace_ht,
2137-
zend_string *subject, size_t limit, size_t *replace_count)
2171+
zend_string *subject, size_t limit, size_t *replace_count, zend_long flags)
21382172
{
21392173
zend_string *result;
21402174

21412175
if (regex_str) {
21422176
ZEND_ASSERT(replace_str != NULL);
21432177
result = php_pcre_replace(regex_str, subject, ZSTR_VAL(subject), ZSTR_LEN(subject),
2144-
replace_str, limit, replace_count);
2178+
replace_str, limit, replace_count, flags);
21452179
} else {
21462180
ZEND_ASSERT(regex_ht != NULL);
21472181
result = php_pcre_replace_array(regex_ht, replace_str, replace_ht, subject,
2148-
limit, replace_count);
2182+
limit, replace_count, flags);
21492183
}
21502184
return result;
21512185
}
@@ -2254,6 +2288,7 @@ static void _preg_replace_common(
22542288
HashTable *subject_ht, zend_string *subject_str,
22552289
zend_long limit,
22562290
zval *zcount,
2291+
zend_long flags,
22572292
bool is_filter
22582293
) {
22592294
size_t replace_count = 0;
@@ -2269,7 +2304,7 @@ static void _preg_replace_common(
22692304
if (subject_str) {
22702305
old_replace_count = replace_count;
22712306
result = php_replace_in_subject(regex_str, regex_ht, replace_str, replace_ht,
2272-
subject_str, limit, &replace_count);
2307+
subject_str, limit, &replace_count, flags);
22732308
if (result != NULL) {
22742309
if (!is_filter || replace_count > old_replace_count) {
22752310
RETVAL_STR(result);
@@ -2298,7 +2333,7 @@ static void _preg_replace_common(
22982333
zend_string *tmp_subject_entry_str;
22992334
zend_string *subject_entry_str = zval_get_tmp_string(subject_entry, &tmp_subject_entry_str);
23002335
result = php_replace_in_subject(regex_str, regex_ht, replace_str, replace_ht,
2301-
subject_entry_str, limit, &replace_count);
2336+
subject_entry_str, limit, &replace_count, flags);
23022337

23032338
if (result != NULL) {
23042339
if (!is_filter || replace_count > old_replace_count) {
@@ -2329,23 +2364,26 @@ static void preg_replace_common(INTERNAL_FUNCTION_PARAMETERS, bool is_filter)
23292364
HashTable *regex_ht, *replace_ht, *subject_ht;
23302365
zend_long limit = -1;
23312366
zval *zcount = NULL;
2367+
zend_long flags = 0;
23322368

23332369
/* Get function parameters and do error-checking. */
2334-
ZEND_PARSE_PARAMETERS_START(3, 5)
2370+
ZEND_PARSE_PARAMETERS_START(3, 6)
23352371
Z_PARAM_ARRAY_HT_OR_STR(regex_ht, regex_str)
23362372
Z_PARAM_ARRAY_HT_OR_STR(replace_ht, replace_str)
23372373
Z_PARAM_ARRAY_HT_OR_STR(subject_ht, subject_str)
23382374
Z_PARAM_OPTIONAL
23392375
Z_PARAM_LONG(limit)
23402376
Z_PARAM_ZVAL(zcount)
2377+
Z_PARAM_LONG(flags)
23412378
ZEND_PARSE_PARAMETERS_END();
23422379

23432380
_preg_replace_common(
23442381
return_value,
23452382
regex_ht, regex_str,
23462383
replace_ht, replace_str,
23472384
subject_ht, subject_str,
2348-
limit, zcount, is_filter);
2385+
limit, zcount,
2386+
flags, is_filter);
23492387
}
23502388
/* }}} */
23512389

@@ -2371,7 +2409,7 @@ ZEND_FRAMELESS_FUNCTION(preg_replace, 3)
23712409
regex_ht, regex_str,
23722410
replace_ht, replace_str,
23732411
subject_ht, subject_str,
2374-
/* limit */ -1, /* zcount */ NULL, /* is_filter */ false);
2412+
/* limit */ -1, /* zcount */ NULL, /* flags */ 0, /* is_filter */ false);
23752413

23762414
flf_clean:;
23772415
Z_FLF_PARAM_FREE_STR(1, regex_tmp);

ext/pcre/php_pcre.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525

2626
#include <locale.h>
2727

28-
PHPAPI zend_string *php_pcre_replace(zend_string *regex, zend_string *subject_str, const char *subject, size_t subject_len, zend_string *replace_str, size_t limit, size_t *replace_count);
28+
PHPAPI zend_string *php_pcre_replace(zend_string *regex, zend_string *subject_str, const char *subject, size_t subject_len, zend_string *replace_str, size_t limit, size_t *replace_count, zend_long flags);
2929
PHPAPI pcre2_code* pcre_get_compiled_regex(zend_string *regex, uint32_t *capture_count);
3030

3131
extern zend_module_entry pcre_module_entry;
@@ -53,7 +53,7 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str
5353
zval *subpats, bool global, zend_long flags, zend_off_t start_offset);
5454

5555
PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *subject_str, const char *subject, size_t subject_len, zend_string *replace_str,
56-
size_t limit, size_t *replace_count);
56+
size_t limit, size_t *replace_count, zend_long flags);
5757

5858
PHPAPI void php_pcre_split_impl( pcre_cache_entry *pce, zend_string *subject_str, zval *return_value,
5959
zend_long limit_val, zend_long flags);

ext/pcre/php_pcre.stub.php

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,11 @@
2222
* @cvalue PREG_UNMATCHED_AS_NULL
2323
*/
2424
const PREG_UNMATCHED_AS_NULL = UNKNOWN;
25+
/**
26+
* @var int
27+
* @cvalue PREG_REPLACE_COUNT_CHANGES
28+
*/
29+
const PREG_REPLACE_COUNT_CHANGES = UNKNOWN;
2530
/**
2631
* @var int
2732
* @cvalue PREG_SPLIT_NO_EMPTY
@@ -112,7 +117,7 @@ function preg_match_all(string $pattern, string $subject, &$matches = null, int
112117
* @return string|array<int|string, string>|null
113118
* @frameless-function {"arity": 3}
114119
*/
115-
function preg_replace(string|array $pattern, string|array $replacement, string|array $subject, int $limit = -1, &$count = null): string|array|null {}
120+
function preg_replace(string|array $pattern, string|array $replacement, string|array $subject, int $limit = -1, &$count = null, int $flags = 0): string|array|null {}
116121

117122
/**
118123
* @param int $count

ext/pcre/php_pcre_arginfo.h

Lines changed: 10 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)