From 22a28b91792ad7a50eae363c8d8dc2beefb42ba4 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Wed, 10 Sep 2025 13:33:58 -0500 Subject: [PATCH 1/8] gh-138682: Add symmetric difference to Counter --- Doc/library/collections.rst | 13 ++++-- Doc/whatsnew/3.15.rst | 9 ++++ Lib/collections/__init__.py | 46 +++++++++++++++++++ Lib/test/test_collections.py | 43 +++++++++++++++++ ...-09-10-13-32-25.gh-issue-138682.iExqx1.rst | 1 + 5 files changed, 109 insertions(+), 3 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-09-10-13-32-25.gh-issue-138682.iExqx1.rst diff --git a/Doc/library/collections.rst b/Doc/library/collections.rst index fdd31799bd90d3..9a8108d882e02f 100644 --- a/Doc/library/collections.rst +++ b/Doc/library/collections.rst @@ -367,9 +367,11 @@ Several mathematical operations are provided for combining :class:`Counter` objects to produce multisets (counters that have counts greater than zero). Addition and subtraction combine counters by adding or subtracting the counts of corresponding elements. Intersection and union return the minimum and -maximum of corresponding counts. Equality and inclusion compare -corresponding counts. Each operation can accept inputs with signed -counts, but the output will exclude results with counts of zero or less. +maximum of corresponding counts. Symmetric difference returns the difference +between the maximum and minimum of the corresponding counts. Equality and +inclusion compare corresponding counts. Each operation can accept inputs +with signed counts, but the output will exclude results with counts of zero +or below. .. doctest:: @@ -383,6 +385,8 @@ counts, but the output will exclude results with counts of zero or less. Counter({'a': 1, 'b': 1}) >>> c | d # union: max(c[x], d[x]) Counter({'a': 3, 'b': 2}) + >>> c ^ d # max(c[x], d[x]) - min(c[x], d[x]) + Counter({'a': 2, 'b': 1}) >>> c == d # equality: c[x] == d[x] False >>> c <= d # inclusion: c[x] <= d[x] @@ -400,6 +404,9 @@ or subtracting from an empty counter. .. versionadded:: 3.3 Added support for unary plus, unary minus, and in-place multiset operations. +.. versionadded:: 3.15 + Added support for the symmetric difference multiset operation, ``c ^ d``. + .. note:: Counters were primarily designed to work with positive integers to represent diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index 56ef80c068634a..ed8996495135b7 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -283,6 +283,15 @@ New modules Improved modules ================ +collections +----------- + +* Added :meth:`collections.Counter.__xor__` and + :meth:`collections.Counter.__ixor__` to compute the symmetric difference + between Counter objects. + (Contributed by Raymond Hettinger in :gh:`138682`.) + + dbm --- diff --git a/Lib/collections/__init__.py b/Lib/collections/__init__.py index b8653f40a942f0..72e970328d2333 100644 --- a/Lib/collections/__init__.py +++ b/Lib/collections/__init__.py @@ -796,6 +796,7 @@ def __repr__(self): # set(cp - cq) == sp - sq # set(cp | cq) == sp | sq # set(cp & cq) == sp & sq + # set(cp ^ cq) == sp ^ sq def __eq__(self, other): 'True if all counts agree. Missing counts are treated as zero.' @@ -908,6 +909,35 @@ def __and__(self, other): result[elem] = newcount return result + def __xor__(self, other): + '''Symmetric difference. Absolute value of count differences. + + The symmetric difference p ^ q is equivalent to: + + (p - q) | (q - p). + + For each element, symmetric difference gives the same result as: + + max(p[elem], q[elem]) - min(p[elem], q[elem]) + + >>> Counter(a=5, b=3, c=2, d=2) ^ Counter(a=1, b=3, c=5, e=1) + Counter({'a': 4, 'c': 3, 'd': 2, 'e': 1}) + + ''' + if not isinstance(other, Counter): + return NotImplemented + result = Counter() + for elem, count in self.items(): + newcount = abs(count - other[elem]) + if newcount: + result[elem] = newcount + for elem, count in other.items(): + if elem not in self: + newcount = abs(count) + if newcount: + result[elem] = newcount + return result + def __pos__(self): 'Adds an empty counter, effectively stripping negative and zero counts' result = Counter() @@ -990,6 +1020,22 @@ def __iand__(self, other): self[elem] = other_count return self._keep_positive() + def __ixor__(self, other): + '''Inplace symmetric difference. Absolute value of count differences. + + >>> c = Counter(a=5, b=3, c=2, d=2) + >>> c ^= Counter(a=1, b=3, c=5, e=1) + >>> c + Counter({'a': 4, 'c': 3, 'd': 2, 'e': 1}) + + ''' + for elem, count in self.items(): + self[elem] = abs(count - other[elem]) + for elem, count in other.items(): + if elem not in self: + self[elem] = abs(count) + return self._keep_positive() + ######################################################################## ### ChainMap diff --git a/Lib/test/test_collections.py b/Lib/test/test_collections.py index f33e4b3256a9b9..bbe519877fd50b 100644 --- a/Lib/test/test_collections.py +++ b/Lib/test/test_collections.py @@ -2140,6 +2140,7 @@ def correctly_ordered(seq): self.assertTrue(correctly_ordered(p - q)) self.assertTrue(correctly_ordered(p | q)) self.assertTrue(correctly_ordered(p & q)) + self.assertTrue(correctly_ordered(p ^ q)) p, q = Counter(ps), Counter(qs) p += q @@ -2157,6 +2158,10 @@ def correctly_ordered(seq): p &= q self.assertTrue(correctly_ordered(p)) + p, q = Counter(ps), Counter(qs) + p ^= q + self.assertTrue(correctly_ordered(p)) + p, q = Counter(ps), Counter(qs) p.update(q) self.assertTrue(correctly_ordered(p)) @@ -2239,6 +2244,7 @@ def test_multiset_operations(self): (Counter.__sub__, lambda x, y: max(0, x-y)), (Counter.__or__, lambda x, y: max(0,x,y)), (Counter.__and__, lambda x, y: max(0, min(x,y))), + (Counter.__xor__, lambda x, y: max(0, max(x,y) - min(x,y))), ]: result = counterop(p, q) for x in elements: @@ -2256,6 +2262,7 @@ def test_multiset_operations(self): (Counter.__sub__, set.__sub__), (Counter.__or__, set.__or__), (Counter.__and__, set.__and__), + (Counter.__xor__, set.__xor__), ]: counter_result = counterop(p, q) set_result = setop(set(p.elements()), set(q.elements())) @@ -2274,6 +2281,7 @@ def test_inplace_operations(self): (Counter.__isub__, Counter.__sub__), (Counter.__ior__, Counter.__or__), (Counter.__iand__, Counter.__and__), + (Counter.__ixor__, Counter.__xor__), ]: c = p.copy() c_id = id(c) @@ -2349,6 +2357,7 @@ def test_multiset_operations_equivalent_to_set_operations(self): self.assertEqual(set(cp - cq), sp - sq) self.assertEqual(set(cp | cq), sp | sq) self.assertEqual(set(cp & cq), sp & sq) + self.assertEqual(set(cp ^ cq), sp ^ sq) self.assertEqual(cp == cq, sp == sq) self.assertEqual(cp != cq, sp != sq) self.assertEqual(cp <= cq, sp <= sq) @@ -2376,6 +2385,40 @@ def test_gt(self): self.assertTrue(Counter(a=3, b=2, c=0) > Counter('aab')) self.assertFalse(Counter(a=2, b=1, c=0) > Counter('aab')) + def test_symmetric_difference(self): + pop = (-4, -3, -2, -1, 0, 1, 2, 3, 4) + + for a, b1, b2, c in product(pop, repeat=4): + p = Counter(a=a, b=b1) + q = Counter(b=b2, c=c) + r = p ^ q + + # Elementwise invariants + for k in ('a', 'b', 'c'): + self.assertEqual(r[k], max(p[k], q[k]) - min(p[k], q[k])) + self.assertEqual(r[k], abs(p[k] - q[k])) + + # Invariant for all positive, negative, and zero counts + self.assertEqual(r, (p - q) | (q - p)) + + # Invariant for non-negative counts + if a >= 0 and b1 >= 0 and b2 >= 0 and c >= 0: + self.assertEqual(r, (p | q) - (p & q)) + + # Zeros and negatives eliminated + self.assertTrue(all(value > 0 for value in r.values())) + + # Output preserves input order: p first and then q + keys = list(p) + list(q) + indices = [keys.index(k) for k in r] + self.assertEqual(indices, sorted(indices)) + + # Inplace operation matches binary operation + pp = Counter(p) + qq = Counter(q) + pp ^= qq + assert pp == r + def load_tests(loader, tests, pattern): tests.addTest(doctest.DocTestSuite(collections)) diff --git a/Misc/NEWS.d/next/Library/2025-09-10-13-32-25.gh-issue-138682.iExqx1.rst b/Misc/NEWS.d/next/Library/2025-09-10-13-32-25.gh-issue-138682.iExqx1.rst new file mode 100644 index 00000000000000..52520e4f3470af --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-09-10-13-32-25.gh-issue-138682.iExqx1.rst @@ -0,0 +1 @@ +Added symmetric difference support to Counter objects. From 4eaf601d072f28c92bff065e4e0c58981be85f2b Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Wed, 10 Sep 2025 13:49:11 -0500 Subject: [PATCH 2/8] No reference target --- Doc/whatsnew/3.15.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index ed8996495135b7..bcfeacdb33cb2a 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -286,8 +286,8 @@ Improved modules collections ----------- -* Added :meth:`collections.Counter.__xor__` and - :meth:`collections.Counter.__ixor__` to compute the symmetric difference +* Added :meth:`!collections.Counter.__xor__` and + :meth:`!collections.Counter.__ixor__` to compute the symmetric difference between Counter objects. (Contributed by Raymond Hettinger in :gh:`138682`.) From c6cd67ce29abb9b610916cb31fc9df2a81edd48b Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Wed, 24 Sep 2025 07:58:57 -0500 Subject: [PATCH 3/8] Update Misc/NEWS.d/next/Library/2025-09-10-13-32-25.gh-issue-138682.iExqx1.rst MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> --- .../next/Library/2025-09-10-13-32-25.gh-issue-138682.iExqx1.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Library/2025-09-10-13-32-25.gh-issue-138682.iExqx1.rst b/Misc/NEWS.d/next/Library/2025-09-10-13-32-25.gh-issue-138682.iExqx1.rst index 52520e4f3470af..fe6bdcd894be71 100644 --- a/Misc/NEWS.d/next/Library/2025-09-10-13-32-25.gh-issue-138682.iExqx1.rst +++ b/Misc/NEWS.d/next/Library/2025-09-10-13-32-25.gh-issue-138682.iExqx1.rst @@ -1 +1 @@ -Added symmetric difference support to Counter objects. +Added symmetric difference support to :class:`collections.Counter` objects. From 5470a58e55e5d2dd59397b335c6f46a73ef5dbb0 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Wed, 24 Sep 2025 07:59:08 -0500 Subject: [PATCH 4/8] Update Doc/library/collections.rst MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> --- Doc/library/collections.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/collections.rst b/Doc/library/collections.rst index 9a8108d882e02f..52178d6c526a40 100644 --- a/Doc/library/collections.rst +++ b/Doc/library/collections.rst @@ -404,7 +404,7 @@ or subtracting from an empty counter. .. versionadded:: 3.3 Added support for unary plus, unary minus, and in-place multiset operations. -.. versionadded:: 3.15 +.. versionadded:: next Added support for the symmetric difference multiset operation, ``c ^ d``. .. note:: From 984fa567de1a20c5b6ddda7a10129a9f173ed194 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Wed, 24 Sep 2025 08:00:15 -0500 Subject: [PATCH 5/8] Update Doc/whatsnew/3.15.rst MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> --- Doc/whatsnew/3.15.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index bcfeacdb33cb2a..97aa0a32553b1d 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -288,7 +288,7 @@ collections * Added :meth:`!collections.Counter.__xor__` and :meth:`!collections.Counter.__ixor__` to compute the symmetric difference - between Counter objects. + between :class:`~collections.Counter` objects. (Contributed by Raymond Hettinger in :gh:`138682`.) From 2945c9b4f49d2a2db75138cb91479f3ccf6e3540 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Wed, 24 Sep 2025 10:13:51 -0500 Subject: [PATCH 6/8] Update Lib/test/test_collections.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> --- Lib/test/test_collections.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_collections.py b/Lib/test/test_collections.py index bbe519877fd50b..a35acfcc0554e5 100644 --- a/Lib/test/test_collections.py +++ b/Lib/test/test_collections.py @@ -2417,7 +2417,7 @@ def test_symmetric_difference(self): pp = Counter(p) qq = Counter(q) pp ^= qq - assert pp == r + self.assertEqual(pp, r) def load_tests(loader, tests, pattern): From 2255af31462559e94e47b1094113cf7b3aa1aae1 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Wed, 24 Sep 2025 10:22:12 -0500 Subject: [PATCH 7/8] Update Lib/collections/__init__.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Algebraic simplification saving a local variable store and two fetches. Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> --- Lib/collections/__init__.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/Lib/collections/__init__.py b/Lib/collections/__init__.py index 72e970328d2333..25ac4d1d524bc2 100644 --- a/Lib/collections/__init__.py +++ b/Lib/collections/__init__.py @@ -932,10 +932,8 @@ def __xor__(self, other): if newcount: result[elem] = newcount for elem, count in other.items(): - if elem not in self: - newcount = abs(count) - if newcount: - result[elem] = newcount + if elem not in self and count: + result[elem] = abs(count) return result def __pos__(self): From 19377487a52a27245f7ed3dacdcd5fd89390ddaa Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Thu, 25 Sep 2025 23:33:52 -0500 Subject: [PATCH 8/8] . --- Lib/test/test_collections.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_collections.py b/Lib/test/test_collections.py index 1261a3320fb764..22595239252814 100644 --- a/Lib/test/test_collections.py +++ b/Lib/test/test_collections.py @@ -2425,9 +2425,9 @@ def test_gt(self): self.assertFalse(Counter(a=2, b=1, c=0) > Counter('aab')) def test_symmetric_difference(self): - pop = (-4, -3, -2, -1, 0, 1, 2, 3, 4) + population = (-4, -3, -2, -1, 0, 1, 2, 3, 4) - for a, b1, b2, c in product(pop, repeat=4): + for a, b1, b2, c in product(population, repeat=4): p = Counter(a=a, b=b1) q = Counter(b=b2, c=c) r = p ^ q