Fix matching against union of tuples (#19600)

saulshanabrook · hauntsaninja · web-flow · commit 7fdbed08d130 · 2026-02-04T23:06:51.000-08:00
This pull request fixes handling of union types containing tuples in match statements. Previously, when a tuple was part of a union, all its items would be unioned together and treated as a homogeneous tuple of that union type, which was incorrect. It still falls back on this behavior if there are multiple tuples in the union with Unpack in them, but otherwise it should now be handled correctly. I attempted to keep as much of the existing semantics the same apart from this change. I also tried to keep the performance roughly similar, not unioning types more than needed. Fixes #19599 Fixes #19082 --------- Co-authored-by: Shantanu Jain <hauntsaninja@gmail.com>
diff --git a/mypy/checkpattern.py b/mypy/checkpattern.py
@@ -2,6 +2,7 @@
 
 from __future__ import annotations
 
+import itertools
 from collections import defaultdict
 from typing import Final, NamedTuple
 
@@ -247,37 +248,91 @@ def visit_sequence_pattern(self, o: SequencePattern) -> PatternType:
         if star_position is not None:
             required_patterns -= 1
 
-        #
-        # get inner types of original type
-        #
+        # 1. Go through all possible types and filter to only those which are sequences that
+        #    could match that number of items
+        # 2. If there is exactly one tuple left with an unpack, then use that type
+        #    and the unpack index
+        # 3. Otherwise, take the product of the item types so that each index can have a
+        #    unique type. For tuples with unpack, fall back to merging all of their types
+        #    for each index, since we can't handle multiple unpacked items at once yet.
+
+        # Whether we have encountered a type that we don't know how to handle in the union
+        unknown_type = False
+        # A list of types that could match any of the items in the sequence.
+        sequence_types: list[Type] = []
+        #  A list of tuple types that could match the sequence, per index
+        tuple_types: list[list[Type]] = []
+        # A list of all the unpack tuple types that we encountered, each containing the
+        # tuple type, unpack index, and union index
+        unpack_tuple_types: list[tuple[TupleType, int, int]] = []
+        for i, t in enumerate(
+            current_type.items if isinstance(current_type, UnionType) else [current_type]
+        ):
+            t = get_proper_type(t)
+            if isinstance(t, TupleType):
+                tuple_items = list(t.items)
+                unpack_index = find_unpack_in_list(tuple_items)
+                if unpack_index is None:
+                    size_diff = len(tuple_items) - required_patterns
+                    if size_diff < 0:
+                        continue
+                    if size_diff > 0 and star_position is None:
+                        continue
+                    if not size_diff and star_position is not None:
+                        # Above we subtract from required_patterns if star_position is not None
+                        tuple_items.append(UninhabitedType())
+                    tuple_types.append(tuple_items)
+                else:
+                    normalized_inner_types = []
+                    for it in tuple_items:
+                        # Unfortunately, it is not possible to "split" the TypeVarTuple
+                        # into individual items, so we just use its upper bound for the whole
+                        # analysis instead.
+                        if isinstance(it, UnpackType) and isinstance(it.type, TypeVarTupleType):
+                            it = UnpackType(it.type.upper_bound)
+                        normalized_inner_types.append(it)
+                    if (
+                        len(normalized_inner_types) - 1 > required_patterns
+                        and star_position is None
+                    ):
+                        continue
+                    t = t.copy_modified(items=normalized_inner_types)
+                    unpack_tuple_types.append((t, unpack_index, i))
+                    # In case we have multiple unpacks we want to combine them all, so add
+                    # the combined tuple type to the sequence types.
+                    sequence_types.append(self.chk.iterable_item_type(tuple_fallback(t), o))
+            elif isinstance(t, AnyType):
+                sequence_types.append(AnyType(TypeOfAny.from_another_any, t))
+            elif self.chk.type_is_iterable(t) and isinstance(t, Instance):
+                sequence_types.append(self.chk.iterable_item_type(t, o))
+            else:
+                unknown_type = True
+
+        inner_types: list[Type]
+
+        # If we only got one unpack tuple type, we can use that
         unpack_index = None
-        if isinstance(current_type, TupleType):
-            inner_types = current_type.items
-            unpack_index = find_unpack_in_list(inner_types)
-            if unpack_index is None:
-                size_diff = len(inner_types) - required_patterns
-                if size_diff < 0:
-                    return self.early_non_match()
-                elif size_diff > 0 and star_position is None:
-                    return self.early_non_match()
+        if len(unpack_tuple_types) == 1 and len(sequence_types) == 1 and not tuple_types:
+            update_tuple_type, unpack_index, union_index = unpack_tuple_types[0]
+            inner_types = update_tuple_type.items
+            if isinstance(current_type, UnionType):
+                union_items = list(current_type.items)
+                union_items[union_index] = update_tuple_type
+                current_type = get_proper_type(UnionType.make_union(items=union_items))
             else:
-                normalized_inner_types = []
-                for it in inner_types:
-                    # Unfortunately, it is not possible to "split" the TypeVarTuple
-                    # into individual items, so we just use its upper bound for the whole
-                    # analysis instead.
-                    if isinstance(it, UnpackType) and isinstance(it.type, TypeVarTupleType):
-                        it = UnpackType(it.type.upper_bound)
-                    normalized_inner_types.append(it)
-                inner_types = normalized_inner_types
-                current_type = current_type.copy_modified(items=normalized_inner_types)
-                if len(inner_types) - 1 > required_patterns and star_position is None:
-                    return self.early_non_match()
+                current_type = update_tuple_type
+        # If we only got tuples we can't match, then exit early
+        elif not tuple_types and not sequence_types and not unknown_type:
+            return self.early_non_match()
+        elif tuple_types:
+            inner_types = [
+                make_simplified_union([*sequence_types, *[t for t in group if t is not None]])
+                for group in itertools.zip_longest(*tuple_types)
+            ]
+        elif sequence_types:
+            inner_types = [make_simplified_union(sequence_types)] * len(o.patterns)
         else:
-            inner_type = self.get_sequence_type(current_type, o)
-            if inner_type is None:
-                inner_type = self.chk.named_type("builtins.object")
-            inner_types = [inner_type] * len(o.patterns)
+            inner_types = [self.chk.named_type("builtins.object")] * len(o.patterns)
 
         #
         # match inner patterns
@@ -356,25 +411,6 @@ def visit_sequence_pattern(self, o: SequencePattern) -> PatternType:
                 new_type = self.narrow_sequence_child(current_type, new_inner_type, o)
         return PatternType(new_type, rest_type, captures)
 
-    def get_sequence_type(self, t: Type, context: Context) -> Type | None:
-        t = get_proper_type(t)
-        if isinstance(t, AnyType):
-            return AnyType(TypeOfAny.from_another_any, t)
-        if isinstance(t, UnionType):
-            items = [self.get_sequence_type(item, context) for item in t.items]
-            not_none_items = [item for item in items if item is not None]
-            if not_none_items:
-                return make_simplified_union(not_none_items)
-            else:
-                return None
-
-        if self.chk.type_is_iterable(t) and isinstance(t, (Instance, TupleType)):
-            if isinstance(t, TupleType):
-                t = tuple_fallback(t)
-            return self.chk.iterable_item_type(t, context)
-        else:
-            return None
-
     def contract_starred_pattern_types(
         self, types: list[Type], star_pos: int | None, num_patterns: int
     ) -> list[Type]:
diff --git a/test-data/unit/check-python310.test b/test-data/unit/check-python310.test
@@ -1746,6 +1746,73 @@ match m6:
 
 [builtins fixtures/tuple.pyi]
 
+[case testMatchTupleUnions]
+from typing_extensions import Unpack
+
+m1: tuple[int, str] | None
+match m1:
+    case (a1, b1):
+        reveal_type(a1)  # N: Revealed type is "builtins.int"
+        reveal_type(b1)  # N: Revealed type is "builtins.str"
+
+m2: tuple[int, str] | tuple[float, str]
+match m2:
+    case (a2, b2):
+        reveal_type(a2)  # N: Revealed type is "builtins.int | builtins.float"
+        reveal_type(b2)  # N: Revealed type is "builtins.str"
+
+m3: tuple[int] | tuple[float, str]
+match m3:
+    case (a3, b3):
+        reveal_type(a3)  # N: Revealed type is "builtins.float"
+        reveal_type(b3)  # N: Revealed type is "builtins.str"
+
+m4: tuple[int] | list[str]
+match m4:
+    case (a4, b4):
+        reveal_type(a4)  # N: Revealed type is "builtins.str"
+        reveal_type(b4)  # N: Revealed type is "builtins.str"
+
+# properly handles unpack when all other patterns are not sequences
+m5: tuple[int, Unpack[tuple[float, ...]]] | None
+match m5:
+    case (a5, b5):
+        reveal_type(a5)  # N: Revealed type is "builtins.int"
+        reveal_type(b5)  # N: Revealed type is "builtins.float"
+
+# currently can't handle combining unpacking with other sequence patterns; if this happens, revert to the worst case
+# of combining all types
+m6: tuple[int, Unpack[tuple[float, ...]]] | list[str]
+match m6:
+    case (a6, b6):
+        reveal_type(a6)  # N: Revealed type is "builtins.int | builtins.float | builtins.str"
+        reveal_type(b6)  # N: Revealed type is "builtins.int | builtins.float | builtins.str"
+
+# but do still separate types from non unpacked types
+m7: tuple[int, Unpack[tuple[float, ...]]] | tuple[str, str]
+match m7:
+    case (a7, b7, *rest7):
+        reveal_type(a7)  # N: Revealed type is "builtins.int | builtins.float | builtins.str"
+        reveal_type(b7)  # N: Revealed type is "builtins.int | builtins.float | builtins.str"
+        reveal_type(rest7)  # N: Revealed type is "builtins.list[builtins.int | builtins.float]"
+
+# verify that if we are unpacking, it will get the type of the sequence if the tuple is too short
+m8: tuple[int, str] | list[float]
+match m8:
+    case (a8, b8, *rest8):
+        reveal_type(a8)  # N: Revealed type is "builtins.float | builtins.int"
+        reveal_type(b8)  # N: Revealed type is "builtins.float | builtins.str"
+        reveal_type(rest8)  # N: Revealed type is "builtins.list[builtins.float]"
+
+m9: tuple[str, str, int] | tuple[str, str]
+match m9:
+    case (a9, *rest9):
+        reveal_type(a9)  # N: Revealed type is "builtins.str"
+        reveal_type(rest9)  # N: Revealed type is "builtins.list[builtins.str | builtins.int]"
+
+[builtins fixtures/tuple.pyi]
+
+
 [case testMatchEnumSingleChoice]
 from enum import Enum
 from typing import NoReturn