From 5ab2d1ffa654ec7084173876f95579cbd99e0de0 Mon Sep 17 00:00:00 2001
From: Jukka Lehtosalo <jukka.lehtosalo@iki.fi>
Date: Fri, 13 Feb 2026 15:21:38 +0000
Subject: [PATCH 1/2] Rename topsort2 to topsort, and remove the old function

I've verified that the topological sort is no longer a significant
CPU bottleneck when using the new implementation (formerly topsort2).

I used Codex to validate and update the docstring.
---
 mypy/build.py          |   6 +--
 mypy/graph_utils.py    |  62 +++----------------------
 mypy/solve.py          |   4 +-
 mypy/test/testgraph.py | 100 ++++++++++++++++++-----------------------
 4 files changed, 56 insertions(+), 116 deletions(-)

diff --git a/mypy/build.py b/mypy/build.py
index 1dbbc86fea7f7..0a0d6dc222a48 100644
--- a/mypy/build.py
+++ b/mypy/build.py
@@ -98,7 +98,7 @@
     ErrorTupleRaw,
     report_internal_error,
 )
-from mypy.graph_utils import prepare_sccs, strongly_connected_components, topsort2
+from mypy.graph_utils import prepare_sccs, strongly_connected_components, topsort
 from mypy.indirection import TypeIndirectionVisitor
 from mypy.ipc import BadStatus, IPCClient, IPCMessage, read_status, ready_to_read, receive, send
 from mypy.messages import MessageBuilder
@@ -4314,7 +4314,7 @@ def sorted_components(graph: Graph) -> list[SCC]:
     scc_dep_map = prepare_sccs_full(strongly_connected_components(vertices, edges), edges)
     # Topsort.
     res = []
-    for ready in topsort2(scc_dep_map):
+    for ready in topsort(scc_dep_map):
         # Sort the sets in ready by reversed smallest State.order.  Examples:
         #
         # - If ready is [{x}, {y}], x.order == 1, y.order == 2, we get
@@ -4349,7 +4349,7 @@ def sorted_components_inner(
     edges = {id: deps_filtered(graph, vertices, id, pri_max) for id in vertices}
     sccs = list(strongly_connected_components(vertices, edges))
     res = []
-    for ready in topsort2(prepare_sccs(sccs, edges)):
+    for ready in topsort(prepare_sccs(sccs, edges)):
         res.extend(sorted(ready, key=lambda scc: -min(graph[id].order for id in scc)))
     return res
 
diff --git a/mypy/graph_utils.py b/mypy/graph_utils.py
index 30d1660e4c0a5..aa570971a856c 100644
--- a/mypy/graph_utils.py
+++ b/mypy/graph_utils.py
@@ -2,7 +2,7 @@
 
 from __future__ import annotations
 
-from collections.abc import Iterable, Iterator, Set as AbstractSet
+from collections.abc import Iterator, Set as AbstractSet
 from typing import TypeVar
 
 T = TypeVar("T")
@@ -72,68 +72,20 @@ def prepare_sccs(
     return data
 
 
-def topsort(data: dict[T, set[T]]) -> Iterable[set[T]]:
-    """Topological sort.
-
-    Args:
-      data: A map from vertices to all vertices that it has an edge
-            connecting it to.  NOTE: This data structure
-            is modified in place -- for normalization purposes,
-            self-dependencies are removed and entries representing
-            orphans are added.
-
-    Returns:
-      An iterator yielding sets of vertices that have an equivalent
-      ordering.
-
-    Example:
-      Suppose the input has the following structure:
-
-        {A: {B, C}, B: {D}, C: {D}}
-
-      This is normalized to:
-
-        {A: {B, C}, B: {D}, C: {D}, D: {}}
-
-      The algorithm will yield the following values:
-
-        {D}
-        {B, C}
-        {A}
-
-    From https://code.activestate.com/recipes/577413/.
-    """
-    # TODO: Use a faster algorithm?
-    for k, v in data.items():
-        v.discard(k)  # Ignore self dependencies.
-    for item in set.union(*data.values()) - set(data.keys()):
-        data[item] = set()
-    while True:
-        ready = {item for item, dep in data.items() if not dep}
-        if not ready:
-            break
-        yield ready
-        data = {item: (dep - ready) for item, dep in data.items() if item not in ready}
-    assert not data, f"A cyclic dependency exists amongst {data!r}"
-
-
-class topsort2(Iterator[set[T]]):  # noqa: N801
+class topsort(Iterator[set[T]]):  # noqa: N801
     """Topological sort using Kahn's algorithm.
 
-    This is functionally equivalent to topsort() but avoids rebuilding
-    the full dict and set objects on each iteration. Instead it uses
-    in-degree counters and a reverse adjacency list, so the total work
-    is O(V + E) rather than O(depth * V).
+    Uses in-degree counters and a reverse adjacency list, so the total work
+    is O(V + E).
 
     Implemented as a class rather than a generator for better mypyc
     compilation.
 
     Args:
       data: A map from vertices to all vertices that it has an edge
-            connecting it to.  NOTE: This data structure
-            is modified in place -- for normalization purposes,
-            self-dependencies are removed and entries representing
-            orphans are added.
+            connecting it to. NOTE: dependency sets in this data
+            structure are modified in place to remove self-dependencies.
+            Orphans are handled internally and are not added to `data`.
     """
 
     def __init__(self, data: dict[T, set[T]]) -> None:
diff --git a/mypy/solve.py b/mypy/solve.py
index 57c002ff9b55c..e3709106996cd 100644
--- a/mypy/solve.py
+++ b/mypy/solve.py
@@ -8,7 +8,7 @@
 
 from mypy.constraints import SUBTYPE_OF, SUPERTYPE_OF, Constraint, infer_constraints, neg_op
 from mypy.expandtype import expand_type
-from mypy.graph_utils import prepare_sccs, strongly_connected_components, topsort2
+from mypy.graph_utils import prepare_sccs, strongly_connected_components, topsort
 from mypy.join import join_type_list
 from mypy.meet import meet_type_list, meet_types
 from mypy.subtypes import is_subtype
@@ -147,7 +147,7 @@ def solve_with_dependent(
     sccs = list(strongly_connected_components(set(vars), dmap))
     if not all(check_linear(scc, lowers, uppers) for scc in sccs):
         return {}, []
-    raw_batches = list(topsort2(prepare_sccs(sccs, dmap)))
+    raw_batches = list(topsort(prepare_sccs(sccs, dmap)))
 
     free_vars = []
     free_solutions = {}
diff --git a/mypy/test/testgraph.py b/mypy/test/testgraph.py
index b1d4daf079815..491fcf427e65d 100644
--- a/mypy/test/testgraph.py
+++ b/mypy/test/testgraph.py
@@ -8,7 +8,7 @@
 from mypy.build import BuildManager, BuildSourceSet, State, order_ascc, sorted_components
 from mypy.errors import Errors
 from mypy.fscache import FileSystemCache
-from mypy.graph_utils import strongly_connected_components, topsort, topsort2
+from mypy.graph_utils import strongly_connected_components, topsort
 from mypy.modulefinder import SearchPaths
 from mypy.options import Options
 from mypy.plugin import Plugin
@@ -20,75 +20,63 @@
 class GraphSuite(Suite):
     def test_topsort_empty(self) -> None:
         data: dict[AbstractSet[str], set[AbstractSet[str]]] = {}
-        assert_equal(list(topsort2(data)), [])
+        assert_equal(list(topsort(data)), [])
 
     def test_topsort(self) -> None:
-        for topsort_func in [topsort, topsort2]:
-            a = frozenset({"A"})
-            b = frozenset({"B"})
-            c = frozenset({"C"})
-            d = frozenset({"D"})
-            data: dict[AbstractSet[str], set[AbstractSet[str]]] = {a: {b, c}, b: {d}, c: {d}}
-            res = list(topsort_func(data))
-            assert_equal(res, [{d}, {b, c}, {a}])
+        a = frozenset({"A"})
+        b = frozenset({"B"})
+        c = frozenset({"C"})
+        d = frozenset({"D"})
+        data: dict[AbstractSet[str], set[AbstractSet[str]]] = {a: {b, c}, b: {d}, c: {d}}
+        res = list(topsort(data))
+        assert_equal(res, [{d}, {b, c}, {a}])
 
     def test_topsort_orphan(self) -> None:
-        for topsort_func in [topsort, topsort2]:
-            a = frozenset({"A"})
-            b = frozenset({"B"})
-            data: dict[AbstractSet[str], set[AbstractSet[str]]] = {a: {b}}
-            res = list(topsort_func(data))
-            assert_equal(res, [{b}, {a}])
+        a = frozenset({"A"})
+        b = frozenset({"B"})
+        data: dict[AbstractSet[str], set[AbstractSet[str]]] = {a: {b}}
+        res = list(topsort(data))
+        assert_equal(res, [{b}, {a}])
 
     def test_topsort_independent(self) -> None:
-        for topsort_func in [topsort, topsort2]:
-            a = frozenset({"A"})
-            b = frozenset({"B"})
-            c = frozenset({"C"})
-            data: dict[AbstractSet[str], set[AbstractSet[str]]] = {a: set(), b: set(), c: set()}
-            res = list(topsort_func(data))
-            assert_equal(res, [{a, b, c}])
+        a = frozenset({"A"})
+        b = frozenset({"B"})
+        c = frozenset({"C"})
+        data: dict[AbstractSet[str], set[AbstractSet[str]]] = {a: set(), b: set(), c: set()}
+        res = list(topsort(data))
+        assert_equal(res, [{a, b, c}])
 
     def test_topsort_linear_chain(self) -> None:
-        for topsort_func in [topsort, topsort2]:
-            a = frozenset({"A"})
-            b = frozenset({"B"})
-            c = frozenset({"C"})
-            d = frozenset({"D"})
-            data: dict[AbstractSet[str], set[AbstractSet[str]]] = {
-                a: {b},
-                b: {c},
-                c: {d},
-                d: set(),
-            }
-            res = list(topsort_func(data))
-            assert_equal(res, [{d}, {c}, {b}, {a}])
+        a = frozenset({"A"})
+        b = frozenset({"B"})
+        c = frozenset({"C"})
+        d = frozenset({"D"})
+        data: dict[AbstractSet[str], set[AbstractSet[str]]] = {a: {b}, b: {c}, c: {d}, d: set()}
+        res = list(topsort(data))
+        assert_equal(res, [{d}, {c}, {b}, {a}])
 
     def test_topsort_self_dependency(self) -> None:
-        for topsort_func in [topsort, topsort2]:
-            a = frozenset({"A"})
-            b = frozenset({"B"})
-            data: dict[AbstractSet[str], set[AbstractSet[str]]] = {a: {a, b}, b: set()}
-            res = list(topsort_func(data))
-            assert_equal(res, [{b}, {a}])
+        a = frozenset({"A"})
+        b = frozenset({"B"})
+        data: dict[AbstractSet[str], set[AbstractSet[str]]] = {a: {a, b}, b: set()}
+        res = list(topsort(data))
+        assert_equal(res, [{b}, {a}])
 
     def test_topsort_orphan_diamond(self) -> None:
-        for topsort_func in [topsort, topsort2]:
-            a = frozenset({"A"})
-            b = frozenset({"B"})
-            c = frozenset({"C"})
-            # B and C are orphans -- they appear only in values, not as keys.
-            data: dict[AbstractSet[str], set[AbstractSet[str]]] = {a: {b, c}}
-            res = list(topsort_func(data))
-            assert_equal(res, [{b, c}, {a}])
+        a = frozenset({"A"})
+        b = frozenset({"B"})
+        c = frozenset({"C"})
+        # B and C are orphans -- they appear only in values, not as keys.
+        data: dict[AbstractSet[str], set[AbstractSet[str]]] = {a: {b, c}}
+        res = list(topsort(data))
+        assert_equal(res, [{b, c}, {a}])
 
     def test_topsort_cycle(self) -> None:
-        for topsort_func in [topsort, topsort2]:
-            a = frozenset({"A"})
-            b = frozenset({"B"})
-            data: dict[AbstractSet[str], set[AbstractSet[str]]] = {a: {b}, b: {a}}
-            with self.assertRaises(AssertionError):
-                list(topsort_func(data))
+        a = frozenset({"A"})
+        b = frozenset({"B"})
+        data: dict[AbstractSet[str], set[AbstractSet[str]]] = {a: {b}, b: {a}}
+        with self.assertRaises(AssertionError):
+            list(topsort(data))
 
     def test_scc(self) -> None:
         vertices = {"A", "B", "C", "D"}

From b74696c2d7a0b9cb12c0acf0d15a00aae13855bc Mon Sep 17 00:00:00 2001
From: Jukka Lehtosalo <jukka.lehtosalo@iki.fi>
Date: Fri, 13 Feb 2026 15:40:12 +0000
Subject: [PATCH 2/2] Improve docstring

---
 mypy/graph_utils.py | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/mypy/graph_utils.py b/mypy/graph_utils.py
index aa570971a856c..5699bab3f7e2f 100644
--- a/mypy/graph_utils.py
+++ b/mypy/graph_utils.py
@@ -86,6 +86,25 @@ class topsort(Iterator[set[T]]):  # noqa: N801
             connecting it to. NOTE: dependency sets in this data
             structure are modified in place to remove self-dependencies.
             Orphans are handled internally and are not added to `data`.
+
+    Returns:
+      An iterator yielding sets of vertices that have an equivalent
+      ordering.
+
+    Example:
+      Suppose the input has the following structure:
+
+        {A: {B, C}, B: {D}, C: {D}}
+
+      The algorithm treats orphan dependencies as if normalized to:
+
+        {A: {B, C}, B: {D}, C: {D}, D: {}}
+
+      It will yield the following values:
+
+        {D}
+        {B, C}
+        {A}
     """
 
     def __init__(self, data: dict[T, set[T]]) -> None: