From 5ab2d1ffa654ec7084173876f95579cbd99e0de0 Mon Sep 17 00:00:00 2001 From: Jukka Lehtosalo Date: Fri, 13 Feb 2026 15:21:38 +0000 Subject: [PATCH 1/2] Rename topsort2 to topsort, and remove the old function I've verified that the topological sort is no longer a significant CPU bottleneck when using the new function. I used codex to validate and update the docstring. --- mypy/build.py | 6 +-- mypy/graph_utils.py | 62 +++---------------------- mypy/solve.py | 4 +- mypy/test/testgraph.py | 100 ++++++++++++++++++----------------------- 4 files changed, 56 insertions(+), 116 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index 1dbbc86fea7f7..0a0d6dc222a48 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -98,7 +98,7 @@ ErrorTupleRaw, report_internal_error, ) -from mypy.graph_utils import prepare_sccs, strongly_connected_components, topsort2 +from mypy.graph_utils import prepare_sccs, strongly_connected_components, topsort from mypy.indirection import TypeIndirectionVisitor from mypy.ipc import BadStatus, IPCClient, IPCMessage, read_status, ready_to_read, receive, send from mypy.messages import MessageBuilder @@ -4314,7 +4314,7 @@ def sorted_components(graph: Graph) -> list[SCC]: scc_dep_map = prepare_sccs_full(strongly_connected_components(vertices, edges), edges) # Topsort. res = [] - for ready in topsort2(scc_dep_map): + for ready in topsort(scc_dep_map): # Sort the sets in ready by reversed smallest State.order. Examples: # # - If ready is [{x}, {y}], x.order == 1, y.order == 2, we get @@ -4349,7 +4349,7 @@ def sorted_components_inner( edges = {id: deps_filtered(graph, vertices, id, pri_max) for id in vertices} sccs = list(strongly_connected_components(vertices, edges)) res = [] - for ready in topsort2(prepare_sccs(sccs, edges)): + for ready in topsort(prepare_sccs(sccs, edges)): res.extend(sorted(ready, key=lambda scc: -min(graph[id].order for id in scc))) return res diff --git a/mypy/graph_utils.py b/mypy/graph_utils.py index 30d1660e4c0a5..aa570971a856c 100644 --- a/mypy/graph_utils.py +++ b/mypy/graph_utils.py @@ -2,7 +2,7 @@ from __future__ import annotations -from collections.abc import Iterable, Iterator, Set as AbstractSet +from collections.abc import Iterator, Set as AbstractSet from typing import TypeVar T = TypeVar("T") @@ -72,68 +72,20 @@ def prepare_sccs( return data -def topsort(data: dict[T, set[T]]) -> Iterable[set[T]]: - """Topological sort. - - Args: - data: A map from vertices to all vertices that it has an edge - connecting it to. NOTE: This data structure - is modified in place -- for normalization purposes, - self-dependencies are removed and entries representing - orphans are added. - - Returns: - An iterator yielding sets of vertices that have an equivalent - ordering. - - Example: - Suppose the input has the following structure: - - {A: {B, C}, B: {D}, C: {D}} - - This is normalized to: - - {A: {B, C}, B: {D}, C: {D}, D: {}} - - The algorithm will yield the following values: - - {D} - {B, C} - {A} - - From https://code.activestate.com/recipes/577413/. - """ - # TODO: Use a faster algorithm? - for k, v in data.items(): - v.discard(k) # Ignore self dependencies. - for item in set.union(*data.values()) - set(data.keys()): - data[item] = set() - while True: - ready = {item for item, dep in data.items() if not dep} - if not ready: - break - yield ready - data = {item: (dep - ready) for item, dep in data.items() if item not in ready} - assert not data, f"A cyclic dependency exists amongst {data!r}" - - -class topsort2(Iterator[set[T]]): # noqa: N801 +class topsort(Iterator[set[T]]): # noqa: N801 """Topological sort using Kahn's algorithm. - This is functionally equivalent to topsort() but avoids rebuilding - the full dict and set objects on each iteration. Instead it uses - in-degree counters and a reverse adjacency list, so the total work - is O(V + E) rather than O(depth * V). + Uses in-degree counters and a reverse adjacency list, so the total work + is O(V + E). Implemented as a class rather than a generator for better mypyc compilation. Args: data: A map from vertices to all vertices that it has an edge - connecting it to. NOTE: This data structure - is modified in place -- for normalization purposes, - self-dependencies are removed and entries representing - orphans are added. + connecting it to. NOTE: dependency sets in this data + structure are modified in place to remove self-dependencies. + Orphans are handled internally and are not added to `data`. """ def __init__(self, data: dict[T, set[T]]) -> None: diff --git a/mypy/solve.py b/mypy/solve.py index 57c002ff9b55c..e3709106996cd 100644 --- a/mypy/solve.py +++ b/mypy/solve.py @@ -8,7 +8,7 @@ from mypy.constraints import SUBTYPE_OF, SUPERTYPE_OF, Constraint, infer_constraints, neg_op from mypy.expandtype import expand_type -from mypy.graph_utils import prepare_sccs, strongly_connected_components, topsort2 +from mypy.graph_utils import prepare_sccs, strongly_connected_components, topsort from mypy.join import join_type_list from mypy.meet import meet_type_list, meet_types from mypy.subtypes import is_subtype @@ -147,7 +147,7 @@ def solve_with_dependent( sccs = list(strongly_connected_components(set(vars), dmap)) if not all(check_linear(scc, lowers, uppers) for scc in sccs): return {}, [] - raw_batches = list(topsort2(prepare_sccs(sccs, dmap))) + raw_batches = list(topsort(prepare_sccs(sccs, dmap))) free_vars = [] free_solutions = {} diff --git a/mypy/test/testgraph.py b/mypy/test/testgraph.py index b1d4daf079815..491fcf427e65d 100644 --- a/mypy/test/testgraph.py +++ b/mypy/test/testgraph.py @@ -8,7 +8,7 @@ from mypy.build import BuildManager, BuildSourceSet, State, order_ascc, sorted_components from mypy.errors import Errors from mypy.fscache import FileSystemCache -from mypy.graph_utils import strongly_connected_components, topsort, topsort2 +from mypy.graph_utils import strongly_connected_components, topsort from mypy.modulefinder import SearchPaths from mypy.options import Options from mypy.plugin import Plugin @@ -20,75 +20,63 @@ class GraphSuite(Suite): def test_topsort_empty(self) -> None: data: dict[AbstractSet[str], set[AbstractSet[str]]] = {} - assert_equal(list(topsort2(data)), []) + assert_equal(list(topsort(data)), []) def test_topsort(self) -> None: - for topsort_func in [topsort, topsort2]: - a = frozenset({"A"}) - b = frozenset({"B"}) - c = frozenset({"C"}) - d = frozenset({"D"}) - data: dict[AbstractSet[str], set[AbstractSet[str]]] = {a: {b, c}, b: {d}, c: {d}} - res = list(topsort_func(data)) - assert_equal(res, [{d}, {b, c}, {a}]) + a = frozenset({"A"}) + b = frozenset({"B"}) + c = frozenset({"C"}) + d = frozenset({"D"}) + data: dict[AbstractSet[str], set[AbstractSet[str]]] = {a: {b, c}, b: {d}, c: {d}} + res = list(topsort(data)) + assert_equal(res, [{d}, {b, c}, {a}]) def test_topsort_orphan(self) -> None: - for topsort_func in [topsort, topsort2]: - a = frozenset({"A"}) - b = frozenset({"B"}) - data: dict[AbstractSet[str], set[AbstractSet[str]]] = {a: {b}} - res = list(topsort_func(data)) - assert_equal(res, [{b}, {a}]) + a = frozenset({"A"}) + b = frozenset({"B"}) + data: dict[AbstractSet[str], set[AbstractSet[str]]] = {a: {b}} + res = list(topsort(data)) + assert_equal(res, [{b}, {a}]) def test_topsort_independent(self) -> None: - for topsort_func in [topsort, topsort2]: - a = frozenset({"A"}) - b = frozenset({"B"}) - c = frozenset({"C"}) - data: dict[AbstractSet[str], set[AbstractSet[str]]] = {a: set(), b: set(), c: set()} - res = list(topsort_func(data)) - assert_equal(res, [{a, b, c}]) + a = frozenset({"A"}) + b = frozenset({"B"}) + c = frozenset({"C"}) + data: dict[AbstractSet[str], set[AbstractSet[str]]] = {a: set(), b: set(), c: set()} + res = list(topsort(data)) + assert_equal(res, [{a, b, c}]) def test_topsort_linear_chain(self) -> None: - for topsort_func in [topsort, topsort2]: - a = frozenset({"A"}) - b = frozenset({"B"}) - c = frozenset({"C"}) - d = frozenset({"D"}) - data: dict[AbstractSet[str], set[AbstractSet[str]]] = { - a: {b}, - b: {c}, - c: {d}, - d: set(), - } - res = list(topsort_func(data)) - assert_equal(res, [{d}, {c}, {b}, {a}]) + a = frozenset({"A"}) + b = frozenset({"B"}) + c = frozenset({"C"}) + d = frozenset({"D"}) + data: dict[AbstractSet[str], set[AbstractSet[str]]] = {a: {b}, b: {c}, c: {d}, d: set()} + res = list(topsort(data)) + assert_equal(res, [{d}, {c}, {b}, {a}]) def test_topsort_self_dependency(self) -> None: - for topsort_func in [topsort, topsort2]: - a = frozenset({"A"}) - b = frozenset({"B"}) - data: dict[AbstractSet[str], set[AbstractSet[str]]] = {a: {a, b}, b: set()} - res = list(topsort_func(data)) - assert_equal(res, [{b}, {a}]) + a = frozenset({"A"}) + b = frozenset({"B"}) + data: dict[AbstractSet[str], set[AbstractSet[str]]] = {a: {a, b}, b: set()} + res = list(topsort(data)) + assert_equal(res, [{b}, {a}]) def test_topsort_orphan_diamond(self) -> None: - for topsort_func in [topsort, topsort2]: - a = frozenset({"A"}) - b = frozenset({"B"}) - c = frozenset({"C"}) - # B and C are orphans -- they appear only in values, not as keys. - data: dict[AbstractSet[str], set[AbstractSet[str]]] = {a: {b, c}} - res = list(topsort_func(data)) - assert_equal(res, [{b, c}, {a}]) + a = frozenset({"A"}) + b = frozenset({"B"}) + c = frozenset({"C"}) + # B and C are orphans -- they appear only in values, not as keys. + data: dict[AbstractSet[str], set[AbstractSet[str]]] = {a: {b, c}} + res = list(topsort(data)) + assert_equal(res, [{b, c}, {a}]) def test_topsort_cycle(self) -> None: - for topsort_func in [topsort, topsort2]: - a = frozenset({"A"}) - b = frozenset({"B"}) - data: dict[AbstractSet[str], set[AbstractSet[str]]] = {a: {b}, b: {a}} - with self.assertRaises(AssertionError): - list(topsort_func(data)) + a = frozenset({"A"}) + b = frozenset({"B"}) + data: dict[AbstractSet[str], set[AbstractSet[str]]] = {a: {b}, b: {a}} + with self.assertRaises(AssertionError): + list(topsort(data)) def test_scc(self) -> None: vertices = {"A", "B", "C", "D"} From b74696c2d7a0b9cb12c0acf0d15a00aae13855bc Mon Sep 17 00:00:00 2001 From: Jukka Lehtosalo Date: Fri, 13 Feb 2026 15:40:12 +0000 Subject: [PATCH 2/2] Improve docstring --- mypy/graph_utils.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/mypy/graph_utils.py b/mypy/graph_utils.py index aa570971a856c..5699bab3f7e2f 100644 --- a/mypy/graph_utils.py +++ b/mypy/graph_utils.py @@ -86,6 +86,25 @@ class topsort(Iterator[set[T]]): # noqa: N801 connecting it to. NOTE: dependency sets in this data structure are modified in place to remove self-dependencies. Orphans are handled internally and are not added to `data`. + + Returns: + An iterator yielding sets of vertices that have an equivalent + ordering. + + Example: + Suppose the input has the following structure: + + {A: {B, C}, B: {D}, C: {D}} + + The algorithm treats orphan dependencies as if normalized to: + + {A: {B, C}, B: {D}, C: {D}, D: {}} + + It will yield the following values: + + {D} + {B, C} + {A} """ def __init__(self, data: dict[T, set[T]]) -> None: