diff --git a/mypy/build.py b/mypy/build.py index 1dbbc86fea7f7..0a0d6dc222a48 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -98,7 +98,7 @@ ErrorTupleRaw, report_internal_error, ) -from mypy.graph_utils import prepare_sccs, strongly_connected_components, topsort2 +from mypy.graph_utils import prepare_sccs, strongly_connected_components, topsort from mypy.indirection import TypeIndirectionVisitor from mypy.ipc import BadStatus, IPCClient, IPCMessage, read_status, ready_to_read, receive, send from mypy.messages import MessageBuilder @@ -4314,7 +4314,7 @@ def sorted_components(graph: Graph) -> list[SCC]: scc_dep_map = prepare_sccs_full(strongly_connected_components(vertices, edges), edges) # Topsort. res = [] - for ready in topsort2(scc_dep_map): + for ready in topsort(scc_dep_map): # Sort the sets in ready by reversed smallest State.order. Examples: # # - If ready is [{x}, {y}], x.order == 1, y.order == 2, we get @@ -4349,7 +4349,7 @@ def sorted_components_inner( edges = {id: deps_filtered(graph, vertices, id, pri_max) for id in vertices} sccs = list(strongly_connected_components(vertices, edges)) res = [] - for ready in topsort2(prepare_sccs(sccs, edges)): + for ready in topsort(prepare_sccs(sccs, edges)): res.extend(sorted(ready, key=lambda scc: -min(graph[id].order for id in scc))) return res diff --git a/mypy/graph_utils.py b/mypy/graph_utils.py index 30d1660e4c0a5..5699bab3f7e2f 100644 --- a/mypy/graph_utils.py +++ b/mypy/graph_utils.py @@ -2,7 +2,7 @@ from __future__ import annotations -from collections.abc import Iterable, Iterator, Set as AbstractSet +from collections.abc import Iterator, Set as AbstractSet from typing import TypeVar T = TypeVar("T") @@ -72,15 +72,20 @@ def prepare_sccs( return data -def topsort(data: dict[T, set[T]]) -> Iterable[set[T]]: - """Topological sort. +class topsort(Iterator[set[T]]): # noqa: N801 + """Topological sort using Kahn's algorithm. + + Uses in-degree counters and a reverse adjacency list, so the total work + is O(V + E). + + Implemented as a class rather than a generator for better mypyc + compilation. Args: data: A map from vertices to all vertices that it has an edge - connecting it to. NOTE: This data structure - is modified in place -- for normalization purposes, - self-dependencies are removed and entries representing - orphans are added. + connecting it to. NOTE: dependency sets in this data + structure are modified in place to remove self-dependencies. + Orphans are handled internally and are not added to `data`. Returns: An iterator yielding sets of vertices that have an equivalent @@ -91,49 +96,15 @@ def topsort(data: dict[T, set[T]]) -> Iterable[set[T]]: {A: {B, C}, B: {D}, C: {D}} - This is normalized to: + The algorithm treats orphan dependencies as if normalized to: {A: {B, C}, B: {D}, C: {D}, D: {}} - The algorithm will yield the following values: + It will yield the following values: {D} {B, C} {A} - - From https://code.activestate.com/recipes/577413/. - """ - # TODO: Use a faster algorithm? - for k, v in data.items(): - v.discard(k) # Ignore self dependencies. - for item in set.union(*data.values()) - set(data.keys()): - data[item] = set() - while True: - ready = {item for item, dep in data.items() if not dep} - if not ready: - break - yield ready - data = {item: (dep - ready) for item, dep in data.items() if item not in ready} - assert not data, f"A cyclic dependency exists amongst {data!r}" - - -class topsort2(Iterator[set[T]]): # noqa: N801 - """Topological sort using Kahn's algorithm. - - This is functionally equivalent to topsort() but avoids rebuilding - the full dict and set objects on each iteration. Instead it uses - in-degree counters and a reverse adjacency list, so the total work - is O(V + E) rather than O(depth * V). - - Implemented as a class rather than a generator for better mypyc - compilation. - - Args: - data: A map from vertices to all vertices that it has an edge - connecting it to. NOTE: This data structure - is modified in place -- for normalization purposes, - self-dependencies are removed and entries representing - orphans are added. """ def __init__(self, data: dict[T, set[T]]) -> None: diff --git a/mypy/solve.py b/mypy/solve.py index 57c002ff9b55c..e3709106996cd 100644 --- a/mypy/solve.py +++ b/mypy/solve.py @@ -8,7 +8,7 @@ from mypy.constraints import SUBTYPE_OF, SUPERTYPE_OF, Constraint, infer_constraints, neg_op from mypy.expandtype import expand_type -from mypy.graph_utils import prepare_sccs, strongly_connected_components, topsort2 +from mypy.graph_utils import prepare_sccs, strongly_connected_components, topsort from mypy.join import join_type_list from mypy.meet import meet_type_list, meet_types from mypy.subtypes import is_subtype @@ -147,7 +147,7 @@ def solve_with_dependent( sccs = list(strongly_connected_components(set(vars), dmap)) if not all(check_linear(scc, lowers, uppers) for scc in sccs): return {}, [] - raw_batches = list(topsort2(prepare_sccs(sccs, dmap))) + raw_batches = list(topsort(prepare_sccs(sccs, dmap))) free_vars = [] free_solutions = {} diff --git a/mypy/test/testgraph.py b/mypy/test/testgraph.py index b1d4daf079815..491fcf427e65d 100644 --- a/mypy/test/testgraph.py +++ b/mypy/test/testgraph.py @@ -8,7 +8,7 @@ from mypy.build import BuildManager, BuildSourceSet, State, order_ascc, sorted_components from mypy.errors import Errors from mypy.fscache import FileSystemCache -from mypy.graph_utils import strongly_connected_components, topsort, topsort2 +from mypy.graph_utils import strongly_connected_components, topsort from mypy.modulefinder import SearchPaths from mypy.options import Options from mypy.plugin import Plugin @@ -20,75 +20,63 @@ class GraphSuite(Suite): def test_topsort_empty(self) -> None: data: dict[AbstractSet[str], set[AbstractSet[str]]] = {} - assert_equal(list(topsort2(data)), []) + assert_equal(list(topsort(data)), []) def test_topsort(self) -> None: - for topsort_func in [topsort, topsort2]: - a = frozenset({"A"}) - b = frozenset({"B"}) - c = frozenset({"C"}) - d = frozenset({"D"}) - data: dict[AbstractSet[str], set[AbstractSet[str]]] = {a: {b, c}, b: {d}, c: {d}} - res = list(topsort_func(data)) - assert_equal(res, [{d}, {b, c}, {a}]) + a = frozenset({"A"}) + b = frozenset({"B"}) + c = frozenset({"C"}) + d = frozenset({"D"}) + data: dict[AbstractSet[str], set[AbstractSet[str]]] = {a: {b, c}, b: {d}, c: {d}} + res = list(topsort(data)) + assert_equal(res, [{d}, {b, c}, {a}]) def test_topsort_orphan(self) -> None: - for topsort_func in [topsort, topsort2]: - a = frozenset({"A"}) - b = frozenset({"B"}) - data: dict[AbstractSet[str], set[AbstractSet[str]]] = {a: {b}} - res = list(topsort_func(data)) - assert_equal(res, [{b}, {a}]) + a = frozenset({"A"}) + b = frozenset({"B"}) + data: dict[AbstractSet[str], set[AbstractSet[str]]] = {a: {b}} + res = list(topsort(data)) + assert_equal(res, [{b}, {a}]) def test_topsort_independent(self) -> None: - for topsort_func in [topsort, topsort2]: - a = frozenset({"A"}) - b = frozenset({"B"}) - c = frozenset({"C"}) - data: dict[AbstractSet[str], set[AbstractSet[str]]] = {a: set(), b: set(), c: set()} - res = list(topsort_func(data)) - assert_equal(res, [{a, b, c}]) + a = frozenset({"A"}) + b = frozenset({"B"}) + c = frozenset({"C"}) + data: dict[AbstractSet[str], set[AbstractSet[str]]] = {a: set(), b: set(), c: set()} + res = list(topsort(data)) + assert_equal(res, [{a, b, c}]) def test_topsort_linear_chain(self) -> None: - for topsort_func in [topsort, topsort2]: - a = frozenset({"A"}) - b = frozenset({"B"}) - c = frozenset({"C"}) - d = frozenset({"D"}) - data: dict[AbstractSet[str], set[AbstractSet[str]]] = { - a: {b}, - b: {c}, - c: {d}, - d: set(), - } - res = list(topsort_func(data)) - assert_equal(res, [{d}, {c}, {b}, {a}]) + a = frozenset({"A"}) + b = frozenset({"B"}) + c = frozenset({"C"}) + d = frozenset({"D"}) + data: dict[AbstractSet[str], set[AbstractSet[str]]] = {a: {b}, b: {c}, c: {d}, d: set()} + res = list(topsort(data)) + assert_equal(res, [{d}, {c}, {b}, {a}]) def test_topsort_self_dependency(self) -> None: - for topsort_func in [topsort, topsort2]: - a = frozenset({"A"}) - b = frozenset({"B"}) - data: dict[AbstractSet[str], set[AbstractSet[str]]] = {a: {a, b}, b: set()} - res = list(topsort_func(data)) - assert_equal(res, [{b}, {a}]) + a = frozenset({"A"}) + b = frozenset({"B"}) + data: dict[AbstractSet[str], set[AbstractSet[str]]] = {a: {a, b}, b: set()} + res = list(topsort(data)) + assert_equal(res, [{b}, {a}]) def test_topsort_orphan_diamond(self) -> None: - for topsort_func in [topsort, topsort2]: - a = frozenset({"A"}) - b = frozenset({"B"}) - c = frozenset({"C"}) - # B and C are orphans -- they appear only in values, not as keys. - data: dict[AbstractSet[str], set[AbstractSet[str]]] = {a: {b, c}} - res = list(topsort_func(data)) - assert_equal(res, [{b, c}, {a}]) + a = frozenset({"A"}) + b = frozenset({"B"}) + c = frozenset({"C"}) + # B and C are orphans -- they appear only in values, not as keys. + data: dict[AbstractSet[str], set[AbstractSet[str]]] = {a: {b, c}} + res = list(topsort(data)) + assert_equal(res, [{b, c}, {a}]) def test_topsort_cycle(self) -> None: - for topsort_func in [topsort, topsort2]: - a = frozenset({"A"}) - b = frozenset({"B"}) - data: dict[AbstractSet[str], set[AbstractSet[str]]] = {a: {b}, b: {a}} - with self.assertRaises(AssertionError): - list(topsort_func(data)) + a = frozenset({"A"}) + b = frozenset({"B"}) + data: dict[AbstractSet[str], set[AbstractSet[str]]] = {a: {b}, b: {a}} + with self.assertRaises(AssertionError): + list(topsort(data)) def test_scc(self) -> None: vertices = {"A", "B", "C", "D"}