Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions AUTHORS
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,7 @@ Kevin J. Foley
Kian Eliasi
Kian-Meng Ang
Kim Soo
Kiril Klein
Kodi B. Arfer
Kojo Idrissa
Kostis Anagnostopoulos
Expand Down
3 changes: 3 additions & 0 deletions changelog/8998.bugfix.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Assertion failures comparing very large strings, lists, or dataclasses no longer hang for a long time (sometimes minutes) while building the diff.

When the inputs are large enough that :func:`difflib.ndiff` would be pathologically slow, pytest now falls back to a faster line-level diff and notes this in the output.
5 changes: 5 additions & 0 deletions src/_pytest/assertion/_compare_sequence.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@

from _pytest._io.pprint import PrettyPrinter
from _pytest._io.saferepr import saferepr
from _pytest.assertion._diff import fast_unified_diff
from _pytest.assertion._diff import ndiff_too_slow
from _pytest.assertion._typing import _HighlightFunc
from _pytest.compat import running_on_ci

Expand All @@ -27,6 +29,9 @@ def _compare_eq_iterable(

yield ""
yield "Full diff:"
if ndiff_too_slow(left_formatting, right_formatting):
yield from fast_unified_diff(left_formatting, right_formatting, highlighter)
return
# "right" is the expected base against which we compare "left",
# see https://github.com/pytest-dev/pytest/issues/3333
yield from highlighter(
Expand Down
61 changes: 61 additions & 0 deletions src/_pytest/assertion/_diff.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
from __future__ import annotations

from collections.abc import Iterator
from collections.abc import Sequence

from _pytest.assertion._typing import _HighlightFunc


# Above this combined input size (in characters), ``difflib.ndiff`` becomes
# pathologically slow: its character-level "fancy replace" step is quadratic in
# the size of the differing region, so a few tens of kilobytes of differing text
# can hang for minutes (see issue #8998).
NDIFF_MAX_INPUT_SIZE = 10_000

# Above this number of lines, both ``ndiff`` and ``unified_diff`` get slow,
# since the underlying ``SequenceMatcher`` is quadratic in the number of lines
# (a large nested structure can pretty-print to hundreds of thousands of lines).
# We both fall back and cap the fallback's input at this many lines.
DIFF_MAX_LINES = 1_000


def ndiff_too_slow(left_lines: Sequence[str], right_lines: Sequence[str]) -> bool:
"""Return True if ``difflib.ndiff`` would likely be pathologically slow."""
if len(left_lines) > DIFF_MAX_LINES or len(right_lines) > DIFF_MAX_LINES:
return True
size = sum(len(line) for line in left_lines) + sum(
len(line) for line in right_lines
)
return size > NDIFF_MAX_INPUT_SIZE


def fast_unified_diff(
left_lines: Sequence[str],
right_lines: Sequence[str],
highlighter: _HighlightFunc,
) -> Iterator[str]:
"""Yield a fast, coarse line-level diff for inputs too large for ``ndiff``.

Unlike ``ndiff`` this does not produce character-level "?" guide lines, and
it only diffs the first ``DIFF_MAX_LINES`` lines of each side, but it
completes in milliseconds where ``ndiff`` would hang (see issue #8998).

"right" is the expected base against which we compare "left",
see https://github.com/pytest-dev/pytest/issues/3333.
"""
from difflib import unified_diff

yield (
f"Diff too large to compute in full (over {NDIFF_MAX_INPUT_SIZE} "
"characters); showing a faster line-level diff instead:"
)
left = [line.rstrip("\n") for line in left_lines[:DIFF_MAX_LINES]]
right = [line.rstrip("\n") for line in right_lines[:DIFF_MAX_LINES]]
hidden = max(len(left_lines), len(right_lines)) - DIFF_MAX_LINES
if hidden > 0:
yield f"Diffing only the first {DIFF_MAX_LINES} lines; {hidden} more hidden"
diff = unified_diff(right, left, n=3, lineterm="")
# The first two lines are the always-empty "--- "/"+++ " file headers.
next(diff, None)
next(diff, None)
yield from highlighter("\n".join(diff), lexer="diff").splitlines()
12 changes: 8 additions & 4 deletions src/_pytest/assertion/compare_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
from collections.abc import Iterator

from _pytest._io.saferepr import saferepr
from _pytest.assertion._diff import fast_unified_diff
from _pytest.assertion._diff import ndiff_too_slow
from _pytest.assertion._typing import _AssertionTextDiffStyle
from _pytest.assertion._typing import _HighlightFunc
from _pytest.assertion.highlight import dummy_highlighter
Expand Down Expand Up @@ -75,13 +77,15 @@ def _diff_text(
left = repr(str(left))
right = repr(str(right))
yield "Strings contain only whitespace, escaping them using repr()"
left_lines = left.splitlines(keepends)
right_lines = right.splitlines(keepends)
if ndiff_too_slow(left_lines, right_lines):
yield from fast_unified_diff(left_lines, right_lines, highlighter)
return
# "right" is the expected base against which we compare "left",
# see https://github.com/pytest-dev/pytest/issues/3333
yield from highlighter(
"\n".join(
line.strip("\n")
for line in ndiff(right.splitlines(keepends), left.splitlines(keepends))
),
"\n".join(line.strip("\n") for line in ndiff(right_lines, left_lines)),
lexer="diff",
).splitlines()

Expand Down
51 changes: 51 additions & 0 deletions testing/test_assertion.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from _pytest.assertion import truncate
from _pytest.assertion import util
from _pytest.assertion._compare_any import _compare_eq_cls
from _pytest.assertion._diff import ndiff_too_slow
from _pytest.assertion.compare_text import _compare_eq_text
from _pytest.config import Config as _Config
from _pytest.monkeypatch import MonkeyPatch
Expand Down Expand Up @@ -459,6 +460,19 @@ def callequal(
)


class TestNdiffTooSlow:
"""Heuristic guarding against pathologically slow diffs (#8998)."""

def test_small_input_uses_ndiff(self) -> None:
assert ndiff_too_slow(["spam"], ["eggs"]) is False

def test_many_characters_is_too_slow(self) -> None:
assert ndiff_too_slow(["a" * 6000], ["b" * 6000]) is True

def test_many_lines_is_too_slow(self) -> None:
assert ndiff_too_slow(["x"] * 1001, ["y"]) is True


class TestAssert_reprcompare:
def test_different_types(self) -> None:
assert callequal([0, 1], "foo") is None
Expand Down Expand Up @@ -513,6 +527,32 @@ def test_text_skipping_verbose(self) -> None:
assert "- " + "a" * 50 + "eggs" in lines
assert "+ " + "a" * 50 + "spam" in lines

def test_text_diff_large_input_skips_ndiff(self) -> None:
# A single huge differing line is above the character cutoff and falls
# back to a fast line-level diff instead of the pathologically slow
# ndiff (#8998).
left = "a" + "x" * 20000
right = "b" + "y" * 20000
lines = callequal(left, right, verbose=1)
assert lines is not None
assert any("Diff too large to compute in full" in line for line in lines)
# The character-level "?" guide lines produced by ndiff must not appear.
assert not any(line.startswith("? ") for line in lines)

def test_text_diff_many_lines_skips_ndiff(self) -> None:
# Many lines are above the line cutoff and fall back, capping the
# number of lines actually diffed (#8998).
left = "\n".join(f"left line {i}" for i in range(2000))
right = "\n".join(f"right line {i}" for i in range(2000))
lines = callequal(left, right, verbose=1)
assert lines is not None
assert any("Diff too large to compute in full" in line for line in lines)
assert any("Diffing only the first 1000 lines" in line for line in lines)
assert not any(line.startswith("? ") for line in lines)
# The fallback still shows which lines differ.
assert "-right line 0" in lines
assert "+left line 0" in lines

def test_multiline_text_diff(self) -> None:
left = "foo\nspam\nbar"
right = "foo\neggs\nbar"
Expand Down Expand Up @@ -673,6 +713,17 @@ def test_iterable_quiet(self) -> None:
"Use -v to get more diff",
]

def test_iterable_large_input_skips_ndiff(self) -> None:
# Large iterables fall back to a fast line-level diff instead of the
# pathologically slow ndiff over their pprint output (#8998).
left = [f"item-{i}" for i in range(2000)]
right = [f"other-{i}" for i in range(2000)]
lines = callequal(left, right, verbose=1)
assert lines is not None
assert "Full diff:" in lines
assert any("Diff too large to compute in full" in line for line in lines)
assert not any(line.startswith("? ") for line in lines)

def test_iterable_full_diff_ci(
self, monkeypatch: MonkeyPatch, pytester: Pytester
) -> None:
Expand Down
Loading