From f0fb87826c049e02afe43f387dd250ad82baaa71 Mon Sep 17 00:00:00 2001 From: Kuntal Majumder Date: Fri, 26 Jul 2024 21:46:21 +0200 Subject: [PATCH 1/8] Detect recursively referencing requirements files Fixes #12653 --- news/12653.feature.rst | 2 ++ src/pip/_internal/req/req_file.py | 25 +++++++++++++++++++++---- tests/unit/test_req_file.py | 28 ++++++++++++++++++++++++++++ 3 files changed, 51 insertions(+), 4 deletions(-) create mode 100644 news/12653.feature.rst diff --git a/news/12653.feature.rst b/news/12653.feature.rst new file mode 100644 index 00000000000..83e1a46e6a8 --- /dev/null +++ b/news/12653.feature.rst @@ -0,0 +1,2 @@ +Detect recursively referencing requirements files and help users identify +the source. diff --git a/src/pip/_internal/req/req_file.py b/src/pip/_internal/req/req_file.py index 53ad8674cd8..83cacc5b836 100644 --- a/src/pip/_internal/req/req_file.py +++ b/src/pip/_internal/req/req_file.py @@ -324,11 +324,14 @@ def __init__( ) -> None: self._session = session self._line_parser = line_parser + self._parsed_files: dict[str, Optional[str]] = {} def parse( self, filename: str, constraint: bool ) -> Generator[ParsedLine, None, None]: """Parse a given file, yielding parsed lines.""" + filename = os.path.abspath(filename) + self._parsed_files[filename] = None # The primary requirements file passed yield from self._parse_and_recurse(filename, constraint) def _parse_and_recurse( @@ -353,11 +356,25 @@ def _parse_and_recurse( # original file and nested file are paths elif not SCHEME_RE.search(req_path): # do a join so relative paths work - req_path = os.path.join( - os.path.dirname(filename), - req_path, + # and then abspath so that we can identify recursive references + req_path = os.path.abspath( + os.path.join( + os.path.dirname(filename), + req_path, + ) ) - + if req_path in self._parsed_files.keys(): + initial_file = self._parsed_files[req_path] + tail = ( + f"and again in {initial_file}" + if initial_file is not None + else "" + ) + raise RecursionError( + f"{req_path} recursively references itself in {filename} {tail}" + ) + # Keeping a track where was each file first included in + self._parsed_files[req_path] = filename yield from self._parse_and_recurse(req_path, nested_constraint) else: yield line diff --git a/tests/unit/test_req_file.py b/tests/unit/test_req_file.py index 236b666fb34..c536bdc37cf 100644 --- a/tests/unit/test_req_file.py +++ b/tests/unit/test_req_file.py @@ -345,6 +345,34 @@ def test_nested_constraints_file( assert reqs[0].name == req_name assert reqs[0].constraint + def test_recursive_requirements_file( + self, tmpdir: Path, session: PipSession + ) -> None: + req_files: list[Path] = [] + req_file_count = 4 + for i in range(req_file_count): + req_file = tmpdir / f"{i}.txt" + req_file.write_text(f"-r {(i+1) % req_file_count}.txt") + req_files.append(req_file) + + # When the passed requirements file recursively references itself + with pytest.raises( + RecursionError, + match=f"{req_files[0]} recursively references itself" + f" in {req_files[req_file_count - 1]}", + ): + list(parse_requirements(filename=str(req_files[0]), session=session)) + + # When one of other the requirements file recursively references itself + req_files[req_file_count - 1].write_text(f"-r {req_files[req_file_count - 2]}") + with pytest.raises( + RecursionError, + match=f"{req_files[req_file_count - 2]} recursively references itself " + f"in {req_files[req_file_count - 1]} and again in" + f" {req_files[req_file_count - 3]}", + ): + list(parse_requirements(filename=str(req_files[0]), session=session)) + def test_options_on_a_requirement_line(self, line_processor: LineProcessor) -> None: line = ( 'SomeProject --global-option="yo3" --global-option "yo4" ' From 86a8a289c1842ae939d6650536a2d70ced20d75d Mon Sep 17 00:00:00 2001 From: Kuntal Majumder Date: Sat, 27 Jul 2024 22:15:55 +0200 Subject: [PATCH 2/8] Add a unit test to test the absolute path change --- src/pip/_internal/req/req_file.py | 5 ++-- tests/unit/test_req_file.py | 44 +++++++++++++++++++++++++++---- 2 files changed, 42 insertions(+), 7 deletions(-) diff --git a/src/pip/_internal/req/req_file.py b/src/pip/_internal/req/req_file.py index 83cacc5b836..b8db037c7c1 100644 --- a/src/pip/_internal/req/req_file.py +++ b/src/pip/_internal/req/req_file.py @@ -330,8 +330,9 @@ def parse( self, filename: str, constraint: bool ) -> Generator[ParsedLine, None, None]: """Parse a given file, yielding parsed lines.""" - filename = os.path.abspath(filename) - self._parsed_files[filename] = None # The primary requirements file passed + self._parsed_files[os.path.abspath(filename)] = ( + None # The primary requirements file passed + ) yield from self._parse_and_recurse(filename, constraint) def _parse_and_recurse( diff --git a/tests/unit/test_req_file.py b/tests/unit/test_req_file.py index c536bdc37cf..dc7e519ef04 100644 --- a/tests/unit/test_req_file.py +++ b/tests/unit/test_req_file.py @@ -9,6 +9,7 @@ import pytest +import pip._internal.exceptions import pip._internal.req.req_file # this will be monkeypatched from pip._internal.exceptions import InstallationError, RequirementsFileParseError from pip._internal.index.package_finder import PackageFinder @@ -358,8 +359,10 @@ def test_recursive_requirements_file( # When the passed requirements file recursively references itself with pytest.raises( RecursionError, - match=f"{req_files[0]} recursively references itself" - f" in {req_files[req_file_count - 1]}", + match=( + f"{req_files[0]} recursively references itself" + f" in {req_files[req_file_count - 1]}" + ), ): list(parse_requirements(filename=str(req_files[0]), session=session)) @@ -367,12 +370,43 @@ def test_recursive_requirements_file( req_files[req_file_count - 1].write_text(f"-r {req_files[req_file_count - 2]}") with pytest.raises( RecursionError, - match=f"{req_files[req_file_count - 2]} recursively references itself " - f"in {req_files[req_file_count - 1]} and again in" - f" {req_files[req_file_count - 3]}", + match=( + f"{req_files[req_file_count - 2]} recursively references itself " + f"in {req_files[req_file_count - 1]} and again in" + f" {req_files[req_file_count - 3]}" + ), ): list(parse_requirements(filename=str(req_files[0]), session=session)) + def test_recursive_relative_requirements_file( + self, monkeypatch: pytest.MonkeyPatch, tmpdir: Path, session: PipSession + ) -> None: + root_req_file = tmpdir / "root.txt" + (tmpdir / "nest" / "nest").mkdir(parents=True) + level_1_req_file = tmpdir / "nest" / "level_1.txt" + level_2_req_file = tmpdir / "nest" / "nest" / "level_2.txt" + + root_req_file.write_text("-r nest/level_1.txt") + level_1_req_file.write_text("-r nest/level_2.txt") + level_2_req_file.write_text("-r ../../root.txt") + + with pytest.raises( + RecursionError, + match=( + f"{root_req_file} recursively references itself in {level_2_req_file}" + ), + ): + list(parse_requirements(filename=str(root_req_file), session=session)) + + # If we don't use absolute path, it keeps on chaining the filename resulting in + # a huge filename, since a != a/b/c/../../ + monkeypatch.setattr(os.path, "abspath", lambda x: x) + with pytest.raises( + pip._internal.exceptions.InstallationError, + match=r"Could not open requirements file: \[Errno 36\] File name too long:", + ): + list(parse_requirements(filename=str(root_req_file), session=session)) + def test_options_on_a_requirement_line(self, line_processor: LineProcessor) -> None: line = ( 'SomeProject --global-option="yo3" --global-option "yo4" ' From 2adea729bbd0544e639675e26edb664dc82ede8a Mon Sep 17 00:00:00 2001 From: Kuntal Majumder Date: Sat, 27 Jul 2024 22:31:45 +0200 Subject: [PATCH 3/8] Use platform independent regex --- tests/unit/test_req_file.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/test_req_file.py b/tests/unit/test_req_file.py index dc7e519ef04..7df7b3ab05f 100644 --- a/tests/unit/test_req_file.py +++ b/tests/unit/test_req_file.py @@ -403,7 +403,7 @@ def test_recursive_relative_requirements_file( monkeypatch.setattr(os.path, "abspath", lambda x: x) with pytest.raises( pip._internal.exceptions.InstallationError, - match=r"Could not open requirements file: \[Errno 36\] File name too long:", + match="Could not open requirements file: .* File name too long:", ): list(parse_requirements(filename=str(root_req_file), session=session)) From 50202fdabe3bf718cca404b14fae83864e08b550 Mon Sep 17 00:00:00 2001 From: Kuntal Majumder Date: Sat, 27 Jul 2024 23:32:23 +0200 Subject: [PATCH 4/8] Escape Windows path forward slashes in regex --- tests/unit/test_req_file.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/tests/unit/test_req_file.py b/tests/unit/test_req_file.py index 7df7b3ab05f..4e0928ffce6 100644 --- a/tests/unit/test_req_file.py +++ b/tests/unit/test_req_file.py @@ -79,6 +79,12 @@ def parse_reqfile( ) +def path_to_string(p: Path) -> str: + # Escape the back slashes in windows path before using it + # in a regex + return str(p).replace("\\", "\\\\") + + def test_read_file_url(tmp_path: Path, session: PipSession) -> None: reqs = tmp_path.joinpath("requirements.txt") reqs.write_text("foo") @@ -360,8 +366,8 @@ def test_recursive_requirements_file( with pytest.raises( RecursionError, match=( - f"{req_files[0]} recursively references itself" - f" in {req_files[req_file_count - 1]}" + f"{path_to_string(req_files[0])} recursively references itself" + f" in {path_to_string(req_files[req_file_count - 1])}" ), ): list(parse_requirements(filename=str(req_files[0]), session=session)) @@ -371,9 +377,10 @@ def test_recursive_requirements_file( with pytest.raises( RecursionError, match=( - f"{req_files[req_file_count - 2]} recursively references itself " - f"in {req_files[req_file_count - 1]} and again in" - f" {req_files[req_file_count - 3]}" + f"{path_to_string(req_files[req_file_count - 2])} recursively" + " references itself in" + f" {path_to_string(req_files[req_file_count - 1])} and again in" + f" {path_to_string(req_files[req_file_count - 3])}" ), ): list(parse_requirements(filename=str(req_files[0]), session=session)) @@ -393,7 +400,8 @@ def test_recursive_relative_requirements_file( with pytest.raises( RecursionError, match=( - f"{root_req_file} recursively references itself in {level_2_req_file}" + f"{path_to_string(root_req_file)} recursively references itself in" + f" {path_to_string(level_2_req_file)}" ), ): list(parse_requirements(filename=str(root_req_file), session=session)) From ed63b3d5b435d2e8741c3316bb9807cd0a8717f5 Mon Sep 17 00:00:00 2001 From: Kuntal Majumder Date: Sun, 28 Jul 2024 00:32:12 +0200 Subject: [PATCH 5/8] Remove inconsistent test and fix filename write --- tests/unit/test_req_file.py | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/tests/unit/test_req_file.py b/tests/unit/test_req_file.py index 4e0928ffce6..10a4c54c927 100644 --- a/tests/unit/test_req_file.py +++ b/tests/unit/test_req_file.py @@ -373,7 +373,10 @@ def test_recursive_requirements_file( list(parse_requirements(filename=str(req_files[0]), session=session)) # When one of other the requirements file recursively references itself - req_files[req_file_count - 1].write_text(f"-r {req_files[req_file_count - 2]}") + req_files[req_file_count - 1].write_text( + # Just name since they are in the same folder + f"-r {req_files[req_file_count - 2].name}" + ) with pytest.raises( RecursionError, match=( @@ -386,7 +389,7 @@ def test_recursive_requirements_file( list(parse_requirements(filename=str(req_files[0]), session=session)) def test_recursive_relative_requirements_file( - self, monkeypatch: pytest.MonkeyPatch, tmpdir: Path, session: PipSession + self, tmpdir: Path, session: PipSession ) -> None: root_req_file = tmpdir / "root.txt" (tmpdir / "nest" / "nest").mkdir(parents=True) @@ -406,15 +409,6 @@ def test_recursive_relative_requirements_file( ): list(parse_requirements(filename=str(root_req_file), session=session)) - # If we don't use absolute path, it keeps on chaining the filename resulting in - # a huge filename, since a != a/b/c/../../ - monkeypatch.setattr(os.path, "abspath", lambda x: x) - with pytest.raises( - pip._internal.exceptions.InstallationError, - match="Could not open requirements file: .* File name too long:", - ): - list(parse_requirements(filename=str(root_req_file), session=session)) - def test_options_on_a_requirement_line(self, line_processor: LineProcessor) -> None: line = ( 'SomeProject --global-option="yo3" --global-option "yo4" ' From 59dbbba59d45c59cf3807b9f5be58762a6afa8a9 Mon Sep 17 00:00:00 2001 From: Kuntal Majumder Date: Sun, 28 Jul 2024 21:37:32 +0200 Subject: [PATCH 6/8] Use RequirementsFileParserError instead of RecursionError --- src/pip/_internal/req/req_file.py | 2 +- tests/unit/test_req_file.py | 7 +++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/pip/_internal/req/req_file.py b/src/pip/_internal/req/req_file.py index b8db037c7c1..b15a6ea9a3e 100644 --- a/src/pip/_internal/req/req_file.py +++ b/src/pip/_internal/req/req_file.py @@ -371,7 +371,7 @@ def _parse_and_recurse( if initial_file is not None else "" ) - raise RecursionError( + raise RequirementsFileParseError( f"{req_path} recursively references itself in {filename} {tail}" ) # Keeping a track where was each file first included in diff --git a/tests/unit/test_req_file.py b/tests/unit/test_req_file.py index 10a4c54c927..d0501e2c254 100644 --- a/tests/unit/test_req_file.py +++ b/tests/unit/test_req_file.py @@ -9,7 +9,6 @@ import pytest -import pip._internal.exceptions import pip._internal.req.req_file # this will be monkeypatched from pip._internal.exceptions import InstallationError, RequirementsFileParseError from pip._internal.index.package_finder import PackageFinder @@ -364,7 +363,7 @@ def test_recursive_requirements_file( # When the passed requirements file recursively references itself with pytest.raises( - RecursionError, + RequirementsFileParseError, match=( f"{path_to_string(req_files[0])} recursively references itself" f" in {path_to_string(req_files[req_file_count - 1])}" @@ -378,7 +377,7 @@ def test_recursive_requirements_file( f"-r {req_files[req_file_count - 2].name}" ) with pytest.raises( - RecursionError, + RequirementsFileParseError, match=( f"{path_to_string(req_files[req_file_count - 2])} recursively" " references itself in" @@ -401,7 +400,7 @@ def test_recursive_relative_requirements_file( level_2_req_file.write_text("-r ../../root.txt") with pytest.raises( - RecursionError, + RequirementsFileParseError, match=( f"{path_to_string(root_req_file)} recursively references itself in" f" {path_to_string(level_2_req_file)}" From 8d15e3b37cae43d349d3eafefc1bda0b02ab8bea Mon Sep 17 00:00:00 2001 From: Kuntal Majumder Date: Tue, 20 Aug 2024 20:09:48 +0200 Subject: [PATCH 7/8] Use re.escape for windows paths --- src/pip/_internal/req/req_file.py | 4 ++-- tests/unit/test_req_file.py | 21 ++++++++------------- 2 files changed, 10 insertions(+), 15 deletions(-) diff --git a/src/pip/_internal/req/req_file.py b/src/pip/_internal/req/req_file.py index b15a6ea9a3e..195bde8d12e 100644 --- a/src/pip/_internal/req/req_file.py +++ b/src/pip/_internal/req/req_file.py @@ -367,12 +367,12 @@ def _parse_and_recurse( if req_path in self._parsed_files.keys(): initial_file = self._parsed_files[req_path] tail = ( - f"and again in {initial_file}" + f" and again in {initial_file}" if initial_file is not None else "" ) raise RequirementsFileParseError( - f"{req_path} recursively references itself in {filename} {tail}" + f"{req_path} recursively references itself in {filename}{tail}" ) # Keeping a track where was each file first included in self._parsed_files[req_path] = filename diff --git a/tests/unit/test_req_file.py b/tests/unit/test_req_file.py index d0501e2c254..1ddccdee58e 100644 --- a/tests/unit/test_req_file.py +++ b/tests/unit/test_req_file.py @@ -1,6 +1,7 @@ import collections import logging import os +import re import textwrap from optparse import Values from pathlib import Path @@ -78,12 +79,6 @@ def parse_reqfile( ) -def path_to_string(p: Path) -> str: - # Escape the back slashes in windows path before using it - # in a regex - return str(p).replace("\\", "\\\\") - - def test_read_file_url(tmp_path: Path, session: PipSession) -> None: reqs = tmp_path.joinpath("requirements.txt") reqs.write_text("foo") @@ -365,8 +360,8 @@ def test_recursive_requirements_file( with pytest.raises( RequirementsFileParseError, match=( - f"{path_to_string(req_files[0])} recursively references itself" - f" in {path_to_string(req_files[req_file_count - 1])}" + f"{re.escape(str(req_files[0]))} recursively references itself" + f" in {re.escape(str(req_files[req_file_count - 1]))}" ), ): list(parse_requirements(filename=str(req_files[0]), session=session)) @@ -379,10 +374,10 @@ def test_recursive_requirements_file( with pytest.raises( RequirementsFileParseError, match=( - f"{path_to_string(req_files[req_file_count - 2])} recursively" + f"{re.escape(str(req_files[req_file_count - 2]))} recursively" " references itself in" - f" {path_to_string(req_files[req_file_count - 1])} and again in" - f" {path_to_string(req_files[req_file_count - 3])}" + f" {re.escape(str(req_files[req_file_count - 1]))} and again in" + f" {re.escape(str(req_files[req_file_count - 3]))}" ), ): list(parse_requirements(filename=str(req_files[0]), session=session)) @@ -402,8 +397,8 @@ def test_recursive_relative_requirements_file( with pytest.raises( RequirementsFileParseError, match=( - f"{path_to_string(root_req_file)} recursively references itself in" - f" {path_to_string(level_2_req_file)}" + f"{re.escape(str(root_req_file))} recursively references itself in" + f" {re.escape(str(level_2_req_file))}" ), ): list(parse_requirements(filename=str(root_req_file), session=session)) From 26c6a458bb4dc99f670230b467d22c84ac11c264 Mon Sep 17 00:00:00 2001 From: Kuntal Majumder Date: Tue, 3 Sep 2024 22:03:26 +0200 Subject: [PATCH 8/8] Remove redundant .keys() call --- src/pip/_internal/req/req_file.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pip/_internal/req/req_file.py b/src/pip/_internal/req/req_file.py index 195bde8d12e..55c0726f370 100644 --- a/src/pip/_internal/req/req_file.py +++ b/src/pip/_internal/req/req_file.py @@ -364,7 +364,7 @@ def _parse_and_recurse( req_path, ) ) - if req_path in self._parsed_files.keys(): + if req_path in self._parsed_files: initial_file = self._parsed_files[req_path] tail = ( f" and again in {initial_file}"