From 9e76bd2900f3f87fee9bb59579c0d5b34597fbea Mon Sep 17 00:00:00 2001 From: Michael Hanke Date: Tue, 29 Oct 2024 12:11:57 +0100 Subject: [PATCH] feat: `gitpathspec` module for handling Git's pathspecs Besides the basic type provided here, the key feature is the translation of pathspecs into the scope of a subdirectory. This is the foundational support for implementations that focus on submodule-recursion combined with pathspecs. Git does not generally provide this support in its commands. --- datasalad/gitpathspec/__init__.py | 17 + datasalad/gitpathspec/pathspec.py | 321 +++++++++++++ datasalad/gitpathspec/pathspecs.py | 146 ++++++ datasalad/gitpathspec/tests/__init__.py | 0 .../gitpathspec/tests/test_gitpathspec.py | 442 ++++++++++++++++++ docs/index.rst | 3 +- 6 files changed, 928 insertions(+), 1 deletion(-) create mode 100644 datasalad/gitpathspec/__init__.py create mode 100644 datasalad/gitpathspec/pathspec.py create mode 100644 datasalad/gitpathspec/pathspecs.py create mode 100644 datasalad/gitpathspec/tests/__init__.py create mode 100644 datasalad/gitpathspec/tests/test_gitpathspec.py diff --git a/datasalad/gitpathspec/__init__.py b/datasalad/gitpathspec/__init__.py new file mode 100644 index 0000000..3a586bd --- /dev/null +++ b/datasalad/gitpathspec/__init__.py @@ -0,0 +1,17 @@ +"""Handling of Git's pathspecs with subdirectory mangling support + +This functionality can be used to add support for pathspecs to implementations +that rely on Git commands that do not support submodule recursion directly. + +.. currentmodule:: datasalad.gitpathspec +.. 
autosummary:: + :toctree: generated + + GitPathSpec + GitPathSpecs +""" + +__all__ = ['GitPathSpec', 'GitPathSpecs'] + +from .pathspec import GitPathSpec +from .pathspecs import GitPathSpecs diff --git a/datasalad/gitpathspec/pathspec.py b/datasalad/gitpathspec/pathspec.py new file mode 100644 index 0000000..2899b16 --- /dev/null +++ b/datasalad/gitpathspec/pathspec.py @@ -0,0 +1,321 @@ +# +# Intentionally written without importing datalad code +# +from __future__ import annotations + +import posixpath +from dataclasses import dataclass +from fnmatch import fnmatch +from typing import Generator + + +@dataclass(frozen=True) +class GitPathSpec: + """Support class for patterns used to limit paths in Git commands + + From the Git documentation: + + Pathspecs are used on the command line of "git ls-files", "git ls-tree", + "git add", "git grep", "git diff", "git checkout", and many other + commands to limit the scope of operations to some subset of the tree + or working tree. + + Apart from providing a dedicated type for a pathspec, the main purpose + of this functionality is to take a pathspec that is valid in the context + of one (top-level) repository, and translate it such that the set of + pathspecs given to the same command running on/in a submodule/subdirectory + gives the same results, as if the initial top-level invocation reported + them (if it even could). See the ``for_subdir()`` method for more. 
+ + >>> # simple stripping of leading directory + >>> ps = GitPathSpec.from_pathspec_str('dir/*.jpg') + >>> [str(i) for i in ps.for_subdir('dir')] + ['*.jpg'] + >>> # match against magic pathspecs + >>> ps = GitPathSpec.from_pathspec_str(':(glob)**r/*.jpg') + >>> # longest and shortest match are produced + >>> [str(i) for i in ps.for_subdir('dir')] + [':(glob)**r/*.jpg', ':(glob)*.jpg'] + >>> [str(i) for i in ps.for_subdir('root/some/dir')] + [':(glob)**r/*.jpg', ':(glob)*.jpg'] + >>> # support for special 'no-pathspec' pathspec + >>> ps = GitPathSpec.from_pathspec_str(':') + >>> ps.is_nopathspecs + True + + .. seealso:: + + - Entry in the Git glossary: + https://git-scm.com/docs/gitglossary#Documentation/gitglossary.txt-aiddefpathspecapathspec + - Informative, more elaborate description of pathspecs: + https://css-tricks.com/git-pathspecs-and-how-to-use-them/ + """ + + # TODO: think about adding support for another magic that represents + # the root of a repository hierarchy (amending 'top', which is + # the root of the working tree -- but presumably for a single repository + spectypes: tuple[str, ...] 
+ """Long-form pathspec type identifiers""" + dirprefix: str + """Directory prefix (pathspec up to the last slash) limiting the scope""" + pattern: str | None + """Pattern to match paths against using ``fnmatch``""" + + @property + def is_nopathspecs(self) -> bool: + """Whether this pathspec is the "no pathspecs" pathspec, AKA ``':'``""" + return not self.spectypes and not self.dirprefix and not self.pattern + + def __str__(self) -> str: + """Generate normalized (long-form) pathspec""" + if self.is_nopathspecs: + return ':' + ps = '' + if self.spectypes: + ps += ':(' + ps += ','.join(self.spectypes) + ps += ')' + ps += self.get_joined_pattern() + return ps + + def get_joined_pattern(self): + return ( + f'{self.dirprefix if self.dirprefix else ""}' + f'{"/" if self.dirprefix else ""}' + f'{self.pattern if self.pattern else ""}' + ) + + def for_subdir(self, subdir: str) -> list[GitPathSpec]: + """Translate a pathspec into the scope of a subdirectory. + + The processing implemented here is purely lexical. This means that it + works without matching against actual file system (or Git tree) + content. Consequently, to some degree, overly broad results are + produced, but at the same time use cases are supported where there + is nothing (yet) to match against (e.g., a not-yet-cloned submodule). + + A pathspec with a ``top`` magic is produced unmodified, as these are + defined relative to the root of a repository, not relative to a base + directory. As a consequence, such pathspecs will automatically + refer to a submodule root when the target directory is contained in + one. + + Parameters + ---------- + subdir: str + Relative path in POSIX notation + + Returns + ------- + list + When an empty list is returned, this indicates that the pathspec + cannot be translated to the given ``subdir``, because it does + not match the ``subdir`` itself. If a pathspec translates to + "no pathspecs" (``':'``), a list with a dedicated ':' pathspec is + returned. 
+ """ + # special case of a non-translation (pretty much only here to + # make some test implementations simpler + if not subdir: + return [self] + + return list(yield_subdir_match_remainder_pathspecs(subdir, self)) + + @classmethod + def from_pathspec_str( + cls, + pathspec: str, + ) -> GitPathSpec: + """Parse a string-form pathspec into types, prefix, and pattern""" + spectypes = [] + dirprefix = None + pattern = None + + if pathspec == ':': + # shortcut for the special no-path-spec pathspec + return GitPathSpec((), '', None) + + if pathspec.startswith(':('): + # long-form magic + magic, pattern = pathspec[2:].split(')', maxsplit=1) + spectypes = magic.split(',') + elif pathspec.startswith(':'): + # short-form magic + magic_signatures = { + '/': 'top', + '!': 'exclude', + '^': 'exclude', + ':': None, + } + pattern = pathspec[1:] + spectypes = [] + for i in range(1, len(pathspec)): + sig = magic_signatures.get(pathspec[i]) + if sig is None: + pattern = pathspec[i:] + break + spectypes.append(sig) + else: + pattern = pathspec + + # raise when glob and literal magic markers are present + # simultaneously + if 'glob' in spectypes and 'literal' in spectypes: + msg = "'glob' magic is incompatible with 'literal' magic" + raise ValueError(msg) + + # split off dirprefix + dirprefix, pattern = _split_prefix_pattern(pattern) + + return cls( + spectypes=tuple(spectypes), + dirprefix=dirprefix, + pattern=pattern, + ) + + +def _split_prefix_pattern(pathspec): + # > the pathspec up to the last slash represents a directory prefix. + # > The scope of that pathspec is limited to that subtree. 
+ try: + last_slash_idx = pathspec[::-1].index('/') + except ValueError: + # everything is the pattern + dirprefix = None + pattern = pathspec + else: + dirprefix = pathspec[: -last_slash_idx - 1] + pattern = pathspec[-last_slash_idx:] if last_slash_idx > 0 else None + return dirprefix, pattern + + +def yield_subdir_match_remainder_pathspecs( + subdir: str, + pathspec: GitPathSpec, +) -> Generator[GitPathSpec, None, None]: + """Translate a pathspec into a set of possible subdirectory pathspecs + + The processing implemented here is purely lexical. This means that it + works without matching against actual file system (or Git tree) content. + This means that it yields, to some degree, overly broad results, but also + that it works in cases where there is nothing (yet) to match against. + For example, a not-yet-cloned submodule. + + This function does not perform any validity checking of pathspecs. Only + valid pathspecs and well-formed paths are supported. + + A pathspec with the ``top`` magic is returned immediately and as-is. These + pathspecs have an absolute reference and do not require a translation into + a subdirectory namespace. + + Parameters + ---------- + subdir: str + POSIX-notation relative path of a subdirectory. The reference directory + must be the same as that of the pathspec to be translated. + pathspec: GitPathSpec + To-be-translated pathspec + + Yields + ------ + GitPathSpec + Any number of pathspecs that an input pathspec decomposed into upon + translation into the namespace of a subdirectory. 
+ """ + if 'top' in pathspec.spectypes or pathspec.is_nopathspecs: + # pathspec with an absolute reference, or "no pathspecs" + # no translation needed + yield pathspec + return + + # add a trailing directory separator to prevent undesired + # matches of partial directory names + subdir = subdir if subdir.endswith('/') else f'{subdir}/' + tp = pathspec.get_joined_pattern() + + if 'icase' in pathspec.spectypes: + subdir = subdir.casefold() + tp = tp.casefold() + + # literal pathspecs + if 'literal' in pathspec.spectypes: + # append a trailing slash to allow for full matches + tp_endslash = f'{tp}/' + if not tp_endslash.startswith(subdir): + # no match + # BUT + # we might have a multi-level subdir, and we might match an + # intermediate subdir and could still yield a 'no pathspec' + # result + while subdir := posixpath.split(subdir)[0]: + if tp_endslash.startswith(subdir): + yield GitPathSpec.from_pathspec_str(':') + return + return + + remainder = tp[len(subdir) :] + if not remainder: + # full match + yield GitPathSpec.from_pathspec_str(':') + else: + yield GitPathSpec(pathspec.spectypes, *_split_prefix_pattern(remainder)) + return + + # tokenize the testpattern using the wildcard that also matches + # directories + token_delim = '**' if 'glob' in pathspec.spectypes else '*' + tp_chunks = tp.split(token_delim) + prefix_match = '' + yielded = set() + for i, chunk in enumerate(tp_chunks): + last_chunk = i + 1 == len(tp_chunks) + if last_chunk: + trymatch = f'{prefix_match}{chunk}{"" if chunk.endswith("/") else "/"}' + else: + trymatch = f'{prefix_match}{chunk}*' + if not fnmatch(subdir, f'{trymatch}'): + # each chunk needs match in order, first non-match ends the + # algorithm + # BUT + # we have an (initial) chunk that points already + # inside the target subdir + submatch = trymatch + while submatch := posixpath.split(submatch)[0]: + if not fnmatch(f'{subdir}', f'{submatch}/'): + continue + ps = GitPathSpec( + pathspec.spectypes, + *_split_prefix_pattern( + # +1 
for trailing slash + tp[len(submatch) + 1 :] + ), + ) + if ps not in yielded: + yield ps + return + # OR + # we might have a multi-level subdir, and we might match an + # intermediate subdir and could still yield a 'no pathspec' + # result + while subdir := posixpath.split(subdir)[0]: + if fnmatch(f'{subdir}/', trymatch): + yield GitPathSpec.from_pathspec_str(':') + return + return + + remainder = tp_chunks[i + 1 :] + if all(not c for c in remainder): + # direct hit, no pathspecs after translation + yield GitPathSpec.from_pathspec_str(':') + return + else: + ps = GitPathSpec( + pathspec.spectypes, + *_split_prefix_pattern( + f'{token_delim}{token_delim.join(remainder)}', + ), + ) + yield ps + yielded.add(ps) + # extend prefix for the next round + prefix_match = trymatch diff --git a/datasalad/gitpathspec/pathspecs.py b/datasalad/gitpathspec/pathspecs.py new file mode 100644 index 0000000..8393d55 --- /dev/null +++ b/datasalad/gitpathspec/pathspecs.py @@ -0,0 +1,146 @@ +from __future__ import annotations + +from itertools import chain +from typing import TYPE_CHECKING + +from datasalad.gitpathspec.pathspec import GitPathSpec + +if TYPE_CHECKING: + from collections.abc import Iterable + from pathlib import ( + PurePosixPath, + ) + + +class GitPathSpecs: + """Convenience container for any number of pathspecs (or none) + + This class can facilitate implementing support for pathspec-constraints, + including scenarios involving submodule recursion. 
+ + >>> from pathlib import PurePosixPath + >>> # can accept a "default" argument for no pathspecs + >>> ps = GitPathSpecs(None) + >>> not ps + True + >>> ps.arglist() + [] + >>> # deal with any number of pathspecs + >>> ps = GitPathSpecs(['*.jpg', 'dir/*.png']) + >>> ps.any_match_subdir(PurePosixPath('dummy')) + True + >>> ps.for_subdir(PurePosixPath('dir')) + GitPathSpecs(['*.jpg', '*.png']) + """ + + def __init__( + self, + pathspecs: Iterable[str | GitPathSpec] | GitPathSpecs | None, + ): + """Pathspecs can be given as an iterable (string-form and/or + ``GitPathSpec``), another ``GitPathSpecs`` instance, or ``None``. + ``None``, or empty iterable indicate a 'no constraint' scenario, + equivalent to a single ``':'`` pathspec. + """ + self._pathspecs: tuple[GitPathSpec, ...] | None = None + + if pathspecs is None: + self._pathspecs = None + return + + if isinstance(pathspecs, GitPathSpecs): + self._pathspecs = pathspecs.pathspecs or None + return + + # we got something that needs converting + self._pathspecs = tuple( + ps if isinstance(ps, GitPathSpec) else GitPathSpec.from_pathspec_str(ps) + for ps in pathspecs + ) + if not self._pathspecs: + msg = ( + f'{pathspecs!r} did not contain any pathspecs. ' + 'To indicate "no pathspec constraints" use the ' + '":" pathspec or `None`.' + ) + raise ValueError(msg) + + def __repr__(self) -> str: + return ( + f"{self.__class__.__name__}([{', '.join(repr(p) for p in self.arglist())}])" + ) + + def __len__(self) -> int: + return len(self._pathspecs) if self._pathspecs is not None else 0 + + def __eq__(self, obj): + return self.pathspecs == obj.pathspecs + + # TODO: lru_cache decorator? 
+ # this would prevent repeated conversion cost for the usage pattern of + # - test if we would have a match for a subdir + # - run code with the matching pathspecs + # without having to implement caching logic in client code + def for_subdir( + self, + path: PurePosixPath, + ) -> GitPathSpecs: + """Translate pathspecs into the scope of a subdirectory + + Raises + ------ + ValueError + Whenever no pathspec can be translated into the scope of the target + directory. + """ + if not self._pathspecs: + return GitPathSpecs(None) + translated = list( + chain.from_iterable(ps.for_subdir(str(path)) for ps in self._pathspecs) + ) + if not translated: + # not a single pathspec could be translated into the subdirectory + # scope. This means none was applicable, and not that the whole + # subdirectory is matched. We raise in order to allow client code + # to distinguish a no-match from an all-match scenario. Returning + # the equivalent of an empty list would code "no constraint", + # rather than "no match" + msg = f'No pathspecs translate to {path=}' + raise ValueError(msg) + return GitPathSpecs(translated) + + def any_match_subdir( + self, + path: PurePosixPath, + ) -> bool: + """Returns whether any pathspec could match subdirectory content + + In other words, ``False`` is returned whenever ``.for_subdir()`` + would raise ``ValueError``. + + Parameters + ---------- + path: PurePosixPath + Relative path of the subdirectory to run the test for. + """ + if self._pathspecs is None: + return True + path_s = str(path) + return any(ps.for_subdir(path_s) for ps in self._pathspecs) + + def arglist(self) -> list[str]: + """Convert pathspecs to a CLI argument list + + This list is suitable for use with any Git command that supports + pathspecs, after a ``--`` (that disables the interpretation of further + arguments as options). + + When no pathspecs are present an empty list is returned. 
+ """ + if self._pathspecs is None: + return [] + return [str(ps) for ps in self._pathspecs] + + @property + def pathspecs(self) -> tuple[GitPathSpec, ...] | None: + return self._pathspecs diff --git a/datasalad/gitpathspec/tests/__init__.py b/datasalad/gitpathspec/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/datasalad/gitpathspec/tests/test_gitpathspec.py b/datasalad/gitpathspec/tests/test_gitpathspec.py new file mode 100644 index 0000000..8709343 --- /dev/null +++ b/datasalad/gitpathspec/tests/test_gitpathspec.py @@ -0,0 +1,442 @@ +import subprocess +import sys +from pathlib import Path + +import pytest + +from .. import ( + GitPathSpec, + GitPathSpecs, +) +from ..pathspec import yield_subdir_match_remainder_pathspecs + + +def _list_files(path, pathspecs): + return [ + i + for i in subprocess.run( + ['git', 'ls-files', '-z', '--other', '--', *pathspecs], # noqa: S607 + capture_output=True, + cwd=path, + check=False, + ) + .stdout.decode('utf-8') + .split('\0') + if i + ] + + +@pytest.fixture(scope='function') # noqa: PT003 +def pathspec_match_testground(tmp_path_factory): + """Create a Git repo with no commit and many untracked files + + In this playground, `git ls-files --other` can be used to testrun + pathspecs. 
+ + See the top item in `testcases` for a summary of the content + """ + p = tmp_path_factory.mktemp('pathspec_match') + probe = p / 'pr?be' + # check for case insensitive file systems + crippled_fs = Path(str(p).upper()).exists() + try: + probe.touch() + probe.unlink() + except OSError: + crippled_fs = True + + subprocess.run(['git', 'init'], cwd=p, check=True) # noqa: S607 + + p_sub = p / 'sub' + p_sub.mkdir() + for d in (p, p_sub): + p_a = d / 'aba' + p_b = d / 'a?a' + for sp in (p_a,) if crippled_fs else (p_a, p_b): + sp.mkdir() + for fname in ('a.txt', 'A.txt', 'a.JPG'): + (sp / fname).touch() + # add something that is unique to sub/ + (p_sub / 'b.dat').touch() + + aba_fordir_results = { + None: { + 'match': [ + 'aba/a.JPG', + 'aba/a.txt', + ] + if crippled_fs + else ['aba/A.txt', 'aba/a.JPG', 'aba/a.txt'], + }, + 'aba': { + 'specs': [':'], + 'match': ['a.JPG', 'a.txt'] if crippled_fs else ['A.txt', 'a.JPG', 'a.txt'], + }, + } + + testcases = [ + # valid + { + 'ps': ':', + 'fordir': { + None: { + 'specs': [':'], + 'match': [ + 'aba/a.JPG', + 'aba/a.txt', + 'sub/aba/a.JPG', + 'sub/aba/a.txt', + 'sub/b.dat', + ] + if crippled_fs + else [ + 'a?a/A.txt', + 'a?a/a.JPG', + 'a?a/a.txt', + 'aba/A.txt', + 'aba/a.JPG', + 'aba/a.txt', + 'sub/a?a/A.txt', + 'sub/a?a/a.JPG', + 'sub/a?a/a.txt', + 'sub/aba/A.txt', + 'sub/aba/a.JPG', + 'sub/aba/a.txt', + 'sub/b.dat', + ], + }, + 'sub': { + 'specs': [':'], + 'match': ['aba/a.JPG', 'aba/a.txt', 'b.dat'] + if crippled_fs + else [ + 'a?a/A.txt', + 'a?a/a.JPG', + 'a?a/a.txt', + 'aba/A.txt', + 'aba/a.JPG', + 'aba/a.txt', + 'b.dat', + ], + }, + }, + }, + { + 'ps': 'aba', + 'fordir': aba_fordir_results, + }, + # same as above, but with a trailing slash + { + 'ps': 'aba/', + 'fordir': aba_fordir_results, + }, + # and one more of this kind, as a pointless literal + { + 'ps': ':(literal)aba', + 'fordir': aba_fordir_results, + }, + { + 'ps': ':(glob)aba/*.txt', + 'fordir': { + None: { + 'match': [ + 'aba/a.txt', + ] + if crippled_fs + 
else ['aba/A.txt', 'aba/a.txt'] + }, + 'sub': {'specs': []}, + }, + }, + { + 'ps': ':/aba/*.txt', + 'norm': ':(top)aba/*.txt', + 'fordir': { + None: { + 'match': [ + 'aba/a.txt', + ] + if crippled_fs + else ['aba/A.txt', 'aba/a.txt'] + }, + # for a subdir a keeps matching the exact same items + # not only be name, but by location + 'sub': { + 'specs': [':(top)aba/*.txt'], + 'match': ['../aba/a.txt'] + if crippled_fs + else ['../aba/A.txt', '../aba/a.txt'], + }, + }, + }, + { + 'ps': 'aba/*.txt', + 'fordir': { + None: { + 'match': ['aba/a.txt'] + if crippled_fs + else ['aba/A.txt', 'aba/a.txt'], + }, + # not applicable + 'sub': {'specs': []}, + # but this is + 'aba': {'specs': ['*.txt']}, + }, + }, + { + 'ps': 'sub/aba/*.txt', + 'fordir': { + None: { + 'match': ['sub/aba/a.txt'] + if crippled_fs + else ['sub/aba/A.txt', 'sub/aba/a.txt'] + }, + 'sub': { + 'specs': ['aba/*.txt'], + 'match': ['aba/a.txt'] + if crippled_fs + else ['aba/A.txt', 'aba/a.txt'], + }, + }, + }, + { + 'ps': '*.JPG', + 'fordir': { + None: { + 'match': ['aba/a.JPG', 'sub/aba/a.JPG'] + if crippled_fs + else ['a?a/a.JPG', 'aba/a.JPG', 'sub/a?a/a.JPG', 'sub/aba/a.JPG'] + }, + # unchanged + 'sub': {'specs': ['*.JPG']}, + }, + }, + { + 'ps': '*ba*.JPG', + 'fordir': { + None: {'match': ['aba/a.JPG', 'sub/aba/a.JPG']}, + 'aba': {'specs': ['*ba*.JPG', '*.JPG'], 'match': ['a.JPG']}, + }, + }, + # invalid + # + # conceptual conflict and thereby unsupported by Git + # makes sense and is easy to catch that + {'ps': ':(glob,literal)broken', 'raises': ValueError}, + ] + if not crippled_fs: + testcases.extend( + [ + # literal magic is only needed for non-crippled FS + { + 'ps': ':(literal)a?a/a.JPG', + 'fordir': { + None: { + 'match': ['a?a/a.JPG'], + }, + 'a?a': { + 'specs': [':(literal)a.JPG'], + 'match': ['a.JPG'], + }, + }, + }, + { + 'ps': ':(literal,icase)SuB/A?A/a.jpg', + 'fordir': { + None: {'match': ['sub/a?a/a.JPG']}, + 'sub/a?a': { + 'specs': [':(literal,icase)a.jpg'], + # given the spec 
transformation matches + # MIH would really expect to following, + # but it is not coming from Git :( + #'match': ['a.JPG'], + 'match': [], + }, + }, + }, + { + 'ps': ':(icase)A?A/a.jpg', + 'fordir': { + None: {'match': ['a?a/a.JPG', 'aba/a.JPG']}, + 'aba': { + 'specs': [':(icase)a.jpg'], + 'match': ['a.JPG'], + }, + }, + }, + { + 'ps': ':(literal,icase)A?A/a.jpg', + 'fordir': { + None: {'match': ['a?a/a.JPG']}, + 'a?a': { + 'specs': [':(literal,icase)a.jpg'], + 'match': ['a.JPG'], + }, + # the target subdir does not match the pathspec + 'aba': {'specs': set()}, + }, + }, + ] + ) + + return p, testcases + + +def test_pathspecs(pathspec_match_testground): + tg, testcases = pathspec_match_testground + + for testcase in testcases: + if testcase.get('raises'): + # test case states how `GitPathSpec` will blow up + # on this case. Verify and skip any further testing + # on this case + with pytest.raises(testcase['raises']): + GitPathSpec.from_pathspec_str(testcase['ps']) + continue + # create the instance + ps = GitPathSpec.from_pathspec_str(testcase['ps']) + # if no deviating normalized representation is given + # it must match the original one + assert str(ps) == testcase.get('norm', testcase['ps']) + # test translations onto subdirs now + # `None` is a special subdir that means "self", i.e. + # not translation other than normalization, we can use it + # to test matching behavior of the full pathspec + for subdir, target in testcase.get('fordir', {}).items(): + # translate -- a single input pathspec can turn into + # multiple translated ones. 
This is due to + subdir_specs = [str(s) for s in ps.for_subdir(subdir)] + if 'specs' in target: + assert set(subdir_specs) == set( + target['specs'] + ), f'Mismatch for {testcase["ps"]!r} -> subdir {subdir!r} {target}' + if subdir and not target.get('specs') and 'match' in target: + msg = ( + 'invalid test specification: no subdir specs expected, ' + f'but match declared: {testcase!r}' + ) + raise ValueError(msg) + if subdir_specs and 'match' in target: + tg_subdir = tg / subdir if subdir else tg + assert _list_files(tg_subdir, subdir_specs) == target['match'] + + +def test_yield_subdir_match_remainder_pathspecs(): + testcases = [ + # FORMAT: target path, pathspec, subdir pathspecs + ('abc', ':', [':']), + # top-magic is returned as-is + ('murks', ':(top)crazy*^#', [':(top)crazy*^#']), + # no match + ('abc', 'not', []), + ('abc', 'ABC', [':'] if sys.platform.startswith('win') else []), + # direct hits, resolve to "no pathspecs" + ('abc', 'a?c', [':']), + ('abc', 'abc', [':']), + ('abc', 'abc/', [':']), + # icase-magic + ('abc', ':(icase)ABC', [':']), + ('ABC', ':(icase)abc', [':']), + # some fairly common fnmatch-style pathspec + ('abc', 'abc/*.jpg', ['*.jpg']), + ('abc', '*.jpg', ['*.jpg']), + ('abc', '*/*.jpg', ['*/*.jpg', '*.jpg']), + ('abc', '*/*.jpg', ['*/*.jpg', '*.jpg']), + ('abc', '*bc*.jpg', ['*bc*.jpg', '*.jpg']), + # adding an glob-unrelated magic does not impact the result + ('abc', ':(exclude)*/*.jpg', [':(exclude)*/*.jpg', ':(exclude)*.jpg']), + ( + 'abc', + ':(attr:export-subst)*/*.jpg', + [':(attr:export-subst)*/*.jpg', ':(attr:export-subst)*.jpg'], + ), + ( + 'abc', + ':(icase,exclude)*/*.jpg', + [':(icase,exclude)*/*.jpg', ':(icase,exclude)*.jpg'], + ), + # glob-magic + ('abc', ':(glob)*bc*.jpg', []), + ('abc', ':(glob)*bc**.jpg', [':(glob)**.jpg']), + # 2nd-level subdir + ('abc/123', 'some.jpg', []), + ('abc/123', '*.jpg', ['*.jpg']), + ('abc/123', 'abc/*', [':']), + ('abc/123', 'abc', [':']), + ('abc/123', ':(glob)abc', [':']), + ('abc/123', 
'*123', ['*123', ':']), + ('abc/123', '*/123', ['*/123', ':']), + ('abc/123', ':(glob)*/123', [':']), + # literal-magic + ('abc', ':(literal)a?c', []), + ('a?c', ':(literal)a?c', [':']), + ('a?c', ':(literal)a?c/*?ab*', [':(literal)*?ab*']), + ('a?c/123', ':(literal)a?c', [':']), + # more complex cases + ('abc/123/ABC', 'a*/1?3/*.jpg', ['*/1?3/*.jpg', '*.jpg', '1?3/*.jpg']), + # exclude-magic + ('abc', ':(exclude)abc', [':']), + ('abc/123', ':(exclude)abc', [':']), + ('a?c', ':(exclude,literal)a?c', [':']), + # stuff that was problematic at some point + # initial, non-wildcard part already points inside the + # target directory + ('sub', 'sub/aba/*.txt', ['aba/*.txt']), + # no directory-greedy wildcard whatsoever + ('abc', ':(icase)A?C/a.jpg', [':(icase)a.jpg']), + # no directory-greedy wildcard in later chunk + ('nope/abc', 'no*/a?c/a.jpg', ['*/a?c/a.jpg', 'a.jpg']), + ] + for ts in testcases: + # always test against the given subdir, and also against the subdir + # given with a trailing slash + for target_path in (ts[0], f'{ts[0]}/'): + tsps = GitPathSpec.from_pathspec_str(ts[1]) + remainders = list( + yield_subdir_match_remainder_pathspecs( + target_path, + tsps, + ) + ) + assert [str(ps) for ps in remainders] == ts[2], f'Mismatch for {ts}' + # arglist processing of the GitPathSpecs container comes to the + # same result + if remainders: + assert GitPathSpecs(remainders).arglist() == ts[2] + # now we produce the same result with the GitPathSpecs handler + try: + assert GitPathSpecs([ts[1]]).for_subdir(target_path).arglist() == [ + str(ps) for ps in remainders + ] + except ValueError: + # translation must raise when there would not be a remainder + assert not remainders + # if we are supposed to get any remainder out, the test for a + # subdir match also gives an analog result + if ts[2]: + assert GitPathSpecs([tsps]).any_match_subdir(target_path) + else: + assert not GitPathSpecs([tsps]).any_match_subdir(target_path) + + +def test_GitPathSpecs(): + spec_input = 
['mike/*', '*.jpg'] + ps = GitPathSpecs(spec_input) + # we can create a GitPathSpecs object from another + assert GitPathSpecs(ps).arglist() == ps.arglist() + + # going over the properties + assert repr(ps) == "GitPathSpecs(['mike/*', '*.jpg'])" + assert len(ps) == len(spec_input) + + # we can have "no pathspecs". `None` and a single ':' are equivalent + # ways to communicate this + for ps in (None, ':'): + nops = GitPathSpecs(ps) + assert GitPathSpecs(ps).for_subdir('doesntmatter') == nops + assert GitPathSpecs(ps).any_match_subdir('doesntmatter') is True + + # how about the semantic distinction between None and []? + # [] is not valid + with pytest.raises(ValueError, match='did not contain any pathspecs'): + GitPathSpecs([]) diff --git a/docs/index.rst b/docs/index.rst index 006d9c2..18198b9 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -75,9 +75,10 @@ Also see the :ref:`modindex`. .. autosummary:: :toctree: generated - runners + gitpathspec iterable_subprocess itertools + runners settings