style: automatically reformat code
mih committed Jun 18, 2024
1 parent d7cc018 commit d3788c5
Showing 9 changed files with 114 additions and 129 deletions.
43 changes: 25 additions & 18 deletions datasalad/itertools/align_pattern.py
@@ -1,5 +1,4 @@
""" Function to ensure that a pattern is completely contained in single chunks
"""
"""Function to ensure that a pattern is completely contained in single chunks"""

from __future__ import annotations

@@ -10,10 +9,10 @@
)


def align_pattern(iterable: Iterable[str | bytes | bytearray],
pattern: str | bytes | bytearray
) -> Generator[str | bytes | bytearray, None, None]:
""" Yield data chunks that contain a complete pattern, if it is present
def align_pattern(
iterable: Iterable[str | bytes | bytearray], pattern: str | bytes | bytearray
) -> Generator[str | bytes | bytearray, None, None]:
"""Yield data chunks that contain a complete pattern, if it is present
``align_pattern`` makes it easy to find a pattern (``str``, ``bytes``,
or ``bytearray``) in data chunks. It joins data-chunks in such a way,
@@ -77,15 +76,23 @@ def align_pattern(iterable: Iterable[str | bytes | bytearray],

# Create pattern matcher for all
if isinstance(pattern, str):
regex: str | bytes | bytearray = '(' + '|'.join(
'.' * (len(pattern) - index - 1) + re.escape(pattern[:index]) + '$'
for index in range(1, len(pattern))
) + ')'
regex: str | bytes | bytearray = (
'('
+ '|'.join(
'.' * (len(pattern) - index - 1) + re.escape(pattern[:index]) + '$'
for index in range(1, len(pattern))
)
+ ')'
)
else:
regex = b'(' + b'|'.join(
b'.' * (len(pattern) - index - 1) + re.escape(pattern[:index]) + b'$'
for index in range(1, len(pattern))
) + b')'
regex = (
b'('
+ b'|'.join(
b'.' * (len(pattern) - index - 1) + re.escape(pattern[:index]) + b'$'
for index in range(1, len(pattern))
)
+ b')'
)
pattern_matcher = re.compile(regex, re.DOTALL)
pattern_sub = len(pattern) - 1
# Join data chunks until they are sufficiently long to contain the pattern,
Expand All @@ -98,10 +105,10 @@ def align_pattern(iterable: Iterable[str | bytes | bytearray],
current_chunk = data_chunk
else:
current_chunk += data_chunk
if len(current_chunk) >= len(pattern) \
and not (
current_chunk[-1] in pattern
and pattern_matcher.match(current_chunk, len(current_chunk) - pattern_sub)):
if len(current_chunk) >= len(pattern) and not (
current_chunk[-1] in pattern
and pattern_matcher.match(current_chunk, len(current_chunk) - pattern_sub)
):
yield current_chunk
current_chunk = None

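For reference, a minimal usage sketch of the function being reformatted here (assuming ``align_pattern`` is re-exported from ``datasalad.itertools``; the chunks and expected output mirror the first case in the test file further down):

    from datasalad.itertools import align_pattern

    # The pattern 'abc' is split across the first three chunks; align_pattern
    # joins chunks until any potential match is fully contained in one item.
    chunks = ['a', 'b', 'c', 'd', 'e']
    print(list(align_pattern(chunks, pattern='abc')))
    # -> ['abc', 'de']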
17 changes: 8 additions & 9 deletions datasalad/itertools/decode_bytes.py
@@ -1,4 +1,4 @@
"""Get strings decoded from chunks of bytes """
"""Get strings decoded from chunks of bytes"""

from __future__ import annotations

@@ -93,19 +93,18 @@ def decode_bytes(
``iterable`` cannot be decoded with the specified ``encoding``
"""

def handle_decoding_error(position: int,
exc: UnicodeDecodeError
) -> tuple[int, str]:
""" Handle a UnicodeDecodeError """
def handle_decoding_error(
position: int, exc: UnicodeDecodeError
) -> tuple[int, str]:
"""Handle a UnicodeDecodeError"""
if not backslash_replace:
# Signal the error to the caller
raise exc
return (
position + exc.end,
joined_data[:position + exc.start].decode(encoding)
+ joined_data[position + exc.start:position + exc.end].decode(
encoding,
errors='backslashreplace'
joined_data[: position + exc.start].decode(encoding)
+ joined_data[position + exc.start : position + exc.end].decode(
encoding, errors='backslashreplace'
),
)

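A minimal sketch of the behavior of the helper being reformatted (assuming ``decode_bytes`` is re-exported from ``datasalad.itertools`` and decodes UTF-8 by default):

    from datasalad.itertools import decode_bytes

    # 'ö' is the two bytes b'\xc3\xb6' in UTF-8; the chunk boundary below
    # falls inside the character. decode_bytes buffers across boundaries
    # instead of raising mid-character.
    chunks = [b'abc\xc3', b'\xb6def']
    print(''.join(decode_bytes(chunks)))
    # -> 'abcödef'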
20 changes: 11 additions & 9 deletions datasalad/itertools/itemize.py
@@ -100,11 +100,12 @@ def itemize(
)


def _split_items_with_separator(iterable: Iterable[T],
sep: T,
*,
keep_ends: bool = False,
) -> Generator[T, None, None]:
def _split_items_with_separator(
iterable: Iterable[T],
sep: T,
*,
keep_ends: bool = False,
) -> Generator[T, None, None]:
assembled = None
for chunk in iterable:
if not assembled:
@@ -127,10 +128,11 @@ def _split_items_with_separator(iterable: Iterable[T],
yield assembled


def _split_lines(iterable: Iterable[T],
*,
keep_ends: bool = False,
) -> Generator[T, None, None]:
def _split_lines(
iterable: Iterable[T],
*,
keep_ends: bool = False,
) -> Generator[T, None, None]:
assembled = None
for chunk in iterable:
if not assembled:
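A minimal sketch of the itemization these helpers implement (assuming ``itemize`` is re-exported from ``datasalad.itertools``; ``keep_ends`` defaults to ``False`` as in the helper signatures above):

    from datasalad.itertools import itemize

    # Items are delimited by the separator regardless of how the input is
    # chunked; a trailing incomplete item is yielded once the input ends.
    chunks = ['one\ntw', 'o\nthree']
    print(list(itemize(chunks, '\n')))
    # -> ['one', 'two', 'three']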
13 changes: 7 additions & 6 deletions datasalad/itertools/load_json.py
@@ -1,4 +1,4 @@
""" Functions that yield JSON objects converted from input items """
"""Functions that yield JSON objects converted from input items"""

from __future__ import annotations

@@ -12,9 +12,10 @@
__all__ = ['load_json', 'load_json_with_flag']


def load_json(iterable: Iterable[bytes | str],
) -> Generator[Any, None, None]:
""" Convert items yielded by ``iterable`` into JSON objects and yield them
def load_json(
iterable: Iterable[bytes | str],
) -> Generator[Any, None, None]:
"""Convert items yielded by ``iterable`` into JSON objects and yield them
This function fetches items from the underlying
iterable. The items are expected to be ``bytes``, ``str``, or ``bytearray``,
@@ -65,9 +66,9 @@ def load_json(iterable: Iterable[bytes | str],


def load_json_with_flag(
iterable: Iterable[bytes | str],
iterable: Iterable[bytes | str],
) -> Generator[tuple[Any | json.decoder.JSONDecodeError, bool], None, None]:
""" Convert items from ``iterable`` into JSON objects and a success flag
"""Convert items from ``iterable`` into JSON objects and a success flag
``load_json_with_flag`` works analogously to ``load_json``, but reports
success and failure differently.
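A minimal sketch contrasting the two functions (assuming both are re-exported from ``datasalad.itertools``; the second input item is deliberately invalid JSON):

    from datasalad.itertools import load_json, load_json_with_flag

    items = ['{"a": 1}', 'not json']
    # load_json would raise json.decoder.JSONDecodeError on the second item:
    #     list(load_json(items))
    # load_json_with_flag instead yields (result, success) tuples:
    for obj, success in load_json_with_flag(items):
        print(success, obj)
    # -> True {'a': 1}
    # -> False Expecting value: line 1 column 1 (char 0)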
42 changes: 17 additions & 25 deletions datasalad/itertools/reroute.py
@@ -1,4 +1,4 @@
""" Functions that allow to route data around upstream iterator """
"""Functions that allow to route data around upstream iterator"""

from __future__ import annotations

@@ -16,11 +16,12 @@ class StoreOnly:
pass


def route_out(iterable: Iterable,
data_store: list,
splitter: Callable[[Any], tuple[Any, Any]],
) -> Generator:
""" Route data around the consumer of this iterable
def route_out(
iterable: Iterable,
data_store: list,
splitter: Callable[[Any], tuple[Any, Any]],
) -> Generator:
"""Route data around the consumer of this iterable
:func:`route_out` allows its user to:
@@ -63,29 +64,23 @@ def route_out(iterable: Iterable,
from math import nan
from datalad_next.itertools import route_out, route_in, StoreOnly
def splitter(divisor):
# if divisor == 0, return `StoreOnly` in the first element of the
# result tuple to indicate that route_out should not yield this
# element to its consumer
return (StoreOnly, divisor) if divisor == 0 else (divisor, divisor)
def joiner(processed_data, stored_data):
# return nan for elements that were routed out and never processed
return nan if processed_data is StoreOnly else processed_data
divisors = [0, 1, 0, 2, 0, 3, 0, 4]
store = list()
r = route_in(
map(
lambda x: 2.0 / x,
route_out(
divisors,
store,
splitter
)
),
store,
joiner
(2.0 / x for x in route_out(divisors, store, splitter)), store, joiner
)
print(list(r))
@@ -118,11 +113,10 @@ def joiner(processed_data, stored_data):
yield data_to_process


def route_in(iterable: Iterable,
data_store: list,
joiner: Callable[[Any, Any], Any]
) -> Generator:
""" Yield previously rerouted data to the consumer
def route_in(
iterable: Iterable, data_store: list, joiner: Callable[[Any, Any], Any]
) -> Generator:
"""Yield previously rerouted data to the consumer
This function is the counter-part to :func:`route_out`. It takes the iterable
``iterable`` and a data store given in ``data_store`` and yields items
@@ -152,11 +146,9 @@ def route_in(iterable: Iterable,
store_1 = list()
route_in(
some_generator(
route_out(input_iterable, store_1, splitter_1)
),
some_generator(route_out(input_iterable, store_1, splitter_1)),
store_1,
joiner_1
joiner_1,
)
:func:`route_in` will yield the same number of elements as ``input_iterable``.
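The docstring example above, condensed into a runnable sketch (assuming ``route_out``, ``route_in``, and ``StoreOnly`` are re-exported from ``datasalad.itertools``):

    from math import nan
    from datasalad.itertools import StoreOnly, route_in, route_out

    divisors = [0, 1, 0, 2, 0, 3, 0, 4]
    store = []
    # Zeros are routed around the division and re-joined as nan afterwards,
    # so route_in yields exactly one element per input element.
    r = route_in(
        (2.0 / x for x in route_out(
            divisors, store,
            lambda d: (StoreOnly, d) if d == 0 else (d, d))),
        store,
        lambda processed, stored: nan if processed is StoreOnly else processed,
    )
    print(list(r))
    # -> [nan, 2.0, nan, 1.0, nan, 0.6666666666666666, nan, 0.5]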
39 changes: 21 additions & 18 deletions datasalad/itertools/tests/test_align_pattern.py
@@ -5,29 +5,32 @@
from ..align_pattern import align_pattern


@pytest.mark.parametrize(('data_chunks', 'pattern', 'expected'), [
(['a', 'b', 'c', 'd', 'e'], 'abc', ['abc', 'de']),
(['a', 'b', 'c', 'a', 'b', 'c'], 'abc', ['abc', 'abc']),
# Ensure that unaligned pattern prefixes are not keeping data chunks short.
(['a', 'b', 'c', 'dddbbb', 'a', 'b', 'x'], 'abc', ['abc', 'dddbbb', 'abx']),
# Expect that a trailing minimum length-chunk that ends with a pattern
# prefix is not returned as data, but as remainder, if it is not the final
# chunk.
(['a', 'b', 'c', 'd', 'a'], 'abc', ['abc', 'da']),
# Expect the last chunk to be returned as data, if final is True, although
# it ends with a pattern prefix. If final is false, the last chunk will be
# returned as a remainder, because it ends with a pattern prefix.
(['a', 'b', 'c', 'dddbbb', 'a'], 'abc', ['abc', 'dddbbb', 'a']),
(['a', 'b', 'c', '9', 'a'], 'abc', ['abc', '9a']),
])
@pytest.mark.parametrize(
('data_chunks', 'pattern', 'expected'),
[
(['a', 'b', 'c', 'd', 'e'], 'abc', ['abc', 'de']),
(['a', 'b', 'c', 'a', 'b', 'c'], 'abc', ['abc', 'abc']),
# Ensure that unaligned pattern prefixes are not keeping data chunks short.
(['a', 'b', 'c', 'dddbbb', 'a', 'b', 'x'], 'abc', ['abc', 'dddbbb', 'abx']),
# Expect that a trailing minimum length-chunk that ends with a pattern
# prefix is not returned as data, but as remainder, if it is not the final
# chunk.
(['a', 'b', 'c', 'd', 'a'], 'abc', ['abc', 'da']),
# Expect the last chunk to be returned as data, if final is True, although
# it ends with a pattern prefix. If final is false, the last chunk will be
# returned as a remainder, because it ends with a pattern prefix.
(['a', 'b', 'c', 'dddbbb', 'a'], 'abc', ['abc', 'dddbbb', 'a']),
(['a', 'b', 'c', '9', 'a'], 'abc', ['abc', '9a']),
],
)
def test_pattern_processor(data_chunks, pattern, expected):
assert expected == list(align_pattern(data_chunks, pattern=pattern))


def test_newline_matches():
pattern = b'----datalad-end-marker-3654137433-rekram-dne-dalatad----\n'
chunk1 = b'Have a lot of fun...\n----datalad-end-marker-3654137433-r'
chunk2 = b'e'
chunk3 = b'kram-dne-dalatad----\n'
chunk1 = b'Have a lot of fun...\n----datalad-end-marker-3654137433-r'
chunk2 = b'e'
chunk3 = b'kram-dne-dalatad----\n'
result = list(align_pattern([chunk1, chunk2, chunk3], pattern))
assert result == [chunk1 + chunk2 + chunk3]
11 changes: 3 additions & 8 deletions datasalad/itertools/tests/test_itemize.py
@@ -4,12 +4,7 @@

from ..itemize import itemize

text_chunks = [
'abc',
'def\n012',
'\n',
'\n'
]
text_chunks = ['abc', 'def\n012', '\n', '\n']
byte_chunks = [chunk.encode() for chunk in text_chunks]
text_chunks_other = [chunk.replace('\n', '\r\n') for chunk in text_chunks]
byte_chunks_other = [chunk.encode() for chunk in text_chunks_other]
@@ -21,8 +16,8 @@
(text_chunks, '\n'),
(byte_chunks, b'\n'),
(text_chunks_other, '\r\n'),
(byte_chunks_other, b'\r\n')
]
(byte_chunks_other, b'\r\n'),
],
)
def test_assembling_and_splitting(input_chunks, separator):
empty = input_chunks[0][:0]
35 changes: 15 additions & 20 deletions datasalad/itertools/tests/test_load_json.py
@@ -13,49 +13,44 @@
)

json_object = {
'list1': [
'a', 'bäöl', 1
],
'list1': ['a', 'bäöl', 1],
'dict1': {
'x': 123,
'y': 234,
'z': 456,
}
},
}


correct_json = b'\n'.join(
json.dumps(x).encode()
for x in [json_object] * 10
) + b'\n'
correct_json = b'\n'.join(json.dumps(x).encode() for x in [json_object] * 10) + b'\n'

correct_chunks = [
correct_json[i:i + 10]
for i in range(0, len(correct_json) + 10, 10)
correct_json[i : i + 10] for i in range(0, len(correct_json) + 10, 10)
]

faulty_json = correct_json.replace(b'}\n', b'\n')
faulty_chunks = [
faulty_json[i:i + 10]
for i in range(0, len(correct_json) + 10, 10)
]
faulty_chunks = [faulty_json[i : i + 10] for i in range(0, len(correct_json) + 10, 10)]


def test_load_json_on_decoded_bytes():
assert all(x == json_object for x in load_json(
decode_bytes(itemize(correct_chunks, b'\n'))))
assert all(
x == json_object
for x in load_json(decode_bytes(itemize(correct_chunks, b'\n')))
)
with pytest.raises(JSONDecodeError):
list(load_json(decode_bytes(itemize(faulty_chunks, b'\n'))))


def test_load_json_with_flag():
assert all(
obj == json_object and success is True
for (obj, success)
in load_json_with_flag(decode_bytes(itemize(correct_chunks, b'\n')))
for (obj, success) in load_json_with_flag(
decode_bytes(itemize(correct_chunks, b'\n'))
)
)
assert all(
isinstance(exc, JSONDecodeError) and success is False
for (exc, success)
in load_json_with_flag(decode_bytes(itemize(faulty_chunks, b'\n')))
for (exc, success) in load_json_with_flag(
decode_bytes(itemize(faulty_chunks, b'\n'))
)
)