style: automatically reformat code
mih committed Jun 18, 2024
1 parent d7cc018 commit d3788c5
Showing 9 changed files with 114 additions and 129 deletions.
43 changes: 25 additions & 18 deletions datasalad/itertools/align_pattern.py
@@ -1,5 +1,4 @@
""" Function to ensure that a pattern is completely contained in single chunks
"""
"""Function to ensure that a pattern is completely contained in single chunks"""

from __future__ import annotations

@@ -10,10 +9,10 @@
)


def align_pattern(iterable: Iterable[str | bytes | bytearray],
pattern: str | bytes | bytearray
) -> Generator[str | bytes | bytearray, None, None]:
""" Yield data chunks that contain a complete pattern, if it is present
def align_pattern(
iterable: Iterable[str | bytes | bytearray], pattern: str | bytes | bytearray
) -> Generator[str | bytes | bytearray, None, None]:
"""Yield data chunks that contain a complete pattern, if it is present
``align_pattern`` makes it easy to find a pattern (``str``, ``bytes``,
or ``bytearray``) in data chunks. It joins data-chunks in such a way,
@@ -77,15 +76,23 @@ def align_pattern(iterable: Iterable[str | bytes | bytearray],

# Create pattern matcher for all
if isinstance(pattern, str):
regex: str | bytes | bytearray = '(' + '|'.join(
'.' * (len(pattern) - index - 1) + re.escape(pattern[:index]) + '$'
for index in range(1, len(pattern))
) + ')'
regex: str | bytes | bytearray = (
'('
+ '|'.join(
'.' * (len(pattern) - index - 1) + re.escape(pattern[:index]) + '$'
for index in range(1, len(pattern))
)
+ ')'
)
else:
regex = b'(' + b'|'.join(
b'.' * (len(pattern) - index - 1) + re.escape(pattern[:index]) + b'$'
for index in range(1, len(pattern))
) + b')'
regex = (
b'('
+ b'|'.join(
b'.' * (len(pattern) - index - 1) + re.escape(pattern[:index]) + b'$'
for index in range(1, len(pattern))
)
+ b')'
)
pattern_matcher = re.compile(regex, re.DOTALL)
pattern_sub = len(pattern) - 1
# Join data chunks until they are sufficiently long to contain the pattern,
Expand All @@ -98,10 +105,10 @@ def align_pattern(iterable: Iterable[str | bytes | bytearray],
current_chunk = data_chunk
else:
current_chunk += data_chunk
if len(current_chunk) >= len(pattern) \
and not (
current_chunk[-1] in pattern
and pattern_matcher.match(current_chunk, len(current_chunk) - pattern_sub)):
if len(current_chunk) >= len(pattern) and not (
current_chunk[-1] in pattern
and pattern_matcher.match(current_chunk, len(current_chunk) - pattern_sub)
):
yield current_chunk
current_chunk = None

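For reference, a minimal usage sketch of the function being reformatted here (assuming ``align_pattern`` is re-exported from ``datasalad.itertools``; the chunks and expected output mirror the first case in the test file further down):

    from datasalad.itertools import align_pattern

    # The pattern 'abc' is split across the first three chunks; align_pattern
    # joins chunks until any potential match is fully contained in one item.
    chunks = ['a', 'b', 'c', 'd', 'e']
    print(list(align_pattern(chunks, pattern='abc')))
    # -> ['abc', 'de']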
17 changes: 8 additions & 9 deletions datasalad/itertools/decode_bytes.py
@@ -1,4 +1,4 @@
"""Get strings decoded from chunks of bytes """
"""Get strings decoded from chunks of bytes"""

from __future__ import annotations

@@ -93,19 +93,18 @@ def decode_bytes(
``iterable`` cannot be decoded with the specified ``encoding``
"""

def handle_decoding_error(position: int,
exc: UnicodeDecodeError
) -> tuple[int, str]:
""" Handle a UnicodeDecodeError """
def handle_decoding_error(
position: int, exc: UnicodeDecodeError
) -> tuple[int, str]:
"""Handle a UnicodeDecodeError"""
if not backslash_replace:
# Signal the error to the caller
raise exc
return (
position + exc.end,
joined_data[:position + exc.start].decode(encoding)
+ joined_data[position + exc.start:position + exc.end].decode(
encoding,
errors='backslashreplace'
joined_data[: position + exc.start].decode(encoding)
+ joined_data[position + exc.start : position + exc.end].decode(
encoding, errors='backslashreplace'
),
)

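A minimal sketch of the behavior of the helper being reformatted (assuming ``decode_bytes`` is re-exported from ``datasalad.itertools`` and decodes UTF-8 by default):

    from datasalad.itertools import decode_bytes

    # 'ö' is the two bytes b'\xc3\xb6' in UTF-8; the chunk boundary below
    # falls inside the character. decode_bytes buffers across boundaries
    # instead of raising mid-character.
    chunks = [b'abc\xc3', b'\xb6def']
    print(''.join(decode_bytes(chunks)))
    # -> 'abcödef'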
20 changes: 11 additions & 9 deletions datasalad/itertools/itemize.py
@@ -100,11 +100,12 @@ def itemize(
)


def _split_items_with_separator(iterable: Iterable[T],
sep: T,
*,
keep_ends: bool = False,
) -> Generator[T, None, None]:
def _split_items_with_separator(
iterable: Iterable[T],
sep: T,
*,
keep_ends: bool = False,
) -> Generator[T, None, None]:
assembled = None
for chunk in iterable:
if not assembled:
@@ -127,10 +128,11 @@ def _split_items_with_separator(iterable: Iterable[T],
yield assembled


def _split_lines(iterable: Iterable[T],
*,
keep_ends: bool = False,
) -> Generator[T, None, None]:
def _split_lines(
iterable: Iterable[T],
*,
keep_ends: bool = False,
) -> Generator[T, None, None]:
assembled = None
for chunk in iterable:
if not assembled:
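A minimal sketch of the itemization these helpers implement (assuming ``itemize`` is re-exported from ``datasalad.itertools``; ``keep_ends`` defaults to ``False`` as in the helper signatures above):

    from datasalad.itertools import itemize

    # Items are delimited by the separator regardless of how the input is
    # chunked; a trailing incomplete item is yielded once the input ends.
    chunks = ['one\ntw', 'o\nthree']
    print(list(itemize(chunks, '\n')))
    # -> ['one', 'two', 'three']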
13 changes: 7 additions & 6 deletions datasalad/itertools/load_json.py
@@ -1,4 +1,4 @@
""" Functions that yield JSON objects converted from input items """
"""Functions that yield JSON objects converted from input items"""

from __future__ import annotations

@@ -12,9 +12,10 @@
__all__ = ['load_json', 'load_json_with_flag']


def load_json(iterable: Iterable[bytes | str],
) -> Generator[Any, None, None]:
""" Convert items yielded by ``iterable`` into JSON objects and yield them
def load_json(
iterable: Iterable[bytes | str],
) -> Generator[Any, None, None]:
"""Convert items yielded by ``iterable`` into JSON objects and yield them
This function fetches items from the underlying
iterable. The items are expected to be ``bytes``, ``str``, or ``bytearray``,
@@ -65,9 +66,9 @@ def load_json(iterable: Iterable[bytes | str],


def load_json_with_flag(
iterable: Iterable[bytes | str],
iterable: Iterable[bytes | str],
) -> Generator[tuple[Any | json.decoder.JSONDecodeError, bool], None, None]:
""" Convert items from ``iterable`` into JSON objects and a success flag
"""Convert items from ``iterable`` into JSON objects and a success flag
``load_json_with_flag`` works analogously to ``load_json``, but reports
success and failure differently.
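A minimal sketch contrasting the two functions (assuming both are re-exported from ``datasalad.itertools``; the second input item is deliberately invalid JSON):

    from datasalad.itertools import load_json, load_json_with_flag

    items = ['{"a": 1}', 'not json']
    # load_json would raise json.decoder.JSONDecodeError on the second item:
    #     list(load_json(items))
    # load_json_with_flag instead yields (result, success) tuples:
    for obj, success in load_json_with_flag(items):
        print(success, obj)
    # -> True {'a': 1}
    # -> False Expecting value: line 1 column 1 (char 0)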
42 changes: 17 additions & 25 deletions datasalad/itertools/reroute.py
@@ -1,4 +1,4 @@
""" Functions that allow to route data around upstream iterator """
"""Functions that allow to route data around upstream iterator"""

from __future__ import annotations

@@ -16,11 +16,12 @@ class StoreOnly:
pass


def route_out(iterable: Iterable,
data_store: list,
splitter: Callable[[Any], tuple[Any, Any]],
) -> Generator:
""" Route data around the consumer of this iterable
def route_out(
iterable: Iterable,
data_store: list,
splitter: Callable[[Any], tuple[Any, Any]],
) -> Generator:
"""Route data around the consumer of this iterable
:func:`route_out` allows its user to:
@@ -63,29 +64,23 @@ def route_out(iterable: Iterable,
from math import nan
from datalad_next.itertools import route_out, route_in, StoreOnly
def splitter(divisor):
# if divisor == 0, return `StoreOnly` in the first element of the
# result tuple to indicate that route_out should not yield this
# element to its consumer
return (StoreOnly, divisor) if divisor == 0 else (divisor, divisor)
def joiner(processed_data, stored_data):
# return nan for elements that were routed out and never processed
return nan if processed_data is StoreOnly else processed_data
divisors = [0, 1, 0, 2, 0, 3, 0, 4]
store = list()
r = route_in(
map(
lambda x: 2.0 / x,
route_out(
divisors,
store,
splitter
)
),
store,
joiner
(2.0 / x for x in route_out(divisors, store, splitter)), store, joiner
)
print(list(r))
@@ -118,11 +113,10 @@ def joiner(processed_data, stored_data):
yield data_to_process


def route_in(iterable: Iterable,
data_store: list,
joiner: Callable[[Any, Any], Any]
) -> Generator:
""" Yield previously rerouted data to the consumer
def route_in(
iterable: Iterable, data_store: list, joiner: Callable[[Any, Any], Any]
) -> Generator:
"""Yield previously rerouted data to the consumer
This function is the counter-part to :func:`route_out`. It takes the iterable
``iterable`` and a data store given in ``data_store`` and yields items
@@ -152,11 +146,9 @@ def route_in(iterable: Iterable,
store_1 = list()
route_in(
some_generator(
route_out(input_iterable, store_1, splitter_1)
),
some_generator(route_out(input_iterable, store_1, splitter_1)),
store_1,
joiner_1
joiner_1,
)
:func:`route_in` will yield the same number of elements as ``input_iterable``.
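The docstring example above, condensed into a runnable sketch (assuming ``route_out``, ``route_in``, and ``StoreOnly`` are re-exported from ``datasalad.itertools``):

    from math import nan
    from datasalad.itertools import StoreOnly, route_in, route_out

    divisors = [0, 1, 0, 2, 0, 3, 0, 4]
    store = []
    # Zeros are routed around the division and re-joined as nan afterwards,
    # so route_in yields exactly one element per input element.
    r = route_in(
        (2.0 / x for x in route_out(
            divisors, store,
            lambda d: (StoreOnly, d) if d == 0 else (d, d))),
        store,
        lambda processed, stored: nan if processed is StoreOnly else processed,
    )
    print(list(r))
    # -> [nan, 2.0, nan, 1.0, nan, 0.6666666666666666, nan, 0.5]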
39 changes: 21 additions & 18 deletions datasalad/itertools/tests/test_align_pattern.py
@@ -5,29 +5,32 @@
from ..align_pattern import align_pattern


@pytest.mark.parametrize(('data_chunks', 'pattern', 'expected'), [
(['a', 'b', 'c', 'd', 'e'], 'abc', ['abc', 'de']),
(['a', 'b', 'c', 'a', 'b', 'c'], 'abc', ['abc', 'abc']),
# Ensure that unaligned pattern prefixes are not keeping data chunks short.
(['a', 'b', 'c', 'dddbbb', 'a', 'b', 'x'], 'abc', ['abc', 'dddbbb', 'abx']),
# Expect that a trailing minimum length-chunk that ends with a pattern
# prefix is not returned as data, but as remainder, if it is not the final
# chunk.
(['a', 'b', 'c', 'd', 'a'], 'abc', ['abc', 'da']),
# Expect the last chunk to be returned as data, if final is True, although
# it ends with a pattern prefix. If final is false, the last chunk will be
# returned as a remainder, because it ends with a pattern prefix.
(['a', 'b', 'c', 'dddbbb', 'a'], 'abc', ['abc', 'dddbbb', 'a']),
(['a', 'b', 'c', '9', 'a'], 'abc', ['abc', '9a']),
])
@pytest.mark.parametrize(
('data_chunks', 'pattern', 'expected'),
[
(['a', 'b', 'c', 'd', 'e'], 'abc', ['abc', 'de']),
(['a', 'b', 'c', 'a', 'b', 'c'], 'abc', ['abc', 'abc']),
# Ensure that unaligned pattern prefixes are not keeping data chunks short.
(['a', 'b', 'c', 'dddbbb', 'a', 'b', 'x'], 'abc', ['abc', 'dddbbb', 'abx']),
# Expect that a trailing minimum length-chunk that ends with a pattern
# prefix is not returned as data, but as remainder, if it is not the final
# chunk.
(['a', 'b', 'c', 'd', 'a'], 'abc', ['abc', 'da']),
# Expect the last chunk to be returned as data, if final is True, although
# it ends with a pattern prefix. If final is false, the last chunk will be
# returned as a remainder, because it ends with a pattern prefix.
(['a', 'b', 'c', 'dddbbb', 'a'], 'abc', ['abc', 'dddbbb', 'a']),
(['a', 'b', 'c', '9', 'a'], 'abc', ['abc', '9a']),
],
)
def test_pattern_processor(data_chunks, pattern, expected):
assert expected == list(align_pattern(data_chunks, pattern=pattern))


def test_newline_matches():
pattern = b'----datalad-end-marker-3654137433-rekram-dne-dalatad----\n'
chunk1 = b'Have a lot of fun...\n----datalad-end-marker-3654137433-r'
chunk2 = b'e'
chunk3 = b'kram-dne-dalatad----\n'
chunk1 = b'Have a lot of fun...\n----datalad-end-marker-3654137433-r'
chunk2 = b'e'
chunk3 = b'kram-dne-dalatad----\n'
result = list(align_pattern([chunk1, chunk2, chunk3], pattern))
assert result == [chunk1 + chunk2 + chunk3]
11 changes: 3 additions & 8 deletions datasalad/itertools/tests/test_itemize.py
@@ -4,12 +4,7 @@

from ..itemize import itemize

text_chunks = [
'abc',
'def\n012',
'\n',
'\n'
]
text_chunks = ['abc', 'def\n012', '\n', '\n']
byte_chunks = [chunk.encode() for chunk in text_chunks]
text_chunks_other = [chunk.replace('\n', '\r\n') for chunk in text_chunks]
byte_chunks_other = [chunk.encode() for chunk in text_chunks_other]
@@ -21,8 +16,8 @@
(text_chunks, '\n'),
(byte_chunks, b'\n'),
(text_chunks_other, '\r\n'),
(byte_chunks_other, b'\r\n')
]
(byte_chunks_other, b'\r\n'),
],
)
def test_assembling_and_splitting(input_chunks, separator):
empty = input_chunks[0][:0]
35 changes: 15 additions & 20 deletions datasalad/itertools/tests/test_load_json.py
@@ -13,49 +13,44 @@
)

json_object = {
'list1': [
'a', 'bäöl', 1
],
'list1': ['a', 'bäöl', 1],
'dict1': {
'x': 123,
'y': 234,
'z': 456,
}
},
}


correct_json = b'\n'.join(
json.dumps(x).encode()
for x in [json_object] * 10
) + b'\n'
correct_json = b'\n'.join(json.dumps(x).encode() for x in [json_object] * 10) + b'\n'

correct_chunks = [
correct_json[i:i + 10]
for i in range(0, len(correct_json) + 10, 10)
correct_json[i : i + 10] for i in range(0, len(correct_json) + 10, 10)
]

faulty_json = correct_json.replace(b'}\n', b'\n')
faulty_chunks = [
faulty_json[i:i + 10]
for i in range(0, len(correct_json) + 10, 10)
]
faulty_chunks = [faulty_json[i : i + 10] for i in range(0, len(correct_json) + 10, 10)]


def test_load_json_on_decoded_bytes():
assert all(x == json_object for x in load_json(
decode_bytes(itemize(correct_chunks, b'\n'))))
assert all(
x == json_object
for x in load_json(decode_bytes(itemize(correct_chunks, b'\n')))
)
with pytest.raises(JSONDecodeError):
list(load_json(decode_bytes(itemize(faulty_chunks, b'\n'))))


def test_load_json_with_flag():
assert all(
obj == json_object and success is True
for (obj, success)
in load_json_with_flag(decode_bytes(itemize(correct_chunks, b'\n')))
for (obj, success) in load_json_with_flag(
decode_bytes(itemize(correct_chunks, b'\n'))
)
)
assert all(
isinstance(exc, JSONDecodeError) and success is False
for (exc, success)
in load_json_with_flag(decode_bytes(itemize(faulty_chunks, b'\n')))
for (exc, success) in load_json_with_flag(
decode_bytes(itemize(faulty_chunks, b'\n'))
)
)