Skip to content

Commit

Permalink
Merge pull request #36 from christian-monch/fix-decode_bytes
Browse files Browse the repository at this point in the history
Fix handling of multiple encoding errors in `decode_bytes` input chunks.
  • Loading branch information
mih authored Jul 11, 2024
2 parents 1266a9d + 31876e7 commit 810f513
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 1 deletion.
2 changes: 1 addition & 1 deletion datasalad/itertools/decode_bytes.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ def handle_decoding_error(
raise exc
return (
position + exc.end,
joined_data[: position + exc.start].decode(encoding)
joined_data[position : position + exc.start].decode(encoding)
+ joined_data[position + exc.start : position + exc.end].decode(
encoding, errors='backslashreplace'
),
Expand Down
5 changes: 5 additions & 0 deletions datasalad/itertools/tests/test_decode_bytes.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,3 +35,8 @@ def test_no_empty_strings():
# check that empty strings are not yielded
r = tuple(decode_bytes([b'\xc3', b'\xb6']))
assert r == ('ö',)


def test_multiple_errors():
    """Check a single chunk that contains several undecodable bytes.

    The input is one chunk with three ``\\xaf`` bytes. The joined output
    of ``decode_bytes`` is expected to contain each of them as the literal
    four-character escape ``\\xaf``, with the surrounding text unchanged.
    """
    chunks = [b'08 War \xaf No \xaf More \xaf Trouble.shn.mp3']
    expected = '08 War \\xaf No \\xaf More \\xaf Trouble.shn.mp3'
    decoded = ''.join(decode_bytes(chunks))
    assert decoded == expected

0 comments on commit 810f513

Please sign in to comment.