decode_bytes: start the valid-prefix slice at `position`

When a UnicodeDecodeError is handled with backslash replacement, the bytes
preceding the undecodable sequence are now taken from
joined_data[position : position + exc.start] instead of
joined_data[: position + exc.start]. A regression test feeds a single chunk
that contains several undecodable bytes.

NOTE(review): this patch was recovered from a single-line paste, so the
leading whitespace inside the hunks is reconstructed. handle_decoding_error
is presumably nested one level (it reads joined_data and encoding from an
enclosing scope) -- confirm against the target file, or apply with
`git apply --ignore-whitespace`.

diff --git a/datasalad/itertools/decode_bytes.py b/datasalad/itertools/decode_bytes.py
index 6255360..e52a6f7 100644
--- a/datasalad/itertools/decode_bytes.py
+++ b/datasalad/itertools/decode_bytes.py
@@ -102,7 +102,7 @@ def handle_decoding_error(
             raise exc
         return (
             position + exc.end,
-            joined_data[: position + exc.start].decode(encoding)
+            joined_data[position : position + exc.start].decode(encoding)
             + joined_data[position + exc.start : position + exc.end].decode(
                 encoding, errors='backslashreplace'
             ),
diff --git a/datasalad/itertools/tests/test_decode_bytes.py b/datasalad/itertools/tests/test_decode_bytes.py
index e597162..d7a55a1 100644
--- a/datasalad/itertools/tests/test_decode_bytes.py
+++ b/datasalad/itertools/tests/test_decode_bytes.py
@@ -35,3 +35,8 @@ def test_no_empty_strings():
     # check that empty strings are not yielded
     r = tuple(decode_bytes([b'\xc3', b'\xb6']))
     assert r == ('ö',)
+
+
+def test_multiple_errors():
+    r = ''.join(decode_bytes([b'08 War \xaf No \xaf More \xaf Trouble.shn.mp3']))
+    assert r == '08 War \\xaf No \\xaf More \\xaf Trouble.shn.mp3'