From deceaa2e43cd2c0b4519e2cf85167fa772443c4c Mon Sep 17 00:00:00 2001
From: "S.Mohammad Emami Razavi"
Date: Thu, 7 Nov 2024 10:24:08 +0330
Subject: [PATCH] Update split_silence.py

```
output = p.communicate()[1].decode('utf-8')
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa9 in position 1888: invalid start byte
```

Decoding the ffmpeg output raised the error above for media files that
contain invalid characters in their metadata or elsewhere in the output.
Filter the raw bytes down to ASCII before decoding them, so that the decode
can no longer fail on such files.
---
 examples/split_silence.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/examples/split_silence.py b/examples/split_silence.py
index 90b46d9..d87f754 100755
--- a/examples/split_silence.py
+++ b/examples/split_silence.py
@@ -38,6 +38,11 @@ def _logged_popen(cmd_line, *args, **kwargs):
     return subprocess.Popen(cmd_line, *args, **kwargs)
 
 
+def remove_non_ascii(raw_data):
+    # raw_data is bytes: keep only printable ASCII and common whitespace (tab, newline, carriage return)
+    return bytes(b for b in raw_data if 32 <= b <= 126 or b in {9, 10, 13})
+
+
 def get_chunk_times(in_filename, silence_threshold, silence_duration, start_time=None, end_time=None):
     input_kwargs = {}
     if start_time is not None:
@@ -56,7 +61,7 @@ def get_chunk_times(in_filename, silence_threshold, silence_duration, start_time
         ) + ['-nostats'],  # FIXME: use .nostats() once it's implemented in ffmpeg-python.
         stderr=subprocess.PIPE
     )
-    output = p.communicate()[1].decode('utf-8')
+    output = remove_non_ascii(p.communicate()[1]).decode('utf-8')
     if p.returncode != 0:
         sys.stderr.write(output)
         sys.exit(1)