From deceaa2e43cd2c0b4519e2cf85167fa772443c4c Mon Sep 17 00:00:00 2001
From: "S.Mohammad Emami Razavi" <emamirazavi@gmail.com>
Date: Thu, 7 Nov 2024 10:24:08 +0330
Subject: [PATCH 1/2] Update split_silence.py

```
output = p.communicate()[1].decode('utf-8')
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa9 in position 1888: invalid start byte
```

Media files that contain invalid (non-UTF-8) characters in their metadata or in other parts of the file caused the serious error shown above. With this correction, the bug is resolved completely.
---
 examples/split_silence.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/examples/split_silence.py b/examples/split_silence.py
index 90b46d9..d87f754 100755
--- a/examples/split_silence.py
+++ b/examples/split_silence.py
@@ -38,6 +38,11 @@ def _logged_popen(cmd_line, *args, **kwargs):
     return subprocess.Popen(cmd_line, *args, **kwargs)
 
 
+def remove_non_ascii(raw_data):
+    # Keep only ASCII printable and common whitespace characters (tab, newline, carriage return)
+    return bytes(b for b in raw_data if 32 <= b <= 126 or b in {9, 10, 13})
+    
+
 def get_chunk_times(in_filename, silence_threshold, silence_duration, start_time=None, end_time=None):
     input_kwargs = {}
     if start_time is not None:
@@ -56,7 +61,7 @@ def get_chunk_times(in_filename, silence_threshold, silence_duration, start_time
         ) + ['-nostats'],  # FIXME: use .nostats() once it's implemented in ffmpeg-python.
         stderr=subprocess.PIPE
     )
-    output = p.communicate()[1].decode('utf-8')
+    output = remove_non_ascii(p.communicate()[1].decode('utf-8'))
     if p.returncode != 0:
         sys.stderr.write(output)
         sys.exit(1)

From e872c76b8e887e36d2274a7c783c13454125643f Mon Sep 17 00:00:00 2001
From: "S.Mohammad Emami Razavi" <emamirazavi@gmail.com>
Date: Thu, 7 Nov 2024 10:29:11 +0330
Subject: [PATCH 2/2] Update split_silence.py

---
 examples/split_silence.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/split_silence.py b/examples/split_silence.py
index d87f754..dbc0c77 100755
--- a/examples/split_silence.py
+++ b/examples/split_silence.py
@@ -61,7 +61,7 @@ def get_chunk_times(in_filename, silence_threshold, silence_duration, start_time
         ) + ['-nostats'],  # FIXME: use .nostats() once it's implemented in ffmpeg-python.
         stderr=subprocess.PIPE
     )
-    output = remove_non_ascii(p.communicate()[1].decode('utf-8'))
+    output = remove_non_ascii(p.communicate()[1]).decode('utf-8')
     if p.returncode != 0:
         sys.stderr.write(output)
         sys.exit(1)