Update split_silence.py

These changes resolve an error raised for some media files whose metadata contains bad or unprintable bytes:

```
Traceback (most recent call last):
  File "/code/main.py", line 285, in <module>
    transform(item)
  File "/code/main.py", line 214, in transform
    chunk_dic = split_audio_and_chunk_to_dic(in_filename=f'./rx/some_file',
  File "/code/split_silence.py", line 313, in split_audio_and_chunk_to_dic
    chunk_times = get_chunk_times(in_filename, silence_threshold, silence_duration, start_time, end_time)
  File "/code/split_silence.py", line 149, in get_chunk_times
    output = p.communicate()[1].decode('utf-8')
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa9 in position 1888: invalid start byte
```
This commit is contained in:
S.Mohammad Emami Razavi 2024-10-30 18:10:38 +03:30 committed by GitHub
parent df129c7ba3
commit 192d3d6842
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -47,17 +47,24 @@ def get_chunk_times(in_filename, silence_threshold, silence_duration, start_time
if end_time is not None:
input_kwargs['t'] = end_time - start_time
p = _logged_popen(
p1 = _logged_popen(
(ffmpeg
.input(in_filename, **input_kwargs)
.filter('silencedetect', n='{}dB'.format(silence_threshold), d=silence_duration)
.output('-', format='null')
.compile()
) + ['-nostats'], # FIXME: use .nostats() once it's implemented in ffmpeg-python.
stderr=subprocess.PIPE
stderr=subprocess.PIPE, stdout=subprocess.PIPE
)
output = p.communicate()[1].decode('utf-8')
if p.returncode != 0:
# NOTE:
# The line below works around a failure when decoding bad or non-printable metadata returned by
# communicate() on Linux. This workaround requires the `sed` command to be available on your OS.
p2 = subprocess.Popen(['sed', '1,/silencedetect/{/silencedetect/!d}'], stdin=p1.stdout,
stdout=subprocess.PIPE)
p1.stdout.close()
output = p2.communicate()[0].decode('utf-8')
if p2.returncode != 0:
sys.stderr.write(output)
sys.exit(1)
logger.debug(output)