From 7bc77ff71417ea6391c571a3a9f737082508593f Mon Sep 17 00:00:00 2001 From: Davide Depau Date: Fri, 22 Dec 2017 16:22:41 +0100 Subject: [PATCH 01/11] Add `asplit` filter --- ffmpeg/_filters.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/ffmpeg/_filters.py b/ffmpeg/_filters.py index 0f980a7..8db40ee 100644 --- a/ffmpeg/_filters.py +++ b/ffmpeg/_filters.py @@ -52,6 +52,11 @@ def split(stream): return FilterNode(stream, split.__name__) +@filter_operator() +def asplit(stream): + return FilterNode(stream, split.__name__) + + @filter_operator() def setpts(stream, expr): """Change the PTS (presentation timestamp) of the input frames. From 755fb843de221faf90245eb47a1ce8d466e42fd5 Mon Sep 17 00:00:00 2001 From: Davide Depau Date: Fri, 22 Dec 2017 17:11:23 +0100 Subject: [PATCH 02/11] Also provide the number of splits to `asplit` filter --- ffmpeg/nodes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ffmpeg/nodes.py b/ffmpeg/nodes.py index 2b4c94f..013025d 100644 --- a/ffmpeg/nodes.py +++ b/ffmpeg/nodes.py @@ -152,7 +152,7 @@ class FilterNode(Node): def _get_filter(self, outgoing_edges): args = self.args kwargs = self.kwargs - if self.name == 'split': + if self.name in ('split', 'asplit'): args = [len(outgoing_edges)] out_args = [escape_chars(x, '\\\'=:') for x in args] From 4311e33859d5a031f82dc2c6bbafdd2852dacb07 Mon Sep 17 00:00:00 2001 From: Karl Kroening Date: Sun, 7 Jan 2018 03:32:05 -0800 Subject: [PATCH 03/11] Add `split_silence` example --- examples/split_silence.py | 111 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 111 insertions(+) create mode 100755 examples/split_silence.py diff --git a/examples/split_silence.py b/examples/split_silence.py new file mode 100755 index 0000000..11c5ddc --- /dev/null +++ b/examples/split_silence.py @@ -0,0 +1,111 @@ +#!/usr/bin/env python +from __future__ import unicode_literals + +import argparse +import ffmpeg +import logging +import re +import subprocess + + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__file__) +logger.setLevel(logging.INFO) + +DEFAULT_DURATION = 0.3 +DEFAULT_THRESHOLD = -60 + +parser = argparse.ArgumentParser(description='Split media into separate chunks wherever silence occurs') +parser.add_argument('in_filename', help='Input filename (`-` for stdin)') +parser.add_argument('out_pattern', help='Output filename pattern (e.g. `out/chunk_%%04d.wav`)') +parser.add_argument('--silence-threshold', default=DEFAULT_THRESHOLD, type=int, help='Silence threshold (in dB)') +parser.add_argument('--silence-duration', default=DEFAULT_DURATION, type=float, help='Silence duration') +parser.add_argument('--start-time', type=float, help='Start time (seconds)') +parser.add_argument('--end-time', type=float, help='End time (seconds)') + + +silence_start_re = re.compile(' silence_start: (?P[0-9]+(\.?[0-9]*))$') +silence_end_re = re.compile(' silence_end: (?P[0-9]+(\.?[0-9]*)) ') +total_duration_re = re.compile( + 'size=[^ ]+ time=(?P[0-9]{2}):(?P[0-9]{2}):(?P[0-9\.]{5}) bitrate=') + + +def get_chunk_times(in_filename, silence_threshold, silence_duration, start_time=None, end_time=None): + input_kwargs = {} + if start_time is not None: + input_kwargs['ss'] = start_time + else: + start_time = 0. + if end_time is not None: + input_kwargs['t'] = end_time - start_time + + args = (ffmpeg + .input(in_filename, **input_kwargs) + .filter_('silencedetect', n='{}dB'.format(silence_threshold), d=silence_duration) + .output('-', format='null') + .get_args() + ) + p = subprocess.Popen(['ffmpeg'] + args, stderr=subprocess.PIPE) + output = p.communicate()[1].decode('utf-8') + lines = output.splitlines() + + # Chunks start when silence ends, and chunks end when silence starts. + chunk_starts = [] + chunk_ends = [] + for line in lines: + silence_start_match = silence_start_re.search(line) + silence_end_match = silence_end_re.search(line) + total_duration_match = total_duration_re.search(line) + if silence_start_match: + chunk_ends.append(float(silence_start_match.group('start'))) + if len(chunk_starts) == 0: + # Started with non-silence. + chunk_starts.append(start_time or 0.) + elif silence_end_match: + chunk_starts.append(float(silence_end_match.group('end'))) + elif total_duration_match: + hours = int(total_duration_match.group('hours')) + minutes = int(total_duration_match.group('minutes')) + seconds = float(total_duration_match.group('seconds')) + end_time = hours * 3600 + minutes * 60 + seconds + + if len(chunk_starts) == 0: + # No silence found. + chunk_starts.append(start_time) + + if len(chunk_starts) > len(chunk_ends): + # Finished with non-silence. + chunk_ends.append(end_time or 10000000.) + + return list(zip(chunk_starts, chunk_ends)) + + +def split_audio( + in_filename, + out_pattern, + silence_threshold=DEFAULT_THRESHOLD, + silence_duration=DEFAULT_DURATION, + start_time=None, + end_time=None, +): + chunk_times = get_chunk_times(in_filename, silence_threshold, silence_duration, start_time, end_time) + for i, (start_time, end_time) in enumerate(chunk_times): + time = end_time - start_time + out_filename = out_pattern % i + logger.info('{}: start={:.02f}, end={:.02f}, duration={:.02f}'.format(out_filename, start_time, end_time, + time)) + subprocess.Popen( + (ffmpeg + .input(in_filename, ss=start_time, t=time) + .output(out_filename) + .overwrite_output() + .compile() + ), + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ).communicate() + + +if __name__ == '__main__': + args = parser.parse_args() + split_audio(**vars(args)) From ad58a38d59533e1d46f6b19295648b1173a5310d Mon Sep 17 00:00:00 2001 From: Karl Kroening Date: Sun, 7 Jan 2018 03:43:20 -0800 Subject: [PATCH 04/11] Finalize split_silence --- examples/requirements.txt | 1 + examples/split_silence.py | 24 ++++++++++++++++++++---- 2 files changed, 21 insertions(+), 4 deletions(-) create mode 100644 examples/requirements.txt diff --git a/examples/requirements.txt b/examples/requirements.txt new file mode 100644 index 0000000..20645e6 --- /dev/null +++ b/examples/requirements.txt @@ -0,0 +1 @@ +ffmpeg diff --git a/examples/split_silence.py b/examples/split_silence.py index 11c5ddc..297083a 100755 --- a/examples/split_silence.py +++ b/examples/split_silence.py @@ -2,13 +2,15 @@ from __future__ import unicode_literals import argparse +import errno import ffmpeg import logging +import os import re import subprocess +import sys - -logging.basicConfig(level=logging.INFO) +logging.basicConfig(level=logging.INFO, format='%(message)s') logger = logging.getLogger(__file__) logger.setLevel(logging.INFO) @@ -17,7 +19,7 @@ DEFAULT_THRESHOLD = -60 parser = argparse.ArgumentParser(description='Split media into separate chunks wherever silence occurs') parser.add_argument('in_filename', help='Input filename (`-` for stdin)') -parser.add_argument('out_pattern', help='Output filename pattern (e.g. `out/chunk_%%04d.wav`)') +parser.add_argument('out_pattern', help='Output filename pattern (e.g. `out/chunk_{:04d}.wav`)') parser.add_argument('--silence-threshold', default=DEFAULT_THRESHOLD, type=int, help='Silence threshold (in dB)') parser.add_argument('--silence-duration', default=DEFAULT_DURATION, type=float, help='Silence duration') parser.add_argument('--start-time', type=float, help='Start time (seconds)') @@ -47,6 +49,9 @@ def get_chunk_times(in_filename, silence_threshold, silence_duration, start_time ) p = subprocess.Popen(['ffmpeg'] + args, stderr=subprocess.PIPE) output = p.communicate()[1].decode('utf-8') + if p.returncode != 0: + sys.stderr.write(output) + sys.exit(1) lines = output.splitlines() # Chunks start when silence ends, and chunks end when silence starts. @@ -80,6 +85,14 @@ def get_chunk_times(in_filename, silence_threshold, silence_duration, start_time return list(zip(chunk_starts, chunk_ends)) +def _makedirs(path): + """Python2-compatible version of ``os.makedirs(path, exist_ok=True)``.""" + try: + os.makedirs(path) + except OSError as exc: + if exc.errno != errno.EEXIST or not os.path.isdir(path): + raise + def split_audio( in_filename, out_pattern, @@ -89,9 +102,12 @@ def split_audio( end_time=None, ): chunk_times = get_chunk_times(in_filename, silence_threshold, silence_duration, start_time, end_time) + for i, (start_time, end_time) in enumerate(chunk_times): time = end_time - start_time - out_filename = out_pattern % i + out_filename = out_pattern.format(i, i=i) + _makedirs(os.path.dirname(out_filename)) + logger.info('{}: start={:.02f}, end={:.02f}, duration={:.02f}'.format(out_filename, start_time, end_time, time)) subprocess.Popen( From f5f7ee20730f2beca5db80933f11d880e1f22a0c Mon Sep 17 00:00:00 2001 From: Karl Kroening Date: Sun, 7 Jan 2018 04:43:05 -0800 Subject: [PATCH 05/11] Improve logging in split_silence; add transcribe example --- examples/split_silence.py | 38 ++++++++++++++++-------- examples/transcribe.py | 62 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 88 insertions(+), 12 deletions(-) create mode 100755 examples/transcribe.py diff --git a/examples/split_silence.py b/examples/split_silence.py index 297083a..9a8adbd 100755 --- a/examples/split_silence.py +++ b/examples/split_silence.py @@ -10,6 +10,7 @@ import re import subprocess import sys + logging.basicConfig(level=logging.INFO, format='%(message)s') logger = logging.getLogger(__file__) logger.setLevel(logging.INFO) @@ -24,7 +25,7 @@ parser.add_argument('--silence-threshold', default=DEFAULT_THRESHOLD, type=int, parser.add_argument('--silence-duration', default=DEFAULT_DURATION, type=float, help='Silence duration') parser.add_argument('--start-time', type=float, help='Start time (seconds)') parser.add_argument('--end-time', type=float, help='End time (seconds)') - +parser.add_argument('-v', dest='verbose', action='store_true', help='Verbose mode') silence_start_re = re.compile(' silence_start: (?P[0-9]+(\.?[0-9]*))$') silence_end_re = re.compile(' silence_end: (?P[0-9]+(\.?[0-9]*)) ') @@ -32,6 +33,11 @@ total_duration_re = re.compile( 'size=[^ ]+ time=(?P[0-9]{2}):(?P[0-9]{2}):(?P[0-9\.]{5}) bitrate=') +def _logged_popen(cmd_line, *args, **kwargs): + logger.debug('Running command: {}'.format(subprocess.list2cmdline(cmd_line))) + return subprocess.Popen(cmd_line, *args, **kwargs) + + def get_chunk_times(in_filename, silence_threshold, silence_duration, start_time=None, end_time=None): input_kwargs = {} if start_time is not None: @@ -41,17 +47,20 @@ def get_chunk_times(in_filename, silence_threshold, silence_duration, start_time if end_time is not None: input_kwargs['t'] = end_time - start_time - args = (ffmpeg - .input(in_filename, **input_kwargs) - .filter_('silencedetect', n='{}dB'.format(silence_threshold), d=silence_duration) - .output('-', format='null') - .get_args() + p = _logged_popen( + (ffmpeg + .input(in_filename, **input_kwargs) + .filter_('silencedetect', n='{}dB'.format(silence_threshold), d=silence_duration) + .output('-', format='null') + .compile() + ) + ['-nostats'], # FIXME: use .nostats() once it's implemented in ffmpeg-python. + stderr=subprocess.PIPE ) - p = subprocess.Popen(['ffmpeg'] + args, stderr=subprocess.PIPE) output = p.communicate()[1].decode('utf-8') if p.returncode != 0: sys.stderr.write(output) sys.exit(1) + logger.debug(output) lines = output.splitlines() # Chunks start when silence ends, and chunks end when silence starts. @@ -93,6 +102,7 @@ def _makedirs(path): if exc.errno != errno.EEXIST or not os.path.isdir(path): raise + def split_audio( in_filename, out_pattern, @@ -100,6 +110,7 @@ def split_audio( silence_duration=DEFAULT_DURATION, start_time=None, end_time=None, + verbose=False, ): chunk_times = get_chunk_times(in_filename, silence_threshold, silence_duration, start_time, end_time) @@ -110,18 +121,21 @@ def split_audio( logger.info('{}: start={:.02f}, end={:.02f}, duration={:.02f}'.format(out_filename, start_time, end_time, time)) - subprocess.Popen( + _logged_popen( (ffmpeg .input(in_filename, ss=start_time, t=time) .output(out_filename) .overwrite_output() .compile() ), - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, + stdout=subprocess.PIPE if not verbose else None, + stderr=subprocess.PIPE if not verbose else None, ).communicate() if __name__ == '__main__': - args = parser.parse_args() - split_audio(**vars(args)) + kwargs = vars(parser.parse_args()) + if kwargs['verbose']: + logging.basicConfig(level=logging.DEBUG, format='%(levels): %(message)s') + logger.setLevel(logging.DEBUG) + split_audio(**kwargs) diff --git a/examples/transcribe.py b/examples/transcribe.py new file mode 100755 index 0000000..fb484df --- /dev/null +++ b/examples/transcribe.py @@ -0,0 +1,62 @@ +#!/usr/bin/env python +from __future__ import unicode_literals + +from google.cloud import speech +from google.cloud.speech import enums +from google.cloud.speech import types +import argparse +import ffmpeg +import logging +import subprocess +import sys + + +logging.basicConfig(level=logging.INFO, format='%(message)s') +logger = logging.getLogger(__file__) +logger.setLevel(logging.INFO) + + +parser = argparse.ArgumentParser(description='Convert speech audio to text using Google Speech API') +parser.add_argument('in_filename', help='Input filename (`-` for stdin)') + + +def decode_audio(in_filename, **input_kwargs): + p = subprocess.Popen( + (ffmpeg + .input(in_filename, **input_kwargs) + .output('-', format='s16le', acodec='pcm_s16le', ac=1, ar='16k') + .overwrite_output() + .compile() + ), + stdout=subprocess.PIPE, + stderr=subprocess.PIPE + ) + out = p.communicate() + if p.returncode != 0: + sys.stderr.write(out[1]) + sys.exit(1) + return out[0] + + +def get_transcripts(audio_data): + client = speech.SpeechClient() + audio = types.RecognitionAudio(content=audio_data) + config = types.RecognitionConfig( + encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16, + sample_rate_hertz=16000, + language_code='en-US' + ) + response = client.recognize(config, audio) + return [result.alternatives[0].transcript for result in response.results] + + +def transcribe(in_filename): + audio_data = decode_audio(in_filename) + transcripts = get_transcripts(audio_data) + for transcript in transcripts: + print(repr(transcript.encode('utf-8'))) + + +if __name__ == '__main__': + args = parser.parse_args() + transcribe(args.in_filename) From e7fbb288d418140f9ceaf7851ed3faa6d4d0368a Mon Sep 17 00:00:00 2001 From: Davide Depau Date: Wed, 10 Jan 2018 10:35:23 +0100 Subject: [PATCH 06/11] Fix name of asplit filter --- ffmpeg/_filters.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ffmpeg/_filters.py b/ffmpeg/_filters.py index 8db40ee..94e6d24 100644 --- a/ffmpeg/_filters.py +++ b/ffmpeg/_filters.py @@ -54,7 +54,7 @@ def split(stream): @filter_operator() def asplit(stream): - return FilterNode(stream, split.__name__) + return FilterNode(stream, asplit.__name__) @filter_operator() From c87fd5cf56174f3b01b2df48993c801e5ec7ac47 Mon Sep 17 00:00:00 2001 From: Davide Depau Date: Wed, 10 Jan 2018 10:38:42 +0100 Subject: [PATCH 07/11] Add tests for asplit filter --- ffmpeg/tests/test_ffmpeg.py | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/ffmpeg/tests/test_ffmpeg.py b/ffmpeg/tests/test_ffmpeg.py index 95e0042..59d17b0 100644 --- a/ffmpeg/tests/test_ffmpeg.py +++ b/ffmpeg/tests/test_ffmpeg.py @@ -147,6 +147,41 @@ def test_get_args_complex_filter(): ] +def _get_complex_filter_asplit_example(): + split = (ffmpeg + .input(TEST_INPUT_FILE1) + .vflip() + .asplit() + ) + split0 = split[0] + split1 = split[1] + + return (ffmpeg + .concat( + split0.filter_("atrim", start=10, end=20), + split1.filter_("atrim", start=30, end=40), + ) + .output(TEST_OUTPUT_FILE1) + .overwrite_output() + ) + + +def test_filter_asplit(): + out = _get_complex_filter_asplit_example() + args = out.get_args() + assert args == [ + '-i', + TEST_INPUT_FILE1, + '-filter_complex', + '[0]vflip[s0];[s0]asplit=2[s1][s2];[s1]atrim=end=20:start=10[s3];[s2]atrim=end=40:start=30[s4];[s3]' + '[s4]concat=n=2[s5]', + '-map', + '[s5]', + TEST_OUTPUT_FILE1, + '-y' + ] + + def test_filter_normal_arg_escape(): """Test string escaping of normal filter args (e.g. ``font`` param of ``drawtext`` filter).""" def _get_drawtext_font_repr(font): From 3a818cc33dedea3273d2c21c0fc3e9b3c7e06948 Mon Sep 17 00:00:00 2001 From: Karl Kroening Date: Thu, 11 Jan 2018 22:22:31 -0800 Subject: [PATCH 08/11] Update requirements.txt --- examples/requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/requirements.txt b/examples/requirements.txt index 20645e6..d5f10fc 100644 --- a/examples/requirements.txt +++ b/examples/requirements.txt @@ -1 +1,2 @@ -ffmpeg +ffmpeg-python +google-cloud-speech From 19f316e9c5c500e2dc28d1dc78fdf595de7bd20d Mon Sep 17 00:00:00 2001 From: Karl Kroening Date: Sat, 13 Jan 2018 21:07:30 -0800 Subject: [PATCH 09/11] Release version 0.1.10 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 6f59cc0..6f26376 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from setuptools import setup from textwrap import dedent import subprocess -version = '0.1.9' +version = '0.1.10' download_url = 'https://github.com/kkroening/ffmpeg-python/archive/v{}.zip'.format(version) long_description = dedent("""\ From 50c4a8985d713e06bc809f221756ae9c69eacd18 Mon Sep 17 00:00:00 2001 From: Arne de Laat Date: Tue, 16 Jan 2018 22:10:41 +0100 Subject: [PATCH 10/11] Cleanup graph source file after rendering graph to pdf --- ffmpeg/_view.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ffmpeg/_view.py b/ffmpeg/_view.py index cdb41b0..d2a945b 100644 --- a/ffmpeg/_view.py +++ b/ffmpeg/_view.py @@ -76,7 +76,7 @@ def view(stream_spec, **kwargs): downstream_node_id = str(hash(edge.downstream_node)) graph.edge(upstream_node_id, downstream_node_id, **kwargs) - graph.view(filename) + graph.view(filename, cleanup=True) return stream_spec From f1e418be4cd09f7bad0e42fcaa187cb6a42f1ac2 Mon Sep 17 00:00:00 2001 From: Davide Depau Date: Fri, 26 Jan 2018 14:38:57 +0100 Subject: [PATCH 11/11] Add `filter_multi_output` to `__all__` so it's available in API --- ffmpeg/_filters.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ffmpeg/_filters.py b/ffmpeg/_filters.py index 8bbe0cd..d220b69 100644 --- a/ffmpeg/_filters.py +++ b/ffmpeg/_filters.py @@ -421,6 +421,7 @@ __all__ = [ 'drawbox', 'drawtext', 'filter_', + 'filter_multi_output', 'hflip', 'hue', 'overlay',