mirror of
https://github.com/kkroening/ffmpeg-python.git
synced 2025-04-06 04:15:44 +08:00
Merge remote-tracking branch 'origin/master' into stream_selectors
This commit is contained in:
commit
809ab6cd17
2
examples/requirements.txt
Normal file
2
examples/requirements.txt
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
ffmpeg-python
|
||||||
|
google-cloud-speech
|
141
examples/split_silence.py
Executable file
141
examples/split_silence.py
Executable file
@ -0,0 +1,141 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import errno
|
||||||
|
import ffmpeg
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
|
||||||
|
|
||||||
|
# Emit bare messages at INFO level by default; the script entry point raises
# this logger to DEBUG when ``-v`` is given.
logging.basicConfig(level=logging.INFO, format='%(message)s')
logger = logging.getLogger(__file__)
logger.setLevel(logging.INFO)

# Defaults shared by the command-line parser and ``split_audio``.
DEFAULT_DURATION = 0.3
DEFAULT_THRESHOLD = -60

# Command-line interface: two positional filenames plus optional tuning flags.
parser = argparse.ArgumentParser(
    description='Split media into separate chunks wherever silence occurs',
)
parser.add_argument(
    'in_filename',
    help='Input filename (`-` for stdin)',
)
parser.add_argument(
    'out_pattern',
    help='Output filename pattern (e.g. `out/chunk_{:04d}.wav`)',
)
parser.add_argument(
    '--silence-threshold',
    default=DEFAULT_THRESHOLD,
    type=int,
    help='Silence threshold (in dB)',
)
parser.add_argument(
    '--silence-duration',
    default=DEFAULT_DURATION,
    type=float,
    help='Silence duration',
)
parser.add_argument(
    '--start-time',
    type=float,
    help='Start time (seconds)',
)
parser.add_argument(
    '--end-time',
    type=float,
    help='End time (seconds)',
)
parser.add_argument(
    '-v',
    dest='verbose',
    action='store_true',
    help='Verbose mode',
)
|
||||||
|
|
||||||
|
# Patterns for the lines that ffmpeg's ``silencedetect`` run writes to stderr.
# They are raw strings so that regex escapes such as ``\.`` are not parsed as
# (invalid) Python string escapes.
silence_start_re = re.compile(r' silence_start: (?P<start>[0-9]+(\.?[0-9]*))$')
silence_end_re = re.compile(r' silence_end: (?P<end>[0-9]+(\.?[0-9]*)) ')
# Matches a ``size=... time=HH:MM:SS.xx bitrate=`` status line and captures the
# time components separately.
total_duration_re = re.compile(
    r'size=[^ ]+ time=(?P<hours>[0-9]{2}):(?P<minutes>[0-9]{2}):(?P<seconds>[0-9\.]{5}) bitrate=')
|
||||||
|
|
||||||
|
|
||||||
|
def _logged_popen(cmd_line, *args, **kwargs):
    """Start ``cmd_line`` via ``subprocess.Popen`` after logging it at debug level.

    Any extra positional and keyword arguments are forwarded to
    ``subprocess.Popen`` unchanged, and the resulting process object is returned.
    """
    printable_cmd = subprocess.list2cmdline(cmd_line)
    logger.debug('Running command: {}'.format(printable_cmd))
    process = subprocess.Popen(cmd_line, *args, **kwargs)
    return process
|
||||||
|
|
||||||
|
|
||||||
|
def get_chunk_times(in_filename, silence_threshold, silence_duration, start_time=None, end_time=None):
    """Detect silence in ``in_filename`` and return the non-silent ``(start, end)`` spans.

    ffmpeg is run with the ``silencedetect`` filter (``n`` set to
    ``silence_threshold`` in dB, ``d`` set to ``silence_duration``) and a null
    output; its stderr is then scanned for silence markers. A chunk starts
    where a silence ends and ends where the next silence starts.

    :param in_filename: Input passed to ``ffmpeg.input``.
    :param silence_threshold: Threshold, formatted as ``<value>dB`` for the filter.
    :param silence_duration: Value passed as the filter's ``d`` option.
    :param start_time: Optional ``ss`` input option; treated as ``0.`` when omitted.
    :param end_time: Optional end time; turned into the ``t`` input option as
        ``end_time - start_time``.
    :return: List of ``(chunk_start, chunk_end)`` tuples.

    If ffmpeg exits with a non-zero status, its stderr output is written to
    ``sys.stderr`` and the process exits with status 1.
    """
    input_kwargs = {}
    if start_time is not None:
        input_kwargs['ss'] = start_time
    else:
        start_time = 0.
    if end_time is not None:
        input_kwargs['t'] = end_time - start_time

    p = _logged_popen(
        (ffmpeg
            .input(in_filename, **input_kwargs)
            .filter_('silencedetect', n='{}dB'.format(silence_threshold), d=silence_duration)
            .output('-', format='null')
            .compile()
        ) + ['-nostats'],  # FIXME: use .nostats() once it's implemented in ffmpeg-python.
        stderr=subprocess.PIPE
    )
    # Decode leniently: the stderr text is only scanned with ASCII patterns, so a
    # stray non-UTF-8 byte should not abort the whole run.
    output = p.communicate()[1].decode('utf-8', 'replace')
    if p.returncode != 0:
        sys.stderr.write(output)
        sys.exit(1)
    logger.debug(output)
    lines = output.splitlines()

    # Chunks start when silence ends, and chunks end when silence starts.
    chunk_starts = []
    chunk_ends = []
    for line in lines:
        silence_start_match = silence_start_re.search(line)
        silence_end_match = silence_end_re.search(line)
        total_duration_match = total_duration_re.search(line)
        if silence_start_match:
            chunk_ends.append(float(silence_start_match.group('start')))
            if not chunk_starts:
                # Started with non-silence.
                chunk_starts.append(start_time)
        elif silence_end_match:
            chunk_starts.append(float(silence_end_match.group('end')))
        elif total_duration_match:
            # The duration reported by ffmpeg replaces any caller-supplied end time.
            hours = int(total_duration_match.group('hours'))
            minutes = int(total_duration_match.group('minutes'))
            seconds = float(total_duration_match.group('seconds'))
            end_time = hours * 3600 + minutes * 60 + seconds

    if not chunk_starts:
        # No silence found.
        chunk_starts.append(start_time)

    if len(chunk_starts) > len(chunk_ends):
        # Finished with non-silence. Without a known end time, fall back to a very
        # large value so the last chunk runs to the end of the input.
        chunk_ends.append(end_time or 10000000.)

    return list(zip(chunk_starts, chunk_ends))
|
||||||
|
|
||||||
|
|
||||||
|
def _makedirs(path):
    """Python2-compatible version of ``os.makedirs(path, exist_ok=True)``.

    An empty ``path`` (what ``os.path.dirname`` returns for a bare filename) is
    treated as "nothing to create" instead of being passed to ``os.makedirs``,
    which would raise.

    :param path: Directory to create, including missing parents.
    :raises OSError: If creation fails for any reason other than the directory
        already existing.
    """
    if not path:
        return
    try:
        os.makedirs(path)
    except OSError as exc:
        # Only swallow "already exists" when the existing entry is a directory.
        if exc.errno != errno.EEXIST or not os.path.isdir(path):
            raise
|
||||||
|
|
||||||
|
|
||||||
|
def split_audio(
    in_filename,
    out_pattern,
    silence_threshold=DEFAULT_THRESHOLD,
    silence_duration=DEFAULT_DURATION,
    start_time=None,
    end_time=None,
    verbose=False,
):
    """Split ``in_filename`` into one output file per non-silent chunk.

    Chunk boundaries come from ``get_chunk_times``. Each chunk is written by a
    separate ffmpeg run to ``out_pattern.format(i, i=i)``, where ``i`` is the
    zero-based chunk index; the output directory is created when the pattern
    has one.

    :param in_filename: Input passed to ffmpeg.
    :param out_pattern: ``str.format`` pattern for output filenames.
    :param silence_threshold: Forwarded to ``get_chunk_times``.
    :param silence_duration: Forwarded to ``get_chunk_times``.
    :param start_time: Forwarded to ``get_chunk_times``.
    :param end_time: Forwarded to ``get_chunk_times``.
    :param verbose: When true, ffmpeg's stdout/stderr are inherited instead of
        being captured.

    A failing ffmpeg run is logged as an error and the remaining chunks are
    still processed.
    """
    chunk_times = get_chunk_times(in_filename, silence_threshold, silence_duration, start_time, end_time)

    # Use dedicated loop names so the ``start_time``/``end_time`` parameters are not shadowed.
    for i, (chunk_start, chunk_end) in enumerate(chunk_times):
        chunk_duration = chunk_end - chunk_start
        out_filename = out_pattern.format(i, i=i)
        out_dir = os.path.dirname(out_filename)
        if out_dir:
            # A pattern without a directory part yields '', which must not reach os.makedirs.
            _makedirs(out_dir)

        logger.info('{}: start={:.02f}, end={:.02f}, duration={:.02f}'.format(out_filename, chunk_start, chunk_end,
            chunk_duration))
        process = _logged_popen(
            (ffmpeg
                .input(in_filename, ss=chunk_start, t=chunk_duration)
                .output(out_filename)
                .overwrite_output()
                .compile()
            ),
            stdout=subprocess.PIPE if not verbose else None,
            stderr=subprocess.PIPE if not verbose else None,
        )
        _, captured_stderr = process.communicate()
        if process.returncode != 0:
            # Report the failure instead of dropping it silently, then keep going.
            logger.error('ffmpeg failed with exit code {} while writing {}'.format(process.returncode, out_filename))
            if captured_stderr:
                logger.error(captured_stderr.decode('utf-8', 'replace'))
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Script entry point: parse the command line and forward every option to split_audio.
    kwargs = vars(parser.parse_args())
    if kwargs['verbose']:
        # logging.basicConfig() is already called at module level and does nothing once
        # the root logger has handlers, so install the verbose format on those handlers.
        verbose_formatter = logging.Formatter('%(levelname)s: %(message)s')
        for handler in logging.getLogger().handlers:
            handler.setFormatter(verbose_formatter)
        logger.setLevel(logging.DEBUG)
    split_audio(**kwargs)
|
62
examples/transcribe.py
Executable file
62
examples/transcribe.py
Executable file
@ -0,0 +1,62 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from google.cloud import speech
|
||||||
|
from google.cloud.speech import enums
|
||||||
|
from google.cloud.speech import types
|
||||||
|
import argparse
|
||||||
|
import ffmpeg
|
||||||
|
import logging
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
|
||||||
|
|
||||||
|
# Emit bare messages at INFO level.
logging.basicConfig(level=logging.INFO, format='%(message)s')
logger = logging.getLogger(__file__)
logger.setLevel(logging.INFO)

# Command-line interface: a single positional input filename.
parser = argparse.ArgumentParser(
    description='Convert speech audio to text using Google Speech API',
)
parser.add_argument(
    'in_filename',
    help='Input filename (`-` for stdin)',
)
|
||||||
|
|
||||||
|
|
||||||
|
def decode_audio(in_filename, **input_kwargs):
    """Decode ``in_filename`` with ffmpeg and return the raw audio bytes.

    The output is requested as single-channel (``ac=1``), ``16k`` sample rate,
    ``pcm_s16le`` audio in ``s16le`` format, written to ffmpeg's stdout.

    :param in_filename: Input passed to ``ffmpeg.input``.
    :param input_kwargs: Extra keyword arguments forwarded to ``ffmpeg.input``.
    :return: The bytes ffmpeg wrote to stdout.

    If ffmpeg exits with a non-zero status, its stderr output is written to
    ``sys.stderr`` and the process exits with status 1.
    """
    p = subprocess.Popen(
        (ffmpeg
            .input(in_filename, **input_kwargs)
            .output('-', format='s16le', acodec='pcm_s16le', ac=1, ar='16k')
            .overwrite_output()
            .compile()
        ),
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE
    )
    audio_bytes, error_bytes = p.communicate()
    if p.returncode != 0:
        # communicate() returns bytes; sys.stderr is a text stream, so decode first.
        sys.stderr.write(error_bytes.decode('utf-8', 'replace'))
        sys.exit(1)
    return audio_bytes
|
||||||
|
|
||||||
|
|
||||||
|
def get_transcripts(audio_data):
    """Run speech recognition on ``audio_data`` and return the transcripts.

    The request is configured for ``LINEAR16`` encoding at 16000 Hz with the
    ``en-US`` language code. For every result in the response, the transcript
    of its first alternative is collected.

    :param audio_data: Audio content passed to ``types.RecognitionAudio``.
    :return: List of transcripts, one per recognition result.
    """
    speech_client = speech.SpeechClient()
    recognition_audio = types.RecognitionAudio(content=audio_data)
    recognition_config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=16000,
        language_code='en-US'
    )
    response = speech_client.recognize(recognition_config, recognition_audio)

    transcripts = []
    for result in response.results:
        best_alternative = result.alternatives[0]
        transcripts.append(best_alternative.transcript)
    return transcripts
|
||||||
|
|
||||||
|
|
||||||
|
def transcribe(in_filename):
    """Decode ``in_filename``, transcribe it, and print each transcript.

    Every transcript is UTF-8 encoded and printed through ``repr``, one per line.
    """
    pcm_data = decode_audio(in_filename)
    for text in get_transcripts(pcm_data):
        encoded_text = text.encode('utf-8')
        print(repr(encoded_text))
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Script entry point: transcribe the file named on the command line.
    transcribe(parser.parse_args().in_filename)
|
@ -52,6 +52,11 @@ def split(stream):
|
|||||||
return FilterNode(stream, split.__name__)
|
return FilterNode(stream, split.__name__)
|
||||||
|
|
||||||
|
|
||||||
|
@filter_operator()
def asplit(stream):
    """Create a ``FilterNode`` for ``stream``, using this function's ``__name__`` as the filter name."""
    filter_name = asplit.__name__
    return FilterNode(stream, filter_name)
|
||||||
|
|
||||||
|
|
||||||
@filter_operator()
|
@filter_operator()
|
||||||
def setpts(stream, expr):
|
def setpts(stream, expr):
|
||||||
"""Change the PTS (presentation timestamp) of the input frames.
|
"""Change the PTS (presentation timestamp) of the input frames.
|
||||||
@ -421,6 +426,7 @@ __all__ = [
|
|||||||
'drawbox',
|
'drawbox',
|
||||||
'drawtext',
|
'drawtext',
|
||||||
'filter_',
|
'filter_',
|
||||||
|
'filter_multi_output',
|
||||||
'hflip',
|
'hflip',
|
||||||
'hue',
|
'hue',
|
||||||
'overlay',
|
'overlay',
|
||||||
|
@ -78,7 +78,7 @@ def view(stream_spec, **kwargs):
|
|||||||
downstream_node_id = str(hash(edge.downstream_node))
|
downstream_node_id = str(hash(edge.downstream_node))
|
||||||
graph.edge(upstream_node_id, downstream_node_id, **kwargs)
|
graph.edge(upstream_node_id, downstream_node_id, **kwargs)
|
||||||
|
|
||||||
graph.view(filename)
|
graph.view(filename, cleanup=True)
|
||||||
|
|
||||||
return stream_spec
|
return stream_spec
|
||||||
|
|
||||||
|
@ -280,7 +280,7 @@ class FilterNode(Node):
|
|||||||
def _get_filter(self, outgoing_edges):
|
def _get_filter(self, outgoing_edges):
|
||||||
args = self.args
|
args = self.args
|
||||||
kwargs = self.kwargs
|
kwargs = self.kwargs
|
||||||
if self.name == 'split':
|
if self.name in ('split', 'asplit'):
|
||||||
args = [len(outgoing_edges)]
|
args = [len(outgoing_edges)]
|
||||||
|
|
||||||
out_args = [escape_chars(x, '\\\'=:') for x in args]
|
out_args = [escape_chars(x, '\\\'=:') for x in args]
|
||||||
|
@ -180,6 +180,41 @@ def test_map_same_effect_as_output():
|
|||||||
TEST_OUTPUT_FILE1]
|
TEST_OUTPUT_FILE1]
|
||||||
|
|
||||||
|
|
||||||
|
def _get_complex_filter_asplit_example():
    """Build the ``asplit`` example graph checked by ``test_filter_asplit``.

    The input is passed through ``vflip`` and ``asplit``; the first two split
    outputs are each trimmed with ``atrim`` (10-20 and 30-40), concatenated, and
    sent to ``TEST_OUTPUT_FILE1`` with overwriting enabled.
    """
    branches = (ffmpeg
        .input(TEST_INPUT_FILE1)
        .vflip()
        .asplit()
    )
    first_branch, second_branch = branches[0], branches[1]

    joined = ffmpeg.concat(
        first_branch.filter_("atrim", start=10, end=20),
        second_branch.filter_("atrim", start=30, end=40),
    )
    return joined.output(TEST_OUTPUT_FILE1).overwrite_output()
|
||||||
|
|
||||||
|
|
||||||
|
def test_filter_asplit():
    """Check the exact ffmpeg arguments produced for the ``asplit`` example graph."""
    expected_filter_graph = (
        '[0]vflip[s0];[s0]asplit=2[s1][s2];[s1]atrim=end=20:start=10[s3];[s2]atrim=end=40:start=30[s4];[s3]'
        '[s4]concat=n=2[s5]'
    )
    expected_args = [
        '-i',
        TEST_INPUT_FILE1,
        '-filter_complex',
        expected_filter_graph,
        '-map',
        '[s5]',
        TEST_OUTPUT_FILE1,
        '-y'
    ]
    actual_args = _get_complex_filter_asplit_example().get_args()
    assert actual_args == expected_args
|
||||||
|
|
||||||
|
|
||||||
def test_filter_normal_arg_escape():
|
def test_filter_normal_arg_escape():
|
||||||
"""Test string escaping of normal filter args (e.g. ``font`` param of ``drawtext`` filter)."""
|
"""Test string escaping of normal filter args (e.g. ``font`` param of ``drawtext`` filter)."""
|
||||||
def _get_drawtext_font_repr(font):
|
def _get_drawtext_font_repr(font):
|
||||||
|
2
setup.py
2
setup.py
@ -2,7 +2,7 @@ from setuptools import setup
|
|||||||
from textwrap import dedent
|
from textwrap import dedent
|
||||||
import subprocess
|
import subprocess
|
||||||
|
|
||||||
version = '0.1.9'
|
version = '0.1.10'
|
||||||
download_url = 'https://github.com/kkroening/ffmpeg-python/archive/v{}.zip'.format(version)
|
download_url = 'https://github.com/kkroening/ffmpeg-python/archive/v{}.zip'.format(version)
|
||||||
|
|
||||||
long_description = dedent("""\
|
long_description = dedent("""\
|
||||||
|
Loading…
x
Reference in New Issue
Block a user