mirror of
https://github.com/kkroening/ffmpeg-python.git
synced 2025-04-05 04:22:51 +08:00
Improve logging in split_silence; add transcribe example
This commit is contained in:
parent
ad58a38d59
commit
f5f7ee2073
@ -10,6 +10,7 @@ import re
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
|
||||
logging.basicConfig(level=logging.INFO, format='%(message)s')
|
||||
logger = logging.getLogger(__file__)
|
||||
logger.setLevel(logging.INFO)
|
||||
@ -24,7 +25,7 @@ parser.add_argument('--silence-threshold', default=DEFAULT_THRESHOLD, type=int,
|
||||
parser.add_argument('--silence-duration', default=DEFAULT_DURATION, type=float, help='Silence duration')
|
||||
parser.add_argument('--start-time', type=float, help='Start time (seconds)')
|
||||
parser.add_argument('--end-time', type=float, help='End time (seconds)')
|
||||
|
||||
parser.add_argument('-v', dest='verbose', action='store_true', help='Verbose mode')
|
||||
|
||||
# Patterns for the `silence_start` / `silence_end` markers reported by ffmpeg's
# `silencedetect` filter.  Raw strings are used so that `\.` reaches the regex
# engine as an escaped dot instead of relying on Python passing an unknown
# string escape through unchanged (which newer Python versions warn about).
# `silence_start` must be the last thing on the line; `silence_end` must be
# followed by a space.
silence_start_re = re.compile(r' silence_start: (?P<start>[0-9]+(\.?[0-9]*))$')
silence_end_re = re.compile(r' silence_end: (?P<end>[0-9]+(\.?[0-9]*)) ')
|
||||
@ -32,6 +33,11 @@ total_duration_re = re.compile(
|
||||
'size=[^ ]+ time=(?P<hours>[0-9]{2}):(?P<minutes>[0-9]{2}):(?P<seconds>[0-9\.]{5}) bitrate=')
|
||||
|
||||
|
||||
def _logged_popen(cmd_line, *args, **kwargs):
    """Start a subprocess, logging its command line at debug level first.

    Args:
        cmd_line: Sequence of program arguments; rendered for the log message
            with ``subprocess.list2cmdline`` and passed to ``subprocess.Popen``.
        *args: Extra positional arguments forwarded to ``subprocess.Popen``.
        **kwargs: Extra keyword arguments forwarded to ``subprocess.Popen``.

    Returns:
        The ``subprocess.Popen`` object for the started process.
    """
    rendered_command = subprocess.list2cmdline(cmd_line)
    logger.debug('Running command: {}'.format(rendered_command))
    process = subprocess.Popen(cmd_line, *args, **kwargs)
    return process
|
||||
|
||||
|
||||
def get_chunk_times(in_filename, silence_threshold, silence_duration, start_time=None, end_time=None):
|
||||
input_kwargs = {}
|
||||
if start_time is not None:
|
||||
@ -41,17 +47,20 @@ def get_chunk_times(in_filename, silence_threshold, silence_duration, start_time
|
||||
if end_time is not None:
|
||||
input_kwargs['t'] = end_time - start_time
|
||||
|
||||
args = (ffmpeg
|
||||
.input(in_filename, **input_kwargs)
|
||||
.filter_('silencedetect', n='{}dB'.format(silence_threshold), d=silence_duration)
|
||||
.output('-', format='null')
|
||||
.get_args()
|
||||
p = _logged_popen(
|
||||
(ffmpeg
|
||||
.input(in_filename, **input_kwargs)
|
||||
.filter_('silencedetect', n='{}dB'.format(silence_threshold), d=silence_duration)
|
||||
.output('-', format='null')
|
||||
.compile()
|
||||
) + ['-nostats'], # FIXME: use .nostats() once it's implemented in ffmpeg-python.
|
||||
stderr=subprocess.PIPE
|
||||
)
|
||||
p = subprocess.Popen(['ffmpeg'] + args, stderr=subprocess.PIPE)
|
||||
output = p.communicate()[1].decode('utf-8')
|
||||
if p.returncode != 0:
|
||||
sys.stderr.write(output)
|
||||
sys.exit(1)
|
||||
logger.debug(output)
|
||||
lines = output.splitlines()
|
||||
|
||||
# Chunks start when silence ends, and chunks end when silence starts.
|
||||
@ -93,6 +102,7 @@ def _makedirs(path):
|
||||
if exc.errno != errno.EEXIST or not os.path.isdir(path):
|
||||
raise
|
||||
|
||||
|
||||
def split_audio(
|
||||
in_filename,
|
||||
out_pattern,
|
||||
@ -100,6 +110,7 @@ def split_audio(
|
||||
silence_duration=DEFAULT_DURATION,
|
||||
start_time=None,
|
||||
end_time=None,
|
||||
verbose=False,
|
||||
):
|
||||
chunk_times = get_chunk_times(in_filename, silence_threshold, silence_duration, start_time, end_time)
|
||||
|
||||
@ -110,18 +121,21 @@ def split_audio(
|
||||
|
||||
logger.info('{}: start={:.02f}, end={:.02f}, duration={:.02f}'.format(out_filename, start_time, end_time,
|
||||
time))
|
||||
subprocess.Popen(
|
||||
_logged_popen(
|
||||
(ffmpeg
|
||||
.input(in_filename, ss=start_time, t=time)
|
||||
.output(out_filename)
|
||||
.overwrite_output()
|
||||
.compile()
|
||||
),
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE,
|
||||
stdout=subprocess.PIPE if not verbose else None,
|
||||
stderr=subprocess.PIPE if not verbose else None,
|
||||
).communicate()
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Turn the parsed CLI options into a dict so they can be forwarded to
    # split_audio() as keyword arguments.
    kwargs = vars(parser.parse_args())
    if kwargs['verbose']:
        # logging.basicConfig() is already called when this module is loaded,
        # and calling it again is a no-op once the root logger has handlers.
        # Reconfigure the existing root handlers directly so that verbose mode
        # really switches to DEBUG output with a level-name prefix.
        verbose_formatter = logging.Formatter('%(levelname)s: %(message)s')
        root_logger = logging.getLogger()
        root_logger.setLevel(logging.DEBUG)
        for handler in root_logger.handlers:
            handler.setFormatter(verbose_formatter)
        logger.setLevel(logging.DEBUG)
    split_audio(**kwargs)
|
||||
|
62
examples/transcribe.py
Executable file
62
examples/transcribe.py
Executable file
@ -0,0 +1,62 @@
|
||||
#!/usr/bin/env python
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from google.cloud import speech
|
||||
from google.cloud.speech import enums
|
||||
from google.cloud.speech import types
|
||||
import argparse
|
||||
import ffmpeg
|
||||
import logging
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
|
||||
# Script-wide logging: bare messages at INFO level.
logging.basicConfig(format='%(message)s', level=logging.INFO)
logger = logging.getLogger(__file__)
logger.setLevel(logging.INFO)


# Command-line interface: a single positional argument naming the input.
parser = argparse.ArgumentParser(
    description='Convert speech audio to text using Google Speech API',
)
parser.add_argument(
    'in_filename',
    help='Input filename (`-` for stdin)',
)
|
||||
|
||||
|
||||
def decode_audio(in_filename, **input_kwargs):
    """Decode an input to raw mono 16 kHz signed 16-bit little-endian PCM.

    Runs the compiled ffmpeg command as a subprocess, writing the decoded
    audio to the subprocess's stdout and capturing its stderr.

    Args:
        in_filename: Input passed to ``ffmpeg.input``.
        **input_kwargs: Extra keyword arguments forwarded to ``ffmpeg.input``.

    Returns:
        The bytes captured from the ffmpeg process's stdout.

    Exits:
        Calls ``sys.exit(1)`` after echoing ffmpeg's stderr output when the
        process finishes with a non-zero return code.
    """
    cmd_line = (
        ffmpeg
        .input(in_filename, **input_kwargs)
        .output('-', format='s16le', acodec='pcm_s16le', ac=1, ar='16k')
        .overwrite_output()
        .compile()
    )
    p = subprocess.Popen(cmd_line, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    audio_data, error_output = p.communicate()
    if p.returncode != 0:
        # communicate() yields bytes for piped streams; decode before writing
        # to the text-mode sys.stderr (writing bytes raises TypeError on
        # Python 3).  Undecodable bytes are replaced rather than raising so
        # the diagnostic is still shown.
        sys.stderr.write(error_output.decode('utf-8', 'replace'))
        sys.exit(1)
    return audio_data
|
||||
|
||||
|
||||
def get_transcripts(audio_data):
    """Send audio content to the speech client and collect transcripts.

    The request is configured as LINEAR16 encoding, 16000 Hz sample rate and
    the ``en-US`` language code.

    Args:
        audio_data: Audio content handed to ``types.RecognitionAudio``.

    Returns:
        A list with the transcript of the first alternative of every result
        in the recognition response.
    """
    speech_client = speech.SpeechClient()
    recognition_audio = types.RecognitionAudio(content=audio_data)
    recognition_config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=16000,
        language_code='en-US',
    )
    recognition = speech_client.recognize(recognition_config, recognition_audio)

    transcripts = []
    for item in recognition.results:
        transcripts.append(item.alternatives[0].transcript)
    return transcripts
|
||||
|
||||
|
||||
def transcribe(in_filename):
    """Decode the given input, transcribe it, and print each transcript.

    Each transcript is printed on its own line as the ``repr`` of its UTF-8
    encoded form.

    Args:
        in_filename: Input passed on to ``decode_audio``.
    """
    for text in get_transcripts(decode_audio(in_filename)):
        encoded_text = text.encode('utf-8')
        print(repr(encoded_text))
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Script entry point: transcribe the file named on the command line.
    cli_args = parser.parse_args()
    transcribe(cli_args.in_filename)
|
Loading…
x
Reference in New Issue
Block a user