Merge pull request #2509 from coqui-ai/update_vad

Update VAD
This commit is contained in:
Eren Gölge 2023-04-13 19:35:17 +02:00 committed by GitHub
commit dba5cec497
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 34 additions and 4 deletions

View File

@ -1,12 +1,16 @@
import argparse import argparse
import glob import glob
import multiprocessing
import os import os
import pathlib import pathlib
import torch
from tqdm import tqdm from tqdm import tqdm
from TTS.utils.vad import get_vad_model_and_utils, remove_silence from TTS.utils.vad import get_vad_model_and_utils, remove_silence
torch.set_num_threads(1)
def adjust_path_and_remove_silence(audio_path): def adjust_path_and_remove_silence(audio_path):
output_path = audio_path.replace(os.path.join(args.input_dir, ""), os.path.join(args.output_dir, "")) output_path = audio_path.replace(os.path.join(args.input_dir, ""), os.path.join(args.output_dir, ""))
@ -44,6 +48,20 @@ def preprocess_audios():
# create threads # create threads
# num_threads = multiprocessing.cpu_count() # num_threads = multiprocessing.cpu_count()
# process_map(adjust_path_and_remove_silence, files, max_workers=num_threads, chunksize=15) # process_map(adjust_path_and_remove_silence, files, max_workers=num_threads, chunksize=15)
if args.num_processes > 1:
with multiprocessing.Pool(processes=args.num_processes) as pool:
results = list(
tqdm(
pool.imap_unordered(adjust_path_and_remove_silence, files),
total=len(files),
desc="Processing audio files",
)
)
for output_path, is_speech in results:
if not is_speech:
filtered_files.append(output_path)
else:
for f in tqdm(files): for f in tqdm(files):
output_path, is_speech = adjust_path_and_remove_silence(f) output_path, is_speech = adjust_path_and_remove_silence(f)
if not is_speech: if not is_speech:
@ -87,6 +105,18 @@ if __name__ == "__main__":
default=False, default=False,
help="If True use cuda", help="If True use cuda",
) )
parser.add_argument(
"--use_onnx",
type=bool,
default=False,
help="If True use onnx",
)
parser.add_argument(
"--num_processes",
type=int,
default=1,
help="Number of processes to use",
)
args = parser.parse_args() args = parser.parse_args()
# load the model and utils # load the model and utils
model_and_utils = get_vad_model_and_utils(use_cuda=args.use_cuda) model_and_utils = get_vad_model_and_utils(use_cuda=args.use_cuda)