mirror of https://github.com/coqui-ai/TTS.git
Write non-speech files in a TXT (#2048)
* Write non-speech files in a TXT * Save 16-bit WAV out of VAD
This commit is contained in:
parent
d6ad9a05b4
commit
5f5d441ee5
|
@ -17,7 +17,7 @@ def adjust_path_and_remove_silence(audio_path):
|
||||||
# create all directory structure
|
# create all directory structure
|
||||||
pathlib.Path(output_path).parent.mkdir(parents=True, exist_ok=True)
|
pathlib.Path(output_path).parent.mkdir(parents=True, exist_ok=True)
|
||||||
# remove the silence and save the audio
|
# remove the silence and save the audio
|
||||||
output_path = remove_silence(
|
output_path, is_speech = remove_silence(
|
||||||
model_and_utils,
|
model_and_utils,
|
||||||
audio_path,
|
audio_path,
|
||||||
output_path,
|
output_path,
|
||||||
|
@ -25,26 +25,34 @@ def adjust_path_and_remove_silence(audio_path):
|
||||||
use_cuda=args.use_cuda,
|
use_cuda=args.use_cuda,
|
||||||
)
|
)
|
||||||
|
|
||||||
return output_path
|
return output_path, is_speech
|
||||||
|
|
||||||
|
|
||||||
def preprocess_audios():
|
def preprocess_audios():
|
||||||
files = sorted(glob.glob(os.path.join(args.input_dir, args.glob), recursive=True))
|
files = sorted(glob.glob(os.path.join(args.input_dir, args.glob), recursive=True))
|
||||||
print("> Number of files: ", len(files))
|
print("> Number of files: ", len(files))
|
||||||
if not args.force:
|
if not args.force:
|
||||||
print("> Ignoring files that already exist in the output directory.")
|
print("> Ignoring files that already exist in the output directory.")
|
||||||
|
|
||||||
if args.trim_just_beginning_and_end:
|
if args.trim_just_beginning_and_end:
|
||||||
print("> Trimming just the beginning and the end with nonspeech parts.")
|
print("> Trimming just the beginning and the end with nonspeech parts.")
|
||||||
else:
|
else:
|
||||||
print("> Trimming all nonspeech parts.")
|
print("> Trimming all nonspeech parts.")
|
||||||
|
|
||||||
|
filtered_files = []
|
||||||
if files:
|
if files:
|
||||||
# create threads
|
# create threads
|
||||||
# num_threads = multiprocessing.cpu_count()
|
# num_threads = multiprocessing.cpu_count()
|
||||||
# process_map(adjust_path_and_remove_silence, files, max_workers=num_threads, chunksize=15)
|
# process_map(adjust_path_and_remove_silence, files, max_workers=num_threads, chunksize=15)
|
||||||
for f in tqdm(files):
|
for f in tqdm(files):
|
||||||
adjust_path_and_remove_silence(f)
|
output_path, is_speech = adjust_path_and_remove_silence(f)
|
||||||
|
if not is_speech:
|
||||||
|
filtered_files.append(output_path)
|
||||||
|
|
||||||
|
# write files that do not have speech
|
||||||
|
with open(os.path.join(args.output_dir, "filtered_files.txt"), "w", encoding="utf-8") as f:
|
||||||
|
for file in filtered_files:
|
||||||
|
f.write(file + "\n")
|
||||||
else:
|
else:
|
||||||
print("> No files Found !")
|
print("> No files Found !")
|
||||||
|
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
import soundfile as sf
|
||||||
import torch
|
import torch
|
||||||
import torchaudio
|
import torchaudio
|
||||||
|
|
||||||
|
@ -48,7 +49,7 @@ def remove_silence(
|
||||||
):
|
):
|
||||||
|
|
||||||
# get the VAD model and utils functions
|
# get the VAD model and utils functions
|
||||||
model, get_speech_timestamps, save_audio, collect_chunks = model_and_utils
|
model, get_speech_timestamps, _, collect_chunks = model_and_utils
|
||||||
|
|
||||||
# read ground truth wav and resample the audio for the VAD
|
# read ground truth wav and resample the audio for the VAD
|
||||||
wav, gt_sample_rate = read_audio(audio_path)
|
wav, gt_sample_rate = read_audio(audio_path)
|
||||||
|
@ -73,9 +74,11 @@ def remove_silence(
|
||||||
# if have speech timestamps else save the wav
|
# if have speech timestamps else save the wav
|
||||||
if new_speech_timestamps:
|
if new_speech_timestamps:
|
||||||
wav = collect_chunks(new_speech_timestamps, wav)
|
wav = collect_chunks(new_speech_timestamps, wav)
|
||||||
|
is_speech = True
|
||||||
else:
|
else:
|
||||||
print(f"> The file {audio_path} probably does not have speech please check it !!")
|
print(f"> The file {audio_path} probably does not have speech please check it !!")
|
||||||
|
is_speech = False
|
||||||
|
|
||||||
# save audio
|
# save audio
|
||||||
save_audio(out_path, wav, sampling_rate=gt_sample_rate)
|
sf.write(out_path, wav, gt_sample_rate, subtype="PCM_16")
|
||||||
return out_path
|
return out_path, is_speech
|
||||||
|
|
Loading…
Reference in New Issue