Write non-speech files in a TXT (#2048)

* Write non-speech files in a txt

* Save 16-bit wav out of vad
This commit is contained in:
Eren Gölge 2022-10-06 13:25:54 +02:00 committed by GitHub
parent d6ad9a05b4
commit 5f5d441ee5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 18 additions and 7 deletions

View File

@@ -17,7 +17,7 @@ def adjust_path_and_remove_silence(audio_path):
# create all directory structure # create all directory structure
pathlib.Path(output_path).parent.mkdir(parents=True, exist_ok=True) pathlib.Path(output_path).parent.mkdir(parents=True, exist_ok=True)
# remove the silence and save the audio # remove the silence and save the audio
output_path = remove_silence( output_path, is_speech = remove_silence(
model_and_utils, model_and_utils,
audio_path, audio_path,
output_path, output_path,
@@ -25,26 +25,34 @@ def adjust_path_and_remove_silence(audio_path):
use_cuda=args.use_cuda, use_cuda=args.use_cuda,
) )
return output_path return output_path, is_speech
def preprocess_audios(): def preprocess_audios():
files = sorted(glob.glob(os.path.join(args.input_dir, args.glob), recursive=True)) files = sorted(glob.glob(os.path.join(args.input_dir, args.glob), recursive=True))
print("> Number of files: ", len(files)) print("> Number of files: ", len(files))
if not args.force: if not args.force:
print("> Ignoring files that already exist in the output directory.") print("> Ignoring files that already exist in the output idrectory.")
if args.trim_just_beginning_and_end: if args.trim_just_beginning_and_end:
print("> Trimming just the beginning and the end with nonspeech parts.") print("> Trimming just the beginning and the end with nonspeech parts.")
else: else:
print("> Trimming all nonspeech parts.") print("> Trimming all nonspeech parts.")
filtered_files = []
if files: if files:
# create threads # create threads
# num_threads = multiprocessing.cpu_count() # num_threads = multiprocessing.cpu_count()
# process_map(adjust_path_and_remove_silence, files, max_workers=num_threads, chunksize=15) # process_map(adjust_path_and_remove_silence, files, max_workers=num_threads, chunksize=15)
for f in tqdm(files): for f in tqdm(files):
adjust_path_and_remove_silence(f) output_path, is_speech = adjust_path_and_remove_silence(f)
if not is_speech:
filtered_files.append(output_path)
# write files that do not have speech
with open(os.path.join(args.output_dir, "filtered_files.txt"), "w", encoding="utf-8") as f:
for file in filtered_files:
f.write(file + "\n")
else: else:
print("> No files Found !") print("> No files Found !")

View File

@@ -1,3 +1,4 @@
import soundfile as sf
import torch import torch
import torchaudio import torchaudio
@@ -48,7 +49,7 @@ def remove_silence(
): ):
# get the VAD model and utils functions # get the VAD model and utils functions
model, get_speech_timestamps, save_audio, collect_chunks = model_and_utils model, get_speech_timestamps, _, collect_chunks = model_and_utils
# read ground truth wav and resample the audio for the VAD # read ground truth wav and resample the audio for the VAD
wav, gt_sample_rate = read_audio(audio_path) wav, gt_sample_rate = read_audio(audio_path)
@@ -73,9 +74,11 @@ def remove_silence(
# if have speech timestamps else save the wav # if have speech timestamps else save the wav
if new_speech_timestamps: if new_speech_timestamps:
wav = collect_chunks(new_speech_timestamps, wav) wav = collect_chunks(new_speech_timestamps, wav)
is_speech = True
else: else:
print(f"> The file {audio_path} probably does not have speech please check it !!") print(f"> The file {audio_path} probably does not have speech please check it !!")
is_speech = False
# save audio # save audio
save_audio(out_path, wav, sampling_rate=gt_sample_rate) sf.write(out_path, wav, gt_sample_rate, subtype="PCM_16")
return out_path return out_path, is_speech