mirror of https://github.com/coqui-ai/TTS.git
7.2 MiB
7.2 MiB
None
<html lang="en">
<head>
</head>
</html>
This notebook computes the average SNR of a given voice dataset. If the SNR is too low, it might reduce performance or prevent the model from learning.
To use this notebook, you need:
- WADA SNR estimation: http://www.cs.cmu.edu/~robust/archive/algorithms/WADA_SNR_IS_2008/
- FFMPEG:
sudo apt-get install ffmpeg
In [1]:
import os, sys import glob import subprocess import tempfile import IPython import soundfile as sf import numpy as np from tqdm import tqdm from multiprocessing import Pool
In [2]:
# Set the meta parameters DATA_PATH = "/home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/" NUM_PROC = 1 CURRENT_PATH = os.getcwd()
In [3]:
def compute_file_snr(file_path):
    """Estimate the SNR of one audio file with the WADA SNR executable.

    A temporary 16 kHz copy is written next to the original file: it is
    converted to mono 16-bit PCM with FFMPEG when the sample rate differs
    from 16000, and copied as-is otherwise. The WADASNR binary found under
    ``CURRENT_PATH/WadaSNR/Exe`` is then run on that copy, and the temporary
    file is removed again, even when an error occurs.

    Args:
        file_path (str): path to the audio file to analyse.

    Returns:
        tuple: ``(snr, file_path)`` where ``snr`` is the float parsed from the
        third-from-last whitespace-separated token of the WADASNR output.

    Raises:
        RuntimeError: if FFMPEG exits with an error or the WADASNR output
            cannot be parsed; the message is the command that failed.
    """
    import shutil  # local import: only this function needs it

    # Only the header is needed for the sample rate; avoid decoding the audio.
    sr = sf.info(file_path).samplerate

    # Build the temp name from the extension-less root so it can never equal
    # file_path (str.replace could return the input unchanged, and the cleanup
    # below would then delete the original recording).
    root, _ext = os.path.splitext(file_path)
    new_file = root + "_tmp.wav"

    try:
        if sr != 16000:
            convert = [
                "ffmpeg", "-i", file_path,
                "-ac", "1", "-acodec", "pcm_s16le",
                "-y", "-ar", "16000", new_file,
            ]
            # Argument lists (no shell) keep paths with spaces/quotes intact.
            if subprocess.run(convert).returncode != 0:
                raise RuntimeError(" ".join(convert))
        else:
            shutil.copyfile(file_path, new_file)

        command = [
            f"{CURRENT_PATH}/WadaSNR/Exe/WADASNR",
            "-i", new_file,
            "-t", f"{CURRENT_PATH}/WadaSNR/Exe/Alpha0.400000.txt",
            "-ifmt", "mswav",
        ]
        result = subprocess.run(command, stdout=subprocess.PIPE)
        try:
            output = float(result.stdout.split()[-3].decode("utf-8"))
        except (IndexError, ValueError) as err:
            # Too few tokens or a non-numeric token: report the failing command.
            raise RuntimeError(" ".join(command)) from err
    finally:
        # Never leave temp files behind; the recursive *.wav glob would pick
        # them up as dataset files on the next run.
        if os.path.exists(new_file):
            os.remove(new_file)

    return output, file_path
In [4]:
# Run the SNR estimation on a single file; `output` holds the (snr, file_path) tuple.
wav_file = "/home/erogol/Data/LJSpeech-1.1/wavs/LJ001-0001.wav"
output = compute_file_snr(wav_file)
In [5]:
# Recursively collect every wav file below DATA_PATH and report how many were found.
wav_pattern = f"{DATA_PATH}/**/*.wav"
wav_files = glob.glob(wav_pattern, recursive=True)
print(f" > Number of wav files {len(wav_files)}")
> Number of wav files 13331
In [6]:
# Compute one (snr, file_path) tuple per wav file, in the same order as wav_files.
if NUM_PROC == 1:
    # Wrap the list itself (not enumerate(...)) so tqdm knows the total and
    # can show a real progress bar with an ETA.
    file_snrs = [compute_file_snr(wav_file) for wav_file in tqdm(wav_files)]
else:
    with Pool(NUM_PROC) as pool:
        # imap yields results lazily in input order, so the bar advances per file.
        file_snrs = list(tqdm(pool.imap(compute_file_snr, wav_files), total=len(wav_files)))
13331it [22:53, 9.71it/s]
In [7]:
# Separate failed estimates (NaN) from valid ones, then report the dataset average.
# Everything is derived from the (snr, file_path) tuples in file_snrs, so the
# cell does not depend on names defined later or left over from an earlier run.
snrs = [tup[0] for tup in file_snrs]
error_idxs = np.where(np.isnan(snrs))[0]
error_files = [file_snrs[idx][1] for idx in error_idxs]

# Keep only entries with a valid SNR; file_names and snrs stay index-aligned.
file_snrs = [tup for tup in file_snrs if not np.isnan(tup[0])]
file_names = [tup[1] for tup in file_snrs]
snrs = [tup[0] for tup in file_snrs]
print(" > Average SNR of the dataset:{}".format(np.mean(snrs)))
> Average SNR of the dataset:65.62862835473709
In [8]:
# find worst SNR files
N = 10  # number of files to fetch
file_idxs = np.argsort(snrs)  # ascending: lowest SNR first
# Slice instead of range(N) so fewer than N files does not raise IndexError.
for idx in file_idxs[:N]:
    file_name = file_names[idx]
    wav, sr = sf.read(file_name)
    print(" > {} - snr:{}".format(file_name, snrs[idx]))
    IPython.display.display(IPython.display.Audio(wav, rate=sr))
> /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_21_f000011.wav - snr:17.236514
Your browser does not support the audio element.
> /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_19_f000105.wav - snr:17.620196
Your browser does not support the audio element.
> /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_16_f000048.wav - snr:17.771109
Your browser does not support the audio element.
> /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_21_f000084.wav - snr:18.852683
Your browser does not support the audio element.
> /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/werde_die_du_bist/wavs/werde_die_du_bist_04_f000034.wav - snr:19.213945
Your browser does not support the audio element.
> /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_16_f000101.wav - snr:19.526065
Your browser does not support the audio element.
> /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_19_f000229.wav - snr:19.785003
Your browser does not support the audio element.
> /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/werde_die_du_bist/wavs/werde_die_du_bist_04_f000033.wav - snr:19.811579
Your browser does not support the audio element.
> /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/werde_die_du_bist/wavs/werde_die_du_bist_03_f000007.wav - snr:20.052098
Your browser does not support the audio element.
> /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_21_f000031.wav - snr:20.055627
Your browser does not support the audio element.
In [9]:
# find best recordings
N = 10  # number of files to fetch
file_idxs = np.argsort(-1 * np.array(snrs))  # negate to sort descending: highest SNR first
# Slice instead of range(N) so fewer than N files does not raise IndexError.
for idx in file_idxs[:N]:
    file_name = file_names[idx]
    wav, sr = sf.read(file_name)
    print(" > {} - snr:{}".format(file_name, snrs[idx]))
    IPython.display.display(IPython.display.Audio(wav, rate=sr))
> /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/kleine_lord/wavs/kleine_lord_04_f000156.wav - snr:100.0
Your browser does not support the audio element.
> /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_15_f000107.wav - snr:100.0
Your browser does not support the audio element.
> /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_26_f000133.wav - snr:100.0
Your browser does not support the audio element.
> /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_33_f000102.wav - snr:100.0
Your browser does not support the audio element.
> /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_12_f000200.wav - snr:100.0
Your browser does not support the audio element.
> /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_28_f000193.wav - snr:100.0
Your browser does not support the audio element.
> /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_28_f000048.wav - snr:100.0
Your browser does not support the audio element.
> /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_04_f000145.wav - snr:100.0
Your browser does not support the audio element.
> /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_12_f000006.wav - snr:100.0
Your browser does not support the audio element.
> /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_21_f000108.wav - snr:100.0
Your browser does not support the audio element.