coqui-tts/dataset_analysis/CheckDatasetSNR.ipynb

7.2 MiB

None <html lang="en"> <head> </head>

This notebook computes the average SNR of a given voice dataset. If the SNR is too low, it might reduce the performance of the model or prevent it from learning at all.

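To use this notebook, you need: `ffmpeg` available on your PATH, the `WADASNR` executable together with `Alpha0.400000.txt` under `WadaSNR/Exe/` in the directory the notebook is run from, and `DATA_PATH` set to the root folder of your wav files.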

In [1]:
import os, sys
import glob
import subprocess
import tempfile
import IPython
import soundfile as sf
import numpy as np
from tqdm import tqdm
from multiprocessing import Pool
In [2]:
# Set the meta parameters
# Root directory that is searched recursively for *.wav files.
DATA_PATH = "/home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/"
# Number of worker processes; with 1 the files are processed sequentially in this process.
NUM_PROC = 1
# Working directory of the kernel; the WadaSNR/Exe tools are looked up relative to it.
CURRENT_PATH = os.getcwd()
In [3]:
def compute_file_snr(file_path):
    """Estimate the SNR of a single wav file with the WADASNR executable.

    The file is first brought to 16 kHz: it is re-encoded with FFMPEG
    (mono, 16-bit PCM) when its sample rate differs, and copied unchanged
    otherwise. The resulting file is passed to the WADASNR binary located
    under ``CURRENT_PATH/WadaSNR/Exe``.

    The intermediate file is created inside a private temporary directory,
    so nothing is written next to the dataset files and the file is removed
    even when the computation fails. External tools are invoked with
    argument lists (no shell), so paths containing spaces or shell
    metacharacters are handled correctly.

    Args:
        file_path (str): path of the wav file to analyse.

    Returns:
        tuple: ``(snr, file_path)`` where ``snr`` is the float parsed from
        the WADASNR output.

    Raises:
        RuntimeError: if FFMPEG or WADASNR cannot be executed, if FFMPEG
            exits with an error, or if the WADASNR output cannot be parsed.
            The message contains the command that failed.
    """
    import shutil  # local import: not part of the notebook's import cell

    target_sr = 16000
    # Only the header is needed to learn the sample rate; do not decode the audio.
    sr = sf.info(file_path).samplerate

    wada_dir = os.path.join(CURRENT_PATH, "WadaSNR", "Exe")

    with tempfile.TemporaryDirectory() as tmp_dir:
        new_file = os.path.join(tmp_dir, "input.wav")

        if sr != target_sr:
            convert = [
                "ffmpeg", "-i", file_path,
                "-ac", "1", "-acodec", "pcm_s16le",
                "-y", "-ar", str(target_sr), new_file,
            ]
            try:
                converted = subprocess.run(
                    convert,
                    stdin=subprocess.DEVNULL,  # keep ffmpeg from reading the kernel's stdin
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.DEVNULL,
                )
            except OSError as err:
                raise RuntimeError(" ".join(convert)) from err
            if converted.returncode != 0:
                raise RuntimeError(" ".join(convert))
        else:
            shutil.copyfile(file_path, new_file)

        command = [
            os.path.join(wada_dir, "WADASNR"),
            "-i", new_file,
            "-t", os.path.join(wada_dir, "Alpha0.400000.txt"),
            "-ifmt", "mswav",
        ]
        try:
            result = subprocess.run(command, stdout=subprocess.PIPE)
            # The SNR value is the third token from the end of the tool's output.
            snr = float(result.stdout.split()[-3].decode("utf-8"))
        except (OSError, IndexError, ValueError) as err:
            raise RuntimeError(" ".join(command)) from err

    return snr, file_path
In [4]:
# Smoke test: run the SNR computation on a single file before processing the whole dataset.
# NOTE: this path is independent of DATA_PATH and must exist on the local machine.
wav_file = "/home/erogol/Data/LJSpeech-1.1/wavs/LJ001-0001.wav"
output = compute_file_snr(wav_file)
In [5]:
# Collect every wav file below DATA_PATH (searched recursively).
# glob returns matches in arbitrary, filesystem-dependent order, so the list is
# sorted to make the processing order and the rankings below reproducible.
wav_files = sorted(glob.glob("{}/**/*.wav".format(DATA_PATH), recursive=True))
print(" > Number of wav files {}".format(len(wav_files)))
 > Number of wav files 13331
In [6]:
# Compute a (snr, file_path) tuple for every wav file, either sequentially
# or with a pool of NUM_PROC worker processes.
if NUM_PROC == 1:
    # Iterating the list directly (instead of enumerate(...)) lets tqdm read
    # its length, so the progress bar shows the total and an ETA.
    file_snrs = [compute_file_snr(wav_file) for wav_file in tqdm(wav_files)]
else:
    with Pool(NUM_PROC) as pool:
        # imap yields results in input order; total is passed explicitly
        # because the imap iterator has no length.
        file_snrs = list(tqdm(pool.imap(compute_file_snr, wav_files), total=len(wav_files)))
13331it [22:53,  9.71it/s]
In [7]:
# Split the (snr, file_path) tuples, then drop every file whose SNR is NaN.
# The unfiltered names are taken from file_snrs right here so that error_files
# never depends on a file_names variable from a previous run of this cell.
all_snrs = np.array([tup[0] for tup in file_snrs], dtype=float)
all_names = [tup[1] for tup in file_snrs]

nan_mask = np.isnan(all_snrs)
error_idxs = np.where(nan_mask)[0]
error_files = [all_names[idx] for idx in error_idxs]

# Filter with the boolean mask: one pass instead of a membership scan per element.
file_snrs = [tup for tup, is_nan in zip(file_snrs, nan_mask) if not is_nan]
file_names = [tup[1] for tup in file_snrs]
snrs = [tup[0] for tup in file_snrs]

print(" > Average SNR of the dataset:{}".format(np.mean(snrs)))
 > Average SNR of the dataset:65.62862835473709
In [8]:
# find the files with the worst (lowest) SNR
N = 10  # number of files to fetch
file_idxs = np.argsort(snrs)  # ascending: lowest SNR first
# Slicing (rather than indexing with range(N)) keeps this cell working when
# fewer than N files are available.
for idx in file_idxs[:N]:
    file_name = file_names[idx]
    wav, sr = sf.read(file_name)
    print(" > {} - snr:{}".format(file_name, snrs[idx]))
    IPython.display.display(IPython.display.Audio(wav, rate=sr))
 > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_21_f000011.wav - snr:17.236514
Your browser does not support the audio element.
 > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_19_f000105.wav - snr:17.620196
Your browser does not support the audio element.
 > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_16_f000048.wav - snr:17.771109
Your browser does not support the audio element.
 > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_21_f000084.wav - snr:18.852683
Your browser does not support the audio element.
 > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/werde_die_du_bist/wavs/werde_die_du_bist_04_f000034.wav - snr:19.213945
Your browser does not support the audio element.
 > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_16_f000101.wav - snr:19.526065
Your browser does not support the audio element.
 > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_19_f000229.wav - snr:19.785003
Your browser does not support the audio element.
 > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/werde_die_du_bist/wavs/werde_die_du_bist_04_f000033.wav - snr:19.811579
Your browser does not support the audio element.
 > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/werde_die_du_bist/wavs/werde_die_du_bist_03_f000007.wav - snr:20.052098
Your browser does not support the audio element.
 > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_21_f000031.wav - snr:20.055627
Your browser does not support the audio element.
In [9]:
# find the files with the best (highest) SNR
N = 10  # number of files to fetch
file_idxs = np.argsort(-1 * np.array(snrs))  # negated values: highest SNR first
# Slicing (rather than indexing with range(N)) keeps this cell working when
# fewer than N files are available.
for idx in file_idxs[:N]:
    file_name = file_names[idx]
    wav, sr = sf.read(file_name)
    print(" > {} - snr:{}".format(file_name, snrs[idx]))
    IPython.display.display(IPython.display.Audio(wav, rate=sr))
 > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/kleine_lord/wavs/kleine_lord_04_f000156.wav - snr:100.0
Your browser does not support the audio element.
 > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_15_f000107.wav - snr:100.0
Your browser does not support the audio element.
 > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_26_f000133.wav - snr:100.0
Your browser does not support the audio element.
 > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_33_f000102.wav - snr:100.0
Your browser does not support the audio element.
 > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_12_f000200.wav - snr:100.0
Your browser does not support the audio element.
 > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_28_f000193.wav - snr:100.0
Your browser does not support the audio element.
 > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_28_f000048.wav - snr:100.0
Your browser does not support the audio element.
 > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_04_f000145.wav - snr:100.0
Your browser does not support the audio element.
 > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_12_f000006.wav - snr:100.0
Your browser does not support the audio element.
 > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_21_f000108.wav - snr:100.0
Your browser does not support the audio element.
</html>