This notebook computes the average SNR of a given voice dataset. If the SNR is too low, it might reduce performance or prevent the model from learning.

To use this notebook, you need:
- WADA SNR estimation: http://www.cs.cmu.edu/~robust/archive/algorithms/WADA_SNR_IS_2008/
- FFMPEG: ```sudo apt-get install ffmpeg ```     


In [1]:
import os, sys
import glob
import subprocess
import tempfile
import IPython
import soundfile as sf
import numpy as np
from tqdm import tqdm
from multiprocessing import Pool

In [2]:
# Set the meta parameters
# Root directory of the dataset; it is searched recursively for .wav files.
DATA_PATH = "/home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/"
# Number of worker processes; 1 processes the files sequentially in this kernel.
NUM_PROC = 1
# Working directory at notebook start; the WadaSNR/Exe folder is resolved relative to it.
CURRENT_PATH = os.getcwd()

In [3]:
def _run_command(command, **kwargs):
    """Run ``command`` (a list of arguments) without a shell.

    Raises RuntimeError, with the command line as message, when the
    executable cannot be started (e.g. it is not installed).
    """
    try:
        return subprocess.run(command, **kwargs)
    except OSError as err:
        raise RuntimeError(" ".join(command)) from err


def compute_file_snr(file_path):
    """Estimate the SNR of one wav file with the WADA SNR command-line tool.

    A file whose sample rate is not 16 kHz is first converted with FFMPEG
    to a 16 kHz, mono, 16-bit PCM wav inside a private temporary directory.
    A file that is already at 16 kHz is handed to WADA as is (the original
    implementation made a byte-identical copy first).

    Args:
        file_path (str): path to the input wav file.

    Returns:
        tuple: ``(snr, file_path)`` where ``snr`` is the float parsed from
        the WADA output.

    Raises:
        RuntimeError: if FFMPEG or WADA cannot be run, or if the WADA output
            cannot be parsed. The message is the command that failed.
    """
    target_sr = 16000
    # Only the header is needed for the sample rate; do not decode the audio.
    sr = sf.info(file_path).samplerate
    wada_dir = os.path.join(CURRENT_PATH, "WadaSNR", "Exe")

    # The temporary directory lives outside the dataset tree and is removed
    # when the ``with`` block exits, including when an error is raised.
    with tempfile.TemporaryDirectory() as tmp_dir:
        if sr != target_sr:
            wada_input = os.path.join(tmp_dir, "resampled.wav")
            ffmpeg_command = [
                "ffmpeg", "-i", file_path,
                "-ac", "1",
                "-acodec", "pcm_s16le",
                "-y",
                "-ar", str(target_sr),
                wada_input,
            ]
            if _run_command(ffmpeg_command).returncode != 0:
                raise RuntimeError(" ".join(ffmpeg_command))
        else:
            wada_input = file_path

        # Arguments are passed as a list (no shell), so paths containing
        # spaces or shell metacharacters are handled correctly.
        command = [
            os.path.join(wada_dir, "WADASNR"),
            "-i", wada_input,
            "-t", os.path.join(wada_dir, "Alpha0.400000.txt"),
            "-ifmt", "mswav",
        ]
        output = _run_command(command, stdout=subprocess.PIPE).stdout

    try:
        # The SNR value is taken from the third-from-last whitespace-separated
        # token of the WADA output, as in the original implementation.
        snr = float(output.split()[-3].decode("utf-8"))
    except (IndexError, ValueError) as err:
        raise RuntimeError(" ".join(command)) from err
    return snr, file_path


In [4]:
# Runs compute_file_snr on a single file; presumably a smoke test before the full run.
# NOTE(review): this path is hard-coded and independent of DATA_PATH — point it at a
# wav file that exists on the local machine.
wav_file = "/home/erogol/Data/LJSpeech-1.1/wavs/LJ001-0001.wav"
output = compute_file_snr(wav_file)

In [5]:
# Collect every .wav file below DATA_PATH. glob returns files in
# filesystem-dependent order, so the list is sorted to make the run (and the
# tie-breaking in the later rankings) reproducible.
wav_files = sorted(glob.glob(os.path.join(DATA_PATH, "**", "*.wav"), recursive=True))
print(" > Number of wav files {}".format(len(wav_files)))

 > Number of wav files 13331


In [6]:
# Compute (snr, file_path) for every wav file, sequentially or in a worker pool.
if NUM_PROC == 1:
    file_snrs = []
    # Iterate the list itself rather than enumerate(...): tqdm can then read
    # its length and show a real progress bar with an ETA.
    for wav_file in tqdm(wav_files):
        file_snrs.append(compute_file_snr(wav_file))
else:
    with Pool(NUM_PROC) as pool:
        # imap yields results in the same order as wav_files.
        file_snrs = list(tqdm(pool.imap(compute_file_snr, wav_files), total=len(wav_files)))

13331it [22:53,  9.71it/s]


In [7]:
# Split the results into parallel lists of SNR values and file paths.
# file_names must be built here, before it is indexed below; the original
# cell used it before defining it and failed on a fresh kernel.
snrs = [tup[0] for tup in file_snrs]
file_names = [tup[1] for tup in file_snrs]

# Entries whose SNR is NaN are treated as failures and reported separately.
error_idxs = np.where(np.isnan(snrs))[0]
error_files = [file_names[idx] for idx in error_idxs]

# A set gives O(1) membership tests while filtering.
error_idx_set = set(error_idxs.tolist())
file_snrs = [tup for idx, tup in enumerate(file_snrs) if idx not in error_idx_set]
file_names = [tup[1] for tup in file_snrs]
snrs = [tup[0] for tup in file_snrs]

print(" > Average SNR of the dataset:{}".format(np.mean(snrs)))

 > Average SNR of the dataset:65.62862835473709


In [8]:
# find the worst SNR files
N = 10  # number of files to fetch
file_idxs = np.argsort(snrs)  # ascending: lowest SNR first
# Slicing instead of range(N) avoids an IndexError when fewer than N files exist.
for idx in file_idxs[:N]:
    file_name = file_names[idx]
    wav, sr = sf.read(file_name)
    print(" > {} - snr:{}".format(file_name, snrs[idx]))
    IPython.display.display(IPython.display.Audio(wav, rate=sr))

 > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_21_f000011.wav - snr:17.236514


 > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_19_f000105.wav - snr:17.620196


 > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_16_f000048.wav - snr:17.771109


 > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_21_f000084.wav - snr:18.852683


 > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/werde_die_du_bist/wavs/werde_die_du_bist_04_f000034.wav - snr:19.213945


 > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_16_f000101.wav - snr:19.526065


 > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_19_f000229.wav - snr:19.785003


 > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/werde_die_du_bist/wavs/werde_die_du_bist_04_f000033.wav - snr:19.811579


 > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/werde_die_du_bist/wavs/werde_die_du_bist_03_f000007.wav - snr:20.052098


 > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_21_f000031.wav - snr:20.055627


In [9]:
# find best recordings
N = 10  # number of files to fetch
file_idxs = np.argsort(-1 * np.array(snrs))  # negated values: highest SNR first
# Slicing instead of range(N) avoids an IndexError when fewer than N files exist.
for idx in file_idxs[:N]:
    file_name = file_names[idx]
    wav, sr = sf.read(file_name)
    print(" > {} - snr:{}".format(file_name, snrs[idx]))
    IPython.display.display(IPython.display.Audio(wav, rate=sr))

 > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/kleine_lord/wavs/kleine_lord_04_f000156.wav - snr:100.0


 > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_15_f000107.wav - snr:100.0


 > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_26_f000133.wav - snr:100.0


 > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_33_f000102.wav - snr:100.0


 > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_12_f000200.wav - snr:100.0


 > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_28_f000193.wav - snr:100.0


 > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_28_f000048.wav - snr:100.0


 > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_04_f000145.wav - snr:100.0


 > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_12_f000006.wav - snr:100.0


 > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_21_f000108.wav - snr:100.0
