From 1203e72ae6eb3537386a08b84db5137f749ac7e3 Mon Sep 17 00:00:00 2001 From: Eren Golge Date: Fri, 12 Jul 2019 11:16:33 +0200 Subject: [PATCH] CheckSNR notebook --- dataset_analysis/CheckDatasetSNR.ipynb | 687 +++++++++++++++++++++++++ 1 file changed, 687 insertions(+) create mode 100644 dataset_analysis/CheckDatasetSNR.ipynb diff --git a/dataset_analysis/CheckDatasetSNR.ipynb b/dataset_analysis/CheckDatasetSNR.ipynb new file mode 100644 index 00000000..f9676a38 --- /dev/null +++ b/dataset_analysis/CheckDatasetSNR.ipynb @@ -0,0 +1,687 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This notebook computes the average SNR of a given voice dataset. If the SNR is too low, that might reduce the performance or prevent the model from learning.\n", + "\n", + "To use this notebook, you need:\n", + "- WADA SNR estimation: http://www.cs.cmu.edu/~robust/archive/algorithms/WADA_SNR_IS_2008/\n", + "- FFMPEG: ```sudo apt-get install ffmpeg ``` \n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import os, sys\n", + "import glob\n", + "import subprocess\n", + "import tempfile\n", + "import IPython\n", + "import soundfile as sf\n", + "import numpy as np\n", + "from tqdm import tqdm\n", + "from multiprocessing import Pool" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# Set the meta parameters\n", + "DATA_PATH = \"/home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/\"\n", + "NUM_PROC = 1\n", + "CURRENT_PATH = os.getcwd()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "def compute_file_snr(file_path):\n", + " \"\"\" Convert given file to required format with FFMPEG\n", + " and process with WADA\n", + " \"\"\"\n", + " _, sr = sf.read(file_path)\n", + " new_file = file_path.replace(\".wav\", \"_tmp.wav\")\n", + " if sr != 16000:\n", + " command = 'ffmpeg -i \"{}\" 
-ac 1 -acodec pcm_s16le -y -ar {} \"{}\"'.format(file_path, 16000, new_file)\n", + " else:\n", + " command = f'cp {file_path} {new_file}'\n", + " os.system(command)\n", + " command = [f\"{CURRENT_PATH}/WadaSNR/Exe/WADASNR\", f'-i {new_file}', f'-t {CURRENT_PATH}/WadaSNR/Exe/Alpha0.400000.txt', '-ifmt mswav']\n", + " pipe = subprocess.Popen(\" \".join(command), shell=True, stdout=subprocess.PIPE).stdout\n", + " output = pipe.read()\n", + " try:\n", + " output = float(output.split()[-3].decode(\"utf-8\"))\n", + " except:\n", + " raise RuntimeError(\" \".join(command))\n", + " os.system(\"rm {}\".format(new_file))\n", + " return output, file_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "wav_file = \"/home/erogol/Data/LJSpeech-1.1/wavs/LJ001-0001.wav\"\n", + "output = compute_file_snr(wav_file)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " > Number of wav files 13331\n" + ] + } + ], + "source": [ + "wav_files = glob.glob(\"{}/**/*.wav\".format(DATA_PATH), recursive=True)\n", + "print(\" > Number of wav files {}\".format(len(wav_files)))" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "13331it [22:53, 9.71it/s]\n" + ] + } + ], + "source": [ + "if NUM_PROC == 1:\n", + " file_snrs = [None] * len(wav_files) \n", + " for idx, wav_file in tqdm(enumerate(wav_files)):\n", + " tup = compute_file_snr(wav_file)\n", + " file_snrs[idx] = tup\n", + "else:\n", + " with Pool(NUM_PROC) as pool:\n", + " file_snrs = list(tqdm(pool.imap(compute_file_snr, wav_files), total=len(wav_files)))" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " > Average SNR of the 
dataset:65.62862835473709\n" + ] + } + ], + "source": [ + "snrs = [tup[0] for tup in file_snrs]\n", + "\n", + "error_idxs = np.where(np.isnan(snrs) == True)[0]\n", + "error_files = [file_names[idx] for idx in error_idxs]\n", + "\n", + "file_snrs = [i for j, i in enumerate(file_snrs) if j not in error_idxs]\n", + "file_names = [tup[1] for tup in file_snrs]\n", + "snrs = [tup[0] for tup in file_snrs]\n", + "\n", + "print(\" > Average SNR of the dataset:{}\".format(np.mean(snrs)))" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_21_f000011.wav - snr:17.236514\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_19_f000105.wav - snr:17.620196\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_16_f000048.wav - snr:17.771109\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_21_f000084.wav - snr:18.852683\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + 
"output_type": "stream", + "text": [ + " > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/werde_die_du_bist/wavs/werde_die_du_bist_04_f000034.wav - snr:19.213945\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_16_f000101.wav - snr:19.526065\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_19_f000229.wav - snr:19.785003\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/werde_die_du_bist/wavs/werde_die_du_bist_04_f000033.wav - snr:19.811579\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/werde_die_du_bist/wavs/werde_die_du_bist_03_f000007.wav - snr:20.052098\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_21_f000031.wav - snr:20.055627\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " " + ], + 
"text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# find worse SNR files\n", + "N = 10 # number of files to fetch\n", + "file_idxs = np.argsort(snrs)\n", + "for i in range(N):\n", + " idx = file_idxs[i]\n", + " file_name = file_names[idx]\n", + " wav, sr = sf.read(file_name)\n", + " print(\" > {} - snr:{}\".format(file_name, snrs[idx]))\n", + " IPython.display.display(IPython.display.Audio(wav, rate=sr))" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/kleine_lord/wavs/kleine_lord_04_f000156.wav - snr:100.0\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_15_f000107.wav - snr:100.0\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_26_f000133.wav - snr:100.0\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_33_f000102.wav - snr:100.0\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " > 
/home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_12_f000200.wav - snr:100.0\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_28_f000193.wav - snr:100.0\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_28_f000048.wav - snr:100.0\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_04_f000145.wav - snr:100.0\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_12_f000006.wav - snr:100.0\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " > /home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/toten_seelen/wavs/toten_seelen_21_f000108.wav - snr:100.0\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": 
[ + "# find best recordings\n", + "N = 10 # number of files to fetch\n", + "file_idxs = np.argsort(-1 * np.array(snrs))\n", + "for i in range(N):\n", + " idx = file_idxs[i]\n", + " file_name = file_names[idx]\n", + " wav, sr = sf.read(file_name)\n", + " print(\" > {} - snr:{}\".format(file_name, snrs[idx]))\n", + " IPython.display.display(IPython.display.Audio(wav, rate=sr))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.2" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}