fix ExtractSpectrogram.ipynb

This commit is contained in:
Eren Gölge 2021-04-04 03:56:42 +02:00
parent 44b4cb5ba5
commit 718f6e1568
1 changed file with 23 additions and 15 deletions

View File

@ -24,9 +24,10 @@
"from torch.utils.data import DataLoader\n", "from torch.utils.data import DataLoader\n",
"from TTS.tts.datasets.TTSDataset import MyDataset\n", "from TTS.tts.datasets.TTSDataset import MyDataset\n",
"from TTS.tts.layers.losses import L1LossMasked\n", "from TTS.tts.layers.losses import L1LossMasked\n",
"from TTS.tts.utils.audio import AudioProcessor\n", "from TTS.utils.audio import AudioProcessor\n",
"from TTS.utils.io import load_config\n",
"from TTS.tts.utils.visual import plot_spectrogram\n", "from TTS.tts.utils.visual import plot_spectrogram\n",
"from TTS.tts.utils.generic_utils import load_config, setup_model, sequence_mask\n", "from TTS.tts.utils.generic_utils import setup_model, sequence_mask\n",
"from TTS.tts.utils.text.symbols import make_symbols, symbols, phonemes\n", "from TTS.tts.utils.text.symbols import make_symbols, symbols, phonemes\n",
"\n", "\n",
"%matplotlib inline\n", "%matplotlib inline\n",
@ -59,16 +60,16 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"OUT_PATH = \"/home/erogol/Data/LJSpeech-1.1/ljspeech-March-17-2020_01+16AM-871588c/\"\n", "OUT_PATH = \"/home/erogol/gdrive/Datasets/non-binary-voice-files/tacotron-DCA\"\n",
"DATA_PATH = \"/home/erogol/Data/LJSpeech-1.1/\"\n", "DATA_PATH = \"/home/erogol/gdrive/Datasets/non-binary-voice-files/\"\n",
"DATASET = \"ljspeech\"\n", "DATASET = \"sam_accenture\"\n",
"METADATA_FILE = \"metadata.csv\"\n", "METADATA_FILE = \"recording_script.xml\"\n",
"CONFIG_PATH = \"/home/erogol/Models/LJSpeech/ljspeech-March-17-2020_01+16AM-871588c/config.json\"\n", "CONFIG_PATH = \"/home/erogol/gdrive/Trainings/sam/ljspeech-dcattn-April-03-2021_05+02-2344379/config.json\"\n",
"MODEL_FILE = \"/home/erogol/Models/LJSpeech/ljspeech-March-17-2020_01+16AM-871588c/checkpoint_420000.pth.tar\"\n", "MODEL_FILE = \"/home/erogol/gdrive/Trainings/sam/ljspeech-dcattn-April-03-2021_05+02-2344379/best_model.pth.tar\"\n",
"BATCH_SIZE = 32\n", "BATCH_SIZE = 32\n",
"\n", "\n",
"QUANTIZED_WAV = False\n", "QUANTIZED_WAV = False\n",
"QUANTIZE_BIT = 9\n", "QUANTIZE_BIT = None\n",
"DRY_RUN = False # if True, does not generate output files, only computes loss and visuals.\n", "DRY_RUN = False # if True, does not generate output files, only computes loss and visuals.\n",
"\n", "\n",
"use_cuda = torch.cuda.is_available()\n", "use_cuda = torch.cuda.is_available()\n",
@ -257,8 +258,8 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"# plot postnet output\n", "# plot postnet output\n",
"plot_spectrogram(mel_postnet, ap);\n", "print(mel_postnet[:mel_lengths[idx], :].shape)\n",
"print(mel_postnet[:mel_lengths[idx], :].shape)" "plot_spectrogram(mel_postnet, ap)"
] ]
}, },
{ {
@ -268,8 +269,8 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"# plot decoder output\n", "# plot decoder output\n",
"plot_spectrogram(mel_decoder, ap);\n", "print(mel_decoder.shape)\n",
"print(mel_decoder.shape)" "plot_spectrogram(mel_decoder, ap)"
] ]
}, },
{ {
@ -280,7 +281,7 @@
"source": [ "source": [
"# plot GT spectrogram\n", "# plot GT spectrogram\n",
"print(mel_truth.shape)\n", "print(mel_truth.shape)\n",
"plot_spectrogram(mel_truth.T, ap);" "plot_spectrogram(mel_truth.T, ap)"
] ]
}, },
{ {
@ -328,6 +329,13 @@
"plt.colorbar()\n", "plt.colorbar()\n",
"plt.tight_layout()" "plt.tight_layout()"
] ]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
} }
], ],
"metadata": { "metadata": {