diff --git a/notebooks/ExtractTTSpectrogram.ipynb b/notebooks/ExtractTTSpectrogram.ipynb index 0e63c942..dc35e86f 100644 --- a/notebooks/ExtractTTSpectrogram.ipynb +++ b/notebooks/ExtractTTSpectrogram.ipynb @@ -24,9 +24,10 @@ "from torch.utils.data import DataLoader\n", "from TTS.tts.datasets.TTSDataset import MyDataset\n", "from TTS.tts.layers.losses import L1LossMasked\n", - "from TTS.tts.utils.audio import AudioProcessor\n", + "from TTS.utils.audio import AudioProcessor\n", + "from TTS.utils.io import load_config\n", "from TTS.tts.utils.visual import plot_spectrogram\n", - "from TTS.tts.utils.generic_utils import load_config, setup_model, sequence_mask\n", + "from TTS.tts.utils.generic_utils import setup_model, sequence_mask\n", "from TTS.tts.utils.text.symbols import make_symbols, symbols, phonemes\n", "\n", "%matplotlib inline\n", @@ -59,16 +60,16 @@ "metadata": {}, "outputs": [], "source": [ - "OUT_PATH = \"/home/erogol/Data/LJSpeech-1.1/ljspeech-March-17-2020_01+16AM-871588c/\"\n", - "DATA_PATH = \"/home/erogol/Data/LJSpeech-1.1/\"\n", - "DATASET = \"ljspeech\"\n", - "METADATA_FILE = \"metadata.csv\"\n", - "CONFIG_PATH = \"/home/erogol/Models/LJSpeech/ljspeech-March-17-2020_01+16AM-871588c/config.json\"\n", - "MODEL_FILE = \"/home/erogol/Models/LJSpeech/ljspeech-March-17-2020_01+16AM-871588c/checkpoint_420000.pth.tar\"\n", + "OUT_PATH = \"/home/erogol/gdrive/Datasets/non-binary-voice-files/tacotron-DCA\"\n", + "DATA_PATH = \"/home/erogol/gdrive/Datasets/non-binary-voice-files/\"\n", + "DATASET = \"sam_accenture\"\n", + "METADATA_FILE = \"recording_script.xml\"\n", + "CONFIG_PATH = \"/home/erogol/gdrive/Trainings/sam/ljspeech-dcattn-April-03-2021_05+02-2344379/config.json\"\n", + "MODEL_FILE = \"/home/erogol/gdrive/Trainings/sam/ljspeech-dcattn-April-03-2021_05+02-2344379/best_model.pth.tar\"\n", "BATCH_SIZE = 32\n", "\n", "QUANTIZED_WAV = False\n", - "QUANTIZE_BIT = 9\n", + "QUANTIZE_BIT = None\n", "DRY_RUN = False # if False, does not generate output files, only computes loss and visuals.\n", "\n", "use_cuda = torch.cuda.is_available()\n", @@ -257,8 +258,8 @@ "outputs": [], "source": [ "# plot posnet output\n", - "plot_spectrogram(mel_postnet, ap);\n", - "print(mel_postnet[:mel_lengths[idx], :].shape)" + "print(mel_postnet[:mel_lengths[idx], :].shape)\n", + "plot_spectrogram(mel_postnet, ap)" ] }, { @@ -268,8 +269,8 @@ "outputs": [], "source": [ "# plot decoder output\n", - "plot_spectrogram(mel_decoder, ap);\n", - "print(mel_decoder.shape)" + "print(mel_decoder.shape)\n", + "plot_spectrogram(mel_decoder, ap)" ] }, { @@ -280,7 +281,7 @@ "source": [ "# plot GT specgrogram\n", "print(mel_truth.shape)\n", - "plot_spectrogram(mel_truth.T, ap);" + "plot_spectrogram(mel_truth.T, ap)" ] }, { @@ -328,6 +329,13 @@ "plt.colorbar()\n", "plt.tight_layout()" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { @@ -351,4 +359,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} +} \ No newline at end of file