mirror of https://github.com/coqui-ai/TTS.git
fix ExtractSpectrogram.ipynb
This commit is contained in:
parent
44b4cb5ba5
commit
718f6e1568
|
@ -24,9 +24,10 @@
|
|||
"from torch.utils.data import DataLoader\n",
|
||||
"from TTS.tts.datasets.TTSDataset import MyDataset\n",
|
||||
"from TTS.tts.layers.losses import L1LossMasked\n",
|
||||
"from TTS.tts.utils.audio import AudioProcessor\n",
|
||||
"from TTS.utils.audio import AudioProcessor\n",
|
||||
"from TTS.utils.io import load_config\n",
|
||||
"from TTS.tts.utils.visual import plot_spectrogram\n",
|
||||
"from TTS.tts.utils.generic_utils import load_config, setup_model, sequence_mask\n",
|
||||
"from TTS.tts.utils.generic_utils import setup_model, sequence_mask\n",
|
||||
"from TTS.tts.utils.text.symbols import make_symbols, symbols, phonemes\n",
|
||||
"\n",
|
||||
"%matplotlib inline\n",
|
||||
|
@ -59,16 +60,16 @@
|
|||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"OUT_PATH = \"/home/erogol/Data/LJSpeech-1.1/ljspeech-March-17-2020_01+16AM-871588c/\"\n",
|
||||
"DATA_PATH = \"/home/erogol/Data/LJSpeech-1.1/\"\n",
|
||||
"DATASET = \"ljspeech\"\n",
|
||||
"METADATA_FILE = \"metadata.csv\"\n",
|
||||
"CONFIG_PATH = \"/home/erogol/Models/LJSpeech/ljspeech-March-17-2020_01+16AM-871588c/config.json\"\n",
|
||||
"MODEL_FILE = \"/home/erogol/Models/LJSpeech/ljspeech-March-17-2020_01+16AM-871588c/checkpoint_420000.pth.tar\"\n",
|
||||
"OUT_PATH = \"/home/erogol/gdrive/Datasets/non-binary-voice-files/tacotron-DCA\"\n",
|
||||
"DATA_PATH = \"/home/erogol/gdrive/Datasets/non-binary-voice-files/\"\n",
|
||||
"DATASET = \"sam_accenture\"\n",
|
||||
"METADATA_FILE = \"recording_script.xml\"\n",
|
||||
"CONFIG_PATH = \"/home/erogol/gdrive/Trainings/sam/ljspeech-dcattn-April-03-2021_05+02-2344379/config.json\"\n",
|
||||
"MODEL_FILE = \"/home/erogol/gdrive/Trainings/sam/ljspeech-dcattn-April-03-2021_05+02-2344379/best_model.pth.tar\"\n",
|
||||
"BATCH_SIZE = 32\n",
|
||||
"\n",
|
||||
"QUANTIZED_WAV = False\n",
|
||||
"QUANTIZE_BIT = 9\n",
|
||||
"QUANTIZE_BIT = None\n",
|
||||
"DRY_RUN = False # if True, does not generate output files, only computes loss and visuals.\n",
|
||||
"\n",
|
||||
"use_cuda = torch.cuda.is_available()\n",
|
||||
|
@ -257,8 +258,8 @@
|
|||
"outputs": [],
|
||||
"source": [
|
||||
"# plot postnet output\n",
|
||||
"plot_spectrogram(mel_postnet, ap);\n",
|
||||
"print(mel_postnet[:mel_lengths[idx], :].shape)"
|
||||
"print(mel_postnet[:mel_lengths[idx], :].shape)\n",
|
||||
"plot_spectrogram(mel_postnet, ap)"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -268,8 +269,8 @@
|
|||
"outputs": [],
|
||||
"source": [
|
||||
"# plot decoder output\n",
|
||||
"plot_spectrogram(mel_decoder, ap);\n",
|
||||
"print(mel_decoder.shape)"
|
||||
"print(mel_decoder.shape)\n",
|
||||
"plot_spectrogram(mel_decoder, ap)"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -280,7 +281,7 @@
|
|||
"source": [
|
||||
"# plot GT spectrogram\n",
|
||||
"print(mel_truth.shape)\n",
|
||||
"plot_spectrogram(mel_truth.T, ap);"
|
||||
"plot_spectrogram(mel_truth.T, ap)"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -328,6 +329,13 @@
|
|||
"plt.colorbar()\n",
|
||||
"plt.tight_layout()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
|
Loading…
Reference in New Issue