mirror of https://github.com/coqui-ai/TTS.git
update notebook
This commit is contained in:
parent 4a6949632b
commit ddd7de6439
@@ -2,15 +2,22 @@
 "cells": [
 {
 "cell_type": "markdown",
-"metadata": {},
+"metadata": {
+"Collapsed": "false"
+},
 "source": [
-"This notebook is to test attention performance on hard sentences taken from DeepVoice paper."
+"This notebook tests the attention performance of a TTS model on a list of sentences taken from the DeepVoice paper.\n",
+"### Features of this notebook\n",
+"- You can see visually how your model performs on each sentence and try to discern common problems.\n",
+"- At the end, the final attention score is printed, showing the overall performance of your model. You can use this value for model selection.\n",
+"- You can change the list of sentences by providing a different sentence file."
 ]
 },
 {
 "cell_type": "code",
 "execution_count": null,
 "metadata": {
+"Collapsed": "false",
 "scrolled": true
 },
 "outputs": [],
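Note: the new markdown cell mentions swapping in a different sentence file. A minimal sketch of what reading such a file might look like, assuming a plain-text file with one sentence per line; the file name `sentences.txt` is hypothetical and not taken from the notebook:

    # Hypothetical example: read one test sentence per line from a plain-text file.
    with open("sentences.txt", "r", encoding="utf-8") as f:
        sentences = [line.strip() for line in f if line.strip()]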
@@ -31,7 +38,8 @@
 "\n",
 "from TTS.layers import *\n",
 "from TTS.utils.audio import AudioProcessor\n",
-"from TTS.utils.generic_utils import load_config, setup_model\n",
+"from TTS.utils.generic_utils import setup_model\n",
+"from TTS.utils.io import load_config\n",
 "from TTS.utils.text import text_to_sequence\n",
 "from TTS.utils.synthesis import synthesis\n",
 "from TTS.utils.visual import plot_alignment\n",
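Note: `load_config` now comes from `TTS.utils.io` rather than `TTS.utils.generic_utils`. A minimal sketch of how the updated imports might be used together to load the config and audio processor; `CONFIG_PATH` matches the constant defined later in the notebook, while expanding `CONFIG.audio` into `AudioProcessor` is an assumption about the config layout:

    from TTS.utils.io import load_config          # moved here from TTS.utils.generic_utils
    from TTS.utils.audio import AudioProcessor

    CONFIG = load_config(CONFIG_PATH)             # parse config.json into a config object
    ap = AudioProcessor(**CONFIG.audio)           # assumes audio settings live under CONFIG.audio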
@@ -45,7 +53,7 @@
 "def tts(model, text, CONFIG, use_cuda, ap):\n",
 " t_1 = time.time()\n",
 " # run the model\n",
-" waveform, alignment, mel_spec, mel_postnet_spec, stop_tokens = synthesis(model, text, CONFIG, use_cuda, ap, speaker_id, None, False, CONFIG.enable_eos_bos_chars, True)\n",
+" waveform, alignment, mel_spec, mel_postnet_spec, stop_tokens, inputs = synthesis(model, text, CONFIG, use_cuda, ap, speaker_id, None, False, CONFIG.enable_eos_bos_chars, True)\n",
 " if CONFIG.model == \"Tacotron\" and not use_gl:\n",
 " mel_postnet_spec = ap.out_linear_to_mel(mel_postnet_spec.T).T\n",
 " # plotting\n",
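Note: `synthesis()` now returns a sixth value, `inputs`, so the unpacking in `tts()` gains one extra name. A minimal sketch of the updated call in isolation, using the same arguments as in the diff; if the new value is not needed it can simply be discarded:

    # Only the unpacking changes: one extra name for the new return value.
    waveform, alignment, mel_spec, mel_postnet_spec, stop_tokens, _ = synthesis(
        model, text, CONFIG, use_cuda, ap,
        speaker_id, None, False, CONFIG.enable_eos_bos_chars, True)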
@@ -62,7 +70,7 @@
 " return attn_score\n",
 "\n",
 "# Set constants\n",
-"ROOT_PATH = '/home/erogol/Models/LJSpeech/ljspeech-March-17-2020_01+16AM-871588c/'\n",
+"ROOT_PATH = '/home/erogol/Models/LJSpeech/ljspeech-May-20-2020_12+29PM-1835628/'\n",
 "MODEL_PATH = ROOT_PATH + '/best_model.pth.tar'\n",
 "CONFIG_PATH = ROOT_PATH + '/config.json'\n",
 "OUT_FOLDER = './hard_sentences/'\n",
@@ -82,7 +90,9 @@
 {
 "cell_type": "code",
 "execution_count": null,
-"metadata": {},
+"metadata": {
+"Collapsed": "false"
+},
 "outputs": [],
 "source": [
 "# LOAD TTS MODEL\n",
@@ -130,7 +140,9 @@
 {
 "cell_type": "code",
 "execution_count": null,
-"metadata": {},
+"metadata": {
+"Collapsed": "false"
+},
 "outputs": [],
 "source": [
 "model.decoder.max_decoder_steps=3000\n",
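Note: raising `model.decoder.max_decoder_steps` lets the decoder run for more steps at inference time, so long test sentences are less likely to be cut off early. A brief annotated restatement of the line from this cell; the claim that the model default is lower is an assumption:

    # Allow longer outputs at inference time; long sentences may otherwise be
    # truncated when the decoder hits its step limit. 3000 is the value used here.
    model.decoder.max_decoder_steps = 3000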
@@ -144,7 +156,9 @@
 {
 "cell_type": "code",
 "execution_count": null,
-"metadata": {},
+"metadata": {
+"Collapsed": "false"
+},
 "outputs": [],
 "source": [
 "np.mean(attn_scores)"
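Note: the final cell averages the per-sentence attention scores collected by `tts()` into the single number the markdown cell suggests using for model selection. A minimal sketch of that aggregation; the assumption that `attn_scores` is a list filled elsewhere in the notebook follows the hunks above, and whether a higher or lower score is better depends on how `attn_score` is computed inside `tts()`, which is outside this diff:

    import numpy as np

    # attn_scores is expected to hold one attention score per test sentence,
    # appended inside tts() (that code is outside this hunk).
    final_score = np.mean(attn_scores)
    print(f"average attention score over {len(attn_scores)} sentences: {final_score:.4f}")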