mirror of https://github.com/coqui-ai/TTS.git
renaming train scripts and updating tests
This commit is contained in: parent 39c71ee8a9 · commit 73581cd94c
README.md · 16 lines changed
@@ -150,23 +150,25 @@ head -n 12000 metadata_shuf.csv > metadata_train.csv
 tail -n 1100 metadata_shuf.csv > metadata_val.csv
 ```
 
-To train a new model, you need to define your own ```config.json``` file (check the example) and call with the command below. You also set the model architecture in ```config.json```.
+To train a new model, you need to define your own ```config.json``` to define model details, training configuration and more (check the examples). Then call the corresponding train script.
 
-```python TTS/bin/train_tts.py --config_path TTS/tts/configs/config.json```
+For instance, to train a Tacotron or Tacotron2 model on the LJSpeech dataset, follow these steps.
+
+```python TTS/bin/train_tacotron.py --config_path TTS/tts/configs/config.json```
 
 To fine-tune a model, use ```--restore_path```.
 
-```python TTS/bin/train_tts.py --config_path TTS/tts/configs/config.json --restore_path /path/to/your/model.pth.tar```
+```python TTS/bin/train_tacotron.py --config_path TTS/tts/configs/config.json --restore_path /path/to/your/model.pth.tar```
 
 To continue an old training run, use ```--continue_path```.
 
-```python TTS/bin/train_tts.py --continue_path /path/to/your/run_folder/```
+```python TTS/bin/train_tacotron.py --continue_path /path/to/your/run_folder/```
 
-For multi-GPU training use ```distribute.py```. It enables process based multi-GPU training where each process uses a single GPU.
+For multi-GPU training, call ```distribute.py```. It runs any provided train script in a multi-GPU setting.
 
-```CUDA_VISIBLE_DEVICES="0,1,4" TTS/bin/distribute.py --script train_tts.py --config_path TTS/tts/configs/config.json```
+```CUDA_VISIBLE_DEVICES="0,1,4" python TTS/bin/distribute.py --script train_tacotron.py --config_path TTS/tts/configs/config.json```
 
-Each run creates a new output folder and ```config.json``` is copied under this folder.
+Each run creates a new output folder holding the used ```config.json```, model checkpoints and tensorboard logs.
 
 In case of any error or intercepted execution, if there is no checkpoint yet under the output folder, the whole folder is going to be removed.
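The README commands distinguish ```--restore_path``` (fine-tune: load existing weights into a fresh run) from ```--continue_path``` (resume an existing run folder, reusing its ```config.json```, checkpoints and logs). The sketch below illustrates that distinction under stated assumptions: only the flag names come from the README; the handling logic and folder name are placeholders, not the actual code in ```TTS/bin/train_tacotron.py```.

```python
# Hedged sketch of how --restore_path and --continue_path typically differ.
# The flag names come from the README above; the branching below is an
# illustrative assumption, not the repo's actual argument handling.
import argparse
import os

parser = argparse.ArgumentParser()
parser.add_argument("--config_path", default=None)
parser.add_argument("--restore_path", default=None,
                    help="load these weights, then start a brand-new run folder (fine-tuning)")
parser.add_argument("--continue_path", default=None,
                    help="resume training inside an existing run folder")
args = parser.parse_args()

if args.continue_path:
    # resume: reuse the old run folder and the config.json that was copied into it
    output_folder = args.continue_path
    config_path = os.path.join(args.continue_path, "config.json")
    restore_weights = None  # the latest checkpoint in the folder would be picked up
else:
    # fresh run: a new output folder is created and config.json is copied into it
    output_folder = "run-<timestamp>/"  # placeholder name
    config_path = args.config_path
    restore_weights = args.restore_path  # optional starting weights for fine-tuning

print(output_folder, config_path, restore_weights)
```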
@@ -7,27 +7,25 @@ import os
 import sys
 import time
 import traceback
+from random import randrange
 
 import numpy as np
 import torch
 
-from random import randrange
 from torch.utils.data import DataLoader
 from TTS.tts.datasets.preprocess import load_meta_data
 from TTS.tts.datasets.TTSDataset import MyDataset
 from TTS.tts.layers.losses import TacotronLoss
-from TTS.tts.utils.distribute import (DistributedSampler,
-                                      apply_gradient_allreduce,
-                                      init_distributed, reduce_tensor)
-from TTS.tts.utils.generic_utils import setup_model, check_config_tts
+from TTS.tts.utils.generic_utils import check_config_tts, setup_model
 from TTS.tts.utils.io import save_best_model, save_checkpoint
 from TTS.tts.utils.measures import alignment_diagonal_score
-from TTS.tts.utils.speakers import parse_speakers, load_speaker_mapping
+from TTS.tts.utils.speakers import load_speaker_mapping, parse_speakers
 from TTS.tts.utils.synthesis import synthesis
 from TTS.tts.utils.text.symbols import make_symbols, phonemes, symbols
 from TTS.tts.utils.visual import plot_alignment, plot_spectrogram
 from TTS.utils.audio import AudioProcessor
 from TTS.utils.console_logger import ConsoleLogger
+from TTS.utils.distribute import (DistributedSampler, apply_gradient_allreduce,
+                                  init_distributed, reduce_tensor)
 from TTS.utils.generic_utils import (KeepAverage, count_parameters,
                                      create_experiment_folder, get_git_branch,
                                      remove_experiment_folder, set_init_dict)
@@ -38,7 +36,6 @@ from TTS.utils.training import (NoamLR, adam_weight_decay, check_update,
                                 gradual_training_scheduler, set_weight_decay,
                                 setup_torch_training_env)
 
-
 use_cuda, num_gpus = setup_torch_training_env(True, False)
 
 
@@ -132,10 +132,6 @@ def train(model, criterion, optimizer,
 
         optimizer.zero_grad()
 
-        # schedule update
-        if scheduler is not None:
-            scheduler.step()
-
         # backward pass with loss scaling
         if c.mixed_precision:
             scaler.scale(loss).backward()
@@ -150,7 +146,9 @@ def train(model, criterion, optimizer,
                                           c.clip_grad)
             optimizer.step()
 
+        # schedule update
+        if scheduler is not None:
+            scheduler.step()
+
         # disconnect loss values
         loss_dict = dict()
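The two hunks above move the LR scheduler update from before the backward pass to after ```optimizer.step()```, which is the ordering PyTorch expects: since PyTorch 1.1, calling ```scheduler.step()``` before the optimizer step skips the first learning-rate value and emits a warning. Below is a minimal, self-contained sketch of the resulting per-step order; the model, data, scaler and clipping value are placeholders, not the repo's actual ```train()``` body.

```python
import torch

# Minimal sketch of the step ordering after this change (placeholder model/data;
# the real train() loop carries much more logic around these calls).
model = torch.nn.Linear(10, 1)
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1000)
scaler = torch.cuda.amp.GradScaler(enabled=torch.cuda.is_available())

for step in range(3):
    x, y = torch.randn(8, 10), torch.randn(8, 1)
    optimizer.zero_grad()
    loss = criterion(model(x), y)
    scaler.scale(loss).backward()          # backward pass (with loss scaling)
    scaler.unscale_(optimizer)             # so gradient clipping sees true gradients
    torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
    scaler.step(optimizer)                 # optimizer update first...
    scaler.update()
    if scheduler is not None:
        scheduler.step()                   # ...then the LR schedule update
```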
@@ -68,11 +68,14 @@
     "apex_amp_level": null, // level of optimization with NVIDIA's apex feature for automatic mixed FP16/FP32 precision (AMP), NOTE: currently only O1 is supported, and use "O1" to activate.
 
     // LOSS SETTINGS
-    "loss_masking": true, // enable / disable loss masking against the sequence padding.
+    "loss_masking": false, // enable / disable loss masking against the sequence padding.
     "decoder_loss_alpha": 0.5, // decoder loss weight. If > 0, it is enabled
     "postnet_loss_alpha": 0.25, // postnet loss weight. If > 0, it is enabled
-    "ga_alpha": 5.0, // weight for guided attention loss. If > 0, guided attention is enabled.
-    "diff_spec_alpha": 0.25, // differential spectral loss weight. If > 0, it is enabled
+    "ga_alpha": 10.0, // weight for guided attention loss. If > 0, guided attention is enabled.
+    "decoder_diff_spec_alpha": 0.25, // differential spectral loss weight on the decoder output. If > 0, it is enabled
+    "postnet_diff_spec_alpha": 0.25, // differential spectral loss weight on the postnet output. If > 0, it is enabled
+    "decoder_ssim_alpha": 0.5, // SSIM loss weight on the decoder output. If > 0, it is enabled
+    "postnet_ssim_alpha": 0.25, // SSIM loss weight on the postnet output. If > 0, it is enabled
 
     // VALIDATION
     "run_eval": true,
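These loss settings split the old ```diff_spec_alpha``` into separate decoder/postnet weights and add SSIM weights, with each term enabled only when its alpha is greater than zero. The sketch below shows how such per-term weights are commonly folded into one training loss; it is a simplified illustration under assumed tensor shapes, not the repo's ```TacotronLoss```, and the function and variable names are made up for the example.

```python
import torch
import torch.nn.functional as F

def weighted_tacotron_loss(decoder_out, postnet_out, mel_target, alphas):
    """Illustrative combination of the alpha weights from config.json.
    NOT the repo's TacotronLoss; it only shows how '> 0 enables the term'
    typically plays out when summing weighted loss terms."""
    total = torch.zeros(())
    if alphas["decoder_loss_alpha"] > 0:
        total = total + alphas["decoder_loss_alpha"] * F.l1_loss(decoder_out, mel_target)
    if alphas["postnet_loss_alpha"] > 0:
        total = total + alphas["postnet_loss_alpha"] * F.l1_loss(postnet_out, mel_target)
    if alphas["decoder_diff_spec_alpha"] > 0:
        # differential spectral term: compare frame-to-frame differences
        diff_out = decoder_out[:, 1:] - decoder_out[:, :-1]
        diff_tgt = mel_target[:, 1:] - mel_target[:, :-1]
        total = total + alphas["decoder_diff_spec_alpha"] * F.l1_loss(diff_out, diff_tgt)
    return total

alphas = {"decoder_loss_alpha": 0.5, "postnet_loss_alpha": 0.25,
          "decoder_diff_spec_alpha": 0.25}
mel = torch.randn(2, 100, 80)                       # (batch, frames, mel bins)
print(weighted_tacotron_loss(mel + 0.1, mel + 0.05, mel, alphas))
```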
@@ -6,9 +6,10 @@ TF_CPP_MIN_LOG_LEVEL=3
 # runtime tests
 ./tests/test_server_package.sh && \
 ./tests/test_tts_train.sh && \
+./tests/test_glow-tts_train.sh && \
 ./tests/test_vocoder_gan_train.sh && \
 ./tests/test_vocoder_wavernn_train.sh && \
-./tests/test_glow-tts_train.sh && \
+./tests/test_vocoder_wavegrad_train.sh && \
 
 # linter check
 cardboardlinter --refspec master
@@ -74,6 +74,16 @@
     "test_delay_epochs": 0, //Until attention is aligned, testing only wastes computation time.
     "test_sentences_file": null, // set a file to load sentences to be used for testing. If it is null then we use default english sentences.
 
+    // LOSS SETTINGS
+    "loss_masking": false, // enable / disable loss masking against the sequence padding.
+    "decoder_loss_alpha": 0.5, // decoder loss weight. If > 0, it is enabled
+    "postnet_loss_alpha": 0.25, // postnet loss weight. If > 0, it is enabled
+    "ga_alpha": 10.0, // weight for guided attention loss. If > 0, guided attention is enabled.
+    "decoder_diff_spec_alpha": 0.25, // differential spectral loss weight on the decoder output. If > 0, it is enabled
+    "postnet_diff_spec_alpha": 0.25, // differential spectral loss weight on the postnet output. If > 0, it is enabled
+    "decoder_ssim_alpha": 0.5, // SSIM loss weight on the decoder output. If > 0, it is enabled
+    "postnet_ssim_alpha": 0.25, // SSIM loss weight on the postnet output. If > 0, it is enabled
+
     // OPTIMIZER
     "noam_schedule": false, // use noam warmup and lr schedule.
     "grad_clip": 1.0, // upper limit for gradients for clipping.
@@ -5,11 +5,11 @@ echo "$BASEDIR"
 # create run dir
 mkdir $BASEDIR/train_outputs
 # run training
-CUDA_VISIBLE_DEVICES="" python TTS/bin/train_gan_vocoder.py --config_path $BASEDIR/inputs/test_vocoder_multiband_melgan_config.json
+CUDA_VISIBLE_DEVICES="" python TTS/bin/train_vocoder_gan.py --config_path $BASEDIR/inputs/test_vocoder_multiband_melgan_config.json
 # find the training folder
 LATEST_FOLDER=$(ls $BASEDIR/train_outputs/| sort | tail -1)
 echo $LATEST_FOLDER
 # continue the previous training
-CUDA_VISIBLE_DEVICES="" python TTS/bin/train_gan_vocoder.py --continue_path $BASEDIR/train_outputs/$LATEST_FOLDER
+CUDA_VISIBLE_DEVICES="" python TTS/bin/train_vocoder_gan.py --continue_path $BASEDIR/train_outputs/$LATEST_FOLDER
 # remove all the outputs
 rm -rf $BASEDIR/train_outputs/$LATEST_FOLDER
@@ -5,11 +5,11 @@ echo "$BASEDIR"
 # create run dir
 mkdir $BASEDIR/train_outputs
 # run training
-CUDA_VISIBLE_DEVICES="" python TTS/bin/train_wavernn_vocoder.py --config_path $BASEDIR/inputs/test_vocoder_wavernn_config.json
+CUDA_VISIBLE_DEVICES="" python TTS/bin/train_vocoder_wavernn.py --config_path $BASEDIR/inputs/test_vocoder_wavernn_config.json
 # find the training folder
 LATEST_FOLDER=$(ls $BASEDIR/train_outputs/| sort | tail -1)
 echo $LATEST_FOLDER
 # continue the previous training
-CUDA_VISIBLE_DEVICES="" python TTS/bin/train_wavernn_vocoder.py --continue_path $BASEDIR/train_outputs/$LATEST_FOLDER
+CUDA_VISIBLE_DEVICES="" python TTS/bin/train_vocoder_wavernn.py --continue_path $BASEDIR/train_outputs/$LATEST_FOLDER
 # remove all the outputs
 rm -rf $BASEDIR/train_outputs/$LATEST_FOLDER