mirror of https://github.com/coqui-ai/TTS.git
tacotron DDC LJSpeech recipe
This commit is contained in:
parent
34a42d379f
commit
d1b469935d
|
@ -132,4 +132,3 @@ notebooks/data/*
|
||||||
TTS/tts/layers/glow_tts/monotonic_align/core.c
|
TTS/tts/layers/glow_tts/monotonic_align/core.c
|
||||||
.vscode-upload.json
|
.vscode-upload.json
|
||||||
temp_build/*
|
temp_build/*
|
||||||
recipes/*
|
|
||||||
|
|
|
@ -169,7 +169,8 @@ class TacotronConfig(BaseTTSConfig):
|
||||||
postnet_ssim_alpha: float = 0.25
|
postnet_ssim_alpha: float = 0.25
|
||||||
ga_alpha: float = 5.0
|
ga_alpha: float = 5.0
|
||||||
|
|
||||||
|
|
||||||
def check_values(self):
|
def check_values(self):
|
||||||
if self.gradual_training:
|
if self.gradual_training:
|
||||||
assert self.gradual_training[0][1] == self.r, f"[!] the first scheduled gradual training `r` must be equal to the model's `r` value. {self.gradual_training[0][1]} vs {self.r}"
|
assert (
|
||||||
|
self.gradual_training[0][1] == self.r
|
||||||
|
), f"[!] the first scheduled gradual training `r` must be equal to the model's `r` value. {self.gradual_training[0][1]} vs {self.r}"
|
||||||
|
|
|
@ -0,0 +1,13 @@
|
||||||
|
# 🐸💬 TTS Training Recipes
|
||||||
|
|
||||||
|
TTS recipes are intended to host bash scripts that run all the necessary steps to train a TTS model with a particular dataset.
|
||||||
|
|
||||||
|
Run each script from the root TTS folder as follows:
|
||||||
|
|
||||||
|
```console
|
||||||
|
$ bash ./recipes/<dataset>/<model>/run.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
All the outputs are held under the recipe directory unless you change the paths in the bash script.
|
||||||
|
|
||||||
|
If you train a new model using TTS, feel free to share your training to expand the list of recipes.
|
|
@ -1,4 +1,5 @@
|
||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
# take the script's parent directory to prefix all the output paths.
|
||||||
RUN_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
|
RUN_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
|
||||||
echo $RUN_DIR
|
echo $RUN_DIR
|
||||||
# download LJSpeech dataset
|
# download LJSpeech dataset
|
||||||
|
@ -12,10 +13,10 @@ tail -n 1100 LJSpeech-1.1/metadata_shuf.csv > LJSpeech-1.1/metadata_val.csv
|
||||||
mv LJSpeech-1.1 $RUN_DIR/
|
mv LJSpeech-1.1 $RUN_DIR/
|
||||||
rm LJSpeech-1.1.tar.bz2
|
rm LJSpeech-1.1.tar.bz2
|
||||||
# compute dataset mean and variance for normalization
|
# compute dataset mean and variance for normalization
|
||||||
python TTS/bin/compute_statistics.py $RUN_DIR/tacotron2-DCA.json $RUN_DIR/scale_stats.npy --data_path $RUN_DIR/LJSpeech-1.1/wavs/
|
python TTS/bin/compute_statistics.py $RUN_DIR/tacotron2-DDC.json $RUN_DIR/scale_stats.npy --data_path $RUN_DIR/LJSpeech-1.1/wavs/
|
||||||
# training ....
|
# training ....
|
||||||
# change the GPU id if needed
|
# change the GPU id if needed
|
||||||
CUDA_VISIBLE_DEVICES="0" python TTS/bin/train_tacotron.py --config_path $RUN_DIR/tacotron2-DDC.json \
|
CUDA_VISIBLE_DEVICES="0" python TTS/bin/train_tacotron.py --config_path $RUN_DIR/tacotron2-DDC.json \
|
||||||
--output_path $RUN_DIR \
|
--coqpit.output_path $RUN_DIR \
|
||||||
--coqpit.datasets.0.path $RUN_DIR/LJSpeech-1.1/ \
|
--coqpit.datasets.0.path $RUN_DIR/LJSpeech-1.1/ \
|
||||||
--coqpit.audio.stats_path $RUN_DIR/scale_stats.npy \
|
--coqpit.audio.stats_path $RUN_DIR/scale_stats.npy \
|
|
@ -37,11 +37,10 @@
|
||||||
"gst_num_style_tokens": 10
|
"gst_num_style_tokens": 10
|
||||||
},
|
},
|
||||||
"model": "Tacotron2",
|
"model": "Tacotron2",
|
||||||
"run_name": "ljspeech-dcattn",
|
"run_name": "ljspeech-ddc",
|
||||||
"run_description": "tacotron2 with dynamic convolution attention.",
|
"run_description": "tacotron2 with double decoder consistency.",
|
||||||
"batch_size": 64,
|
"batch_size": 64,
|
||||||
"eval_batch_size": 16,
|
"eval_batch_size": 16,
|
||||||
"r": 2,
|
|
||||||
"mixed_precision": true,
|
"mixed_precision": true,
|
||||||
"loss_masking": true,
|
"loss_masking": true,
|
||||||
"decoder_loss_alpha": 0.25,
|
"decoder_loss_alpha": 0.25,
|
||||||
|
@ -69,6 +68,7 @@
|
||||||
"double_decoder_consistency": true,
|
"double_decoder_consistency": true,
|
||||||
"ddc_r": 6,
|
"ddc_r": 6,
|
||||||
"attention_norm": "sigmoid",
|
"attention_norm": "sigmoid",
|
||||||
|
"r": 6,
|
||||||
"gradual_training": [[0, 6, 64], [10000, 4, 32], [50000, 3, 32], [100000, 2, 32]],
|
"gradual_training": [[0, 6, 64], [10000, 4, 32], [50000, 3, 32], [100000, 2, 32]],
|
||||||
"stopnet": true,
|
"stopnet": true,
|
||||||
"separate_stopnet": true,
|
"separate_stopnet": true,
|
Loading…
Reference in New Issue