mirror of https://github.com/coqui-ai/TTS.git
config updates, update audio.py, update mailabs preprocessor
This commit is contained in:
parent
f69127ff88
commit
f096f1052f
8
.compute
8
.compute
|
@ -9,8 +9,8 @@ pip3 install https://download.pytorch.org/whl/cu100/torch-1.1.0-cp37-cp37m-linux
|
||||||
wget https://www.dropbox.com/s/wqn5v3wkktw9lmo/install.sh?dl=0 -O install.sh
|
wget https://www.dropbox.com/s/wqn5v3wkktw9lmo/install.sh?dl=0 -O install.sh
|
||||||
sudo sh install.sh
|
sudo sh install.sh
|
||||||
python3 setup.py develop
|
python3 setup.py develop
|
||||||
# cp -R ${USER_DIR}/Mozilla_22050 ../tmp/
|
# cp -R ${USER_DIR}/GermanData ../tmp/
|
||||||
cp -R ${USER_DIR}/GermanData ../tmp/
|
# python3 distribute.py --config_path config_tacotron_de.json --data_path ../tmp/GermanData/karlsson/
|
||||||
python3 distribute.py --config_path config_tacotron.json --data_path ../tmp/GermanData/karlsson/
|
cp -R ${USER_DIR}/Mozilla_22050 ../tmp/
|
||||||
# python3 distribute.py --config_path config_cluster.json --data_path ${SHARED_DIR}/data/mozilla/Judy/
|
python3 distribute.py --config_path config_tacotron.json --data_path ../tmp/Mozilla_22050/ --restore_path /data/rw/home/4845.pth.tar
|
||||||
while true; do sleep 1000000; done
|
while true; do sleep 1000000; done
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
{
|
{
|
||||||
"run_name": "mozilla-tacotron-tagent",
|
"run_name": "mozilla-tacotron-tagent-bn",
|
||||||
"run_description": "using forward attention with transition agent, with original prenet, loss masking, separate stopnet, sigmoid norm. Compare this with 4841",
|
"run_description": "finetune 4845 with bn prenet.",
|
||||||
|
|
||||||
"audio":{
|
"audio":{
|
||||||
// Audio processing parameters
|
// Audio processing parameters
|
||||||
|
@ -40,7 +40,7 @@
|
||||||
"windowing": false, // Enables attention windowing. Used only in eval mode.
|
"windowing": false, // Enables attention windowing. Used only in eval mode.
|
||||||
"memory_size": 5, // ONLY TACOTRON - memory queue size used to queue network predictions to feed autoregressive connection. Useful if r < 5.
|
"memory_size": 5, // ONLY TACOTRON - memory queue size used to queue network predictions to feed autoregressive connection. Useful if r < 5.
|
||||||
"attention_norm": "sigmoid", // softmax or sigmoid. Suggested to use softmax for Tacotron2 and sigmoid for Tacotron.
|
"attention_norm": "sigmoid", // softmax or sigmoid. Suggested to use softmax for Tacotron2 and sigmoid for Tacotron.
|
||||||
"prenet_type": "original", // ONLY TACOTRON2 - "original" or "bn".
|
"prenet_type": "bn", // ONLY TACOTRON2 - "original" or "bn".
|
||||||
"prenet_dropout": true, // ONLY TACOTRON2 - enable/disable dropout at prenet.
|
"prenet_dropout": true, // ONLY TACOTRON2 - enable/disable dropout at prenet.
|
||||||
"use_forward_attn": true, // ONLY TACOTRON2 - if it uses forward attention. In general, it aligns faster.
|
"use_forward_attn": true, // ONLY TACOTRON2 - if it uses forward attention. In general, it aligns faster.
|
||||||
"transition_agent": true, // ONLY TACOTRON2 - enable/disable transition agent of forward attention.
|
"transition_agent": true, // ONLY TACOTRON2 - enable/disable transition agent of forward attention.
|
||||||
|
@ -62,6 +62,7 @@
|
||||||
|
|
||||||
"run_eval": true,
|
"run_eval": true,
|
||||||
"test_delay_epochs": 5, //Until attention is aligned, testing only wastes computation time.
|
"test_delay_epochs": 5, //Until attention is aligned, testing only wastes computation time.
|
||||||
|
"test_sentences_file": null,
|
||||||
"data_path": "/media/erogol/data_ssd/Data/Mozilla/", // DATASET-RELATED: can overwritten from command argument
|
"data_path": "/media/erogol/data_ssd/Data/Mozilla/", // DATASET-RELATED: can overwritten from command argument
|
||||||
"meta_file_train": "metadata_train.txt", // DATASET-RELATED: metafile for training dataloader.
|
"meta_file_train": "metadata_train.txt", // DATASET-RELATED: metafile for training dataloader.
|
||||||
"meta_file_val": "metadata_val.txt", // DATASET-RELATED: metafile for evaluation dataloader.
|
"meta_file_val": "metadata_val.txt", // DATASET-RELATED: metafile for evaluation dataloader.
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
{
|
{
|
||||||
"run_name": "german-tacotron-tagent",
|
"run_name": "german-tacotron-tagent-bn",
|
||||||
"run_description": "using forward attention with transition agent, with original prenet, loss masking, separate stopnet, sigmoid norm. First run German data.",
|
"run_description": "train german",
|
||||||
|
|
||||||
"audio":{
|
"audio":{
|
||||||
// Audio processing parameters
|
// Audio processing parameters
|
||||||
|
@ -65,25 +65,16 @@
|
||||||
"test_delay_epochs": 5, //Until attention is aligned, testing only wastes computation time.
|
"test_delay_epochs": 5, //Until attention is aligned, testing only wastes computation time.
|
||||||
"data_path": "/media/erogol/data_ssd/Data/Mozilla/", // DATASET-RELATED: can overwritten from command argument
|
"data_path": "/media/erogol/data_ssd/Data/Mozilla/", // DATASET-RELATED: can overwritten from command argument
|
||||||
"meta_file_train": [
|
"meta_file_train": [
|
||||||
"kleinzaches/metadata.csv",
|
"grune_haus/metadata.csv",
|
||||||
"spiegel_kaetzchen/metadata.csv",
|
"kleine_lord/metadata.csv",
|
||||||
"herrnarnesschatz/metadata.csv",
|
"toten_seelen/metadata.csv",
|
||||||
"maedchen_von_moorhof/metadata.csv",
|
"werde_die_du_bist/metadata.csv"
|
||||||
"koenigsgaukler/metadata.csv",
|
], // DATASET-RELATED: metafile for training dataloader.
|
||||||
"altehous/metadata.csv",
|
|
||||||
"odysseus/metadata.csv",
|
|
||||||
"undine/metadata.csv",
|
|
||||||
"reise_tilsit/metadata.csv",
|
|
||||||
"schmied_seines_glueckes/metadata.csv",
|
|
||||||
"kammmacher/metadata.csv",
|
|
||||||
"unterm_birnbaum/metadata.csv",
|
|
||||||
"liebesbriefe/metadata.csv",
|
|
||||||
"sandmann/metadata.csv"], // DATASET-RELATED: metafile for training dataloader.
|
|
||||||
"meta_file_val": "metadata_val.txt", // DATASET-RELATED: metafile for evaluation dataloader.
|
"meta_file_val": "metadata_val.txt", // DATASET-RELATED: metafile for evaluation dataloader.
|
||||||
"dataset": "mailabs", // DATASET-RELATED: one of TTS.dataset.preprocessors depending on your target dataset. Use "tts_cache" for pre-computed dataset by extract_features.py
|
"dataset": "mailabs", // DATASET-RELATED: one of TTS.dataset.preprocessors depending on your target dataset. Use "tts_cache" for pre-computed dataset by extract_features.py
|
||||||
"min_seq_len": 0, // DATASET-RELATED: minimum text length to use in training
|
"min_seq_len": 0, // DATASET-RELATED: minimum text length to use in training
|
||||||
"max_seq_len": 150, // DATASET-RELATED: maximum text length
|
"max_seq_len": 200, // DATASET-RELATED: maximum text length
|
||||||
"output_path": "../keep/", // DATASET-RELATED: output path for all training outputs.
|
"output_path": "/media/erogol/data_ssd/Data/models/german/", // DATASET-RELATED: output path for all training outputs.
|
||||||
"num_loader_workers": 4, // number of training data loader processes. Don't set it too big. 4-8 are good values.
|
"num_loader_workers": 4, // number of training data loader processes. Don't set it too big. 4-8 are good values.
|
||||||
"num_val_loader_workers": 4, // number of evaluation data loader processes.
|
"num_val_loader_workers": 4, // number of evaluation data loader processes.
|
||||||
"phoneme_cache_path": "phoneme_cache", // phoneme computation is slow, therefore, it caches results in the given folder.
|
"phoneme_cache_path": "phoneme_cache", // phoneme computation is slow, therefore, it caches results in the given folder.
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
import os
|
import os
|
||||||
|
from glob import glob
|
||||||
|
|
||||||
|
|
||||||
def tweb(root_path, meta_file):
|
def tweb(root_path, meta_file):
|
||||||
|
@ -60,6 +61,8 @@ def mozilla(root_path, meta_file):
|
||||||
|
|
||||||
def mailabs(root_path, meta_files):
|
def mailabs(root_path, meta_files):
|
||||||
"""Normalizes M-AI-Labs meta data files to TTS format"""
|
"""Normalizes M-AI-Labs meta data files to TTS format"""
|
||||||
|
if meta_files is None:
|
||||||
|
meta_files = glob(root_path+"/**/metadata.csv", recursive=True)
|
||||||
folders = [os.path.dirname(f.strip()) for f in meta_files]
|
folders = [os.path.dirname(f.strip()) for f in meta_files]
|
||||||
# meta_files = [f.strip() for f in meta_files.split(",")]
|
# meta_files = [f.strip() for f in meta_files.split(",")]
|
||||||
items = []
|
items = []
|
||||||
|
|
|
@ -243,7 +243,7 @@ class AudioProcessor(object):
|
||||||
if self.do_trim_silence:
|
if self.do_trim_silence:
|
||||||
x = self.trim_silence(x)
|
x = self.trim_silence(x)
|
||||||
# sr, x = io.wavfile.read(filename)
|
# sr, x = io.wavfile.read(filename)
|
||||||
assert self.sample_rate == sr
|
assert self.sample_rate == sr, "%s vs %s"%(self.sample_rate, sr)
|
||||||
return x
|
return x
|
||||||
|
|
||||||
def encode_16bits(self, x):
|
def encode_16bits(self, x):
|
||||||
|
|
Loading…
Reference in New Issue