mirror of https://github.com/coqui-ai/TTS.git
config updates, update audio.py, update mailabs preprocessor
This commit is contained in:
parent f69127ff88
commit f096f1052f

.compute | 8
.compute
@@ -9,8 +9,8 @@ pip3 install https://download.pytorch.org/whl/cu100/torch-1.1.0-cp37-cp37m-linux
 wget https://www.dropbox.com/s/wqn5v3wkktw9lmo/install.sh?dl=0 -O install.sh
 sudo sh install.sh
 python3 setup.py develop
-# cp -R ${USER_DIR}/Mozilla_22050 ../tmp/
-cp -R ${USER_DIR}/GermanData ../tmp/
-python3 distribute.py --config_path config_tacotron.json --data_path ../tmp/GermanData/karlsson/
-# python3 distribute.py --config_path config_cluster.json --data_path ${SHARED_DIR}/data/mozilla/Judy/
+# cp -R ${USER_DIR}/GermanData ../tmp/
+# python3 distribute.py --config_path config_tacotron_de.json --data_path ../tmp/GermanData/karlsson/
+cp -R ${USER_DIR}/Mozilla_22050 ../tmp/
+python3 distribute.py --config_path config_tacotron.json --data_path ../tmp/Mozilla_22050/ --restore_path /data/rw/home/4845.pth.tar
 while true; do sleep 1000000; done
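The new launch command resumes from checkpoint 4845 via `--restore_path` instead of training from scratch. As a rough illustration of what restoring a checkpoint involves in PyTorch (the helper name, checkpoint keys, and the partial-restore fallback are assumptions, not distribute.py's exact logic):

```python
import torch


def restore_checkpoint(model, optimizer, restore_path):
    """Hypothetical helper: load model/optimizer state from a saved checkpoint."""
    checkpoint = torch.load(restore_path, map_location="cpu")
    try:
        # assumes the checkpoint stores state dicts under "model" and "optimizer"
        model.load_state_dict(checkpoint["model"])
        optimizer.load_state_dict(checkpoint["optimizer"])
    except (KeyError, RuntimeError):
        # fall back to a partial restore when the architecture changed (e.g. a new prenet)
        model_dict = model.state_dict()
        compatible = {k: v for k, v in checkpoint.get("model", {}).items()
                      if k in model_dict and v.shape == model_dict[k].shape}
        model_dict.update(compatible)
        model.load_state_dict(model_dict)
    return checkpoint.get("step", 0)
```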
@@ -1,6 +1,6 @@
 {
-    "run_name": "mozilla-tacotron-tagent",
-    "run_description": "using forward attention with transition agent, with original prenet, loss masking, separate stopnet, sigmoid norm. Compare this with 4841",
+    "run_name": "mozilla-tacotron-tagent-bn",
+    "run_description": "finetune 4845 with bn prenet.",
 
     "audio":{
         // Audio processing parameters
@@ -40,7 +40,7 @@
     "windowing": false, // Enables attention windowing. Used only in eval mode.
     "memory_size": 5, // ONLY TACOTRON - memory queue size used to queue network predictions to feed autoregressive connection. Useful if r < 5.
     "attention_norm": "sigmoid", // softmax or sigmoid. Suggested to use softmax for Tacotron2 and sigmoid for Tacotron.
-    "prenet_type": "original", // ONLY TACOTRON2 - "original" or "bn".
+    "prenet_type": "bn", // ONLY TACOTRON2 - "original" or "bn".
     "prenet_dropout": true, // ONLY TACOTRON2 - enable/disable dropout at prenet.
     "use_forward_attn": true, // ONLY TACOTRON2 - if it uses forward attention. In general, it aligns faster.
     "transition_agent": true, // ONLY TACOTRON2 - enable/disable transition agent of forward attention.
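Switching `prenet_type` from `"original"` to `"bn"` replaces the dropout-regularized prenet with a batch-normalized one, matching the "finetune 4845 with bn prenet" run description above. A rough sketch of what the two settings imply (class name and layer sizes are illustrative, not the repo's exact module):

```python
import torch.nn as nn


class PrenetSketch(nn.Module):
    """Illustrative prenet: 'original' = Linear+ReLU+Dropout, 'bn' = Linear+BatchNorm+ReLU."""

    def __init__(self, in_dim, sizes=(256, 256), prenet_type="original", dropout=0.5):
        super().__init__()
        layers = []
        for out_dim in sizes:
            layers.append(nn.Linear(in_dim, out_dim))
            if prenet_type == "bn":
                layers.append(nn.BatchNorm1d(out_dim))
                layers.append(nn.ReLU())
            else:  # "original"
                layers.append(nn.ReLU())
                layers.append(nn.Dropout(dropout))
            in_dim = out_dim
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        # x: (batch, in_dim) previous decoder frame
        return self.net(x)
```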
@@ -62,6 +62,7 @@
 
     "run_eval": true,
     "test_delay_epochs": 5, //Until attention is aligned, testing only wastes computation time.
+    "test_sentences_file": null,
     "data_path": "/media/erogol/data_ssd/Data/Mozilla/", // DATASET-RELATED: can overwritten from command argument
     "meta_file_train": "metadata_train.txt", // DATASET-RELATED: metafile for training dataloader.
     "meta_file_val": "metadata_val.txt", // DATASET-RELATED: metafile for evaluation dataloader.
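The added `test_sentences_file` key points the trainer at a custom list of test sentences, with `null` keeping the built-in defaults. A minimal sketch of how such a file could be read, assuming one sentence per line (the helper is hypothetical, not the repo's loader):

```python
def load_test_sentences(path, default_sentences):
    """Hypothetical reader: one test sentence per line, defaults when no file is given."""
    if path is None:
        return default_sentences
    with open(path, encoding="utf-8") as f:
        return [line.strip() for line in f if line.strip()]
```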
@@ -1,6 +1,6 @@
 {
-    "run_name": "german-tacotron-tagent",
-    "run_description": "using forward attention with transition agent, with original prenet, loss masking, separate stopnet, sigmoid norm. First run German data.",
+    "run_name": "german-tacotron-tagent-bn",
+    "run_description": "train german",
 
     "audio":{
         // Audio processing parameters
@@ -65,25 +65,16 @@
     "test_delay_epochs": 5, //Until attention is aligned, testing only wastes computation time.
     "data_path": "/media/erogol/data_ssd/Data/Mozilla/", // DATASET-RELATED: can overwritten from command argument
     "meta_file_train": [
-        "kleinzaches/metadata.csv",
-        "spiegel_kaetzchen/metadata.csv",
-        "herrnarnesschatz/metadata.csv",
-        "maedchen_von_moorhof/metadata.csv",
-        "koenigsgaukler/metadata.csv",
-        "altehous/metadata.csv",
-        "odysseus/metadata.csv",
-        "undine/metadata.csv",
-        "reise_tilsit/metadata.csv",
-        "schmied_seines_glueckes/metadata.csv",
-        "kammmacher/metadata.csv",
-        "unterm_birnbaum/metadata.csv",
-        "liebesbriefe/metadata.csv",
-        "sandmann/metadata.csv"], // DATASET-RELATED: metafile for training dataloader.
+        "grune_haus/metadata.csv",
+        "kleine_lord/metadata.csv",
+        "toten_seelen/metadata.csv",
+        "werde_die_du_bist/metadata.csv"
+    ], // DATASET-RELATED: metafile for training dataloader.
     "meta_file_val": "metadata_val.txt", // DATASET-RELATED: metafile for evaluation dataloader.
     "dataset": "mailabs", // DATASET-RELATED: one of TTS.dataset.preprocessors depending on your target dataset. Use "tts_cache" for pre-computed dataset by extract_features.py
     "min_seq_len": 0, // DATASET-RELATED: minimum text length to use in training
-    "max_seq_len": 150, // DATASET-RELATED: maximum text length
-    "output_path": "../keep/", // DATASET-RELATED: output path for all training outputs.
+    "max_seq_len": 200, // DATASET-RELATED: maximum text length
+    "output_path": "/media/erogol/data_ssd/Data/models/german/", // DATASET-RELATED: output path for all training outputs.
     "num_loader_workers": 4, // number of training data loader processes. Don't set it too big. 4-8 are good values.
     "num_val_loader_workers": 4, // number of evaluation data loader processes.
     "phoneme_cache_path": "phoneme_cache", // phoneme computation is slow, therefore, it caches results in the given folder.
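Besides pointing `meta_file_train` at a different set of M-AI-Labs books, the hunk raises `max_seq_len` from 150 to 200 so longer German sentences are kept. A simplified sketch of the kind of length filtering that `min_seq_len`/`max_seq_len` imply (not the repo's actual dataloader code; items are assumed to be `(text, wav_path)` pairs):

```python
def filter_by_text_length(items, min_seq_len=0, max_seq_len=200):
    """Keep only samples whose text length falls within the configured bounds."""
    return [item for item in items if min_seq_len <= len(item[0]) <= max_seq_len]
```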
@@ -1,4 +1,5 @@
+import os
 from glob import glob
 
 
 def tweb(root_path, meta_file):
@@ -60,6 +61,8 @@ def mozilla(root_path, meta_file):
 
 def mailabs(root_path, meta_files):
     """Normalizes M-AI-Labs meta data files to TTS format"""
+    if meta_files is None:
+        meta_files = glob(root_path+"/**/metadata.csv", recursive=True)
     folders = [os.path.dirname(f.strip()) for f in meta_files]
     # meta_files = [f.strip() for f in meta_files.split(",")]
     items = []
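The hunk ends at `items = []`, so the part that actually parses each `metadata.csv` is not shown. A hedged sketch of how the rest of such a preprocessor could look, assuming the usual M-AI-Labs layout of pipe-separated `file_id|text|normalized_text` rows with audio in a sibling `wavs/` folder (illustration only, not the repo's exact code):

```python
import csv
import os
from glob import glob


def mailabs_sketch(root_path, meta_files=None):
    """Illustrative M-AI-Labs preprocessor: returns [text, wav_path] pairs."""
    if meta_files is None:
        # discover every metadata.csv below the dataset root
        meta_files = glob(os.path.join(root_path, "**", "metadata.csv"), recursive=True)
    items = []
    for meta_file in meta_files:
        # join is a no-op for the absolute paths returned by glob
        meta_path = os.path.join(root_path, meta_file.strip())
        folder = os.path.dirname(meta_path)
        with open(meta_path, encoding="utf-8") as f:
            for row in csv.reader(f, delimiter="|"):
                if len(row) < 3:
                    continue
                wav_path = os.path.join(folder, "wavs", row[0] + ".wav")
                if os.path.exists(wav_path):
                    items.append([row[2], wav_path])  # normalized text, audio path
    return items
```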
@@ -243,7 +243,7 @@ class AudioProcessor(object):
         if self.do_trim_silence:
             x = self.trim_silence(x)
         # sr, x = io.wavfile.read(filename)
-        assert self.sample_rate == sr
+        assert self.sample_rate == sr, "%s vs %s"%(self.sample_rate, sr)
         return x
 
     def encode_16bits(self, x):
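The only functional change here is the assertion message: a sample-rate mismatch now reports both rates instead of raising a bare `AssertionError`. A minimal sketch of a loader with the same guard, using `soundfile` (whether AudioProcessor reads the file this way is an assumption):

```python
import soundfile as sf


def load_wav(filename, expected_sample_rate):
    """Load a wav and fail loudly if its sample rate differs from the configured one."""
    x, sr = sf.read(filename)
    assert expected_sample_rate == sr, "%s vs %s" % (expected_sample_rate, sr)
    return x
```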