mirror of https://github.com/coqui-ai/TTS.git
load_meta_data changes
This commit is contained in:
parent
1fad04e317
commit
64a01f584b
17
config.json
17
config.json
|
@ -65,10 +65,6 @@
|
||||||
"run_eval": true,
|
"run_eval": true,
|
||||||
"test_delay_epochs": 5, //Until attention is aligned, testing only wastes computation time.
|
"test_delay_epochs": 5, //Until attention is aligned, testing only wastes computation time.
|
||||||
"test_sentences_file": null, // set a file to load sentences to be used for testing. If it is null then we use default english sentences.
|
"test_sentences_file": null, // set a file to load sentences to be used for testing. If it is null then we use default english sentences.
|
||||||
"data_path": "/home/erogol/Data/LJSpeech-1.1/", // DATASET-RELATED: can be overwritten from command argument
|
|
||||||
"meta_file_train": "metadata_train.csv", // DATASET-RELATED: metafile for training dataloader.
|
|
||||||
"meta_file_val": "metadata_val.csv", // DATASET-RELATED: metafile for evaluation dataloader.
|
|
||||||
"dataset": "ljspeech", // DATASET-RELATED: one of TTS.dataset.preprocessors depending on your target dataset. Use "tts_cache" for pre-computed dataset by extract_features.py
|
|
||||||
"min_seq_len": 6, // DATASET-RELATED: minimum text length to use in training
|
"min_seq_len": 6, // DATASET-RELATED: minimum text length to use in training
|
||||||
"max_seq_len": 150, // DATASET-RELATED: maximum text length
|
"max_seq_len": 150, // DATASET-RELATED: maximum text length
|
||||||
"output_path": "../keep/", // DATASET-RELATED: output path for all training outputs.
|
"output_path": "../keep/", // DATASET-RELATED: output path for all training outputs.
|
||||||
|
@ -80,6 +76,17 @@
|
||||||
"text_cleaner": "phoneme_cleaners",
|
"text_cleaner": "phoneme_cleaners",
|
||||||
"use_speaker_embedding": false, // use speaker embedding to enable multi-speaker learning.
|
"use_speaker_embedding": false, // use speaker embedding to enable multi-speaker learning.
|
||||||
"style_wav_for_test": null, // path to style wav file to be used in TacotronGST inference.
|
"style_wav_for_test": null, // path to style wav file to be used in TacotronGST inference.
|
||||||
"use_gst": false // TACOTRON ONLY: use global style tokens
|
"use_gst": false, // TACOTRON ONLY: use global style tokens
|
||||||
|
|
||||||
|
"datasets": // List of datasets. They are all merged and they get different speaker_ids.
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"name": "ljspeech",
|
||||||
|
"path": "/home/erogol/Data/LJSpeech-1.1/",
|
||||||
|
"meta_file_train": "metadata_train.csv",
|
||||||
|
"meta_file_val": "metadata_val.csv"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
14
train.py
14
train.py
|
@ -28,7 +28,7 @@ from TTS.utils.speakers import load_speaker_mapping, save_speaker_mapping, \
|
||||||
from TTS.utils.synthesis import synthesis
|
from TTS.utils.synthesis import synthesis
|
||||||
from TTS.utils.text.symbols import phonemes, symbols
|
from TTS.utils.text.symbols import phonemes, symbols
|
||||||
from TTS.utils.visual import plot_alignment, plot_spectrogram
|
from TTS.utils.visual import plot_alignment, plot_spectrogram
|
||||||
from TTS.datasets.preprocess import get_preprocessor_by_name
|
from TTS.datasets.preprocess import load_meta_data
|
||||||
from TTS.utils.radam import RAdam
|
from TTS.utils.radam import RAdam
|
||||||
from TTS.utils.measures import alignment_diagonal_score
|
from TTS.utils.measures import alignment_diagonal_score
|
||||||
|
|
||||||
|
@ -46,17 +46,7 @@ def setup_loader(ap, is_val=False, verbose=False):
|
||||||
global meta_data_train
|
global meta_data_train
|
||||||
global meta_data_eval
|
global meta_data_eval
|
||||||
if "meta_data_train" not in globals():
|
if "meta_data_train" not in globals():
|
||||||
if c.meta_file_train is not None:
|
meta_data_train, meta_data_eval = load_meta_data(c.datasets)
|
||||||
meta_data_train = get_preprocessor_by_name(
|
|
||||||
c.dataset)(c.data_path, c.meta_file_train)
|
|
||||||
else:
|
|
||||||
meta_data_train = get_preprocessor_by_name(c.dataset)(c.data_path)
|
|
||||||
if "meta_data_eval" not in globals() and c.run_eval:
|
|
||||||
if c.meta_file_val is not None:
|
|
||||||
meta_data_eval = get_preprocessor_by_name(
|
|
||||||
c.dataset)(c.data_path, c.meta_file_val)
|
|
||||||
else:
|
|
||||||
meta_data_eval, meta_data_train = split_dataset(meta_data_train)
|
|
||||||
if is_val and not c.run_eval:
|
if is_val and not c.run_eval:
|
||||||
loader = None
|
loader = None
|
||||||
else:
|
else:
|
||||||
|
|
Loading…
Reference in New Issue