From fbea058c596e4a0b1d0c21c68438d4ce6d85ae60 Mon Sep 17 00:00:00 2001
From: Edresson
Date: Sat, 24 Oct 2020 16:10:05 -0300
Subject: [PATCH] add parse speakers function

---
 TTS/bin/train_glow_tts.py | 40 ++------------------------------------
 TTS/bin/train_tts.py      | 40 ++------------------------------------
 TTS/tts/utils/speakers.py | 61 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 65 insertions(+), 76 deletions(-)

diff --git a/TTS/bin/train_glow_tts.py b/TTS/bin/train_glow_tts.py
index d924b906..7ffca36e 100644
--- a/TTS/bin/train_glow_tts.py
+++ b/TTS/bin/train_glow_tts.py
@@ -19,8 +19,7 @@ from TTS.tts.utils.distribute import (DistributedSampler, init_distributed,
 from TTS.tts.utils.generic_utils import setup_model, check_config_tts
 from TTS.tts.utils.io import save_best_model, save_checkpoint
 from TTS.tts.utils.measures import alignment_diagonal_score
-from TTS.tts.utils.speakers import (get_speakers, load_speaker_mapping,
-                                    save_speaker_mapping)
+from TTS.tts.utils.speakers import parse_speakers, load_speaker_mapping
 from TTS.tts.utils.synthesis import synthesis
 from TTS.tts.utils.text.symbols import make_symbols, phonemes, symbols
 from TTS.tts.utils.visual import plot_alignment, plot_spectrogram
@@ -474,42 +473,7 @@ def main(args):  # pylint: disable=redefined-outer-name
         meta_data_eval = meta_data_eval[:int(len(meta_data_eval) * c.eval_portion)]
 
     # parse speakers
-    if c.use_speaker_embedding:
-        speakers = get_speakers(meta_data_train)
-        if args.restore_path:
-            if c.use_external_speaker_embedding_file: # if restore checkpoint and use External Embedding file
-                prev_out_path = os.path.dirname(args.restore_path)
-                speaker_mapping = load_speaker_mapping(prev_out_path)
-                if not speaker_mapping:
-                    print("WARNING: speakers.json was not found in restore_path, trying to use CONFIG.external_speaker_embedding_file")
-                    speaker_mapping = load_speaker_mapping(c.external_speaker_embedding_file)
-                    if not speaker_mapping:
-                        raise RuntimeError("You must copy the file speakers.json to restore_path, or set a valid file in CONFIG.external_speaker_embedding_file")
-                speaker_embedding_dim = len(speaker_mapping[list(speaker_mapping.keys())[0]]['embedding'])
-            elif not c.use_external_speaker_embedding_file: # if restore checkpoint and don't use External Embedding file
-                prev_out_path = os.path.dirname(args.restore_path)
-                speaker_mapping = load_speaker_mapping(prev_out_path)
-                speaker_embedding_dim = None
-                assert all([speaker in speaker_mapping
-                            for speaker in speakers]), "As of now you, you cannot " \
-                                                       "introduce new speakers to " \
-                                                       "a previously trained model."
-        elif c.use_external_speaker_embedding_file and c.external_speaker_embedding_file: # if start new train using External Embedding file
-            speaker_mapping = load_speaker_mapping(c.external_speaker_embedding_file)
-            speaker_embedding_dim = len(speaker_mapping[list(speaker_mapping.keys())[0]]['embedding'])
-        elif c.use_external_speaker_embedding_file and not c.external_speaker_embedding_file: # if start new train using External Embedding file and don't pass external embedding file
-            raise "use_external_speaker_embedding_file is True, so you need pass a external speaker embedding file, run GE2E-Speaker_Encoder-ExtractSpeakerEmbeddings-by-sample.ipynb or AngularPrototypical-Speaker_Encoder-ExtractSpeakerEmbeddings-by-sample.ipynb notebook in notebooks/ folder"
-        else: # if start new train and don't use External Embedding file
-            speaker_mapping = {name: i for i, name in enumerate(speakers)}
-            speaker_embedding_dim = None
-        save_speaker_mapping(OUT_PATH, speaker_mapping)
-        num_speakers = len(speaker_mapping)
-        print("Training with {} speakers: {}".format(len(speakers),
-                                                     ", ".join(speakers)))
-    else:
-        num_speakers = 0
-        speaker_embedding_dim = None
-        speaker_mapping = None
+    num_speakers, speaker_embedding_dim, speaker_mapping = parse_speakers(c, args, meta_data_train, OUT_PATH)
 
     # setup model
     model = setup_model(num_chars, num_speakers, c, speaker_embedding_dim=speaker_embedding_dim)
diff --git a/TTS/bin/train_tts.py b/TTS/bin/train_tts.py
index 88e10aea..4c615b99 100644
--- a/TTS/bin/train_tts.py
+++ b/TTS/bin/train_tts.py
@@ -22,8 +22,7 @@ from TTS.tts.utils.distribute import (DistributedSampler,
 from TTS.tts.utils.generic_utils import setup_model, check_config_tts
 from TTS.tts.utils.io import save_best_model, save_checkpoint
 from TTS.tts.utils.measures import alignment_diagonal_score
-from TTS.tts.utils.speakers import (get_speakers, load_speaker_mapping,
-                                    save_speaker_mapping)
+from TTS.tts.utils.speakers import parse_speakers, load_speaker_mapping
 from TTS.tts.utils.synthesis import synthesis
 from TTS.tts.utils.text.symbols import make_symbols, phonemes, symbols
 from TTS.tts.utils.visual import plot_alignment, plot_spectrogram
@@ -502,42 +501,7 @@ def main(args):  # pylint: disable=redefined-outer-name
         meta_data_eval = meta_data_eval[:int(len(meta_data_eval) * c.eval_portion)]
 
     # parse speakers
-    if c.use_speaker_embedding:
-        speakers = get_speakers(meta_data_train)
-        if args.restore_path:
-            if c.use_external_speaker_embedding_file: # if restore checkpoint and use External Embedding file
-                prev_out_path = os.path.dirname(args.restore_path)
-                speaker_mapping = load_speaker_mapping(prev_out_path)
-                if not speaker_mapping:
-                    print("WARNING: speakers.json was not found in restore_path, trying to use CONFIG.external_speaker_embedding_file")
-                    speaker_mapping = load_speaker_mapping(c.external_speaker_embedding_file)
-                    if not speaker_mapping:
-                        raise RuntimeError("You must copy the file speakers.json to restore_path, or set a valid file in CONFIG.external_speaker_embedding_file")
-                speaker_embedding_dim = len(speaker_mapping[list(speaker_mapping.keys())[0]]['embedding'])
-            elif not c.use_external_speaker_embedding_file: # if restore checkpoint and don't use External Embedding file
-                prev_out_path = os.path.dirname(args.restore_path)
-                speaker_mapping = load_speaker_mapping(prev_out_path)
-                speaker_embedding_dim = None
-                assert all([speaker in speaker_mapping
-                            for speaker in speakers]), "As of now you, you cannot " \
-                                                       "introduce new speakers to " \
-                                                       "a previously trained model."
-        elif c.use_external_speaker_embedding_file and c.external_speaker_embedding_file: # if start new train using External Embedding file
-            speaker_mapping = load_speaker_mapping(c.external_speaker_embedding_file)
-            speaker_embedding_dim = len(speaker_mapping[list(speaker_mapping.keys())[0]]['embedding'])
-        elif c.use_external_speaker_embedding_file and not c.external_speaker_embedding_file: # if start new train using External Embedding file and don't pass external embedding file
-            raise "use_external_speaker_embedding_file is True, so you need pass a external speaker embedding file, run GE2E-Speaker_Encoder-ExtractSpeakerEmbeddings-by-sample.ipynb or AngularPrototypical-Speaker_Encoder-ExtractSpeakerEmbeddings-by-sample.ipynb notebook in notebooks/ folder"
-        else: # if start new train and don't use External Embedding file
-            speaker_mapping = {name: i for i, name in enumerate(speakers)}
-            speaker_embedding_dim = None
-        save_speaker_mapping(OUT_PATH, speaker_mapping)
-        num_speakers = len(speaker_mapping)
-        print("Training with {} speakers: {}".format(num_speakers,
-                                                     ", ".join(speakers)))
-    else:
-        num_speakers = 0
-        speaker_embedding_dim = None
-        speaker_mapping = None
+    num_speakers, speaker_embedding_dim, speaker_mapping = parse_speakers(c, args, meta_data_train, OUT_PATH)
 
     model = setup_model(num_chars, num_speakers, c, speaker_embedding_dim)
 
diff --git a/TTS/tts/utils/speakers.py b/TTS/tts/utils/speakers.py
index 156e42af..d507ff3d 100644
--- a/TTS/tts/utils/speakers.py
+++ b/TTS/tts/utils/speakers.py
@@ -30,3 +30,64 @@ def get_speakers(items):
     """Returns a sorted, unique list of speakers in a given dataset."""
     speakers = {e[2] for e in items}
     return sorted(speakers)
+
+
+def parse_speakers(c, args, meta_data_train, OUT_PATH):
+    """Return number of speakers, speaker embedding shape and speaker mapping.
+
+    Args:
+        c: config object; ``use_speaker_embedding``,
+            ``use_external_speaker_embedding_file`` and
+            ``external_speaker_embedding_file`` are read from it.
+        args: parsed arguments; only ``restore_path`` is read.
+        meta_data_train: training items handed to ``get_speakers``.
+        OUT_PATH: location handed to ``save_speaker_mapping``.
+
+    Returns:
+        Tuple ``(num_speakers, speaker_embedding_dim, speaker_mapping)``;
+        ``(0, None, None)`` when ``c.use_speaker_embedding`` is falsy.
+
+    Raises:
+        RuntimeError: external embeddings are requested but no usable
+            embedding file is available.
+        AssertionError: a restored speaker mapping lacks a speaker that is
+            present in ``meta_data_train``.
+    """
+    if c.use_speaker_embedding:
+        speakers = get_speakers(meta_data_train)
+        if args.restore_path:
+            if c.use_external_speaker_embedding_file: # if restore checkpoint and use External Embedding file
+                prev_out_path = os.path.dirname(args.restore_path)
+                speaker_mapping = load_speaker_mapping(prev_out_path)
+                if not speaker_mapping:
+                    print("WARNING: speakers.json was not found in restore_path, trying to use CONFIG.external_speaker_embedding_file")
+                    speaker_mapping = load_speaker_mapping(c.external_speaker_embedding_file)
+                    if not speaker_mapping:
+                        raise RuntimeError("You must copy the file speakers.json to restore_path, or set a valid file in CONFIG.external_speaker_embedding_file")
+                speaker_embedding_dim = len(speaker_mapping[list(speaker_mapping.keys())[0]]['embedding'])
+            elif not c.use_external_speaker_embedding_file: # if restore checkpoint and don't use External Embedding file
+                prev_out_path = os.path.dirname(args.restore_path)
+                speaker_mapping = load_speaker_mapping(prev_out_path)
+                speaker_embedding_dim = None
+                assert all([speaker in speaker_mapping
+                            for speaker in speakers]), "As of now you, you cannot " \
+                                                       "introduce new speakers to " \
+                                                       "a previously trained model."
+        elif c.use_external_speaker_embedding_file and c.external_speaker_embedding_file: # if start new train using External Embedding file
+            speaker_mapping = load_speaker_mapping(c.external_speaker_embedding_file)
+            speaker_embedding_dim = len(speaker_mapping[list(speaker_mapping.keys())[0]]['embedding'])
+        elif c.use_external_speaker_embedding_file and not c.external_speaker_embedding_file: # if start new train using External Embedding file and don't pass external embedding file
+            raise RuntimeError("use_external_speaker_embedding_file is True, so you need pass a external speaker embedding file, run GE2E-Speaker_Encoder-ExtractSpeakerEmbeddings-by-sample.ipynb or AngularPrototypical-Speaker_Encoder-ExtractSpeakerEmbeddings-by-sample.ipynb notebook in notebooks/ folder")
+        else: # if start new train and don't use External Embedding file
+            speaker_mapping = {name: i for i, name in enumerate(speakers)}
+            speaker_embedding_dim = None
+        save_speaker_mapping(OUT_PATH, speaker_mapping)
+        num_speakers = len(speaker_mapping)
+        print("Training with {} speakers: {}".format(len(speakers),
+                                                     ", ".join(speakers)))
+    else:
+        num_speakers = 0
+        speaker_embedding_dim = None
+        speaker_mapping = None
+
+    return num_speakers, speaker_embedding_dim, speaker_mapping