mirror of https://github.com/coqui-ai/TTS.git
Bug fix in single speaker emotion embedding training
This commit is contained in:
parent
38027b15c2
commit
10dee54ac3
|
@ -7,7 +7,7 @@ from tqdm import tqdm
|
||||||
|
|
||||||
from TTS.config import load_config
|
from TTS.config import load_config
|
||||||
from TTS.tts.datasets import load_tts_samples
|
from TTS.tts.datasets import load_tts_samples
|
||||||
from TTS.tts.utils.speakers import SpeakerManager
|
from TTS.tts.utils.managers import EmbeddingManager
|
||||||
|
|
||||||
parser = argparse.ArgumentParser(
|
parser = argparse.ArgumentParser(
|
||||||
description="""Compute embedding vectors for each wav file in a dataset.\n\n"""
|
description="""Compute embedding vectors for each wav file in a dataset.\n\n"""
|
||||||
|
@ -44,7 +44,7 @@ c_dataset = load_config(args.config_dataset_path)
|
||||||
meta_data_train, meta_data_eval = load_tts_samples(c_dataset.datasets, eval_split=args.eval)
|
meta_data_train, meta_data_eval = load_tts_samples(c_dataset.datasets, eval_split=args.eval)
|
||||||
wav_files = meta_data_train + meta_data_eval
|
wav_files = meta_data_train + meta_data_eval
|
||||||
|
|
||||||
encoder_manager = SpeakerManager(
|
encoder_manager = EmbeddingManager(
|
||||||
encoder_model_path=args.model_path,
|
encoder_model_path=args.model_path,
|
||||||
encoder_config_path=args.config_path,
|
encoder_config_path=args.config_path,
|
||||||
d_vectors_file_path=args.old_file,
|
d_vectors_file_path=args.old_file,
|
||||||
|
|
|
@ -936,7 +936,10 @@ class Vits(BaseTTS):
|
||||||
|
|
||||||
# concat the emotion embedding and speaker embedding
|
# concat the emotion embedding and speaker embedding
|
||||||
if eg is not None and (self.args.use_emotion_embedding or self.args.use_external_emotions_embeddings):
|
if eg is not None and (self.args.use_emotion_embedding or self.args.use_external_emotions_embeddings):
|
||||||
g = torch.cat([g, eg], dim=1) # [b, h1+h2, 1]
|
if g is None:
|
||||||
|
g = eg
|
||||||
|
else:
|
||||||
|
g = torch.cat([g, eg], dim=1) # [b, h1+h2, 1]
|
||||||
|
|
||||||
# language embedding
|
# language embedding
|
||||||
lang_emb = None
|
lang_emb = None
|
||||||
|
@ -1046,8 +1049,11 @@ class Vits(BaseTTS):
|
||||||
eg = self.emb_emotion(eid).unsqueeze(-1) # [b, h, 1]
|
eg = self.emb_emotion(eid).unsqueeze(-1) # [b, h, 1]
|
||||||
|
|
||||||
# concat the emotion embedding and speaker embedding
|
# concat the emotion embedding and speaker embedding
|
||||||
if eg is not None and g is not None and (self.args.use_emotion_embedding or self.args.use_external_emotions_embeddings):
|
if eg is not None and (self.args.use_emotion_embedding or self.args.use_external_emotions_embeddings):
|
||||||
g = torch.cat([g, eg], dim=1) # [b, h1+h1, 1]
|
if g is None:
|
||||||
|
g = eg
|
||||||
|
else:
|
||||||
|
g = torch.cat([g, eg], dim=1) # [b, h1+h2, 1]
|
||||||
|
|
||||||
# language embedding
|
# language embedding
|
||||||
lang_emb = None
|
lang_emb = None
|
||||||
|
@ -1614,10 +1620,15 @@ class Vits(BaseTTS):
|
||||||
language_manager = LanguageManager.init_from_config(config)
|
language_manager = LanguageManager.init_from_config(config)
|
||||||
emotion_manager = EmotionManager.init_from_config(config)
|
emotion_manager = EmotionManager.init_from_config(config)
|
||||||
|
|
||||||
if config.model_args.encoder_model_path:
|
if config.model_args.encoder_model_path and speaker_manager is not None:
|
||||||
speaker_manager.init_encoder(
|
speaker_manager.init_encoder(
|
||||||
config.model_args.encoder_model_path, config.model_args.encoder_config_path
|
config.model_args.encoder_model_path, config.model_args.encoder_config_path
|
||||||
)
|
)
|
||||||
|
elif config.model_args.encoder_model_path and emotion_manager is not None:
|
||||||
|
emotion_manager.init_encoder(
|
||||||
|
config.model_args.encoder_model_path, config.model_args.encoder_config_path
|
||||||
|
)
|
||||||
|
|
||||||
return Vits(new_config, ap, tokenizer, speaker_manager, language_manager, emotion_manager=emotion_manager)
|
return Vits(new_config, ap, tokenizer, speaker_manager, language_manager, emotion_manager=emotion_manager)
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue