diff --git a/TTS/bin/compute_embeddings.py b/TTS/bin/compute_embeddings.py index 67b17241..ba722464 100644 --- a/TTS/bin/compute_embeddings.py +++ b/TTS/bin/compute_embeddings.py @@ -86,6 +86,10 @@ for idx, wav_file in enumerate(tqdm(wav_files)): class_mapping[wav_file_name]["name"] = class_name class_mapping[wav_file_name]["embedding"] = embedd +if args.old_file: + # merge the embeddings dict + class_mapping = {**encoder_manager.embeddings, **class_mapping} + if class_mapping: # save class_mapping if target dataset is defined if ".json" not in args.output_path or ".pth" not in args.output_path: diff --git a/TTS/utils/synthesizer.py b/TTS/utils/synthesizer.py index a48959f4..87474e54 100644 --- a/TTS/utils/synthesizer.py +++ b/TTS/utils/synthesizer.py @@ -123,9 +123,21 @@ class Synthesizer(object): if use_cuda: self.tts_model.cuda() - if self.encoder_checkpoint and hasattr(self.tts_model, "speaker_manager"): + if self.encoder_checkpoint and hasattr(self.tts_model, "speaker_manager") and self.tts_model.speaker_manager is not None: self.tts_model.speaker_manager.init_encoder(self.encoder_checkpoint, self.encoder_config, use_cuda) + if self.tts_emotions_file and hasattr(self.tts_model, "emotion_manager") and self.tts_model.emotion_manager is not None: + if getattr(self.tts_config, "use_external_emotions_embeddings", False) or (getattr(self.tts_config, "model_args", None) and getattr(self.tts_config.model_args, "use_external_emotions_embeddings", False)): + self.tts_model.emotion_manager.load_embeddings_from_file(self.tts_emotions_file) + else: + self.tts_model.emotion_manager.load_ids_from_file(self.tts_emotions_file) + + if self.tts_speakers_file and hasattr(self.tts_model, "speaker_manager") and self.tts_model.speaker_manager is not None: + if getattr(self.tts_config, "use_d_vector_file", False) or (getattr(self.tts_config, "model_args", None) and getattr(self.tts_config.model_args, "use_d_vector_file", False)): + self.tts_model.speaker_manager.load_embeddings_from_file(self.tts_speakers_file) + else: + self.tts_model.speaker_manager.load_ids_from_file(self.tts_speakers_file) + def _set_speaker_encoder_paths_from_tts_config(self): """Set the encoder paths from the tts model config for models with speaker encoders.""" if hasattr(self.tts_config, "model_args") and hasattr(