diff --git a/TTS/tts/utils/emotions.py b/TTS/tts/utils/emotions.py
new file mode 100644
index 00000000..71cd71bf
--- /dev/null
+++ b/TTS/tts/utils/emotions.py
@@ -0,0 +1,197 @@
+import json
+import os
+from typing import Any, Dict, List, Tuple, Union
+
+import fsspec
+import numpy as np
+import torch
+from coqpit import Coqpit
+
+from TTS.config import get_from_config_or_model_args_with_default
+from TTS.tts.utils.managers import EmbeddingManager
+
+
+class EmotionManager(EmbeddingManager):
+    """Manage the emotions for emotional TTS. Load a datafile and parse the information
+    in a way that can be queried by emotion or clip.
+
+    There are 3 different scenarios considered:
+
+    1. Models using emotion embedding layers. The datafile only maps emotion names to ids used by the embedding layer.
+    2. Models using embeddings. The datafile includes a dictionary in the following format.
+
+    ::
+
+        {
+            'clip_name.wav': {
+                'name': 'emotionA',
+                'embedding': []
+            },
+            ...
+        }
+
+    3. Computing the embeddings by the emotion encoder. It loads the emotion encoder model and
+       computes the embeddings for a given clip or emotion.
+
+    Args:
+        emo_embeddings_file_path (str, optional): Path to the metafile that includes the emotion embeddings. Defaults to "".
+        emotion_id_file_path (str, optional): Path to the metafile that maps emotion names to ids used by
+            TTS models. Defaults to "".
+        encoder_model_path (str, optional): Path to the emotion encoder model file. Defaults to "".
+        encoder_config_path (str, optional): Path to the emotion encoder config file. Defaults to "".
+
+    Examples:
+        >>> # load audio processor and emotion encoder
+        >>> ap = AudioProcessor(**config.audio)
+        >>> manager = EmotionManager(encoder_model_path=encoder_model_path, encoder_config_path=encoder_config_path)
+        >>> # load a sample audio and compute embedding
+        >>> embedding = manager.compute_embedding_from_clip(sample_wav_path)
+    """
+
+    def __init__(
+        self,
+        emo_embeddings_file_path: str = "",
+        emotion_id_file_path: str = "",
+        encoder_model_path: str = "",
+        encoder_config_path: str = "",
+        use_cuda: bool = False,
+    ):
+        super().__init__(
+            external_emotions_ids_file_path=emo_embeddings_file_path,
+            id_file_path=emotion_id_file_path,
+            encoder_model_path=encoder_model_path,
+            encoder_config_path=encoder_config_path,
+            use_cuda=use_cuda,
+        )
+
+    @property
+    def num_emotions(self):
+        return len(self.ids)
+
+    @property
+    def emotion_names(self):
+        return list(self.ids.keys())
+
+    @staticmethod
+    def parse_ids_from_data(items: List, parse_key: str) -> Any:
+        raise NotImplementedError
+
+    def set_ids_from_data(self, items: List, parse_key: str) -> Any:
+        raise NotImplementedError
+
+    def get_emotions(self) -> List:
+        return self.ids
+
+    @staticmethod
+    def init_from_config(config: "Coqpit") -> "EmotionManager":
+        """Initialize an emotion manager from config.
+
+        Args:
+            config (Coqpit): Config object.
+
+        Returns:
+            EmotionManager: Emotion manager object.
+ """ + emotion_manager = None + if get_from_config_or_model_args_with_default(config, "use_emotion_embedding", False): + if get_from_config_or_model_args_with_default(config, "emotions_ids_file", None): + emotion_manager = EmotionManager( + emotion_id_file_path=get_from_config_or_model_args_with_default(config, "emotions_ids_file", None) + ) + + if get_from_config_or_model_args_with_default(config, "use_external_emotion_embedding", False): + if get_from_config_or_model_args_with_default(config, "external_emotions_ids_file", None): + emotion_manager = EmotionManager( + embeddings_file_path=get_from_config_or_model_args_with_default(config, "external_emotions_ids_file", None) + ) + + return emotion_manager + + +def _set_file_path(path): + """Find the emotions.json under the given path or the above it. + Intended to band aid the different paths returned in restored and continued training.""" + path_restore = os.path.join(os.path.dirname(path), "emotions.json") + path_continue = os.path.join(path, "emotions.json") + fs = fsspec.get_mapper(path).fs + if fs.exists(path_restore): + return path_restore + if fs.exists(path_continue): + return path_continue + raise FileNotFoundError(f" [!] `emotions.json` not found in {path}") + + +def load_emotion_mapping(out_path): + """Loads emotion mapping if already present.""" + if os.path.splitext(out_path)[1] == ".json": + json_file = out_path + else: + json_file = _set_file_path(out_path) + with fsspec.open(json_file, "r") as f: + return json.load(f) + + +def save_emotion_mapping(out_path, emotion_mapping): + """Saves emotion mapping if not yet present.""" + if out_path is not None: + emotions_json_path = _set_file_path(out_path) + with fsspec.open(emotions_json_path, "w") as f: + json.dump(emotion_mapping, f, indent=4) + + +def get_emotion_manager(c: Coqpit, restore_path: str = None, out_path: str = None) -> EmotionManager: + """Initiate a `EmotionManager` instance by the provided config. + + Args: + c (Coqpit): Model configuration. + restore_path (str): Path to a previous training folder. + out_path (str, optional): Save the generated emotion IDs to a output path. Defaults to None. + + Returns: + EmotionManager: initialized and ready to use instance. + """ + emotion_manager = EmotionManager() + if restore_path: + emotions_ids_file = _set_file_path(restore_path) + # restoring emotion manager from a previous run. + if c.use_external_emotions_embeddings: + # restore emotion manager with the embedding file + if not os.path.exists(emotions_ids_file): + print("WARNING: emotions.json was not found in restore_path, trying to use CONFIG.external_emotions_ids_file") + if not os.path.exists(c.external_emotions_ids_file): + raise RuntimeError( + "You must copy the file emotions.json to restore_path, or set a valid file in CONFIG.external_emotions_ids_file" + ) + emotion_manager.load_embeddings_from_file(c.external_emotions_ids_file) + emotion_manager.load_embeddings_from_file(emotions_ids_file) + elif not c.use_external_emotions_embeddings: # restor emotion manager with emotion ID file. + emotion_manager.load_ids_from_file(emotions_ids_file) + + elif c.use_external_emotions_embeddings and c.external_emotions_ids_file: + # new emotion manager with external emotion embeddings. + emotion_manager.load_embeddings_from_file(c.external_emotions_ids_file) + elif c.use_external_emotions_embeddings and not c.external_emotions_ids_file: + raise "use_external_emotions_embeddings is True, so you need pass a external emotion embedding file." 
+    elif c.use_emotion_embedding:
+        if "emotions_ids_file" in c and c.emotions_ids_file:
+            emotion_manager.load_ids_from_file(c.emotions_ids_file)
+        else:  # get the emotion ids from the external embeddings file
+            emotion_manager.load_embeddings_from_file(c.external_emotions_ids_file)
+
+    if emotion_manager.num_emotions > 0:
+        print(
+            " > Emotion manager is loaded with {} emotions: {}".format(
+                emotion_manager.num_emotions, ", ".join(emotion_manager.ids)
+            )
+        )
+
+    # save file if path is defined
+    if out_path:
+        out_file_path = os.path.join(out_path, "emotions.json")
+        print(f" > Saving `emotions.json` to {out_file_path}.")
+        if c.use_external_emotions_embeddings and c.external_emotions_ids_file:
+            emotion_manager.save_embeddings_to_file(out_file_path)
+        else:
+            emotion_manager.save_ids_to_file(out_file_path)
+    return emotion_manager
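
For reference outside the diff, a minimal usage sketch of the two entry points this file adds. The config attribute names (use_external_emotions_embeddings, external_emotions_ids_file, use_emotion_embedding, emotions_ids_file) are taken from the code above; the JSON paths and the `config` object are placeholders, not files shipped with this change.

    from TTS.tts.utils.emotions import EmotionManager, get_emotion_manager

    # Scenario 1: emotion-ID model; the datafile maps emotion names to integer ids.
    manager = EmotionManager(emotion_id_file_path="emotions_ids.json")  # placeholder path
    print(manager.num_emotions, manager.emotion_names)

    # Config-driven setup inside a training recipe; `config` is the model Coqpit.
    # With use_emotion_embedding=True and emotions_ids_file set, this loads the ids
    # and writes `emotions.json` under out_path.
    manager = get_emotion_manager(config, restore_path=None, out_path="output/run/")  # placeholder path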