def artic_multispeaker(root_path, meta_file, ignored_speakers=None):
    """Normalizes the ARTIC multi-speaker meta data files to TTS format

    Every directory matched by ``{root_path}/*/**/`` is handed to `artic`
    together with `meta_file`, and the resulting items are concatenated.
    Directories whose name appears in `ignored_speakers` are skipped.

    Args:
        root_path (str): path to the artic dataset
        meta_file (str): name of the meta file containing names of wav to select and
            transcripts of the corresponding utterances
            !Must be the same for all speakers!
        ignored_speakers (List[str]): list of ignored speakers (or None).
            Tuples and sets are accepted as well; any other type disables
            the filtering.

    Returns:
        List[List[str]]: List of (text, wav_path, speaker_name) associated with each utterance
    """
    # Only real collections of names are honoured; anything else (None, a
    # bare string, ...) means "ignore nobody", as before.
    if isinstance(ignored_speakers, (list, tuple, set, frozenset)):
        skipped = ignored_speakers
    else:
        skipped = ()

    items = []
    # Loop over speakers. With `recursive=False` the `**` behaves like `*`,
    # so this matches directories two levels below `root_path`.
    # NOTE(review): the last path component is taken as the speaker name;
    # confirm this matches the on-disk layout of the dataset.
    for pth in glob(f"{root_path}/*/**/", recursive=False):
        # The pattern ends with a separator, so every match does too and a
        # plain `os.path.basename(pth)` would always be "". Strip the
        # trailing separator first so the directory name is recovered.
        speaker_name = os.path.basename(os.path.normpath(pth))
        # Ignore speakers
        if speaker_name in skipped:
            continue
        items.extend(artic(pth, meta_file))
    return items