Support ignored speakers in artic multi-speaker formatter

2022-07-10 22:31:41 +02:00 · 2022-07-10 22:31:41 +02:00 · a7d2e9b475
parent 1896db7e2c
commit a7d2e9b475
1 changed files with 9 additions and 2 deletions
--- a/TTS/tts/datasets/formatters.py
+++ b/TTS/tts/datasets/formatters.py
@ -581,7 +581,7 @@ def artic(root_path, meta_file, **kwargs):  # pylint: disable=unused-argument
    return items


-def artic_multispeaker(root_path, meta_file, **kwargs): # pylint: disable=unused-argument
+def artic_multispeaker(root_path, meta_file, ignored_speakers=None): # pylint: disable=unused-argument
    """Normalizes the ARTIC multi-speaker meta data files to TTS format

    Args:
@ -589,11 +589,18 @@ def artic_multispeaker(root_path, meta_file, **kwargs): # pylint: disable=unused
        meta_file (str): name of the meta file containing names of wav to select and
                         transcripts of the corresponding utterances
                         !Must be the same for all speakers!
+        ignored_speakers (List[str]): list of speaker names to skip (None disables filtering)
+
    Returns:
        List[List[str]]: List of (text, wav_path, speaker_name) associated with each utterance
    """
    items = []
    # Loop over speakers: speaker names are subdirs of `root_path`
    for pth in glob(f"{root_path}/*/**/", recursive=False):
+        speaker_name = os.path.basename(pth)
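+        # Skip ignored speakers. NOTE(review): the glob pattern ends in "/", so `pth` has a trailing separator and basename() yields "" -- normalize the path (e.g. os.path.normpath) first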
+        if isinstance(ignored_speakers, list):
+            if speaker_name in ignored_speakers:
+                continue
        items.extend(artic(pth, meta_file))
    return items