Support ignored speakers in artic multi-speaker formatter

This commit is contained in:
Jindrich Matousek 2022-07-10 22:31:41 +02:00
parent 1896db7e2c
commit a7d2e9b475
1 changed file with 9 additions and 2 deletions

View File

@ -581,7 +581,7 @@ def artic(root_path, meta_file, **kwargs): # pylint: disable=unused-argument
return items
def artic_multispeaker(root_path, meta_file, **kwargs): # pylint: disable=unused-argument
def artic_multispeaker(root_path, meta_file, ignored_speakers=None): # pylint: disable=unused-argument
"""Normalizes the ARTIC multi-speaker meta data files to TTS format
Args:
@ -589,11 +589,18 @@ def artic_multispeaker(root_path, meta_file, **kwargs): # pylint: disable=unused
meta_file (str): name of the meta file containing names of wav to select and
transcripts of the corresponding utterances
!Must be the same for all speakers!
ignored_speakers (List[str]): list of ignored speakers (or None)
Returns:
List[List[str]]: List of (text, wav_path, speaker_name) associated with each utterance
"""
items = []
# Loop over speakers: speaker names are subdirs of `root_path`
for pth in glob(f"{root_path}/*/**/", recursive=False):
speaker_name = os.path.basename(pth)
# Ignore speakers
if isinstance(ignored_speakers, list):
if speaker_name in ignored_speakers:
continue
items.extend(artic(pth, meta_file))
return items