Support ignored speakers in artic multi-speaker formatter

This commit is contained in:
Jindrich Matousek 2022-07-10 22:31:41 +02:00
parent 1896db7e2c
commit a7d2e9b475
1 changed files with 9 additions and 2 deletions

View File

@ -581,7 +581,7 @@ def artic(root_path, meta_file, **kwargs): # pylint: disable=unused-argument
return items return items
def artic_multispeaker(root_path, meta_file, ignored_speakers=None):
    """Normalizes the ARTIC multi-speaker meta data files to TTS format

    Args:
        root_path (str): root folder; speaker folders are looked up with the glob
            pattern `{root_path}/*/**/` (non-recursive, so `**` acts like `*`)
        meta_file (str): name of the meta file containing names of wav to select and
            transcripts of the corresponding utterances
            !Must be the same for all speakers!
        ignored_speakers (List[str]): list of ignored speakers (or None)
    Returns:
        List[List[str]]: List of (text, wav_path, speaker_name) associated with each utterance
    """
    items = []
    # Loop over speakers: the speaker name is the last component of each matched folder
    for pth in glob(f"{root_path}/*/**/", recursive=False):
        # The pattern ends with a path separator, so every match ends with one as well.
        # `os.path.basename` of such a path is an empty string, hence normalize first.
        speaker_name = os.path.basename(os.path.normpath(pth))
        # Ignore speakers (any non-collection value, e.g. None, disables the filter)
        if isinstance(ignored_speakers, (list, tuple, set, frozenset)) and speaker_name in ignored_speakers:
            continue
        items.extend(artic(pth, meta_file))
    return items