diff --git a/TTS/tts/datasets/formatters.py b/TTS/tts/datasets/formatters.py index 40b26679..b811e88a 100644 --- a/TTS/tts/datasets/formatters.py +++ b/TTS/tts/datasets/formatters.py @@ -581,11 +581,25 @@ def kokoro(root_path, meta_file, **kwargs): # pylint: disable=unused-argument def artic(root_path, meta_file, **kwargs): # pylint: disable=unused-argument - """Normalizes the ARTIC meta data file to TTS format""" + """Normalizes the ARTIC meta data file to TTS format + + Args: + root_path (str): path to the artic dataset + meta_file (str): name of the meta file containing names of wav to select and + transcripts of the corresponding utterances + + Returns: + List[dict]: one dict per utterance with keys `text`, `audio_file`, `speaker_name`, `language` and `root_path` + """ txt_file = os.path.join(root_path, meta_file) items = [] # Speaker name is the name of the directory with the data (last part of `root_path`) speaker_name = os.path.basename(os.path.normpath(root_path)) + # Speaker name can consist of a voice name, language code (e.g. cs-CZ) and gender (m/f) separated by dots + # Example: AndJa.cs-CZ.m + parts = speaker_name.split(".") + lang = parts[1] if len(parts) == 3 and "-" in parts[1] else None + print(f" > ARTIC dataset: voice {parts[0]}, language {lang}") with open(txt_file, "r", encoding="utf-8") as ttf: for line in ttf: # Check the number of standard separators @@ -599,7 +613,7 @@ def artic(root_path, meta_file, **kwargs): # pylint: disable=unused-argument # In either way, wav name is stored in `cols[0]` and text in `cols[-1]` wav_file = os.path.join(root_path, "wavs", cols[0] + ".wav") text = cols[-1] - items.append({"text": text, "audio_file": wav_file, "speaker_name": speaker_name, "root_path": root_path}) + items.append({"text": text, "audio_file": wav_file, "speaker_name": speaker_name, "language": lang, "root_path": root_path}) return items