mirror of https://github.com/coqui-ai/TTS.git
Parse speaker name in artic dataset to extract language and append language item
Add comments
This commit is contained in:
parent
ec4501d31c
commit
8cfbe23d9e
|
@ -581,11 +581,25 @@ def kokoro(root_path, meta_file, **kwargs): # pylint: disable=unused-argument
|
|||
|
||||
|
||||
def artic(root_path, meta_file, **kwargs): # pylint: disable=unused-argument
|
||||
"""Normalizes the ARTIC meta data file to TTS format"""
|
||||
"""Normalizes the ARTIC meta data file to TTS format
|
||||
|
||||
Args:
|
||||
root_path (str): path to the artic dataset
|
||||
meta_file (str): name of the meta file containing names of wav to select and
|
||||
transcripts of the corresponding utterances
|
||||
|
||||
Returns:
|
||||
List[Dict[str, Optional[str]]]: One dict per utterance with keys `text`, `audio_file`, `speaker_name`, `language` (None if it cannot be parsed from the speaker name) and `root_path`
|
||||
"""
|
||||
txt_file = os.path.join(root_path, meta_file)
|
||||
items = []
|
||||
# Speaker name is the name of the directory with the data (last part of `root_path`)
|
||||
speaker_name = os.path.basename(os.path.normpath(root_path))
|
||||
# Speaker name can consist of a voice name, language code (e.g. cs-CZ) and gender (m/f) separated by dots
|
||||
# Example: AndJa.cs-CZ.m
|
||||
parts = speaker_name.split(".")
|
||||
lang = parts[1] if len(parts) == 3 and "-" in parts[1] else None
|
||||
print(f" > ARTIC dataset: voice {parts[0]}, language {lang}")
|
||||
with open(txt_file, "r", encoding="utf-8") as ttf:
|
||||
for line in ttf:
|
||||
# Check the number of standard separators
|
||||
|
@ -599,7 +613,7 @@ def artic(root_path, meta_file, **kwargs): # pylint: disable=unused-argument
|
|||
# In either way, wav name is stored in `cols[0]` and text in `cols[-1]`
|
||||
wav_file = os.path.join(root_path, "wavs", cols[0] + ".wav")
|
||||
text = cols[-1]
|
||||
items.append({"text": text, "audio_file": wav_file, "speaker_name": speaker_name, "root_path": root_path})
|
||||
items.append({"text": text, "audio_file": wav_file, "speaker_name": speaker_name, "language": lang, "root_path": root_path})
|
||||
return items
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue