mirror of https://github.com/coqui-ai/TTS.git
Parse speaker name in artic dataset to extract language and append language item
Add comments
This commit is contained in:
parent
ec4501d31c
commit
8cfbe23d9e
|
@ -581,11 +581,25 @@ def kokoro(root_path, meta_file, **kwargs): # pylint: disable=unused-argument
|
||||||
|
|
||||||
|
|
||||||
def artic(root_path, meta_file, **kwargs): # pylint: disable=unused-argument
|
def artic(root_path, meta_file, **kwargs): # pylint: disable=unused-argument
|
||||||
"""Normalizes the ARTIC meta data file to TTS format"""
|
"""Normalizes the ARTIC meta data file to TTS format
|
||||||
|
|
||||||
|
Args:
|
||||||
|
root_path (str): path to the artic dataset
|
||||||
|
meta_file (str): name of the meta file containing names of wav to select and
|
||||||
|
transcripts of the corresponding utterances
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
List[Dict]: One dict per utterance with keys `text`, `audio_file`, `speaker_name`, `language` and `root_path`
|
||||||
|
"""
|
||||||
txt_file = os.path.join(root_path, meta_file)
|
txt_file = os.path.join(root_path, meta_file)
|
||||||
items = []
|
items = []
|
||||||
# Speaker name is the name of the directory with the data (last part of `root_path`)
|
# Speaker name is the name of the directory with the data (last part of `root_path`)
|
||||||
speaker_name = os.path.basename(os.path.normpath(root_path))
|
speaker_name = os.path.basename(os.path.normpath(root_path))
|
||||||
|
# Speaker name can consist of a voice name, a language code (e.g. cs-CZ) and a gender (m/f) separated by dots
|
||||||
|
# Example: AndJa.cs-CZ.m
|
||||||
|
parts = speaker_name.split(".")
|
||||||
|
lang = parts[1] if len(parts) == 3 and "-" in parts[1] else None
|
||||||
|
print(f" > ARTIC dataset: voice {parts[0]}, language {lang}")
|
||||||
with open(txt_file, "r", encoding="utf-8") as ttf:
|
with open(txt_file, "r", encoding="utf-8") as ttf:
|
||||||
for line in ttf:
|
for line in ttf:
|
||||||
# Check the number of standard separators
|
# Check the number of standard separators
|
||||||
|
@ -599,7 +613,7 @@ def artic(root_path, meta_file, **kwargs): # pylint: disable=unused-argument
|
||||||
# In either way, wav name is stored in `cols[0]` and text in `cols[-1]`
|
# In either way, wav name is stored in `cols[0]` and text in `cols[-1]`
|
||||||
wav_file = os.path.join(root_path, "wavs", cols[0] + ".wav")
|
wav_file = os.path.join(root_path, "wavs", cols[0] + ".wav")
|
||||||
text = cols[-1]
|
text = cols[-1]
|
||||||
items.append({"text": text, "audio_file": wav_file, "speaker_name": speaker_name, "root_path": root_path})
|
items.append({"text": text, "audio_file": wav_file, "speaker_name": speaker_name, "language": lang, "root_path": root_path})
|
||||||
return items
|
return items
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue