Parse speaker name in artic dataset to extract language and append language item

Add comments
2022-09-13 17:32:25 +02:00 · 2022-09-13 17:32:25 +02:00 · 8cfbe23d9e
parent ec4501d31c
commit 8cfbe23d9e
1 changed files with 16 additions and 2 deletions
--- a/TTS/tts/datasets/formatters.py
+++ b/TTS/tts/datasets/formatters.py
@ -581,11 +581,25 @@ def kokoro(root_path, meta_file, **kwargs):  # pylint: disable=unused-argument


 def artic(root_path, meta_file, **kwargs):  # pylint: disable=unused-argument
-    """Normalizes the ARTIC meta data file to TTS format"""
+    """Normalizes the ARTIC meta data file to TTS format
+    
+    Args:
+        root_path (str): path to the artic dataset
+        meta_file (str): name of the meta file containing names of wav to select and
+                         transcripts of the corresponding utterances
+    
+    Returns:
+        List[Dict[str, Optional[str]]]: one dict per utterance with keys `text`, `audio_file`, `speaker_name`, `language` (None if the speaker name has no language code) and `root_path`
+    """
    txt_file = os.path.join(root_path, meta_file)
    items = []
    # Speaker name is the name of the directory with the data (last part of `root_path`)
    speaker_name = os.path.basename(os.path.normpath(root_path))
+    # Speaker name can consist of a voice name, a language code (e.g. cs-CZ) and a gender (m/f) separated by dots
+    # Example: AndJa.cs-CZ.m
+    parts = speaker_name.split(".")
+    lang = parts[1] if len(parts) == 3 and "-" in parts[1] else None
+    print(f" > ARTIC dataset: voice {parts[0]}, language {lang}")
    with open(txt_file, "r", encoding="utf-8") as ttf:
        for line in ttf:
            # Check the number of standard separators
@ -599,7 +613,7 @@ def artic(root_path, meta_file, **kwargs):  # pylint: disable=unused-argument
            # In either way, wav name is stored in `cols[0]` and text in `cols[-1]`
            wav_file = os.path.join(root_path, "wavs", cols[0] + ".wav")
            text = cols[-1]
-            items.append({"text": text, "audio_file": wav_file, "speaker_name": speaker_name, "root_path": root_path})
+            items.append({"text": text, "audio_file": wav_file, "speaker_name": speaker_name, "language": lang, "root_path": root_path})
    return items