The language of each item (sample/utterance) is set to the dataset language only when it is not already defined at the sample/utterance level.

The speaker name is prefixed with the dataset name in the case of multispeaker datasets. Refactor the "artic" formatter.
2023-09-06 17:05:47 +02:00 · 2023-09-06 17:05:47 +02:00 · c312343585
parent a0db2eeee8
commit c312343585
2 changed files with 14 additions and 7 deletions
--- a/TTS/tts/datasets/__init__.py
+++ b/TTS/tts/datasets/__init__.py
@@ -58,8 +58,12 @@ def split_dataset(items, eval_split_max_size=None, eval_split_size=0.01):
 def add_extra_keys(metadata, language, dataset_name):
    for item in metadata:
-        # add language name
+        # JMa: Add language name only if not defined at the sample level. Could be good for multi-language datasets.
-        item["language"] = language
+        if not item["language"]:
            item["language"] = language
        # JMa: Prepend dataset name to speaker name. Could be good for multispeaker datasets.
        if item["speaker_name"] != dataset_name and not item["speaker_name"].startswith(dataset_name+"_"):
            item["speaker_name"] = f'{dataset_name}_{item["speaker_name"]}'
        # add unique audio name
        relfilepath = os.path.splitext(os.path.relpath(item["audio_file"], item["root_path"]))[0]
        audio_unique_name = f"{dataset_name}#{relfilepath}"
--- a/TTS/tts/datasets/formatters.py
+++ b/TTS/tts/datasets/formatters.py
@@ -652,11 +652,14 @@ def artic(root_path, meta_file, **kwargs):  # pylint: disable=unused-argument
    items = []
    # Speaker name is the name of the directory with the data (last part of `root_path`)
    speaker_name = os.path.basename(os.path.normpath(root_path))
-    # Speaker name can consists of language code (eg. cs-CZ) and gender (m/f) separated by dots
+    # Speaker name can consist of a language code (e.g. cs-CZ or en) and gender (m/f) separated by dots
-    # Example: AndJa.cs-CZ.m
+    # Example: AndJa.cs-CZ.m, LJS.en.f
-    parts = speaker_name.split(".")
+    try:
-    lang = parts[1] if len(parts) == 3 and "-" in parts[1] else None
+        voice, lang, sex = speaker_name.split(".")
-    print(f" > ARTIC dataset: voice {parts[0]}, language {lang}")
+    except ValueError:
        voice = speaker_name
        lang, sex = None, None
    print(f" > ARTIC dataset: voice={voice}, sex={sex}, language={lang}")
    with open(txt_file, "r", encoding="utf-8") as ttf:
        for line in ttf:
            # Check the number of standard separators