Refactor artic formatter

2022-07-10 11:37:40 +02:00 · 2022-07-10 11:37:40 +02:00 · 3270dda162
parent 9758971baa
commit 3270dda162
1 changed file with 7 additions and 7 deletions
--- a/TTS/tts/datasets/formatters.py
+++ b/TTS/tts/datasets/formatters.py
@@ -565,16 +565,16 @@ def artic(root_path, meta_file, **kwargs):  # pylint: disable=unused-argument
    speaker_name = "artic"
    with open(txt_file, "r", encoding="utf-8") as ttf:
        for line in ttf:
-            # Split according to standard delimiter
-            cols = line.split("|")
-            if len(cols) > 1:
-                # One or two |s are present => text is taken from the last part
-                text = cols[-1]
+            # Check the number of standard separators
+            n_seps = line.count("|")
+            if n_seps > 0:
+                # Split according to standard separator
+                cols = line.split("|")
            else:
                # Assume ARTIC SNT format => wav name is delimited by the first space
                cols = line.split(maxsplit=1)
-                text = cols[1]
-            # in either way, wav name is stored in `cols[0]`
+            # In either way, wav name is stored in `cols[0]` and text in `cols[-1]`
            wav_file = os.path.join(root_path, "wavs", cols[0] + ".wav")
+            text = cols[-1]
            items.append({"text": text, "audio_file": wav_file, "speaker_name": speaker_name})
    return items