Refactor artic formatter

This commit is contained in:
Jindrich Matousek 2022-07-10 11:37:40 +02:00
parent 9758971baa
commit 3270dda162
1 changed file with 7 additions and 7 deletions

View File

@@ -565,16 +565,16 @@ def artic(root_path, meta_file, **kwargs): # pylint: disable=unused-argument
speaker_name = "artic" speaker_name = "artic"
with open(txt_file, "r", encoding="utf-8") as ttf: with open(txt_file, "r", encoding="utf-8") as ttf:
for line in ttf: for line in ttf:
# Split according to standard delimiter # Check the number of standard separators
n_seps = line.count("|")
if n_seps > 0:
# Split according to standard separator
cols = line.split("|") cols = line.split("|")
if len(cols) > 1:
# One or two |s are present => text is taken from the last part
text = cols[-1]
else: else:
# Assume ARTIC SNT format => wav name is delimited by the first space # Assume ARTIC SNT format => wav name is delimited by the first space
cols = line.split(maxsplit=1) cols = line.split(maxsplit=1)
text = cols[1] # In either way, wav name is stored in `cols[0]` and text in `cols[-1]`
# in either way, wav name is stored in `cols[0]`
wav_file = os.path.join(root_path, "wavs", cols[0] + ".wav") wav_file = os.path.join(root_path, "wavs", cols[0] + ".wav")
text = cols[-1]
items.append({"text": text, "audio_file": wav_file, "speaker_name": speaker_name}) items.append({"text": text, "audio_file": wav_file, "speaker_name": speaker_name})
return items return items