Fix the bug in M-AILABS formatter

This commit is contained in:
Edresson 2021-08-23 16:12:31 -03:00 committed by Eren Gölge
parent d653227e59
commit 3df5d9a619
2 changed files with 12 additions and 5 deletions

View File

@ -125,6 +125,7 @@ class TTSDataset(Dataset):
self.d_vector_mapping = d_vector_mapping self.d_vector_mapping = d_vector_mapping
self.language_id_mapping = language_id_mapping self.language_id_mapping = language_id_mapping
self.use_noise_augment = use_noise_augment self.use_noise_augment = use_noise_augment
self.verbose = verbose self.verbose = verbose
self.input_seq_computed = False self.input_seq_computed = False
self.rescue_item_idx = 1 self.rescue_item_idx = 1

View File

@ -68,14 +68,19 @@ def mailabs(root_path, meta_files=None):
recursively. Defaults to None recursively. Defaults to None
""" """
speaker_regex = re.compile("by_book/(male|female)/(?P<speaker_name>[^/]+)/") speaker_regex = re.compile("by_book/(male|female)/(?P<speaker_name>[^/]+)/")
if meta_files is None: if not meta_files:
csv_files = glob(root_path + "/**/metadata.csv", recursive=True) csv_files = glob(root_path + "/**/metadata.csv", recursive=True)
else: else:
csv_files = meta_files csv_files = meta_files
# meta_files = [f.strip() for f in meta_files.split(",")] # meta_files = [f.strip() for f in meta_files.split(",")]
items = [] items = []
for csv_file in csv_files: for csv_file in csv_files:
txt_file = os.path.join(root_path, csv_file) if os.path.isfile(csv_file):
txt_file = csv_file
else:
txt_file = os.path.join(root_path, csv_file)
folder = os.path.dirname(txt_file) folder = os.path.dirname(txt_file)
# determine speaker based on folder structure... # determine speaker based on folder structure...
speaker_name_match = speaker_regex.search(txt_file) speaker_name_match = speaker_regex.search(txt_file)
@ -90,7 +95,7 @@ def mailabs(root_path, meta_files=None):
with open(txt_file, "r", encoding="utf-8") as ttf: with open(txt_file, "r", encoding="utf-8") as ttf:
for line in ttf: for line in ttf:
cols = line.split("|") cols = line.split("|")
if meta_files is None: if not meta_files:
wav_file = os.path.join(folder, "wavs", cols[0] + ".wav") wav_file = os.path.join(folder, "wavs", cols[0] + ".wav")
else: else:
wav_file = os.path.join(root_path, folder.replace("metadata.csv", ""), "wavs", cols[0] + ".wav") wav_file = os.path.join(root_path, folder.replace("metadata.csv", ""), "wavs", cols[0] + ".wav")
@ -98,7 +103,8 @@ def mailabs(root_path, meta_files=None):
text = cols[1].strip() text = cols[1].strip()
items.append([text, wav_file, speaker_name]) items.append([text, wav_file, speaker_name])
else: else:
raise RuntimeError("> File %s does not exist!" % (wav_file)) # M-AILABS has some missing samples, so just print a warning
print("> File %s does not exist!" % (wav_file))
return items return items
@ -214,7 +220,7 @@ def common_voice(root_path, meta_file, ununsed_speakers=None):
def libri_tts(root_path, meta_files=None, ununsed_speakers=None): def libri_tts(root_path, meta_files=None, ununsed_speakers=None):
"""https://ai.google/tools/datasets/libri-tts/""" """https://ai.google/tools/datasets/libri-tts/"""
items = [] items = []
if meta_files is None: if not meta_files:
meta_files = glob(f"{root_path}/**/*trans.tsv", recursive=True) meta_files = glob(f"{root_path}/**/*trans.tsv", recursive=True)
else: else:
if isinstance(meta_files, str): if isinstance(meta_files, str):