mirror of https://github.com/coqui-ai/TTS.git
Fix the bug in M-AILABS formatter
This commit is contained in:
parent
c334d39acc
commit
f1f016314e
|
@@ -125,6 +125,7 @@ class TTSDataset(Dataset):
|
||||||
self.d_vector_mapping = d_vector_mapping
|
self.d_vector_mapping = d_vector_mapping
|
||||||
self.language_id_mapping = language_id_mapping
|
self.language_id_mapping = language_id_mapping
|
||||||
self.use_noise_augment = use_noise_augment
|
self.use_noise_augment = use_noise_augment
|
||||||
|
|
||||||
self.verbose = verbose
|
self.verbose = verbose
|
||||||
self.input_seq_computed = False
|
self.input_seq_computed = False
|
||||||
self.rescue_item_idx = 1
|
self.rescue_item_idx = 1
|
||||||
|
|
|
@@ -68,14 +68,19 @@ def mailabs(root_path, meta_files=None):
|
||||||
recursively. Defaults to None
|
recursively. Defaults to None
|
||||||
"""
|
"""
|
||||||
speaker_regex = re.compile("by_book/(male|female)/(?P<speaker_name>[^/]+)/")
|
speaker_regex = re.compile("by_book/(male|female)/(?P<speaker_name>[^/]+)/")
|
||||||
if meta_files is None:
|
if not meta_files:
|
||||||
csv_files = glob(root_path + "/**/metadata.csv", recursive=True)
|
csv_files = glob(root_path + "/**/metadata.csv", recursive=True)
|
||||||
else:
|
else:
|
||||||
csv_files = meta_files
|
csv_files = meta_files
|
||||||
|
|
||||||
# meta_files = [f.strip() for f in meta_files.split(",")]
|
# meta_files = [f.strip() for f in meta_files.split(",")]
|
||||||
items = []
|
items = []
|
||||||
for csv_file in csv_files:
|
for csv_file in csv_files:
|
||||||
|
if os.path.isfile(csv_file):
|
||||||
|
txt_file = csv_file
|
||||||
|
else:
|
||||||
txt_file = os.path.join(root_path, csv_file)
|
txt_file = os.path.join(root_path, csv_file)
|
||||||
|
|
||||||
folder = os.path.dirname(txt_file)
|
folder = os.path.dirname(txt_file)
|
||||||
# determine speaker based on folder structure...
|
# determine speaker based on folder structure...
|
||||||
speaker_name_match = speaker_regex.search(txt_file)
|
speaker_name_match = speaker_regex.search(txt_file)
|
||||||
|
@@ -90,7 +95,7 @@ def mailabs(root_path, meta_files=None):
|
||||||
with open(txt_file, "r", encoding="utf-8") as ttf:
|
with open(txt_file, "r", encoding="utf-8") as ttf:
|
||||||
for line in ttf:
|
for line in ttf:
|
||||||
cols = line.split("|")
|
cols = line.split("|")
|
||||||
if meta_files is None:
|
if not meta_files:
|
||||||
wav_file = os.path.join(folder, "wavs", cols[0] + ".wav")
|
wav_file = os.path.join(folder, "wavs", cols[0] + ".wav")
|
||||||
else:
|
else:
|
||||||
wav_file = os.path.join(root_path, folder.replace("metadata.csv", ""), "wavs", cols[0] + ".wav")
|
wav_file = os.path.join(root_path, folder.replace("metadata.csv", ""), "wavs", cols[0] + ".wav")
|
||||||
|
@@ -98,7 +103,8 @@ def mailabs(root_path, meta_files=None):
|
||||||
text = cols[1].strip()
|
text = cols[1].strip()
|
||||||
items.append([text, wav_file, speaker_name])
|
items.append([text, wav_file, speaker_name])
|
||||||
else:
|
else:
|
||||||
raise RuntimeError("> File %s does not exist!" % (wav_file))
|
# M-AI-Labs have some missing samples, so just print the warning
|
||||||
|
print("> File %s does not exist!" % (wav_file))
|
||||||
return items
|
return items
|
||||||
|
|
||||||
|
|
||||||
|
@@ -214,7 +220,7 @@ def common_voice(root_path, meta_file, ununsed_speakers=None):
|
||||||
def libri_tts(root_path, meta_files=None, ununsed_speakers=None):
|
def libri_tts(root_path, meta_files=None, ununsed_speakers=None):
|
||||||
"""https://ai.google/tools/datasets/libri-tts/"""
|
"""https://ai.google/tools/datasets/libri-tts/"""
|
||||||
items = []
|
items = []
|
||||||
if meta_files is None:
|
if not meta_files:
|
||||||
meta_files = glob(f"{root_path}/**/*trans.tsv", recursive=True)
|
meta_files = glob(f"{root_path}/**/*trans.tsv", recursive=True)
|
||||||
else:
|
else:
|
||||||
if isinstance(meta_files, str):
|
if isinstance(meta_files, str):
|
||||||
|
|
Loading…
Reference in New Issue