From 1654a725a7adc26797e1a731ef13624a9fdab5d8 Mon Sep 17 00:00:00 2001
From: Eren Golge
Date: Mon, 20 May 2019 18:20:48 +0200
Subject: [PATCH] new mozilla preprocessor

---
 datasets/preprocess.py | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/datasets/preprocess.py b/datasets/preprocess.py
index 1bf9ae59..ac9e4cfa 100644
--- a/datasets/preprocess.py
+++ b/datasets/preprocess.py
@@ -29,7 +29,7 @@ def tweb(root_path, meta_file):
     # return {'text': texts, 'wavs': wavs}
 
 
-def mozilla(root_path, meta_file):
+def mozilla_old(root_path, meta_file):
     """Normalizes Mozilla meta data files to TTS format"""
     txt_file = os.path.join(root_path, meta_file)
     items = []
@@ -44,6 +44,20 @@ def mozilla(root_path, meta_file):
     return items
 
 
+def mozilla(root_path, meta_file):
+    """Normalizes Mozilla meta data files to TTS format"""
+    txt_file = os.path.join(root_path, meta_file)
+    items = []
+    with open(txt_file, 'r') as ttf:
+        for line in ttf:
+            cols = line.split('|')
+            wav_file = cols[1].strip()
+            text = cols[0].strip()
+            wav_file = os.path.join(root_path, "wavs", wav_file)
+            items.append([text, wav_file])
+    return items
+
+
 def mailabs(root_path, meta_files):
     """Normalizes M-AI-Labs meta data files to TTS format"""
     folders = [os.path.dirname(f.strip()) for f in meta_files.split(",")]