From 7af1aeaf7a29136a8b721d060ed90fbd2d6eed20 Mon Sep 17 00:00:00 2001 From: Eren Golge Date: Mon, 14 Jan 2019 17:29:14 +0100 Subject: [PATCH] Add M-AI-labs preprocessor and set config for en_UK --- config.json | 10 +++++----- datasets/preprocess.py | 22 ++++++++++++++++++++++ 2 files changed, 27 insertions(+), 5 deletions(-) diff --git a/config.json b/config.json index 44d52ad7..3f3c2abd 100644 --- a/config.json +++ b/config.json @@ -43,13 +43,13 @@ "tb_model_param_stats": true, // true, plots param stats per layer on tensorboard. Might be memory consuming, but good for debugging. "run_eval": true, - "data_path": "../../Data/LJSpeech-1.1/", // DATASET-RELATED: can overwritten from command argument - "meta_file_train": "transcript_train.txt", // DATASET-RELATED: metafile for training dataloader. - "meta_file_val": "transcript_val.txt", // DATASET-RELATED: metafile for evaluation dataloader. - "dataset": "tweb", // DATASET-RELATED: one of TTS.dataset.preprocessors depending on your target dataset. Use "tts_cache" for pre-computed dataset by extract_features.py + "data_path": "/home/erogol/Data/en_UK/by_book/female/elizabeth_klett/", // DATASET-RELATED: can be overwritten from command argument + "meta_file_train": "jane_eyre/metadata_train.csv, wives_and_daughters/metadata_train.csv", // DATASET-RELATED: metafile for training dataloader. + "meta_file_val": "jane_eyre/metadata_val.csv, wives_and_daughters/metadata_val.csv", // DATASET-RELATED: metafile for evaluation dataloader. + "dataset": "mailabs", // DATASET-RELATED: one of TTS.dataset.preprocessors depending on your target dataset. Use "tts_cache" for pre-computed dataset by extract_features.py "min_seq_len": 0, // DATASET-RELATED: minimum text length to use in training "max_seq_len": 300, // DATASET-RELATED: maximum text length - "output_path": "/media/erogol/data_ssd/Data/models/tweb_models/", // DATASET-RELATED: output path for all training outputs. 
+ "output_path": "/media/erogol/data_ssd/Data/models/en_UK/", // DATASET-RELATED: output path for all training outputs. "num_loader_workers": 8, // number of training data loader processes. Don't set it too big. 4-8 are good values. "num_val_loader_workers": 4 // number of evaluation data loader processes. } diff --git a/datasets/preprocess.py b/datasets/preprocess.py index cb2b009d..53ffaf18 100644 --- a/datasets/preprocess.py +++ b/datasets/preprocess.py @@ -42,6 +42,28 @@ def tweb(root_path, meta_file): # return {'text': texts, 'wavs': wavs} +def mailabs(root_path, meta_files): + """Normalizes M-AI-Labs meta data files to TTS format""" + folders = [os.path.dirname(f.strip()) for f in meta_files.split(",")] + meta_files = [f.strip() for f in meta_files.split(",")] + items = [] + for idx, meta_file in enumerate(meta_files): + print(" | > {}".format(meta_file)) + folder = folders[idx] + txt_file = os.path.join(root_path, meta_file) + with open(txt_file, 'r') as ttf: + for line in ttf: + cols = line.split('|') + wav_file = os.path.join(root_path, folder, 'wavs', cols[0]+'.wav') + if os.path.isfile(wav_file): + text = cols[1] + items.append([text, wav_file]) + else: + continue + random.shuffle(items) + return items + + def ljspeech(root_path, meta_file): """Normalizes the Nancy meta data file to TTS format""" txt_file = os.path.join(root_path, meta_file)