Add M-AI-labs preprocessor and set config for en_UK

Eren Golge 2019-01-14 17:29:14 +01:00
parent f4fa155cd3
commit 7af1aeaf7a
2 changed files with 27 additions and 5 deletions

config.json

@@ -43,13 +43,13 @@
     "tb_model_param_stats": true, // true, plots param stats per layer on tensorboard. Might be memory consuming, but good for debugging.
     "run_eval": true,
-    "data_path": "../../Data/LJSpeech-1.1/", // DATASET-RELATED: can be overwritten from the command line
-    "meta_file_train": "transcript_train.txt", // DATASET-RELATED: metafile for training dataloader.
-    "meta_file_val": "transcript_val.txt", // DATASET-RELATED: metafile for evaluation dataloader.
-    "dataset": "tweb", // DATASET-RELATED: one of TTS.dataset.preprocessors, depending on your target dataset. Use "tts_cache" for a dataset pre-computed by extract_features.py
+    "data_path": "/home/erogol/Data/en_UK/by_book/female/elizabeth_klett/", // DATASET-RELATED: can be overwritten from the command line
+    "meta_file_train": "jane_eyre/metadata_train.csv, wives_and_daughters/metadata_train.csv", // DATASET-RELATED: metafile for training dataloader.
+    "meta_file_val": "jane_eyre/metadata_val.csv, wives_and_daughters/metadata_val.csv", // DATASET-RELATED: metafile for evaluation dataloader.
+    "dataset": "mailabs", // DATASET-RELATED: one of TTS.dataset.preprocessors, depending on your target dataset. Use "tts_cache" for a dataset pre-computed by extract_features.py
     "min_seq_len": 0, // DATASET-RELATED: minimum text length to use in training
     "max_seq_len": 300, // DATASET-RELATED: maximum text length
-    "output_path": "/media/erogol/data_ssd/Data/models/tweb_models/", // DATASET-RELATED: output path for all training outputs.
+    "output_path": "/media/erogol/data_ssd/Data/models/en_UK/", // DATASET-RELATED: output path for all training outputs.
     "num_loader_workers": 8, // number of training data loader processes. Don't set it too big. 4-8 are good values.
     "num_val_loader_workers": 4 // number of evaluation data loader processes.
 }
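
For reference, a minimal sketch of how the comma-separated metafile entries above are resolved against data_path, mirroring the path logic of the mailabs preprocessor added below (the variable names here are illustrative, not part of the commit):

import os

data_path = "/home/erogol/Data/en_UK/by_book/female/elizabeth_klett/"
meta_file_train = "jane_eyre/metadata_train.csv, wives_and_daughters/metadata_train.csv"

for entry in meta_file_train.split(","):
    entry = entry.strip()
    book = os.path.dirname(entry)                    # "jane_eyre", "wives_and_daughters"
    metafile = os.path.join(data_path, entry)        # full path to the metadata csv
    wav_dir = os.path.join(data_path, book, "wavs")  # where the preprocessor expects audio
    print(metafile, wav_dir)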

datasets/preprocess.py

@@ -42,6 +42,28 @@ def tweb(root_path, meta_file):
     # return {'text': texts, 'wavs': wavs}


+def mailabs(root_path, meta_files):
+    """Normalizes M-AI-Labs metadata files to TTS format."""
+    # meta_files is a comma-separated list; each entry sits inside its book
+    # folder, e.g. "jane_eyre/metadata_train.csv". Relies on os and random
+    # being imported at the top of this module.
+    folders = [os.path.dirname(f.strip()) for f in meta_files.split(",")]
+    meta_files = [f.strip() for f in meta_files.split(",")]
+    items = []
+    for idx, meta_file in enumerate(meta_files):
+        print(" | > {}".format(meta_file))
+        folder = folders[idx]
+        txt_file = os.path.join(root_path, meta_file)
+        with open(txt_file, 'r') as ttf:
+            for line in ttf:
+                cols = line.split('|')
+                wav_file = os.path.join(root_path, folder, 'wavs', cols[0] + '.wav')
+                # keep only entries whose audio file exists on disk
+                if os.path.isfile(wav_file):
+                    text = cols[1]
+                    items.append([text, wav_file])
+    random.shuffle(items)
+    return items
+
+
 def ljspeech(root_path, meta_file):
     """Normalizes the LJSpeech metadata file to TTS format."""
     txt_file = os.path.join(root_path, meta_file)
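
A quick usage sketch of the new preprocessor under the config above (the direct call is illustrative; in TTS the dataloader invokes whichever preprocessor the "dataset" config key names):

items = mailabs(
    "/home/erogol/Data/en_UK/by_book/female/elizabeth_klett/",
    "jane_eyre/metadata_train.csv, wives_and_daughters/metadata_train.csv")
# each item is a [text, wav_file] pair, shuffled across both books
text, wav_file = items[0]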