removed shuffling of data in the preprocessor, uniform indentation

2019-04-25 11:46:11 +02:00 · 2019-04-25 11:46:11 +02:00 · f279fe9e8b
parent 41e3e42989
commit f279fe9e8b
1 changed file with 46 additions and 51 deletions
--- a/datasets/preprocess.py
+++ b/datasets/preprocess.py
@@ -1,5 +1,4 @@
 import os
-import random


 def tts_cache(root_path, meta_file):
@@ -9,8 +8,8 @@ def tts_cache(root_path, meta_file):
    with open(txt_file, 'r', encoding='utf8') as f:
        for line in f:
            cols = line.split('| ')
-            items.append(cols)  # text, wav_full_path, mel_name, linear_name, wav_len, mel_len
-    random.shuffle(items)
+            # text, wav_full_path, mel_name, linear_name, wav_len, mel_len
+            items.append(cols)
    return items


@@ -26,7 +25,6 @@ def tweb(root_path, meta_file):
            wav_file = os.path.join(root_path, cols[0] + '.wav')
            text = cols[1]
            items.append([text, wav_file])
-    random.shuffle(items)
    return items


@@ -55,14 +53,14 @@ def mozilla(root_path, meta_files):
        with open(txt_file, 'r') as ttf:
            for line in ttf:
                cols = line.split('|')
-                                wav_file = os.path.join(root_path, folder, 'wavs_no_processing', cols[1].strip())
+                wav_file = os.path.join(root_path, folder, 'wavs_no_processing',
+                                        cols[1].strip())
                if os.path.isfile(wav_file):
                    text = cols[0].strip()
                    items.append([text, wav_file])
                else:
                    print(" > Error: {}".format(cols))
                    continue
-        random.shuffle(items)
    return items


@@ -78,13 +76,13 @@ def mailabs(root_path, meta_files):
        with open(txt_file, 'r') as ttf:
            for line in ttf:
                cols = line.split('|')
-                                wav_file = os.path.join(root_path, folder, 'wavs', cols[0]+'.wav')
+                wav_file = os.path.join(root_path, folder, 'wavs',
+                                        cols[0] + '.wav')
                if os.path.isfile(wav_file):
                    text = cols[1]
                    items.append([text, wav_file])
                else:
                    continue
-        random.shuffle(items)
    return items


@@ -98,7 +96,6 @@ def ljspeech(root_path, meta_file):
            wav_file = os.path.join(root_path, 'wavs', cols[0] + '.wav')
            text = cols[1]
            items.append([text, wav_file])
-    random.shuffle(items)
    return items


@@ -112,7 +109,6 @@ def nancy(root_path, meta_file):
            text = line[line.find('"') + 1:line.rfind('"') - 1]
            wav_file = os.path.join(root_path, "wavn", id + ".wav")
            items.append([text, wav_file])
-    random.shuffle(items)    
    return items


@@ -126,7 +122,6 @@ def common_voice(root_path, meta_file):
                continue
            cols = line.split("\t")
            text = cols[2]
-            # Files need to be first converted to wav...
            wav_file = os.path.join(root_path, "clips", cols[1] + ".wav")
            items.append([text, wav_file])
    return items