diff --git a/config.json b/config.json index 178c1b68..9046bfa3 100644 --- a/config.json +++ b/config.json @@ -28,7 +28,6 @@ "embedding_size": 256, "text_cleaner": "english_cleaners", "epochs": 1000, - "lr": 0.001, "lr_decay": false, "warmup_steps": 4000, diff --git a/datasets/preprocess.py b/datasets/preprocess.py index 67c184ef..cb2b009d 100644 --- a/datasets/preprocess.py +++ b/datasets/preprocess.py @@ -8,7 +8,7 @@ def tts_cache(root_path, meta_file): with open(txt_file, 'r', encoding='utf8') as f: for line in f: cols = line.split('| ') - items.append(cols) # wav_full_path, mel_name, linear_name, wav_len, mel_len, text + items.append(cols) # text, wav_full_path, mel_name, linear_name, wav_len, mel_len random.shuffle(items) return items diff --git a/extract_features.py b/extract_features.py index 52d2d913..3e617b3f 100644 --- a/extract_features.py +++ b/extract_features.py @@ -69,7 +69,7 @@ if __name__ == "__main__": linear = ap.spectrogram(x.astype('float32')).astype('float32') linear_len = linear.shape[1] np.save(linear_path, linear, allow_pickle=False) - output.insert(2, linear_path+".npy") + output.insert(3, linear_path+".npy") assert mel_len == linear_len if args.process_audio: audio_file = file_name + "_audio"