diff --git a/.compute b/.compute index aef3d770..14be6aa9 100644 --- a/.compute +++ b/.compute @@ -1,10 +1,5 @@ #!/bin/bash source ../tmp/venv/bin/activate -# ls /snakepit/jobs/1023/keep/ -# source /snakepit/jobs/1023/keep/venv/bin/activate -# source /snakepit/jobs/1047/keep/venv/bin/activate -# python extract_feats.py --data_path /snakepit/shared/data/keithito/LJSpeech-1.1/wavs --out_path /snakepit/shared/data/keithito/LJSpeech-1.1/loader_data/ --config config.json --num_proc 32 -# python train.py --config_path config_kusal.json --debug true -python train.py --config_path config.json --debug true -# python -m cProfile -o profile.cprof train.py --config_path config.json --debug true -# nvidia-smi +# python extract_features.py --data_path ${DATA_ROOT}/shared/data/keithito/LJSpeech-1.1/ --cache_path ~/tts_cache/ --config config.json --num_proc 12 --dataset ljspeech --meta_file metadata.csv --val_split 1000 --process_audio true +# python train.py --config_path config.json --data_path ~/tts_cache/ --debug true +python train.py --config_path config.json --data_path ${DATA_ROOT}/shared/data/keithito/LJSpeech-1.1/ --debug true \ No newline at end of file diff --git a/.install b/.install index acc7b6cc..0ae9b43b 100644 --- a/.install +++ b/.install @@ -1,4 +1,4 @@ #!/bin/bash virtualenv -p python3 ../tmp/venv source ../tmp/venv/bin/activate -python setup.py develop +python setup.py develop \ No newline at end of file diff --git a/extract_feats.py b/extract_feats.py deleted file mode 100644 index f075110c..00000000 --- a/extract_feats.py +++ /dev/null @@ -1,108 +0,0 @@ -''' -Extract spectrograms and save them to file for training -''' -import os -import sys -import time -import glob -import argparse -import librosa -import numpy as np -import tqdm -from utils.generic_utils import load_config - -from multiprocessing import Pool - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument('--data_path', type=str, help='Data folder.') - parser.add_argument('--out_path', type=str, help='Output folder.') - parser.add_argument( - '--config', type=str, help='conf.json file for run settings.') - parser.add_argument( - "--num_proc", type=int, default=8, help="number of processes.") - parser.add_argument( - "--trim_silence", - type=bool, - default=False, - help="trim silence in the voice clip.") - args = parser.parse_args() - DATA_PATH = args.data_path - OUT_PATH = args.out_path - CONFIG = load_config(args.config) - - print(" > Input path: ", DATA_PATH) - print(" > Output path: ", OUT_PATH) - - audio = importlib.import_module('utils.' + c.audio_processor) - AudioProcessor = getattr(audio, 'AudioProcessor') - ap = AudioProcessor( - sample_rate=CONFIG.sample_rate, - num_mels=CONFIG.num_mels, - min_level_db=CONFIG.min_level_db, - frame_shift_ms=CONFIG.frame_shift_ms, - frame_length_ms=CONFIG.frame_length_ms, - ref_level_db=CONFIG.ref_level_db, - num_freq=CONFIG.num_freq, - power=CONFIG.power, - preemphasis=CONFIG.preemphasis, - min_mel_freq=CONFIG.min_mel_freq, - max_mel_freq=CONFIG.max_mel_freq) - - def trim_silence(self, wav): - margin = int(CONFIG.sample_rate * 0.1) - wav = wav[margin:-margin] - return librosa.effects.trim( - wav, top_db=40, frame_length=1024, hop_length=256)[0] - - def extract_mel(file_path): - # x, fs = sf.read(file_path) - x, fs = librosa.load(file_path, CONFIG.sample_rate) - if args.trim_silence: - x = trim_silence(x) - mel = ap.melspectrogram(x.astype('float32')).astype('float32') - linear = ap.spectrogram(x.astype('float32')).astype('float32') - file_name = os.path.basename(file_path).replace(".wav", "") - mel_file = file_name + ".mel" - linear_file = file_name + ".linear" - np.save(os.path.join(OUT_PATH, mel_file), mel, allow_pickle=False) - np.save( - os.path.join(OUT_PATH, linear_file), linear, allow_pickle=False) - mel_len = mel.shape[1] - linear_len = linear.shape[1] - wav_len = x.shape[0] - print(" > " + file_path, flush=True) - return file_path, mel_file, linear_file, str(wav_len), str( - mel_len), str(linear_len) - - glob_path = os.path.join(DATA_PATH, "*.wav") - print(" > Reading wav: {}".format(glob_path)) - file_names = glob.glob(glob_path, recursive=True) - - if __name__ == "__main__": - print(" > Number of files: %i" % (len(file_names))) - if not os.path.exists(OUT_PATH): - os.makedirs(OUT_PATH) - print(" > A new folder created at {}".format(OUT_PATH)) - - r = [] - if args.num_proc > 1: - print(" > Using {} processes.".format(args.num_proc)) - with Pool(args.num_proc) as p: - r = list( - tqdm.tqdm( - p.imap(extract_mel, file_names), - total=len(file_names))) - # r = list(p.imap(extract_mel, file_names)) - else: - print(" > Using single process run.") - for file_name in file_names: - print(" > ", file_name) - r.append(extract_mel(file_name)) - - file_path = os.path.join(OUT_PATH, "meta_fftnet.csv") - file = open(file_path, "w") - for line in r: - line = ", ".join(line) - file.write(line + '\n') - file.close() diff --git a/tests/loader_tests.py b/tests/loader_tests.py index 81ab59d4..e5dae929 100644 --- a/tests/loader_tests.py +++ b/tests/loader_tests.py @@ -15,6 +15,13 @@ os.makedirs(OUTPATH, exist_ok=True) c = load_config(os.path.join(file_path, 'test_config.json')) ok_ljspeech = os.path.exists(c.data_path) +DATA_EXIST = True +CACHE_EXIST = True +if not os.path.exists(c.data_path_cache): + CACHE_EXIST = False + +if not os.path.exists(c.data_path): + DATA_EXIST = False class TestTTSDataset(unittest.TestCase): def __init__(self, *args, **kwargs):