mirror of https://github.com/coqui-ai/TTS.git
Test updates
parent c8a552e627
commit 4681f935b4

.compute (11 lines changed)
@@ -1,10 +1,5 @@
 #!/bin/bash
 source ../tmp/venv/bin/activate
-# ls /snakepit/jobs/1023/keep/
-# source /snakepit/jobs/1023/keep/venv/bin/activate
-# source /snakepit/jobs/1047/keep/venv/bin/activate
-# python extract_feats.py --data_path /snakepit/shared/data/keithito/LJSpeech-1.1/wavs --out_path /snakepit/shared/data/keithito/LJSpeech-1.1/loader_data/ --config config.json --num_proc 32
-# python train.py --config_path config_kusal.json --debug true
-python train.py --config_path config.json --debug true
-# python -m cProfile -o profile.cprof train.py --config_path config.json --debug true
-# nvidia-smi
+# python extract_features.py --data_path ${DATA_ROOT}/shared/data/keithito/LJSpeech-1.1/ --cache_path ~/tts_cache/ --config config.json --num_proc 12 --dataset ljspeech --meta_file metadata.csv --val_split 1000 --process_audio true
+# python train.py --config_path config.json --data_path ~/tts_cache/ --debug true
+python train.py --config_path config.json --data_path ${DATA_ROOT}/shared/data/keithito/LJSpeech-1.1/ --debug true
.install (2 lines changed)
@@ -1,4 +1,4 @@
 #!/bin/bash
 virtualenv -p python3 ../tmp/venv
 source ../tmp/venv/bin/activate
 python setup.py develop
extract_feats.py (deleted, 108 lines)
@@ -1,108 +0,0 @@
-'''
-Extract spectrograms and save them to file for training
-'''
-import os
-import sys
-import time
-import glob
-import argparse
-import librosa
-import numpy as np
-import tqdm
-from utils.generic_utils import load_config
-
-from multiprocessing import Pool
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser()
-    parser.add_argument('--data_path', type=str, help='Data folder.')
-    parser.add_argument('--out_path', type=str, help='Output folder.')
-    parser.add_argument(
-        '--config', type=str, help='conf.json file for run settings.')
-    parser.add_argument(
-        "--num_proc", type=int, default=8, help="number of processes.")
-    parser.add_argument(
-        "--trim_silence",
-        type=bool,
-        default=False,
-        help="trim silence in the voice clip.")
-    args = parser.parse_args()
-    DATA_PATH = args.data_path
-    OUT_PATH = args.out_path
-    CONFIG = load_config(args.config)
-
-    print(" > Input path: ", DATA_PATH)
-    print(" > Output path: ", OUT_PATH)
-
-    audio = importlib.import_module('utils.' + c.audio_processor)
-    AudioProcessor = getattr(audio, 'AudioProcessor')
-    ap = AudioProcessor(
-        sample_rate=CONFIG.sample_rate,
-        num_mels=CONFIG.num_mels,
-        min_level_db=CONFIG.min_level_db,
-        frame_shift_ms=CONFIG.frame_shift_ms,
-        frame_length_ms=CONFIG.frame_length_ms,
-        ref_level_db=CONFIG.ref_level_db,
-        num_freq=CONFIG.num_freq,
-        power=CONFIG.power,
-        preemphasis=CONFIG.preemphasis,
-        min_mel_freq=CONFIG.min_mel_freq,
-        max_mel_freq=CONFIG.max_mel_freq)
-
-def trim_silence(self, wav):
-    margin = int(CONFIG.sample_rate * 0.1)
-    wav = wav[margin:-margin]
-    return librosa.effects.trim(
-        wav, top_db=40, frame_length=1024, hop_length=256)[0]
-
-def extract_mel(file_path):
-    # x, fs = sf.read(file_path)
-    x, fs = librosa.load(file_path, CONFIG.sample_rate)
-    if args.trim_silence:
-        x = trim_silence(x)
-    mel = ap.melspectrogram(x.astype('float32')).astype('float32')
-    linear = ap.spectrogram(x.astype('float32')).astype('float32')
-    file_name = os.path.basename(file_path).replace(".wav", "")
-    mel_file = file_name + ".mel"
-    linear_file = file_name + ".linear"
-    np.save(os.path.join(OUT_PATH, mel_file), mel, allow_pickle=False)
-    np.save(
-        os.path.join(OUT_PATH, linear_file), linear, allow_pickle=False)
-    mel_len = mel.shape[1]
-    linear_len = linear.shape[1]
-    wav_len = x.shape[0]
-    print(" > " + file_path, flush=True)
-    return file_path, mel_file, linear_file, str(wav_len), str(
-        mel_len), str(linear_len)
-
-glob_path = os.path.join(DATA_PATH, "*.wav")
-print(" > Reading wav: {}".format(glob_path))
-file_names = glob.glob(glob_path, recursive=True)
-
-if __name__ == "__main__":
-    print(" > Number of files: %i" % (len(file_names)))
-    if not os.path.exists(OUT_PATH):
-        os.makedirs(OUT_PATH)
-        print(" > A new folder created at {}".format(OUT_PATH))
-
-    r = []
-    if args.num_proc > 1:
-        print(" > Using {} processes.".format(args.num_proc))
-        with Pool(args.num_proc) as p:
-            r = list(
-                tqdm.tqdm(
-                    p.imap(extract_mel, file_names),
-                    total=len(file_names)))
-            # r = list(p.imap(extract_mel, file_names))
-    else:
-        print(" > Using single process run.")
-        for file_name in file_names:
-            print(" > ", file_name)
-            r.append(extract_mel(file_name))
-
-    file_path = os.path.join(OUT_PATH, "meta_fftnet.csv")
-    file = open(file_path, "w")
-    for line in r:
-        line = ", ".join(line)
-        file.write(line + '\n')
-    file.close()
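The deleted script would not have run as written: importlib is never imported, c.audio_processor refers to an undefined c where CONFIG is meant, trim_silence takes an unused self argument but is called with only the waveform, and --trim_silence uses type=bool, which argparse treats as true for any non-empty string. Below is a minimal corrected sketch of the same extraction flow, single-process for brevity where the original used multiprocessing.Pool; the AudioProcessor constructor and load_config interfaces are assumed to match the deleted code above, and this is not the repository's current tooling.

# Corrected sketch of the removed extract_feats.py (an assumption, not the
# repo's actual replacement). AudioProcessor/load_config interfaces are taken
# from the deleted file shown above.
import argparse
import glob
import importlib
import os

import librosa
import numpy as np

from utils.generic_utils import load_config  # repo utility, as in the original


def trim_silence(wav, sample_rate):
    # Drop a 100 ms margin on both ends, then trim leading/trailing silence.
    margin = int(sample_rate * 0.1)
    wav = wav[margin:-margin]
    return librosa.effects.trim(
        wav, top_db=40, frame_length=1024, hop_length=256)[0]


def extract_mel(file_path, ap, config, out_path, do_trim):
    # Compute mel and linear spectrograms for one clip and save them to disk.
    x, _ = librosa.load(file_path, sr=config.sample_rate)
    if do_trim:
        x = trim_silence(x, config.sample_rate)
    mel = ap.melspectrogram(x.astype('float32')).astype('float32')
    linear = ap.spectrogram(x.astype('float32')).astype('float32')
    name = os.path.basename(file_path).replace(".wav", "")
    np.save(os.path.join(out_path, name + ".mel"), mel, allow_pickle=False)
    np.save(os.path.join(out_path, name + ".linear"), linear, allow_pickle=False)
    return (file_path, name + ".mel", name + ".linear",
            str(x.shape[0]), str(mel.shape[1]), str(linear.shape[1]))


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_path', type=str, help='Data folder.')
    parser.add_argument('--out_path', type=str, help='Output folder.')
    parser.add_argument('--config', type=str, help='config.json for run settings.')
    # store_true instead of type=bool, which never parses "False" as false.
    parser.add_argument('--trim_silence', action='store_true',
                        help='trim silence in each clip.')
    args = parser.parse_args()

    config = load_config(args.config)
    audio = importlib.import_module('utils.' + config.audio_processor)
    ap = getattr(audio, 'AudioProcessor')(
        sample_rate=config.sample_rate, num_mels=config.num_mels,
        min_level_db=config.min_level_db, frame_shift_ms=config.frame_shift_ms,
        frame_length_ms=config.frame_length_ms, ref_level_db=config.ref_level_db,
        num_freq=config.num_freq, power=config.power,
        preemphasis=config.preemphasis, min_mel_freq=config.min_mel_freq,
        max_mel_freq=config.max_mel_freq)

    os.makedirs(args.out_path, exist_ok=True)
    wav_files = glob.glob(os.path.join(args.data_path, "*.wav"))
    rows = [extract_mel(p, ap, config, args.out_path, args.trim_silence)
            for p in wav_files]
    with open(os.path.join(args.out_path, "meta_fftnet.csv"), "w") as meta:
        for row in rows:
            meta.write(", ".join(row) + "\n")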
(test file; name not captured in this view)
@@ -15,6 +15,13 @@ os.makedirs(OUTPATH, exist_ok=True)
 c = load_config(os.path.join(file_path, 'test_config.json'))
 ok_ljspeech = os.path.exists(c.data_path)
 
+DATA_EXIST = True
+CACHE_EXIST = True
+if not os.path.exists(c.data_path_cache):
+    CACHE_EXIST = False
+
+if not os.path.exists(c.data_path):
+    DATA_EXIST = False
 
 class TestTTSDataset(unittest.TestCase):
     def __init__(self, *args, **kwargs):
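The added flags only record whether the LJSpeech data and the feature cache exist; the parts of the test file that consume them are outside this hunk. A plausible pattern, sketched below as an assumption rather than the file's actual code, is to use the flags with unittest skip decorators so the loader tests pass on machines without the dataset.

# Hypothetical guards built on the DATA_EXIST / CACHE_EXIST flags added above;
# the class and test names here (other than TestTTSDataset) are made up.
import unittest

@unittest.skipUnless(DATA_EXIST, "c.data_path not found; skipping loader tests.")
class TestTTSDataset(unittest.TestCase):
    def test_reads_raw_wavs(self):
        ...

@unittest.skipUnless(CACHE_EXIST, "c.data_path_cache not found; skipping cache tests.")
class TestTTSCachedDataset(unittest.TestCase):
    def test_reads_cached_features(self):
        ...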