mirror of https://github.com/coqui-ai/TTS.git
trim silence if necessary
This commit is contained in:
parent
3154833f89
commit
ac3334d025
|
@ -13,6 +13,7 @@ from utils.generic_utils import load_config
|
||||||
|
|
||||||
from multiprocessing import Pool
|
from multiprocessing import Pool
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
parser = argparse.ArgumentParser()
|
parser = argparse.ArgumentParser()
|
||||||
parser.add_argument('--data_path', type=str,
|
parser.add_argument('--data_path', type=str,
|
||||||
|
@ -23,6 +24,8 @@ if __name__ == "__main__":
|
||||||
help='conf.json file for run settings.')
|
help='conf.json file for run settings.')
|
||||||
parser.add_argument("--num_proc", type=int, default=8,
|
parser.add_argument("--num_proc", type=int, default=8,
|
||||||
help="number of processes.")
|
help="number of processes.")
|
||||||
|
# NOTE: argparse's ``type=bool`` calls ``bool()`` on the raw command-line
# string, so every non-empty value -- including "False" and "0" -- would be
# parsed as True. Parse the text explicitly instead. ``nargs="?"`` with
# ``const=True`` additionally lets a bare ``--trim_silence`` enable trimming,
# while ``--trim_silence True`` keeps working as before.
parser.add_argument("--trim_silence",
                    type=lambda s: s.strip().lower() in ("1", "true", "t", "yes", "y", "on"),
                    nargs="?", const=True, default=False,
                    help="trim silence in the voice clip.")
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
DATA_PATH = args.data_path
|
DATA_PATH = args.data_path
|
||||||
OUT_PATH = args.out_path
|
OUT_PATH = args.out_path
|
||||||
|
@ -45,9 +48,19 @@ if __name__ == "__main__":
|
||||||
min_mel_freq = CONFIG.min_mel_freq,
|
min_mel_freq = CONFIG.min_mel_freq,
|
||||||
max_mel_freq = CONFIG.max_mel_freq)
|
max_mel_freq = CONFIG.max_mel_freq)
|
||||||
|
|
||||||
|
def trim_silence(wav):
    """Strip a fixed margin from both ends of ``wav`` and trim silence.

    This is a plain function (it is called as ``trim_silence(x)``), so it
    takes no ``self`` argument.

    Args:
        wav: 1-D audio signal, as returned by ``librosa.load``.

    Returns:
        The trimmed signal, i.e. the first element of the tuple returned
        by ``librosa.effects.trim``.
    """
    # A tenth of a second's worth of samples is cut from each end before
    # the energy-based trim runs.
    margin = int(CONFIG.sample_rate * 0.1)
    # Guard the slice: ``wav[0:-0]`` is empty, and a clip shorter than two
    # margins would also collapse to an empty array.
    if margin > 0 and len(wav) > 2 * margin:
        wav = wav[margin:-margin]
    trimmed, _ = librosa.effects.trim(
        wav, top_db=40,
        frame_length=1024,
        hop_length=256)
    return trimmed
|
||||||
|
|
||||||
def extract_mel(file_path):
|
def extract_mel(file_path):
|
||||||
# x, fs = sf.read(file_path)
|
# x, fs = sf.read(file_path)
|
||||||
x, fs = librosa.load(file_path, CONFIG.sample_rate)
|
x, fs = librosa.load(file_path, CONFIG.sample_rate)
|
||||||
|
if args.trim_silence:
|
||||||
|
x = trim_silence(x)
|
||||||
mel = ap.melspectrogram(x.astype('float32')).astype('float32')
|
mel = ap.melspectrogram(x.astype('float32')).astype('float32')
|
||||||
linear = ap.spectrogram(x.astype('float32')).astype('float32')
|
linear = ap.spectrogram(x.astype('float32')).astype('float32')
|
||||||
file_name = os.path.basename(file_path).replace(".wav","")
|
file_name = os.path.basename(file_path).replace(".wav","")
|
||||||
|
|
Loading…
Reference in New Issue