mirror of https://github.com/coqui-ai/TTS.git
add save audio option
This commit is contained in:
parent
8228091f92
commit
20e42a3381
|
@ -66,10 +66,12 @@ def set_filename(wav_path, out_path):
|
||||||
os.makedirs(os.path.join(out_path, "quant"), exist_ok=True)
|
os.makedirs(os.path.join(out_path, "quant"), exist_ok=True)
|
||||||
os.makedirs(os.path.join(out_path, "mel"), exist_ok=True)
|
os.makedirs(os.path.join(out_path, "mel"), exist_ok=True)
|
||||||
os.makedirs(os.path.join(out_path, "wav_gl"), exist_ok=True)
|
os.makedirs(os.path.join(out_path, "wav_gl"), exist_ok=True)
|
||||||
|
os.makedirs(os.path.join(out_path, "wav"), exist_ok=True)
|
||||||
wavq_path = os.path.join(out_path, "quant", file_name)
|
wavq_path = os.path.join(out_path, "quant", file_name)
|
||||||
mel_path = os.path.join(out_path, "mel", file_name)
|
mel_path = os.path.join(out_path, "mel", file_name)
|
||||||
wav_path = os.path.join(out_path, "wav_gl", file_name+'.wav')
|
wav_gl_path = os.path.join(out_path, "wav_gl", file_name+'.wav')
|
||||||
return file_name, wavq_path, mel_path, wav_path
|
wav_path = os.path.join(out_path, "wav", file_name+'.wav')
|
||||||
|
return file_name, wavq_path, mel_path, wav_gl_path, wav_path
|
||||||
|
|
||||||
def format_data(data):
|
def format_data(data):
|
||||||
# setup input data
|
# setup input data
|
||||||
|
@ -122,7 +124,7 @@ def format_data(data):
|
||||||
)
|
)
|
||||||
|
|
||||||
@torch.no_grad()
|
@torch.no_grad()
|
||||||
def extract_spectrograms(data_loader, model, ap, output_path, quantized_wav=False, debug=False, metada_name="metada.txt"):
|
def extract_spectrograms(data_loader, model, ap, output_path, quantized_wav=False, save_audio=False, debug=False, metada_name="metada.txt"):
|
||||||
model.eval()
|
model.eval()
|
||||||
export_metadata = []
|
export_metadata = []
|
||||||
for _, data in tqdm(enumerate(data_loader), total=len(data_loader)):
|
for _, data in tqdm(enumerate(data_loader), total=len(data_loader)):
|
||||||
|
@ -185,7 +187,7 @@ def extract_spectrograms(data_loader, model, ap, output_path, quantized_wav=Fals
|
||||||
for idx in range(text_input.shape[0]):
|
for idx in range(text_input.shape[0]):
|
||||||
wav_file_path = item_idx[idx]
|
wav_file_path = item_idx[idx]
|
||||||
wav = ap.load_wav(wav_file_path)
|
wav = ap.load_wav(wav_file_path)
|
||||||
_, wavq_path, mel_path, wav_path = set_filename(wav_file_path, output_path)
|
_, wavq_path, mel_path, wav_gl_path, wav_path = set_filename(wav_file_path, output_path)
|
||||||
|
|
||||||
# quantize and save wav
|
# quantize and save wav
|
||||||
if quantized_wav:
|
if quantized_wav:
|
||||||
|
@ -199,11 +201,13 @@ def extract_spectrograms(data_loader, model, ap, output_path, quantized_wav=Fals
|
||||||
np.save(mel_path, mel)
|
np.save(mel_path, mel)
|
||||||
|
|
||||||
export_metadata.append([wav_file_path, mel_path])
|
export_metadata.append([wav_file_path, mel_path])
|
||||||
|
if save_audio:
|
||||||
|
ap.save_wav(wav, wav_path)
|
||||||
|
|
||||||
if debug:
|
if debug:
|
||||||
print("Audio for debug saved at:", wav_path)
|
print("Audio for debug saved at:", wav_gl_path)
|
||||||
wav = ap.inv_melspectrogram(mel)
|
wav = ap.inv_melspectrogram(mel)
|
||||||
ap.save_wav(wav, wav_path)
|
ap.save_wav(wav, wav_gl_path)
|
||||||
|
|
||||||
with open(os.path.join(output_path, metada_name), "w") as f:
|
with open(os.path.join(output_path, metada_name), "w") as f:
|
||||||
for data in export_metadata:
|
for data in export_metadata:
|
||||||
|
@ -247,7 +251,7 @@ def main(args): # pylint: disable=redefined-outer-name
|
||||||
r = 1 if c.model.lower() == "glow_tts" else model.decoder.r
|
r = 1 if c.model.lower() == "glow_tts" else model.decoder.r
|
||||||
own_loader = setup_loader(ap, r, verbose=True)
|
own_loader = setup_loader(ap, r, verbose=True)
|
||||||
|
|
||||||
extract_spectrograms(own_loader, model, ap, args.output_path, quantized_wav=args.quantized, debug=args.debug, metada_name="metada.txt")
|
extract_spectrograms(own_loader, model, ap, args.output_path, quantized_wav=args.quantized, save_audio=args.save_audio, debug=args.debug, metada_name="metada.txt")
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
@ -271,6 +275,10 @@ if __name__ == "__main__":
|
||||||
default=False,
|
default=False,
|
||||||
action='store_true',
|
action='store_true',
|
||||||
help='Save audio files for debug')
|
help='Save audio files for debug')
|
||||||
|
parser.add_argument('--save_audio',
|
||||||
|
default=False,
|
||||||
|
action='store_true',
|
||||||
|
help='Save audio files')
|
||||||
parser.add_argument('--quantized',
|
parser.add_argument('--quantized',
|
||||||
action='store_true',
|
action='store_true',
|
||||||
help='Save quantized audio files')
|
help='Save quantized audio files')
|
||||||
|
|
Loading…
Reference in New Issue