mirror of https://github.com/coqui-ai/TTS.git
Merge branch 'dev' of https://github.com/mozilla/TTS into dev
commit 0252a07fa6
@@ -9,6 +9,7 @@ import string
import time

import torch
import numpy as np

from TTS.tts.utils.generic_utils import setup_model, is_tacotron
from TTS.tts.utils.synthesis import synthesis
@@ -21,6 +22,18 @@ from TTS.vocoder.utils.generic_utils import setup_generator
def tts(model, vocoder_model, text, CONFIG, use_cuda, ap, use_gl, speaker_fileid, speaker_embedding=None, gst_style=None):
    t_1 = time.time()
    waveform, _, _, mel_postnet_spec, _, _ = synthesis(model, text, CONFIG, use_cuda, ap, speaker_fileid, gst_style, False, CONFIG.enable_eos_bos_chars, use_gl, speaker_embedding=speaker_embedding)

    # grab spectrogram (thx to the nice guys at mozilla discourse for codesnipplet)
    if args.save_spectogram:
        spec_file_name = args.text.replace(" ", "_")
        spec_file_name = spec_file_name.translate(
            str.maketrans('', '', string.punctuation.replace('_', ''))) + '.npy'
        spec_file_name = os.path.join(args.out_path, spec_file_name)
        spectrogram = torch.FloatTensor(mel_postnet_spec.T)
        spectrogram = spectrogram.unsqueeze(0)
        np.save(spec_file_name, spectrogram)
        print(" > Saving raw spectogram to " + spec_file_name)

    if CONFIG.model == "Tacotron" and not use_gl:
        mel_postnet_spec = ap.out_linear_to_mel(mel_postnet_spec.T).T
    if not use_gl:
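Since np.save is called on the torch tensor directly, the file round-trips as a plain NumPy array. A minimal sketch of loading a saved spectrogram back for separate vocoder processing (the file name and the (1, n_mels, n_frames) layout are assumptions inferred from the .T and unsqueeze(0) calls above, not something this commit documents):

import numpy as np
import torch

# Hypothetical file name; the real one is derived from --text by the code above.
spec = np.load("out/Hello_world.npy")

# Assumed layout: (1, n_mels, n_frames), following the .T and unsqueeze(0) above.
mel = torch.from_numpy(spec)
print("mel spectrogram:", tuple(mel.shape), mel.dtype)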
@@ -88,6 +101,11 @@ if __name__ == "__main__":
        '--gst_style',
        help="Wav path file for GST stylereference.",
        default=None)
    parser.add_argument(
        '--save_spectogram',
        type=bool,
        help="If true save raw spectogram for further (vocoder) processing in out_path.",
        default=False)

    args = parser.parse_args()
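One caveat about the flag definition above: argparse's type=bool does not parse truth values, it simply calls bool() on the string, so any non-empty value counts as true. A small standalone sketch of that behaviour and a common alternative (this is general argparse behaviour, not something this commit changes):

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--save_spectogram', type=bool, default=False)

# Any non-empty string is truthy, so even "false" enables the flag.
args = parser.parse_args(['--save_spectogram', 'false'])
print(args.save_spectogram)   # True

# A common alternative is a store_true flag, which is only True when present.
parser2 = argparse.ArgumentParser()
parser2.add_argument('--save_spectogram', action='store_true')
print(parser2.parse_args([]).save_spectogram)                      # False
print(parser2.parse_args(['--save_spectogram']).save_spectogram)   # True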
@@ -105,8 +105,8 @@ class Wavegrad(nn.Module):
        self.noise_level = self.noise_level.to(y_0)
        if len(y_0.shape) == 3:
            y_0 = y_0.squeeze(1)
-        s = torch.randint(1, self.num_steps + 1, [y_0.shape[0]])
-        l_a, l_b = self.noise_level[s-1], self.noise_level[s]
+        s = torch.randint(0, self.num_steps - 1, [y_0.shape[0]])
+        l_a, l_b = self.noise_level[s], self.noise_level[s+1]
        noise_scale = l_a + torch.rand(y_0.shape[0]).to(y_0) * (l_b - l_a)
        noise_scale = noise_scale.unsqueeze(1)
        noise = torch.randn_like(y_0)
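The Wavegrad hunk shifts the sampled index range so that l_a and l_b read noise_level[s] and noise_level[s+1] rather than noise_level[s-1] and noise_level[s], keeping the interpolation between adjacent entries of the precomputed schedule while staying inside its bounds. A rough standalone sketch of that continuous noise-level sampling (the schedule length, beta range, and batch size below are illustrative assumptions, not the model's actual configuration):

import torch

# Illustrative schedule only; the real one comes from the model config.
num_steps = 1000
beta = torch.linspace(1e-6, 0.01, num_steps)
alpha = 1.0 - beta
noise_level = torch.sqrt(torch.cumprod(alpha, dim=0))   # num_steps entries

batch = 4
# New indexing: s and s + 1 both stay inside [0, num_steps - 1].
s = torch.randint(0, num_steps - 1, [batch])
l_a, l_b = noise_level[s], noise_level[s + 1]

# Continuous noise level sampled uniformly between two adjacent schedule points.
noise_scale = l_a + torch.rand(batch) * (l_b - l_a)
print(noise_scale.shape)   # torch.Size([4])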