From 28a64221eaf6d6435d61d4ca72730bb994e22cc4 Mon Sep 17 00:00:00 2001 From: Thorsten Mueller Date: Sat, 19 Dec 2020 22:23:28 +0100 Subject: [PATCH 1/3] Improve robustness on cpu / gpu model mix --- TTS/bin/synthesize.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/TTS/bin/synthesize.py b/TTS/bin/synthesize.py index b08f9345..40852bdf 100644 --- a/TTS/bin/synthesize.py +++ b/TTS/bin/synthesize.py @@ -37,7 +37,8 @@ def tts(model, vocoder_model, text, CONFIG, use_cuda, ap, use_gl, speaker_fileid if CONFIG.model == "Tacotron" and not use_gl: mel_postnet_spec = ap.out_linear_to_mel(mel_postnet_spec.T).T if not use_gl: - waveform = vocoder_model.inference(torch.FloatTensor(mel_postnet_spec.T).unsqueeze(0)) + device_type = "cuda" if use_cuda else "cpu" + waveform = vocoder_model.inference(torch.FloatTensor(mel_postnet_spec.T).to(device_type).unsqueeze(0)) if use_cuda and not use_gl: waveform = waveform.cpu() if not use_gl: From 2aa0354b44266e48655973cca8cea8f2e17c483c Mon Sep 17 00:00:00 2001 From: Thorsten Mueller Date: Sat, 19 Dec 2020 22:37:03 +0100 Subject: [PATCH 2/3] Fix for 'NoneType' object has no attribute 'to' --- TTS/bin/synthesize.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/TTS/bin/synthesize.py b/TTS/bin/synthesize.py index 40852bdf..bd8a73f2 100644 --- a/TTS/bin/synthesize.py +++ b/TTS/bin/synthesize.py @@ -37,6 +37,9 @@ def tts(model, vocoder_model, text, CONFIG, use_cuda, ap, use_gl, speaker_fileid if CONFIG.model == "Tacotron" and not use_gl: mel_postnet_spec = ap.out_linear_to_mel(mel_postnet_spec.T).T if not use_gl: + beta = np.linspace(1e-6, 0.01, 50) + vocoder_model.compute_noise_level(beta) + device_type = "cuda" if use_cuda else "cpu" waveform = vocoder_model.inference(torch.FloatTensor(mel_postnet_spec.T).to(device_type).unsqueeze(0)) if use_cuda and not use_gl: From f673f8f74dd343889b6b81658e09fb61e33700ea Mon Sep 17 00:00:00 2001 From: Thorsten Mueller Date: Sat, 19 Dec 2020 22:51:22 +0100 Subject: [PATCH 3/3] 
Added support for npy output from tune-wavegrad --- TTS/bin/synthesize.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/TTS/bin/synthesize.py b/TTS/bin/synthesize.py index bd8a73f2..12ff4d30 100644 --- a/TTS/bin/synthesize.py +++ b/TTS/bin/synthesize.py @@ -37,8 +37,13 @@ def tts(model, vocoder_model, text, CONFIG, use_cuda, ap, use_gl, speaker_fileid if CONFIG.model == "Tacotron" and not use_gl: mel_postnet_spec = ap.out_linear_to_mel(mel_postnet_spec.T).T if not use_gl: + # Use this when the noise schedule has not been computed with tune_wavegrad beta = np.linspace(1e-6, 0.01, 50) vocoder_model.compute_noise_level(beta) + + # Use this alternative when loading the npy output file from tune_wavegrad + # beta = np.load("output-tune-wavegrad.npy", allow_pickle=True).item() + # vocoder_model.compute_noise_level(beta['beta']) device_type = "cuda" if use_cuda else "cpu" waveform = vocoder_model.inference(torch.FloatTensor(mel_postnet_spec.T).to(device_type).unsqueeze(0))