mirror of https://github.com/coqui-ai/TTS.git
Remove SpeedySpeech from .models.json
This commit is contained in:
parent
d97952611d
commit
26f76fce22
|
@ -47,15 +47,6 @@
|
||||||
"license": "MPL",
|
"license": "MPL",
|
||||||
"contact": "egolge@coqui.com"
|
"contact": "egolge@coqui.com"
|
||||||
},
|
},
|
||||||
"speedy-speech-wn": {
|
|
||||||
"description": "Speedy Speech model with wavenet decoder.",
|
|
||||||
"github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.1.0/tts_models--en--ljspeech--speedy-speech-wn.zip",
|
|
||||||
"default_vocoder": "vocoder_models/en/ljspeech/multiband-melgan",
|
|
||||||
"commit": "77b6145",
|
|
||||||
"author": "Eren Gölge @erogol",
|
|
||||||
"license": "MPL",
|
|
||||||
"contact": "egolge@coqui.com"
|
|
||||||
},
|
|
||||||
"vits": {
|
"vits": {
|
||||||
"description": "VITS is an End2End TTS model trained on LJSpeech dataset with phonemes.",
|
"description": "VITS is an End2End TTS model trained on LJSpeech dataset with phonemes.",
|
||||||
"github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.2.0/tts_models--en--ljspeech--vits.zip",
|
"github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.2.0/tts_models--en--ljspeech--vits.zip",
|
||||||
|
|
|
@ -1,15 +1 @@
|
||||||
from TTS.tts.layers.losses import *
|
from TTS.tts.layers.losses import *
|
||||||
|
|
||||||
|
|
||||||
def setup_loss(config):
|
|
||||||
if config.model.lower() in ["tacotron", "tacotron2"]:
|
|
||||||
model = TacotronLoss(config)
|
|
||||||
elif config.model.lower() == "glow_tts":
|
|
||||||
model = GlowTTSLoss()
|
|
||||||
elif config.model.lower() == "speedy_speech":
|
|
||||||
model = SpeedySpeechLoss(config)
|
|
||||||
elif config.model.lower() == "align_tts":
|
|
||||||
model = AlignTTSLoss(config)
|
|
||||||
else:
|
|
||||||
raise ValueError(f" [!] loss for model {config.model.lower()} cannot be found.")
|
|
||||||
return model
|
|
||||||
|
|
|
@ -1,5 +1,4 @@
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import torch
|
|
||||||
|
|
||||||
|
|
||||||
def _pad_data(x, length):
|
def _pad_data(x, length):
|
||||||
|
|
|
@ -11,11 +11,11 @@ except ModuleNotFoundError:
|
||||||
|
|
||||||
|
|
||||||
class StandardScaler:
|
class StandardScaler:
|
||||||
"""StandardScaler for mean-std normalization with the given mean and std values."""
|
"""StandardScaler for mean-scale normalization with the given mean and scale values."""
|
||||||
|
|
||||||
def __init__(self, mean: np.ndarray = None, std: np.ndarray = None) -> None:
|
def __init__(self, mean: np.ndarray = None, scale: np.ndarray = None) -> None:
|
||||||
self.mean_ = mean
|
self.mean_ = mean
|
||||||
self.std_ = std
|
self.scale_ = scale
|
||||||
|
|
||||||
def set_stats(self, mean, scale):
|
def set_stats(self, mean, scale):
|
||||||
self.mean_ = mean
|
self.mean_ = mean
|
||||||
|
|
|
@ -1,5 +1,3 @@
|
||||||
import unittest
|
|
||||||
|
|
||||||
import torch as T
|
import torch as T
|
||||||
|
|
||||||
from TTS.tts.models.forward_tts import ForwardTTS, ForwardTTSArgs
|
from TTS.tts.models.forward_tts import ForwardTTS, ForwardTTSArgs
|
||||||
|
@ -54,12 +52,12 @@ def model_input_output_test():
|
||||||
assert (outputs["x_mask"] - x_mask).sum() == 0.0
|
assert (outputs["x_mask"] - x_mask).sum() == 0.0
|
||||||
assert (outputs["y_mask"] - y_mask).sum() == 0.0
|
assert (outputs["y_mask"] - y_mask).sum() == 0.0
|
||||||
|
|
||||||
assert outputs["alignment_soft"] == None
|
assert outputs["alignment_soft"] is None
|
||||||
assert outputs["alignment_mas"] == None
|
assert outputs["alignment_mas"] is None
|
||||||
assert outputs["alignment_logprob"] == None
|
assert outputs["alignment_logprob"] is None
|
||||||
assert outputs["o_alignment_dur"] == None
|
assert outputs["o_alignment_dur"] is None
|
||||||
assert outputs["pitch_avg"] == None
|
assert outputs["pitch_avg"] is None
|
||||||
assert outputs["pitch_avg_gt"] == None
|
assert outputs["pitch_avg_gt"] is None
|
||||||
|
|
||||||
# USE PITCH
|
# USE PITCH
|
||||||
model = ForwardTTS(ForwardTTSArgs(num_chars=10, use_pitch=True, use_aligner=False))
|
model = ForwardTTS(ForwardTTSArgs(num_chars=10, use_pitch=True, use_aligner=False))
|
||||||
|
@ -85,10 +83,10 @@ def model_input_output_test():
|
||||||
assert outputs["pitch_avg"].shape == (2, 1, 21)
|
assert outputs["pitch_avg"].shape == (2, 1, 21)
|
||||||
assert outputs["pitch_avg_gt"].shape == (2, 1, 21)
|
assert outputs["pitch_avg_gt"].shape == (2, 1, 21)
|
||||||
|
|
||||||
assert outputs["alignment_soft"] == None
|
assert outputs["alignment_soft"] is None
|
||||||
assert outputs["alignment_mas"] == None
|
assert outputs["alignment_mas"] is None
|
||||||
assert outputs["alignment_logprob"] == None
|
assert outputs["alignment_logprob"] is None
|
||||||
assert outputs["o_alignment_dur"] == None
|
assert outputs["o_alignment_dur"] is None
|
||||||
|
|
||||||
# USE ALIGNER NETWORK
|
# USE ALIGNER NETWORK
|
||||||
model = ForwardTTS(ForwardTTSArgs(num_chars=10, use_pitch=False, use_aligner=True))
|
model = ForwardTTS(ForwardTTSArgs(num_chars=10, use_pitch=False, use_aligner=True))
|
||||||
|
@ -116,8 +114,8 @@ def model_input_output_test():
|
||||||
assert outputs["alignment_logprob"].shape == (2, 1, durations.sum(1).max(), 21)
|
assert outputs["alignment_logprob"].shape == (2, 1, durations.sum(1).max(), 21)
|
||||||
assert outputs["o_alignment_dur"].shape == (2, 21)
|
assert outputs["o_alignment_dur"].shape == (2, 21)
|
||||||
|
|
||||||
assert outputs["pitch_avg"] == None
|
assert outputs["pitch_avg"] is None
|
||||||
assert outputs["pitch_avg_gt"] == None
|
assert outputs["pitch_avg_gt"] is None
|
||||||
|
|
||||||
# USE ALIGNER NETWORK AND PITCH
|
# USE ALIGNER NETWORK AND PITCH
|
||||||
model = ForwardTTS(ForwardTTSArgs(num_chars=10, use_pitch=True, use_aligner=True))
|
model = ForwardTTS(ForwardTTSArgs(num_chars=10, use_pitch=True, use_aligner=True))
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
import torch as T
|
import torch as T
|
||||||
|
|
||||||
from TTS.tts.utils.helpers import *
|
from TTS.tts.utils.helpers import average_over_durations, generate_path, segment, sequence_mask
|
||||||
|
|
||||||
|
|
||||||
def average_over_durations_test(): # pylint: disable=no-self-use
|
def average_over_durations_test(): # pylint: disable=no-self-use
|
||||||
|
@ -47,7 +47,7 @@ def generate_path_test():
|
||||||
durations = durations * x_mask.squeeze(1)
|
durations = durations * x_mask.squeeze(1)
|
||||||
y_length = durations.sum(1)
|
y_length = durations.sum(1)
|
||||||
y_mask = sequence_mask(y_length).unsqueeze(1).long()
|
y_mask = sequence_mask(y_length).unsqueeze(1).long()
|
||||||
attn_mask = (torch.unsqueeze(x_mask, -1) * torch.unsqueeze(y_mask, 2)).squeeze(1).long()
|
attn_mask = (T.unsqueeze(x_mask, -1) * T.unsqueeze(y_mask, 2)).squeeze(1).long()
|
||||||
print(attn_mask.shape)
|
print(attn_mask.shape)
|
||||||
path = generate_path(durations, attn_mask)
|
path = generate_path(durations, attn_mask)
|
||||||
assert path.shape == (10, 21, durations.sum(1).max().item())
|
assert path.shape == (10, 21, durations.sum(1).max().item())
|
||||||
|
|
Loading…
Reference in New Issue