mirror of https://github.com/coqui-ai/TTS.git
Bug fixes in OverFlow audio generation (#2380)
parent b8d9837d27
commit 3c15f0619a
TTS/tts/models/neuralhmm_tts.py

@@ -179,6 +179,7 @@ class NeuralhmmTTS(BaseTTS):
         Args:
             aux_inputs (Dict): Dictionary containing the auxiliary inputs.
         """
+        default_input_dict = default_input_dict.copy()
         default_input_dict.update(
             {
                 "sampling_temp": self.sampling_temp,
@@ -187,8 +188,8 @@ class NeuralhmmTTS(BaseTTS):
             }
         )
         if aux_input:
-            return format_aux_input(aux_input, default_input_dict)
-        return None
+            return format_aux_input(default_input_dict, aux_input)
+        return default_input_dict

     @torch.no_grad()
     def inference(
@@ -319,7 +320,7 @@ class NeuralhmmTTS(BaseTTS):
         # sample one item from the batch -1 will give the smalles item
         print(" | > Synthesising audio from the model...")
         inference_output = self.inference(
-            batch["text_input"][-1].unsqueeze(0), aux_input={"x_lenghts": batch["text_lengths"][-1].unsqueeze(0)}
+            batch["text_input"][-1].unsqueeze(0), aux_input={"x_lengths": batch["text_lengths"][-1].unsqueeze(0)}
         )
         figures["synthesised"] = plot_spectrogram(inference_output["model_outputs"][0], fig_size=(12, 3))
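Taken together, the first two hunks fix how the model assembles its auxiliary inputs: the defaults are copied before being updated so the shared default dict is never mutated, format_aux_input is called with its arguments in the declared (def_args, kwargs) order, and the populated defaults are returned instead of None when no aux_input is supplied. A minimal, self-contained sketch of the fixed behavior (build_aux_input is a hypothetical stand-in for the model method, with only sampling_temp shown of the updated fields):

    from typing import Dict, Optional

    def format_aux_input(def_args: Dict, kwargs: Dict) -> Dict:
        # Post-fix helper: fill entries that are missing or None with defaults.
        kwargs = kwargs.copy()
        for name in def_args:
            if name not in kwargs or kwargs[name] is None:
                kwargs[name] = def_args[name]
        return kwargs

    def build_aux_input(aux_input: Optional[Dict], default_input_dict: Dict, sampling_temp: float) -> Dict:
        # Copy first so the caller's default dict is never mutated in place.
        default_input_dict = default_input_dict.copy()
        default_input_dict.update({"sampling_temp": sampling_temp})
        if aux_input:
            # Defaults first, overrides second -- the declared argument order.
            return format_aux_input(default_input_dict, aux_input)
        # The pre-fix code returned None here, forcing every caller to guard for it.
        return default_input_dict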
TTS/tts/models/overflow.py

@@ -192,6 +192,7 @@ class Overflow(BaseTTS):
         Args:
             aux_inputs (Dict): Dictionary containing the auxiliary inputs.
         """
+        default_input_dict = default_input_dict.copy()
         default_input_dict.update(
             {
                 "sampling_temp": self.sampling_temp,
@@ -200,8 +201,8 @@ class Overflow(BaseTTS):
             }
         )
         if aux_input:
-            return format_aux_input(aux_input, default_input_dict)
-        return None
+            return format_aux_input(default_input_dict, aux_input)
+        return default_input_dict

     @torch.no_grad()
     def inference(
@@ -335,7 +336,7 @@ class Overflow(BaseTTS):
         # sample one item from the batch -1 will give the smalles item
         print(" | > Synthesising audio from the model...")
         inference_output = self.inference(
-            batch["text_input"][-1].unsqueeze(0), aux_input={"x_lenghts": batch["text_lengths"][-1].unsqueeze(0)}
+            batch["text_input"][-1].unsqueeze(0), aux_input={"x_lengths": batch["text_lengths"][-1].unsqueeze(0)}
        )
         figures["synthesised"] = plot_spectrogram(inference_output["model_outputs"][0], fig_size=(12, 3))
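The third hunk in each model fixes a misspelled dictionary key. Because the aux-input formatter falls back to the default for any key the caller does not supply, the "x_lenghts" entry rode along untouched while the correctly spelled "x_lengths" key silently kept its default, so test-time synthesis ran without the real text lengths. An illustration, reusing the format_aux_input sketch above with hypothetical default values:

    defaults = {"x_lengths": None, "sampling_temp": 0.667}  # hypothetical defaults

    # Before the fix: the typo key rides along and "x_lengths" stays None.
    print(format_aux_input(defaults, {"x_lenghts": 42}))
    # -> {'x_lenghts': 42, 'x_lengths': None, 'sampling_temp': 0.667}

    # After the fix: the provided length is picked up under the correct key.
    print(format_aux_input(defaults, {"x_lengths": 42}))
    # -> {'x_lengths': 42, 'sampling_temp': 0.667}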
TTS/tts/utils/helpers.py

@@ -167,9 +167,10 @@ def format_aux_input(def_args: Dict, kwargs: Dict) -> Dict:
     Returns:
         Dict: arguments with formatted auxilary inputs.
     """
+    kwargs = kwargs.copy()
     for name in def_args:
-        if name not in kwargs:
-            kwargs[def_args[name]] = None
+        if name not in kwargs or kwargs[name] is None:
+            kwargs[name] = def_args[name]
     return kwargs
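The pre-fix body of this helper had two bugs: kwargs[def_args[name]] = None used each default *value* as a dictionary key and stored None under it, so defaults were never actually applied, and it mutated the caller's dictionary in place. A quick comparison against the fixed format_aux_input sketched earlier (same hypothetical defaults as above):

    def format_aux_input_old(def_args: Dict, kwargs: Dict) -> Dict:
        # Pre-fix behavior: uses each default *value* as a key, writes None
        # under it, and mutates the caller's dictionary in place.
        for name in def_args:
            if name not in kwargs:
                kwargs[def_args[name]] = None
        return kwargs

    defaults = {"x_lengths": None, "sampling_temp": 0.667}
    print(format_aux_input_old(defaults, {}))  # {None: None, 0.667: None} -- defaults never applied
    print(format_aux_input(defaults, {}))      # {'x_lengths': None, 'sampling_temp': 0.667}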