Update synthesizer.py

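Make verbose output configurable: add a `verbose` parameter (default `True`) that gates the Synthesizer's informational print statements.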
This commit is contained in:
David Martin Rius 2024-03-05 18:36:47 +01:00 committed by GitHub
parent 64fdd0ed8b
commit 275229a876
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed file with 16 additions and 9 deletions

View File

@ -264,6 +264,7 @@ class Synthesizer(nn.Module):
style_text=None,
reference_wav=None,
reference_speaker_name=None,
verbose: bool = True,
split_sentences: bool = True,
**kwargs,
) -> List[int]:
@ -278,6 +279,7 @@ class Synthesizer(nn.Module):
style_text ([type], optional): transcription of style_wav for Capacitron. Defaults to None.
reference_wav ([type], optional): reference waveform for voice conversion. Defaults to None.
reference_speaker_name ([type], optional): speaker id of reference waveform. Defaults to None.
verbose (bool, optional): print verbose output. Defaults to True.
split_sentences (bool, optional): split the input text into sentences. Defaults to True.
**kwargs: additional arguments to pass to the TTS model.
Returns:
@ -294,8 +296,10 @@ class Synthesizer(nn.Module):
if text:
sens = [text]
if split_sentences:
if verbose:
print(" > Text splitted to sentences.")
sens = self.split_into_sentences(text)
if verbose:
print(sens)
# handle multi-speaker
@ -420,6 +424,7 @@ class Synthesizer(nn.Module):
self.vocoder_config["audio"]["sample_rate"] / self.tts_model.ap.sample_rate,
]
if scale_factor[1] != 1:
if verbose:
print(" > interpolating tts model output.")
vocoder_input = interpolate_vocoder_input(scale_factor, vocoder_input)
else:
@ -484,6 +489,7 @@ class Synthesizer(nn.Module):
self.vocoder_config["audio"]["sample_rate"] / self.tts_model.ap.sample_rate,
]
if scale_factor[1] != 1:
if verbose:
print(" > interpolating tts model output.")
vocoder_input = interpolate_vocoder_input(scale_factor, vocoder_input)
else:
@ -497,6 +503,7 @@ class Synthesizer(nn.Module):
waveform = waveform.numpy()
wavs = waveform.squeeze()
if verbose:
# compute stats
process_time = time.time() - start_time
audio_time = len(wavs) / self.tts_config.audio["sample_rate"]