mirror of https://github.com/coqui-ai/TTS.git
Edit model configs for multi-speaker
This commit is contained in:
parent
cea8e1739b
commit
3ab009ca8d
|
@ -11,7 +11,7 @@ class FastPitchConfig(BaseTTSConfig):
|
|||
|
||||
Example:
|
||||
|
||||
>>> from TTS.tts.configs import FastPitchConfig
|
||||
>>> from TTS.tts.configs.fast_pitch_config import FastPitchConfig
|
||||
>>> config = FastPitchConfig()
|
||||
|
||||
Args:
|
||||
|
@ -30,6 +30,10 @@ class FastPitchConfig(BaseTTSConfig):
|
|||
Activation Normalization that pre-computes normalization stats at the beginning and uses the same values
|
||||
for the rest. Defaults to 10.
|
||||
|
||||
speakers_file (str):
|
||||
Path to the file containing the list of speakers. Needed at inference for loading matching speaker ids to
|
||||
speaker names. Defaults to `None`.
|
||||
|
||||
use_speaker_embedding (bool):
|
||||
enable / disable using speaker embeddings for multi-speaker models. If set True, the model is
|
||||
in the multi-speaker mode. Defaults to False.
|
||||
|
@ -105,6 +109,8 @@ class FastPitchConfig(BaseTTSConfig):
|
|||
model_args: ForwardTTSArgs = ForwardTTSArgs()
|
||||
|
||||
# multi-speaker settings
# Number of speakers; copied onto `model_args` by `__post_init__` when it is greater than 0.
num_speakers: int = 0
# Path to the file containing the list of speakers. Needed at inference for
# matching speaker ids to speaker names.
speakers_file: str = None
# If True, the model is in the multi-speaker mode and uses speaker embeddings.
use_speaker_embedding: bool = False
# Mirrored onto `model_args.use_d_vector_file` by `__post_init__` when True.
use_d_vector_file: bool = False
# "Not set" is spelled `None`, matching `speakers_file`; a `str` field should
# not default to a boolean. `__post_init__` only tests truthiness, so the
# unset behaviour is unchanged.
d_vector_file: str = None
|
||||
|
@ -149,3 +155,22 @@ class FastPitchConfig(BaseTTSConfig):
|
|||
"Prior to November 22, 1963.",
|
||||
]
|
||||
)
|
||||
|
||||
def __post_init__(self):
    """Copy the config-level multi-speaker settings onto ``self.model_args``.

    Only values that are actually set are copied; a field left at a falsy
    value leaves the matching ``model_args`` attribute untouched.
    """
    # Pass multi-speaker parameters to the model args as `model.init_multispeaker()` looks for it there.
    # Guard against None the same way `d_vector_dim` is guarded below, so a
    # null value does not raise TypeError on the comparison.
    if self.num_speakers is not None and self.num_speakers > 0:
        self.model_args.num_speakers = self.num_speakers

    # speaker embedding settings
    if self.use_speaker_embedding:
        self.model_args.use_speaker_embedding = True
    if self.speakers_file:
        self.model_args.speakers_file = self.speakers_file

    # d-vector settings
    if self.use_d_vector_file:
        self.model_args.use_d_vector_file = True
    if self.d_vector_dim is not None and self.d_vector_dim > 0:
        self.model_args.d_vector_dim = self.d_vector_dim
    if self.d_vector_file:
        self.model_args.d_vector_file = self.d_vector_file
|
||||
|
|
|
@ -30,6 +30,11 @@ class FastSpeechConfig(BaseTTSConfig):
|
|||
Activation Normalization that pre-computes normalization stats at the beginning and uses the same values
|
||||
for the rest. Defaults to 10.
|
||||
|
||||
speakers_file (str):
|
||||
Path to the file containing the list of speakers. Needed at inference for loading matching speaker ids to
|
||||
speaker names. Defaults to `None`.
|
||||
|
||||
|
||||
use_speaker_embedding (bool):
|
||||
enable / disable using speaker embeddings for multi-speaker models. If set True, the model is
|
||||
in the multi-speaker mode. Defaults to False.
|
||||
|
@ -105,6 +110,7 @@ class FastSpeechConfig(BaseTTSConfig):
|
|||
model_args: ForwardTTSArgs = ForwardTTSArgs(use_pitch=False)
|
||||
|
||||
# multi-speaker settings
# Path to the file containing the list of speakers. Needed at inference for
# matching speaker ids to speaker names.
speakers_file: str = None
# If True, the model is in the multi-speaker mode and uses speaker embeddings.
use_speaker_embedding: bool = False
# Mirrored onto `model_args.use_d_vector_file` by `__post_init__` when True.
use_d_vector_file: bool = False
# "Not set" is spelled `None`, matching `speakers_file`; a `str` field should
# not default to a boolean. `__post_init__` only tests truthiness, so the
# unset behaviour is unchanged.
d_vector_file: str = None
|
||||
|
@ -149,3 +155,22 @@ class FastSpeechConfig(BaseTTSConfig):
|
|||
"Prior to November 22, 1963.",
|
||||
]
|
||||
)
|
||||
|
||||
def __post_init__(self):
    """Copy the config-level multi-speaker settings onto ``self.model_args``.

    Only values that are actually set are copied; a field left at a falsy
    value leaves the matching ``model_args`` attribute untouched.
    """
    # Pass multi-speaker parameters to the model args as `model.init_multispeaker()` looks for it there.
    # Guard against None the same way `d_vector_dim` is guarded below, so a
    # null value does not raise TypeError on the comparison.
    if self.num_speakers is not None and self.num_speakers > 0:
        self.model_args.num_speakers = self.num_speakers

    # speaker embedding settings
    if self.use_speaker_embedding:
        self.model_args.use_speaker_embedding = True
    if self.speakers_file:
        self.model_args.speakers_file = self.speakers_file

    # d-vector settings
    if self.use_d_vector_file:
        self.model_args.use_d_vector_file = True
    if self.d_vector_dim is not None and self.d_vector_dim > 0:
        self.model_args.d_vector_dim = self.d_vector_dim
    if self.d_vector_file:
        self.model_args.d_vector_file = self.d_vector_file
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
from dataclasses import dataclass, field
|
||||
from typing import List
|
||||
|
||||
from TTS.tts.configs.shared_configs import BaseTTSConfig
|
||||
from TTS.tts.models.forward_tts import ForwardTTSArgs
|
||||
from TTS.tts.configs.shared_configs import BaseTTSConfig
|
||||
|
||||
|
||||
@dataclass
|
||||
|
@ -30,6 +30,10 @@ class SpeedySpeechConfig(BaseTTSConfig):
|
|||
Activation Normalization that pre-computes normalization stats at the beginning and uses the same values
|
||||
for the rest. Defaults to 10.
|
||||
|
||||
speakers_file (str):
|
||||
Path to the file containing the list of speakers. Needed at inference for loading matching speaker ids to
|
||||
speaker names. Defaults to `None`.
|
||||
|
||||
use_speaker_embedding (bool):
|
||||
enable / disable using speaker embeddings for multi-speaker models. If set True, the model is
|
||||
in the multi-speaker mode. Defaults to False.
|
||||
|
@ -117,12 +121,13 @@ class SpeedySpeechConfig(BaseTTSConfig):
|
|||
},
|
||||
out_channels=80,
|
||||
hidden_channels=128,
|
||||
num_speakers=0,
|
||||
positional_encoding=True,
|
||||
detach_duration_predictor=True,
|
||||
)
|
||||
|
||||
# multi-speaker settings
# Number of speakers; copied onto `model_args` by `__post_init__` when it is greater than 0.
num_speakers: int = 0
# Path to the file containing the list of speakers. Needed at inference for
# matching speaker ids to speaker names.
speakers_file: str = None
# If True, the model is in the multi-speaker mode and uses speaker embeddings.
use_speaker_embedding: bool = False
# Mirrored onto `model_args.use_d_vector_file` by `__post_init__` when True.
use_d_vector_file: bool = False
# "Not set" is spelled `None`, matching `speakers_file`; a `str` field should
# not default to a boolean. `__post_init__` only tests truthiness, so the
# unset behaviour is unchanged.
d_vector_file: str = None
|
||||
|
@ -166,3 +171,22 @@ class SpeedySpeechConfig(BaseTTSConfig):
|
|||
"Prior to November 22, 1963.",
|
||||
]
|
||||
)
|
||||
|
||||
def __post_init__(self):
    """Copy the config-level multi-speaker settings onto ``self.model_args``.

    Only values that are actually set are copied; a field left at a falsy
    value leaves the matching ``model_args`` attribute untouched.
    """
    # Pass multi-speaker parameters to the model args as `model.init_multispeaker()` looks for it there.
    # Guard against None the same way `d_vector_dim` is guarded below, so a
    # null value does not raise TypeError on the comparison.
    if self.num_speakers is not None and self.num_speakers > 0:
        self.model_args.num_speakers = self.num_speakers

    # speaker embedding settings
    if self.use_speaker_embedding:
        self.model_args.use_speaker_embedding = True
    if self.speakers_file:
        self.model_args.speakers_file = self.speakers_file

    # d-vector settings
    if self.use_d_vector_file:
        self.model_args.use_d_vector_file = True
    if self.d_vector_dim is not None and self.d_vector_dim > 0:
        self.model_args.d_vector_dim = self.d_vector_dim
    if self.d_vector_file:
        self.model_args.d_vector_file = self.d_vector_file
|
||||
|
|
Loading…
Reference in New Issue