mirror of https://github.com/coqui-ai/TTS.git
Add `sort_by_audio_len` option
This commit is contained in:
parent
59d52a4cd8
commit
6e9d4062f2
|
@ -141,11 +141,14 @@ class BaseTTSConfig(BaseTrainingConfig):
|
||||||
loss_masking (bool):
|
loss_masking (bool):
|
||||||
enable / disable masking loss values against padded segments of samples in a batch.
|
enable / disable masking loss values against padded segments of samples in a batch.
|
||||||
|
|
||||||
|
sort_by_audio_len (bool):
|
||||||
|
If true, the dataloader sorts the data by audio length; otherwise it sorts by the input text length. Defaults to `True`.
|
||||||
|
|
||||||
min_seq_len (int):
|
min_seq_len (int):
|
||||||
Minimum input sequence length to be used at training.
|
Minimum sequence length to be used at training.
|
||||||
|
|
||||||
max_seq_len (int):
|
max_seq_len (int):
|
||||||
Maximum input sequence length to be used at training. Larger values result in more VRAM usage.
|
Maximum sequence length to be used at training. Larger values result in more VRAM usage.
|
||||||
|
|
||||||
compute_f0 (int):
|
compute_f0 (int):
|
||||||
(Not in use yet).
|
(Not in use yet).
|
||||||
|
@ -198,6 +201,7 @@ class BaseTTSConfig(BaseTrainingConfig):
|
||||||
batch_group_size: int = 0
|
batch_group_size: int = 0
|
||||||
loss_masking: bool = None
|
loss_masking: bool = None
|
||||||
# dataloading
|
# dataloading
|
||||||
|
sort_by_audio_len: bool = True
|
||||||
min_seq_len: int = 1
|
min_seq_len: int = 1
|
||||||
max_seq_len: int = float("inf")
|
max_seq_len: int = float("inf")
|
||||||
compute_f0: bool = False
|
compute_f0: bool = False
|
||||||
|
|
|
@ -67,11 +67,14 @@ class VitsConfig(BaseTTSConfig):
|
||||||
compute_linear_spec (bool):
|
compute_linear_spec (bool):
|
||||||
If true, the linear spectrogram is computed and returned alongside the mel output. Do not change. Defaults to `True`.
|
If true, the linear spectrogram is computed and returned alongside the mel output. Do not change. Defaults to `True`.
|
||||||
|
|
||||||
|
sort_by_audio_len (bool):
|
||||||
|
If true, the dataloader sorts the data by audio length; otherwise it sorts by the input text length. Defaults to `True`.
|
||||||
|
|
||||||
min_seq_len (int):
|
min_seq_len (int):
|
||||||
Minimum text length to be considered for training. Defaults to `13`.
|
Minimum sequence length to be considered for training. Defaults to `0`.
|
||||||
|
|
||||||
max_seq_len (int):
|
max_seq_len (int):
|
||||||
Maximum text length to be considered for training. Defaults to `500`.
|
Maximum sequence length to be considered for training. Defaults to `500000`.
|
||||||
|
|
||||||
r (int):
|
r (int):
|
||||||
Number of spectrogram frames to be generated at a time. Do not change. Defaults to `1`.
|
Number of spectrogram frames to be generated at a time. Do not change. Defaults to `1`.
|
||||||
|
|
Loading…
Reference in New Issue