Update recipes

This commit is contained in:
Eren Gölge 2022-06-18 12:21:15 +02:00
parent cd0a55df9a
commit 00a75dde29
2 changed files with 4 additions and 23 deletions

View File

@@ -3,11 +3,10 @@ from glob import glob
from trainer import Trainer, TrainerArgs from trainer import Trainer, TrainerArgs
from TTS.config.shared_configs import BaseAudioConfig
from TTS.tts.configs.shared_configs import BaseDatasetConfig from TTS.tts.configs.shared_configs import BaseDatasetConfig
from TTS.tts.configs.vits_config import VitsConfig from TTS.tts.configs.vits_config import VitsConfig
from TTS.tts.datasets import load_tts_samples from TTS.tts.datasets import load_tts_samples
from TTS.tts.models.vits import CharactersConfig, Vits, VitsArgs from TTS.tts.models.vits import CharactersConfig, Vits, VitsArgs, VitsAudioConfig
from TTS.tts.utils.languages import LanguageManager from TTS.tts.utils.languages import LanguageManager
from TTS.tts.utils.speakers import SpeakerManager from TTS.tts.utils.speakers import SpeakerManager
from TTS.tts.utils.text.tokenizer import TTSTokenizer from TTS.tts.utils.text.tokenizer import TTSTokenizer
@@ -22,22 +21,13 @@ dataset_config = [
for path in dataset_paths for path in dataset_paths
] ]
audio_config = BaseAudioConfig( audio_config = VitsAudioConfig(
sample_rate=16000, sample_rate=16000,
win_length=1024, win_length=1024,
hop_length=256, hop_length=256,
num_mels=80, num_mels=80,
preemphasis=0.0,
ref_level_db=20,
log_func="np.log",
do_trim_silence=False,
trim_db=23.0,
mel_fmin=0, mel_fmin=0,
mel_fmax=None, mel_fmax=None,
spec_gain=1.0,
signal_norm=True,
do_amp_to_db_linear=False,
resample=False,
) )
vitsArgs = VitsArgs( vitsArgs = VitsArgs(

View File

@@ -2,11 +2,10 @@ import os
from trainer import Trainer, TrainerArgs from trainer import Trainer, TrainerArgs
from TTS.config.shared_configs import BaseAudioConfig
from TTS.tts.configs.shared_configs import BaseDatasetConfig from TTS.tts.configs.shared_configs import BaseDatasetConfig
from TTS.tts.configs.vits_config import VitsConfig from TTS.tts.configs.vits_config import VitsConfig
from TTS.tts.datasets import load_tts_samples from TTS.tts.datasets import load_tts_samples
from TTS.tts.models.vits import Vits from TTS.tts.models.vits import Vits, VitsAudioConfig
from TTS.tts.utils.text.tokenizer import TTSTokenizer from TTS.tts.utils.text.tokenizer import TTSTokenizer
from TTS.utils.audio import AudioProcessor from TTS.utils.audio import AudioProcessor
from TTS.utils.downloaders import download_thorsten_de from TTS.utils.downloaders import download_thorsten_de
@@ -21,21 +20,13 @@ if not os.path.exists(dataset_config.path):
print("Downloading dataset") print("Downloading dataset")
download_thorsten_de(os.path.split(os.path.abspath(dataset_config.path))[0]) download_thorsten_de(os.path.split(os.path.abspath(dataset_config.path))[0])
audio_config = BaseAudioConfig( audio_config = VitsAudioConfig(
sample_rate=22050, sample_rate=22050,
win_length=1024, win_length=1024,
hop_length=256, hop_length=256,
num_mels=80, num_mels=80,
preemphasis=0.0,
ref_level_db=20,
log_func="np.log",
do_trim_silence=True,
trim_db=45,
mel_fmin=0, mel_fmin=0,
mel_fmax=None, mel_fmax=None,
spec_gain=1.0,
signal_norm=False,
do_amp_to_db_linear=False,
) )
config = VitsConfig( config = VitsConfig(