mirror of https://github.com/coqui-ai/TTS.git
refactor(vc): rename TTS.vc.modules to TTS.vc.layers for consistency
This matches the `layers` package naming already used in TTS.tts and TTS.vocoder.
This commit is contained in:
parent
32c99e8e66
commit
7d0416f99b
|
@@ -7,7 +7,7 @@ from torch.nn.utils.parametrize import remove_parametrizations
|
||||||
|
|
||||||
from TTS.tts.layers.generic.normalization import LayerNorm2
|
from TTS.tts.layers.generic.normalization import LayerNorm2
|
||||||
from TTS.tts.layers.generic.wavenet import fused_add_tanh_sigmoid_multiply
|
from TTS.tts.layers.generic.wavenet import fused_add_tanh_sigmoid_multiply
|
||||||
from TTS.vc.modules.freevc.commons import init_weights
|
from TTS.vc.layers.freevc.commons import init_weights
|
||||||
from TTS.vocoder.models.hifigan_generator import get_padding
|
from TTS.vocoder.models.hifigan_generator import get_padding
|
||||||
|
|
||||||
LRELU_SLOPE = 0.1
|
LRELU_SLOPE = 0.1
|
|
@@ -5,7 +5,7 @@ from typing import Optional, Union
|
||||||
import librosa
|
import librosa
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
from TTS.vc.modules.freevc.speaker_encoder.hparams import (
|
from TTS.vc.layers.freevc.speaker_encoder.hparams import (
|
||||||
audio_norm_target_dBFS,
|
audio_norm_target_dBFS,
|
||||||
mel_n_channels,
|
mel_n_channels,
|
||||||
mel_window_length,
|
mel_window_length,
|
|
@@ -7,8 +7,8 @@ import torch
|
||||||
from torch import nn
|
from torch import nn
|
||||||
from trainer.io import load_fsspec
|
from trainer.io import load_fsspec
|
||||||
|
|
||||||
from TTS.vc.modules.freevc.speaker_encoder import audio
|
from TTS.vc.layers.freevc.speaker_encoder import audio
|
||||||
from TTS.vc.modules.freevc.speaker_encoder.hparams import (
|
from TTS.vc.layers.freevc.speaker_encoder.hparams import (
|
||||||
mel_n_channels,
|
mel_n_channels,
|
||||||
mel_window_step,
|
mel_window_step,
|
||||||
model_embedding_size,
|
model_embedding_size,
|
|
@@ -6,7 +6,7 @@ import torch
|
||||||
from trainer.io import get_user_data_dir
|
from trainer.io import get_user_data_dir
|
||||||
|
|
||||||
from TTS.utils.generic_utils import is_pytorch_at_least_2_4
|
from TTS.utils.generic_utils import is_pytorch_at_least_2_4
|
||||||
from TTS.vc.modules.freevc.wavlm.wavlm import WavLM, WavLMConfig
|
from TTS.vc.layers.freevc.wavlm.wavlm import WavLM, WavLMConfig
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
|
@@ -17,7 +17,7 @@ import torch.nn as nn
|
||||||
import torch.nn.functional as F
|
import torch.nn.functional as F
|
||||||
from torch.nn import LayerNorm
|
from torch.nn import LayerNorm
|
||||||
|
|
||||||
from TTS.vc.modules.freevc.wavlm.modules import (
|
from TTS.vc.layers.freevc.wavlm.modules import (
|
||||||
Fp32GroupNorm,
|
Fp32GroupNorm,
|
||||||
Fp32LayerNorm,
|
Fp32LayerNorm,
|
||||||
GLU_Linear,
|
GLU_Linear,
|
|
@@ -12,17 +12,16 @@ from torch.nn.utils.parametrizations import weight_norm
|
||||||
from torch.nn.utils.parametrize import remove_parametrizations
|
from torch.nn.utils.parametrize import remove_parametrizations
|
||||||
from trainer.io import load_fsspec
|
from trainer.io import load_fsspec
|
||||||
|
|
||||||
import TTS.vc.modules.freevc.commons as commons
|
import TTS.vc.layers.freevc.modules as modules
|
||||||
import TTS.vc.modules.freevc.modules as modules
|
|
||||||
from TTS.tts.layers.vits.discriminator import DiscriminatorS
|
from TTS.tts.layers.vits.discriminator import DiscriminatorS
|
||||||
from TTS.tts.utils.helpers import sequence_mask
|
from TTS.tts.utils.helpers import sequence_mask
|
||||||
from TTS.tts.utils.speakers import SpeakerManager
|
from TTS.tts.utils.speakers import SpeakerManager
|
||||||
from TTS.vc.configs.freevc_config import FreeVCConfig
|
from TTS.vc.configs.freevc_config import FreeVCConfig
|
||||||
|
from TTS.vc.layers.freevc.commons import init_weights, rand_slice_segments
|
||||||
|
from TTS.vc.layers.freevc.mel_processing import mel_spectrogram_torch
|
||||||
|
from TTS.vc.layers.freevc.speaker_encoder.speaker_encoder import SpeakerEncoder as SpeakerEncoderEx
|
||||||
|
from TTS.vc.layers.freevc.wavlm import get_wavlm
|
||||||
from TTS.vc.models.base_vc import BaseVC
|
from TTS.vc.models.base_vc import BaseVC
|
||||||
from TTS.vc.modules.freevc.commons import init_weights
|
|
||||||
from TTS.vc.modules.freevc.mel_processing import mel_spectrogram_torch
|
|
||||||
from TTS.vc.modules.freevc.speaker_encoder.speaker_encoder import SpeakerEncoder as SpeakerEncoderEx
|
|
||||||
from TTS.vc.modules.freevc.wavlm import get_wavlm
|
|
||||||
from TTS.vocoder.models.hifigan_discriminator import DiscriminatorP
|
from TTS.vocoder.models.hifigan_discriminator import DiscriminatorP
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
@@ -385,7 +384,7 @@ class FreeVC(BaseVC):
|
||||||
z_p = self.flow(z, spec_mask, g=g)
|
z_p = self.flow(z, spec_mask, g=g)
|
||||||
|
|
||||||
# Randomly slice z and compute o using dec
|
# Randomly slice z and compute o using dec
|
||||||
z_slice, ids_slice = commons.rand_slice_segments(z, spec_lengths, self.segment_size)
|
z_slice, ids_slice = rand_slice_segments(z, spec_lengths, self.segment_size)
|
||||||
o = self.dec(z_slice, g=g)
|
o = self.dec(z_slice, g=g)
|
||||||
|
|
||||||
return o, ids_slice, spec_mask, (z, z_p, m_p, logs_p, m_q, logs_q)
|
return o, ids_slice, spec_mask, (z, z_p, m_p, logs_p, m_q, logs_q)
|
||||||
|
|
Loading…
Reference in New Issue