From 7d0416f99b837d6dcd575a9512c473395f7e9d12 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Mon, 2 Dec 2024 00:16:39 +0100 Subject: [PATCH] refactor(vc): rename TTS.vc.modules to TTS.vc.layers for consistency Same as in TTS.tts and TTS.vocoder --- TTS/vc/{modules => layers}/__init__.py | 0 TTS/vc/{modules => layers}/freevc/__init__.py | 0 TTS/vc/{modules => layers}/freevc/commons.py | 0 TTS/vc/{modules => layers}/freevc/mel_processing.py | 0 TTS/vc/{modules => layers}/freevc/modules.py | 2 +- .../freevc/speaker_encoder/__init__.py | 0 .../freevc/speaker_encoder/audio.py | 2 +- .../freevc/speaker_encoder/hparams.py | 0 .../freevc/speaker_encoder/speaker_encoder.py | 4 ++-- TTS/vc/{modules => layers}/freevc/wavlm/__init__.py | 2 +- TTS/vc/{modules => layers}/freevc/wavlm/config.json | 0 TTS/vc/{modules => layers}/freevc/wavlm/modules.py | 0 TTS/vc/{modules => layers}/freevc/wavlm/wavlm.py | 2 +- TTS/vc/models/freevc.py | 13 ++++++------- 14 files changed, 12 insertions(+), 13 deletions(-) rename TTS/vc/{modules => layers}/__init__.py (100%) rename TTS/vc/{modules => layers}/freevc/__init__.py (100%) rename TTS/vc/{modules => layers}/freevc/commons.py (100%) rename TTS/vc/{modules => layers}/freevc/mel_processing.py (100%) rename TTS/vc/{modules => layers}/freevc/modules.py (99%) rename TTS/vc/{modules => layers}/freevc/speaker_encoder/__init__.py (100%) rename TTS/vc/{modules => layers}/freevc/speaker_encoder/audio.py (97%) rename TTS/vc/{modules => layers}/freevc/speaker_encoder/hparams.py (100%) rename TTS/vc/{modules => layers}/freevc/speaker_encoder/speaker_encoder.py (98%) rename TTS/vc/{modules => layers}/freevc/wavlm/__init__.py (94%) rename TTS/vc/{modules => layers}/freevc/wavlm/config.json (100%) rename TTS/vc/{modules => layers}/freevc/wavlm/modules.py (100%) rename TTS/vc/{modules => layers}/freevc/wavlm/wavlm.py (99%) diff --git a/TTS/vc/modules/__init__.py b/TTS/vc/layers/__init__.py similarity index 100% rename from TTS/vc/modules/__init__.py rename to TTS/vc/layers/__init__.py diff --git a/TTS/vc/modules/freevc/__init__.py b/TTS/vc/layers/freevc/__init__.py similarity index 100% rename from TTS/vc/modules/freevc/__init__.py rename to TTS/vc/layers/freevc/__init__.py diff --git a/TTS/vc/modules/freevc/commons.py b/TTS/vc/layers/freevc/commons.py similarity index 100% rename from TTS/vc/modules/freevc/commons.py rename to TTS/vc/layers/freevc/commons.py diff --git a/TTS/vc/modules/freevc/mel_processing.py b/TTS/vc/layers/freevc/mel_processing.py similarity index 100% rename from TTS/vc/modules/freevc/mel_processing.py rename to TTS/vc/layers/freevc/mel_processing.py diff --git a/TTS/vc/modules/freevc/modules.py b/TTS/vc/layers/freevc/modules.py similarity index 99% rename from TTS/vc/modules/freevc/modules.py rename to TTS/vc/layers/freevc/modules.py index ea17be24..c34f22d7 100644 --- a/TTS/vc/modules/freevc/modules.py +++ b/TTS/vc/layers/freevc/modules.py @@ -7,7 +7,7 @@ from torch.nn.utils.parametrize import remove_parametrizations from TTS.tts.layers.generic.normalization import LayerNorm2 from TTS.tts.layers.generic.wavenet import fused_add_tanh_sigmoid_multiply -from TTS.vc.modules.freevc.commons import init_weights +from TTS.vc.layers.freevc.commons import init_weights from TTS.vocoder.models.hifigan_generator import get_padding LRELU_SLOPE = 0.1 diff --git a/TTS/vc/modules/freevc/speaker_encoder/__init__.py b/TTS/vc/layers/freevc/speaker_encoder/__init__.py similarity index 100% rename from TTS/vc/modules/freevc/speaker_encoder/__init__.py rename to TTS/vc/layers/freevc/speaker_encoder/__init__.py diff --git a/TTS/vc/modules/freevc/speaker_encoder/audio.py b/TTS/vc/layers/freevc/speaker_encoder/audio.py similarity index 97% rename from TTS/vc/modules/freevc/speaker_encoder/audio.py rename to TTS/vc/layers/freevc/speaker_encoder/audio.py index 5b23a4db..5fa317ce 100644 --- a/TTS/vc/modules/freevc/speaker_encoder/audio.py +++ b/TTS/vc/layers/freevc/speaker_encoder/audio.py @@ -5,7 +5,7 @@ from typing import Optional, Union import librosa import numpy as np -from TTS.vc.modules.freevc.speaker_encoder.hparams import ( +from TTS.vc.layers.freevc.speaker_encoder.hparams import ( audio_norm_target_dBFS, mel_n_channels, mel_window_length, diff --git a/TTS/vc/modules/freevc/speaker_encoder/hparams.py b/TTS/vc/layers/freevc/speaker_encoder/hparams.py similarity index 100% rename from TTS/vc/modules/freevc/speaker_encoder/hparams.py rename to TTS/vc/layers/freevc/speaker_encoder/hparams.py diff --git a/TTS/vc/modules/freevc/speaker_encoder/speaker_encoder.py b/TTS/vc/layers/freevc/speaker_encoder/speaker_encoder.py similarity index 98% rename from TTS/vc/modules/freevc/speaker_encoder/speaker_encoder.py rename to TTS/vc/layers/freevc/speaker_encoder/speaker_encoder.py index 294bf322..a6d5bcf9 100644 --- a/TTS/vc/modules/freevc/speaker_encoder/speaker_encoder.py +++ b/TTS/vc/layers/freevc/speaker_encoder/speaker_encoder.py @@ -7,8 +7,8 @@ import torch from torch import nn from trainer.io import load_fsspec -from TTS.vc.modules.freevc.speaker_encoder import audio -from TTS.vc.modules.freevc.speaker_encoder.hparams import ( +from TTS.vc.layers.freevc.speaker_encoder import audio +from TTS.vc.layers.freevc.speaker_encoder.hparams import ( mel_n_channels, mel_window_step, model_embedding_size, diff --git a/TTS/vc/modules/freevc/wavlm/__init__.py b/TTS/vc/layers/freevc/wavlm/__init__.py similarity index 94% rename from TTS/vc/modules/freevc/wavlm/__init__.py rename to TTS/vc/layers/freevc/wavlm/__init__.py index 4046e137..62f7e74a 100644 --- a/TTS/vc/modules/freevc/wavlm/__init__.py +++ b/TTS/vc/layers/freevc/wavlm/__init__.py @@ -6,7 +6,7 @@ import torch from trainer.io import get_user_data_dir from TTS.utils.generic_utils import is_pytorch_at_least_2_4 -from TTS.vc.modules.freevc.wavlm.wavlm import WavLM, WavLMConfig +from TTS.vc.layers.freevc.wavlm.wavlm import WavLM, WavLMConfig logger = logging.getLogger(__name__) diff --git a/TTS/vc/modules/freevc/wavlm/config.json b/TTS/vc/layers/freevc/wavlm/config.json similarity index 100% rename from TTS/vc/modules/freevc/wavlm/config.json rename to TTS/vc/layers/freevc/wavlm/config.json diff --git a/TTS/vc/modules/freevc/wavlm/modules.py b/TTS/vc/layers/freevc/wavlm/modules.py similarity index 100% rename from TTS/vc/modules/freevc/wavlm/modules.py rename to TTS/vc/layers/freevc/wavlm/modules.py diff --git a/TTS/vc/modules/freevc/wavlm/wavlm.py b/TTS/vc/layers/freevc/wavlm/wavlm.py similarity index 99% rename from TTS/vc/modules/freevc/wavlm/wavlm.py rename to TTS/vc/layers/freevc/wavlm/wavlm.py index 10dd09ed..775f3e59 100644 --- a/TTS/vc/modules/freevc/wavlm/wavlm.py +++ b/TTS/vc/layers/freevc/wavlm/wavlm.py @@ -17,7 +17,7 @@ import torch.nn as nn import torch.nn.functional as F from torch.nn import LayerNorm -from TTS.vc.modules.freevc.wavlm.modules import ( +from TTS.vc.layers.freevc.wavlm.modules import ( Fp32GroupNorm, Fp32LayerNorm, GLU_Linear, diff --git a/TTS/vc/models/freevc.py b/TTS/vc/models/freevc.py index 62559de5..c654219c 100644 --- a/TTS/vc/models/freevc.py +++ b/TTS/vc/models/freevc.py @@ -12,17 +12,16 @@ from torch.nn.utils.parametrizations import weight_norm from torch.nn.utils.parametrize import remove_parametrizations from trainer.io import load_fsspec -import TTS.vc.modules.freevc.commons as commons -import TTS.vc.modules.freevc.modules as modules +import TTS.vc.layers.freevc.modules as modules from TTS.tts.layers.vits.discriminator import DiscriminatorS from TTS.tts.utils.helpers import sequence_mask from TTS.tts.utils.speakers import SpeakerManager from TTS.vc.configs.freevc_config import FreeVCConfig +from TTS.vc.layers.freevc.commons import init_weights, rand_slice_segments +from TTS.vc.layers.freevc.mel_processing import mel_spectrogram_torch +from TTS.vc.layers.freevc.speaker_encoder.speaker_encoder import SpeakerEncoder as SpeakerEncoderEx +from TTS.vc.layers.freevc.wavlm import get_wavlm from TTS.vc.models.base_vc import BaseVC -from TTS.vc.modules.freevc.commons import init_weights -from TTS.vc.modules.freevc.mel_processing import mel_spectrogram_torch -from TTS.vc.modules.freevc.speaker_encoder.speaker_encoder import SpeakerEncoder as SpeakerEncoderEx -from TTS.vc.modules.freevc.wavlm import get_wavlm from TTS.vocoder.models.hifigan_discriminator import DiscriminatorP logger = logging.getLogger(__name__) @@ -385,7 +384,7 @@ class FreeVC(BaseVC): z_p = self.flow(z, spec_mask, g=g) # Randomly slice z and compute o using dec - z_slice, ids_slice = commons.rand_slice_segments(z, spec_lengths, self.segment_size) + z_slice, ids_slice = rand_slice_segments(z, spec_lengths, self.segment_size) o = self.dec(z_slice, g=g) return o, ids_slice, spec_mask, (z, z_p, m_p, logs_p, m_q, logs_q)