From 4c829e74a1399ab083b566a70c1b7e879eda6e1e Mon Sep 17 00:00:00 2001
From: Matthew Boakes
Date: Wed, 5 Apr 2023 00:59:20 +0100
Subject: [PATCH 1/2] Update Librosa Version To V0.10.0

---
 TTS/tts/models/vits.py              | 4 ++--
 TTS/utils/audio/numpy_transforms.py | 4 ++--
 TTS/utils/audio/processor.py        | 6 +++---
 TTS/utils/audio/torch_transforms.py | 4 ++--
 requirements.txt                    | 2 +-
 5 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/TTS/tts/models/vits.py b/TTS/tts/models/vits.py
index f3b80740..73095b34 100644
--- a/TTS/tts/models/vits.py
+++ b/TTS/tts/models/vits.py
@@ -149,7 +149,7 @@ def spec_to_mel(spec, n_fft, num_mels, sample_rate, fmin, fmax):
     dtype_device = str(spec.dtype) + "_" + str(spec.device)
     fmax_dtype_device = str(fmax) + "_" + dtype_device
     if fmax_dtype_device not in mel_basis:
-        mel = librosa_mel_fn(sample_rate, n_fft, num_mels, fmin, fmax)
+        mel = librosa_mel_fn(sr=sample_rate, n_fft=n_fft, n_mels=num_mels, fmin=fmin, fmax=fmax)
         mel_basis[fmax_dtype_device] = torch.from_numpy(mel).to(dtype=spec.dtype, device=spec.device)
     mel = torch.matmul(mel_basis[fmax_dtype_device], spec)
     mel = amp_to_db(mel)
@@ -176,7 +176,7 @@ def wav_to_mel(y, n_fft, num_mels, sample_rate, hop_length, win_length, fmin, fm
     fmax_dtype_device = str(fmax) + "_" + dtype_device
     wnsize_dtype_device = str(win_length) + "_" + dtype_device
     if fmax_dtype_device not in mel_basis:
-        mel = librosa_mel_fn(sample_rate, n_fft, num_mels, fmin, fmax)
+        mel = librosa_mel_fn(sr=sample_rate, n_fft=n_fft, n_mels=num_mels, fmin=fmin, fmax=fmax)
         mel_basis[fmax_dtype_device] = torch.from_numpy(mel).to(dtype=y.dtype, device=y.device)
     if wnsize_dtype_device not in hann_window:
         hann_window[wnsize_dtype_device] = torch.hann_window(win_length).to(dtype=y.dtype, device=y.device)
diff --git a/TTS/utils/audio/numpy_transforms.py b/TTS/utils/audio/numpy_transforms.py
index 60f8e0dd..2aa6cce6 100644
--- a/TTS/utils/audio/numpy_transforms.py
+++ b/TTS/utils/audio/numpy_transforms.py
@@ -269,7 +269,7 @@ def compute_f0(
         np.ndarray: Pitch. Shape :math:`[T_pitch,]`. :math:`T_pitch == T_wav / hop_length`

     Examples:
-        >>> WAV_FILE = filename = librosa.util.example_audio_file()
+        >>> WAV_FILE = filename = librosa.example('vibeace')
         >>> from TTS.config import BaseAudioConfig
         >>> from TTS.utils.audio import AudioProcessor
         >>> conf = BaseAudioConfig(pitch_fmax=640, pitch_fmin=1)
@@ -310,7 +310,7 @@ def compute_energy(y: np.ndarray, **kwargs) -> np.ndarray:
     Returns:
         np.ndarray: energy. Shape :math:`[T_energy,]`. :math:`T_energy == T_wav / hop_length`
     Examples:
-        >>> WAV_FILE = filename = librosa.util.example_audio_file()
+        >>> WAV_FILE = filename = librosa.example('vibeace')
         >>> from TTS.config import BaseAudioConfig
         >>> from TTS.utils.audio import AudioProcessor
         >>> conf = BaseAudioConfig()
diff --git a/TTS/utils/audio/processor.py b/TTS/utils/audio/processor.py
index c872efa3..47fcc824 100644
--- a/TTS/utils/audio/processor.py
+++ b/TTS/utils/audio/processor.py
@@ -243,7 +243,7 @@ class AudioProcessor(object):
         if self.mel_fmax is not None:
             assert self.mel_fmax <= self.sample_rate // 2
         return librosa.filters.mel(
-            self.sample_rate, self.fft_size, n_mels=self.num_mels, fmin=self.mel_fmin, fmax=self.mel_fmax
+            sr=self.sample_rate, n_fft=self.fft_size, n_mels=self.num_mels, fmin=self.mel_fmin, fmax=self.mel_fmax
         )

     def _stft_parameters(
@@ -569,7 +569,7 @@ class AudioProcessor(object):
             np.ndarray: Pitch.

         Examples:
-            >>> WAV_FILE = filename = librosa.util.example_audio_file()
+            >>> WAV_FILE = filename = librosa.example('vibeace')
             >>> from TTS.config import BaseAudioConfig
             >>> from TTS.utils.audio import AudioProcessor
             >>> conf = BaseAudioConfig(pitch_fmax=640, pitch_fmin=1)
@@ -711,7 +711,7 @@ class AudioProcessor(object):
         Args:
             filename (str): Path to the wav file.
         """
-        return librosa.get_duration(filename)
+        return librosa.get_duration(path=filename)

     @staticmethod
     def mulaw_encode(wav: np.ndarray, qc: int) -> np.ndarray:
diff --git a/TTS/utils/audio/torch_transforms.py b/TTS/utils/audio/torch_transforms.py
index d4523ad0..d7eed705 100644
--- a/TTS/utils/audio/torch_transforms.py
+++ b/TTS/utils/audio/torch_transforms.py
@@ -144,8 +144,8 @@ class TorchSTFT(nn.Module):  # pylint: disable=abstract-method

     def _build_mel_basis(self):
         mel_basis = librosa.filters.mel(
-            self.sample_rate,
-            self.n_fft,
+            sr=self.sample_rate,
+            n_fft=self.n_fft,
             n_mels=self.n_mels,
             fmin=self.mel_fmin,
             fmax=self.mel_fmax,
diff --git a/requirements.txt b/requirements.txt
index f8730cc4..94a0bee6 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,7 +6,7 @@ scipy>=1.4.0
 torch>=1.7
 torchaudio
 soundfile
-librosa==0.8.0
+librosa>=0.10.0
 numba==0.55.1;python_version<"3.9"
 numba==0.56.4;python_version>="3.9"
 inflect==5.6.0

From 5bdd6f7c185bb53736676f85017a34cd57cad32a Mon Sep 17 00:00:00 2001
From: Matthew Boakes
Date: Thu, 6 Apr 2023 12:36:24 +0100
Subject: [PATCH 2/2] Updated Librosa Dependency Specification

---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 94a0bee6..89ea7889 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,7 +6,7 @@ scipy>=1.4.0
 torch>=1.7
 torchaudio
 soundfile
-librosa>=0.10.0
+librosa==0.10.0.*
 numba==0.55.1;python_version<"3.9"
 numba==0.56.4;python_version>="3.9"
 inflect==5.6.0
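
The hunks above all move to the keyword-only call style that librosa 0.10 requires and pin the dependency accordingly. A minimal sketch of those calls outside the patch itself, assuming librosa >= 0.10 is installed; the sample rate, FFT size, and mel-band count below are illustrative values, not taken from the TTS configs, and librosa.example() downloads its audio clip on first use:

import librosa

# Mel filterbank construction: sr= and n_fft= must now be passed by keyword,
# positional arguments are no longer accepted.
mel_basis = librosa.filters.mel(sr=22050, n_fft=1024, n_mels=80, fmin=0, fmax=8000)
print(mel_basis.shape)  # (80, 513), i.e. (n_mels, 1 + n_fft // 2)

# librosa.example() replaces the removed librosa.util.example_audio_file().
wav_path = librosa.example("vibeace")

# get_duration() renamed its `filename` argument to `path`.
print(librosa.get_duration(path=wav_path))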