From 2df9bfa78eb338d1b0972c25f4d236403b4e032d Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Sat, 9 Nov 2024 18:37:08 +0100 Subject: [PATCH] refactor: handle deprecation of torch.cuda.amp.autocast (#144) torch.cuda.amp.autocast(args...) and torch.cpu.amp.autocast(args...) will be deprecated. Please use torch.autocast("cuda", args...) or torch.autocast("cpu", args...) instead. https://pytorch.org/docs/stable/amp.html --- TTS/encoder/models/lstm.py | 2 +- TTS/tts/layers/bark/load_model.py | 9 ++------- TTS/tts/layers/tortoise/diffusion_decoder.py | 3 +-- TTS/tts/models/delightful_tts.py | 7 +++---- TTS/tts/models/forward_tts.py | 3 +-- TTS/tts/models/glow_tts.py | 3 +-- TTS/tts/models/tacotron.py | 3 +-- TTS/tts/models/tacotron2.py | 3 +-- TTS/tts/models/vits.py | 7 +++---- 9 files changed, 14 insertions(+), 26 deletions(-) diff --git a/TTS/encoder/models/lstm.py b/TTS/encoder/models/lstm.py index 51852b5b..4e0a7523 100644 --- a/TTS/encoder/models/lstm.py +++ b/TTS/encoder/models/lstm.py @@ -86,7 +86,7 @@ class LSTMSpeakerEncoder(BaseEncoder): - x: :math:`(N, 1, T_{in})` or :math:`(N, D_{spec}, T_{in})` """ with torch.no_grad(): - with torch.cuda.amp.autocast(enabled=False): + with torch.autocast("cuda", enabled=False): if self.use_torch_spec: x.squeeze_(1) x = self.torch_spec(x) diff --git a/TTS/tts/layers/bark/load_model.py b/TTS/tts/layers/bark/load_model.py index 72eca30a..6b7caab9 100644 --- a/TTS/tts/layers/bark/load_model.py +++ b/TTS/tts/layers/bark/load_model.py @@ -12,13 +12,8 @@ from TTS.tts.layers.bark.model import GPT, GPTConfig from TTS.tts.layers.bark.model_fine import FineGPT, FineGPTConfig from TTS.utils.generic_utils import is_pytorch_at_least_2_4 -if ( - torch.cuda.is_available() - and hasattr(torch.cuda, "amp") - and hasattr(torch.cuda.amp, "autocast") - and torch.cuda.is_bf16_supported() -): - autocast = functools.partial(torch.cuda.amp.autocast, dtype=torch.bfloat16) +if torch.cuda.is_available() and torch.cuda.is_bf16_supported(): + autocast = functools.partial(torch.autocast, device_type="cuda", dtype=torch.bfloat16) else: @contextlib.contextmanager diff --git a/TTS/tts/layers/tortoise/diffusion_decoder.py b/TTS/tts/layers/tortoise/diffusion_decoder.py index 0d3cf769..f71eaf17 100644 --- a/TTS/tts/layers/tortoise/diffusion_decoder.py +++ b/TTS/tts/layers/tortoise/diffusion_decoder.py @@ -5,7 +5,6 @@ from abc import abstractmethod import torch import torch.nn as nn import torch.nn.functional as F -from torch import autocast from TTS.tts.layers.tortoise.arch_utils import AttentionBlock, normalization @@ -385,7 +384,7 @@ class DiffusionTts(nn.Module): unused_params.extend(list(lyr.parameters())) else: # First and last blocks will have autocast disabled for improved precision. - with autocast(x.device.type, enabled=self.enable_fp16 and i != 0): + with torch.autocast(x.device.type, enabled=self.enable_fp16 and i != 0): x = lyr(x, time_emb) x = x.float() diff --git a/TTS/tts/models/delightful_tts.py b/TTS/tts/models/delightful_tts.py index a938a3a4..c6f15a79 100644 --- a/TTS/tts/models/delightful_tts.py +++ b/TTS/tts/models/delightful_tts.py @@ -12,7 +12,6 @@ import torchaudio from coqpit import Coqpit from librosa.filters import mel as librosa_mel_fn from torch import nn -from torch.cuda.amp.autocast_mode import autocast from torch.nn import functional as F from torch.utils.data import DataLoader from torch.utils.data.sampler import WeightedRandomSampler @@ -952,7 +951,7 @@ class DelightfulTTS(BaseTTSE2E): ) # compute loss - with autocast(enabled=False): # use float32 for the criterion + with torch.autocast("cuda", enabled=False): # use float32 for the criterion loss_dict = criterion[optimizer_idx]( scores_disc_fake=scores_d_fake, scores_disc_real=scores_d_real, @@ -963,7 +962,7 @@ class DelightfulTTS(BaseTTSE2E): if optimizer_idx == 1: mel = batch["mel_input"] # compute melspec segment - with autocast(enabled=False): + with torch.autocast("cuda", enabled=False): mel_slice = segment( mel.float(), self.model_outputs_cache["slice_ids"], self.args.spec_segment_size, pad_short=True ) @@ -991,7 +990,7 @@ class DelightfulTTS(BaseTTSE2E): ) # compute losses - with autocast(enabled=True): # use float32 for the criterion + with torch.autocast("cuda", enabled=True): # use float32 for the criterion loss_dict = criterion[optimizer_idx]( mel_output=self.model_outputs_cache["acoustic_model_outputs"].transpose(1, 2), mel_target=batch["mel_input"], diff --git a/TTS/tts/models/forward_tts.py b/TTS/tts/models/forward_tts.py index e7bc8637..d449e580 100644 --- a/TTS/tts/models/forward_tts.py +++ b/TTS/tts/models/forward_tts.py @@ -6,7 +6,6 @@ import torch from coqpit import Coqpit from monotonic_alignment_search import maximum_path from torch import nn -from torch.cuda.amp.autocast_mode import autocast from trainer.io import load_fsspec from TTS.tts.layers.feed_forward.decoder import Decoder @@ -744,7 +743,7 @@ class ForwardTTS(BaseTTS): if self.use_aligner: durations = outputs["o_alignment_dur"] # use float32 in AMP - with autocast(enabled=False): + with torch.autocast("cuda", enabled=False): # compute loss loss_dict = criterion( decoder_output=outputs["model_outputs"], diff --git a/TTS/tts/models/glow_tts.py b/TTS/tts/models/glow_tts.py index 5ea69865..5bf47131 100644 --- a/TTS/tts/models/glow_tts.py +++ b/TTS/tts/models/glow_tts.py @@ -6,7 +6,6 @@ import torch from coqpit import Coqpit from monotonic_alignment_search import maximum_path from torch import nn -from torch.cuda.amp.autocast_mode import autocast from torch.nn import functional as F from trainer.io import load_fsspec @@ -416,7 +415,7 @@ class GlowTTS(BaseTTS): aux_input={"d_vectors": d_vectors, "speaker_ids": speaker_ids}, ) - with autocast(enabled=False): # avoid mixed_precision in criterion + with torch.autocast("cuda", enabled=False): # avoid mixed_precision in criterion loss_dict = criterion( outputs["z"].float(), outputs["y_mean"].float(), diff --git a/TTS/tts/models/tacotron.py b/TTS/tts/models/tacotron.py index 400a86d0..5d3efd20 100644 --- a/TTS/tts/models/tacotron.py +++ b/TTS/tts/models/tacotron.py @@ -4,7 +4,6 @@ from typing import Dict, List, Tuple, Union import torch from torch import nn -from torch.cuda.amp.autocast_mode import autocast from trainer.trainer_utils import get_optimizer, get_scheduler from TTS.tts.layers.tacotron.capacitron_layers import CapacitronVAE @@ -310,7 +309,7 @@ class Tacotron(BaseTacotron): alignment_lengths = mel_lengths // self.decoder.r # compute loss - with autocast(enabled=False): # use float32 for the criterion + with torch.autocast("cuda", enabled=False): # use float32 for the criterion loss_dict = criterion( outputs["model_outputs"].float(), outputs["decoder_outputs"].float(), diff --git a/TTS/tts/models/tacotron2.py b/TTS/tts/models/tacotron2.py index 4b1317f4..2716a397 100644 --- a/TTS/tts/models/tacotron2.py +++ b/TTS/tts/models/tacotron2.py @@ -4,7 +4,6 @@ from typing import Dict, List, Union import torch from torch import nn -from torch.cuda.amp.autocast_mode import autocast from trainer.trainer_utils import get_optimizer, get_scheduler from TTS.tts.layers.tacotron.capacitron_layers import CapacitronVAE @@ -338,7 +337,7 @@ class Tacotron2(BaseTacotron): alignment_lengths = mel_lengths // self.decoder.r # compute loss - with autocast(enabled=False): # use float32 for the criterion + with torch.autocast("cuda", enabled=False): # use float32 for the criterion loss_dict = criterion( outputs["model_outputs"].float(), outputs["decoder_outputs"].float(), diff --git a/TTS/tts/models/vits.py b/TTS/tts/models/vits.py index af803a0f..432b29f5 100644 --- a/TTS/tts/models/vits.py +++ b/TTS/tts/models/vits.py @@ -13,7 +13,6 @@ from coqpit import Coqpit from librosa.filters import mel as librosa_mel_fn from monotonic_alignment_search import maximum_path from torch import nn -from torch.cuda.amp.autocast_mode import autocast from torch.nn import functional as F from torch.utils.data import DataLoader from torch.utils.data.sampler import WeightedRandomSampler @@ -1278,7 +1277,7 @@ class Vits(BaseTTS): ) # compute loss - with autocast(enabled=False): # use float32 for the criterion + with torch.autocast("cuda", enabled=False): # use float32 for the criterion loss_dict = criterion[optimizer_idx]( scores_disc_real, scores_disc_fake, @@ -1289,7 +1288,7 @@ class Vits(BaseTTS): mel = batch["mel"] # compute melspec segment - with autocast(enabled=False): + with torch.autocast("cuda", enabled=False): if self.args.encoder_sample_rate: spec_segment_size = self.spec_segment_size * int(self.interpolate_factor) else: @@ -1316,7 +1315,7 @@ class Vits(BaseTTS): ) # compute losses - with autocast(enabled=False): # use float32 for the criterion + with torch.autocast("cuda", enabled=False): # use float32 for the criterion loss_dict = criterion[optimizer_idx]( mel_slice_hat=mel_slice.float(), mel_slice=mel_slice_hat.float(),