refactor: handle deprecation of torch.cuda.amp.autocast (#144)

`torch.cuda.amp.autocast(args...)` and `torch.cpu.amp.autocast(args...)` will be deprecated. Use `torch.autocast("cuda", args...)` or `torch.autocast("cpu", args...)` instead. See https://pytorch.org/docs/stable/amp.html for details.
2024-11-09 18:37:08 +01:00 · 2024-11-09 18:37:08 +01:00 · 2df9bfa78e
parent 540e8d6cf2
commit 2df9bfa78e
9 changed files with 14 additions and 26 deletions
--- a/TTS/encoder/models/lstm.py
+++ b/TTS/encoder/models/lstm.py
@ -86,7 +86,7 @@ class LSTMSpeakerEncoder(BaseEncoder):
            - x: :math:`(N, 1, T_{in})` or :math:`(N, D_{spec}, T_{in})`
        """
        with torch.no_grad():
-            with torch.cuda.amp.autocast(enabled=False):
+            with torch.autocast("cuda", enabled=False):
                if self.use_torch_spec:
                    x.squeeze_(1)
                    x = self.torch_spec(x)
--- a/TTS/tts/layers/bark/load_model.py
+++ b/TTS/tts/layers/bark/load_model.py
@ -12,13 +12,8 @@ from TTS.tts.layers.bark.model import GPT, GPTConfig
 from TTS.tts.layers.bark.model_fine import FineGPT, FineGPTConfig
 from TTS.utils.generic_utils import is_pytorch_at_least_2_4

-if (
-    torch.cuda.is_available()
-    and hasattr(torch.cuda, "amp")
-    and hasattr(torch.cuda.amp, "autocast")
-    and torch.cuda.is_bf16_supported()
-):
-    autocast = functools.partial(torch.cuda.amp.autocast, dtype=torch.bfloat16)
+if torch.cuda.is_available() and torch.cuda.is_bf16_supported():
+    autocast = functools.partial(torch.autocast, device_type="cuda", dtype=torch.bfloat16)
 else:

    @contextlib.contextmanager
--- a/TTS/tts/layers/tortoise/diffusion_decoder.py
+++ b/TTS/tts/layers/tortoise/diffusion_decoder.py
@ -5,7 +5,6 @@ from abc import abstractmethod
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
-from torch import autocast

 from TTS.tts.layers.tortoise.arch_utils import AttentionBlock, normalization

@ -385,7 +384,7 @@ class DiffusionTts(nn.Module):
                unused_params.extend(list(lyr.parameters()))
            else:
                # First and last blocks will have autocast disabled for improved precision.
-                with autocast(x.device.type, enabled=self.enable_fp16 and i != 0):
+                with torch.autocast(x.device.type, enabled=self.enable_fp16 and i != 0):
                    x = lyr(x, time_emb)

        x = x.float()
--- a/TTS/tts/models/delightful_tts.py
+++ b/TTS/tts/models/delightful_tts.py
@ -12,7 +12,6 @@ import torchaudio
 from coqpit import Coqpit
 from librosa.filters import mel as librosa_mel_fn
 from torch import nn
-from torch.cuda.amp.autocast_mode import autocast
 from torch.nn import functional as F
 from torch.utils.data import DataLoader
 from torch.utils.data.sampler import WeightedRandomSampler
@ -952,7 +951,7 @@ class DelightfulTTS(BaseTTSE2E):
                )

                # compute loss
-                with autocast(enabled=False):  # use float32 for the criterion
+                with torch.autocast("cuda", enabled=False):  # use float32 for the criterion
                    loss_dict = criterion[optimizer_idx](
                        scores_disc_fake=scores_d_fake,
                        scores_disc_real=scores_d_real,
@ -963,7 +962,7 @@ class DelightfulTTS(BaseTTSE2E):
        if optimizer_idx == 1:
            mel = batch["mel_input"]
            # compute melspec segment
-            with autocast(enabled=False):
+            with torch.autocast("cuda", enabled=False):
                mel_slice = segment(
                    mel.float(), self.model_outputs_cache["slice_ids"], self.args.spec_segment_size, pad_short=True
                )
@ -991,7 +990,7 @@ class DelightfulTTS(BaseTTSE2E):
                )

            # compute losses
-            with autocast(enabled=True):  # use float32 for the criterion
+            with torch.autocast("cuda", enabled=True):  # use float32 for the criterion
                loss_dict = criterion[optimizer_idx](
                    mel_output=self.model_outputs_cache["acoustic_model_outputs"].transpose(1, 2),
                    mel_target=batch["mel_input"],
--- a/TTS/tts/models/forward_tts.py
+++ b/TTS/tts/models/forward_tts.py
@ -6,7 +6,6 @@ import torch
 from coqpit import Coqpit
 from monotonic_alignment_search import maximum_path
 from torch import nn
-from torch.cuda.amp.autocast_mode import autocast
 from trainer.io import load_fsspec

 from TTS.tts.layers.feed_forward.decoder import Decoder
@ -744,7 +743,7 @@ class ForwardTTS(BaseTTS):
        if self.use_aligner:
            durations = outputs["o_alignment_dur"]
        # use float32 in AMP
-        with autocast(enabled=False):
+        with torch.autocast("cuda", enabled=False):
            # compute loss
            loss_dict = criterion(
                decoder_output=outputs["model_outputs"],
--- a/TTS/tts/models/glow_tts.py
+++ b/TTS/tts/models/glow_tts.py
@ -6,7 +6,6 @@ import torch
 from coqpit import Coqpit
 from monotonic_alignment_search import maximum_path
 from torch import nn
-from torch.cuda.amp.autocast_mode import autocast
 from torch.nn import functional as F
 from trainer.io import load_fsspec

@ -416,7 +415,7 @@ class GlowTTS(BaseTTS):
                aux_input={"d_vectors": d_vectors, "speaker_ids": speaker_ids},
            )

-            with autocast(enabled=False):  # avoid mixed_precision in criterion
+            with torch.autocast("cuda", enabled=False):  # avoid mixed_precision in criterion
                loss_dict = criterion(
                    outputs["z"].float(),
                    outputs["y_mean"].float(),
--- a/TTS/tts/models/tacotron.py
+++ b/TTS/tts/models/tacotron.py
@ -4,7 +4,6 @@ from typing import Dict, List, Tuple, Union

 import torch
 from torch import nn
-from torch.cuda.amp.autocast_mode import autocast
 from trainer.trainer_utils import get_optimizer, get_scheduler

 from TTS.tts.layers.tacotron.capacitron_layers import CapacitronVAE
@ -310,7 +309,7 @@ class Tacotron(BaseTacotron):
            alignment_lengths = mel_lengths // self.decoder.r

        # compute loss
-        with autocast(enabled=False):  # use float32 for the criterion
+        with torch.autocast("cuda", enabled=False):  # use float32 for the criterion
            loss_dict = criterion(
                outputs["model_outputs"].float(),
                outputs["decoder_outputs"].float(),
--- a/TTS/tts/models/tacotron2.py
+++ b/TTS/tts/models/tacotron2.py
@ -4,7 +4,6 @@ from typing import Dict, List, Union

 import torch
 from torch import nn
-from torch.cuda.amp.autocast_mode import autocast
 from trainer.trainer_utils import get_optimizer, get_scheduler

 from TTS.tts.layers.tacotron.capacitron_layers import CapacitronVAE
@ -338,7 +337,7 @@ class Tacotron2(BaseTacotron):
            alignment_lengths = mel_lengths // self.decoder.r

        # compute loss
-        with autocast(enabled=False):  # use float32 for the criterion
+        with torch.autocast("cuda", enabled=False):  # use float32 for the criterion
            loss_dict = criterion(
                outputs["model_outputs"].float(),
                outputs["decoder_outputs"].float(),
--- a/TTS/tts/models/vits.py
+++ b/TTS/tts/models/vits.py
@ -13,7 +13,6 @@ from coqpit import Coqpit
 from librosa.filters import mel as librosa_mel_fn
 from monotonic_alignment_search import maximum_path
 from torch import nn
-from torch.cuda.amp.autocast_mode import autocast
 from torch.nn import functional as F
 from torch.utils.data import DataLoader
 from torch.utils.data.sampler import WeightedRandomSampler
@ -1278,7 +1277,7 @@ class Vits(BaseTTS):
            )

            # compute loss
-            with autocast(enabled=False):  # use float32 for the criterion
+            with torch.autocast("cuda", enabled=False):  # use float32 for the criterion
                loss_dict = criterion[optimizer_idx](
                    scores_disc_real,
                    scores_disc_fake,
@ -1289,7 +1288,7 @@ class Vits(BaseTTS):
            mel = batch["mel"]

            # compute melspec segment
-            with autocast(enabled=False):
+            with torch.autocast("cuda", enabled=False):
                if self.args.encoder_sample_rate:
                    spec_segment_size = self.spec_segment_size * int(self.interpolate_factor)
                else:
@ -1316,7 +1315,7 @@ class Vits(BaseTTS):
            )

            # compute losses
-            with autocast(enabled=False):  # use float32 for the criterion
+            with torch.autocast("cuda", enabled=False):  # use float32 for the criterion
                loss_dict = criterion[optimizer_idx](
                    mel_slice_hat=mel_slice.float(),
                    mel_slice=mel_slice_hat.float(),