refactor: handle deprecation of torch.cuda.amp.autocast (#144)

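According to the PyTorch AMP documentation, torch.cuda.amp.autocast(args...)
and torch.cpu.amp.autocast(args...) will be deprecated; the documentation
recommends torch.autocast("cuda", args...) or torch.autocast("cpu", args...)
instead. This commit replaces the deprecated CUDA autocast calls and their
imports with torch.autocast("cuda", ...).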

https://pytorch.org/docs/stable/amp.html
This commit is contained in:
Enno Hermann 2024-11-09 18:37:08 +01:00 committed by GitHub
parent 540e8d6cf2
commit 2df9bfa78e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 14 additions and 26 deletions

View File

@ -86,7 +86,7 @@ class LSTMSpeakerEncoder(BaseEncoder):
- x: :math:`(N, 1, T_{in})` or :math:`(N, D_{spec}, T_{in})`
"""
with torch.no_grad():
with torch.cuda.amp.autocast(enabled=False):
with torch.autocast("cuda", enabled=False):
if self.use_torch_spec:
x.squeeze_(1)
x = self.torch_spec(x)

View File

@ -12,13 +12,8 @@ from TTS.tts.layers.bark.model import GPT, GPTConfig
from TTS.tts.layers.bark.model_fine import FineGPT, FineGPTConfig
from TTS.utils.generic_utils import is_pytorch_at_least_2_4
if (
torch.cuda.is_available()
and hasattr(torch.cuda, "amp")
and hasattr(torch.cuda.amp, "autocast")
and torch.cuda.is_bf16_supported()
):
autocast = functools.partial(torch.cuda.amp.autocast, dtype=torch.bfloat16)
if torch.cuda.is_available() and torch.cuda.is_bf16_supported():
autocast = functools.partial(torch.autocast, device_type="cuda", dtype=torch.bfloat16)
else:
@contextlib.contextmanager

View File

@ -5,7 +5,6 @@ from abc import abstractmethod
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import autocast
from TTS.tts.layers.tortoise.arch_utils import AttentionBlock, normalization
@ -385,7 +384,7 @@ class DiffusionTts(nn.Module):
unused_params.extend(list(lyr.parameters()))
else:
# First and last blocks will have autocast disabled for improved precision.
with autocast(x.device.type, enabled=self.enable_fp16 and i != 0):
with torch.autocast(x.device.type, enabled=self.enable_fp16 and i != 0):
x = lyr(x, time_emb)
x = x.float()

View File

@ -12,7 +12,6 @@ import torchaudio
from coqpit import Coqpit
from librosa.filters import mel as librosa_mel_fn
from torch import nn
from torch.cuda.amp.autocast_mode import autocast
from torch.nn import functional as F
from torch.utils.data import DataLoader
from torch.utils.data.sampler import WeightedRandomSampler
@ -952,7 +951,7 @@ class DelightfulTTS(BaseTTSE2E):
)
# compute loss
with autocast(enabled=False): # use float32 for the criterion
with torch.autocast("cuda", enabled=False): # use float32 for the criterion
loss_dict = criterion[optimizer_idx](
scores_disc_fake=scores_d_fake,
scores_disc_real=scores_d_real,
@ -963,7 +962,7 @@ class DelightfulTTS(BaseTTSE2E):
if optimizer_idx == 1:
mel = batch["mel_input"]
# compute melspec segment
with autocast(enabled=False):
with torch.autocast("cuda", enabled=False):
mel_slice = segment(
mel.float(), self.model_outputs_cache["slice_ids"], self.args.spec_segment_size, pad_short=True
)
@ -991,7 +990,7 @@ class DelightfulTTS(BaseTTSE2E):
)
# compute losses
with autocast(enabled=True): # use float32 for the criterion
with torch.autocast("cuda", enabled=True): # use float32 for the criterion
loss_dict = criterion[optimizer_idx](
mel_output=self.model_outputs_cache["acoustic_model_outputs"].transpose(1, 2),
mel_target=batch["mel_input"],

View File

@ -6,7 +6,6 @@ import torch
from coqpit import Coqpit
from monotonic_alignment_search import maximum_path
from torch import nn
from torch.cuda.amp.autocast_mode import autocast
from trainer.io import load_fsspec
from TTS.tts.layers.feed_forward.decoder import Decoder
@ -744,7 +743,7 @@ class ForwardTTS(BaseTTS):
if self.use_aligner:
durations = outputs["o_alignment_dur"]
# use float32 in AMP
with autocast(enabled=False):
with torch.autocast("cuda", enabled=False):
# compute loss
loss_dict = criterion(
decoder_output=outputs["model_outputs"],

View File

@ -6,7 +6,6 @@ import torch
from coqpit import Coqpit
from monotonic_alignment_search import maximum_path
from torch import nn
from torch.cuda.amp.autocast_mode import autocast
from torch.nn import functional as F
from trainer.io import load_fsspec
@ -416,7 +415,7 @@ class GlowTTS(BaseTTS):
aux_input={"d_vectors": d_vectors, "speaker_ids": speaker_ids},
)
with autocast(enabled=False): # avoid mixed_precision in criterion
with torch.autocast("cuda", enabled=False): # avoid mixed_precision in criterion
loss_dict = criterion(
outputs["z"].float(),
outputs["y_mean"].float(),

View File

@ -4,7 +4,6 @@ from typing import Dict, List, Tuple, Union
import torch
from torch import nn
from torch.cuda.amp.autocast_mode import autocast
from trainer.trainer_utils import get_optimizer, get_scheduler
from TTS.tts.layers.tacotron.capacitron_layers import CapacitronVAE
@ -310,7 +309,7 @@ class Tacotron(BaseTacotron):
alignment_lengths = mel_lengths // self.decoder.r
# compute loss
with autocast(enabled=False): # use float32 for the criterion
with torch.autocast("cuda", enabled=False): # use float32 for the criterion
loss_dict = criterion(
outputs["model_outputs"].float(),
outputs["decoder_outputs"].float(),

View File

@ -4,7 +4,6 @@ from typing import Dict, List, Union
import torch
from torch import nn
from torch.cuda.amp.autocast_mode import autocast
from trainer.trainer_utils import get_optimizer, get_scheduler
from TTS.tts.layers.tacotron.capacitron_layers import CapacitronVAE
@ -338,7 +337,7 @@ class Tacotron2(BaseTacotron):
alignment_lengths = mel_lengths // self.decoder.r
# compute loss
with autocast(enabled=False): # use float32 for the criterion
with torch.autocast("cuda", enabled=False): # use float32 for the criterion
loss_dict = criterion(
outputs["model_outputs"].float(),
outputs["decoder_outputs"].float(),

View File

@ -13,7 +13,6 @@ from coqpit import Coqpit
from librosa.filters import mel as librosa_mel_fn
from monotonic_alignment_search import maximum_path
from torch import nn
from torch.cuda.amp.autocast_mode import autocast
from torch.nn import functional as F
from torch.utils.data import DataLoader
from torch.utils.data.sampler import WeightedRandomSampler
@ -1278,7 +1277,7 @@ class Vits(BaseTTS):
)
# compute loss
with autocast(enabled=False): # use float32 for the criterion
with torch.autocast("cuda", enabled=False): # use float32 for the criterion
loss_dict = criterion[optimizer_idx](
scores_disc_real,
scores_disc_fake,
@ -1289,7 +1288,7 @@ class Vits(BaseTTS):
mel = batch["mel"]
# compute melspec segment
with autocast(enabled=False):
with torch.autocast("cuda", enabled=False):
if self.args.encoder_sample_rate:
spec_segment_size = self.spec_segment_size * int(self.interpolate_factor)
else:
@ -1316,7 +1315,7 @@ class Vits(BaseTTS):
)
# compute losses
with autocast(enabled=False): # use float32 for the criterion
with torch.autocast("cuda", enabled=False): # use float32 for the criterion
loss_dict = criterion[optimizer_idx](
mel_slice_hat=mel_slice.float(),
mel_slice=mel_slice_hat.float(),