mirror of https://github.com/coqui-ai/TTS.git
refactor: handle deprecation of torch.cuda.amp.autocast (#144)
torch.cuda.amp.autocast(args...) and torch.cpu.amp.autocast(args...) are being deprecated. Use torch.autocast("cuda", args...) or torch.autocast("cpu", args...) instead. See: https://pytorch.org/docs/stable/amp.html
This commit is contained in:
parent
540e8d6cf2
commit
2df9bfa78e
|
@ -86,7 +86,7 @@ class LSTMSpeakerEncoder(BaseEncoder):
|
|||
- x: :math:`(N, 1, T_{in})` or :math:`(N, D_{spec}, T_{in})`
|
||||
"""
|
||||
with torch.no_grad():
|
||||
with torch.cuda.amp.autocast(enabled=False):
|
||||
with torch.autocast("cuda", enabled=False):
|
||||
if self.use_torch_spec:
|
||||
x.squeeze_(1)
|
||||
x = self.torch_spec(x)
|
||||
|
|
|
@ -12,13 +12,8 @@ from TTS.tts.layers.bark.model import GPT, GPTConfig
|
|||
from TTS.tts.layers.bark.model_fine import FineGPT, FineGPTConfig
|
||||
from TTS.utils.generic_utils import is_pytorch_at_least_2_4
|
||||
|
||||
if (
|
||||
torch.cuda.is_available()
|
||||
and hasattr(torch.cuda, "amp")
|
||||
and hasattr(torch.cuda.amp, "autocast")
|
||||
and torch.cuda.is_bf16_supported()
|
||||
):
|
||||
autocast = functools.partial(torch.cuda.amp.autocast, dtype=torch.bfloat16)
|
||||
if torch.cuda.is_available() and torch.cuda.is_bf16_supported():
|
||||
autocast = functools.partial(torch.autocast, device_type="cuda", dtype=torch.bfloat16)
|
||||
else:
|
||||
|
||||
@contextlib.contextmanager
|
||||
|
|
|
@ -5,7 +5,6 @@ from abc import abstractmethod
|
|||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
from torch import autocast
|
||||
|
||||
from TTS.tts.layers.tortoise.arch_utils import AttentionBlock, normalization
|
||||
|
||||
|
@ -385,7 +384,7 @@ class DiffusionTts(nn.Module):
|
|||
unused_params.extend(list(lyr.parameters()))
|
||||
else:
|
||||
# First and last blocks will have autocast disabled for improved precision.
|
||||
with autocast(x.device.type, enabled=self.enable_fp16 and i != 0):
|
||||
with torch.autocast(x.device.type, enabled=self.enable_fp16 and i != 0):
|
||||
x = lyr(x, time_emb)
|
||||
|
||||
x = x.float()
|
||||
|
|
|
@ -12,7 +12,6 @@ import torchaudio
|
|||
from coqpit import Coqpit
|
||||
from librosa.filters import mel as librosa_mel_fn
|
||||
from torch import nn
|
||||
from torch.cuda.amp.autocast_mode import autocast
|
||||
from torch.nn import functional as F
|
||||
from torch.utils.data import DataLoader
|
||||
from torch.utils.data.sampler import WeightedRandomSampler
|
||||
|
@ -952,7 +951,7 @@ class DelightfulTTS(BaseTTSE2E):
|
|||
)
|
||||
|
||||
# compute loss
|
||||
with autocast(enabled=False): # use float32 for the criterion
|
||||
with torch.autocast("cuda", enabled=False): # use float32 for the criterion
|
||||
loss_dict = criterion[optimizer_idx](
|
||||
scores_disc_fake=scores_d_fake,
|
||||
scores_disc_real=scores_d_real,
|
||||
|
@ -963,7 +962,7 @@ class DelightfulTTS(BaseTTSE2E):
|
|||
if optimizer_idx == 1:
|
||||
mel = batch["mel_input"]
|
||||
# compute melspec segment
|
||||
with autocast(enabled=False):
|
||||
with torch.autocast("cuda", enabled=False):
|
||||
mel_slice = segment(
|
||||
mel.float(), self.model_outputs_cache["slice_ids"], self.args.spec_segment_size, pad_short=True
|
||||
)
|
||||
|
@ -991,7 +990,7 @@ class DelightfulTTS(BaseTTSE2E):
|
|||
)
|
||||
|
||||
# compute losses
|
||||
with autocast(enabled=True): # use float32 for the criterion
|
||||
with torch.autocast("cuda", enabled=True): # use float32 for the criterion
|
||||
loss_dict = criterion[optimizer_idx](
|
||||
mel_output=self.model_outputs_cache["acoustic_model_outputs"].transpose(1, 2),
|
||||
mel_target=batch["mel_input"],
|
||||
|
|
|
@ -6,7 +6,6 @@ import torch
|
|||
from coqpit import Coqpit
|
||||
from monotonic_alignment_search import maximum_path
|
||||
from torch import nn
|
||||
from torch.cuda.amp.autocast_mode import autocast
|
||||
from trainer.io import load_fsspec
|
||||
|
||||
from TTS.tts.layers.feed_forward.decoder import Decoder
|
||||
|
@ -744,7 +743,7 @@ class ForwardTTS(BaseTTS):
|
|||
if self.use_aligner:
|
||||
durations = outputs["o_alignment_dur"]
|
||||
# use float32 in AMP
|
||||
with autocast(enabled=False):
|
||||
with torch.autocast("cuda", enabled=False):
|
||||
# compute loss
|
||||
loss_dict = criterion(
|
||||
decoder_output=outputs["model_outputs"],
|
||||
|
|
|
@ -6,7 +6,6 @@ import torch
|
|||
from coqpit import Coqpit
|
||||
from monotonic_alignment_search import maximum_path
|
||||
from torch import nn
|
||||
from torch.cuda.amp.autocast_mode import autocast
|
||||
from torch.nn import functional as F
|
||||
from trainer.io import load_fsspec
|
||||
|
||||
|
@ -416,7 +415,7 @@ class GlowTTS(BaseTTS):
|
|||
aux_input={"d_vectors": d_vectors, "speaker_ids": speaker_ids},
|
||||
)
|
||||
|
||||
with autocast(enabled=False): # avoid mixed_precision in criterion
|
||||
with torch.autocast("cuda", enabled=False): # avoid mixed_precision in criterion
|
||||
loss_dict = criterion(
|
||||
outputs["z"].float(),
|
||||
outputs["y_mean"].float(),
|
||||
|
|
|
@ -4,7 +4,6 @@ from typing import Dict, List, Tuple, Union
|
|||
|
||||
import torch
|
||||
from torch import nn
|
||||
from torch.cuda.amp.autocast_mode import autocast
|
||||
from trainer.trainer_utils import get_optimizer, get_scheduler
|
||||
|
||||
from TTS.tts.layers.tacotron.capacitron_layers import CapacitronVAE
|
||||
|
@ -310,7 +309,7 @@ class Tacotron(BaseTacotron):
|
|||
alignment_lengths = mel_lengths // self.decoder.r
|
||||
|
||||
# compute loss
|
||||
with autocast(enabled=False): # use float32 for the criterion
|
||||
with torch.autocast("cuda", enabled=False): # use float32 for the criterion
|
||||
loss_dict = criterion(
|
||||
outputs["model_outputs"].float(),
|
||||
outputs["decoder_outputs"].float(),
|
||||
|
|
|
@ -4,7 +4,6 @@ from typing import Dict, List, Union
|
|||
|
||||
import torch
|
||||
from torch import nn
|
||||
from torch.cuda.amp.autocast_mode import autocast
|
||||
from trainer.trainer_utils import get_optimizer, get_scheduler
|
||||
|
||||
from TTS.tts.layers.tacotron.capacitron_layers import CapacitronVAE
|
||||
|
@ -338,7 +337,7 @@ class Tacotron2(BaseTacotron):
|
|||
alignment_lengths = mel_lengths // self.decoder.r
|
||||
|
||||
# compute loss
|
||||
with autocast(enabled=False): # use float32 for the criterion
|
||||
with torch.autocast("cuda", enabled=False): # use float32 for the criterion
|
||||
loss_dict = criterion(
|
||||
outputs["model_outputs"].float(),
|
||||
outputs["decoder_outputs"].float(),
|
||||
|
|
|
@ -13,7 +13,6 @@ from coqpit import Coqpit
|
|||
from librosa.filters import mel as librosa_mel_fn
|
||||
from monotonic_alignment_search import maximum_path
|
||||
from torch import nn
|
||||
from torch.cuda.amp.autocast_mode import autocast
|
||||
from torch.nn import functional as F
|
||||
from torch.utils.data import DataLoader
|
||||
from torch.utils.data.sampler import WeightedRandomSampler
|
||||
|
@ -1278,7 +1277,7 @@ class Vits(BaseTTS):
|
|||
)
|
||||
|
||||
# compute loss
|
||||
with autocast(enabled=False): # use float32 for the criterion
|
||||
with torch.autocast("cuda", enabled=False): # use float32 for the criterion
|
||||
loss_dict = criterion[optimizer_idx](
|
||||
scores_disc_real,
|
||||
scores_disc_fake,
|
||||
|
@ -1289,7 +1288,7 @@ class Vits(BaseTTS):
|
|||
mel = batch["mel"]
|
||||
|
||||
# compute melspec segment
|
||||
with autocast(enabled=False):
|
||||
with torch.autocast("cuda", enabled=False):
|
||||
if self.args.encoder_sample_rate:
|
||||
spec_segment_size = self.spec_segment_size * int(self.interpolate_factor)
|
||||
else:
|
||||
|
@ -1316,7 +1315,7 @@ class Vits(BaseTTS):
|
|||
)
|
||||
|
||||
# compute losses
|
||||
with autocast(enabled=False): # use float32 for the criterion
|
||||
with torch.autocast("cuda", enabled=False): # use float32 for the criterion
|
||||
loss_dict = criterion[optimizer_idx](
|
||||
mel_slice_hat=mel_slice.float(),
|
||||
mel_slice=mel_slice_hat.float(),
|
||||
|
|
Loading…
Reference in New Issue