From 2e1cd0b1e75147c60fa137d24ac49ce3dbfc809e Mon Sep 17 00:00:00 2001 From: Edresson Casanova Date: Sat, 25 Feb 2023 14:37:36 -0300 Subject: [PATCH] Fix Speaker Consistency Loss (SCL) --- TTS/encoder/models/resnet.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/TTS/encoder/models/resnet.py b/TTS/encoder/models/resnet.py index e75ab6c4..5eafcd60 100644 --- a/TTS/encoder/models/resnet.py +++ b/TTS/encoder/models/resnet.py @@ -161,16 +161,14 @@ class ResNetSpeakerEncoder(BaseEncoder): Shapes: - x: :math:`(N, 1, T_{in})` or :math:`(N, D_{spec}, T_{in})` """ - with torch.no_grad(): - with torch.cuda.amp.autocast(enabled=False): - x.squeeze_(1) - # if you torch spec compute it otherwise use the mel spec computed by the AP - if self.use_torch_spec: - x = self.torch_spec(x) + x.squeeze_(1) + # if use_torch_spec is set, compute the spec with torch; otherwise use the mel spec computed by the AP + if self.use_torch_spec: + x = self.torch_spec(x) - if self.log_input: - x = (x + 1e-6).log() - x = self.instancenorm(x).unsqueeze(1) + if self.log_input: + x = (x + 1e-6).log() + x = self.instancenorm(x).unsqueeze(1) x = self.conv1(x) x = self.relu(x)