mirror of https://github.com/coqui-ai/TTS.git
Fix end artifact for fine-tuning models
This commit is contained in:
parent
04901fb2e4
commit
7bfd20e692
|
@ -426,15 +426,6 @@ class GPT(nn.Module):
|
||||||
if max_mel_len > audio_codes.shape[-1]:
|
if max_mel_len > audio_codes.shape[-1]:
|
||||||
audio_codes = F.pad(audio_codes, (0, max_mel_len - audio_codes.shape[-1]))
|
audio_codes = F.pad(audio_codes, (0, max_mel_len - audio_codes.shape[-1]))
|
||||||
|
|
||||||
silence = True
|
|
||||||
for idx, l in enumerate(code_lengths):
|
|
||||||
length = l.item()
|
|
||||||
while silence:
|
|
||||||
if audio_codes[idx, length - 1] != 83:
|
|
||||||
break
|
|
||||||
length -= 1
|
|
||||||
code_lengths[idx] = length
|
|
||||||
|
|
||||||
# 💖 Lovely assertions
|
# 💖 Lovely assertions
|
||||||
assert (
|
assert (
|
||||||
max_mel_len <= audio_codes.shape[-1]
|
max_mel_len <= audio_codes.shape[-1]
|
||||||
|
@ -450,7 +441,7 @@ class GPT(nn.Module):
|
||||||
audio_codes = F.pad(audio_codes[:, :max_mel_len], (0, 1), value=self.stop_audio_token)
|
audio_codes = F.pad(audio_codes[:, :max_mel_len], (0, 1), value=self.stop_audio_token)
|
||||||
|
|
||||||
# Pad mel codes with stop_audio_token
|
# Pad mel codes with stop_audio_token
|
||||||
audio_codes = self.set_mel_padding(audio_codes, code_lengths)
|
audio_codes = self.set_mel_padding(audio_codes, code_lengths - 3) # -3 to get the real code lengths without considering the start and stop tokens, which have not been added yet
|
||||||
|
|
||||||
# Build input and target tensors
|
# Build input and target tensors
|
||||||
# Prepend start token to inputs and append stop token to targets
|
# Prepend start token to inputs and append stop token to targets
|
||||||
|
|
Loading…
Reference in New Issue