mirror of https://github.com/coqui-ai/TTS.git

Make style

parent 26efdf6ee7
commit 44880f09ed
@@ -17,7 +17,6 @@ from tqdm import tqdm

 from TTS.tts.layers.tortoise.dpm_solver import DPM_Solver, NoiseScheduleVP, model_wrapper

-
 try:
     from k_diffusion.sampling import sample_dpmpp_2m, sample_euler_ancestral

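The hunk above touches the guarded k_diffusion import. This try/except pattern lets the module load even when the optional sampler dependency is missing; a minimal sketch of the full pattern (only the try side appears in the hunk, so the except branch and the sampler table here are assumptions):

# Optional dependency guard: degrade gracefully if k_diffusion is absent.
try:
    from k_diffusion.sampling import sample_dpmpp_2m, sample_euler_ancestral

    # Hypothetical registry mapping sampler names to the imported functions.
    K_DIFFUSION_SAMPLERS = {"dpm++2m": sample_dpmpp_2m, "euler_a": sample_euler_ancestral}
except ImportError:
    K_DIFFUSION_SAMPLERS = None  # callers must check before requesting these samplers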
@@ -441,7 +441,9 @@ class GPT(nn.Module):
         audio_codes = F.pad(audio_codes[:, :max_mel_len], (0, 1), value=self.stop_audio_token)

         # Pad mel codes with stop_audio_token
-        audio_codes = self.set_mel_padding(audio_codes, code_lengths - 3) # -3 to get the real code lengths without consider start and stop tokens that was not added yet
+        audio_codes = self.set_mel_padding(
+            audio_codes, code_lengths - 3
+        )  # -3 to get the real code lengths without consider start and stop tokens that was not added yet

         # Build input and target tensors
         # Prepend start token to inputs and append stop token to targets
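For readers skimming the diff: set_mel_padding receives code_lengths - 3 because, per the inline comment, the start and stop tokens have not been added to the sequences yet at this point. A minimal, self-contained sketch of the padding step, with the helper body and the token id assumed rather than taken from the repo:

import torch
import torch.nn.functional as F

STOP_AUDIO_TOKEN = 1025  # hypothetical stop-token id


def set_mel_padding(audio_codes, code_lengths):
    # Overwrite everything past each sequence's true length with the stop token.
    for i, length in enumerate(code_lengths.tolist()):
        audio_codes[i, length:] = STOP_AUDIO_TOKEN
    return audio_codes


audio_codes = torch.randint(0, 1024, (2, 6))  # batch of 2 code sequences
code_lengths = torch.tensor([4, 6])           # true lengths per sequence

# Append one stop token to every row, as F.pad does in the hunk above
audio_codes = F.pad(audio_codes, (0, 1), value=STOP_AUDIO_TOKEN)
audio_codes = set_mel_padding(audio_codes, code_lengths)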
@@ -1,23 +1,22 @@
 import os
 import re
-import torch
-import pypinyin
 import textwrap
-
 from functools import cached_property

+import pypinyin
+import torch
 from hangul_romanize import Transliter
 from hangul_romanize.rule import academic
 from num2words import num2words
+from spacy.lang.ar import Arabic
+from spacy.lang.en import English
+from spacy.lang.es import Spanish
+from spacy.lang.ja import Japanese
+from spacy.lang.zh import Chinese
 from tokenizers import Tokenizer

 from TTS.tts.layers.xtts.zh_num2words import TextNorm as zh_num2words

-from spacy.lang.en import English
-from spacy.lang.zh import Chinese
-from spacy.lang.ja import Japanese
-from spacy.lang.ar import Arabic
-from spacy.lang.es import Spanish

 def get_spacy_lang(lang):
     if lang == "zh":
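The import shuffle above is what isort produces: standard-library imports first, then third-party packages in alphabetical order, then first-party (TTS.*) imports, with a blank line between groups. A hedged sketch of checking that ordering with isort's Python API:

# isort.code() sorts the import statements in a source string (isort >= 5).
import isort

messy = "import torch\nimport os\n"
print(isort.code(messy))
# expected: "import os\n\nimport torch\n"  (stdlib group first, then third-party)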
@@ -32,6 +31,7 @@ def get_spacy_lang(lang):
         # For most languages, Enlish does the job
         return English()

+
 def split_sentence(text, lang, text_split_length=250):
     """Preprocess the input text"""
     text_splits = []
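get_spacy_lang() returns a bare spaCy language pipeline that split_sentence() can use for sentence boundaries. A hedged usage sketch built on spaCy's rule-based "sentencizer" pipe (the repo's exact usage may differ):

from spacy.lang.en import English

nlp = English()                     # blank English pipeline, no trained model needed
nlp.add_pipe("sentencizer")         # rule-based sentence boundary detection
doc = nlp("First sentence. Second sentence.")
print([s.text for s in doc.sents])  # ['First sentence.', 'Second sentence.']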
@@ -67,6 +67,7 @@ def split_sentence(text, lang, text_split_length=250):

     return text_splits

+
 _whitespace_re = re.compile(r"\s+")

 # List of (regular expression, replacement) pairs for abbreviations:
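_whitespace_re is the usual TTS text-cleaner regex for collapsing runs of whitespace. A small self-contained example (the helper name is assumed; only the regex appears in the hunk):

import re

_whitespace_re = re.compile(r"\s+")


def collapse_whitespace(text):
    # Replace any run of spaces, tabs, or newlines with a single space.
    return _whitespace_re.sub(" ", text)


print(collapse_whitespace("hello   \n  world"))  # "hello world"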
@@ -563,9 +563,7 @@ class Xtts(BaseTTS):

                 if length_scale != 1.0:
                     gpt_latents = F.interpolate(
-                        gpt_latents.transpose(1, 2),
-                        scale_factor=length_scale,
-                        mode="linear"
+                        gpt_latents.transpose(1, 2), scale_factor=length_scale, mode="linear"
                     ).transpose(1, 2)

                 gpt_latents_list.append(gpt_latents.cpu())
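The reflowed F.interpolate call implements speed control: the GPT latents are treated as a 1-D signal over time and linearly resampled by length_scale, so values above 1.0 stretch the latents (slower speech) and values below 1.0 compress them (faster speech). A minimal sketch with assumed shapes:

import torch
import torch.nn.functional as F

gpt_latents = torch.randn(1, 50, 1024)  # (batch, time, channels); shapes assumed
length_scale = 1.25                     # > 1.0 stretches the time axis

stretched = F.interpolate(
    gpt_latents.transpose(1, 2),  # -> (batch, channels, time), as F.interpolate expects
    scale_factor=length_scale,
    mode="linear",
).transpose(1, 2)                 # back to (batch, time, channels)
print(stretched.shape)            # torch.Size([1, 62, 1024])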
@@ -675,9 +673,7 @@ class Xtts(BaseTTS):
             gpt_latents = torch.cat(all_latents, dim=0)[None, :]
             if length_scale != 1.0:
                 gpt_latents = F.interpolate(
-                    gpt_latents.transpose(1, 2),
-                    scale_factor=length_scale,
-                    mode="linear"
+                    gpt_latents.transpose(1, 2), scale_factor=length_scale, mode="linear"
                 ).transpose(1, 2)
             wav_gen = self.hifigan_decoder(gpt_latents, g=speaker_embedding.to(self.device))
             wav_chunk, wav_gen_prev, wav_overlap = self.handle_chunks(
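In the streaming path above, handle_chunks stitches consecutive wav chunks together using the wav_overlap carried between calls. The repo's implementation is not shown in this diff; a generic crossfade toy illustrating the idea (all names here are assumed):

import torch


def crossfade(prev_tail, new_chunk):
    # Blend the overlap region linearly to avoid clicks at chunk boundaries.
    n = prev_tail.shape[-1]
    fade = torch.linspace(0.0, 1.0, n)
    out = new_chunk.clone()
    out[:n] = prev_tail * (1.0 - fade) + new_chunk[:n] * fade
    return out


prev_tail = torch.randn(128)   # tail of the previous chunk (overlap region)
new_chunk = torch.randn(1024)  # freshly decoded chunk starting with the overlap
print(crossfade(prev_tail, new_chunk).shape)  # torch.Size([1024])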
@@ -186,7 +186,7 @@ def test_xtts_v2_streaming():
         "en",
         gpt_cond_latent,
         speaker_embedding,
-        speed=1.5
+        speed=1.5,
     )
     wav_chuncks = []
     for i, chunk in enumerate(chunks):
@@ -198,7 +198,7 @@ def test_xtts_v2_streaming():
         "en",
         gpt_cond_latent,
         speaker_embedding,
-        speed=0.66
+        speed=0.66,
     )
     wav_chuncks = []
     for i, chunk in enumerate(chunks):
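The only change in the two test hunks is a trailing comma after the last keyword argument, black's "magic trailing comma". With it in place, appending another argument later touches a single line instead of two, keeping future diffs minimal. A toy illustration:

def synth(text, lang, speed=1.0):
    return f"{lang}:{text}@{speed}"


out = synth(
    "hello",
    "en",
    speed=0.66,  # trailing comma: the next added kwarg will not touch this line
)
print(out)  # en:hello@0.66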