Make style

2023-11-13 13:01:01 +01:00 · 2023-11-13 13:01:01 +01:00 · b2682d39c5
parent a16360af85
commit b2682d39c5
6 changed files with 37 additions and 15 deletions
--- a/TTS/cs_api.py
+++ b/TTS/cs_api.py
@@ -82,7 +82,6 @@ class CS_API:
        },
    }

-
    SUPPORTED_LANGUAGES = ["en", "es", "de", "fr", "it", "pt", "pl", "tr", "ru", "nl", "cs", "ar", "zh-cn", "ja"]

    def __init__(self, api_token=None, model="XTTS"):
@@ -308,7 +307,11 @@ if __name__ == "__main__":
    print(api.list_speakers_as_tts_models())

    ts = time.time()
-    wav, sr = api.tts("It took me quite a long time to develop a voice.", language="en", speaker_name=api.speakers[0].name)
+    wav, sr = api.tts(
+        "It took me quite a long time to develop a voice.", language="en", speaker_name=api.speakers[0].name
+    )
    print(f" [i] XTTS took {time.time() - ts:.2f}s")

-    filepath = api.tts_to_file(text="Hello world!", speaker_name=api.speakers[0].name, language="en", file_path="output.wav")
+    filepath = api.tts_to_file(
+        text="Hello world!", speaker_name=api.speakers[0].name, language="en", file_path="output.wav"
+    )
--- a/TTS/tts/layers/tortoise/dpm_solver.py
+++ b/TTS/tts/layers/tortoise/dpm_solver.py
@@ -562,15 +562,21 @@ class DPM_Solver:
        if order == 3:
            K = steps // 3 + 1
            if steps % 3 == 0:
-                orders = [3,] * (
+                orders = [
+                    3,
+                ] * (
                    K - 2
                ) + [2, 1]
            elif steps % 3 == 1:
-                orders = [3,] * (
+                orders = [
+                    3,
+                ] * (
                    K - 1
                ) + [1]
            else:
-                orders = [3,] * (
+                orders = [
+                    3,
+                ] * (
                    K - 1
                ) + [2]
        elif order == 2:
@@ -581,7 +587,9 @@ class DPM_Solver:
                ] * K
            else:
                K = steps // 2 + 1
-                orders = [2,] * (
+                orders = [
+                    2,
+                ] * (
                    K - 1
                ) + [1]
        elif order == 1:
@@ -1440,7 +1448,10 @@ class DPM_Solver:
                        model_prev_list[-1] = self.model_fn(x, t)
            elif method in ["singlestep", "singlestep_fixed"]:
                if method == "singlestep":
-                    (timesteps_outer, orders,) = self.get_orders_and_timesteps_for_singlestep_solver(
+                    (
+                        timesteps_outer,
+                        orders,
+                    ) = self.get_orders_and_timesteps_for_singlestep_solver(
                        steps=steps,
                        order=order,
                        skip_type=skip_type,
@@ -1548,4 +1559,4 @@ def expand_dims(v, dims):
    Returns:
        a PyTorch tensor with shape [N, 1, 1, ..., 1] and the total dimension is `dims`.
    """
-    return v[(...,) + (None,) * (dims - 1)]
+    return v[(...,) + (None,) * (dims - 1)]
--- a/TTS/tts/layers/xtts/tokenizer.py
+++ b/TTS/tts/layers/xtts/tokenizer.py
@@ -1,6 +1,7 @@
 import json
 import os
 import re
+from functools import cached_property

 import pypinyin
 import torch
@@ -8,7 +9,6 @@ from hangul_romanize import Transliter
 from hangul_romanize.rule import academic
 from num2words import num2words
 from tokenizers import Tokenizer
-from functools import cached_property

 from TTS.tts.layers.xtts.zh_num2words import TextNorm as zh_num2words

@@ -560,19 +560,22 @@ class VoiceBpeTokenizer:
    @cached_property
    def katsu(self):
        import cutlet
+
        return cutlet.Cutlet()
-    
+
    def check_input_length(self, txt, lang):
        limit = self.char_limits.get(lang, 250)
        if len(txt) > limit:
-            print(f"[!] Warning: The text length exceeds the character limit of {limit} for language '{lang}', this might cause truncated audio.")
+            print(
+                f"[!] Warning: The text length exceeds the character limit of {limit} for language '{lang}', this might cause truncated audio."
+            )

    def preprocess_text(self, txt, lang):
        if lang in {"ar", "cs", "de", "en", "es", "fr", "hu", "it", "nl", "pl", "pt", "ru", "tr", "zh", "zh-cn"}:
            txt = multilingual_cleaners(txt, lang)
            if lang in {"zh", "zh-cn"}:
                txt = chinese_transliterate(txt)
-        elif lang == "ja":                
+        elif lang == "ja":
            txt = japanese_cleaners(txt, self.katsu)
        elif lang == "ko":
            txt = korean_cleaners(txt)
--- a/TTS/tts/layers/xtts/trainer/dataset.py
+++ b/TTS/tts/layers/xtts/trainer/dataset.py
@@ -5,6 +5,7 @@ import sys
 import torch
 import torch.nn.functional as F
 import torch.utils.data
+
 from TTS.tts.models.xtts import load_audio

 torch.set_num_threads(1)
--- a/tests/xtts_tests/test_xtts_gpt_train.py
+++ b/tests/xtts_tests/test_xtts_gpt_train.py
@@ -60,7 +60,9 @@ XTTS_CHECKPOINT = None  # "/raid/edresson/dev/Checkpoints/XTTS_evaluation/xtts_s


 # Training sentences generations
-SPEAKER_REFERENCE = ["tests/data/ljspeech/wavs/LJ001-0002.wav"]  # speaker reference to be used in training test sentences
+SPEAKER_REFERENCE = [
+    "tests/data/ljspeech/wavs/LJ001-0002.wav"
+]  # speaker reference to be used in training test sentences
 LANGUAGE = config_dataset.language


--- a/tests/xtts_tests/test_xtts_v2-0_gpt_train.py
+++ b/tests/xtts_tests/test_xtts_v2-0_gpt_train.py
@@ -58,7 +58,9 @@ XTTS_CHECKPOINT = None  # "/raid/edresson/dev/Checkpoints/XTTS_evaluation/xtts_s


 # Training sentences generations
-SPEAKER_REFERENCE = ["tests/data/ljspeech/wavs/LJ001-0002.wav"]  # speaker reference to be used in training test sentences
+SPEAKER_REFERENCE = [
+    "tests/data/ljspeech/wavs/LJ001-0002.wav"
+]  # speaker reference to be used in training test sentences
 LANGUAGE = config_dataset.language