From b2682d39c5dd584bb30f6650e3a5b18e27cccf5b Mon Sep 17 00:00:00 2001
From: Eren Gölge
Date: Mon, 13 Nov 2023 13:01:01 +0100
Subject: [PATCH] Make style

---
 TTS/cs_api.py                                |  9 +++++---
 TTS/tts/layers/tortoise/dpm_solver.py        | 23 +++++++++++++++-----
 TTS/tts/layers/xtts/tokenizer.py             | 11 ++++++----
 TTS/tts/layers/xtts/trainer/dataset.py       |  1 +
 tests/xtts_tests/test_xtts_gpt_train.py      |  4 +++-
 tests/xtts_tests/test_xtts_v2-0_gpt_train.py |  4 +++-
 6 files changed, 37 insertions(+), 15 deletions(-)

diff --git a/TTS/cs_api.py b/TTS/cs_api.py
index c45f9d08..9dc6c30d 100644
--- a/TTS/cs_api.py
+++ b/TTS/cs_api.py
@@ -82,7 +82,6 @@ class CS_API:
         },
     }
 
-
     SUPPORTED_LANGUAGES = ["en", "es", "de", "fr", "it", "pt", "pl", "tr", "ru", "nl", "cs", "ar", "zh-cn", "ja"]
 
     def __init__(self, api_token=None, model="XTTS"):
@@ -308,7 +307,11 @@ if __name__ == "__main__":
     print(api.list_speakers_as_tts_models())
 
     ts = time.time()
-    wav, sr = api.tts("It took me quite a long time to develop a voice.", language="en", speaker_name=api.speakers[0].name)
+    wav, sr = api.tts(
+        "It took me quite a long time to develop a voice.", language="en", speaker_name=api.speakers[0].name
+    )
     print(f" [i] XTTS took {time.time() - ts:.2f}s")
 
-    filepath = api.tts_to_file(text="Hello world!", speaker_name=api.speakers[0].name, language="en", file_path="output.wav")
+    filepath = api.tts_to_file(
+        text="Hello world!", speaker_name=api.speakers[0].name, language="en", file_path="output.wav"
+    )
diff --git a/TTS/tts/layers/tortoise/dpm_solver.py b/TTS/tts/layers/tortoise/dpm_solver.py
index 2166eebb..c70888df 100644
--- a/TTS/tts/layers/tortoise/dpm_solver.py
+++ b/TTS/tts/layers/tortoise/dpm_solver.py
@@ -562,15 +562,21 @@ class DPM_Solver:
         if order == 3:
             K = steps // 3 + 1
             if steps % 3 == 0:
-                orders = [3,] * (
+                orders = [
+                    3,
+                ] * (
                     K - 2
                 ) + [2, 1]
             elif steps % 3 == 1:
-                orders = [3,] * (
+                orders = [
+                    3,
+                ] * (
                     K - 1
                 ) + [1]
             else:
-                orders = [3,] * (
+                orders = [
+                    3,
+                ] * (
                     K - 1
                 ) + [2]
         elif order == 2:
@@ -581,7 +587,9 @@
                 ] * K
             else:
                 K = steps // 2 + 1
-                orders = [2,] * (
+                orders = [
+                    2,
+                ] * (
                     K - 1
                 ) + [1]
         elif order == 1:
@@ -1440,7 +1448,10 @@
                     model_prev_list[-1] = self.model_fn(x, t)
         elif method in ["singlestep", "singlestep_fixed"]:
             if method == "singlestep":
-                (timesteps_outer, orders,) = self.get_orders_and_timesteps_for_singlestep_solver(
+                (
+                    timesteps_outer,
+                    orders,
+                ) = self.get_orders_and_timesteps_for_singlestep_solver(
                     steps=steps,
                     order=order,
                     skip_type=skip_type,
@@ -1548,4 +1559,4 @@ def expand_dims(v, dims):
     Returns:
         a PyTorch tensor with shape [N, 1, 1, ..., 1] and the total dimension is `dims`.
     """
-    return v[(...,) + (None,) * (dims - 1)]
\ No newline at end of file
+    return v[(...,) + (None,) * (dims - 1)]
diff --git a/TTS/tts/layers/xtts/tokenizer.py b/TTS/tts/layers/xtts/tokenizer.py
index edb09042..211d0a93 100644
--- a/TTS/tts/layers/xtts/tokenizer.py
+++ b/TTS/tts/layers/xtts/tokenizer.py
@@ -1,6 +1,7 @@
 import json
 import os
 import re
+from functools import cached_property
 
 import pypinyin
 import torch
@@ -8,7 +9,6 @@ from hangul_romanize import Transliter
 from hangul_romanize.rule import academic
 from num2words import num2words
 from tokenizers import Tokenizer
-from functools import cached_property
 
 from TTS.tts.layers.xtts.zh_num2words import TextNorm as zh_num2words
 
@@ -560,19 +560,22 @@ class VoiceBpeTokenizer:
     @cached_property
     def katsu(self):
         import cutlet
+
         return cutlet.Cutlet()
-    
+
     def check_input_length(self, txt, lang):
         limit = self.char_limits.get(lang, 250)
         if len(txt) > limit:
-            print(f"[!] Warning: The text length exceeds the character limit of {limit} for language '{lang}', this might cause truncated audio.")
+            print(
+                f"[!] Warning: The text length exceeds the character limit of {limit} for language '{lang}', this might cause truncated audio."
+            )
 
     def preprocess_text(self, txt, lang):
         if lang in {"ar", "cs", "de", "en", "es", "fr", "hu", "it", "nl", "pl", "pt", "ru", "tr", "zh", "zh-cn"}:
             txt = multilingual_cleaners(txt, lang)
             if lang in {"zh", "zh-cn"}:
                 txt = chinese_transliterate(txt)
-        elif lang == "ja": 
+        elif lang == "ja":
             txt = japanese_cleaners(txt, self.katsu)
         elif lang == "ko":
             txt = korean_cleaners(txt)
diff --git a/TTS/tts/layers/xtts/trainer/dataset.py b/TTS/tts/layers/xtts/trainer/dataset.py
index 8cb90ad0..2f958cb5 100644
--- a/TTS/tts/layers/xtts/trainer/dataset.py
+++ b/TTS/tts/layers/xtts/trainer/dataset.py
@@ -5,6 +5,7 @@ import sys
 import torch
 import torch.nn.functional as F
 import torch.utils.data
+
 from TTS.tts.models.xtts import load_audio
 
 torch.set_num_threads(1)
diff --git a/tests/xtts_tests/test_xtts_gpt_train.py b/tests/xtts_tests/test_xtts_gpt_train.py
index 12c547d6..b8b9a4e3 100644
--- a/tests/xtts_tests/test_xtts_gpt_train.py
+++ b/tests/xtts_tests/test_xtts_gpt_train.py
@@ -60,7 +60,9 @@ XTTS_CHECKPOINT = None  # "/raid/edresson/dev/Checkpoints/XTTS_evaluation/xtts_s
 
 
 # Training sentences generations
-SPEAKER_REFERENCE = ["tests/data/ljspeech/wavs/LJ001-0002.wav"]  # speaker reference to be used in training test sentences
+SPEAKER_REFERENCE = [
+    "tests/data/ljspeech/wavs/LJ001-0002.wav"
+]  # speaker reference to be used in training test sentences
 LANGUAGE = config_dataset.language
 
 
diff --git a/tests/xtts_tests/test_xtts_v2-0_gpt_train.py b/tests/xtts_tests/test_xtts_v2-0_gpt_train.py
index b19b7210..6663433c 100644
--- a/tests/xtts_tests/test_xtts_v2-0_gpt_train.py
+++ b/tests/xtts_tests/test_xtts_v2-0_gpt_train.py
@@ -58,7 +58,9 @@ XTTS_CHECKPOINT = None  # "/raid/edresson/dev/Checkpoints/XTTS_evaluation/xtts_s
 
 
 # Training sentences generations
-SPEAKER_REFERENCE = ["tests/data/ljspeech/wavs/LJ001-0002.wav"]  # speaker reference to be used in training test sentences
+SPEAKER_REFERENCE = [
+    "tests/data/ljspeech/wavs/LJ001-0002.wav"
+]  # speaker reference to be used in training test sentences
 LANGUAGE = config_dataset.language