diff --git a/TTS/tts/utils/text/cleaners.py b/TTS/tts/utils/text/cleaners.py index f829e4cc..fc87025f 100644 --- a/TTS/tts/utils/text/cleaners.py +++ b/TTS/tts/utils/text/cleaners.py @@ -18,34 +18,37 @@ from .french.abbreviations import abbreviations_fr _whitespace_re = re.compile(r"\s+") -def expand_abbreviations(text, lang="en"): +def expand_abbreviations(text: str, lang: str = "en") -> str: if lang == "en": _abbreviations = abbreviations_en elif lang == "fr": _abbreviations = abbreviations_fr + else: + msg = f"Language {lang} not supported in expand_abbreviations" + raise ValueError(msg) for regex, replacement in _abbreviations: text = re.sub(regex, replacement, text) return text -def lowercase(text): +def lowercase(text: str) -> str: return text.lower() -def collapse_whitespace(text): +def collapse_whitespace(text: str) -> str: return re.sub(_whitespace_re, " ", text).strip() -def convert_to_ascii(text): +def convert_to_ascii(text: str) -> str: return anyascii(text) -def remove_aux_symbols(text): +def remove_aux_symbols(text: str) -> str: text = re.sub(r"[\<\>\(\)\[\]\"]+", "", text) return text -def replace_symbols(text, lang: Optional[str] = "en"): +def replace_symbols(text: str, lang: Optional[str] = "en") -> str: """Replace symbols based on the language tag. Args: @@ -78,14 +81,14 @@ def replace_symbols(text, lang: Optional[str] = "en"): return text -def basic_cleaners(text): +def basic_cleaners(text: str) -> str: """Basic pipeline that lowercases and collapses whitespace without transliteration.""" text = lowercase(text) text = collapse_whitespace(text) return text -def transliteration_cleaners(text): +def transliteration_cleaners(text: str) -> str: """Pipeline for non-English text that transliterates to ASCII.""" # text = convert_to_ascii(text) text = lowercase(text) @@ -93,7 +96,7 @@ def transliteration_cleaners(text): return text -def basic_german_cleaners(text): +def basic_german_cleaners(text: str) -> str: """Pipeline for German text""" text = lowercase(text) text = collapse_whitespace(text) @@ -101,7 +104,7 @@ def basic_german_cleaners(text): # TODO: elaborate it -def basic_turkish_cleaners(text): +def basic_turkish_cleaners(text: str) -> str: """Pipeline for Turkish text""" text = text.replace("I", "ı") text = lowercase(text) @@ -109,7 +112,7 @@ def basic_turkish_cleaners(text): return text -def english_cleaners(text): +def english_cleaners(text: str) -> str: """Pipeline for English text, including number and abbreviation expansion.""" # text = convert_to_ascii(text) text = lowercase(text) @@ -122,7 +125,7 @@ def english_cleaners(text): return text -def phoneme_cleaners(text): +def phoneme_cleaners(text: str) -> str: """Pipeline for phonemes mode, including number and abbreviation expansion. NB: This cleaner converts numbers into English words, for other languages @@ -136,7 +139,7 @@ def phoneme_cleaners(text): return text -def multilingual_phoneme_cleaners(text): +def multilingual_phoneme_cleaners(text: str) -> str: """Pipeline for phonemes mode, including number and abbreviation expansion.""" text = replace_symbols(text, lang=None) text = remove_aux_symbols(text) @@ -144,7 +147,7 @@ def multilingual_phoneme_cleaners(text): return text -def french_cleaners(text): +def french_cleaners(text: str) -> str: """Pipeline for French text. There is no need to expand numbers, phonemizer already does that""" text = expand_abbreviations(text, lang="fr") text = lowercase(text) @@ -154,7 +157,7 @@ def french_cleaners(text): return text -def portuguese_cleaners(text): +def portuguese_cleaners(text: str) -> str: """Basic pipeline for Portuguese text. There is no need to expand abbreviation and numbers, phonemizer already does that""" text = lowercase(text) @@ -170,7 +173,7 @@ def chinese_mandarin_cleaners(text: str) -> str: return text -def multilingual_cleaners(text): +def multilingual_cleaners(text: str) -> str: """Pipeline for multilingual text""" text = lowercase(text) text = replace_symbols(text, lang=None) @@ -179,7 +182,7 @@ def multilingual_cleaners(text): return text -def no_cleaners(text): +def no_cleaners(text: str) -> str: # remove newline characters text = text.replace("\n", "") return text