From e5c208d2545f7d1248c93c3a8ef8ebdedc2c0672 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Fri, 14 Jun 2024 15:06:03 +0200 Subject: [PATCH] feat(cleaners): add multilingual phoneme cleaner This doesn't convert numbers into English words. --- TTS/tts/utils/text/cleaners.py | 19 ++++++++++++++++--- tests/text_tests/test_text_cleaners.py | 7 ++++++- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/TTS/tts/utils/text/cleaners.py b/TTS/tts/utils/text/cleaners.py index 794a87c8..f829e4cc 100644 --- a/TTS/tts/utils/text/cleaners.py +++ b/TTS/tts/utils/text/cleaners.py @@ -3,6 +3,7 @@ # TODO: pick the cleaner for languages dynamically import re +from typing import Optional from anyascii import anyascii @@ -44,8 +45,8 @@ def remove_aux_symbols(text): return text -def replace_symbols(text, lang="en"): - """Replace symbols based on the lenguage tag. +def replace_symbols(text, lang: Optional[str] = "en"): + """Replace symbols based on the language tag. Args: text: @@ -122,7 +123,11 @@ def english_cleaners(text): def phoneme_cleaners(text): - """Pipeline for phonemes mode, including number and abbreviation expansion.""" + """Pipeline for phonemes mode, including number and abbreviation expansion. + + NB: This cleaner converts numbers into English words, for other languages + use multilingual_phoneme_cleaners(). + """ text = en_normalize_numbers(text) text = expand_abbreviations(text) text = replace_symbols(text) @@ -131,6 +136,14 @@ def phoneme_cleaners(text): return text +def multilingual_phoneme_cleaners(text): + """Pipeline for phonemes mode, without number or abbreviation expansion.""" + text = replace_symbols(text, lang=None) + text = remove_aux_symbols(text) + text = collapse_whitespace(text) + return text + + def french_cleaners(text): """Pipeline for French text. 
There is no need to expand numbers, phonemizer already does that""" text = expand_abbreviations(text, lang="fr") diff --git a/tests/text_tests/test_text_cleaners.py b/tests/text_tests/test_text_cleaners.py index fcfa71e7..bf0c8d5d 100644 --- a/tests/text_tests/test_text_cleaners.py +++ b/tests/text_tests/test_text_cleaners.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -from TTS.tts.utils.text.cleaners import english_cleaners, phoneme_cleaners +from TTS.tts.utils.text.cleaners import english_cleaners, multilingual_phoneme_cleaners, phoneme_cleaners def test_time() -> None: @@ -19,3 +19,8 @@ def test_currency() -> None: def test_expand_numbers() -> None: assert phoneme_cleaners("-1") == "minus one" assert phoneme_cleaners("1") == "one" + + +def test_multilingual_phoneme_cleaners() -> None: + assert multilingual_phoneme_cleaners("(Hello)") == "Hello" + assert multilingual_phoneme_cleaners("1:") == "1,"