feat(cleaners): add multilingual phoneme cleaner

Unlike phoneme_cleaners(), the new multilingual cleaner doesn't convert numbers into English words.
This commit is contained in:
Enno Hermann 2024-06-14 15:06:03 +02:00
parent 063e9e9de9
commit e5c208d254
2 changed files with 22 additions and 4 deletions

View File

@ -3,6 +3,7 @@
# TODO: pick the cleaner for languages dynamically # TODO: pick the cleaner for languages dynamically
import re import re
from typing import Optional
from anyascii import anyascii from anyascii import anyascii
@ -44,8 +45,8 @@ def remove_aux_symbols(text):
return text return text
def replace_symbols(text, lang="en"): def replace_symbols(text, lang: Optional[str] = "en"):
"""Replace symbols based on the lenguage tag. """Replace symbols based on the language tag.
Args: Args:
text: text:
@ -122,7 +123,11 @@ def english_cleaners(text):
def phoneme_cleaners(text): def phoneme_cleaners(text):
"""Pipeline for phonemes mode, including number and abbreviation expansion.""" """Pipeline for phonemes mode, including number and abbreviation expansion.
NB: This cleaner converts numbers into English words, for other languages
use multilingual_phoneme_cleaners().
"""
text = en_normalize_numbers(text) text = en_normalize_numbers(text)
text = expand_abbreviations(text) text = expand_abbreviations(text)
text = replace_symbols(text) text = replace_symbols(text)
@ -131,6 +136,14 @@ def phoneme_cleaners(text):
return text return text
def multilingual_phoneme_cleaners(text):
    """Pipeline for phonemes mode that skips number and abbreviation expansion.

    Unlike phoneme_cleaners(), no English-specific number or abbreviation
    expansion is applied here; the text is only passed through symbol
    replacement, auxiliary-symbol removal and whitespace collapsing.

    Args:
        text: Input text to clean.

    Returns:
        The cleaned text.
    """
    # lang=None is passed so that replace_symbols() is not told to use
    # its default "en" language tag.
    text = replace_symbols(text, lang=None)
    text = remove_aux_symbols(text)
    text = collapse_whitespace(text)
    return text
def french_cleaners(text): def french_cleaners(text):
"""Pipeline for French text. There is no need to expand numbers, phonemizer already does that""" """Pipeline for French text. There is no need to expand numbers, phonemizer already does that"""
text = expand_abbreviations(text, lang="fr") text = expand_abbreviations(text, lang="fr")

View File

@ -1,6 +1,6 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
from TTS.tts.utils.text.cleaners import english_cleaners, phoneme_cleaners from TTS.tts.utils.text.cleaners import english_cleaners, multilingual_phoneme_cleaners, phoneme_cleaners
def test_time() -> None: def test_time() -> None:
@ -19,3 +19,8 @@ def test_currency() -> None:
def test_expand_numbers() -> None: def test_expand_numbers() -> None:
assert phoneme_cleaners("-1") == "minus one" assert phoneme_cleaners("-1") == "minus one"
assert phoneme_cleaners("1") == "one" assert phoneme_cleaners("1") == "one"
def test_multilingual_phoneme_cleaners() -> None:
    """Check the multilingual cleaner on parenthesised text and on a digit followed by a colon."""
    expected_by_input = {
        "(Hello)": "Hello",  # surrounding parentheses are dropped
        "1:": "1,",  # the digit is kept as-is; the colon becomes a comma
    }
    for raw_text, cleaned_text in expected_by_input.items():
        assert multilingual_phoneme_cleaners(raw_text) == cleaned_text