mirror of https://github.com/coqui-ai/TTS.git
feat(cleaners): add multilingual phoneme cleaner
This doesn't convert numbers into English words.
This commit is contained in:
parent
063e9e9de9
commit
e5c208d254
|
@ -3,6 +3,7 @@
|
|||
# TODO: pick the cleaner for languages dynamically
|
||||
|
||||
import re
|
||||
from typing import Optional
|
||||
|
||||
from anyascii import anyascii
|
||||
|
||||
|
@ -44,8 +45,8 @@ def remove_aux_symbols(text):
|
|||
return text
|
||||
|
||||
|
||||
def replace_symbols(text, lang="en"):
|
||||
"""Replace symbols based on the lenguage tag.
|
||||
def replace_symbols(text, lang: Optional[str] = "en"):
|
||||
"""Replace symbols based on the language tag.
|
||||
|
||||
Args:
|
||||
text:
|
||||
|
@ -122,7 +123,11 @@ def english_cleaners(text):
|
|||
|
||||
|
||||
def phoneme_cleaners(text):
|
||||
"""Pipeline for phonemes mode, including number and abbreviation expansion."""
|
||||
"""Pipeline for phonemes mode, including number and abbreviation expansion.
|
||||
|
||||
NB: This cleaner converts numbers into English words, for other languages
|
||||
use multilingual_phoneme_cleaners().
|
||||
"""
|
||||
text = en_normalize_numbers(text)
|
||||
text = expand_abbreviations(text)
|
||||
text = replace_symbols(text)
|
||||
|
@ -131,6 +136,14 @@ def phoneme_cleaners(text):
|
|||
return text
|
||||
|
||||
|
||||
def multilingual_phoneme_cleaners(text):
|
||||
"""Pipeline for phonemes mode without number or abbreviation expansion, for use with any language."""
|
||||
text = replace_symbols(text, lang=None)
|
||||
text = remove_aux_symbols(text)
|
||||
text = collapse_whitespace(text)
|
||||
return text
|
||||
|
||||
|
||||
def french_cleaners(text):
|
||||
"""Pipeline for French text. There is no need to expand numbers, phonemizer already does that"""
|
||||
text = expand_abbreviations(text, lang="fr")
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from TTS.tts.utils.text.cleaners import english_cleaners, phoneme_cleaners
|
||||
from TTS.tts.utils.text.cleaners import english_cleaners, multilingual_phoneme_cleaners, phoneme_cleaners
|
||||
|
||||
|
||||
def test_time() -> None:
|
||||
|
@ -19,3 +19,8 @@ def test_currency() -> None:
|
|||
def test_expand_numbers() -> None:
|
||||
assert phoneme_cleaners("-1") == "minus one"
|
||||
assert phoneme_cleaners("1") == "one"
|
||||
|
||||
|
||||
def test_multilingual_phoneme_cleaners() -> None:
|
||||
assert multilingual_phoneme_cleaners("(Hello)") == "Hello"
|
||||
assert multilingual_phoneme_cleaners("1:") == "1,"
|
||||
|
|
Loading…
Reference in New Issue