mirror of https://github.com/coqui-ai/TTS.git
feat(cleaners): add multilingual phoneme cleaner
Unlike phoneme_cleaners, the new multilingual_phoneme_cleaners doesn't convert numbers into English words.
This commit is contained in:
parent
063e9e9de9
commit
e5c208d254
|
@ -3,6 +3,7 @@
|
||||||
# TODO: pick the cleaner for languages dynamically
|
# TODO: pick the cleaner for languages dynamically
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
from anyascii import anyascii
|
from anyascii import anyascii
|
||||||
|
|
||||||
|
@ -44,8 +45,8 @@ def remove_aux_symbols(text):
|
||||||
return text
|
return text
|
||||||
|
|
||||||
|
|
||||||
def replace_symbols(text, lang="en"):
|
def replace_symbols(text, lang: Optional[str] = "en"):
|
||||||
"""Replace symbols based on the lenguage tag.
|
"""Replace symbols based on the language tag.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
text:
|
text:
|
||||||
|
@ -122,7 +123,11 @@ def english_cleaners(text):
|
||||||
|
|
||||||
|
|
||||||
def phoneme_cleaners(text):
|
def phoneme_cleaners(text):
|
||||||
"""Pipeline for phonemes mode, including number and abbreviation expansion."""
|
"""Pipeline for phonemes mode, including number and abbreviation expansion.
|
||||||
|
|
||||||
|
NB: This cleaner converts numbers into English words; for other languages
|
||||||
|
use multilingual_phoneme_cleaners().
|
||||||
|
"""
|
||||||
text = en_normalize_numbers(text)
|
text = en_normalize_numbers(text)
|
||||||
text = expand_abbreviations(text)
|
text = expand_abbreviations(text)
|
||||||
text = replace_symbols(text)
|
text = replace_symbols(text)
|
||||||
|
@ -131,6 +136,14 @@ def phoneme_cleaners(text):
|
||||||
return text
|
return text
|
||||||
|
|
||||||
|
|
||||||
|
def multilingual_phoneme_cleaners(text):
|
||||||
|
"""Pipeline for phonemes mode without number or abbreviation expansion, for use with any language."""
|
||||||
|
text = replace_symbols(text, lang=None)
|
||||||
|
text = remove_aux_symbols(text)
|
||||||
|
text = collapse_whitespace(text)
|
||||||
|
return text
|
||||||
|
|
||||||
|
|
||||||
def french_cleaners(text):
|
def french_cleaners(text):
|
||||||
"""Pipeline for French text. There is no need to expand numbers, phonemizer already does that"""
|
"""Pipeline for French text. There is no need to expand numbers, phonemizer already does that"""
|
||||||
text = expand_abbreviations(text, lang="fr")
|
text = expand_abbreviations(text, lang="fr")
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
from TTS.tts.utils.text.cleaners import english_cleaners, phoneme_cleaners
|
from TTS.tts.utils.text.cleaners import english_cleaners, multilingual_phoneme_cleaners, phoneme_cleaners
|
||||||
|
|
||||||
|
|
||||||
def test_time() -> None:
|
def test_time() -> None:
|
||||||
|
@ -19,3 +19,8 @@ def test_currency() -> None:
|
||||||
def test_expand_numbers() -> None:
|
def test_expand_numbers() -> None:
|
||||||
assert phoneme_cleaners("-1") == "minus one"
|
assert phoneme_cleaners("-1") == "minus one"
|
||||||
assert phoneme_cleaners("1") == "one"
|
assert phoneme_cleaners("1") == "one"
|
||||||
|
|
||||||
|
|
||||||
|
def test_multilingual_phoneme_cleaners() -> None:
|
||||||
|
assert multilingual_phoneme_cleaners("(Hello)") == "Hello"
|
||||||
|
assert multilingual_phoneme_cleaners("1:") == "1,"
|
||||||
|
|
Loading…
Reference in New Issue