mirror of https://github.com/coqui-ai/TTS.git
add Portuguese Cleaner
This commit is contained in:
parent
93a9cc4683
commit
b750452782
|
@ -67,15 +67,16 @@ def remove_aux_symbols(text):
|
|||
text = re.sub(r'[\<\>\(\)\[\]\"]+', '', text)
|
||||
return text
|
||||
|
||||
|
||||
def replace_symbols(text, lang='en'):
    '''Replace punctuation symbols with simpler, speakable equivalents.

    Args:
        text: input string to normalize.
        lang: language code selecting how '&' is spelled out. 'en' gives
            'and', 'pt' gives ' e '. For any other value '&' is left
            untouched.

    Returns:
        The string with ';' turned into ',', '-' and ':' turned into a
        space, and '&' expanded according to `lang`.
    '''
    text = text.replace(';', ',')
    text = text.replace('-', ' ')
    text = text.replace(':', ' ')
    # The ampersand expansion must stay language-dependent: replacing it
    # unconditionally first would leave nothing for the branches below.
    if lang == 'en':
        # NOTE(review): no surrounding spaces here, unlike the Portuguese
        # branch ("R&D" becomes "RandD") -- confirm this is intended.
        text = text.replace('&', 'and')
    elif lang == 'pt':
        text = text.replace('&', ' e ')
    return text
|
||||
|
||||
|
||||
def basic_cleaners(text):
|
||||
'''Basic pipeline that lowercases and collapses whitespace without transliteration.'''
|
||||
text = lowercase(text)
|
||||
|
@ -118,6 +119,14 @@ def english_cleaners(text):
|
|||
text = collapse_whitespace(text)
|
||||
return text
|
||||
|
||||
def portuguese_cleaners(text):
    '''Cleaning pipeline for Portuguese text.

    Lowercases the input, replaces symbols using the Portuguese rules,
    strips auxiliary symbols and collapses whitespace. Abbreviations and
    numbers are deliberately not expanded here because the phonemizer
    already takes care of that.
    '''
    normalized = replace_symbols(lowercase(text), lang='pt')
    return collapse_whitespace(remove_aux_symbols(normalized))
|
||||
|
||||
def phoneme_cleaners(text):
|
||||
'''Pipeline for phonemes mode, including number and abbreviation expansion.'''
|
||||
|
|
Loading…
Reference in New Issue