mirror of https://github.com/coqui-ai/TTS.git
add Portuguese Cleaner
This commit is contained in:
parent
93a9cc4683
commit
b750452782
|
@ -67,15 +67,16 @@ def remove_aux_symbols(text):
|
||||||
text = re.sub(r'[\<\>\(\)\[\]\"]+', '', text)
|
text = re.sub(r'[\<\>\(\)\[\]\"]+', '', text)
|
||||||
return text
|
return text
|
||||||
|
|
||||||
|
def replace_symbols(text, lang='en'):
|
||||||
def replace_symbols(text):
|
|
||||||
text = text.replace(';', ',')
|
text = text.replace(';', ',')
|
||||||
text = text.replace('-', ' ')
|
text = text.replace('-', ' ')
|
||||||
text = text.replace(':', ',')
|
text = text.replace(':', ' ')
|
||||||
text = text.replace('&', 'and')
|
if lang == 'en':
|
||||||
|
text = text.replace('&', 'and')
|
||||||
|
elif lang == 'pt':
|
||||||
|
text = text.replace('&', ' e ')
|
||||||
return text
|
return text
|
||||||
|
|
||||||
|
|
||||||
def basic_cleaners(text):
|
def basic_cleaners(text):
|
||||||
'''Basic pipeline that lowercases and collapses whitespace without transliteration.'''
|
'''Basic pipeline that lowercases and collapses whitespace without transliteration.'''
|
||||||
text = lowercase(text)
|
text = lowercase(text)
|
||||||
|
@ -118,6 +119,14 @@ def english_cleaners(text):
|
||||||
text = collapse_whitespace(text)
|
text = collapse_whitespace(text)
|
||||||
return text
|
return text
|
||||||
|
|
||||||
|
def portuguese_cleaners(text):
|
||||||
|
'''Basic pipeline for Portuguese text. There is no need to expand abbreviation and
|
||||||
|
numbers, phonemizer already does that'''
|
||||||
|
text = lowercase(text)
|
||||||
|
text = replace_symbols(text, lang='pt')
|
||||||
|
text = remove_aux_symbols(text)
|
||||||
|
text = collapse_whitespace(text)
|
||||||
|
return text
|
||||||
|
|
||||||
def phoneme_cleaners(text):
|
def phoneme_cleaners(text):
|
||||||
'''Pipeline for phonemes mode, including number and abbreviation expansion.'''
|
'''Pipeline for phonemes mode, including number and abbreviation expansion.'''
|
||||||
|
|
Loading…
Reference in New Issue