Adding French cleaners

This commit is contained in:
Julian WEBER 2020-09-29 14:20:24 +02:00
parent 3c212be5a8
commit b2817e9e93
1 changed files with 11 additions and 0 deletions

View File

@ -73,6 +73,8 @@ def replace_symbols(text, lang='en'):
text = text.replace(':', ' ')
if lang == 'en':
text = text.replace('&', 'and')
elif lang == 'fr':
text = text.replace('&', ' et ')
elif lang == 'pt':
text = text.replace('&', ' e ')
return text
@ -118,6 +120,15 @@ def english_cleaners(text):
text = collapse_whitespace(text)
return text
def french_cleaners(text):
    '''Cleaning pipeline for French text.

    Applies, in order: lowercasing, symbol replacement with ``lang='fr'``
    (e.g. "&" becomes " et "), auxiliary-symbol removal and whitespace
    collapsing. Abbreviations and numbers are deliberately left untouched
    here, since the phonemizer already expands them.
    '''
    lowered = lowercase(text)
    with_french_symbols = replace_symbols(lowered, lang='fr')
    # Strip auxiliary symbols first, then squeeze the whitespace left behind.
    return collapse_whitespace(remove_aux_symbols(with_french_symbols))
def portuguese_cleaners(text):
'''Basic pipeline for Portuguese text. There is no need to expand abbreviation and
numbers, phonemizer already does that'''