Adding French cleaners

This commit is contained in:
Julian WEBER 2020-09-29 14:20:24 +02:00
parent 3c212be5a8
commit b2817e9e93
1 changed files with 11 additions and 0 deletions

View File

@ -73,6 +73,8 @@ def replace_symbols(text, lang='en'):
text = text.replace(':', ' ') text = text.replace(':', ' ')
if lang == 'en': if lang == 'en':
text = text.replace('&', 'and') text = text.replace('&', 'and')
elif lang == 'fr':
text = text.replace('&', ' et ')
elif lang == 'pt': elif lang == 'pt':
text = text.replace('&', ' e ') text = text.replace('&', ' e ')
return text return text
@ -118,6 +120,15 @@ def english_cleaners(text):
text = collapse_whitespace(text) text = collapse_whitespace(text)
return text return text
def french_cleaners(text):
    '''Basic cleaning pipeline for French text.

    Abbreviations and numbers are deliberately not expanded here, because
    the phonemizer already takes care of that.

    Steps, applied in order: lowercase, replace_symbols (with lang='fr',
    so '&' becomes ' et '), remove_aux_symbols, collapse_whitespace.
    Returns the cleaned text.
    '''
    lowered = lowercase(text)
    # The French branch of replace_symbols is selected via lang='fr'.
    symbols_replaced = replace_symbols(lowered, lang='fr')
    aux_removed = remove_aux_symbols(symbols_replaced)
    # Whitespace is collapsed last, after the earlier substitutions.
    return collapse_whitespace(aux_removed)
def portuguese_cleaners(text): def portuguese_cleaners(text):
'''Basic pipeline for Portuguese text. There is no need to expand abbreviation and '''Basic pipeline for Portuguese text. There is no need to expand abbreviation and
numbers, phonemizer already does that''' numbers, phonemizer already does that'''