From b7504527828eb3e2377b932fe6dd5bcf1aa50a58 Mon Sep 17 00:00:00 2001
From: Edresson
Date: Mon, 27 Jul 2020 17:20:51 -0300
Subject: [PATCH] add Portuguese Cleaner

---
Review notes (free text in this position is ignored by `git am`):

  * replace_symbols() gains a keyword parameter `lang` that defaults to
    'en', so existing one-argument callers keep working.
  * ':' is now replaced with a single space instead of ','. This replacement
    sits outside the `lang` branch, so it also applies to the default
    English path.
  * '&' becomes 'and' (no surrounding spaces) for lang == 'en' and ' e '
    (with surrounding spaces) for lang == 'pt'. There is no `else` branch,
    so any other `lang` value leaves '&' in the text unchanged.
  * portuguese_cleaners() is new. It applies, in order: lowercase(),
    replace_symbols(lang='pt'), remove_aux_symbols(), collapse_whitespace().
    Unlike english_cleaners(), it does not expand numbers or abbreviations;
    its docstring states that phonemizer already does that.
  * Both hunks drop one of the two blank lines that preceded a top-level
    `def`.
  * NOTE(review): the line breaks and 4-space indentation below were
    restored from a whitespace-collapsed copy of this patch so that they
    agree with the hunk line counts - verify against the original commit
    before applying.

 mozilla_voice_tts/tts/utils/text/cleaners.py | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/mozilla_voice_tts/tts/utils/text/cleaners.py b/mozilla_voice_tts/tts/utils/text/cleaners.py
index 6d1ace08..227118e6 100644
--- a/mozilla_voice_tts/tts/utils/text/cleaners.py
+++ b/mozilla_voice_tts/tts/utils/text/cleaners.py
@@ -67,15 +67,16 @@ def remove_aux_symbols(text):
     text = re.sub(r'[\<\>\(\)\[\]\"]+', '', text)
     return text
 
-
-def replace_symbols(text):
+def replace_symbols(text, lang='en'):
     text = text.replace(';', ',')
     text = text.replace('-', ' ')
-    text = text.replace(':', ',')
-    text = text.replace('&', 'and')
+    text = text.replace(':', ' ')
+    if lang == 'en':
+        text = text.replace('&', 'and')
+    elif lang == 'pt':
+        text = text.replace('&', ' e ')
     return text
 
-
 def basic_cleaners(text):
     '''Basic pipeline that lowercases and collapses whitespace without transliteration.'''
     text = lowercase(text)
@@ -118,6 +119,14 @@ def english_cleaners(text):
     text = collapse_whitespace(text)
     return text
 
+def portuguese_cleaners(text):
+    '''Basic pipeline for Portuguese text. There is no need to expand abbreviation and
+    numbers, phonemizer already does that'''
+    text = lowercase(text)
+    text = replace_symbols(text, lang='pt')
+    text = remove_aux_symbols(text)
+    text = collapse_whitespace(text)
+    return text
 
 def phoneme_cleaners(text):
     '''Pipeline for phonemes mode, including number and abbreviation expansion.'''