# Post-patch form of the definitions changed in
# mozilla_voice_tts/tts/utils/text/cleaners.py (blob 6d1ace08 -> 227118e6).
# Only the definitions that are fully visible in the patch are reproduced;
# the neighbouring cleaners appear there as partial context only.


def replace_symbols(text, lang='en'):
    '''Replace punctuation symbols with speakable equivalents.

    ';' becomes ',', while '-' and ':' each become a single space. '&' is
    spelled out as a word in the requested language.

    Args:
        text: the input string.
        lang: language code choosing the word used for '&'. 'en' and 'pt'
            are supported; for any other value '&' is left untouched.

    Returns:
        The text with the symbols replaced. Runs of spaces may be introduced,
        so callers are expected to collapse whitespace afterwards.
    '''
    # The replacement word is padded with spaces for every language so that
    # unspaced input such as 'R&B' is read as 'R and B' instead of 'RandB'.
    ampersand_words = {
        'en': ' and ',
        'pt': ' e ',
    }
    text = text.replace(';', ',')
    text = text.replace('-', ' ')
    text = text.replace(':', ' ')
    spoken = ampersand_words.get(lang)
    if spoken is not None:
        text = text.replace('&', spoken)
    return text


def portuguese_cleaners(text):
    '''Basic pipeline for Portuguese text.

    Lowercases the text, replaces symbols using the Portuguese rules, removes
    auxiliary symbols and collapses whitespace. Abbreviations and numbers are
    not expanded here because the phonemizer already does that.
    '''
    text = lowercase(text)
    text = replace_symbols(text, lang='pt')
    text = remove_aux_symbols(text)
    # Must run last: the symbol replacement above can leave runs of spaces.
    text = collapse_whitespace(text)
    return text