From 664668265068243cfaa8796093acca17147c7fa3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=B6rg=20Thalheim?=
Date: Sat, 5 Dec 2020 09:46:34 +0100
Subject: [PATCH] cleaners: expand english time

---
 TTS/tts/utils/text/cleaners.py |  2 ++
 TTS/tts/utils/text/time.py     | 58 ++++++++++++++++++++++++++++++++++
 2 files changed, 60 insertions(+)
 create mode 100644 TTS/tts/utils/text/time.py

diff --git a/TTS/tts/utils/text/cleaners.py b/TTS/tts/utils/text/cleaners.py
index 190f52aa..7c3f1017 100644
--- a/TTS/tts/utils/text/cleaners.py
+++ b/TTS/tts/utils/text/cleaners.py
@@ -14,6 +14,7 @@ import re
 from unidecode import unidecode
 from .number_norm import normalize_numbers
 from .abbreviations import abbreviations_en, abbreviations_fr
+from .time import expand_time_english
 
 # Regular expression matching whitespace:
 _whitespace_re = re.compile(r'\s+')
@@ -95,6 +96,7 @@ def english_cleaners(text):
     '''Pipeline for English text, including number and abbreviation expansion.'''
     text = convert_to_ascii(text)
     text = lowercase(text)
+    text = expand_time_english(text)
     text = expand_numbers(text)
     text = expand_abbreviations(text)
     text = replace_symbols(text)
diff --git a/TTS/tts/utils/text/time.py b/TTS/tts/utils/text/time.py
new file mode 100644
index 00000000..55ecbd8c
--- /dev/null
+++ b/TTS/tts/utils/text/time.py
@@ -0,0 +1,58 @@
+"""Expand clock times such as "10:30 pm" into speakable English words."""
+
+import re
+
+import inflect
+
+_inflect = inflect.engine()
+
+# Clock time in H:MM or HH:MM form with an optional am/pm marker.  The marker
+# and the whitespace before it share one optional group, so a time without a
+# marker does not swallow the whitespace that follows it.
+_time_re = re.compile(
+    r"""\b
+    (?P<hour>[01]?[0-9]|2[0-3])                          # hours, 0-23
+    :
+    (?P<minute>[0-5][0-9])                               # minutes, 00-59
+    (?:\s*(?P<meridiem>a\.m\.|am|pm|p\.m\.|a\.m|p\.m))?  # optional am/pm
+    \b""",
+    re.IGNORECASE | re.X,
+)
+
+
+def _expand_num(n: int) -> str:
+    """Return the English words for the integer ``n``."""
+    return _inflect.number_to_words(n)
+
+
+def _expand_time_english(match: "re.Match") -> str:
+    """Spell out a single ``_time_re`` match.
+
+    Hours are spoken on a 12-hour clock, minutes below ten get a leading
+    "oh", and a zero minute is omitted.  When the text carries no am/pm
+    marker one is derived from the 24-hour value; an explicit marker is
+    spelled letter by letter with its dots removed.
+    """
+    hour = int(match.group("hour"))
+    minute = int(match.group("minute"))
+    meridiem = match.group("meridiem")
+
+    # 00:xx is twelve-something a.m.; only 12:00-23:59 is past noon.
+    past_noon = hour >= 12
+    words = [_expand_num(hour % 12 or 12)]
+
+    if minute > 0:
+        if minute < 10:
+            words.append("oh")
+        words.append(_expand_num(minute))
+
+    if meridiem is None:
+        words.append("p m" if past_noon else "a m")
+    else:
+        words.extend(meridiem.replace(".", ""))
+    return " ".join(words)
+
+
+def expand_time_english(text: str) -> str:
+    """Replace every clock time in ``text`` with its spoken English form."""
+    return _time_re.sub(_expand_time_english, text)