mirror of https://github.com/coqui-ai/TTS.git
Merge branch 'dev' of https://github.com/mozilla/TTS into dev
This commit is contained in:
commit
f611e6ac01
|
@ -14,6 +14,7 @@ import re
|
|||
from unidecode import unidecode
|
||||
from .number_norm import normalize_numbers
|
||||
from .abbreviations import abbreviations_en, abbreviations_fr
|
||||
from .time import expand_time_english
|
||||
|
||||
# Regular expression matching whitespace:
|
||||
_whitespace_re = re.compile(r'\s+')
|
||||
|
@ -95,6 +96,7 @@ def english_cleaners(text):
|
|||
'''Pipeline for English text, including number and abbreviation expansion.'''
|
||||
text = convert_to_ascii(text)
|
||||
text = lowercase(text)
|
||||
text = expand_time_english(text)
|
||||
text = expand_numbers(text)
|
||||
text = expand_abbreviations(text)
|
||||
text = replace_symbols(text)
|
||||
|
@ -122,8 +124,8 @@ def portuguese_cleaners(text):
|
|||
|
||||
def phoneme_cleaners(text):
|
||||
'''Pipeline for phonemes mode, including number and abbreviation expansion.'''
|
||||
text = convert_to_ascii(text)
|
||||
text = expand_numbers(text)
|
||||
text = convert_to_ascii(text)
|
||||
text = expand_abbreviations(text)
|
||||
text = replace_symbols(text)
|
||||
text = remove_aux_symbols(text)
|
||||
|
|
|
@ -2,12 +2,12 @@
|
|||
|
||||
import inflect
|
||||
import re
|
||||
from typing import Dict
|
||||
|
||||
_inflect = inflect.engine()
|
||||
# Digit groups joined by commas, e.g. "1,000,000".
_comma_number_re = re.compile(r'([0-9][0-9\,]+[0-9])')
# Decimal numbers, e.g. "3.14".
_decimal_number_re = re.compile(r'([0-9]+\.[0-9]+)')
# Pound amounts; group 1 is the numeric part.
_pounds_re = re.compile(r'£([0-9\,]*[0-9]+)')
# Dollar amounts; group 1 is the numeric part.
_dollars_re = re.compile(r'\$([0-9\.\,]*[0-9]+)')
# Any supported currency amount: group 1 is the symbol, group 2 the number.
# The euro sign is listed so that the "€" unit table consulted by
# _expand_currency can actually be reached.
_currency_re = re.compile(r'(£|\$|€|¥)([0-9\,\.]*[0-9]+)')
# Ordinals written with digits, e.g. "1st", "22nd".
_ordinal_re = re.compile(r'[0-9]+(st|nd|rd|th)')
# Any remaining run of digits.
_number_re = re.compile(r'[0-9]+')
|
||||
|
||||
|
@ -20,24 +20,54 @@ def _expand_decimal_point(m):
|
|||
return m.group(1).replace('.', ' point ')
|
||||
|
||||
|
||||
def _expand_dollars(m):
    """Rewrite a matched dollar amount as dollars and cents.

    Args:
        m: regex match whose group 1 is the numeric part of the amount
            (digits with an optional decimal point).

    Returns:
        A string such as "3 dollars, 50 cents", "1 dollar" or "5 cents";
        "zero dollars" when both parts are zero. If the number contains
        more than one decimal point it is returned unchanged with
        " dollars" appended.
    """
    match = m.group(1)
    parts = match.split('.')
    if len(parts) > 2:
        return match + ' dollars'  # Unexpected format
    dollars = int(parts[0]) if parts[0] else 0
    cents = int(parts[1]) if len(parts) > 1 and parts[1] else 0
    if dollars and cents:
        dollar_unit = 'dollar' if dollars == 1 else 'dollars'
        cent_unit = 'cent' if cents == 1 else 'cents'
        return '%s %s, %s %s' % (dollars, dollar_unit, cents, cent_unit)
    if dollars:
        dollar_unit = 'dollar' if dollars == 1 else 'dollars'
        return '%s %s' % (dollars, dollar_unit)
    if cents:
        cent_unit = 'cent' if cents == 1 else 'cents'
        return '%s %s' % (cents, cent_unit)
    return 'zero dollars'


def __expand_currency(value: str, inflection: Dict[float, str]) -> str:
    """Rewrite a numeric currency amount using the given unit names.

    Args:
        value: numeric part of the amount; commas are dropped and at most
            one decimal point is expected.
        inflection: unit names keyed by quantity. Keys 1 and 2 hold the
            singular and plural main unit, keys 0.01 and 0.02 the singular
            and plural fractional unit. Keys 2 and 0.02 are required; a
            missing singular falls back to the plural.

    Returns:
        The amount with unit names, e.g. "10 dollars 50 cents";
        "zero <plural unit>" when both parts are zero. If the number
        contains more than one decimal point, the raw value followed by
        the plural main unit.
    """
    parts = value.replace(",", "").split('.')
    if len(parts) > 2:
        return f"{value} {inflection[2]}"  # Unexpected format
    text = []
    integer = int(parts[0]) if parts[0] else 0
    if integer > 0:
        integer_unit = inflection.get(integer, inflection[2])
        text.append(f"{integer} {integer_unit}")
    # NOTE: the digits after the point are read as a plain integer count,
    # so "1.1" yields one fractional unit, not ten.
    fraction = int(parts[1]) if len(parts) > 1 and parts[1] else 0
    if fraction > 0:
        # fraction/100 only hits a key for exactly one fractional unit
        # (0.01); every other count uses the plural form.
        fraction_unit = inflection.get(fraction/100, inflection[0.02])
        text.append(f"{fraction} {fraction_unit}")
    if not text:
        return f"zero {inflection[2]}"
    return " ".join(text)
|
||||
|
||||
|
||||
# Unit names per currency symbol, built once at import time instead of on
# every regex match. In each table, keys 1 and 2 are the singular and plural
# main unit and keys 0.01 and 0.02 the singular and plural fractional unit.
_CURRENCY_UNITS = {
    "$": {
        0.01: "cent",
        0.02: "cents",
        1: "dollar",
        2: "dollars",
    },
    "€": {
        0.01: "cent",
        0.02: "cents",
        1: "euro",
        2: "euros",
    },
    "£": {
        0.01: "penny",
        0.02: "pence",
        1: "pound sterling",
        2: "pounds sterling",
    },
    "¥": {
        # TODO rin
        0.02: "sen",
        2: "yen",
    },
}


def _expand_currency(m: "re.Match") -> str:
    """Rewrite a matched currency amount with its unit names.

    Args:
        m: regex match whose group 1 is the currency symbol and group 2
            the numeric part of the amount.

    Returns:
        The result of __expand_currency for the amount, using the unit
        table of the matched symbol.

    Raises:
        KeyError: if the matched symbol has no unit table.
    """
    symbol = m.group(1)
    amount = m.group(2)
    return __expand_currency(amount, _CURRENCY_UNITS[symbol])
|
||||
|
||||
|
||||
def _expand_ordinal(m):
|
||||
|
@ -62,8 +92,7 @@ def _expand_number(m):
|
|||
|
||||
def normalize_numbers(text):
|
||||
text = re.sub(_comma_number_re, _remove_commas, text)
|
||||
text = re.sub(_pounds_re, r'\1 pounds', text)
|
||||
text = re.sub(_dollars_re, _expand_dollars, text)
|
||||
text = re.sub(_currency_re, _expand_currency, text)
|
||||
text = re.sub(_decimal_number_re, _expand_decimal_point, text)
|
||||
text = re.sub(_ordinal_re, _expand_ordinal, text)
|
||||
text = re.sub(_number_re, _expand_number, text)
|
||||
|
|
|
@ -0,0 +1,44 @@
|
|||
import re
|
||||
import inflect
|
||||
|
||||
_inflect = inflect.engine()
|
||||
|
||||
# Matches a clock time such as "9:05", "16:00" or "11:30 p.m.".
# Groups: 1 = hour (2-5 are its sub-alternatives), 6 = minutes,
# 7 = am/pm marker (None when absent).
# The whitespace before the marker lives inside the optional group so that
# the space following a bare time is not consumed, and the pattern ends
# with a negative lookahead rather than \b so that a marker ending in "."
# can match.
_time_re = re.compile(r"""\b
                          ((0?[0-9])|(1[0-1])|(1[2-9])|(2[0-3]))  # hours
                          :
                          ([0-5][0-9])                            # minutes
                          (?:\s*(a\.m\.|p\.m\.|a\.m|p\.m|am|pm))? # am/pm
                          (?!\w)""",
                      re.IGNORECASE | re.X)
|
||||
|
||||
|
||||
def _expand_num(n: int) -> str:
    """Return the words for ``n`` as produced by the inflect engine."""
    words = _inflect.number_to_words(n)
    return words
|
||||
|
||||
|
||||
def _expand_time_english(match: "re.Match") -> str:
    """Spell out a matched clock time as space-separated English words.

    Args:
        match: regex match whose group 1 is the hour (0-23), group 6 the
            minutes and group 7 the optional am/pm marker.

    Returns:
        The hour on a 12-hour clock, then the minutes when non-zero
        (preceded by "oh" for 1-9), then the marker spelled letter by
        letter. When the text carries no marker, "a m" or "p m" is derived
        from the 24-hour value.
    """
    hour = int(match.group(1))
    # 12:xx through 23:xx are p.m.; the midnight hour 00:xx is a.m.
    past_noon = hour >= 12
    if hour > 12:
        hour -= 12
    elif hour == 0:
        # Midnight reads as "twelve" but stays in the a.m. half of the day.
        hour = 12
    words = [_expand_num(hour)]

    minute = int(match.group(6))
    if minute > 0:
        if minute < 10:
            words.append("oh")
        words.append(_expand_num(minute))

    am_pm = match.group(7)
    if am_pm is None:
        words.append("p m" if past_noon else "a m")
    else:
        # Drop the dots and emit each remaining letter separately.
        words.extend(am_pm.replace(".", ""))
    return " ".join(words)
|
||||
|
||||
|
||||
def expand_time_english(text: str) -> str:
    """Replace every clock time found in ``text`` with its spoken form."""
    return _time_re.sub(_expand_time_english, text)
|
|
@ -0,0 +1,24 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from TTS.tts.utils.text.cleaners import english_cleaners, phoneme_cleaners
|
||||
from typing import Any
|
||||
|
||||
|
||||
def assert_equal(actual: Any, expected: Any) -> None:
    """Assert that ``actual == expected``, showing both values on failure."""
    same = actual == expected
    assert same, f"\n{actual} \n vs \n{expected}"
|
||||
|
||||
|
||||
def test_time() -> None:
    """Check that english_cleaners spells out clock times."""
    cases = (
        ("It's 11:00", "it's eleven a m"),
        ("It's 9:01", "it's nine oh one a m"),
        ("It's 16:00", "it's four p m"),
        ("It's 00:00 am", "it's twelve a m"),
    )
    for raw, spoken in cases:
        assert_equal(english_cleaners(raw), spoken)
|
||||
|
||||
|
||||
def test_currency() -> None:
    """Check that phoneme_cleaners spells out currency amounts."""
    cases = (
        ("It's $10.50", "It's ten dollars fifty cents"),
        ("£1.1", "one pound sterling one penny"),
        ("¥1", "one yen"),
    )
    for raw, spoken in cases:
        assert_equal(phoneme_cleaners(raw), spoken)
|
|
@ -171,4 +171,4 @@ def test_text2phone():
|
|||
gt = "ɹ|iː|s|ə|n|t| |ɹ|ɪ|s|ɜː|tʃ| |æ|t| |h|ɑːɹ|v|ɚ|d| |h|ɐ|z| |ʃ|oʊ|n| |m|ɛ|d|ᵻ|t|eɪ|ɾ|ɪ|ŋ| |f|ɔː|ɹ| |æ|z| |l|ɪ|ɾ|əl| |æ|z| |eɪ|t| |w|iː|k|s| |k|æ|n| |æ|k|tʃ|uː|əl|i| |ɪ|n|k|ɹ|iː|s|,| |ð|ə| |ɡ|ɹ|eɪ| |m|æ|ɾ|ɚ|ɹ| |ɪ|n|ð|ə| |p|ɑːɹ|t|s| |ʌ|v|ð|ə| |b|ɹ|eɪ|n| |ɹ|ɪ|s|p|ɑː|n|s|ə|b|əl| |f|ɔː|ɹ| |ɪ|m|oʊ|ʃ|ə|n|əl| |ɹ|ɛ|ɡ|j|uː|l|eɪ|ʃ|ə|n| |æ|n|d| |l|ɜː|n|ɪ|ŋ|!"
|
||||
lang = "en-us"
|
||||
ph = text2phone(text, lang)
|
||||
assert gt == ph, f"\n{phonemes} \n vs \n{gt}"
|
||||
assert gt == ph, f"\n{ph} \n vs \n{gt}"
|
||||
|
|
Loading…
Reference in New Issue