mirror of https://github.com/coqui-ai/TTS.git
Handle wider range of sentence splits
This commit is contained in:
parent
95b6a16d65
commit
ce2481d9cd
|
@ -6,6 +6,7 @@ import time
|
|||
import numpy as np
|
||||
import torch
|
||||
import yaml
|
||||
import pysbd
|
||||
|
||||
from TTS.utils.audio import AudioProcessor
|
||||
from TTS.utils.io import load_config
|
||||
|
@ -18,13 +19,6 @@ from TTS.utils.synthesis import *
|
|||
|
||||
from TTS.utils.text import make_symbols, phonemes, symbols
|
||||
|
||||
# Building blocks for the regex-based sentence splitter.
# A single ASCII letter, captured.
alphabets = r"([A-Za-z])"
# Title abbreviation immediately followed by a period.
prefixes = r"(Mr|St|Mrs|Ms|Dr)[.]"
# Company / name suffix; the trailing period is appended where the pattern is used.
suffixes = r"(Inc|Ltd|Jr|Sr|Co)"
# Words the splitter treats as the beginning of a new sentence.
starters = r"(Mr|Mrs|Ms|Dr|He\s|She\s|It\s|They\s|Their\s|Our\s|We\s|But\s|However\s|That\s|This\s|Wherever)"
# Dotted upper-case acronym of two letters, with an optional third.
acronyms = r"([A-Z][.][A-Z][.](?:[A-Z][.])?)"
# A dot followed by one of a few top-level domains.
websites = r"[.](com|net|org|io|gov)"
|
||||
|
||||
|
||||
class Synthesizer(object):
|
||||
def __init__(self, config):
|
||||
|
@ -32,6 +26,7 @@ class Synthesizer(object):
|
|||
self.vocoder_model = None
|
||||
self.config = config
|
||||
print(config)
|
||||
self.seg = self.get_segmenter("en")
|
||||
self.use_cuda = self.config.use_cuda
|
||||
if self.use_cuda:
|
||||
assert torch.cuda.is_available(), "CUDA is not availabe on this machine."
|
||||
|
@ -43,6 +38,10 @@ class Synthesizer(object):
|
|||
self.load_wavernn(self.config.wavernn_lib_path, self.config.wavernn_checkpoint,
|
||||
self.config.wavernn_config, self.config.use_cuda)
|
||||
|
||||
@staticmethod
def get_segmenter(lang):
    """Build the pysbd sentence segmenter for language ``lang``.

    Args:
        lang: language code forwarded to ``pysbd.Segmenter`` as ``language``.

    Returns:
        A ``pysbd.Segmenter`` created with ``clean=True``.
    """
    segmenter = pysbd.Segmenter(language=lang, clean=True)
    return segmenter
|
||||
|
||||
def load_tts(self, tts_checkpoint, tts_config, use_cuda):
|
||||
# pylint: disable=global-statement
|
||||
global symbols, phonemes
|
||||
|
@ -132,37 +131,8 @@ class Synthesizer(object):
|
|||
wav = np.array(wav)
|
||||
self.ap.save_wav(wav, path)
|
||||
|
||||
@staticmethod
def split_into_sentences(text):
    """Split ``text`` into sentences with hand-written regex rules.

    Periods that must not end a sentence (titles, single-letter initials,
    dotted acronyms, company suffixes, website domains, "Ph.D.") are first
    swapped for a ``<prd>`` placeholder. Every remaining ".", "?" and "!"
    then gets a ``<stop>`` marker, the text is cut on those markers and the
    placeholders are turned back into periods.

    Args:
        text: the raw input string.

    Returns:
        List of stripped, non-empty sentence strings.
    """
    # Trailing <stop> guarantees the last sentence is terminated.
    marked = (" " + text + " <stop>").replace("\n", " ")

    marked = re.sub(prefixes, "\\1<prd>", marked)
    marked = re.sub(websites, "<prd>\\1", marked)
    marked = marked.replace("Ph.D.", "Ph<prd>D<prd>")

    # Order matters: later rules rely on the placeholders set by earlier ones.
    regex_rules = (
        (r"\s" + alphabets + "[.] ", " \\1<prd> "),
        (acronyms + " " + starters, "\\1<stop> \\2"),
        (alphabets + "[.]" + alphabets + "[.]" + alphabets + "[.]", "\\1<prd>\\2<prd>\\3<prd>"),
        (alphabets + "[.]" + alphabets + "[.]", "\\1<prd>\\2<prd>"),
        (" " + suffixes + "[.] " + starters, " \\1<stop> \\2"),
        (" " + suffixes + "[.]", " \\1<prd>"),
        (" " + alphabets + "[.]", " \\1<prd>"),
    )
    for pattern, replacement in regex_rules:
        marked = re.sub(pattern, replacement, marked)

    literal_rules = (
        # Move terminal punctuation outside a closing quote.
        (".”", "”."),
        (".\"", "\"."),
        ("!\"", "\"!"),
        ("?\"", "\"?"),
        # Mark every remaining sentence terminator.
        (".", ".<stop>"),
        ("?", "?<stop>"),
        ("!", "!<stop>"),
        # Restore the protected periods.
        ("<prd>", "."),
    )
    for old, new in literal_rules:
        marked = marked.replace(old, new)

    # The chunk after the final <stop> is never a sentence; drop it.
    pieces = marked.split("<stop>")[:-1]
    stripped = (piece.strip() for piece in pieces)
    return [sentence for sentence in stripped if sentence]  # remove empty sentences
|
||||
def split_into_sentences(self, text):
    """Split ``text`` into sentences using the segmenter held in ``self.seg``.

    Returns whatever ``self.seg.segment(text)`` returns.
    """
    sentences = self.seg.segment(text)
    return sentences
|
||||
|
||||
def tts(self, text, speaker_id=None):
|
||||
start_time = time.time()
|
||||
|
|
|
@ -32,3 +32,27 @@ class DemoServerTest(unittest.TestCase):
|
|||
config['tts_config'] = os.path.join(tts_root_path, config['tts_config'])
|
||||
synthesizer = Synthesizer(config)
|
||||
synthesizer.tts("Better this test works!!")
|
||||
|
||||
def test_split_into_sentences(self):
    """Check demo server sentences split as expected"""
    print("\n > Testing demo server sentence splitting")
    # Synthesizer.split_into_sentences only reads `self.seg`, so this test
    # case stands in for a Synthesizer instance instead of constructing one.
    # pylint: disable=attribute-defined-outside-init
    self.seg = Synthesizer.get_segmenter("en")
    sis = Synthesizer.split_into_sentences

    # (input text, expected list of sentences)
    cases = [
        ('Hello. Two sentences', ['Hello.', 'Two sentences']),
        ('He went to meet the adviser from Scott, Waltman & Co. next morning.',
         ['He went to meet the adviser from Scott, Waltman & Co. next morning.']),
        ('Let\'s run it past Sarah and co. They\'ll want to see this.',
         ['Let\'s run it past Sarah and co.', 'They\'ll want to see this.']),
        ('Where is Bobby Jr.\'s rabbit?', ['Where is Bobby Jr.\'s rabbit?']),
        ('Please inform the U.K. authorities right away.',
         ['Please inform the U.K. authorities right away.']),
        ('Were David and co. at the event?', ['Were David and co. at the event?']),
        ('paging dr. green, please come to theatre four immediately.',
         ['paging dr. green, please come to theatre four immediately.']),
        ('The email format is Firstname.Lastname@example.com. I think you reversed them.',
         ['The email format is Firstname.Lastname@example.com.', 'I think you reversed them.']),
        ('The demo site is: https://top100.example.com/subsection/latestnews.html. Please send us your feedback.',
         ['The demo site is: https://top100.example.com/subsection/latestnews.html.', 'Please send us your feedback.']),
        # with the final lowercase "she" we see it's all one sentence
        ('Scowling at him, \'You are not done yet!\' she yelled.',
         ['Scowling at him, \'You are not done yet!\' she yelled.']),
        ('Hey!! So good to see you.', ['Hey!!', 'So good to see you.']),
        ('He went to Yahoo! but I don\'t know the division.',
         ['He went to Yahoo! but I don\'t know the division.']),
        ('If you can\'t remember a quote, “at least make up a memorable one that\'s plausible..."',
         ['If you can\'t remember a quote, “at least make up a memorable one that\'s plausible..."']),
        ('The address is not google.com.', ['The address is not google.com.']),
        ('1.) The first item 2.) The second item', ['1.) The first item', '2.) The second item']),
        ('1) The first item 2) The second item', ['1) The first item', '2) The second item']),
        ('a. The first item b. The second item c. The third list item',
         ['a. The first item', 'b. The second item', 'c. The third list item']),
    ]

    # assertEqual (unlike a bare `assert`) is not stripped under `python -O`
    # and reports both values on failure; subTest lets every case run and be
    # reported even when an earlier one fails.
    for text, expected in cases:
        with self.subTest(text=text):
            self.assertEqual(sis(self, text), expected)
|
||||
|
|
Loading…
Reference in New Issue