Fix Punctuation

2021-11-19 10:39:21 +01:00 · 2021-11-19 10:39:21 +01:00 · d8bdeb8b8f
parent ff7c385838
commit d8bdeb8b8f
3 changed files with 32 additions and 29 deletions
--- a/TTS/tts/utils/text/characters.py
+++ b/TTS/tts/utils/text/characters.py
@@ -101,7 +101,7 @@ class BaseCharacters:
            is_unique (bool):
                Remove duplicates from the provided characters. Defaults to True.
-el
+    el
            is_sorted (bool):
                Sort the characters in alphabetical order. Only applies to `self.characters`. Defaults to True.
    """
@@ -214,7 +214,7 @@ el
    def id_to_char(self, idx: int) -> str:
        return self._id_to_char[idx]
-    def print_log(self, level:int=0):
+    def print_log(self, level: int = 0):
        """
        Prints the vocabulary in a nice format.
        """
--- a/TTS/tts/utils/text/punctuation.py
+++ b/TTS/tts/utils/text/punctuation.py
@@ -91,10 +91,13 @@ class Punctuation:
            puncs.append(_PUNC_IDX(match.group(), position))
        # convert str text to a List[str], each item is separated by a punctuation
        splitted_text = []
-        for punc in puncs:
+        for idx, punc in enumerate(puncs):
            split = text.split(punc.punc)
            prefix, suffix = split[0], punc.punc.join(split[1:])
            splitted_text.append(prefix)
            # if the text does not end with a punctuation, add it to the last item
            if idx == len(puncs) - 1 and len(suffix) > 0:
                splitted_text.append(suffix)
            text = suffix
        return splitted_text, puncs
@@ -126,7 +129,7 @@ class Punctuation:
        current = puncs[0]
        if current.position == PuncPosition.BEGIN:
-            return cls._restore([current.mark + text[0]] + text[1:], puncs[1:], num)
+            return cls._restore([current.punc + text[0]] + text[1:], puncs[1:], num)
        if current.position == PuncPosition.END:
            return [text[0] + current.punc] + cls._restore(text[1:], puncs[1:], num + 1)
--- a/TTS/tts/utils/text/tokenizer.py
+++ b/TTS/tts/utils/text/tokenizer.py
@@ -1,8 +1,8 @@
 from typing import Callable, Dict, List, Union
 from TTS.tts.utils.text import cleaners
 from TTS.tts.utils.text.phonemizers import DEF_LANG_TO_PHONEMIZER, get_phonemizer_by_name
 from TTS.tts.utils.text.characters import Graphemes, IPAPhonemes
 from TTS.tts.utils.text.phonemizers import DEF_LANG_TO_PHONEMIZER, get_phonemizer_by_name
 class TTSTokenizer: