modify according to PR reviews

2021-02-16 17:25:18 +01:00 · 2021-02-16 17:25:18 +01:00 · fb0655d1e7
parent c4c7bc1b88
commit fb0655d1e7
5 changed files with 52 additions and 24 deletions
--- a/TTS/tts/datasets/preprocess.py
+++ b/TTS/tts/datasets/preprocess.py
@ -3,6 +3,7 @@ from glob import glob
 import re
 import sys
 from pathlib import Path
+from typing import List, Tuple

 from tqdm import tqdm

@ -355,10 +356,15 @@ def _voxcel_x(root_path, meta_file, voxcel_idx):



+def baker(root_path: str, meta_file: str) ->  List[List[str]]:
+    """Normalizes the Baker meta data file to TTS format

-# ======================================== Baker (chinese mandarin single speaker) ===========================================
-def baker(root_path, meta_file):
-    """Normalizes the Baker meta data file to TTS format"""
+    Args:
+        root_path (str): path to the baker dataset
+        meta_file (str): name of the metadata file listing the wav files to select and the transcript of each sentence
+    Returns:
+        List[List[str]]: List of (text, wav_path, speaker_name) entries, one per sentence
+    """
    txt_file = os.path.join(root_path, meta_file)
    items = []
    speaker_name = "baker"
--- a/TTS/tts/utils/chinese_mandarin/numbers.py
+++ b/TTS/tts/utils/chinese_mandarin/numbers.py
@ -10,16 +10,24 @@ import re
 import itertools


-def _num2chinese(num :str, big=False, simp=True, o=False, twoalt=False):
-    """
-    Converts numbers to Chinese representations.
-    `big`   : use financial characters.
-    `simp`  : use simplified characters instead of traditional characters.
-    `o`     : use 〇 for zero.
-    `twoalt`: use 两/兩 for two when appropriate.
-    Note that `o` and `twoalt` is ignored when `big` is used, 
-    and `twoalt` is ignored when `o` is used for formal representations.
+def _num2chinese(num :str, big=False, simp=True, o=False, twoalt=False) -> str:
+    """Convert numerical arabic numbers (0->9) to chinese hanzi numbers (〇 -> 九)
+
+    Args:
+        num (str): arabic number to convert
+        big (bool, optional): use financial characters. Defaults to False.
+        simp (bool, optional): use simplified characters instead of traditional characters. Defaults to True.
+        o (bool, optional): use 〇 for 'zero'. Defaults to False.
+        twoalt (bool, optional): use 两/兩 for 'two' when appropriate. Defaults to False.
+
+    Raises:
+        ValueError: if the absolute value of the number is 1e48 or greater
+        ValueError: if the number contains an 'e' exponent
+
+    Returns:
+        str: converted number as hanzi characters
    """
+
    # check num first
    nd = str(num)
    if abs(float(nd)) >= 1e48:
@ -97,11 +105,27 @@ def _num2chinese(num :str, big=False, simp=True, o=False, twoalt=False):



-def _number_replace(match : re.Match):
+def _number_replace(match: re.Match) -> str:
+    """Regex substitution callback: convert the matched number to Chinese characters
+
+    Args:
+        match (re.Match): regex match covering the number to convert
+
+    Returns:
+        str: Chinese characters that replace the matched number
+    """
    match_str: str = match.group()
    return _num2chinese(match_str)


-def replace_numbers_to_characters_in_text(text : str):
+def replace_numbers_to_characters_in_text(text: str) -> str:
+    """Replace all Arabic numerals in a text with their equivalent in simplified Chinese characters
+
+    Args:
+        text (str): input text to transform
+
+    Returns:
+        str: output text
+    """
    text = re.sub(r'[0-9]+', _number_replace, text)
    return text
--- a/TTS/tts/utils/synthesis.py
+++ b/TTS/tts/utils/synthesis.py
@ -220,7 +220,6 @@ def synthesis(model,
                model outputs.
            speaker_id (int): id of speaker
            style_wav (str | Dict[str, float]): Uses for style embedding of GST.
-            style_wav (str): Uses for style embedding of GST.
            truncated (bool): keep model states after inference. It can be used
                for continuous inference at long texts.
            enable_eos_bos_chars (bool): enable special chars for end of sentence and start of sentence.
--- a/TTS/tts/utils/text/init.py
+++ b/TTS/tts/utils/text/init.py
@ -29,16 +29,15 @@ PHONEME_PUNCTUATION_PATTERN = r'['+_punctuations.replace(' ', '')+']+'


 def text2phone(text, language):
-    '''
-    Convert graphemes to phonemes. For most of the languages, it calls
+    '''Convert graphemes to phonemes. For most of the languages, it calls
    the phonemizer python library that calls espeak/espeak-ng. For chinese
    mandarin, it calls pypinyin + custom function for phonemizing
-            Parameters:
-                    text (str): text to phonemize
-                    language (str): language of the text
-            Returns:
-                    ph (str): phonemes as a string seperated by "|"
-                            ph = "ɪ|g|ˈ|z|æ|m|p|ə|l"
+        Parameters:
+                text (str): text to phonemize
+                language (str): language of the text
+        Returns:
+                ph (str): phonemes as a string separated by "|"
+                        ph = "ɪ|g|ˈ|z|æ|m|p|ə|l"
    '''

    # TO REVIEW : How to have a good implementation for this?
--- a/TTS/utils/synthesizer.py
+++ b/TTS/utils/synthesizer.py
@ -127,7 +127,7 @@ class Synthesizer(object):
        gst_style_input = None
        if self.tts_config.use_gst:
            if self.tts_config.gst["gst_style_input"] not in ["", {}]:
-                style_wav = self.tts_config.gst["gst_style_input"]
+                gst_style_input = self.tts_config.gst["gst_style_input"]

        for sen in sens:
            # synthesize voice