diff --git a/TTS/tts/datasets/preprocess.py b/TTS/tts/datasets/preprocess.py index 4ebdde98..26c17174 100644 --- a/TTS/tts/datasets/preprocess.py +++ b/TTS/tts/datasets/preprocess.py @@ -3,6 +3,7 @@ from glob import glob import re import sys from pathlib import Path +from typing import List, Tuple from tqdm import tqdm @@ -371,10 +372,15 @@ def _voxcel_x(root_path, meta_file, voxcel_idx): +def baker(root_path: str, meta_file: str) -> List[List[str]]: + """Normalizes the Baker meta data file to TTS format -# ======================================== Baker (chinese mandarin single speaker) =========================================== -def baker(root_path, meta_file): - """Normalizes the Baker meta data file to TTS format""" + Args: + root_path (str): path to the baker dataset + meta_file (str): name of the meta dataset containing names of wav to select and the transcript of the sentence + Returns: + List[List[str]]: List of (text, wav_path, speaker_name) associated with each sentences + """ txt_file = os.path.join(root_path, meta_file) items = [] speaker_name = "baker" diff --git a/TTS/tts/utils/chinese_mandarin/numbers.py b/TTS/tts/utils/chinese_mandarin/numbers.py index 8d2f40ff..a662ea1c 100644 --- a/TTS/tts/utils/chinese_mandarin/numbers.py +++ b/TTS/tts/utils/chinese_mandarin/numbers.py @@ -10,16 +10,24 @@ import re import itertools -def _num2chinese(num :str, big=False, simp=True, o=False, twoalt=False): - """ - Converts numbers to Chinese representations. - `big` : use financial characters. - `simp` : use simplified characters instead of traditional characters. - `o` : use 〇 for zero. - `twoalt`: use 两/兩 for two when appropriate. - Note that `o` and `twoalt` is ignored when `big` is used, - and `twoalt` is ignored when `o` is used for formal representations. 
+def _num2chinese(num :str, big=False, simp=True, o=False, twoalt=False) -> str: + """Convert numerical arabic numbers (0->9) to chinese hanzi numbers (〇 -> 九) + + Args: + num (str): arabic number to convert + big (bool, optional): use financial characters. Defaults to False. + simp (bool, optional): use simplified characters instead of traditional characters. Defaults to True. + o (bool, optional): use 〇 for 'zero'. Defaults to False. + twoalt (bool, optional): use 两/兩 for 'two' when appropriate. Defaults to False. + + Raises: + ValueError: if the absolute value of number is 1e48 or more + ValueError: if 'e' exponent in number + + Returns: + str: converted number as hanzi characters """ + # check num first nd = str(num) if abs(float(nd)) >= 1e48: @@ -97,11 +105,27 @@ def _num2chinese(num :str, big=False, simp=True, o=False, twoalt=False): -def _number_replace(match : re.Match): +def _number_replace(match: re.Match) -> str: + """function to apply in a match, transform all numbers in a match by chinese characters + + Args: + match (re.Match): numbers regex matches + + Returns: + str: replaced characters for the numbers + """ match_str: str = match.group() return _num2chinese(match_str) -def replace_numbers_to_characters_in_text(text : str): +def replace_numbers_to_characters_in_text(text: str) -> str: + """Replace all arabic numbers in a text by their equivalent in chinese characters (simplified) + + Args: + text (str): input text to transform + + Returns: + str: output text + """ text = re.sub(r'[0-9]+', _number_replace, text) return text \ No newline at end of file diff --git a/TTS/tts/utils/synthesis.py b/TTS/tts/utils/synthesis.py index e7b1546e..adbd0d20 100644 --- a/TTS/tts/utils/synthesis.py +++ b/TTS/tts/utils/synthesis.py @@ -220,7 +220,6 @@ def synthesis(model, model outputs. speaker_id (int): id of speaker style_wav (str | Dict[str, float]): Uses for style embedding of GST. - style_wav (str): Uses for style embedding of GST. 
truncated (bool): keep model states after inference. It can be used for continuous inference at long texts. enable_eos_bos_chars (bool): enable special chars for end of sentence and start of sentence. diff --git a/TTS/tts/utils/text/__init__.py b/TTS/tts/utils/text/__init__.py index 16172596..4f4a38ea 100644 --- a/TTS/tts/utils/text/__init__.py +++ b/TTS/tts/utils/text/__init__.py @@ -29,16 +29,15 @@ PHONEME_PUNCTUATION_PATTERN = r'['+_punctuations.replace(' ', '')+']+' def text2phone(text, language): - ''' - Convert graphemes to phonemes. For most of the languages, it calls + '''Convert graphemes to phonemes. For most of the languages, it calls the phonemizer python library that calls espeak/espeak-ng. For chinese mandarin, it calls pypinyin + custom function for phonemizing - Parameters: - text (str): text to phonemize - language (str): language of the text - Returns: - ph (str): phonemes as a string seperated by "|" - ph = "ɪ|g|ˈ|z|æ|m|p|ə|l" + Parameters: + text (str): text to phonemize + language (str): language of the text + Returns: + ph (str): phonemes as a string separated by "|" + ph = "ɪ|g|ˈ|z|æ|m|p|ə|l" ''' # TO REVIEW : How to have a good implementation for this? diff --git a/TTS/utils/synthesizer.py b/TTS/utils/synthesizer.py index 4b4bc04c..3e65e175 100644 --- a/TTS/utils/synthesizer.py +++ b/TTS/utils/synthesizer.py @@ -127,7 +127,7 @@ class Synthesizer(object): gst_style_input = None if self.tts_config.use_gst: if self.tts_config.gst["gst_style_input"] not in ["", {}]: - style_wav = self.tts_config.gst["gst_style_input"] + gst_style_input = self.tts_config.gst["gst_style_input"] for sen in sens: # synthesize voice