modify according to PR reviews

This commit is contained in:
kirianguiller 2021-02-16 17:25:18 +01:00
parent 9cb02aeea7
commit 3e59d3c28d
5 changed files with 52 additions and 24 deletions

View File

@ -3,6 +3,7 @@ from glob import glob
import re import re
import sys import sys
from pathlib import Path from pathlib import Path
from typing import List, Tuple
from tqdm import tqdm from tqdm import tqdm
@ -355,10 +356,15 @@ def _voxcel_x(root_path, meta_file, voxcel_idx):
def baker(root_path: str, meta_file: str) -> List[List[str]]:
"""Normalizes the Baker meta data file to TTS format
# ======================================== Baker (chinese mandarin single speaker) =========================================== Args:
def baker(root_path, meta_file): root_path (str): path to the baker dataset
"""Normalizes the Baker meta data file to TTS format""" meta_file (str): name of the meta dataset containing names of wav to select and the transcript of the sentence
Returns:
List[List[str]]: List of (text, wav_path, speaker_name) associated with each sentences
"""
txt_file = os.path.join(root_path, meta_file) txt_file = os.path.join(root_path, meta_file)
items = [] items = []
speaker_name = "baker" speaker_name = "baker"

View File

@ -10,16 +10,24 @@ import re
import itertools import itertools
def _num2chinese(num :str, big=False, simp=True, o=False, twoalt=False): def _num2chinese(num :str, big=False, simp=True, o=False, twoalt=False) -> str:
""" """Convert numerical arabic numbers (0->9) to chinese hanzi numbers (〇 -> 九)
Converts numbers to Chinese representations.
`big` : use financial characters. Args:
`simp` : use simplified characters instead of traditional characters. num (str): arabic number to convert
`o` : use 〇 for zero. big (bool, optional): use financial characters. Defaults to False.
`twoalt`: use 两/兩 for two when appropriate. simp (bool, optional): use simplified characters instead of traditional characters. Defaults to True.
Note that `o` and `twoalt` are ignored when `big` is used, o (bool, optional): use 〇 for 'zero'. Defaults to False.
and `twoalt` is ignored when `o` is used for formal representations. twoalt (bool, optional): use 两/兩 for 'two' when appropriate. Defaults to False.
Raises:
ValueError: if number is more than 1e48
ValueError: if the number contains an 'e' exponent
Returns:
str: converted number as hanzi characters
""" """
# check num first # check num first
nd = str(num) nd = str(num)
if abs(float(nd)) >= 1e48: if abs(float(nd)) >= 1e48:
@ -97,11 +105,27 @@ def _num2chinese(num :str, big=False, simp=True, o=False, twoalt=False):
def _number_replace(match : re.Match): def _number_replace(match: re.Match) -> str:
"""Regex substitution callback: convert the number captured by a match into chinese characters
Args:
match (re.Match): numbers regex matches
Returns:
str: replaced characters for the numbers
"""
match_str: str = match.group() match_str: str = match.group()
return _num2chinese(match_str) return _num2chinese(match_str)
def replace_numbers_to_characters_in_text(text : str): def replace_numbers_to_characters_in_text(text: str) -> str:
"""Replace all arabic numbers in a text by their equivalent in chinese characters (simplified)
Args:
text (str): input text to transform
Returns:
str: output text
"""
text = re.sub(r'[0-9]+', _number_replace, text) text = re.sub(r'[0-9]+', _number_replace, text)
return text return text

View File

@ -220,7 +220,6 @@ def synthesis(model,
model outputs. model outputs.
speaker_id (int): id of speaker speaker_id (int): id of speaker
style_wav (str | Dict[str, float]): Uses for style embedding of GST. style_wav (str | Dict[str, float]): Uses for style embedding of GST.
style_wav (str): Uses for style embedding of GST.
truncated (bool): keep model states after inference. It can be used truncated (bool): keep model states after inference. It can be used
for continuous inference at long texts. for continuous inference at long texts.
enable_eos_bos_chars (bool): enable special chars for end of sentence and start of sentence. enable_eos_bos_chars (bool): enable special chars for end of sentence and start of sentence.

View File

@ -29,16 +29,15 @@ PHONEME_PUNCTUATION_PATTERN = r'['+_punctuations.replace(' ', '')+']+'
def text2phone(text, language): def text2phone(text, language):
''' '''Convert graphemes to phonemes. For most of the languages, it calls
Convert graphemes to phonemes. For most of the languages, it calls
the phonemizer python library that calls espeak/espeak-ng. For chinese the phonemizer python library that calls espeak/espeak-ng. For chinese
mandarin, it calls pypinyin + custom function for phonemizing mandarin, it calls pypinyin + custom function for phonemizing
Parameters: Parameters:
text (str): text to phonemize text (str): text to phonemize
language (str): language of the text language (str): language of the text
Returns: Returns:
ph (str): phonemes as a string separated by "|" ph (str): phonemes as a string separated by "|"
ph = "ɪ|g|ˈ|z|æ|m|p|ə|l" ph = "ɪ|g|ˈ|z|æ|m|p|ə|l"
''' '''
# TO REVIEW : How to have a good implementation for this? # TO REVIEW : How to have a good implementation for this?

View File

@ -127,7 +127,7 @@ class Synthesizer(object):
gst_style_input = None gst_style_input = None
if self.tts_config.use_gst: if self.tts_config.use_gst:
if self.tts_config.gst["gst_style_input"] not in ["", {}]: if self.tts_config.gst["gst_style_input"] not in ["", {}]:
style_wav = self.tts_config.gst["gst_style_input"] gst_style_input = self.tts_config.gst["gst_style_input"]
for sen in sens: for sen in sens:
# synthesize voice # synthesize voice