modify according to PR reviews

This commit is contained in:
kirianguiller 2021-02-16 17:25:18 +01:00
parent c4c7bc1b88
commit fb0655d1e7
5 changed files with 52 additions and 24 deletions

View File

@ -3,6 +3,7 @@ from glob import glob
import re
import sys
from pathlib import Path
from typing import List, Tuple
from tqdm import tqdm
@ -355,10 +356,15 @@ def _voxcel_x(root_path, meta_file, voxcel_idx):
def baker(root_path: str, meta_file: str) -> List[List[str]]:
"""Normalizes the Baker meta data file to TTS format
# ======================================== Baker (chinese mandarin single speaker) ===========================================
def baker(root_path, meta_file):
"""Normalizes the Baker meta data file to TTS format"""
Args:
root_path (str): path to the baker dataset
meta_file (str): name of the meta dataset containing names of wav to select and the transcript of the sentence
Returns:
List[List[str]]: List of (text, wav_path, speaker_name) items, one for each sentence
"""
txt_file = os.path.join(root_path, meta_file)
items = []
speaker_name = "baker"

View File

@ -10,16 +10,24 @@ import re
import itertools
def _num2chinese(num :str, big=False, simp=True, o=False, twoalt=False):
"""
Converts numbers to Chinese representations.
`big` : use financial characters.
`simp` : use simplified characters instead of traditional characters.
`o` : use 〇 for zero.
`twoalt`: use 两/兩 for two when appropriate.
Note that `o` and `twoalt` is ignored when `big` is used,
and `twoalt` is ignored when `o` is used for formal representations.
def _num2chinese(num :str, big=False, simp=True, o=False, twoalt=False) -> str:
"""Convert numerical arabic numbers (0->9) to chinese hanzi numbers (〇 -> 九)
Args:
num (str): arabic number to convert
big (bool, optional): use financial characters. Defaults to False.
simp (bool, optional): use simplified characters instead of traditional characters. Defaults to True.
o (bool, optional): use 〇 for 'zero'. Defaults to False.
twoalt (bool, optional): use 两/兩 for 'two' when appropriate. Defaults to False.
Raises:
ValueError: if the absolute value of the number is 1e48 or more
ValueError: if the number contains an 'e' exponent
Returns:
str: converted number as hanzi characters
"""
# check num first
nd = str(num)
if abs(float(nd)) >= 1e48:
@ -97,11 +105,27 @@ def _num2chinese(num :str, big=False, simp=True, o=False, twoalt=False):
def _number_replace(match : re.Match):
def _number_replace(match: re.Match) -> str:
"""Function to apply to a regex match: converts the matched number into chinese characters
Args:
match (re.Match): numbers regex matches
Returns:
str: replaced characters for the numbers
"""
match_str: str = match.group()
return _num2chinese(match_str)
def replace_numbers_to_characters_in_text(text : str):
def replace_numbers_to_characters_in_text(text: str) -> str:
"""Replace all arabic numbers in a text with their equivalent in chinese characters (simplified)
Args:
text (str): input text to transform
Returns:
str: output text
"""
text = re.sub(r'[0-9]+', _number_replace, text)
return text

View File

@ -220,7 +220,6 @@ def synthesis(model,
model outputs.
speaker_id (int): id of speaker
style_wav (str | Dict[str, float]): Uses for style embedding of GST.
style_wav (str): Uses for style embedding of GST.
truncated (bool): keep model states after inference. It can be used
for continuous inference at long texts.
enable_eos_bos_chars (bool): enable special chars for end of sentence and start of sentence.

View File

@ -29,16 +29,15 @@ PHONEME_PUNCTUATION_PATTERN = r'['+_punctuations.replace(' ', '')+']+'
def text2phone(text, language):
'''
Convert graphemes to phonemes. For most of the languages, it calls
'''Convert graphemes to phonemes. For most of the languages, it calls
the phonemizer python library that calls espeak/espeak-ng. For chinese
mandarin, it calls pypinyin + custom function for phonemizing
Parameters:
text (str): text to phonemize
language (str): language of the text
Returns:
ph (str): phonemes as a string seperated by "|"
ph = "ɪ|g|ˈ|z|æ|m|p|ə|l"
Parameters:
text (str): text to phonemize
language (str): language of the text
Returns:
ph (str): phonemes as a string separated by "|"
ph = "ɪ|g|ˈ|z|æ|m|p|ə|l"
'''
# TO REVIEW : How to have a good implementation for this?

View File

@ -127,7 +127,7 @@ class Synthesizer(object):
gst_style_input = None
if self.tts_config.use_gst:
if self.tts_config.gst["gst_style_input"] not in ["", {}]:
style_wav = self.tts_config.gst["gst_style_input"]
gst_style_input = self.tts_config.gst["gst_style_input"]
for sen in sens:
# synthesize voice