mirror of https://github.com/coqui-ai/TTS.git
modify according to PR reviews
This commit is contained in:
parent
9cb02aeea7
commit
3e59d3c28d
|
@ -3,6 +3,7 @@ from glob import glob
|
||||||
import re
|
import re
|
||||||
import sys
|
import sys
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from typing import List, Tuple
|
||||||
|
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
|
|
||||||
|
@ -355,10 +356,15 @@ def _voxcel_x(root_path, meta_file, voxcel_idx):
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def baker(root_path: str, meta_file: str) -> List[List[str]]:
|
||||||
|
"""Normalizes the Baker meta data file to TTS format
|
||||||
|
|
||||||
# ======================================== Baker (chinese mandarin single speaker) ===========================================
|
Args:
|
||||||
def baker(root_path, meta_file):
|
root_path (str): path to the baker dataset
|
||||||
"""Normalizes the Baker meta data file to TTS format"""
|
meta_file (str): name of the meta dataset containing names of wav to select and the transcript of the sentence
|
||||||
|
Returns:
|
||||||
|
List[List[str]]: List of (text, wav_path, speaker_name) associated with each sentence
|
||||||
|
"""
|
||||||
txt_file = os.path.join(root_path, meta_file)
|
txt_file = os.path.join(root_path, meta_file)
|
||||||
items = []
|
items = []
|
||||||
speaker_name = "baker"
|
speaker_name = "baker"
|
||||||
|
|
|
@ -10,16 +10,24 @@ import re
|
||||||
import itertools
|
import itertools
|
||||||
|
|
||||||
|
|
||||||
def _num2chinese(num :str, big=False, simp=True, o=False, twoalt=False):
|
def _num2chinese(num :str, big=False, simp=True, o=False, twoalt=False) -> str:
|
||||||
"""
|
"""Convert numerical arabic numbers (0->9) to chinese hanzi numbers (〇 -> 九)
|
||||||
Converts numbers to Chinese representations.
|
|
||||||
`big` : use financial characters.
|
Args:
|
||||||
`simp` : use simplified characters instead of traditional characters.
|
num (str): arabic number to convert
|
||||||
`o` : use 〇 for zero.
|
big (bool, optional): use financial characters. Defaults to False.
|
||||||
`twoalt`: use 两/兩 for two when appropriate.
|
simp (bool, optional): use simplified characters instead of traditional characters. Defaults to True.
|
||||||
Note that `o` and `twoalt` is ignored when `big` is used,
|
o (bool, optional): use 〇 for 'zero'. Defaults to False.
|
||||||
and `twoalt` is ignored when `o` is used for formal representations.
|
twoalt (bool, optional): use 两/兩 for 'two' when appropriate. Defaults to False.
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
ValueError: if the absolute value of the number is 1e48 or more
|
||||||
|
ValueError: if the number contains an 'e' exponent
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
str: converted number as hanzi characters
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# check num first
|
# check num first
|
||||||
nd = str(num)
|
nd = str(num)
|
||||||
if abs(float(nd)) >= 1e48:
|
if abs(float(nd)) >= 1e48:
|
||||||
|
@ -97,11 +105,27 @@ def _num2chinese(num :str, big=False, simp=True, o=False, twoalt=False):
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def _number_replace(match : re.Match):
|
def _number_replace(match: re.Match) -> str:
|
||||||
|
"""Function to apply to a regex match: converts the matched number to chinese characters
|
||||||
|
|
||||||
|
Args:
|
||||||
|
match (re.Match): numbers regex matches
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
str: replaced characters for the numbers
|
||||||
|
"""
|
||||||
match_str: str = match.group()
|
match_str: str = match.group()
|
||||||
return _num2chinese(match_str)
|
return _num2chinese(match_str)
|
||||||
|
|
||||||
|
|
||||||
def replace_numbers_to_characters_in_text(text : str):
|
def replace_numbers_to_characters_in_text(text: str) -> str:
|
||||||
|
"""Replace all arabic numbers in a text by their equivalent in chinese characters (simplified)
|
||||||
|
|
||||||
|
Args:
|
||||||
|
text (str): input text to transform
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
str: output text
|
||||||
|
"""
|
||||||
text = re.sub(r'[0-9]+', _number_replace, text)
|
text = re.sub(r'[0-9]+', _number_replace, text)
|
||||||
return text
|
return text
|
|
@ -220,7 +220,6 @@ def synthesis(model,
|
||||||
model outputs.
|
model outputs.
|
||||||
speaker_id (int): id of speaker
|
speaker_id (int): id of speaker
|
||||||
style_wav (str | Dict[str, float]): Used for style embedding of GST.
|
style_wav (str | Dict[str, float]): Used for style embedding of GST.
|
||||||
style_wav (str): Uses for style embedding of GST.
|
|
||||||
truncated (bool): keep model states after inference. It can be used
|
truncated (bool): keep model states after inference. It can be used
|
||||||
for continuous inference at long texts.
|
for continuous inference at long texts.
|
||||||
enable_eos_bos_chars (bool): enable special chars for end of sentence and start of sentence.
|
enable_eos_bos_chars (bool): enable special chars for end of sentence and start of sentence.
|
||||||
|
|
|
@ -29,16 +29,15 @@ PHONEME_PUNCTUATION_PATTERN = r'['+_punctuations.replace(' ', '')+']+'
|
||||||
|
|
||||||
|
|
||||||
def text2phone(text, language):
|
def text2phone(text, language):
|
||||||
'''
|
'''Convert graphemes to phonemes. For most of the languages, it calls
|
||||||
Convert graphemes to phonemes. For most of the languages, it calls
|
|
||||||
the phonemizer python library that calls espeak/espeak-ng. For chinese
|
the phonemizer python library that calls espeak/espeak-ng. For chinese
|
||||||
mandarin, it calls pypinyin + custom function for phonemizing
|
mandarin, it calls pypinyin + custom function for phonemizing
|
||||||
Parameters:
|
Parameters:
|
||||||
text (str): text to phonemize
|
text (str): text to phonemize
|
||||||
language (str): language of the text
|
language (str): language of the text
|
||||||
Returns:
|
Returns:
|
||||||
ph (str): phonemes as a string separated by "|"
|
ph (str): phonemes as a string separated by "|"
|
||||||
ph = "ɪ|g|ˈ|z|æ|m|p|ə|l"
|
ph = "ɪ|g|ˈ|z|æ|m|p|ə|l"
|
||||||
'''
|
'''
|
||||||
|
|
||||||
# TO REVIEW : How to have a good implementation for this?
|
# TO REVIEW : How to have a good implementation for this?
|
||||||
|
|
|
@ -127,7 +127,7 @@ class Synthesizer(object):
|
||||||
gst_style_input = None
|
gst_style_input = None
|
||||||
if self.tts_config.use_gst:
|
if self.tts_config.use_gst:
|
||||||
if self.tts_config.gst["gst_style_input"] not in ["", {}]:
|
if self.tts_config.gst["gst_style_input"] not in ["", {}]:
|
||||||
style_wav = self.tts_config.gst["gst_style_input"]
|
gst_style_input = self.tts_config.gst["gst_style_input"]
|
||||||
|
|
||||||
for sen in sens:
|
for sen in sens:
|
||||||
# synthesize voice
|
# synthesize voice
|
||||||
|
|
Loading…
Reference in New Issue