mirror of https://github.com/coqui-ai/TTS.git
sound normalization while reading, adapting get_speakers for multiple datasets
This commit is contained in:
parent 8dec2a9e95
commit 0849e3c42f
train.py | 11
@@ -43,10 +43,6 @@ print(" > Number of GPUs: ", num_gpus)
 def setup_loader(ap, is_val=False, verbose=False):
-    global meta_data_train
-    global meta_data_eval
-    if "meta_data_train" not in globals():
-        meta_data_train, meta_data_eval = load_meta_data(c.datasets)
     if is_val and not c.run_eval:
         loader = None
     else:
@@ -470,6 +466,7 @@ def evaluate(model, criterion, criterion_st, ap, global_step, epoch):
 # FIXME: move args definition/parsing inside of main?
 def main(args): # pylint: disable=redefined-outer-name
+    global meta_data_train, meta_data_eval
     # Audio processor
     ap = AudioProcessor(**c.audio)
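The `global meta_data_train, meta_data_eval` declaration added to `main` is what makes the assignment in the next hunk write to the module-level names that `setup_loader` now only reads (it no longer loads anything itself). A minimal, self-contained sketch of that pattern, using simplified stand-ins rather than the real training code:

# Minimal sketch of the module-level data pattern (stand-ins, not the real training code).
meta_data_train = None
meta_data_eval = None

def setup_loader(is_val=False):
    # Only reads the module-level variables; no loading happens here anymore.
    return meta_data_eval if is_val else meta_data_train

def main():
    global meta_data_train, meta_data_eval  # needed because main assigns these names
    meta_data_train, meta_data_eval = ["train items"], ["eval items"]

main()
print(setup_loader())             # ['train items']
print(setup_loader(is_val=True))  # ['eval items']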
@@ -479,8 +476,12 @@ def main(args): # pylint: disable=redefined-outer-name
                          c.distributed["backend"], c.distributed["url"])
     num_chars = len(phonemes) if c.use_phonemes else len(symbols)

+    # load data instances
+    meta_data_train, meta_data_eval = load_meta_data(c.datasets)
+
+    # parse speakers
     if c.use_speaker_embedding:
-        speakers = get_speakers(c.data_path, c.meta_file_train, c.dataset)
+        speakers = get_speakers(meta_data_train)
         if args.restore_path:
             prev_out_path = os.path.dirname(args.restore_path)
             speaker_mapping = load_speaker_mapping(prev_out_path)
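`load_meta_data` itself is not part of this diff; judging from how its output is consumed (`get_speakers(meta_data_train)` above and the `e[2]` access in the last hunk), it presumably returns one concatenated list of per-utterance items for training and one for evaluation, covering every dataset in `c.datasets`. A rough sketch of that assumption, with a hypothetical stand-in preprocessor:

# Assumed behaviour only: concatenate the items of every configured dataset.
# The (text, wav_path, speaker_name) layout is inferred from get_speakers, not stated here.
def fake_preprocess(dataset):
    # Hypothetical stand-in for a real dataset preprocessor.
    name = dataset["name"]
    items = [("hello", f"/data/{name}/1.wav", f"{name}_spk"),
             ("world", f"/data/{name}/2.wav", f"{name}_spk")]
    return items[:1], items[1:]  # (train_items, eval_items)

def load_meta_data_sketch(datasets):
    meta_data_train, meta_data_eval = [], []
    for dataset in datasets:
        train_items, eval_items = fake_preprocess(dataset)
        meta_data_train += train_items
        meta_data_eval += eval_items
    return meta_data_train, meta_data_eval

meta_data_train, meta_data_eval = load_meta_data_sketch(
    [{"name": "dataset_a"}, {"name": "dataset_b"}])
print(len(meta_data_train), len(meta_data_eval))  # 2 2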
@@ -24,6 +24,7 @@ class AudioProcessor(object):
                  clip_norm=True,
                  griffin_lim_iters=None,
                  do_trim_silence=False,
+                 sound_norm=False,
                  **_):

         print(" > Setting up Audio Processor...")
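Since train.py constructs the processor as `AudioProcessor(**c.audio)`, the new flag can presumably be switched on by adding a `sound_norm` entry to the audio config. A hypothetical sketch; the import path and the concrete values are assumptions, only the `do_trim_silence` and `sound_norm` keys come from this diff:

from TTS.utils.audio import AudioProcessor  # assumed import path; may differ at this commit

audio_config = {
    "sample_rate": 22050,     # illustrative value
    "do_trim_silence": True,
    "sound_norm": True,       # new flag introduced by this commit
}
ap = AudioProcessor(**audio_config)  # mirrors train.py's AudioProcessor(**c.audio)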
@@ -45,6 +46,7 @@ class AudioProcessor(object):
         self.max_norm = 1.0 if max_norm is None else float(max_norm)
         self.clip_norm = clip_norm
         self.do_trim_silence = do_trim_silence
+        self.sound_norm = sound_norm
         self.n_fft, self.hop_length, self.win_length = self._stft_parameters()
         members = vars(self)
         for key, value in members.items():
@@ -243,6 +245,8 @@ class AudioProcessor(object):
             except ValueError:
                 print(f' [!] File cannot be trimmed for silence - {filename}')
         assert self.sample_rate == sr, "%s vs %s"%(self.sample_rate, sr)
+        if self.sound_norm:
+            x = x / x.max() * 0.9
         return x

     @staticmethod
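The new branch rescales the freshly loaded waveform so that its largest sample becomes 0.9. A small NumPy-only illustration of that arithmetic (not the actual loader):

import numpy as np

# Quiet example waveform peaking around 0.2.
x = 0.2 * np.sin(2 * np.pi * 440 * np.arange(0, 1, 1 / 22050))

# Same operation as the new sound_norm branch above.
x = x / x.max() * 0.9

print(round(float(x.max()), 3))  # 0.9 -- the positive peak is rescaled to 0.9

Note that the scaling divides by `x.max()`, the largest positive sample, rather than the absolute peak.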
@@ -25,9 +25,7 @@ def save_speaker_mapping(out_path, speaker_mapping):
         json.dump(speaker_mapping, f, indent=4)


-def get_speakers(data_root, meta_file, dataset_type):
+def get_speakers(items):
     """Returns a sorted, unique list of speakers in a given dataset."""
-    preprocessor = get_preprocessor_by_name(dataset_type)
-    items = preprocessor(data_root, meta_file)
     speakers = {e[2] for e in items}
     return sorted(speakers)
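With the preprocessor lookup removed, `get_speakers` works directly on items that have already been loaded, so a single call can cover several datasets at once. A small usage sketch, assuming the `(text, wav_path, speaker_name)` item layout implied by the `e[2]` access:

# Hypothetical items such as the dataset preprocessors might produce; only the
# third field (the speaker name) matters to get_speakers.
meta_data_train = [
    ("first sentence", "/data/set_a/1.wav", "speaker_a"),
    ("second sentence", "/data/set_b/1.wav", "speaker_b"),
    ("third sentence", "/data/set_b/2.wav", "speaker_b"),
]

print(get_speakers(meta_data_train))  # ['speaker_a', 'speaker_b'] -- sorted and unique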