mirror of https://github.com/coqui-ai/TTS.git
Remove preemphasis from audio processing
This commit is contained in:
parent
dac8fdffa9
commit
0ef3c0ac3f
|
@ -16,7 +16,7 @@ class LJSpeechDataset(Dataset):
|
||||||
def __init__(self, csv_file, root_dir, outputs_per_step, sample_rate,
|
def __init__(self, csv_file, root_dir, outputs_per_step, sample_rate,
|
||||||
text_cleaner, num_mels, min_level_db, frame_shift_ms,
|
text_cleaner, num_mels, min_level_db, frame_shift_ms,
|
||||||
frame_length_ms, preemphasis, ref_level_db, num_freq, power,
|
frame_length_ms, preemphasis, ref_level_db, num_freq, power,
|
||||||
min_seq_len=0):
|
min_mel_freq, max_mel_freq, min_seq_len=0):
|
||||||
|
|
||||||
with open(csv_file, "r", encoding="utf8") as f:
|
with open(csv_file, "r", encoding="utf8") as f:
|
||||||
self.frames = [line.split('|') for line in f]
|
self.frames = [line.split('|') for line in f]
|
||||||
|
@ -26,7 +26,8 @@ class LJSpeechDataset(Dataset):
|
||||||
self.cleaners = text_cleaner
|
self.cleaners = text_cleaner
|
||||||
self.min_seq_len = min_seq_len
|
self.min_seq_len = min_seq_len
|
||||||
self.ap = AudioProcessor(sample_rate, num_mels, min_level_db, frame_shift_ms,
|
self.ap = AudioProcessor(sample_rate, num_mels, min_level_db, frame_shift_ms,
|
||||||
frame_length_ms, preemphasis, ref_level_db, num_freq, power)
|
frame_length_ms, preemphasis, ref_level_db, num_freq, power,
|
||||||
|
min_mel_freq, max_mel_freq)
|
||||||
print(" > Reading LJSpeech from - {}".format(root_dir))
|
print(" > Reading LJSpeech from - {}".format(root_dir))
|
||||||
print(" | > Number of instances : {}".format(len(self.frames)))
|
print(" | > Number of instances : {}".format(len(self.frames)))
|
||||||
self._sort_frames()
|
self._sort_frames()
|
||||||
|
|
6
train.py
6
train.py
|
@ -352,6 +352,8 @@ def main(args):
|
||||||
c.ref_level_db,
|
c.ref_level_db,
|
||||||
c.num_freq,
|
c.num_freq,
|
||||||
c.power,
|
c.power,
|
||||||
|
c.min_mel_freq,
|
||||||
|
c.max_mel_freq,
|
||||||
min_seq_len=c.min_seq_len
|
min_seq_len=c.min_seq_len
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -372,7 +374,9 @@ def main(args):
|
||||||
c.preemphasis,
|
c.preemphasis,
|
||||||
c.ref_level_db,
|
c.ref_level_db,
|
||||||
c.num_freq,
|
c.num_freq,
|
||||||
c.power
|
c.power,
|
||||||
|
c.min_mel_freq,
|
||||||
|
c.max_mel_freq
|
||||||
)
|
)
|
||||||
|
|
||||||
val_loader = DataLoader(val_dataset, batch_size=c.eval_batch_size,
|
val_loader = DataLoader(val_dataset, batch_size=c.eval_batch_size,
|
||||||
|
|
|
@ -59,11 +59,11 @@ class AudioProcessor(object):
|
||||||
def _db_to_amp(self, x):
|
def _db_to_amp(self, x):
|
||||||
return np.power(10.0, x * 0.05)
|
return np.power(10.0, x * 0.05)
|
||||||
|
|
||||||
def apply_preemphasis(self, x):
|
# def apply_preemphasis(self, x):
|
||||||
return signal.lfilter([1, -self.preemphasis], [1], x)
|
# return signal.lfilter([1, -self.preemphasis], [1], x)
|
||||||
|
#
|
||||||
def apply_inv_preemphasis(self, x):
|
# def apply_inv_preemphasis(self, x):
|
||||||
return signal.lfilter([1], [1, -self.preemphasis], x)
|
# return signal.lfilter([1], [1, -self.preemphasis], x)
|
||||||
|
|
||||||
def spectrogram(self, y):
|
def spectrogram(self, y):
|
||||||
# D = self._stft(self.apply_preemphasis(y))
|
# D = self._stft(self.apply_preemphasis(y))
|
||||||
|
@ -105,7 +105,7 @@ class AudioProcessor(object):
|
||||||
return y
|
return y
|
||||||
|
|
||||||
def melspectrogram(self, y):
|
def melspectrogram(self, y):
|
||||||
D = self._stft(self.apply_preemphasis(y))
|
D = self._stft(y)
|
||||||
S = self._amp_to_db(self._linear_to_mel(np.abs(D))) - self.ref_level_db
|
S = self._amp_to_db(self._linear_to_mel(np.abs(D))) - self.ref_level_db
|
||||||
return self._normalize(S)
|
return self._normalize(S)
|
||||||
|
|
||||||
|
|
|
@ -40,7 +40,7 @@ def create_experiment_folder(root_path, model_name, debug):
|
||||||
date_str = datetime.datetime.now().strftime("%B-%d-%Y_%I:%M%p")
|
date_str = datetime.datetime.now().strftime("%B-%d-%Y_%I:%M%p")
|
||||||
if debug:
|
if debug:
|
||||||
commit_hash = 'debug'
|
commit_hash = 'debug'
|
||||||
else:
|
else:
|
||||||
commit_hash = get_commit_hash()
|
commit_hash = get_commit_hash()
|
||||||
output_folder = os.path.join(root_path, date_str + '-' + model_name + '-' + commit_hash)
|
output_folder = os.path.join(root_path, date_str + '-' + model_name + '-' + commit_hash)
|
||||||
os.makedirs(output_folder, exist_ok=True)
|
os.makedirs(output_folder, exist_ok=True)
|
||||||
|
@ -135,21 +135,6 @@ def lr_decay(init_lr, global_step, warmup_steps):
|
||||||
return lr
|
return lr
|
||||||
|
|
||||||
|
|
||||||
def create_attn_mask(N, T, g=0.05):
|
|
||||||
r'''creating attn mask for guided attention
|
|
||||||
TODO: vectorize'''
|
|
||||||
M = np.zeros([N, T])
|
|
||||||
for t in range(T):
|
|
||||||
for n in range(N):
|
|
||||||
val = 20 * np.exp(-pow((n/N)-(t/T), 2.0)/g)
|
|
||||||
M[n, t] = val
|
|
||||||
e_x = np.exp(M - np.max(M))
|
|
||||||
M = e_x / e_x.sum(axis=0) # only difference
|
|
||||||
M = torch.FloatTensor(M).t().cuda()
|
|
||||||
M = torch.stack([M]*32)
|
|
||||||
return M
|
|
||||||
|
|
||||||
|
|
||||||
def mk_decay(init_mk, max_epoch, n_epoch):
|
def mk_decay(init_mk, max_epoch, n_epoch):
|
||||||
return init_mk * ((max_epoch - n_epoch) / max_epoch)
|
return init_mk * ((max_epoch - n_epoch) / max_epoch)
|
||||||
|
|
||||||
|
@ -159,6 +144,20 @@ def count_parameters(model):
|
||||||
return sum(p.numel() for p in model.parameters() if p.requires_grad)
|
return sum(p.numel() for p in model.parameters() if p.requires_grad)
|
||||||
|
|
||||||
|
|
||||||
|
# from https://gist.github.com/jihunchoi/f1434a77df9db1bb337417854b398df1
|
||||||
|
def sequence_mask(sequence_length, max_len=None):
|
||||||
|
if max_len is None:
|
||||||
|
max_len = sequence_length.data.max()
|
||||||
|
batch_size = sequence_length.size(0)
|
||||||
|
seq_range = torch.arange(0, max_len).long()
|
||||||
|
seq_range_expand = seq_range.unsqueeze(0).expand(batch_size, max_len)
|
||||||
|
if sequence_length.is_cuda:
|
||||||
|
seq_range_expand = seq_range_expand.cuda()
|
||||||
|
seq_length_expand = (sequence_length.unsqueeze(1)
|
||||||
|
.expand_as(seq_range_expand))
|
||||||
|
return seq_range_expand < seq_length_expand
|
||||||
|
|
||||||
|
|
||||||
class Progbar(object):
|
class Progbar(object):
|
||||||
"""Displays a progress bar.
|
"""Displays a progress bar.
|
||||||
Args:
|
Args:
|
||||||
|
|
|
@ -1,9 +0,0 @@
|
||||||
|
|
||||||
def get_param_size(model):
|
|
||||||
params = 0
|
|
||||||
for p in model.parameters():
|
|
||||||
tmp = 1
|
|
||||||
for x in p.size():
|
|
||||||
tmp *= x
|
|
||||||
params += tmp
|
|
||||||
return params
|
|
Loading…
Reference in New Issue