a ton of linter updates

Eren Gölge 2021-03-08 05:06:54 +01:00
parent 4422642ec0
commit 9a48ba3821
45 changed files with 244 additions and 241 deletions

View File

@@ -170,7 +170,7 @@ def main():
         args.vocoder_name = model_item['default_vocoder'] if args.vocoder_name is None else args.vocoder_name
     if args.vocoder_name is not None:
-        vocoder_path, vocoder_config_path, vocoder_item = manager.download_model(args.vocoder_name)
+        vocoder_path, vocoder_config_path, _ = manager.download_model(args.vocoder_name)
     # CASE3: load custome models
     if args.model_path is not None:
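
Most of this commit applies a handful of recurring pylint fixes. The hunk above is the `unused-variable` pattern: a tuple unpacking binds a value that is never read, so the dead slot is renamed to `_`. A minimal sketch of the idiom (names are hypothetical, not from the repo):

    def fetch():
        return "model.pth", "config.json", {"meta": 1}

    # before: `item = fetch()[2]`-style binding would trip unused-variable
    path, config, _ = fetch()
    print(path, config)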

View File

@@ -573,7 +573,7 @@ def main(args):  # pylint: disable=redefined-outer-name
         if c.run_eval:
             target_loss = eval_avg_loss_dict['avg_loss']
         best_loss = save_best_model(target_loss, best_loss, model, optimizer,
-                                    global_step, epoch, c.r, OUT_PATH,
+                                    global_step, epoch, c.r, OUT_PATH, model_characters,
                                     keep_all_best=keep_all_best, keep_after=keep_after)
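
This is the one change here that goes beyond linting: `save_best_model` now also receives `model_characters` (the character set the model was trained with), and the same edit is repeated at every training-script call site below, so the saved checkpoint presumably carries its character vocabulary from this commit on.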

View File

@@ -1,8 +1,6 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
-import argparse
-import glob
 import os
 import sys
 import time
@@ -535,7 +533,7 @@ def main(args):  # pylint: disable=redefined-outer-name
         if c.run_eval:
             target_loss = eval_avg_loss_dict['avg_loss']
         best_loss = save_best_model(target_loss, best_loss, model, optimizer,
-                                    global_step, epoch, c.r, OUT_PATH,
+                                    global_step, epoch, c.r, OUT_PATH, model_characters,
                                     keep_all_best=keep_all_best, keep_after=keep_after)

View File

@@ -648,12 +648,14 @@ def main(args):  # pylint: disable=redefined-outer-name
                 epoch,
                 c.r,
                 OUT_PATH,
+                model_characters,
                 keep_all_best=keep_all_best,
                 keep_after=keep_after,
                 scaler=scaler.state_dict() if c.mixed_precision else None
             )
 if __name__ == '__main__':
     args = parse_arguments(sys.argv)
     c, OUT_PATH, AUDIO_PATH, c_logger, tb_logger = process_args(

View File

@@ -50,7 +50,7 @@ def setup_loader(ap, is_val=False, verbose=False):
     sampler = DistributedSampler(dataset, shuffle=True) if num_gpus > 1 else None
     loader = DataLoader(dataset,
                         batch_size=1 if is_val else c.batch_size,
-                        shuffle=False if num_gpus > 1 else True,
+                        shuffle=num_gpus == 0,
                         drop_last=False,
                         sampler=sampler,
                         num_workers=c.num_val_loader_workers
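
Worth flagging: pylint's `simplifiable-if-expression` only asks for a boolean simplification, but this particular rewrite is not behavior-preserving. The old expression is true for `num_gpus <= 1`, the new one only for `num_gpus == 0`, so a single-GPU run silently stops shuffling. A quick truth table makes the mismatch visible:

    for num_gpus in (0, 1, 2):
        old = False if num_gpus > 1 else True  # True, True, False
        new = num_gpus == 0                    # True, False, False
        print(num_gpus, old, new, num_gpus <= 1)  # last column: a faithful rewrite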

View File

@@ -1,7 +1,7 @@
 import collections
 import os
 import random
-from multiprocessing import Manager, Pool
+from multiprocessing import Pool
 import numpy as np
 import torch

View File

@@ -3,7 +3,7 @@ from glob import glob
 import re
 import sys
 from pathlib import Path
-from typing import List, Tuple
+from typing import List
 from tqdm import tqdm

View File

@@ -366,8 +366,10 @@ class RelativePositionTransformer(nn.Module):
             self.proj = nn.Conv1d(hidden_channels, out_channels, 1)
             self.ffn_layers.append(
-                FeedForwardNetwork(hidden_channels,
-                                   hidden_channels if (idx + 1) != self.num_layers else out_channels,
+                FeedForwardNetwork(
+                    hidden_channels,
+                    hidden_channels if
+                    (idx + 1) != self.num_layers else out_channels,
                     hidden_channels_ffn,
                     kernel_size,
                     dropout_p=dropout_p))

View File

@@ -75,7 +75,7 @@ class ReferenceEncoder(nn.Module):
         # x: 3D tensor [batch_size, post_conv_width,
         #               num_channels*post_conv_height]
         self.recurrence.flatten_parameters()
-        memory, out = self.recurrence(x)
+        _, out = self.recurrence(x)
         # out: 3D tensor [seq_len==1, batch_size, encoding_size=128]
         return out.squeeze(0)
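
The same `unused-variable` fix as in the first hunk, applied to an RNN call: `nn.GRU` returns `(per_step_outputs, final_hidden)`, and the reference encoder only needs the final hidden state. A self-contained illustration (sizes chosen arbitrarily):

    import torch
    from torch import nn

    gru = nn.GRU(input_size=8, hidden_size=128, batch_first=True)
    x = torch.randn(4, 10, 8)  # [batch, seq_len, features]
    _, hidden = gru(x)         # per-step outputs are discarded
    print(hidden.shape)        # torch.Size([1, 4, 128])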

View File

@@ -2,13 +2,12 @@ import math
 import numpy as np
 import torch
 from torch import nn
-from inspect import signature
 from torch.nn import functional
 from TTS.tts.utils.generic_utils import sequence_mask
 from TTS.tts.utils.ssim import ssim
 # pylint: disable=abstract-method
 # relates https://github.com/pytorch/pytorch/issues/42305
 class L1LossMasked(nn.Module):
     def __init__(self, seq_len_norm):

View File

@@ -78,8 +78,7 @@ class RelativePositionTransformerEncoder(nn.Module):
             kernel_size=5,
             num_res_blocks=3,
             num_conv_blocks=1,
-            dilations=[1, 1, 1]
-        )
+            dilations=[1, 1, 1])
         self.rel_pos_transformer = RelativePositionTransformer(
             hidden_channels, out_channels, hidden_channels, **params)
@@ -104,8 +103,7 @@ class ResidualConv1dBNEncoder(nn.Module):
     """
     def __init__(self, in_channels, out_channels, hidden_channels, params):
         super().__init__()
-        self.prenet = nn.Sequential(
-            nn.Conv1d(in_channels, hidden_channels, 1),
+        self.prenet = nn.Sequential(nn.Conv1d(in_channels, hidden_channels, 1),
                                     nn.ReLU())
         self.res_conv_block = ResidualConv1dBNBlock(hidden_channels,
                                                     hidden_channels,
@@ -183,9 +181,8 @@ class Encoder(nn.Module):
         # init encoder
         if encoder_type.lower() == "transformer":
             # text encoder
-            self.encoder = RelativePositionTransformerEncoder(in_hidden_channels,
-                                                              out_channels,
-                                                              in_hidden_channels,
+            self.encoder = RelativePositionTransformerEncoder(
+                in_hidden_channels, out_channels, in_hidden_channels,
                 encoder_params)  # pylint: disable=unexpected-keyword-arg
         elif encoder_type.lower() == 'residual_conv_bn':
             self.encoder = ResidualConv1dBNEncoder(in_hidden_channels,

View File

@@ -32,7 +32,7 @@ def _num2chinese(num :str, big=False, simp=True, o=False, twoalt=False) -> str:
     nd = str(num)
     if abs(float(nd)) >= 1e48:
         raise ValueError('number out of range')
-    elif 'e' in nd:
+    if 'e' in nd:
         raise ValueError('scientific notation is not supported')
     c_symbol = '正负点' if simp else '正負點'
     if o:  # formal
@@ -69,7 +69,7 @@ def _num2chinese(num :str, big=False, simp=True, o=False, twoalt=False) -> str:
         if int(unit) == 0:  # 0000
             intresult.append(c_basic[0])
             continue
-        elif nu > 0 and int(unit) == 2:  # 0002
+        if nu > 0 and int(unit) == 2:  # 0002
             intresult.append(c_twoalt + c_unit2[nu - 1])
             continue
         ulist = []
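
Both edits are the same pylint family (`no-else-raise` / `no-else-continue`): when a branch ends in an unconditional `raise` or `continue`, the `elif` that follows gains nothing from being chained and is flattened to a plain `if`. Schematically:

    def check(nd: str):
        if abs(float(nd)) >= 1e48:
            raise ValueError('number out of range')
        # previously `elif`; the raise above already terminates that path
        if 'e' in nd:
            raise ValueError('scientific notation is not supported')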

View File

@@ -135,7 +135,7 @@ def setup_model(num_chars, num_speakers, c, speaker_embedding_dim=None):
     return model
 def is_tacotron(c):
-    return False if c['model'] in ['speedy_speech', 'glow_tts'] else True
+    return not c['model'] in ['speedy_speech', 'glow_tts']
 def check_config_tts(c):
     check_argument('model', c, enum_list=['tacotron', 'tacotron2', 'glow_tts', 'speedy_speech'], restricted=True, val_type=str)
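
The `False if x else True` form collapses to `not x` (`simplifiable-if-expression`). The membership test could go one step further, since `not x in y` is conventionally written with the dedicated operator:

    def is_tacotron(c):
        # equivalent, and the more idiomatic spelling
        return c['model'] not in ['speedy_speech', 'glow_tts']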

View File

@@ -7,7 +7,7 @@ from TTS.utils.io import RenamingUnpickler
-def load_checkpoint(model, checkpoint_path, amp=None, use_cuda=False, eval=False):
+def load_checkpoint(model, checkpoint_path, amp=None, use_cuda=False, eval=False):  # pylint: disable=redefined-builtin
     """Load ```TTS.tts.models``` checkpoints.
     Args:
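
Here `eval` shadows the built-in `eval()` for the duration of the function, hence the targeted `redefined-builtin` suppression rather than a rename, which would break every caller passing `eval=True` by keyword. The trade-off in miniature (hypothetical function, not the repo's API):

    def load(path, eval=False):  # pylint: disable=redefined-builtin
        # within this scope `eval` is the bool flag, not the builtin
        return path, eval

    print(load("best_model.pth", eval=True))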

View File

@@ -63,8 +63,8 @@ def parse_speakers(c, args, meta_data_train, OUT_PATH):
             speaker_embedding_dim = None
             save_speaker_mapping(OUT_PATH, speaker_mapping)
         num_speakers = len(speaker_mapping)
-        print(" > Training with {} speakers: {}".format(len(speakers),
-                                                        ", ".join(speakers)))
+        print(" > Training with {} speakers: {}".format(
+            len(speakers), ", ".join(speakers)))
     else:
         num_speakers = 0
         speaker_embedding_dim = None

View File

@@ -17,6 +17,7 @@ def create_window(window_size, channel):
     window = Variable(_2D_window.expand(channel, 1, window_size, window_size).contiguous())
     return window
 def _ssim(img1, img2, window, window_size, channel, size_average=True):
     mu1 = F.conv2d(img1, window, padding=window_size // 2, groups=channel)
     mu2 = F.conv2d(img2, window, padding=window_size // 2, groups=channel)
@@ -25,9 +26,13 @@ def _ssim(img1, img2, window, window_size, channel, size_average = True):
     mu2_sq = mu2.pow(2)
     mu1_mu2 = mu1*mu2
-    sigma1_sq = F.conv2d(img1*img1, window, padding = window_size//2, groups = channel) - mu1_sq
-    sigma2_sq = F.conv2d(img2*img2, window, padding = window_size//2, groups = channel) - mu2_sq
-    sigma12 = F.conv2d(img1*img2, window, padding = window_size//2, groups = channel) - mu1_mu2
+    sigma1_sq = F.conv2d(
+        img1 * img1, window, padding=window_size // 2, groups=channel) - mu1_sq
+    sigma2_sq = F.conv2d(
+        img2 * img2, window, padding=window_size // 2, groups=channel) - mu2_sq
+    sigma12 = F.conv2d(
+        img1 * img2, window, padding=window_size // 2,
+        groups=channel) - mu1_mu2
     C1 = 0.01**2
     C2 = 0.03**2
@@ -64,6 +69,7 @@ class SSIM(torch.nn.Module):
         return _ssim(img1, img2, window, self.window_size, channel, self.size_average)
 def ssim(img1, img2, window_size=11, size_average=True):
     (_, channel, _, _) = img1.size()
     window = create_window(window_size, channel)
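
For reference, the windowed moments above feed the standard SSIM index (Wang et al., 2004), with `C1 = 0.01**2` and `C2 = 0.03**2` stabilizing the divisions for inputs normalized to [0, 1]. A sketch of the map the function builds from those terms:

    ssim_map = ((2 * mu1_mu2 + C1) * (2 * sigma12 + C2)) / \
               ((mu1_sq + mu2_sq + C1) * (sigma1_sq + sigma2_sq + C2))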

View File

@@ -20,9 +20,13 @@ def text_to_seqvec(text, CONFIG):
                 add_blank=CONFIG['add_blank'] if 'add_blank' in CONFIG.keys() else False),
             dtype=np.int32)
     else:
-        seq = np.asarray(
-            text_to_sequence(text, text_cleaner, tp=CONFIG.characters if 'characters' in CONFIG.keys() else None,
-                             add_blank=CONFIG['add_blank'] if 'add_blank' in CONFIG.keys() else False), dtype=np.int32)
+        seq = np.asarray(text_to_sequence(
+            text,
+            text_cleaner,
+            tp=CONFIG.characters if 'characters' in CONFIG.keys() else None,
+            add_blank=CONFIG['add_blank']
+            if 'add_blank' in CONFIG.keys() else False),
+                         dtype=np.int32)
     return seq

View File

@@ -144,8 +144,3 @@ class ModelManager(object):
             if isinstance(key, str) and len(my_dict[key]) > 0:
                 return True
         return False

View File

@@ -4,7 +4,7 @@ from torch import nn
 from torch.nn import functional as F
-class TorchSTFT(nn.Module):
+class TorchSTFT(nn.Module):  # pylint: disable=abstract-method
     def __init__(self, n_fft, hop_length, win_length, window='hann_window'):
         """ Torch based STFT operation """
         super(TorchSTFT, self).__init__()

View File

@@ -23,7 +23,9 @@ class PositionalEncoding(nn.Module):
     def forward(self, x, noise_level):
         if x.shape[2] > self.pe.shape[1]:
             self.init_pe_matrix(x.shape[1], x.shape[2], x)
-        return x + noise_level[..., None, None] + self.pe[:, :x.size(2)].repeat(x.shape[0], 1, 1) / self.C
+        return x + noise_level[..., None,
+                               None] + self.pe[:, :x.size(2)].repeat(
+                                   x.shape[0], 1, 1) / self.C
     def init_pe_matrix(self, n_channels, max_len, x):
         pe = torch.zeros(max_len, n_channels)
@@ -172,4 +174,3 @@ class DBlock(nn.Module):
         for idx, layer in enumerate(self.main_block):
             if len(layer.state_dict()) != 0:
                 self.main_block[idx] = weight_norm(layer)

View File

@@ -79,7 +79,7 @@ class Wavegrad(nn.Module):
         return x
     def load_noise_schedule(self, path):
-        beta = np.load(path, allow_pickle=True).item()['beta']
+        beta = np.load(path, allow_pickle=True).item()['beta']  # pylint: disable=unexpected-keyword-arg
         self.compute_noise_level(beta)
     @torch.no_grad()
@@ -91,8 +91,8 @@ class Wavegrad(nn.Module):
         y_n = torch.FloatTensor(y_n).unsqueeze(0).unsqueeze(0).to(x)
         sqrt_alpha_hat = self.noise_level.to(x)
         for n in range(len(self.alpha) - 1, -1, -1):
-            y_n = self.c1[n] * (y_n -
-                                self.c2[n] * self.forward(y_n, x, sqrt_alpha_hat[n].repeat(x.shape[0])))
+            y_n = self.c1[n] * (y_n - self.c2[n] * self.forward(
+                y_n, x, sqrt_alpha_hat[n].repeat(x.shape[0])))
             if n > 0:
                 z = torch.randn_like(y_n)
                 y_n += self.sigma[n - 1] * z
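
Only the line wrapping changed here; the loop itself is WaveGrad's reverse (denoising) pass, walking the noise schedule from the noisiest level down to n = 0:

    y_{n-1} = c1[n] * (y_n - c2[n] * eps_theta(y_n, x, sqrt_alpha_hat[n])) + sigma[n-1] * z

with fresh Gaussian noise z drawn at every step except the last, and the conditioning features x held fixed throughout.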

View File

@@ -118,9 +118,8 @@ class UpsampleNetwork(nn.Module):
 class Upsample(nn.Module):
-    def __init__(
-        self, scale, pad, num_res_blocks, feat_dims, compute_dims, res_out_dims, use_aux_net
-    ):
+    def __init__(self, scale, pad, num_res_blocks, feat_dims, compute_dims,
+                 res_out_dims, use_aux_net):
         super().__init__()
         self.scale = scale
         self.pad = pad

View File

@@ -44,9 +44,11 @@ def log_sum_exp(x):
 # It is adapted from https://github.com/r9y9/wavenet_vocoder/blob/master/wavenet_vocoder/mixture.py
-def discretized_mix_logistic_loss(
-    y_hat, y, num_classes=65536, log_scale_min=None, reduce=True
-):
+def discretized_mix_logistic_loss(y_hat,
+                                  y,
+                                  num_classes=65536,
+                                  log_scale_min=None,
+                                  reduce=True):
     if log_scale_min is None:
         log_scale_min = float(np.log(1e-14))
     y_hat = y_hat.permute(0, 2, 1)

View File

@@ -7,7 +7,7 @@ import pickle as pickle_tts
 from TTS.utils.io import RenamingUnpickler
-def load_checkpoint(model, checkpoint_path, use_cuda=False, eval=False):
+def load_checkpoint(model, checkpoint_path, use_cuda=False, eval=False):  # pylint: disable=redefined-builtin
     try:
         state = torch.load(checkpoint_path, map_location=torch.device('cpu'))
     except ModuleNotFoundError:

View File

@@ -217,4 +217,3 @@ class SSIMLossTests(unittest.TestCase):
                 (sequence_mask(dummy_length).float() - 1.0) * 100.0).unsqueeze(2)
         output = layer(dummy_input + mask, dummy_target, dummy_length)
         assert output.item() == 0, "0 vs {}".format(output.item())

View File

@@ -356,4 +356,3 @@ class SCGSTMultiSpeakeTacotronTrainTest(unittest.TestCase):
                 ), "param {} with shape {} not updated!! \n{}\n{}".format(
                     count, param.shape, param, param_ref)
                 count += 1

View File

@@ -17,5 +17,5 @@ def test_currency() -> None:
 def test_expand_numbers() -> None:
-    assert "minus one" == phoneme_cleaners("-1")
-    assert "one" == phoneme_cleaners("1")
+    assert phoneme_cleaners("-1") == 'minus one'
+    assert phoneme_cleaners("1") == 'one'

View File

@@ -17,7 +17,7 @@ def test_phoneme_to_sequence():
     lang = "en-us"
     sequence = phoneme_to_sequence(text, text_cleaner, lang)
     text_hat = sequence_to_phoneme(sequence)
-    sequence_with_params = phoneme_to_sequence(text, text_cleaner, lang, tp=conf.characters)
+    _ = phoneme_to_sequence(text, text_cleaner, lang, tp=conf.characters)
     text_hat_with_params = sequence_to_phoneme(sequence, tp=conf.characters)
     gt = 'ɹiːsənt ɹᵻsɜːtʃ æt hɑːɹvɚd hɐz ʃoʊn mɛdᵻteɪɾɪŋ fɔːɹ æz lɪɾəl æz eɪt wiːks kæn æktʃuːəli ɪŋkɹiːs, ðə ɡɹeɪ mæɾɚɹ ɪnðə pɑːɹts ʌvðə bɹeɪn ɹᵻspɑːnsᵻbəl fɔːɹ ɪmoʊʃənəl ɹɛɡjʊleɪʃən ænd lɜːnɪŋ!'
     assert text_hat == text_hat_with_params == gt
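
Renaming the result to `_` satisfies `unused-variable`, but it also exposes that the decode on the next line runs on the original `sequence`, not the one built with `tp=conf.characters`, so the custom character set is only exercised on the decoding side of this round-trip.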