New initialization for the embedding layer and use the phoneme count instead of the symbol count for its size

This commit is contained in:
Eren Golge 2019-01-09 16:00:57 +01:00
parent 916f5df5f9
commit c2637f114e
1 changed file with 7 additions and 4 deletions


@@ -1,7 +1,8 @@
 # coding: utf-8
 import torch
 from torch import nn
-from utils.text.symbols import symbols
+from math import sqrt
+from utils.text.symbols import symbols, phonemes
 from layers.tacotron import Prenet, Encoder, Decoder, PostCBHG
@@ -17,9 +18,11 @@ class Tacotron(nn.Module):
         self.mel_dim = mel_dim
         self.linear_dim = linear_dim
         self.embedding = nn.Embedding(
-            len(symbols), embedding_dim, padding_idx=padding_idx)
-        print(" | > Number of characters : {}".format(len(symbols)))
-        self.embedding.weight.data.normal_(0, 0.3)
+            len(phonemes), embedding_dim, padding_idx=padding_idx)
+        print(" | > Number of characters : {}".format(len(phonemes)))
+        std = sqrt(2.0 / (len(phonemes) + embedding_dim))
+        val = sqrt(3.0) * std  # uniform bounds for std
+        self.embedding.weight.data.uniform_(-val, val)
         self.encoder = Encoder(embedding_dim)
         self.decoder = Decoder(256, mel_dim, r)
         self.postnet = PostCBHG(mel_dim)
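
For reference, the bound computed in the new code (std = sqrt(2 / (fan_in + fan_out)), bound = sqrt(3) * std) is the Glorot/Xavier uniform bound for a 2-D weight of shape (num_phonemes, embedding_dim). A minimal standalone sketch of the same initialization, with illustrative sizes and variable names that are not taken from the commit:

    import torch
    from torch import nn
    from math import sqrt

    num_phonemes, embedding_dim = 130, 256  # illustrative sizes, not from the commit
    embedding = nn.Embedding(num_phonemes, embedding_dim)

    # Manual init as in the commit: uniform in [-bound, bound] with
    # bound = sqrt(3) * sqrt(2 / (fan_in + fan_out))
    std = sqrt(2.0 / (num_phonemes + embedding_dim))
    val = sqrt(3.0) * std
    embedding.weight.data.uniform_(-val, val)

    # Equivalent built-in helper: for a 2-D tensor, xavier_uniform_ uses the same
    # bound, since fan_in + fan_out = embedding_dim + num_phonemes here.
    nn.init.xavier_uniform_(embedding.weight)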