diff --git a/layers/tacotron.py b/layers/tacotron.py index cba6b1ae..b0327917 100644 --- a/layers/tacotron.py +++ b/layers/tacotron.py @@ -109,20 +109,25 @@ class CBHG(nn.Module): def __init__(self, in_features, - hid_features=128, K=16, - projections=[128, 128], + conv_bank_features=128, + conv_projections=[128, 128], + highway_features=128, + gru_features=128, num_highways=4): super(CBHG, self).__init__() self.in_features = in_features - self.hid_features = hid_features + self.conv_bank_features = conv_bank_features + self.highway_features = highway_features + self.gru_features = gru_features + self.conv_projections = conv_projections self.relu = nn.ReLU() # list of conv1d bank with filter size k=1...K # TODO: try dilational layers instead self.conv1d_banks = nn.ModuleList([ BatchNormConv1d( in_features, - hid_features, + conv_bank_features, kernel_size=k, stride=1, padding=k // 2, @@ -131,12 +136,12 @@ class CBHG(nn.Module): # max pooling of conv bank # TODO: try average pooling OR larger kernel size self.max_pool1d = nn.MaxPool1d(kernel_size=2, stride=1, padding=1) - out_features = [K * hid_features] + projections[:-1] - activations = [self.relu] * (len(projections) - 1) + out_features = [K * conv_bank_features] + conv_projections[:-1] + activations = [self.relu] * (len(conv_projections) - 1) activations += [None] # setup conv1d projection layers layer_set = [] - for (in_size, out_size, ac) in zip(out_features, projections, + for (in_size, out_size, ac) in zip(out_features, conv_projections, activations): layer = BatchNormConv1d( in_size, @@ -148,13 +153,20 @@ class CBHG(nn.Module): layer_set.append(layer) self.conv1d_projections = nn.ModuleList(layer_set) # setup Highway layers - if self.hid_features != self.in_features: - self.pre_highway = nn.Linear(projections[-1], hid_features, bias=False) - self.highways = nn.ModuleList( - [Highway(hid_features, hid_features) for _ in range(num_highways)]) + if self.highway_features != conv_projections[-1]: + 
self.pre_highway = nn.Linear( + conv_projections[-1], highway_features, bias=False) + self.highways = nn.ModuleList([ + Highway(highway_features, highway_features) + for _ in range(num_highways) + ]) # bi-directional GPU layer self.gru = nn.GRU( - 128, 128, 1, batch_first=True, bidirectional=True) + gru_features, + gru_features, + 1, + batch_first=True, + bidirectional=True) def forward(self, inputs): # (B, T_in, in_features) @@ -172,7 +184,7 @@ class CBHG(nn.Module): out = out[:, :, :T] outs.append(out) x = torch.cat(outs, dim=1) - assert x.size(1) == self.hid_features * len(self.conv1d_banks) + assert x.size(1) == self.conv_bank_features * len(self.conv1d_banks) x = self.max_pool1d(x)[:, :, :T] for conv1d in self.conv1d_projections: x = conv1d(x) @@ -180,7 +192,7 @@ class CBHG(nn.Module): x = x.transpose(1, 2) # Back to the original shape x += inputs - if x.size(-1) != self.hid_features: + if self.highway_features != self.conv_projections[-1]: x = self.pre_highway(x) # Residual connection # TODO: try residual scaling as in Deep Voice 3 @@ -195,10 +207,16 @@ class CBHG(nn.Module): class EncoderCBHG(nn.Module): - def __init__(self): super(EncoderCBHG, self).__init__() - self.cbhg = CBHG(128, hid_features=128, K=16, projections=[128, 128]) + self.cbhg = CBHG( + 128, + K=16, + conv_bank_features=128, + conv_projections=[128, 128], + highway_features=128, + gru_features=128, + num_highways=4) def forward(self, x): return self.cbhg(x) @@ -226,11 +244,16 @@ class Encoder(nn.Module): class PostCBHG(nn.Module): - def __init__(self, mel_dim): super(PostCBHG, self).__init__() - self.cbhg = CBHG(mel_dim, hid_features=128, K=8, projections=[256, mel_dim]) - + self.cbhg = CBHG( + mel_dim, + K=8, + conv_bank_features=80, + conv_projections=[160, mel_dim], + highway_features=80, + gru_features=80, + num_highways=4) def forward(self, x): return self.cbhg(x) diff --git a/models/tacotron.py b/models/tacotron.py index e25cb467..cd023a92 100644 --- a/models/tacotron.py +++ 
b/models/tacotron.py @@ -23,7 +23,7 @@ class Tacotron(nn.Module): self.encoder = Encoder(embedding_dim) self.decoder = Decoder(256, mel_dim, r) self.postnet = PostCBHG(mel_dim) - self.last_linear = nn.Linear(256, linear_dim) + self.last_linear = nn.Linear(self.postnet.cbhg.gru_features * 2, linear_dim) def forward(self, characters, mel_specs=None, mask=None): B = characters.size(0) diff --git a/utils/visual.py b/utils/visual.py index 862e7e5c..114e0ef2 100644 --- a/utils/visual.py +++ b/utils/visual.py @@ -17,7 +17,7 @@ def plot_alignment(alignment, info=None): plt.tight_layout() fig.canvas.draw() data = np.fromstring(fig.canvas.tostring_rgb(), dtype=np.uint8, sep='') - data = data.reshape(fig.canvas.get_width_height()[::-1] + (3, )) + data = data.reshape(fig.canvas.get_width_height()[::-1] + (3, )).transpose(2, 0, 1) plt.close() return data @@ -30,6 +30,6 @@ def plot_spectrogram(linear_output, audio): plt.tight_layout() fig.canvas.draw() data = np.fromstring(fig.canvas.tostring_rgb(), dtype=np.uint8, sep='') - data = data.reshape(fig.canvas.get_width_height()[::-1] + (3, )) + data = data.reshape(fig.canvas.get_width_height()[::-1] + (3, )).transpose(2, 0, 1) plt.close() return data