From bc6764a5c7c58463d2a879f54ca0cc390ca69070 Mon Sep 17 00:00:00 2001
From: erogol
Date: Fri, 21 Feb 2020 14:57:10 +0100
Subject: [PATCH] bug fix at server

---
 server/synthesizer.py |  4 ++--
 utils/synthesis.py    | 18 ++++++++++++++++++
 2 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/server/synthesizer.py b/server/synthesizer.py
index 347bef21..afc083aa 100644
--- a/server/synthesizer.py
+++ b/server/synthesizer.py
@@ -94,8 +94,8 @@ class Synthesizer(object):
             sample_rate=self.ap.sample_rate,
         ).cuda()
 
-        check = torch.load(model_file)
-        self.wavernn.load_state_dict(check['model'], map_location="cpu")
+        check = torch.load(model_file, map_location="cpu")
+        self.wavernn.load_state_dict(check['model'])
         if use_cuda:
             self.wavernn.cuda()
         self.wavernn.eval()
diff --git a/utils/synthesis.py b/utils/synthesis.py
index 79a17c78..b4512dc6 100644
--- a/utils/synthesis.py
+++ b/utils/synthesis.py
@@ -69,6 +69,24 @@ def id_to_torch(speaker_id):
     return speaker_id
 
 
+# TODO: perform GL with pytorch for batching
+def apply_griffin_lim(inputs, input_lens, CONFIG, ap):
+    '''Apply Griffin-Lim to each sample, iterating through the first dimension.
+    Args:
+        inputs (Tensor or np.Array): Features to be converted by GL. First dimension is the batch size.
+        input_lens (Tensor or np.Array): 1D array of sample lengths.
+        CONFIG (Dict): TTS config.
+        ap (AudioProcessor): TTS audio processor.
+    '''
+    wavs = []
+    for idx, spec in enumerate(inputs):
+        wav_len = (input_lens[idx] * ap.hop_length) - ap.hop_length  # inverse librosa padding
+        wav = inv_spectrogram(spec, ap, CONFIG)
+        # assert len(wav) == wav_len, f" [!] wav length: {len(wav)} vs expected: {wav_len}"
+        wavs.append(wav[:wav_len])
+    return wavs
+
+
 def synthesis(model, text, CONFIG,
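
Reviewer note on the server/synthesizer.py hunk: `map_location` is a parameter of `torch.load`, not of `nn.Module.load_state_dict`, so the old code raised a TypeError whenever it reached the CPU path. A minimal sketch of the corrected loading pattern, assuming an already-instantiated WaveRNN-style `model`; the checkpoint path and `use_cuda` flag below are placeholders, not values from this patch:

    import torch

    # map_location remaps GPU-saved storages to CPU at deserialization
    # time; load_state_dict only consumes the resulting state dict.
    check = torch.load("checkpoint.pth.tar", map_location="cpu")  # placeholder path
    model.load_state_dict(check["model"])
    if use_cuda:  # placeholder flag
        model.cuda()
    model.eval()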
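
Reviewer note on the utils/synthesis.py hunk: a hedged usage sketch of the new `apply_griffin_lim` helper, assuming the repo's top-level `utils` package (`load_config`, `AudioProcessor`, `ap.save_wav`); the config path, batch shapes, and dummy inputs are illustrative assumptions, not code from this diff:

    import numpy as np
    from utils.generic_utils import load_config
    from utils.audio import AudioProcessor
    from utils.synthesis import apply_griffin_lim

    CONFIG = load_config("config.json")  # placeholder path
    ap = AudioProcessor(**CONFIG.audio)

    # Stand-in for batched model outputs: (batch, T, feature_dim)
    # spectrograms plus per-sample frame lengths. feature_dim must match
    # what inv_spectrogram expects for CONFIG.model (linear or mel).
    batch = np.random.rand(2, 120, 80).astype(np.float32)
    lens = np.array([120, 90])

    wavs = apply_griffin_lim(batch, lens, CONFIG, ap)
    for i, wav in enumerate(wavs):
        ap.save_wav(wav, f"gl_sample_{i}.wav")

Note that the helper trims one hop_length of librosa padding from each sample, so each returned waveform is slightly shorter than input_lens[i] * hop_length.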