From f9fd21e745112991e48611ab649cae169122b653 Mon Sep 17 00:00:00 2001
From: erogol <erogol@hotmail.com>
Date: Thu, 13 Aug 2020 14:33:52 +0200
Subject: [PATCH] compute_f0 with pyworld

---
 mozilla_voice_tts/utils/audio.py | 28 ++++++++++++++++++++++++++++
 requirements.txt                 |  1 +
 2 files changed, 29 insertions(+)

diff --git a/mozilla_voice_tts/utils/audio.py b/mozilla_voice_tts/utils/audio.py
index 7fe7a91b..46c459f9 100644
--- a/mozilla_voice_tts/utils/audio.py
+++ b/mozilla_voice_tts/utils/audio.py
@@ -3,6 +3,7 @@ import soundfile as sf
 import numpy as np
 import scipy.io.wavfile
 import scipy.signal
+import pyworld as pw
 
 from mozilla_voice_tts.tts.utils.data import StandardScaler
 
@@ -285,6 +286,33 @@ class AudioProcessor(object):
             return 0, pad
         return pad // 2, pad // 2 + pad % 2
 
+    ### Compute F0 ###
+    def compute_f0(self, x):
+        """Estimate the fundamental frequency (F0) contour of a waveform.
+
+        Runs pyworld's DIO estimator and refines its output with StoneMask.
+        The analysis frame period is derived from `hop_length` and `sample_rate`.
+
+        Args:
+            x: waveform samples exposing `astype` (presumably a NumPy array).
+
+        Returns:
+            The refined F0 values as returned by `pw.stonemask`.
+        """
+        # Cast to float64 once and reuse the copy for both pyworld calls.
+        signal = x.astype(np.double)
+        dio_kwargs = {
+            "fs": self.sample_rate,
+            # hop length expressed in milliseconds
+            "frame_period": 1000 * self.hop_length / self.sample_rate,
+        }
+        # If no upper frequency is configured (mel_fmax is None), keep
+        # pyworld's own default ceiling instead of forwarding None.
+        if self.mel_fmax is not None:
+            dio_kwargs["f0_ceil"] = self.mel_fmax
+        f0, t = pw.dio(signal, **dio_kwargs)
+        return pw.stonemask(signal, f0, t, self.sample_rate)
+
     ### Audio Processing ###
     def find_endpoint(self, wav, threshold_db=-40, min_silence_sec=0.8):
         window_length = int(self.sample_rate * min_silence_sec)
diff --git a/requirements.txt b/requirements.txt
index 4b88877b..fdec4c57 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -15,6 +15,7 @@ tqdm
 inflect
 bokeh==1.4.0
 pysbd
+pyworld
 soundfile
 nose==1.3.7
 cardboardlint==1.3.0