compute_f0 with pyworld

2020-08-13 14:33:52 +02:00 · 2020-08-13 14:33:52 +02:00 · f9fd21e745
parent 012bd45f13
commit f9fd21e745
2 changed files with 13 additions and 0 deletions
--- a/mozilla_voice_tts/utils/audio.py
+++ b/mozilla_voice_tts/utils/audio.py
@ -3,6 +3,7 @@ import soundfile as sf
 import numpy as np
 import scipy.io.wavfile
 import scipy.signal
+import pyworld as pw

 from mozilla_voice_tts.tts.utils.data import StandardScaler

@ -285,6 +286,17 @@ class AudioProcessor(object):
            return 0, pad
        return pad // 2, pad // 2 + pad % 2

+    ### Compute F0 ###
+    def compute_f0(self, x):
+        f0, t = pw.dio(
+            x.astype(np.double),
+            fs=self.sample_rate,
+            f0_ceil=self.mel_fmax,
+            frame_period=1000 * self.hop_length / self.sample_rate,
+        )
+        f0 = pw.stonemask(x.astype(np.double), f0, t, self.sample_rate)
+        return f0
+
    ### Audio Processing ###
    def find_endpoint(self, wav, threshold_db=-40, min_silence_sec=0.8):
        window_length = int(self.sample_rate * min_silence_sec)
--- a/requirements.txt
+++ b/requirements.txt
@ -15,6 +15,7 @@ tqdm
 inflect
 bokeh==1.4.0
 pysbd
+pyworld
 soundfile
 nose==1.3.7
 cardboardlint==1.3.0