From f9fd21e745112991e48611ab649cae169122b653 Mon Sep 17 00:00:00 2001
From: erogol
Date: Thu, 13 Aug 2020 14:33:52 +0200
Subject: [PATCH] compute_f0 with pyworld

---
Review notes (below the cut line, so not part of the commit message):

* This patch had been flattened onto a single line. Line breaks were
  restored from the hunk headers and the diffstat (12 + 1 = 13 added
  lines). Blank context lines and the leading indentation of every hunk
  line are inferred from those counts and from Python block structure;
  verify them against the target tree before applying.
* What the patch does: imports pyworld as `pw`, adds
  `AudioProcessor.compute_f0(x)`, and lists `pyworld` in requirements.txt.
  The new method calls `pw.dio` on `x` cast to double with
  `fs=self.sample_rate`, `f0_ceil=self.mel_fmax` and a frame period of
  `1000 * self.hop_length / self.sample_rate`, then passes the result
  through `pw.stonemask` and returns the refined `f0`.
* NOTE(review): `f0_ceil` is taken from `self.mel_fmax`; confirm that
  attribute is always set to a number wherever `compute_f0` is called.
* NOTE(review): `x.astype(np.double)` is evaluated twice, once per pyworld
  call; a follow-up could convert once and reuse the array.

 mozilla_voice_tts/utils/audio.py | 12 ++++++++++++
 requirements.txt                 |  1 +
 2 files changed, 13 insertions(+)

diff --git a/mozilla_voice_tts/utils/audio.py b/mozilla_voice_tts/utils/audio.py
index 7fe7a91b..46c459f9 100644
--- a/mozilla_voice_tts/utils/audio.py
+++ b/mozilla_voice_tts/utils/audio.py
@@ -3,6 +3,7 @@ import soundfile as sf
 import numpy as np
 import scipy.io.wavfile
 import scipy.signal
+import pyworld as pw
 
 from mozilla_voice_tts.tts.utils.data import StandardScaler
 
@@ -285,6 +286,17 @@ class AudioProcessor(object):
             return 0, pad
         return pad // 2, pad // 2 + pad % 2
 
+    ### Compute F0 ###
+    def compute_f0(self, x):
+        f0, t = pw.dio(
+            x.astype(np.double),
+            fs=self.sample_rate,
+            f0_ceil=self.mel_fmax,
+            frame_period=1000 * self.hop_length / self.sample_rate,
+        )
+        f0 = pw.stonemask(x.astype(np.double), f0, t, self.sample_rate)
+        return f0
+
     ### Audio Processing ###
     def find_endpoint(self, wav, threshold_db=-40, min_silence_sec=0.8):
         window_length = int(self.sample_rate * min_silence_sec)
diff --git a/requirements.txt b/requirements.txt
index 4b88877b..fdec4c57 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -15,6 +15,7 @@ tqdm
 inflect
 bokeh==1.4.0
 pysbd
+pyworld
 soundfile
 nose==1.3.7
 cardboardlint==1.3.0