compute_f0 with pyworld

This commit is contained in:
erogol 2020-08-13 14:33:52 +02:00
parent 012bd45f13
commit f9fd21e745
2 changed files with 13 additions and 0 deletions

View File

@ -3,6 +3,7 @@ import soundfile as sf
import numpy as np
import scipy.io.wavfile
import scipy.signal
import pyworld as pw
from mozilla_voice_tts.tts.utils.data import StandardScaler
@ -285,6 +286,17 @@ class AudioProcessor(object):
return 0, pad
return pad // 2, pad // 2 + pad % 2
### Compute F0 ###
def compute_f0(self, x):
    """Estimate the fundamental frequency (F0) contour of a waveform.

    Runs pyworld's DIO estimator and then refines its output with
    StoneMask. The analysis frame period is derived from
    ``self.hop_length`` and ``self.sample_rate``, and ``self.mel_fmax``
    is passed to DIO as the F0 ceiling.

    Args:
        x: waveform array; it is cast to ``np.double`` before analysis.

    Returns:
        The refined F0 values as returned by ``pw.stonemask``.
    """
    # Cast once and hand the same buffer to both pyworld calls instead of
    # making two separate full copies of the waveform.
    signal = x.astype(np.double)

    # One hop expressed in milliseconds (assumes hop_length is in samples
    # and sample_rate in Hz -- TODO confirm against the class constructor).
    frame_period_ms = 1000 * self.hop_length / self.sample_rate

    # NOTE(review): mel_fmax is forwarded unchanged as the F0 ceiling;
    # confirm it is always a number by the time this method is called.
    f0, t = pw.dio(
        signal,
        fs=self.sample_rate,
        f0_ceil=self.mel_fmax,
        frame_period=frame_period_ms,
    )
    return pw.stonemask(signal, f0, t, self.sample_rate)
### Audio Processing ###
def find_endpoint(self, wav, threshold_db=-40, min_silence_sec=0.8):
window_length = int(self.sample_rate * min_silence_sec)

View File

@ -15,6 +15,7 @@ tqdm
inflect
bokeh==1.4.0
pysbd
pyworld
soundfile
nose==1.3.7
cardboardlint==1.3.0