mirror of https://github.com/coqui-ai/TTS.git
compute_f0 with pyworld
This commit is contained in:
parent
012bd45f13
commit
f9fd21e745
|
@ -3,6 +3,7 @@ import soundfile as sf
|
|||
import numpy as np
|
||||
import scipy.io.wavfile
|
||||
import scipy.signal
|
||||
import pyworld as pw
|
||||
|
||||
from mozilla_voice_tts.tts.utils.data import StandardScaler
|
||||
|
||||
|
@ -285,6 +286,17 @@ class AudioProcessor(object):
|
|||
return 0, pad
|
||||
return pad // 2, pad // 2 + pad % 2
|
||||
|
||||
### Compute F0 ###
|
||||
def compute_f0(self, x):
    """Estimate the fundamental frequency (F0) track of a waveform.

    A first F0 estimate is produced with pyworld's DIO and then refined
    with StoneMask.

    Args:
        x: waveform as a numpy array; presumably 1-D mono audio sampled
            at ``self.sample_rate`` -- TODO confirm against callers.

    Returns:
        The refined F0 values returned by ``pw.stonemask``, one per
        analysis frame.
    """
    # Convert to float64 once and reuse the same buffer for both pyworld
    # calls instead of copying the whole waveform twice.
    wav = x.astype(np.double)

    # Hop size converted from samples to milliseconds.
    frame_period = 1000 * self.hop_length / self.sample_rate

    # NOTE(review): the F0 search ceiling is taken from ``self.mel_fmax``;
    # this assumes it is set to a numeric value -- confirm in the caller.
    f0, t = pw.dio(
        wav,
        fs=self.sample_rate,
        f0_ceil=self.mel_fmax,
        frame_period=frame_period,
    )
    return pw.stonemask(wav, f0, t, self.sample_rate)
|
||||
|
||||
### Audio Processing ###
|
||||
def find_endpoint(self, wav, threshold_db=-40, min_silence_sec=0.8):
|
||||
window_length = int(self.sample_rate * min_silence_sec)
|
||||
|
|
|
@ -15,6 +15,7 @@ tqdm
|
|||
inflect
|
||||
bokeh==1.4.0
|
||||
pysbd
|
||||
pyworld
|
||||
soundfile
|
||||
nose==1.3.7
|
||||
cardboardlint==1.3.0
|
||||
|
|
Loading…
Reference in New Issue