mirror of https://github.com/coqui-ai/TTS.git
compute_f0 with pyworld
This commit is contained in:
parent
012bd45f13
commit
f9fd21e745
|
@ -3,6 +3,7 @@ import soundfile as sf
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import scipy.io.wavfile
|
import scipy.io.wavfile
|
||||||
import scipy.signal
|
import scipy.signal
|
||||||
|
import pyworld as pw
|
||||||
|
|
||||||
from mozilla_voice_tts.tts.utils.data import StandardScaler
|
from mozilla_voice_tts.tts.utils.data import StandardScaler
|
||||||
|
|
||||||
|
@ -285,6 +286,17 @@ class AudioProcessor(object):
|
||||||
return 0, pad
|
return 0, pad
|
||||||
return pad // 2, pad // 2 + pad % 2
|
return pad // 2, pad // 2 + pad % 2
|
||||||
|
|
||||||
|
### Compute F0 ###
|
||||||
|
def compute_f0(self, x):
    """Estimate the fundamental-frequency (F0) contour of a waveform with pyworld.

    A coarse F0 track is computed with ``pw.dio`` and then refined with
    ``pw.stonemask``. The analysis frame period is one hop
    (``self.hop_length`` samples) expressed in milliseconds, derived from
    ``self.sample_rate``.

    Args:
        x (np.ndarray): waveform samples. They are converted to float64
            before analysis. Presumably a 1-D mono signal -- TODO confirm
            against callers.

    Returns:
        np.ndarray: the refined F0 track returned by ``pw.stonemask``.
    """
    # Both pyworld calls consume the same float64 signal, so convert once.
    signal = x.astype(np.double)

    dio_kwargs = {
        "fs": self.sample_rate,
        # hop size in samples -> frame period in milliseconds
        "frame_period": 1000 * self.hop_length / self.sample_rate,
    }
    # Only override the F0 search ceiling when one is configured; handing
    # ``None`` to pyworld would fail, so let it use its own default instead.
    if self.mel_fmax is not None:
        dio_kwargs["f0_ceil"] = self.mel_fmax

    f0, t = pw.dio(signal, **dio_kwargs)
    return pw.stonemask(signal, f0, t, self.sample_rate)
|
||||||
|
|
||||||
### Audio Processing ###
|
### Audio Processing ###
|
||||||
def find_endpoint(self, wav, threshold_db=-40, min_silence_sec=0.8):
|
def find_endpoint(self, wav, threshold_db=-40, min_silence_sec=0.8):
|
||||||
window_length = int(self.sample_rate * min_silence_sec)
|
window_length = int(self.sample_rate * min_silence_sec)
|
||||||
|
|
|
@ -15,6 +15,7 @@ tqdm
|
||||||
inflect
|
inflect
|
||||||
bokeh==1.4.0
|
bokeh==1.4.0
|
||||||
pysbd
|
pysbd
|
||||||
|
pyworld
|
||||||
soundfile
|
soundfile
|
||||||
nose==1.3.7
|
nose==1.3.7
|
||||||
cardboardlint==1.3.0
|
cardboardlint==1.3.0
|
||||||
|
|
Loading…
Reference in New Issue