mirror of https://github.com/coqui-ai/TTS.git
Fix import in CheckSpectrogram.ipynb
This commit is contained in:
parent
3fd78241f8
commit
625ab614f0
|
@ -3,28 +3,24 @@
|
|||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"Collapsed": "false"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%matplotlib inline\n",
|
||||
"\n",
|
||||
"from TTS.utils.audio import AudioProcessor\n",
|
||||
"from TTS.tts.utils.visual import plot_spectrogram\n",
|
||||
"from TTS.utils.io import load_config\n",
|
||||
"from TTS.config import load_config\n",
|
||||
"\n",
|
||||
"import IPython.display as ipd\n",
|
||||
"import glob"
|
||||
]
|
||||
],
|
||||
"outputs": [],
|
||||
"metadata": {
|
||||
"Collapsed": "false"
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"Collapsed": "false"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"config_path = \"/home/erogol/gdrive/Projects/TTS/recipes/ljspeech/align_tts/config_transformer2.json\"\n",
|
||||
"data_path = \"/home/erogol/gdrive/Datasets/LJSpeech-1.1/\"\n",
|
||||
|
@ -39,28 +35,28 @@
|
|||
"\n",
|
||||
"print(\"File list, by index:\")\n",
|
||||
"dict(enumerate(file_paths))"
|
||||
]
|
||||
],
|
||||
"outputs": [],
|
||||
"metadata": {
|
||||
"Collapsed": "false"
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"Collapsed": "false"
|
||||
},
|
||||
"source": [
|
||||
"### Setup Audio Processor\n",
|
||||
"Play with the AP parameters until you find a good fit with the synthesis speech below.\n",
|
||||
"\n",
|
||||
"The default values are loaded from your config.json file, so you only need to\n",
|
||||
"uncomment and modify values below that you'd like to tune."
|
||||
]
|
||||
],
|
||||
"metadata": {
|
||||
"Collapsed": "false"
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"Collapsed": "false"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"tune_params={\n",
|
||||
"# 'audio_processor': 'audio',\n",
|
||||
|
@ -95,54 +91,54 @@
|
|||
"tuned_config.update(tune_params)\n",
|
||||
"\n",
|
||||
"AP = AudioProcessor(**tuned_config);"
|
||||
]
|
||||
],
|
||||
"outputs": [],
|
||||
"metadata": {
|
||||
"Collapsed": "false"
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"Collapsed": "false"
|
||||
},
|
||||
"source": [
|
||||
"### Check audio loading "
|
||||
]
|
||||
],
|
||||
"metadata": {
|
||||
"Collapsed": "false"
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"Collapsed": "false"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"wav = AP.load_wav(SAMPLE_FILE_PATH)\n",
|
||||
"ipd.Audio(data=wav, rate=AP.sample_rate) "
|
||||
]
|
||||
],
|
||||
"outputs": [],
|
||||
"metadata": {
|
||||
"Collapsed": "false"
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"Collapsed": "false"
|
||||
},
|
||||
"source": [
|
||||
"### Generate Mel-Spectrogram and Re-synthesis with GL"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"AP.power = 1.5"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
],
|
||||
"metadata": {
|
||||
"Collapsed": "false"
|
||||
},
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"source": [
|
||||
"AP.power = 1.5"
|
||||
],
|
||||
"outputs": [],
|
||||
"metadata": {}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"source": [
|
||||
"mel = AP.melspectrogram(wav)\n",
|
||||
"print(\"Max:\", mel.max())\n",
|
||||
|
@ -152,24 +148,24 @@
|
|||
"\n",
|
||||
"wav_gen = AP.inv_melspectrogram(mel)\n",
|
||||
"ipd.Audio(wav_gen, rate=AP.sample_rate)"
|
||||
]
|
||||
],
|
||||
"outputs": [],
|
||||
"metadata": {
|
||||
"Collapsed": "false"
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"Collapsed": "false"
|
||||
},
|
||||
"source": [
|
||||
"### Generate Linear-Spectrogram and Re-synthesis with GL"
|
||||
]
|
||||
],
|
||||
"metadata": {
|
||||
"Collapsed": "false"
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"Collapsed": "false"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"spec = AP.spectrogram(wav)\n",
|
||||
"print(\"Max:\", spec.max())\n",
|
||||
|
@ -179,26 +175,26 @@
|
|||
"\n",
|
||||
"wav_gen = AP.inv_spectrogram(spec)\n",
|
||||
"ipd.Audio(wav_gen, rate=AP.sample_rate)"
|
||||
]
|
||||
],
|
||||
"outputs": [],
|
||||
"metadata": {
|
||||
"Collapsed": "false"
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"Collapsed": "false"
|
||||
},
|
||||
"source": [
|
||||
"### Compare values for a certain parameter\n",
|
||||
"\n",
|
||||
"Optimize your parameters by comparing different values per parameter at a time."
|
||||
]
|
||||
],
|
||||
"metadata": {
|
||||
"Collapsed": "false"
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"Collapsed": "false"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from librosa import display\n",
|
||||
"from matplotlib import pylab as plt\n",
|
||||
|
@ -238,36 +234,39 @@
|
|||
" val = values[idx]\n",
|
||||
" print(\" > {} = {}\".format(attribute, val))\n",
|
||||
" IPython.display.display(IPython.display.Audio(wav_gen, rate=AP.sample_rate))"
|
||||
]
|
||||
],
|
||||
"outputs": [],
|
||||
"metadata": {
|
||||
"Collapsed": "false"
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"Collapsed": "false"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"compare_values(\"preemphasis\", [0, 0.5, 0.97, 0.98, 0.99])"
|
||||
]
|
||||
],
|
||||
"outputs": [],
|
||||
"metadata": {
|
||||
"Collapsed": "false"
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"Collapsed": "false"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"compare_values(\"ref_level_db\", [2, 5, 10, 15, 20, 25, 30, 35, 40, 1000])"
|
||||
]
|
||||
],
|
||||
"outputs": [],
|
||||
"metadata": {
|
||||
"Collapsed": "false"
|
||||
}
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
"name": "python3",
|
||||
"display_name": "Python 3.8.5 64-bit ('torch': conda)"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
|
@ -280,8 +279,11 @@
|
|||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.5"
|
||||
},
|
||||
"interpreter": {
|
||||
"hash": "27648abe09795c3a768a281b31f7524fcf66a207e733f8ecda3a4e1fd1059fb0"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue