Fix import in CheckSpectrogram.ipynb

This commit is contained in:
Eren Gölge 2021-08-17 11:47:44 +00:00
parent 3fd78241f8
commit 625ab614f0
1 changed files with 82 additions and 80 deletions

View File

@ -3,28 +3,24 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"Collapsed": "false"
},
"outputs": [],
"source": [
"%matplotlib inline\n",
"\n",
"from TTS.utils.audio import AudioProcessor\n",
"from TTS.tts.utils.visual import plot_spectrogram\n",
"from TTS.utils.io import load_config\n",
"from TTS.config import load_config\n",
"\n",
"import IPython.display as ipd\n",
"import glob"
]
],
"outputs": [],
"metadata": {
"Collapsed": "false"
}
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"Collapsed": "false"
},
"outputs": [],
"source": [
"config_path = \"/home/erogol/gdrive/Projects/TTS/recipes/ljspeech/align_tts/config_transformer2.json\"\n",
"data_path = \"/home/erogol/gdrive/Datasets/LJSpeech-1.1/\"\n",
@ -39,28 +35,28 @@
"\n",
"print(\"File list, by index:\")\n",
"dict(enumerate(file_paths))"
]
],
"outputs": [],
"metadata": {
"Collapsed": "false"
}
},
{
"cell_type": "markdown",
"metadata": {
"Collapsed": "false"
},
"source": [
"### Setup Audio Processor\n",
"Play with the AP parameters until you find a good fit with the synthesized speech below.\n",
"\n",
"The default values are loaded from your config.json file, so you only need to\n",
"uncomment and modify the values below that you'd like to tune."
]
],
"metadata": {
"Collapsed": "false"
}
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"Collapsed": "false"
},
"outputs": [],
"source": [
"tune_params={\n",
"# 'audio_processor': 'audio',\n",
@ -95,54 +91,54 @@
"tuned_config.update(tune_params)\n",
"\n",
"AP = AudioProcessor(**tuned_config);"
]
],
"outputs": [],
"metadata": {
"Collapsed": "false"
}
},
{
"cell_type": "markdown",
"metadata": {
"Collapsed": "false"
},
"source": [
"### Check audio loading "
]
],
"metadata": {
"Collapsed": "false"
}
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"Collapsed": "false"
},
"outputs": [],
"source": [
"wav = AP.load_wav(SAMPLE_FILE_PATH)\n",
"ipd.Audio(data=wav, rate=AP.sample_rate) "
]
],
"outputs": [],
"metadata": {
"Collapsed": "false"
}
},
{
"cell_type": "markdown",
"metadata": {
"Collapsed": "false"
},
"source": [
"### Generate Mel-Spectrogram and Re-synthesis with GL"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"AP.power = 1.5"
]
},
{
"cell_type": "code",
"execution_count": null,
],
"metadata": {
"Collapsed": "false"
},
}
},
{
"cell_type": "code",
"execution_count": null,
"source": [
"AP.power = 1.5"
],
"outputs": [],
"metadata": {}
},
{
"cell_type": "code",
"execution_count": null,
"source": [
"mel = AP.melspectrogram(wav)\n",
"print(\"Max:\", mel.max())\n",
@ -152,24 +148,24 @@
"\n",
"wav_gen = AP.inv_melspectrogram(mel)\n",
"ipd.Audio(wav_gen, rate=AP.sample_rate)"
]
],
"outputs": [],
"metadata": {
"Collapsed": "false"
}
},
{
"cell_type": "markdown",
"metadata": {
"Collapsed": "false"
},
"source": [
"### Generate Linear-Spectrogram and Re-synthesis with GL"
]
],
"metadata": {
"Collapsed": "false"
}
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"Collapsed": "false"
},
"outputs": [],
"source": [
"spec = AP.spectrogram(wav)\n",
"print(\"Max:\", spec.max())\n",
@ -179,26 +175,26 @@
"\n",
"wav_gen = AP.inv_spectrogram(spec)\n",
"ipd.Audio(wav_gen, rate=AP.sample_rate)"
]
],
"outputs": [],
"metadata": {
"Collapsed": "false"
}
},
{
"cell_type": "markdown",
"metadata": {
"Collapsed": "false"
},
"source": [
"### Compare values for a certain parameter\n",
"\n",
"Optimize your parameters by comparing different values for one parameter at a time."
]
],
"metadata": {
"Collapsed": "false"
}
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"Collapsed": "false"
},
"outputs": [],
"source": [
"from librosa import display\n",
"from matplotlib import pylab as plt\n",
@ -238,36 +234,39 @@
" val = values[idx]\n",
" print(\" > {} = {}\".format(attribute, val))\n",
" IPython.display.display(IPython.display.Audio(wav_gen, rate=AP.sample_rate))"
]
],
"outputs": [],
"metadata": {
"Collapsed": "false"
}
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"Collapsed": "false"
},
"outputs": [],
"source": [
"compare_values(\"preemphasis\", [0, 0.5, 0.97, 0.98, 0.99])"
]
],
"outputs": [],
"metadata": {
"Collapsed": "false"
}
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"Collapsed": "false"
},
"outputs": [],
"source": [
"compare_values(\"ref_level_db\", [2, 5, 10, 15, 20, 25, 30, 35, 40, 1000])"
]
],
"outputs": [],
"metadata": {
"Collapsed": "false"
}
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
"name": "python3",
"display_name": "Python 3.8.5 64-bit ('torch': conda)"
},
"language_info": {
"codemirror_mode": {
@ -280,8 +279,11 @@
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
},
"interpreter": {
"hash": "27648abe09795c3a768a281b31f7524fcf66a207e733f8ecda3a4e1fd1059fb0"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
}