diff --git a/notebooks/dataset_analysis/PhonemeCoverage.ipynb b/notebooks/dataset_analysis/PhonemeCoverage.ipynb
new file mode 100644
index 00000000..af00deaf
--- /dev/null
+++ b/notebooks/dataset_analysis/PhonemeCoverage.ipynb
@@ -0,0 +1,251 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "Collapsed": "false"
+   },
+   "source": [
+    "# Jupyter Notebook for phoneme coverage analysis\n",
+    "\n",
+    "This notebook checks the dataset configured in config.json for phoneme coverage.\n",
+    "As mentioned in https://github.com/mozilla/TTS/wiki/Dataset#what-makes-a-good-dataset, good phoneme coverage is recommended.\n",
+    "\n",
+    "Most parameters are taken from the config.json file in the Mozilla TTS repo, so please make sure it is configured correctly for your dataset.\n",
+    "This notebook reuses lots of existing code from the TTS repo to ensure future compatibility.\n",
+    "\n",
+    "Many thanks to Neil Stoker for supporting me on this topic :-).\n",
+    "\n",
+    "I provide this notebook without any warranty, but it is hopefully useful for your dataset analysis.\n",
+    "\n",
+    "Happy TTS'ing :-)\n",
+    "\n",
+    "Thorsten Müller\n",
+    "\n",
+    "* https://github.com/thorstenMueller/deep-learning-german-tts\n",
+    "* https://discourse.mozilla.org/t/contributing-my-german-voice-for-tts/"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "Collapsed": "false"
+   },
+   "outputs": [],
+   "source": [
+    "# Set some variables\n",
+    "# TTS_PATH = \"/home/thorsten/___dev/tts/mozilla/TTS\"\n",
+    "CONFIG_FILE = \"/path/to/config/config.json\"\n",
+    "CHARS_TO_REMOVE = \".,:!?'\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "Collapsed": "false"
+   },
+   "outputs": [],
+   "source": [
+    "# Imports from the TTS repo and third-party packages\n",
+    "from TTS.utils.io import load_config\n",
+    "from TTS.tts.datasets.preprocess import load_meta_data\n",
+    "from TTS.tts.utils.text import phoneme_to_sequence, sequence_to_phoneme\n",
+    "from tqdm import tqdm\n",
+    "import matplotlib.pyplot as plt\n",
+    "from multiprocessing import Pool, cpu_count\n",
+    "\n",
+    "# Standard library imports used for counting and sorting\n",
+    "import collections\n",
+    "import operator\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "Collapsed": "false",
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# Load config.json properties\n",
+    "CONFIG = load_config(CONFIG_FILE)\n",
+    "\n",
+    "# Load the properties needed for the analysis from config.json\n",
+    "CONFIG_METADATA = sorted(load_meta_data(CONFIG.datasets)[0])\n",
+    "CONFIG_DATASET = CONFIG.datasets[0]\n",
+    "CONFIG_PHONEME_LANGUAGE = CONFIG.phoneme_language\n",
+    "CONFIG_TEXT_CLEANER = CONFIG.text_cleaner\n",
+    "CONFIG_ENABLE_EOS_BOS_CHARS = CONFIG.enable_eos_bos_chars\n",
+    "\n",
+    "# Will be printed on the generated output graph\n",
+    "CONFIG_RUN_NAME = CONFIG.run_name\n",
+    "CONFIG_RUN_DESC = CONFIG.run_description"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "Collapsed": "false",
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# Print some debug information on the loaded config values\n",
+    "print(\" > Run name: \" + CONFIG_RUN_NAME + \" (\" + CONFIG_RUN_DESC + \")\")\n",
+    "print(\" > Dataset files: \" + str(len(CONFIG_METADATA)))\n",
+    "print(\" > Phoneme language: \" + CONFIG_PHONEME_LANGUAGE)\n",
+    "print(\" > Used text cleaner: \" + CONFIG_TEXT_CLEANER)\n",
+    "print(\" > Enable eos bos chars: \" + str(CONFIG_ENABLE_EOS_BOS_CHARS))"
+   ]
+  },
+  {
+   "cell_type": "code",
"code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def get_phoneme_from_sequence(text):\n", + " temp_list = []\n", + " if len(text[0]) > 0:\n", + " temp_text = text[0].rstrip('\\n')\n", + " for rm_bad_chars in CHARS_TO_REMOVE:\n", + " temp_text = temp_text.replace(rm_bad_chars,\"\")\n", + " seq = phoneme_to_sequence(temp_text, [CONFIG_TEXT_CLEANER], CONFIG_PHONEME_LANGUAGE, CONFIG_ENABLE_EOS_BOS_CHARS)\n", + " text = sequence_to_phoneme(seq)\n", + " text = text.replace(\" \",\"\")\n", + " temp_list.append(text)\n", + " return temp_list" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false", + "tags": [] + }, + "outputs": [], + "source": [ + "# Get phonemes from metadata\n", + "phonemes = []\n", + "\n", + "with Pool(cpu_count()-1) as p:\n", + " \n", + " phonemes = list(tqdm(p.imap(get_phoneme_from_sequence, CONFIG_METADATA), total=len(CONFIG_METADATA)))\n", + " phonemes = [i for sub in phonemes for i in sub]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false", + "tags": [] + }, + "outputs": [], + "source": [ + "s = \"\"\n", + "phonemeString = s.join(phonemes)\n", + "\n", + "d = {}\n", + "collections._count_elements(d, phonemeString)\n", + "sorted_d = dict(sorted(d.items(), key=operator.itemgetter(1),reverse=True))\n", + "\n", + "# remove useless keys\n", + "sorted_d.pop(' ', None)\n", + "sorted_d.pop('ˈ', None)\n", + "\n", + "phonemesSum = len(phonemeString.replace(\" \",\"\"))\n", + "\n", + "print(\"Dataset contains \" + str(len(sorted_d)) + \" different ipa phonemes.\")\n", + "print(\"Dataset consists of \" + str(phonemesSum) + \" phonemes\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false", + "tags": [] + }, + "outputs": [], + "source": [ + "print(\"5 rarest phonemes\")\n", + "\n", + "rareList = dict(sorted(sorted_d.items(), key=operator.itemgetter(1), reverse=False)[:5])\n", + "for key, value in rareList.items():\n", + " print(key + \" --> \" + str(value) + \" occurrences\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "# create plot from analysis result\n", + "\n", + "x = []\n", + "y = []\n", + "\n", + "for key, value in sorted_d.items():\n", + " x.append(key)\n", + " y.append(value)\n", + "\n", + "plt.figure(figsize=(50,50))\n", + "plt.title(\"Phoneme coverage for \" + CONFIG_RUN_NAME + \" (\" + CONFIG_RUN_DESC + \")\", fontsize=50)\n", + "plt.xticks(fontsize=50)\n", + "plt.yticks(fontsize=50)\n", + "plt.barh(x,y, align='center', alpha=1.0)\n", + "plt.gca().invert_yaxis()\n", + "plt.ylabel('phoneme', fontsize=50)\n", + "plt.xlabel('occurrences', fontsize=50)\n", + "\n", + "for i, v in enumerate(y):\n", + " plt.text(v + 2, i - .2, str(v), fontsize=20)\n", + " plt.text(v + 2, i + .2, \"(\" + str(round(100/phonemesSum * v,2)) + \"%)\", fontsize=20)\n", + " \n", + " \n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9-final" + } + }, + 
"nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file