diff --git a/notebooks/data-analysis.ipynb b/notebooks/data-analysis.ipynb
new file mode 100644
index 0000000..d7c44c2
--- /dev/null
+++ b/notebooks/data-analysis.ipynb
@@ -0,0 +1,214 @@
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "df = pd.read_csv('../src/out/risk_xoi.csv')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
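+ "<div>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ "  <thead>\n",
+ "    <tr style=\"text-align: right;\">\n",
+ "      <th></th>\n",
+ "      <th>Unnamed: 0</th>\n",
+ "      <th>group_count</th>\n",
+ "      <th>row_count</th>\n",
+ "      <th>marketer</th>\n",
+ "      <th>prosecutor</th>\n",
+ "      <th>field_count</th>\n",
+ "    </tr>\n",
+ "  </thead>\n",
+ "  <tbody>\n",
+ "    <tr>\n",
+ "      <th>0</th>\n",
+ "      <td>0</td>\n",
+ "      <td>432512</td>\n",
+ "      <td>79080802</td>\n",
+ "      <td>0.005469</td>\n",
+ "      <td>1</td>\n",
+ "      <td>10</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>1</th>\n",
+ "      <td>0</td>\n",
+ "      <td>17824004</td>\n",
+ "      <td>79080802</td>\n",
+ "      <td>0.225390</td>\n",
+ "      <td>1</td>\n",
+ "      <td>28</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>2</th>\n",
+ "      <td>0</td>\n",
+ "      <td>43538084</td>\n",
+ "      <td>79080802</td>\n",
+ "      <td>0.550552</td>\n",
+ "      <td>1</td>\n",
+ "      <td>38</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>3</th>\n",
+ "      <td>0</td>\n",
+ "      <td>64042788</td>\n",
+ "      <td>79080802</td>\n",
+ "      <td>0.809840</td>\n",
+ "      <td>1</td>\n",
+ "      <td>46</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>4</th>\n",
+ "      <td>0</td>\n",
+ "      <td>6866070</td>\n",
+ "      <td>79080802</td>\n",
+ "      <td>0.086823</td>\n",
+ "      <td>1</td>\n",
+ "      <td>17</td>\n",
+ "    </tr>\n",
+ "  </tbody>\n",
+ "</table>\n",
+ "</div>"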
+ ],
+ "text/plain": [
+ " Unnamed: 0 group_count row_count marketer prosecutor field_count\n",
+ "0 0 432512 79080802 0.005469 1 10\n",
+ "1 0 17824004 79080802 0.225390 1 28\n",
+ "2 0 43538084 79080802 0.550552 1 38\n",
+ "3 0 64042788 79080802 0.809840 1 46\n",
+ "4 0 6866070 79080802 0.086823 1 17"
+ ]
+ },
+ "execution_count": 2,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "compiled = df.groupby('field_count')[['field_count','marketer','prosecutor']].mean()\n",
+ "figure = compiled[['marketer','prosecutor']].plot.line().get_figure()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "variables": {
+ " figure ": ""
+ }
+ },
+ "source": [
+ "# Dataset Used\n",
+ "---\n",
+ "\n",
+ "We performed joins against all the tables from all-of-us and truncated records while randomly selecting one record per join. As a result we have a dataset of roughly **80 million** records and about **5000** distinct patients.\n",
+ "\n",
+ "## Experiment Design\n",
+ "---\n",
+ "\n",
+ "We compute both marketer and prosecutor risk while randomly selecting the number of attributes out of **111**. This selection is between **2** and **111** attributes. The maximum number of attributes that can be computed at any time is **64**, due to limitations of Google's BigQuery. We performed **500** runs.\n",
+ "\n",
+ "## Results\n",
+ "---\n",
+ "\n",
+ "The results show the prosecutor risk is unchanging, perhaps as an artifact of the number of runs (**500**) or of the dataset curation (the joins we performed). The prosecutor risk shows there is at least one record that is vulnerable.\n",
+ "\n",
+ "As a general trend, the marketer risk seems to increase as the number of randomly selected attributes increases. \n",
+ "\n",
+ "{{ figure }} \n",
+ "\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 2",
+ "language": "python",
+ "name": "python2"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 2
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython2",
+ "version": "2.7.10"
+ },
+ "varInspector": {
+ "cols": {
+ "lenName": 16,
+ "lenType": 16,
+ "lenVar": 40
+ },
+ "kernels_config": {
+ "python": {
+ "delete_cmd_postfix": "",
+ "delete_cmd_prefix": "del ",
+ "library": "var_list.py",
+ "varRefreshCmd": "print(var_dic_list())"
+ },
+ "r": {
+ "delete_cmd_postfix": ") ",
+ "delete_cmd_prefix": "rm(",
+ "library": "var_list.r",
+ "varRefreshCmd": "cat(var_dic_list()) "
+ }
+ },
+ "types_to_exclude": [
+ "module",
+ "function",
+ "builtin_function_or_method",
+ "instance",
+ "_Feature"
+ ],
+ "window_display": false
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
diff --git a/src/out/risk_xoi.csv b/src/out/risk_xoi.csv
index 477e8a5..8e84d80 100644
--- a/src/out/risk_xoi.csv
+++ b/src/out/risk_xoi.csv
@@ -1,501 +1,501 @@