From 715e40407a4ca8d638d0a92a6299cf8a34354484 Mon Sep 17 00:00:00 2001
From: Steve Nyemba
Date: Tue, 2 Apr 2024 17:00:11 -0500
Subject: [PATCH] adding notebooks (test/examples)

---
 notebooks/bigquery.ipynb   | 169 +++++++++++++++++++++++++++++++++++++
 notebooks/mongodb.ipynb    | 155 ++++++++++++++++++++++++++++++++++
 notebooks/mysql.ipynb      | 150 ++++++++++++++++++++++++++++++++
 notebooks/postgresql.ipynb | 157 ++++++++++++++++++++++++++++++++++
 notebooks/sqlite.ipynb     | 139 ++++++++++++++++++++++++++++++
 5 files changed, 770 insertions(+)
 create mode 100644 notebooks/bigquery.ipynb
 create mode 100644 notebooks/mongodb.ipynb
 create mode 100644 notebooks/mysql.ipynb
 create mode 100644 notebooks/postgresql.ipynb
 create mode 100644 notebooks/sqlite.ipynb

diff --git a/notebooks/bigquery.ipynb b/notebooks/bigquery.ipynb
new file mode 100644
index 0000000..750f167
--- /dev/null
+++ b/notebooks/bigquery.ipynb
@@ -0,0 +1,169 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Writing to Google Bigquery\n",
+    "\n",
+    "1. Ensure you have a Google Bigquery service account key on disk\n",
+    "2. The location of the service key is set in the environment variable **BQ_KEY**\n",
+    "3. The dataset will be created automatically within the project associated with the service key\n",
+    "\n",
+    "The cell below creates a dataframe that will be stored within Google Bigquery"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "100%|██████████| 1/1 [00:00<00:00, 5440.08it/s]\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "['data transport version ', '2.0.0']\n"
+     ]
+    }
+   ],
+   "source": [
+    "#\n",
+    "# Writing to Google Bigquery database\n",
+    "#\n",
+    "import transport\n",
+    "from transport import providers\n",
+    "import pandas as pd\n",
+    "import os\n",
+    "\n",
+    "PRIVATE_KEY = os.environ['BQ_KEY'] #-- location of the service key\n",
+    "DATASET = 'demo'\n",
+    "_data = pd.DataFrame({\"name\":['James Bond','Steve Rogers','Steve Nyemba'],'age':[55,150,44]})\n",
+    "bqw = transport.factory.instance(provider=providers.BIGQUERY,dataset=DATASET,table='friends',context='write',private_key=PRIVATE_KEY)\n",
+    "bqw.write(_data,if_exists='replace') #-- default is append\n",
+    "print (['data transport version ', transport.__version__])\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Reading from Google Bigquery\n",
+    "\n",
+    "The cell below reads the data written by the cell above and computes the average age with a simple aggregate query in Google Bigquery.\n",
+    "\n",
+    "- Basic read of the designated table (friends) created above\n",
+    "- Execute an aggregate SQL query against the table\n",
+    "\n",
+    "**NOTE**\n",
+    "\n",
+    "**transport.factory.instance** and **transport.instance** are interchangeable; the former simply makes the underlying factory design pattern explicit.\n",
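+    "\n",
+    "For instance, the two calls below are equivalent (same parameters as the read cell that follows):\n",
+    "```python\n",
+    "pgr = transport.factory.instance(provider=providers.BIGQUERY,dataset='demo',table='friends',private_key=PRIVATE_KEY)\n",
+    "pgr = transport.instance(provider=providers.BIGQUERY,dataset='demo',table='friends',private_key=PRIVATE_KEY)\n",
+    "```"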
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Downloading: 100%|\u001b[32m██████████\u001b[0m|\n",
+      "Downloading: 100%|\u001b[32m██████████\u001b[0m|\n",
+      " name age\n",
+      "0 James Bond 55\n",
+      "1 Steve Rogers 150\n",
+      "2 Steve Nyemba 44\n",
+      "--------- STATISTICS ------------\n",
+      " _counts f0_\n",
+      "0 3 83.0\n"
+     ]
+    }
+   ],
+   "source": [
+    "\n",
+    "import transport\n",
+    "from transport import providers\n",
+    "import os\n",
+    "PRIVATE_KEY=os.environ['BQ_KEY']\n",
+    "pgr = transport.instance(provider=providers.BIGQUERY,dataset='demo',table='friends',private_key=PRIVATE_KEY)\n",
+    "_df = pgr.read()\n",
+    "_query = 'SELECT COUNT(*) _counts, AVG(age) from demo.friends'\n",
+    "_sdf = pgr.read(sql=_query)\n",
+    "print (_df)\n",
+    "print ('--------- STATISTICS ------------')\n",
+    "print (_sdf)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The cell below shows the content of an auth_file. If the connection parameters are not meant to be shared in code, they can be stored in an auth_file and passed to the factory instead.\n",
+    "\n",
+    "**NOTE**:\n",
+    "\n",
+    "The auth_file is intended to be **JSON** formatted"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'dataset': 'demo', 'table': 'friends'}"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "\n",
+    "{\n",
+    " \n",
+    " \"dataset\":\"demo\",\"table\":\"friends\"\n",
+    "}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/notebooks/mongodb.ipynb b/notebooks/mongodb.ipynb
new file mode 100644
index 0000000..0554669
--- /dev/null
+++ b/notebooks/mongodb.ipynb
@@ -0,0 +1,155 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Writing to MongoDB\n",
+    "\n",
+    "Ensure MongoDB is installed on the system. The cell below creates a dataframe that will be stored within MongoDB"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2.0.0\n"
+     ]
+    }
+   ],
+   "source": [
+    "#\n",
+    "# Writing to mongodb database\n",
+    "#\n",
+    "import transport\n",
+    "from transport import providers\n",
+    "import pandas as pd\n",
+    "_data = pd.DataFrame({\"name\":['James Bond','Steve Rogers','Steve Nyemba'],'age':[55,150,44]})\n",
+    "mgw = transport.factory.instance(provider=providers.MONGODB,db='demo',collection='friends',context='write')\n",
+    "mgw.write(_data)\n",
+    "print (transport.__version__)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Reading from MongoDB\n",
+    "\n",
+    "The cell below reads the data written by the cell above and computes the average age with a MongoDB aggregation pipeline; behind the scenes the aggregation is executed via **db.runCommand**.\n",
+    "\n",
+    "- Basic read of the designated collection (**find**)\n",
+    "- Execute an aggregation pipeline against a collection (**aggregate**)\n",
+    "\n",
+    "**NOTE**\n",
+    "\n",
+    "**transport.factory.instance** and **transport.instance** are interchangeable; the former simply makes the underlying factory design pattern explicit.\n",
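+    "\n",
+    "For reference, the pipeline used below is roughly what you would run directly with **pymongo** (this sketch assumes a local MongoDB instance and the pymongo package; it is only meant to illustrate what the pipeline computes):\n",
+    "```python\n",
+    "from pymongo import MongoClient\n",
+    "\n",
+    "PIPELINE = [{'$group': {'_id': 0, '_counts': {'$sum': 1}, '_mean': {'$avg': '$age'}}}]\n",
+    "client = MongoClient('localhost', 27017)\n",
+    "print (list(client['foo']['friends'].aggregate(PIPELINE)))\n",
+    "```"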
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      " name age\n",
+      "0 James Bond 55\n",
+      "1 Steve Rogers 150\n",
+      "--------- STATISTICS ------------\n",
+      " _id _counts _mean\n",
+      "0 0 2 102.5\n"
+     ]
+    }
+   ],
+   "source": [
+    "\n",
+    "import transport\n",
+    "from transport import providers\n",
+    "mgr = transport.instance(provider=providers.MONGODB,db='foo',collection='friends')\n",
+    "_df = mgr.read()\n",
+    "PIPELINE = [{\"$group\":{\"_id\":0,\"_counts\":{\"$sum\":1}, \"_mean\":{\"$avg\":\"$age\"}}}]\n",
+    "_sdf = mgr.read(aggregate='friends',pipeline=PIPELINE)\n",
+    "print (_df)\n",
+    "print ('--------- STATISTICS ------------')\n",
+    "print (_sdf)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The cell below shows the content of an auth_file. If the connection parameters are not meant to be shared in code, they can be stored in an auth_file and passed to the factory instead.\n",
+    "\n",
+    "**NOTE**:\n",
+    "\n",
+    "The auth_file is intended to be **JSON** formatted"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'host': 'klingon.io',\n",
+       " 'port': 27017,\n",
+       " 'username': 'me',\n",
+       " 'password': 'foobar',\n",
+       " 'db': 'foo',\n",
+       " 'collection': 'friends',\n",
+       " 'authSource': '',\n",
+       " 'mechamism': ''}"
+      ]
+     },
+     "execution_count": 1,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "{\n",
+    " \"host\":\"klingon.io\",\"port\":27017,\"username\":\"me\",\"password\":\"foobar\",\"db\":\"foo\",\"collection\":\"friends\",\n",
+    " \"authSource\":\"\",\"mechamism\":\"\"\n",
+    "}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/notebooks/mysql.ipynb b/notebooks/mysql.ipynb
new file mode 100644
index 0000000..a54d46d
--- /dev/null
+++ b/notebooks/mysql.ipynb
@@ -0,0 +1,150 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Writing to MySQL\n",
+    "\n",
+    "1. Ensure MySQL is installed on the system\n",
+    "2. There is a database named demo on that system\n",
+    "\n",
+    "The cell below creates a dataframe that will be stored within MySQL"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2.0.0\n"
+     ]
+    }
+   ],
+   "source": [
+    "#\n",
+    "# Writing to MySQL database\n",
+    "#\n",
+    "import transport\n",
+    "from transport import providers\n",
+    "import pandas as pd\n",
+    "_data = pd.DataFrame({\"name\":['James Bond','Steve Rogers','Steve Nyemba'],'age':[55,150,44]})\n",
+    "myw = transport.factory.instance(provider=providers.MYSQL,database='demo',table='friends',context='write',auth_file=\"/home/steve/auth-mysql.json\")\n",
+    "myw.write(_data,if_exists='replace') #-- default is append\n",
+    "print (transport.__version__)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Reading from MySQL\n",
+    "\n",
+    "The cell below reads the data written by the cell above and computes the average age with a simple aggregate query in MySQL.\n",
+    "\n",
+    "- Basic read of the designated table (friends) created above\n",
+    "- Execute an aggregate SQL query against the table\n",
+    "\n",
+    "**NOTE**\n",
+    "\n",
+    "**transport.factory.instance** and **transport.instance** are interchangeable; the former simply makes the underlying factory design pattern explicit."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      " name age\n",
+      "0 James Bond 55\n",
+      "1 Steve Rogers 150\n",
+      "2 Steve Nyemba 44\n",
+      "--------- STATISTICS ------------\n",
+      " _counts avg\n",
+      "0 3 83.0\n"
+     ]
+    }
+   ],
+   "source": [
+    "\n",
+    "import transport\n",
+    "from transport import providers\n",
+    "myr = transport.instance(provider=providers.MYSQL,database='demo',table='friends',auth_file='/home/steve/auth-mysql.json')\n",
+    "_df = myr.read()\n",
+    "_query = 'SELECT COUNT(*) _counts, AVG(age) from friends'\n",
+    "_sdf = myr.read(sql=_query)\n",
+    "print (_df)\n",
+    "print ('--------- STATISTICS ------------')\n",
+    "print (_sdf)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The cell below shows the content of an auth_file. If the connection parameters are not meant to be shared in code, they can be stored in an auth_file and passed to the factory instead.\n",
+    "\n",
+    "**NOTE**:\n",
+    "\n",
+    "The auth_file is intended to be **JSON** formatted"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'host': 'klingon.io',\n",
+       " 'port': 3306,\n",
+       " 'username': 'me',\n",
+       " 'password': 'foobar',\n",
+       " 'database': 'demo',\n",
+       " 'table': 'friends'}"
+      ]
+     },
+     "execution_count": 1,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "{\n",
+    " \"host\":\"klingon.io\",\"port\":3306,\"username\":\"me\",\"password\":\"foobar\",\n",
+    " \"database\":\"demo\",\"table\":\"friends\"\n",
+    "}"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/notebooks/postgresql.ipynb b/notebooks/postgresql.ipynb
new file mode 100644
index 0000000..5046f4d
--- /dev/null
+++ b/notebooks/postgresql.ipynb
@@ -0,0 +1,157 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Writing to PostgreSQL\n",
+    "\n",
+    "1. Ensure PostgreSQL is installed on the system\n",
+    "2. There is a database named demo on that system\n",
+    "\n",
+    "The cell below creates a dataframe that will be stored within PostgreSQL"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2.0.0\n"
+     ]
+    }
+   ],
+   "source": [
+    "#\n",
+    "# Writing to PostgreSQL database\n",
+    "#\n",
+    "import transport\n",
+    "from transport import providers\n",
+    "import pandas as pd\n",
+    "_data = pd.DataFrame({\"name\":['James Bond','Steve Rogers','Steve Nyemba'],'age':[55,150,44]})\n",
+    "pgw = transport.factory.instance(provider=providers.POSTGRESQL,database='demo',table='friends',context='write')\n",
+    "pgw.write(_data,if_exists='replace') #-- default is append\n",
+    "print (transport.__version__)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Reading from PostgreSQL\n",
+    "\n",
+    "The cell below reads the data written by the cell above and computes the average age with a simple aggregate query in PostgreSQL.\n",
+    "\n",
+    "- Basic read of the designated table (friends) created above\n",
+    "- Execute an aggregate SQL query against the table\n",
+    "\n",
+    "**NOTE**\n",
+    "\n",
+    "**transport.factory.instance** and **transport.instance** are interchangeable; the former simply makes the underlying factory design pattern explicit."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      " name age\n",
+      "0 James Bond 55\n",
+      "1 Steve Rogers 150\n",
+      "2 Steve Nyemba 44\n",
+      "--------- STATISTICS ------------\n",
+      " _counts avg\n",
+      "0 3 83.0\n"
+     ]
+    }
+   ],
+   "source": [
+    "\n",
+    "import transport\n",
+    "from transport import providers\n",
+    "pgr = transport.instance(provider=providers.POSTGRESQL,database='demo',table='friends')\n",
+    "_df = pgr.read()\n",
+    "_query = 'SELECT COUNT(*) _counts, AVG(age) from friends'\n",
+    "_sdf = pgr.read(sql=_query)\n",
+    "print (_df)\n",
+    "print ('--------- STATISTICS ------------')\n",
+    "print (_sdf)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The cell below shows the content of an auth_file. If the connection parameters are not meant to be shared in code, they can be stored in an auth_file and passed to the factory instead.\n",
+    "\n",
+    "**NOTE**:\n",
+    "\n",
+    "The auth_file is intended to be **JSON** formatted"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'host': 'klingon.io',\n",
+       " 'port': 5432,\n",
+       " 'username': 'me',\n",
+       " 'password': 'foobar',\n",
+       " 'database': 'demo',\n",
+       " 'table': 'friends'}"
+      ]
+     },
+     "execution_count": 1,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "{\n",
+    " \"host\":\"klingon.io\",\"port\":5432,\"username\":\"me\",\"password\":\"foobar\",\n",
+    " \"database\":\"demo\",\"table\":\"friends\"\n",
+    "}"
+   ]
+  },
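+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "A minimal sketch of how such an auth_file might be passed to the factory. The path below is hypothetical, and the sketch assumes the **auth_file** parameter behaves for PostgreSQL as it does in the MySQL notebook:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import transport\n",
+    "from transport import providers\n",
+    "\n",
+    "# hypothetical file holding the JSON shown above\n",
+    "AUTH_FILE = '/home/steve/auth-pgsql.json'\n",
+    "pgr = transport.instance(provider=providers.POSTGRESQL,database='demo',table='friends',auth_file=AUTH_FILE)\n",
+    "print (pgr.read())"
+   ]
+  },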
"kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.7" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/sqlite.ipynb b/notebooks/sqlite.ipynb new file mode 100644 index 0000000..5c249de --- /dev/null +++ b/notebooks/sqlite.ipynb @@ -0,0 +1,139 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Writing to SQLite3+\n", + "\n", + "The requirements to get started are minimal (actually none). The cell below creates a dataframe that will be stored within SQLite 3+" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2.0.0\n" + ] + } + ], + "source": [ + "#\n", + "# Writing to PostgreSQL database\n", + "#\n", + "import transport\n", + "from transport import providers\n", + "import pandas as pd\n", + "_data = pd.DataFrame({\"name\":['James Bond','Steve Rogers','Steve Nyemba'],'age':[55,150,44]})\n", + "sqw = transport.factory.instance(provider=providers.SQLITE,database='/home/steve/demo.db3',table='friends',context='write')\n", + "sqw.write(_data,if_exists='replace') #-- default is append\n", + "print (transport.__version__)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Reading from SQLite3+\n", + "\n", + "The cell below reads the data that has been written by the cell above and computes the average age within a PostreSQL (simple query). \n", + "\n", + "- Basic read of the designated table (friends) created above\n", + "- Execute an aggregate SQL against the table\n", + "\n", + "**NOTE**\n", + "\n", + "It is possible to use **transport.factory.instance** or **transport.instance** they are the same. It allows the maintainers to know that we used a factory design pattern." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " name age\n", + "0 James Bond 55\n", + "1 Steve Rogers 150\n", + "2 Steve Nyemba 44\n", + "--------- STATISTICS ------------\n", + " _counts AVG(age)\n", + "0 3 83.0\n" + ] + } + ], + "source": [ + "\n", + "import transport\n", + "from transport import providers\n", + "pgr = transport.instance(provider=providers.SQLITE,database='/home/steve/demo.db3',table='friends')\n", + "_df = pgr.read()\n", + "_query = 'SELECT COUNT(*) _counts, AVG(age) from friends'\n", + "_sdf = pgr.read(sql=_query)\n", + "print (_df)\n", + "print ('--------- STATISTICS ------------')\n", + "print (_sdf)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The cell bellow show the content of an auth_file, in this case if the dataset/table in question is not to be shared then you can use auth_file with information associated with the parameters.\n", + "\n", + "**NOTE**:\n", + "\n", + "The auth_file is intended to be **JSON** formatted. 
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\n",
+    "{\n",
+    " \"provider\":\"sqlite\",\n",
+    " \"database\":\"/home/steve/demo.db3\",\"table\":\"friends\"\n",
+    "}\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}