From d0651ef6e6a9a214989c01c3b927f4ff73a06495 Mon Sep 17 00:00:00 2001 From: Steve Nyemba Date: Sat, 29 Jan 2022 17:18:20 -0600 Subject: [PATCH] documentation --- README.md | 55 +++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 37 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index 4a4657c..805fb8f 100644 --- a/README.md +++ b/README.md @@ -5,10 +5,11 @@ This project implements an abstraction of objects that can have access to a vari The supported data store providers : | Provider | Underlying Drivers | Description | -| ---- | ---| ---- | +| :---- | :----: | ----: | | sqlite| Native SQLite|SQLite3| | postgresql| psycopg2 | PostgreSQL | redshift| psycopg2 | Amazon Redshift +| s3| boto3 | Amazon Simple Storage Service | netezza| nzpsql | IBM Neteeza | Files: CSV, TSV| pandas| pandas data-frame | Couchdb| cloudant | Couchbase/Couchdb @@ -24,33 +25,51 @@ Mostly data scientists that don't really care about the underlying database and 1. Familiarity with **pandas data-frames** 2. Connectivity **drivers** are included -3. Useful for ETL +3. 
Useful for data migrations or ETL +# Usage -### Installation +## Installation -Within the virtual environment perform the following command: +Within the virtual environment perform the following: pip install git+https://dev.the-phi.com/git/steve/data-transport.git -Binaries and eggs will be provided later on -### Usage +## In code (Embedded) + +**Reading/Writing Mongodb** + +For this example we assume we are tunneling through port 27018 and there is no access control: + +``` +from transport import factory +reader = factory.instance(provider='mongodb',context='read',host='localhost',port='27018',db='example',doc='logs') + +df = reader.read() #-- reads the entire collection +print (df.head()) +# +#-- Applying mongodb command +PIPELINE = [{"$group":{"_id":None,"count":{"$sum":1}}}] +_command_={"cursor":{},"allowDiskUse":True,"aggregate":"logs","pipeline":PIPELINE} +df = reader.read(mongo=_command_) +print (df.head()) +reader.close() +``` +**Writing to Mongodb** +--- +``` +from transport import factory +import pandas as pd +writer = factory.instance(provider='mongodb',context='write',host='localhost',port='27018',db='example',doc='logs') + +df = pd.DataFrame({"names":["steve","nico"],"age":[40,30]}) +writer.write(df) +writer.close() +``` -In your code, perform the - import transport - from transport import factory - # - # importing a mongo reader - args = {"host":":","dbname":"","doc":"",["username":"","password":""]} - reader = factory.instance(provider='mongodb',doc=,db=) - # - # reading a document i.e just applying a find (no filters) - # - df = mreader.read() #-- pandas data frame - df.head() # # reading from postgresql