bug fixes and optimizations

This commit is contained in:
Steve Nyemba 2022-03-12 12:25:29 -06:00
parent 105ff00224
commit 38e1bce6c2
3 changed files with 46 additions and 8 deletions

View File

@ -35,9 +35,44 @@ Within the virtual environment perform the following :
pip install git+https://dev.the-phi.com/git/steve/data-transport.git
Once installed, **data-transport** can be used as a library in code or through a command line interface (CLI).
## Data Transport as a Library (in code)
---
## In code (Embedded)
The data-transport can be used within code as a library
* Read/Write against [mongodb](https://github.com/lnyemba/data-transport/wiki/mongodb)
* Read/Write against traditional [RDBMS](https://github.com/lnyemba/data-transport/wiki/rdbms)
* Read/Write against [bigquery](https://github.com/lnyemba/data-transport/wiki/bigquery)
The read/write functions make data-transport a great candidate for **data-science**, **data-engineering**, or all things pertaining to data. It enables operations across multiple data-stores (relational or not).
## Command Line Interface (CLI)
---
The CLI program is called **transport** and it requires a configuration file:
```
[
{
"id":"logs",
"source":{
"provider":"postgresql","context":"read","database":"mydb",
"cmd":{"sql":"SELECT * FROM logs limit 10"}
},
"target":{
"provider":"bigquery","private_key":"/bgqdrive/account/bq-service-account-key.json",
"dataset":"mydataset"
}
}
]
```
Assuming the above content is stored in a file called **etl-config.json**, we would perform the following in a terminal window:
```
[steve@data-transport]$ transport --config ./etl-config.json [--index <value>]
```
**Reading/Writing Mongodb**

View File

@ -13,7 +13,7 @@ args = {
"license":"MIT",
"packages":["transport"]}
args["keywords"]=['mongodb','couchdb','rabbitmq','file','read','write','s3','sqlite']
args["install_requires"] = ['pymongo','pandas','numpy','cloudant','pika','nzpy','boto3','boto','pyarrow','google-cloud-bigquery','google-cloud-bigquery-storage','flask-session','smart_open','botocore','psycopg2-binary','mysql-connector-python']
args["install_requires"] = ['pymongo','sqlalchemy','pandas','numpy','cloudant','pika','nzpy','boto3','boto','pyarrow','google-cloud-bigquery','google-cloud-bigquery-storage','flask-session','smart_open','botocore','psycopg2-binary','mysql-connector-python']
args["url"] = "https://healthcareio.the-phi.com/git/code/transport.git"
args['scripts'] = ['bin/transport']
if sys.version_info[0] == 2 :

View File

@ -125,9 +125,9 @@ class SQLRW :
_out = None
try:
if "select" in _sql.lower() :
cursor.close()
_conn = self._engine.connect() if self._engine else self.conn
return pd.read_sql(_sql,_conn)
# _conn = self._engine if self._engine else self.conn
return pd.read_sql(_sql,self.conn)
else:
# Executing a command i.e no expected return values ...
cursor.execute(_sql)
@ -151,7 +151,8 @@ class SQLReader(SQLRW,Reader) :
if 'sql' in _args :
_sql = (_args['sql'])
else:
_sql = "SELECT :fields FROM "+self.table
table = self.table if self.table is not None else _args['table']
_sql = "SELECT :fields FROM "+self._tablename(table)
if 'filter' in _args :
_sql = _sql +" WHERE "+_args['filter']
_fields = '*' if not self.fields else ",".join(self.fields)
@ -220,7 +221,7 @@ class SQLWriter(SQLRW,Writer):
# cursor.close()
self.conn.commit()
pass
def write(self,info):
def write(self,info,**_args):
"""
:param info writes a list of data to a given set of fields
"""
@ -324,7 +325,8 @@ class BQReader(BigQuery,Reader) :
def __init__(self,**_args):
super().__init__(**_args)
def apply(self,sql):
self.read(sql=sql)
pass
def read(self,**_args):
SQL = None
@ -359,6 +361,7 @@ class BQWriter(BigQuery,Writer):
try:
if self.parallel or 'lock' in _args :
BQWriter.lock.acquire()
_args['table'] = self.table if 'table' not in _args else _args['table']
self._write(_info,**_args)
finally:
if self.parallel: