bug fixes and optimizations
This commit is contained in:
parent
105ff00224
commit
38e1bce6c2
37
README.md
37
README.md
|
@ -35,9 +35,44 @@ Within the virtual environment perform the following :
|
||||||
|
|
||||||
pip install git+https://dev.the-phi.com/git/steve/data-transport.git
|
pip install git+https://dev.the-phi.com/git/steve/data-transport.git
|
||||||
|
|
||||||
|
Once installed **data-transport** can be used as a library in code or a command line interface (CLI)
|
||||||
|
|
||||||
|
## Data Transport as a Library (in code)
|
||||||
|
---
|
||||||
|
|
||||||
## In code (Embedded)
|
The data-transport can be used within code as a library
|
||||||
|
* Read/Write against [mongodb](https://github.com/lnyemba/data-transport/wiki/mongodb)
|
||||||
|
* Read/Write against traditional [RDBMS](https://github.com/lnyemba/data-transport/wiki/rdbms)
|
||||||
|
* Read/Write against [bigquery](https://github.com/lnyemba/data-transport/wiki/bigquery)
|
||||||
|
|
||||||
|
The read/write functions make data-transport a great candidate for **data-science**; **data-engineering** or all things pertaining to data. It enables operations across multiple data-stores (relational or not)
|
||||||
|
|
||||||
|
## Command Line Interface (CLI)
|
||||||
|
---
|
||||||
|
The CLI program is called **transport** and it requires a configuration file
|
||||||
|
|
||||||
|
```
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"id":"logs",
|
||||||
|
"source":{
|
||||||
|
"provider":"postgresql","context":"read","database":"mydb",
|
||||||
|
"cmd":{"sql":"SELECT * FROM logs limit 10"}
|
||||||
|
},
|
||||||
|
"target":{
|
||||||
|
"provider":"bigquery","private_key":"/bgqdrive/account/bq-service-account-key.json",
|
||||||
|
"dataset":"mydataset"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
|
||||||
|
]
|
||||||
|
```
|
||||||
|
|
||||||
|
Assuming the above content is stored in a file called **etl-config.json**, we would perform the following in a terminal window:
|
||||||
|
|
||||||
|
```
|
||||||
|
[steve@data-transport]$ transport --config ./etl-config.json [--index <value>]
|
||||||
|
```
|
||||||
|
|
||||||
**Reading/Writing Mongodb**
|
**Reading/Writing Mongodb**
|
||||||
|
|
||||||
|
|
2
setup.py
2
setup.py
|
@ -13,7 +13,7 @@ args = {
|
||||||
"license":"MIT",
|
"license":"MIT",
|
||||||
"packages":["transport"]}
|
"packages":["transport"]}
|
||||||
args["keywords"]=['mongodb','couchdb','rabbitmq','file','read','write','s3','sqlite']
|
args["keywords"]=['mongodb','couchdb','rabbitmq','file','read','write','s3','sqlite']
|
||||||
args["install_requires"] = ['pymongo','pandas','numpy','cloudant','pika','nzpy','boto3','boto','pyarrow','google-cloud-bigquery','google-cloud-bigquery-storage','flask-session','smart_open','botocore','psycopg2-binary','mysql-connector-python']
|
args["install_requires"] = ['pymongo','sqlalchemy','pandas','numpy','cloudant','pika','nzpy','boto3','boto','pyarrow','google-cloud-bigquery','google-cloud-bigquery-storage','flask-session','smart_open','botocore','psycopg2-binary','mysql-connector-python']
|
||||||
args["url"] = "https://healthcareio.the-phi.com/git/code/transport.git"
|
args["url"] = "https://healthcareio.the-phi.com/git/code/transport.git"
|
||||||
args['scripts'] = ['bin/transport']
|
args['scripts'] = ['bin/transport']
|
||||||
if sys.version_info[0] == 2 :
|
if sys.version_info[0] == 2 :
|
||||||
|
|
|
@ -125,9 +125,9 @@ class SQLRW :
|
||||||
_out = None
|
_out = None
|
||||||
try:
|
try:
|
||||||
if "select" in _sql.lower() :
|
if "select" in _sql.lower() :
|
||||||
cursor.close()
|
|
||||||
_conn = self._engine.connect() if self._engine else self.conn
|
# _conn = self._engine if self._engine else self.conn
|
||||||
return pd.read_sql(_sql,_conn)
|
return pd.read_sql(_sql,self.conn)
|
||||||
else:
|
else:
|
||||||
# Executing a command i.e no expected return values ...
|
# Executing a command i.e no expected return values ...
|
||||||
cursor.execute(_sql)
|
cursor.execute(_sql)
|
||||||
|
@ -151,7 +151,8 @@ class SQLReader(SQLRW,Reader) :
|
||||||
if 'sql' in _args :
|
if 'sql' in _args :
|
||||||
_sql = (_args['sql'])
|
_sql = (_args['sql'])
|
||||||
else:
|
else:
|
||||||
_sql = "SELECT :fields FROM "+self.table
|
table = self.table if self.table is not None else _args['table']
|
||||||
|
_sql = "SELECT :fields FROM "+self._tablename(table)
|
||||||
if 'filter' in _args :
|
if 'filter' in _args :
|
||||||
_sql = _sql +" WHERE "+_args['filter']
|
_sql = _sql +" WHERE "+_args['filter']
|
||||||
_fields = '*' if not self.fields else ",".join(self.fields)
|
_fields = '*' if not self.fields else ",".join(self.fields)
|
||||||
|
@ -220,7 +221,7 @@ class SQLWriter(SQLRW,Writer):
|
||||||
# cursor.close()
|
# cursor.close()
|
||||||
self.conn.commit()
|
self.conn.commit()
|
||||||
pass
|
pass
|
||||||
def write(self,info):
|
def write(self,info,**_args):
|
||||||
"""
|
"""
|
||||||
:param info writes a list of data to a given set of fields
|
:param info writes a list of data to a given set of fields
|
||||||
"""
|
"""
|
||||||
|
@ -324,7 +325,8 @@ class BQReader(BigQuery,Reader) :
|
||||||
def __init__(self,**_args):
|
def __init__(self,**_args):
|
||||||
|
|
||||||
super().__init__(**_args)
|
super().__init__(**_args)
|
||||||
|
def apply(self,sql):
|
||||||
|
self.read(sql=sql)
|
||||||
pass
|
pass
|
||||||
def read(self,**_args):
|
def read(self,**_args):
|
||||||
SQL = None
|
SQL = None
|
||||||
|
@ -359,6 +361,7 @@ class BQWriter(BigQuery,Writer):
|
||||||
try:
|
try:
|
||||||
if self.parallel or 'lock' in _args :
|
if self.parallel or 'lock' in _args :
|
||||||
BQWriter.lock.acquire()
|
BQWriter.lock.acquire()
|
||||||
|
_args['table'] = self.table if 'table' not in _args else _args['table']
|
||||||
self._write(_info,**_args)
|
self._write(_info,**_args)
|
||||||
finally:
|
finally:
|
||||||
if self.parallel:
|
if self.parallel:
|
||||||
|
|
Loading…
Reference in New Issue