bug fix with type inference
This commit is contained in:
parent
1e3e0eac45
commit
2b228f6075
|
@ -282,9 +282,11 @@ class Generator (Learner):
|
||||||
|
|
||||||
if _item['type'].upper() in ['DATE','DATETIME','TIMESTAMP'] :
|
if _item['type'].upper() in ['DATE','DATETIME','TIMESTAMP'] :
|
||||||
FORMAT = '%Y-%m-%d'
|
FORMAT = '%Y-%m-%d'
|
||||||
|
|
||||||
try:
|
try:
|
||||||
#
|
#
|
||||||
#-- Sometimes data isn't all it's meant to be
|
#-- Sometimes data isn't all it's meant to be
|
||||||
|
SIZE = -1
|
||||||
if 'format' in self.info and name in self.info['format'] :
|
if 'format' in self.info and name in self.info['format'] :
|
||||||
FORMAT = self.info['format'][name]
|
FORMAT = self.info['format'][name]
|
||||||
SIZE = 10
|
SIZE = 10
|
||||||
|
@ -292,20 +294,34 @@ class Generator (Learner):
|
||||||
FORMAT = '%Y-%m-%d %H:%M:%S'
|
FORMAT = '%Y-%m-%d %H:%M:%S'
|
||||||
SIZE = 19
|
SIZE = 19
|
||||||
|
|
||||||
|
if SIZE > 0 :
|
||||||
|
|
||||||
|
values = pd.to_datetime(_df[name], format=FORMAT).astype(str)
|
||||||
|
_df[name] = [_date[:SIZE] for _date in values]
|
||||||
|
|
||||||
|
|
||||||
r[name] = FORMAT
|
r[name] = FORMAT
|
||||||
_df[name] = pd.to_datetime(_df[name], format=FORMAT) #.astype('datetime64[ns]')
|
# _df[name] = pd.to_datetime(_df[name], format=FORMAT) #.astype('datetime64[ns]')
|
||||||
if _item['type'] in ['DATETIME','TIMESTAMP']:
|
if _item['type'] in ['DATETIME','TIMESTAMP']:
|
||||||
pass #;_df[name] = _df[name].fillna('').astype('datetime64[ns]')
|
pass #;_df[name] = _df[name].fillna('').astype('datetime64[ns]')
|
||||||
else:
|
|
||||||
_df[name] = _df[name].astype(str)
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
pass
|
pass
|
||||||
finally:
|
finally:
|
||||||
pass
|
pass
|
||||||
else:
|
else:
|
||||||
# print (_item)
|
|
||||||
pass
|
#
|
||||||
_df = _df.replace('NaT','').replace('NA','')
|
# Because types are inferred on the basis of the sample being processed they can sometimes be wrong
|
||||||
|
# To help disambiguate we add the schema information
|
||||||
|
_type = None
|
||||||
|
if 'int' in _df[name].dtypes.name or 'int' in _item['type'].lower():
|
||||||
|
_type = np.int
|
||||||
|
elif 'float' in _df[name].dtypes.name or 'float' in _item['type'].lower():
|
||||||
|
_type = np.float
|
||||||
|
if _type :
|
||||||
|
_df[name] = _df[name].fillna(0).replace('',0).astype(_type)
|
||||||
|
# _df = _df.replace('NaT','').replace('NA','')
|
||||||
|
|
||||||
if r :
|
if r :
|
||||||
self.log(**{'action':'format','input':r})
|
self.log(**{'action':'format','input':r})
|
||||||
|
@ -319,7 +335,7 @@ class Generator (Learner):
|
||||||
_store['context'] = 'write' #-- Just in case
|
_store['context'] = 'write' #-- Just in case
|
||||||
if 'table' not in _store :
|
if 'table' not in _store :
|
||||||
_store['table'] = self.info['from']
|
_store['table'] = self.info['from']
|
||||||
writer = transport.factory.instance(**_store)
|
|
||||||
N = 0
|
N = 0
|
||||||
for _iodf in _candidates :
|
for _iodf in _candidates :
|
||||||
_df = self._df.copy()
|
_df = self._df.copy()
|
||||||
|
@ -346,7 +362,9 @@ class Generator (Learner):
|
||||||
_schema = [{'name':_item.name,'type':_item.field_type} for _item in _schema]
|
_schema = [{'name':_item.name,'type':_item.field_type} for _item in _schema]
|
||||||
_df = self.format(_df,_schema)
|
_df = self.format(_df,_schema)
|
||||||
|
|
||||||
|
writer = transport.factory.instance(**_store)
|
||||||
writer.write(_df,schema=_schema)
|
writer.write(_df,schema=_schema)
|
||||||
|
# _df.to_csv('foo.csv')
|
||||||
|
|
||||||
self.log(**{'action':'write','input':{'rows':N,'candidates':len(_candidates)}})
|
self.log(**{'action':'write','input':{'rows':N,'candidates':len(_candidates)}})
|
||||||
class Shuffle(Generator):
|
class Shuffle(Generator):
|
||||||
|
|
|
@ -209,6 +209,7 @@ class Input :
|
||||||
# @NOTE: For some reason, there is an out of memory error created here, this seems to fix it (go figure)
|
# @NOTE: For some reason, there is an out of memory error created here, this seems to fix it (go figure)
|
||||||
#
|
#
|
||||||
_matrix = np.array([np.repeat(0,cols.size) for i in range(0,row_count)])
|
_matrix = np.array([np.repeat(0,cols.size) for i in range(0,row_count)])
|
||||||
|
|
||||||
[np.put(_matrix[i], np.where(cols == rows[i]) ,1)for i in np.arange(row_count) if np.where(cols == rows[i])[0].size > 0]
|
[np.put(_matrix[i], np.where(cols == rows[i]) ,1)for i in np.arange(row_count) if np.where(cols == rows[i])[0].size > 0]
|
||||||
# else:
|
# else:
|
||||||
# _matrix = cp.zeros([row_count,cols.size])
|
# _matrix = cp.zeros([row_count,cols.size])
|
||||||
|
|
Loading…
Reference in New Issue