Bug fix for missing data: handle an additional missing-value type, pandas._libs.missing.NAType, in addition to np.nan, np.na
This commit is contained in:
parent
2b228f6075
commit
6841ccbd5e
|
@ -52,6 +52,7 @@ class Learner(Process):
|
|||
self._encoder = None
|
||||
self._map = None
|
||||
self._df = _args['data'] if 'data' in _args else None
|
||||
|
||||
self.name = self.__class__.__name__
|
||||
|
||||
#
|
||||
|
@ -92,10 +93,22 @@ class Learner(Process):
|
|||
if self._df is None :
|
||||
self._df = reader.read(**_read_args)
|
||||
columns = self.columns if self.columns else self._df.columns
|
||||
#
|
||||
# Below is a source of inefficiency; unfortunately, Python's type inference doesn't work well in certain cases
|
||||
# - The code below tries to address the issue (Perhaps better suited for the reading components)
|
||||
for name in columns :
|
||||
_index = np.random.choice(np.arange(self._df[name].size),5,False)
|
||||
no_value = [type(value) in [int,float,np.int64,np.int32,np.float32,np.float64] for value in self._df[name].values[_index]]
|
||||
print ([name,np.sum(no_value)])
|
||||
no_value = 0 if np.sum(no_value) > 0 else ''
|
||||
|
||||
self._df[name] = self._df[name].fillna(no_value)
|
||||
|
||||
|
||||
#
|
||||
# convert the data to binary here ...
|
||||
|
||||
_args = {"schema":self.get_schema(),"data":self._df,"columns":columns}
|
||||
_schema = self.get_schema()
|
||||
_args = {"schema":_schema,"data":self._df,"columns":columns}
|
||||
if self._map :
|
||||
_args['map'] = self._map
|
||||
self._encoder = prepare.Input(**_args) if self._df.shape[0] > 0 else None
|
||||
|
|
Loading…
Reference in New Issue