From 6841ccbd5e4abb8322df6da8b55904f99bcae89c Mon Sep 17 00:00:00 2001
From: Steve Nyemba <nyemba@gmail.com>
Date: Tue, 17 May 2022 13:24:24 -0500
Subject: [PATCH] bug fix: missing data, adding an additional type:
 pandas._libs.missing.NAType in addition to np.nan, np.na

---
 data/maker/__init__.py | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/data/maker/__init__.py b/data/maker/__init__.py
index 24fabe8..b9b48e4 100644
--- a/data/maker/__init__.py
+++ b/data/maker/__init__.py
@@ -52,6 +52,7 @@ class Learner(Process):
         self._encoder = None
         self._map = None
         self._df = _args['data'] if 'data' in _args else None
+        
         self.name   =  self.__class__.__name__
         
         #
@@ -92,10 +93,22 @@ class Learner(Process):
         if self._df is None :
             self._df     = reader.read(**_read_args)
         columns = self.columns if self.columns else self._df.columns
+        #
+        # The loop below is a known source of inefficiency; unfortunately, Python's type inference does not work well in certain cases.
+        # - It tries to address the issue by sampling a few values per column to pick a fill value for missing data (perhaps better suited to the reading components).
+        for name in columns :
+            _index = np.random.choice(np.arange(self._df[name].size),5,False)
+            no_value = [type(value) in [int,float,np.int64,np.int32,np.float32,np.float64] for value in self._df[name].values[_index]]
+            print ([name,np.sum(no_value)])
+            no_value = 0 if np.sum(no_value) > 0 else ''
+            
+            self._df[name] = self._df[name].fillna(no_value)
+
+
         #
         # convert the data to binary here ...
-        
-        _args = {"schema":self.get_schema(),"data":self._df,"columns":columns}
+        _schema = self.get_schema()       
+        _args = {"schema":_schema,"data":self._df,"columns":columns}
         if self._map :
             _args['map'] = self._map
         self._encoder = prepare.Input(**_args)  if self._df.shape[0] > 0 else None