# This function determines the most common delimiter from a subset of possible delimiters.
# It uses a statistical approach to gauge the distribution of column counts for a given delimiter
# :sample sample string/content, expected to be a matrix, i.e. a list of rows
# """
# m = {',':[],'\t':[],'|':[],'\x3A':[]}
# delim = m.keys()
# for row in sample:
# for xchar in delim:
# if row.split(xchar) > 1:
# m[xchar].append(len(row.split(xchar)))
# else:
# m[xchar].append(0)
# #
# # Pick the delimiter with the smallest variance, provided the mean is greater than 1
# # This would be troublesome if there are many broken records in the sample
# #
# m = {id: np.var(m[id]) for id in m.keys() if m[id] != [] and int(np.mean(m[id]))>1}
# index = m.values().index( min(m.values()))
# xchar = m.keys()[index]
# return xchar
# def col_count(self,sample):
# """
# This function returns the number of columns of a given sample
# @pre self.xchar is not None
# """
# m = {}
# i = 0
# for row in sample:
# row = self.format(row)
# id = str(len(row))
# #id = str(len(row.split(self.xchar)))
# if id not in m:
# m[id] = 0
# m[id] = m[id] + 1
# index = m.values().index( max(m.values()) )
# ncols = int(m.keys()[index])
# return ncols;
# def format (self,row):
# """
# This function will clean records of a given row by removing non-ascii characters
# @pre self.xchar is not None
# """
# if isinstance(row,list) == False:
# #
# # We've observed that fields sometimes contain the delimiter as a legitimate character; we need to account for this and not tamper with the field values (unless necessary)
# cols = self.split(row)
# #cols = row.split(self.xchar)
# else:
# cols = row ;
# return [ re.sub('[^\x00-\x7F,\n,\r,\v,\b,]',' ',col.strip()).strip().replace('"','') for col in cols]
# def split (self,row):
# """
# This function splits a record and attempts to preserve the integrity of the data within, i.e. accounting for the double quotes.