Some handy functions for grouping continuous variables and imputing missing values in a DataFrame

The following example shows how to bin an age variable into groups,
along with some simple missing-value imputation procedures.

There is also an example of transforming a timestamp variable into weekday and hour information.

import pandas as pd

from sklearn.base import BaseEstimator, TransformerMixin

# utility functions

def age_input(age):
    """Map a single age value to a categorical age-group label.

    Args:
        age: A scalar age, or a null value (``None`` / ``NaN``) as
            recognised by ``pd.isnull``.

    Returns:
        ``'missing'`` when ``age`` is null; otherwise one of ``'16-20'``,
        ``'21-24'``, ``'25-34'``, ``'35-44'``, ``'45-54'``, ``'55-64'`` or
        ``'65+'``. Every age of 20 or below is labelled ``'16-20'``.

    Raises:
        ValueError, TypeError: If ``age`` is non-null but cannot be
            converted with ``int()``.
    """
    if pd.isnull(age):
        return 'missing'

    # int() truncates, so a fractional age lands in the bucket of its
    # whole-year part (e.g. 24.9 -> 24 -> '21-24').
    age = int(age)

    if age <= 20:
        return '16-20'
    if age <= 24:
        return '21-24'
    if age <= 34:
        return '25-34'
    if age <= 44:
        return '35-44'
    if age <= 54:
        return '45-54'
    if age <= 64:
        return '55-64'
    return '65+'

# missing value handling / imputation in a dataframe

def missing_handle(df):
    """Fill missing values in every column of ``df``, chosen by column dtype.

    * ``object`` columns: nulls are replaced with the string ``'missing'``.
    * ``bool`` columns: a ``<col>_null`` 0/1 indicator column is added and
      nulls are replaced with the column's most frequent value.
    * all other columns: nulls are replaced with the sentinel ``-999``.

    Args:
        df: The DataFrame to impute. It is modified in place.

    Returns:
        The same ``df`` object, after imputation.
    """
    # Snapshot the column labels: indicator columns are added inside the
    # loop and must not be visited themselves.
    for col in list(df.columns):
        if df[col].dtype == object:
            df[col] = df[col].fillna('missing')
        elif df[col].dtype == bool:
            df[col + '_null'] = df[col].isnull().astype(int)
            modes = df[col].mode()
            # mode() is empty for an empty column; there is nothing to
            # fill in that case, so skip instead of failing on lookup.
            if not modes.empty:
                df[col] = df[col].fillna(modes.iloc[0])
        else:
            df[col] = df[col].fillna(-999)

    return df

class dayandhour_Transformer(BaseEstimator, TransformerMixin):
    """Stateless transformer that prepares a raw DataFrame.

    ``transform`` drops the ``'sentat'`` column, replaces the ``'age'``
    column with a categorical ``'age_group'`` column (via ``age_input``) and
    then imputes missing values (via ``missing_handle``).
    """

    # Class Constructor
    def __init__(self):
        # No parameters to store.
        pass

    # Return self, nothing else to do here
    def fit(self, X, y=None):
        """Do nothing and return ``self``; the transformer has no state to fit."""
        return self

    # Customized transformer method
    def transform(self, X_, y=None):
        """Return a transformed copy of ``X_``; the input is not modified.

        Args:
            X_: DataFrame containing at least the ``'sentat'`` and ``'age'``
                columns.
            y: Ignored.

        Returns:
            A new DataFrame without ``'sentat'`` and ``'age'``, with an added
            ``'age_group'`` column and with missing values imputed.
        """
        X = X_.copy()

        # NOTE(review): the class name suggests weekday/hour features were
        # meant to be derived from 'sentat' before it is dropped, but no
        # such step is visible here - confirm against the original source.
        X = X.drop('sentat', axis=1)

        # apply age group function here
        X['age_group'] = X['age'].apply(age_input)
        X = X.drop('age', axis=1)

        # apply missing-value handling here
        X = missing_handle(X)

        return X

# define the transformer (its constructor takes no arguments)
dayandhour_transformer = dayandhour_Transformer()

# usage example: `df` is not defined in this snippet and must be supplied by
# the caller; transform() reads its 'sentat' and 'age' columns and returns a
# new DataFrame.
df_new = dayandhour_transformer.transform(df)

Author: robot learner
Reprint policy: Unless otherwise stated, all articles in this blog are licensed under CC BY 4.0. If you reproduce this article, please credit the source: robot learner.