0% found this document useful (0 votes)
8 views2 pages

2795529-Python One Hot Encoding

This document shows how to preprocess a sample dataset with missing values and both numerical and categorical features using scikit-learn pipelines and column transformer. The preprocessing includes imputing missing values, one-hot encoding categorical features, and scaling numerical features.

Uploaded by

Santhosh
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
8 views2 pages

2795529-Python One Hot Encoding

This document shows how to preprocess a sample dataset with missing values and both numerical and categorical features using scikit-learn pipelines and column transformer. The preprocessing includes imputing missing values, one-hot encoding categorical features, and scaling numerical features.

Uploaded by

Santhosh
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 2

import pandas as pd

from sklearn.preprocessing import StandardScaler, OneHotEncoder


from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer

# Sample dataset: two numeric columns (each with one missing value) and
# two categorical columns, small enough to eyeball the transformed output.
data = {
'Age': [25, 30, 35, None, 40],
'Gender': ['Male', 'Female', 'Male', 'Male', 'Female'],
'Income': [50000, 60000, None, 70000, 80000],
'Education': ['Bachelor', 'Master', 'PhD', 'Bachelor', 'Master']
}

df = pd.DataFrame(data)

# Define preprocessing steps for numerical and categorical features.
# BUG FIX: the numeric feature list previously said 'Incoming', but the
# DataFrame column is named 'Income' — ColumnTransformer would fail on fit
# because the column does not exist.
numeric_features = ['Age', 'Income']
categorical_features = ['Gender', 'Education']

# Numeric pipeline: replace missing values with the column mean, then
# standardize to zero mean / unit variance.
numeric_transformer = Pipeline(steps=[
('imputer', SimpleImputer(strategy='mean')),
('scaler', StandardScaler())
])

# Categorical pipeline: replace missing values with the most frequent
# category, then expand each category into one-hot indicator columns.
categorical_transformer = Pipeline(steps=[
('imputer', SimpleImputer(strategy='most_frequent')),
('onehot', OneHotEncoder())
])

# Combine the transformers so each column group is routed through its own
# pipeline; columns not listed are dropped by ColumnTransformer's default.
preprocessor = ColumnTransformer(
transformers=[
('num', numeric_transformer, numeric_features),
('cat', categorical_transformer, categorical_features)
])

# Fit all pipelines on the data and transform it in a single step.
transformed_data = preprocessor.fit_transform(df)

# Print the transformed matrix: scaled numeric columns followed by the
# one-hot encoded categorical columns.
print(transformed_data)

#------------------------------------------------------------------------------------------------------------------

# import pandas as pd
# from sklearn.preprocessing import StandardScaler, OneHotEncoder
# from sklearn.compose import ColumnTransformer
# from sklearn.pipeline import Pipeline
# from sklearn.impute import SimpleImputer
#
# # Sample dataset
# data = {
# 'Age': [25, 30, 35, None, 40],
# 'Gender': ['Male', 'Female', 'Male', 'Male', 'Female'],
# 'Income': [50000, 60000, None, 70000, 80000],
# 'Education': ['Bachelor', 'Master', 'PhD', 'Bachelor', 'Master']
# }
#
# df = pd.DataFrame(data)
#
# # Define preprocessing steps for numerical and categorical features
# numeric_features = ['Age', 'Income']
# numeric_transformer = Pipeline(steps=[
# ('imputer', SimpleImputer(strategy='mean')),
# ('scaler', StandardScaler())
# ])
#
# categorical_features = ['Gender', 'Education']
# categorical_transformer = Pipeline(steps=[
# ('imputer', SimpleImputer(strategy='most_frequent')),
# ('onehot', OneHotEncoder())
# ])
#
# # Combine the transformers using ColumnTransformer
# preprocessor = ColumnTransformer(
# transformers=[
# ('num', numeric_transformer, numeric_features),
# ('cat', categorical_transformer, categorical_features)
# ])
#
# # Apply preprocessing to the dataset
# transformed_data = preprocessor.fit_transform(df)
#
# # Concatenate the transformed data back into the original DataFrame
# transformed_df = pd.DataFrame(transformed_data, columns=['Age', 'Income', 'Female', 'Male', 'Bachelor', 'Master', 'PhD'])
# final_df = pd.concat([df.drop(columns=categorical_features), transformed_df], axis=1)
#
# # Print the final DataFrame
# print(final_df)

You might also like