# Activity_Detection_Code
"""Activity_Detection_Code.ipynb"""
import pandas as pd
import numpy as np
import math
import seaborn as sns
import matplotlib.pyplot as plt
# Use seaborn's dark-grid theme for every plot drawn after this point.
sns.set(style='darkgrid')
# The line below is an IPython/Colab shell escape. A leading `!` is not valid
# Python, so it is kept as a comment; run `pip install m2cgen` from a shell
# (or re-enable the line inside a notebook cell) before using m2cgen.
# !pip install m2cgen
def features(lst):
    """Compute four summary statistics of a sequence of numbers.

    Args:
        lst: Sized iterable of numeric samples (anything supporting ``len``
            and iteration, e.g. a list).

    Returns:
        A four-element list ``[avg, std_dev, energy, skew]``:
          1. avg     - arithmetic mean,
          2. std_dev - population standard deviation (divides by ``len(lst)``),
          3. energy  - mean of the squared samples,
          4. skew    - third central moment divided by
             ``len(lst) * (std_dev + 1e-6) ** 3``.
        For empty input the result is ``[0, 0, 0, 0]`` so that every call
        yields a vector of the same length.
    """
    count = len(lst)
    if count == 0:
        # Same shape as the normal result; a differently sized placeholder
        # would give callers feature vectors of inconsistent length.
        return [0, 0, 0, 0]

    avg = sum(lst) / count  # 1
    std_dev = (sum((x - avg) ** 2 for x in lst) / count) ** 0.5  # 2
    energy = sum(x ** 2 for x in lst) / count  # 3
    # The small constant keeps the division defined when all samples are equal
    # (std_dev == 0).
    skew = sum((x - avg) ** 3 for x in lst) * (
        1 / (count * ((std_dev + 0.000001) ** 3))
    )  # 4
    return [avg, std_dev, energy, skew]
def process_data(activity_df):
    """Clean one activity's raw sensor table in place.

    Every column is coerced to a numeric dtype (entries that cannot be parsed
    become NaN), then the 'Index', 'M_x', 'M_y' and 'M_z' columns are dropped
    when present.

    Args:
        activity_df: Expected to be a pandas DataFrame of raw readings. It is
            modified in place (column reassignment and ``drop(inplace=True)``).

    NOTE(review): the visible body neither creates ``computed_features`` /
    ``feature_vector`` nor returns a value, although callers use the result
    of ``process_data(...)``. The feature-extraction code and the return
    statement appear to be missing from this excerpt; restore them from the
    original notebook.
    """
    # Convert all columns to numeric (ensuring data consistency)
    for col in activity_df.columns:
        activity_df[col] = pd.to_numeric(activity_df[col], errors='coerce')

    # After doing EDA, it was found that only A and G features are sufficient
    # for training.
    activity_df.drop(columns=['Index', 'M_x', 'M_y', 'M_z'], errors='ignore',
                     inplace=True)

    # NOTE(review): presumably this line belongs inside a loop that builds one
    # feature_vector per data window; that loop is not visible here.
    computed_features.append(feature_vector)
# Loop through each activity and plot the corresponding sensor data.
# The i-th activity's `axis` column is drawn on subplot ax[i], titled with the
# activity name, the column name and the sensor type.
# NOTE(review): `activities`, `ax`, `axis` and `sensor_type` are not defined
# in the visible part of the file; presumably they come from enclosing
# figure/sensor loops. Confirm before running this fragment on its own.
for i, (activity, df) in enumerate(activities.items()):
    df[axis].plot(ax=ax[i], legend=False)
    ax[i].set_title(f'{activity} - {axis} ({sensor_type})', fontsize=12)
    ax[i].set_xlabel("Time")
import numpy as np
import matplotlib.pyplot as plt
# Replace out-of-range A_x readings with NaN for the current activity.
# Readings outside [-1, 1] are treated as outliers for 'Idle'; every other
# activity uses [-1000, 1000].
# NOTE(review): `activity`, `df`, `df_cleaned` and `cleaned_activities` are
# defined by code not visible here; presumably this runs once per activity
# with df_cleaned being a copy of df. Confirm against the original notebook.
a_x_limit = 1 if activity == 'Idle' else 1000
a_x_outliers = (df_cleaned['A_x'] > a_x_limit) | (df_cleaned['A_x'] < -a_x_limit)
df_cleaned.loc[a_x_outliers, 'A_x'] = np.nan
# The reported count is taken from `df` rather than `df_cleaned`, because the
# offending values in df_cleaned have just been overwritten with NaN.
n_marked = sum((df['A_x'] > a_x_limit) | (df['A_x'] < -a_x_limit))
print(f"Marked {n_marked} outliers in A_x as NaN in {activity} dataset.")
cleaned_activities[activity] = df_cleaned
# Sensor display name -> the three per-axis column names recorded for it.
# Entries are added in accelerometer, gyroscope, magnetometer order.
sensor_groups = {}
sensor_groups['Accelerometer'] = ['A_x', 'A_y', 'A_z']
sensor_groups['Gyroscope'] = ['G_x', 'G_y', 'G_z']
sensor_groups['Magnetometer'] = ['M_x', 'M_y', 'M_z']
# Run process_data on each activity's raw frame and store the activity name
# under the 'Target' key of the result, one pair of statements per activity.
# NOTE(review): idle_df, jumping_df, sweeping_df, vibration_df and walking_df
# are loaded by code not visible here; presumably the five results are later
# combined into the `df` used for training. Confirm against the notebook.
idle = process_data(idle_df)
idle['Target'] = 'Idle'
jumping = process_data(jumping_df)
jumping['Target'] = 'Jumping'
sweeping = process_data(sweeping_df)
sweeping['Target'] = 'Sweeping'
vibration = process_data(vibration_df)
vibration['Target'] = 'Vibration'
walking = process_data(walking_df)
walking['Target'] = 'Walking'
# Separate the feature columns (every column except 'Target') from the labels.
# NOTE(review): `df` is not assigned in the visible code; presumably it is the
# combined table of all processed activities. Confirm.
cols = df.columns.drop('Target')
X, y = df[cols], df['Target']
from sklearn.model_selection import train_test_split
# Hold out 10% of the rows as a test set; random_state=42 makes the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1,
random_state=42)
# X_train
from sklearn.ensemble import RandomForestClassifier
from sklearn import metrics
# A deliberately small forest: 5 trees, each capped at depth 5, with a fixed
# seed so repeated runs build the same model.
model_rf=RandomForestClassifier(max_depth=5, n_estimators=5, random_state=0)
model_rf.fit(X_train, y_train)
# Report per-class metrics and the confusion matrix on the training rows...
print("Training")
training_predict_rf = model_rf.predict(X_train)
print (metrics.classification_report(y_train, training_predict_rf, digits = 3 ))
print (metrics.confusion_matrix(y_train, training_predict_rf))
# ...and the same two reports on the held-out test rows.
test_predict_rf = model_rf.predict(X_test)
print ("Test")
print (metrics.classification_report(y_test, test_predict_rf, digits = 3 ))
print (metrics.confusion_matrix(y_test, test_predict_rf))
# NOTE(review): model_to_python_rf is not assigned anywhere in the visible
# code; presumably it holds the m2cgen export of model_rf. Confirm that the
# cell producing it was not lost.
print(model_to_python_rf)
# Print the classification report and confusion matrix of `model_dt`, first on
# the training split and then on the test split.
# NOTE(review): model_dt is not created in the visible code; presumably it is
# a decision-tree classifier fitted on X_train/y_train earlier. Confirm.
# Training Evaluation
print("Training Performance")
training_predict_dt = model_dt.predict(X_train)
print(metrics.classification_report(y_train, training_predict_dt, digits=3))
print(metrics.confusion_matrix(y_train, training_predict_dt))
# Testing Evaluation
print("\nTesting Performance")
test_predict_dt = model_dt.predict(X_test)
print(metrics.classification_report(y_test, test_predict_dt, digits=3))
print(metrics.confusion_matrix(y_test, test_predict_dt))
# Print the classification report and confusion matrix of `model_extra_trees`
# on the training split, then on the test split. Predictions are compared with
# y_train_encoded / y_test_encoded rather than y_train / y_test.
# NOTE(review): model_extra_trees, y_train_encoded and y_test_encoded are not
# defined in the visible code; presumably the model was fitted on
# label-encoded targets in a cell that is missing here. Confirm.
# Training Performance
print("Training Performance")
training_predict_et = model_extra_trees.predict(X_train)
print(metrics.classification_report(y_train_encoded, training_predict_et,
digits=3))
print(metrics.confusion_matrix(y_train_encoded, training_predict_et))
# Testing Performance
print("\nTesting Performance")
test_predict_et = model_extra_trees.predict(X_test)
print(metrics.classification_report(y_test_encoded, test_predict_et, digits=3))
print(metrics.confusion_matrix(y_test_encoded, test_predict_et))
# Show which estimator class model_rf is an instance of.
print(model_rf.__class__)
# Bare expression: a notebook or REPL echoes the value of model_rf.classes_,
# but a plain script evaluates it and discards the result.
model_rf.classes_