Only Code
Only Code
import pandas as pd
import numpy as np
import tensorflow as tf
file_path = "/content/Water_creek_marine_seawater_beach_2023.csv"
# Load the raw CSV; without this read the `water` frame used below is undefined.
water = pd.read_csv(file_path)
# Normalise header text so later lookups can use stripped, lower-case names.
water.columns = water.columns.str.strip().str.lower()
# Drop the first data row and renumber the index from 0.
# NOTE(review): presumably that row is a second header line (min/max labels),
# given the 'ph' / 'ph.1' column pairs renamed below — confirm against the CSV.
water = water.iloc[1:].reset_index(drop=True)
# Rename columns for clarity
water.rename(columns={
'ph': 'min_ph',
'ph.1': 'max_ph',
'bod\n(mg/l)': 'min_bod',
'bod\n(mg/l).1': 'max_bod',
}, inplace=True)
'min_fecal_coliform', 'max_fecal_coliform',
'min_total_coliform', 'max_total_coliform']
water[numeric_columns] = water[numeric_columns].apply(pd.to_numeric,
errors='coerce')
water.dropna(subset=numeric_columns, inplace=True)
water['mean_temperature'] = (water['min_temperature'] +
water['max_temperature']) / 2
y_ph = water['mean_ph']
y_bod = water['mean_bod']
y_do = water['mean_do']
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
inputs = Input(shape=(X_train_scaled.shape[1],))
x = Dense(64, activation='relu')(inputs)
x = Dropout(0.2)(x)
x = Dense(32, activation='relu')(x)
model.compile(optimizer='adam', loss='mse')
model.summary()
history = model.fit(
X_train_scaled,
epochs=50,
batch_size=16
sns.set(style="darkgrid")
plt.figure(figsize=(10, 6))
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.tight_layout()
plt.show()
plt.figure(figsize=(10, 6))
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.tight_layout()
plt.show()
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.tight_layout()
plt.show()
plt.figure(figsize=(10, 6))
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.tight_layout()
plt.show()
PyTorch Model:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
import seaborn as sns
#
# 1. DATA PREPARATION
#
# Assume 'water' DataFrame is already loaded and cleaned
X = water[['mean_temperature', 'mean_nitrate']]
y_ph = water['mean_ph']
y_bod = water['mean_bod']
y_do = water['mean_do']
# Custom Dataset
class WaterDataset(Dataset):
    """Tensor-backed dataset pairing feature rows with pH, BOD and DO targets.

    Args:
        X: Array-like of feature rows accepted by ``torch.tensor``.
        y_ph: Object exposing ``.values`` (e.g. a pandas Series) with pH targets.
        y_bod: Same, for the BOD targets.
        y_do: Same, for the DO targets.

    Each item is the tuple ``(features, y_ph, y_bod, y_do)``, which is the
    order in which the loaders are unpacked during validation.
    """

    def __init__(self, X, y_ph, y_bod, y_do):
        self.X = torch.tensor(X, dtype=torch.float32)
        # Targets are stored as float32 column vectors of shape (n, 1).
        self.y_ph = torch.tensor(y_ph.values, dtype=torch.float32).view(-1, 1)
        self.y_bod = torch.tensor(y_bod.values, dtype=torch.float32).view(-1, 1)
        self.y_do = torch.tensor(y_do.values, dtype=torch.float32).view(-1, 1)

    def __len__(self):
        """Return the number of samples."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return ``(features, y_ph, y_bod, y_do)`` for sample ``idx``."""
        return self.X[idx], self.y_ph[idx], self.y_bod[idx], self.y_do[idx]
#
# 2. MODEL DEFINITION
#
class MultiTaskNet(nn.Module):
    """Shared-trunk network with one single-unit linear head per target.

    Args:
        input_dim: Number of input features per sample.

    Calling the model returns the tuple ``(ph, bod, do)``; each element has
    shape ``(batch, 1)``.
    """

    def __init__(self, input_dim):
        super().__init__()
        # Trunk shared by all three tasks: input_dim -> 64 -> 32.
        self.shared = nn.Sequential(
            nn.Linear(input_dim, 64),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(64, 32),
            nn.ReLU(),
        )
        # One regression head per target on top of the 32-unit representation.
        self.ph_head = nn.Linear(32, 1)
        self.bod_head = nn.Linear(32, 1)
        self.do_head = nn.Linear(32, 1)

    def forward(self, x):
        """Run the shared trunk once and apply each head to its output."""
        features = self.shared(x)
        return (
            self.ph_head(features),
            self.bod_head(features),
            self.do_head(features),
        )
# Size the network input from the second dimension of X_train_scaled
# (presumably the number of feature columns — confirm against the split code).
input_dim = X_train_scaled.shape[1]
model = MultiTaskNet(input_dim)
# Mean-squared-error criterion; Adam updates all model parameters at lr=0.001.
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
#
# 3. TRAINING + VALIDATION LOOP (with Loss Tracking)
#
num_epochs = 50
val_losses_ph = []
val_losses_bod = []
val_losses_do = []
#
# VALIDATION
#
model.eval()
val_running_loss_ph = 0.0
val_running_loss_bod = 0.0
val_running_loss_do = 0.0
with torch.no_grad():
for X_val, y_ph_val, y_bod_val, y_do_val in test_loader:
pred_ph_val, pred_bod_val, pred_do_val = model(X_val)
val_running_loss_ph += val_loss_ph.item()
val_running_loss_bod += val_loss_bod.item()
val_running_loss_do += val_loss_do.item()
val_losses_ph.append(val_loss_ph_epoch)
val_losses_bod.append(val_loss_bod_epoch)
val_losses_do.append(val_loss_do_epoch)
# --- pH ---
axes[0].plot(epochs_range, train_losses_ph, label='Train pH Loss',
marker='o')
axes[0].plot(epochs_range, val_losses_ph, label='Val pH Loss', marker='o')
axes[0].set_title('pH Loss Over Epochs')
axes[0].set_xlabel('Epoch')
axes[0].set_ylabel('MSE Loss')
axes[0].legend()
# --- DO ---
axes[2].plot(epochs_range, train_losses_do, label='Train DO Loss',
marker='o')
axes[2].plot(epochs_range, val_losses_do, label='Val DO Loss', marker='o')
axes[2].set_title('DO Loss Over Epochs')
axes[2].set_xlabel('Epoch')
axes[2].set_ylabel('MSE Loss')
axes[2].legend()
plt.tight_layout()
plt.show()
WQI Computation and Predictive Modeling Code
This section includes code for computing the Water Quality Index, as well as machine
learning models for regression and classification.
Computation
Each parameter is converted into a subindex using a linear scaling method:
# Preview the water-quality columns alongside the WQI column.
print(water[['mean_ph', 'mean_do', 'mean_bod', 'mean_temperature',
             'mean_nitrate', 'WQI']].head(25))
# Histogram of the WQI values (20 bins).
plt.figure(figsize=(6, 4))
water['WQI'].hist(bins=20)
plt.ylabel("Frequency")
plt.show()
import pandas as pd
import numpy as np
######################################
######################################
# Map a WQI value to a quality label: "Poor", "Moderate" or "Good".
# NOTE(review): the if/elif threshold conditions that should guard the first
# two returns, and the body indentation, are missing from this listing.
# Restore them from the original notebook; the thresholds cannot be inferred here.
def categorize_wqi(wqi):
return "Poor"
return "Moderate"
else:
return "Good"
water['Quality_Category'] = water['WQI'].apply(categorize_wqi)
######################################
######################################
y_class = water['Quality_Category']
data_class = water.dropna(subset=['Quality_Category'])
y_class = data_class['Quality_Category']
######################################
# 3. Split dataset
######################################
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible. The four results feed the fit/predict/report steps below.
Xc_train, Xc_test, yc_train, yc_test = train_test_split(
    X_class,
    y_class,
    test_size=0.2,
    random_state=42,
)
######################################
######################################
pipeline_clf = Pipeline([
('imputer', SimpleImputer(strategy='mean')),
])
pipeline_clf.fit(Xc_train, yc_train)
######################################
# 5. Make predictions
######################################
yc_pred = pipeline_clf.predict(Xc_test)
######################################
# 6. Print metrics
######################################
print(classification_report(yc_test, yc_pred))
######################################
######################################
df_results = Xc_test.copy()
df_results['Actual_Quality'] = yc_test.values
df_results['Predicted_Quality'] = yc_pred
print("\nSample rows from the Actual vs. Predicted Quality table:")
print(df_results.head(10))
######################################
######################################
# Count how many rows of df_results fall into each predicted category.
# Parentheses keep the wrapped right-hand side a single valid statement.
predicted_distribution = (
    df_results.groupby('Predicted_Quality').size().reset_index(name='Count')
)
print(predicted_distribution)
######################################
# 9. Confusion Matrix
######################################
cm = confusion_matrix(df_results['Actual_Quality'],
df_results['Predicted_Quality'], labels=labels)
print("\nConfusion Matrix:")
print(cm_df)
######################################
# 10. Visualizations
######################################
axes[0].bar(predicted_distribution['Predicted_Quality'],
predicted_distribution['Count'], color=['green','orange','red'])
plt.tight_layout()
plt.show()