Code Diamond

The document outlines a machine learning workflow for predicting diamond prices using a dataset. It includes data preprocessing, model training with a Multi-layer Perceptron (MLP) regressor, and evaluation of performance metrics such as MAPE, MSE, and RMSE. Additionally, it explores optimization techniques using Genetic Algorithms (GA) and Particle Swarm Optimization (PSO) to enhance model performance, with results indicating significant discrepancies in error metrics across different methods.


import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV  # GridSearchCV/RandomizedSearchCV are imported but not used below
from sklearn.preprocessing import OneHotEncoder, MinMaxScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.neural_network import MLPRegressor
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error
from sklearn.cluster import KMeans
from deap import base, creator, tools, algorithms
import random, warnings
warnings.filterwarnings("ignore")
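
Note: besides scikit-learn, the script assumes the deap and pyswarm packages are installed, and presumably openpyxl so that pandas can read the .xlsx file.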

# === Load data ===
# Note: 'daimonds.xlsx' is the filename used in the source; 'y' below is a
# dimension column of the diamonds dataset, not the regression target.
df = pd.read_excel("daimonds.xlsx")
categorical_cols = ['color', 'clarity']
numerical_cols = ['carat', 'y']
target_col = 'price'

# === Preprocessing ===
X = df[numerical_cols + categorical_cols]
y = df[[target_col]]
column_trans = ColumnTransformer([
    ('cat', OneHotEncoder(sparse_output=False), categorical_cols)
], remainder='passthrough')
X_encoded = column_trans.fit_transform(X)
X_train, X_val, y_train, y_val = train_test_split(X_encoded, y,
                                                  test_size=0.2, random_state=42)

# === Baseline: manual fuzzification + MLP ===
scaler_X = MinMaxScaler()
scaler_y = MinMaxScaler()
X_train_scaled = scaler_X.fit_transform(X_train)
X_val_scaled = scaler_X.transform(X_val)
y_train_scaled = scaler_y.fit_transform(y_train)
y_val_scaled = scaler_y.transform(y_val)

def generate_gaussian_mf_features(X, n_mf=2):
    # Note: this re-fits KMeans on whatever array it receives, so the
    # validation set gets its own centers; the GaussianMFTransformer class
    # further below fixes this by fitting centers once on the training data.
    mf_features = []
    for i in range(X.shape[1]):
        col = X[:, i].reshape(-1, 1)
        kmeans = KMeans(n_clusters=n_mf, random_state=42).fit(col)
        centers = np.sort(kmeans.cluster_centers_.flatten())
        sigma = 0.1
        features = [np.exp(-0.5 * ((col - c) / sigma) ** 2) for c in centers]
        mf_features.extend(features)
    return np.hstack(mf_features)

X_train_fuzzy = generate_gaussian_mf_features(X_train_scaled)
X_val_fuzzy = generate_gaussian_mf_features(X_val_scaled)
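
Each scaled feature value x is mapped to a membership degree mu(x) = exp(-((x - c)^2) / (2 * sigma^2)) around each KMeans center c. A quick illustrative check (the values here are chosen for the example, not taken from the data):

# Membership of x = 0.5 in a fuzzy set centered at c = 0.3 with sigma = 0.1:
x, c, sigma = 0.5, 0.3, 0.1
mu = np.exp(-0.5 * ((x - c) / sigma) ** 2)
print(round(mu, 4))  # exp(-2) ≈ 0.1353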

model = MLPRegressor(hidden_layer_sizes=(16,), activation='relu',
                     max_iter=500, random_state=42)
model.fit(X_train_fuzzy, y_train_scaled.ravel())

y_pred_train_scaled = model.predict(X_train_fuzzy).reshape(-1, 1)
y_pred_val_scaled = model.predict(X_val_fuzzy).reshape(-1, 1)
y_pred_train = scaler_y.inverse_transform(y_pred_train_scaled)
y_pred_val = scaler_y.inverse_transform(y_pred_val_scaled)
y_train_orig = scaler_y.inverse_transform(y_train_scaled)
y_val_orig = scaler_y.inverse_transform(y_val_scaled)

results = []

def evaluate_metrics(y_true, y_pred, name=""):
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    mape = mean_absolute_percentage_error(y_true, y_pred)
    nrmse = rmse / (np.max(y_true) - np.min(y_true))  # RMSE normalized by the target range
    print(f"\n📊 {name} evaluation results:")
    print(f" - MAPE : {mape:.4f}")
    print(f" - MSE : {mse:.2f}")
    print(f" - RMSE : {rmse:.2f}")
    print(f" - NRMSE: {nrmse:.4f}")
    return {"Method": name, "MAPE": mape, "MSE": mse, "RMSE": rmse,
            "NRMSE": nrmse}

results.append(evaluate_metrics(y_train_orig, y_pred_train, "Baseline - Train"))
results.append(evaluate_metrics(y_val_orig, y_pred_val, "Baseline - Validation"))

📊 Baseline - Train evaluation results:
 - MAPE : 0.3629
 - MSE : 984494.44
 - RMSE : 992.22
 - NRMSE: 0.0404

📊 Baseline - Validation evaluation results:
 - MAPE : 0.3614
 - MSE : 983552.14
 - RMSE : 991.74
 - NRMSE: 0.0530

class GaussianMFTransformer(BaseEstimator, TransformerMixin):
    def __init__(self, n_mf=2, sigma=0.1):
        self.n_mf = n_mf
        self.sigma = sigma
        self.centers_list = []

    def fit(self, X, y=None):
        # Fit KMeans centers per column once, so transform() reuses the same
        # centers for training and validation data.
        self.centers_list = []
        for i in range(X.shape[1]):
            col = X[:, i].reshape(-1, 1)
            unique_vals = np.unique(col)
            n_clusters = min(self.n_mf, len(unique_vals))
            kmeans = KMeans(n_clusters=n_clusters, random_state=42).fit(col)
            centers = np.sort(kmeans.cluster_centers_.flatten())
            self.centers_list.append(centers)
        return self

    def transform(self, X):
        mf_features = []
        for i in range(X.shape[1]):
            col = X[:, i].reshape(-1, 1)
            centers = self.centers_list[i]
            features = [np.exp(-0.5 * ((col - c) / self.sigma) ** 2)
                        for c in centers]
            mf_features.extend(features)
        return np.hstack(mf_features)
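
A quick shape check (a minimal sketch, not part of the original script): with n_mf=2, each input column should expand into up to two Gaussian membership columns.

fuzzy = GaussianMFTransformer(n_mf=2, sigma=0.1)
demo = fuzzy.fit_transform(X_train_scaled)
print(demo.shape)  # (n_samples, up to 2 * n_features); fewer if a column has < 2 unique values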

# === GA with baseline init ===
def evalGA(ind):
    n_mf, sigma, hl = int(ind[0]), ind[1], int(ind[2])
    model = Pipeline([
        ("scaler_X", MinMaxScaler()),
        ("fuzzy", GaussianMFTransformer(n_mf=n_mf, sigma=sigma)),
        ("mlp", MLPRegressor(hidden_layer_sizes=(hl,), max_iter=500,
                             random_state=42))
    ])
    # Caution: the pipeline is trained on the raw (unscaled) prices, yet the
    # predictions are pushed through scaler_y.inverse_transform, which was
    # fitted on scaled targets. This mismatch inflates the GA/PSO error
    # metrics reported further down.
    model.fit(X_train, y_train.values.ravel())
    pred = scaler_y.inverse_transform(model.predict(X_val).reshape(-1, 1))
    return (mean_absolute_percentage_error(y_val, pred),)

creator.create("FitnessMin", base.Fitness, weights=(-1.0,))


creator.create("Individual", list, fitness=creator.FitnessMin)
toolbox = base.Toolbox()
toolbox.register("attr_int", random.randint, 2, 4)
toolbox.register("attr_sigma", random.uniform, 0.05, 0.2)
toolbox.register("attr_hl", random.randint, 8, 32)
toolbox.register("individual", tools.initCycle, creator.Individual,
(toolbox.attr_int, toolbox.attr_sigma,
toolbox.attr_hl), n=1)
toolbox.register("population", tools.initRepeat, list,
toolbox.individual)
toolbox.register("evaluate", evalGA)
toolbox.register("mate", tools.cxBlend, alpha=0.5)
toolbox.register("mutate", tools.mutGaussian, mu=0, sigma=1,
indpb=0.2)
toolbox.register("select", tools.selTournament, tournsize=3)
init_ind = creator.Individual([2, 0.1, 16])
pop = [init_ind] + toolbox.population(n=9)
pop, _ = algorithms.eaSimple(pop, toolbox, cxpb=0.5, mutpb=0.2,
ngen=5, verbose=False)
best_ind = tools.selBest(pop, k=1)[0]
ga_model = Pipeline([
("scaler_X", MinMaxScaler()),
("fuzzy", GaussianMFTransformer(n_mf=int(best_ind[0]),
sigma=best_ind[1])),
("mlp", MLPRegressor(hidden_layer_sizes=(int(best_ind[2]),),
max_iter=500, random_state=42))
])
ga_model.fit(X_train, y_train.values.ravel())
y_pred_train =
scaler_y.inverse_transform(ga_model.predict(X_train).reshape(-1, 1))
y_pred_val =
scaler_y.inverse_transform(ga_model.predict(X_val).reshape(-1, 1))
results.append(evaluate_metrics(y_train, y_pred_train, "GA - Train"))
results.append(evaluate_metrics(y_val, y_pred_val, "GA - Validation"))

📊 GA - Train evaluation results:
 - MAPE : 25112.7541
 - MSE : 18622354043964312.00
 - RMSE : 136463746.26
 - NRMSE: 5561.8192

📊 GA - Validation evaluation results:
 - MAPE : 25141.8347
 - MSE : 18524009626157444.00
 - RMSE : 136102937.61
 - NRMSE: 7275.1719
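
The GA errors above are several orders of magnitude worse than the baseline, which is consistent with the scaler mismatch flagged in evalGA: the pipeline never scales the target, yet its raw-scale predictions are passed through scaler_y.inverse_transform. A minimal corrected sketch (an assumption, not the original author's fix) would let scikit-learn's TransformedTargetRegressor handle the target scaling and its inversion:

from sklearn.compose import TransformedTargetRegressor

def evalGA_scaled(ind):
    n_mf, sigma, hl = int(ind[0]), ind[1], int(ind[2])
    pipe = Pipeline([
        ("scaler_X", MinMaxScaler()),
        ("fuzzy", GaussianMFTransformer(n_mf=n_mf, sigma=sigma)),
        ("mlp", MLPRegressor(hidden_layer_sizes=(hl,), max_iter=500,
                             random_state=42))
    ])
    # TransformedTargetRegressor scales y before fitting and inverts the
    # scaling automatically at predict time, avoiding the mismatch.
    model = TransformedTargetRegressor(regressor=pipe, transformer=MinMaxScaler())
    model.fit(X_train, y_train.values.ravel())
    pred = model.predict(X_val)  # already on the original price scale
    return (mean_absolute_percentage_error(y_val, pred),)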

# === PSO with baseline init ===
def pso_objective(x):
    n_mf, sigma, hl = int(x[0]), x[1], int(x[2])
    model = Pipeline([
        ("scaler_X", MinMaxScaler()),
        ("fuzzy", GaussianMFTransformer(n_mf=n_mf, sigma=sigma)),
        ("mlp", MLPRegressor(hidden_layer_sizes=(hl,), max_iter=500,
                             random_state=42))
    ])
    # Same unscaled-target / inverse_transform mismatch as in evalGA above.
    model.fit(X_train, y_train.values.ravel())
    pred = scaler_y.inverse_transform(model.predict(X_val).reshape(-1, 1))
    return mean_absolute_percentage_error(y_val, pred)

from pyswarm import pso as pso_func

lb = [2, 0.05, 8]   # lower bounds: n_mf, sigma, hidden layer size
ub = [4, 0.2, 32]   # upper bounds

# Intended to seed the swarm with the baseline configuration, but pyswarm's
# pso() exposes no initial-position argument, so x0 below is never used and
# the swarm is initialized randomly.
def pso_with_baseline(obj, lb, ub, **kwargs):
    kwargs.setdefault('swarmsize', 10)
    kwargs.setdefault('maxiter', 5)
    x0 = np.array([2, 0.1, 16])  # defined but unused (see note above)
    xopt, fopt = pso_func(obj, lb, ub, **kwargs)
    return xopt, fopt
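
If genuinely seeding the swarm with the baseline is desired, one option (an assumption: it requires the separate pyswarms package, which does expose an init_pos argument) is:

import pyswarms as ps

swarm_size = 10
init_pos = np.random.uniform(lb, ub, size=(swarm_size, 3))
init_pos[0] = [2, 0.1, 16]  # place one particle at the baseline configuration
optimizer = ps.single.GlobalBestPSO(
    n_particles=swarm_size, dimensions=3,
    options={'c1': 0.5, 'c2': 0.3, 'w': 0.9},
    bounds=(np.array(lb, dtype=float), np.array(ub, dtype=float)),
    init_pos=init_pos)
# pyswarms objectives receive the whole swarm at once (shape: [n_particles, 3])
cost, pos = optimizer.optimize(
    lambda xs: np.array([pso_objective(x) for x in xs]), iters=5)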

best_params, _ = pso_with_baseline(pso_objective, lb, ub)

pso_model = Pipeline([
    ("scaler_X", MinMaxScaler()),
    ("fuzzy", GaussianMFTransformer(n_mf=int(best_params[0]), sigma=best_params[1])),
    ("mlp", MLPRegressor(hidden_layer_sizes=(int(best_params[2]),),
                         max_iter=500, random_state=42))
])
pso_model.fit(X_train, y_train.values.ravel())
y_pred_train = scaler_y.inverse_transform(pso_model.predict(X_train).reshape(-1, 1))
y_pred_val = scaler_y.inverse_transform(pso_model.predict(X_val).reshape(-1, 1))
results.append(evaluate_metrics(y_train, y_pred_train, "PSO - Train"))
results.append(evaluate_metrics(y_val, y_pred_val, "PSO - Validation"))

Stopping search: maximum iterations reached --> 5

📊 PSO - Train evaluation results:
 - MAPE : 25058.9360
 - MSE : 18483619158049740.00
 - RMSE : 135954474.58
 - NRMSE: 5541.0629

📊 PSO - Validation evaluation results:
 - MAPE : 25056.6385
 - MSE : 18412087978249912.00
 - RMSE : 135691149.23
 - NRMSE: 7253.1604

# === Summary of all methods ===
print("\n📊 Comparison of all methods:")
print(pd.DataFrame(results))

📊 Comparison of all methods:
                   Method          MAPE           MSE          RMSE        NRMSE
0        Baseline - Train      0.362919  9.844944e+05  9.922169e+02     0.040440
1   Baseline - Validation      0.361365  9.835521e+05  9.917420e+02     0.053012
2              GA - Train  25112.754142  1.862235e+16  1.364637e+08  5561.819207
3         GA - Validation  25141.834680  1.852401e+16  1.361029e+08  7275.171899
4             PSO - Train  25058.935955  1.848362e+16  1.359545e+08  5541.062947
5        PSO - Validation  25056.638548  1.841209e+16  1.356911e+08  7253.160388
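
Across the table, the baseline MAPE (~0.36) is dwarfed by the GA/PSO MAPEs (~25,000), and MSE jumps from ~1e6 to ~1.8e16. Rather than reflecting the optimizers themselves, this gap matches the inverse_transform mismatch noted in evalGA and pso_objective: predictions made on the raw price scale are stretched again by a scaler that was fitted on [0, 1] targets.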
