0% found this document useful (0 votes)
12 views5 pages

Import As Import As From Import Import As: - Init

Uploaded by

352677469
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
12 views5 pages

Import As Import As From Import Import As: - Init

Uploaded by

352677469
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
You are on page 1/ 5

import pandas as pd

import numpy as np
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt

# Kernel ridge regression model.
class RidgeRegressor:
    """Kernel ridge regression with linear, polynomial, gaussian and
    periodic kernels.

    fit() solves (sigma^2 * lambda_ * I + K(train, train)) @ alpha = y
    for the dual weights; predict() returns K(test, train) @ alpha.
    """

    def __init__(self, kernel, X_train, sigma=0, lambda_=0, c=0, p=0,
                 l=0):
        # kernel : one of 'linear', 'polynomial', 'gaussian', 'periodic'
        # X_train: training inputs, stored so predict() can build
        #          K(test, train)
        # sigma, lambda_: regularization (sigma**2 * lambda_ scales I)
        # c, p   : polynomial kernel offset and degree; p doubles as the
        #          period of the periodic kernel
        # l      : length scale for gaussian / periodic kernels
        self.kernel = kernel
        self.X_train = X_train
        self.lambda_ = lambda_
        self.sigma = sigma
        self.c = c
        self.p = p
        self.l = l

    def linear_kernel(self, x, z):
        """Linear kernel: K[i, j] = <x_i, z_j>."""
        return np.dot(x, z.T)

    def polynomial_kernel(self, x, z):
        """Polynomial kernel: K[i, j] = (<x_i, z_j> + c) ** p."""
        return (np.dot(x, z.T) + self.c) ** self.p

    def gaussian_kernel(self, x, z):
        """Gaussian (RBF) kernel: K[i, j] = exp(-||x_i - z_j||^2 / l^2)."""
        # Expand x and z so the pairwise difference broadcasts to
        # shape (n_x, n_z, n_features).
        x_expanded = x[:, np.newaxis, :]
        z_expanded = z[np.newaxis, :, :]
        # Squared Euclidean distance; axis=2 sums over the feature axis.
        dist = np.sum((x_expanded - z_expanded) ** 2, axis=2)
        # Apply the Gaussian function.
        return np.exp(-dist / (self.l ** 2))

    def periodic_kernel(self, x, z):
        """Periodic kernel with period p and length scale l."""
        x_expanded = x[:, np.newaxis, :]
        z_expanded = z[np.newaxis, :, :]
        dist = (np.sin(np.pi * (x_expanded - z_expanded) / self.p)) ** 2
        return np.exp(-0.5 * np.sum(dist, axis=2) / (self.l ** 2))

    def _kernel_matrix(self, x, z):
        """Dispatch to the configured kernel function.

        Shared by fit() and predict() so the kernel-name handling (and
        its error message) stays consistent in one place.
        """
        if self.kernel == 'linear':
            return self.linear_kernel(x, z)
        if self.kernel == 'polynomial':
            return self.polynomial_kernel(x, z)
        if self.kernel == 'gaussian':
            return self.gaussian_kernel(x, z)
        if self.kernel == 'periodic':
            return self.periodic_kernel(x, z)
        raise ValueError('Unknown kernel')

    def fit(self, x_train, y_train):
        """Solve the regularized kernel system for the dual weights."""
        n = x_train.shape[0]
        k_tr_tr = self._kernel_matrix(x_train, x_train)
        # Ridge-regularized Gram matrix; sigma**2 * lambda_ * I keeps
        # the solve well-conditioned.
        k_reg = self.sigma ** 2 * self.lambda_ * np.eye(n) + k_tr_tr
        self.alpha = np.linalg.solve(k_reg, y_train)

    def predict(self, x_test):
        """Predict targets for x_test using the stored training inputs."""
        k_te_tr = self._kernel_matrix(x_test, self.X_train)
        return k_te_tr.dot(self.alpha)

    def params(self):
        """Return the current hyper-parameters as a dict."""
        return {
            'sigma': self.sigma,
            'lambda_': self.lambda_,
            'c': self.c,
            'p': self.p,
            'l': self.l
        }

    def set_params(self, **params):
        """Set existing hyper-parameters by name; unknown keys are
        silently ignored (original behavior, kept)."""
        for key, value in params.items():
            if hasattr(self, key):
                setattr(self, key, value)

# Hyper-parameter candidates evaluated for each kernel below.
# sigma/lambda_ control regularization strength; c/p are the polynomial
# offset and degree (p is reused as the period of the periodic kernel);
# l is the length scale for gaussian / periodic kernels.
params_linear = [
{'sigma': 100, 'lambda_': 10000},
{'sigma': 10, 'lambda_': 1000},
{'sigma': 1, 'lambda_': 1},
]

params_polynomial = [
{'sigma': 1, 'lambda_': 1, 'c': 0.5, 'p': 2},
{'sigma': 1, 'lambda_': 0.1, 'c': 0.3, 'p': 7},
{'sigma': 2, 'lambda_': 0.45, 'c': 0.5, 'p': 11},
]

params_gaussian = [
{'sigma': 50, 'lambda_': 0.1, 'l': 10},
{'sigma': 10, 'lambda_': 0.01, 'l': 0.1},
{'sigma': 1, 'lambda_': 1, 'l': 0.01},
]

params_periodic = [
{'sigma': 2, 'lambda_': 1, 'l': 0.05, 'p': 1},
{'sigma': 2, 'lambda_': 1, 'l': 0.05, 'p': 5},
{'sigma': 2, 'lambda_': 1, 'l': 0.05, 'p': 7},
]

# Load the train / validation / test splits from CSV.
train_data = pd.read_csv('data_train.csv')
valid_data = pd.read_csv('data_valid.csv')
test_data = pd.read_csv('data_test.csv')

# Each CSV has single-feature inputs in column 'X' and targets in 'Y';
# inputs are reshaped to (n, 1) for the kernel functions.
X_train = train_data['X'].values.reshape(-1, 1)
y_train = train_data['Y'].values
X_test = test_data['X'].values.reshape(-1, 1)
y_test = test_data['Y'].values
x_val = valid_data['X'].values.reshape(-1, 1)
y_val = valid_data['Y'].values

# Store each model's predictions and MSE.
results = []

# Fit and evaluate every (kernel, hyper-parameter) combination.
# The original four copy-pasted loops (one per kernel) are collapsed
# into a single loop over (kernel name, parameter grid) pairs; the
# order of entries appended to `results` is unchanged.
for kernel_name, param_grid in [
    ('linear', params_linear),
    ('polynomial', params_polynomial),
    ('gaussian', params_gaussian),
    ('periodic', params_periodic),
]:
    for params in param_grid:
        model = RidgeRegressor(kernel=kernel_name, X_train=X_train,
                               **params)
        model.fit(X_train, y_train)
        y_pred_test = model.predict(X_test)
        y_pred_train = model.predict(X_train)
        y_pred_valid = model.predict(x_val)
        # Model-comparison metric: MSE on the test split.
        mse = mean_squared_error(y_test, y_pred_test)
        results.append((kernel_name, params, mse, y_pred_test,
                        y_pred_train, y_pred_valid))

# Visualize each model's predictions against the three data splits.
# Points share one x-axis: train indices first, then test, then val.
for kernel_type, params, mse, y_pred_test, y_pred_train, \
        y_pred_valid in results:
    n_train = len(y_train)
    n_test = len(y_test)
    n_val = len(y_val)
    train_idx = np.arange(n_train)
    test_idx = np.arange(n_train, n_train + n_test)
    val_idx = np.arange(n_train + n_test, n_train + n_test + n_val)

    plt.figure(figsize=(10, 6))
    plt.scatter(train_idx, y_train, color='blue', label='train')
    plt.scatter(test_idx, y_test, color='yellow', label='test')
    plt.scatter(val_idx, y_val, color='green', label='val')

    # One continuous prediction curve across all three splits.  The
    # original drew the same red curve twice (three per-split plots AND
    # a combined plot) with four duplicate 'Predicted' legend entries;
    # a single combined plot with one label is visually equivalent.
    combined_x = np.concatenate((train_idx, test_idx, val_idx))
    combined_pred = np.concatenate((y_pred_train, y_pred_test,
                                    y_pred_valid))
    plt.plot(combined_x, combined_pred, color='red', label='Predicted')

    plt.title(f'Kernel: {kernel_type}, MSE: {mse:.2f}')
    plt.ylim(0, 25)
    plt.legend()
    plt.show()

You might also like