Import As Import As From Import Import As: - Init
Import As Import As From Import Import As: - Init
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error
# Kernel ridge regression model
class RidgeRegressor:
def __init__(self, kernel, X_train, sigma=0, lambda_=0, c=0, p=0,
l=0):
self.kernel = kernel
self.X_train = X_train
self.lambda_ = lambda_
self.sigma = sigma
self.c = c
self.p = p
self.l = l
# NOTE(review): the `def` lines that own the statements below are not visible
# in this excerpt; x_expanded, z_expanded, k_te_tr and self.alpha are all
# defined elsewhere.
# Squared Euclidean distance between x and z.
dist = np.sum((x_expanded - z_expanded) ** 2, axis=2) # axis=2: sum along the feature dimension
# Apply the Gaussian function.
return np.exp(-dist / (self.l ** 2))
# NOTE(review): this looks like the tail of a separate prediction method --
# presumably k_te_tr is a test-vs-train kernel matrix and self.alpha the
# fitted coefficients; confirm against the full file.
return k_te_tr.dot(self.alpha)
def params(self):
    """Return the hyperparameters as a ``{name: value}`` dict.

    The keys are, in order, 'sigma', 'lambda_', 'c', 'p' and 'l'; each
    value is read from the instance attribute of the same name.
    """
    names = ('sigma', 'lambda_', 'c', 'p', 'l')
    return {name: getattr(self, name) for name in names}
# Hyperparameter grids: one list of candidate settings per kernel type.
# Each row of values is expanded into a dict whose keys match keyword
# parameters of RidgeRegressor.__init__ (presumably how the grids are
# consumed -- verify against the training loop).
params_linear = [
    {'sigma': sigma, 'lambda_': lambda_}
    for sigma, lambda_ in (
        (100, 10000),
        (10, 1000),
        (1, 1),
    )
]
params_polynomial = [
    {'sigma': sigma, 'lambda_': lambda_, 'c': c, 'p': p}
    for sigma, lambda_, c, p in (
        (1, 1, 0.5, 2),
        (1, 0.1, 0.3, 7),
        (2, 0.45, 0.5, 11),
    )
]
params_gaussian = [
    {'sigma': sigma, 'lambda_': lambda_, 'l': l}
    for sigma, lambda_, l in (
        (50, 0.1, 10),
        (10, 0.01, 0.1),
        (1, 1, 0.01),
    )
]
params_periodic = [
    {'sigma': sigma, 'lambda_': lambda_, 'l': l, 'p': p}
    for sigma, lambda_, l, p in (
        (2, 1, 0.05, 1),
        (2, 1, 0.05, 5),
        (2, 1, 0.05, 7),
    )
]
# Read the datasets: train, validation and test splits.
def _load_split(path):
    """Read one CSV split and return ``(frame, X, y)``.

    Args:
        path: Path of a CSV file containing an ``X`` and a ``Y`` column.

    Returns:
        A 3-tuple of the DataFrame as read, the ``X`` column reshaped into
        a single-column 2-D array, and the ``Y`` column as a 1-D array.

    Raises:
        FileNotFoundError: If ``path`` does not exist (raised by pandas).
        KeyError: If the file has no ``X`` or ``Y`` column.
    """
    frame = pd.read_csv(path)
    # reshape(-1, 1) turns the single feature column into shape (n, 1).
    features = frame['X'].to_numpy().reshape(-1, 1)
    targets = frame['Y'].to_numpy()
    return frame, features, targets


# Files are read in the same order as before: train, valid, test.
train_data, X_train, y_train = _load_split('data_train.csv')
valid_data, x_val, y_val = _load_split('data_valid.csv')
test_data, X_test, y_test = _load_split('data_test.csv')
# Stores each model's predictions and MSE. Every entry is unpacked below as
# (kernel_type, params, mse, y_pred_test, y_pred_train, y_pred_valid).
# NOTE(review): nothing visible in this excerpt appends to `results`, so the
# loop below does nothing unless the list is filled elsewhere.
results = []

# Visualise the results: one figure per entry, with the train, test and
# validation targets laid out consecutively along the x-axis and the
# predictions drawn over the matching segment.
for i, (kernel_type, params, mse, y_pred_test, y_pred_train,
        y_pred_valid) in enumerate(results):
    plt.figure(figsize=(10, 6))

    # Consecutive x-axis index ranges: train first, then test, then val.
    n_train, n_test, n_val = len(y_train), len(y_test), len(y_val)
    train_idx = range(n_train)
    test_idx = range(n_train, n_train + n_test)
    val_idx = range(n_train + n_test, n_val + n_train + n_test)

    # Ground-truth targets.
    plt.scatter(train_idx, y_train, color='blue', label='train')
    plt.scatter(test_idx, y_test, color='yellow', label='test')
    plt.scatter(val_idx, y_val, color='green', label='val')

    # Predictions, drawn in the same order as before (test, train, val).
    plt.plot(test_idx, y_pred_test, color='red', label='Predicted')
    plt.plot(train_idx, y_pred_train, color='red', label='Predicted')
    plt.plot(val_idx, y_pred_valid, color='red', label='Predicted')