from sklearn.model_selection import validation_curve
from sklearn.linear_model import Ridge
from sklearn.datasets import make_regression
import numpy as np
import matplotlib.pyplot as plt
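# Generate a small synthetic regression problem (100 samples, 20 features, light noise)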
X, y = make_regression(n_samples=100, n_features=20, noise=0.1, random_state=42)
# Define the alpha values to evaluate (log-spaced from 1e-7 to 1e3; more points give a smoother curve)
param_range = np.logspace(-7, 3, 10)
# Compute the validation curve
train_scores, valid_scores = validation_curve(
    Ridge(), X, y, param_name="alpha", param_range=param_range, cv=5
)
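# Both score arrays have shape (n_param_values, n_cv_folds);
# the default scorer for Ridge is R^2, so higher is better.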
# Calculate mean and standard deviation of training and validation scores
train_scores_mean = np.mean(train_scores, axis=1)
train_scores_std = np.std(train_scores, axis=1)
valid_scores_mean = np.mean(valid_scores, axis=1)
valid_scores_std = np.std(valid_scores, axis=1)
# Plotting the validation curve
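# Shaded bands show +/- 1 standard deviation of the scores across the CV folds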
plt.figure()
plt.title("Validation Curve with Ridge Regression")
plt.xlabel("Alpha")
plt.ylabel("Score")
plt.ylim(0.0, 1.1)
plt.semilogx(param_range, train_scores_mean, label="Training score", color="darkorange", lw=2)
plt.fill_between(param_range, train_scores_mean - train_scores_std,
                 train_scores_mean + train_scores_std, alpha=0.2,
                 color="darkorange", lw=2)
plt.semilogx(param_range, valid_scores_mean, label="Cross-validation score",
             color="navy", lw=2)
plt.fill_between(param_range, valid_scores_mean - valid_scores_std,
                 valid_scores_mean + valid_scores_std, alpha=0.2,
                 color="navy", lw=2)
plt.legend(loc="best")
plt.show()
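# The alpha where the cross-validation score peaks is a reasonable choice for the final model.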