Experiment No. 8
Aim: Implement and demonstrate the simple linear regression algorithm on a given set
of training data samples. Read the training data from a .csv file, using the Salary
dataset from Kaggle.
Python
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
# Preview the data: show the first rows so the columns and sample values can
# be checked by eye.
# NOTE(review): `df` is not defined in the visible part of this script; it is
# presumably loaded from the salary .csv file earlier -- confirm.
print("First 5 rows of the dataset:")
print(df.head())

# Summarize the dataset structure. df.info() writes its report directly to the
# console, so it is called on its own rather than wrapped in print(). The
# summary includes the data type of each column, the number of non-null
# values, and the memory usage.
print("\nDataset information:")
df.info()
# Report the regression metrics, each formatted to two decimal places.
# NOTE(review): `mse` and `r2` are not defined in the visible part of this
# script; presumably they come from mean_squared_error / r2_score applied to
# the test-set predictions -- confirm.
print("\nModel Evaluation:")
print(f"Mean Squared Error: {mse:.2f}")
print(f"R-squared: {r2:.2f}")
# Label the current axes in a single call: plot title plus both axis captions.
plt.gca().set(
    title='Salary vs. Years of Experience (Test Set)',
    xlabel='Years of Experience',
    ylabel='Salary',
)
plt.legend()  # Add the legend to the current axes.
plt.show()  # Render the finished figure.