# python 1
# python 1
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
# Step 1: Load the dataset from a CSV file.
# pd.read_csv() accepts either a local file path or a URL.
# NOTE(review): the file name (people-100.csv) does not look like a
# heart-disease dataset, and the host appears to be a proxy prefix in front of
# raw.githubusercontent.com — confirm the URL resolves to the intended data.
# Replace with your dataset URL.
url = (
    "https://raw.githubusercontent.com/datablist/sample-csvfiles/main/files/"
    "people/people-100.csv"
)
df = pd.read_csv(url)

# Fill missing values in numerical columns with the column's mean.
# numeric_only=True restricts the mean to numeric columns; without it,
# pandas >= 2.0 raises TypeError when the frame contains text columns.
# Non-numeric columns are left untouched by this fill.
df.fillna(df.mean(numeric_only=True), inplace=True)

# Step 5: Data Splitting - pick the feature and target used for the
# training/testing split.
# We will predict salary based on age.
# NOTE(review): assumes the loaded CSV has 'age' and 'salary' columns — confirm
# against the actual dataset, otherwise these lookups raise KeyError.
X = df[["age"]]  # Feature (independent variable); double brackets keep a DataFrame
y = df["salary"]  # Target (dependent variable)