0% found this document useful (0 votes)

9 views · 4 pages

Micro

Contains notes

Uploaded by

pramoddoddmane

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as PDF, TXT or read online on Scribd

0% found this document useful (0 votes)

9 views · 4 pages

Micro

Contains notes

Uploaded by

pramoddoddmane

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as PDF, TXT or read online on Scribd

You are on page 1 / 4

# End-to-end demo: build a small dataset with missing values, round-trip it
# through a CSV file, run basic exploratory checks (dtypes, nulls, duplicates,
# skew, summary statistics), show several null-handling strategies, visualise
# the data, and fit/evaluate a linear regression model.

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Create sample dataset for demonstration
# Columns A, B and C each contain exactly one NaN; D is complete.
data = {
    'A': [1, 5, 9, 13, np.nan, 21],
    'B': [2, np.nan, 10, 14, 18, 22],
    'C': [3, 7, 11, np.nan, 19, 23],
    'D': [4, 8, 12, 16, 20, 24],
}

# Create a DataFrame
df = pd.DataFrame(data)

# Save to CSV
csv_file_path = 'sample_data.csv'
df.to_csv(csv_file_path, index=False)

# Read the data from CSV file
df_read = pd.read_csv(csv_file_path)

# Step 2: Show All Columns and Their Data Types
columns_and_types = df_read.dtypes
print(columns_and_types)

# Step 3: Check for Null Values
null_values = df_read.isnull().sum()
print(null_values)

# Step 4: Check for Duplicates
duplicates = df_read.duplicated().sum()
print(duplicates)

# Step 5: Check for Skew
skewness = df_read.skew()
print(skewness)

# Step 6: Display Mean, Mode, Standard Deviation, and Variance
mean_values = df_read.mean()
# mode() returns a DataFrame (one row per tied mode); take the first row.
mode_values = df_read.mode().iloc[0]
std_dev = df_read.std()
variance = df_read.var()

print("Mean Values:\n", mean_values)
print("Mode Values:\n", mode_values)
print("Standard Deviation:\n", std_dev)
print("Variance:\n", variance)

# Step 7: Show Ways to Remove Null Values
df_dropped_rows = df_read.dropna()           # drop every row containing a NaN
df_dropped_columns = df_read.dropna(axis=1)  # drop every column containing a NaN

print("Dropped Rows:\n", df_dropped_rows)
print("Dropped Columns:\n", df_dropped_columns)

# Step 8: Replace Null Values with Mean, Mode, and a Certain Value
df_fill_mean = df_read.fillna(df_read.mean())
df_fill_mode = df_read.fillna(df_read.mode().iloc[0])
df_fill_value = df_read.fillna(0)

print("Filled with Mean:\n", df_fill_mean)
print("Filled with Mode:\n", df_fill_mode)
print("Filled with Value 0:\n", df_fill_value)

# Step 9: Visualize the Dataset
sns.pairplot(df_read)
plt.show()

# Heatmap of the null mask: highlights where the missing cells are.
sns.heatmap(df_read.isnull(), cbar=False, cmap='viridis')
plt.show()

# Step 10: Choose an Appropriate Learning Model and Justify
# Linear regression is used: the target D is numeric, and in the sample data
# above D differs from each of A, B and C by a constant wherever they are
# present (D = A + 3 = B + 2 = C + 1).

# Step 11: Compute Accuracy and Other Scores

# Prepare the data
df_filled = df_read.fillna(df_read.mean())  # Fill missing values with mean for simplicity
X = df_filled[['A', 'B', 'C']]
y = df_filled['D']

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the model
model = LinearRegression()
model.fit(X_train, y_train)

# Predict
y_pred = model.predict(X_test)

# Compute scores
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("Mean Squared Error (MSE):", mse)
print("R-squared (R2) Score:", r2)

# Step 12: Plot the Relevant Results

# Plot actual vs predicted values
plt.scatter(y_test, y_pred)
plt.xlabel('Actual Values')
plt.ylabel('Predicted Values')
plt.title('Actual vs Predicted Values')
plt.show()