
import pandas as pd

import numpy as np

from sklearn.preprocessing import StandardScaler, MinMaxScaler  # scaling/normalization utilities

Data loading:

url = 'https://example.com/your-dataset.csv'

data = pd.read_csv(url)

Handle missing values:

# Identify missing values

print(data.isnull().sum())

data_dropped = data.dropna()

data_imputed = data.fillna(data.mean(numeric_only=True))  # mean imputation for numerical columns
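Note that data.mean() covers only the numeric columns, so any object columns keep their NaNs. A minimal sketch of one common follow-up, assuming the non-numeric columns should take their most frequent value:

# Mode imputation for non-numeric columns (illustrative sketch)
for col in data_imputed.select_dtypes(include=['object']).columns:
    data_imputed[col] = data_imputed[col].fillna(data_imputed[col].mode()[0])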

Scaling and normalization:

X = data_imputed.drop('target', axis=1) # Assuming 'target' is the column to predict

y = data_imputed['target']

scaler = StandardScaler()

X_scaled = scaler.fit_transform(X)

normalizer = MinMaxScaler()

X_normalized = normalizer.fit_transform(X)

Display the processed data:

X_scaled_df = pd.DataFrame(X_scaled, columns=X.columns)

X_normalized_df = pd.DataFrame(X_normalized, columns=X.columns)

print("Scaled Data:")

print(X_scaled_df.head())

print("\nNormalized Data:")

print(X_normalized_df.head())
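One caution: fitting the scaler on the full dataset before splitting leaks test-set statistics into training. A safer pattern, sketched here with an assumed 80/20 split, fits the scaler on the training portion only:

from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)  # learn mean/std from training data only
X_test_scaled = scaler.transform(X_test)        # apply the same statistics to the test set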

Exploratory Data Analysis, including summary statistics and visualizations with matplotlib and seaborn (histograms, boxplots, countplots, scatter plots):

Libraries:

import pandas as pd

import numpy as np

import matplotlib.pyplot as plt

import seaborn as sns

sns.set(style="whitegrid")

Data loading:

url = 'https://example.com/your-dataset.csv'

data = pd.read_csv(url)

Summary statistics:

print(data.head())

print(data.describe())

print(data.describe(include=['object', 'category']))
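For a closer look at a single categorical column, value_counts gives per-category frequencies (the column name below is a placeholder, as elsewhere in these examples):

print(data['categorical_column'].value_counts())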

Visualization:

Histogram:

plt.figure(figsize=(10, 6))

sns.histplot(data['numerical_column'], kde=True)

plt.title('Distribution of Numerical Column')

plt.show()
data.hist(figsize=(15, 10), bins=20, edgecolor='black')

plt.tight_layout()

plt.show()

Boxplots:

plt.figure(figsize=(10, 6))

sns.boxplot(x=data['numerical_column'])

plt.title('Boxplot of Numerical Column')

plt.show()

plt.figure(figsize=(15, 10))

sns.boxplot(data=data.select_dtypes(include=['float64', 'int64']))

plt.xticks(rotation=90)

plt.title('Boxplots of Numerical Columns')

plt.show()

Countplots:

plt.figure(figsize=(10, 6))

sns.countplot(x='categorical_column', data=data)

plt.title('Countplot of Categorical Column')

plt.show()

Scatter Plots:

plt.figure(figsize=(10, 6))

sns.scatterplot(x='numerical_column1', y='numerical_column2', data=data)

plt.title('Scatter Plot of Numerical Column 1 vs Numerical Column 2')

plt.show()

plt.figure(figsize=(10, 6))

sns.scatterplot(x='numerical_column1', y='numerical_column2', hue='categorical_column', data=data)

plt.title('Scatter Plot with Hue')

plt.show()
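A correlation heatmap is another standard EDA view; a short sketch, assuming the numeric columns are the ones of interest:

plt.figure(figsize=(10, 8))
sns.heatmap(data.select_dtypes(include=['float64', 'int64']).corr(), annot=True, cmap='coolwarm')
plt.title('Correlation Heatmap')
plt.show()
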
Data splitting:

Import Libraries

import pandas as pd

import numpy as np

import matplotlib.pyplot as plt

import seaborn as sns

from sklearn.model_selection import train_test_split

Load and Explore the Data:

url = 'https://example.com/your-dataset.csv'

data = pd.read_csv(url)

print(data.head())

print(data.describe())

print(data.info())

print(data.isnull().sum())

Visualize the Data

# Histograms

data.hist(bins=30, figsize=(15, 10))

plt.suptitle('Histograms')

plt.show()

Split the Data

X = data.drop('target', axis=1) # Replace 'target' with your actual target column name

y = data['target']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print(f"Training data shape: {X_train.shape}")

print(f"Testing data shape: {X_test.shape}")

Model development:

Import Libraries
import pandas as pd

import numpy as np

import matplotlib.pyplot as plt

import seaborn as sns

from sklearn.model_selection import train_test_split

from sklearn.preprocessing import StandardScaler, MinMaxScaler

from sklearn.linear_model import LogisticRegression

from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

Load Data

url = 'https://example.com/your-dataset.csv'

data = pd.read_csv(url)

Exploratory Data Analysis

print(data.describe())

Visualizations

# Histograms

data.hist(bins=30, figsize=(20, 15))

plt.show()

# Boxplots

plt.figure(figsize=(20, 10))

sns.boxplot(data=data.select_dtypes(include=['float64', 'int64']))  # boxplots only make sense for numeric columns

plt.show()

# Countplots (for categorical variables)

sns.countplot(x='categorical_column', data=data)

plt.show()

# Scatter plots

sns.pairplot(data)
plt.show()

Data Preprocessing

# Identify missing values

print(data.isnull().sum())

# Impute missing values with the mean for numerical columns

data = data.fillna(data.mean(numeric_only=True))

# Alternatively, you could drop rows with missing values

# data = data.dropna()

Scaling and Normalization

# Separate features and target variable

X = data.drop('target', axis=1) # Replace 'target' with your actual target column name

y = data['target']

# Standard Scaling

scaler = StandardScaler()

X_scaled = scaler.fit_transform(X)

# Normalization

normalizer = MinMaxScaler()

X_normalized = normalizer.fit_transform(X)

Model Development

Logistic Regression

# Split the data (train/test sets are needed before fitting)
X_train, X_test, y_train, y_test = train_test_split(X_normalized, y, test_size=0.2, random_state=42)

# Initialize the model
model = LogisticRegression()

# Train the model
model.fit(X_train, y_train)
# Make predictions

y_pred = model.predict(X_test)

# Evaluate the model

accuracy = accuracy_score(y_test, y_pred)

conf_matrix = confusion_matrix(y_test, y_pred)

class_report = classification_report(y_test, y_pred)

print("Accuracy:", accuracy)

print("Confusion Matrix:\n", conf_matrix)

print("Classification Report:\n", class_report)

Visualize Model Performance

# Confusion Matrix Heatmap

sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues')

plt.xlabel('Predicted')

plt.ylabel('Actual')

plt.title('Confusion Matrix')

plt.show()

Complete Example in Google Colab

import pandas as pd

import numpy as np

import matplotlib.pyplot as plt

import seaborn as sns

from sklearn.model_selection import train_test_split

from sklearn.preprocessing import StandardScaler, MinMaxScaler

from sklearn.linear_model import LogisticRegression

from sklearn.metrics import accuracy_score, confusion_matrix, classification_report


# Load data

url = 'https://example.com/your-dataset.csv'

data = pd.read_csv(url)

# Handle missing values

data = data.fillna(data.mean(numeric_only=True))

# Summary statistics

print(data.describe())

# Histograms

data.hist(bins=30, figsize=(20, 15))

plt.show()

# Boxplots

plt.figure(figsize=(20, 10))

sns.boxplot(data=data.select_dtypes(include=['float64', 'int64']))

plt.show()

# Countplot

sns.countplot(x='categorical_column', data=data)

plt.show()

# Scatter plots

sns.pairplot(data)

plt.show()

# Preprocessing

X = data.drop('target', axis=1)  # Replace 'target' with your actual target column name

y = data['target']

# Scaling and Normalization

scaler = StandardScaler()

X_scaled = scaler.fit_transform(X)

normalizer = MinMaxScaler()

X_normalized = normalizer.fit_transform(X)

# Split the data

X_train, X_test, y_train, y_test = train_test_split(X_normalized, y, test_size=0.2, random_state=42)

# Model development

model = LogisticRegression()

model.fit(X_train, y_train)

y_pred = model.predict(X_test)

# Evaluate the model

accuracy = accuracy_score(y_test, y_pred)

conf_matrix = confusion_matrix(y_test, y_pred)

class_report = classification_report(y_test, y_pred)

print("Accuracy:", accuracy)

print("Confusion Matrix:\n", conf_matrix)

print("Classification Report:\n", class_report)

# Confusion Matrix Heatmap

sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues')

plt.xlabel('Predicted')
plt.ylabel('Actual')

plt.title('Confusion Matrix')

plt.show()

Model training:

import pandas as pd

import numpy as np

import matplotlib.pyplot as plt

import seaborn as sns

from sklearn.model_selection import train_test_split

from sklearn.preprocessing import StandardScaler, MinMaxScaler

from sklearn.linear_model import LogisticRegression

from sklearn.tree import DecisionTreeClassifier

from sklearn.ensemble import RandomForestClassifier

from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Load data

url = 'https://example.com/your-dataset.csv'

data = pd.read_csv(url)

# Handle missing values

data = data.fillna(data.mean(numeric_only=True))

# Separate features and target variable

X = data.drop('target', axis=1) # Replace 'target' with your actual target column name

y = data['target']

# Scaling and Normalization

scaler = StandardScaler()

X_scaled = scaler.fit_transform(X)
normalizer = MinMaxScaler()

X_normalized = normalizer.fit_transform(X)

# Split the data

X_train, X_test, y_train, y_test = train_test_split(X_normalized, y, test_size=0.2, random_state=42)

# Model training and predictions

log_reg = LogisticRegression()

log_reg.fit(X_train, y_train)

y_pred_log_reg = log_reg.predict(X_test)

decision_tree = DecisionTreeClassifier()

decision_tree.fit(X_train, y_train)

y_pred_decision_tree = decision_tree.predict(X_test)

random_forest = RandomForestClassifier()

random_forest.fit(X_train, y_train)

y_pred_random_forest = random_forest.predict(X_test)

# Model evaluation

accuracy_log_reg = accuracy_score(y_test, y_pred_log_reg)

conf_matrix_log_reg = confusion_matrix(y_test, y_pred_log_reg)

class_report_log_reg = classification_report(y_test, y_pred_log_reg)

print("Logistic Regression - Accuracy:", accuracy_log_reg)

print("Logistic Regression - Confusion Matrix:\n", conf_matrix_log_reg)

print("Logistic Regression - Classification Report:\n", class_report_log_reg)

accuracy_decision_tree = accuracy_score(y_test, y_pred_decision_tree)


conf_matrix_decision_tree = confusion_matrix(y_test, y_pred_decision_tree)

class_report_decision_tree = classification_report(y_test, y_pred_decision_tree)

print("Decision Tree - Accuracy:", accuracy_decision_tree)

print("Decision Tree - Confusion Matrix:\n", conf_matrix_decision_tree)

print("Decision Tree - Classification Report:\n", class_report_decision_tree)

accuracy_random_forest = accuracy_score(y_test, y_pred_random_forest)

conf_matrix_random_forest = confusion_matrix(y_test, y_pred_random_forest)

class_report_random_forest = classification_report(y_test, y_pred_random_forest)

print("Random Forest - Accuracy:", accuracy_random_forest)

print("Random Forest - Confusion Matrix:\n", conf_matrix_random_forest)

print("Random Forest - Classification Report:\n", class_report_random_forest)

# Visualization

sns.heatmap(conf_matrix_log_reg, annot=True, fmt='d', cmap='Blues')

plt.xlabel('Predicted')

plt.ylabel('Actual')

plt.title('Logistic Regression - Confusion Matrix')

plt.show()

sns.heatmap(conf_matrix_decision_tree, annot=True, fmt='d', cmap='Blues')

plt.xlabel('Predicted')

plt.ylabel('Actual')

plt.title('Decision Tree - Confusion Matrix')

plt.show()

sns.heatmap(conf_matrix_random_forest, annot=True, fmt='d', cmap='Blues')


plt.xlabel('Predicted')

plt.ylabel('Actual')

plt.title('Random Forest - Confusion Matrix')

plt.show()

Model evaluation:

import pandas as pd

import numpy as np

import matplotlib.pyplot as plt

import seaborn as sns

from sklearn.model_selection import train_test_split

from sklearn.preprocessing import StandardScaler, MinMaxScaler

from sklearn.linear_model import LogisticRegression

from sklearn.tree import DecisionTreeClassifier

from sklearn.ensemble import RandomForestClassifier

from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, roc_auc_score, roc_curve

# Load data

url = 'https://example.com/your-dataset.csv'

data = pd.read_csv(url)

# Handle missing values

data = data.fillna(data.mean(numeric_only=True))

# Separate features and target variable

X = data.drop('target', axis=1) # Replace 'target' with your actual target column name

y = data['target']

# Scaling and Normalization

scaler = StandardScaler()

X_scaled = scaler.fit_transform(X)

normalizer = MinMaxScaler()

X_normalized = normalizer.fit_transform(X)

# Split the data

X_train, X_test, y_train, y_test = train_test_split(X_normalized, y, test_size=0.2, random_state=42)

# Model training and predictions

log_reg = LogisticRegression()

log_reg.fit(X_train, y_train)

y_pred_log_reg = log_reg.predict(X_test)

y_prob_log_reg = log_reg.predict_proba(X_test)[:, 1]

decision_tree = DecisionTreeClassifier()

decision_tree.fit(X_train, y_train)

y_pred_decision_tree = decision_tree.predict(X_test)

y_prob_decision_tree = decision_tree.predict_proba(X_test)[:, 1]

random_forest = RandomForestClassifier()

random_forest.fit(X_train, y_train)

y_pred_random_forest = random_forest.predict(X_test)

y_prob_random_forest = random_forest.predict_proba(X_test)[:, 1]

# Model evaluation

def evaluate_model(y_test, y_pred, y_prob, model_name):
    accuracy = accuracy_score(y_test, y_pred)
    conf_matrix = confusion_matrix(y_test, y_pred)
    class_report = classification_report(y_test, y_pred)
    roc_auc = roc_auc_score(y_test, y_prob)

    print(f"{model_name} - Accuracy: {accuracy}")
    print(f"{model_name} - Confusion Matrix:\n", conf_matrix)
    print(f"{model_name} - Classification Report:\n", class_report)
    print(f"{model_name} - ROC AUC Score: {roc_auc}")

    # ROC Curve
    fpr, tpr, _ = roc_curve(y_test, y_prob)
    plt.plot(fpr, tpr, label=f'{model_name} (area = {roc_auc:.2f})')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('ROC Curve')
    plt.legend(loc='best')
    plt.show()

# Evaluate Logistic Regression

evaluate_model(y_test, y_pred_log_reg, y_prob_log_reg, "Logistic Regression")

# Evaluate Decision Tree

evaluate_model(y_test, y_pred_decision_tree, y_prob_decision_tree, "Decision Tree")

# Evaluate Random Forest

evaluate_model(y_test, y_pred_random_forest, y_prob_random_forest, "Random Forest")
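Since evaluate_model draws each ROC curve on its own figure, overlaying all three on shared axes can make the comparison easier; a sketch reusing the predicted probabilities from above:

plt.figure(figsize=(8, 6))
for name, y_prob in [('Logistic Regression', y_prob_log_reg),
                     ('Decision Tree', y_prob_decision_tree),
                     ('Random Forest', y_prob_random_forest)]:
    fpr, tpr, _ = roc_curve(y_test, y_prob)
    plt.plot(fpr, tpr, label=f'{name} (area = {roc_auc_score(y_test, y_prob):.2f})')
plt.plot([0, 1], [0, 1], linestyle='--', color='gray')  # chance diagonal
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curves - All Models')
plt.legend(loc='best')
plt.show()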

Image preprocessing:

import cv2

import numpy as np
from PIL import Image

import matplotlib.pyplot as plt

import tensorflow as tf

from tensorflow.keras.preprocessing.image import ImageDataGenerator

from google.colab import files

from io import BytesIO

import requests

# Upload images

uploaded = files.upload()

# Load an image using PIL

image_path = next(iter(uploaded))

image = Image.open(BytesIO(uploaded[image_path]))

# Display the image

plt.imshow(image)

plt.axis('off')

plt.show()

# Resize the image

image_resized = image.resize((224, 224))

# Display the resized image

plt.imshow(image_resized)

plt.axis('off')

plt.show()

# Normalize the image


image_array = np.array(image_resized)

image_normalized = image_array / 255.0

# Display the normalized image

plt.imshow(image_normalized)

plt.axis('off')

plt.show()

# Data augmentation

datagen = ImageDataGenerator(
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Convert image to array and reshape

image_array = np.array(image_resized)

image_array = image_array.reshape((1,) + image_array.shape)

# Generate augmented images and display them

i = 0
for batch in datagen.flow(image_array, batch_size=1):
    plt.figure(i)
    plt.imshow(np.squeeze(batch).astype('uint8'))  # flow yields float arrays; cast back to uint8 for display
    plt.axis('off')
    i += 1
    if i % 4 == 0:
        break

plt.show()

# Save the normalized image

normalized_image_pil = Image.fromarray((image_normalized * 255).astype('uint8'))

normalized_image_pil.save('normalized_image.jpg')

# Save the augmented images

i = 0
for batch in datagen.flow(image_array, batch_size=1, save_to_dir='.', save_prefix='aug', save_format='jpeg'):
    i += 1
    if i % 4 == 0:
        break
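The cv2 import above is otherwise unused; one typical step it handles is grayscale conversion. A minimal sketch on the resized image:

# Convert the resized PIL image to grayscale with OpenCV
image_cv = np.array(image_resized)                      # PIL -> numpy array (RGB)
image_gray = cv2.cvtColor(image_cv, cv2.COLOR_RGB2GRAY)

plt.imshow(image_gray, cmap='gray')
plt.axis('off')
plt.show()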

Neural network development:

Import Libraries

import tensorflow as tf

from tensorflow.keras.models import Sequential

from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D

from tensorflow.keras.losses import SparseCategoricalCrossentropy

from tensorflow.keras.optimizers import Adam

Load and Preprocess Data

# Load dataset (replace with your data loading code)

(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()

# Normalize pixel values to between 0 and 1

X_train, X_test = X_train / 255.0, X_test / 255.0


Define the Model Architecture

model = Sequential([
    Flatten(input_shape=(28, 28)),   # Input layer (flatten 28x28 images)
    Dense(128, activation='relu'),   # Hidden layer with 128 neurons and ReLU activation
    Dense(10, activation='softmax')  # Output layer with 10 neurons (one per class) and softmax activation
])

Compile the Model

model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

Train the Model

history = model.fit(X_train, y_train, epochs=10, validation_data=(X_test, y_test))

Evaluate the Model

test_loss, test_acc = model.evaluate(X_test, y_test)

print('Test accuracy:', test_acc)

Make Predictions

predictions = model.predict(X_test)
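model.predict returns a row of 10 softmax probabilities per image; taking the argmax of each row gives the predicted class label:

import numpy as np

predicted_labels = np.argmax(predictions, axis=1)  # index of the highest-probability class
print('First 10 predicted labels:', predicted_labels[:10])
print('First 10 true labels:', y_test[:10])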

CNNs:

Import Libraries

import tensorflow as tf

from tensorflow.keras import Sequential

from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense

Load and Preprocess Data

# Load dataset (replace with your data loading code)

(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()

# Normalize pixel values to between 0 and 1


X_train, X_test = X_train / 255.0, X_test / 255.0

# Reshape data for CNN input (add a channel dimension)

X_train = X_train.reshape((-1, 28, 28, 1))

X_test = X_test.reshape((-1, 28, 28, 1))

Define the CNN Architecture

model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    Flatten(),
    Dense(64, activation='relu'),
    Dense(10, activation='softmax')
])

Compile the Model

model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

Train the Model

history = model.fit(X_train, y_train, epochs=10,
                    validation_data=(X_test, y_test))

Evaluate the Model

test_loss, test_acc = model.evaluate(X_test, y_test)

print('Test accuracy:', test_acc)

Visualize Training History

import matplotlib.pyplot as plt


plt.plot(history.history['accuracy'], label='accuracy')

plt.plot(history.history['val_accuracy'], label='val_accuracy')

plt.xlabel('Epoch')

plt.ylabel('Accuracy')

plt.legend(loc='lower right')

plt.show()
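Once training looks acceptable, the model can be saved and reloaded later without retraining; a minimal sketch (the filename is illustrative, and older TensorFlow versions may need an .h5 extension instead):

model.save('mnist_cnn.keras')  # illustrative filename

reloaded = tf.keras.models.load_model('mnist_cnn.keras')
print('Reloaded test accuracy:', reloaded.evaluate(X_test, y_test, verbose=0)[1])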
