import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, MinMaxScaler  # preprocessing utilities for scaling and normalization
url = 'https://example.com/your-dataset.csv'
data = pd.read_csv(url)
print(data.isnull().sum())
data_dropped = data.dropna()  # option 1: drop rows with missing values
data_imputed = data.fillna(data.mean(numeric_only=True))  # option 2: impute numeric columns with the column mean
X = data_imputed.drop('target', axis=1)  # Replace 'target' with your actual target column name
y = data_imputed['target']
# Standard scaling (zero mean, unit variance)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
# Min-max normalization (rescale to [0, 1])
normalizer = MinMaxScaler()
X_normalized = normalizer.fit_transform(X)
print("Scaled Data:")
print(X_scaled_df.head()
print("\nNormalized Data:")
print(X_normalized_df.head())
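To make the difference between the two transforms concrete, here is a minimal sketch on a made-up three-value column (the numbers are illustrative only, not from the dataset):
sample = np.array([[1.0], [5.0], [10.0]])
print(StandardScaler().fit_transform(sample).ravel())   # zero mean, unit variance: approx [-1.18, -0.09, 1.27]
print(MinMaxScaler().fit_transform(sample).ravel())     # rescaled to [0, 1]: [0.0, 0.444..., 1.0]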
Exploratory Data Analysis: summary statistics and visualizations (histograms, boxplots, countplots, scatter plots) using matplotlib and seaborn.
Libraries:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sns.set(style="whitegrid")
Data loading:
url = 'https://example.com/your-dataset.csv'
data = pd.read_csv(url)
print(data.head())
print(data.describe())
print(data.describe(include=['object', 'category']))
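For categorical columns, value_counts() gives the same information as a countplot in tabular form; 'categorical_column' below is a placeholder column name:
print(data['categorical_column'].value_counts())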
Visualization:
Histogram:
plt.figure(figsize=(10, 6))
sns.histplot(data['numerical_column'], kde=True)
plt.show()
data.hist(figsize=(15, 10), bins=20, edgecolor='black')
plt.tight_layout()
plt.show()
Boxplots:
plt.figure(figsize=(10, 6))
sns.boxplot(x=data['numerical_column'])
plt.show()
plt.figure(figsize=(15, 10))
sns.boxplot(data=data.select_dtypes(include=['float64', 'int64']))
plt.xticks(rotation=90)
plt.show()
Countplots:
plt.figure(figsize=(10, 6))
sns.countplot(x='categorical_column', data=data)
plt.show()
Scatter Plots:
plt.figure(figsize=(10, 6))
sns.scatterplot(x='numerical_column_1', y='numerical_column_2', data=data)
plt.show()
# Pairwise scatter plots across all numerical columns
sns.pairplot(data.select_dtypes(include=['float64', 'int64']))
plt.show()
Data splitting:
Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
url = 'https://example.com/your-dataset.csv'
data = pd.read_csv(url)
print(data.head())
print(data.describe())
print(data.info())
print(data.isnull().sum())
# Histograms
data.hist(figsize=(15, 10), bins=20, edgecolor='black')
plt.suptitle('Histograms')
plt.show()
X = data.drop('target', axis=1) # Replace 'target' with your actual target column name
y = data['target']
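The split itself can be done with scikit-learn's train_test_split; a minimal sketch assuming an 80/20 split and the X and y defined above:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
print(X_train.shape, X_test.shape)
For classification problems, passing stratify=y keeps the class proportions similar in both splits.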
Model development:
Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
Load Data
url = 'https://example.com/your-dataset.csv'
data = pd.read_csv(url)
print(data.describe())
Visualizations
# Histograms
data.hist(figsize=(15, 10), bins=20)
plt.show()
# Boxplots
plt.figure(figsize=(20, 10))
sns.boxplot(data=data.select_dtypes(include=['float64', 'int64']))
plt.show()
sns.countplot(x='categorical_column', data=data)
plt.show()
# Scatter plots
sns.pairplot(data)
plt.show()
Data Preprocessing
print(data.isnull().sum())
data = data.fillna(data.mean(numeric_only=True))
# data = data.dropna()
X = data.drop('target', axis=1) # Replace 'target' with your actual target column name
y = data['target']
# Standard Scaling
from sklearn.preprocessing import StandardScaler, MinMaxScaler
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
# Normalization
normalizer = MinMaxScaler()
X_normalized = normalizer.fit_transform(X)
Model Development
Logistic Regression
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix
# Train/test split on the scaled features
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)
model = LogisticRegression()
model.fit(X_train, y_train)
# Make predictions
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
# Plot the confusion matrix as a heatmap
cm = confusion_matrix(y_test, y_pred)
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('Confusion Matrix')
plt.show()
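Accuracy alone can hide per-class behaviour; scikit-learn's classification_report adds precision, recall and F1 per class, reusing y_test and y_pred from above:
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred))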
Model training:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
# Load data
url = 'https://example.com/your-dataset.csv'
data = pd.read_csv(url)
data = data.fillna(data.mean(numeric_only=True))
X = data.drop('target', axis=1)  # Replace 'target' with your actual target column name
y = data['target']
# Scaling and normalization
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
normalizer = MinMaxScaler()
X_normalized = normalizer.fit_transform(X)
# Train/test split on the scaled features
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)
# Logistic Regression
log_reg = LogisticRegression()
log_reg.fit(X_train, y_train)
y_pred_log_reg = log_reg.predict(X_test)
# Decision Tree
decision_tree = DecisionTreeClassifier()
decision_tree.fit(X_train, y_train)
y_pred_decision_tree = decision_tree.predict(X_test)
# Random Forest
random_forest = RandomForestClassifier()
random_forest.fit(X_train, y_train)
y_pred_random_forest = random_forest.predict(X_test)
# Model evaluation
print("Logistic Regression accuracy:", accuracy_score(y_test, y_pred_log_reg))
print("Decision Tree accuracy:", accuracy_score(y_test, y_pred_decision_tree))
print("Random Forest accuracy:", accuracy_score(y_test, y_pred_random_forest))
# Visualization: confusion matrix for each model
for name, y_pred in [('Logistic Regression', y_pred_log_reg),
                     ('Decision Tree', y_pred_decision_tree),
                     ('Random Forest', y_pred_random_forest)]:
    sns.heatmap(confusion_matrix(y_test, y_pred), annot=True, fmt='d', cmap='Blues')
    plt.title(name)
    plt.xlabel('Predicted')
    plt.ylabel('Actual')
    plt.show()
Model evaluation
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_curve, roc_auc_score
# Load data
url = 'https://example.com/your-dataset.csv'
data = pd.read_csv(url)
data = data.fillna(data.mean(numeric_only=True))
X = data.drop('target', axis=1)  # Replace 'target' with your actual target column name
y = data['target']
Data Preprocessing
# Scaling and Normalization
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
normalizer = MinMaxScaler()
X_normalized = normalizer.fit_transform(X)
# Train/test split on the scaled features
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)
# Logistic Regression
log_reg = LogisticRegression()
log_reg.fit(X_train, y_train)
y_pred_log_reg = log_reg.predict(X_test)
y_prob_log_reg = log_reg.predict_proba(X_test)[:, 1]
decision_tree = DecisionTreeClassifier()
decision_tree.fit(X_train, y_train)
y_pred_decision_tree = decision_tree.predict(X_test)
y_prob_decision_tree = decision_tree.predict_proba(X_test)[:, 1]
random_forest = RandomForestClassifier()
random_forest.fit(X_train, y_train)
y_pred_random_forest = random_forest.predict(X_test)
y_prob_random_forest = random_forest.predict_proba(X_test)[:, 1]
# Model evaluation
# ROC Curve (assumes a binary target; y_prob_* are positive-class probabilities)
for name, y_prob in [('Logistic Regression', y_prob_log_reg),
                     ('Decision Tree', y_prob_decision_tree),
                     ('Random Forest', y_prob_random_forest)]:
    fpr, tpr, _ = roc_curve(y_test, y_prob)
    plt.plot(fpr, tpr, label=name)
plt.plot([0, 1], [0, 1], linestyle='--', color='grey')  # chance line
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve')
plt.legend(loc='best')
plt.show()
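Each ROC curve can be summarized as a single area-under-curve score; a minimal sketch with roc_auc_score, reusing the probability arrays above:
print("Logistic Regression AUC:", roc_auc_score(y_test, y_prob_log_reg))
print("Decision Tree AUC:", roc_auc_score(y_test, y_prob_decision_tree))
print("Random Forest AUC:", roc_auc_score(y_test, y_prob_random_forest))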
Image preprocessing
import cv2
import numpy as np
import matplotlib.pyplot as plt
from io import BytesIO
from PIL import Image
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import requests
from google.colab import files  # assumes a Google Colab environment
# Upload images
uploaded = files.upload()
image_path = next(iter(uploaded))
image = Image.open(BytesIO(uploaded[image_path]))
# Show the original image
plt.imshow(image)
plt.axis('off')
plt.show()
# Resize the image to a fixed size
image_resized = image.resize((224, 224))
plt.imshow(image_resized)
plt.axis('off')
plt.show()
# Normalize pixel values to [0, 1]
image_normalized = np.array(image_resized) / 255.0
plt.imshow(image_normalized)
plt.axis('off')
plt.show()
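cv2 is imported above but the steps so far use PIL; an equivalent OpenCV sketch, assuming the uploaded file was also saved to the working directory (which files.upload() does in Colab):
img_cv = cv2.imread(image_path)
img_cv = cv2.cvtColor(img_cv, cv2.COLOR_BGR2RGB)   # OpenCV loads channels as BGR
img_cv_resized = cv2.resize(img_cv, (224, 224))
img_cv_normalized = img_cv_resized / 255.0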
# Data augmentation
datagen = ImageDataGenerator(
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)
image_array = np.array(image_resized)
image_array = image_array.reshape((1,) + image_array.shape)  # add a batch dimension for the generator
# Display a few augmented versions of the image
i = 0
for batch in datagen.flow(image_array, batch_size=1):
    plt.figure(i)
    imgplot = plt.imshow(np.squeeze(batch).astype('uint8'))
    plt.axis('off')
    i += 1
    if i % 4 == 0:
        break
plt.show()
# Save the normalized image back to disk
normalized_image_pil = Image.fromarray((image_normalized * 255).astype('uint8'))
normalized_image_pil.save('normalized_image.jpg')
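In a full training pipeline the generator is usually passed straight to model.fit rather than looped manually; a hypothetical sketch (train_images, train_labels, and a compiled model are placeholders, not defined above):
# Hypothetical: feed augmented batches directly to a compiled Keras model
model.fit(datagen.flow(train_images, train_labels, batch_size=32),
          steps_per_epoch=len(train_images) // 32,
          epochs=10)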
Import Libraries
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
model = Sequential([
    Dense(128, activation='relu'),    # Hidden layer with 128 neurons and ReLU activation
    Dense(10, activation='softmax')   # Output layer with 10 neurons (for 10 classes) and softmax activation
])
model.compile(optimizer='adam',
loss='sparse_categorical_crossentropy',
metrics=['accuracy'])
Train the Model and Make Predictions
model.fit(X_train, y_train, epochs=10)  # X_train/y_train from the earlier split
predictions = model.predict(X_test)
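model.predict returns one softmax probability per class for each sample; the predicted label is the index of the largest probability:
import numpy as np
predicted_classes = np.argmax(predictions, axis=1)
print(predicted_classes[:10])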
CNNs:
Import Libraries
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),  # assumed 28x28 single-channel inputs (e.g. MNIST)
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(64, activation='relu'),
    Dense(10, activation='softmax')
])
model.compile(optimizer='adam',
loss='sparse_categorical_crossentropy',
metrics=['accuracy'])
history = model.fit(train_images, train_labels, epochs=10,
                    validation_data=(test_images, test_labels))
import matplotlib.pyplot as plt
plt.plot(history.history['accuracy'], label='accuracy')
plt.plot(history.history['val_accuracy'], label='val_accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend(loc='lower right')
plt.show()
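Final test performance can be read off with model.evaluate, reusing the test_images and test_labels passed as validation data above:
test_loss, test_acc = model.evaluate(test_images, test_labels, verbose=2)
print("Test accuracy:", test_acc)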