0% found this document useful (0 votes)
36 views4 pages

Lab7 Hameed 211086

Uploaded by

Abdul Moaid
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
36 views4 pages

Lab7 Hameed 211086

Uploaded by

Abdul Moaid
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 4

3/19/24, 11:44 PM lab7.ipynb - Colaboratory

Name Hameed Ullah

211086-A

AI Lab TASK 7

# Importing necessary libraries


from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap

# Load the dataset
#
# The file has no header row and many more fields per record than the three
# used here. Passing only three `names` makes pandas treat the leading fields
# as an index and attach the names to the LAST three fields, which is why the
# earlier run reported `duration` as float64. Selecting the fields by position
# keeps every name on the column it actually describes.
file_path = "/content/drive/MyDrive/Colab Notebooks/KDDTrain+.txt"
column_names = ["duration", "protocol_type", "attack_type"]
# Assumes the standard NSL-KDD layout (41 features, then the attack label,
# then a difficulty score) -- TODO confirm against the file.
column_positions = [0, 1, 41]
data = pd.read_csv(file_path, header=None, usecols=column_positions, names=column_names)

# Preprocessing
selected_columns = ["duration", "protocol_type", "attack_type"]
preprocessed_data = data[selected_columns]

# Check for missing values
print(preprocessed_data.isnull().sum())

# Splitting the dataset into features (x) and target (y)
x = preprocessed_data.drop(columns=["attack_type"])
y = preprocessed_data["attack_type"]

# Splitting the dataset into training and test sets
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=0)

# Check data types
print(x_train.dtypes)

# Convert 'protocol_type' to numeric using label encoding.
# Work on an explicit copy: the split result is derived from another frame,
# and adding a column to it directly is a chained assignment.
x_train = x_train.copy()
label_encoder = LabelEncoder()
x_train['protocol_type_encoded'] = label_encoder.fit_transform(x_train['protocol_type'])

# Encode the target as integer class codes. contourf() needs numeric values,
# so the tree is trained on the codes and `class_names` maps them back to the
# original labels for the legend.
target_encoder = LabelEncoder()
y_set = target_encoder.fit_transform(y_train)
class_names = target_encoder.classes_
n_classes = len(class_names)

# Extract the feature matrix used for fitting and visualization
x_set = x_train[['duration', 'protocol_type_encoded']].values

# Check data types after label encoding
print(x_set.dtype)
print(np.isnan(x_set).sum())

# Fitting Decision Tree classifier to the training set
classifier = DecisionTreeClassifier(criterion='entropy', random_state=0)
classifier.fit(x_set, y_set)

# Meshgrid creation: one grid point per unit step, padded by 1 on every side
x1_min, x1_max = x_set[:, 0].min() - 1, x_set[:, 0].max() + 1
x2_min, x2_max = x_set[:, 1].min() - 1, x_set[:, 1].max() + 1
x1, x2 = np.meshgrid(np.arange(start=x1_min, stop=x1_max, step=1),
                     np.arange(start=x2_min, stop=x2_max, step=1))

# One colour per class instead of a fixed two-colour map, so every class
# present in the target gets its own region/marker colour.
class_cmap = ListedColormap(plt.get_cmap('nipy_spectral')(np.linspace(0, 1, n_classes)))
# Level edges at k - 0.5 put each integer class code in its own band.
class_levels = np.arange(n_classes + 1) - 0.5

# Plotting the decision boundary
plt.figure(figsize=(10, 6))
grid_points = np.array([x1.ravel(), x2.ravel()]).T
grid_pred = classifier.predict(grid_points).reshape(x1.shape)
plt.contourf(x1, x2, grid_pred, levels=class_levels, alpha=0.75, cmap=class_cmap)

# Plotting the data points
for class_index, class_name in enumerate(class_names):
    members = y_set == class_index
    # `color=` (not `c=`) is the documented way to pass a single RGBA value;
    # `c=` with a 4-tuple triggers matplotlib's ambiguity warning.
    plt.scatter(x_set[members, 0], x_set[members, 1],
                color=class_cmap(class_index), label=class_name)

# Setting plot labels and legend
plt.xlim(x1.min(), x1.max())
plt.ylim(x2.min(), x2.max())
plt.title('Decision Tree Algorithm (Training set)')
plt.xlabel('duration')
plt.ylabel('protocol_type_encoded')
plt.legend()

# Show plot
plt.show()

duration 0
protocol_type 0
attack_type 0
dtype: int64
duration float64
protocol_type object
dtype: object
float64
0
<ipython-input-25-49c950067830>:61: UserWarning: *c* argument looks like a single numeric RGB or RGBA sequence, wh
plt.scatter(x_set[y_set == j, 0], x_set[y_set == j, 1],

# Importing necessary libraries


from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap

# Load the dataset
#
# The file has no header row and many more fields per record than the three
# used here. Passing only three `names` makes pandas treat the leading fields
# as an index and attach the names to the LAST three fields, which is why the
# earlier run reported `duration` as float64. Selecting the fields by position
# keeps every name on the column it actually describes.
file_path = "/content/drive/MyDrive/Colab Notebooks/KDDTest+.txt"
column_names = ["duration", "protocol_type", "attack_type"]
# Assumes the standard NSL-KDD layout (41 features, then the attack label,
# then a difficulty score) -- TODO confirm against the file.
column_positions = [0, 1, 41]
data = pd.read_csv(file_path, header=None, usecols=column_positions, names=column_names)

# Preprocessing
selected_columns = ["duration", "protocol_type", "attack_type"]
preprocessed_data = data[selected_columns]

# Check for missing values
print(preprocessed_data.isnull().sum())

# Splitting the dataset into features (x) and target (y)
x = preprocessed_data.drop(columns=["attack_type"])
y = preprocessed_data["attack_type"]

# Splitting the dataset into training and test sets
# NOTE(review): this script fits a fresh tree on a 75% split of KDDTest+ and
# plots those same points; it does not evaluate a model trained on the
# training file, and x_test / y_test are never used. Confirm whether a true
# held-out evaluation was intended for the "testing set" figure.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=0)

# Check data types
print(x_train.dtypes)

# Convert 'protocol_type' to numeric using label encoding.
# Work on an explicit copy: the split result is derived from another frame,
# and adding a column to it directly is a chained assignment.
x_train = x_train.copy()
label_encoder = LabelEncoder()
x_train['protocol_type_encoded'] = label_encoder.fit_transform(x_train['protocol_type'])

# Encode the target as integer class codes. contourf() needs numeric values,
# so the tree is trained on the codes and `class_names` maps them back to the
# original labels for the legend.
target_encoder = LabelEncoder()
y_set = target_encoder.fit_transform(y_train)
class_names = target_encoder.classes_
n_classes = len(class_names)

# Extract the feature matrix used for fitting and visualization
x_set = x_train[['duration', 'protocol_type_encoded']].values

# Check data types after label encoding
print(x_set.dtype)
print(np.isnan(x_set).sum())

# Fitting Decision Tree classifier to the selected split
classifier = DecisionTreeClassifier(criterion='entropy', random_state=0)
classifier.fit(x_set, y_set)

# Meshgrid creation: one grid point per unit step, padded by 1 on every side
x1_min, x1_max = x_set[:, 0].min() - 1, x_set[:, 0].max() + 1
x2_min, x2_max = x_set[:, 1].min() - 1, x_set[:, 1].max() + 1
x1, x2 = np.meshgrid(np.arange(start=x1_min, stop=x1_max, step=1),
                     np.arange(start=x2_min, stop=x2_max, step=1))

# One colour per class instead of a fixed two-colour map, so every class
# present in the target gets its own region/marker colour.
class_cmap = ListedColormap(plt.get_cmap('nipy_spectral')(np.linspace(0, 1, n_classes)))
# Level edges at k - 0.5 put each integer class code in its own band.
class_levels = np.arange(n_classes + 1) - 0.5

# Plotting the decision boundary
plt.figure(figsize=(10, 6))
grid_points = np.array([x1.ravel(), x2.ravel()]).T
grid_pred = classifier.predict(grid_points).reshape(x1.shape)
plt.contourf(x1, x2, grid_pred, levels=class_levels, alpha=0.75, cmap=class_cmap)

# Plotting the data points
for class_index, class_name in enumerate(class_names):
    members = y_set == class_index
    # `color=` (not `c=`) is the documented way to pass a single RGBA value;
    # `c=` with a 4-tuple triggers matplotlib's ambiguity warning.
    plt.scatter(x_set[members, 0], x_set[members, 1],
                color=class_cmap(class_index), label=class_name)

# Setting plot labels and legend
plt.xlim(x1.min(), x1.max())
plt.ylim(x2.min(), x2.max())
plt.title('Decision Tree Algorithm (testing set)')
plt.xlabel('duration')
plt.ylabel('protocol_type_encoded')
plt.legend()

# Show plot
plt.show()

https://colab.research.google.com/drive/1QkVj8I_GkxQLWkwssl4-iFD1oQmMIZ9Y#scrollTo=51BptnM6_KHL&printMode=true 3/4
3/19/24, 11:44 PM lab7.ipynb - Colaboratory

duration 0
protocol_type 0
attack_type 0
dtype: int64
duration float64
protocol_type object
dtype: object
float64
0
<ipython-input-26-72b12f93fe69>:61: UserWarning: *c* argument looks like a single numeric RGB or RGBA sequence, wh
plt.scatter(x_set[y_set == j, 0], x_set[y_set == j, 1],

https://colab.research.google.com/drive/1QkVj8I_GkxQLWkwssl4-iFD1oQmMIZ9Y#scrollTo=51BptnM6_KHL&printMode=true 4/4

You might also like