# Source: exported from Scribd (downloadable as TXT or PDF, or readable online).
# Page 1 of 1.
# Trains a depth-limited decision tree classifier on a CSV dataset, prints its
# accuracy on a held-out test split, and saves a plot of the fitted tree.

# Importing required libraries
import pandas as pd  # For data manipulation
from matplotlib import pyplot as plt  # To draw, display and save the tree plot
from sklearn import metrics  # To evaluate the model's performance
from sklearn import tree  # Provides plot_tree for visualizing the fitted model
from sklearn.model_selection import train_test_split  # To split the data into training and testing sets
from sklearn.tree import DecisionTreeClassifier  # To create a Decision Tree model

# Loading the dataset
# Reads a CSV file into a pandas DataFrame.
data1 = pd.read_csv('/content/drive/My Drive/logisticDS.csv')
# Displays the first few rows of the dataset for a quick preview.
print(data1.head())

# Defining feature columns and target variable
fcols = [
    'Pregnancies',
    'Glucose',
    'BloodPressure',
    'SkinThickness',
    'Insulin',
    'BMI',
    'DiabetesPedigreeFunction',
    'Age',
]
x = data1[fcols]  # Selecting the features (independent variables)
y = data1.Outcome  # Selecting the target (dependent variable)

# Splitting the data into training and testing sets
# `test_size=0.3` holds out 30% of the rows for testing and trains on the
# remaining 70%; `random_state=1` makes the split reproducible across runs.
xtrain, xtest, ytrain, ytest = train_test_split(
    x, y, test_size=0.3, random_state=1
)

# Initializing and training the Decision Tree Classifier
# The tree is capped at a maximum depth of 3.
classify = DecisionTreeClassifier(max_depth=3)
# `fit` is called on the training split only; its return value is kept as
# `classify1` and used for prediction and plotting below.
classify1 = classify.fit(xtrain, ytrain)

# Making predictions with the trained model on the test set
ypred = classify1.predict(xtest)  # Predicting the target for the test set

# Evaluating the model's accuracy
# Outputs the model accuracy by comparing actual and predicted values.
print("Accuracy:", metrics.accuracy_score(ytest, ypred))

# Visualizing the Decision Tree
fig = plt.figure()  # Initializes a new figure
tree.plot_tree(classify1)  # Plots the trained Decision Tree structure
plt.show()  # Displays the plot
# Saved through the `fig` handle created above, as an image named 'dt.png'.
fig.savefig("dt.png")