ML N PY Programs
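The listing that produced the output below is not part of this extract; a minimal sketch, assuming the dataset is available locally as 'iris.csv' (a hypothetical path) with the column names shown in the output:

import pandas as pd

df = pd.read_csv('iris.csv')  # hypothetical path to the iris CSV
print(df.shape)               # (rows, columns)
print(type(df))
print(df.head(3))             # first three records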
(150, 5)
<class 'pandas.core.frame.DataFrame'>
sepal.length sepal.width petal.length petal.width variety
0 5.1 3.5 1.4 0.2 Setosa
1 4.9 3.0 1.4 0.2 Setosa
2 4.7 3.2 1.3 0.2 Setosa
from sklearn.datasets import load_iris

iris = load_iris()
print(iris.DESCR)  # the dataset description includes the summary statistics below
:Summary Statistics:
Q3. Write a Python program to split the iris dataset into its attributes (X) and labels (y). The X variable contains the first four columns (i.e. the attributes) and y contains the labels of the dataset.
from sklearn.datasets import load_iris

iris = load_iris()
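The rest of this listing is truncated in the extract; a minimal sketch of the X/y split and the train/test partition whose output follows (the 70/30 ratio and random_state are assumptions, chosen to match the 45-row test set printed below):

import pandas as pd
from sklearn.model_selection import train_test_split

X = iris.data    # first four columns: the attributes
y = iris.target  # the labels

# Rebuild a labelled DataFrame so the partitions print with species names
df = pd.DataFrame(X, columns=['sepal_length', 'sepal_width', 'petal_length', 'petal_width'])
df['species'] = [iris.target_names[t] for t in y]

train_set, test_set = train_test_split(df, test_size=0.3, random_state=42)
print("Training Set:")
print(train_set)
print("Test Set:")
print(test_set)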
Training Set:
sepal_length sepal_width petal_length petal_width species
81 5.5 2.4 3.7 1.0 versicolor
133 6.3 2.8 5.1 1.5 virginica
137 6.4 3.1 5.5 1.8 virginica
75 6.6 3.0 4.4 1.4 versicolor
109 7.2 3.6 6.1 2.5 virginica
.. ... ... ... ... ...
71 6.1 2.8 4.0 1.3 versicolor
106 4.9 2.5 4.5 1.7 virginica
14 5.8 4.0 1.2 0.2 setosa
92 5.8 2.6 4.0 1.2 versicolor
102 7.1 3.0 5.9 2.1 virginica
Test Set:
sepal_length sepal_width petal_length petal_width species
73 6.1 2.8 4.7 1.2 versicolor
18 5.7 3.8 1.7 0.3 setosa
118 7.7 2.6 6.9 2.3 virginica
78 6.0 2.9 4.5 1.5 versicolor
76 6.8 2.8 4.8 1.4 versicolor
31 5.4 3.4 1.5 0.4 setosa
64 5.6 2.9 3.6 1.3 versicolor
141 6.9 3.1 5.1 2.3 virginica
68 6.2 2.2 4.5 1.5 versicolor
82 5.8 2.7 3.9 1.2 versicolor
110 6.5 3.2 5.1 2.0 virginica
12 4.8 3.0 1.4 0.1 setosa
36 5.5 3.5 1.3 0.2 setosa
9 4.9 3.1 1.5 0.1 setosa
19 5.1 3.8 1.5 0.3 setosa
56 6.3 3.3 4.7 1.6 versicolor
104 6.5 3.0 5.8 2.2 virginica
69 5.6 2.5 3.9 1.1 versicolor
55 5.7 2.8 4.5 1.3 versicolor
132 6.4 2.8 5.6 2.2 virginica
29 4.7 3.2 1.6 0.2 setosa
127 6.1 3.0 4.9 1.8 virginica
26 5.0 3.4 1.6 0.4 setosa
128 6.4 2.8 5.6 2.1 virginica
131 7.9 3.8 6.4 2.0 virginica
145 6.7 3.0 5.2 2.3 virginica
108 6.7 2.5 5.8 1.8 virginica
143 6.8 3.2 5.9 2.3 virginica
45 4.8 3.0 1.4 0.3 setosa
30 4.8 3.1 1.6 0.2 setosa
22 4.6 3.6 1.0 0.2 setosa
15 5.7 4.4 1.5 0.4 setosa
65 6.7 3.1 4.4 1.4 versicolor
11 4.8 3.4 1.6 0.2 setosa
42 4.4 3.2 1.3 0.2 setosa
146 6.3 2.5 5.0 1.9 virginica
51 6.4 3.2 4.5 1.5 versicolor
27 5.2 3.5 1.5 0.2 setosa
4 5.0 3.6 1.4 0.2 setosa
32 5.2 4.1 1.5 0.1 setosa
142 5.8 2.7 5.1 1.9 virginica
85 6.0 3.4 4.5 1.6 versicolor
86 6.7 3.1 4.7 1.5 versicolor
16 5.4 3.9 1.3 0.4 setosa
10 5.4 3.7 1.5 0.2 setosa
import pandas as pd

def find_most_specific_hypothesis(training_data):
    # Check if there are positive examples
    positive_examples = training_data[training_data['label'] == 'Y']
    if positive_examples.empty:
        print("No positive examples in the training data. Setting the hypothesis to all '?'.")
        # Set the hypothesis to a default value, such as all '?'
        return ['?'] * (len(training_data.columns) - 1)
    # Start from the first positive example and generalise wherever attributes differ
    hypothesis = list(positive_examples.iloc[0, :-1])
    for _, row in positive_examples.iterrows():
        for i in range(len(hypothesis)):
            if hypothesis[i] != row.iloc[i]:
                hypothesis[i] = '?'
    return hypothesis

def initialize_hypothesis(data):
    # [0]: most specific hypothesis S, [1]: most general hypothesis G
    n_attributes = len(data.columns) - 1
    hypothesis = [['0'] * n_attributes, ['?'] * n_attributes]
    return hypothesis

def candidate_elimination(training_data):
    hypothesis_space = initialize_hypothesis(training_data)
    for _, instance in training_data.iterrows():
        if instance.iloc[-1] == 'Y':  # positive example: generalise S
            for i in range(len(instance) - 1):
                if hypothesis_space[0][i] == '0':
                    hypothesis_space[0][i] = instance.iloc[i]
                elif hypothesis_space[0][i] != instance.iloc[i]:
                    hypothesis_space[0][i] = '?'
        else:  # negative example: specialise G where the instance and S disagree
            for i in range(len(instance) - 1):
                if instance.iloc[i] != hypothesis_space[0][i]:
                    hypothesis_space[1][i] = hypothesis_space[0][i]
    return hypothesis_space
iris = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data', header=None)
iris.columns = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'label']
print("Training Data:")
print(iris)
hypotheses = candidate_elimination(iris)
Training Data:
sepal_length sepal_width petal_length petal_width label
0 5.1 3.5 1.4 0.2 Iris-setosa
1 4.9 3.0 1.4 0.2 Iris-setosa
2 4.7 3.2 1.3 0.2 Iris-setosa
3 4.6 3.1 1.5 0.2 Iris-setosa
4 5.0 3.6 1.4 0.2 Iris-setosa
.. ... ... ... ... ...
145 6.7 3.0 5.2 2.3 Iris-virginica
146 6.3 2.5 5.0 1.9 Iris-virginica
147 6.5 3.0 5.2 2.0 Iris-virginica
148 6.2 3.4 5.4 2.3 Iris-virginica
149 5.9 3.0 5.1 1.8 Iris-virginica
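The decision-tree program's setup is missing from the extract; an assumed preamble (the 80/20 split and random_state are guesses, consistent with the 30-sample test sets reported later in the file):

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

iris = load_iris()
X_train, X_test, y_train, y_test = train_test_split(
    iris.data, iris.target, test_size=0.2, random_state=42)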
clf = DecisionTreeClassifier(random_state=42)
clf.fit(X_train, y_train)  # train on the training split
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
new_sample = [[5.1, 3.5, 1.4, 0.2]]  # hypothetical measurements for a new flower
predicted_class = clf.predict(new_sample)
class_name = iris.target_names[predicted_class][0]
print(f'Predicted class: {class_name}')
import numpy as np

class NeuralNetwork:
    def __init__(self, input_size, hidden_size, output_size):
        # Initialize weights and biases
        self.weights_input_hidden = np.random.rand(input_size, hidden_size)
        self.bias_hidden = np.zeros((1, hidden_size))
        self.weights_hidden_output = np.random.rand(hidden_size, output_size)
        self.bias_output = np.zeros((1, output_size))

    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))

    def sigmoid_derivative(self, x):
        return x * (1 - x)  # x is already a sigmoid activation

    def forward(self, inputs):
        self.hidden_layer_output = self.sigmoid(np.dot(inputs, self.weights_input_hidden) + self.bias_hidden)
        self.predicted_output = self.sigmoid(np.dot(self.hidden_layer_output, self.weights_hidden_output) + self.bias_output)
        return self.predicted_output

    def backward(self, inputs, targets, learning_rate):
        error = targets - self.predicted_output
        # Output layer
        output_delta = error * self.sigmoid_derivative(self.predicted_output)
        hidden_error = output_delta.dot(self.weights_hidden_output.T)
        # Hidden layer
        hidden_delta = hidden_error * self.sigmoid_derivative(self.hidden_layer_output)
        self.weights_hidden_output += self.hidden_layer_output.T.dot(output_delta) * learning_rate
        self.bias_output += np.sum(output_delta, axis=0, keepdims=True) * learning_rate
        self.weights_input_hidden += inputs.T.dot(hidden_delta) * learning_rate
        self.bias_hidden += np.sum(hidden_delta, axis=0, keepdims=True) * learning_rate

    def predict(self, inputs):
        return self.forward(inputs)
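The code that builds and trains the network is cut off in this extract; a minimal sketch, assuming an XOR-style training set (the inputs, network sizes, epoch count, and learning rate are assumptions, not values from the original listing):

# Hypothetical training setup: a 2-4-1 network learning XOR
inputs = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
targets = np.array([[0], [1], [1], [0]])

nn = NeuralNetwork(input_size=2, hidden_size=4, output_size=1)
for epoch in range(10000):
    nn.forward(inputs)                               # forward pass caches activations
    nn.backward(inputs, targets, learning_rate=0.1)  # one gradient step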
# Query the trained network one sample at a time
for i in range(len(inputs)):
    prediction = nn.predict(np.array([inputs[i]]))
    print(f"Input: {inputs[i]}, Predicted Output: {prediction}")
from sklearn.naive_bayes import GaussianNB

# Train Gaussian Naive Bayes on the same train/test split as above
nb_classifier = GaussianNB()
nb_classifier.fit(X_train, y_train)
y_pred = nb_classifier.predict(X_test)
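The evaluation code is not in the extract; a sketch that would produce output of the form shown below:

from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

print(f'Accuracy: {accuracy_score(y_test, y_pred)}')
print(f'Confusion Matrix:\n{confusion_matrix(y_test, y_pred)}')
print(f'Classification Report:\n{classification_report(y_test, y_pred)}')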
Accuracy: 1.0
Confusion Matrix:
[[10 0 0]
[ 0 9 0]
[ 0 0 11]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00         9
           2       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30
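The clustering program that follows is missing its data-loading preamble; a minimal sketch, assuming the iris features are placed in a DataFrame (the variables data and scaler are taken from the code below, the data source is a guess):

import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans, AgglomerativeClustering

iris = load_iris()
data = pd.DataFrame(iris.data, columns=iris.feature_names)  # assumed source
scaler = StandardScaler()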
scaled_data = scaler.fit_transform(data)
kmeans = KMeans(n_clusters=3)  # default n_init triggers the FutureWarning below
kmeans_labels = kmeans.fit_predict(scaled_data)
hierarchical = AgglomerativeClustering(n_clusters=3)
hierarchical_labels = hierarchical.fit_predict(scaled_data)
data['KMeans_Cluster'] = kmeans_labels
data['Hierarchical_Cluster'] = hierarchical_labels
plt.figure(figsize=(12, 6))
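The plotting and scoring code is cut off after plt.figure; a sketch of what plausibly follows, producing the silhouette printout below (the choice of feature columns for the scatter plots is an assumption):

from sklearn.metrics import silhouette_score

plt.subplot(1, 2, 1)
plt.scatter(scaled_data[:, 0], scaled_data[:, 1], c=kmeans_labels)
plt.title('K-Means Clusters')
plt.subplot(1, 2, 2)
plt.scatter(scaled_data[:, 0], scaled_data[:, 1], c=hierarchical_labels)
plt.title('Hierarchical Clusters')
plt.show()
print('Silhouette Score - K-Means:', silhouette_score(scaled_data, kmeans_labels))
print('Silhouette Score - Hierarchical:', silhouette_score(scaled_data, hierarchical_labels))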
C:\ProgramData\anaconda3\Lib\site-packages\sklearn\cluster\_kmeans.py:1412: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
  super()._check_params_vs_input(X, default_n_init=10)
C:\ProgramData\anaconda3\Lib\site-packages\sklearn\cluster\_kmeans.py:1436: UserWarning: KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=1.
  warnings.warn(
Silhouette Score - K-Means: 0.45994823920518635
Silhouette Score - Hierarchical: 0.446689041028591
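Next, a k-nearest-neighbours classifier. Its imports and train/test split are missing from the extract; an assumed preamble (the 80/20 split and random_state are guesses consistent with the 30-sample test set reported below):

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

iris = load_iris()
X_train, X_test, y_train, y_test = train_test_split(
    iris.data, iris.target, test_size=0.2, random_state=42)
scaler = StandardScaler()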
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)  # apply the same scaling to the test set
k_value = 3  # assumed k; the original value is not shown in the extract
knn_classifier = KNeighborsClassifier(n_neighbors=k_value)
knn_classifier.fit(X_train_scaled, y_train)
y_pred = knn_classifier.predict(X_test_scaled)
accuracy = accuracy_score(y_test, y_pred)
confusion_mat = confusion_matrix(y_test, y_pred)
classification_rep = classification_report(y_test, y_pred)
print(f'Accuracy: {accuracy}')
print(f'Confusion Matrix:\n{confusion_mat}')
print(f'Classification Report:\n{classification_rep}')
Accuracy: 1.0
Confusion Matrix:
[[10 0 0]
[ 0 9 0]
[ 0 0 11]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00         9
           2       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30
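The final program fits logistic regression; its imports and data loading are missing here. A minimal sketch of the assumed setup (the iris source is a guess based on the rest of the file):

from warnings import simplefilter
from sklearn.exceptions import ConvergenceWarning
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

iris = load_iris()
X, y = iris.data, iris.target
scaler = StandardScaler()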
simplefilter("ignore", category=ConvergenceWarning)
X_scaled = scaler.fit_transform(X)
# Split the standardised features (80/20 split assumed)
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)
logistic_regression_model = LogisticRegression(max_iter=1000)
logistic_regression_model.fit(X_train, y_train)
y_pred = logistic_regression_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy}')
Accuracy: 1.0