Py Programs
Py Programs
AIM: Write a java program to prepare a simulated data set with unique instances.
SOURCE CODE:
import java.io.FileWriter; // Import the FileWriter class
import java.util.*;
// Fragment of a console program that reads a data-set description from `sc`
// and writes it to a file through `myWriter`.
// NOTE(review): the declarations of `sc`, `myWriter` and of every variable
// assigned below, the enclosing method, any loops, and the closing braces are
// not present in the visible text - this fragment does not compile as shown.
// NOTE(review): IOException lives in java.io, but the visible imports only
// bring in java.io.FileWriter and java.util.*; confirm the import exists.
class dataset {
// Read the attribute count and the instance count (two integers).
noOfAtributes = sc.nextInt();
noOfInstances = sc.nextInt();
try {
// Read one whitespace-delimited token, then discard the rest of that line.
temp_str = sc.next();
sc.nextLine();
// Read a full line into class_attributes.
class_attributes = sc.nextLine();
// Emit the "@DATA" header line (presumably the ARFF data-section marker -
// TODO confirm the intended output format).
myWriter.write("@DATA\n");
// Read one line of row data and append it to the file followed by a newline.
data = sc.nextLine();
myWriter.write(data + "\n");
myWriter.close();
} catch (IOException e) {
// Any write/close failure is reported by printing the stack trace.
e.printStackTrace();
Output:
Mid1
Mid2
abc,def,pqr,xyz
Enter row 1 :
25,28
Enter row 2 :
21,23
Enter row 3 :
14,24
Enter row 4 :
19,22
[Successfully wrote to the file.]
EXPERIMENT – 9
AIM: Write a Python program to generate frequent item sets / association rules using
Apriori algorithm.
Program:
from mlxtend.frequent_patterns import apriori, association_rules
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Load the transactions: each row is one basket, each column one item slot.
df = pd.read_csv('/content/retail_dataset.csv')

# Print first 10 rows (displayed only when run as a notebook cell).
df.head(10)

# Collect every distinct value that occurs in any column.
# The running set is printed after each column, which is why the recorded
# output shows the set once per column.
# NOTE(review): baskets shorter than the widest row are padded with NaN, and
# NaN is kept as an "item" here; it shows up in the frequent item sets.
items = set()
for col in df:
    items.update(df[col].unique())
    print(items)

# One-hot encode the transactions: for every basket, mark each known item
# with 1 if it occurs in the row and 0 otherwise.
itemset = set(items)
encoded_vals = []
for index, row in df.iterrows():
    rowset = set(row)
    labels = {}
    uncommons = list(itemset - rowset)
    commons = list(itemset.intersection(rowset))
    for uc in uncommons:
        labels[uc] = 0
    for com in commons:
        labels[com] = 1
    encoded_vals.append(labels)
ohe_df = pd.DataFrame(encoded_vals)

# Mine item sets whose support is at least 10% of the transactions.
freq_items = apriori(ohe_df, min_support=0.1, use_colnames=True, verbose=1)
freq_items.head(7)
OUTPUT:
{'Pencil', 'Eggs', 'Diaper', 'Bread', 'Milk', 'Bagel', 'Wine', 'Cheese',
'Meat'}
{'Pencil', 'Eggs', 'Diaper', nan, 'Bread', 'Milk', 'Bagel', 'Wine',
'Cheese', 'Meat'}
{'Pencil', 'Eggs', 'Diaper', nan, 'Bread', 'Milk', 'Bagel', 'Wine',
'Cheese', 'Meat'}
{'Pencil', 'Eggs', 'Diaper', nan, 'Bread', 'Milk', 'Bagel', 'Wine',
'Cheese', 'Meat'}
{'Pencil', 'Eggs', 'Diaper', nan, 'Bread', 'Milk', 'Bagel', 'Wine',
'Cheese', 'Meat'}
{'Pencil', 'Eggs', 'Diaper', nan, 'Bread', 'Milk', 'Bagel', 'Wine',
'Cheese', 'Meat'}
{'Pencil', 'Eggs', 'Diaper', nan, 'Bread', 'Milk', 'Bagel', 'Wine',
'Cheese', 'Meat'}
Processing 6 combinations | Sampling itemset size 6
/usr/local/lib/python3.10/dist-packages/mlxtend/frequent_patterns/fpcommon.py:110:
support itemsets
0 1.0 (Eggs)
1 1.0 (nan)
2 1.0 (Bread)
3 1.0 (Bagel)
4 1.0 (Wine)
5 1.0 (Meat)
Program:
# Chi-square test of independence on a contingency table.
from scipy.stats import chi2_contingency
# NOTE(review): `data` (the observed-frequency table) is not defined in the
# visible program, and the result of the call is discarded here, while the
# recorded output prints a p-value and a verdict - the code that builds
# `data` and prints the result is missing from this listing.
chi2_contingency(data)
OUTPUT:
p value is0.1031971404730939
Independent (H0 holds true)
EXPERIMENT – 11
AIM: Write a program of Naive Bayesian classification using Python programming language.
Program:
import pandas as pd
import numpy as np
from sklearn import datasets
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB

# Load the bundled iris data set.
iris = datasets.load_iris()
iris.data  # displayed only when run as a notebook cell

X = iris.data    # feature matrix
y = iris.target  # class label (flower type) for each sample
print(X.shape)
print(y.shape)

# Hold out 20% of the samples for testing; the fixed seed makes the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=9
)

# Fit a Gaussian Naive Bayes classifier on the training split.
nv = GaussianNB()
nv.fit(X_train, y_train)

# Predict the held-out samples and report the fraction classified correctly.
# Printed explicitly so the score also appears when run as a plain script.
y_pred = nv.predict(X_test)
print(accuracy_score(y_test, y_pred))
OUTPUT:
/usr/local/lib/python3.10/dist-packages/ipykernel/ipkernel.py:283:
(150, 4)
(150,)
1.0
EXPERIMENT – 12
AIM: Implement a Java program to perform Apriori algorithm.
SOURCE CODE:
import java.util.*;
// Fragment of an item-counting program: reads 0/1 presence flags for ten named
// items per transaction and tallies single-item and item-pair counts.
// NOTE(review): the enclosing class/method, the loops over i, j and m, the
// prompts, and the declarations of `scanner`, `items`, `nt`, `q` and `nt1`
// are not present in the visible text - this fragment does not compile as shown.
// Number of transactions to read.
int n = scanner.nextInt();
// Display names of the ten items, indexed by item position j / m below.
String[] itemNames = {"Milk", "Bread", "Coffee", "Juice", "Cookies", "Jam", "Tea", "Butter",
"Sugar", "Water"};
System.out.println("List of items:");
// Presence flag of item j in transaction i, as entered by the user.
items[i][j] = scanner.nextInt();
if (items[i][j] == 1) {
// Count one more transaction containing item j.
nt[j]++;
int t1 = 0;
// Flag item j in q (the condition guarding this assignment is not visible).
q[j] = 1;
t1++;
// Count one more co-occurrence of items j and m, then report the pair total.
nt1[j][m]++;
System.out.println("Number of Items of " + itemNames[j] + " & " + itemNames[m] + ": " +
nt1[j][m]);
}
}
output:
Enter the number of transactions:
List of items:
1--Milk
2--Bread
3--Coffee
4--Juice
5--Cookies
6--Jam
7--Tea
8--Butter
9--Sugar
10--Water
Transaction 1:
0
Is Item Coffee present in this transaction? (1/0):
Transaction 2:
Number of Bread: 0
Number of Coffee: 2
Number of Juice: 0
Number of Cookies: 1
Number of Jam: 0
Number of Tea: 1
Number of Butter: 0
Number of Sugar: 2
Number of Water: 0
Milk is selected
Coffee is selected
Cookies is selected
Tea is selected
Sugar is selected
Number of Items of Milk & Bread: 0
Number of Items of Milk & Coffee: 2
SOURCE CODE:
import java.util.*;
// Two-cluster k-means over a fixed set of ten 2-D integer points.
// NOTE(review): this listing is incomplete - the method header, the
// declarations of mean1/mean2/temp1/temp2/part1/part2 and sum11..sum22, the
// convergence loop, the distance comparison (`if`) and most closing braces
// are not present in the visible text, so it does not compile as shown.
class KmeansJ {
// The ten data points as {x, y} pairs; {4,1} appears twice.
int dataset[][] = {
{2,1},
{5,2},
{2,2},
{4,1},
{4,3},
{7,5},
{3,6},
{5,7},
{1,4},
{4,1}
};
// k is the number of partitions; i1/i2 count the points currently placed in
// partition 1/2; itr counts iterations.
int i,j,k=2;
int i1 = 0, i2 = 0, itr = 0;
// Echo the data set, one "x y" pair per line.
for(i=0;i<10;i++) {
System.out.println(dataset[i][0]+" "+dataset[i][1]);
// Initial means: (2,2) for partition 1 and (5,7) for partition 2.
mean1[0][0] = 2;
mean1[0][1] = 2;
mean2[0][0] = 5;
mean2[0][1] = 7;
// Loop till the new mean and previous mean are same
// Clear both partitions and their fill counters.
for(i=0;i<10;i++) {
part1[i][0] = 0;
part1[i][1] = 0;
part2[i][0] = 0;
part2[i][1] = 0;
i1 = 0; i2 = 0;
//Finding distance between mean and data point and store the data point in
the corresponding partition
for(i=0;i<10;i++) {
// Branch taken when the point goes to partition 1 (condition not visible).
part1[i1][0] = dataset[i][0];
part1[i1][1] = dataset[i][1];
i1++;
}
else {
// Otherwise the point is appended to partition 2.
part2[i2][0] = dataset[i][0];
part2[i2][1] = dataset[i][1];
i2++;
// Remember the current means in temp1/temp2 before they are overwritten below.
temp1[0][0] = mean1[0][0];
temp1[0][1] = mean1[0][1];
temp2[0][0] = mean2[0][0];
temp2[0][1] = mean2[0][1];
// Sum the x and y coordinates of every point in each partition.
for(i=0;i<i1;i++) {
sum11 += part1[i][0];
sum12 += part1[i][1];
for(i=0;i<i2;i++) {
sum21 += part2[i][0];
sum22 += part2[i][1];
// New mean = coordinate sum / point count. The float cast forces
// floating-point division, so an empty partition (count 0) yields
// NaN/Infinity rather than an ArithmeticException.
mean1[0][0] = (float)sum11/i1;
mean1[0][1] = (float)sum12/i1;
mean2[0][0] = (float)sum21/i2;
mean2[0][1] = (float)sum22/i2;
itr++;
// Print the points of each partition, one "x y" pair per line.
System.out.println("Part1:");
for(i=0;i<i1;i++) {
System.out.println(part1[i][0]+" "+part1[i][1]);
System.out.println("\nPart2:");
for(i=0;i<i2;i++) {
System.out.println(part2[i][0]+" "+part2[i][1]);
Output:
Dataset: 2 1
5 2
2 2
4 1
4 3
7 5
3 6
5 7
1 4
4 1
Number of partitions: 2
Final Partition:
Part1:
2 1
5 2
2 2
4 1
4 3
1 4
4 1
Part2:
7 5
3 6
5 7
Final Mean:
Total Iteration: 2
EXPERIMENT – 14
AIM: Write a program to perform cluster analysis using the simple k-means algorithm in the
Python programming language.
Program:
import numpy as np
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()
from sklearn.cluster import KMeans

# Load the data set; it is expected to provide 'Latitude' and 'Longitude'
# columns, which are plotted below.
data = pd.read_csv('/content/cluster.csv')
data  # displayed only when run as a notebook cell

# Plot the raw points on a full longitude/latitude canvas.
plt.scatter(data['Longitude'], data['Latitude'])
plt.xlim(-180, 180)
plt.ylim(-90, 90)
plt.show()

# iloc[rows, columns]: keep every row and the columns at positions 1 and 2
# as the clustering features.
x = data.iloc[:, 1:3]
x

# Fit k-means with 3 clusters and get the cluster index of every row.
# fit_predict() fits the model itself, so a separate fit() call is not needed.
kmeans = KMeans(3)
identified_clusters = kmeans.fit_predict(x)
identified_clusters

# Attach the cluster index to a copy of the data and colour the points by it.
data_with_clusters = data.copy()
data_with_clusters['Clusters'] = identified_clusters
plt.scatter(
    data_with_clusters['Longitude'],
    data_with_clusters['Latitude'],
    c=data_with_clusters['Clusters'],
    cmap='rainbow',
)
plt.show()
print(data_with_clusters)
OUTPUT:
/usr/local/lib/python3.10/dist-packages/ipykernel/ipkernel.py:283:
/usr/local/lib/python3.10/dist-packages/sklearn/cluster/_kmeans.py:870:
/content/kmeans.csv.docx Latitude Longitude LANGUAGE Clusters
0 USA 44.97 -103.77 ENGLISH NaN 1
1 CANADA 62.40 -96.80 ENGLISH NaN 1
2 UK 66.25 2.40 FRENCH NaN 0
3 GERMANY 96.88 -2.53 ENGLISH NaN 0
4 AUSTRALIA 33.74 10.40 GERMAN NaN 0
5 INDIA 22.36 133.11 ENGLISH NaN 2
6 EUROPE 66.44 2.40 FRENCH NaN 0
7 IRAN 72.66 -96.80 ENGLISH NaN 1
8 IRAQ 46.75 54.07 ENGLISH NaN 0
9 SRILANA 22.58 22.77 FRENCH NaN 0
10 USA 44.97 -103.77 ENGLISH NaN 1
11 CANADA 62.40 -96.80 ENGLISH NaN 1
12 UK 66.25 2.40 FRENCH NaN 0
13 GERMANY 96.88 -2.53 ENGLISH NaN 0
14 AUSTRALIA 33.74 10.40 GERMAN NaN 0
15 INDIA 22.36 133.11 ENGLISH NaN 2
16 EUROPE 66.44 2.40 FRENCH NaN 0
17 IRAN 72.66 -96.80 ENGLISH NaN 1
18 IRAQ 46.75 54.07 ENGLISH NaN 0
19 SRILANA 22.58 22.77 FRENCH NaN 0
20 USA 44.97 -103.77 ENGLISH NaN 1
21 CANADA 62.40 -96.80 ENGLISH NaN 1
22 UK 66.25 2.40 FRENCH NaN 0
23 GERMANY 96.88 -2.53 ENGLISH NaN 0
24 AUSTRALIA 33.74 10.40 GERMAN NaN 0
25 INDIA 22.36 133.11 ENGLISH NaN 2
26 EUROPE 66.44 2.40 FRENCH NaN 0
27 IRAN 72.66 -96.80 ENGLISH NaN 1
28 IRAQ 46.75 54.07 ENGLISH NaN 0
29 SRILANA 22.58 22.77 FRENCH NaN 0
30 USA 44.97 -103.77 ENGLISH NaN 1
31 CANADA 62.40 -96.80 ENGLISH NaN 1
32 UK 66.25 2.40 FRENCH NaN 0
33 GERMANY 96.88 -2.53 ENGLISH NaN 0
34 AUSTRALIA 33.74 10.40 GERMAN NaN 0
35 INDIA 22.36 133.11 ENGLISH NaN 2
36 EUROPE 66.44 2.40 FRENCH NaN 0
37 IRAN 72.66 -96.80 ENGLISH NaN 1
38 IRAQ 46.75 54.07 ENGLISH NaN 0
39 SRILANA 22.58 22.77 FRENCH NaN 0
40 USA 44.97 -103.77 ENGLISH NaN 1
41 CANADA 62.40 -96.80 ENGLISH NaN 1
42 UK 66.25 2.40 FRENCH NaN 0
43 GERMANY 96.88 -2.53 ENGLISH NaN 0
44 AUSTRALIA 33.74 10.40 GERMAN NaN 0
45 INDIA 22.36 133.11 ENGLISH NaN 2
46 EUROPE 66.44 2.40 FRENCH NaN 0
47 IRAN 72.66 -96.80 ENGLISH NaN 1
48 IRAQ 46.75 54.07 ENGLISH NaN 0
49 SRILANA 22.58 22.77 FRENCH NaN 0
50 USA 44.97 -103.77 ENGLISH NaN 1
51 CANADA 62.40 -96.80 ENGLISH NaN 1
52 UK 66.25 2.40 FRENCH NaN 0
53 GERMANY 96.88 -2.53 ENGLISH NaN 0
54 AUSTRALIA 33.74 10.40 GERMAN NaN 0
55 INDIA 22.36 133.11 ENGLISH NaN 2
56 EUROPE 66.44 2.40 FRENCH NaN 0
57 IRAN 72.66 -96.80 ENGLISH NaN 1
58 IRAQ 46.75 54.07 ENGLISH NaN 0
59 SRILANA 22.58 22.77 FRENCH NaN 0
0
EXPERIMENT – 15
AIM: Write a program to compute/display dissimilarity matrix (for your own dataset
containing at least four instances with two attributes) using Python.
Program:
import numpy as np
import matplotlib.pyplot as plt
from scipy.spatial import distance
from scipy.stats import pearsonr
from sklearn.metrics import jaccard_score
from sklearn.metrics.pairwise import cosine_similarity

# Two correlated samples: y is x plus independent standard-normal noise.
# The fixed seed makes the numbers below reproducible.
np.random.seed(42)
x = np.random.randn(15)
y = x + np.random.randn(15)

# Scatter plot with the least-squares line of degree 1 drawn over it.
plt.scatter(x, y)
plt.plot(np.unique(x), np.poly1d(np.polyfit(x, y, 1))(np.unique(x)))
plt.xlabel('x')
plt.ylabel('y')
plt.show()

# Pearson correlation coefficient (the p-value is ignored).
corr, _ = pearsonr(x, y)
print('pearsons correlation:%.3f' % corr)

# cosine_similarity works on 2-D inputs and returns a (1, 1) matrix here;
# take the single element explicitly instead of relying on the implicit
# array-to-scalar conversion, which NumPy has deprecated.
cos_sin = cosine_similarity(x.reshape(1, -1), y.reshape(1, -1))
print('cosine similarity:%.3f' % cos_sin[0, 0])

# Jaccard score of two binary vectors.
a = [1, 1, 1, 0]
b = [1, 1, 0, 1]
jacc = jaccard_score(a, b)
print("Jaccard score:%.3f" % jacc)

# Euclidean (L2) and Manhattan (L1) distance between x and y.
dat = distance.euclidean(x, y)
print("Euclidean distance:%.3f" % dat)
dst = distance.cityblock(x, y)
print("Manhattan Distance:%.3f" % dst)
OUTPUT:
/usr/local/lib/python3.10/dist-packages/ipykernel/ipkernel.py:283:
pearsons correlation:0.810
cosine similarity:0.773
Jaccard score:0.500
Euclidean distance:3.273
Manhattan Distance:10.468
EXPERIMENT – 16
AIM: Visualize the datasets using matplotlib in python.(Histogram, Box plot, Bar chart, Pie
chart etc.,)
Program:
import numpy as np
import matplotlib.pyplot as plt

# Draw and print 30 samples from a normal distribution (mean 20, std 10).
x = np.random.normal(20, 10, 30)
print(x)

# Histogram of 250 samples from a normal distribution (mean 170, std 10).
x = np.random.normal(170, 10, 250)
plt.hist(x)
plt.show()

# Box plot of 200 samples (mean 100, std 20); seeded so the plot is
# reproducible.
np.random.seed(10)
data = np.random.normal(100, 20, 200)
fig = plt.figure(figsize=(10, 7))
plt.boxplot(data)
plt.show()

# Bar chart of the per-course values.
data = {'C': 20, 'C++': 15, 'Java': 30, 'Python': 35}
courses = list(data.keys())
values = list(data.values())
fig1 = plt.figure(figsize=(10, 5))
# The figure was created but nothing was drawn on it; plot the bars and
# display the figure.
plt.bar(courses, values)
plt.show()
OUTPUT:
/usr/local/lib/python3.10/dist-packages/ipykernel/ipkernel.py:283:
[13.98293388 38.52278185 19.86502775 9.42289071 28.22544912 7.7915635
22.08863595 0.40329876 6.71813951 21.96861236 27.3846658 21.71368281
18.84351718 16.98896304 5.2147801 12.80155792 15.39361229 30.57122226
23.4361829 2.36959845 23.24083969 16.1491772 13.23078 26.11676289
30.30999522 29.31280119 11.60782477 16.90787624 23.31263431 29.75545127]