Algorithm

This document describes the Apriori algorithm for association rule learning and frequent itemset mining. It provides Python programs for the Apriori, FP-Growth, and Naïve Bayes algorithms, and demonstrates generating frequent itemsets and association rules from sample transaction data.


Apriori Algorithm:
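The Apriori algorithm mines frequent itemsets level by level: it counts candidate 1-itemsets, keeps those meeting a minimum support threshold, joins the survivors into larger candidates, and repeats until no frequent itemsets remain. Because every subset of a frequent itemset is itself frequent (the downward-closure property), infrequent candidates can be pruned early. Association rules are then read off the frequent itemsets using a minimum confidence threshold.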

Program:
from itertools import combinations

def load_data():
    # Sample transactional dataset
    dataset = [
        ['bread', 'milk', 'beer'],
        ['bread', 'diaper', 'beer', 'egg'],
        ['milk', 'diaper', 'beer', 'cola'],
        ['bread', 'milk', 'diaper', 'beer'],
        ['bread', 'milk', 'diaper', 'cola']
    ]
    return dataset

def create_c1(dataset):
    # Build the candidate 1-itemsets (C1) as frozensets
    c1 = set()
    for transaction in dataset:
        for item in transaction:
            c1.add(frozenset([item]))
    return c1

def filter_candidates(dataset, candidates, min_support):
    # Count how many transactions contain each candidate itemset,
    # then keep only candidates whose support meets min_support
    candidate_counts = {}
    for transaction in dataset:
        for candidate in candidates:
            if candidate.issubset(transaction):
                candidate_counts[candidate] = candidate_counts.get(candidate, 0) + 1

    num_transactions = float(len(dataset))
    qualified_items = []
    support_data = {}
    for candidate, count in candidate_counts.items():
        support = count / num_transactions
        if support >= min_support:
            qualified_items.append(candidate)
        support_data[candidate] = support
    return qualified_items, support_data

def generate_candidates(Lk, k):
    # Join step: merge pairs of (k-1)-itemsets whose first k-2 items
    # (in sorted order) match, producing candidate k-itemsets
    candidates = []
    len_Lk = len(Lk)
    for i in range(len_Lk):
        for j in range(i + 1, len_Lk):
            # Sort before slicing so the comparison does not depend
            # on the arbitrary iteration order of the frozensets
            L1 = sorted(Lk[i])[:k - 2]
            L2 = sorted(Lk[j])[:k - 2]
            if L1 == L2:
                candidates.append(Lk[i] | Lk[j])
    return candidates

def apriori(dataset, min_support=0.5):
    # Level-wise search: start from frequent 1-itemsets and grow
    # candidates until no frequent k-itemsets remain
    C1 = create_c1(dataset)
    D = list(map(set, dataset))
    L1, support_data = filter_candidates(D, C1, min_support)
    L = [L1]
    k = 2
    while len(L[k - 2]) > 0:
        candidates = generate_candidates(L[k - 2], k)
        Lk, support_k = filter_candidates(D, candidates, min_support)
        support_data.update(support_k)
        L.append(Lk)
        k += 1
    return L, support_data

def generate_rules(L, support_data, min_confidence=0.7):
    # For each frequent itemset, try every size-i subset as the rule
    # antecedent; confidence(A => B) = support(A | B) / support(A)
    rules = []
    for i in range(1, len(L)):
        for freq_set in L[i]:
            for subset in combinations(freq_set, r=i):
                antecedent = frozenset(subset)
                confidence = support_data[freq_set] / support_data[antecedent]
                if confidence >= min_confidence:
                    rules.append((set(antecedent), set(freq_set - antecedent), confidence))
    return rules

def print_results(L, support_data, rules):
    # Report every level of frequent itemsets, then the rules
    for i, itemset in enumerate(L):
        print(f"Frequent {i+1}-itemsets")
        print("===================")
        for item in itemset:
            print(f"{item} - support: {support_data[item]}")
        print()
    print("Association Rules")
    print("===================")
    for rule in rules:
        antecedent, consequent, confidence = rule
        print(f"{antecedent} => {consequent} - confidence: {confidence}")

if __name__ == "__main__":
    dataset = load_data()
    L, support_data = apriori(dataset, min_support=0.4)
    rules = generate_rules(L, support_data, min_confidence=0.7)
    print_results(L, support_data, rules)

Output:
Frequent 1-itemsets
===================
frozenset({'bread'}) - support: 0.8
frozenset({'beer'}) - support: 0.8
frozenset({'milk'}) - support: 0.8
frozenset({'diaper'}) - support: 0.8
frozenset({'cola'}) - support: 0.4

Frequent 2-itemsets
===================
frozenset({'beer', 'bread'}) - support: 0.6
frozenset({'milk', 'bread'}) - support: 0.6
frozenset({'beer', 'milk'}) - support: 0.6
frozenset({'diaper', 'bread'}) - support: 0.6
frozenset({'beer', 'diaper'}) - support: 0.6
frozenset({'diaper', 'milk'}) - support: 0.6
frozenset({'milk', 'cola'}) - support: 0.4
frozenset({'diaper', 'cola'}) - support: 0.4

Frequent 3-itemsets
===================
frozenset({'beer', 'milk', 'bread'}) - support: 0.4
frozenset({'beer', 'diaper', 'bread'}) - support: 0.4
frozenset({'beer', 'diaper', 'milk'}) - support: 0.4
frozenset({'diaper', 'milk', 'cola'}) - support: 0.4
frozenset({'diaper', 'milk', 'bread'}) - support: 0.4

Frequent 4-itemsets
===================

Association Rules
===================
{'beer'} => {'bread'} - confidence: 0.7499999999999999
{'bread'} => {'beer'} - confidence: 0.7499999999999999
{'milk'} => {'bread'} - confidence: 0.7499999999999999
{'bread'} => {'milk'} - confidence: 0.7499999999999999
{'beer'} => {'milk'} - confidence: 0.7499999999999999
{'milk'} => {'beer'} - confidence: 0.7499999999999999
{'diaper'} => {'bread'} - confidence: 0.7499999999999999
{'bread'} => {'diaper'} - confidence: 0.7499999999999999
{'beer'} => {'diaper'} - confidence: 0.7499999999999999
{'diaper'} => {'beer'} - confidence: 0.7499999999999999
{'diaper'} => {'milk'} - confidence: 0.7499999999999999
{'milk'} => {'diaper'} - confidence: 0.7499999999999999
{'cola'} => {'milk'} - confidence: 1.0
{'cola'} => {'diaper'} - confidence: 1.0
{'diaper', 'cola'} => {'milk'} - confidence: 1.0
{'milk', 'cola'} => {'diaper'} - confidence: 1.0
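The support_data dictionary returned by apriori can also be used to score rules by lift, which compares how often the antecedent and consequent co-occur against what independence would predict. The sketch below is illustrative, not part of the lab program: it assumes the program above has just been run (so rules and support_data are in scope), and compute_lift is a hypothetical helper introduced here.

def compute_lift(antecedent, consequent, support_data):
    # lift(A => B) = support(A | B) / (support(A) * support(B));
    # lift > 1 means A and B co-occur more often than chance would predict
    joint = support_data[frozenset(antecedent) | frozenset(consequent)]
    return joint / (support_data[frozenset(antecedent)] * support_data[frozenset(consequent)])

for antecedent, consequent, confidence in rules:
    lift = compute_lift(antecedent, consequent, support_data)
    print(f"{antecedent} => {consequent} - lift: {lift:.2f}")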
FP Growth Algorithm:
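FP-Growth finds the same frequent itemsets as Apriori but avoids repeated candidate generation: it compresses the transactions into an FP-tree and mines the tree recursively, which is typically faster on large datasets.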
Program:
from mlxtend.frequent_patterns import fpgrowth
from mlxtend.preprocessing import TransactionEncoder
import pandas as pd

dataset = [
    ['milk', 'bread', 'biscuit'],
    ['bread', 'butter'],
    ['milk', 'bread', 'butter'],
    ['milk', 'bread'],
    ['milk', 'bread', 'biscuit', 'butter'],
    ['jam', 'butter'],
    ['jam', 'bread'],
    ['milk', 'jam'],
    ['bread', 'butter']
]

# One-hot encode the transactions with one column per distinct item.
# (Applying pd.get_dummies to positional columns would create duplicate
# item columns and split each item's support across positions.)
te = TransactionEncoder()
encoded_df = pd.DataFrame(te.fit(dataset).transform(dataset), columns=te.columns_)

frequent_itemsets = fpgrowth(encoded_df, min_support=0.2, use_colnames=True)
print(frequent_itemsets)

Note: Before running the program, install the mlxtend package by
typing "pip install mlxtend" in the terminal.
Output:
support itemsets
0 0.777778 (bread)
1 0.555556 (milk)
2 0.555556 (butter)
3 0.333333 (jam)
4 0.222222 (biscuit)
5 0.444444 (bread, milk)
6 0.444444 (bread, butter)
7 0.222222 (milk, butter)
8 0.222222 (bread, biscuit)
9 0.222222 (milk, biscuit)
10 0.222222 (bread, milk, biscuit)
11 0.222222 (bread, milk, butter)
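To mirror the rule-generation step of the Apriori section, mlxtend can also derive association rules from these frequent itemsets. A minimal sketch, assuming the frequent_itemsets DataFrame from the program above (the exact signature of association_rules may vary slightly across mlxtend versions):

from mlxtend.frequent_patterns import association_rules

# Keep rules whose confidence is at least 0.7
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.7)
print(rules[['antecedents', 'consequents', 'support', 'confidence']])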
Naïve Bayes Algorithm:
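Naïve Bayes is a probabilistic classifier that applies Bayes' theorem under the simplifying assumption that features are conditionally independent given the class. The Gaussian variant used below models each continuous feature with a per-class normal distribution, which suits the iris measurements.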
Program:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score

# Load the iris dataset (150 samples, 4 features, 3 classes)
iris = load_iris()
X = iris.data
y = iris.target

# Hold out 20% of the data for testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit a Gaussian Naive Bayes model and evaluate it on the test set
naive_bayes = GaussianNB()
naive_bayes.fit(X_train, y_train)
y_pred = naive_bayes.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Output:
Accuracy: 1.0
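A single train/test split can overstate performance; here the 30 held-out samples happen to be classified perfectly. As a sanity check, one might also cross-validate. A minimal sketch, assuming the imports and data from the program above:

from sklearn.model_selection import cross_val_score

# 5-fold cross-validation gives a less split-dependent accuracy estimate
scores = cross_val_score(GaussianNB(), X, y, cv=5)
print("Cross-validation accuracy: %.3f +/- %.3f" % (scores.mean(), scores.std()))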
