BDA Experiments
BDA Experiments
ANALYTICS EXPERIMENTS
============================================================
Sachin Kuldeep
12212112
CS-B 06
1. Implementation of Apriori algorithm.
CODE:
import csv
from itertools import combinations
# Load dataset correctly
def load_transactions(file_path):
    """Read a one-hot encoded CSV file into a list of item sets.

    The first row is taken as the header row. The first column is skipped
    (it is assumed to be an index/ID column) and every remaining header
    names an item. An item is added to a row's transaction when that
    row's cell, stripped and lower-cased, is "true", "1" or "yes".

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        A list with one set of header names per non-blank data row. An
        empty file yields an empty list.
    """
    truthy_values = {"true", "1", "yes"}
    transactions = []
    # newline="" lets the csv module do its own newline handling, as its
    # documentation requires for file objects.
    with open(file_path, "r", newline="") as file:
        reader = csv.reader(file)
        headers = next(reader, None)
        if headers is None:
            # Completely empty file: no header row, hence no transactions.
            return transactions
        item_names = headers[1:]
        for row in reader:
            if not row:
                # csv.reader yields [] for blank lines; they carry no data.
                continue
            # zip() stops at the shorter sequence, so a row with fewer
            # cells than headers simply contributes no items for the
            # missing columns instead of raising IndexError.
            transaction = {
                name
                for name, cell in zip(item_names, row[1:])
                if cell.strip().lower() in truthy_values
            }
            transactions.append(transaction)
    return transactions
def get_frequent_itemsets(transactions, min_support):
    """Find every itemset whose support count is at least min_support.

    The search is level-wise: frequent single items are counted first,
    then size-k candidates are formed by taking the union of two frequent
    size-(k-1) itemsets, and each candidate is kept when enough
    transactions contain it. The loop stops at the first level that
    produces no frequent itemset.

    Args:
        transactions: A re-iterable collection (e.g. a list) of item
            collections; it is scanned once per candidate.
        min_support: Minimum number of transactions (an absolute count,
            not a fraction) that must contain an itemset.

    Returns:
        A dict mapping each frequent itemset (as a frozenset) to the
        number of transactions that contain it.
    """
    # Level 1: count how many transactions contain each single item.
    current_level = {}
    for transaction in transactions:
        for item in transaction:
            key = frozenset([item])
            current_level[key] = current_level.get(key, 0) + 1
    current_level = {
        itemset: count
        for itemset, count in current_level.items()
        if count >= min_support
    }

    frequent_itemsets = dict(current_level)
    k = 2
    while current_level:
        next_level = {}
        # The same size-k union is produced by several pairs of
        # (k-1)-itemsets; remember the ones already counted so the
        # transaction list is scanned only once per distinct candidate.
        evaluated = set()
        for first, second in combinations(current_level, 2):
            candidate = first | second
            if len(candidate) != k or candidate in evaluated:
                continue
            evaluated.add(candidate)
            count = sum(
                1 for transaction in transactions
                if candidate.issubset(transaction)
            )
            if count >= min_support:
                next_level[candidate] = count
        frequent_itemsets.update(next_level)
        current_level = next_level
        k += 1
    return frequent_itemsets
def generate_association_rules(frequent_itemsets, min_confidence):
    """Derive association rules from a table of itemset support counts.

    For every itemset with at least two items, each non-empty proper
    subset is tried as the antecedent (left side) and the remaining items
    form the consequent (right side). The rule's confidence is
    support(itemset) / support(antecedent).

    Args:
        frequent_itemsets: Dict mapping frozenset itemsets to their
            support counts.
        min_confidence: Minimum confidence a rule needs to be returned.

    Returns:
        A list of (antecedent, consequent, confidence) tuples where the
        first two entries are plain sets and confidence is a float.
    """
    rules = []
    for itemset, support in frequent_itemsets.items():
        if len(itemset) < 2:
            continue  # A rule needs at least one item on each side.
        # Sizes 1 .. len-1 guarantee a non-empty consequent.
        for size in range(1, len(itemset)):
            for antecedent in map(frozenset, combinations(itemset, size)):
                antecedent_support = frequent_itemsets.get(antecedent)
                if not antecedent_support:
                    # Support is unknown (or zero), so the confidence is
                    # undefined. Substituting a made-up count would
                    # produce meaningless values such as confidence > 1.
                    continue
                confidence = support / antecedent_support
                if confidence >= min_confidence:
                    consequent = itemset - antecedent
                    rules.append(
                        (set(antecedent), set(consequent), confidence)
                    )
    return rules
if __name__ == "__main__":
    # Location of the one-hot encoded dataset; adjust path as needed.
    # The literal is written as two adjacent raw strings so that it stays
    # a single valid string while keeping the source lines short.
    file_path = (
        r"C:\Users\Lenovo\OneDrive\Desktop\Image Processing Lab"
        r"\Apriori_dataset2.csv"
    )
    transactions = load_transactions(file_path)

    # Support is an absolute transaction count; confidence is a ratio.
    min_support = int(input("Enter minimum support count: "))
    min_confidence = float(input("Enter minimum confidence (0 to 1): "))

    frequent_itemsets = get_frequent_itemsets(transactions, min_support)
    association_rules = generate_association_rules(
        frequent_itemsets, min_confidence
    )

    print("\nAssociation Rules:")
    for left, right, confidence in association_rules:
        print(f"{left} => {right} (Confidence: {confidence:.2f})")
FIGURE: