DMDW Fielding Set
S.No  Topic Name  Date of Experiment  Signature  Remarks
01 Assignment 1 10/02/2025
02 Assignment 2 17/02/2025
03 Assignment 3 24/02/2025
05  Snowflake Schema  17/03/2025
06  KNN Imputation  14/04/2025
07  Apriori Algorithm  21/04/2025
08  Data Visualization  21/04/2025
09  Decision Tree  28/04/2025
Corrected Code
import sqlite3

# Library database: create the student, faculty and books tables, then dump
# the contents of each.
# NOTE(review): the original fragment used `conn`/`cursor` without ever
# creating them, contained embedded page numbers, and had print strings split
# across lines by PDF extraction — all repaired here.
conn = sqlite3.connect("library.db")
cursor = conn.cursor()

# Student table
cursor.execute("""
CREATE TABLE IF NOT EXISTS student (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    name TEXT NOT NULL,
    age INTEGER NOT NULL,
    dept TEXT NOT NULL,
    phone TEXT NOT NULL
)""")

# Faculty table
cursor.execute("""
CREATE TABLE IF NOT EXISTS faculty (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    name TEXT NOT NULL,
    dept TEXT NOT NULL,
    phone TEXT NOT NULL
)""")

# Books table
cursor.execute("""
CREATE TABLE IF NOT EXISTS books (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    name TEXT NOT NULL,
    writer TEXT NOT NULL,
    isbn TEXT NOT NULL,
    issue_no INTEGER NOT NULL
)""")
conn.commit()

# Display data
print("Students:")
cursor.execute("SELECT * FROM student")
for row in cursor.fetchall():
    print(row)

print("\nFaculty:")
cursor.execute("SELECT * FROM faculty")
for row in cursor.fetchall():
    print(row)

print("\nBooks:")
cursor.execute("SELECT * FROM books")
for row in cursor.fetchall():
    print(row)

conn.close()
Corrected Code
import sqlite3

# OLAP operations (roll-up, drill-down, slice, dice, pivot) over a tiny
# sales fact table stored in SQLite.
conn = sqlite3.connect("sales_warehouse.db")
cursor = conn.cursor()

# Fact table — the original fragment inserted into `sales` without ever
# creating it; the schema is inferred from the insert tuples and queries.
cursor.execute("""
CREATE TABLE IF NOT EXISTS sales (
    product_id INTEGER PRIMARY KEY,
    product_name TEXT NOT NULL,
    product_price INTEGER NOT NULL,
    year INTEGER NOT NULL
)""")
conn.commit()


def insert_sales_data():
    """Load the sample rows; OR IGNORE makes reruns idempotent."""
    data = [
        (1, "Laptop", 20000, 2023),
        (2, "Phone", 50000, 2024),
        (3, "Monitor", 8000, 2023)
    ]
    cursor.executemany("INSERT OR IGNORE INTO sales VALUES (?,?,?,?)", data)
    conn.commit()


def query_data():
    """Run one query per OLAP operation and print each result set."""
    # Roll-up: aggregate prices up to the year level
    cursor.execute("SELECT year, SUM(product_price) FROM sales GROUP BY year")
    print("Roll-up:\n", cursor.fetchall())

    # Drill-down: break the yearly totals back out per product
    cursor.execute(
        "SELECT year, product_name, SUM(product_price) FROM sales "
        "GROUP BY year, product_name")
    print("\nDrill-down:\n", cursor.fetchall())

    # Slice: fix one dimension (year = 2023)
    cursor.execute("SELECT * FROM sales WHERE year = 2023")
    print("\nSlice:\n", cursor.fetchall())

    # Dice: restrict two dimensions at once
    cursor.execute(
        "SELECT * FROM sales WHERE year=2023 AND product_price > 10000")
    print("\nDice:\n", cursor.fetchall())

    # Pivot: years as rows, selected products as columns
    cursor.execute("""
    SELECT year,
        SUM(CASE WHEN product_name='Laptop' THEN product_price ELSE 0 END) AS Laptop,
        SUM(CASE WHEN product_name='Phone' THEN product_price ELSE 0 END) AS Phone
    FROM sales GROUP BY year
    """)
    print("\nPivot:\n", cursor.fetchall())


# Execute functions
insert_sales_data()
query_data()
conn.close()
4
Page 9–10: Assignment 3 – Star Schema Implementation:
Assignment 3 – Star Schema Implementation
Aim: Design a star schema with product_dim, customer_dim, and sales_fact tables, load sample data,
and generate a consolidated sales report.
Corrected Code
import sqlite3

# Assignment 3: star schema (product_dim, customer_dim, sales_fact) plus a
# consolidated sales report joining the fact table with both dimensions.
# NOTE(review): the visible fragment queried the three tables without
# creating or loading them; the schema/load step is restored here to match
# the stated aim ("load sample data, generate a consolidated sales report").

# Connect to database
conn = sqlite3.connect("star_schema.db")
cursor = conn.cursor()

# Dimension tables
cursor.execute("""
CREATE TABLE IF NOT EXISTS product_dim (
    product_id INTEGER PRIMARY KEY,
    product_name TEXT NOT NULL
)""")
cursor.execute("""
CREATE TABLE IF NOT EXISTS customer_dim (
    customer_id INTEGER PRIMARY KEY,
    customer_name TEXT NOT NULL
)""")

# Fact table referencing both dimensions
cursor.execute("""
CREATE TABLE IF NOT EXISTS sales_fact (
    sale_id INTEGER PRIMARY KEY,
    product_id INTEGER NOT NULL,
    customer_id INTEGER NOT NULL,
    price REAL NOT NULL
)""")

# Sample data (OR IGNORE keeps reruns idempotent)
cursor.executemany("INSERT OR IGNORE INTO product_dim VALUES (?,?)",
                   [(1, "Laptop"), (2, "Phone")])
cursor.executemany("INSERT OR IGNORE INTO customer_dim VALUES (?,?)",
                   [(1, "Alice"), (2, "Bob")])
cursor.executemany("INSERT OR IGNORE INTO sales_fact VALUES (?,?,?,?)",
                   [(101, 1, 1, 55000.0), (102, 2, 2, 20000.0)])
conn.commit()

# Generate sales report via join
cursor.execute("""
SELECT s.sale_id, p.product_name, c.customer_name, s.price
FROM sales_fact s
JOIN product_dim p ON s.product_id = p.product_id
JOIN customer_dim c ON s.customer_id = c.customer_id
""")
print("Sales Report:")
for row in cursor.fetchall():
    print(row)
conn.close()
Corrected Code
import pandas as pd

# Snowflake schema in pandas: a normalized category dimension, a product
# dimension referencing it, and a sales fact table resolved through both.
# NOTE(review): a stray page number embedded by PDF extraction has been
# removed from the middle of this fragment.

# Dimension: Categories
product_category = pd.DataFrame({
    'category_id': [1, 2],
    'category_name': ['Electronics', 'Stationery']
})

# Dimension: Products (category_id is the foreign key into product_category)
products = pd.DataFrame({
    'product_id': [101, 102, 103],
    'product_name': ['Laptop', 'Mouse', 'Printer'],
    'category_id': [1, 1, 2]
})

# Fact: Sales (product_id is the foreign key into products)
sales = pd.DataFrame({
    'sale_id': [1001, 1002, 1003],
    'product_id': [101, 102, 103],
    'quantity': [2, 4, 3],
    'amount': [2500, 1000, 1500]
})

# Merge to snowflake: fact -> product dimension -> category dimension
merged = sales.merge(products, on='product_id')
final = merged.merge(product_category, on='category_id')

print(final[['sale_id', 'product_name', 'category_name', 'quantity', 'amount']])
Corrected Code
import pandas as pd
import numpy as np
from sklearn.impute import KNNImputer

# Data integration followed by KNN imputation.
# NOTE(review): the original fragment ran the imputer on `df` before any
# `df` existed — the integration (merge) step appeared after it. The merge
# is moved first so the frame being imputed is actually defined.

# Data Integration
df_sales = pd.DataFrame({
    'Product_ID': [1, 2, 3],
    'Product': ['Laptop', 'Phone', 'Tablet'],
    'Price': [1000, 500, 700]
})
df_ratings = pd.DataFrame({
    'Product_ID': [1, 2, 3, 4],
    'Rating': [4.5, 4.2, 4.8, 4.3]
})
# Outer merge keeps Product_ID 4 (present only in ratings), producing the
# NaN Price value the imputer then fills.
df = df_sales.merge(df_ratings, on='Product_ID', how='outer')
print("Merged Data:\n", df)

# KNN Imputation: fill missing values from the 2 nearest neighbours
imputer = KNNImputer(n_neighbors=2)
df[['Price', 'Rating']] = imputer.fit_transform(df[['Price', 'Rating']])
print("After Imputation:\n", df)
Output:
After Imputation:
Product Price Rating
7
0 Laptop 1000.0 4.5
1 Phone 500.0 4.3
2 Tablet 400.0 4.2
3 Monitor 300.0 4.0
4 TV 600.0 4.7
Merged Data:
Product_ID Product Price Rating
0 1 Laptop 1000 4.5
1 2 Phone 500 4.2
2 3 Tablet 700 4.8
Corrected Code
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules

# Mine association rules from a small one-hot-encoded transaction dataset.

# Transaction dataset (1 = item present in the transaction)
data = {
    'TID': [1, 2, 3, 4, 5],
    'Milk': [1, 1, 1, 0, 1],
    'Bread': [1, 1, 1, 0, 1],
    'Butter': [0, 1, 1, 1, 0],
    'Jam': [0, 0, 1, 0, 1]
}
# mlxtend's apriori expects boolean columns; casting avoids the
# DeprecationWarning it raises for 0/1 integer frames.
df = pd.DataFrame(data).set_index('TID').astype(bool)

# Apriori Algorithm: itemsets present in at least 40% of the 5 transactions
freq_items = apriori(df, min_support=0.4, use_colnames=True)
# Keep rules whose confidence is at least 0.6
rules = association_rules(freq_items, metric="confidence", min_threshold=0.6)
print("Association Rules:")
print(rules[['antecedents', 'consequents', 'support', 'confidence']])
Output:
Association Rules:
antecedents consequents support confidence
0 (Milk) (Bread) 0.8 1.000000
1 (Bread) (Milk) 0.8 1.000000
2 (Butter) (Milk) 0.4 0.666667
8
3 (Butter) (Bread) 0.4 0.666667
4 (Jam) (Bread) 0.4 1.000000
Corrected Code
import seaborn as sns
import matplotlib.pyplot as plt

# Four basic visualizations of the iris dataset.
# NOTE(review): the visible fragment used `sns` and `plt` with no imports in
# view; they are restored here so the script runs standalone.
iris = sns.load_dataset("iris")

# Scatter Plot: relationship between sepal and petal length
plt.scatter(iris['sepal_length'], iris['petal_length'])
plt.title('Sepal vs Petal Length')
plt.xlabel('Sepal Length (cm)')
plt.ylabel('Petal Length (cm)')
plt.show()

# Histogram: distribution of sepal length
plt.hist(iris['sepal_length'], bins=15)
plt.title('Sepal Length Distribution')
plt.xlabel('Sepal Length (cm)')
plt.ylabel('Frequency')
plt.show()

# Box Plot: sepal width spread per species
sns.boxplot(x='species', y='sepal_width', data=iris)
plt.title('Sepal Width by Species')
plt.show()

# Heatmap: pairwise correlation of the numeric columns only
corr = iris.corr(numeric_only=True)
sns.heatmap(corr, annot=True)
plt.title('Correlation Matrix')
plt.show()
9
Pages 17–18: Assignment 9 (Decision Tree)
Corrected Code
import pandas as pd
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
import matplotlib.pyplot as plt

# Train, evaluate and visualize a small decision tree for heart-attack
# prediction on a 5-row toy dataset.

# Sample dataset
data = {
    'Age': [45, 32, 60, 28, 50],
    'Weight': [70, 55, 90, 65, 80],
    'Smoker': [1, 0, 1, 0, 1],
    'Heart_Attack': [1, 0, 1, 0, 1]
}
df = pd.DataFrame(data)

# Features / target — the original fragment passed X and y to
# train_test_split without ever defining them.
X = df[['Age', 'Weight', 'Smoker']]
y = df['Heart_Attack']

# Train-test split (random_state pins the split so runs are reproducible;
# with 5 rows and test_size=0.2 the test set holds a single sample)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)
clf = DecisionTreeClassifier(max_depth=2)
clf.fit(X_train, y_train)

# Evaluation
y_pred = clf.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

# Tree Visualization
plt.figure(figsize=(12, 8))
plot_tree(clf, feature_names=X.columns, class_names=['No', 'Yes'], filled=True)
plt.show()
Output (example):
Accuracy: 1.0
Classification Report:
precision recall f1-score support
0 1.00 1.00 1.00 1
1 1.00 1.00 1.00 1
10
accuracy 1.00 2
macro avg 1.00 1.00 1.00 2
weighted avg 1.00 1.00 1.00 2
11