0% found this document useful (0 votes)
2 views

assignment_180b

The document outlines an in-class activity focused on setting up an SQL environment for music streaming data. It details the creation of database tables for Users, Songs, Listens, and Recommendations, along with sample data insertion and various SQL queries for retrieving and analyzing song recommendations. The final goal is to recommend songs to a user named Minnie based on average ratings of songs she hasn't listened to yet.

Uploaded by

kunal4boomegle
Copyright
© © All Rights Reserved
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
2 views

assignment_180b

The document outlines an in-class activity focused on setting up an SQL environment for music streaming data. It details the creation of database tables for Users, Songs, Listens, and Recommendations, along with sample data insertion and various SQL queries for retrieving and analyzing song recommendations. The final goal is to recommend songs to a user named Minnie based on average ratings of songs she hasn't listened to yet.

Uploaded by

kunal4boomegle
Copyright
© © All Rights Reserved
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 9

assignment 20/02/25, 10:41 PM

In-class activity - Song recommendation

Let's set up the SQL environment


In [106… #Install pysqlite3 for python and import pandas to use later
#!pip install pysqlite3
from sqlite3 import dbapi2 as sqlite3
print(sqlite3.sqlite_version)
import pandas as pd
from IPython.display import display, HTML

3.45.3

Let's define some helper functions for running queries and printing results

In [107… dbname = "music_streaming4.db"

def printSqlResults(cursor, tblName):
    """Display the result set pending on ``cursor`` as an HTML table.

    Args:
        cursor: A DB-API cursor on which a statement has already been executed.
        tblName: Caption rendered in bold green above the table.

    Statements that produce no result set (CREATE, INSERT, DELETE, ...) leave
    ``cursor.description`` as ``None``; nothing is displayed for them.  Any
    other failure is reported with ``print`` instead of being silently
    discarded, so a broken query stays visible in the notebook output.
    """
    # NOTE(review): two expression tails were cut off in the listing this was
    # recovered from ("cursor.descr..." and "df.to_..."). They are restored as
    # `cursor.description` and `df.to_html(index=False)`, which matches the
    # index-free tables in the recorded output -- confirm against the notebook.
    if cursor.description is None:
        return
    try:
        column_names = [col[0] for col in cursor.description]
        df = pd.DataFrame(cursor.fetchall(), columns=column_names)
        display(HTML("<b><font color=Green> " + tblName + "</font></b>" + df.to_html(index=False)))
    except Exception as exc:
        # Best-effort display: report the problem but do not abort the cell.
        print(f"Could not display results for {tblName!r}: {exc}")

def runSql(caption, query):
    """Run ``query`` against the ``dbname`` database and display its result.

    Args:
        caption: Title passed to ``printSqlResults`` for the rendered table.
        query: SQL text to execute.

    A fresh connection is opened for every call and is always closed, even
    when executing or displaying the query raises.  No commit is issued
    before the connection is closed.
    """
    conn = sqlite3.connect(dbname)  # Connect to the database
    try:
        cursor = conn.cursor()  # Create a cursor (think: it's like a "pointer")
        cursor.execute(query)  # Execute the query
        printSqlResults(cursor, caption)  # Print the results
    finally:
        # Release the database handle whether or not the query succeeded.
        conn.close()

def runStepByStepSql(query, fromline):
    """Run successively longer line-prefixes of ``query`` through ``runSql``.

    For each ``n`` from ``fromline`` up to, but not including, the number of
    lines in the stripped query, the first ``n`` lines are joined, terminated
    with ``;`` and executed, with the partial query text used in the caption.
    Because the upper bound is exclusive, the complete query is never run by
    this helper.
    """
    query_lines = query.strip().split('\n')
    prefix_lengths = range(fromline, len(query_lines))
    for n in prefix_lengths:
        prefix = '\n'.join(query_lines[:n])
        runSql('Query till line:' + prefix, prefix + ';')

Let's set up a schema and insert some data

In [108… # Connect to database (creates the file if it doesn't exist)


"""
1. Connections: A connection represents a connection to a database through
which we can execute SQL queries. The dbname here specifies the database.
In SQLite, if the DB doesn't exist, it will be created.
2. Cursors: A cursor is an object associated with a database connection.
It allows you to execute SQL queries, fetch query results.
"""
# Create the four tables used by the activity.  Every statement uses
# CREATE TABLE IF NOT EXISTS, so re-running the cell against an existing
# database file leaves already-created tables untouched.
conn = sqlite3.connect(dbname)
try:
    cursor = conn.cursor()

    # Create the Users table
    cursor.execute("""
        CREATE TABLE IF NOT EXISTS Users (
            user_id INTEGER PRIMARY KEY,
            name VARCHAR(100) NOT NULL,
            email VARCHAR(100) NOT NULL UNIQUE
        );
    """)

    # Create the Songs table
    cursor.execute("""
        CREATE TABLE IF NOT EXISTS Songs (
            song_id INTEGER PRIMARY KEY,
            title VARCHAR(100) NOT NULL,
            artist VARCHAR(100) NOT NULL,
            genre VARCHAR(100)
        );
    """)

    # Create the Listens table
    cursor.execute("""
        CREATE TABLE IF NOT EXISTS Listens (
            listen_id INTEGER PRIMARY KEY,
            user_id INTEGER NOT NULL,
            song_id INTEGER NOT NULL,
            rating FLOAT,
            listen_time TIMESTAMP,
            FOREIGN KEY (user_id) REFERENCES Users(user_id),
            FOREIGN KEY (song_id) REFERENCES Songs(song_id)
        );
    """)

    # Create the Recommendations table.  recommendation_id is declared
    # INTEGER explicitly; it previously had no declared type at all.
    cursor.execute("""
        CREATE TABLE IF NOT EXISTS Recommendations (
            user_id INTEGER NOT NULL,
            song_id INTEGER NOT NULL,
            recommendation_id INTEGER NOT NULL,
            recommendation_time TIMESTAMP,
            FOREIGN KEY (user_id) REFERENCES Users(user_id),
            FOREIGN KEY (song_id) REFERENCES Songs(song_id)
        );
    """)

    # Commit changes
    conn.commit()
finally:
    # Close the connection even if one of the CREATE statements fails.
    conn.close()

In [109… # Connect to database again and insert sample data


# Reset all four tables and load the sample users, songs and listens,
# then display the three populated tables.
conn = sqlite3.connect(dbname)
sqlite3.enable_callback_tracebacks(True)
try:
    cursor = conn.cursor()

    # Start from empty tables so the cell can be re-run.  Rows that reference
    # other tables (Listens, Recommendations) are removed before the rows
    # they reference (Songs, Users).
    cursor.execute("delete from Listens;")
    cursor.execute("delete from Recommendations;")
    cursor.execute("delete from Songs;")
    cursor.execute("delete from Users;")

    # Insert sample users.
    # NOTE(review): every address in the recovered listing was the same
    # scrubbed placeholder, which cannot satisfy the UNIQUE constraint on
    # Users.email.  Distinct placeholder addresses are used here; the
    # original addresses are unknown -- confirm against the notebook.
    cursor.execute("""
        INSERT INTO Users (user_id, name, email)
        VALUES
            (1, 'Mickey', 'mickey@example.com'),
            (2, 'Minnie', 'minnie@example.com'),
            (3, 'Daffy', 'daffy@example.com'),
            (4, 'Pluto', 'pluto@example.com');
    """)

    # Insert sample songs from Taylor Swift, Ed Sheeran, Beatles
    cursor.execute("""
        INSERT INTO Songs (song_id, title, artist, genre)
        VALUES
            (1, 'Evermore', 'Taylor Swift', 'Pop'),
            (2, 'Willow', 'Taylor Swift', 'Pop'),
            (3, 'Shape of You', 'Ed Sheeran', 'Rock'),
            (4, 'Photograph', 'Ed Sheeran', 'Rock'),
            (5, 'Shivers', 'Ed Sheeran', 'Rock'),
            (6, 'Yesterday', 'Beatles', 'Classic'),
            (7, 'Yellow Submarine', 'Beatles', 'Classic'),
            (8, 'Hey Jude', 'Beatles', 'Classic'),
            (9, 'Bad Blood', 'Taylor Swift', 'Rock'),
            (10, 'DJ Mix', 'DJ', NULL);
    """)

    # Insert sample listens (listen_time is left unset; one rating is NULL)
    cursor.execute("""
        INSERT INTO Listens (listen_id, user_id, song_id, rating)
        VALUES
            (1, 1, 1, 4.5),
            (2, 1, 2, 4.2),
            (3, 1, 6, 3.9),
            (4, 2, 2, 4.7),
            (5, 2, 7, 4.6),
            (6, 2, 8, 3.9),
            (7, 3, 1, 2.9),
            (8, 3, 2, 4.9),
            (9, 3, 6, NULL);
    """)

    # Commit changes
    conn.commit()
finally:
    # Close the connection even if one of the statements fails.
    conn.close()

runSql('Users', "select * from Users;")
runSql('Songs', "select * from Songs;")
runSql('Listens', "select * from Listens;")

file:///Users/kunalsahni/Downloads/assignment.html Page 3 of 9
assignment 20/02/25, 10:41 PM

Users
user_id name email

1 Mickey mickey@example.com

2 Minnie minnie@example.com

3 Daffy daffy@example.com

4 Pluto pluto@example.com

Songs
song_id title artist genre

1 Evermore Taylor Swift Pop

2 Willow Taylor Swift Pop

3 Shape of You Ed Sheeran Rock

4 Photograph Ed Sheeran Rock

5 Shivers Ed Sheeran Rock

6 Yesterday Beatles Classic

7 Yellow Submarine Beatles Classic

8 Hey Jude Beatles Classic

9 Bad Blood Taylor Swift Rock

10 DJ Mix DJ None

Listens
listen_id user_id song_id rating listen_time

1 1 1 4.5 None

2 1 2 4.2 None

3 1 6 3.9 None

4 2 2 4.7 None

5 2 7 4.6 None

6 2 8 3.9 None

7 3 1 2.9 None

8 3 2 4.9 None

9 3 6 NaN None

Basic SQL queries (ORDER BY, GROUP BY, LIMIT, JOINs, LEFT JOINs)

file:///Users/kunalsahni/Downloads/assignment.html Page 4 of 9
assignment 20/02/25, 10:41 PM

In [110… """ Goal: Learn basic forms of SELECT, FROM, WHERE, DISTINCT """

# Four introductory queries over the Songs table.  Each one is kept in its
# own variable; they are all run, in order, at the end of the cell.

# Filter rows with WHERE.
qry_classic_songs = """
-- Find the titles and artists of songs in the "Classic" genre.
SELECT Songs.title, Songs.artist
FROM Songs
WHERE Songs.genre = 'Classic';"""

# One output row per song, so genres repeat.
qry_genres = """
-- List of all genres in the Songs table
SELECT genre
FROM Songs;"""

# DISTINCT collapses the repeated genres.
qry_distinct = """
-- List of unique genres in the Songs table
SELECT DISTINCT genre
FROM Songs;"""

# GROUP BY with COUNT, restricted to a single artist.
qry_taylor_count = """
-- Songs by Taylor Swift in different genres
SELECT genre, count(*) as num_songs
FROM Songs
where artist = 'Taylor Swift'
GROUP BY genre;"""

runSql(caption='Classic songs', query=qry_classic_songs)
runSql(caption='All genres in the Songs table', query=qry_genres)
runSql(caption='Unique genres in the Songs table', query=qry_distinct)
runSql(caption='Count songs by Taylor Swift in different genres', query=qry_taylor_count)

Classic songs
title artist

Yesterday Beatles

Yellow Submarine Beatles

Hey Jude Beatles

file:///Users/kunalsahni/Downloads/assignment.html Page 5 of 9
assignment 20/02/25, 10:41 PM

All genres in the Songs table


genre

Pop

Pop

Rock

Rock

Rock

Classic

Classic

Classic

Rock

None

Unique genres in the Songs table


genre

Pop

Rock

Classic

None

Count songs by Taylor Swift in different genres


genre num_songs

Pop 2

Rock 1

Query that calculates average ratings of all songs. Only includes songs with
Listens

# Average rating per song.  The inner JOIN keeps only songs that have at
# least one row in Listens, and AVG() ignores NULL ratings.
# NOTE(review): the column alias was cut off after "avg_" in the recovered
# listing; it is restored as avg_rating to match the recorded output header.
qry_join_songs_ratings = """
SELECT Songs.song_id, Songs.artist, Songs.title, AVG(Listens.rating) AS avg_rating
FROM Songs
JOIN Listens
ON Songs.song_id = Listens.song_id
GROUP BY Songs.song_id"""
runSql('Calculates average ratings for songs', qry_join_songs_ratings)

file:///Users/kunalsahni/Downloads/assignment.html Page 6 of 9
assignment 20/02/25, 10:41 PM

Calculates average ratings for songs


song_id artist title avg_rating

1 Taylor Swift Evermore 3.7

2 Taylor Swift Willow 4.6

6 Beatles Yesterday 3.9

7 Beatles Yellow Submarine 4.6

8 Beatles Hey Jude 3.9

TO DO:
1. Create a Recommendations table as shown in the lecture slides.
2. Write a query to produce two song recommendations for Minnie, and insert them into the Recommendations table. The recommendations should be the two songs with the highest average rating that Minnie has not listened to.
3. Write a query to retrieve the song title and artist of the recommendations for Minnie.

In [112… # query to retrieve song recommendations


# sorting songs not listened by Minnie in the order of their average rating
# The two best-rated songs Minnie has not listened to yet.
#  - 'Minnie' is single-quoted: in SQL, double quotes denote identifiers and
#    SQLite only accepts them as string literals as a legacy fallback.
#  - GROUP BY already yields one row per song, so DISTINCT is not needed.
#  - song_id is a secondary sort key so that ties in avg_rating are ordered
#    deterministically.
query_recommendations = """
SELECT Songs.song_id, AVG(Listens.rating) AS avg_rating
FROM Songs
JOIN Listens ON Songs.song_id = Listens.song_id
WHERE Songs.song_id NOT IN (
    SELECT song_id
    FROM Listens
    WHERE user_id = (SELECT user_id FROM Users WHERE name = 'Minnie')
)
GROUP BY Songs.song_id
ORDER BY avg_rating DESC, Songs.song_id
LIMIT 2"""

runSql('querying recommendations', query_recommendations)

querying recommendations
song_id avg_rating

6 3.9

1 3.7

In [113… #2. Populate above info into Recommendations table :


# Connect to the database again and insert the recommendations for Minnie
# Compute Minnie's top two unheard songs into temporary tables, copy them
# into Recommendations, then display the table.

# Minnie's user_id, looked up once.  String literals are single-quoted:
# double quotes denote identifiers in SQL.
create_temp_query = """
CREATE TEMPORARY TABLE temp_minnie_id AS
SELECT user_id FROM Users WHERE name = 'Minnie';
"""

# The two songs with the highest average rating among those Minnie has not
# listened to.  song_id breaks ties in avg_rating deterministically.
recommendations_query = """
CREATE TEMPORARY TABLE temp_top_songs AS
SELECT Songs.song_id, AVG(Listens.rating) AS avg_rating
FROM Songs
JOIN Listens ON Listens.song_id = Songs.song_id
WHERE Songs.song_id NOT IN (
    SELECT song_id
    FROM Listens
    WHERE user_id = (SELECT user_id FROM Users WHERE name = 'Minnie')
)
GROUP BY Songs.song_id
ORDER BY avg_rating DESC, Songs.song_id
LIMIT 2;
"""

# recommendation_id is the zero-based rank of the song by average rating.
# NOTE(review): the column list was cut off after "recommenda" in the
# recovered listing; it is restored as recommendation_time, the only
# remaining column of the table and the one the fourth SELECT value fills.
insert_query = """
INSERT INTO Recommendations (user_id, song_id, recommendation_id, recommendation_time)
SELECT
    (SELECT user_id FROM temp_minnie_id),
    song_id,
    ROW_NUMBER() OVER (ORDER BY avg_rating DESC, song_id) - 1 AS recommendation_id,
    datetime('now')
FROM temp_top_songs;
"""

drop_temp_query = """
DROP TABLE temp_minnie_id;
DROP TABLE temp_top_songs;
"""

conn = sqlite3.connect(dbname)
sqlite3.enable_callback_tracebacks(True)
try:
    cursor = conn.cursor()
    cursor.execute(create_temp_query)
    cursor.execute(recommendations_query)
    cursor.execute(insert_query)
    conn.commit()
    # drop_temp_query holds two statements, which execute() rejects, so it
    # is run with executescript().
    cursor.executescript(drop_temp_query)
finally:
    # Close the connection even if one of the statements fails.
    conn.close()

# Print Recommendations table to verify whether values have been inserted
runSql("print_query", "Select * from Recommendations")

print_query
user_id song_id recommendation_id recommendation_time

2 6 0 2025-02-21 06:40:56

2 1 1 2025-02-21 06:40:56

# Title and artist of every song recommended to Minnie.
# DISTINCT collapses duplicate (title, artist) pairs in the result.
# NOTE(review): the WHERE line was cut off inside the name literal in the
# recovered listing; it is restored as 'Minnie' with the closing
# parenthesis.  The literal is single-quoted because double quotes denote
# identifiers in SQL.
retreival_query = """
SELECT DISTINCT title, artist
FROM Songs
JOIN Recommendations ON Songs.song_id = Recommendations.song_id
WHERE Recommendations.user_id = (SELECT user_id FROM Users WHERE name = 'Minnie')
"""

runSql("retreival_query", retreival_query)

file:///Users/kunalsahni/Downloads/assignment.html Page 8 of 9
assignment 20/02/25, 10:41 PM

retreival_query
title artist

Yesterday Beatles

Evermore Taylor Swift

In [ ]:

file:///Users/kunalsahni/Downloads/assignment.html Page 9 of 9

You might also like