0% found this document useful (0 votes)

7 views19 pages

Source Code

Uploaded by

Kash Sharma

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as DOCX, PDF, TXT or read online on Scribd

0% found this document useful (0 votes)

7 views19 pages

Source Code

Uploaded by

Kash Sharma

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as DOCX, PDF, TXT or read online on Scribd

You are on page 1/ 19

Source Code:

library(recommenderlab)

library(ggplot2)

library(data.table)

library(reshape2)

# Load the two input tables from the working directory and print a quick
# overview of each.

# Movie catalogue; strings stay plain character vectors rather than factors.
movie_data <- read.csv(file = "movies.csv", stringsAsFactors = FALSE)

# User ratings, read with read.csv() defaults.
rating_data <- read.csv(file = "ratings.csv")

# Movie table: structure, summary statistics, first six rows.
str(movie_data)
summary(movie_data)
head(movie_data, n = 6L)

# Ratings table: summary statistics, first six rows.
summary(rating_data)
head(rating_data, n = 6L)

# Data pre-processing ----
# One-hot encode the pipe-separated `genres` column. The result, `genre_mat2`,
# has one row per movie and one 0/1 integer column per genre in `list_genre`.

movie_genre <- as.data.frame(movie_data$genres, stringsAsFactors = FALSE)

library(data.table)

# Split "A|B|C" into one column per position; movies that list fewer genres
# get NA in the trailing positions.
movie_genre2 <- as.data.frame(
  tstrsplit(movie_genre[, 1], "[|]", type.convert = TRUE),
  stringsAsFactors = FALSE
)
# Number the columns by however many positions the split actually produced
# rather than assuming exactly 10.
colnames(movie_genre2) <- seq_len(ncol(movie_genre2))

list_genre <- c("Action", "Adventure", "Animation", "Children",
                "Comedy", "Crime", "Documentary", "Drama", "Fantasy",
                "Film-Noir", "Horror", "Musical", "Mystery", "Romance",
                "Sci-Fi", "Thriller", "War", "Western")

# Indicator matrix sized from the data (no hard-coded row count): cell [i, g]
# is 1 when movie i lists genre g in any split position. Values that are not
# in `list_genre`, and the NA padding, match nothing and leave zeros.
# NOTE(review): `genre_mat1` is now an integer matrix without the former
# header row of genre names; only `genre_mat2` is used later in this script.
genre_mat1 <- matrix(
  0L,
  nrow = nrow(movie_genre2),
  ncol = length(list_genre),
  dimnames = list(NULL, list_genre)
)
for (genre in list_genre) {
  has_genre <- rowSums(movie_genre2 == genre, na.rm = TRUE) > 0
  genre_mat1[has_genre, genre] <- 1L
}

genre_mat2 <- as.data.frame(genre_mat1, stringsAsFactors = FALSE)

str(genre_mat2)

# Search matrix: the first two columns of the movie table (id and title) side
# by side with the genre indicator columns, so films can be looked up by genre.
SearchMatrix <- cbind(movie_data[, c(1, 2)], genre_mat2)
head(SearchMatrix)

# Reshape the long ratings table to wide form: one row per userId, one column
# per movieId, each cell holding the rating.
ratingMatrix <- dcast(
  rating_data,
  userId ~ movieId,
  value.var = "rating",
  na.rm = FALSE
)

# Drop the leading userId column so that only the rating columns remain.
ratingMatrix <- as.matrix(ratingMatrix[, -1])

# Convert the dense matrix into recommenderlab's sparse rating class.
ratingMatrix <- as(ratingMatrix, "realRatingMatrix")
ratingMatrix

# List the recommender algorithms registered for real-valued rating matrices.
recommendation_model <- recommenderRegistry$get_entries(
  dataType = "realRatingMatrix"
)
names(recommendation_model)

# Description string of every registered algorithm.
lapply(recommendation_model, function(entry) entry[["description"]])

# Default parameters of the Item Based Collaborative Filtering (IBCF) entry,
# the single model implemented in this project.
recommendation_model[["IBCF_realRatingMatrix"]][["parameters"]]

# Collaborative filtering suggests movies to a user based on preferences
# collected from many other users.
# With the help of recommenderlab we can compute similarities between users;
# only the first four users (rows) are compared here.
similarity_mat <- similarity(ratingMatrix[1:4, ],
                             method = "cosine",
                             which = "users")

as.matrix(similarity_mat)

image(as.matrix(similarity_mat), main = "User's Similarities")

# Portray the similarity shared between the films, again restricted to the
# first four movies (columns).
movie_similarity <- similarity(ratingMatrix[, 1:4],
                               method = "cosine",
                               which = "items")

as.matrix(movie_similarity)

image(as.matrix(movie_similarity), main = "Movies similarity")

# Flatten the values stored in the rating matrix into a plain vector.
# NOTE(review): the recorded run shows a very large count for 0, presumably
# the unrated cells of the matrix - confirm before interpreting the table.
rating_values <- as.vector(ratingMatrix@data)

# Distinct rating values present in the data.
unique(rating_values)

# How often each rating value occurs.
Table_of_Ratings <- table(rating_values)
Table_of_Ratings

# Most viewed movies ----
library(ggplot2)

# Number of ratings ("views") per movie; the vector names are the movieIds.
movie_views <- colCounts(ratingMatrix)

table_views <- data.frame(movie = names(movie_views),
                          views = movie_views)

# Most-rated movies first.
table_views <- table_views[order(table_views$views, decreasing = TRUE), ]

# Attach titles with a single vectorised lookup instead of a row-by-row
# subset() over a hard-coded number of movies. A movieId that is missing from
# `movie_data` yields NA rather than aborting the script.
table_views$title <- as.character(
  movie_data$title[match(as.integer(as.character(table_views$movie)),
                         movie_data$movieId)]
)

table_views[1:6, ]

# Bar chart of the rating counts for the six most-rated films, with the count
# printed above each bar and the titles tilted so they stay readable.
ggplot(data = table_views[1:6, ], mapping = aes(x = title, y = views)) +
  geom_col(fill = "steelblue") +
  geom_text(mapping = aes(label = views), vjust = -0.3, size = 3.5) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  ggtitle("Total Views of the Top Films")

# Heatmap of movie ratings ----
# Plot the top-left corner of the rating matrix: the first 20 users (rows)
# against the first 25 movies (columns). The title states the row and column
# counts that are actually plotted.
image(ratingMatrix[1:20, 1:25],
      axes = FALSE,
      main = "Heatmap of the first 20 rows and 25 columns")

# Data preparation ----
# Keep only rows (users) with more than 50 ratings and columns (movies) with
# more than 50 ratings.
movie_ratings <- ratingMatrix[
  rowCounts(ratingMatrix) > 50,
  colCounts(ratingMatrix) > 50
]
movie_ratings

# 98th-percentile rating counts per user and per movie, used as cut-offs for
# the "top users and movies" heatmaps.
minimum_movies <- quantile(rowCounts(movie_ratings), probs = 0.98)
minimum_users <- quantile(colCounts(movie_ratings), probs = 0.98)

image(
  movie_ratings[
    rowCounts(movie_ratings) > minimum_movies,
    colCounts(movie_ratings) > minimum_users
  ],
  main = "Heatmap of the top users and movies"
)

# Distribution of the average rating per user ----
average_ratings <- rowMeans(movie_ratings)

# Histogram built with ggplot() + geom_histogram() rather than the deprecated
# qplot() shortcut; fill, outline colour, axis variable and title are the same.
ggplot(data.frame(average_ratings = average_ratings),
       aes(x = average_ratings)) +
  geom_histogram(fill = "steelblue", colour = "red") +
  ggtitle("Distribution of the average rating per user")

# Data normalization ----
# Normalize the ratings with recommenderlab's default settings.
normalized_ratings <- normalize(movie_ratings)

# Sanity check: number of users whose mean normalized rating is still above
# 1e-05 (the recorded run printed 0).
sum(rowMeans(normalized_ratings) > 1e-05)

# Heatmap of the normalized ratings, restricted with the same 98th-percentile
# cut-offs as the raw-rating heatmap.
image(
  normalized_ratings[
    rowCounts(normalized_ratings) > minimum_movies,
    colCounts(normalized_ratings) > minimum_users
  ],
  main = "Normalized Ratings of the Top Users"
)

# Data binarization ----
# 95th-percentile rating counts per user and per movie, used to pick the rows
# and columns shown in the binary heatmap.
binary_minimum_movies <- quantile(rowCounts(movie_ratings), probs = 0.95)
binary_minimum_users <- quantile(colCounts(movie_ratings), probs = 0.95)

#movies_watched <- binarize(movie_ratings, minRating = 1)

# Binary version of the ratings using a minimum rating of 3.
good_rated_films <- binarize(movie_ratings, minRating = 3)

# The row/column filters are computed on `movie_ratings` and applied to the
# binarized matrix.
image(
  good_rated_films[
    rowCounts(movie_ratings) > binary_minimum_movies,
    colCounts(movie_ratings) > binary_minimum_users
  ],
  main = "Heatmap of the top users and movies"
)

# Collaborative filtering system ----
# Split the users into a training and a test set. Each user is drawn
# independently with probability 0.8 of landing in the training set, so the
# 80% / 20% split is approximate and changes from run to run (no seed is set).
sampled_data <- sample(
  c(TRUE, FALSE),
  nrow(movie_ratings),
  replace = TRUE,
  prob = c(0.8, 0.2)
)

# TRUE rows train the model; the remaining rows are held out for prediction.
training_data <- movie_ratings[sampled_data, ]
testing_data <- movie_ratings[!sampled_data, ]

# Building the recommendation system ----
# Look up the registered algorithms again and show the IBCF defaults.
recommendation_system <- recommenderRegistry$get_entries(
  dataType = "realRatingMatrix"
)
recommendation_system[["IBCF_realRatingMatrix"]][["parameters"]]

# Train an item-based collaborative filtering model on the training users,
# passing k = 30 as the model parameter.
recommen_model <- Recommender(
  training_data,
  method = "IBCF",
  parameter = list(k = 30)
)
recommen_model
class(recommen_model)

# Exploring the recommendation system model ----
model_info <- getModel(recommen_model)

# Class and dimensions of the similarity matrix stored in the model.
class(model_info$sim)
dim(model_info$sim)

# Heatmap of the top-left `top_items` x `top_items` corner of that matrix.
top_items <- 20
image(model_info$sim[1:top_items, 1:top_items],
      main = "Heatmap of the first rows and columns")

# Per row and per column, count the entries with similarity above 0.
sum_rows <- rowSums(model_info$sim > 0)
table(sum_rows)

sum_cols <- colSums(model_info$sim > 0)

# Histogram built with ggplot() + geom_histogram() rather than the deprecated
# qplot() shortcut; fill, outline colour, axis variable and title are the same.
ggplot(data.frame(sum_cols = sum_cols), aes(x = sum_cols)) +
  geom_histogram(fill = "steelblue", colour = "red") +
  ggtitle("Distribution of the column count")

# Number of items to recommend to each user.
top_recommendations <- 10

predicted_recommendations <- predict(object = recommen_model,
                                     newdata = testing_data,
                                     n = top_recommendations)
predicted_recommendations

# Recommendations for the first test user: `user1` holds item indices and
# `movies_user1` the matching item labels (movieIds as strings).
user1 <- predicted_recommendations@items[[1]]
movies_user1 <- predicted_recommendations@itemLabels[user1]

# Translate the ids into titles with one vectorised lookup. Unlike a loop
# over 1:10, this also works when the user received fewer than ten
# recommendations.
movies_user2 <- as.character(
  movie_data$title[match(as.integer(movies_user1), movie_data$movieId)]
)
movies_user2

# Matrix with the recommendations for each user: one column per test user,
# one row per recommendation slot, cells holding movieIds.
# vapply() guarantees a matrix result; sapply() would silently return a list
# as soon as one user received fewer recommendations than the others, and the
# column indexing below would then fail. Short lists are padded with NA.
recommendation_matrix <- vapply(
  predicted_recommendations@items,
  function(x) {
    ids <- as.integer(colnames(movie_ratings)[x])
    length(ids) <- predicted_recommendations@n
    ids
  },
  integer(predicted_recommendations@n)
)

#dim(recc_matrix)

# Recommendations for the first four test users.
recommendation_matrix[, 1:4]

# Distribution of the number of items for IBCF ----
# For every movie, count how often it appears in the recommendation matrix;
# the counts are kept as a factor named by movieId.
number_of_items <- factor(table(recommendation_matrix))

chart_title <- "Distribution of the Number of Items for IBCF"

# Bar chart built with ggplot() + geom_bar() rather than the deprecated
# qplot() shortcut; fill, outline colour, axis variable and title are the same.
ggplot(data.frame(number_of_items = number_of_items),
       aes(x = number_of_items)) +
  geom_bar(fill = "steelblue", colour = "red") +
  ggtitle(chart_title)

# The four most frequently recommended movies.
number_of_items_sorted <- sort(number_of_items, decreasing = TRUE)
number_of_items_top <- head(number_of_items_sorted, n = 4)

table_top <- data.frame(as.integer(names(number_of_items_top)),
                        number_of_items_top)

# Replace the movieIds in the first column by titles in one vectorised step.
# The former loop over 1:4 failed with fewer than four rows and turned the id
# column into character halfway through the lookups.
table_top[[1]] <- as.character(
  movie_data$title[match(table_top[[1]], movie_data$movieId)]
)

colnames(table_top) <- c("Movie Title", "No. of Items")

head(table_top)

Output:
> library(recommenderlab)
> library(ggplot2)
> library(data.table)
> library(reshape2)
> # Retrieve and display data
>
> movie_data <- read.csv("movies.csv",stringsAsFactors=FALSE)
> rating_data <- read.csv("ratings.csv")
> str(movie_data)
'data.frame': 10329 obs. of 3 variables:
$ movieId: int 1 2 3 4 5 6 7 8 9 10 ...
$ title : chr "Toy Story (1995)" "Jumanji (1995)" "Grumpier Old Men
(1995)" "Waiting to Exhale (1995)" ...
$ genres : chr "Adventure|Animation|Children|Comedy|Fantasy" "Adventure|
Children|Fantasy" "Comedy|Romance" "Comedy|Drama|Romance" ...
> # Overview the summary
> summary(movie_data)
movieId title genres
Min. : 1 Length:10329 Length:10329
1st Qu.: 3240 Class :character Class :character
Median : 7088 Mode :character Mode :character
Mean : 31924
3rd Qu.: 59900
Max. :149532
> head(movie_data)
movieId title
1 1 Toy Story (1995)
2 2 Jumanji (1995)
3 3 Grumpier Old Men (1995)
4 4 Waiting to Exhale (1995)
5 5 Father of the Bride Part II (1995)
6 6 Heat (1995)
genres
1 Adventure|Animation|Children|Comedy|Fantasy
2 Adventure|Children|Fantasy
3 Comedy|Romance
4 Comedy|Drama|Romance
5 Comedy
6 Action|Crime|Thriller
> summary(rating_data)
userId movieId rating
Min. : 1.0 Min. : 1 Min. :0.500
1st Qu.:192.0 1st Qu.: 1073 1st Qu.:3.000
Median :383.0 Median : 2497 Median :3.500
Mean :364.9 Mean : 13381 Mean :3.517
3rd Qu.:557.0 3rd Qu.: 5991 3rd Qu.:4.000
Max. :668.0 Max. :149532 Max. :5.000
timestamp
Min. :8.286e+08
1st Qu.:9.711e+08
Median :1.115e+09
Mean :1.130e+09
3rd Qu.:1.275e+09
Max. :1.452e+09
> head(rating_data)
userId movieId rating timestamp
1 1 16 4.0 1217897793
2 1 24 1.5 1217895807
3 1 32 4.0 1217896246
4 1 47 4.0 1217896556
5 1 50 4.0 1217896523
6 1 110 4.0 1217896150
> # Data pre-processing
> # Creating a one-hot encoding to create a matrix that comprises of
corresponding genres for each of the films.
> movie_genre <- as.data.frame(movie_data$genres, stringsAsFactors=FALSE)
> library(data.table)
> movie_genre2 <- as.data.frame(tstrsplit(movie_genre[,1], '[|]',
+ type.convert=TRUE),
+ stringsAsFactors=FALSE)
> colnames(movie_genre2) <- c(1:10)
> list_genre <- c("Action", "Adventure", "Animation", "Children",
+ "Comedy", "Crime","Documentary", "Drama", "Fantasy",
+ "Film-Noir", "Horror", "Musical", "Mystery","Romance",
+ "Sci-Fi", "Thriller", "War", "Western")
> genre_mat1 <- matrix(0,10330,18)
> genre_mat1[1,] <- list_genre
> colnames(genre_mat1) <- list_genre
> for (index in 1:nrow(movie_genre2)) {
+ for (col in 1:ncol(movie_genre2)) {
+ gen_col = which(genre_mat1[1,] == movie_genre2[index,col])
+ genre_mat1[index+1,gen_col] <- 1
+ }
+ }
> genre_mat2 <- as.data.frame(genre_mat1[-1,], stringsAsFactors=FALSE)
#remove first row, which was the genre list
> for (col in 1:ncol(genre_mat2)) {
+ genre_mat2[,col] <- as.integer(genre_mat2[,col]) #convert from
characters to integers
+ }
> str(genre_mat2)
'data.frame': 10329 obs. of 18 variables:
$ Action : int 0 0 0 0 0 1 0 0 1 1 ...
$ Adventure : int 1 1 0 0 0 0 0 1 0 1 ...
$ Animation : int 1 0 0 0 0 0 0 0 0 0 ...
$ Children : int 1 1 0 0 0 0 0 1 0 0 ...
$ Comedy : int 1 0 1 1 1 0 1 0 0 0 ...
$ Crime : int 0 0 0 0 0 1 0 0 0 0 ...
$ Documentary: int 0 0 0 0 0 0 0 0 0 0 ...
$ Drama : int 0 0 0 1 0 0 0 0 0 0 ...
$ Fantasy : int 1 1 0 0 0 0 0 0 0 0 ...
$ Film-Noir : int 0 0 0 0 0 0 0 0 0 0 ...
$ Horror : int 0 0 0 0 0 0 0 0 0 0 ...
$ Musical : int 0 0 0 0 0 0 0 0 0 0 ...
$ Mystery : int 0 0 0 0 0 0 0 0 0 0 ...
$ Romance : int 0 0 1 1 0 0 1 0 0 0 ...
$ Sci-Fi : int 0 0 0 0 0 0 0 0 0 0 ...
$ Thriller : int 0 0 0 0 0 1 0 0 0 1 ...
$ War : int 0 0 0 0 0 0 0 0 0 0 ...
$ Western : int 0 0 0 0 0 0 0 0 0 0 ...
> # Creating a ‘search matrix’ - searching films by specifying the genre
> SearchMatrix <- cbind(movie_data[,1:2], genre_mat2[])
> head(SearchMatrix)
movieId title Action Adventure
1 1 Toy Story (1995) 0 1
2 2 Jumanji (1995) 0 1
3 3 Grumpier Old Men (1995) 0 0
4 4 Waiting to Exhale (1995) 0 0
5 5 Father of the Bride Part II (1995) 0 0
6 6 Heat (1995) 1 0
Animation Children Comedy Crime Documentary Drama Fantasy
1 1 1 1 0 0 0 1
2 0 1 0 0 0 0 1
3 0 0 1 0 0 0 0
4 0 0 1 0 0 1 0
5 0 0 1 0 0 0 0
6 0 0 0 1 0 0 0
Film-Noir Horror Musical Mystery Romance Sci-Fi Thriller War
1 0 0 0 0 0 0 0 0
2 0 0 0 0 0 0 0 0
3 0 0 0 0 1 0 0 0
4 0 0 0 0 1 0 0 0
5 0 0 0 0 0 0 0 0
6 0 0 0 0 0 0 1 0
Western
1 0
2 0
3 0
4 0
5 0
6 0
> ratingMatrix <- dcast(rating_data, userId~movieId, value.var = "rating",
na.rm=FALSE)
> ratingMatrix <- as.matrix(ratingMatrix[,-1]) #remove userIds
> #Convert rating matrix into a recommenderlab sparse matrix
> ratingMatrix <- as(ratingMatrix, "realRatingMatrix")
> ratingMatrix
668 x 10325 rating matrix of class ‘realRatingMatrix’ with 105339 ratings.
> # Overview some important parameters for building recommendation systems
for movies
> recommendation_model <- recommenderRegistry$get_entries(dataType =
"realRatingMatrix")
> names(recommendation_model)
[1] "HYBRID_realRatingMatrix"
[2] "ALS_realRatingMatrix"
[3] "ALS_implicit_realRatingMatrix"
[4] "IBCF_realRatingMatrix"
[5] "LIBMF_realRatingMatrix"
[6] "POPULAR_realRatingMatrix"
[7] "RANDOM_realRatingMatrix"
[8] "RERECOMMEND_realRatingMatrix"
[9] "SVD_realRatingMatrix"
[10] "SVDF_realRatingMatrix"
[11] "UBCF_realRatingMatrix"
> lapply(recommendation_model, "[[", "description")
$HYBRID_realRatingMatrix
[1] "Hybrid recommender that aggegates several recommendation strategies
using weighted averages."
$ALS_realRatingMatrix
[1] "Recommender for explicit ratings based on latent factors, calculated
by alternating least squares algorithm."

$ALS_implicit_realRatingMatrix
[1] "Recommender for implicit data based on latent factors, calculated by
alternating least squares algorithm."

$IBCF_realRatingMatrix
[1] "Recommender based on item-based collaborative filtering."

$LIBMF_realRatingMatrix
[1] "Matrix factorization with LIBMF via package recosystem
(https://cran.r-project.org/web/packages/recosystem/vignettes/introduction
.html)."

$POPULAR_realRatingMatrix
[1] "Recommender based on item popularity."

$RANDOM_realRatingMatrix
[1] "Produce random recommendations (real ratings)."

$RERECOMMEND_realRatingMatrix
[1] "Re-recommends highly rated items (real ratings)."
$SVD_realRatingMatrix
[1] "Recommender based on SVD approximation with column-mean imputation."

$SVDF_realRatingMatrix
[1] "Recommender based on Funk SVD with gradient descend
(https://sifter.org/~simon/journal/20061211.html)."

$UBCF_realRatingMatrix
[1] "Recommender based on user-based collaborative filtering."

> # Implementing a single model in the R project – Item Based

Collaborative Filtering
> recommendation_model$IBCF_realRatingMatrix$parameters
$k
[1] 30

$method
[1] "cosine"

$normalize
[1] "center"

$normalize_sim_matrix
[1] FALSE

$alpha
[1] 0.5

$na_as_zero
[1] FALSE

> # Collaborative Filtering involves suggesting movies to the users that

are based on collecting preferences from many other users.
> # With the help of recommenderlab, we can compute similarities between
users
> similarity_mat <- similarity(ratingMatrix[1:4, ],
+ method = "cosine",
+ which = "users")
> as.matrix(similarity_mat)
1 2 3 4
1 NA 0.9880430 0.9820862 0.9957199
2 0.9880430 NA 0.9962866 0.9687126
3 0.9820862 0.9962866 NA 0.9944484
4 0.9957199 0.9687126 0.9944484 NA
> image(as.matrix(similarity_mat), main = "User's Similarities")
> # Portray the similarity that is shared between the films
> movie_similarity <- similarity(ratingMatrix[, 1:4], method =
+ "cosine", which = "items")
> as.matrix(movie_similarity)
1 2 3 4
1 NA 0.9834866 0.9779671 0.9550638
2 0.9834866 NA 0.9829378 0.9706208
3 0.9779671 0.9829378 NA 0.9932438
4 0.9550638 0.9706208 0.9932438 NA
> image(as.matrix(movie_similarity), main = "Movies similarity")
> rating_values <- as.vector(ratingMatrix@data)
> unique(rating_values) # extracting unique ratings
[1] 0.0 5.0 4.0 3.0 4.5 1.5 2.0 3.5 1.0 2.5 0.5
> Table_of_Ratings <- table(rating_values) # creating a count of movie
ratings
> Table_of_Ratings
rating_values
0 0.5 1 1.5 2 2.5 3 3.5
6791761 1198 3258 1567 7943 5484 21729 12237
4 4.5 5
28880 8187 14856
> # Most viewed movies visualization
> library(ggplot2)
> movie_views <- colCounts(ratingMatrix) # count views for each movie
> table_views <- data.frame(movie = names(movie_views),
+ views = movie_views) # create dataframe of
views
> table_views <- table_views[order(table_views$views,
+ decreasing = TRUE), ] # sort by number
of views
> table_views$title <- NA
> for (index in 1:10325){
+ table_views[index,3] <- as.character(subset(movie_data,
+ movie_data$movieId ==
table_views[index,1])$title)
+ }
> table_views[1:6,]
movie views title
296 296 325 Pulp Fiction (1994)
356 356 311 Forrest Gump (1994)
318 318 308 Shawshank Redemption, The (1994)
480 480 294 Jurassic Park (1993)
593 593 290 Silence of the Lambs, The (1991)
260 260 273 Star Wars: Episode IV - A New Hope (1977)
> # Visualize a bar plot for the total number of views of the top films
> ggplot(table_views[1:6, ], aes(x = title, y = views)) +
+ geom_bar(stat="identity", fill = 'steelblue') +
+ geom_text(aes(label=views), vjust=-0.3, size=3.5) +
+ theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
+ ggtitle("Total Views of the Top Films")
> # Heatmap of Movie Ratings
> # Visualize a heatmap of the movie ratings
> image(ratingMatrix[1:20, 1:25], axes = FALSE, main = "Heatmap of the
first 25 rows and 25 columns")
> # Data Preparation
> movie_ratings <- ratingMatrix[rowCounts(ratingMatrix) > 50,
+ colCounts(ratingMatrix) > 50]
> movie_ratings
420 x 447 rating matrix of class ‘realRatingMatrix’ with 38341 ratings.
> # describing matrix of relevant users
> minimum_movies<- quantile(rowCounts(movie_ratings), 0.98)
> minimum_users <- quantile(colCounts(movie_ratings), 0.98)
> image(movie_ratings[rowCounts(movie_ratings) > minimum_movies,
+ colCounts(movie_ratings) > minimum_users],
+ main = "Heatmap of the top users and movies")
> # Visualizing the distribution of the average ratings per user
> average_ratings <- rowMeans(movie_ratings)
> qplot(average_ratings, fill=I("steelblue"), col=I("red")) +
+ ggtitle("Distribution of the average rating per user")
`stat_bin()` using `bins = 30`. Pick better value with
`binwidth`.
> # Data Normalization
> normalized_ratings <- normalize(movie_ratings)
> sum(rowMeans(normalized_ratings) > 0.00001)
[1] 0
> image(normalized_ratings[rowCounts(normalized_ratings) > minimum_movies,
+ colCounts(normalized_ratings) > minimum_users],
+ main = "Normalized Ratings of the Top Users")
> # Data Binarization
> binary_minimum_movies <- quantile(rowCounts(movie_ratings), 0.95)
> binary_minimum_users <- quantile(colCounts(movie_ratings), 0.95)
> #movies_watched <- binarize(movie_ratings, minRating = 1)
> good_rated_films <- binarize(movie_ratings, minRating = 3)
> image(good_rated_films[rowCounts(movie_ratings) > binary_minimum_movies,
+ colCounts(movie_ratings) > binary_minimum_users],
+ main = "Heatmap of the top users and movies")
> # Collaborative Filtering System
> # Splitting the dataset into 80% training set and 20% test set
> sampled_data<- sample(x = c(TRUE, FALSE),
+ size = nrow(movie_ratings),
+ replace = TRUE,
+ prob = c(0.8, 0.2))
> training_data <- movie_ratings[sampled_data, ]
> testing_data <- movie_ratings[!sampled_data, ]
> # Building the Recommendation System
> recommendation_system <- recommenderRegistry$get_entries(dataType
="realRatingMatrix")
> recommendation_system$IBCF_realRatingMatrix$parameters
$k
[1] 30

$method
[1] "cosine"

$normalize
[1] "center"

$normalize_sim_matrix
[1] FALSE

$alpha
[1] 0.5

$na_as_zero
[1] FALSE

> recommen_model <- Recommender(data = training_data,

+ method = "IBCF",
+ parameter = list(k = 30))
> recommen_model
Recommender of type ‘IBCF’ for ‘realRatingMatrix’
learned using 342 users.
> class(recommen_model)
[1] "Recommender"
attr(,"package")
[1] "recommenderlab"
> # Exploring the data science recommendation system model
> model_info <- getModel(recommen_model)
> class(model_info$sim)
[1] "dgCMatrix"
attr(,"package")
[1] "Matrix"
> dim(model_info$sim)
[1] 447 447
> top_items <- 20
> image(model_info$sim[1:top_items, 1:top_items],
+ main = "Heatmap of the first rows and columns")
> # Visualize sum of rows and columns with the similarity of the objects
above 0
> sum_rows <- rowSums(model_info$sim > 0)
> table(sum_rows)
sum_rows
30
447
> sum_cols <- colSums(model_info$sim > 0)
> qplot(sum_cols, fill=I("steelblue"), col=I("red"))+
ggtitle("Distribution of the column count")
`stat_bin()` using `bins = 30`. Pick better value with
`binwidth`.
> # the number of items to recommend to each user
> top_recommendations <- 10
> predicted_recommendations <- predict(object = recommen_model,
+ newdata = testing_data,
+ n = top_recommendations)
> predicted_recommendations
Recommendations as ‘topNList’ with n = 10 for 78 users.
> # recommendation for the first user
> user1 <- predicted_recommendations@items[[1]]
> movies_user1 <- predicted_recommendations@itemLabels[user1]
> movies_user2 <- movies_user1
> for (index in 1:10){
+ movies_user2[index] <- as.character(subset(movie_data,
+ movie_data$movieId ==
movies_user1[index])$title)
+ }
> movies_user2
[1] "Toy Story (1995)"
[2] "Casino (1995)"
[3] "Leaving Las Vegas (1995)"
[4] "Twelve Monkeys (a.k.a. 12 Monkeys) (1995)"
[5] "Taxi Driver (1976)"
[6] "Like Water for Chocolate (Como agua para chocolate) (1992)"
[7] "Star Trek: Generations (1994)"
[8] "Blade Runner (1982)"
[9] "Trainspotting (1996)"
[10] "Godfather, The (1972)"
> # matrix with the recommendations for each user
> recommendation_matrix <- sapply(predicted_recommendations@items,
+ function(x)
{ as.integer(colnames(movie_ratings)[x]) })
> #dim(recc_matrix)
> recommendation_matrix[,1:4]
0 1 2 3
[1,] 1 145 368 165
[2,] 16 261 457 858
[3,] 25 293 595 3039
[4,] 32 1047 708 32587
[5,] 111 1396 1246 48516
[6,] 265 2353 1393 70286
[7,] 329 2355 1704 3147
[8,] 541 2407 2268 1732
[9,] 778 3949 3578 1214
[10,] 858 4246 5418 3897
> # Distribution of the Number of Items for IBCF
> number_of_items <- factor(table(recommendation_matrix))
> chart_title <- "Distribution of the Number of Items for IBCF"
> qplot(number_of_items, fill=I("steelblue"), col=I("red")) +
ggtitle(chart_title)
>
> number_of_items_sorted <- sort(number_of_items, decreasing = TRUE)
> number_of_items_top <- head(number_of_items_sorted, n = 4)
> table_top <- data.frame(as.integer(names(number_of_items_top)),
+ number_of_items_top)
> for(i in 1:4) {
+ table_top[i,1] <- as.character(subset(movie_data,
+ movie_data$movieId ==
table_top[i,1])$title)
+ }
>
> colnames(table_top) <- c("Movie Title", "No. of Items")
> head(table_top)
Movie Title No. of Items
36 Dead Man Walking (1995) 10
111 Taxi Driver (1976) 9
529 Searching for Bobby Fischer (1993) 8
903 Vertigo (1958) 8

Essential n8n Playbook
From Everand
Essential n8n Playbook
Leandro Calado
No ratings yet
IMDB Movie Analysis 05 Project
No ratings yet
IMDB Movie Analysis 05 Project
7 pages
TSAT Model Manual PDF
100% (1)
TSAT Model Manual PDF
431 pages
DSLAB5
No ratings yet
DSLAB5
17 pages
Movie Recommendation System in R Jupyter Notebook
No ratings yet
Movie Recommendation System in R Jupyter Notebook
18 pages
Recommendation Engine Problem Statement
No ratings yet
Recommendation Engine Problem Statement
37 pages
MovieLens Final-Project
No ratings yet
MovieLens Final-Project
18 pages
Practical Work 1 - Recommender Systems
No ratings yet
Practical Work 1 - Recommender Systems
3 pages
1st Harvard Project
No ratings yet
1st Harvard Project
17 pages
04 - Movie Rating Analysis
No ratings yet
04 - Movie Rating Analysis
9 pages
Recommendation System
No ratings yet
Recommendation System
11 pages
DL Project
No ratings yet
DL Project
9 pages
IMDb+Movie+Assignment Stub
No ratings yet
IMDb+Movie+Assignment Stub
9 pages
Adriano Axel Pliopas Pereira - 83393 - Exercise 8 - Ggplot2movies
No ratings yet
Adriano Axel Pliopas Pereira - 83393 - Exercise 8 - Ggplot2movies
15 pages
SML PBL
No ratings yet
SML PBL
18 pages
Global Baseline Estimate - 12S21009
No ratings yet
Global Baseline Estimate - 12S21009
8 pages
Movie Recommendation System KNN (ML-Usecase)
No ratings yet
Movie Recommendation System KNN (ML-Usecase)
7 pages
MIT Data Science and Big Data Analytics Case Study
No ratings yet
MIT Data Science and Big Data Analytics Case Study
8 pages
IMDB Dataframe Insights
No ratings yet
IMDB Dataframe Insights
3 pages
Project Problem Statement
No ratings yet
Project Problem Statement
3 pages
Movie Recommendation System
No ratings yet
Movie Recommendation System
22 pages
Movie Recommender Systems
No ratings yet
Movie Recommender Systems
11 pages
Implementation and Comparison of Recommender Systems Using Various Models
100% (1)
Implementation and Comparison of Recommender Systems Using Various Models
13 pages
F24 Proj4
No ratings yet
F24 Proj4
6 pages
Chapter 9 - Recommendation Systems
No ratings yet
Chapter 9 - Recommendation Systems
12 pages
Recommender System
No ratings yet
Recommender System
45 pages
Recommender System Unit Ii
No ratings yet
Recommender System Unit Ii
14 pages
Group 15 Report
No ratings yet
Group 15 Report
23 pages
Hands-On Lab - Importing Data in R
No ratings yet
Hands-On Lab - Importing Data in R
8 pages
Movie Recommendation Report
No ratings yet
Movie Recommendation Report
27 pages
Recommendation Engine 1657857468
No ratings yet
Recommendation Engine 1657857468
15 pages
Business Case - Zee Recommender Systems Approach
No ratings yet
Business Case - Zee Recommender Systems Approach
10 pages
Report Final-MovieLens
No ratings yet
Report Final-MovieLens
47 pages
Exp 2
No ratings yet
Exp 2
14 pages
Assignment 5zeerak
No ratings yet
Assignment 5zeerak
6 pages
T10 Recommender System
No ratings yet
T10 Recommender System
45 pages
Personalize Movie Recommendation System CS 229 Project Final Writeup
0% (1)
Personalize Movie Recommendation System CS 229 Project Final Writeup
6 pages
Math 551 Lab 9
No ratings yet
Math 551 Lab 9
5 pages
Nloypqbmz: Pandas PD
No ratings yet
Nloypqbmz: Pandas PD
3 pages
Beercase
No ratings yet
Beercase
2 pages
ML Project Movie Recommendation System
No ratings yet
ML Project Movie Recommendation System
2 pages
Movie Recommendation System Project
No ratings yet
Movie Recommendation System Project
9 pages
Movie Recommdation Report
No ratings yet
Movie Recommdation Report
10 pages
AIML Presentation
No ratings yet
AIML Presentation
21 pages
R Project 98
No ratings yet
R Project 98
15 pages
Recommended System
No ratings yet
Recommended System
33 pages
Project Movielense Solution
No ratings yet
Project Movielense Solution
4 pages
Project 5
No ratings yet
Project 5
5 pages
ML Case Study
No ratings yet
ML Case Study
4 pages
Recommendation System Based On Collaborative Filtering: Zheng Wen December 12, 2008
No ratings yet
Recommendation System Based On Collaborative Filtering: Zheng Wen December 12, 2008
10 pages
Movie Recomendation System Using R
No ratings yet
Movie Recomendation System Using R
41 pages
Recommendation System in Python
No ratings yet
Recommendation System in Python
6 pages
AIML Mod4 Loki
No ratings yet
AIML Mod4 Loki
11 pages
Finalproj Aml
No ratings yet
Finalproj Aml
69 pages
CSE545 sp23 (9) Recommendation Systems 4-10
No ratings yet
CSE545 sp23 (9) Recommendation Systems 4-10
72 pages
RecSys Updated
No ratings yet
RecSys Updated
37 pages
Ex 3
No ratings yet
Ex 3
2 pages
IMDB Analysis
No ratings yet
IMDB Analysis
4 pages
The Essential R Reference
From Everand
The Essential R Reference
Mark Gardener
No ratings yet
No Ph.D. Game Design With Three.js
From Everand
No Ph.D. Game Design With Three.js
Nikiforos Kontopoulos
No ratings yet
TensorFlow深度学习项目实战: Chinese Edition
From Everand
TensorFlow深度学习项目实战: Chinese Edition
Posts & Telecom Press
No ratings yet
Sdds004a Ai Journal
No ratings yet
Sdds004a Ai Journal
21 pages
Sdds050a Ai Journal
No ratings yet
Sdds050a Ai Journal
11 pages
AI
No ratings yet
AI
15 pages
B3 G4 Project
No ratings yet
B3 G4 Project
81 pages
Automated Trading System
No ratings yet
Automated Trading System
10 pages
50 Kash Sharma DSP
No ratings yet
50 Kash Sharma DSP
39 pages
AI Art + AR + Real-Time Inpainting and
No ratings yet
AI Art + AR + Real-Time Inpainting and
26 pages
Harsh Thakur
No ratings yet
Harsh Thakur
2 pages
Admit Card
No ratings yet
Admit Card
2 pages
Design and Implementation of An ESP32-Based Smart Home Automation System With Environmental Monitoring and Automated Controls
No ratings yet
Design and Implementation of An ESP32-Based Smart Home Automation System With Environmental Monitoring and Automated Controls
6 pages
Introduction To Python Part 3
No ratings yet
Introduction To Python Part 3
2 pages
Muhammad Salman Alfarisi, Candra Adi Bintang, Sarah Ayatillah Universitas Negeri Semarang Corresponding Author
No ratings yet
Muhammad Salman Alfarisi, Candra Adi Bintang, Sarah Ayatillah Universitas Negeri Semarang Corresponding Author
16 pages
Fi-7800 Fujitsu Scanner
No ratings yet
Fi-7800 Fujitsu Scanner
2 pages
Conky
No ratings yet
Conky
4 pages
Quite A Box of Tricks Book PDF
No ratings yet
Quite A Box of Tricks Book PDF
33 pages
Object Picker UI - Microsoft Learn
No ratings yet
Object Picker UI - Microsoft Learn
6 pages
On DFT
No ratings yet
On DFT
16 pages
Screenshot 2025-03-12 at 1.07.27 AM
No ratings yet
Screenshot 2025-03-12 at 1.07.27 AM
69 pages
Install 2 DGames XML
No ratings yet
Install 2 DGames XML
3 pages
Lesson 1 - Make Your First Game With Stencyl: 1 Get Started
No ratings yet
Lesson 1 - Make Your First Game With Stencyl: 1 Get Started
2 pages
Deformation Vibration 1-8
No ratings yet
Deformation Vibration 1-8
88 pages
MySQL - Learn Data Analytics Together's Group
No ratings yet
MySQL - Learn Data Analytics Together's Group
96 pages
Process Models
No ratings yet
Process Models
28 pages
6.UX, Usability and UI in Mobile Computing
No ratings yet
6.UX, Usability and UI in Mobile Computing
19 pages
Exploiting Honeypot For Cryptojacking The Other Side of The Story of Honeypot Deployment
No ratings yet
Exploiting Honeypot For Cryptojacking The Other Side of The Story of Honeypot Deployment
5 pages
End Winding Characterization
No ratings yet
End Winding Characterization
138 pages
Siemens Step7 TCP and Proface HMI
100% (1)
Siemens Step7 TCP and Proface HMI
46 pages
2 Overview of Call Admission Control: Load Control Feature Parameter Description
No ratings yet
2 Overview of Call Admission Control: Load Control Feature Parameter Description
2 pages
Course Answer-Booklet
No ratings yet
Course Answer-Booklet
3 pages
Ip Final Sem - Merged
No ratings yet
Ip Final Sem - Merged
36 pages
Max 536
No ratings yet
Max 536
24 pages
Student Resource Portal: Meghan Patil, Mihir Prajapati, Ankit Patel
No ratings yet
Student Resource Portal: Meghan Patil, Mihir Prajapati, Ankit Patel
18 pages
M360 SU M360 PEI Supplement D-33155 RevA
No ratings yet
M360 SU M360 PEI Supplement D-33155 RevA
56 pages
Career in IT
No ratings yet
Career in IT
19 pages
Non-Employee Access Agreement V - 2!1!7163105 - 02
No ratings yet
Non-Employee Access Agreement V - 2!1!7163105 - 02
5 pages
Memory Management & Virtual Memory
No ratings yet
Memory Management & Virtual Memory
16 pages