0% found this document useful (0 votes)
13 views11 pages

R Notebook

The document is an R Notebook that demonstrates data analysis using various machine learning models, including logistic regression and K-nearest neighbors (KNN). It involves loading sensor data, imputing missing values, and training models with different regularization techniques (ridge, elastic-net, and lasso) to predict machine status. The notebook includes code for model training, evaluation, and error calculation for both training and test datasets.

Uploaded by

kongjun9423
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
13 views11 pages

R Notebook

The document is an R Notebook that demonstrates data analysis using various machine learning models, including logistic regression and K-nearest neighbors (KNN). It involves loading sensor data, imputing missing values, and training models with different regularization techniques (ridge, elastic-net, and lasso) to predict machine status. The notebook includes code for model training, evaluation, and error calculation for both training and test datasets.

Uploaded by

kongjun9423
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 11

2022/10/31 10:55 R Notebook

R Notebook
Code

Hide

library(class)
library(ggplot2)
library(dplyr)

Attaching package: ‘dplyr’

The following objects are masked from ‘package:stats’:

filter, lag

The following objects are masked from ‘package:base’:

intersect, setdiff, setequal, union

Hide

library(glmnet)

Loading required package: Matrix


Loaded glmnet 4.1-4

Hide

library(Hmisc)

file:///Users/junkong/Desktop/1.nb.html 1/11
2022/10/31 10:55 R Notebook

Loading required package: lattice


Loading required package: survival
Loading required package: Formula
Registered S3 methods overwritten by 'htmltools':
method from
print.html tools:rstudio
print.shiny.tag tools:rstudio
print.shiny.tag.list tools:rstudio
Registered S3 method overwritten by 'htmlwidgets':
method from
print.htmlwidget tools:rstudio
Registered S3 method overwritten by 'data.table':
method from
print.data.table

Attaching package: ‘Hmisc’

The following objects are masked from ‘package:dplyr’:

src, summarize

The following objects are masked from ‘package:base’:

format.pval, units

Load the data


Hide

file:///Users/junkong/Desktop/1.nb.html 2/11
2022/10/31 10:55 R Notebook

# Load the pump sensor data.  Columns sensor_00 .. sensor_51 hold the raw
# sensor readings; column 55 (machine_status) is the target -- TODO confirm
# column layout against the CSV header.
df <- read.csv("/Users/junkong/Desktop/ranial/sensor.csv")

# Median-impute every sensor column.  One loop over the generated column
# names replaces the original 52 copy-pasted impute() statements; the
# effect is identical (Hmisc::impute fills NAs with the column median).
for (col in sprintf("sensor_%02d", 0:51)) {
  df[[col]] <- impute(df[[col]], median)
}

# Recode the target for binary classification:
#   NORMAL -> 0, BROKEN or RECOVERING -> 1.
df$machine_status[df$machine_status == "NORMAL"] <- 0
df$machine_status[df$machine_status == "BROKEN"] <- 1
df$machine_status[df$machine_status == "RECOVERING"] <- 1

# 70/30 train/test split.
# NOTE(review): no set.seed() is called, so the split -- and every result
# below -- is not reproducible between runs; consider adding one.
num_train <- floor(nrow(df) * 0.7)  # sample() expects an integer size
inTrain <- sample(nrow(df), size = num_train)

train <- df[inTrain, ]
test <- df[-inTrain, ]

# Columns 3:54 are the 52 sensor features.  Within those, indices 16 and 51
# (i.e. sensor_15 and sensor_50) are dropped -- presumably degenerate or
# all-missing columns; TODO confirm why these two are excluded.
keep <- c(1:15, 17:50, 52)

X.train <- data.matrix(train[, 3:54][, keep])
y.train <- as.numeric(train[, 55])  # "0"/"1" strings -> numeric 0/1

X.test <- data.matrix(test[, 3:54][, keep])
y.test <- as.numeric(test[, 55])

Logistic Regression
Hide

# Model 1: ridge-penalized logistic regression (alpha = 0).
# Cross-validate over the lambda path to pick the penalty strength.
cv.fit.1 <- cv.glmnet(X.train, y.train, alpha = 0, family = "binomial")

# CV curve: deviance vs log(lambda), with lambda.min / lambda.1se marked.
plot(cv.fit.1)

file:///Users/junkong/Desktop/1.nb.html 4/11
2022/10/31 10:55 R Notebook

Hide

# Fit the full ridge path (default lambda sequence) to inspect how the
# coefficient profiles shrink as the penalty grows.
fit1 <- glmnet(X.train, y.train, alpha = 0, family = "binomial")
plot(fit1)

Hide

file:///Users/junkong/Desktop/1.nb.html 5/11
2022/10/31 10:55 R Notebook

# Refit the ridge model at the CV-optimal lambda and evaluate it.
fit1 <- glmnet(X.train, y.train, alpha = 0, lambda = cv.fit.1$lambda.min,
               family = "binomial")

# type = "response" returns predicted probabilities; the reported "mse" is
# therefore a Brier-style squared error against the 0/1 labels.
y.train.hat1 <- predict(fit1, newx = X.train, type = "response")
y.test.hat1 <- predict(fit1, newx = X.test, type = "response")
train.err1 <- mean((y.train.hat1 - y.train)^2)
test.err1 <- mean((y.test.hat1 - y.test)^2)
paste(c("ridge train mse is", "ridge test mse is"),
      round(c(train.err1, test.err1), 6))

[1] "ridge train mse is 0.066066" "ridge test mse is 0.064981"

Hide

# Bar plot of the ridge coefficient estimates at lambda.min
# (fit1$beta is a sparse matrix; flatten it to a plain vector first).
barplot(as.vector(fit1$beta), main = paste("alpha = ", 0))

Hide

# Model 2: elastic-net-penalized logistic regression (alpha = 0.5,
# an even mix of the ridge and lasso penalties).
cv.fit.2 <- cv.glmnet(X.train, y.train, alpha = 0.5, family = "binomial")
plot(cv.fit.2)

file:///Users/junkong/Desktop/1.nb.html 6/11
2022/10/31 10:55 R Notebook

Hide

# Fit the full elastic-net path (default lambda sequence) for the
# coefficient-profile plot.
fit2 <- glmnet(X.train, y.train, alpha = 0.5, family = "binomial")
plot(fit2)

Hide

file:///Users/junkong/Desktop/1.nb.html 7/11
2022/10/31 10:55 R Notebook

# Refit the elastic-net model at the CV-optimal lambda and evaluate it.
fit2 <- glmnet(X.train, y.train, alpha = 0.5, lambda = cv.fit.2$lambda.min,
               family = "binomial")

# Predicted probabilities and squared-error losses, as for the ridge model.
y.train.hat2 <- predict(fit2, newx = X.train, type = "response")
y.test.hat2 <- predict(fit2, newx = X.test, type = "response")
train.err2 <- mean((y.train.hat2 - y.train)^2)
test.err2 <- mean((y.test.hat2 - y.test)^2)
paste(c("el-net train mse is", "el-net test mse is"),
      round(c(train.err2, test.err2), 6))

[1] "el-net train mse is 0.931859" "el-net test mse is 0.932825"

Hide

# Bar plot of the elastic-net coefficient estimates at lambda.min.
barplot(as.vector(fit2$beta), main = paste("alpha = ", 0.5))

Hide

# Model 3: lasso-penalized logistic regression (alpha = 1).
cv.fit.3 <- cv.glmnet(X.train, y.train, alpha = 1, family = "binomial")
plot(cv.fit.3)

file:///Users/junkong/Desktop/1.nb.html 8/11
2022/10/31 10:55 R Notebook

Hide

# Fit the full lasso path (default lambda sequence) for the
# coefficient-profile plot.
fit3 <- glmnet(X.train, y.train, alpha = 1, family = "binomial")
plot(fit3)

Hide

file:///Users/junkong/Desktop/1.nb.html 9/11
2022/10/31 10:55 R Notebook

# Refit the lasso at the CV-optimal lambda.
# NOTE(review): in the recorded run this single-lambda refit failed to
# converge within maxit = 100000 and glmnet returned an EMPTY model (see
# the warning in the notebook output) -- which explains the degenerate
# 0.25 train/test MSE reported below.  Consider raising maxit, or
# predicting from cv.fit.3 with s = "lambda.min" instead of refitting at
# a single lambda (glmnet's documentation advises against single-lambda
# fits).
fit3 <- glmnet(X.train, y.train, alpha = 1, lambda = cv.fit.3$lambda.min,
               family = "binomial")

Warning: from glmnet C++ code (error code -1); Convergence for 1th lambda value not reached after maxit=100000 iterations; solutions for larger lambdas returned
Warning: an empty model has been returned; probably a convergence issue

Hide

# Predicted probabilities and squared-error losses for the lasso fit.
# (With the empty model from the failed refit these are constant 0.5
# predictions, hence MSE = 0.25.)
y.train.hat3 <- predict(fit3, newx = X.train, type = "response")
y.test.hat3 <- predict(fit3, newx = X.test, type = "response")
train.err3 <- mean((y.train.hat3 - y.train)^2)
test.err3 <- mean((y.test.hat3 - y.test)^2)
paste(c("lasso train mse is", "lasso test mse is"),
      round(c(train.err3, test.err3), 6))

[1] "lasso train mse is 0.25" "lasso test mse is 0.25"

Hide

# Bar plot of the lasso coefficient estimates at lambda.min.
barplot(as.vector(fit3$beta), main = paste("alpha = ", 1))

KNN
Hide

file:///Users/junkong/Desktop/1.nb.html 10/11
2022/10/31 10:55 R Notebook

# KNN: sweep the neighborhood size k from 2 to 20 and record the
# misclassification rate on the training set (resubstitution error) and
# on the held-out test set.
k.values <- 2:20
k.length <- length(k.values)
test.err <- matrix(0, k.length)   # column matrices, as in the original
train.err <- matrix(0, k.length)

n.train <- dim(train)[1]
n.test <- dim(test)[1]
n <- n.test + n.train             # total sample size (unused below)

# Index the k values directly instead of maintaining a manual counter.
for (i in seq_along(k.values)) {
  ki <- k.values[i]
  # Test error: classify test rows against the training set.
  y.test.hat <- knn(X.train, X.test, y.train, ki)
  test.err[i] <- sum(y.test.hat != y.test) / n.test
  # Training error: classify training rows against themselves.
  y.train.hat <- knn(X.train, X.train, y.train, ki)
  train.err[i] <- sum(y.train.hat != y.train) / n.train
  print(paste("ki=", ki, ", test.err=", test.err[i], ", train.err=", train.err[i]))
}

[1] "ki= 2 , test.err= 0.000393367223432583 , train.err= 0.0001491337275651"


[1] "ki= 3 , test.err= 0.000378237714839022 , train.err= 0.000142649652453574"
[1] "ki= 4 , test.err= 0.000378237714839022 , train.err= 0.000201006328457309"

file:///Users/junkong/Desktop/1.nb.html 11/11

You might also like