0% found this document useful (0 votes)
33 views

#Converting: as.numeric(nama data), as.character(nama data)

This document provides information on data manipulation and analysis in R. It demonstrates how to convert between numeric and character data, select and filter data, perform descriptive statistics including measures of central tendency, dispersion, and visualizations. Regression analysis techniques like simple and multiple linear regression are also summarized, including assumption testing.
Copyright
© © All Rights Reserved
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
33 views

#Converting: as.numeric(nama data), as.character(nama data)

This document provides information on data manipulation and analysis in R. It demonstrates how to convert between numeric and character data, select and filter data, perform descriptive statistics including measures of central tendency, dispersion, and visualizations. Regression analysis techniques like simple and multiple linear regression are also summarized, including assumption testing.
Copyright
© © All Rights Reserved
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
You are on page 1/ 9

# CONVERTING ----

# Convert a factor to character or numeric.
# `nama_data` is a placeholder for your own object.

# Calling as.numeric() directly on a factor returns its internal integer
# level codes, not the values shown as labels, so go through character first.
as.numeric(as.character(nama_data))
as.character(nama_data)

# SELECTING ----

# Name the columns of a data set. `namadata` is a placeholder for your own
# object; supply one name per column.
colnames(namadata) <- c("nama1", "nama2")

# FILTER ROWS BY CATEGORY
# `data` is a placeholder data frame with the columns `sex` and `jur`.
data[data$sex == "male", ]                     # every column, rows with sex male
data[data$sex == "male", 3]                    # third column only, same rows
data[data$jur == "se", ]                       # rows where jur is "se"
data[data$jur == "se" & data$sex == "male", ]  # both conditions must hold

# SELECT SPECIFIC ROWS (CASES)

# `wp` is the built-in WorldPhones matrix turned into a data frame, so its
# columns (N.Amer, Europe, Asia, S.Amer, ...) can be reached with `$`.
wp <- data.frame(WorldPhones)
# First five observations of one column. `[` has no `which.row` argument;
# the index is purely positional.
wp$S.Amer[1:5]

# DROP A VARIABLE

# Assigning NULL to a column removes it from `dat1`.
dat1[["WEIND11"]] <- NULL

# DESCRIPTIVE ANALYSIS ----

# MEASURES OF CENTRAL TENDENCY
mean(swiss$Fertility)
# Mean within one category. The label must be an actual level of spray
# ("A" to "F"); a label that matches nothing selects no rows and gives NaN.
mean(InsectSprays$count[InsectSprays$spray == "A"])
median(swiss$Fertility)
# Mode: tabulate the values, then take the label with the highest count
# (which.max() returns the first one when several values tie).
fer <- table(swiss$Fertility)
fer
modusfer <- names(fer)[which.max(fer)]
modusfer
rowSums(state.x77)   # total of each row
colSums(state.x77)   # total of each column
colMeans(state.x77)  # mean of each column

# MEASURES OF DISPERSION
with(swiss, sd(Fertility))
with(swiss, var(Fertility))
with(swiss, sd(Education))
# Coefficient of variation: standard deviation as a percentage of the mean
cv_fer <- with(swiss, (sd(Fertility) / mean(Fertility)) * 100)
cv_ed <- with(swiss, (sd(Education) / mean(Education)) * 100)
print(c(cv_fer, cv_ed))

# QUARTILES
# First quartile: 25% of the observations have a fertility at or below it
quantile(x = swiss$Fertility, probs = 0.25)
quantile(x = swiss$Fertility, probs = 0.5)
quantile(x = swiss$Fertility, probs = 0.75)
# All three quartiles in a single call
quantile(x = swiss$Fertility, probs = c(0.25, 0.5, 0.75))

# MINIMUM AND MAXIMUM

min(swiss$Fertility)
# Name of the row holding the lowest, then the highest, fertility
rownames(swiss)[which.min(swiss$Fertility)]
rownames(swiss)[which.max(swiss$Fertility)]
# Range: largest value minus smallest value
rangenya <- diff(range(swiss$Fertility))
rangenya

# DATA VISUALISATION ----

# SCATTER PLOT
plot(swiss)  # one panel for every pair of columns
# Add a colour, axis labels and a title. The title is kept on one line so no
# line break ends up inside it, and the call carries no dangling empty
# argument after `main`.
plot(swiss$Fertility, swiss$Education,
     col = "orange", xlab = "fertility", ylab = "education",
     main = "Scatter Plot Between Fertility and Education")
# Same plot with a different plotting symbol
plot(swiss$Fertility, swiss$Education,
     col = "orange", xlab = "fertility", ylab = "education",
     main = "Scatter Plot Between Fertility and Education", pch = 5)
# HISTOGRAM
hist(swiss$Fertility, xlab = "fertility", ylab = "frekuensi", col = "red",
     main = "histogram")
# Same histogram with the y axis on the density scale
hist(swiss$Fertility, xlab = "fertility", ylab = "probability", col = "red",
     main = "histogram", probability = TRUE, ylim = c(0, 0.05))
# Overlay the kernel density estimate on the current histogram
lines(density(swiss$Fertility), lwd = 5)
# BOXPLOT
# state.x77 ships as a numeric matrix and `$` does not work on a matrix, so
# convert it once. data.frame() also rewrites the column names "Life Exp"
# and "HS Grad" as Life.Exp and HS.Grad. Converting again later is harmless.
state.x77 <- data.frame(state.x77)
# Several columns at once: each box is labelled with its column name.
# `[, 1:3]` selects columns; `[1:3]` on the original matrix selected cells.
boxplot(state.x77[, 1:3], xlab = "variabel", ylab = "Rate", col = "green",
        main = "Boxplot of Population, Income and Illiteracy")
# A single variable: xlab stands in for the box label
boxplot(state.x77$Murder, xlab = "Murder", ylab = "Rate", col = "green",
        main = "Boxplot of Murder")
# Separate columns: bind them first so the boxes are named automatically
gabung <- cbind(Income = state.x77$Income, Illiteracy = state.x77$Illiteracy)
boxplot(gabung, xlab = "Variabel", ylab = "Rate", col = "green",
        main = "Boxplot of Income and Illiteracy")
# Once a variable is a factor, plotting it against a numeric variable draws
# one labelled box per level.
state.x77$tingkatIncom <- cut(state.x77$Income,
                              breaks = c(-Inf, 4000, 4800, Inf),
                              labels = c("low", "medium", "high"))
plot(state.x77$tingkatIncom, state.x77$Illiteracy)
# CREATE CATEGORICAL VARIABLES
# Fertility: (0, 36] low, (36, 73] medium, above 73 high; stored as the new
# column swiss$grfer.
swiss$grfer <- cut(swiss$Fertility, breaks = c(0, 36, 73, Inf),
                   labels = c("low", "medium", "high"))
head(swiss)
# Titles are kept on one line so no line break ends up inside them.
plot(swiss$grfer, swiss$Fertility, xlab = "Fertility Group", ylab = "fertility",
     main = "Fertility Boxplot by Group of Fertility")
# Education: (0, 2] low, (2, 10] medium, above 10 high
summary(swiss$Education)
swiss$gredu <- cut(swiss$Education, breaks = c(0, 2, 10, Inf),
                   labels = c("low", "medium", "high"))
plot(swiss$gredu, swiss$Fertility, xlab = "gred", ylab = "fertility",
     main = "fertility boxplot by group of education")
# Author's reading of the plot: the higher the education level, the lower
# the fertility.
# STEM-AND-LEAF DISPLAY
# Author's reading of the output: the 60s and 70s are the most common values
stem(x = swiss[["Fertility"]])
# NORMAL Q-Q PLOT with its reference line
qqnorm(y = swiss[["Fertility"]])
qqline(y = swiss[["Fertility"]])

# BAR CHARTS ----
# Requires state.x77 to be a data frame that carries the tingkatIncom column.
# Ratio-scale data can be plotted directly
barplot(state.x77$Income)
# Categorical data must be tabulated first (ratio data can be binned into
# categories beforehand for a tidier chart)
tab <- table(state.x77$tingkatIncom)
barplot(tab, xlab = "Income", ylab = "frekuensi", main = "Tingkat Income")
# The table can be built inline (feed is categorical). feed has six levels,
# so six colours are supplied; five would be recycled onto the sixth bar.
barplot(table(chickwts$feed), xlab = "Feed", ylab = "Frekuensi", col = 2:7)
# Shade the bars with hatching of increasing density
barplot(table(chickwts$feed), xlab = "Feed", ylab = "Frekuensi",
        density = c(10, 20, 30, 40, 50, 60), col = 1:6)
# Side-by-side bars for several variables. `wp` is the built-in WorldPhones
# matrix as a data frame; its row names are the seven years.
wp <- data.frame(WorldPhones)
mat <- cbind(amer = wp$N.Amer, erup = wp$Europe, asia = wp$Asia)
barplot(mat, col = rainbow(7), beside = TRUE, xlab = "tahun", ylab = "angka")
legend("topright", legend = rownames(wp), cex = 0.7, pch = 15,
       col = rainbow(7))
# MORE BAR CHARTS
tab1 <- table(swiss$Fertility)
tab1
barplot(tab1)
tab1_grfer <- table(swiss$grfer)
tab1_grfer
barplot(tab1_grfer)
# Two-way table of fertility group (rows) by education group (columns). It
# has to exist before it can be plotted.
tab2 <- table(grfer = swiss$grfer, gredu = swiss$gredu)
barplot(tab2)                 # stacked
barplot(tab2, beside = TRUE)  # side by side
# Each cluster on the x axis is one column of tab2 (a gredu level); the bars
# inside a cluster are its rows (the grfer levels), which the legend names.
barplot(tab2, beside = TRUE, col = c("red", "yellow", "green"), xlab = "gredu")
legend("topright", c("low", "medium", "high"),
       fill = c("red", "yellow", "green"))

# TWO- AND THREE-WAY TABLES

grfer <- swiss$grfer
gredu <- swiss$gredu
tab2 <- table(grfer, gredu)  # two-way table
tab2
prop.table(tab2)  # cell proportions of the grand total; x 100 for percent
# NOTE(review): grfer is passed twice, so the third dimension repeats the
# first. A genuine three-way table needs a third, different factor.
tab3 <- ftable(swiss$grfer, swiss$gredu, swiss$grfer)  # three-way table
tab3
# PIE CHART
pie(tab1_grfer, clockwise = TRUE, col = c(1, 2, 3))
# Filled legend boxes match the filled wedges
legend("topright", c("low", "medium", "high"), fill = c(1, 2, 3))

# LINE CHART ----
library(foreign)
# Read the SPSS file straight into a data frame
dt <- read.spss("E:/Dhofirur/STIS/Tingkat 2/Asli/Komstat/Pertemuan 3/data_asli_1.sav",
                to.data.frame = TRUE)
head(dt)
plot(dt$No_Obs, dt$BI_Rate)
# Redraw as a line with a fixed y range, then add the second series
plot(dt$No_Obs, dt$BI_Rate, type = "l", col = "red", xlab = "observasi",
     ylab = "rate", ylim = c(0, 15))
lines(dt$No_Obs, dt$Working_capital_rate, col = "blue")
# Both series are lines, so the legend shows line keys rather than points
legend("bottomright", c("bi rate", "working capital credit"),
       col = c("red", "blue"), lty = 1, cex = 0.5)

# LINE CHART OF YEARLY AVERAGES

tahun <- c("2008", "2009", "2010", "2011", "2012")
# Observations 1-60 are taken twelve at a time, one group per year. Filling
# a 12-row matrix column by column puts each group in its own column, so the
# column means are the means of rows 1-12, 13-24, ..., 49-60.
bi_rate <- colMeans(matrix(dt$BI_Rate[1:60], nrow = 12))
bi_rate
wcp <- colMeans(matrix(dt$Working_capital_rate[1:60], nrow = 12))
wcp
# The years are stored as text; convert them explicitly for the x axis.
plot(as.numeric(tahun), bi_rate, type = "l", col = "blue", xlab = "tahun",
     ylab = "rate", ylim = c(0, 15))
lines(as.numeric(tahun), wcp, col = "red")
# Both series are lines, so the legend shows line keys rather than points
legend("topright", c("bi rate", "wcp"), col = c("blue", "red"), lty = 1)

# NORMAL PROBABILITIES ----
# The heights of 400 STIS students are normally distributed with
# mean 165 cm and standard deviation 5 cm.

# Density at a height of 164. For a continuous variable this is the height
# of the density curve, not P(X = 164), which is zero.
dnorm(164, mean = 165, sd = 5)
# Probability that a student is shorter than 164: P(X < 164)
pnorm(164, mean = 165, sd = 5)
# P(X > 164): request the upper tail directly
pnorm(164, mean = 165, sd = 5, lower.tail = FALSE)
# Given a lower-tail probability of 0.05, the height below which that share
# of students falls
qnorm(0.05, mean = 165, sd = 5)
# STANDARD NORMAL
# P(Z > 1.96)
pnorm(1.96, lower.tail = FALSE)

# LOADING DATA ----
# Read data exported from Excel as CSV, or a whitespace-delimited text file
data <- read.csv("D:/data.csv")
data <- read.table("E:/Dhofirur/STIS/Tingkat 2/Asli/Komstat/data.txt",
                   header = TRUE)

# READ AN SPSS FILE

library(foreign)
data <- read.spss("E:/Dhofirur/STIS/Tingkat 2/Asli/Komstat/data.sav",
                  to.data.frame = TRUE)

# READ FROM THE WORKING DIRECTORY

# Show the current working directory, then point it at the data folder so
# files can be opened by bare file name.
getwd()
setwd("E:/Dhofirur/STIS/Tingkat 2/Asli/Komstat")
data <- read.csv("data.csv")
data <- read.spss("data.sav", to.data.frame = TRUE)

# DATA EXPLORATION ----
# PLOTS
plot(x = mtcars)                     # every pair of columns
plot(x = mtcars$hp, y = mtcars$mpg)  # one chosen pair
# CORRELATION
cor(x = mtcars)  # full correlation matrix
library(corrplot)  # package for drawing correlation matrices
# Draw each correlation as an ellipse
corrplot(corr = cor(x = mtcars), method = "ellipse")

# SIMPLE LINEAR REGRESSION ----

# LINEARITY TESTS
# In both tests the null hypothesis is that the linear specification is
# adequate, so linearity is supported when p-value > alpha (H0 not rejected).
# Ramsey RESET test
library(lmtest)
resettest(mpg ~ wt, data = mtcars)
# Teraesvirta neural-network test. `x` is the regressor and `y` the
# response, matching the model mpg ~ wt.
library(tseries)
terasvirta.test(x = mtcars$wt, y = mtcars$mpg)

# SIMPLE LINEAR REGRESSION EQUATION

# rls is the fitted linear model of mpg on wt. Passing `data =` keeps plain
# column names in the formula, so the coefficient is called `wt`.
rls <- lm(mpg ~ wt, data = mtcars)
summary(rls)  # coefficients, R-squared and test statistics of the model

# RESIDUAL ASSUMPTION CHECKS FOR rls

# NORMALITY
# Visual check: normal Q-Q plot of the residuals with its reference line
res <- residuals(rls)
qqnorm(y = res)
qqline(y = res)
# Formal check: p-value > alpha means the residuals look normal
shapiro.test(x = res)
# HOMOSKEDASTICITY: the residual variance is constant
# Visual check: fitted values against squared residuals
res2 <- res^2
yhat <- fitted(rls)
plot(x = yhat, y = res2)  # a random scatter means the assumption holds
# Formal check: p-value > alpha means the assumption holds
bptest(formula = rls)
# NO AUTOCORRELATION: the residuals of different observations are
# independent, i.e. uncorrelated with one another
# Visual check: plot e_t against e_(t-1)
et <- res
library(Hmisc)
et_1 <- Lag(et, shift = 1)
head(et)
head(et_1)
# Drop the first pair; presumably the lagged series has no value there
# because the first residual has no predecessor (compare the head() output).
et <- et[-1]
et_1 <- et_1[-1]
plot(et_1, et)
# Formal tests
library(car)
library(carData)
# The null hypothesis of the Durbin-Watson test is zero autocorrelation, so
# the assumption holds when p-value > alpha (H0 not rejected).
durbinWatsonTest(rls)
cor.test(et_1, et)

# MULTIPLE LINEAR REGRESSION: more than one independent variable


# Multiple regression with Y = Illiteracy
# Test the linearity of every candidate predictor first; the linear ones
# form the tentative model.
# NOTE(review): the "linear" / "not linear" labels below are the author's
# readings of the output. RESET's null hypothesis is a correctly specified
# linear form, so confirm each label against p-value > alpha.
# state.x77 becomes a data frame so its columns can be reached with `$`
state.x77<-data.frame(state.x77)
# Illiteracy vs Population (author's reading: not linear)
resettest(state.x77$Illiteracy~state.x77$Population)
# Illiteracy vs Income (author's reading: linear)
resettest(state.x77$Illiteracy~state.x77$Income)
# Illiteracy vs Life.Exp (author's reading: linear)
resettest(state.x77$Illiteracy~state.x77$Life.Exp)
# Illiteracy vs Murder (author's reading: not linear)
resettest(state.x77$Illiteracy~state.x77$Murder)
# Illiteracy vs HS.Grad (author's reading: linear)
resettest(state.x77$Illiteracy~state.x77$HS.Grad)
# Illiteracy vs Frost (author's reading: not linear)
resettest(state.x77$Illiteracy~state.x77$Frost)
# Illiteracy vs Area (author's reading: not linear)
resettest(state.x77$Illiteracy~state.x77$Area)

# ASSUMPTION CHECKS FOR THE MULTIPLE REGRESSION
# Tentative model: Illiteracy on the predictors labelled linear in the
# linearity checks (Income, Life.Exp, HS.Grad). state.x77 must already be a
# data frame at this point.
rlb <- lm(Illiteracy ~ Income + Life.Exp + HS.Grad, data = state.x77)
# NORMALITY
# NO AUTOCORRELATION
# HOMOSKEDASTICITY
# NO MULTICOLLINEARITY: the independent variables are not correlated with
# one another
vif(rlb)  # the assumption holds when every VIF is below 5

# MODEL SELECTION
# BACKWARD: start from the model with every X, then remove them one at a
# time
rlb_backward <- step(rlb, direction = "backward")
summary(rlb_backward)
# FORWARD: start from the intercept-only model and add X one at a time.
# The start model uses the same `data =` as rlb so that step() can find the
# candidate terms when it refits.
rlb_intersep <- lm(Illiteracy ~ 1, data = state.x77)
summary(rlb_intersep)
rlb_forward <- step(rlb_intersep,
                    scope = list(lower = rlb_intersep, upper = rlb),
                    direction = "forward")
summary(rlb_forward)
# STEPWISE: combination of backward and forward
rlb_stepwise <- step(rlb, direction = "both")
summary(rlb_stepwise)

You might also like