Pruebas de Bondad de Ajuste Con Library Rrisk en R

Download as docx, pdf, or txt
Download as docx, pdf, or txt
You are on page 1of 35

Pruebas de bondad de ajuste

Roly

16 de mayo de 2018

rriskdistributions
Utilizando la función de máxima verosimilitud
Encontramos los valores óptimos de los parámetros para cierta distribución continua
utilizando máxima verosimilitud
library(rriskDistributions)
set.seed(1)
x2<- rchisq(500, 4)
hist(x2)

rriskMLEdist(x2, "norm")

## $estimate
## mean sd
## 3.888741 2.776253
##
## $convergence
## [1] 0
##
## $loglik
## [1] -1220.02
##
## $hessian
## mean sd
## mean 64.8712 0.0000
## sd 0.0000 129.7425
##
## $optim.function
## [1] "optim"

rriskMLEdist(x2, "exp")

## $estimate
## rate
## 0.2571526
##
## $convergence
## [1] 0
##
## $loglik
## [1] -1179.043
##
## $hessian
## rate
## rate 7561.381
##
## $optim.function
## [1] "optim"

rriskMLEdist(x2, "lnorm")

## $estimate
## meanlog sdlog
## 1.0717509 0.8320904
##
## $convergence
## [1] 0
##
## $loglik
## [1] -1153.438
##
## $hessian
## meanlog sdlog
## meanlog 722.1525 0.000
## sdlog 0.0000 1444.324
##
## $optim.function
## [1] "optim"

rriskMLEdist(x2, "logis")

## $estimate
## location scale
## 3.590995 1.505062
##
## $convergence
## [1] 0
##
## $loglik
## [1] -1204.184
##
## $hessian
## location scale
## location 73.11348 -12.44612
## scale -12.44612 321.36893
##
## $optim.function
## [1] "optim"

rriskMLEdist(x2, "gamma")

## $estimate
## shape rate
## 1.8959993 0.4876333
##
## $convergence
## [1] 0
##
## $loglik
## [1] -1128.639
##
## $hessian
## shape rate
## shape 344.9053 -1025.362
## rate -1025.3620 3986.806
##
## $optim.function
## [1] "optim"

rriskMLEdist(x2, "weibull")

## $estimate
## shape scale
## 1.453619 4.299805
##
## $convergence
## [1] 0
##
## $loglik
## [1] -1130.02
##
## $hessian
## shape scale
## shape 446.84950 -50.89025
## scale -50.89025 57.15013
##
## $optim.function
## [1] "optim"

rriskMLEdist(x2, "chisq")

## $estimate
## df
## 3.865889
##
## $convergence
## [1] 0
##
## $loglik
## [1] -1128.711
##
## $hessian
## df
## df 84.14863
##
## $optim.function
## [1] "optim"

rriskMLEdist(x2, "t")

## Warning in dt(c(1.66173000475602, 7.14151269432391, 6.92634076027453,


## 4.10162370021791, : NaNs produced

## Warning in dt(c(1.66173000475602, 7.14151269432391, 6.92634076027453,


## 4.10162370021791, : NaNs produced

## Warning in dt(c(1.66173000475602, 7.14151269432391, 6.92634076027453,


## 4.10162370021791, : NaNs produced

## Warning in dt(c(1.66173000475602, 7.14151269432391, 6.92634076027453,


## 4.10162370021791, : NaNs produced

## Warning in dt(c(1.66173000475602, 7.14151269432391, 6.92634076027453,


## 4.10162370021791, : NaNs produced

## Warning in dt(c(1.66173000475602, 7.14151269432391, 6.92634076027453,


## 4.10162370021791, : NaNs produced
## Warning in dt(c(1.66173000475602, 7.14151269432391, 6.92634076027453,
## 4.10162370021791, : NaNs produced

## $estimate
## df
## 0.7128329
##
## $convergence
## [1] 0
##
## $loglik
## [1] -1768.134
##
## $hessian
## df
## df 559.9701
##
## $optim.function
## [1] "optim"

rriskMLEdist(x2, "f")

## $estimate
## df1 df2
## 91618.252389 1.928039
##
## $convergence
## [1] 0
##
## $loglik
## [1] -1336.484
##
## $hessian
## df1 df2
## df1 -1.705303e-07 9.632117e-05
## df2 9.632117e-05 8.731310e+01
##
## $optim.function
## [1] "optim"

rriskMLEdist(x2, "cauchy")

## $estimate
## location scale
## 3.013206 1.505451
##
## $convergence
## [1] 0
##
## $loglik
## [1] -1269.648
##
## $hessian
## location scale
## location 83.41896 -24.22884
## scale -24.22884 137.19702
##
## $optim.function
## [1] "optim"

Utilizando momentos
## Continuous distributions
set.seed(1)
x1 <-rnorm(500, mean = 2, sd = 0.7)
hist(x1)

rriskMMEdist(x1, "norm")

## mean sd
## 2.0158509 0.7076411

rriskMMEdist(x1, "exp")

## rate
## 0.4960684

rriskMMEdist(x1, "gamma")
## shape rate
## 8.115040 4.025615

rriskMMEdist(x1, "logis")

## location scale
## 2.015851 0.390143

rriskMMEdist(x1, "unif")

## min max
## 0.7901805 3.2415212

## produces an error:
# rriskMMEdist(x1, "lnorm")
# rriskMMEdist(x1, "beta")

## Discrete distributions
set.seed(2)
x2 <- rpois(500, lambda = 3)
hist(x2)

rriskMMEdist(x2, "pois")

## lambda
## 3

rriskMMEdist(x2, "nbinom")
## size mu
## 27.77778 3.00000

rriskMMEdist(x2, "geom")

## prob
## 0.25

fitdistrplus
Ajuste de máxima verosimilitud
library(fitdistrplus)

## Loading required package: MASS

## Loading required package: survival

set.seed(1234)
x1 <- rnorm(n=100)
mledist(x1,"norm")

## $estimate
## mean sd
## -0.1567617 0.9993707
##
## $convergence
## [1] 0
##
## $loglik
## [1] -141.8309
##
## $hessian
## mean sd
## mean 100.126 0.0000
## sd 0.000 200.2538
##
## $optim.function
## [1] "optim"
##
## $fix.arg
## NULL
##
## $optim.method
## [1] "Nelder-Mead"
##
## $fix.arg.fun
## NULL
##
## $weights
## NULL
##
## $counts
## function gradient
## 43 NA
##
## $optim.message
## NULL

# (2) defining your own distribution functions, here for the Gumbel
# distribution
# for other distributions, see the CRAN task view dedicated to probability
# distributions

# Density of the Gumbel distribution with location `a` and scale `b`,
# evaluated at `x` (vectorised over `x`).
#
# The two exponentials are merged into one, exp(z - exp(z)), with
# z = (a - x) / b. Written as a product, exp(z) * exp(-exp(z)) evaluates to
# Inf * 0 = NaN once z is large enough for exp(z) to overflow, although the
# density there is 0. The merged form returns 0 in that case.
dgumbel <- function(x, a, b) {
  z <- (a - x) / b
  exp(z - exp(z)) / b
}


mledist(x1,"gumbel",start=list(a=10,b=5))

## $estimate
## a b
## -0.6267919 0.8564855
##
## $convergence
## [1] 0
##
## $loglik
## [1] -139.3812
##
## $hessian
## a b
## a 136.31209 -61.53959
## b -61.53959 270.09656
##
## $optim.function
## [1] "optim"
##
## $fix.arg
## NULL
##
## $optim.method
## [1] "Nelder-Mead"
##
## $fix.arg.fun
## NULL
##
## $weights
## NULL
##
## $counts
## function gradient
## 91 NA
##
## $optim.message
## NULL

# (3) fit of a discrete distribution (Poisson)


#

set.seed(1234)
x2 <- rpois(n=30,lambda = 2)
mledist(x2,"pois")

## $estimate
## lambda
## 1.7
##
## $convergence
## [1] 0
##
## $loglik
## [1] -46.18434
##
## $hessian
## lambda
## lambda 17.64707
##
## $optim.function
## [1] "optim"
##
## $fix.arg
## NULL
##
## $optim.method
## [1] "BFGS"
##
## $fix.arg.fun
## NULL
##
## $weights
## NULL
##
## $counts
## function gradient
## 6 1
##
## $optim.message
## NULL

# (4) fit a finite-support distribution (beta)


#
set.seed(1234)
x3 <- rbeta(n=100,shape1=5, shape2=10)
mledist(x3,"beta")

## $estimate
## shape1 shape2
## 4.859798 10.918841
##
## $convergence
## [1] 0
##
## $loglik
## [1] 78.33052
##
## $hessian
## shape1 shape2
## shape1 16.295311 -6.542752
## shape2 -6.542752 3.047900
##
## $optim.function
## [1] "optim"
##
## $fix.arg
## NULL
##
## $optim.method
## [1] "Nelder-Mead"
##
## $fix.arg.fun
## NULL
##
## $weights
## NULL
##
## $counts
## function gradient
## 47 NA
##
## $optim.message
## NULL

# (5) fit frequency distributions on USArrests dataset.


#

x4 <- USArrests$Assault
mledist(x4, "pois")

## $estimate
## lambda
## 170.76
##
## $convergence
## [1] 0
##
## $loglik
## [1] -1211.705
##
## $hessian
## lambda
## lambda 0.2928085
##
## $optim.function
## [1] "optim"
##
## $fix.arg
## NULL
##
## $optim.method
## [1] "BFGS"
##
## $fix.arg.fun
## NULL
##
## $weights
## NULL
##
## $counts
## function gradient
## 2 1
##
## $optim.message
## NULL

mledist(x4, "nbinom")

## $estimate
## size mu
## 3.822579 170.747853
##
## $convergence
## [1] 0
##
## $loglik
## [1] -290.3297
##
## $hessian
## size mu
## size 1.759308e+00 -1.993783e-05
## mu -1.993783e-05 6.413018e-03
##
## $optim.function
## [1] "optim"
##
## $fix.arg
## NULL
##
## $optim.method
## [1] "Nelder-Mead"
##
## $fix.arg.fun
## NULL
##
## $weights
## NULL
##
## $counts
## function gradient
## 47 NA
##
## $optim.message
## NULL

# (6) fit a continuous distribution (Gumbel) to censored data.


#

data(fluazinam)
log10EC50 <-log10(fluazinam)
# Definition of the Gumbel distribution with location `a` and scale `b`:
# density (d), distribution function (p) and quantile function (q).
#
# The density merges its two exponentials into exp(z - exp(z)) with
# z = (a - x) / b. As a product, exp(z) * exp(-exp(z)) becomes
# Inf * 0 = NaN when exp(z) overflows, although the density there is 0.
dgumbel <- function(x, a, b) {
  z <- (a - x) / b
  exp(z - exp(z)) / b
}
# P(X <= q)
pgumbel <- function(q, a, b) exp(-exp((a - q) / b))
# Inverse of pgumbel for probabilities p in (0, 1)
qgumbel <- function(p, a, b) a - b * log(-log(p))

mledist(log10EC50,"gumbel",start=list(a=0,b=2),optim.method="Nelder-Mead")

## $estimate
## a b
## 1.632726 1.144113
##
## $convergence
## [1] 0
##
## $loglik
## [1] -20.27771
##
## $hessian
## a b
## a 10.683955 -4.764691
## b -4.764691 16.810611
##
## $optim.function
## [1] "optim"
##
## $fix.arg
## NULL
##
## $optim.method
## [1] "Nelder-Mead"
##
## $fix.arg.fun
## NULL
##
## $weights
## NULL
##
## $counts
## function gradient
## 65 NA
##
## $optim.message
## NULL

# (7) scaling problem


# the simulated dataset (below) has particularly small values,
# hence without scaling (10^0),
# the optimization raises an error. The for loop shows how scaling by 10^i
# for i=1,...,6 makes the fitting procedure work correctly.

set.seed(1234)
x2 <- rnorm(100, 1e-4, 2e-4)
# Fit a Cauchy distribution to the sample rescaled by 10^i, for i = 6, ..., 0.
# The sample is stored in `x2`. The loop previously referenced an undefined
# `x`, so every iteration failed with "object 'x' not found" instead of
# exercising the scaling behaviour. The output recorded below comes from that
# earlier run and has not been regenerated.
# try(..., silent = TRUE) lets the loop continue when a fit fails; cat() then
# prints the error message for that scale.
for (i in 6:0) {
  cat(i, try(mledist(x2 * 10^i, "cauchy")$estimate, silent = TRUE), "\n")
}

## 6 Error in mledist(x * 10^i, "cauchy") : objeto 'x' no encontrado


##
## 5 Error in mledist(x * 10^i, "cauchy") : objeto 'x' no encontrado
##
## 4 Error in mledist(x * 10^i, "cauchy") : objeto 'x' no encontrado
##
## 3 Error in mledist(x * 10^i, "cauchy") : objeto 'x' no encontrado
##
## 2 Error in mledist(x * 10^i, "cauchy") : objeto 'x' no encontrado
##
## 1 Error in mledist(x * 10^i, "cauchy") : objeto 'x' no encontrado
##
## 0 Error in mledist(x * 10^i, "cauchy") : objeto 'x' no encontrado
##

# (17) small example for the zero-modified geometric distribution


#
# Probability mass function of the zero-modified geometric distribution:
# p1 * (x == 0) + (1 - p1) * dgeom(x - 1, p2).
dzmgeom <- function(x, p1, p2) {
  p1 * (x == 0) + (1 - p1) * dgeom(x - 1, p2)
}
# Simulated dataset. The comment used to wrap onto its own line, leaving a
# bare `dataset` token that is evaluated as an undefined object.
x2 <- c(2, 4, 0, 40, 4, 21, 0, 0, 0, 2, 5, 0, 0, 13, 2)
# Initialise p1 at its MLE: the observed proportion of zeros.
initp1 <- function(x) list(p1 = mean(x == 0))
mledist(x2, "zmgeom", fix.arg=initp1, start=list(p2=1/2))

## $estimate
## p2
## 0.096772
##
## $convergence
## [1] 0
##
## $loglik
## [1] -39.6633
##
## $hessian
## p2
## p2 1064.213
##
## $optim.function
## [1] "optim"
##
## $fix.arg
## $fix.arg$p1
## [1] 0.4
##
##
## $optim.method
## [1] "BFGS"
##
## $fix.arg.fun
## function (x)
## list(p1 = mean(x == 0))
## <bytecode: 0x000000000e90a618>
##
## $weights
## NULL
##
## $counts
## function gradient
## 27 9
##
## $optim.message
## NULL

Ajuste mediante momentos


# (1) basic fit of a normal distribution with moment matching estimation
#
library(actuar)#para generar datos pareto
##
## Attaching package: 'actuar'

## The following objects are masked _by_ '.GlobalEnv':


##
## dgumbel, dzmgeom, pgumbel, qgumbel

## The following object is masked from 'package:grDevices':


##
## cm

set.seed(1234)
n <- 100
x1 <- rnorm(n=n)
mmedist(x1, "norm")

## $estimate
## mean sd
## -0.1567617 0.9993707
##
## $convergence
## [1] 0
##
## $value
## NULL
##
## $hessian
## NULL
##
## $optim.function
## NULL
##
## $order
## [1] 1 2
##
## $memp
## NULL
##
## $counts
## NULL
##
## $optim.message
## NULL
##
## $loglik
## [1] -141.8309
##
## $method
## [1] "closed formula"
##
## $weights
## NULL

#weighted
w <- c(rep(1, n/2), rep(10, n/2))
mmedist(x1, "norm", weights=w)$estimate

## Warning in mmedist(x1, "norm", weights = w): weights are not taken into
## account in the default initial values

## mean sd
## 0.08565839 1.02915474

# (2) fit a discrete distribution (Poisson)


#

set.seed(1234)
x2 <- rpois(n=30,lambda = 2)
mmedist(x2, "pois")

## $estimate
## lambda
## 1.7
##
## $convergence
## [1] 0
##
## $value
## NULL
##
## $hessian
## NULL
##
## $optim.function
## NULL
##
## $order
## [1] 1
##
## $memp
## NULL
##
## $counts
## NULL
##
## $optim.message
## NULL
##
## $loglik
## [1] -46.18434
##
## $method
## [1] "closed formula"
##
## $weights
## NULL

# (3) fit a finite-support distribution (beta)


#

set.seed(1234)
x3 <- rbeta(n=100,shape1=5, shape2=10)
mmedist(x3, "beta")

## $estimate
## shape1 shape2
## 4.522734 10.219685
##
## $convergence
## [1] 0
##
## $value
## NULL
##
## $hessian
## NULL
##
## $optim.function
## NULL
##
## $order
## [1] 1 2
##
## $memp
## NULL
##
## $counts
## NULL
##
## $optim.message
## NULL
##
## $loglik
## [1] 78.19503
##
## $method
## [1] "closed formula"
##
## $weights
## NULL
# (4) fit a Pareto distribution
#

## Not run:
# library() stops with an error when actuar is not installed; require() would
# only return FALSE and let the rpareto() call below fail later.
library(actuar)
# simulate a sample of 1000 Pareto draws
x4 <- rpareto(1000, shape = 6, scale = 2)

#empirical raw moment


# Empirical raw moment of the given order: the average of x^order.
memp <- function(x, order) {
  powered <- x^order
  mean(powered)
}
# Weighted empirical raw moment: weighted sum of x^order divided by the
# total weight.
memp2 <- function(x, order, weights) {
  weighted_total <- sum(x^order * weights)
  weight_total <- sum(weights)
  weighted_total / weight_total
}

#fit by MME
mmedist(x4, "pareto", order=c(1, 2), memp=memp,
start=list(shape=10, scale=10), lower=1, upper=Inf)

## $estimate
## shape scale
## 4.560423 1.464764
##
## $convergence
## [1] 0
##
## $value
## [1] 6.740714e-13
##
## $hessian
## NULL
##
## $optim.function
## [1] "constrOptim"
##
## $order
## [1] 1 2
##
## $memp
## function (x, order)
## mean(x^order)
## <bytecode: 0x0000000010dd0588>
##
## $counts
## function gradient
## 404 NA
##
## $optim.message
## NULL
##
## $loglik
## [1] -80.49091
##
## $method
## [1] "default"
##
## $optim.method
## [1] "Nelder-Mead"
##
## $weights
## NULL

#fit by weighted MME


w <- rep(1, length(x4))
w[x4 < 1] <- 2
mmedist(x4, "pareto", order=c(1, 2), memp=memp2, weights=w,
start=list(shape=10, scale=10), lower=1, upper=Inf)

## Warning in mmedist(x4, "pareto", order = c(1, 2), memp = memp2, weights =


## w, : weights are not taken into account in the default initial values

## $estimate
## shape scale
## 5.656694 1.630806
##
## $convergence
## [1] 0
##
## $value
## [1] 7.09812e-14
##
## $hessian
## NULL
##
## $optim.function
## [1] "constrOptim"
##
## $order
## [1] 1 2
##
## $memp
## function (x, order, weights)
## sum(x^order * weights)/sum(weights)
## <bytecode: 0x000000000e438d08>
##
## $counts
## function gradient
## 767 NA
##
## $optim.message
## NULL
##
## $loglik
## [1] 119.7361
##
## $method
## [1] "default"
##
## $optim.method
## [1] "Nelder-Mead"
##
## $weights
## [1] 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [35] 2 2 2 2 2 2 1 2 2 1 2 2 2 1 2 2 2 2 2 1 1 2 2 2 2 2 1 2 2 2 2 2 2 2
## [69] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [103] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 1 2 2 1
## [137] 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2
## [171] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [205] 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2
## [239] 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 1
## [273] 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [307] 2 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [341] 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 1 2 2 2 2 1 2 2 1 2 2 2
## [375] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 1
## [409] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2
## [443] 2 1 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2
## [477] 2 2 2 2 1 2 2 2 2 2 1 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1
## [511] 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 1 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2
## [545] 2 2 1 2 1 2 2 2 2 2 2 2 2 2 2 1 2 2 1 2 2 2 2 1 1 2 2 2 2 1 2 2 2 2
## [579] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2
## [613] 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2
## [647] 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 1 2 1 2 2 2 2
## [681] 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 1 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2
## [715] 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2
## [749] 2 2 1 1 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 1 2 2 2 1 2 2 2 2 2 2 2 2 1
## [783] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [817] 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [851] 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 1 2 2 1 2 2
## [885] 2 2 1 2 2 1 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [919] 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 1 2 1 1 2 2 1 2 2 2 2 2 1 2 2
## [953] 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [987] 2 2 2 2 1 2 2 2 2 2 2 2 2 2

Test de bondad de ajuste


# (1) Fit of a Weibull distribution to serving size data by maximum
# goodness-of-fit estimation using all the distances available
#

data(groundbeef)
serving <- groundbeef$serving
mgedist(serving, "weibull", gof="CvM")
## $estimate
## shape scale
## 2.093204 82.660014
##
## $convergence
## [1] 0
##
## $value
## [1] 0.6556672
##
## $hessian
## shape scale
## shape 4.05295367 0.09244476
## scale 0.09244476 0.02418777
##
## $gof
## [1] "CvM"
##
## $optim.function
## [1] "optim"
##
## $loglik
## [1] -1255.623
##
## $fix.arg
## NULL
##
## $optim.method
## [1] "Nelder-Mead"
##
## $fix.arg.fun
## NULL
##
## $counts
## function gradient
## 65 NA
##
## $optim.message
## NULL

mgedist(serving, "weibull", gof="KS")

## $estimate
## shape scale
## 2.065634 81.450487
##
## $convergence
## [1] 0
##
## $value
## [1] 0.112861
##
## $hessian
## shape scale
## shape 122.668263 6.509057
## scale 6.509057 7.599584
##
## $gof
## [1] "KS"
##
## $optim.function
## [1] "optim"
##
## $loglik
## [1] -1255.975
##
## $fix.arg
## NULL
##
## $optim.method
## [1] "Nelder-Mead"
##
## $fix.arg.fun
## NULL
##
## $counts
## function gradient
## 127 NA
##
## $optim.message
## NULL

mgedist(serving, "weibull", gof="AD")

## $estimate
## shape scale
## 2.125473 82.890260
##
## $convergence
## [1] 0
##
## $value
## [1] 3.501035
##
## $hessian
## shape scale
## shape 29.4165108 0.1823375
## scale 0.1823375 0.1354409
##
## $gof
## [1] "AD"
##
## $optim.function
## [1] "optim"
##
## $loglik
## [1] -1255.392
##
## $fix.arg
## NULL
##
## $optim.method
## [1] "Nelder-Mead"
##
## $fix.arg.fun
## NULL
##
## $counts
## function gradient
## 63 NA
##
## $optim.message
## NULL

mgedist(serving, "weibull", gof="ADR")

## $estimate
## shape scale
## 2.072087 82.761868
##
## $convergence
## [1] 0
##
## $value
## [1] 1.610479
##
## $hessian
## shape scale
## shape 13.5240921 -0.33242262
## scale -0.3324226 0.07977375
##
## $gof
## [1] "ADR"
##
## $optim.function
## [1] "optim"
##
## $loglik
## [1] -1255.836
##
## $fix.arg
## NULL
##
## $optim.method
## [1] "Nelder-Mead"
##
## $fix.arg.fun
## NULL
##
## $counts
## function gradient
## 65 NA
##
## $optim.message
## NULL

mgedist(serving, "weibull", gof="ADL")

## $estimate
## shape scale
## 2.197498 82.016005
##
## $convergence
## [1] 0
##
## $value
## [1] 1.845939
##
## $hessian
## shape scale
## shape 15.3272022 0.54407117
## scale 0.5440712 0.05549883
##
## $gof
## [1] "ADL"
##
## $optim.function
## [1] "optim"
##
## $loglik
## [1] -1255.415
##
## $fix.arg
## NULL
##
## $optim.method
## [1] "Nelder-Mead"
##
## $fix.arg.fun
## NULL
##
## $counts
## function gradient
## 65 NA
##
## $optim.message
## NULL

mgedist(serving, "weibull", gof="AD2R")

## $estimate
## shape scale
## 1.90328 81.33464
##
## $convergence
## [1] 0
##
## $value
## [1] 11.56415
##
## $hessian
## shape scale
## shape 334.61081 -10.4227495
## scale -10.42275 0.5223167
##
## $gof
## [1] "AD2R"
##
## $optim.function
## [1] "optim"
##
## $loglik
## [1] -1259.112
##
## $fix.arg
## NULL
##
## $optim.method
## [1] "Nelder-Mead"
##
## $fix.arg.fun
## NULL
##
## $counts
## function gradient
## 67 NA
##
## $optim.message
## NULL
mgedist(serving, "weibull", gof="AD2L")

## $estimate
## shape scale
## 2.483836 78.252113
##
## $convergence
## [1] 0
##
## $value
## [1] 9.786977
##
## $hessian
## shape scale
## shape 113.511932 4.1108355
## scale 4.110836 0.2341312
##
## $gof
## [1] "AD2L"
##
## $optim.function
## [1] "optim"
##
## $loglik
## [1] -1265.933
##
## $fix.arg
## NULL
##
## $optim.method
## [1] "Nelder-Mead"
##
## $fix.arg.fun
## NULL
##
## $counts
## function gradient
## 69 NA
##
## $optim.message
## NULL

mgedist(serving, "weibull", gof="AD2")

## $estimate
## shape scale
## 2.081168 85.281194
##
## $convergence
## [1] 0
##
## $value
## [1] 26.95166
##
## $hessian
## shape scale
## shape 534.9606 -10.5940982
## scale -10.5941 0.7606462
##
## $gof
## [1] "AD2"
##
## $optim.function
## [1] "optim"
##
## $loglik
## [1] -1256.313
##
## $fix.arg
## NULL
##
## $optim.method
## [1] "Nelder-Mead"
##
## $fix.arg.fun
## NULL
##
## $counts
## function gradient
## 67 NA
##
## $optim.message
## NULL

# (2) Fit of a uniform distribution using Cramer-von Mises or


# Kolmogorov-Smirnov distance
#

set.seed(1234)
u <- runif(100,min=5,max=10)
mgedist(u,"unif",gof="CvM")

## $estimate
## min max
## 0 1
##
## $convergence
## [1] 0
##
## $value
## [1] 33.33333
##
## $hessian
## min max
## min 0 0
## max 0 0
##
## $gof
## [1] "CvM"
##
## $optim.function
## [1] "optim"
##
## $loglik
## [1] -Inf
##
## $fix.arg
## NULL
##
## $optim.method
## [1] "Nelder-Mead"
##
## $fix.arg.fun
## NULL
##
## $counts
## function gradient
## 3 NA
##
## $optim.message
## NULL

mgedist(u,"unif",gof="KS")

## $estimate
## min max
## 0 1
##
## $convergence
## [1] 0
##
## $value
## [1] 1
##
## $hessian
## min max
## min 0 0
## max 0 0
##
## $gof
## [1] "KS"
##
## $optim.function
## [1] "optim"
##
## $loglik
## [1] -Inf
##
## $fix.arg
## NULL
##
## $optim.method
## [1] "Nelder-Mead"
##
## $fix.arg.fun
## NULL
##
## $counts
## function gradient
## 3 NA
##
## $optim.message
## NULL

# (3) Fit of a triangular distribution using Cramer-von Mises or


# Kolmogorov-Smirnov distance
#

Mediante bootstrap
#Usando bootstrap parametrico
# (1) Fit of a gamma distribution to serving size data
# using default method (maximum likelihood estimation)
# followed by parametric bootstrap
#
data(groundbeef)
x1 <- groundbeef$serving
f1 <- fitdist(x1, "gamma")
b1 <- bootdist(f1, niter=51)
print(b1)

## Parameter values obtained with parametric bootstrap


## shape rate
## 1 4.447529 0.05743908
## 2 3.731715 0.05194225
## 3 4.328852 0.05947364
## 4 4.372875 0.05782577
## 5 4.029148 0.05353329
## 6 4.585687 0.06175285

plot(b1)
plot(b1, enhance=TRUE)

summary(b1)
## Parametric bootstrap medians and 95% percentile CI
## Median 2.5% 97.5%
## shape 4.11909335 3.47001598 4.91439278
## rate 0.05552801 0.04696161 0.06963818

quantile(b1)

## (original) estimated quantiles for each specified probability (non-censored data)
## p=0.1 p=0.2 p=0.3 p=0.4 p=0.5 p=0.6 p=0.7
## estimate 32.16307 42.32373 50.9163 59.15229 67.62884 76.88571 87.68252
## p=0.8 p=0.9
## estimate 101.5288 122.9674
## Median of bootstrap estimates
## p=0.1 p=0.2 p=0.3 p=0.4 p=0.5 p=0.6 p=0.7
## estimate 32.36777 42.44537 50.71752 58.84559 67.23132 76.44518 87.19287
## p=0.8 p=0.9
## estimate 100.7935 121.7046
##
## two-sided 95 % CI of each quantile
## p=0.1 p=0.2 p=0.3 p=0.4 p=0.5 p=0.6 p=0.7
## 2.5 % 28.68123 38.15248 46.47824 54.72404 63.17646 72.01932 82.20427
## 97.5 % 36.94471 47.23128 55.84597 63.64572 71.70947 81.37541 92.23853
## p=0.8 p=0.9
## 2.5 % 94.19356 113.4518
## 97.5 % 105.87249 127.8143

CIcdfplot(b1, CI.output = "quantile")


#Usando bootstrap no parametrico
# (2) non parametric bootstrap on the same fit
#
b1b <- bootdist(f1, bootmethod="nonparam", niter=51)
summary(b1b)

## Nonparametric bootstrap medians and 95% percentile CI


## Median 2.5% 97.5%
## shape 3.98574643 3.44991529 4.78763146
## rate 0.05348564 0.04561434 0.06315915

quantile(b1b)

## (original) estimated quantiles for each specified probability (non-censored data)
## p=0.1 p=0.2 p=0.3 p=0.4 p=0.5 p=0.6 p=0.7
## estimate 32.16307 42.32373 50.9163 59.15229 67.62884 76.88571 87.68252
## p=0.8 p=0.9
## estimate 101.5288 122.9674
## Median of bootstrap estimates
## p=0.1 p=0.2 p=0.3 p=0.4 p=0.5 p=0.6 p=0.7
## estimate 32.44327 42.73448 51.41546 59.61626 68.28533 77.77375 88.58221
## p=0.8 p=0.9
## estimate 102.7877 124.5528
##
## two-sided 95 % CI of each quantile
## p=0.1 p=0.2 p=0.3 p=0.4 p=0.5 p=0.6 p=0.7
## 2.5 % 29.51526 39.58272 48.07807 56.33362 64.87729 74.25208 85.04196
## 97.5 % 36.02190 46.30190 55.09512 63.46606 72.13126 81.69365 93.09416
## p=0.8 p=0.9
## 2.5 % 98.34933 118.8859
## 97.5 % 108.82299 133.4873

# (3) Fit of a normal distribution on acute toxicity values of endosulfan in
# log10 for nonarthropod invertebrates, using maximum likelihood estimation
# to estimate what is called a species sensitivity distribution
# (SSD) in ecotoxicology, followed by estimation of the 5 percent quantile
# value of the fitted distribution, what is called the 5 percent hazardous
# concentration (HC5) in ecotoxicology, with its two-sided 95 percent
# confidence interval calculated by parametric bootstrap
#
data(endosulfan)
# Acute toxicity values (ATV) of the "NonArthroInvert" group.
ATV <- subset(endosulfan, group == "NonArthroInvert")$ATV
# Work on the log10 scale; reuse ATV instead of repeating the subset.
log10ATV <- log10(ATV)
# Fit a normal distribution, then bootstrap the fit parametrically
# (51 iterations).
fln <- fitdist(log10ATV, "norm")
bln <- bootdist(fln, bootmethod = "param", niter = 51)
# Quantiles at 5, 10 and 20 percent; the 5 percent one is the HC5.
quantile(bln, probs = c(0.05, 0.1, 0.2))

## (original) estimated quantiles for each specified probability (non-censored data)
## p=0.05 p=0.1 p=0.2
## estimate 1.744227 2.080093 2.4868
## Median of bootstrap estimates
## p=0.05 p=0.1 p=0.2
## estimate 1.714501 2.038195 2.44191
##
## two-sided 95 % CI of each quantile
## p=0.05 p=0.1 p=0.2
## 2.5 % 1.157586 1.506329 1.948332
## 97.5 % 2.328583 2.574807 2.885709

# (4) comparison of sequential and parallel versions of bootstrap


# to be tried with a greater number of iterations (1001 or more)
#
## Not run:
niter <- 1001
data(groundbeef)
x1 <- groundbeef$serving
f1 <- fitdist(x1, "gamma")

# sequential version
ptm <- proc.time()
summary(bootdist(f1, niter = niter))
## Parametric bootstrap medians and 95% percentile CI
## Median 2.5% 97.5%
## shape 4.01117535 3.4508732 4.773186
## rate 0.05463969 0.0458285 0.065843

proc.time() - ptm

## user system elapsed


## 5.67 0.00 5.67

# parallel version using snow


require(parallel)

## Loading required package: parallel

ptm <- proc.time()


summary(bootdist(f1, niter = niter, parallel = "snow", ncpus = 4))

## Parametric bootstrap medians and 95% percentile CI


## Median 2.5% 97.5%
## shape 4.03699568 3.43518046 4.81418350
## rate 0.05478111 0.04589598 0.06571518

proc.time() - ptm

## user system elapsed


## 0.03 0.05 5.10

# parallel version using multicore (not available on Windows)


ptm <- proc.time()
summary(bootdist(f1, niter = niter, parallel = "multicore", ncpus = 4))

## Warning in bootdist(f1, niter = niter, parallel = "multicore", ncpus = 4):
## As the multicore option is not supported on Windows it was replaced by snow

## Parametric bootstrap medians and 95% percentile CI


## Median 2.5% 97.5%
## shape 4.03207772 3.4302298 4.79049424
## rate 0.05478922 0.0462193 0.06553756

proc.time() - ptm

## user system elapsed


## 0.06 0.03 5.05

You might also like