Data and Examples from Winkelmann and Boes (2009)
WinkelmannBoes2009.RdThis manual page collects a list of examples from the book. Some solutions might not be exact and the list is not complete. If you have suggestions for improvement (preferably in the form of code), please contact the package maintainer.
References
Winkelmann, R., and Boes, S. (2009). Analysis of Microdata, 2nd ed. Berlin and Heidelberg: Springer-Verlag.
Examples
#> Loading required namespace: ROCR
#> Loading required namespace: truncreg
# \donttest{
#########################################
## US General Social Survey 1974--2002 ##
#########################################
## data
data("GSS7402", package = "AER")
## completed fertility subset
gss40 <- subset(GSS7402, age >= 40)
## Chapter 1
## Table 1.1
gss_kids <- table(gss40$kids)
cbind(absolute = gss_kids,
relative = round(prop.table(gss_kids) * 100, digits = 2))
#> absolute relative
#> 0 744 14.45
#> 1 706 13.71
#> 2 1368 26.56
#> 3 1002 19.46
#> 4 593 11.51
#> 5 309 6.00
#> 6 190 3.69
#> 7 89 1.73
#> 8 149 2.89
## Table 1.2
sd1 <- function(x) sd(x) / sqrt(length(x))
with(gss40, round(cbind(
"obs" = tapply(kids, year, length),
"av kids" = tapply(kids, year, mean),
" " = tapply(kids, year, sd1),
"prop childless" = tapply(kids, year, function(x) mean(x <= 0)),
" " = tapply(kids, year, function(x) sd1(x <= 0)),
"av schooling" = tapply(education, year, mean),
" " = tapply(education, year, sd1)
), digits = 2))
#> obs av kids prop childless av schooling
#> 1974 410 3.17 0.10 0.09 0.01 11.07 0.16
#> 1978 445 2.73 0.09 0.14 0.02 11.00 0.15
#> 1982 577 2.96 0.09 0.14 0.01 11.05 0.14
#> 1986 470 2.70 0.09 0.16 0.02 11.34 0.14
#> 1990 431 2.50 0.08 0.15 0.02 12.41 0.15
#> 1994 989 2.40 0.06 0.15 0.01 12.78 0.10
#> 1998 911 2.42 0.06 0.15 0.01 12.94 0.11
#> 2002 917 2.36 0.06 0.16 0.01 13.25 0.10
## Table 1.3
gss40$trend <- gss40$year - 1974
kids_lm1 <- lm(kids ~ factor(year), data = gss40)
kids_lm2 <- lm(kids ~ trend, data = gss40)
kids_lm3 <- lm(kids ~ trend + education, data = gss40)
## Chapter 2
## Table 2.1
kids_tab <- prop.table(xtabs(~ kids + year, data = gss40), 2) * 100
round(kids_tab[,c(4, 8)], digits = 2)
#> year
#> kids 1986 2002
#> 0 16.38 15.59
#> 1 11.28 13.96
#> 2 26.60 28.68
#> 3 15.74 22.14
#> 4 11.49 9.92
#> 5 7.02 3.93
#> 6 6.38 2.62
#> 7 1.70 1.64
#> 8 3.40 1.53
## Figure 2.1
barplot(t(kids_tab[, c(4, 8)]), beside = TRUE, legend = TRUE)
## Chapter 3, Example 3.14
## Table 3.1
gss40$nokids <- factor(gss40$kids <= 0,
levels = c(FALSE, TRUE), labels = c("no", "yes"))
nokids_p1 <- glm(nokids ~ 1, data = gss40, family = binomial(link = "probit"))
nokids_p2 <- glm(nokids ~ trend, data = gss40, family = binomial(link = "probit"))
nokids_p3 <- glm(nokids ~ trend + education + ethnicity + siblings,
data = gss40, family = binomial(link = "probit"))
## p. 87
lrtest(nokids_p1, nokids_p2, nokids_p3)
#> Likelihood ratio test
#>
#> Model 1: nokids ~ 1
#> Model 2: nokids ~ trend
#> Model 3: nokids ~ trend + education + ethnicity + siblings
#> #Df LogLik Df Chisq Pr(>Chisq)
#> 1 1 -2126.9
#> 2 2 -2123.6 1 6.5677 0.01038 *
#> 3 5 -2107.1 3 32.9906 3.235e-07 ***
#> ---
#> Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
## Chapter 4, Example 4.1
gss40$nokids01 <- as.numeric(gss40$nokids) - 1
nokids_lm3 <- lm(nokids01 ~ trend + education + ethnicity + siblings, data = gss40)
coeftest(nokids_lm3, vcov = sandwich)
#>
#> t test of coefficients:
#>
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) 0.03972864 0.02898580 1.3706 0.1706
#> trend 0.00060242 0.00053829 1.1191 0.2631
#> education 0.00757341 0.00187637 4.0362 5.511e-05 ***
#> ethnicitycauc 0.01424775 0.01314836 1.0836 0.2786
#> siblings -0.00235388 0.00153770 -1.5308 0.1259
#> ---
#> Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#>
## Example 4.3
## Table 4.1
nokids_l1 <- glm(nokids ~ 1, data = gss40, family = binomial(link = "logit"))
nokids_l3 <- glm(nokids ~ trend + education + ethnicity + siblings,
data = gss40, family = binomial(link = "logit"))
lrtest(nokids_p3)
#> Error in eval(mf, parent.frame()): object 'gss40' not found
lrtest(nokids_l3)
#> Error in eval(mf, parent.frame()): object 'gss40' not found
## Table 4.2
nokids_xbar <- colMeans(model.matrix(nokids_l3))
sum(coef(nokids_p3) * nokids_xbar)
#> [1] -1.069858
sum(coef(nokids_l3) * nokids_xbar)
#> [1] -1.802965
dnorm(sum(coef(nokids_p3) * nokids_xbar))
#> [1] 0.2250941
dlogis(sum(coef(nokids_l3) * nokids_xbar))
#> [1] 0.1214709
dnorm(sum(coef(nokids_p3) * nokids_xbar)) * coef(nokids_p3)[3]
#> education
#> 0.007072723
dlogis(sum(coef(nokids_l3) * nokids_xbar)) * coef(nokids_l3)[3]
#> education
#> 0.007658209
exp(coef(nokids_l3)[3])
#> education
#> 1.065075
## Figure 4.4
## everything by hand (for ethnicity = "cauc" group)
nokids_xbar <- as.vector(nokids_xbar)
nokids_nd <- data.frame(education = seq(0, 20, by = 0.5), trend = nokids_xbar[2],
ethnicity = "cauc", siblings = nokids_xbar[4])
nokids_p3_fit <- predict(nokids_p3, newdata = nokids_nd,
type = "response", se.fit = TRUE)
plot(nokids_nd$education, nokids_p3_fit$fit, type = "l",
xlab = "education", ylab = "predicted probability", ylim = c(0, 0.3))
polygon(c(nokids_nd$education, rev(nokids_nd$education)),
c(nokids_p3_fit$fit + 1.96 * nokids_p3_fit$se.fit,
rev(nokids_p3_fit$fit - 1.96 * nokids_p3_fit$se.fit)),
col = "lightgray", border = "lightgray")
lines(nokids_nd$education, nokids_p3_fit$fit)
## using "effects" package (for average "ethnicity" variable)
library("effects")
nokids_p3_ef <- effect("education", nokids_p3, xlevels = list(education = 0:20))
plot(nokids_p3_ef, rescale.axis = FALSE, ylim = c(0, 0.3))
## using "effects" plus modification by hand
nokids_p3_ef1 <- as.data.frame(nokids_p3_ef)
plot(pnorm(fit) ~ education, data = nokids_p3_ef1, type = "n", ylim = c(0, 0.3))
polygon(c(0:20, 20:0), pnorm(c(nokids_p3_ef1$upper, rev(nokids_p3_ef1$lower))),
col = "lightgray", border = "lightgray")
lines(pnorm(fit) ~ education, data = nokids_p3_ef1)
## Table 4.6
## McFadden's R^2
1 - as.numeric( logLik(nokids_p3) / logLik(nokids_p1) )
#> [1] 0.009299554
1 - as.numeric( logLik(nokids_l3) / logLik(nokids_l1) )
#> [1] 0.009840523
## McKelvey and Zavoina R^2
r2mz <- function(obj) {
ystar <- predict(obj)
sse <- sum((ystar - mean(ystar))^2)
s2 <- switch(obj$family$link, "probit" = 1, "logit" = pi^2/3, NA)
n <- length(residuals(obj))
sse / (n * s2 + sse)
}
r2mz(nokids_p3)
#> [1] 0.01784602
r2mz(nokids_l3)
#> [1] 0.02076783
## AUC
library("ROCR")
nokids_p3_pred <- prediction(fitted(nokids_p3), gss40$nokids)
nokids_l3_pred <- prediction(fitted(nokids_l3), gss40$nokids)
plot(performance(nokids_p3_pred, "tpr", "fpr"))
abline(0, 1, lty = 2)
performance(nokids_p3_pred, "auc")
#> A performance instance
#> 'Area under the ROC curve'
plot(performance(nokids_l3_pred, "tpr", "fpr"))
abline(0, 1, lty = 2)
performance(nokids_l3_pred, "auc")@y.values
#> [[1]]
#> [1] 0.5828855
#>
## Chapter 7
## Table 7.3
## subset selection
gss02 <- subset(GSS7402, year == 2002 & (age < 40 | !is.na(agefirstbirth)))
#Z# This selection conforms with top of page 229. However, there
#Z# are too many observations: 1374. Furthermore, there are six
#Z# observations with agefirstbirth <= 14 which will cause problems in
#Z# taking logs!
## computing time to first birth
gss02$tfb <- with(gss02, ifelse(is.na(agefirstbirth), age - 14, agefirstbirth - 14))
#Z# currently this is still needed before taking logs
gss02$tfb <- pmax(gss02$tfb, 1)
tfb_tobit <- tobit(log(tfb) ~ education + ethnicity + siblings + city16 + immigrant,
data = gss02, left = -Inf, right = log(gss02$age - 14))
tfb_ols <- lm(log(tfb) ~ education + ethnicity + siblings + city16 + immigrant,
data = gss02, subset = !is.na(agefirstbirth))
## Chapter 8
## Example 8.3
gss2002 <- subset(GSS7402, year == 2002 & (agefirstbirth < 40 | age < 40))
gss2002$afb <- with(gss2002, Surv(ifelse(kids > 0, agefirstbirth, age), kids > 0))
afb_km <- survfit(afb ~ 1, data = gss2002)
afb_skm <- summary(afb_km)
print(afb_skm)
#> Call: survfit(formula = afb ~ 1, data = gss2002)
#>
#> time n.risk n.event survival std.err lower 95% CI upper 95% CI
#> 11 1371 1 0.9993 0.000729 0.9978 1.0000
#> 13 1370 3 0.9971 0.001457 0.9942 0.9999
#> 14 1367 2 0.9956 0.001783 0.9921 0.9991
#> 15 1365 14 0.9854 0.003238 0.9791 0.9918
#> 16 1351 40 0.9562 0.005525 0.9455 0.9671
#> 17 1311 66 0.9081 0.007802 0.8929 0.9235
#> 18 1245 97 0.8373 0.009967 0.8180 0.8571
#> 19 1148 106 0.7600 0.011534 0.7378 0.7830
#> 20 1030 122 0.6700 0.012726 0.6455 0.6954
#> 21 898 123 0.5782 0.013406 0.5525 0.6051
#> 22 767 97 0.5051 0.013612 0.4791 0.5325
#> 23 654 72 0.4495 0.013600 0.4236 0.4770
#> 24 563 61 0.4008 0.013480 0.3752 0.4281
#> 25 484 60 0.3511 0.013248 0.3261 0.3781
#> 26 407 45 0.3123 0.012986 0.2878 0.3388
#> 27 351 45 0.2723 0.012618 0.2486 0.2981
#> 28 294 37 0.2380 0.012223 0.2152 0.2632
#> 29 246 32 0.2070 0.011795 0.1852 0.2315
#> 30 209 38 0.1694 0.011119 0.1489 0.1926
#> 31 163 18 0.1507 0.010730 0.1311 0.1733
#> 32 135 19 0.1295 0.010264 0.1108 0.1512
#> 33 108 17 0.1091 0.009766 0.0915 0.1300
#> 34 83 7 0.0999 0.009542 0.0828 0.1205
#> 35 65 13 0.0799 0.009101 0.0639 0.0999
#> 36 45 7 0.0675 0.008815 0.0522 0.0872
#> 37 29 7 0.0512 0.008572 0.0369 0.0711
#> 38 17 3 0.0422 0.008499 0.0284 0.0626
#> 39 12 2 0.0351 0.008411 0.0220 0.0562
with(afb_skm, plot(n.event/n.risk ~ time, type = "s"))
plot(afb_km, xlim = c(10, 40), conf.int = FALSE)
## Example 8.9
library("survival")
afb_ex <- survreg(
afb ~ education + siblings + ethnicity + immigrant + lowincome16 + city16,
data = gss2002, dist = "exponential")
afb_wb <- survreg(
afb ~ education + siblings + ethnicity + immigrant + lowincome16 + city16,
data = gss2002, dist = "weibull")
afb_ln <- survreg(
afb ~ education + siblings + ethnicity + immigrant + lowincome16 + city16,
data = gss2002, dist = "lognormal")
## Example 8.11
kids_pois <- glm(kids ~ education + trend + ethnicity + immigrant + lowincome16 + city16,
data = gss40, family = poisson)
library("MASS")
kids_nb <- glm.nb(kids ~ education + trend + ethnicity + immigrant + lowincome16 + city16,
data = gss40)
lrtest(kids_pois, kids_nb)
#> Likelihood ratio test
#>
#> Model 1: kids ~ education + trend + ethnicity + immigrant + lowincome16 +
#> city16
#> Model 2: kids ~ education + trend + ethnicity + immigrant + lowincome16 +
#> city16
#> #Df LogLik Df Chisq Pr(>Chisq)
#> 1 7 -10117
#> 2 8 -10014 1 205.17 < 2.2e-16 ***
#> ---
#> Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
############################################
## German Socio-Economic Panel 1994--2002 ##
############################################
## data
data("GSOEP9402", package = "AER")
## some convenience data transformations
gsoep <- GSOEP9402
gsoep$meducation2 <- cut(gsoep$meducation, breaks = c(6, 10.25, 12.25, 18),
labels = c("7-10", "10.5-12", "12.5-18"))
gsoep$year2 <- factor(gsoep$year)
## Chapter 1
## Table 1.4 plus visualizations
gsoep_tab <- xtabs(~ meducation2 + school, data = gsoep)
round(prop.table(gsoep_tab, 1) * 100, digits = 2)
#> school
#> meducation2 Hauptschule Realschule Gymnasium
#> 7-10 55.12 25.20 19.69
#> 10.5-12 28.09 34.16 37.75
#> 12.5-18 3.88 14.56 81.55
spineplot(gsoep_tab)
plot(school ~ meducation, data = gsoep, breaks = c(7, 10.25, 12.25, 18))
plot(school ~ meducation, data = gsoep, breaks = c(7, 9, 10.5, 11.5, 12.5, 15, 18))
## Chapter 5
## Table 5.1
library("nnet")
gsoep_mnl <- multinom(
school ~ meducation + memployment + log(income) + log(size) + parity + year2,
data = gsoep)
#> # weights: 48 (30 variable)
#> initial value 741.563295
#> iter 10 value 655.748279
#> iter 20 value 624.992858
#> iter 30 value 618.605354
#> final value 618.475696
#> converged
coeftest(gsoep_mnl)[c(1:6, 1:6 + 14),]
#> Estimate Std. Error z value Pr(>|z|)
#> Realschule:(Intercept) -6.3864877 2.36903996 -2.6958126 7.021716e-03
#> Realschule:meducation 0.3004843 0.07910641 3.7984819 1.455851e-04
#> Realschule:memploymentparttime 0.4933680 0.32189721 1.5326879 1.253528e-01
#> Realschule:memploymentnone 0.7526399 0.32884476 2.2887392 2.209451e-02
#> Realschule:log(income) 0.3934871 0.22539836 1.7457408 8.085601e-02
#> Realschule:log(size) -1.1921790 0.44641156 -2.6705827 7.571972e-03
#> Realschule:year22002 0.1922413 0.45158350 0.4257049 6.703229e-01
#> Gymnasium:(Intercept) -23.6975758 3.01022807 -7.8723523 3.480345e-15
#> Gymnasium:meducation 0.6597649 0.08144034 8.1012060 5.441700e-16
#> Gymnasium:memploymentparttime 0.9372429 0.34536421 2.7137813 6.652007e-03
#> Gymnasium:memploymentnone 1.1007579 0.35842760 3.0710746 2.132898e-03
#> Gymnasium:log(income) 1.6676745 0.28408439 5.8703492 4.348783e-09
## alternatively
library("mlogit")
gsoep_mnl2 <- mlogit(school ~ 0 | meducation + memployment + log(income) +
log(size) + parity + year2, data = gsoep, shape = "wide", reflevel = "Hauptschule")
coeftest(gsoep_mnl2)[1:12,]
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept):Gymnasium -23.6982768 3.01026604 -7.872486 1.475202e-14
#> (Intercept):Realschule -6.3865987 2.36904833 -2.695850 7.204061e-03
#> meducation:Gymnasium 0.6597829 0.08144157 8.101304 2.726719e-15
#> meducation:Realschule 0.3004923 0.07910725 3.798543 1.593085e-04
#> memploymentparttime:Gymnasium 0.9372401 0.34536576 2.713761 6.830145e-03
#> memploymentparttime:Realschule 0.4933644 0.32189760 1.532675 1.258463e-01
#> memploymentnone:Gymnasium 1.1007670 0.35842942 3.071084 2.222541e-03
#> memploymentnone:Realschule 0.7526490 0.32884523 2.288764 2.241551e-02
#> log(income):Gymnasium 1.6677258 0.28408738 5.870468 6.954975e-09
#> log(income):Realschule 0.3934899 0.22539876 1.745750 8.133056e-02
#> log(size):Gymnasium -1.5459256 0.48775919 -3.169444 1.599570e-03
#> log(size):Realschule -1.1921835 0.44641174 -2.670592 7.762668e-03
## Table 5.2
library("effects")
gsoep_eff <- effect("meducation", gsoep_mnl,
xlevels = list(meducation = sort(unique(gsoep$meducation))))
gsoep_eff$prob
#> prob.Hauptschule prob.Realschule prob.Gymnasium
#> [1,] 0.686724467 0.24514452 0.06813102
#> [2,] 0.494486442 0.32195219 0.18356137
#> [3,] 0.385007121 0.33853566 0.27645721
#> [4,] 0.331068546 0.33830072 0.33063074
#> [5,] 0.279605513 0.33203209 0.38836239
#> [6,] 0.231922018 0.32005576 0.44802222
#> [7,] 0.189019319 0.30313717 0.50784351
#> [8,] 0.119575666 0.25898492 0.62143941
#> [9,] 0.093066080 0.23424613 0.67268779
#> [10,] 0.071541719 0.20926168 0.71919660
#> [11,] 0.054404764 0.18493393 0.76066131
#> [12,] 0.040990572 0.16192466 0.79708477
#> [13,] 0.022753542 0.12138826 0.85585820
#> [14,] 0.006601958 0.06423885 0.92915919
plot(gsoep_eff, confint = FALSE)
## Table 5.3, odds
exp(coef(gsoep_mnl)[, "meducation"])
#> Realschule Gymnasium
#> 1.350513 1.934338
## all effects
eff_mnl <- allEffects(gsoep_mnl)
plot(eff_mnl, ask = FALSE, confint = FALSE)
plot(eff_mnl, ask = FALSE, style = "stacked", colors = gray.colors(3))
## omit year
gsoep_mnl1 <- multinom(
school ~ meducation + memployment + log(income) + log(size) + parity,
data = gsoep)
#> # weights: 24 (14 variable)
#> initial value 741.563295
#> iter 10 value 658.442291
#> iter 20 value 624.980518
#> final value 624.957624
#> converged
lrtest(gsoep_mnl, gsoep_mnl1)
#> Likelihood ratio test
#>
#> Model 1: school ~ meducation + memployment + log(income) + log(size) +
#> parity + year2
#> Model 2: school ~ meducation + memployment + log(income) + log(size) +
#> parity
#> #Df LogLik Df Chisq Pr(>Chisq)
#> 1 30 -618.48
#> 2 14 -624.96 -16 12.964 0.6754
eff_mnl1 <- allEffects(gsoep_mnl1)
plot(eff_mnl1, ask = FALSE, confint = FALSE)
plot(eff_mnl1, ask = FALSE, style = "stacked", colors = gray.colors(3))
## Chapter 6
## Table 6.1
library("MASS")
gsoep$munemp <- factor(gsoep$memployment != "none",
levels = c(FALSE, TRUE), labels = c("no", "yes"))
gsoep_pop <- polr(school ~ meducation + munemp + log(income) + log(size) + parity + year2,
data = gsoep, method = "probit", Hess = TRUE)
gsoep_pol <- polr(school ~ meducation + munemp + log(income) + log(size) + parity + year2,
data = gsoep, Hess = TRUE)
lrtest(gsoep_pop)
#> Error in eval(expr, p): object 'gsoep' not found
lrtest(gsoep_pol)
#> Error in eval(expr, p): object 'gsoep' not found
## Table 6.2
## todo
eff_pol <- allEffects(gsoep_pol)
plot(eff_pol, ask = FALSE, confint = FALSE)
plot(eff_pol, ask = FALSE, style = "stacked", colors = gray.colors(3))
####################################
## Labor Force Participation Data ##
####################################
## Mroz data
data("PSID1976", package = "AER")
PSID1976$nwincome <- with(PSID1976, (fincome - hours * wage)/1000)
## visualizations
plot(hours ~ nwincome, data = PSID1976,
xlab = "Non-wife income (in USD 1000)",
ylab = "Hours of work in 1975")
plot(jitter(hours, 200) ~ jitter(wage, 50), data = PSID1976,
xlab = "Wife's average hourly wage (jittered)",
ylab = "Hours of work in 1975 (jittered)")
## Chapter 1, p. 18
hours_lm <- lm(hours ~ wage + nwincome + youngkids + oldkids, data = PSID1976,
subset = participation == "yes")
## Chapter 7
## Example 7.2, Table 7.1
hours_tobit <- tobit(hours ~ nwincome + education + experience + I(experience^2) +
age + youngkids + oldkids, data = PSID1976)
hours_ols1 <- lm(hours ~ nwincome + education + experience + I(experience^2) +
age + youngkids + oldkids, data = PSID1976)
hours_ols2 <- lm(hours ~ nwincome + education + experience + I(experience^2) +
age + youngkids + oldkids, data = PSID1976, subset = participation == "yes")
## Example 7.10, Table 7.4
wage_ols <- lm(log(wage) ~ education + experience + I(experience^2),
data = PSID1976, subset = participation == "yes")
library("sampleSelection")
wage_ghr <- selection(participation ~ nwincome + age + youngkids + oldkids +
education + experience + I(experience^2),
log(wage) ~ education + experience + I(experience^2), data = PSID1976)
## Exercise 7.13
hours_cragg1 <- glm(participation ~ nwincome + education +
experience + I(experience^2) + age + youngkids + oldkids,
data = PSID1976, family = binomial(link = "probit"))
library("truncreg")
hours_cragg2 <- truncreg(hours ~ nwincome + education +
experience + I(experience^2) + age + youngkids + oldkids,
data = PSID1976, subset = participation == "yes")
## Exercise 7.15
wage_olscoef <- sapply(c(-Inf, 0.5, 1, 1.5, 2), function(censpoint)
coef(lm(log(wage) ~ education + experience + I(experience^2),
data = PSID1976[log(PSID1976$wage) > censpoint,])))
wage_mlcoef <- sapply(c(0.5, 1, 1.5, 2), function(censpoint)
coef(tobit(log(wage) ~ education + experience + I(experience^2),
data = PSID1976, left = censpoint)))
##################################
## Choice of Brand for Crackers ##
##################################
## data
library("mlogit")
data("Cracker", package = "mlogit")
head(Cracker, 3)
#> id disp.sunshine disp.keebler disp.nabisco disp.private feat.sunshine
#> 1 1 0 0 0 0 0
#> 2 1 0 0 0 0 0
#> 3 1 1 0 0 0 0
#> feat.keebler feat.nabisco feat.private price.sunshine price.keebler
#> 1 0 0 0 98 88
#> 2 0 0 0 99 109
#> 3 0 0 0 49 109
#> price.nabisco price.private choice
#> 1 120 71 nabisco
#> 2 99 71 nabisco
#> 3 109 78 sunshine
crack <- mlogit.data(Cracker, varying = 2:13, shape = "wide", choice = "choice")
head(crack, 12)
#> ~~~~~~~
#> first 12 observations out of 13168
#> ~~~~~~~
#> id choice alt disp feat price chid idx
#> 1 1 FALSE keebler 0 0 88 1 1:bler
#> 2 1 TRUE nabisco 0 0 120 1 1:isco
#> 3 1 FALSE private 0 0 71 1 1:vate
#> 4 1 FALSE sunshine 0 0 98 1 1:hine
#> 5 1 FALSE keebler 0 0 109 2 2:bler
#> 6 1 TRUE nabisco 0 0 99 2 2:isco
#> 7 1 FALSE private 0 0 71 2 2:vate
#> 8 1 FALSE sunshine 0 0 99 2 2:hine
#> 9 1 FALSE keebler 0 0 109 3 3:bler
#> 10 1 FALSE nabisco 0 0 109 3 3:isco
#> 11 1 FALSE private 0 0 78 3 3:vate
#> 12 1 TRUE sunshine 1 0 49 3 3:hine
#>
#> ~~~ indexes ~~~~
#> chid alt
#> 1 1 keebler
#> 2 1 nabisco
#> 3 1 private
#> 4 1 sunshine
#> 5 2 keebler
#> 6 2 nabisco
#> 7 2 private
#> 8 2 sunshine
#> 9 3 keebler
#> 10 3 nabisco
#> 11 3 private
#> 12 3 sunshine
#> indexes: 1, 2
## Table 5.6 (model 3 probably not fully converged in W&B)
crack$price <- crack$price/100
crack_mlogit1 <- mlogit(choice ~ price | 0, data = crack, reflevel = "private")
crack_mlogit2 <- mlogit(choice ~ price | 1, data = crack, reflevel = "private")
crack_mlogit3 <- mlogit(choice ~ price + feat + disp | 1, data = crack,
reflevel = "private")
lrtest(crack_mlogit1, crack_mlogit2, crack_mlogit3)
#> Likelihood ratio test
#>
#> Model 1: choice ~ price | 0
#> Model 2: choice ~ price | 1
#> Model 3: choice ~ price + feat + disp | 1
#> #Df LogLik Df Chisq Pr(>Chisq)
#> 1 1 -4498.5
#> 2 4 -3364.9 3 2267.243 < 2.2e-16 ***
#> 3 6 -3347.7 2 34.376 3.43e-08 ***
#> ---
#> Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
## IIA test
crack_mlogit_all <- update(crack_mlogit2, reflevel = "nabisco")
crack_mlogit_res <- update(crack_mlogit_all,
alt.subset = c("keebler", "nabisco", "sunshine"))
hmftest(crack_mlogit_all, crack_mlogit_res)
#>
#> Hausman-McFadden test
#>
#> data: crack
#> chisq = 51.592, df = 3, p-value = 3.659e-11
#> alternative hypothesis: IIA is rejected
#>
# }