Skip to contents

Cross-section data originating from the US National Medical Expenditure Survey (NMES) conducted in 1987 and 1988. The NMES is based upon a representative, national probability sample of the civilian non-institutionalized population and individuals admitted to long-term care facilities during 1987. The data are a subsample of individuals ages 66 and over all of whom are covered by Medicare (a public insurance program providing substantial protection against health-care costs).

Usage

data("NMES1988")

Format

A data frame containing 4,406 observations on 19 variables.

visits

Number of physician office visits.

nvisits

Number of non-physician office visits.

ovisits

Number of physician hospital outpatient visits.

novisits

Number of non-physician hospital outpatient visits.

emergency

Emergency room visits.

hospital

Number of hospital stays.

health

Factor indicating self-perceived health status, levels are "poor", "average" (reference category), "excellent".

chronic

Number of chronic conditions.

adl

Factor indicating whether the individual has a condition that limits activities of daily living ("limited") or not ("normal").

region

Factor indicating region, levels are northeast, midwest, west, other (reference category).

age

Age in years (divided by 10).

afam

Factor. Is the individual African-American?

gender

Factor indicating gender.

married

Factor. is the individual married?

school

Number of years of education.

income

Family income in USD 10,000.

employed

Factor. Is the individual employed?

insurance

Factor. Is the individual covered by private insurance?

medicaid

Factor. Is the individual covered by Medicaid?

Source

Journal of Applied Econometrics Data Archive for Deb and Trivedi (1997).

http://qed.econ.queensu.ca/jae/1997-v12.3/deb-trivedi/

References

Cameron, A.C. and Trivedi, P.K. (1998). Regression Analysis of Count Data. Cambridge: Cambridge University Press.

Deb, P., and Trivedi, P.K. (1997). Demand for Medical Care by the Elderly: A Finite Mixture Approach. Journal of Applied Econometrics, 12, 313–336.

Zeileis, A., Kleiber, C., and Jackman, S. (2008). Regression Models for Count Data in R. Journal of Statistical Software, 27(8). doi:10.18637/jss.v027.i08 .

Examples

## packages
library("MASS")
library("pscl")

## select variables for analysis
data("NMES1988")
nmes <- NMES1988[, c(1, 7:8, 13, 15, 18)]

## dependent variable
hist(nmes$visits, breaks = 0:(max(nmes$visits)+1) - 0.5)

plot(table(nmes$visits))


## convenience transformations for exploratory graphics
clog <- function(x) log(x + 0.5)
cfac <- function(x, breaks = NULL) {
  if(is.null(breaks)) breaks <- unique(quantile(x, 0:10/10))
  x <- cut(x, breaks, include.lowest = TRUE, right = FALSE)
  levels(x) <- paste(breaks[-length(breaks)], ifelse(diff(breaks) > 1,
    c(paste("-", breaks[-c(1, length(breaks))] - 1, sep = ""), "+"), ""), sep = "")
  return(x)
}

## bivariate visualization
par(mfrow = c(3, 2))
plot(clog(visits) ~ health, data = nmes, varwidth = TRUE)
plot(clog(visits) ~ cfac(chronic), data = nmes)
plot(clog(visits) ~ insurance, data = nmes, varwidth = TRUE)
plot(clog(visits) ~ gender, data = nmes, varwidth = TRUE)
plot(cfac(visits, c(0:2, 4, 6, 10, 100)) ~ school, data = nmes, breaks = 9)
par(mfrow = c(1, 1))


## Poisson regression
nmes_pois <- glm(visits ~ ., data = nmes, family = poisson)
summary(nmes_pois)
#> 
#> Call:
#> glm(formula = visits ~ ., family = poisson, data = nmes)
#> 
#> Coefficients:
#>                  Estimate Std. Error z value Pr(>|z|)    
#> (Intercept)      1.034542   0.023857  43.364   <2e-16 ***
#> healthpoor       0.318205   0.017479  18.205   <2e-16 ***
#> healthexcellent -0.379045   0.030291 -12.514   <2e-16 ***
#> chronic          0.168793   0.004471  37.755   <2e-16 ***
#> gendermale      -0.108014   0.012943  -8.346   <2e-16 ***
#> school           0.025754   0.001843  13.972   <2e-16 ***
#> insuranceyes     0.216007   0.016872  12.803   <2e-16 ***
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#> 
#> (Dispersion parameter for poisson family taken to be 1)
#> 
#>     Null deviance: 26943  on 4405  degrees of freedom
#> Residual deviance: 23808  on 4399  degrees of freedom
#> AIC: 36597
#> 
#> Number of Fisher Scoring iterations: 5
#> 

## LM test for overdispersion
dispersiontest(nmes_pois)
#> 
#> 	Overdispersion test
#> 
#> data:  nmes_pois
#> z = 11.557, p-value < 2.2e-16
#> alternative hypothesis: true dispersion is greater than 1
#> sample estimates:
#> dispersion 
#>   6.976619 
#> 
dispersiontest(nmes_pois, trafo = 2)
#> 
#> 	Overdispersion test
#> 
#> data:  nmes_pois
#> z = 11.307, p-value < 2.2e-16
#> alternative hypothesis: true alpha is greater than 0
#> sample estimates:
#>     alpha 
#> 0.9497124 
#> 

## sandwich covariance matrix
coeftest(nmes_pois, vcov = sandwich)
#> 
#> z test of coefficients:
#> 
#>                   Estimate Std. Error z value  Pr(>|z|)    
#> (Intercept)      1.0345418  0.0648436 15.9544 < 2.2e-16 ***
#> healthpoor       0.3182048  0.0555787  5.7253 1.032e-08 ***
#> healthexcellent -0.3790454  0.0777311 -4.8764 1.081e-06 ***
#> chronic          0.1687932  0.0121860 13.8514 < 2.2e-16 ***
#> gendermale      -0.1080145  0.0357357 -3.0226  0.002506 ** 
#> school           0.0257542  0.0051316  5.0187 5.202e-07 ***
#> insuranceyes     0.2160070  0.0430293  5.0200 5.167e-07 ***
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#> 

## quasipoisson model
nmes_qpois <- glm(visits ~ ., data = nmes, family = quasipoisson)

## NegBin regression
nmes_nb <- glm.nb(visits ~ ., data = nmes)

## hurdle regression
nmes_hurdle <- hurdle(visits ~ . | chronic + insurance + school + gender,
  data = nmes, dist = "negbin")

## zero-inflated regression model
nmes_zinb <- zeroinfl(visits ~ . | chronic + insurance + school + gender,
  data = nmes, dist = "negbin")

## compare estimated coefficients
fm <- list("ML-Pois" = nmes_pois, "Quasi-Pois" = nmes_qpois, "NB" = nmes_nb,
  "Hurdle-NB" = nmes_hurdle, "ZINB" = nmes_zinb)
round(sapply(fm, function(x) coef(x)[1:7]), digits = 3)
#>                 ML-Pois Quasi-Pois     NB Hurdle-NB   ZINB
#> (Intercept)       1.035      1.035  0.940     1.194  1.198
#> healthpoor        0.318      0.318  0.368     0.383  0.349
#> healthexcellent  -0.379     -0.379 -0.374    -0.368 -0.354
#> chronic           0.169      0.169  0.196     0.148  0.150
#> gendermale       -0.108     -0.108 -0.115    -0.056 -0.072
#> school            0.026      0.026  0.027     0.021  0.022
#> insuranceyes      0.216      0.216  0.250     0.130  0.151

## associated standard errors
round(cbind("ML-Pois" = sqrt(diag(vcov(nmes_pois))),
  "Adj-Pois" = sqrt(diag(sandwich(nmes_pois))),
  sapply(fm[-1], function(x) sqrt(diag(vcov(x)))[1:7])),
  digits = 3)
#>                 ML-Pois Adj-Pois Quasi-Pois    NB Hurdle-NB  ZINB
#> (Intercept)       0.024    0.065      0.063 0.055     0.060 0.057
#> healthpoor        0.017    0.056      0.046 0.049     0.049 0.046
#> healthexcellent   0.030    0.078      0.080 0.062     0.067 0.061
#> chronic           0.004    0.012      0.012 0.012     0.013 0.012
#> gendermale        0.013    0.036      0.034 0.032     0.033 0.032
#> school            0.002    0.005      0.005 0.004     0.005 0.004
#> insuranceyes      0.017    0.043      0.045 0.040     0.044 0.042

## log-likelihoods and number of estimated parameters
rbind(logLik = sapply(fm, function(x) round(logLik(x), digits = 0)),
  Df = sapply(fm, function(x) attr(logLik(x), "df")))
#>        ML-Pois Quasi-Pois     NB Hurdle-NB   ZINB
#> logLik  -18291         NA -12227    -12153 -12155
#> Df           7          8      8        13     13

## predicted number of zeros
round(c("Obs" = sum(nmes$visits < 1),
  "ML-Pois" = sum(dpois(0, fitted(nmes_pois))),
  "Adj-Pois" = NA,
  "Quasi-Pois" = NA,
  "NB" = sum(dnbinom(0, mu = fitted(nmes_nb), size = nmes_nb$theta)),
  "NB-Hurdle" = sum(predict(nmes_hurdle, type = "prob")[,1]),
  "ZINB" = sum(predict(nmes_zinb, type = "prob")[,1])))
#>        Obs    ML-Pois   Adj-Pois Quasi-Pois         NB  NB-Hurdle       ZINB 
#>        683         46         NA         NA        618        683        712 

## coefficients of zero-augmentation models
t(sapply(fm[4:5], function(x) round(x$coefficients$zero, digits = 3)))
#>           (Intercept) chronic insuranceyes school gendermale
#> Hurdle-NB       0.054   0.577        0.742  0.056     -0.406
#> ZINB           -0.098  -1.302       -1.193 -0.087      0.583