Skip to contents

This manual page collects a list of examples from Greene (2003), Econometric Analysis (see References below). Some solutions might not be exact and the list is certainly not complete. If you have suggestions for improvement (preferably in the form of code), please contact the package maintainer.

References

Greene, W.H. (2003). Econometric Analysis, 5th edition. Upper Saddle River, NJ: Prentice Hall. URL https://pages.stern.nyu.edu/~wgreene/Text/tables/tablelist5.htm.

Examples

#> Loading required namespace: sampleSelection
# \donttest{
#####################################
## US consumption data (1970-1979) ##
#####################################

## Example 1.1
## Bivariate regression of consumption expenditure on income
## (annual US data, 1970-1979).
data("USConsump1979", package = "AER")
plot(expenditure ~ income, data = as.data.frame(USConsump1979), pch = 19)
fm <- lm(expenditure ~ income, data = as.data.frame(USConsump1979))
summary(fm)
#> 
#> Call:
#> lm(formula = expenditure ~ income, data = as.data.frame(USConsump1979))
#> 
#> Residuals:
#>     Min      1Q  Median      3Q     Max 
#> -11.291  -6.871   1.909   3.418  11.181 
#> 
#> Coefficients:
#>              Estimate Std. Error t value Pr(>|t|)    
#> (Intercept) -67.58065   27.91071  -2.421   0.0418 *  
#> income        0.97927    0.03161  30.983 1.28e-09 ***
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#> 
#> Residual standard error: 8.193 on 8 degrees of freedom
#> Multiple R-squared:  0.9917,	Adjusted R-squared:  0.9907 
#> F-statistic: 959.9 on 1 and 8 DF,  p-value: 1.28e-09
#> 
## add the fitted regression line to the scatter plot
abline(fm)




#####################################
## US consumption data (1940-1950) ##
#####################################

## data
## Recode the 0/1 wartime indicator as a labelled factor for readable
## coefficient names (waryes) in the regressions below.
data("USConsump1950", package = "AER")
usc <- as.data.frame(USConsump1950)
usc$war <- factor(usc$war, labels = c("no", "yes"))

## Example 2.1
## Scatter plot with years as plotting symbols (type = "n" suppresses
## points so text() can place the year labels instead).
plot(expenditure ~ income, data = usc, type = "n", xlim = c(225, 375), ylim = c(225, 350))
with(usc, text(income, expenditure, time(USConsump1950)))

## single model
fm <- lm(expenditure ~ income, data = usc)
summary(fm)
#> 
#> Call:
#> lm(formula = expenditure ~ income, data = usc)
#> 
#> Residuals:
#>     Min      1Q  Median      3Q     Max 
#> -35.347 -26.440   9.068  20.000  31.642 
#> 
#> Coefficients:
#>             Estimate Std. Error t value Pr(>|t|)  
#> (Intercept)  51.8951    80.8440   0.642   0.5369  
#> income        0.6848     0.2488   2.753   0.0224 *
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#> 
#> Residual standard error: 27.59 on 9 degrees of freedom
#> Multiple R-squared:  0.4571,	Adjusted R-squared:  0.3968 
#> F-statistic: 7.579 on 1 and 9 DF,  p-value: 0.02237
#> 

## different intercepts for war yes/no
## Adding the war factor shifts the intercept for wartime observations.
fm2 <- lm(expenditure ~ income + war, data = usc)
summary(fm2)
#> 
#> Call:
#> lm(formula = expenditure ~ income + war, data = usc)
#> 
#> Residuals:
#>     Min      1Q  Median      3Q     Max 
#> -14.598  -4.418  -2.352   7.242  11.101 
#> 
#> Coefficients:
#>              Estimate Std. Error t value Pr(>|t|)    
#> (Intercept)  14.49540   27.29948   0.531     0.61    
#> income        0.85751    0.08534  10.048 8.19e-06 ***
#> waryes      -50.68974    5.93237  -8.545 2.71e-05 ***
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#> 
#> Residual standard error: 9.195 on 8 degrees of freedom
#> Multiple R-squared:  0.9464,	Adjusted R-squared:  0.933 
#> F-statistic: 70.61 on 2 and 8 DF,  p-value: 8.26e-06
#> 

## compare
## F test of the war dummy (nested models).
anova(fm, fm2)
#> Analysis of Variance Table
#> 
#> Model 1: expenditure ~ income
#> Model 2: expenditure ~ income + war
#>   Res.Df    RSS Df Sum of Sq     F   Pr(>F)    
#> 1      9 6850.0                                
#> 2      8  676.5  1    6173.5 73.01 2.71e-05 ***
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

## visualize
## Dotted: pooled fit; solid: peacetime line from fm2; dashed: wartime line
## (intercept shifted by the waryes coefficient, same slope).
abline(fm, lty = 3)                                   
abline(coef(fm2)[1:2])                                
abline(sum(coef(fm2)[c(1, 3)]), coef(fm2)[2], lty = 2)


## Example 3.2
## Compare R^2 across the pooled, peacetime-only, and dummy-variable fits.
summary(fm)$r.squared
#> [1] 0.4571345
summary(lm(expenditure ~ income, data = usc, subset = war == "no"))$r.squared
#> [1] 0.9369742
summary(fm2)$r.squared
#> [1] 0.9463904

########################
## US investment data ##
########################

data("USInvest", package = "AER")

## Chapter 3 in Greene (2003)
## transform (and round) data to match Table 3.1
## Deflate investment and GNP by the price index (scaled by 0.1), compute
## inflation as the percentage change in the price level (first value 4.4
## is taken as given), and add a linear time trend.
us <- as.data.frame(USInvest)
us$invest <- round(0.1 * us$invest/us$price, digits = 3)
us$gnp <- round(0.1 * us$gnp/us$price, digits = 3)
us$inflation <- c(4.4, round(100 * diff(us$price)/us$price[-15], digits = 2))
us$trend <- 1:15
us <- us[, c(2, 6, 1, 4, 5)]

## p. 22-24
coef(lm(invest ~ trend + gnp, data = us))
#> (Intercept)       trend         gnp 
#> -0.49459760 -0.01700063  0.64781939 
coef(lm(invest ~ gnp, data = us))
#> (Intercept)         gnp 
#> -0.03333061  0.18388271 

## Example 3.1, Table 3.2
## Simple correlations of investment with the regressors ...
cor(us)[1,-1]
#>     trend       gnp  interest inflation 
#> 0.7514213 0.8648613 0.5876756 0.4817416 
## ... and partial correlations, obtained by scaling the inverse of the
## correlation matrix (standard relationship between the precision matrix
## and partial correlations).
pcor <- solve(cor(us))
dcor <- 1/sqrt(diag(pcor))
pcor <- (-pcor * (dcor %o% dcor))[1,-1]

## Table 3.4
## Overall F test: full regression vs. intercept-only model.
fm  <- lm(invest ~ trend + gnp + interest + inflation, data = us)
fm1 <- lm(invest ~ 1, data = us)
anova(fm1, fm)
#> Analysis of Variance Table
#> 
#> Model 1: invest ~ 1
#> Model 2: invest ~ trend + gnp + interest + inflation
#>   Res.Df       RSS Df Sum of Sq      F    Pr(>F)    
#> 1     14 0.0162736                                  
#> 2     10 0.0004394  4  0.015834 90.089 8.417e-08 ***
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

## Example 4.1
## Monte Carlo illustration of the sampling distribution of the OLS slope:
## draw 500 subsamples of size 100 and collect the slope estimates.
set.seed(123)
w <- rnorm(10000)
x <- rnorm(10000)
eps <- 0.5 * w
y <- 0.5 + 0.5 * x + eps
b <- rep(0, 500)
for(i in 1:500) {
  ix <- sample(1:10000, 100)
  b[i] <- lm.fit(cbind(1, x[ix]), y[ix])$coef[2]
}
hist(b, breaks = 20, col = "lightgray")


###############################
## Longley's regression data ##
###############################

## package and data
data("Longley", package = "AER")
library("dynlm")

## Example 4.6
## Fit the employment equation on the full sample, then re-fit on the
## subsample ending in 1961 and compare coefficients (column 1: subsample,
## column 2: full sample) to illustrate the data set's ill-conditioning.
fm1 <- dynlm(employment ~ time(employment) + price + gnp + armedforces,
  data = Longley)
fm2 <- update(fm1, end = 1961)
cbind(coef(fm2), coef(fm1))
#>                           [,1]          [,2]
#> (Intercept)       1.459415e+06  1.169088e+06
#> time(employment) -7.217561e+02 -5.764643e+02
#> price            -1.811230e+02 -1.976807e+01
#> gnp               9.106778e-02  6.439397e-02
#> armedforces      -7.493705e-02 -1.014525e-02

## Figure 4.3
## Standardized residuals with +/- 2 reference bounds.
plot(rstandard(fm2), type = "b", ylim = c(-3, 3))
abline(h = c(-2, 2), lty = 2)



#########################################
## US gasoline market data (1960-1995) ##
#########################################

## data
data("USGasG", package = "AER")

## Greene (2003)
## Example 2.3
## Log-log gasoline demand: per-capita consumption on price, income, and
## new/used car price indexes.
fm <- lm(log(gas/population) ~ log(price) + log(income) + log(newcar) + log(usedcar),
  data = as.data.frame(USGasG))
summary(fm)
#> 
#> Call:
#> lm(formula = log(gas/population) ~ log(price) + log(income) + 
#>     log(newcar) + log(usedcar), data = as.data.frame(USGasG))
#> 
#> Residuals:
#>       Min        1Q    Median        3Q       Max 
#> -0.065042 -0.018842  0.001528  0.020786  0.058084 
#> 
#> Coefficients:
#>               Estimate Std. Error t value Pr(>|t|)    
#> (Intercept)  -12.34184    0.67489 -18.287   <2e-16 ***
#> log(price)    -0.05910    0.03248  -1.819   0.0786 .  
#> log(income)    1.37340    0.07563  18.160   <2e-16 ***
#> log(newcar)   -0.12680    0.12699  -0.998   0.3258    
#> log(usedcar)  -0.11871    0.08134  -1.459   0.1545    
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#> 
#> Residual standard error: 0.03304 on 31 degrees of freedom
#> Multiple R-squared:  0.958,	Adjusted R-squared:  0.9526 
#> F-statistic: 176.7 on 4 and 31 DF,  p-value: < 2.2e-16
#> 

## Example 4.4
## estimates and standard errors (note different offset for intercept)
coef(fm)
#>  (Intercept)   log(price)  log(income)  log(newcar) log(usedcar) 
#> -12.34184054  -0.05909513   1.37339912  -0.12679667  -0.11870847 
sqrt(diag(vcov(fm)))
#>  (Intercept)   log(price)  log(income)  log(newcar) log(usedcar) 
#>   0.67489471   0.03248496   0.07562767   0.12699351   0.08133710 
## confidence interval
confint(fm, parm = "log(income)")
#>                2.5 %   97.5 %
#> log(income) 1.219155 1.527643
## test linear hypothesis
## H0: income elasticity equals 1 (car::linearHypothesis, re-exported by AER).
linearHypothesis(fm, "log(income) = 1")
#> 
#> Linear hypothesis test:
#> log(income) = 1
#> 
#> Model 1: restricted model
#> Model 2: log(gas/population) ~ log(price) + log(income) + log(newcar) + 
#>     log(usedcar)
#> 
#>   Res.Df      RSS Df Sum of Sq      F   Pr(>F)    
#> 1     32 0.060445                                 
#> 2     31 0.033837  1  0.026608 24.377 2.57e-05 ***
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

## Figure 7.5
## Color observations by whether they fall after the 1973 oil shock.
plot(price ~ gas, data = as.data.frame(USGasG), pch = 19,
  col = (time(USGasG) > 1973) + 1)
legend("topleft", legend = c("after 1973", "up to 1973"), pch = 19, col = 2:1, bty = "n")


## Example 7.6
## re-used in Example 8.3
## linear time trend
ltrend <- 1:nrow(USGasG)
## shock factor
## Two-level factor splitting the sample at the 1973 oil shock.
shock <- factor(time(USGasG) > 1973, levels = c(FALSE, TRUE), labels = c("before", "after"))

## 1960-1995
## Full-sample model with a linear time trend.
fm1 <- lm(log(gas/population) ~ log(income) + log(price) + log(newcar) + log(usedcar) + ltrend,
  data = as.data.frame(USGasG))
summary(fm1)
#> 
#> Call:
#> lm(formula = log(gas/population) ~ log(income) + log(price) + 
#>     log(newcar) + log(usedcar) + ltrend, data = as.data.frame(USGasG))
#> 
#> Residuals:
#>       Min        1Q    Median        3Q       Max 
#> -0.055238 -0.017715  0.003659  0.016481  0.053522 
#> 
#> Coefficients:
#>                Estimate Std. Error t value Pr(>|t|)    
#> (Intercept)  -17.385790   1.679289 -10.353 2.03e-11 ***
#> log(income)    1.954626   0.192854  10.135 3.34e-11 ***
#> log(price)    -0.115530   0.033479  -3.451  0.00168 ** 
#> log(newcar)    0.205282   0.152019   1.350  0.18700    
#> log(usedcar)  -0.129274   0.071412  -1.810  0.08028 .  
#> ltrend        -0.019118   0.005957  -3.210  0.00316 ** 
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#> 
#> Residual standard error: 0.02898 on 30 degrees of freedom
#> Multiple R-squared:  0.9687,	Adjusted R-squared:  0.9635 
#> F-statistic: 185.8 on 5 and 30 DF,  p-value: < 2.2e-16
#> 
## pooled
## Same slopes before/after the shock, but a level shift (shock main effect).
fm2 <- lm(
  log(gas/population) ~ shock + log(income) + log(price) + log(newcar) + log(usedcar) + ltrend,
  data = as.data.frame(USGasG))
summary(fm2)
#> 
#> Call:
#> lm(formula = log(gas/population) ~ shock + log(income) + log(price) + 
#>     log(newcar) + log(usedcar) + ltrend, data = as.data.frame(USGasG))
#> 
#> Residuals:
#>       Min        1Q    Median        3Q       Max 
#> -0.045360 -0.019697  0.003931  0.015112  0.047550 
#> 
#> Coefficients:
#>                Estimate Std. Error t value Pr(>|t|)    
#> (Intercept)  -16.374402   1.456263 -11.244 4.33e-12 ***
#> shockafter     0.077311   0.021872   3.535  0.00139 ** 
#> log(income)    1.838167   0.167258  10.990 7.43e-12 ***
#> log(price)    -0.178005   0.033508  -5.312 1.06e-05 ***
#> log(newcar)    0.209842   0.129267   1.623  0.11534    
#> log(usedcar)  -0.128132   0.060721  -2.110  0.04359 *  
#> ltrend        -0.016862   0.005105  -3.303  0.00255 ** 
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#> 
#> Residual standard error: 0.02464 on 29 degrees of freedom
#> Multiple R-squared:  0.9781,	Adjusted R-squared:  0.9736 
#> F-statistic: 216.3 on 6 and 29 DF,  p-value: < 2.2e-16
#> 
## segmented
## Nesting (shock/...) gives a separate set of coefficients for each regime.
fm3 <- lm(
  log(gas/population) ~ shock/(log(income) + log(price) + log(newcar) + log(usedcar) + ltrend),
  data = as.data.frame(USGasG))
summary(fm3)
#> 
#> Call:
#> lm(formula = log(gas/population) ~ shock/(log(income) + log(price) + 
#>     log(newcar) + log(usedcar) + ltrend), data = as.data.frame(USGasG))
#> 
#> Residuals:
#>       Min        1Q    Median        3Q       Max 
#> -0.027349 -0.006332  0.001295  0.007159  0.022016 
#> 
#> Coefficients:
#>                          Estimate Std. Error t value Pr(>|t|)    
#> (Intercept)              -4.13439    5.03963  -0.820 0.420075    
#> shockafter               -4.74111    5.51576  -0.860 0.398538    
#> shockbefore:log(income)   0.42400    0.57973   0.731 0.471633    
#> shockafter:log(income)    1.01408    0.24904   4.072 0.000439 ***
#> shockbefore:log(price)    0.09455    0.24804   0.381 0.706427    
#> shockafter:log(price)    -0.24237    0.03490  -6.946  3.5e-07 ***
#> shockbefore:log(newcar)   0.58390    0.21670   2.695 0.012665 *  
#> shockafter:log(newcar)    0.33017    0.15789   2.091 0.047277 *  
#> shockbefore:log(usedcar) -0.33462    0.15215  -2.199 0.037738 *  
#> shockafter:log(usedcar)  -0.05537    0.04426  -1.251 0.222972    
#> shockbefore:ltrend        0.02637    0.01762   1.497 0.147533    
#> shockafter:ltrend        -0.01262    0.00329  -3.835 0.000798 ***
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#> 
#> Residual standard error: 0.01488 on 24 degrees of freedom
#> Multiple R-squared:  0.9934,	Adjusted R-squared:  0.9904 
#> F-statistic: 328.5 on 11 and 24 DF,  p-value: < 2.2e-16
#> 

## Chow test
## Equivalent to comparing the segmented model with the restricted
## (single-regime) model via an F test ...
anova(fm3, fm1)
#> Analysis of Variance Table
#> 
#> Model 1: log(gas/population) ~ shock/(log(income) + log(price) + log(newcar) + 
#>     log(usedcar) + ltrend)
#> Model 2: log(gas/population) ~ log(income) + log(price) + log(newcar) + 
#>     log(usedcar) + ltrend
#>   Res.Df       RSS Df Sum of Sq      F    Pr(>F)    
#> 1     24 0.0053144                                  
#> 2     30 0.0251878 -6 -0.019873 14.958 4.595e-07 ***
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
## ... or directly via strucchange's Chow test at 1973(1).
library("strucchange")
sctest(log(gas/population) ~ log(income) + log(price) + log(newcar) + log(usedcar) + ltrend,
  data = USGasG, point = c(1973, 1), type = "Chow")
#> 
#> 	Chow test
#> 
#> data:  log(gas/population) ~ log(income) + log(price) + log(newcar) +     log(usedcar) + ltrend
#> F = 14.958, p-value = 4.595e-07
#> 
## Recursive CUSUM test
rcus <- efp(log(gas/population) ~ log(income) + log(price) + log(newcar) + log(usedcar) + ltrend,
   data = USGasG, type = "Rec-CUSUM")
plot(rcus)

sctest(rcus)
#> 
#> 	Recursive CUSUM test
#> 
#> data:  rcus
#> S = 1.4977, p-value = 0.0002437
#> 
## Note: Greene's remark that the break is in 1984 (where the process crosses its boundary)
## is wrong. The break appears to be no later than 1976.

## Example 12.2
## dynlm is used below for time-series regressions with lag/diff operators.
library("dynlm")
## Residual diagnostic plot for a fitted regression.
## obj ..... a fitted model object (e.g. from lm() or dynlm())
## bound ... if TRUE, draw horizontal +/- 2 * sigma bands in red
## Draws the residual series over time with a zero line and a grid;
## called for its side effect (a plot), returns nothing useful.
resplot <- function(obj, bound = TRUE) {
  r <- residuals(obj)
  s <- summary(obj)$sigma
  plot(r, ylab = "Residuals", xlab = "Year")
  grid()
  abline(h = 0)
  if (bound) {
    abline(h = c(-2, 2) * s, col = "red")
  }
  lines(r)
}
## Residual plots for gasoline demand models of increasing size.
resplot(dynlm(log(gas/population) ~ log(price), data = USGasG))

resplot(dynlm(log(gas/population) ~ log(price) + log(income), data = USGasG))

resplot(dynlm(log(gas/population) ~ log(price) + log(income) + log(newcar) + log(usedcar) +
  log(transport) + log(nondurable) + log(durable) +log(service) + ltrend, data = USGasG))

## different shock variable than in 7.6
## Here the sample split is at 1974 rather than 1973.
shock <- factor(time(USGasG) > 1974, levels = c(FALSE, TRUE), labels = c("before", "after"))
resplot(dynlm(log(gas/population) ~ shock/(log(price) + log(income) + log(newcar) + log(usedcar) +
  log(transport) + log(nondurable) + log(durable) + log(service) + ltrend), data = USGasG))

## NOTE: something seems to be wrong with the sigma estimates in the `full' models

## Table 12.4, OLS
fm <- dynlm(log(gas/population) ~ log(price) + log(income) + log(newcar) + log(usedcar),
  data = USGasG)
summary(fm)
#> 
#> Time series regression with "ts" data:
#> Start = 1960, End = 1995
#> 
#> Call:
#> dynlm(formula = log(gas/population) ~ log(price) + log(income) + 
#>     log(newcar) + log(usedcar), data = USGasG)
#> 
#> Residuals:
#>       Min        1Q    Median        3Q       Max 
#> -0.065042 -0.018842  0.001528  0.020786  0.058084 
#> 
#> Coefficients:
#>               Estimate Std. Error t value Pr(>|t|)    
#> (Intercept)  -12.34184    0.67489 -18.287   <2e-16 ***
#> log(price)    -0.05910    0.03248  -1.819   0.0786 .  
#> log(income)    1.37340    0.07563  18.160   <2e-16 ***
#> log(newcar)   -0.12680    0.12699  -0.998   0.3258    
#> log(usedcar)  -0.11871    0.08134  -1.459   0.1545    
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#> 
#> Residual standard error: 0.03304 on 31 degrees of freedom
#> Multiple R-squared:  0.958,	Adjusted R-squared:  0.9526 
#> F-statistic: 176.7 on 4 and 31 DF,  p-value: < 2.2e-16
#> 
resplot(fm, bound = FALSE)

## Durbin-Watson test for first-order residual autocorrelation (lmtest).
dwtest(fm)
#> 
#> 	Durbin-Watson test
#> 
#> data:  fm
#> DW = 0.6047, p-value = 3.387e-09
#> alternative hypothesis: true autocorrelation is greater than 0
#> 

## ML
## Re-estimate the same regression with AR(1) errors by maximum likelihood,
## passing the regressors via xreg.
g <- as.data.frame(USGasG)
y <- log(g$gas/g$population)
X <- as.matrix(cbind(log(g$price), log(g$income), log(g$newcar), log(g$usedcar)))
arima(y, order = c(1, 0, 0), xreg = X)
#> 
#> Call:
#> arima(x = y, order = c(1, 0, 0), xreg = X)
#> 
#> Coefficients:
#>          ar1  intercept       X1      X2      X3       X4
#>       0.9304    -9.7548  -0.2082  1.0818  0.0884  -0.0350
#> s.e.  0.0554     1.1262   0.0337  0.1269  0.1186   0.0612
#> 
#> sigma^2 estimated as 0.0003094:  log likelihood = 93.37,  aic = -172.74

#######################################
## US macroeconomic data (1950-2000) ##
#######################################
## data and trend
data("USMacroG", package = "AER")
ltrend <- 0:(nrow(USMacroG) - 1)

## Example 5.3
## OLS and IV regression
## IV uses lagged consumption and lagged GDP as instruments for GDP.
library("dynlm")
fm_ols <- dynlm(consumption ~ gdp, data = USMacroG)
fm_iv <- dynlm(consumption ~ gdp | L(consumption) + L(gdp), data = USMacroG)

## Hausman statistic
## Computed "by hand" from the coefficient and covariance differences;
## ginv() is used because the difference matrix can be singular.
library("MASS")
b_diff <- coef(fm_iv) - coef(fm_ols)
v_diff <- summary(fm_iv)$cov.unscaled - summary(fm_ols)$cov.unscaled
(t(b_diff) %*% ginv(v_diff) %*% b_diff) / summary(fm_ols)$sigma^2
#>          [,1]
#> [1,] 9.703933

## Wu statistic
## t statistic on the fitted values of the first-stage regression.
auxreg <- dynlm(gdp ~ L(consumption) + L(gdp), data = USMacroG)
coeftest(dynlm(consumption ~ gdp + fitted(auxreg), data = USMacroG))[3,3] 
#> [1] 4.944502
## agrees with Greene (but not with errata)

## Example 6.1
## Table 6.1
## Unrestricted investment equation vs. the restricted version that
## forces the coefficients on tbill and inflation to be equal and opposite
## (real interest rate).
fm6.1 <- dynlm(log(invest) ~ tbill + inflation + log(gdp) + ltrend, data = USMacroG)
fm6.3 <- dynlm(log(invest) ~ I(tbill - inflation) + log(gdp) + ltrend, data = USMacroG)
summary(fm6.1)
#> 
#> Time series regression with "ts" data:
#> Start = 1950(2), End = 2000(4)
#> 
#> Call:
#> dynlm(formula = log(invest) ~ tbill + inflation + log(gdp) + 
#>     ltrend, data = USMacroG)
#> 
#> Residuals:
#>      Min       1Q   Median       3Q      Max 
#> -0.22313 -0.05540 -0.00312  0.04246  0.31989 
#> 
#> Coefficients:
#>              Estimate Std. Error t value Pr(>|t|)    
#> (Intercept) -9.134091   1.366459  -6.684  2.3e-10 ***
#> tbill       -0.008598   0.003195  -2.691  0.00774 ** 
#> inflation    0.003306   0.002337   1.415  0.15872    
#> log(gdp)     1.930156   0.183272  10.532  < 2e-16 ***
#> ltrend      -0.005659   0.001488  -3.803  0.00019 ***
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#> 
#> Residual standard error: 0.08618 on 198 degrees of freedom
#>   (1 observation deleted due to missingness)
#> Multiple R-squared:  0.9798,	Adjusted R-squared:  0.9793 
#> F-statistic:  2395 on 4 and 198 DF,  p-value: < 2.2e-16
#> 
summary(fm6.3)
#> 
#> Time series regression with "ts" data:
#> Start = 1950(2), End = 2000(4)
#> 
#> Call:
#> dynlm(formula = log(invest) ~ I(tbill - inflation) + log(gdp) + 
#>     ltrend, data = USMacroG)
#> 
#> Residuals:
#>       Min        1Q    Median        3Q       Max 
#> -0.227897 -0.054542 -0.002435  0.039993  0.313928 
#> 
#> Coefficients:
#>                       Estimate Std. Error t value Pr(>|t|)    
#> (Intercept)          -7.907158   1.200631  -6.586 3.94e-10 ***
#> I(tbill - inflation) -0.004427   0.002270  -1.950  0.05260 .  
#> log(gdp)              1.764062   0.160561  10.987  < 2e-16 ***
#> ltrend               -0.004403   0.001331  -3.308  0.00111 ** 
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#> 
#> Residual standard error: 0.0867 on 199 degrees of freedom
#>   (1 observation deleted due to missingness)
#> Multiple R-squared:  0.9794,	Adjusted R-squared:  0.9791 
#> F-statistic:  3154 on 3 and 199 DF,  p-value: < 2.2e-16
#> 
## Residual sums of squares and one off-diagonal covariance entry.
deviance(fm6.1)
#> [1] 1.470565
deviance(fm6.3)
#> [1] 1.495811
vcov(fm6.1)[2,3] 
#> [1] -3.717454e-06

## F test
## H0: tbill and inflation coefficients sum to zero (the restriction
## imposed by fm6.3).
linearHypothesis(fm6.1, "tbill + inflation = 0")
#> 
#> Linear hypothesis test:
#> tbill  + inflation = 0
#> 
#> Model 1: restricted model
#> Model 2: log(invest) ~ tbill + inflation + log(gdp) + ltrend
#> 
#>   Res.Df    RSS Df Sum of Sq      F  Pr(>F)  
#> 1    199 1.4958                              
#> 2    198 1.4706  1  0.025246 3.3991 0.06673 .
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
## alternatively
anova(fm6.1, fm6.3)
#> Analysis of Variance Table
#> 
#> Model 1: log(invest) ~ tbill + inflation + log(gdp) + ltrend
#> Model 2: log(invest) ~ I(tbill - inflation) + log(gdp) + ltrend
#>   Res.Df    RSS Df Sum of Sq      F  Pr(>F)  
#> 1    198 1.4706                              
#> 2    199 1.4958 -1 -0.025246 3.3991 0.06673 .
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
## t statistic
## For a single restriction, t = sqrt(F).
sqrt(anova(fm6.1, fm6.3)[2,5])
#> [1] 1.843672
 
 
## Example 6.3
## Distributed lag model:
## log(Ct) = b0 + b1 * log(Yt) + b2 * log(C(t-1)) + u
## (`us` is computed here but the model is fit directly from USMacroG)
us <- log(USMacroG[, c(2, 5)])
fm_distlag <- dynlm(log(consumption) ~ log(dpi) + L(log(consumption)),
  data = USMacroG)
summary(fm_distlag)
#> 
#> Time series regression with "ts" data:
#> Start = 1950(2), End = 2000(4)
#> 
#> Call:
#> dynlm(formula = log(consumption) ~ log(dpi) + L(log(consumption)), 
#>     data = USMacroG)
#> 
#> Residuals:
#>       Min        1Q    Median        3Q       Max 
#> -0.035243 -0.004606  0.000496  0.005147  0.041754 
#> 
#> Coefficients:
#>                     Estimate Std. Error t value Pr(>|t|)    
#> (Intercept)         0.003142   0.010553   0.298  0.76624    
#> log(dpi)            0.074958   0.028727   2.609  0.00976 ** 
#> L(log(consumption)) 0.924625   0.028594  32.337  < 2e-16 ***
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#> 
#> Residual standard error: 0.008742 on 200 degrees of freedom
#> Multiple R-squared:  0.9997,	Adjusted R-squared:  0.9997 
#> F-statistic: 3.476e+05 on 2 and 200 DF,  p-value: < 2.2e-16
#> 

## estimate and test long-run MPC 
## Long-run marginal propensity to consume: b1 / (1 - b2).
coef(fm_distlag)[2]/(1-coef(fm_distlag)[3])
#>  log(dpi) 
#> 0.9944606 
## H0: long-run MPC equals 1, i.e. b1 + b2 = 1.
linearHypothesis(fm_distlag, "log(dpi) + L(log(consumption)) = 1")
#> 
#> Linear hypothesis test:
#> log(dpi)  + L(log(consumption)) = 1
#> 
#> Model 1: restricted model
#> Model 2: log(consumption) ~ log(dpi) + L(log(consumption))
#> 
#>   Res.Df      RSS Df  Sum of Sq      F Pr(>F)
#> 1    201 0.015295                            
#> 2    200 0.015286  1 9.1197e-06 0.1193 0.7301
## correct, see errata
 
## Example 6.4
## predict investment in 2001(1)
predict(fm6.1, interval = "prediction",
  newdata = data.frame(tbill = 4.48, inflation = 5.262, gdp = 9316.8, ltrend = 204))
#>        fit      lwr      upr
#> 1 7.331178 7.158229 7.504126
#> 

## Example 7.7
## no GMM available in "strucchange"
## using OLS instead yields
fs <- Fstats(log(m1/cpi) ~ log(gdp) + tbill, data = USMacroG,
  vcov = NeweyWest, from = c(1957, 3), to = c(1991, 3))
plot(fs)

## which looks somewhat similar ...
 
 
## Example 8.2
## Two non-nested consumption functions:
## Ct = b0 + b1*Yt + b2*Y(t-1) + v
fm1 <- dynlm(consumption ~ dpi + L(dpi), data = USMacroG)
## Ct = a0 + a1*Yt + a2*C(t-1) + u
fm2 <- dynlm(consumption ~ dpi + L(consumption), data = USMacroG)

## Cox test in both directions:
coxtest(fm1, fm2)
#> Cox test
#> 
#> Model 1: consumption ~ dpi + L(dpi)
#> Model 2: consumption ~ dpi + L(consumption)
#>                 Estimate Std. Error     z value  Pr(>|z|)    
#> fitted(M1) ~ M2 -284.908    0.01862 -15304.2817 < 2.2e-16 ***
#> fitted(M2) ~ M1    1.491    0.42735      3.4894 0.0004842 ***
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
## ... and do the same for jtest() and encomptest().
## Notice that in this particular case two of them are coincident.
jtest(fm1, fm2)
#> J test
#> 
#> Model 1: consumption ~ dpi + L(dpi)
#> Model 2: consumption ~ dpi + L(consumption)
#>                 Estimate Std. Error t value  Pr(>|t|)    
#> M1 + fitted(M2)   1.0145    0.01614 62.8605 < 2.2e-16 ***
#> M2 + fitted(M1) -10.6766    1.48542 -7.1876 1.299e-11 ***
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
encomptest(fm1, fm2)
#> Encompassing test
#> 
#> Model 1: consumption ~ dpi + L(dpi)
#> Model 2: consumption ~ dpi + L(consumption)
#> Model E: consumption ~ dpi + L(dpi) + L(consumption)
#>           Res.Df Df        F    Pr(>F)    
#> M1 vs. ME    199 -1 3951.448 < 2.2e-16 ***
#> M2 vs. ME    199 -1   51.661 1.299e-11 ***
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
## encomptest could also be performed `by hand' via
## a Wald test of each model against the encompassing model fmE.
fmE <- dynlm(consumption ~ dpi + L(dpi) + L(consumption), data = USMacroG)
waldtest(fm1, fmE, fm2)
#> Wald test
#> 
#> Model 1: consumption ~ dpi + L(dpi)
#> Model 2: consumption ~ dpi + L(dpi) + L(consumption)
#> Model 3: consumption ~ dpi + L(consumption)
#>   Res.Df Df        F    Pr(>F)    
#> 1    200                          
#> 2    199  1 3951.448 < 2.2e-16 ***
#> 3    200 -1   51.661 1.299e-11 ***
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1


## Table 9.1
## Linear vs. nonlinear consumption function; the OLS estimates provide
## starting values for the NLS fit of consumption = alpha + beta * dpi^gamma.
fm_ols <- lm(consumption ~ dpi, data = as.data.frame(USMacroG))
fm_nls <- nls(consumption ~ alpha + beta * dpi^gamma,
  start = list(alpha = coef(fm_ols)[1], beta = coef(fm_ols)[2], gamma = 1),
  control = nls.control(maxiter = 100), data = as.data.frame(USMacroG))
summary(fm_ols)
#> 
#> Call:
#> lm(formula = consumption ~ dpi, data = as.data.frame(USMacroG))
#> 
#> Residuals:
#>     Min      1Q  Median      3Q     Max 
#> -191.42  -56.08    1.38   49.53  324.14 
#> 
#> Coefficients:
#>               Estimate Std. Error t value Pr(>|t|)    
#> (Intercept) -80.354749  14.305852  -5.617 6.38e-08 ***
#> dpi           0.921686   0.003872 238.054  < 2e-16 ***
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#> 
#> Residual standard error: 87.21 on 202 degrees of freedom
#> Multiple R-squared:  0.9964,	Adjusted R-squared:  0.9964 
#> F-statistic: 5.667e+04 on 1 and 202 DF,  p-value: < 2.2e-16
#> 
summary(fm_nls)
#> 
#> Formula: consumption ~ alpha + beta * dpi^gamma
#> 
#> Parameters:
#>        Estimate Std. Error t value Pr(>|t|)    
#> alpha 458.79850   22.50142  20.390   <2e-16 ***
#> beta    0.10085    0.01091   9.244   <2e-16 ***
#> gamma   1.24483    0.01205 103.263   <2e-16 ***
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#> 
#> Residual standard error: 50.09 on 201 degrees of freedom
#> 
#> Number of iterations to convergence: 64 
#> Achieved convergence tolerance: 1.701e-06
#> 
deviance(fm_ols)
#> [1] 1536322
deviance(fm_nls)
#> [1] 504403.2
vcov(fm_nls)
#>             alpha          beta         gamma
#> alpha 506.3137170 -0.2345464898  0.2575688410
#> beta   -0.2345465  0.0001190378 -0.0001314916
#> gamma   0.2575688 -0.0001314916  0.0001453206

## Example 9.7
## F test
## Refit the linear model via nls() so the two fits are directly comparable
## in anova().
fm_nls2 <- nls(consumption ~ alpha + beta * dpi,
  start = list(alpha = coef(fm_ols)[1], beta = coef(fm_ols)[2]),
  control = nls.control(maxiter = 100), data = as.data.frame(USMacroG))
anova(fm_nls, fm_nls2)
#> Analysis of Variance Table
#> 
#> Model 1: consumption ~ alpha + beta * dpi^gamma
#> Model 2: consumption ~ alpha + beta * dpi
#>   Res.Df Res.Sum Sq Df   Sum Sq F value    Pr(>F)    
#> 1    201     504403                                  
#> 2    202    1536322 -1 -1031919  411.21 < 2.2e-16 ***
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
## Wald test
## H0: gamma = 1 (the model collapses to the linear specification).
linearHypothesis(fm_nls, "gamma = 1")
#> 
#> Linear hypothesis test:
#> gamma = 1
#> 
#> Model 1: restricted model
#> Model 2: consumption ~ alpha + beta * dpi^gamma
#> 
#>   Res.Df Df  Chisq Pr(>Chisq)    
#> 1    202                         
#> 2    201  1 412.47  < 2.2e-16 ***
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

## Example 9.8, Table 9.2
## PE test for linear vs. log-linear money demand.
usm <- USMacroG[, c("m1", "tbill", "gdp")]
fm_lin <- lm(m1 ~ tbill + gdp, data = usm)
fm_log <- lm(m1 ~ tbill + gdp, data = log(usm))
## PE auxiliary regressions
## Each adds the discrepancy between the rival model's fit and its own.
aux_lin <- lm(m1 ~ tbill + gdp + I(fitted(fm_log) - log(fitted(fm_lin))), data = usm)
aux_log <- lm(m1 ~ tbill + gdp + I(fitted(fm_lin) - exp(fitted(fm_log))), data = log(usm))
coeftest(aux_lin)[4,]
#>     Estimate   Std. Error      t value     Pr(>|t|) 
#> 2.093544e+02 2.675803e+01 7.823985e+00 2.900156e-13 
coeftest(aux_log)[4,]
#>      Estimate    Std. Error       t value      Pr(>|t|) 
#> -4.188803e-05  2.613270e-04 -1.602897e-01  8.728146e-01 
## matches results from errata
## With lmtest >= 0.9-24:
## petest(fm_lin, fm_log)

## Example 12.1
fm_m1 <- dynlm(log(m1) ~ log(gdp) + log(cpi), data = USMacroG)
summary(fm_m1)
#> 
#> Time series regression with "ts" data:
#> Start = 1950(1), End = 2000(4)
#> 
#> Call:
#> dynlm(formula = log(m1) ~ log(gdp) + log(cpi), data = USMacroG)
#> 
#> Residuals:
#>       Min        1Q    Median        3Q       Max 
#> -0.230620 -0.026026  0.008483  0.036407  0.205929 
#> 
#> Coefficients:
#>             Estimate Std. Error t value Pr(>|t|)    
#> (Intercept) -1.63306    0.22857  -7.145 1.62e-11 ***
#> log(gdp)     0.28705    0.04738   6.058 6.68e-09 ***
#> log(cpi)     0.97181    0.03377  28.775  < 2e-16 ***
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#> 
#> Residual standard error: 0.08288 on 201 degrees of freedom
#> Multiple R-squared:  0.9895,	Adjusted R-squared:  0.9894 
#> F-statistic:  9489 on 2 and 201 DF,  p-value: < 2.2e-16
#> 

## Figure 12.1
par(las = 1)
plot(0, 0, type = "n", axes = FALSE,
     xlim = c(1950, 2002), ylim = c(-0.3, 0.225),
     xaxs = "i", yaxs = "i",
     xlab = "Quarter", ylab = "", main = "Least Squares Residuals")
box()
axis(1, at = c(1950, 1963, 1976, 1989, 2002))
axis(2, seq(-0.3, 0.225, by = 0.075))
grid(4, 7, col = grey(0.6))
abline(0, 0)
lines(residuals(fm_m1), lwd = 2)


## Example 12.3
fm_pc <- dynlm(d(inflation) ~ unemp, data = USMacroG)
summary(fm_pc)
#> 
#> Time series regression with "ts" data:
#> Start = 1950(3), End = 2000(4)
#> 
#> Call:
#> dynlm(formula = d(inflation) ~ unemp, data = USMacroG)
#> 
#> Residuals:
#>      Min       1Q   Median       3Q      Max 
#> -11.2791  -1.6635  -0.0117   1.7813   8.5472 
#> 
#> Coefficients:
#>             Estimate Std. Error t value Pr(>|t|)
#> (Intercept)  0.49189    0.74048   0.664    0.507
#> unemp       -0.09013    0.12579  -0.717    0.474
#> 
#> Residual standard error: 2.822 on 200 degrees of freedom
#>   (1 observation deleted due to missingness)
#> Multiple R-squared:  0.002561,	Adjusted R-squared:  -0.002427 
#> F-statistic: 0.5134 on 1 and 200 DF,  p-value: 0.4745
#> 
## Figure 12.3
plot(residuals(fm_pc))

## natural unemployment rate
coef(fm_pc)[1]/coef(fm_pc)[2]
#> (Intercept) 
#>    -5.45749 
## autocorrelation
res <- residuals(fm_pc)
summary(dynlm(res ~ L(res)))
#> 
#> Time series regression with "ts" data:
#> Start = 1950(4), End = 2000(4)
#> 
#> Call:
#> dynlm(formula = res ~ L(res))
#> 
#> Residuals:
#>     Min      1Q  Median      3Q     Max 
#> -9.8694 -1.4800  0.0718  1.4990  8.3258 
#> 
#> Coefficients:
#>             Estimate Std. Error t value Pr(>|t|)    
#> (Intercept) -0.02155    0.17854  -0.121    0.904    
#> L(res)      -0.42630    0.06355  -6.708    2e-10 ***
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#> 
#> Residual standard error: 2.531 on 199 degrees of freedom
#> Multiple R-squared:  0.1844,	Adjusted R-squared:  0.1803 
#> F-statistic: 44.99 on 1 and 199 DF,  p-value: 2.002e-10
#> 

## Example 12.4
coeftest(fm_m1)
#> 
#> t test of coefficients:
#> 
#>              Estimate Std. Error t value  Pr(>|t|)    
#> (Intercept) -1.633057   0.228568 -7.1447 1.625e-11 ***
#> log(gdp)     0.287051   0.047384  6.0580 6.683e-09 ***
#> log(cpi)     0.971812   0.033773 28.7749 < 2.2e-16 ***
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#> 
## HAC standard errors: Newey-West covariance with 5 lags
coeftest(fm_m1, vcov = NeweyWest(fm_m1, lag = 5))
#> 
#> t test of coefficients:
#> 
#>             Estimate Std. Error t value Pr(>|t|)  
#> (Intercept) -1.63306    1.07337 -1.5214  0.12972  
#> log(gdp)     0.28705    0.35516  0.8082  0.41992  
#> log(cpi)     0.97181    0.38998  2.4919  0.01351 *
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#> 
## R^2 of the money demand regression in levels
summary(fm_m1)$r.squared
#> [1] 0.9895195
## Durbin-Watson test for first-order autocorrelation
dwtest(fm_m1)
#> 
#> 	Durbin-Watson test
#> 
#> data:  fm_m1
#> DW = 0.024767, p-value < 2.2e-16
#> alternative hypothesis: true autocorrelation is greater than 0
#> 
## first-order autocorrelation of the residuals
as.vector(acf(residuals(fm_m1), plot = FALSE)$acf)[2]
#> [1] 0.9832024
## matches Tab. 12.1 errata and Greene 6e, apart from Newey-West SE


#################################################
## Cost function of electricity producers 1870 ##
#################################################

## Example 5.6: a generalized Cobb-Douglas cost function
## (fuel price as numeraire; only rows 1-123 of the data set are used)
data("Electricity1970", package = "AER")
fm <- lm(log(cost/fuel) ~ log(output) + I(log(output)^2/2) + 
  log(capital/fuel) + log(labor/fuel), data=Electricity1970[1:123,])


####################################################
## SIC 33: Production for primary metals industry ##
####################################################

## data
data("SIC33", package = "AER")

## Example 6.2
## Translog model (all variables in logs, with second-order terms)
fm_tl <- lm(
  output ~ labor + capital + I(0.5 * labor^2) + I(0.5 * capital^2) + I(labor * capital),
  data = log(SIC33))
## Cobb-Douglas model (first-order terms only)
fm_cb <- lm(output ~ labor + capital, data = log(SIC33))

## Table 6.2 in Greene (2003)
## residual sums of squares of the two models
deviance(fm_tl)
#> [1] 0.6799272
deviance(fm_cb)
#> [1] 0.8516337
summary(fm_tl)
#> 
#> Call:
#> lm(formula = output ~ labor + capital + I(0.5 * labor^2) + I(0.5 * 
#>     capital^2) + I(labor * capital), data = log(SIC33))
#> 
#> Residuals:
#>      Min       1Q   Median       3Q      Max 
#> -0.33990 -0.10106 -0.01238  0.04605  0.39281 
#> 
#> Coefficients:
#>                    Estimate Std. Error t value Pr(>|t|)  
#> (Intercept)         0.94420    2.91075   0.324   0.7489  
#> labor               3.61364    1.54807   2.334   0.0296 *
#> capital            -1.89311    1.01626  -1.863   0.0765 .
#> I(0.5 * labor^2)   -0.96405    0.70738  -1.363   0.1874  
#> I(0.5 * capital^2)  0.08529    0.29261   0.291   0.7735  
#> I(labor * capital)  0.31239    0.43893   0.712   0.4845  
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#> 
#> Residual standard error: 0.1799 on 21 degrees of freedom
#> Multiple R-squared:  0.9549,	Adjusted R-squared:  0.9441 
#> F-statistic: 88.85 on 5 and 21 DF,  p-value: 2.121e-13
#> 
## Cobb-Douglas model fit
summary(fm_cb)
#> 
#> Call:
#> lm(formula = output ~ labor + capital, data = log(SIC33))
#> 
#> Residuals:
#>      Min       1Q   Median       3Q      Max 
#> -0.30385 -0.10119 -0.01819  0.05582  0.50559 
#> 
#> Coefficients:
#>             Estimate Std. Error t value Pr(>|t|)    
#> (Intercept)  1.17064    0.32678   3.582  0.00150 ** 
#> labor        0.60300    0.12595   4.787 7.13e-05 ***
#> capital      0.37571    0.08535   4.402  0.00019 ***
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#> 
#> Residual standard error: 0.1884 on 24 degrees of freedom
#> Multiple R-squared:  0.9435,	Adjusted R-squared:  0.9388 
#> F-statistic: 200.2 on 2 and 24 DF,  p-value: 1.067e-15
#> 
## coefficient covariance matrix of the translog model
vcov(fm_tl)
#>                    (Intercept)       labor     capital I(0.5 * labor^2)
#> (Intercept)         8.47248687 -2.38790338 -0.33129294      -0.08760011
#> labor              -2.38790338  2.39652901 -1.23101576      -0.66580411
#> capital            -0.33129294 -1.23101576  1.03278652       0.52305244
#> I(0.5 * labor^2)   -0.08760011 -0.66580411  0.52305244       0.50039330
#> I(0.5 * capital^2) -0.23317345  0.03476689  0.02636926       0.14674300
#> I(labor * capital)  0.36354446  0.18311307 -0.22554189      -0.28803386
#>                    I(0.5 * capital^2) I(labor * capital)
#> (Intercept)               -0.23317345          0.3635445
#> labor                      0.03476689          0.1831131
#> capital                    0.02636926         -0.2255419
#> I(0.5 * labor^2)           0.14674300         -0.2880339
#> I(0.5 * capital^2)         0.08562001         -0.1160405
#> I(labor * capital)        -0.11604045          0.1926571
## coefficient covariance matrix of the Cobb-Douglas model
vcov(fm_cb)
#>             (Intercept)        labor      capital
#> (Intercept)  0.10678650 -0.019835398  0.001188850
#> labor       -0.01983540  0.015864400 -0.009616201
#> capital      0.00118885 -0.009616201  0.007283931

## Cobb-Douglas vs. Translog model
## F test of the three second-order restrictions
anova(fm_cb, fm_tl)
#> Analysis of Variance Table
#> 
#> Model 1: output ~ labor + capital
#> Model 2: output ~ labor + capital + I(0.5 * labor^2) + I(0.5 * capital^2) + 
#>     I(labor * capital)
#>   Res.Df     RSS Df Sum of Sq      F Pr(>F)
#> 1     24 0.85163                           
#> 2     21 0.67993  3   0.17171 1.7678 0.1841
## hypothesis of constant returns
## (elasticities of labor and capital sum to one)
linearHypothesis(fm_cb, "labor + capital = 1")
#> 
#> Linear hypothesis test:
#> labor  + capital = 1
#> 
#> Model 1: restricted model
#> Model 2: output ~ labor + capital
#> 
#>   Res.Df     RSS Df Sum of Sq      F Pr(>F)
#> 1     25 0.85574                           
#> 2     24 0.85163  1 0.0041075 0.1158 0.7366


###############################
## Cost data for US airlines ##
###############################

## data
data("USAirlines", package = "AER")

## Example 7.2
## fixed effects via dummy variables: both year and firm effects,
## year only, firm only, and neither
fm_full <- lm(log(cost) ~ log(output) + I(log(output)^2) + log(price) + load + year + firm,
  data = USAirlines)
fm_time <- lm(log(cost) ~ log(output) + I(log(output)^2) + log(price) + load + year,
  data = USAirlines)
fm_firm <- lm(log(cost) ~ log(output) + I(log(output)^2) + log(price) + load + firm,
  data = USAirlines)
fm_no <- lm(log(cost) ~ log(output) + I(log(output)^2) + log(price) + load, data = USAirlines)

## full fitted model
coef(fm_full)[1:5]
#>      (Intercept)      log(output) I(log(output)^2)       log(price) 
#>      13.56249268       0.88664650       0.01261288       0.12807832 
#>             load 
#>      -0.88548260 
## year effects: 14 estimated dummies plus an appended 0 for the
## normalized year -- NOTE(review): confirm which year is the base level
plot(1970:1984, c(coef(fm_full)[6:19], 0), type = "n",
     xlab = "Year", ylab = expression(delta(Year)),
     main = "Estimated Year Specific Effects")
grid()
points(1970:1984, c(coef(fm_full)[6:19], 0), pch = 19)


## Table 7.2
## joint significance of the firm dummies (full vs. year-only model)
anova(fm_full, fm_time)
#> Analysis of Variance Table
#> 
#> Model 1: log(cost) ~ log(output) + I(log(output)^2) + log(price) + load + 
#>     year + firm
#> Model 2: log(cost) ~ log(output) + I(log(output)^2) + log(price) + load + 
#>     year
#>   Res.Df     RSS Df Sum of Sq      F    Pr(>F)    
#> 1     66 0.17257                                  
#> 2     71 1.03470 -5  -0.86213 65.945 < 2.2e-16 ***
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
## joint significance of the year dummies (full vs. firm-only model)
anova(fm_full, fm_firm)
#> Analysis of Variance Table
#> 
#> Model 1: log(cost) ~ log(output) + I(log(output)^2) + log(price) + load + 
#>     year + firm
#> Model 2: log(cost) ~ log(output) + I(log(output)^2) + log(price) + load + 
#>     firm
#>   Res.Df     RSS  Df Sum of Sq      F   Pr(>F)   
#> 1     66 0.17257                                 
#> 2     80 0.26815 -14 -0.095584 2.6112 0.004582 **
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
## joint significance of firm and year dummies together
anova(fm_full, fm_no)
#> Analysis of Variance Table
#> 
#> Model 1: log(cost) ~ log(output) + I(log(output)^2) + log(price) + load + 
#>     year + firm
#> Model 2: log(cost) ~ log(output) + I(log(output)^2) + log(price) + load
#>   Res.Df     RSS  Df Sum of Sq      F    Pr(>F)    
#> 1     66 0.17257                                   
#> 2     85 1.27492 -19   -1.1023 22.189 < 2.2e-16 ***
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

## alternatively, use plm()
library("plm")
## declare panel structure: firm = individual index, year = time index
usair <- pdata.frame(USAirlines, c("firm", "year"))
## within (fixed-effects) estimators with two-way, time-only, and
## individual-only effects, plus pooled OLS
fm_full2 <- plm(log(cost) ~ log(output) + I(log(output)^2) + log(price) + load,
  data = usair, model = "within", effect = "twoways")
fm_time2 <- plm(log(cost) ~ log(output) + I(log(output)^2) + log(price) + load,
  data = usair, model = "within", effect = "time")
fm_firm2 <- plm(log(cost) ~ log(output) + I(log(output)^2) + log(price) + load,
  data = usair, model = "within", effect = "individual")
fm_no2 <- plm(log(cost) ~ log(output) + I(log(output)^2) + log(price) + load,
  data = usair, model = "pooling")
## F test for the firm effects (two-way vs. time-only within)
pFtest(fm_full2, fm_time2)
#> 
#> 	F test for twoways effects
#> 
#> data:  log(cost) ~ log(output) + I(log(output)^2) + log(price) + load
#> F = 65.945, df1 = 5, df2 = 66, p-value < 2.2e-16
#> alternative hypothesis: significant effects
#> 
## F test for the time effects (two-way vs. individual-only within)
pFtest(fm_full2, fm_firm2)
#> 
#> 	F test for twoways effects
#> 
#> data:  log(cost) ~ log(output) + I(log(output)^2) + log(price) + load
#> F = 2.6112, df1 = 14, df2 = 66, p-value = 0.004582
#> alternative hypothesis: significant effects
#> 
## F test for both effects jointly (two-way within vs. pooled OLS)
pFtest(fm_full2, fm_no2)
#> 
#> 	F test for twoways effects
#> 
#> data:  log(cost) ~ log(output) + I(log(output)^2) + log(price) + load
#> F = 22.189, df1 = 19, df2 = 66, p-value < 2.2e-16
#> alternative hypothesis: significant effects
#> 


## Example 13.1, Table 13.1
## pooled OLS, between (group means), firm within, time within,
## and two-way within estimators
fm_no <- plm(log(cost) ~ log(output) + log(price) + load, data = usair, model = "pooling")
fm_gm <- plm(log(cost) ~ log(output) + log(price) + load, data = usair, model = "between")
fm_firm <- plm(log(cost) ~ log(output) + log(price) + load, data = usair, model = "within")
fm_time <- plm(log(cost) ~ log(output) + log(price) + load, data = usair, model = "within",
  effect = "time")
fm_ft <- plm(log(cost) ~ log(output) + log(price) + load, data = usair, model = "within",
  effect = "twoways")

summary(fm_no)
#> Pooling Model
#> 
#> Call:
#> plm(formula = log(cost) ~ log(output) + log(price) + load, data = usair, 
#>     model = "pooling")
#> 
#> Balanced Panel: n = 6, T = 15, N = 90
#> 
#> Residuals:
#>       Min.    1st Qu.     Median    3rd Qu.       Max. 
#> -0.2631215 -0.0575660  0.0044011  0.0850025  0.3001454 
#> 
#> Coefficients:
#>              Estimate Std. Error t-value  Pr(>|t|)    
#> (Intercept)  9.516922   0.229245 41.5143 < 2.2e-16 ***
#> log(output)  0.882739   0.013255 66.5991 < 2.2e-16 ***
#> log(price)   0.453977   0.020304 22.3588 < 2.2e-16 ***
#> load        -1.627510   0.345302 -4.7133 9.309e-06 ***
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#> 
#> Total Sum of Squares:    114.04
#> Residual Sum of Squares: 1.3354
#> R-Squared:      0.98829
#> Adj. R-Squared: 0.98788
#> F-statistic: 2419.34 on 3 and 86 DF, p-value: < 2.22e-16
## between estimator (fitted on the 6 firm-level group means)
summary(fm_gm)
#> Oneway (individual) effect Between Model
#> 
#> Call:
#> plm(formula = log(cost) ~ log(output) + log(price) + load, data = usair, 
#>     model = "between")
#> 
#> Balanced Panel: n = 6, T = 15, N = 90
#> Observations used in estimation: 6
#> 
#> Residuals:
#>          1          2          3          4          5          6 
#> -0.0073057 -0.0125824 -0.0492504  0.1507977 -0.0023275 -0.0793316 
#> 
#> Coefficients:
#>             Estimate Std. Error t-value Pr(>|t|)  
#> (Intercept) 85.80867   56.48297  1.5192  0.26806  
#> log(output)  0.78246    0.10877  7.1939  0.01878 *
#> log(price)  -5.52395    4.47880 -1.2334  0.34273  
#> load        -1.75102    2.74319 -0.6383  0.58861  
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#> 
#> Total Sum of Squares:    4.9787
#> Residual Sum of Squares: 0.031676
#> R-Squared:      0.99364
#> Adj. R-Squared: 0.98409
#> F-statistic: 104.116 on 3 and 2 DF, p-value: 0.0095284
## firm (individual) within estimator
summary(fm_firm)
#> Oneway (individual) effect Within Model
#> 
#> Call:
#> plm(formula = log(cost) ~ log(output) + log(price) + load, data = usair, 
#>     model = "within")
#> 
#> Balanced Panel: n = 6, T = 15, N = 90
#> 
#> Residuals:
#>       Min.    1st Qu.     Median    3rd Qu.       Max. 
#> -0.1560412 -0.0352173 -0.0093022  0.0349257  0.1658661 
#> 
#> Coefficients:
#>              Estimate Std. Error t-value Pr(>|t|)    
#> log(output)  0.919285   0.029890 30.7555  < 2e-16 ***
#> log(price)   0.417492   0.015199 27.4682  < 2e-16 ***
#> load        -1.070396   0.201690 -5.3071  9.5e-07 ***
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#> 
#> Total Sum of Squares:    39.361
#> Residual Sum of Squares: 0.29262
#> R-Squared:      0.99257
#> Adj. R-Squared: 0.99183
#> F-statistic: 3604.81 on 3 and 81 DF, p-value: < 2.22e-16
## estimated firm-specific intercepts
fixef(fm_firm)
#>      1      2      3      4      5      6 
#> 9.7059 9.6647 9.4970 9.8905 9.7300 9.7930 
## time within estimator
summary(fm_time)
#> Oneway (time) effect Within Model
#> 
#> Call:
#> plm(formula = log(cost) ~ log(output) + log(price) + load, data = usair, 
#>     effect = "time", model = "within")
#> 
#> Balanced Panel: n = 6, T = 15, N = 90
#> 
#> Residuals:
#>       Min.    1st Qu.     Median    3rd Qu.       Max. 
#> -0.2320007 -0.0587561  0.0046842  0.0487582  0.2871562 
#> 
#> Coefficients:
#>              Estimate Std. Error t-value  Pr(>|t|)    
#> log(output)  0.867727   0.015408 56.3159 < 2.2e-16 ***
#> log(price)  -0.484485   0.364109 -1.3306    0.1875    
#> load        -1.954403   0.442378 -4.4179 3.447e-05 ***
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#> 
#> Total Sum of Squares:    76.734
#> Residual Sum of Squares: 1.0882
#> R-Squared:      0.98582
#> Adj. R-Squared: 0.98247
#> F-statistic: 1668.37 on 3 and 72 DF, p-value: < 2.22e-16
## estimated year-specific intercepts
fixef(fm_time)
#>   1970   1971   1972   1973   1974   1975   1976   1977   1978   1979   1980 
#> 20.496 20.578 20.656 20.741 21.200 21.412 21.503 21.654 21.830 22.114 22.465 
#>   1981   1982   1983   1984 
#> 22.651 22.617 22.552 22.537 
## two-way within estimator
summary(fm_ft)
#> Twoways effects Within Model
#> 
#> Call:
#> plm(formula = log(cost) ~ log(output) + log(price) + load, data = usair, 
#>     effect = "twoways", model = "within")
#> 
#> Balanced Panel: n = 6, T = 15, N = 90
#> 
#> Residuals:
#>      Min.   1st Qu.    Median   3rd Qu.      Max. 
#> -0.123460 -0.024922  0.001185  0.024570  0.132182 
#> 
#> Coefficients:
#>              Estimate Std. Error t-value  Pr(>|t|)    
#> log(output)  0.817249   0.031851 25.6586 < 2.2e-16 ***
#> log(price)   0.168611   0.163478  1.0314  0.306064    
#> load        -0.882812   0.261737 -3.3729  0.001239 ** 
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#> 
#> Total Sum of Squares:    2.0542
#> Residual Sum of Squares: 0.17685
#> R-Squared:      0.91391
#> Adj. R-Squared: 0.88564
#> F-statistic: 237.088 on 3 and 67 DF, p-value: < 2.22e-16
## fixed effects of the two-way model, by dimension
fixef(fm_ft, effect = "individual")
#>      1      2      3      4      5      6 
#> 12.421 12.358 12.103 12.427 12.200 12.247 
fixef(fm_ft, effect = "time")
#>   1970   1971   1972   1973   1974   1975   1976   1977   1978   1979   1980 
#> 12.421 12.476 12.519 12.572 12.641 12.687 12.718 12.774 12.842 12.887 13.003 
#>   1981   1982   1983   1984 
#> 13.081 13.097 13.096 13.114 

## Table 13.2
## random-effects estimators: individual effects and two-way effects
fm_rfirm <- plm(log(cost) ~ log(output) + log(price) + load, data = usair, model = "random")
fm_rft <- plm(log(cost) ~ log(output) + log(price) + load, data = usair, model = "random",
  effect = "twoways")
summary(fm_rfirm)
#> Oneway (individual) effect Random Effect Model 
#>    (Swamy-Arora's transformation)
#> 
#> Call:
#> plm(formula = log(cost) ~ log(output) + log(price) + load, data = usair, 
#>     model = "random")
#> 
#> Balanced Panel: n = 6, T = 15, N = 90
#> 
#> Effects:
#>                    var  std.dev share
#> idiosyncratic 0.003613 0.060105 0.188
#> individual    0.015597 0.124889 0.812
#> theta: 0.8767
#> 
#> Residuals:
#>       Min.    1st Qu.     Median    3rd Qu.       Max. 
#> -0.1385541 -0.0390391 -0.0045664  0.0365641  0.1774947 
#> 
#> Coefficients:
#>              Estimate Std. Error z-value  Pr(>|z|)    
#> (Intercept)  9.627909   0.210164 45.8114 < 2.2e-16 ***
#> log(output)  0.906681   0.025625 35.3827 < 2.2e-16 ***
#> log(price)   0.422778   0.014025 30.1451 < 2.2e-16 ***
#> load        -1.064498   0.200070 -5.3206 1.034e-07 ***
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#> 
#> Total Sum of Squares:    40.497
#> Residual Sum of Squares: 0.31159
#> R-Squared:      0.99231
#> Adj. R-Squared: 0.99204
#> Chisq: 11091.4 on 3 DF, p-value: < 2.22e-16
## two-way random-effects fit
summary(fm_rft)
#> Twoways effects Random Effect Model 
#>    (Swamy-Arora's transformation)
#> 
#> Call:
#> plm(formula = log(cost) ~ log(output) + log(price) + load, data = usair, 
#>     effect = "twoways", model = "random")
#> 
#> Balanced Panel: n = 6, T = 15, N = 90
#> 
#> Effects:
#>                     var   std.dev share
#> idiosyncratic 2.640e-03 5.138e-02 0.144
#> individual    1.566e-02 1.251e-01 0.853
#> time          6.831e-05 8.265e-03 0.004
#> theta: 0.8946 (id) 0.06963 (time) 0.06954 (total)
#> 
#> Residuals:
#>       Min.    1st Qu.     Median    3rd Qu.       Max. 
#> -0.1392549 -0.0379739 -0.0040778  0.0361744  0.1726892 
#> 
#> Coefficients:
#>              Estimate Std. Error z-value  Pr(>|z|)    
#> (Intercept)  9.598602   0.217634 44.1043 < 2.2e-16 ***
#> log(output)  0.902373   0.026252 34.3738 < 2.2e-16 ***
#> log(price)   0.424178   0.014392 29.4723 < 2.2e-16 ***
#> load        -1.053130   0.202555 -5.1992 2.001e-07 ***
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#> 
#> Total Sum of Squares:    35.176
#> Residual Sum of Squares: 0.29403
#> R-Squared:      0.99164
#> Adj. R-Squared: 0.99135
#> Chisq: 10202.5 on 3 DF, p-value: < 2.22e-16


#################################################
## Cost function of electricity producers 1955 ##
#################################################

## Nerlove data
## NOTE(review): only rows 1-145 enter the estimation; see ?Electricity1955
## for the status of the remaining rows
data("Electricity1955", package = "AER")
Electricity <- Electricity1955[1:145,]

## Example 7.3
## Cobb-Douglas cost function
## (cost and input prices normalized by the fuel price)
fm_all <- lm(log(cost/fuel) ~ log(output) + log(labor/fuel) + log(capital/fuel),
  data = Electricity)
summary(fm_all)
#> 
#> Call:
#> lm(formula = log(cost/fuel) ~ log(output) + log(labor/fuel) + 
#>     log(capital/fuel), data = Electricity)
#> 
#> Residuals:
#>      Min       1Q   Median       3Q      Max 
#> -1.01212 -0.21789 -0.00753  0.16046  1.81898 
#> 
#> Coefficients:
#>                    Estimate Std. Error t value Pr(>|t|)    
#> (Intercept)       -4.685776   0.885294  -5.293 4.51e-07 ***
#> log(output)        0.720667   0.017435  41.335  < 2e-16 ***
#> log(labor/fuel)    0.593972   0.204632   2.903   0.0043 ** 
#> log(capital/fuel) -0.008471   0.190842  -0.044   0.9647    
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#> 
#> Residual standard error: 0.3917 on 141 degrees of freedom
#> Multiple R-squared:  0.9316,	Adjusted R-squared:  0.9301 
#> F-statistic: 640.1 on 3 and 141 DF,  p-value: < 2.2e-16
#> 

## hypothesis of constant returns to scale
## (output elasticity equal to one)
linearHypothesis(fm_all, "log(output) = 1")
#> 
#> Linear hypothesis test:
#> log(output) = 1
#> 
#> Model 1: restricted model
#> Model 2: log(cost/fuel) ~ log(output) + log(labor/fuel) + log(capital/fuel)
#> 
#>   Res.Df    RSS Df Sum of Sq      F    Pr(>F)    
#> 1    142 61.027                                  
#> 2    141 21.637  1     39.39 256.69 < 2.2e-16 ***
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

## Figure 7.4
plot(residuals(fm_all) ~ log(output), data = Electricity)

## scaling seems to be different in Greene (2003) with logQ > 10?

## grouped functions
## split firms into five equal-size output groups (quintiles of log output)
## and estimate group-specific coefficients without a common intercept
Electricity$group <- with(Electricity, cut(log(output), quantile(log(output), 0:5/5),
  include.lowest = TRUE, labels = 1:5))
fm_group <- lm(
  log(cost/fuel) ~ group/(log(output) + log(labor/fuel) + log(capital/fuel)) - 1,
  data = Electricity)

## Table 7.3 (close, but not quite)
## first row: pooled estimates; rows 2-6: group-specific estimates
round(rbind(coef(fm_all)[-1], matrix(coef(fm_group), nrow = 5)[,-1]), digits = 3)
#>      log(output) log(labor/fuel) log(capital/fuel)
#> [1,]       0.721           0.594            -0.008
#> [2,]       0.400           0.615            -0.081
#> [3,]       0.658           0.095             0.377
#> [4,]       0.938           0.402             0.250
#> [5,]       0.912           0.507             0.093
#> [6,]       1.044           0.603            -0.289

## Table 7.4
## log quadratic cost function
## (adds a squared log-output term to the Cobb-Douglas specification)
fm_all2 <- lm(
  log(cost/fuel) ~ log(output) + I(log(output)^2) + log(labor/fuel) + log(capital/fuel),
  data = Electricity)
summary(fm_all2)
#> 
#> Call:
#> lm(formula = log(cost/fuel) ~ log(output) + I(log(output)^2) + 
#>     log(labor/fuel) + log(capital/fuel), data = Electricity)
#> 
#> Residuals:
#>     Min      1Q  Median      3Q     Max 
#> -1.3825 -0.1373  0.0080  0.1277  1.1354 
#> 
#> Coefficients:
#>                    Estimate Std. Error t value Pr(>|t|)    
#> (Intercept)       -3.764003   0.702060  -5.361 3.32e-07 ***
#> log(output)        0.152648   0.061862   2.468  0.01481 *  
#> I(log(output)^2)   0.050504   0.005364   9.415  < 2e-16 ***
#> log(labor/fuel)    0.480699   0.161142   2.983  0.00337 ** 
#> log(capital/fuel)  0.073897   0.150119   0.492  0.62331    
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#> 
#> Residual standard error: 0.3076 on 140 degrees of freedom
#> Multiple R-squared:  0.9581,	Adjusted R-squared:  0.9569 
#> F-statistic: 800.7 on 4 and 140 DF,  p-value: < 2.2e-16
#> 


##########################
## Technological change ##
##########################

## Exercise 7.1
## four competing functional forms for output per unit of technology
data("TechChange", package = "AER")
fm1 <- lm(I(output/technology) ~ log(clr), data = TechChange)
fm2 <- lm(I(output/technology) ~ I(1/clr), data = TechChange)
fm3 <- lm(log(output/technology) ~ log(clr), data = TechChange)
fm4 <- lm(log(output/technology) ~ I(1/clr), data = TechChange)

## Exercise 7.2 (a) and (c)
plot(I(output/technology) ~ clr, data = TechChange)

## structural change (Chow) test with breakpoint at 1942
sctest(I(output/technology) ~ log(clr), data = TechChange,
  type = "Chow", point = c(1942, 1))
#> 
#> 	Chow test
#> 
#> data:  I(output/technology) ~ log(clr)
#> F = 1208.3, p-value < 2.2e-16
#> 


##################################
## Expenditure and default data ##
##################################

## full data set (F21.4)
data("CreditCard", package = "AER")

## extract data set F9.1
## (first 100 rows, rounded to match Greene's published table)
ccard <- CreditCard[1:100,]
ccard$income <- round(ccard$income, digits = 2)
ccard$expenditure <- round(ccard$expenditure, digits = 2)
ccard$age <- round(ccard$age + .01)
## suspicious:
CreditCard$age[CreditCard$age < 1]
#> [1] 0.5000000 0.1666667 0.5833333 0.7500000 0.5833333 0.5000000 0.7500000
## the first of these is also in TableF9.1 with 36 instead of 0.5:
ccard$age[79] <- 36

## Example 11.1
## OLS on the subsample with positive expenditure, ordered by income
ccard <- ccard[order(ccard$income),]
ccard0 <- subset(ccard, expenditure > 0)
cc_ols <- lm(expenditure ~ age + owner + income + I(income^2), data = ccard0)

## Figure 11.1
plot(residuals(cc_ols) ~ income, data = ccard0, pch = 19)


## Table 11.1
## descriptive statistics of the example subsample
mean(ccard$age)
#> [1] 32.08
prop.table(table(ccard$owner))
#> 
#>   no  yes 
#> 0.64 0.36 
mean(ccard$income)
#> [1] 3.3692

summary(cc_ols)
#> 
#> Call:
#> lm(formula = expenditure ~ age + owner + income + I(income^2), 
#>     data = ccard0)
#> 
#> Residuals:
#>     Min      1Q  Median      3Q     Max 
#> -429.03 -130.39  -51.14   53.96 1460.55 
#> 
#> Coefficients:
#>             Estimate Std. Error t value Pr(>|t|)   
#> (Intercept) -237.127    199.324  -1.190  0.23838   
#> age           -3.086      5.515  -0.560  0.57759   
#> owneryes      27.911     82.920   0.337  0.73747   
#> income       234.416     80.365   2.917  0.00481 **
#> I(income^2)  -15.002      7.469  -2.009  0.04861 * 
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#> 
#> Residual standard error: 284.7 on 67 degrees of freedom
#> Multiple R-squared:  0.2436,	Adjusted R-squared:  0.1985 
#> F-statistic: 5.396 on 4 and 67 DF,  p-value: 0.0007932
#> 
## heteroskedasticity-consistent standard errors (HC0/HC2/HC1 variants)
sqrt(diag(vcovHC(cc_ols, type = "HC0")))
#> (Intercept)         age    owneryes      income I(income^2) 
#>  212.953161    3.300911   92.190987   88.874353    6.945292 
sqrt(diag(vcovHC(cc_ols, type = "HC2"))) 
#> (Intercept)         age    owneryes      income I(income^2) 
#>  221.050557    3.446920   95.675689   92.092471    7.200369 
sqrt(diag(vcovHC(cc_ols, type = "HC1")))
#> (Intercept)         age    owneryes      income I(income^2) 
#>  220.756215    3.421863   95.569059   92.130897    7.199783 

## Breusch-Pagan test with squares and interactions as auxiliary regressors
bptest(cc_ols, ~ (age + income + I(income^2) + owner)^2 + I(age^2) + I(income^4),
  data = ccard0)
#> 
#> 	studentized Breusch-Pagan test
#> 
#> data:  cc_ols
#> BP = 14.329, df = 12, p-value = 0.2802
#> 
## Goldfeld-Quandt test for heteroskedasticity
gqtest(cc_ols)
#> 
#> 	Goldfeld-Quandt test
#> 
#> data:  cc_ols
#> GQ = 15.004, df1 = 31, df2 = 31, p-value = 1.374e-11
#> alternative hypothesis: variance increases from segment 1 to 2
#> 
## Breusch-Pagan test, non-studentized (original LM form)
bptest(cc_ols, ~ income + I(income^2), data = ccard0, studentize = FALSE)
#> 
#> 	Breusch-Pagan test
#> 
#> data:  cc_ols
#> BP = 41.931, df = 2, p-value = 7.85e-10
#> 
## studentized Breusch-Pagan test on the same auxiliary regressors
bptest(cc_ols, ~ income + I(income^2), data = ccard0)
#> 
#> 	studentized Breusch-Pagan test
#> 
#> data:  cc_ols
#> BP = 6.1884, df = 2, p-value = 0.04531
#> 

## Table 11.2, WLS and FGLS
## WLS with variance assumed proportional to income and to income^2
cc_wls1 <- lm(expenditure ~ age + owner + income + I(income^2), weights = 1/income,
  data = ccard0)
cc_wls2 <- lm(expenditure ~ age + owner + income + I(income^2), weights = 1/income^2,
  data = ccard0)

## FGLS: variance function estimated from log squared OLS residuals
auxreg1 <- lm(log(residuals(cc_ols)^2) ~ log(income), data = ccard0)
cc_fgls1 <- lm(expenditure ~ age + owner + income + I(income^2),
  weights = 1/exp(fitted(auxreg1)), data = ccard0)

auxreg2 <- lm(log(residuals(cc_ols)^2) ~ income + I(income^2), data = ccard0)
cc_fgls2 <- lm(expenditure ~ age + owner + income + I(income^2),
  weights = 1/exp(fitted(auxreg2)), data = ccard0)

## Iterated FGLS: alternate between the log-variance regression
## log(e^2) ~ log(income) and WLS with weights 1/income^alpha until the
## estimated variance exponent alpha converges.
alphai <- coef(lm(log(residuals(cc_ols)^2) ~ log(income), data = ccard0))[2]
alpha <- 0
## Relative tolerance of 1e-7; the max() floor keeps the criterion
## well-defined on the first pass (alpha = 0), where the original
## formulation abs((alphai - alpha)/alpha) divided by zero.
while(abs(alphai - alpha) > 1e-7 * max(abs(alpha), 1e-7)) {
  alpha <- alphai
  cc_fgls3 <- lm(expenditure ~ age + owner + income + I(income^2), weights = 1/income^alpha,
    data = ccard0)
  alphai <- coef(lm(log(residuals(cc_fgls3)^2) ~ log(income), data = ccard0))[2]
}
alpha ## 1.7623 for Greene
#> log(income) 
#>     1.76133 
## refit at the converged exponent so cc_fgls3 matches the final alpha
cc_fgls3 <- lm(expenditure ~ age + owner + income + I(income^2), weights = 1/income^alpha,
  data = ccard0)

## Negative log-likelihood of the WLS fit as a function of the variance
## exponent alpha (weights 1/income^alpha); minimized over alpha below.
llik <- function(alpha) {
  wfit <- lm(expenditure ~ age + owner + income + I(income^2),
    weights = 1/income^alpha, data = ccard0)
  -logLik(wfit)
}
## profile of the log-likelihood over alpha in [0, 5]
plot(0:100/20, -sapply(0:100/20, llik), type = "l", xlab = "alpha", ylab = "logLik")

## ML estimate of alpha and the corresponding WLS fit
alpha <- optimize(llik, interval = c(0, 5))$minimum
cc_fgls4 <- lm(expenditure ~ age + owner + income + I(income^2), weights = 1/income^alpha,
  data = ccard0)

## Table 11.2
## coefficients of all seven estimators, one row per model
cc_fit <- list(cc_ols, cc_wls1, cc_wls2, cc_fgls2, cc_fgls1, cc_fgls3, cc_fgls4)
t(sapply(cc_fit, coef))
#>      (Intercept)       age owneryes    income I(income^2)
#> [1,]  -237.12660 -3.086288 27.91120 234.41556  -15.002467
#> [2,]  -181.93573 -2.938475 50.47325 202.26384  -12.122090
#> [3,]  -114.22064 -2.696285 60.43805 158.52460   -7.258642
#> [4,]  -117.99709 -1.229088 50.90827 145.30518   -7.938276
#> [5,]  -193.47191 -2.961744 47.30618 209.02210  -12.782103
#> [6,]  -130.55515 -2.778092 59.10655 169.88021   -8.614093
#> [7,]   -19.30138 -1.705737 58.09942  75.98748    4.392631
## corresponding conventional standard errors, one row per model
t(sapply(cc_fit, function(obj) sqrt(diag(vcov(obj)))))
#>      (Intercept)      age owneryes   income I(income^2)
#> [1,]    199.3244 5.514764 82.91990 80.36466    7.469185
#> [2,]    165.4981 4.603286 69.87638 76.78435    8.273252
#> [3,]    139.6780 3.807233 58.54868 76.39688    9.724617
#> [4,]    101.3706 2.548163 52.81635 46.35491    3.736022
#> [5,]    171.1090 4.764028 72.15562 77.20409    8.082372
#> [6,]    145.0461 3.982370 61.05116 76.18479    9.312051
#> [7,]    117.2006 2.859072 45.10098 84.01104   13.925698

## Table 21.21, Poisson and logit models
## Poisson regression for the number of derogatory reports
cc_pois <- glm(reports ~ age + income + expenditure, data = CreditCard, family = poisson)
summary(cc_pois)
#> 
#> Call:
#> glm(formula = reports ~ age + income + expenditure, family = poisson, 
#>     data = CreditCard)
#> 
#> Coefficients:
#>              Estimate Std. Error z value Pr(>|z|)    
#> (Intercept) -0.819682   0.145272  -5.642 1.68e-08 ***
#> age          0.007181   0.003978   1.805  0.07105 .  
#> income       0.077898   0.023940   3.254  0.00114 ** 
#> expenditure -0.004102   0.000374 -10.968  < 2e-16 ***
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#> 
#> (Dispersion parameter for poisson family taken to be 1)
#> 
#>     Null deviance: 2347.4  on 1318  degrees of freedom
#> Residual deviance: 2143.9  on 1315  degrees of freedom
#> AIC: 2801.4
#> 
#> Number of Fisher Scoring iterations: 7
#> 
logLik(cc_pois)
#> 'log Lik.' -1396.719 (df=4)
## predicted count of zero-report individuals at the mean regressor values
xhat <- colMeans(CreditCard[, c("age", "income", "expenditure")])
xhat <- as.data.frame(t(xhat))
lambda <- predict(cc_pois, newdata = xhat, type = "response")
ppois(0, lambda) * nrow(CreditCard)
#> [1] 938.597

## logit model for the probability of at least one report
cc_logit <- glm(factor(reports > 0) ~ age + income + owner,
  data = CreditCard, family = binomial)
summary(cc_logit)
#> 
#> Call:
#> glm(formula = factor(reports > 0) ~ age + income + owner, family = binomial, 
#>     data = CreditCard)
#> 
#> Coefficients:
#>              Estimate Std. Error z value Pr(>|z|)    
#> (Intercept) -2.244166   0.251459  -8.925  < 2e-16 ***
#> age          0.022458   0.007313   3.071  0.00213 ** 
#> income       0.069317   0.041983   1.651  0.09872 .  
#> owneryes    -0.376620   0.157799  -2.387  0.01700 *  
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#> 
#> (Dispersion parameter for binomial family taken to be 1)
#> 
#>     Null deviance: 1306.6  on 1318  degrees of freedom
#> Residual deviance: 1291.1  on 1315  degrees of freedom
#> AIC: 1299.1
#> 
#> Number of Fisher Scoring iterations: 4
#> 
logLik(cc_logit)
#> 'log Lik.' -645.5649 (df=4)

## Table 21.21, "split population model"
## zero-inflated Poisson: count part | zero-inflation (binary) part
library("pscl")
cc_zip <- zeroinfl(reports ~ age + income + expenditure | age + income + owner,
  data = CreditCard)
summary(cc_zip)
#> 
#> Call:
#> zeroinfl(formula = reports ~ age + income + expenditure | age + income + 
#>     owner, data = CreditCard)
#> 
#> Pearson residuals:
#>     Min      1Q  Median      3Q     Max 
#> -0.7540 -0.4436 -0.3980 -0.3074 11.2583 
#> 
#> Count model coefficients (poisson with log link):
#>               Estimate Std. Error z value Pr(>|z|)    
#> (Intercept)  1.0009818  0.2040577   4.905 9.32e-07 ***
#> age         -0.0050726  0.0056934  -0.891    0.373    
#> income       0.0133224  0.0325209   0.410    0.682    
#> expenditure -0.0023586  0.0003207  -7.354 1.92e-13 ***
#> 
#> Zero-inflation model coefficients (binomial with logit link):
#>              Estimate Std. Error z value Pr(>|z|)    
#> (Intercept)  2.154038   0.304421   7.076 1.49e-12 ***
#> age         -0.024685   0.008888  -2.777  0.00548 ** 
#> income      -0.116745   0.051498  -2.267  0.02339 *  
#> owneryes     0.386539   0.170840   2.263  0.02366 *  
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
#> 
#> Number of iterations in BFGS optimization: 17 
#> Log-likelihood: -1093 on 8 Df
## predicted number of zeros implied by the zero-inflated model
sum(predict(cc_zip, type = "prob")[,1])
#> [1] 1060.261


###################################
## DEM/GBP exchange rate returns ##
###################################

## data as given by Greene (2003)
## (rounded to six digits to match the published data set)
data("MarkPound")
mp <- round(MarkPound, digits = 6)

## Figure 11.3 in Greene (2003)
plot(mp)


## Example 11.8 in Greene (2003), Table 11.5
## GARCH fit (tseries default order) with numerical gradients
library("tseries")
mp_garch <- garch(mp, grad = "numerical")
#> 
#>  ***** ESTIMATION WITH NUMERICAL GRADIENT ***** 
#> 
#> 
#>      I     INITIAL X(I)        D(I)
#> 
#>      1     1.990169e-01     1.000e+00
#>      2     5.000000e-02     1.000e+00
#>      3     5.000000e-02     1.000e+00
#> 
#>     IT   NF      F         RELDF    PRELDF    RELDX   STPPAR   D*STEP   NPRELDF
#>      0    1 -5.449e+02
#>      1    3 -5.845e+02  6.78e-02  1.10e-01  2.5e-01  6.4e+03  1.0e-01  3.55e+02
#>      2    5 -5.913e+02  1.15e-02  3.08e-02  7.3e-02  4.6e+00  3.3e-02  4.87e+02
#>      3    6 -5.997e+02  1.40e-02  1.43e-02  7.8e-02  2.0e+00  3.3e-02  9.80e+01
#>      4    7 -6.126e+02  2.11e-02  2.71e-02  1.4e-01  2.0e+00  6.5e-02  6.24e+01
#>      5    8 -6.301e+02  2.77e-02  5.01e-02  1.8e-01  2.0e+00  1.3e-01  3.43e+01
#>      6    9 -6.537e+02  3.61e-02  4.89e-02  2.2e-01  2.0e+00  1.3e-01  1.19e+01
#>      7   11 -6.755e+02  3.24e-02  2.87e-02  1.6e-01  2.0e+00  1.3e-01  1.37e+01
#>      8   13 -6.878e+02  1.78e-02  1.71e-02  9.1e-02  2.0e+00  9.0e-02  2.84e+01
#>      9   16 -6.879e+02  2.73e-04  5.03e-04  1.4e-03  9.8e+00  1.8e-03  2.22e+01
#>     10   17 -6.881e+02  2.59e-04  2.67e-04  1.3e-03  2.1e+00  1.8e-03  1.82e+01
#>     11   18 -6.885e+02  6.02e-04  6.08e-04  2.9e-03  2.0e+00  3.6e-03  1.81e+01
#>     12   22 -6.963e+02  1.12e-02  1.21e-02  6.4e-02  2.0e+00  7.7e-02  1.73e+01
#>     13   26 -6.964e+02  1.07e-04  1.92e-04  5.9e-04  9.1e+00  8.2e-04  8.37e-01
#>     14   27 -6.965e+02  9.85e-05  1.00e-04  5.8e-04  2.4e+00  8.2e-04  6.52e-01
#>     15   28 -6.966e+02  1.80e-04  1.84e-04  1.1e-03  2.0e+00  1.6e-03  6.54e-01
#>     16   33 -7.031e+02  9.26e-03  1.18e-02  7.5e-02  1.9e+00  1.2e-01  6.40e-01
#>     17   35 -7.035e+02  5.47e-04  3.04e-03  1.3e-02  2.0e+00  1.9e-02  3.58e-01
#>     18   36 -7.039e+02  5.98e-04  4.77e-03  1.2e-02  2.0e+00  1.9e-02  1.55e-01
#>     19   37 -7.049e+02  1.45e-03  2.88e-03  1.2e-02  1.7e+00  1.9e-02  3.79e-03
#>     20   38 -7.054e+02  6.23e-04  2.27e-03  1.1e-02  1.7e+00  1.9e-02  1.82e-02
#>     21   39 -7.058e+02  5.69e-04  1.04e-03  1.1e-02  1.3e+00  1.9e-02  2.36e-03
#>     22   41 -7.064e+02  8.39e-04  9.73e-04  2.4e-02  3.0e-01  4.6e-02  1.01e-03
#>     23   42 -7.064e+02  1.88e-05  1.27e-04  4.7e-03  0.0e+00  8.2e-03  1.27e-04
#>     24   43 -7.064e+02  4.80e-05  4.67e-05  1.2e-03  0.0e+00  2.1e-03  4.67e-05
#>     25   44 -7.064e+02  4.71e-07  9.35e-07  7.7e-04  0.0e+00  1.5e-03  9.35e-07
#>     26   45 -7.064e+02  1.82e-07  2.02e-07  1.6e-04  0.0e+00  2.9e-04  2.02e-07
#>     27   46 -7.064e+02  5.22e-09  6.74e-09  4.1e-05  0.0e+00  9.0e-05  6.74e-09
#>     28   47 -7.064e+02  3.70e-10  3.72e-10  1.3e-05  0.0e+00  2.8e-05  3.72e-10
#>     29   48 -7.064e+02  1.97e-13  2.13e-13  1.6e-07  0.0e+00  3.1e-07  2.13e-13
#> 
#>  ***** RELATIVE FUNCTION CONVERGENCE *****
#> 
#>  FUNCTION    -7.064122e+02   RELDX        1.604e-07
#>  FUNC. EVALS      48         GRAD. EVALS     103
#>  PRELDF       2.127e-13      NPRELDF      2.127e-13
#> 
#>      I      FINAL X(I)        D(I)          G(I)
#> 
#>      1    1.086690e-02     1.000e+00     9.277e-04
#>      2    1.546040e-01     1.000e+00     4.473e-05
#>      3    8.044204e-01     1.000e+00     9.389e-05
#> 
## Table 11.5: GARCH(1,1) coefficients (a0, a1, b1) with diagnostic tests
summary(mp_garch)
#> 
#> Call:
#> garch(x = mp, grad = "numerical")
#> 
#> Model:
#> GARCH(1,1)
#> 
#> Residuals:
#>       Min        1Q    Median        3Q       Max 
#> -6.797391 -0.537032 -0.002637  0.552328  5.248670 
#> 
#> Coefficient(s):
#>     Estimate  Std. Error  t value Pr(>|t|)    
#> a0  0.010867    0.001297    8.376   <2e-16 ***
#> a1  0.154604    0.013882   11.137   <2e-16 ***
#> b1  0.804420    0.016046   50.133   <2e-16 ***
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#> 
#> Diagnostic Tests:
#> 	Jarque Bera Test
#> 
#> data:  Residuals
#> X-squared = 1060, df = 2, p-value < 2.2e-16
#> 
#> 
#> 	Box-Ljung test
#> 
#> data:  Squared.Residuals
#> X-squared = 2.4776, df = 1, p-value = 0.1155
#> 
logLik(mp_garch)  
#> 'log Lik.' -1106.654 (df=3)
## Greene (2003) also includes a constant and uses different
## standard errors (presumably computed from Hessian), here
## OPG standard errors are used. garchFit() in "fGarch"
## implements the approach used by Greene (2003).

## compare Errata to Greene (2003)
## squared residuals from a constant-only regression (= squared demeaned returns)
library("dynlm")
res <- residuals(dynlm(mp ~ 1))^2
## auxiliary regression on ten own lags, basis for the ARCH LM statistic below
mp_ols <- dynlm(res ~ L(res, 1:10))
summary(mp_ols)
#> 
#> Time series regression with "ts" data:
#> Start = 11, End = 1974
#> 
#> Call:
#> dynlm(formula = res ~ L(res, 1:10))
#> 
#> Residuals:
#>     Min      1Q  Median      3Q     Max 
#> -1.4937 -0.1560 -0.1042 -0.0065  9.7787 
#> 
#> Coefficients:
#>                 Estimate Std. Error t value Pr(>|t|)    
#> (Intercept)     0.095733   0.014931   6.412 1.80e-10 ***
#> L(res, 1:10)1   0.161696   0.022595   7.156 1.17e-12 ***
#> L(res, 1:10)2   0.094938   0.022882   4.149 3.48e-05 ***
#> L(res, 1:10)3   0.051267   0.022973   2.232   0.0258 *  
#> L(res, 1:10)4   0.034278   0.023003   1.490   0.1363    
#> L(res, 1:10)5   0.121759   0.023015   5.290 1.36e-07 ***
#> L(res, 1:10)6  -0.007805   0.023015  -0.339   0.7346    
#> L(res, 1:10)7   0.003673   0.023003   0.160   0.8731    
#> L(res, 1:10)8   0.029509   0.022974   1.284   0.1991    
#> L(res, 1:10)9   0.025063   0.022883   1.095   0.2735    
#> L(res, 1:10)10  0.054212   0.022595   2.399   0.0165 *  
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#> 
#> Residual standard error: 0.5005 on 1953 degrees of freedom
#> Multiple R-squared:  0.09795,	Adjusted R-squared:  0.09333 
#> F-statistic: 21.21 on 10 and 1953 DF,  p-value: < 2.2e-16
#> 
logLik(mp_ols)
#> 'log Lik.' -1421.871 (df=12)
## ARCH LM test statistic: T * R^2 from the auxiliary regression
summary(mp_ols)$r.squared * length(residuals(mp_ols))
#> [1] 192.3783


################################
## Grunfeld's investment data ##
################################

## subset of data with mistakes
## (three entries are altered below to reproduce the data errors
## present in Greene's version of the Grunfeld data)
data("Grunfeld", package = "AER")
ggr <- subset(Grunfeld, firm %in% c("General Motors", "US Steel",
  "General Electric", "Chrysler", "Westinghouse"))
ggr[c(26, 38), 1] <- c(261.6, 645.2)
ggr[32, 3] <- 232.6

## Tab. 13.4
## pooled OLS across all five firms
fm_pool <- lm(invest ~ value + capital, data = ggr)
summary(fm_pool)
#> 
#> Call:
#> lm(formula = invest ~ value + capital, data = ggr)
#> 
#> Residuals:
#>     Min      1Q  Median      3Q     Max 
#> -323.83  -67.52    8.97   41.25  330.66 
#> 
#> Coefficients:
#>              Estimate Std. Error t value Pr(>|t|)    
#> (Intercept) -48.02974   21.48017  -2.236   0.0276 *  
#> value         0.10509    0.01138   9.236 5.99e-15 ***
#> capital       0.30537    0.04351   7.019 3.06e-10 ***
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#> 
#> Residual standard error: 127.3 on 97 degrees of freedom
#> Multiple R-squared:  0.7789,	Adjusted R-squared:  0.7743 
#> F-statistic: 170.8 on 2 and 97 DF,  p-value: < 2.2e-16
#> 
logLik(fm_pool)
#> 'log Lik.' -624.9928 (df=4)
## White correction
## heteroskedasticity-consistent (HC0) standard errors for pooled OLS
sqrt(diag(vcovHC(fm_pool, type = "HC0")))
#>  (Intercept)        value      capital 
#> 15.016673443  0.009146375  0.059105263 

## heteroskedastic FGLS
## firm-specific error variances estimated from squared OLS residuals ...
auxreg1 <- lm(residuals(fm_pool)^2 ~ firm - 1, data = ggr)
## ... then used as inverse-variance weights in a second-step WLS fit
fm_pfgls <- lm(invest ~ value + capital, data = ggr, weights = 1/fitted(auxreg1))
summary(fm_pfgls)
#> 
#> Call:
#> lm(formula = invest ~ value + capital, data = ggr, weights = 1/fitted(auxreg1))
#> 
#> Weighted Residuals:
#>     Min      1Q  Median      3Q     Max 
#> -3.1867 -0.7964  0.0400  0.7323  2.4693 
#> 
#> Coefficients:
#>              Estimate Std. Error t value Pr(>|t|)    
#> (Intercept) -36.25370    6.05101  -5.991 3.54e-08 ***
#> value         0.09499    0.00732  12.976  < 2e-16 ***
#> capital       0.33781    0.02986  11.312  < 2e-16 ***
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#> 
#> Residual standard error: 0.988 on 97 degrees of freedom
#> Multiple R-squared:  0.9014,	Adjusted R-squared:  0.8993 
#> F-statistic: 443.2 on 2 and 97 DF,  p-value: < 2.2e-16
#> 

## ML, computed as iterated FGLS
## alternate between estimating firm-specific variances and refitting WLS
## until the variances converge (relative change below 1e-7)
sigmasi <- fitted(lm(residuals(fm_pfgls)^2 ~ firm - 1 , data = ggr))
sigmas <- 0
## NOTE(review): on the first pass (sigmasi - sigmas)/sigmas divides by zero,
## yielding Inf, which guarantees at least one iteration of the loop
while(any(abs((sigmasi - sigmas)/sigmas) > 1e-7)) {
   sigmas <- sigmasi
   fm_pfgls_i <- lm(invest ~ value + capital, data = ggr, weights = 1/sigmas)
   sigmasi <- fitted(lm(residuals(fm_pfgls_i)^2 ~ firm - 1 , data = ggr))
   }
## final fit at the converged variance estimates
fm_pmlh <- lm(invest ~ value + capital, data = ggr, weights = 1/sigmas)
summary(fm_pmlh)
#> 
#> Call:
#> lm(formula = invest ~ value + capital, data = ggr, weights = 1/sigmas)
#> 
#> Weighted Residuals:
#>     Min      1Q  Median      3Q     Max 
#> -2.5991 -0.8639 -0.3293  0.5610  2.9899 
#> 
#> Coefficients:
#>              Estimate Std. Error t value Pr(>|t|)    
#> (Intercept) -23.25818    4.88907  -4.757 6.84e-06 ***
#> value         0.09435    0.00638  14.789  < 2e-16 ***
#> capital       0.33370    0.02238  14.913  < 2e-16 ***
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#> 
#> Residual standard error: 1.015 on 97 degrees of freedom
#> Multiple R-squared:  0.913,	Adjusted R-squared:  0.9112 
#> F-statistic: 508.7 on 2 and 97 DF,  p-value: < 2.2e-16
#> 
logLik(fm_pmlh)
#> 'log Lik.' -564.5355 (df=4)

## Tab. 13.5
## firm-specific variance estimates implied by each of the three estimators
auxreg2 <- lm(residuals(fm_pfgls)^2 ~ firm - 1, data = ggr)
auxreg3 <- lm(residuals(fm_pmlh)^2 ~ firm - 1, data = ggr)
rbind(
  "OLS" = coef(auxreg1),
  "Het. FGLS" = coef(auxreg2),
  "Het. ML" = coef(auxreg3))
#>           firmGeneral Motors firmUS Steel firmGeneral Electric firmChrysler
#> OLS                 9410.908     33455.51             34288.49     755.8508
#> Het. FGLS           8612.147     32902.83             36563.24     409.1902
#> Het. ML             8657.885     29824.90             40211.12     175.7844
#>           firmWestinghouse
#> OLS               633.4237
#> Het. FGLS         777.9749
#> Het. ML          1241.0107


## Chapter 14: explicitly treat as panel data
library("plm")
pggr <- pdata.frame(ggr, c("firm", "year"))

## Tab. 14.1
## SUR and pooled SUR; residual covariance estimated without df correction
library("systemfit")
fm_sur <- systemfit(invest ~ value + capital, data = pggr, method = "SUR",
  methodResidCov = "noDfCor")
fm_psur <- systemfit(invest ~ value + capital, data = pggr, method = "SUR", pooled = TRUE, 
  methodResidCov = "noDfCor", residCovWeighted = TRUE)

## Tab 14.2
fm_ols <- systemfit(invest ~ value + capital, data = pggr, method = "OLS")
fm_pols <- systemfit(invest ~ value + capital, data = pggr, method = "OLS", pooled = TRUE)
## or "by hand"
fm_gm <- lm(invest ~ value + capital, data = ggr, subset = firm == "General Motors")
mean(residuals(fm_gm)^2)   ## Greene uses MLE
#> [1] 7160.294
## etc.
fm_pool <- lm(invest ~ value + capital, data = ggr)

## Tab. 14.3 (and Tab 13.4, cross-section ML)
## (not run due to long computation time)
## iterating SUR to convergence (maxiter/tol) yields the ML estimates
if (FALSE) { # \dontrun{
fm_ml <- systemfit(invest ~ value + capital, data = pggr, method = "SUR",
  methodResidCov = "noDfCor", maxiter = 1000, tol = 1e-10)
fm_pml <- systemfit(invest ~ value + capital, data = pggr, method = "SUR", pooled = TRUE, 
  methodResidCov = "noDfCor", residCovWeighted = TRUE, maxiter = 1000, tol = 1e-10)
} # }

## Fig. 14.2
## SUR residuals, firms reordered (presumably to match Greene's ordering)
plot(unlist(residuals(fm_sur)[, c(3, 1, 2, 5, 4)]), 
  type = "l", ylab = "SUR residuals", ylim = c(-400, 400), xaxs = "i", yaxs = "i")
abline(v = c(20,40,60,80), h = 0, lty = 2)



###################
## Klein model I ##
###################

## data
data("KleinI", package = "AER")

## Tab. 15.3, OLS
## equation-by-equation OLS for the three behavioral equations
library("dynlm")
fm_cons <- dynlm(consumption ~ cprofits + L(cprofits) + I(pwage + gwage), data = KleinI)
fm_inv <- dynlm(invest ~ cprofits + L(cprofits) + capital, data = KleinI)
fm_pwage <- dynlm(pwage ~ gnp + L(gnp) + I(time(gnp) - 1931), data = KleinI)
summary(fm_cons)
#> 
#> Time series regression with "ts" data:
#> Start = 1921, End = 1941
#> 
#> Call:
#> dynlm(formula = consumption ~ cprofits + L(cprofits) + I(pwage + 
#>     gwage), data = KleinI)
#> 
#> Residuals:
#>      Min       1Q   Median       3Q      Max 
#> -2.17345 -0.43597 -0.03466  0.78508  1.61650 
#> 
#> Coefficients:
#>                  Estimate Std. Error t value Pr(>|t|)    
#> (Intercept)      16.23660    1.30270  12.464 5.62e-10 ***
#> cprofits          0.19293    0.09121   2.115   0.0495 *  
#> L(cprofits)       0.08988    0.09065   0.992   0.3353    
#> I(pwage + gwage)  0.79622    0.03994  19.933 3.16e-13 ***
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#> 
#> Residual standard error: 1.026 on 17 degrees of freedom
#> Multiple R-squared:  0.981,	Adjusted R-squared:  0.9777 
#> F-statistic: 292.7 on 3 and 17 DF,  p-value: 7.938e-15
#> 
## Tab. 15.3: investment equation, OLS
summary(fm_inv)
#> 
#> Time series regression with "ts" data:
#> Start = 1921, End = 1941
#> 
#> Call:
#> dynlm(formula = invest ~ cprofits + L(cprofits) + capital, data = KleinI)
#> 
#> Residuals:
#>      Min       1Q   Median       3Q      Max 
#> -2.56562 -0.63169  0.03687  0.41542  1.49226 
#> 
#> Coefficients:
#>             Estimate Std. Error t value Pr(>|t|)    
#> (Intercept) 10.12579    5.46555   1.853 0.081374 .  
#> cprofits     0.47964    0.09711   4.939 0.000125 ***
#> L(cprofits)  0.33304    0.10086   3.302 0.004212 ** 
#> capital     -0.11179    0.02673  -4.183 0.000624 ***
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#> 
#> Residual standard error: 1.009 on 17 degrees of freedom
#> Multiple R-squared:  0.9313,	Adjusted R-squared:  0.9192 
#> F-statistic: 76.88 on 3 and 17 DF,  p-value: 4.299e-10
#> 
## Tab. 15.3: private wage equation, OLS
summary(fm_pwage)
#> 
#> Time series regression with "ts" data:
#> Start = 1921, End = 1941
#> 
#> Call:
#> dynlm(formula = pwage ~ gnp + L(gnp) + I(time(gnp) - 1931), data = KleinI)
#> 
#> Residuals:
#>      Min       1Q   Median       3Q      Max 
#> -1.29418 -0.46875  0.01376  0.45027  1.19569 
#> 
#> Coefficients:
#>                     Estimate Std. Error t value Pr(>|t|)    
#> (Intercept)          1.49704    1.27003   1.179 0.254736    
#> gnp                  0.43948    0.03241  13.561 1.52e-10 ***
#> L(gnp)               0.14609    0.03742   3.904 0.001142 ** 
#> I(time(gnp) - 1931)  0.13025    0.03191   4.082 0.000777 ***
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#> 
#> Residual standard error: 0.7671 on 17 degrees of freedom
#> Multiple R-squared:  0.9874,	Adjusted R-squared:  0.9852 
#> F-statistic: 444.6 on 3 and 17 DF,  p-value: 2.411e-16
#> 
## Notes:
##  - capital refers to previous year's capital stock -> no lag needed!
##  - trend used by Greene (p. 381, "time trend measured as years from 1931")
##    Maddala uses years since 1919

## preparation of data frame for systemfit
## align each series with its own one-year lag; lagged columns prefixed "L"
KI <- ts.intersect(KleinI, lag(KleinI, k = -1), dframe = TRUE)
names(KI) <- c(colnames(KleinI), paste("L", colnames(KleinI), sep = ""))
KI$trend <- (1921:1941) - 1931

library("systemfit")
## three-equation system of Klein's model I
system <- list(
  consumption = consumption ~ cprofits + Lcprofits + I(pwage + gwage),
  invest = invest ~ cprofits + Lcprofits + capital,
  pwage = pwage ~ gnp + Lgnp + trend)

## Tab. 15.3 OLS again
fm_ols <- systemfit(system, method = "OLS", data = KI)
summary(fm_ols)
#> 
#> systemfit results 
#> method: OLS 
#> 
#>         N DF     SSR detRCov   OLS-R2 McElroy-R2
#> system 63 51 45.2069 0.37084 0.977268   0.991302
#> 
#>              N DF     SSR      MSE     RMSE       R2   Adj R2
#> consumption 21 17 17.8794 1.051732 1.025540 0.981008 0.977657
#> invest      21 17 17.3227 1.018982 1.009447 0.931348 0.919233
#> pwage       21 17 10.0047 0.588515 0.767147 0.987414 0.985193
#> 
#> The covariance matrix of the residuals
#>             consumption    invest     pwage
#> consumption   1.0517323 0.0611432 -0.470419
#> invest        0.0611432 1.0189825  0.149681
#> pwage        -0.4704191 0.1496807  0.588515
#> 
#> The correlations of the residuals
#>             consumption    invest     pwage
#> consumption   1.0000000 0.0590626 -0.597935
#> invest        0.0590626 1.0000000  0.193288
#> pwage        -0.5979346 0.1932875  1.000000
#> 
#> 
#> OLS estimates for 'consumption' (equation 1)
#> Model Formula: consumption ~ cprofits + Lcprofits + I(pwage + gwage)
#> <environment: 0x5bb5fa905190>
#> 
#>                    Estimate Std. Error  t value   Pr(>|t|)    
#> (Intercept)      16.2366003  1.3026983 12.46382 5.6208e-10 ***
#> cprofits          0.1929344  0.0912102  2.11527   0.049474 *  
#> Lcprofits         0.0898849  0.0906479  0.99158   0.335306    
#> I(pwage + gwage)  0.7962187  0.0399439 19.93342 3.1619e-13 ***
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#> 
#> Residual standard error: 1.02554 on 17 degrees of freedom
#> Number of observations: 21 Degrees of Freedom: 17 
#> SSR: 17.879449 MSE: 1.051732 Root MSE: 1.02554 
#> Multiple R-Squared: 0.981008 Adjusted R-Squared: 0.977657 
#> 
#> 
#> OLS estimates for 'invest' (equation 2)
#> Model Formula: invest ~ cprofits + Lcprofits + capital
#> <environment: 0x5bb5fa905190>
#> 
#>               Estimate Std. Error  t value   Pr(>|t|)    
#> (Intercept) 10.1257885  5.4655465  1.85266 0.08137418 .  
#> cprofits     0.4796356  0.0971146  4.93886 0.00012456 ***
#> Lcprofits    0.3330387  0.1008592  3.30202 0.00421173 ** 
#> capital     -0.1117947  0.0267276 -4.18275 0.00062445 ***
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#> 
#> Residual standard error: 1.009447 on 17 degrees of freedom
#> Number of observations: 21 Degrees of Freedom: 17 
#> SSR: 17.322702 MSE: 1.018982 Root MSE: 1.009447 
#> Multiple R-Squared: 0.931348 Adjusted R-Squared: 0.919233 
#> 
#> 
#> OLS estimates for 'pwage' (equation 3)
#> Model Formula: pwage ~ gnp + Lgnp + trend
#> <environment: 0x5bb5fa905190>
#> 
#>              Estimate Std. Error  t value   Pr(>|t|)    
#> (Intercept) 1.4970438  1.2700320  1.17874 0.25473559    
#> gnp         0.4394770  0.0324076 13.56093 1.5169e-10 ***
#> Lgnp        0.1460899  0.0374231  3.90373 0.00114240 ** 
#> trend       0.1302452  0.0319103  4.08160 0.00077703 ***
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#> 
#> Residual standard error: 0.767147 on 17 degrees of freedom
#> Number of observations: 21 Degrees of Freedom: 17 
#> SSR: 10.00475 MSE: 0.588515 Root MSE: 0.767147 
#> Multiple R-Squared: 0.987414 Adjusted R-Squared: 0.985193 
#> 

## Tab. 15.3 2SLS, 3SLS, I3SLS
## instruments: the system's predetermined and exogenous variables
inst <- ~ Lcprofits + capital + Lgnp + gexpenditure + taxes + trend + gwage
fm_2sls <- systemfit(system, method = "2SLS", inst = inst,
  methodResidCov = "noDfCor", data = KI)

fm_3sls <- systemfit(system, method = "3SLS", inst = inst,
  methodResidCov = "noDfCor", data = KI)

## iterated 3SLS: same call with iteration enabled via maxiter
fm_i3sls <- systemfit(system, method = "3SLS", inst = inst,
  methodResidCov = "noDfCor", maxiter = 100, data = KI)


############################################
## Transportation equipment manufacturing ##
############################################

## data
data("Equipment", package = "AER")

## Example 17.5
## Cobb-Douglas
fm_cd <- lm(log(valueadded/firms) ~ log(capital/firms) + log(labor/firms),
  data = Equipment)

## generalized Cobb-Douglas with Zellner-Revankar trafo
## (response is log(y) + theta * y instead of log(y))
GCobbDouglas <- function(theta)
 lm(I(log(valueadded/firms) + theta * valueadded/firms) ~ log(capital/firms) + log(labor/firms),
     data = Equipment)

## yields classical Cobb-Douglas for theta = 0
fm_cd0 <- GCobbDouglas(0)

## ML estimation of generalized model
## choose starting values from classical model
## par0 = (coefficients, theta = 0, sigma^2 = mean squared residual)
par0 <- as.vector(c(coef(fm_cd0), 0, mean(residuals(fm_cd0)^2)))

## set up likelihood function: negative log-likelihood of the generalized
## (Zellner-Revankar) Cobb-Douglas model
## par = (beta_0, beta_K, beta_L, theta, sigma^2)
nlogL <- function(par) {
  ## per-firm output and inputs (Equipment comes from the calling environment)
  y <- with(Equipment, valueadded/firms)
  k <- with(Equipment, capital/firms)
  l <- with(Equipment, labor/firms)

  theta <- par[4]
  ## conditional mean of the transformed response
  mu <- par[1] + par[2] * log(k) + par[3] * log(l)
  ## Zellner-Revankar transformation of the response
  z <- log(y) + theta * y

  ## Gaussian log-density of z plus the log-Jacobian of the transformation,
  ## log((1 + theta * y) / y); par[5] is the error variance sigma^2
  ll <- log(1 + theta * y) - log(y) +
    dnorm(z, mean = mu, sd = sqrt(par[5]), log = TRUE)
  -sum(ll)
}

## optimization
## (the NaN warnings stem from inadmissible trial values during the search,
## presumably log(1 + theta * Y) or sqrt(sigma2) of a negative argument)
opt <- optim(par0, nlogL, hessian = TRUE)
#> Warning: NaNs produced
#> Warning: NaNs produced

## Table 17.2
## estimates, asymptotic standard errors from the Hessian, and log-likelihood
opt$par
#> [1] 2.91468783 0.34997794 1.09232479 0.10666219 0.04274876
sqrt(diag(solve(opt$hessian)))[1:4]
#> [1] 0.36055500 0.09671393 0.14079052 0.05849648
-opt$value
#> [1] -8.939045

## re-fit ML model
## plug the ML estimate of theta back into the linear-model representation
fm_ml <- GCobbDouglas(opt$par[4])
deviance(fm_ml)
#> [1] 1.068552
sqrt(diag(vcov(fm_ml)))
#>        (Intercept) log(capital/firms)   log(labor/firms) 
#>         0.12533876         0.09435337         0.11497723 

## fit NLS model
## profile the residual sum of squares over theta
rss <- function(theta) deviance(GCobbDouglas(theta))
optim(0, rss)
#> Warning: one-dimensional optimization by Nelder-Mead is unreliable:
#> use "Brent" or optimize() directly
#> $par
#> [1] -0.03164062
#> 
#> $value
#> [1] 0.765549
#> 
#> $counts
#> function gradient 
#>       26       NA 
#> 
#> $convergence
#> [1] 0
#> 
#> $message
#> NULL
#> 
## one-dimensional problem, so optimize() is preferable to optim()
opt2 <- optimize(rss, c(-1, 1))
fm_nls <- GCobbDouglas(opt2$minimum)
## log-likelihood evaluated at the NLS solution
-nlogL(c(coef(fm_nls), opt2$minimum, mean(residuals(fm_nls)^2)))
#> [1] -13.62126


############################
## Municipal expenditures ##
############################

## Table 18.2
data("Municipalities", package = "AER")
## descriptive statistics for the municipalities panel
summary(Municipalities)
#>   municipality       year      expenditures        revenues       
#>  114    :   9   1979   :265   Min.   :0.01223   Min.   :0.006228  
#>  115    :   9   1980   :265   1st Qu.:0.01622   1st Qu.:0.011339  
#>  120    :   9   1981   :265   Median :0.01796   Median :0.012952  
#>  123    :   9   1982   :265   Mean   :0.01848   Mean   :0.013423  
#>  125    :   9   1983   :265   3rd Qu.:0.02014   3rd Qu.:0.014821  
#>  126    :   9   1984   :265   Max.   :0.03388   Max.   :0.029142  
#>  (Other):2331   (Other):795                                       
#>      grants        
#>  Min.   :0.001571  
#>  1st Qu.:0.004488  
#>  Median :0.004978  
#>  Mean   :0.005236  
#>  3rd Qu.:0.005539  
#>  Max.   :0.012589  
#>                    


###########################
## Program effectiveness ##
###########################

## Table 21.1, col. "Probit"
data("ProgramEffectiveness", package = "AER")
## probit model for grade improvement
fm_probit <- glm(grade ~ average + testscore + participation,
  data = ProgramEffectiveness, family = binomial(link = "probit"))
summary(fm_probit)
#> 
#> Call:
#> glm(formula = grade ~ average + testscore + participation, family = binomial(link = "probit"), 
#>     data = ProgramEffectiveness)
#> 
#> Coefficients:
#>                  Estimate Std. Error z value Pr(>|z|)   
#> (Intercept)      -7.45231    2.57152  -2.898  0.00376 **
#> average           1.62581    0.68973   2.357  0.01841 * 
#> testscore         0.05173    0.08119   0.637  0.52406   
#> participationyes  1.42633    0.58695   2.430  0.01510 * 
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#> 
#> (Dispersion parameter for binomial family taken to be 1)
#> 
#>     Null deviance: 41.183  on 31  degrees of freedom
#> Residual deviance: 25.638  on 28  degrees of freedom
#> AIC: 33.638
#> 
#> Number of Fisher Scoring iterations: 6
#> 


####################################
## Labor force participation data ##
####################################

## data and transformations
data("PSID1976", package = "AER")
## indicator for presence of any children (young or old)
PSID1976$kids <- with(PSID1976, factor((youngkids + oldkids) > 0,
  levels = c(FALSE, TRUE), labels = c("no", "yes")))
## family income net of the wife's earnings, in $1000
PSID1976$nwincome <- with(PSID1976, (fincome - hours * wage)/1000)

## Example 4.1, Table 4.2
## (reproduced in Example 7.1, Table 7.1)
## earnings equation, fitted for participating women only
gr_lm <- lm(log(hours * wage) ~ age + I(age^2) + education + kids,
  data = PSID1976, subset = participation == "yes")
summary(gr_lm)
#> 
#> Call:
#> lm(formula = log(hours * wage) ~ age + I(age^2) + education + 
#>     kids, data = PSID1976, subset = participation == "yes")
#> 
#> Residuals:
#>     Min      1Q  Median      3Q     Max 
#> -4.5305 -0.5266  0.3003  0.8474  1.7568 
#> 
#> Coefficients:
#>               Estimate Std. Error t value Pr(>|t|)   
#> (Intercept)  3.2400965  1.7674296   1.833  0.06747 . 
#> age          0.2005573  0.0838603   2.392  0.01721 * 
#> I(age^2)    -0.0023147  0.0009869  -2.345  0.01947 * 
#> education    0.0674727  0.0252486   2.672  0.00782 **
#> kidsyes     -0.3511952  0.1475326  -2.380  0.01773 * 
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#> 
#> Residual standard error: 1.19 on 423 degrees of freedom
#> Multiple R-squared:  0.041,	Adjusted R-squared:  0.03193 
#> F-statistic: 4.521 on 4 and 423 DF,  p-value: 0.001382
#> 
## estimated covariance matrix of the coefficient estimates
vcov(gr_lm)
#>             (Intercept)           age      I(age^2)     education       kidsyes
#> (Intercept)  3.12380756 -1.440901e-01  1.661740e-03 -9.260920e-03  2.674867e-02
#> age         -0.14409007  7.032544e-03 -8.232369e-05  5.085495e-05 -2.641203e-03
#> I(age^2)     0.00166174 -8.232369e-05  9.739279e-07 -4.976114e-07  3.841018e-05
#> education   -0.00926092  5.085495e-05 -4.976114e-07  6.374903e-04 -5.461931e-05
#> kidsyes      0.02674867 -2.641203e-03  3.841018e-05 -5.461931e-05  2.176587e-02

## Example 4.5
## the overall F statistic is part of the summary output
summary(gr_lm)
#> 
#> Call:
#> lm(formula = log(hours * wage) ~ age + I(age^2) + education + 
#>     kids, data = PSID1976, subset = participation == "yes")
#> 
#> Residuals:
#>     Min      1Q  Median      3Q     Max 
#> -4.5305 -0.5266  0.3003  0.8474  1.7568 
#> 
#> Coefficients:
#>               Estimate Std. Error t value Pr(>|t|)   
#> (Intercept)  3.2400965  1.7674296   1.833  0.06747 . 
#> age          0.2005573  0.0838603   2.392  0.01721 * 
#> I(age^2)    -0.0023147  0.0009869  -2.345  0.01947 * 
#> education    0.0674727  0.0252486   2.672  0.00782 **
#> kidsyes     -0.3511952  0.1475326  -2.380  0.01773 * 
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#> 
#> Residual standard error: 1.19 on 423 degrees of freedom
#> Multiple R-squared:  0.041,	Adjusted R-squared:  0.03193 
#> F-statistic: 4.521 on 4 and 423 DF,  p-value: 0.001382
#> 
## or equivalently
## compare against the intercept-only model via an F test
gr_lm1 <- lm(log(hours * wage) ~ 1, data = PSID1976, subset = participation == "yes")
anova(gr_lm1, gr_lm)
#> Analysis of Variance Table
#> 
#> Model 1: log(hours * wage) ~ 1
#> Model 2: log(hours * wage) ~ age + I(age^2) + education + kids
#>   Res.Df    RSS Df Sum of Sq      F   Pr(>F)   
#> 1    427 625.08                                
#> 2    423 599.46  4    25.625 4.5206 0.001382 **
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

## Example 21.4, p. 681, and Tab. 21.3, p. 682
## probit models for participation: full model, model without kids,
## and model with all regressors nested within kids
gr_probit1 <- glm(participation ~ age + I(age^2) + I(fincome/10000) + education + kids,
  data = PSID1976, family = binomial(link = "probit") )
gr_probit2 <- glm(participation ~ age + I(age^2) + I(fincome/10000) + education,
  data = PSID1976, family = binomial(link = "probit"))
gr_probit3 <- glm(participation ~ kids/(age + I(age^2) + I(fincome/10000) + education),
  data = PSID1976, family = binomial(link = "probit"))
## LR test of all coefficients
lrtest(gr_probit1)
#> Likelihood ratio test
#> 
#> Model 1: participation ~ age + I(age^2) + I(fincome/10000) + education + 
#>     kids
#> Model 2: participation ~ 1
#>   #Df  LogLik Df  Chisq Pr(>Chisq)    
#> 1   6 -490.85                         
#> 2   1 -514.87 -5 48.051  3.468e-09 ***
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
## Chow-type test
## LR test of the kids-nested model against the model without kids
lrtest(gr_probit2, gr_probit3)
#> Likelihood ratio test
#> 
#> Model 1: participation ~ age + I(age^2) + I(fincome/10000) + education
#> Model 2: participation ~ kids/(age + I(age^2) + I(fincome/10000) + education)
#>   #Df  LogLik Df  Chisq Pr(>Chisq)  
#> 1   5 -496.87                       
#> 2  10 -489.48  5 14.774    0.01137 *
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
## equivalently:
anova(gr_probit2, gr_probit3, test = "Chisq")
#> Analysis of Deviance Table
#> 
#> Model 1: participation ~ age + I(age^2) + I(fincome/10000) + education
#> Model 2: participation ~ kids/(age + I(age^2) + I(fincome/10000) + education)
#>   Resid. Df Resid. Dev Df Deviance Pr(>Chi)  
#> 1       748     993.73                       
#> 2       743     978.96  5   14.774  0.01137 *
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
## Table 21.3
## full probit model estimates
summary(gr_probit1)
#> 
#> Call:
#> glm(formula = participation ~ age + I(age^2) + I(fincome/10000) + 
#>     education + kids, family = binomial(link = "probit"), data = PSID1976)
#> 
#> Coefficients:
#>                    Estimate Std. Error z value Pr(>|z|)    
#> (Intercept)      -4.1568189  1.4040095  -2.961 0.003070 ** 
#> age               0.1853957  0.0662076   2.800 0.005107 ** 
#> I(age^2)         -0.0024259  0.0007762  -3.125 0.001775 ** 
#> I(fincome/10000)  0.0458029  0.0430557   1.064 0.287417    
#> education         0.0981824  0.0228932   4.289  1.8e-05 ***
#> kidsyes          -0.4489872  0.1300252  -3.453 0.000554 ***
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#> 
#> (Dispersion parameter for binomial family taken to be 1)
#> 
#>     Null deviance: 1029.7  on 752  degrees of freedom
#> Residual deviance:  981.7  on 747  degrees of freedom
#> AIC: 993.7
#> 
#> Number of Fisher Scoring iterations: 4
#> 

## Example 22.8, Table 22.7, p. 786
## Heckman selection models: participation equation plus wage equation
library("sampleSelection")
#> Loading required package: maxLik
#> Loading required package: miscTools
#> 
#> Please cite the 'maxLik' package as:
#> Henningsen, Arne and Toomet, Ott (2011). maxLik: A package for maximum likelihood estimation in R. Computational Statistics 26(3), 443-458. DOI 10.1007/s00180-010-0217-1.
#> 
#> If you have questions, suggestions, or comments regarding the 'maxLik' package, please use a forum or 'tracker' at maxLik's R-Forge site:
#> https://r-forge.r-project.org/projects/maxlik/
## two-step (Heckit) estimator
gr_2step <- selection(participation ~ age + I(age^2) + fincome + education + kids, 
  wage ~ experience + I(experience^2) + education + city,
  data = PSID1976, method = "2step")
## full maximum-likelihood estimator
gr_ml <- selection(participation ~ age + I(age^2) + fincome + education + kids, 
  wage ~ experience + I(experience^2) + education + city,
  data = PSID1976, method = "ml")
## OLS on the selected sample, for comparison
gr_ols <- lm(wage ~ experience + I(experience^2) + education + city, 
  data = PSID1976, subset = participation == "yes")
## NOTE: ML estimates agree with Greene, 5e errata. 
## Standard errors are based on the Hessian (here), while Greene has BHHH/OPG.



####################
## Ship accidents ##
####################

## subset data
## drop cells with zero aggregate service (offset would be log(0))
data("ShipAccidents", package = "AER")
sa <- subset(ShipAccidents, service > 0)

## Table 21.20
## Poisson regression with log(service) offset, i.e. modeling incident rates
sa_full <- glm(incidents ~ type + construction + operation, family = poisson,
  data = sa, offset = log(service))
summary(sa_full)
#> 
#> Call:
#> glm(formula = incidents ~ type + construction + operation, family = poisson, 
#>     data = sa, offset = log(service))
#> 
#> Coefficients:
#>                     Estimate Std. Error z value Pr(>|z|)    
#> (Intercept)         -6.40288    0.21752 -29.435  < 2e-16 ***
#> typeB               -0.54471    0.17761  -3.067  0.00216 ** 
#> typeC               -0.68876    0.32903  -2.093  0.03632 *  
#> typeD               -0.07431    0.29056  -0.256  0.79815    
#> typeE                0.32053    0.23575   1.360  0.17396    
#> construction1965-69  0.69585    0.14966   4.650 3.33e-06 ***
#> construction1970-74  0.81746    0.16984   4.813 1.49e-06 ***
#> construction1975-79  0.44497    0.23324   1.908  0.05642 .  
#> operation1975-79     0.38386    0.11826   3.246  0.00117 ** 
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#> 
#> (Dispersion parameter for poisson family taken to be 1)
#> 
#>     Null deviance: 146.328  on 33  degrees of freedom
#> Residual deviance:  38.963  on 25  degrees of freedom
#> AIC: 154.83
#> 
#> Number of Fisher Scoring iterations: 5
#> 

## restricted model: ship type excluded
sa_notype <- glm(incidents ~ construction + operation, family = poisson,
  data = sa, offset = log(service))
summary(sa_notype)
#> 
#> Call:
#> glm(formula = incidents ~ construction + operation, family = poisson, 
#>     data = sa, offset = log(service))
#> 
#> Coefficients:
#>                     Estimate Std. Error z value Pr(>|z|)    
#> (Intercept)          -6.9470     0.1269 -54.725  < 2e-16 ***
#> construction1965-69   0.7536     0.1488   5.066 4.07e-07 ***
#> construction1970-74   1.0503     0.1576   6.666 2.63e-11 ***
#> construction1975-79   0.6999     0.2203   3.177  0.00149 ** 
#> operation1975-79      0.3872     0.1181   3.279  0.00104 ** 
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#> 
#> (Dispersion parameter for poisson family taken to be 1)
#> 
#>     Null deviance: 146.328  on 33  degrees of freedom
#> Residual deviance:  62.536  on 29  degrees of freedom
#> AIC: 170.4
#> 
#> Number of Fisher Scoring iterations: 4
#> 

## restricted model: construction period excluded
sa_noperiod <- glm(incidents ~ type + operation, family = poisson,
  data = sa, offset = log(service))
summary(sa_noperiod)
#> 
#> Call:
#> glm(formula = incidents ~ type + operation, family = poisson, 
#>     data = sa, offset = log(service))
#> 
#> Coefficients:
#>                  Estimate Std. Error z value Pr(>|z|)    
#> (Intercept)       -5.8000     0.1784 -32.508  < 2e-16 ***
#> typeB             -0.7437     0.1691  -4.397 1.10e-05 ***
#> typeC             -0.7549     0.3276  -2.304   0.0212 *  
#> typeD             -0.1843     0.2876  -0.641   0.5215    
#> typeE              0.3842     0.2348   1.636   0.1018    
#> operation1975-79   0.5001     0.1116   4.483 7.37e-06 ***
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#> 
#> (Dispersion parameter for poisson family taken to be 1)
#> 
#>     Null deviance: 146.328  on 33  degrees of freedom
#> Residual deviance:  70.364  on 28  degrees of freedom
#> AIC: 180.23
#> 
#> Number of Fisher Scoring iterations: 5
#> 

## model comparison
## Likelihood-ratio (deviance) tests of the full model against each
## restricted model. For nested Poisson GLMs the deviance difference is
## asymptotically chi-squared with df equal to the number of dropped
## dummies. Both tests reject: ship type and construction period each
## contribute significantly.
anova(sa_full, sa_notype, test = "Chisq")
#> Analysis of Deviance Table
#> 
#> Model 1: incidents ~ type + construction + operation
#> Model 2: incidents ~ construction + operation
#>   Resid. Df Resid. Dev Df Deviance  Pr(>Chi)    
#> 1        25     38.963                          
#> 2        29     62.536 -4  -23.573 9.725e-05 ***
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
anova(sa_full, sa_noperiod, test = "Chisq")
#> Analysis of Deviance Table
#> 
#> Model 1: incidents ~ type + construction + operation
#> Model 2: incidents ~ type + operation
#>   Resid. Df Resid. Dev Df Deviance  Pr(>Chi)    
#> 1        25     38.963                          
#> 2        28     70.364 -3  -31.401 6.998e-07 ***
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
## test for overdispersion
## AER::dispersiontest checks H0: equidispersion (Var = mean) against
## overdispersion. The default form tests dispersion > 1; with trafo = 2
## the alternative has variance mean + alpha * mean^2 (NB2-type), see
## ?dispersiontest. Neither test rejects here, so the Poisson
## specification appears adequate for these data.
dispersiontest(sa_full)
#> 
#> 	Overdispersion test
#> 
#> data:  sa_full
#> z = 0.93429, p-value = 0.1751
#> alternative hypothesis: true dispersion is greater than 1
#> sample estimates:
#> dispersion 
#>   1.317918 
#> 
dispersiontest(sa_full, trafo = 2)
#> 
#> 	Overdispersion test
#> 
#> data:  sa_full
#> z = -0.6129, p-value = 0.73
#> alternative hypothesis: true alpha is greater than 0
#> sample estimates:
#>      alpha 
#> -0.0111868 
#> 


######################################
## Fair's extramarital affairs data ##
######################################

## data
data("Affairs", package = "AER")

## Tab. 22.3 and 22.4
## A sequence of models for the number of affairs, all with the same
## regressors, reproducing Greene's Tables 22.3/22.4:
## - fm_ols:    linear regression on the raw (censored) count
## - fm_probit: probit for the binary event "any affair" (affairs > 0)
fm_ols <- lm(affairs ~ age + yearsmarried + religiousness + occupation + rating,
  data = Affairs)
fm_probit <- glm(I(affairs > 0) ~ age + yearsmarried + religiousness + occupation + rating,
  data = Affairs, family = binomial(link = "probit"))

## - fm_tobit:  tobit, left-censored at 0 (AER::tobit default)
## - fm_tobit2: additionally right-censored at 4
fm_tobit <- tobit(affairs ~ age + yearsmarried + religiousness + occupation + rating,
  data = Affairs)
fm_tobit2 <- tobit(affairs ~ age + yearsmarried + religiousness + occupation + rating,
  right = 4, data = Affairs)

## - fm_pois: Poisson count regression
fm_pois <- glm(affairs ~ age + yearsmarried + religiousness + occupation + rating,
  data = Affairs, family = poisson)

## - fm_nb: negative binomial count regression (MASS::glm.nb)
library("MASS")
fm_nb <- glm.nb(affairs ~ age + yearsmarried + religiousness + occupation + rating,
  data = Affairs)

## Tab. 22.6
## - fm_zip: zero-inflated Poisson (pscl::zeroinfl); the formula's two
##   parts (count model | zero-inflation model) use the same regressors
library("pscl")
fm_zip <- zeroinfl(affairs ~ age + yearsmarried + religiousness + occupation + rating | age + 
  yearsmarried + religiousness + occupation + rating, data = Affairs)

######################
## Strike durations ##
######################

## data and package
data("StrikeDuration", package = "AER")
library("MASS")

## Table 22.10
## Maximum-likelihood fits of three candidate duration distributions.
## The transformed quantities printed below convert fitdistr's
## parameterizations into the ones reported by Greene (see note below).
fit_exp <- fitdistr(StrikeDuration$duration, "exponential")
fit_wei <- fitdistr(StrikeDuration$duration, "weibull")
## reciprocal of the fitted Weibull scale parameter
fit_wei$estimate[2]^(-1)
#>      scale 
#> 0.02432637 
fit_lnorm <- fitdistr(StrikeDuration$duration, "lognormal")
## reciprocal of sdlog, and exp(-meanlog)
1/fit_lnorm$estimate[2]
#>     sdlog 
#> 0.7787838 
exp(-fit_lnorm$estimate[1])
#>   meanlog 
#> 0.0448489 
## Weibull and lognormal distribution have
## different parameterizations, see Greene p. 794
## Example 22.10
## Weibull accelerated-failure-time regression of strike duration on
## unanticipated output (uoutput). Surv(duration) with no status argument
## treats all durations as fully observed (uncensored).
library("survival")
fm_wei <- survreg(Surv(duration) ~ uoutput, dist = "weibull", data = StrikeDuration)
summary(fm_wei)
#> 
#> Call:
#> survreg(formula = Surv(duration) ~ uoutput, data = StrikeDuration, 
#>     dist = "weibull")
#>                Value Std. Error     z      p
#> (Intercept)  3.77977    0.13670 27.65 <2e-16
#> uoutput     -9.33220    2.93756 -3.18 0.0015
#> Log(scale)  -0.00783    0.10050 -0.08 0.9379
#> 
#> Scale= 0.992 
#> 
#> Weibull distribution
#> Loglik(model)= -289.8   Loglik(intercept only)= -294.4
#> 	Chisq= 9.28 on 1 degrees of freedom, p= 0.0023 
#> Number of Newton-Raphson Iterations: 6 
#> n= 62 
#> 
# }