Skip to contents

This manual page collects a list of examples from the book. Some solutions might not be exact and the list is certainly not complete. If you have suggestions for improvement (preferably in the form of code), please contact the package maintainer.

References

Baltagi, B.H. (2002). Econometrics, 3rd ed., Berlin: Springer-Verlag.

Examples

#> Loading required namespace: tseries
#> Registered S3 method overwritten by 'quantmod':
#>   method            from
#>   as.zoo.data.frame zoo 
#> Loading required namespace: strucchange
#> Loading required namespace: plm
#> Loading required namespace: systemfit
################################
## Cigarette consumption data ##
################################

## data
## Load the CigarettesB data (46 observations: 44 residual df + 2 coefficients
## in the regression below); variables presumably in logs -- see ?CigarettesB.
data("CigarettesB", package = "AER")

## Table 3.3
## Bivariate regression of cigarette consumption on price
## (slope is the price elasticity if both variables are logged).
cig_lm <- lm(packs ~ price, data = CigarettesB)
summary(cig_lm)
#> 
#> Call:
#> lm(formula = packs ~ price, data = CigarettesB)
#> 
#> Residuals:
#>      Min       1Q   Median       3Q      Max 
#> -0.45472 -0.09968  0.00612  0.11553  0.29346 
#> 
#> Coefficients:
#>             Estimate Std. Error t value Pr(>|t|)    
#> (Intercept)   5.0941     0.0627  81.247  < 2e-16 ***
#> price        -1.1983     0.2818  -4.253 0.000108 ***
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#> 
#> Residual standard error: 0.163 on 44 degrees of freedom
#> Multiple R-squared:  0.2913,	Adjusted R-squared:  0.2752 
#> F-statistic: 18.08 on 1 and 44 DF,  p-value: 0.0001085
#> 

## Figure 3.9
## Residuals vs. price: visual check for remaining structure in the fit;
## the dashed horizontal line marks zero.
plot(residuals(cig_lm) ~ price, data = CigarettesB)
abline(h = 0, lty = 2)


## Figure 3.10
## Scatter plot of packs vs. price with the fitted regression line and a
## pointwise 95% confidence band, evaluated on a 30-point grid of prices
## spanning the observed range.
cig_pred <- with(CigarettesB,
  data.frame(price = seq(from = min(price), to = max(price), length.out = 30)))
## (`length.out` spelled out; the original relied on partial matching of
## `length`, which lintr flags as a partial-argument-matching anti-pattern.)
cig_pred <- cbind(cig_pred, predict(cig_lm, newdata = cig_pred, interval = "confidence"))
plot(packs ~ price, data = CigarettesB)
lines(fit ~ price, data = cig_pred)
lines(lwr ~ price, data = cig_pred, lty = 2)  # lower confidence limit
lines(upr ~ price, data = cig_pred, lty = 2)  # upper confidence limit


## Chapter 5: diagnostic tests (p. 111-115)
## Extended specification: add income as a second regressor.
cig_lm2 <- lm(packs ~ price + income, data = CigarettesB)
summary(cig_lm2)
#> 
#> Call:
#> lm(formula = packs ~ price + income, data = CigarettesB)
#> 
#> Residuals:
#>      Min       1Q   Median       3Q      Max 
#> -0.41867 -0.10683  0.00757  0.11738  0.32868 
#> 
#> Coefficients:
#>             Estimate Std. Error t value Pr(>|t|)    
#> (Intercept)   4.2997     0.9089   4.730 2.43e-05 ***
#> price        -1.3383     0.3246  -4.123 0.000168 ***
#> income        0.1724     0.1968   0.876 0.385818    
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#> 
#> Residual standard error: 0.1634 on 43 degrees of freedom
#> Multiple R-squared:  0.3037,	Adjusted R-squared:  0.2713 
#> F-statistic: 9.378 on 2 and 43 DF,  p-value: 0.0004168
#> 
## Glejser tests (p. 112)
## Regress the absolute residuals on several transformations of income;
## a significant slope in any auxiliary regression indicates
## heteroskedasticity related to income.
ares <- abs(residuals(cig_lm2))
summary(lm(ares ~ income, data = CigarettesB))
#> 
#> Call:
#> lm(formula = ares ~ income, data = CigarettesB)
#> 
#> Residuals:
#>      Min       1Q   Median       3Q      Max 
#> -0.13738 -0.07061 -0.01891  0.07253  0.24508 
#> 
#> Coefficients:
#>             Estimate Std. Error t value Pr(>|t|)  
#> (Intercept)  1.16169    0.46267   2.511   0.0158 *
#> income      -0.21689    0.09684  -2.240   0.0302 *
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#> 
#> Residual standard error: 0.09242 on 44 degrees of freedom
#> Multiple R-squared:  0.1023,	Adjusted R-squared:  0.08193 
#> F-statistic: 5.016 on 1 and 44 DF,  p-value: 0.03022
#> 
## I() protects the arithmetic transformation inside the formula.
summary(lm(ares ~ I(1/income), data = CigarettesB))
#> 
#> Call:
#> lm(formula = ares ~ I(1/income), data = CigarettesB)
#> 
#> Residuals:
#>      Min       1Q   Median       3Q      Max 
#> -0.14143 -0.07235 -0.01921  0.07227  0.24186 
#> 
#> Coefficients:
#>             Estimate Std. Error t value Pr(>|t|)  
#> (Intercept)  -0.9489     0.4671  -2.032   0.0483 *
#> I(1/income)   5.1287     2.2277   2.302   0.0261 *
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#> 
#> Residual standard error: 0.09215 on 44 degrees of freedom
#> Multiple R-squared:  0.1075,	Adjusted R-squared:  0.08722 
#> F-statistic:   5.3 on 1 and 44 DF,  p-value: 0.02611
#> 
summary(lm(ares ~ I(1/sqrt(income)), data = CigarettesB))
#> 
#> Call:
#> lm(formula = ares ~ I(1/sqrt(income)), data = CigarettesB)
#> 
#> Residuals:
#>      Min       1Q   Median       3Q      Max 
#> -0.14041 -0.07192 -0.01914  0.07233  0.24267 
#> 
#> Coefficients:
#>                   Estimate Std. Error t value Pr(>|t|)  
#> (Intercept)        -2.0045     0.9317  -2.151   0.0370 *
#> I(1/sqrt(income))   4.6541     2.0352   2.287   0.0271 *
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#> 
#> Residual standard error: 0.09222 on 44 degrees of freedom
#> Multiple R-squared:  0.1062,	Adjusted R-squared:  0.08591 
#> F-statistic: 5.229 on 1 and 44 DF,  p-value: 0.02708
#> 
summary(lm(ares ~ sqrt(income), data = CigarettesB))
#> 
#> Call:
#> lm(formula = ares ~ sqrt(income), data = CigarettesB)
#> 
#> Residuals:
#>      Min       1Q   Median       3Q      Max 
#> -0.13838 -0.07105 -0.01899  0.07247  0.24428 
#> 
#> Coefficients:
#>              Estimate Std. Error t value Pr(>|t|)  
#> (Intercept)    2.2172     0.9273   2.391   0.0211 *
#> sqrt(income)  -0.9571     0.4243  -2.255   0.0291 *
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#> 
#> Residual standard error: 0.09235 on 44 degrees of freedom
#> Multiple R-squared:  0.1036,	Adjusted R-squared:  0.08326 
#> F-statistic: 5.087 on 1 and 44 DF,  p-value: 0.02913
#> 
## Goldfeld-Quandt test (p. 112)
## Observations ordered by income; the middle `fraction = 12` observations
## are dropped before comparing the two subsample residual variances.
gqtest(cig_lm2, order.by = ~ income, data = CigarettesB, fraction = 12, alternative = "less")
#> 
#> 	Goldfeld-Quandt test
#> 
#> data:  cig_lm2
#> GQ = 0.31846, df1 = 14, df2 = 14, p-value = 0.02017
#> alternative hypothesis: variance decreases from segment 1 to 2
#> 
## NOTE: Baltagi computes the test statistic as mss1/mss2,
## i.e., tries to find decreasing variances. gqtest() always uses
## mss2/mss1 and has an "alternative" argument.

## Spearman rank correlation test (p. 113)
## Rank correlation between |residuals| (ares, computed in the Glejser
## section above) and income; a nonparametric heteroskedasticity check.
cor.test(~ ares + income, data = CigarettesB, method = "spearman")
#> 
#> 	Spearman's rank correlation rho
#> 
#> data:  ares and income
#> S = 20784, p-value = 0.05813
#> alternative hypothesis: true rho is not equal to 0
#> sample estimates:
#>        rho 
#> -0.2817761 
#> 
## Breusch-Pagan test (p. 113)
## student = FALSE gives the original (non-studentized) BP statistic.
bptest(cig_lm2, varformula = ~ income, data = CigarettesB, student = FALSE)
#> 
#> 	Breusch-Pagan test
#> 
#> data:  cig_lm2
#> BP = 5.4852, df = 1, p-value = 0.01918
#> 
## White test (Table 5.1, p. 113)
## White's test as a studentized BP test with regressors, squares,
## and the cross product as the variance formula.
bptest(cig_lm2, ~ income * price + I(income^2) + I(price^2), data = CigarettesB)
#> 
#> 	studentized Breusch-Pagan test
#> 
#> data:  cig_lm2
#> BP = 15.656, df = 5, p-value = 0.007897
#> 
#> 
## White HC standard errors (Table 5.2, p. 114)
## Same point estimates as cig_lm2, but heteroskedasticity-consistent
## (HC1) standard errors.
coeftest(cig_lm2, vcov = vcovHC(cig_lm2, type = "HC1"))
#> 
#> t test of coefficients:
#> 
#>             Estimate Std. Error t value  Pr(>|t|)    
#> (Intercept)  4.29966    1.09523  3.9258 0.0003076 ***
#> price       -1.33833    0.34337 -3.8977 0.0003352 ***
#> income       0.17239    0.23661  0.7286 0.4702172    
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#> 
## Jarque-Bera test (Figure 5.2, p. 115)
## Histogram of the residuals plus a formal normality test.
hist(residuals(cig_lm2), breaks = 16, ylim = c(0, 10), col = "lightgray")

library("tseries")
jarque.bera.test(residuals(cig_lm2))
#> 
#> 	Jarque Bera Test
#> 
#> data:  residuals(cig_lm2)
#> X-squared = 0.28935, df = 2, p-value = 0.8653
#> 

## Tables 8.1 and 8.2
## Leave-one-out influence diagnostics (DFBETAS, DFFITS, covariance
## ratio, Cook's distance, hat values); rows flagged "*" are influential.
influence.measures(cig_lm2)
#> Influence measures of
#> 	 lm(formula = packs ~ price + income, data = CigarettesB) :
#> 
#>      dfb.1_  dfb.pric dfb.incm    dffit cov.r   cook.d    hat inf
#> AL  0.14503  0.069010 -0.14188  0.19186 1.070 1.23e-02 0.0480    
#> AZ -0.11311  0.033072  0.10173 -0.25077 0.968 2.05e-02 0.0315    
#> AR  0.56419  0.376064 -0.56381  0.66702 0.847 1.36e-01 0.0847    
#> CA -0.01386 -0.255192  0.02769 -0.31637 1.114 3.34e-02 0.0975    
#> CT  0.15244 -0.022453 -0.14793 -0.20087 1.219 1.37e-02 0.1354   *
#> DE -0.12654 -0.037389  0.12991  0.23129 0.992 1.76e-02 0.0326    
#> DC  0.23239  0.001472 -0.22823 -0.29167 1.149 2.86e-02 0.1104    
#> FL  0.01116  0.050233 -0.01301  0.07389 1.112 1.86e-03 0.0431    
#> GA -0.00269 -0.028971  0.00551  0.04527 1.114 6.99e-04 0.0402    
#> ID -0.10101 -0.013791  0.09591 -0.15005 1.079 7.59e-03 0.0413    
#> IL -0.00101  0.000075  0.00101  0.00178 1.118 1.09e-06 0.0399    
#> IN -0.03076 -0.153574  0.04353  0.19363 1.105 1.26e-02 0.0650    
#> IA  0.00638  0.007696 -0.00639  0.01509 1.107 7.77e-05 0.0310    
#> KS  0.00314 -0.002575 -0.00389 -0.04083 1.092 5.68e-04 0.0223    
#> KY -0.09222 -0.725107  0.14758  0.80979 1.113 2.10e-01 0.1977   *
#> LA  0.31705  0.226157 -0.31744  0.38745 1.022 4.91e-02 0.0761    
#> ME  0.17424  0.309538 -0.18410  0.40000 0.940 5.13e-02 0.0553    
#> MD  0.39398  0.378023 -0.41346 -0.50701 1.073 8.40e-02 0.1216    
#> MA  0.19840  0.073723 -0.20018 -0.23411 1.126 1.84e-02 0.0856    
#> MI -0.00898  0.025355  0.00991  0.12316 1.052 5.10e-03 0.0238    
#> MN  0.01342  0.042769 -0.01537  0.05001 1.172 8.53e-04 0.0864    
#> MS  0.06675  0.002382 -0.06369  0.08277 1.171 2.33e-03 0.0883    
#> MO -0.03986 -0.089643  0.04634  0.10541 1.154 3.78e-03 0.0787    
#> MT -0.04820  0.067706  0.03769 -0.19283 1.021 1.24e-02 0.0312    
#> NE  0.02185  0.027580 -0.02540 -0.09498 1.072 3.05e-03 0.0243    
#> NV  0.05366  0.347879 -0.06990  0.45042 0.937 6.47e-02 0.0646    
#> NH -0.34967 -0.257318  0.36079  0.40764 1.142 5.53e-02 0.1308    
#> NJ  0.12527 -0.004859 -0.12241 -0.15616 1.234 8.29e-03 0.1394   *
#> NM -0.38923 -0.064661  0.37379 -0.49010 0.901 7.56e-02 0.0639    
#> NY  0.01626 -0.028925 -0.01431 -0.05033 1.175 8.64e-04 0.0888    
#> ND -0.15387 -0.005358  0.14232 -0.31360 0.885 3.12e-02 0.0295    
#> OH -0.00856 -0.028773  0.01108  0.04159 1.117 5.90e-04 0.0423    
#> OK -0.12028 -0.047228  0.11708 -0.15599 1.094 8.21e-03 0.0505    
#> PA  0.00741 -0.001370 -0.00765 -0.02452 1.100 2.05e-04 0.0257    
#> RI  0.00218  0.114469 -0.00738  0.16917 1.088 9.64e-03 0.0504    
#> SC  0.04282 -0.092254 -0.03271  0.15382 1.132 8.02e-03 0.0725    
#> SD -0.04178  0.064802  0.03307 -0.14581 1.079 7.17e-03 0.0402    
#> TN  0.01884 -0.062711 -0.01037  0.15431 1.046 7.98e-03 0.0294    
#> TX -0.06472 -0.095510  0.06734 -0.12671 1.113 5.44e-03 0.0546    
#> UT -0.77803 -0.317059  0.76368 -0.88760 0.679 2.24e-01 0.0856   *
#> VT -0.02396 -0.065794  0.03278  0.20305 0.979 1.35e-02 0.0243    
#> VA  0.05235  0.069110 -0.05673 -0.08713 1.156 2.59e-03 0.0773    
#> WA -0.00136 -0.010137  0.00187 -0.01242 1.175 5.27e-05 0.0866    
#> WV -0.11903  0.031391  0.11039 -0.17766 1.122 1.07e-02 0.0709    
#> WI  0.00494  0.006306 -0.00481  0.01736 1.100 1.03e-04 0.0254    
#> WY -0.00156 -0.025435  0.00388  0.03501 1.135 4.18e-04 0.0555    


#####################################
## US consumption data (1950-1993) ##
#####################################

## data
## Bivariate annual time series (expenditure and income), plotted in
## a single panel with two colors.
data("USConsump1993", package = "AER")
plot(USConsump1993, plot.type = "single", col = 1:2)


## Chapter 5 (p. 122-125)
## Static consumption function: expenditure regressed on income.
fm <- lm(expenditure ~ income, data = USConsump1993)
summary(fm)
#> 
#> Call:
#> lm(formula = expenditure ~ income, data = USConsump1993)
#> 
#> Residuals:
#>     Min      1Q  Median      3Q     Max 
#> -294.52  -67.02    4.64   90.02  325.84 
#> 
#> Coefficients:
#>               Estimate Std. Error t value Pr(>|t|)    
#> (Intercept) -65.795821  90.990824  -0.723    0.474    
#> income        0.915623   0.008648 105.874   <2e-16 ***
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#> 
#> Residual standard error: 153.6 on 42 degrees of freedom
#> Multiple R-squared:  0.9963,	Adjusted R-squared:  0.9962 
#> F-statistic: 1.121e+04 on 1 and 42 DF,  p-value: < 2.2e-16
#> 
## Durbin-Watson test (p. 122)
## Tests for first-order serial correlation in the residuals.
dwtest(fm)
#> 
#> 	Durbin-Watson test
#> 
#> data:  fm
#> DW = 0.46078, p-value = 3.274e-11
#> alternative hypothesis: true autocorrelation is greater than 0
#> 
## Breusch-Godfrey test (Table 5.4, p. 124)
bgtest(fm)
#> 
#> 	Breusch-Godfrey test for serial correlation of order up to 1
#> 
#> data:  fm
#> LM test = 24.901, df = 1, p-value = 6.034e-07
#> 
## Newey-West standard errors (Table 5.5, p. 125)
## HAC covariance with truncation lag 3, no prewhitening,
## and a small-sample degrees-of-freedom adjustment.
coeftest(fm, vcov = NeweyWest(fm, lag = 3, prewhite = FALSE, adjust = TRUE)) 
#> 
#> t test of coefficients:
#> 
#>               Estimate Std. Error t value Pr(>|t|)    
#> (Intercept) -65.795821 133.345400 -0.4934   0.6243    
#> income        0.915623   0.015458 59.2319   <2e-16 ***
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#> 

## Chapter 8
library("strucchange")
## Recursive residuals
## (standardized one-step-ahead prediction errors of fm; basis for CUSUM)
rr <- recresid(fm)
rr
#>  [1]   24.900681   30.354827   50.893291   63.260389  -49.805907  -28.404311
#>  [7]  -31.520559   53.194256   67.696114   -2.646556    9.679147   39.658827
#> [13]  -40.126557  -30.260756    2.605633  -78.941467   27.185066   64.363195
#> [19]  -64.906717  -71.641013   70.095867 -113.475323  -85.633171  -29.427630
#> [25]  128.328459  220.693133  126.591749   78.394247  -25.955574 -124.178686
#> [31]  -90.845193  127.830581  -30.794629  159.780872  201.707127  405.310561
#> [37]  390.953841  373.370919  316.431235  188.109683  134.461285  339.300414
## Recursive CUSUM test
## Empirical fluctuation process of the recursive residuals;
## structural-change test for the consumption function.
rcus <- efp(expenditure ~ income, data = USConsump1993)
plot(rcus)

sctest(rcus)
#> 
#> 	Recursive CUSUM test
#> 
#> data:  rcus
#> S = 1.0267, p-value = 0.02707
#> 
## Harvey-Collier test
harvtest(fm)
#> 
#> 	Harvey-Collier test
#> 
#> data:  fm
#> HC = 3.0802, df = 41, p-value = 0.003685
#> 
## NOTE: Mistake in Baltagi (2002) who computes
## the t-statistic incorrectly as 0.0733 via
mean(rr)/sd(rr)/sqrt(length(rr))
#> [1] 0.07333754
## whereas it should be (as in harvtest)
mean(rr)/sd(rr) * sqrt(length(rr))
#> [1] 3.080177

## Rainbow test
## Functional-form test; `center = 23` selects the central subsample.
raintest(fm, center = 23)
#> 
#> 	Rainbow test
#> 
#> data:  fm
#> Rain = 4.1448, df1 = 22, df2 = 20, p-value = 0.001116
#> 

## J test for non-nested models
## Compare a model with lagged income against one with lagged expenditure
## by adding each model's fitted values to the other (Davidson-MacKinnon).
library("dynlm")
fm1 <- dynlm(expenditure ~ income + L(income), data = USConsump1993)
fm2 <- dynlm(expenditure ~ income + L(expenditure), data = USConsump1993)
jtest(fm1, fm2)
#> J test
#> 
#> Model 1: expenditure ~ income + L(income)
#> Model 2: expenditure ~ income + L(expenditure)
#>                 Estimate Std. Error t value  Pr(>|t|)    
#> M1 + fitted(M2)   1.6378    0.20984  7.8051 1.726e-09 ***
#> M2 + fitted(M1)  -2.5419    0.61603 -4.1262 0.0001874 ***
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

## Chapter 11
## Table 11.1 Instrumental-variables regression
## Investment (income minus expenditure) serves as the instrument
## for income.
usc <- as.data.frame(USConsump1993)
usc$investment <- usc$income - usc$expenditure
fm_ols <- lm(expenditure ~ income, data = usc)
fm_iv <- ivreg(expenditure ~ income | investment, data = usc)
## Hausman test
## Hand-computed contrast between IV and OLS estimates; the quadratic
## form is chi-squared with df = 2 (the number of coefficients compared).
cf_diff <- coef(fm_iv) - coef(fm_ols)
vc_diff <- vcov(fm_iv) - vcov(fm_ols)
x2_diff <- as.vector(t(cf_diff) %*% solve(vc_diff) %*% cf_diff)
pchisq(x2_diff, df = 2, lower.tail = FALSE)
#> [1] 0.0001836953

## Chapter 14
## ACF and PACF for expenditures and first differences
## Outer parentheses force printing of the (invisibly returned) acf object
## in addition to the plot.
exps <- USConsump1993[, "expenditure"]
(acf(exps))

#> 
#> Autocorrelations of series ‘exps’, by lag
#> 
#>      0      1      2      3      4      5      6      7      8      9     10 
#>  1.000  0.941  0.880  0.820  0.753  0.683  0.614  0.547  0.479  0.412  0.348 
#>     11     12     13     14     15     16 
#>  0.288  0.230  0.171  0.110  0.049 -0.010 
(pacf(exps))

#> 
#> Partial autocorrelations of series ‘exps’, by lag
#> 
#>      1      2      3      4      5      6      7      8      9     10     11 
#>  0.941 -0.045 -0.035 -0.083 -0.064 -0.034 -0.025 -0.049 -0.043 -0.011 -0.022 
#>     12     13     14     15     16 
#> -0.025 -0.061 -0.066 -0.071 -0.030 
(acf(diff(exps)))

#> 
#> Autocorrelations of series ‘diff(exps)’, by lag
#> 
#>      0      1      2      3      4      5      6      7      8      9     10 
#>  1.000  0.344 -0.067 -0.156 -0.105 -0.077 -0.072  0.026 -0.050  0.058  0.073 
#>     11     12     13     14     15     16 
#>  0.078 -0.033 -0.069 -0.158 -0.161  0.034 
(pacf(diff(exps)))

#> 
#> Partial autocorrelations of series ‘diff(exps)’, by lag
#> 
#>      1      2      3      4      5      6      7      8      9     10     11 
#>  0.344 -0.209 -0.066 -0.038 -0.065 -0.060  0.056 -0.133  0.133 -0.014  0.058 
#>     12     13     14     15     16 
#> -0.079  0.005 -0.175 -0.032  0.065 

## dynamic regressions, eq. (14.8)
## Regress first differences on a linear trend and the lagged level.
## NOTE: this reassigns `fm`, shadowing the static consumption model
## fitted earlier in this script.
fm <- dynlm(d(exps) ~ I(time(exps) - 1949) + L(exps))
summary(fm)
#> 
#> Time series regression with "ts" data:
#> Start = 1951, End = 1993
#> 
#> Call:
#> dynlm(formula = d(exps) ~ I(time(exps) - 1949) + L(exps))
#> 
#> Residuals:
#>     Min      1Q  Median      3Q     Max 
#> -357.76  -78.18   22.49  108.97  201.06 
#> 
#> Coefficients:
#>                        Estimate Std. Error t value Pr(>|t|)   
#> (Intercept)          1048.96039  353.81291   2.965  0.00509 **
#> I(time(exps) - 1949)   39.90164   14.31344   2.788  0.00808 **
#> L(exps)                -0.19561    0.07398  -2.644  0.01164 * 
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#> 
#> Residual standard error: 147.4 on 40 degrees of freedom
#> Multiple R-squared:  0.1784,	Adjusted R-squared:  0.1373 
#> F-statistic: 4.343 on 2 and 40 DF,  p-value: 0.01963
#> 


################################
## Grunfeld's investment data ##
################################

## select the first three companies (as panel data)
data("Grunfeld", package = "AER")
pgr <- subset(Grunfeld, firm %in% levels(Grunfeld$firm)[1:3])
library("plm")
## Declare the panel structure: firm is the individual index, year the time index.
pgr <- pdata.frame(pgr, c("firm", "year"))

## Ex. 10.9
library("systemfit")
#> Loading required package: Matrix
#> 
#> Please cite the 'systemfit' package as:
#> Arne Henningsen and Jeff D. Hamann (2007). systemfit: A Package for Estimating Systems of Simultaneous Equations in R. Journal of Statistical Software 23(4), 1-40. http://www.jstatsoft.org/v23/i04/.
#> 
#> If you have questions, suggestions, or comments regarding the 'systemfit' package, please use a forum or 'tracker' at systemfit's R-Forge site:
#> https://r-forge.r-project.org/projects/systemfit/
## Same investment equation estimated equation-by-equation (OLS) and as a
## seemingly unrelated regression system (SUR).
gr_ols <- systemfit(invest ~ value + capital, method = "OLS", data = pgr)
gr_sur <- systemfit(invest ~ value + capital, method = "SUR", data = pgr)


#########################################
## Panel study on income dynamics 1982 ##
#########################################

## data
data("PSID1982", package = "AER")

## Table 4.1
## Mincer-type earnings equation: log wage on all remaining variables
## plus squared experience.
earn_lm <- lm(log(wage) ~ . + I(experience^2), data = PSID1982)
summary(earn_lm)
#> 
#> Call:
#> lm(formula = log(wage) ~ . + I(experience^2), data = PSID1982)
#> 
#> Residuals:
#>     Min      1Q  Median      3Q     Max 
#> -1.0271 -0.2292  0.0155  0.2231  1.1314 
#> 
#> Coefficients:
#>                   Estimate Std. Error t value Pr(>|t|)    
#> (Intercept)      5.5900930  0.1901125  29.404  < 2e-16 ***
#> experience       0.0293801  0.0065241   4.503 8.09e-06 ***
#> weeks            0.0034128  0.0026776   1.275 0.202973    
#> occupationblue  -0.1615216  0.0369073  -4.376 1.43e-05 ***
#> industryyes      0.0846626  0.0291637   2.903 0.003836 ** 
#> southyes        -0.0587635  0.0309069  -1.901 0.057755 .  
#> smsayes          0.1661912  0.0295510   5.624 2.90e-08 ***
#> marriedyes       0.0952370  0.0489277   1.946 0.052077 .  
#> genderfemale    -0.3245574  0.0607294  -5.344 1.30e-07 ***
#> unionyes         0.1062775  0.0316755   3.355 0.000845 ***
#> education        0.0571935  0.0065910   8.678  < 2e-16 ***
#> ethnicityafam   -0.1904220  0.0544118  -3.500 0.000502 ***
#> I(experience^2) -0.0004860  0.0001268  -3.833 0.000141 ***
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#> 
#> Residual standard error: 0.3256 on 582 degrees of freedom
#> Multiple R-squared:  0.4597,	Adjusted R-squared:  0.4485 
#> F-statistic: 41.26 on 12 and 582 DF,  p-value: < 2.2e-16
#> 

## Table 13.1
## Union membership modeled three ways: linear probability model
## (union recoded to 0/1), probit, and logit (default binomial link).
union_lpm <- lm(I(as.numeric(union) - 1) ~ . - wage, data = PSID1982)
union_probit <- glm(union ~ . - wage, data = PSID1982, family = binomial(link = "probit"))
union_logit <- glm(union ~ . - wage, data = PSID1982, family = binomial)
## probit OK, logit and LPM rather different.