Multi-collinearity diagnostics
mcDiagnose.Rd — Conducts a series of checks for multicollinearity.
Author
Paul E. Johnson pauljohn@ku.edu
Examples
## Example 1: a well-conditioned design. Three predictors with modest
## correlation (rho = 0.4 between x1 and x2; x3 drawn independently),
## so the auxiliary R-squares and VIFs reported below stay near 1.
library(rockchalk)
N <- 100
dat <- genCorrelatedData3(y~ 0 + 0.2*x1 + 0.2*x2, N=N, means=c(100,200),
sds=c(20,30), rho=0.4, stde=10)
## Use N rather than a hard-coded 100 so the added column always matches
## the sample size chosen above (identical behavior here since N == 100).
dat$x3 <- rnorm(N, m=40, s=4)
m1 <- lm(y ~ x1 + x2 + x3, data=dat)
summary(m1)
#>
#> Call:
#> lm(formula = y ~ x1 + x2 + x3, data = dat)
#>
#> Residuals:
#> Min 1Q Median 3Q Max
#> -23.444 -7.096 -1.084 6.963 18.069
#>
#> Coefficients:
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) -2.54831 12.80950 -0.199 0.8427
#> x1 0.14371 0.05520 2.603 0.0107 *
#> x2 0.15881 0.03395 4.678 9.47e-06 ***
#> x3 0.35098 0.25026 1.402 0.1640
#> ---
#> Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#>
#> Residual standard error: 9.651 on 96 degrees of freedom
#> Multiple R-squared: 0.3033, Adjusted R-squared: 0.2815
#> F-statistic: 13.93 on 3 and 96 DF, p-value: 1.299e-07
#>
## mcDiagnose fits one auxiliary regression per predictor (each regressor
## on all the others) and prints R_j^2, VIF = 1/(1 - R_j^2), and the
## bivariate correlations of the design-matrix columns.
m1d <- mcDiagnose(m1)
#> The following auxiliary models are being estimated and returned in a list:
#> x1 ~ x2 + x3
#> x2 ~ x1 + x3
#> x3 ~ x1 + x2
#>
#> R_j Squares of auxiliary models
#> x1 x2 x3
#> 0.110737257 0.110872942 0.006766578
#> The Corresponding VIF, 1/(1-R_j^2)
#> x1 x2 x3
#> 1.124527 1.124699 1.006813
#> Bivariate Pearson Correlations for design matrix
#> x1 x2 x3
#> x1 1.00 0.33 -0.07
#> x2 0.33 1.00 -0.07
#> x3 -0.07 -0.07 1.00
## Example 2: adding the x1:x2 product term induces severe collinearity,
## because the product is strongly correlated with its components
## (r = 0.85 with x1 and 0.77 with x2 in the output below).
m2 <- lm(y ~ x1 * x2 + x3, data=dat)
summary(m2)
#>
#> Call:
#> lm(formula = y ~ x1 * x2 + x3, data = dat)
#>
#> Residuals:
#> Min 1Q Median 3Q Max
#> -23.7456 -6.6621 -0.8028 6.8901 17.9305
#>
#> Coefficients:
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) 17.0730058 36.6502987 0.466 0.642
#> x1 -0.0485605 0.3408520 -0.142 0.887
#> x2 0.0611795 0.1741484 0.351 0.726
#> x3 0.3350834 0.2526771 1.326 0.188
#> x1:x2 0.0009795 0.0017134 0.572 0.569
#>
#> Residual standard error: 9.685 on 95 degrees of freedom
#> Multiple R-squared: 0.3057, Adjusted R-squared: 0.2764
#> F-statistic: 10.46 on 4 and 95 DF, p-value: 4.624e-07
#>
## Compare with Example 1: the VIFs for x1, x2, and x1:x2 jump from
## about 1.1 to 42.6, 29.4, and 93.1; x3, which is not part of the
## interaction, stays near 1.
m2d <- mcDiagnose(m2)
#> The following auxiliary models are being estimated and returned in a list:
#> x1 ~ x2 + x3 + `x1:x2`
#> x2 ~ x1 + x3 + `x1:x2`
#> x3 ~ x1 + x2 + `x1:x2`
#> `x1:x2` ~ x1 + x2 + x3
#>
#> R_j Squares of auxiliary models
#> x1 x2 x3 x1:x2
#> 0.9765114 0.9659757 0.0187993 0.9892624
#> The Corresponding VIF, 1/(1-R_j^2)
#> x1 x2 x3 x1:x2
#> 42.573786 29.390791 1.019159 93.130638
#> Bivariate Pearson Correlations for design matrix
#> x1 x2 x3 x1:x2
#> x1 1.00 0.33 -0.07 0.85
#> x2 0.33 1.00 -0.07 0.77
#> x3 -0.07 -0.07 1.00 -0.07
#> x1:x2 0.85 0.77 -0.07 1.00
## Example 3: transformed predictors. mcDiagnose operates on the columns
## of the fitted design matrix, so log(10 + x1) and the two orthogonal
## poly(x2, 2) columns each appear as separate regressors below.
m3 <- lm(y ~ log(10+x1) + x3 + poly(x2,2), data=dat)
summary(m3)
#>
#> Call:
#> lm(formula = y ~ log(10 + x1) + x3 + poly(x2, 2), data = dat)
#>
#> Residuals:
#> Min 1Q Median 3Q Max
#> -23.3799 -6.9114 -0.7764 6.8222 18.5127
#>
#> Coefficients:
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) -28.2437 29.0580 -0.972 0.3335
#> log(10 + x1) 15.3129 5.7329 2.671 0.0089 **
#> x3 0.3434 0.2512 1.367 0.1748
#> poly(x2, 2)1 47.7747 10.2399 4.666 1.01e-05 ***
#> poly(x2, 2)2 4.4219 9.6895 0.456 0.6492
#> ---
#> Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#>
#> Residual standard error: 9.673 on 95 degrees of freedom
#> Multiple R-squared: 0.3073, Adjusted R-squared: 0.2782
#> F-statistic: 10.54 on 4 and 95 DF, p-value: 4.148e-07
#>
## All VIFs stay near 1 here: the orthogonal polynomial columns are
## uncorrelated with each other by construction (r = 0.00 below).
m3d <- mcDiagnose(m3)
#> The following auxiliary models are being estimated and returned in a list:
#> `log(10 + x1)` ~ x3 + `poly(x2, 2)1` + `poly(x2, 2)2`
#> x3 ~ `log(10 + x1)` + `poly(x2, 2)1` + `poly(x2, 2)2`
#> `poly(x2, 2)1` ~ `log(10 + x1)` + x3 + `poly(x2, 2)2`
#> `poly(x2, 2)2` ~ `log(10 + x1)` + x3 + `poly(x2, 2)1`
#>
#> R_j Squares of auxiliary models
#> log(10 + x1) x3 poly(x2, 2)1 poly(x2, 2)2
#> 0.107294339 0.009381609 0.107607175 0.003340768
#> The Corresponding VIF, 1/(1-R_j^2)
#> log(10 + x1) x3 poly(x2, 2)1 poly(x2, 2)2
#> 1.120190 1.009470 1.120583 1.003352
#> Bivariate Pearson Correlations for design matrix
#> log(10 + x1) x3 poly(x2, 2)1 poly(x2, 2)2
#> log(10 + x1) 1.00 -0.06 0.32 -0.02
#> x3 -0.06 1.00 -0.07 0.05
#> poly(x2, 2)1 0.32 -0.07 1.00 0.00
#> poly(x2, 2)2 -0.02 0.05 0.00 1.00
## Example 4: extreme, nearly exact collinearity. Including both x1 and
## log(x1) when x1 is concentrated around 50 makes log(x1) close to a
## linear function of x1 (their correlation prints as 1.00 below), so
## the VIFs for those two columns explode to roughly 6000.
N <- 100
x1 <- 50 + rnorm(N)
x2 <- log(rgamma(N, 2,1))
x3 <- rpois(N, lambda=17)
## z1 is a 5-level factor; its treatment-contrast dummy columns enter
## the design matrix as z12 ... z15.
z1 <- gl(5, N/5)
dummies <- contrasts(z1)[ as.numeric(z1), ]
dimnames(dummies) <- NULL ## Avoids row name conflict in data.frame below
y3 <- x1 -.5 * x2 + 0.1 * x2^2 + dummies %*% c(0.1,-0.1,-0.2,0.2)+ 5 * rnorm(N)
dat <- data.frame(x1=x1, x2=x2, x3=x3, z1=z1, y3 = y3)
## NOTE(review): this reuses the name m3, overwriting the model fitted
## in the previous example.
m3 <- lm(y3 ~ x1 + poly(x2,2) + log(x1) + z1, dat)
summary(m3)
#>
#> Call:
#> lm(formula = y3 ~ x1 + poly(x2, 2) + log(x1) + z1, data = dat)
#>
#> Residuals:
#> Min 1Q Median 3Q Max
#> -16.6099 -3.9033 0.2383 3.6656 15.2669
#>
#> Coefficients:
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) 2022.9799 6820.0308 0.297 0.767
#> x1 13.9882 46.8053 0.299 0.766
#> poly(x2, 2)1 -6.5901 5.9014 -1.117 0.267
#> poly(x2, 2)2 -3.4138 5.8776 -0.581 0.563
#> log(x1) -683.2804 2341.5895 -0.292 0.771
#> z12 2.7889 1.8461 1.511 0.134
#> z13 -1.1047 1.8499 -0.597 0.552
#> z14 -0.7975 1.8306 -0.436 0.664
#> z15 0.7006 1.8683 0.375 0.709
#>
#> Residual standard error: 5.69 on 91 degrees of freedom
#> Multiple R-squared: 0.07841, Adjusted R-squared: -0.002607
#> F-statistic: 0.9678 on 8 and 91 DF, p-value: 0.4662
#>
mcDiagnose(m3)
#> The following auxiliary models are being estimated and returned in a list:
#> x1 ~ `poly(x2, 2)1` + `poly(x2, 2)2` + `log(x1)` + z12 + z13 +
#> z14 + z15
#> `poly(x2, 2)1` ~ x1 + `poly(x2, 2)2` + `log(x1)` + z12 + z13 +
#> z14 + z15
#> `poly(x2, 2)2` ~ x1 + `poly(x2, 2)1` + `log(x1)` + z12 + z13 +
#> z14 + z15
#> `log(x1)` ~ x1 + `poly(x2, 2)1` + `poly(x2, 2)2` + z12 + z13 +
#> z14 + z15
#> z12 ~ x1 + `poly(x2, 2)1` + `poly(x2, 2)2` + `log(x1)` + z13 +
#> z14 + z15
#> z13 ~ x1 + `poly(x2, 2)1` + `poly(x2, 2)2` + `log(x1)` + z12 +
#> z14 + z15
#> z14 ~ x1 + `poly(x2, 2)1` + `poly(x2, 2)2` + `log(x1)` + z12 +
#> z13 + z15
#> z15 ~ x1 + `poly(x2, 2)1` + `poly(x2, 2)2` + `log(x1)` + z12 +
#> z13 + z14
#>
#> R_j Squares of auxiliary models
#> x1 poly(x2, 2)1 poly(x2, 2)2 log(x1) z12 z13
#> 0.99983338 0.07028572 0.06275649 0.99983328 0.40618551 0.40865296
#> z14 z15
#> 0.39611925 0.42024703
#> The Corresponding VIF, 1/(1-R_j^2)
#> x1 poly(x2, 2)1 poly(x2, 2)2 log(x1) z12 z13
#> 6001.543833 1.075599 1.066959 5998.088389 1.684028 1.691054
#> z14 z15
#> 1.655956 1.724873
#> Bivariate Pearson Correlations for design matrix
#> x1 poly(x2, 2)1 poly(x2, 2)2 log(x1) z12 z13 z14 z15
#> x1 1.00 -0.07 0.10 1.00 0.13 -0.11 -0.05 -0.12
#> poly(x2, 2)1 -0.07 1.00 0.00 -0.07 0.14 -0.17 0.01 0.05
#> poly(x2, 2)2 0.10 0.00 1.00 0.09 -0.06 0.01 -0.01 -0.14
#> log(x1) 1.00 -0.07 0.09 1.00 0.12 -0.12 -0.04 -0.12
#> z12 0.13 0.14 -0.06 0.12 1.00 -0.25 -0.25 -0.25
#> z13 -0.11 -0.17 0.01 -0.12 -0.25 1.00 -0.25 -0.25
#> z14 -0.05 0.01 -0.01 -0.04 -0.25 -0.25 1.00 -0.25
#> z15 -0.12 0.05 -0.14 -0.12 -0.25 -0.25 -0.25 1.00