Skip to contents

Conducts a series of checks for multicollinearity.

Usage

mcDiagnose(model)

Arguments

model

a fitted regression model

Value

a list of the "auxiliary regressions" that were fitted during the analysis

Author

Paul E. Johnson pauljohn@ku.edu

Examples


library(rockchalk)
N <- 100
dat <- genCorrelatedData3(y~ 0 + 0.2*x1 + 0.2*x2, N=N, means=c(100,200),
                          sds=c(20,30), rho=0.4, stde=10)
dat$x3 <- rnorm(100, m=40, s=4)
m1 <- lm(y ~ x1 + x2 + x3, data=dat)
summary(m1)
#> 
#> Call:
#> lm(formula = y ~ x1 + x2 + x3, data = dat)
#> 
#> Residuals:
#>     Min      1Q  Median      3Q     Max 
#> -23.444  -7.096  -1.084   6.963  18.069 
#> 
#> Coefficients:
#>             Estimate Std. Error t value Pr(>|t|)    
#> (Intercept) -2.54831   12.80950  -0.199   0.8427    
#> x1           0.14371    0.05520   2.603   0.0107 *  
#> x2           0.15881    0.03395   4.678 9.47e-06 ***
#> x3           0.35098    0.25026   1.402   0.1640    
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#> 
#> Residual standard error: 9.651 on 96 degrees of freedom
#> Multiple R-squared:  0.3033,	Adjusted R-squared:  0.2815 
#> F-statistic: 13.93 on 3 and 96 DF,  p-value: 1.299e-07
#> 
m1d <- mcDiagnose(m1)
#> The following auxiliary models are being estimated and returned in a list:
#> x1 ~ x2 + x3
#> x2 ~ x1 + x3
#> x3 ~ x1 + x2
#> 
#> R_j Squares of auxiliary models
#>          x1          x2          x3 
#> 0.110737257 0.110872942 0.006766578 
#> The Corresponding VIF, 1/(1-R_j^2)
#>       x1       x2       x3 
#> 1.124527 1.124699 1.006813 
#> Bivariate Pearson Correlations for design matrix 
#>       x1    x2    x3
#> x1  1.00  0.33 -0.07
#> x2  0.33  1.00 -0.07
#> x3 -0.07 -0.07  1.00

m2 <- lm(y ~ x1 * x2 + x3, data=dat)
summary(m2)
#> 
#> Call:
#> lm(formula = y ~ x1 * x2 + x3, data = dat)
#> 
#> Residuals:
#>      Min       1Q   Median       3Q      Max 
#> -23.7456  -6.6621  -0.8028   6.8901  17.9305 
#> 
#> Coefficients:
#>               Estimate Std. Error t value Pr(>|t|)
#> (Intercept) 17.0730058 36.6502987   0.466    0.642
#> x1          -0.0485605  0.3408520  -0.142    0.887
#> x2           0.0611795  0.1741484   0.351    0.726
#> x3           0.3350834  0.2526771   1.326    0.188
#> x1:x2        0.0009795  0.0017134   0.572    0.569
#> 
#> Residual standard error: 9.685 on 95 degrees of freedom
#> Multiple R-squared:  0.3057,	Adjusted R-squared:  0.2764 
#> F-statistic: 10.46 on 4 and 95 DF,  p-value: 4.624e-07
#> 
m2d <- mcDiagnose(m2)
#> The following auxiliary models are being estimated and returned in a list:
#> x1 ~ x2 + x3 + `x1:x2`
#> x2 ~ x1 + x3 + `x1:x2`
#> x3 ~ x1 + x2 + `x1:x2`
#> `x1:x2` ~ x1 + x2 + x3
#> 
#> R_j Squares of auxiliary models
#>        x1        x2        x3     x1:x2 
#> 0.9765114 0.9659757 0.0187993 0.9892624 
#> The Corresponding VIF, 1/(1-R_j^2)
#>        x1        x2        x3     x1:x2 
#> 42.573786 29.390791  1.019159 93.130638 
#> Bivariate Pearson Correlations for design matrix 
#>          x1    x2    x3 x1:x2
#> x1     1.00  0.33 -0.07  0.85
#> x2     0.33  1.00 -0.07  0.77
#> x3    -0.07 -0.07  1.00 -0.07
#> x1:x2  0.85  0.77 -0.07  1.00



m3 <- lm(y ~ log(10+x1) + x3 + poly(x2,2), data=dat)
summary(m3)
#> 
#> Call:
#> lm(formula = y ~ log(10 + x1) + x3 + poly(x2, 2), data = dat)
#> 
#> Residuals:
#>      Min       1Q   Median       3Q      Max 
#> -23.3799  -6.9114  -0.7764   6.8222  18.5127 
#> 
#> Coefficients:
#>              Estimate Std. Error t value Pr(>|t|)    
#> (Intercept)  -28.2437    29.0580  -0.972   0.3335    
#> log(10 + x1)  15.3129     5.7329   2.671   0.0089 ** 
#> x3             0.3434     0.2512   1.367   0.1748    
#> poly(x2, 2)1  47.7747    10.2399   4.666 1.01e-05 ***
#> poly(x2, 2)2   4.4219     9.6895   0.456   0.6492    
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#> 
#> Residual standard error: 9.673 on 95 degrees of freedom
#> Multiple R-squared:  0.3073,	Adjusted R-squared:  0.2782 
#> F-statistic: 10.54 on 4 and 95 DF,  p-value: 4.148e-07
#> 
m3d <- mcDiagnose(m3)
#> The following auxiliary models are being estimated and returned in a list:
#> `log(10 + x1)` ~ x3 + `poly(x2, 2)1` + `poly(x2, 2)2`
#> x3 ~ `log(10 + x1)` + `poly(x2, 2)1` + `poly(x2, 2)2`
#> `poly(x2, 2)1` ~ `log(10 + x1)` + x3 + `poly(x2, 2)2`
#> `poly(x2, 2)2` ~ `log(10 + x1)` + x3 + `poly(x2, 2)1`
#> 
#> R_j Squares of auxiliary models
#> log(10 + x1)           x3 poly(x2, 2)1 poly(x2, 2)2 
#>  0.107294339  0.009381609  0.107607175  0.003340768 
#> The Corresponding VIF, 1/(1-R_j^2)
#> log(10 + x1)           x3 poly(x2, 2)1 poly(x2, 2)2 
#>     1.120190     1.009470     1.120583     1.003352 
#> Bivariate Pearson Correlations for design matrix 
#>              log(10 + x1)    x3 poly(x2, 2)1 poly(x2, 2)2
#> log(10 + x1)         1.00 -0.06         0.32        -0.02
#> x3                  -0.06  1.00        -0.07         0.05
#> poly(x2, 2)1         0.32 -0.07         1.00         0.00
#> poly(x2, 2)2        -0.02  0.05         0.00         1.00

N <- 100
x1 <- 50 + rnorm(N)
x2 <- log(rgamma(N, 2,1))
x3 <- rpois(N, lambda=17)
z1 <- gl(5, N/5)
dummies <- contrasts(z1)[ as.numeric(z1), ]
dimnames(dummies) <- NULL ## Avoids row name conflict in data.frame below
y3 <- x1  -.5 * x2 + 0.1 * x2^2 + dummies %*% c(0.1,-0.1,-0.2,0.2)+ 5 * rnorm(N)
dat <- data.frame(x1=x1, x2=x2, x3=x3,  z1=z1, y3 = y3)

m3 <- lm(y3 ~ x1 + poly(x2,2)  + log(x1) + z1, dat)
summary(m3)
#> 
#> Call:
#> lm(formula = y3 ~ x1 + poly(x2, 2) + log(x1) + z1, data = dat)
#> 
#> Residuals:
#>      Min       1Q   Median       3Q      Max 
#> -16.6099  -3.9033   0.2383   3.6656  15.2669 
#> 
#> Coefficients:
#>               Estimate Std. Error t value Pr(>|t|)
#> (Intercept)  2022.9799  6820.0308   0.297    0.767
#> x1             13.9882    46.8053   0.299    0.766
#> poly(x2, 2)1   -6.5901     5.9014  -1.117    0.267
#> poly(x2, 2)2   -3.4138     5.8776  -0.581    0.563
#> log(x1)      -683.2804  2341.5895  -0.292    0.771
#> z12             2.7889     1.8461   1.511    0.134
#> z13            -1.1047     1.8499  -0.597    0.552
#> z14            -0.7975     1.8306  -0.436    0.664
#> z15             0.7006     1.8683   0.375    0.709
#> 
#> Residual standard error: 5.69 on 91 degrees of freedom
#> Multiple R-squared:  0.07841,	Adjusted R-squared:  -0.002607 
#> F-statistic: 0.9678 on 8 and 91 DF,  p-value: 0.4662
#> 

mcDiagnose(m3)
#> The following auxiliary models are being estimated and returned in a list:
#> x1 ~ `poly(x2, 2)1` + `poly(x2, 2)2` + `log(x1)` + z12 + z13 + 
#>     z14 + z15
#> `poly(x2, 2)1` ~ x1 + `poly(x2, 2)2` + `log(x1)` + z12 + z13 + 
#>     z14 + z15
#> `poly(x2, 2)2` ~ x1 + `poly(x2, 2)1` + `log(x1)` + z12 + z13 + 
#>     z14 + z15
#> `log(x1)` ~ x1 + `poly(x2, 2)1` + `poly(x2, 2)2` + z12 + z13 + 
#>     z14 + z15
#> z12 ~ x1 + `poly(x2, 2)1` + `poly(x2, 2)2` + `log(x1)` + z13 + 
#>     z14 + z15
#> z13 ~ x1 + `poly(x2, 2)1` + `poly(x2, 2)2` + `log(x1)` + z12 + 
#>     z14 + z15
#> z14 ~ x1 + `poly(x2, 2)1` + `poly(x2, 2)2` + `log(x1)` + z12 + 
#>     z13 + z15
#> z15 ~ x1 + `poly(x2, 2)1` + `poly(x2, 2)2` + `log(x1)` + z12 + 
#>     z13 + z14
#> 
#> R_j Squares of auxiliary models
#>           x1 poly(x2, 2)1 poly(x2, 2)2      log(x1)          z12          z13 
#>   0.99983338   0.07028572   0.06275649   0.99983328   0.40618551   0.40865296 
#>          z14          z15 
#>   0.39611925   0.42024703 
#> The Corresponding VIF, 1/(1-R_j^2)
#>           x1 poly(x2, 2)1 poly(x2, 2)2      log(x1)          z12          z13 
#>  6001.543833     1.075599     1.066959  5998.088389     1.684028     1.691054 
#>          z14          z15 
#>     1.655956     1.724873 
#> Bivariate Pearson Correlations for design matrix 
#>                 x1 poly(x2, 2)1 poly(x2, 2)2 log(x1)   z12   z13   z14   z15
#> x1            1.00        -0.07         0.10    1.00  0.13 -0.11 -0.05 -0.12
#> poly(x2, 2)1 -0.07         1.00         0.00   -0.07  0.14 -0.17  0.01  0.05
#> poly(x2, 2)2  0.10         0.00         1.00    0.09 -0.06  0.01 -0.01 -0.14
#> log(x1)       1.00        -0.07         0.09    1.00  0.12 -0.12 -0.04 -0.12
#> z12           0.13         0.14        -0.06    0.12  1.00 -0.25 -0.25 -0.25
#> z13          -0.11        -0.17         0.01   -0.12 -0.25  1.00 -0.25 -0.25
#> z14          -0.05         0.01        -0.01   -0.04 -0.25 -0.25  1.00 -0.25
#> z15          -0.12         0.05        -0.14   -0.12 -0.25 -0.25 -0.25  1.00