Skip to contents

This is the default method. Works for lm and glm fits.

Usage

# Default S3 method
model.data(model, na.action = na.omit, ...)

Arguments

model

A fitted model

na.action

Defaults to na.omit, so the model's data is re-created as it would appear in the user's workspace, except that rows with missing values are deleted. Changing this argument to na.pass returns the data as it was in the workspace.

...

Placeholder for other arguments; not used at present.

Value

A data frame

Author

Paul E. Johnson pauljohn@ku.edu

Examples

library(rockchalk)






## First, check that model.data works when there is no data argument.
## This used to fail, now OK.

## Simulate three numeric vectors directly in the workspace.
## Argument names are written in full (mean =, sd =) rather than
## relying on partial matching of m = and s =.
x1 <- rnorm(100, mean = 100, sd = 10)
x2 <- rnorm(100, mean = 50, sd = 20)
y <- rnorm(100, mean = 40, sd = 3)

## Fit with no data argument. The predictor is transformed inside the
## formula; the recovered data frame printed below holds the
## untransformed y, x1 and x2.
m0 <- lm(y ~ log(10+x1) + x2)
m0.data <- model.data(m0)
head(m0.data)
#>          y        x1       x2
#> 1 36.07686  98.63171 84.98846
#> 2 40.45374 105.45843 67.13184
#> 3 43.97434  93.28166 43.27243
#> 4 43.47003  99.12302 61.10083
#> 5 37.70206 100.96470 64.74697
#> 6 43.66077  88.96819 24.83402


## Response transformed with a literal constant (43) inside log()
m1 <- lm(log(43 + y) ~ log(10+x1) + x2)
m1.data <- model.data(m1)
head(m1.data)
#>          y        x1       x2
#> 1 36.07686  98.63171 84.98846
#> 2 40.45374 105.45843 67.13184
#> 3 43.97434  93.28166 43.27243
#> 4 43.47003  99.12302 61.10083
#> 5 37.70206 100.96470 64.74697
#> 6 43.66077  88.96819 24.83402

## d is a scalar in the workspace that is used inside the formula; it
## does not show up as a column in the recovered data below.
d <- 3

m2 <- lm(log(d + y) ~ log(10+x1) + x2)
m2.data <- model.data(m2)
head(m2.data)
#>          y        x1       x2
#> 1 36.07686  98.63171 84.98846
#> 2 40.45374 105.45843 67.13184
#> 3 43.97434  93.28166 43.27243
#> 4 43.47003  99.12302 61.10083
#> 5 37.70206 100.96470 64.74697
#> 6 43.66077  88.96819 24.83402

## Same as m2, with the terms inside the response's log() reversed
m3 <- lm(log(y + d) ~ log(10+x1) + x2)
m3.data <- model.data(m3)
head(m3.data)
#>          y        x1       x2
#> 1 36.07686  98.63171 84.98846
#> 2 40.45374 105.45843 67.13184
#> 3 43.97434  93.28166 43.27243
#> 4 43.47003  99.12302 61.10083
#> 5 37.70206 100.96470 64.74697
#> 6 43.66077  88.96819 24.83402



## Check numeric and categorical predictors: simulate a data frame with
## a count, two continuous variables, and two factors.

## Distribution arguments are named in full (lambda =, mean =, sd =)
## rather than partially matched through l =, m = and s =.
x1 <- rpois(100, lambda = 6)
x2 <- rnorm(100, mean = 50, sd = 10)
x3 <- rnorm(100)
xcat1 <- gl(2, 50, labels = c("M", "F"))
xcat2 <- cut(rnorm(100), breaks = c(-Inf, 0, 0.4, 0.9, 1, Inf),
             labels = c("R", "M", "D", "P", "G"))
dat <- data.frame(x1, x2, x3, xcat1, xcat2)
## Remove the workspace copies; from here on the variables exist only
## as columns of dat.
rm(x1, x2, x3, xcat1, xcat2)
## Expand each factor into its contrast-coded columns, one row per case.
## drop = FALSE keeps the single-column xcat1 contrast as a matrix.
dat$xcat1n <- with(dat, contrasts(xcat1)[xcat1, , drop = FALSE])
dat$xcat2n <- with(dat, contrasts(xcat2)[xcat2, ])

## Multiplier applied to the standard normal noise added to y
STDE <- 20
dat$y <- with(dat,
              0.03 + 0.8 * x1 + 0.1 * x2 + 0.7 * x3 +
              xcat1n %*% c(2) + xcat2n %*% c(0.1, -2, 0.3, 0.1) +
              STDE * rnorm(100))



## poly() term with variables taken from dat. The recovered data keep
## the raw x1 column, and the "varNamesRHS" attribute lists the
## predictor names.
m1 <- lm(y ~ poly(x1, 2), data=dat)
m1.data <- model.data(m1)
head(m1.data)
#>            y x1
#> 1  11.690314  5
#> 2  20.459827  5
#> 3  21.944034  4
#> 4 -10.067395  8
#> 5   5.382613  2
#> 6  -7.506791  7
attr(m1.data, "varNamesRHS")
#> [1] "x1"

## Check to make sure d is not mistaken for a data column
d <- 2
m2 <- lm(y ~ poly(x1, d), data=dat)
m2.data <- model.data(m2)
head(m2.data)
#>            y x1
#> 1  11.690314  5
#> 2  20.459827  5
#> 3  21.944034  4
#> 4 -10.067395  8
#> 5   5.382613  2
#> 6  -7.506791  7
attr(m2.data, "varNamesRHS")
#> [1] "x1"


## Check to see how the 10 in log is handled
m3 <- lm(y ~ log(10 + x1) + poly(x1, d) + sin(x2), data=dat)
m3.data <- model.data(m3)
head(m3.data)
#>            y x1       x2
#> 1  11.690314  5 46.14669
#> 2  20.459827  5 45.87979
#> 3  21.944034  4 44.87008
#> 4 -10.067395  8 52.88255
#> 5   5.382613  2 70.24344
#> 6  -7.506791  7 24.61845
attr(m3.data, "varNamesRHS")
#> [1] "x1" "x2"


## Both sides transformed; the workspace scalar d sits inside log()
## next to x1.
m4 <- lm(log(50+y) ~ log(d+10+x1) + poly(x1, 2), data=dat)
m4.data <- model.data(m4)
head(m4.data)
#>            y x1
#> 1  11.690314  5
#> 2  20.459827  5
#> 3  21.944034  4
#> 4 -10.067395  8
#> 5   5.382613  2
#> 6  -7.506791  7
attr(m4.data, "varNamesRHS")
#> [1] "x1"


## x1 interacted with itself: the predictor is listed only once
m5 <- lm(y ~ x1*x1, data=dat)
m5.data <- model.data(m5)
head(m5.data)
#>            y x1
#> 1  11.690314  5
#> 2  20.459827  5
#> 3  21.944034  4
#> 4 -10.067395  8
#> 5   5.382613  2
#> 6  -7.506791  7
attr(m5.data, "varNamesRHS")
#> [1] "x1"


## x1 entered directly and again through I(x1^2): still one x1 column
m6 <- lm(y ~ x1 + I(x1^2), data=dat)
m6.data <- model.data(m6)
head(m6.data)
#>            y x1
#> 1  11.690314  5
#> 2  20.459827  5
#> 3  21.944034  4
#> 4 -10.067395  8
#> 5   5.382613  2
#> 6  -7.506791  7
attr(m6.data, "varNamesRHS")
#> [1] "x1"


## Put in some missings.
## poly doesn't work if there are missings, but
## can test with log
## Each line blanks out a fresh random set of positions, so the
## missing rows differ from one variable to the next.
dat$x1[sample(100, 5)] <- NA
dat$y[sample(100, 5)] <- NA
dat$x2[sample(100, 5)] <- NA
dat$x3[sample(100,10)] <- NA

## Refit with missing values present. The summary below reports
## nmissing of 0 for the recovered data.
m1 <- lm(y ~ log(10 + x1), data=dat)
m1.data <- model.data(m1)
head(m1.data)
#>            y x1
#> 1  11.690314  5
#> 2  20.459827  5
#> 3  21.944034  4
#> 4 -10.067395  8
#> 5   5.382613  2
#> 6  -7.506791  7
summarize(m1.data)
#> Numeric variables
#>               y        x1   
#> min        -26.014     2    
#> med         10.647     6    
#> max         64.296    13    
#> mean        10.894     6.110
#> sd          19.736     2.610
#> skewness     0.353     0.189
#> kurtosis    -0.368    -0.631
#> nobs        91        91    
#> nmissing     0         0    
attr(m1.data, "varNamesRHS")
#> [1] "x1"



## Same predictor as in log(10 + x1), but with the terms inside log()
## written in the opposite order.
m2 <- lm(y ~ log(x1 + 10), data = dat)
m2.data <- model.data(m2)
head(m2.data)
#>            y x1
#> 1  11.690314  5
#> 2  20.459827  5
#> 3  21.944034  4
#> 4 -10.067395  8
#> 5   5.382613  2
#> 6  -7.506791  7
## Inspect m2.data, the object just recovered from m2
summarize(m2.data)
#> Numeric variables
#>               y        x1   
#> min        -26.014     2    
#> med         10.647     6    
#> max         64.296    13    
#> mean        10.894     6.110
#> sd          19.736     2.610
#> skewness     0.353     0.189
#> kurtosis    -0.368    -0.631
#> nobs        91        91    
#> nmissing     0         0    
attr(m2.data, "varNamesRHS")
#> [1] "x1"

## Workspace scalar d inside a predictor transformation, plus x2 and x3,
## which also contain missing values. Row 4 is absent from the head()
## output below.
d <- 2
m3 <- lm(log(50+y) ~ log(d+10+x1) + x2 + sin(x3), data=dat)
m3.data <- model.data(m3)
head(m3.data)
#>           y x1       x2         x3
#> 1 11.690314  5 46.14669 -0.5202703
#> 2 20.459827  5 45.87979 -0.5543718
#> 3 21.944034  4 44.87008  1.2871108
#> 5  5.382613  2 70.24344  0.5812993
#> 6 -7.506791  7 24.61845 -0.1387219
#> 7 15.471727  8 53.13837 -1.2149978
summarize(m3.data)
#> Numeric variables
#>               y        x1        x2        x3   
#> min        -26.014     2        24.618    -2.419
#> med         11.690     6        49.692    -0.193
#> max         64.296    13        71.677     2.700
#> mean        11.102     6.091    50.500    -0.097
#> sd          20.297     2.711     9.764     1.040
#> skewness     0.343     0.261    -0.048     0.362
#> kurtosis    -0.360    -0.710    -0.243    -0.313
#> nobs        77        77        77        77    
#> nmissing     0         0         0         0    
attr(m3.data, "varNamesRHS")
#> [1] "x1" "x2" "x3"


## x1 wrapped in I() twice, once as I(x1) and once as I(x1^2)
m4 <- lm(y ~ I(x1) + I(x1^2) + log(x2), data=dat)
m4.data <- model.data(m4)
summarize(m4.data)
#> Numeric variables
#>               y        x1        x2   
#> min        -26.014     2        24.618
#> med         11.331     6        50.782
#> max         64.296    13        71.677
#> mean        11.264     6.093    50.563
#> sd          19.945     2.664     9.685
#> skewness     0.323     0.208    -0.106
#> kurtosis    -0.389    -0.694    -0.268
#> nobs        86        86        86    
#> nmissing     0         0         0    
attr(m4.data, "varNamesRHS")
#> [1] "x1" "x2"


## x1 entered directly and through I(x1^2), with cos(x2)
m5 <- lm(y ~ x1 + I(x1^2) + cos(x2), data=dat)
m5.data <- model.data(m5)
head(m5.data)
#>            y x1       x2
#> 1  11.690314  5 46.14669
#> 2  20.459827  5 45.87979
#> 3  21.944034  4 44.87008
#> 4 -10.067395  8 52.88255
#> 5   5.382613  2 70.24344
#> 6  -7.506791  7 24.61845
summarize(m5.data)
#> Numeric variables
#>               y        x1        x2   
#> min        -26.014     2        24.618
#> med         11.331     6        50.782
#> max         64.296    13        71.677
#> mean        11.264     6.093    50.563
#> sd          19.945     2.664     9.685
#> skewness     0.323     0.208    -0.106
#> kurtosis    -0.389    -0.694    -0.268
#> nobs        86        86        86    
#> nmissing     0         0         0    
attr(m5.data, "varNamesRHS")
#> [1] "x1" "x2"



## Now try with some variables in the dataframe, some not

## x10 and x11 are created in the workspace and are not added to dat
x10 <- rnorm(100)
x11 <- rnorm(100)



## The formula mixes columns of dat (y, x1, x2) with the workspace
## vectors x10 and x11; the recovered data frame holds all of them.
m6 <- lm(y ~ x1 + I(x1^2) + cos(x2) + log(10 + x10) + sin(x11) +
         x10*x11, data = dat)
m6.data <- model.data(m6)
head(m6.data)
#>            y x1       x2        x10        x11
#> 1  11.690314  5 46.14669 -0.7653758  1.6574479
#> 2  20.459827  5 45.87979  0.4688345 -0.2611067
#> 3  21.944034  4 44.87008 -0.4908475  0.8192324
#> 4 -10.067395  8 52.88255 -0.3991977 -2.0600598
#> 5   5.382613  2 70.24344 -0.4872019  0.3384131
#> 6  -7.506791  7 24.61845 -1.3272853 -0.8639491
dim(m6.data)
#> [1] 86  5
## NOTE(review): this summarizes m5.data, not m6.data, so the table
## below has no x10 or x11 columns. Looks like a copy-paste slip;
## confirm intent and re-render the output if it is changed.
summarize(m5.data)
#> Numeric variables
#>               y        x1        x2   
#> min        -26.014     2        24.618
#> med         11.331     6        50.782
#> max         64.296    13        71.677
#> mean        11.264     6.093    50.563
#> sd          19.945     2.664     9.685
#> skewness     0.323     0.208    -0.106
#> kurtosis    -0.389    -0.694    -0.268
#> nobs        86        86        86    
#> nmissing     0         0         0    
attr(m6.data, "varNamesRHS")
#> [1] "x1"  "x2"  "x10" "x11"