Skip to contents

The meanCentered regression function requires centered inputs when predictions are calculated. For comparison with ordinary regression, it is convenient to have both the centered and the original data side-by-side. This function handles that. If the input data has columns c("x1","x2","x3"), then the centered result will have columns c("x1","x2","x3","x1c","x2c","x3c"), where "c" indicates "mean-centered". If standardize=TRUE, then the result will have columns c("x1","x2","x3","x1cs","x2cs","x3cs"), where "cs" indicates "centered and scaled".

Usage

centerNumerics(data, center, standardize = FALSE)

Arguments

data

Required. data frame or matrix.

center

Optional. If center is NOT supplied, then all numeric columns in data will be centered (possibly scaled). Can be specified in 2 formats: 1) Vector of column names that are to be centered (possibly scaled), 2) Vector of named elements giving values of means to be used in centering. Values must be named, as in c("x1" = 17, "x2" = 44).

standardize

Default FALSE. If TRUE, the variables are first mean-centered, and then divided by their standard deviations (scaled). Alternatively, the user can supply a named vector of scale values by which to divide each variable (otherwise sd is used). The vector must have the same names and length as the center argument. Variables can be entered in any order (they will be re-sorted inside the function).

Value

A data frame with 1) all original columns, and 2) additional columns with centered/scaled data, whose names have the suffix "c" or "cs" appended to indicate that the data is centered or centered and scaled. Attributes "centers" and "scales" are created for "record keeping" on centering and scaling values.

Examples

set.seed(12345)
dat <- data.frame(x1=rnorm(100, m = 50), x2 = rnorm(100, m = 50),
    x3 = rnorm(100, m = 50), y = rnorm(100),
    x4 = gl(2, 50, labels = c("Male","Female")))
datc1 <- centerNumerics(dat)
head(datc1)
#>         x1       x2       x3           y   x4        x1c        x2c        x3c
#> 1 50.58553 50.22393 48.56385  0.52228217 Male  0.3403316  0.1786923 -1.3899341
#> 2 50.70947 48.84378 49.37074  0.00979376 Male  0.4642688 -1.2014564 -0.5830481
#> 3 49.89070 50.42242 50.24352 -0.44052620 Male -0.3545005  0.3771854  0.2897333
#> 4 49.54650 48.67524 51.05836  1.19948953 Male -0.6986944 -1.3699884  1.1045738
#> 5 50.60589 50.14108 50.83135 -0.11746849 Male  0.3606903  0.0958512  0.8775604
#> 6 48.18204 49.46395 50.10521  0.03820979 Male -2.0631532 -0.5812811  0.1514234
#>           yc
#> 1  0.3070063
#> 2 -0.2054821
#> 3 -0.6558021
#> 4  0.9842136
#> 5 -0.3327444
#> 6 -0.1770661
summarize(datc1)
#> Numeric variables
#>              x1        x2        x3         y        x1c       x2c       x3c  
#> min         47.620    47.876    47.710    -2.582    -2.626    -2.169    -2.244
#> med         50.484    50.026    50.013     0.347     0.239    -0.019     0.060
#> max         52.477    52.656    52.747     2.268     2.232     2.611     2.794
#> mean        50.245    50.045    49.954     0.215     0         0         0    
#> sd           1.115     1.011     0.932     0.971     1.115     1.011     0.932
#> skewness    -0.142     0.170    -0.019    -0.270    -0.142     0.170    -0.019
#> kurtosis    -0.610    -0.388     0.271     0.300    -0.610    -0.388     0.271
#> nobs       100       100       100       100       100       100       100    
#> nmissing     0         0         0         0         0         0         0    
#>              yc   
#> min         -2.797
#> med          0.131
#> max          2.053
#> mean         0    
#> sd           0.971
#> skewness    -0.270
#> kurtosis     0.300
#> nobs       100    
#> nmissing     0    
#> 
#> Nonnumeric variables
#>                   x4
#>  Male  : 50         
#>  Female: 50         
#>  nobs         : 100 
#>  nmiss        :   0 
#>  entropy      :   1 
#>  normedEntropy:   1 
datc2 <- centerNumerics(dat, center=c("x1", "x2"))
head(datc2)
#>         x1       x2       x3           y   x4        x1c        x2c
#> 1 50.58553 50.22393 48.56385  0.52228217 Male  0.3403316  0.1786923
#> 2 50.70947 48.84378 49.37074  0.00979376 Male  0.4642688 -1.2014564
#> 3 49.89070 50.42242 50.24352 -0.44052620 Male -0.3545005  0.3771854
#> 4 49.54650 48.67524 51.05836  1.19948953 Male -0.6986944 -1.3699884
#> 5 50.60589 50.14108 50.83135 -0.11746849 Male  0.3606903  0.0958512
#> 6 48.18204 49.46395 50.10521  0.03820979 Male -2.0631532 -0.5812811
summarize(datc2)
#> Numeric variables
#>              x1        x2        x3         y        x1c       x2c  
#> min         47.620    47.876    47.710    -2.582    -2.626    -2.169
#> med         50.484    50.026    50.013     0.347     0.239    -0.019
#> max         52.477    52.656    52.747     2.268     2.232     2.611
#> mean        50.245    50.045    49.954     0.215     0         0    
#> sd           1.115     1.011     0.932     0.971     1.115     1.011
#> skewness    -0.142     0.170    -0.019    -0.270    -0.142     0.170
#> kurtosis    -0.610    -0.388     0.271     0.300    -0.610    -0.388
#> nobs       100       100       100       100       100       100    
#> nmissing     0         0         0         0         0         0    
#> 
#> Nonnumeric variables
#>                   x4
#>  Male  : 50         
#>  Female: 50         
#>  nobs         : 100 
#>  nmiss        :   0 
#>  entropy      :   1 
#>  normedEntropy:   1 
attributes(datc2)
#> $names
#> [1] "x1"  "x2"  "x3"  "y"   "x4"  "x1c" "x2c"
#> 
#> $class
#> [1] "data.frame"
#> 
#> $row.names
#>   [1]   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18
#>  [19]  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36
#>  [37]  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53  54
#>  [55]  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71  72
#>  [73]  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89  90
#>  [91]  91  92  93  94  95  96  97  98  99 100
#> 
#> $centers
#>       x1       x2 
#> 50.24520 50.04523 
#> 
datc3 <- centerNumerics(dat, center = c("x1" = 30, "x2" = 40))
head(datc3)
#>         x1       x2       x3           y   x4      x1c       x2c
#> 1 50.58553 50.22393 48.56385  0.52228217 Male 20.58553 10.223925
#> 2 50.70947 48.84378 49.37074  0.00979376 Male 20.70947  8.843777
#> 3 49.89070 50.42242 50.24352 -0.44052620 Male 19.89070 10.422419
#> 4 49.54650 48.67524 51.05836  1.19948953 Male 19.54650  8.675245
#> 5 50.60589 50.14108 50.83135 -0.11746849 Male 20.60589 10.141084
#> 6 48.18204 49.46395 50.10521  0.03820979 Male 18.18204  9.463952
summarize(datc3)
#> Numeric variables
#>              x1        x2        x3         y        x1c       x2c  
#> min         47.620    47.876    47.710    -2.582    17.620     7.876
#> med         50.484    50.026    50.013     0.347    20.484    10.026
#> max         52.477    52.656    52.747     2.268    22.477    12.656
#> mean        50.245    50.045    49.954     0.215    20.245    10.045
#> sd           1.115     1.011     0.932     0.971     1.115     1.011
#> skewness    -0.142     0.170    -0.019    -0.270    -0.142     0.170
#> kurtosis    -0.610    -0.388     0.271     0.300    -0.610    -0.388
#> nobs       100       100       100       100       100       100    
#> nmissing     0         0         0         0         0         0    
#> 
#> Nonnumeric variables
#>                   x4
#>  Male  : 50         
#>  Female: 50         
#>  nobs         : 100 
#>  nmiss        :   0 
#>  entropy      :   1 
#>  normedEntropy:   1 
attributes(datc3)
#> $names
#> [1] "x1"  "x2"  "x3"  "y"   "x4"  "x1c" "x2c"
#> 
#> $class
#> [1] "data.frame"
#> 
#> $row.names
#>   [1]   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18
#>  [19]  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36
#>  [37]  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53  54
#>  [55]  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71  72
#>  [73]  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89  90
#>  [91]  91  92  93  94  95  96  97  98  99 100
#> 
#> $centers
#> x1 x2 
#> 30 40 
#> 
datc4 <- centerNumerics(dat, center=c("x1", "x2"), standardize = TRUE)
head(datc3)
#>         x1       x2       x3           y   x4      x1c       x2c
#> 1 50.58553 50.22393 48.56385  0.52228217 Male 20.58553 10.223925
#> 2 50.70947 48.84378 49.37074  0.00979376 Male 20.70947  8.843777
#> 3 49.89070 50.42242 50.24352 -0.44052620 Male 19.89070 10.422419
#> 4 49.54650 48.67524 51.05836  1.19948953 Male 19.54650  8.675245
#> 5 50.60589 50.14108 50.83135 -0.11746849 Male 20.60589 10.141084
#> 6 48.18204 49.46395 50.10521  0.03820979 Male 18.18204  9.463952
summarize(datc4)
#> Numeric variables
#>              x1        x2        x3         y        x1cs      x2cs 
#> min         47.620    47.876    47.710    -2.582    -2.355    -2.145
#> med         50.484    50.026    50.013     0.347     0.214    -0.019
#> max         52.477    52.656    52.747     2.268     2.002     2.582
#> mean        50.245    50.045    49.954     0.215     0         0    
#> sd           1.115     1.011     0.932     0.971     1         1    
#> skewness    -0.142     0.170    -0.019    -0.270    -0.142     0.170
#> kurtosis    -0.610    -0.388     0.271     0.300    -0.610    -0.388
#> nobs       100       100       100       100       100       100    
#> nmissing     0         0         0         0         0         0    
#> 
#> Nonnumeric variables
#>                   x4
#>  Male  : 50         
#>  Female: 50         
#>  nobs         : 100 
#>  nmiss        :   0 
#>  entropy      :   1 
#>  normedEntropy:   1 
attributes(datc4)
#> $names
#> [1] "x1"   "x2"   "x3"   "y"    "x4"   "x1cs" "x2cs"
#> 
#> $class
#> [1] "data.frame"
#> 
#> $row.names
#>   [1]   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18
#>  [19]  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36
#>  [37]  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53  54
#>  [55]  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71  72
#>  [73]  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89  90
#>  [91]  91  92  93  94  95  96  97  98  99 100
#> 
#> $centers
#>       x1       x2 
#> 50.24520 50.04523 
#> 
#> $scales
#>       x1       x2 
#> 1.114731 1.011241 
#> 
datc5 <- centerNumerics(dat, center=c("x1"=30, "x2"=40),
standardize = c("x2" = 5, "x1" = 7))
head(datc5)
#>         x1       x2       x3           y   x4     x1cs     x2cs
#> 1 50.58553 50.22393 48.56385  0.52228217 Male 2.940790 2.044785
#> 2 50.70947 48.84378 49.37074  0.00979376 Male 2.958495 1.768755
#> 3 49.89070 50.42242 50.24352 -0.44052620 Male 2.841528 2.084484
#> 4 49.54650 48.67524 51.05836  1.19948953 Male 2.792358 1.735049
#> 5 50.60589 50.14108 50.83135 -0.11746849 Male 2.943698 2.028217
#> 6 48.18204 49.46395 50.10521  0.03820979 Male 2.597435 1.892790
summarize(datc5)
#> Numeric variables
#>              x1        x2        x3         y        x1cs      x2cs 
#> min         47.620    47.876    47.710    -2.582     2.517     1.575
#> med         50.484    50.026    50.013     0.347     2.926     2.005
#> max         52.477    52.656    52.747     2.268     3.211     2.531
#> mean        50.245    50.045    49.954     0.215     2.892     2.009
#> sd           1.115     1.011     0.932     0.971     0.159     0.202
#> skewness    -0.142     0.170    -0.019    -0.270    -0.142     0.170
#> kurtosis    -0.610    -0.388     0.271     0.300    -0.610    -0.388
#> nobs       100       100       100       100       100       100    
#> nmissing     0         0         0         0         0         0    
#> 
#> Nonnumeric variables
#>                   x4
#>  Male  : 50         
#>  Female: 50         
#>  nobs         : 100 
#>  nmiss        :   0 
#>  entropy      :   1 
#>  normedEntropy:   1 
attributes(datc5)
#> $names
#> [1] "x1"   "x2"   "x3"   "y"    "x4"   "x1cs" "x2cs"
#> 
#> $class
#> [1] "data.frame"
#> 
#> $row.names
#>   [1]   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18
#>  [19]  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36
#>  [37]  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53  54
#>  [55]  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71  72
#>  [73]  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89  90
#>  [91]  91  92  93  94  95  96  97  98  99 100
#> 
#> $centers
#> x1 x2 
#> 30 40 
#> 
#> $scales
#> x1 x2 
#>  7  5 
#>