centroids.Rd
centroids computes group centroids, the pooled mean
and pooled variance, and optionally the group-specific variances.
centroids(x, L, lambda.var, lambda.freqs, var.groups=FALSE,
centered.data=FALSE, verbose=TRUE)
x: A matrix containing the data set. Note that the rows are sample observations and the columns are variables.
L: A factor with the group labels.
lambda.var: Shrinkage intensity for the variances. If not specified it is
estimated from the data, see details below. lambda.var=0 implies no shrinkage
and lambda.var=1 complete shrinkage.
lambda.freqs: Shrinkage intensity for the frequencies. If not specified it is
estimated from the data. lambda.freqs=0 implies no shrinkage (i.e. empirical frequencies)
and lambda.freqs=1 complete shrinkage (i.e. uniform frequencies).
var.groups: Estimate group-specific variances.
centered.data: Return column-centered data matrix.
verbose: Provide some messages while computing.
As the estimator of the variance we employ
var.shrink as described in Opgen-Rhein and Strimmer (2007).
For the estimates of the frequencies we rely on
freqs.shrink as described in Hausser and Strimmer (2009).
Note that the pooled mean is computed using the estimated frequencies.
centroids returns a list
with the following components:
samples: a vector containing the sample sizes in each group,
freqs: a vector containing the estimated frequency in each group,
means: the group means and the pooled mean,
variances: the group-specific and the pooled variances, and
centered.data: a matrix containing the centered data.
# load sda library
library("sda")
## prepare data set
data(iris) # good old iris data
X = as.matrix(iris[,1:4])
Y = iris[,5]
## estimate centroids and empirical pooled variances
centroids(X, Y, lambda.var=0)
#> Number of variables: 4
#> Number of observations: 150
#> Number of classes: 3
#>
#> Estimating optimal shrinkage intensity lambda.freq (frequencies): 1
#> Estimating variances (pooled across classes)
#> Specified shrinkage intensity lambda.var (variance vector): 0
#>
#> $samples
#> setosa versicolor virginica
#> 50 50 50
#>
#> $freqs
#> setosa versicolor virginica
#> 0.3333333 0.3333333 0.3333333
#> attr(,"lambda.freqs")
#> [1] 1
#> attr(,"lambda.freqs.estimated")
#> [1] TRUE
#>
#> $means
#> setosa versicolor virginica (pooled)
#> Sepal.Length 5.006 5.936 6.588 5.843333
#> Sepal.Width 3.428 2.770 2.974 3.057333
#> Petal.Length 1.462 4.260 5.552 3.758000
#> Petal.Width 0.246 1.326 2.026 1.199333
#>
#> $variances
#> (pooled)
#> Sepal.Length 0.26500816
#> Sepal.Width 0.11538776
#> Petal.Length 0.18518776
#> Petal.Width 0.04188163
#> attr(,"lambda.var")
#> [1] 0
#> attr(,"lambda.var.estimated")
#> [1] FALSE
#>
#> $centered.data
#> NULL
#>
## also compute group-specific variances
centroids(X, Y, var.groups=TRUE, lambda.var=0)
#> Number of variables: 4
#> Number of observations: 150
#> Number of classes: 3
#>
#> Estimating optimal shrinkage intensity lambda.freq (frequencies): 1
#> Estimating variances (class #1)
#> Specified shrinkage intensity lambda.var (variance vector): 0
#>
#> Estimating variances (class #2)
#> Specified shrinkage intensity lambda.var (variance vector): 0
#>
#> Estimating variances (class #3)
#> Specified shrinkage intensity lambda.var (variance vector): 0
#>
#> Estimating variances (pooled across classes)
#> Specified shrinkage intensity lambda.var (variance vector): 0
#>
#> $samples
#> setosa versicolor virginica
#> 50 50 50
#>
#> $freqs
#> setosa versicolor virginica
#> 0.3333333 0.3333333 0.3333333
#> attr(,"lambda.freqs")
#> [1] 1
#> attr(,"lambda.freqs.estimated")
#> [1] TRUE
#>
#> $means
#> setosa versicolor virginica (pooled)
#> Sepal.Length 5.006 5.936 6.588 5.843333
#> Sepal.Width 3.428 2.770 2.974 3.057333
#> Petal.Length 1.462 4.260 5.552 3.758000
#> Petal.Width 0.246 1.326 2.026 1.199333
#>
#> $variances
#> setosa versicolor virginica (pooled)
#> Sepal.Length 0.12424898 0.26643265 0.40434286 0.26500816
#> Sepal.Width 0.14368980 0.09846939 0.10400408 0.11538776
#> Petal.Length 0.03015918 0.22081633 0.30458776 0.18518776
#> Petal.Width 0.01110612 0.03910612 0.07543265 0.04188163
#> attr(,"lambda.var")
#> [1] 0 0 0 0
#> attr(,"lambda.var.estimated")
#> [1] FALSE
#>
#> $centered.data
#> NULL
#>
## use shrinkage estimator for the variances
centroids(X, Y, var.groups=TRUE)
#> Number of variables: 4
#> Number of observations: 150
#> Number of classes: 3
#>
#> Estimating optimal shrinkage intensity lambda.freq (frequencies): 1
#> Estimating variances (class #1)
#> Estimating optimal shrinkage intensity lambda.var (variance vector): 0.1315
#>
#> Estimating variances (class #2)
#> Estimating optimal shrinkage intensity lambda.var (variance vector): 0.1287
#>
#> Estimating variances (class #3)
#> Estimating optimal shrinkage intensity lambda.var (variance vector): 0.1354
#>
#> Estimating variances (pooled across classes)
#> Estimating optimal shrinkage intensity lambda.var (variance vector): 0.0726
#>
#> $samples
#> setosa versicolor virginica
#> 50 50 50
#>
#> $freqs
#> setosa versicolor virginica
#> 0.3333333 0.3333333 0.3333333
#> attr(,"lambda.freqs")
#> [1] 1
#> attr(,"lambda.freqs.estimated")
#> [1] TRUE
#>
#> $means
#> setosa versicolor virginica (pooled)
#> Sepal.Length 5.006 5.936 6.588 5.843333
#> Sepal.Width 3.428 2.770 2.974 3.057333
#> Petal.Length 1.462 4.260 5.552 3.758000
#> Petal.Width 0.246 1.326 2.026 1.199333
#>
#> $variances
#> setosa versicolor virginica (pooled)
#> Sepal.Length 0.11806141 0.25269054 0.37725192 0.2566824
#> Sepal.Width 0.13494528 0.10634142 0.11758589 0.1179206
#> Petal.Length 0.03634675 0.21294430 0.29100594 0.1826549
#> Petal.Width 0.01979964 0.05461724 0.09288369 0.0497491
#> attr(,"lambda.var")
#> [1] 0.13152480 0.12868373 0.13542292 0.07257402
#> attr(,"lambda.var.estimated")
#> [1] TRUE
#>
#> $centered.data
#> NULL
#>
## return centered data
xc = centroids(X, Y, centered.data=TRUE)$centered.data
#> Number of variables: 4
#> Number of observations: 150
#> Number of classes: 3
#>
#> Estimating optimal shrinkage intensity lambda.freq (frequencies): 1
#> Estimating variances (pooled across classes)
#> Estimating optimal shrinkage intensity lambda.var (variance vector): 0.0726
#>
apply(xc, 2, mean)
#> Sepal.Length Sepal.Width Petal.Length Petal.Width
#> -7.105174e-17 -5.624438e-17 1.909493e-16 4.533618e-17
## useful, e.g., to compute the inverse pooled correlation matrix
powcor.shrink(xc, alpha=-1)
#> Estimating optimal shrinkage intensity lambda (correlation matrix): 0.0335
#>
#> Sepal.Length Sepal.Width Petal.Length Petal.Width
#> Sepal.Length 2.5698469 -0.8141157 -1.7037483 0.2626377
#> Sepal.Width -0.8141157 1.5677251 0.2842077 -0.5592243
#> Petal.Length -1.7037483 0.2842077 2.4602011 -0.6809681
#> Petal.Width 0.2626377 -0.5592243 -0.6809681 1.4806445
#> attr(,"lambda")
#> [1] 0.03349646
#> attr(,"lambda.estimated")
#> [1] TRUE
#> attr(,"class")
#> [1] "shrinkage"