Applies a row-by-row (column-by-column) averaging function to equally-sized subsets of matrix columns (rows)

Applies a row-by-row (column-by-column) averaging function to equally-sized subsets of matrix columns (rows). Each subset is averaged independently of the others.

rowAvgsPerColSet(X, W = NULL, rows = NULL, S, FUN = rowMeans, ...,
  na.rm = NA, tFUN = FALSE)

colAvgsPerRowSet(X, W = NULL, cols = NULL, S, FUN = colMeans, ...,
  na.rm = NA, tFUN = FALSE)

Arguments

X: A numeric NxM matrix.
W: An optional numeric NxM matrix of weights.
rows: A vector indicating subset of rows to operate over. If NULL, no subsetting is done.
S: An integer KxJ matrix specifying the J subsets. Each column holds K column (row) indices for the corresponding subset.
FUN: The row-by-row (column-by-column) function used to average over each subset of X. This function must accept a numeric NxK (KxM) matrix and the logical argument na.rm, and return a numeric vector of length N (M).
...: Additional arguments passed to the FUN function.
na.rm: (logical) Argument passed to FUN() as na.rm = na.rm. If NA (default), then na.rm = TRUE is used if X or S holds missing values, otherwise na.rm = FALSE.
tFUN: If TRUE, the NxK (KxM) matrix passed to FUN() is transposed first.
cols: A vector indicating subset of columns to operate over. If NULL, no subsetting is done.

Value

Returns a numeric NxJ (JxM) matrix, where row names equal rownames(X) (colnames(S)) and column names equal colnames(S) (colnames(X)).

Details

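If argument S is a single-column matrix with indices 1:M, then rowAvgsPerColSet(X, S = S, FUN = rowMeans) gives the same result as rowMeans(X), returned as a one-column matrix (use drop() to obtain the vector). Analogously, if S is a single-column matrix with indices 1:N, then colAvgsPerRowSet(X, S = S, FUN = colMeans) gives the same result as colMeans(X), returned as a one-row matrix.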

Author

Henrik Bengtsson

Examples

X <- matrix(rnorm(20 * 6), nrow = 20, ncol = 6)
rownames(X) <- LETTERS[1:nrow(X)]
colnames(X) <- letters[1:ncol(X)]
print(X)
#>             a           b            c           d           e           f
#> A  0.41794156  0.88110773  2.172611670  0.26709879  1.68217608 -0.32427027
#> B  1.35867955  0.39810588  0.475509529 -0.54252003 -0.63573645  0.06016044
#> C -0.10278773 -0.61202639 -0.709946431  1.20786781 -0.46164473 -0.58889449
#> D  0.38767161  0.34111969  0.610726353  1.16040262  1.43228224  0.53149619
#> E -0.05380504 -1.12936310 -0.934097632  0.70021365 -0.65069635 -1.51839408
#> F -1.37705956  1.43302370 -1.253633400  1.58683345 -0.20738074  0.30655786
#> G -0.41499456  1.98039990  0.291446236  0.55848643 -0.39280793 -1.53644982
#> H -0.39428995 -0.36722148 -0.443291873 -1.27659221 -0.31999287 -0.30097613
#> I -0.05931340 -1.04413463  0.001105352 -0.57326541 -0.27911330 -0.52827990
#> J  1.10002537  0.56971963  0.074341324 -1.22461261  0.49418833 -0.65209478
#> K  0.76317575 -0.13505460 -0.589520946 -0.47340064 -0.17733048 -0.05689678
#> L -0.16452360  2.40161776 -0.568668733 -0.62036668 -0.50595746 -1.91435943
#> M -0.25336168 -0.03924000 -0.135178615  0.04211587  1.34303883  1.17658331
#> N  0.69696338  0.68973936  1.178086997 -0.91092165 -0.21457941 -1.66497244
#> O  0.55666320  0.02800216 -1.523566800  0.15802877 -0.17955653 -0.46353040
#> P -0.68875569 -0.74327321  0.593946188 -0.65458464 -0.10019074 -1.11592011
#> Q -0.70749516  0.18879230  0.332950371  1.76728727  0.71266631 -0.75081900
#> R  0.36458196 -1.80495863  1.063099837  0.71670748 -0.07356440  2.08716655
#> S  0.76853292  1.46555486 -0.304183924  0.91017423 -0.03763417  0.01739562
#> T -0.11234621  0.15325334  0.370018810  0.38418536 -0.68166048 -1.28630053


# - - - - - - - - - - - - - - - - - - - - - - - - - -
# Apply rowMeans() for 3 sets of 2 columns
# - - - - - - - - - - - - - - - - - - - - - - - - - -
nbr_of_sets <- 3
S <- matrix(1:ncol(X), ncol = nbr_of_sets)
colnames(S) <- sprintf("s%d", 1:nbr_of_sets)
print(S)
#>      s1 s2 s3
#> [1,]  1  3  5
#> [2,]  2  4  6

Z <- rowAvgsPerColSet(X, S = S)
print(Z)
#>            s1          s2          s3
#> A  0.64952464  1.21985523  0.67895290
#> B  0.87839272 -0.03350525 -0.28778801
#> C -0.35740706  0.24896069 -0.52526961
#> D  0.36439565  0.88556448  0.98188922
#> E -0.59158407 -0.11694199 -1.08454522
#> F  0.02798207  0.16660003  0.04958856
#> G  0.78270267  0.42496633 -0.96462888
#> H -0.38075572 -0.85994204 -0.31048450
#> I -0.55172401 -0.28608003 -0.40369660
#> J  0.83487250 -0.57513565 -0.07895322
#> K  0.31406057 -0.53146079 -0.11711363
#> L  1.11854708 -0.59451771 -1.21015844
#> M -0.14630084 -0.04653137  1.25981107
#> N  0.69335137  0.13358267 -0.93977592
#> O  0.29233268 -0.68276901 -0.32154347
#> P -0.71601445 -0.03031923 -0.60805542
#> Q -0.25935143  1.05011882 -0.01907635
#> R -0.72018833  0.88990366  1.00680107
#> S  1.11704389  0.30299515 -0.01011928
#> T  0.02045356  0.37710208 -0.98398050

# Validation
Z0 <- cbind(s1 = rowMeans(X[, 1:2]),
            s2 = rowMeans(X[, 3:4]),
            s3 = rowMeans(X[, 5:6]))
stopifnot(identical(drop(Z), Z0))


# - - - - - - - - - - - - - - - - - - - - - - - - - -
# Apply colMeans() for 5 sets of 4 rows
# - - - - - - - - - - - - - - - - - - - - - - - - - -
nbr_of_sets <- 5
S <- matrix(1:nrow(X), ncol = nbr_of_sets)
colnames(S) <- sprintf("s%d", 1:nbr_of_sets)
print(S)
#>      s1 s2 s3 s4 s5
#> [1,]  1  5  9 13 17
#> [2,]  2  6 10 14 18
#> [3,]  3  7 11 15 19
#> [4,]  4  8 12 16 20

Z <- colAvgsPerRowSet(X, S = S)
print(Z)
#>              a             b           c          d           e           f
#> s1  0.51537625  0.2520767262  0.63722528  0.5232123  0.50426928 -0.08037703
#> s2 -0.56003728  0.4792097569 -0.58489417  0.3922353 -0.39271947 -0.76231554
#> s3  0.40984103  0.4480370394 -0.27068575 -0.7229113 -0.11705323 -0.78790772
#> s4  0.07787730 -0.0161929226  0.02832194 -0.3413404  0.21217804 -0.51695991
#> s5  0.07831838  0.0006604676  0.36547127  0.9445886 -0.02004819  0.01686066

# Validation
Z0 <- rbind(s1 = colMeans(X[  1:4, ]),
            s2 = colMeans(X[  5:8, ]),
            s3 = colMeans(X[ 9:12, ]),
            s4 = colMeans(X[13:16, ]),
            s5 = colMeans(X[17:20, ]))
stopifnot(identical(drop(Z), Z0))


# - - - - - - - - - - - - - - - - - - - - - - - - - -
# When there is only one "complete" set
# - - - - - - - - - - - - - - - - - - - - - - - - - -
nbr_of_sets <- 1
S <- matrix(1:ncol(X), ncol = nbr_of_sets)
colnames(S) <- sprintf("s%d", 1:nbr_of_sets)
print(S)
#>      s1
#> [1,]  1
#> [2,]  2
#> [3,]  3
#> [4,]  4
#> [5,]  5
#> [6,]  6

Z <- rowAvgsPerColSet(X, S = S, FUN = rowMeans)
print(Z)
#>            s1
#> A  0.84944426
#> B  0.18569982
#> C -0.21123866
#> D  0.74394978
#> E -0.59769043
#> F  0.08139022
#> G  0.08101337
#> H -0.51706075
#> I -0.41383355
#> J  0.06026121
#> K -0.11150462
#> L -0.22870969
#> M  0.35565962
#> N -0.03761396
#> O -0.23732660
#> P -0.45146303
#> Q  0.25723035
#> R  0.39217213
#> S  0.46997326
#> T -0.19547495

Z0 <- rowMeans(X)
stopifnot(identical(drop(Z), Z0))


nbr_of_sets <- 1
S <- matrix(1:nrow(X), ncol = nbr_of_sets)
colnames(S) <- sprintf("s%d", 1:nbr_of_sets)
print(S)
#>       s1
#>  [1,]  1
#>  [2,]  2
#>  [3,]  3
#>  [4,]  4
#>  [5,]  5
#>  [6,]  6
#>  [7,]  7
#>  [8,]  8
#>  [9,]  9
#> [10,] 10
#> [11,] 11
#> [12,] 12
#> [13,] 13
#> [14,] 14
#> [15,] 15
#> [16,] 16
#> [17,] 17
#> [18,] 18
#> [19,] 19
#> [20,] 20

Z <- colAvgsPerRowSet(X, S = S, FUN = colMeans)
print(Z)
#>            a         b          c         d          e          f
#> s1 0.1042751 0.2327582 0.03508772 0.1591569 0.03732529 -0.4261399

Z0 <- colMeans(X)
stopifnot(identical(drop(Z), Z0))