This can be useful if you are imputing specific values; however, we generally recommend imputing with other, model-based approaches. See the simputation package, for example simputation::impute_lm().

impute_mean(x)

# Default S3 method
impute_mean(x)

# S3 method for class 'factor'
impute_mean(x)

Arguments

x

vector

Value

vector with missing values replaced by the mean

Examples


library(dplyr)
vec <- rnorm(10)

vec[sample(1:10, 3)] <- NA

impute_mean(vec)
#>  [1]  0.12779601  1.34467162 -2.51183820  0.12779601  0.80880184  0.32186331
#>  [7]  0.05810520 -0.04448689  0.91745520  0.12779601

dat <- tibble(
  num = rnorm(10),
  int = as.integer(rpois(10, 5)),
  fct = factor(LETTERS[1:10])
) %>%
  mutate(
    across(
      everything(),
      \(x) set_prop_miss(x, prop = 0.25)
    )
  )

dat
#> # A tibble: 10 × 3
#>       num   int fct  
#>     <dbl> <int> <fct>
#>  1 -0.532     5 NA   
#>  2 NA         4 B    
#>  3  1.04      4 C    
#>  4  0.202     5 D    
#>  5 NA        11 E    
#>  6 -0.283     9 NA   
#>  7 -0.201    NA G    
#>  8  1.33      7 H    
#>  9  1.03      6 I    
#> 10  1.17     NA J    

dat %>%
  nabular() %>%
  mutate(
    num = impute_mean(num),
    int = impute_mean(int),
    fct = impute_mean(fct),
  )
#> # A tibble: 10 × 6
#>       num   int fct   num_NA int_NA fct_NA
#>     <dbl> <dbl> <fct> <fct>  <fct>  <fct> 
#>  1 -0.532  5    E     !NA    !NA    NA    
#>  2  0.470  4    B     NA     !NA    !NA   
#>  3  1.04   4    C     !NA    !NA    !NA   
#>  4  0.202  5    D     !NA    !NA    !NA   
#>  5  0.470 11    E     NA     !NA    !NA   
#>  6 -0.283  9    E     !NA    !NA    NA    
#>  7 -0.201  6.38 G     !NA    NA     !NA   
#>  8  1.33   7    H     !NA    !NA    !NA   
#>  9  1.03   6    I     !NA    !NA    !NA   
#> 10  1.17   6.38 J     !NA    NA     !NA   

dat %>%
  nabular() %>%
  mutate(
    across(
      where(is.numeric),
      impute_mean
    )
  )
#> # A tibble: 10 × 6
#>       num   int fct   num_NA int_NA fct_NA
#>     <dbl> <dbl> <fct> <fct>  <fct>  <fct> 
#>  1 -0.532  5    NA    !NA    !NA    NA    
#>  2  0.470  4    B     NA     !NA    !NA   
#>  3  1.04   4    C     !NA    !NA    !NA   
#>  4  0.202  5    D     !NA    !NA    !NA   
#>  5  0.470 11    E     NA     !NA    !NA   
#>  6 -0.283  9    NA    !NA    !NA    NA    
#>  7 -0.201  6.38 G     !NA    NA     !NA   
#>  8  1.33   7    H     !NA    !NA    !NA   
#>  9  1.03   6    I     !NA    !NA    !NA   
#> 10  1.17   6.38 J     !NA    NA     !NA   

dat %>%
  nabular() %>%
  mutate(
    across(
      c("num", "int"),
      impute_mean
    )
  )
#> # A tibble: 10 × 6
#>       num   int fct   num_NA int_NA fct_NA
#>     <dbl> <dbl> <fct> <fct>  <fct>  <fct> 
#>  1 -0.532  5    NA    !NA    !NA    NA    
#>  2  0.470  4    B     NA     !NA    !NA   
#>  3  1.04   4    C     !NA    !NA    !NA   
#>  4  0.202  5    D     !NA    !NA    !NA   
#>  5  0.470 11    E     NA     !NA    !NA   
#>  6 -0.283  9    NA    !NA    !NA    NA    
#>  7 -0.201  6.38 G     !NA    NA     !NA   
#>  8  1.33   7    H     !NA    !NA    !NA   
#>  9  1.03   6    I     !NA    !NA    !NA   
#> 10  1.17   6.38 J     !NA    NA     !NA