Impute the median value into a vector with missing values

impute_median(x)

# Default S3 method
impute_median(x)

# S3 method for class 'factor'
impute_median(x)

Arguments

x

A vector, possibly containing missing values (`NA`).

Value

A vector with its missing values replaced by the median.

Examples


vec <- rnorm(10)

vec[sample(1:10, 3)] <- NA

impute_median(vec)
#>  [1] -0.15186435 -1.07310029 -0.09364359  0.30637628 -0.91960217  0.84868095
#>  [7] -0.09364359 -0.09364359 -0.09364359  0.40188223

library(dplyr)

dat <- tibble(
  num = rnorm(10),
  int = as.integer(rpois(10, 5)),
  fct = factor(LETTERS[1:10])
) %>%
  mutate(
    across(
      everything(),
      \(x) set_prop_miss(x, prop = 0.25)
    )
  )

dat
#> # A tibble: 10 × 3
#>       num   int fct  
#>     <dbl> <int> <fct>
#>  1  0.773    NA A    
#>  2 -0.397     7 B    
#>  3  0.664     6 C    
#>  4  1.48      5 NA   
#>  5 NA         9 E    
#>  6 -0.977     7 NA   
#>  7  0.601     3 G    
#>  8 -1.13      4 H    
#>  9 NA         5 I    
#> 10  0.266    NA J    

dat %>%
  nabular() %>%
  mutate(
    num = impute_median(num),
    int = impute_median(int),
  )
#> # A tibble: 10 × 6
#>       num   int fct   num_NA int_NA fct_NA
#>     <dbl> <dbl> <fct> <fct>  <fct>  <fct> 
#>  1  0.773   5.5 A     !NA    NA     !NA   
#>  2 -0.397   7   B     !NA    !NA    !NA   
#>  3  0.664   6   C     !NA    !NA    !NA   
#>  4  1.48    5   NA    !NA    !NA    NA    
#>  5  0.434   9   E     NA     !NA    !NA   
#>  6 -0.977   7   NA    !NA    !NA    NA    
#>  7  0.601   3   G     !NA    !NA    !NA   
#>  8 -1.13    4   H     !NA    !NA    !NA   
#>  9  0.434   5   I     NA     !NA    !NA   
#> 10  0.266   5.5 J     !NA    NA     !NA   

dat %>%
  nabular() %>%
  mutate(
    across(
      where(is.numeric),
      impute_median
    )
  )
#> # A tibble: 10 × 6
#>       num   int fct   num_NA int_NA fct_NA
#>     <dbl> <dbl> <fct> <fct>  <fct>  <fct> 
#>  1  0.773   5.5 A     !NA    NA     !NA   
#>  2 -0.397   7   B     !NA    !NA    !NA   
#>  3  0.664   6   C     !NA    !NA    !NA   
#>  4  1.48    5   NA    !NA    !NA    NA    
#>  5  0.434   9   E     NA     !NA    !NA   
#>  6 -0.977   7   NA    !NA    !NA    NA    
#>  7  0.601   3   G     !NA    !NA    !NA   
#>  8 -1.13    4   H     !NA    !NA    !NA   
#>  9  0.434   5   I     NA     !NA    !NA   
#> 10  0.266   5.5 J     !NA    NA     !NA   

dat %>%
  nabular() %>%
  mutate(
    across(
      c("num", "int"),
      impute_median
    )
 )
#> # A tibble: 10 × 6
#>       num   int fct   num_NA int_NA fct_NA
#>     <dbl> <dbl> <fct> <fct>  <fct>  <fct> 
#>  1  0.773   5.5 A     !NA    NA     !NA   
#>  2 -0.397   7   B     !NA    !NA    !NA   
#>  3  0.664   6   C     !NA    !NA    !NA   
#>  4  1.48    5   NA    !NA    !NA    NA    
#>  5  0.434   9   E     NA     !NA    !NA   
#>  6 -0.977   7   NA    !NA    !NA    NA    
#>  7  0.601   3   G     !NA    !NA    !NA   
#>  8 -1.13    4   H     !NA    !NA    !NA   
#>  9  0.434   5   I     NA     !NA    !NA   
#> 10  0.266   5.5 J     !NA    NA     !NA