This can be useful if you are imputing specific values; however, we would
generally recommend imputing using other model-based approaches. See
the simputation package, for example simputation::impute_lm().
impute_zero(x)
Value: a vector with its missing values replaced by a fixed value (zero).
vec <- rnorm(10)
vec[sample(1:10, 3)] <- NA
vec
#> [1] -1.2973793 2.1936681 NA 1.3602905 -0.7435074 -0.1664855
#> [7] NA NA 2.0497529 1.3781885
impute_zero(vec)
#> [1] -1.2973793 2.1936681 0.0000000 1.3602905 -0.7435074 -0.1664855
#> [7] 0.0000000 0.0000000 2.0497529 1.3781885
library(dplyr)
dat <- tibble(
num = rnorm(10),
int = rpois(10, 5),
fct = factor(LETTERS[1:10])
) %>%
mutate(
across(
everything(),
\(x) set_prop_miss(x, prop = 0.25)
)
)
dat
#> # A tibble: 10 × 3
#> num int fct
#> <dbl> <int> <fct>
#> 1 NA NA A
#> 2 NA 6 B
#> 3 0.747 2 C
#> 4 -1.91 2 D
#> 5 1.13 5 NA
#> 6 -1.54 4 F
#> 7 -0.396 2 G
#> 8 -0.635 3 H
#> 9 -0.0434 5 NA
#> 10 -0.862 NA J
dat %>%
nabular() %>%
mutate(
num = impute_fixed(num, -9999),
int = impute_zero(int),
fct = impute_factor(fct, "out")
)
#> # A tibble: 10 × 6
#> num int fct num_NA int_NA fct_NA
#> <dbl> <dbl> <fct> <fct> <fct> <fct>
#> 1 -9999 0 A NA NA !NA
#> 2 -9999 6 B NA !NA !NA
#> 3 0.747 2 C !NA !NA !NA
#> 4 -1.91 2 D !NA !NA !NA
#> 5 1.13 5 out !NA !NA NA
#> 6 -1.54 4 F !NA !NA !NA
#> 7 -0.396 2 G !NA !NA !NA
#> 8 -0.635 3 H !NA !NA !NA
#> 9 -0.0434 5 out !NA !NA NA
#> 10 -0.862 0 J !NA NA !NA