Measure variable importance. — variable_importance

variable_importance measures the importance of variables using the specified method.

variable_importance(
  sample,
  variables,
  operation = "replicate_correlation",
  ...
)

Arguments

sample	tbl containing sample used to estimate parameters.
variables	character vector specifying observation variables.
operation	optional character string specifying method for computing variable importance. This must be one of the strings `"replicate_correlation"` (default) or `"svd_entropy"`.
...	arguments passed to variable importance operation.

Value

data frame containing variable importance measures.

Examples

set.seed(123)
x1 <- rnorm(10)
x2 <- x1 + rnorm(10) / 100
y1 <- rnorm(10)
y2 <- y1 + rnorm(10) / 10
z1 <- rnorm(10)
z2 <- z1 + rnorm(10) / 1

batch <- rep(rep(1:2, each = 5), 2)

treatment <- rep(1:10, 2)

replicate_id <- rep(1:2, each = 10)

sample <-
  tibble::tibble(
    x = c(x1, x2), y = c(y1, y2), z = c(z1, z2),
    Metadata_treatment = treatment,
    Metadata_replicate_id = replicate_id,
    Metadata_batch = batch
  )

head(sample)
#> # A tibble: 6 x 6
#>         x      y      z Metadata_treatment Metadata_replicate_id Metadata_batch
#>     <dbl>  <dbl>  <dbl>              <int>                 <int>          <int>
#> 1 -0.560  -1.07  -0.695                  1                     1              1
#> 2 -0.230  -0.218 -0.208                  2                     1              1
#> 3  1.56   -1.03  -1.27                   3                     1              1
#> 4  0.0705 -0.729  2.17                   4                     1              1
#> 5  0.129  -0.625  1.21                   5                     1              1
#> 6  1.72   -1.69  -1.12                   6                     1              2

# `replicate_correlation` returns the median, min, and max
# replicate correlation (across batches) per variable
variable_importance(
  sample = sample,
  variables = c("x", "y", "z"),
  operation = "replicate_correlation",
  strata = c("Metadata_treatment"),
  replicates = 2,
  split_by = "Metadata_batch",
  replicate_by = "Metadata_replicate_id",
  cores = 1
)
#> # A tibble: 3 x 4
#>   variable median   min   max
#>   <chr>     <dbl> <dbl> <dbl>
#> 1 x         1.00  1.00  1.00 
#> 2 y         0.996 0.993 0.999
#> 3 z         0.627 0.290 0.964

# `svd_entropy` measures the contribution of each variable in decreasing
# the data entropy.

variable_importance(
  sample = sample,
  variables = c("x", "y", "z"),
  operation = "svd_entropy",
  cores = 1
)
#> # A tibble: 3 x 2
#>   variable svd_entropy
#>   <chr>          <dbl>
#> 1 x              0.143
#> 2 y              0.157
#> 3 z              0.158