sim_calculate calculates a melted similarity matrix.

sim_calculate(
  population,
  annotation_prefix = "Metadata_",
  strata = NULL,
  method = "pearson",
  lazy = FALSE,
  all_same_cols_rep_or_group = NULL,
  all_same_cols_ref = NULL,
  all_same_cols_rep_ref = NULL,
  reference = NULL,
  ...
)

Arguments

population

data.frame with annotations (a.k.a. metadata) and observation variables.

annotation_prefix

optional character string specifying prefix for annotation columns.

strata

optional character vector specifying stratification columns.

method

optional character string specifying the method used to calculate similarity. This must be one of the strings "pearson" (default), "kendall", "spearman", "euclidean", "cosine".

lazy

optional boolean specifying whether to lazily evaluate similarity.

all_same_cols_rep_or_group

optional character vector specifying columns.

all_same_cols_ref

optional character vector specifying columns.

all_same_cols_rep_ref

optional character vector specifying columns.

reference

optional character string specifying reference. Note that the examples below pass a data.frame instead, e.g. data.frame(Metadata_group2 = 2).

...

arguments passed downstream for parallel processing.

Value

metric_sim object, with similarity matrix and related metadata

Examples

suppressMessages(suppressWarnings(library(magrittr)))
population <- tibble::tribble(
  ~Metadata_group1, ~Metadata_group2, ~x, ~y, ~z,
  1, 1, -1, 5, -5,
  1, 2, -1.2, 5.1, -5.2,
  2, 1, 0, 6, -4,
  2, 2, 0.3, 6.2, -4.4,
  3, 1, 7, -4, 3,
  3, 2, 7.2, -4.1, 3.7
)
sim_pearson <- matric::sim_calculate(population, method = "pearson")
sim_cosine <- matric::sim_calculate(population, method = "cosine")
sim_euclidean <- matric::sim_calculate(population, method = "euclidean")

sim_pearson %>%
  dplyr::inner_join(sim_cosine,
    by = c("id1", "id2"),
    suffix = c("_pearson", "_cosine")
  ) %>%
  dplyr::inner_join(sim_euclidean %>% dplyr::rename(sim_euclidean = sim),
    by = c("id1", "id2")
  )
#> # A tibble: 30 × 5
#>      id1   id2 sim_pearson sim_cosine sim_euclidean
#>    <int> <int>       <dbl>      <dbl>         <dbl>
#>  1     2     1       1.00       1.00           0.3 
#>  2     3     1       1          0.971          1.73
#>  3     4     1       0.999      0.970          1.87
#>  4     5     1      -0.714     -0.684         14.5 
#>  5     6     1      -0.754     -0.713         15.0 
#>  6     1     2       1.00       1.00           0.3 
#>  7     3     2       1.00       0.966          1.92
#>  8     4     2       0.998      0.964          2.02
#>  9     5     2      -0.723     -0.699         14.7 
#> 10     6     2      -0.763     -0.728         15.3 
#> # ℹ 20 more rows

sim_cosine <-
  matric::sim_calculate(population,
    strata = "Metadata_group1",
    method = "cosine",
    lazy = TRUE
  )

matric::sim_calculate(population,
  method = "cosine",
  lazy = TRUE,
  all_same_cols_rep_or_group = c("Metadata_group2")
)
#> # A tibble: 18 × 2
#>      id1   id2
#>    <int> <int>
#>  1     1     1
#>  2     3     1
#>  3     5     1
#>  4     1     3
#>  5     3     3
#>  6     5     3
#>  7     1     5
#>  8     3     5
#>  9     5     5
#> 10     2     2
#> 11     4     2
#> 12     6     2
#> 13     2     4
#> 14     4     4
#> 15     6     4
#> 16     2     6
#> 17     4     6
#> 18     6     6

matric::sim_calculate(population,
  method = "cosine",
  lazy = TRUE,
  all_same_cols_rep_or_group = c("Metadata_group2"),
  all_same_cols_ref = c("Metadata_group1"),
  reference = data.frame(Metadata_group2 = 2)
)
#> # A tibble: 15 × 2
#>      id1   id2
#>    <int> <int>
#>  1     1     1
#>  2     3     1
#>  3     5     1
#>  4     1     3
#>  5     3     3
#>  6     5     3
#>  7     1     5
#>  8     3     5
#>  9     5     5
#> 10     1     2
#> 11     2     2
#> 12     3     4
#> 13     4     4
#> 14     5     6
#> 15     6     6

matric::sim_calculate(population,
  method = "cosine",
  lazy = TRUE,
  all_same_cols_rep_or_group = c("Metadata_group2"),
  all_same_cols_ref = c("Metadata_group1"),
  all_same_cols_rep_ref = c("Metadata_group2"),
  reference = data.frame(Metadata_group2 = 2)
)
#> # A tibble: 21 × 2
#>      id1   id2
#>    <int> <int>
#>  1     1     1
#>  2     3     1
#>  3     5     1
#>  4     1     3
#>  5     3     3
#>  6     5     3
#>  7     1     5
#>  8     3     5
#>  9     5     5
#> 10     1     2
#> # ℹ 11 more rows