sim_calculate calculates a melted similarity matrix.
sim_calculate(
population,
annotation_prefix = "Metadata_",
strata = NULL,
method = "pearson",
lazy = FALSE,
all_same_cols_rep_or_group = NULL,
all_same_cols_ref = NULL,
all_same_cols_rep_ref = NULL,
reference = NULL,
...
)
population: data.frame with annotations (a.k.a. metadata) and observation variables.
annotation_prefix: optional character string specifying the prefix for annotation columns.
strata: optional character vector specifying stratification columns.
method: optional character string specifying the method used to calculate similarity. This must be one of the strings "pearson" (default), "kendall", "spearman", "euclidean", or "cosine".
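lazy: optional boolean specifying whether to lazily evaluate similarity.
all_same_cols_rep_or_group: optional character vector specifying columns (see the lazy examples below for its effect on which row pairs are returned).
all_same_cols_ref: optional character vector specifying columns (used together with reference in the examples below).
all_same_cols_rep_ref: optional character vector specifying columns (used together with reference in the examples below).
reference: optional reference specification; the examples below pass a data.frame of annotation values, e.g. data.frame(Metadata_group2 = 2).
...: arguments passed downstream for parallel processing.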
A metric_sim object, with the similarity matrix and related metadata.
suppressMessages(suppressWarnings(library(magrittr)))
population <- tibble::tribble(
~Metadata_group1, ~Metadata_group2, ~x, ~y, ~z,
1, 1, -1, 5, -5,
1, 2, -1.2, 5.1, -5.2,
2, 1, 0, 6, -4,
2, 2, 0.3, 6.2, -4.4,
3, 1, 7, -4, 3,
3, 2, 7.2, -4.1, 3.7
)
sim_pearson <- matric::sim_calculate(population, method = "pearson")
sim_cosine <- matric::sim_calculate(population, method = "cosine")
sim_euclidean <- matric::sim_calculate(population, method = "euclidean")
sim_pearson %>%
dplyr::inner_join(sim_cosine,
by = c("id1", "id2"),
suffix = c("_pearson", "_cosine")
) %>%
dplyr::inner_join(sim_euclidean %>% dplyr::rename(sim_euclidean = sim),
by = c("id1", "id2")
)
#> # A tibble: 30 × 5
#> id1 id2 sim_pearson sim_cosine sim_euclidean
#> <int> <int> <dbl> <dbl> <dbl>
#> 1 2 1 1.00 1.00 0.3
#> 2 3 1 1 0.971 1.73
#> 3 4 1 0.999 0.970 1.87
#> 4 5 1 -0.714 -0.684 14.5
#> 5 6 1 -0.754 -0.713 15.0
#> 6 1 2 1.00 1.00 0.3
#> 7 3 2 1.00 0.966 1.92
#> 8 4 2 0.998 0.964 2.02
#> 9 5 2 -0.723 -0.699 14.7
#> 10 6 2 -0.763 -0.728 15.3
#> # ℹ 20 more rows
sim_cosine <-
matric::sim_calculate(population,
strata = "Metadata_group1",
method = "cosine",
lazy = TRUE
)
matric::sim_calculate(population,
method = "cosine",
lazy = TRUE,
all_same_cols_rep_or_group = c("Metadata_group2")
)
#> # A tibble: 18 × 2
#> id1 id2
#> <int> <int>
#> 1 1 1
#> 2 3 1
#> 3 5 1
#> 4 1 3
#> 5 3 3
#> 6 5 3
#> 7 1 5
#> 8 3 5
#> 9 5 5
#> 10 2 2
#> 11 4 2
#> 12 6 2
#> 13 2 4
#> 14 4 4
#> 15 6 4
#> 16 2 6
#> 17 4 6
#> 18 6 6
matric::sim_calculate(population,
method = "cosine",
lazy = TRUE,
all_same_cols_rep_or_group = c("Metadata_group2"),
all_same_cols_ref = c("Metadata_group1"),
reference = data.frame(Metadata_group2 = 2)
)
#> # A tibble: 15 × 2
#> id1 id2
#> <int> <int>
#> 1 1 1
#> 2 3 1
#> 3 5 1
#> 4 1 3
#> 5 3 3
#> 6 5 3
#> 7 1 5
#> 8 3 5
#> 9 5 5
#> 10 1 2
#> 11 2 2
#> 12 3 4
#> 13 4 4
#> 14 5 6
#> 15 6 6
matric::sim_calculate(population,
method = "cosine",
lazy = TRUE,
all_same_cols_rep_or_group = c("Metadata_group2"),
all_same_cols_ref = c("Metadata_group1"),
all_same_cols_rep_ref = c("Metadata_group2"),
reference = data.frame(Metadata_group2 = 2)
)
#> # A tibble: 21 × 2
#> id1 id2
#> <int> <int>
#> 1 1 1
#> 2 3 1
#> 3 5 1
#> 4 1 3
#> 5 3 3
#> 6 5 3
#> 7 1 5
#> 8 3 5
#> 9 5 5
#> 10 1 2
#> # ℹ 11 more rows