sim_collate collates several subsets of a melted similarity matrix, required for computing metrics.

sim_collate(
  sim_df,
  all_same_cols_rep,
  annotation_cols,
  any_different_cols_rep = NULL,
  all_different_cols_rep = NULL,
  all_same_cols_ref = NULL,
  all_same_cols_rep_ref = NULL,
  all_same_cols_non_rep = NULL,
  any_different_cols_non_rep = NULL,
  all_different_cols_non_rep = NULL,
  any_different_cols_group = NULL,
  all_same_cols_group = NULL,
  reference = NULL,
  drop_reference = FALSE,
  drop_group = NULL
)

Arguments

sim_df

metric_sim object.

all_same_cols_rep

character vector specifying columns (required; no default). Replicate pairs (Details, step 2) must have the same values in all of these columns.

annotation_cols

character vector specifying which columns from metadata to annotate the left index of the filtered sim_df with.

any_different_cols_rep

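optional character vector specifying columns. If specified, replicate pairs (Details, step 2) must have different values in at least one of these columns.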

all_different_cols_rep

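optional character vector specifying columns. If specified, replicate pairs (Details, step 2) must have different values in all of these columns.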

all_same_cols_ref

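optional character vector specifying columns. Pairs of a row with a reference row (Details, step 1) must have the same values in all of these columns.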

all_same_cols_rep_ref

optional character vector specifying columns. Pairs of reference rows (Details, step 3) must have the same values in all of these columns.

all_same_cols_non_rep

optional character vector specifying columns. Non-replicate pairs (Details, step 4) must have the same values in all of these columns.

any_different_cols_non_rep

optional character vector specifying columns. Non-replicate pairs (Details, step 4) must have different values in at least one of these columns.

all_different_cols_non_rep

optional character vector specifying columns. Non-replicate pairs (Details, step 4) must have different values in all of these columns.

any_different_cols_group

optional character vector specifying columns. Group pairs (Details, step 5) must have different values in at least one of these columns.

all_same_cols_group

optional character vector specifying columns. Group pairs (Details, step 5) must have the same values in all of these columns.

reference

optional data frame (tbl) specifying the reference rows, e.g. data.frame(Metadata_gene_name = "Chr2") as in the Examples.

drop_reference

optional boolean specifying whether to drop pairs whose left index is a reference row.

drop_group

optional tbl; rows that match on drop_group on the left or right index are dropped.

Value

metric_sim object comprising a filtered sim_df with sets of pairs, preserving the same metric_sim attributes as sim_df.

Details

0. Filter out some rows

Filter out pairs that match drop_group in either the left or the right index.

1. Similarity to reference

Fetch similarities between

  • (a) all rows (except, optionally, those containing reference), and

  • (b) all rows containing reference

Do so only for those (a, b) pairs that

  • have same values in all columns of all_same_cols_ref

2. Similarity to replicates (no references)

Fetch similarities between

  • (a) all rows except reference rows, and

  • (b) all rows except reference rows (i.e. to each other)

Do so for only those (a, b) pairs that

  • have same values in all columns of all_same_cols_rep

  • have different values in all columns of all_different_cols_rep (if specified)

  • have different values in at least one column of any_different_cols_rep (if specified)

Keep both (a, b) and (b, a).

3. Similarity to replicates (only references)

Fetch similarities between

  • (a) all rows containing reference, and

  • (b) all rows containing reference (i.e. to each other)

Do so for only those (a, b) pairs that

  • have same values in all columns of all_same_cols_rep_ref.

Keep both (a, b) and (b, a).

4. Similarity to non-replicates

Fetch similarities between

  • (a) all rows (except, optionally, reference rows), and

  • (b) all rows except reference rows

Do so for only those (a, b) pairs that

  • have same values in all columns of all_same_cols_non_rep

  • have different values in all columns of all_different_cols_non_rep

  • have different values in at least one column of any_different_cols_non_rep

Keep both (a, b) and (b, a).

5. Similarity to group

Fetch similarities between

  • (a) all rows (except, optionally, reference rows), and

  • (b) all rows (except, optionally, reference rows)

Do so for only those (a, b) pairs that

  • have same values in all columns of all_same_cols_group

  • have different values in at least one column of any_different_cols_group

Keep both (a, b) and (b, a).

Examples


# Similarities computed from matric::cellhealth.
sim_df <- matric::sim_calculate(matric::cellhealth)

# Step 0: pairs matching these rows (left or right index) are dropped.
drop_group <- data.frame(Metadata_gene_name = "EMPTY")

# Rows treated as the reference.
reference <- data.frame(Metadata_gene_name = "Chr2")

# Columns used to annotate the left index of the collated result.
annotation_cols <-
  c("Metadata_cell_line", "Metadata_gene_name", "Metadata_pert_name")

# Step 1: similarity to reference.
all_same_cols_ref <- c("Metadata_cell_line", "Metadata_Plate")

# Step 2: similarity to replicates (no references).
all_same_cols_rep <-
  c("Metadata_cell_line", "Metadata_gene_name", "Metadata_pert_name")

# Step 3: similarity to replicates (only references).
all_same_cols_rep_ref <-
  c(
    "Metadata_cell_line", "Metadata_gene_name",
    "Metadata_pert_name", "Metadata_Plate"
  )

# Step 4: similarity to non-replicates.
all_same_cols_non_rep <- c("Metadata_cell_line", "Metadata_Plate")
all_different_cols_non_rep <- "Metadata_gene_name"
any_different_cols_non_rep <-
  c("Metadata_cell_line", "Metadata_gene_name", "Metadata_pert_name")

# Step 5: similarity to group.
all_same_cols_group <- c("Metadata_cell_line", "Metadata_gene_name")
any_different_cols_group <-
  c("Metadata_cell_line", "Metadata_gene_name", "Metadata_pert_name")

# Every argument after sim_df is passed by name, so their order is free.
collated_sim <-
  matric::sim_collate(
    sim_df,
    all_same_cols_rep = all_same_cols_rep,
    annotation_cols = annotation_cols,
    all_same_cols_ref = all_same_cols_ref,
    all_same_cols_rep_ref = all_same_cols_rep_ref,
    all_same_cols_non_rep = all_same_cols_non_rep,
    any_different_cols_non_rep = any_different_cols_non_rep,
    all_different_cols_non_rep = all_different_cols_non_rep,
    any_different_cols_group = any_different_cols_group,
    all_same_cols_group = all_same_cols_group,
    reference = reference,
    drop_group = drop_group
  )

head(collated_sim)
#> # A tibble: 6 × 7
#>     id1   id2    sim Metadata_cell_line Metadata_gene_name Metadata_pert_name
#>   <int> <int>  <dbl> <chr>              <chr>              <chr>             
#> 1     2     1 -0.959 A549               AKT1               AKT1-1            
#> 2    23     1 -0.983 A549               AKT1               AKT1-1            
#> 3    24     1 -0.990 A549               AKT1               AKT1-1            
#> 4    45     1 -0.932 A549               AKT1               AKT1-1            
#> 5    46     1  0.982 A549               AKT1               AKT1-1            
#> 6     1     2 -0.959 A549               AKT1               AKT1-1            
#> # ℹ 1 more variable: type <chr>

# Number of collated pairs of each type.
dplyr::tally(dplyr::group_by(collated_sim, type))
#> # A tibble: 4 × 2
#>   type          n
#>   <chr>     <int>
#> 1 non_rep    1152
#> 2 ref        1944
#> 3 rep         468
#> 4 rep_group  3672