cosine_sparse computes cosine similarity between pairs of rows of a matrix.
pearson_sparse computes Pearson similarity between pairs of rows of a matrix.
cosine_sparse(X, id1, id2, ...)
pearson_sparse(X, id1, id2, ...)
X: matrix whose rows are compared.
id1: vector of integers specifying the list of rows of X (first set).
id2: vector of integers specifying the list of rows of X (second set), same length as id1.
...: arguments passed downstream for parallel processing.
data.frame with the same number of rows as the length of id1 (and id2), containing the similarity between the pairs of rows of X: sim[i] == similarity(X[id1[i], ], X[id2[i], ]).
# Example: check cosine_sparse() against a dense all-pairs computation, then
# check pearson_sparse() against cosine_sparse() on row-centered data.
# NOTE(review): `%>%` is used unqualified below; presumably it is made
# available by the package or an attached magrittr/dplyr -- confirm.

# Fix the RNG seed so the random matrix (and the printed output) is reproducible.
set.seed(42)
# 5 x 3 matrix of standard-normal draws; its rows are the vectors compared.
X <- matrix(rnorm(5 * 3), 5, 3)
# Row pairs to compare: (1, 5) and (3, 4).
id1 <- c(1, 3)
id2 <- c(5, 4)
# Sparse route: similarity is requested only for the listed pairs.
s1 <- matric::cosine_sparse(X, id1, id2) %>% dplyr::arrange(id1, id2)
# Dense reference: divide every row by its Euclidean norm (the length-5 vector
# recycles down the columns, so element [i, j] is divided by the norm of row i).
Xn <- X / sqrt(rowSums(X * X))
n_rows <- nrow(Xn)
# Enumerate all (id1, id2) index pairs with id1 varying fastest, which matches
# the column-major order in which as.vector() flattens tcrossprod(Xn).
s2 <-
expand.grid(
id1 = seq(n_rows),
id2 = seq(n_rows),
KEEP.OUT.ATTRS = FALSE
) %>%
# tcrossprod(Xn) is Xn %*% t(Xn): dot products of unit-norm rows, i.e. the
# cosine similarity of every pair of rows.
dplyr::mutate(sim = as.vector(tcrossprod(Xn))) %>%
# Keep only the pairs present in s1; no `by` is given, so the join columns are
# inferred (hence the message below).
dplyr::inner_join(s1 %>% dplyr::select(id1, id2)) %>%
dplyr::arrange(id1, id2)
#> Joining with `by = join_by(id1, id2)`
s1
#> id1 id2 sim
#> 1 1 5 0.4743700
#> 2 3 4 0.1387656
# The sparse result agrees with the dense reference.
all.equal(s1, s2)
#> [1] TRUE
# Center each row by subtracting its own mean (rowMeans(X) recycles down the
# columns, so row i has mean(X[i, ]) subtracted).
Xm <- X - rowMeans(X)
# Cosine similarity of the row-centered matrix vs. Pearson similarity of the
# original matrix, for the same pairs.
s3 <- matric::cosine_sparse(Xm, id1, id2) %>% dplyr::arrange(id1, id2)
s4 <- matric::pearson_sparse(X, id1, id2) %>% dplyr::arrange(id1, id2)
# The two agree.
all.equal(s3, s4)
#> [1] TRUE