sim_write
writes a similarity matrix to disk.
sim_write(sim_df, output, file_format = "parquet")
metric_sim
object.
character string specifying the output directory (for CSV) or filename (for Parquet).
character string specifying the file format. This must be one of csv
or parquet
(default).
No return value, called for side effects
The output format can be either CSV or Parquet.
With the CSV format, the row_metadata
and metric_metadata
attributes are saved as separate files.
This is not required for Parquet because the Parquet file stores these attributes alongside the data.
suppressMessages(suppressWarnings(library(magrittr)))
population <- tibble::tibble(
Metadata_group = sample(c("a", "b"), 4, replace = TRUE),
x = rnorm(4),
y = x + rnorm(4) / 100,
z = y + rnorm(4) / 1000
)
tmpdir <- tempdir()
tmpfile_prefix <- file.path(tmpdir, "test")
sim_df <- matric::sim_calculate(population, method = "pearson")
sim_df %>% matric::sim_write(tmpfile_prefix, file_format = "csv")
#> NULL
readr::read_csv(file.path(tmpfile_prefix, "test.csv"))
#> Rows: 12 Columns: 3
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: ","
#> dbl (3): id1, id2, sim
#>
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#> # A tibble: 12 × 3
#> id1 id2 sim
#> <dbl> <dbl> <dbl>
#> 1 2 1 -0.999
#> 2 3 1 -1.00
#> 3 4 1 -0.683
#> 4 1 2 -0.999
#> 5 3 2 0.998
#> 6 4 2 0.648
#> 7 1 3 -1.00
#> 8 2 3 0.998
#> 9 4 3 0.695
#> 10 1 4 -0.683
#> 11 2 4 0.648
#> 12 3 4 0.695
readr::read_csv(file.path(tmpfile_prefix, "test_metadata.csv"))
#> Rows: 4 Columns: 2
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: ","
#> chr (1): Metadata_group
#> dbl (1): id
#>
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#> # A tibble: 4 × 2
#> id Metadata_group
#> <dbl> <chr>
#> 1 1 b
#> 2 2 a
#> 3 3 b
#> 4 4 b
jsonlite::read_json(file.path(tmpfile_prefix, "test_metadata.json"))
#> $method
#> $method[[1]]
#> [1] "pearson"
#>
#>
sim_df %>% matric::sim_write(paste0(tmpfile_prefix, ".parquet"))
#> NULL
sim_df_in <- arrow::read_parquet(paste0(tmpfile_prefix, ".parquet"))
attr(sim_df_in, "row_metadata")
#> # A tibble: 4 × 2
#> id Metadata_group
#> <int> <chr>
#> 1 1 b
#> 2 2 a
#> 3 3 b
#> 4 4 b
attr(sim_df_in, "metric_metadata")
#> $method
#> [1] "pearson"
#>