sim_write writes a similarity matrix to disk.

sim_write(sim_df, output, file_format = "parquet")

Arguments

sim_df

metric_sim object.

output

character string specifying the output directory (for the CSV format) or the output filename (for the Parquet format).

file_format

character string specifying the file format. This must be one of "csv" or "parquet" (default).

Value

No return value, called for side effects

Details

The output format can be either CSV or Parquet.

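With the CSV format, the row_metadata and metric_metadata attributes are saved as separate files alongside the similarity values (in the example below, a metadata CSV file and a metadata JSON file, respectively).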

Separate files are not required for Parquet because the Parquet file stores these attributes together with the data.

Examples

suppressMessages(suppressWarnings(library(magrittr)))
population <- tibble::tibble(
  Metadata_group = sample(c("a", "b"), 4, replace = TRUE),
  x = rnorm(4),
  y = x + rnorm(4) / 100,
  z = y + rnorm(4) / 1000
)
tmpdir <- tempdir()
tmpfile_prefix <- file.path(tmpdir, "test")
sim_df <- matric::sim_calculate(population, method = "pearson")
sim_df %>% matric::sim_write(tmpfile_prefix, file_format = "csv")
#> NULL
readr::read_csv(file.path(tmpfile_prefix, "test.csv"))
#> Rows: 12 Columns: 3
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: ","
#> dbl (3): id1, id2, sim
#> 
#>  Use `spec()` to retrieve the full column specification for this data.
#>  Specify the column types or set `show_col_types = FALSE` to quiet this message.
#> # A tibble: 12 × 3
#>      id1   id2    sim
#>    <dbl> <dbl>  <dbl>
#>  1     2     1 -0.999
#>  2     3     1 -1.00 
#>  3     4     1 -0.683
#>  4     1     2 -0.999
#>  5     3     2  0.998
#>  6     4     2  0.648
#>  7     1     3 -1.00 
#>  8     2     3  0.998
#>  9     4     3  0.695
#> 10     1     4 -0.683
#> 11     2     4  0.648
#> 12     3     4  0.695
readr::read_csv(file.path(tmpfile_prefix, "test_metadata.csv"))
#> Rows: 4 Columns: 2
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: ","
#> chr (1): Metadata_group
#> dbl (1): id
#> 
#>  Use `spec()` to retrieve the full column specification for this data.
#>  Specify the column types or set `show_col_types = FALSE` to quiet this message.
#> # A tibble: 4 × 2
#>      id Metadata_group
#>   <dbl> <chr>         
#> 1     1 b             
#> 2     2 a             
#> 3     3 b             
#> 4     4 b             
jsonlite::read_json(file.path(tmpfile_prefix, "test_metadata.json"))
#> $method
#> $method[[1]]
#> [1] "pearson"
#> 
#> 
sim_df %>% matric::sim_write(paste0(tmpfile_prefix, ".parquet"))
#> NULL
sim_df_in <- arrow::read_parquet(paste0(tmpfile_prefix, ".parquet"))
attr(sim_df_in, "row_metadata")
#> # A tibble: 4 × 2
#>      id Metadata_group
#>   <int> <chr>         
#> 1     1 b             
#> 2     2 a             
#> 3     3 b             
#> 4     4 b             
attr(sim_df_in, "metric_metadata")
#> $method
#> [1] "pearson"
#>