CytoDataFrame at a Glance
This notebook demonstrates various capabilities of CytoDataFrame using examples.
CytoDataFrame is intended to provide a Pandas-like DataFrame experience, enhanced with single-cell visual information that can be viewed directly in a Jupyter notebook.
import pathlib
import pandas as pd
from cytodataframe.frame import CytoDataFrame
# Create paths for use with the CytoDataFrame examples below.
# Every path is written without a trailing slash so that the
# f"{path}/<name>" joins used in later cells never produce a doubled "//".
jump_data_path = "../../../tests/data/cytotable/JUMP_plate_BR00117006"
nf1_cellpainting_path = "../../../tests/data/cytotable/NF1_cellpainting_data_shrunken"
nuclear_speckles_path = "../../../tests/data/cytotable/nuclear_speckles"
pediatric_cancer_atlas_path = (
    "../../../tests/data/cytotable/pediatric_cancer_atlas_profiling"
)
%%time
# view JUMP plate BR00117006 with images
frame = CytoDataFrame(
data=f"{jump_data_path}/BR00117006_shrunken.parquet",
data_context_dir=f"{jump_data_path}/images/orig",
)[
[
"Metadata_ImageNumber",
"Cells_Number_Object_Number",
"Nuclei_Texture_Variance_RNA_5_03_256",
"Image_FileName_OrigAGP",
"Image_FileName_OrigDNA",
"Image_FileName_OrigRNA",
]
][:3]
frame
CPU times: user 919 ms, sys: 636 ms, total: 1.56 s
Wall time: 554 ms
Static snapshot (for non-interactive view)
| Metadata_ImageNumber | Cells_Number_Object_Number | Nuclei_Texture_Variance_RNA_5_03_256 | Image_FileName_OrigAGP | Image_FileName_OrigDNA | Image_FileName_OrigRNA | |
|---|---|---|---|---|---|---|
| 0 | 1 | 1 | 106.035972 | |||
| 1 | 1 | 2 | 33.590487 | |||
| 2 | 1 | 3 | 53.527363 |
%%time
# view JUMP plate BR00117006 with images and overlaid outlines for segmentation
frame = CytoDataFrame(
data=f"{jump_data_path}/BR00117006_shrunken.parquet",
data_context_dir=f"{jump_data_path}/images/orig",
data_outline_context_dir=f"{jump_data_path}/images/outlines",
)[
[
"Metadata_ImageNumber",
"Cells_Number_Object_Number",
"Image_FileName_OrigAGP",
"Image_FileName_OrigDNA",
"Image_FileName_OrigRNA",
]
][:3]
frame
CPU times: user 891 ms, sys: 588 ms, total: 1.48 s
Wall time: 516 ms
Static snapshot (for non-interactive view)
| Metadata_ImageNumber | Cells_Number_Object_Number | Image_FileName_OrigAGP | Image_FileName_OrigDNA | Image_FileName_OrigRNA | |
|---|---|---|---|---|---|
| 0 | 1 | 1 | |||
| 1 | 1 | 2 | |||
| 2 | 1 | 3 |
%%time
# view JUMP plate BR00117006 with images and overlaid outlines for segmentation
# and changing the color to something besides the default (default is green).
CytoDataFrame(
data=f"{jump_data_path}/BR00117006_shrunken.parquet",
data_context_dir=f"{jump_data_path}/images/orig",
data_outline_context_dir=f"{jump_data_path}/images/outlines",
display_options={"outline_color": (200, 100, 255)},
)[
[
"Metadata_ImageNumber",
"Cells_Number_Object_Number",
"Image_FileName_OrigAGP",
"Image_FileName_OrigDNA",
"Image_FileName_OrigRNA",
]
][:3]
CPU times: user 868 ms, sys: 633 ms, total: 1.5 s
Wall time: 466 ms
Static snapshot (for non-interactive view)
| Metadata_ImageNumber | Cells_Number_Object_Number | Image_FileName_OrigAGP | Image_FileName_OrigDNA | Image_FileName_OrigRNA | |
|---|---|---|---|---|---|
| 0 | 1 | 1 | |||
| 1 | 1 | 2 | |||
| 2 | 1 | 3 |
%%time
# view JUMP plate BR00117006 with images and overlaid outlines for segmentation
# and adding scale bars which show how micrometers scale to the pixels displayed.
CytoDataFrame(
data=f"{jump_data_path}/BR00117006_shrunken.parquet",
data_context_dir=f"{jump_data_path}/images/orig",
data_outline_context_dir=f"{jump_data_path}/images/outlines",
display_options={
"um_per_pixel": 0.1550,
"scale_bar": {
"length_um": 5,
"location": "lower right",
"color": (255, 255, 255),
"thickness_px": 2,
"margin_px": 5,
},
},
)[
[
"Metadata_ImageNumber",
"Cells_Number_Object_Number",
"Image_FileName_OrigAGP",
"Image_FileName_OrigDNA",
"Image_FileName_OrigRNA",
]
][:3]
CPU times: user 930 ms, sys: 646 ms, total: 1.58 s
Wall time: 536 ms
Static snapshot (for non-interactive view)
| Metadata_ImageNumber | Cells_Number_Object_Number | Image_FileName_OrigAGP | Image_FileName_OrigDNA | Image_FileName_OrigRNA | |
|---|---|---|---|---|---|
| 0 | 1 | 1 | |||
| 1 | 1 | 2 | |||
| 2 | 1 | 3 |
%%time
# view JUMP plate BR00117006 with images and adjust the brightness
CytoDataFrame(
data=f"{jump_data_path}/BR00117006_shrunken.parquet",
data_context_dir=f"{jump_data_path}/images/orig",
display_options={"brightness": 10},
)[
[
"Metadata_ImageNumber",
"Cells_Number_Object_Number",
"Image_FileName_OrigAGP",
"Image_FileName_OrigDNA",
"Image_FileName_OrigRNA",
]
][:3]
CPU times: user 913 ms, sys: 661 ms, total: 1.57 s
Wall time: 502 ms
Static snapshot (for non-interactive view)
| Metadata_ImageNumber | Cells_Number_Object_Number | Image_FileName_OrigAGP | Image_FileName_OrigDNA | Image_FileName_OrigRNA | |
|---|---|---|---|---|---|
| 0 | 1 | 1 | |||
| 1 | 1 | 2 | |||
| 2 | 1 | 3 |
%%time
# view JUMP plate BR00117006 with images and overlaid outlines for segmentation
# and removing the optional red center dot.
CytoDataFrame(
data=f"{jump_data_path}/BR00117006_shrunken.parquet",
data_context_dir=f"{jump_data_path}/images/orig",
data_outline_context_dir=f"{jump_data_path}/images/outlines",
display_options={"center_dot": False},
)[
[
"Metadata_ImageNumber",
"Cells_Number_Object_Number",
"Image_FileName_OrigAGP",
"Image_FileName_OrigDNA",
"Image_FileName_OrigRNA",
]
][:3]
CPU times: user 769 ms, sys: 481 ms, total: 1.25 s
Wall time: 468 ms
Static snapshot (for non-interactive view)
| Metadata_ImageNumber | Cells_Number_Object_Number | Image_FileName_OrigAGP | Image_FileName_OrigDNA | Image_FileName_OrigRNA | |
|---|---|---|---|---|---|
| 0 | 1 | 1 | |||
| 1 | 1 | 2 | |||
| 2 | 1 | 3 |
%%time
# view JUMP plate BR00117006 with images and change the display width
CytoDataFrame(
data=f"{jump_data_path}/BR00117006_shrunken.parquet",
data_context_dir=f"{jump_data_path}/images/orig",
data_outline_context_dir=f"{jump_data_path}/images/outlines",
display_options={"width": "100"},
)[
[
"Metadata_ImageNumber",
"Cells_Number_Object_Number",
"Image_FileName_OrigAGP",
"Image_FileName_OrigDNA",
"Image_FileName_OrigRNA",
]
][:3]
CPU times: user 951 ms, sys: 652 ms, total: 1.6 s
Wall time: 543 ms
Static snapshot (for non-interactive view)
| Metadata_ImageNumber | Cells_Number_Object_Number | Image_FileName_OrigAGP | Image_FileName_OrigDNA | Image_FileName_OrigRNA | |
|---|---|---|---|---|---|
| 0 | 1 | 1 | |||
| 1 | 1 | 2 | |||
| 2 | 1 | 3 |
%%time
# view JUMP plate BR00117006 with images, change the display height and width
# and also transpose for a different view of things.
CytoDataFrame(
data=f"{jump_data_path}/BR00117006_shrunken.parquet",
data_context_dir=f"{jump_data_path}/images/orig",
data_outline_context_dir=f"{jump_data_path}/images/outlines",
display_options={"width": "200px", "height": "auto"},
)[
[
"Metadata_ImageNumber",
"Cells_Number_Object_Number",
"Image_FileName_OrigAGP",
"Image_FileName_OrigDNA",
"Image_FileName_OrigRNA",
]
][:5].T
CPU times: user 935 ms, sys: 661 ms, total: 1.6 s
Wall time: 531 ms
Static snapshot (for non-interactive view)
| 0 | 1 | 2 | 3 | 4 | |
|---|---|---|---|---|---|
| Metadata_ImageNumber | 1 | 1 | 1 | 1 | 1 |
| Cells_Number_Object_Number | 1 | 2 | 3 | 4 | 5 |
| Image_FileName_OrigAGP | |||||
| Image_FileName_OrigDNA | |||||
| Image_FileName_OrigRNA |
%%time
# export to OME Parquet, a format which uses OME Arrow
# to store OME-spec images as values within the table.
frame.to_ome_parquet(file_path="example.ome.parquet")
# read OME Parquet file into the CytoDataFrame
CytoDataFrame(data="example.ome.parquet")
CPU times: user 508 ms, sys: 94.3 ms, total: 603 ms
Wall time: 629 ms
Static snapshot (for non-interactive view)
| Metadata_ImageNumber | Cells_Number_Object_Number | Image_FileName_OrigAGP | Image_FileName_OrigDNA | Image_FileName_OrigRNA | Image_FileName_OrigAGP_OMEArrow_ORIG | Image_FileName_OrigAGP_OMEArrow_LABL | Image_FileName_OrigAGP_OMEArrow_COMP | Image_FileName_OrigDNA_OMEArrow_ORIG | Image_FileName_OrigDNA_OMEArrow_LABL | Image_FileName_OrigDNA_OMEArrow_COMP | Image_FileName_OrigRNA_OMEArrow_ORIG | Image_FileName_OrigRNA_OMEArrow_LABL | Image_FileName_OrigRNA_OMEArrow_COMP | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 1 | r01c01f01p01-ch2sk1fk1fl1.tiff | r01c01f01p01-ch5sk1fk1fl1.tiff | r01c01f01p01-ch3sk1fk1fl1.tiff | None | ||||||||
| 1 | 1 | 2 | r01c01f01p01-ch2sk1fk1fl1.tiff | r01c01f01p01-ch5sk1fk1fl1.tiff | r01c01f01p01-ch3sk1fk1fl1.tiff | None | ||||||||
| 2 | 1 | 3 | r01c01f01p01-ch2sk1fk1fl1.tiff | r01c01f01p01-ch5sk1fk1fl1.tiff | r01c01f01p01-ch3sk1fk1fl1.tiff | None |
%%time
# view JUMP plate BR00117006 with images, changing the bounding box
# using offsets so each image has roughly the same size.
CytoDataFrame(
data=f"{jump_data_path}/BR00117006_shrunken.parquet",
data_context_dir=f"{jump_data_path}/images/orig",
data_outline_context_dir=f"{jump_data_path}/images/outlines",
display_options={
"offset_bounding_box": {
"x_min": -20,
"y_min": -20,
"x_max": 20,
"y_max": 20,
},
},
)[
[
"Metadata_ImageNumber",
"Cells_Number_Object_Number",
"Image_FileName_OrigAGP",
"Image_FileName_OrigDNA",
"Image_FileName_OrigRNA",
]
][:5]
CPU times: user 754 ms, sys: 305 ms, total: 1.06 s
Wall time: 529 ms
Static snapshot (for non-interactive view)
| Metadata_ImageNumber | Cells_Number_Object_Number | Image_FileName_OrigAGP | Image_FileName_OrigDNA | Image_FileName_OrigRNA | |
|---|---|---|---|---|---|
| 0 | 1 | 1 | |||
| 1 | 1 | 2 | |||
| 2 | 1 | 3 | |||
| 3 | 1 | 4 | |||
| 4 | 1 | 5 |
%%time
# view NF1 Cell Painting data with images
CytoDataFrame(
data=f"{nf1_cellpainting_path}/Plate_2_with_image_data_shrunken.parquet",
data_context_dir=f"{nf1_cellpainting_path}/Plate_2_images",
)[
[
"Metadata_ImageNumber",
"Metadata_Cells_Number_Object_Number",
"Image_FileName_GFP",
"Image_FileName_RFP",
"Image_FileName_DAPI",
]
][:3]
CPU times: user 296 ms, sys: 130 ms, total: 425 ms
Wall time: 214 ms
Static snapshot (for non-interactive view)
| Metadata_ImageNumber | Metadata_Cells_Number_Object_Number | Image_FileName_GFP | Image_FileName_RFP | Image_FileName_DAPI | |
|---|---|---|---|---|---|
| 353 | 31 | 4 | |||
| 1564 | 113 | 17 | |||
| 1275 | 94 | 5 |
%%time
# view NF1 Cell Painting data with images and overlaid outlines from masks
frame = CytoDataFrame(
data=f"{nf1_cellpainting_path}/Plate_2_with_image_data_shrunken.parquet",
data_context_dir=f"{nf1_cellpainting_path}/Plate_2_images",
data_mask_context_dir=f"{nf1_cellpainting_path}/Plate_2_masks",
)[
[
"Metadata_ImageNumber",
"Metadata_Cells_Number_Object_Number",
"Image_FileName_GFP",
"Image_FileName_RFP",
"Image_FileName_DAPI",
]
][:3]
frame
CPU times: user 265 ms, sys: 180 ms, total: 445 ms
Wall time: 161 ms
Static snapshot (for non-interactive view)
| Metadata_ImageNumber | Metadata_Cells_Number_Object_Number | Image_FileName_GFP | Image_FileName_RFP | Image_FileName_DAPI | |
|---|---|---|---|---|---|
| 353 | 31 | 4 | |||
| 1564 | 113 | 17 | |||
| 1275 | 94 | 5 |
%%time
# add active paths on the local system to show how CytoDataFrame
# may be used without specifying a context directory for images.
# Note: normally these paths are local to the system where the
# profile data was generated, which often is not the same as the
# system which will be used to analyze the data.
parquet_path = f"{nf1_cellpainting_path}/Plate_2_with_image_data_shrunken.parquet"
nf1_dataset_with_modified_image_paths = pd.read_parquet(path=parquet_path)
nf1_dataset_with_modified_image_paths.loc[
:, ["Image_PathName_DAPI", "Image_PathName_GFP", "Image_PathName_RFP"]
] = f"{pathlib.Path(parquet_path).parent}/Plate_2_images"
# view NF1 Cell Painting data with images and overlaid outlines from masks
CytoDataFrame(
# note: we can read directly from an existing Pandas DataFrame
data=nf1_dataset_with_modified_image_paths,
data_mask_context_dir=f"{nf1_cellpainting_path}/Plate_2_masks",
)[
[
"Metadata_ImageNumber",
"Metadata_Cells_Number_Object_Number",
"Image_FileName_GFP",
"Image_FileName_RFP",
"Image_FileName_DAPI",
]
][:3]
CPU times: user 245 ms, sys: 152 ms, total: 396 ms
Wall time: 149 ms
Static snapshot (for non-interactive view)
| Metadata_ImageNumber | Metadata_Cells_Number_Object_Number | Image_FileName_GFP | Image_FileName_RFP | Image_FileName_DAPI | |
|---|---|---|---|---|---|
| 353 | 31 | 4 | |||
| 1564 | 113 | 17 | |||
| 1275 | 94 | 5 |
%%time
# export to OME Parquet, a format which uses OME Arrow
# to store OME-spec images as values within the table.
frame.to_ome_parquet(file_path="example.ome.parquet")
# read OME Parquet file into the CytoDataFrame
CytoDataFrame(data="example.ome.parquet")
CPU times: user 848 ms, sys: 119 ms, total: 967 ms
Wall time: 933 ms
Static snapshot (for non-interactive view)
| Metadata_ImageNumber | Metadata_Cells_Number_Object_Number | Image_FileName_GFP | Image_FileName_RFP | Image_FileName_DAPI | Image_FileName_GFP_OMEArrow_ORIG | Image_FileName_GFP_OMEArrow_LABL | Image_FileName_GFP_OMEArrow_COMP | Image_FileName_RFP_OMEArrow_ORIG | Image_FileName_RFP_OMEArrow_LABL | Image_FileName_RFP_OMEArrow_COMP | Image_FileName_DAPI_OMEArrow_ORIG | Image_FileName_DAPI_OMEArrow_LABL | Image_FileName_DAPI_OMEArrow_COMP | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 353 | 31 | 4 | B7_01_2_3_GFP_001.tif | B7_01_3_3_RFP_001.tif | B7_01_1_3_DAPI_001.tif | None | ||||||||
| 1564 | 113 | 17 | H12_01_2_1_GFP_001.tif | H12_01_3_1_RFP_001.tif | H12_01_1_1_DAPI_001.tif | None | ||||||||
| 1275 | 94 | 5 | F7_01_2_2_GFP_001.tif | F7_01_3_2_RFP_001.tif | F7_01_1_2_DAPI_001.tif | None |
%%time
# view nuclear speckles data with images and overlaid outlines from masks
CytoDataFrame(
data=f"{nuclear_speckles_path}/test_slide1_converted.parquet",
data_context_dir=f"{nuclear_speckles_path}/images/plate1",
data_mask_context_dir=f"{nuclear_speckles_path}/masks/plate1",
)[
[
"Metadata_ImageNumber",
"Nuclei_Number_Object_Number",
"Image_FileName_A647",
"Image_FileName_DAPI",
"Image_FileName_GOLD",
]
][:3]
CPU times: user 86.6 ms, sys: 21.9 ms, total: 108 ms
Wall time: 90 ms
Static snapshot (for non-interactive view)
| Metadata_ImageNumber | Nuclei_Number_Object_Number | Image_FileName_A647 | Image_FileName_DAPI | Image_FileName_GOLD | |
|---|---|---|---|---|---|
| 0 | 1 | 1 | slide1_A1_M10_CH1_Z09_illumcorrect.tiff | slide1_A1_M10_CH2_Z09_illumcorrect.tiff | |
| 1 | 1 | 2 | slide1_A1_M10_CH1_Z09_illumcorrect.tiff | slide1_A1_M10_CH2_Z09_illumcorrect.tiff | |
| 2 | 1 | 3 | slide1_A1_M10_CH1_Z09_illumcorrect.tiff | slide1_A1_M10_CH2_Z09_illumcorrect.tiff |
%%time
# view nuclear speckles data with images and overlaid outlines from masks
# and also apply a filter to only show rows where the value for
# "Nuclei_Texture_Variance_DAPI_3_03_256".
CytoDataFrame(
data=f"{nuclear_speckles_path}/test_slide1_converted.parquet",
data_context_dir=f"{nuclear_speckles_path}/images/plate1",
data_mask_context_dir=f"{nuclear_speckles_path}/masks/plate1",
display_options={
"filter_columns": ["Nuclei_Texture_Variance_DAPI_3_03_256"],
},
)[
[
"Metadata_ImageNumber",
"Nuclei_Number_Object_Number",
"Nuclei_Texture_Variance_DAPI_3_03_256",
"Image_FileName_A647",
"Image_FileName_DAPI",
"Image_FileName_GOLD",
]
]
CPU times: user 116 ms, sys: 61.3 ms, total: 177 ms
Wall time: 74.9 ms
Static snapshot (for non-interactive view)
| Metadata_ImageNumber | Nuclei_Number_Object_Number | Nuclei_Texture_Variance_DAPI_3_03_256 | Image_FileName_A647 | Image_FileName_DAPI | Image_FileName_GOLD | |
|---|---|---|---|---|---|---|
| 0 | 1 | 1 | 2.484139 | slide1_A1_M10_CH1_Z09_illumcorrect.tiff | slide1_A1_M10_CH2_Z09_illumcorrect.tiff | |
| 1 | 1 | 2 | 12.026326 | slide1_A1_M10_CH1_Z09_illumcorrect.tiff | slide1_A1_M10_CH2_Z09_illumcorrect.tiff | |
| 2 | 1 | 3 | 51.418746 | slide1_A1_M10_CH1_Z09_illumcorrect.tiff | slide1_A1_M10_CH2_Z09_illumcorrect.tiff | |
| 3 | 1 | 4 | 47.049561 | slide1_A1_M10_CH1_Z09_illumcorrect.tiff | slide1_A1_M10_CH2_Z09_illumcorrect.tiff | |
| 4 | 1 | 5 | 117.135912 | slide1_A1_M10_CH1_Z09_illumcorrect.tiff | slide1_A1_M10_CH2_Z09_illumcorrect.tiff | |
| 5 | 1 | 6 | 25.371580 | slide1_A1_M10_CH1_Z09_illumcorrect.tiff | slide1_A1_M10_CH2_Z09_illumcorrect.tiff | |
| 6 | 1 | 7 | 23.930735 | slide1_A1_M10_CH1_Z09_illumcorrect.tiff | slide1_A1_M10_CH2_Z09_illumcorrect.tiff | |
| 7 | 1 | 8 | 2.973642 | slide1_A1_M10_CH1_Z09_illumcorrect.tiff | slide1_A1_M10_CH2_Z09_illumcorrect.tiff | |
| 8 | 1 | 9 | 8.355843 | slide1_A1_M10_CH1_Z09_illumcorrect.tiff | slide1_A1_M10_CH2_Z09_illumcorrect.tiff | |
| 9 | 1 | 10 | 150.652194 | slide1_A1_M10_CH1_Z09_illumcorrect.tiff | slide1_A1_M10_CH2_Z09_illumcorrect.tiff | |
| 10 | 1 | 11 | 7.919292 | slide1_A1_M10_CH1_Z09_illumcorrect.tiff | slide1_A1_M10_CH2_Z09_illumcorrect.tiff | |
| 11 | 1 | 12 | 0.432249 | slide1_A1_M10_CH1_Z09_illumcorrect.tiff | slide1_A1_M10_CH2_Z09_illumcorrect.tiff | |
| 12 | 1 | 13 | 18.161879 | slide1_A1_M10_CH1_Z09_illumcorrect.tiff | slide1_A1_M10_CH2_Z09_illumcorrect.tiff | |
| 13 | 1 | 14 | 32.575908 | slide1_A1_M10_CH1_Z09_illumcorrect.tiff | slide1_A1_M10_CH2_Z09_illumcorrect.tiff | |
| 14 | 1 | 15 | 29.200237 | slide1_A1_M10_CH1_Z09_illumcorrect.tiff | slide1_A1_M10_CH2_Z09_illumcorrect.tiff | |
| 15 | 1 | 16 | 9.793458 | slide1_A1_M10_CH1_Z09_illumcorrect.tiff | slide1_A1_M10_CH2_Z09_illumcorrect.tiff | |
| 16 | 1 | 17 | 8.513971 | slide1_A1_M10_CH1_Z09_illumcorrect.tiff | slide1_A1_M10_CH2_Z09_illumcorrect.tiff | |
| 17 | 1 | 18 | 31.487882 | slide1_A1_M10_CH1_Z09_illumcorrect.tiff | slide1_A1_M10_CH2_Z09_illumcorrect.tiff | |
| 18 | 1 | 19 | 4.329104 | slide1_A1_M10_CH1_Z09_illumcorrect.tiff | slide1_A1_M10_CH2_Z09_illumcorrect.tiff | |
| 19 | 1 | 20 | 32.853237 | slide1_A1_M10_CH1_Z09_illumcorrect.tiff | slide1_A1_M10_CH2_Z09_illumcorrect.tiff | |
| 20 | 1 | 21 | 7.200573 | slide1_A1_M10_CH1_Z09_illumcorrect.tiff | slide1_A1_M10_CH2_Z09_illumcorrect.tiff | |
| 21 | 1 | 22 | 3.978256 | slide1_A1_M10_CH1_Z09_illumcorrect.tiff | slide1_A1_M10_CH2_Z09_illumcorrect.tiff | |
| 22 | 1 | 23 | 32.280016 | slide1_A1_M10_CH1_Z09_illumcorrect.tiff | slide1_A1_M10_CH2_Z09_illumcorrect.tiff | |
| 23 | 1 | 24 | 26.525734 | slide1_A1_M10_CH1_Z09_illumcorrect.tiff | slide1_A1_M10_CH2_Z09_illumcorrect.tiff | |
| 24 | 1 | 25 | 51.948095 | slide1_A1_M10_CH1_Z09_illumcorrect.tiff | slide1_A1_M10_CH2_Z09_illumcorrect.tiff |
%%time
# view ALSF pediatric cancer atlas plate BR00143976 with images
cdf = CytoDataFrame(
data=f"{pediatric_cancer_atlas_path}/BR00143976_shrunken.parquet",
data_context_dir=f"{pediatric_cancer_atlas_path}/images/orig",
data_outline_context_dir=f"{pediatric_cancer_atlas_path}/images/outlines",
segmentation_file_regex={
r"CellsOutlines_BR(\d+)_C(\d{2})_\d+\.tiff": r".*ch3.*\.tiff",
r"NucleiOutlines_BR(\d+)_C(\d{2})_\d+\.tiff": r".*ch5.*\.tiff",
},
)[
[
"Metadata_ImageNumber",
"Metadata_Nuclei_Number_Object_Number",
"Image_FileName_OrigAGP",
"Image_FileName_OrigDNA",
]
]
cdf
CPU times: user 395 ms, sys: 224 ms, total: 619 ms
Wall time: 249 ms
Static snapshot (for non-interactive view)
| Metadata_ImageNumber | Metadata_Nuclei_Number_Object_Number | Image_FileName_OrigAGP | Image_FileName_OrigDNA | |
|---|---|---|---|---|
| 0 | 3 | 3 | ||
| 1 | 3 | 4 | ||
| 2 | 3 | 6 | ||
| 3 | 3 | 7 | ||
| 4 | 3 | 8 |
%%time
# Show that an existing CytoDataFrame can be used again simply by
# referencing its variable (here `cdf`, assigned in an earlier cell).
cdf
CPU times: user 1e+03 ns, sys: 0 ns, total: 1e+03 ns
Wall time: 3.1 μs
%%time
# export to OME Parquet, a format which uses OME Arrow
# to store OME-spec images as values within the table.
cdf.to_ome_parquet(file_path="example.ome.parquet")
# read OME Parquet file into the CytoDataFrame
CytoDataFrame(data="example.ome.parquet")
CPU times: user 820 ms, sys: 192 ms, total: 1.01 s
Wall time: 890 ms
Static snapshot (for non-interactive view)
| Metadata_ImageNumber | Metadata_Nuclei_Number_Object_Number | Image_FileName_OrigAGP | Image_FileName_OrigDNA | Image_FileName_OrigAGP_OMEArrow_ORIG | Image_FileName_OrigAGP_OMEArrow_LABL | Image_FileName_OrigAGP_OMEArrow_COMP | Image_FileName_OrigDNA_OMEArrow_ORIG | Image_FileName_OrigDNA_OMEArrow_LABL | Image_FileName_OrigDNA_OMEArrow_COMP | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 3 | 3 | r03c03f03p01-ch3sk1fk1fl1.tiff | r03c03f03p01-ch5sk1fk1fl1.tiff | ||||||
| 1 | 3 | 4 | r03c03f03p01-ch3sk1fk1fl1.tiff | r03c03f03p01-ch5sk1fk1fl1.tiff | ||||||
| 2 | 3 | 6 | r03c03f03p01-ch3sk1fk1fl1.tiff | r03c03f03p01-ch5sk1fk1fl1.tiff | ||||||
| 3 | 3 | 7 | r03c03f03p01-ch3sk1fk1fl1.tiff | r03c03f03p01-ch5sk1fk1fl1.tiff | ||||||
| 4 | 3 | 8 | r03c03f03p01-ch3sk1fk1fl1.tiff | r03c03f03p01-ch5sk1fk1fl1.tiff |
%%time
# 3D example dataset, showing how
# CytoDataFrame can be used with 3D data for visualization.
cp_3d_path = "../../../tests/data/CP_tutorial_3D_noise_nuclei_segmentation"
# send the data to CytoDataFrame
# note: because we have 3d input images, CytoDataFrame will automatically process
# using the 3D display options for interactive visualization.
cdf = CytoDataFrame(
data=pathlib.Path(cp_3d_path) / "output/MyExpt_RealsizeNuclei.csv",
data_context_dir=str(pathlib.Path(cp_3d_path) / "input"),
)
cdf[["ImageNumber", "ObjectNumber", "FileName_Nuclei"]][:3]
CPU times: user 5.68 ms, sys: 1.28 ms, total: 6.96 ms
Wall time: 8.21 ms
Static snapshot (for non-interactive view)
| ImageNumber | ObjectNumber | FileName_Nuclei | |
|---|---|---|---|
| 0 | 1 | 1 | |
| 1 | 1 | 2 | |
| 2 | 1 | 3 |
%%time
# read 3d images with segmentation masks and show the
# segmentation masks are also 3D.
cdf = CytoDataFrame(
data=pathlib.Path(cp_3d_path) / "output/MyExpt_RealsizeNuclei.csv",
data_context_dir=str(pathlib.Path(cp_3d_path) / "input"),
data_mask_context_dir=str(pathlib.Path(cp_3d_path) / "output/masks"),
)
cdf[["ImageNumber", "ObjectNumber", "FileName_Nuclei"]][:3]
CPU times: user 5.63 ms, sys: 1.85 ms, total: 7.48 ms
Wall time: 6.12 ms
Static snapshot (for non-interactive view)
| ImageNumber | ObjectNumber | FileName_Nuclei | |
|---|---|---|---|
| 0 | 1 | 1 | |
| 1 | 1 | 2 | |
| 2 | 1 | 3 |