{ "cells": [ { "cell_type": "markdown", "id": "ce6594c1-702f-4724-b190-5370e7396d3e", "metadata": {}, "source": [ "# CytoTable mise en place\n", "\n", "This notebook includes a quick demonstration of CytoTable to help you understand the basics of using this project.\n", "\n", "The name of the notebook comes from the French _mise en place_:\n", "> \"Mise en place (French pronunciation: [mi zɑ̃ ˈplas]) is a French culinary phrase which means \"putting in place\"\n", "> or \"gather\". It refers to the setup required before cooking, and is often used in professional kitchens to\n", "> refer to organizing and arranging the ingredients ...\"\n", "> - [Wikipedia](https://en.wikipedia.org/wiki/Mise_en_place)" ] }, { "cell_type": "code", "execution_count": 1, "id": "c529e38f-8784-4a17-955b-06ea0b2375ce", "metadata": {}, "outputs": [], "source": [ "import pathlib\n", "from collections import Counter\n", "\n", "import pyarrow.parquet as pq\n", "\n", "import cytotable\n", "\n", "# setup variables for use throughout the notebook\n", "source_path = \"../../../tests/data/cellprofiler/examplehuman\"\n", "dest_path = \"./example.parquet\"" ] }, { "cell_type": "code", "execution_count": 2, "id": "c6e14f33-1c7a-437f-9a0f-9cda7e1620b2", "metadata": {}, "outputs": [], "source": [ "# remove the dest_path if it's present\n", "if pathlib.Path(dest_path).is_file():\n", " pathlib.Path(dest_path).unlink()" ] }, { "cell_type": "code", "execution_count": 3, "id": "d45b25f0-c115-4f95-8521-addc45bb8a90", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[PosixPath('../../../tests/data/cellprofiler/examplehuman/Experiment.csv'),\n", " PosixPath('../../../tests/data/cellprofiler/examplehuman/PH3.csv'),\n", " PosixPath('../../../tests/data/cellprofiler/examplehuman/Cytoplasm.csv'),\n", " PosixPath('../../../tests/data/cellprofiler/examplehuman/Image.csv'),\n", " PosixPath('../../../tests/data/cellprofiler/examplehuman/Nuclei.csv'),\n", " 
PosixPath('../../../tests/data/cellprofiler/examplehuman/Cells.csv')]" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# show the files we will use as source data with CytoTable\n", "list(pathlib.Path(source_path).glob(\"*.csv\"))" ] }, { "cell_type": "code", "execution_count": 4, "id": "78f1f734-16f5-4957-9aeb-ad421d3d1bae", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CPU times: user 327 ms, sys: 201 ms, total: 528 ms\n", "Wall time: 22.4 s\n" ] }, { "data": { "text/plain": [ "'example.parquet'" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "%%time\n", "\n", "# run cytotable convert\n", "result = cytotable.convert(\n", " source_path=source_path,\n", " dest_path=dest_path,\n", " # specify a destination data format type\n", " dest_datatype=\"parquet\",\n", " # specify a preset which enables quick use of common input file formats\n", " preset=\"cellprofiler_csv\",\n", ")\n", "result.name" ] }, { "cell_type": "code", "execution_count": 5, "id": "1601b045-2631-46d7-a001-39ae6cfb27fb", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | Metadata_ImageNumber | \n", "Metadata_Cells_Parent_Nuclei | \n", "Metadata_Cytoplasm_Parent_Cells | \n", "Metadata_Cytoplasm_Parent_Nuclei | \n", "Metadata_ObjectNumber | \n", "Image_FileName_DNA | \n", "Image_FileName_OrigOverlay | \n", "Image_FileName_PH3 | \n", "Image_FileName_cellbody | \n", "Cytoplasm_AreaShape_Area | \n", "... | \n", "Nuclei_Location_Center_X | \n", "Nuclei_Location_Center_Y | \n", "Nuclei_Location_Center_Z | \n", "Nuclei_Location_MaxIntensity_X_DNA | \n", "Nuclei_Location_MaxIntensity_X_PH3 | \n", "Nuclei_Location_MaxIntensity_Y_DNA | \n", "Nuclei_Location_MaxIntensity_Y_PH3 | \n", "Nuclei_Location_MaxIntensity_Z_DNA | \n", "Nuclei_Location_MaxIntensity_Z_PH3 | \n", "Nuclei_Number_Object_Number | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "1 | \n", "1 | \n", "1 | \n", "1 | \n", "1 | \n", "AS_09125_050116030001_D03f00d0.tif | \n", "AS_09125_050116030001_D03f00d0_Overlay.png | \n", "AS_09125_050116030001_D03f00d1.tif | \n", "AS_09125_050116030001_D03f00d2.tif | \n", "288 | \n", "... | \n", "477.099237 | \n", "7.580153 | \n", "0 | \n", "477.0 | \n", "478.0 | \n", "8.0 | \n", "13.0 | \n", "0.0 | \n", "0.0 | \n", "1 | \n", "
1 | \n", "1 | \n", "2 | \n", "2 | \n", "2 | \n", "2 | \n", "AS_09125_050116030001_D03f00d0.tif | \n", "AS_09125_050116030001_D03f00d0_Overlay.png | \n", "AS_09125_050116030001_D03f00d1.tif | \n", "AS_09125_050116030001_D03f00d2.tif | \n", "256 | \n", "... | \n", "495.750000 | \n", "11.098684 | \n", "0 | \n", "495.0 | \n", "502.0 | \n", "9.0 | \n", "14.0 | \n", "0.0 | \n", "0.0 | \n", "2 | \n", "
2 | \n", "1 | \n", "3 | \n", "3 | \n", "3 | \n", "3 | \n", "AS_09125_050116030001_D03f00d0.tif | \n", "AS_09125_050116030001_D03f00d0_Overlay.png | \n", "AS_09125_050116030001_D03f00d1.tif | \n", "AS_09125_050116030001_D03f00d2.tif | \n", "52 | \n", "... | \n", "438.959184 | \n", "11.374150 | \n", "0 | \n", "440.0 | \n", "439.0 | \n", "11.0 | \n", "16.0 | \n", "0.0 | \n", "0.0 | \n", "3 | \n", "
3 | \n", "1 | \n", "4 | \n", "4 | \n", "4 | \n", "4 | \n", "AS_09125_050116030001_D03f00d0.tif | \n", "AS_09125_050116030001_D03f00d0_Overlay.png | \n", "AS_09125_050116030001_D03f00d1.tif | \n", "AS_09125_050116030001_D03f00d2.tif | \n", "466 | \n", "... | \n", "80.459184 | \n", "11.163265 | \n", "0 | \n", "80.0 | \n", "81.0 | \n", "13.0 | \n", "10.0 | \n", "0.0 | \n", "0.0 | \n", "4 | \n", "
4 | \n", "1 | \n", "5 | \n", "5 | \n", "5 | \n", "5 | \n", "AS_09125_050116030001_D03f00d0.tif | \n", "AS_09125_050116030001_D03f00d0_Overlay.png | \n", "AS_09125_050116030001_D03f00d1.tif | \n", "AS_09125_050116030001_D03f00d2.tif | \n", "296 | \n", "... | \n", "58.423077 | \n", "15.509615 | \n", "0 | \n", "62.0 | \n", "52.0 | \n", "14.0 | \n", "15.0 | \n", "0.0 | \n", "0.0 | \n", "5 | \n", "
5 rows × 312 columns
\n", "