mAP for phenotypic activity assesement¶
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from copairs import map
from copairs.matching import assign_reference_index
from copairs.map.average_precision import p_values
Introduction¶
This example demostrates how to use copairs to assess phenotypic activity of perturbations in a profiling dataset.
Phenotypic activity is assessed by calculating mean average precision (mAP) for the retrieval of replicates of a perturbation against replicates of negative controls.
It aims to answer the question: āHow distinguishable is this perturbation from negative controls?ā
The resulting perturbation mAP score reflects the average extent to which its replicate profiles are more similar to each other compared to control profiles (Figure 1E).
Citation:
Kalinin, A.A., Arevalo, J., Serrano, E., Vulliard, L., Tsang, H., Bornholdt, M., MuƱoz, A.F., Sivagurunathan, S., Rajwa, B., Carpenter, A.E., Way, G.P. and Singh, S., 2025. A versatile information retrieval framework for evaluating profile strength and similarity. Nature Communications 16, 5181. doi:10.1038/s41467-025-60306-2
# these imports are only needed for showing Figure 1 from the paper
from io import BytesIO
from pathlib import Path
import requests
from PIL import Image
from IPython.display import display
fig1_path = "data/Figure_1.jpg"
fig1_url = "https://cdn.ncbi.nlm.nih.gov/pmc/blobs/b9a3/12137819/d02214b315d1/41467_2025_60306_Fig1_HTML.jpg"
if not Path(fig1_path).is_file():
image = Image.open(BytesIO(requests.get(fig1_url).content))
image.save(fig1_path)
image = Image.open(fig1_path).resize((514, 640))
display(image)
Download data¶
Download a single plate of profiles from the dataset "cpg0004" (aka LINCS), which contains Cell Painting images of 1,327 small-molecule perturbations of A549 human cells. The wells on each plate were perturbed with 56 different compounds in six different doses.
Way, G. P. et al. Morphology and gene expression profiling provide complementary information for mapping cell state. Cell Syst 13, 911ā923.e9 (2022).
local_path = "data/2016_04_01_a549_48hr_batch1_plateSQ00014812.csv"
commit = "da8ae6a3bc103346095d61b4ee02f08fc85a5d98"
plate = "SQ00014812"
url = f"https://media.githubusercontent.com/media/broadinstitute/lincs-cell-painting/{commit}/profiles/2016_04_01_a549_48hr_batch1/{plate}/{plate}_normalized_feature_select.csv.gz"
if not Path(local_path).is_file():
df = pd.read_csv(url)
df.to_csv(local_path, index=False)
else:
df = pd.read_csv(local_path)
df = df.loc[:, df.nunique() > 1] # remove constant columns
df
| Metadata_broad_sample | Metadata_mg_per_ml | Metadata_mmoles_per_liter | Metadata_pert_id | Metadata_pert_mfc_id | Metadata_pert_well | Metadata_broad_sample_type | Metadata_pert_type | Metadata_broad_id | Metadata_InChIKey14 | ... | Nuclei_Texture_InverseDifferenceMoment_AGP_5_0 | Nuclei_Texture_InverseDifferenceMoment_DNA_20_0 | Nuclei_Texture_InverseDifferenceMoment_ER_5_0 | Nuclei_Texture_InverseDifferenceMoment_Mito_10_0 | Nuclei_Texture_InverseDifferenceMoment_Mito_5_0 | Nuclei_Texture_SumAverage_RNA_5_0 | Nuclei_Texture_SumEntropy_DNA_10_0 | Nuclei_Texture_SumEntropy_DNA_20_0 | Nuclei_Texture_SumEntropy_DNA_5_0 | Nuclei_Texture_Variance_RNA_10_0 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | DMSO | 0.000000 | 0.000000 | NaN | NaN | A01 | control | control | NaN | NaN | ... | -1.3544 | -1.07770 | 2.26020 | -0.377010 | -0.065840 | 2.12360 | 2.8740 | 2.87500 | 2.3047 | -0.92358 |
| 1 | DMSO | 0.000000 | 0.000000 | NaN | NaN | A02 | control | control | NaN | NaN | ... | -2.3840 | -0.73440 | 1.12090 | -0.182500 | -0.061450 | 0.66985 | 2.3919 | 2.35230 | 1.8672 | -0.11820 |
| 2 | DMSO | 0.000000 | 0.000000 | NaN | NaN | A03 | control | control | NaN | NaN | ... | -1.9493 | -0.36148 | 0.44050 | 0.326660 | 0.547200 | 0.25015 | 1.2271 | 0.77847 | 1.0651 | -0.44810 |
| 3 | DMSO | 0.000000 | 0.000000 | NaN | NaN | A04 | control | control | NaN | NaN | ... | -2.2909 | -0.46380 | 0.96434 | 1.132200 | 0.753500 | 0.31403 | 1.4384 | 1.48110 | 1.2943 | -0.83810 |
| 4 | DMSO | 0.000000 | 0.000000 | NaN | NaN | A05 | control | control | NaN | NaN | ... | -1.8955 | -1.05350 | 1.64840 | 0.057781 | 0.070229 | 1.60990 | 1.1296 | 0.90213 | 1.1016 | 0.53225 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 379 | BRD-K82746043-001-15-1 | 3.248700 | 3.333300 | BRD-K82746043 | BRD-K82746043-001-15-1 | P20 | trt | trt | BRD-K82746043 | JLYAXFNOILIKPP | ... | -6.1522 | 1.81410 | 1.54220 | -1.874700 | -1.133900 | 1.57540 | -3.0962 | -3.25160 | -2.7683 | 1.40170 |
| 380 | BRD-K82746043-001-15-1 | 1.082900 | 1.111100 | BRD-K82746043 | BRD-K82746043-001-15-1 | P21 | trt | trt | BRD-K82746043 | JLYAXFNOILIKPP | ... | -5.1586 | 1.50580 | 1.68420 | -1.126400 | -1.066600 | 1.24740 | -1.5305 | -1.79020 | -1.2474 | 1.17600 |
| 381 | BRD-K82746043-001-15-1 | 0.360970 | 0.370370 | BRD-K82746043 | BRD-K82746043-001-15-1 | P22 | trt | trt | BRD-K82746043 | JLYAXFNOILIKPP | ... | -5.9475 | 1.42100 | 1.51020 | -1.103600 | -1.666500 | 1.19840 | -2.6086 | -2.97620 | -2.0026 | 0.91557 |
| 382 | BRD-K82746043-001-15-1 | 0.120320 | 0.123460 | BRD-K82746043 | BRD-K82746043-001-15-1 | P23 | trt | trt | BRD-K82746043 | JLYAXFNOILIKPP | ... | -8.4408 | 2.99620 | 2.55230 | -2.275200 | -1.783500 | 2.49200 | -4.3964 | -4.19030 | -3.8360 | 1.02240 |
| 383 | BRD-K82746043-001-15-1 | 0.040108 | 0.041152 | BRD-K82746043 | BRD-K82746043-001-15-1 | P24 | trt | trt | BRD-K82746043 | JLYAXFNOILIKPP | ... | -7.9510 | 2.55730 | 3.05790 | -1.466300 | -1.673800 | 1.99540 | -4.2176 | -4.49940 | -3.4922 | 1.01170 |
384 rows Ć 507 columns
Note that in this dataset, pertubations can target multiple genes. We can list these targets from the Metadata_target column.
df["Metadata_target"].unique()
array([nan, 'CHRM1|CHRM2|CHRM3|CHRM4|CHRM5', 'HMGCR',
'HDAC1|HDAC2|HDAC3|HDAC9', 'ERBB2', 'DNMT1|DNMT3A',
'GABRA1|GABRA2|GABRA3|GABRA4|GABRA5|GABRA6', 'TUBB', 'KIF11',
'PSMA1|PSMA2|PSMA3|PSMA4|PSMA5|PSMA6|PSMA7|PSMA8|PSMB1|PSMB10|PSMB11|PSMB2|PSMB3|PSMB4|PSMB5|PSMB6|PSMB7|PSMB8|PSMB9|PSMD1|PSMD2|RELA',
'SQLE', 'GABRA1', 'KCNT2|TRPV4', 'AURKA|AURKB',
'DRD2|GRIN2A|GRIN2B|GRIN2C|GRIN2D|GRIN3A', 'CFTR',
'CACNA1C|CACNA1S|CACNA2D1|CACNG1|HTR3A|KCNA5',
'ADRA1A|ADRA1B|ADRA2A|ADRA2B|ADRA2C|CHRM1|CHRM2|CHRM3|CHRM4|CHRM5|DRD1|DRD2|DRD3|DRD4|DRD5|HRH1|HTR1A|HTR1B|HTR1D|HTR1E|HTR2A|HTR2C|HTR3A|HTR6|HTR7',
'EGFR|NR1I2', 'ADRA1A|ADRA2A|HRH1|HTR1A|HTR2A|HTR2B|HTR2C|SLC6A4',
'EGFR|ERBB2', 'HIF1A', 'ESR1|ESR2|MAP1A|MAP2', 'SCN4A|SCN9A',
'BIRC2|XIAP', 'AKT1|AKT2|AKT3|PRKG1', 'ACE',
'HTR1A|HTR1B|HTR1D|HTR1E|HTR1F|HTR2A|HTR2B|HTR2C|HTR5A|HTR6|HTR7',
'CYSLTR1|CYSLTR2', 'GAST', 'HTR1A', 'PSMB1', 'MET', 'NAE1|UBA3',
'VDR', 'HRH1', 'HTR1A|HTR2A', 'AURKA|FLT3|KDR|PDGFRA|PTK2|SRC',
'BIRC2|BIRC3|BIRC7|XIAP', 'ABCB1|ABCB4', 'KCNH2',
'ABCB11|CAMLG|FPR1|PPIA|PPIF|PPP3CA|PPP3R2|SLC10A1|SLCO1B1|SLCO1B3',
'FGFR3|KIT|PDGFRA|PDGFRB', 'FLT3|PIM1|PIM2|PIM3', 'PSEN1',
'HSPA1A', 'ATP1A1', 'RELA', 'AVPR1A|AVPR2', 'DPP4',
'BCL2|BCL2L1|BCL2L2'], dtype=object)
Assessing phenotypic activity of compounds with mAP¶
Here, we treat different doses of each compound as replicates and assess how well we can retrieve them by similarity against the group of negative controls (DMSO).
For phenotypic activity, it's helpful to add an extra column that is equal to row index for all DMSO replicates and to -1 for all compound replicates using assign_reference_index function. This helps to not count groups of negative controls as query groups and not consider other perturbations as a reference.
reference_col = "Metadata_reference_index"
df_activity = assign_reference_index(
df,
"Metadata_broad_sample == 'DMSO'", # condition to get reference profiles (neg controls)
reference_col=reference_col,
default_value=-1,
)
Next, we define the rules by which profiles are grouped based on metadata:
Two profiles are a positive pair if they belong to the same group that is not a control group. In this case, any two replicate profiles of the same compound are a positive pair. To define that using metadata columns, positive pairs should share the same value in the metadata column that identifies compounds (
Metadata_broad_sample). We add this column to a list namespos_sameby.In this case, profiles that form a positive pair do not need to be different in any of the metatada columns, so we keep
pos_diffbyempty. Although one could define them as being from different batches, for instance, to account for batch effects.Two profiles are a negative pair when one of them belongs to a group of compound replicates and another to a group of DMSO controls. That means they should be different both in the metadata column that identifies the specific compound and the reference index columns that we created. The latter is needed to ensure that replicates of compounds are retrieved against only DMSO controls at this stage (and not against replicates of other compounds). We list these columns in
neg_diffby.Profiles that form a negative pair do not need to be same in any of the metatada columns, so we keep
neg_samebyempty.
Finally, we include Metadata_reference_index column to:
pos_samebyāthis ensures positive pairs connect profiles that share the same value in this column, i.e. a positive pair cannot be formed between any two negative controls (control profiles contain index values).neg_diffbyāthis ensures negative pairs connect profiles that differ in this columns, i.e. a negative pair cannot be formed between profiles of two different perturbations (all perturbation profiles contain -1).
# positive pairs are replicates of the same treatment
pos_sameby = ["Metadata_broad_sample", reference_col]
pos_diffby = []
neg_sameby = []
# negative pairs are replicates of different treatments
neg_diffby = ["Metadata_broad_sample", reference_col]
Now we can use average_precision function to calculate the average precision score for each replicate of each compound.
It returns metadata with 4 new columns: number of positive and negative pairs for each replicate profile, the average precision score, and the normalized average precision score.
metadata = df_activity.filter(regex="^Metadata")
profiles = df_activity.filter(regex="^(?!Metadata)").values
activity_ap = map.average_precision(
metadata, profiles, pos_sameby, pos_diffby, neg_sameby, neg_diffby
)
activity_ap = activity_ap.query("Metadata_broad_sample != 'DMSO'") # remove DMSO
activity_ap.to_csv("data/activity_ap.csv", index=False)
activity_ap
/Users/alex.kalinin/Projects/copairs/src/copairs/compute.py:50: TqdmExperimentalWarning: Using `tqdm.autonotebook.tqdm` in notebook mode. Use `tqdm.tqdm` instead to force console mode (e.g. in jupyter console) from tqdm.autonotebook import tqdm
0%| | 0/1 [00:00<?, ?it/s]
0%| | 0/1 [00:00<?, ?it/s]
| Metadata_broad_sample | Metadata_mg_per_ml | Metadata_mmoles_per_liter | Metadata_pert_id | Metadata_pert_mfc_id | Metadata_pert_well | Metadata_broad_sample_type | Metadata_pert_type | Metadata_broad_id | Metadata_InChIKey14 | Metadata_moa | Metadata_target | Metadata_broad_date | Metadata_Well | Metadata_reference_index | n_pos_pairs | n_total_pairs | average_precision | normalized_average_precision | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 6 | BRD-K74363950-004-01-0 | 5.655600 | 10.000000 | BRD-K74363950 | BRD-K74363950-004-01-0 | A07 | trt | trt | BRD-K74363950 | ASMXXROZKSBQIH | acetylcholine receptor antagonist | CHRM1|CHRM2|CHRM3|CHRM4|CHRM5 | broad_id_20170327 | A07 | -1 | 5 | 29 | 0.325013 | 0.087916 |
| 7 | BRD-K74363950-004-01-0 | 1.885200 | 3.333300 | BRD-K74363950 | BRD-K74363950-004-01-0 | A08 | trt | trt | BRD-K74363950 | ASMXXROZKSBQIH | acetylcholine receptor antagonist | CHRM1|CHRM2|CHRM3|CHRM4|CHRM5 | broad_id_20170327 | A08 | -1 | 5 | 29 | 0.513889 | 0.343137 |
| 8 | BRD-K74363950-004-01-0 | 0.628400 | 1.111100 | BRD-K74363950 | BRD-K74363950-004-01-0 | A09 | trt | trt | BRD-K74363950 | ASMXXROZKSBQIH | acetylcholine receptor antagonist | CHRM1|CHRM2|CHRM3|CHRM4|CHRM5 | broad_id_20170327 | A09 | -1 | 5 | 29 | 0.727778 | 0.632157 |
| 9 | BRD-K74363950-004-01-0 | 0.209470 | 0.370370 | BRD-K74363950 | BRD-K74363950-004-01-0 | A10 | trt | trt | BRD-K74363950 | ASMXXROZKSBQIH | acetylcholine receptor antagonist | CHRM1|CHRM2|CHRM3|CHRM4|CHRM5 | broad_id_20170327 | A10 | -1 | 5 | 29 | 0.783333 | 0.707227 |
| 10 | BRD-K74363950-004-01-0 | 0.069823 | 0.123460 | BRD-K74363950 | BRD-K74363950-004-01-0 | A11 | trt | trt | BRD-K74363950 | ASMXXROZKSBQIH | acetylcholine receptor antagonist | CHRM1|CHRM2|CHRM3|CHRM4|CHRM5 | broad_id_20170327 | A11 | -1 | 5 | 29 | 0.900000 | 0.864874 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 379 | BRD-K82746043-001-15-1 | 3.248700 | 3.333300 | BRD-K82746043 | BRD-K82746043-001-15-1 | P20 | trt | trt | BRD-K82746043 | JLYAXFNOILIKPP | BCL inhibitor | BCL2|BCL2L1|BCL2L2 | broad_id_20170327 | P20 | -1 | 5 | 29 | 1.000000 | 1.000000 |
| 380 | BRD-K82746043-001-15-1 | 1.082900 | 1.111100 | BRD-K82746043 | BRD-K82746043-001-15-1 | P21 | trt | trt | BRD-K82746043 | JLYAXFNOILIKPP | BCL inhibitor | BCL2|BCL2L1|BCL2L2 | broad_id_20170327 | P21 | -1 | 5 | 29 | 0.966667 | 0.954958 |
| 381 | BRD-K82746043-001-15-1 | 0.360970 | 0.370370 | BRD-K82746043 | BRD-K82746043-001-15-1 | P22 | trt | trt | BRD-K82746043 | JLYAXFNOILIKPP | BCL inhibitor | BCL2|BCL2L1|BCL2L2 | broad_id_20170327 | P22 | -1 | 5 | 29 | 0.942857 | 0.922785 |
| 382 | BRD-K82746043-001-15-1 | 0.120320 | 0.123460 | BRD-K82746043 | BRD-K82746043-001-15-1 | P23 | trt | trt | BRD-K82746043 | JLYAXFNOILIKPP | BCL inhibitor | BCL2|BCL2L1|BCL2L2 | broad_id_20170327 | P23 | -1 | 5 | 29 | 1.000000 | 1.000000 |
| 383 | BRD-K82746043-001-15-1 | 0.040108 | 0.041152 | BRD-K82746043 | BRD-K82746043-001-15-1 | P24 | trt | trt | BRD-K82746043 | JLYAXFNOILIKPP | BCL inhibitor | BCL2|BCL2L1|BCL2L2 | broad_id_20170327 | P24 | -1 | 5 | 29 | 1.000000 | 1.000000 |
360 rows Ć 19 columns
AP scores for retrieval of individual replicates¶
Let's calculate p-values for each individual replicate AP score and plot scores and p-values together.
Normalized AP scores are rescaled using expected value of the random AP distribution:
AP_normalized = (AP - μā) / (1 - μā).
Normalization does not affect the ranking or the significance of the scores, but improves dynamic range.
# Calculate p-values using the same null_size and seed as mean_average_precision
activity_ap["p_value"] = p_values(activity_ap, null_size=1000000, seed=0)
activity_ap["-log10(p-value)"] = -activity_ap["p_value"].apply(np.log10)
activity_ap["below_p"] = activity_ap["p_value"] < 0.05
active_ratio_ap = activity_ap["below_p"].mean()
# Plot raw and normalized AP scores vs -log10 p-values
fig, axes = plt.subplots(1, 2, figsize=(14, 6))
# Plot 1: Raw AP vs -log10(p-value)
axes[0].scatter(
data=activity_ap,
x="average_precision",
y="-log10(p-value)",
c="below_p",
cmap="tab10",
s=10,
)
axes[0].axhline(-np.log10(0.05), color="black", linestyle="--")
axes[0].set_xlabel("AP")
axes[0].set_ylabel("-log10(p-value)")
axes[0].set_title("Replicate retrieval")
axes[0].text(
0.65,
1.5,
f"Retrieved = {100 * active_ratio_ap:.2f}%",
va="center",
ha="left",
)
# Plot 2: Normalized AP vs -log10(p-value)
axes[1].scatter(
data=activity_ap,
x="normalized_average_precision",
y="-log10(p-value)",
c="below_p",
cmap="tab10",
s=10,
)
axes[1].axhline(-np.log10(0.05), color="black", linestyle="--")
axes[1].axvline(0, color="black", linestyle="--", linewidth=0.5)
axes[1].set_xlabel("normalized AP")
axes[1].set_ylabel("-log10(p-value)")
axes[1].set_title("Replicate retrieval (normalized AP)")
axes[1].text(
0.65,
1.5,
f"Retrieved = {100 * active_ratio_ap:.2f}%",
va="center",
ha="left",
)
plt.tight_layout()
plt.show()
0%| | 0/2 [00:00<?, ?it/s]
mAP scores for compounds¶
At the next step, we average replicate AP scores at the per-compound level to obtain mAP values using mean_average_precision.
It also calculates p-values using permutation testing, and performs FDR correction to compare across compounds.
For more information on choosing null size parameter see the Null size example.
activity_map = map.mean_average_precision(
activity_ap, pos_sameby, null_size=1000000, threshold=0.05, seed=0
)
activity_map["-log10(p-value)"] = -activity_map["corrected_p_value"].apply(np.log10)
activity_map.head(10)
0%| | 0/2 [00:00<?, ?it/s]
0%| | 0/58 [00:00<?, ?it/s]
| Metadata_broad_sample | Metadata_reference_index | mean_average_precision | indices | mean_normalized_average_precision | p_value | corrected_p_value | below_p | below_corrected_p | -log10(p-value) | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | BRD-A69275535-001-01-5 | -1 | 0.575629 | [96, 97, 98, 99, 100, 101] | 0.426564 | 1.725598e-02 | 0.023276 | True | True | 1.633101 |
| 1 | BRD-A69636825-003-04-7 | -1 | 0.693806 | [114, 115, 116, 117, 118, 119] | 0.586252 | 3.477997e-03 | 0.006507 | True | True | 2.186605 |
| 2 | BRD-A69815203-001-07-6 | -1 | 1.000000 | [288, 289, 290, 291, 292, 293] | 1.000000 | 9.999990e-07 | 0.000008 | True | True | 5.081670 |
| 3 | BRD-A70858459-001-01-7 | -1 | 0.777173 | [156, 157, 158, 159, 160, 161] | 0.698903 | 8.279992e-04 | 0.001921 | True | True | 2.716482 |
| 4 | BRD-A72309220-001-04-1 | -1 | 0.716927 | [192, 193, 194, 195, 196, 197] | 0.617494 | 2.323998e-03 | 0.004493 | True | True | 2.347458 |
| 5 | BRD-A72390365-001-15-2 | -1 | 0.934444 | [210, 211, 212, 213, 214, 215] | 0.911417 | 2.799997e-05 | 0.000108 | True | True | 3.965506 |
| 6 | BRD-A73368467-003-17-6 | -1 | 0.926032 | [246, 247, 248, 249, 250, 251] | 0.900050 | 3.699996e-05 | 0.000134 | True | True | 3.872491 |
| 7 | BRD-A74980173-001-11-9 | -1 | 0.765931 | [18, 19, 20, 21, 22, 23] | 0.683712 | 1.017999e-03 | 0.002187 | True | True | 2.660188 |
| 8 | BRD-A81233518-004-16-1 | -1 | 0.621183 | [306, 307, 308, 309, 310, 311] | 0.488119 | 9.594990e-03 | 0.014269 | True | True | 1.845592 |
| 9 | BRD-A82035391-001-02-7 | -1 | 0.318066 | [342, 343, 344, 345, 346, 347] | 0.078529 | 2.536767e-01 | 0.258127 | False | False | 0.588166 |
Finally, we can plot the results and filter out phenotypicall inactive compounds with corrected p-value >0.05.
active_ratio = activity_map.below_corrected_p.mean()
fig, axes = plt.subplots(1, 2, figsize=(14, 6))
# Plot 1: Raw mAP
axes[0].scatter(
data=activity_map,
x="mean_average_precision",
y="-log10(p-value)",
c="below_corrected_p",
cmap="tab10",
s=10,
)
axes[0].set_title("Phenotypic activity assessment\n(mAP)")
axes[0].set_xlabel("mAP")
axes[0].set_ylabel("-log10(p-value)")
axes[0].axhline(-np.log10(0.05), color="black", linestyle="--")
axes[0].text(
0.65,
1.5,
f"Phenotypically active = {100 * active_ratio:.2f}%",
va="center",
ha="left",
)
# Plot 2: Normalized mAP
axes[1].scatter(
data=activity_map,
x="mean_normalized_average_precision",
y="-log10(p-value)",
c="below_corrected_p",
cmap="tab10",
s=10,
)
axes[1].set_title("Phenotypic activity assessment\n(Normalized mAP)")
axes[1].set_xlabel("mAP (normalized)")
axes[1].set_ylabel("-log10(p-value)")
axes[1].axhline(-np.log10(0.05), color="black", linestyle="--")
axes[1].axvline(0, color="black", linestyle="--", linewidth=0.5)
axes[1].text(
0.65,
1.5,
f"Phenotypically active = {100 * active_ratio:.2f}%",
va="center",
ha="left",
)
plt.tight_layout()
plt.show()
activity_map.to_csv("data/activity_map.csv", index=False)