00 - Setup#

import os
import stampede as st

slides = {
    1: {
        "exprmat": "exprMat_file.csv.gz",
        "metadata": "metadata_file.csv.gz",
        "fov_positions": "fov_positions_file.csv.gz",
    }
}
# optional filepath prefix
data_dir = "data"

os.makedirs(st.config["adata_dir"], exist_ok=True)

01 - Read#

import pandas as pd
import scanpy as sc

samples_df = pd.read_table("data/sample2fov.csv", sep=",")
samples_df.sort_values(["slide", "fov_start"], inplace=True)

st.validate_input(slides, samples_df, data_dir)

samples_df

	Donor	Treatment	Timepoint	sample	slide	fov_start	fov_end
0	6	Medium	1h	Medium_1h_don6	1	1	9
1	5	Medium	1h	Medium_1h_don5	1	10	18
2	4	Medium	1h	Medium_1h_don4	1	19	27
3	3	Medium	1h	Medium_1h_don3	1	28	36
4	2	Medium	1h	Medium_1h_don2	1	37	45
...	...	...	...	...	...	...	...
91	5	NaCl+Taurine	24h	NaCl+Taurine_24h_don5	2	388	396
92	4	NaCl+Taurine	24h	NaCl+Taurine_24h_don4	2	397	405
93	3	NaCl+Taurine	24h	NaCl+Taurine_24h_don3	2	406	414
94	2	NaCl+Taurine	24h	NaCl+Taurine_24h_don2	2	415	423
95	1	NaCl+Taurine	24h	NaCl+Taurine_24h_don1	2	424	432

96 rows × 7 columns

adata_file = os.path.join(st.config["adata_dir"], "raw_data.h5ad")
st.read_cosmx(
    slides,
    samples_df,
    adata_file,
    # samples_df_columns=["Donor", "Treatment", "Timepoint"],
    # metadata_df_columns=[
    #     'nCount_RNA', 'nFeature_RNA', "nCount_negprobes", "nCount_falsecode", "Area.um2", "qcFlagsFOV"
    # ],
    # nrows=100,
    data_dir=data_dir,
    overwrite=False,
    verbose=True,
)

adata_file already exists and overwrite=False

'adatas/raw_data.h5ad'

adata = sc.read_h5ad(adata_file)
adata.obs

	fov	cell_ID	slide	slide-fov	Area	AspectRatio	CenterX_local_px	CenterY_local_px	Width	Height	...	errorCtPerCellEstimate	percOfDataFromErrorPerCell	qcFlagsFOV	cell	sample	Donor	Treatment	Timepoint	fov_start	fov_end
slide-fov-cell_ID
1-1-17	1	17	1	1-1	3240	0.89	3792	840	65	73	...	97.281767	0.142455	Pass	c_2_1_17	Medium_1h_don6	6	Medium	1h	1	9
1-1-50	1	50	1	1-1	5656	0.85	3348	1026	105	89	...	97.281767	0.142455	Pass	c_2_1_50	Medium_1h_don6	6	Medium	1h	1	9
1-1-546	1	546	1	1-1	5500	0.98	3715	3090	87	85	...	97.281767	0.142455	Pass	c_2_1_546	Medium_1h_don6	6	Medium	1h	1	9
1-1-698	1	698	1	1-1	3604	0.87	3932	3467	77	67	...	97.281767	0.142455	Pass	c_2_1_698	Medium_1h_don6	6	Medium	1h	1	9
1-1-774	1	774	1	1-1	2604	0.82	893	3653	55	67	...	97.281767	0.142455	Pass	c_2_1_774	Medium_1h_don6	6	Medium	1h	1	9
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
1-428-735	428	735	1	1-428	4188	0.92	2419	3332	79	73	...	61.534091	0.119327	Pass	c_2_428_735	Medium_4h_don3	3	Medium	4h	424	432
1-428-833	428	833	1	1-428	2932	0.94	2522	3986	65	61	...	61.534091	0.119327	Pass	c_2_428_833	Medium_4h_don3	3	Medium	4h	424	432
1-429-8	429	8	1	1-429	2936	0.85	869	90	67	57	...	94.791667	0.178882	Pass	c_2_429_8	Medium_4h_don3	3	Medium	4h	424	432
1-431-156	431	156	1	1-431	2756	0.94	1656	2478	61	65	...	53.867021	0.183343	Pass	c_2_431_156	Medium_4h_don3	3	Medium	4h	424	432
1-431-158	431	158	1	1-431	3308	0.94	714	2508	65	69	...	53.867021	0.183343	Pass	c_2_431_158	Medium_4h_don3	3	Medium	4h	424	432

2000 rows × 92 columns

02 - QC#

import matplotlib.pyplot as plt

st.pp.gene_qc(adata)
adata.var

	is_negctrl	is_sysctrl	nCell	pctCell	nTranscript	meanTranscript	signal2noise
ABCB7	False	False	45	2.25	49	0.0245	NaN
ABCB8	False	False	110	5.50	118	0.0590	NaN
ABCC1	False	False	67	3.35	72	0.0360	NaN
ABCC4	False	False	50	2.50	54	0.0270	NaN
ABCE1	False	False	148	7.40	161	0.0805	NaN
...	...	...	...	...	...	...	...
ZDHHC21	False	False	67	3.35	75	0.0375	NaN
ZFP36	False	False	572	28.60	800	0.4000	NaN
ZNF706	False	False	198	9.90	232	0.1160	NaN
ZNRF1	False	False	71	3.55	76	0.0380	NaN
ZWINT	False	False	66	3.30	71	0.0355	NaN

1000 rows × 7 columns

st.pp.slide_qc(adata, slides, data_dir)
adata.uns["fov_metadata"]

	slide-fov	slide	fov	x	y	nCounts	nCell	meanCountsPerCell	nCount_negprobes	mean_NegProbe-CountsPerCell	nCount_falsecode	mean_FalseCode-CountsPerCell	meanCellSize	Failed_AtoMX_QC
0	1-1	1	1	256	145130	5556	7	793.714286	1	0.142857	32	4.571429	58.018832	0
1	1-2	1	2	4512	145130	6140	11	558.181818	2	0.181818	33	3.000000	45.164922	0
2	1-3	1	3	8768	145130	5061	4	1265.250000	3	0.750000	23	5.750000	91.449104	0
3	1-4	1	4	256	140874	8240	14	588.571429	2	0.142857	37	2.642857	45.700784	0
4	1-5	1	5	4512	140874	12586	19	662.421053	7	0.368421	74	3.894737	49.871777	0
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
346	1-425	1	425	63155	68921	666	2	333.000000	0	0.000000	1	0.500000	30.844722	0
347	1-427	1	427	58899	64665	956	3	318.666667	1	0.333333	3	1.000000	39.081555	0
348	1-428	1	428	63155	64665	2348	5	469.600000	1	0.200000	11	2.200000	48.113137	0
349	1-429	1	429	67411	64665	101	1	101.000000	0	0.000000	0	0.000000	42.476597	0
350	1-431	1	431	63155	60409	563	2	281.500000	0	0.000000	3	1.500000	43.865477	0

351 rows × 14 columns

fig, axs = st.pl.slide_qc(adata, columns=["nCounts", "nCell"])
plt.show()

../_images/8218c7d80007762666ec1c8ab92abdbe64f014a82f29aa9ff49170cc15abe07d.png

# plot nCount_RNA for a single FOV
idx = adata.obs[(adata.obs["slide-fov"] == "1-15")].index
fig, axs = st.pl.avg_per_pixel(adata[idx, :], column='nCount_RNA', log1p=False, fill_cell_area=True)
plt.show()

../_images/8da5992b57b30179a7679bd98a76d09b4f467b2cfa90a376d72a5b73aa768253.png

fig, axs = st.pl.correlations(adata, xcolumn="nCount_RNA", ycolumn="Area")
plt.show()

../_images/8946009bd64e63f95d96cf9b14036c48d3b992b62fee55343f199e83e57305f0.png

# plot nCount_RNA for the whole slide
fig, axs = st.pl.avg_per_pixel(adata, column='nCount_RNA', log1p=False, fill_cell_area=True)
plt.show()

../_images/550cb9c1bb614267c3dbe09f48a644f5ac14af3a30b40c93192bbb2d415e8614.png

fig, axs = st.pl.violin(adata, ["nCount_RNA", "nFeature_RNA", "Area.um2"])
plt.show()
fig, axs = st.pl.violin(adata, ["nCount_negprobes", "nCount_falsecode"], log_scale=(False, False))
plt.show()

../_images/d58303ad14e41da002382b89b6734ae9698e9f9d650f9d9abb5406ae3f080365.png

../_images/52be7d1821805646fcba17c93d757ed603ae2b830da3bebe385230d9a9405442.png

fig, ax = st.pl.ncell_per_condition(adata, "Donor")
plt.show()

../_images/72606df07ad630a533a7459c06902f91b4dae53c4fa6207a36444061cca221ee.png

fig, ax = st.pl.value_distribution(adata)
plt.show()

../_images/b5f9d3bef90fca9d4a82902b73283fc507d93c75f249463fc175db7a692a6e36.png

fig, ax = st.pl.column_distribution(adata, "nCell", axis=1)
plt.show()

../_images/2319e3915189dfeb56d9825764b65a5ec0b8a56b7c0be4c412b6cba2b37b890d.png

03 - Filtering#

# TODO: better tutorial dataset
adata.var["signal2noise"] = 2

fig, ax = st.pl.column_distribution(adata, "dist2edge_px", plot_kwargs={"bins": 50})

# set a threshold & plot
dist2edge_threshold = adata.obs["dist2edge_px"].quantile(0.05)
print(f"{dist2edge_threshold=}")
ax.axvline(dist2edge_threshold, color="red")

plt.show()

dist2edge_threshold=np.float64(50.0)

../_images/0837a84a4b7a00f6340d48d93fc8aef3e6bf574aa33cc651261899cffc9675c6.png

adata = st.pp.filter_genes(adata, ncell_min=10)
adata = st.pp.filter_cells(adata, dist2edge_px_min=dist2edge_threshold)

55 genes filtered out, 945 genes remaining.
919 cells filtered out, 1_081 cells remaining.

st.pp.gene_qc_postfilter(adata)
adata.var[["nCell", "nCell_postfilter", "pctCell", "pctCell_postfilter"]]

	nCell	nCell_postfilter	pctCell	pctCell_postfilter
ABCB7	45	27	2.25	2.50
ABCB8	110	67	5.50	6.20
ABCC1	67	40	3.35	3.70
ABCC4	50	28	2.50	2.59
ABCE1	148	91	7.40	8.42
...	...	...	...	...
ZDHHC21	67	40	3.35	3.70
ZFP36	572	357	28.60	33.02
ZNF706	198	118	9.90	10.92
ZNRF1	71	38	3.55	3.52
ZWINT	66	34	3.30	3.15

945 rows × 4 columns

st.pp.cell_qc_postfilter(adata)
adata.obs[["nFeature_RNA", "nFeature_RNA_postfilter", "nCount_RNA", "nCount_RNA_postfilter"]]

	nFeature_RNA	nFeature_RNA_postfilter	nCount_RNA	nCount_RNA_postfilter
slide-fov-cell_ID
1-1-546	1001	142	1373	183
1-1-774	231	40	267	42
1-1-930	740	112	1029	147
1-2-228	419	68	503	74
1-2-352	574	89	780	117
...	...	...	...	...
1-428-227	252	39	284	41
1-428-703	333	60	395	65
1-428-735	348	50	419	55
1-428-833	381	61	438	72
1-431-156	312	51	367	58

1081 rows × 4 columns

fig, ax = st.pl.ncell_per_condition(
    adata,
    columns = ["Treatment", "Timepoint"],
    offset_between_conditions = 1,
)
plt.show()

fig, ax = st.pl.ncell_per_condition(
    adata,
    columns = ["Treatment", "Timepoint", "Donor"],
    offset_between_conditions = [5, 1],
    text_kwargs={"size": 6},
)
plt.show()

../_images/0c2e1de5b1d8487737abdc2f3fd02d2f339e57f008d9628ca7189e3b63148269.png

../_images/a044e4571fc6b2dd8ee1e21cc6f8960004d23d06976ab4f0dc92e6c6caf80aba.png

fig, ax = st.pl.value_distribution(adata)
plt.show()

../_images/af9901ea8d3fcd71cf18cc20ee0aa438830ca25c2eb0452be64694a897558d9e.png

fig, ax = st.pl.column_distribution(adata, "nCell_postfilter", axis=1)
plt.show()

../_images/18783205023d80221391780f56dfdbd83a884fc516f9e291030e6407bf70f0df.png

04 - Binarize#

st.pp.binarize(adata)
adata_file = os.path.join(st.config["adata_dir"], "filtered.h5ad")
adata.write_h5ad(adata_file)

binary layer set as adata.X

adata.layers

Layers with keys: counts, binary

fig, ax = st.pl.value_distribution(adata, min_quantile=0.00, max_quantile=1.00)
plt.show()

../_images/92b37c46334191c252f1d661eeb60dcc3bee6e36023f1e9a0d82ecba67277211.png

05 - Dimensionality reduction#

st.pp.dim_red(adata)
fig, ax = st.pl.scree(adata)
plt.show()

../_images/ee4c40c5f5b87123a6131f336ff05e6f344785bcee6a0057e4571018f8447109.png

# visualize the effect of subsetting the data
st.tl.sketch(adata, frac=0.05, return_subset=False)
fig, ax = st.pl.sketch(adata, plot_kwargs={"alpha":0.4})
obs_column="subset"
ax.set_title(f"{obs_column} (n={sum(adata.obs[obs_column]):_})")
plt.show()

../_images/d8f329eee2dc805e0e2827b866600752861e6218634735599cdd7b41bcb33e82.png

# optional: subset the data if so desired
# adata = st.tl.sketch(adata, frac=0.05, return_subset=True)

fig_ax_list = st.pl.dim_red(
    adata, 
    columns=["Treatment", "Timepoint"],
)
plt.show()

../_images/5cda41f7e4f547de5a7ab9c59679148d8d636cbd6b5100554bd10acb235299cc.png

../_images/84592ade4c6bb4cb848c859202b1a85441c648d2a766643a0d94f6660d129b83.png

sc.pp.neighbors(
    adata,
    use_rep="X_svd",
    key_added="neighbors_svd",
)

sc.tl.umap(
    adata,
    neighbors_key="neighbors_svd",
    key_added="umap_svd",
)

sc.pl.embedding(
    adata,
    basis="umap_svd",
    alpha=0.5,
)

../_images/100c91cc14b17c0779b13fcc67d0572be24e327c7fd6455fefa98dfa22c809af.png

06 - Clustering#

st.pp.knn_count_smoothing(adata)

KNN_binary_mean layer set as adata.X

fig, ax = st.pl.value_distribution(adata, layer = "KNN_binary_mean")
plt.show()

../_images/0f715121908047abdb3b948e0455ac061f6824229f23f779c5c84a06d395873a.png

markerdict = {
    "T_cells": [
        "CD3D", "CD3E", "TRBC1",
        "CD4", "IL7R", "CD8A", "CD8B",
    ],
    "B_cells": [
        "MS4A1", "CD79A", "CD79B", "CD37"
    ],
    "NK_cells": [
        "NKG7", "GNLY", "KLRD1", "FCGR3A"
    ],
    "Monocytes_Macrophages": [
        "LYZ", "LST1", "S100A8", "S100A9",
        "CTSD", "CST3", "LGALS3",
        "CD14", "CD68"
    ],
    "Dendritic_cells": [
        "FCER1A", "CST3", "CLEC10A", "CD1C"
    ]
}

for ctype in sorted(markerdict):
    for marker in set(markerdict[ctype]):
        if marker not in adata.var_names:
            print(f"{ctype} marker gene '{marker}' is not present in adata.var_names (removing)")
            markerdict[ctype].remove(marker)
    if len(markerdict[ctype]) == 0:
        del markerdict[ctype]

B_cells marker gene 'CD79B' is not present in adata.var_names (removing)
B_cells marker gene 'MS4A1' is not present in adata.var_names (removing)
B_cells marker gene 'CD37' is not present in adata.var_names (removing)
B_cells marker gene 'CD79A' is not present in adata.var_names (removing)
Dendritic_cells marker gene 'CST3' is not present in adata.var_names (removing)
Dendritic_cells marker gene 'FCER1A' is not present in adata.var_names (removing)
Dendritic_cells marker gene 'CLEC10A' is not present in adata.var_names (removing)
Dendritic_cells marker gene 'CD1C' is not present in adata.var_names (removing)
Monocytes_Macrophages marker gene 'CD14' is not present in adata.var_names (removing)
Monocytes_Macrophages marker gene 'CTSD' is not present in adata.var_names (removing)
Monocytes_Macrophages marker gene 'LST1' is not present in adata.var_names (removing)
Monocytes_Macrophages marker gene 'S100A9' is not present in adata.var_names (removing)
Monocytes_Macrophages marker gene 'LGALS3' is not present in adata.var_names (removing)
Monocytes_Macrophages marker gene 'CST3' is not present in adata.var_names (removing)
Monocytes_Macrophages marker gene 'S100A8' is not present in adata.var_names (removing)
Monocytes_Macrophages marker gene 'CD68' is not present in adata.var_names (removing)
Monocytes_Macrophages marker gene 'LYZ' is not present in adata.var_names (removing)
NK_cells marker gene 'GNLY' is not present in adata.var_names (removing)
NK_cells marker gene 'KLRD1' is not present in adata.var_names (removing)
NK_cells marker gene 'FCGR3A' is not present in adata.var_names (removing)
NK_cells marker gene 'NKG7' is not present in adata.var_names (removing)
T_cells marker gene 'CD4' is not present in adata.var_names (removing)
T_cells marker gene 'IL7R' is not present in adata.var_names (removing)
T_cells marker gene 'CD3D' is not present in adata.var_names (removing)
T_cells marker gene 'CD8B' is not present in adata.var_names (removing)
T_cells marker gene 'CD3E' is not present in adata.var_names (removing)
T_cells marker gene 'CD8A' is not present in adata.var_names (removing)

for ctype, markers in markerdict.items():
    if len(markers) == 0:
        continue
        
    print(ctype)
    sc.pl.embedding(
        adata,
        basis="umap_svd",
        color=markers,
        layer="KNN_binary_mean",
        ncols=3,
        alpha=0.5,
    )

T_cells

../_images/bc38d17972bff9680e001e260911bcb8d45efe3e014634989cf076a7230c6824.png

res=0.7
key = f"leiden_res_{res:.2f}"
sc.tl.leiden(
    adata,
    resolution=res,
    neighbors_key="neighbors_svd",
    key_added=key,
    flavor="igraph",
    n_iterations=2,
    directed=False,
    random_state=42,
)

sc.pl.embedding(
    adata,
    basis="umap_svd",
    color=key,
    alpha=0.5,
)

../_images/cd61a4a55b37b21c256ab5cba79bce9c60abf088701cea5ac8503605fe65acf6.png

mp = sc.pl.MatrixPlot(
    adata,
    markerdict,
    groupby=key,
    layer="KNN_binary_mean",
    var_group_rotation=0,
)
ax = mp.get_axes()["mainplot_ax"]
_ = plt.setp(ax.get_xticklabels(), ha="right", rotation_mode="anchor")

../_images/c455a422ac34787ea40b1cadf1fbb98cbab9f865956b6778116dca4c3315ae4d.png

sc.pl.heatmap(
    adata, 
    markerdict,
    groupby=key,
    layer="KNN_binary_mean",
    var_group_rotation=0,
)

../_images/2e2fc3846c6e57632443b9fe78379e8f45ff46c367cc4346c8fba720470bc932.png

# TODO: redo
ctype_key = "celltype_lvl1"
cluster2ctype = {
    "0":"T_cells",
    "1":"Myeloid_cells",
    "2":"NK_cells",
    "3":"B_cells",
    "4":"Unknown",
}
adata.obs[ctype_key] = adata.obs[key].astype(str).replace(cluster2ctype).astype("category")

sc.pl.embedding(
    adata,
    basis="umap_svd",
    color=ctype_key,
    alpha=0.5,
)

../_images/f50110cd38bab944edca9653f80f86a39b59e0e0e3307ae9dc0f29f6748f86cf.png

mp = sc.pl.MatrixPlot(
    adata,
    markerdict,
    groupby=ctype_key,
    layer="KNN_binary_mean",
    var_group_rotation=0,
)
ax = mp.get_axes()["mainplot_ax"]
_ = plt.setp(ax.get_xticklabels(), ha="right", rotation_mode="anchor")

../_images/23c28c662d54796f7e31a1786b2458d954e670098bca7ba8ed4ebf4347a47ecf.png

df = st.pp.pseudobulk(
    adata,
    samples_column="sample",
    cluster_column=ctype_key,
    cluster="T_cells",
)
df

	Medium_1h_don1	Medium_1h_don2	Medium_1h_don3	Medium_1h_don4	Medium_1h_don5	Medium_1h_don6	Medium_4h_don1	Medium_4h_don2	Medium_4h_don3	Medium_4h_don4	...	NaCl_4h_don5	NaCl_4h_don6	Taurine_1h_don1	Taurine_1h_don2	Taurine_1h_don4	Taurine_4h_don1	Taurine_4h_don2	Taurine_4h_don3	Taurine_4h_don4	Taurine_4h_don6
ABCB7	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0
ABCB8	0	0	0	0	0	0	0	0	1	0	...	0	0	0	0	0	0	0	0	0	0
ABCC1	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0
ABCC4	0	0	2	0	1	0	0	0	0	0	...	0	0	0	0	0	0	0	2	0	0
ABCE1	0	0	1	1	1	5	0	0	0	0	...	0	0	0	0	0	0	0	1	0	1
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
ZDHHC21	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0
ZFP36	6	2	5	8	2	5	0	0	0	0	...	1	0	3	3	1	0	0	0	1	0
ZNF706	0	0	3	0	3	7	0	0	1	1	...	0	0	0	2	1	0	0	0	0	2
ZNRF1	0	0	0	0	0	1	0	0	0	0	...	0	0	0	0	0	0	1	0	0	0
ZWINT	0	0	0	0	1	0	0	0	0	0	...	0	1	0	0	0	0	0	0	0	0

945 rows × 38 columns

07 - Differential expression analysis#

# select a celltype to analyse
cdata = adata[adata.obs["celltype_lvl1"] == "T_cells", :]

# results_df = st.tl.pydeseq2(
#     cdata, 
#     design="~ 'Treatment' + 'Timepoint'", 
#     contrast=["Treatment", "NaCl", "Medium"],
#     return_objects=False,
# )
# results_df

# fig, axs = st.pl.pydeseq2_volcano(results_df)
# plt.show()

det_rate_df = st.pp.detection_rates(cdata, samples_column="sample")
det_rate_df

	Medium_1h_don5	Medium_1h_don4	Medium_1h_don6	Medium_1h_don2	Medium_1h_don3	Medium_1h_don1	Medium_4h_don4	NaCl_1h_don2	NaCl_4h_don2	Taurine_4h_don2	...	Medium_4h_don2	Medium_4h_don5	NaCl+Taurine_4h_don4	NaCl_4h_don5	NaCl_4h_don6	NaCl_1h_don6	NaCl_1h_don3	NaCl_1h_don4	NaCl_4h_don4	Taurine_4h_don4
ABCB7	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.083333	0.000000	...	0.0	0.000000	0.000000	0.000000	0.000000	0.0	0.000000	0.0	0.000000	0.000000
ABCB8	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	...	0.0	0.000000	0.000000	0.000000	0.000000	0.0	0.000000	0.0	0.000000	0.000000
ABCC1	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	...	0.0	0.000000	0.000000	0.000000	0.000000	0.0	0.083333	0.0	0.000000	0.000000
ABCC4	0.083333	0.000000	0.000000	0.000000	0.175000	0.000000	0.000000	0.000000	0.000000	0.000000	...	0.0	0.000000	0.000000	0.000000	0.000000	0.0	0.083333	0.0	0.000000	0.000000
ABCE1	0.083333	0.083333	0.431035	0.000000	0.083333	0.000000	0.000000	0.083333	0.000000	0.000000	...	0.0	0.000000	0.000000	0.000000	0.000000	0.0	0.000000	0.0	0.000000	0.000000
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
ZDHHC21	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	...	0.0	0.083333	0.000000	0.000000	0.000000	0.0	0.000000	0.0	0.000000	0.000000
ZFP36	0.166667	0.583333	0.431035	0.172043	0.388889	0.421053	0.000000	0.083333	0.352941	0.000000	...	0.0	0.083333	0.083333	0.083333	0.000000	0.0	0.000000	0.0	0.083333	0.083333
ZNF706	0.166667	0.000000	0.254237	0.000000	0.276316	0.000000	0.083333	0.000000	0.000000	0.000000	...	0.0	0.000000	0.000000	0.000000	0.000000	0.0	0.000000	0.0	0.000000	0.000000
ZNRF1	0.000000	0.000000	0.083333	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.083333	...	0.0	0.000000	0.000000	0.000000	0.000000	0.0	0.000000	0.0	0.000000	0.000000
ZWINT	0.083333	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.083333	0.000000	...	0.0	0.000000	0.000000	0.000000	0.083333	0.0	0.000000	0.0	0.000000	0.000000

945 rows × 38 columns

results_df = st.tl.paired_binomial_glm(
    det_rate_df, 
    cdata, 
    samples_column="sample", 
    test_condition="NaCl", 
    reference_condition="Medium", 
    condition_column="Treatment",
    covariate_columns=["Timepoint"],
)
results_df

/home/siebrenf/git/stampede/stampede/_tools/statsmodels.py:177: RuntimeWarning: Perfect separation detected in 41 genes. Parameter estimates may be unstable for these genes. Check the 'perfect_separation' column in the results.
  warnings.warn(

	gene	beta	se	odds_ratio	pval	perfect_separation	error	padj	-log10(padj)	log2(odds_ratio)
289	ERP44	-22.079146	26030.162254	2.577203e-10	0.999323	False	None	1.0	-0.0	-31.853475
132	CAND1	-22.036895	26042.032823	2.688425e-10	0.999325	False	None	1.0	-0.0	-31.792520
493	LY6D	-21.891387	26082.472539	3.109505e-10	0.999330	False	None	1.0	-0.0	-31.582596
474	LIF	-21.772220	26109.118763	3.503038e-10	0.999335	False	None	1.0	-0.0	-31.410674
484	LPL	-21.738470	26116.511965	3.623285e-10	0.999336	False	None	1.0	-0.0	-31.361983
...	...	...	...	...	...	...	...	...	...	...
736	RNF121	21.095117	15090.570398	1.450417e+09	0.998885	False	None	1.0	-0.0	30.433821
889	TRIM28	21.126632	21604.504143	1.496855e+09	0.999220	False	None	1.0	-0.0	30.479287
882	TOMM70	21.360697	15206.595748	1.891615e+09	0.998879	False	None	1.0	-0.0	30.816971
320	FSTL1	21.360697	15206.595748	1.891615e+09	0.998879	False	None	1.0	-0.0	30.816971
543	MOSPD2	21.691319	21561.787022	2.632814e+09	0.999197	False	None	1.0	-0.0	31.293958

945 rows × 10 columns

fig, ax = st.pl.paired_binomial_glm_volcano(results_df)
plt.show()

../_images/f34602a6188953a67a55663825785bdebd3fe12781e1276b60882b76a44abee3.png