Compare cell entry effects for cells expressing alpha2,3 or alpha2,6 sialic acids
In [1]:
# tagged as papermill params
# Placeholder defaults for the three input CSV paths. The "Parameters" cell
# that follows assigns a concrete path to each of these names, and the
# notebook later passes them to pd.read_csv.
SA23_csv = None
SA26_csv = None
c293T_csv = None
In [2]:
# Parameters
# Concrete input paths that override the None placeholders above
# (presumably injected by papermill at execution time -- confirm).
# Each file is loaded with pd.read_csv in a later cell.
SA23_csv = "results/func_effects/averages/293_SA23_entry_func_effects.csv"
SA26_csv = "results/func_effects/averages/293_SA26_entry_func_effects.csv"
c293T_csv = "results/func_effects/averages/293T_entry_func_effects.csv"
In [3]:
import functools as ft
import itertools
import os
import pickle
import warnings

import altair as alt
import pandas as pd
import scipy
import scipy.stats
import yaml
# Lift Altair's row limit so full data frames can be embedded in charts.
_ = alt.data_transformers.disable_max_rows()
# Silence warnings for the rest of the notebook. The original motivation was
# Altair's deprecation warnings, but note that filtering on the base `Warning`
# class hides every warning category, not only Altair's.
warnings.simplefilter("ignore", category=Warning)
In [4]:
# Load the three functional-effect tables.
# When running interactively (outside papermill), first point SA23_csv,
# SA26_csv and c293T_csv at the CSV files to read.
SA23, SA26, c293T = (
    pd.read_csv(csv_path) for csv_path in (SA23_csv, SA26_csv, c293T_csv)
)
In [5]:
# Keep only well-supported measurements: a row is retained when it has
# n_selections > 1 and times_seen >= 2. The same filter is applied to each
# of the three data frames.
SA23, SA26, c293T = (
    frame.loc[(frame["n_selections"] > 1) & (frame["times_seen"] >= 2)]
    for frame in (SA23, SA26, c293T)
)
In [6]:
# Build one "mutation" label per row by concatenating wildtype + site + mutant.
#
# `site` is cast to str first: if every site in a CSV is numeric, pandas parses
# the column as integers and the str + int concatenation raises a TypeError.
# For a column that already holds strings the cast is a no-op.
#
# `.assign` returns a new frame instead of writing into the `.loc`-filtered
# frames produced by the previous cell, which avoids pandas'
# SettingWithCopyWarning and keeps this cell safe to re-run.
SA23 = SA23.assign(
    mutation=SA23["wildtype"] + SA23["site"].astype(str) + SA23["mutant"]
)
SA26 = SA26.assign(
    mutation=SA26["wildtype"] + SA26["site"].astype(str) + SA26["mutant"]
)
c293T = c293T.assign(
    mutation=c293T["wildtype"] + c293T["site"].astype(str) + c293T["mutant"]
)
In [7]:
# Narrow every frame to the three columns used from here on: the mutation
# label, its effect, and the effect's standard deviation.
SA23, SA26, c293T = (
    frame.loc[:, ["mutation", "effect", "effect_std"]]
    for frame in (SA23, SA26, c293T)
)
In [8]:
# Give each frame's measurement columns a condition-specific name.
#
# `effect` becomes the axis label used in the correlation plots. `effect_std`
# is renamed as well: if it kept the same name in all three frames, the
# pairwise merges on "mutation" later in the notebook would fall back to
# pandas' generic `_x` / `_y` suffixes, and the chart tooltips (built from all
# merged columns) could not tell the reader which condition a std belongs to.
SA23 = SA23.rename(
    columns={
        "effect": "2,3-linked entry",
        "effect_std": "2,3-linked entry std",
    }
)
SA26 = SA26.rename(
    columns={
        "effect": "2,6-linked entry",
        "effect_std": "2,6-linked entry std",
    }
)
c293T = c293T.rename(
    columns={
        "effect": "293T entry",
        "effect_std": "293T entry std",
    }
)
In [9]:
# Accumulates the finished charts, keyed by the (y column, x column) pair.
corr_charts = dict()

# Each entry pairs a data frame with the list of its columns that should be
# correlated against the listed columns of every other frame.
dfs_to_correlate = [
    (SA23, ["2,3-linked entry"]),
    (SA26, ["2,6-linked entry"]),
    (c293T, ["293T entry"]),
]
In [10]:
# A single hover selection keyed on "mutation". The same selection object is
# attached to every chart below and drives opacity, colour, size and stroke.
# NOTE(review): `selection_single` / `add_selection` are reported as deprecated
# in Altair 5 (successors: `selection_point` / `add_params`). They are kept
# unchanged here for compatibility -- confirm against the installed version.
selection_mutation = alt.selection_single(
    on="mouseover", fields=["mutation"], empty="none",
)

# Build one scatter plot for every pair of data frames (and every pair of
# their listed columns), storing it in `corr_charts[(col1, col2)]`.
for (df1, cols1), (df2, cols2) in itertools.combinations(dfs_to_correlate, 2):
    # `validate` makes the merge fail loudly if a mutation label is duplicated
    # in either frame.
    merged_df = df1.merge(df2, on="mutation", validate="one_to_one")
    for col1, col2 in itertools.product(cols1, cols2):
        # Restrict to rows where both values are present so that the reported
        # N and the Pearson r are computed on exactly the same points.
        # Previously r was computed on the unfiltered columns, which does not
        # match N whenever a value is missing.
        paired = merged_df[merged_df[col1].notnull() & merged_df[col2].notnull()]
        n = len(paired)
        # A correlation needs at least two points; pearsonr raises otherwise.
        if n < 2:
            continue
        r, _pvalue = scipy.stats.pearsonr(paired[col1], paired[col2])
        chart = (
            alt.Chart(merged_df)
            .encode(
                x=alt.X(col2, axis=alt.Axis(grid=False)),
                y=alt.Y(col1, axis=alt.Axis(grid=False)),
                # One tooltip entry per merged column; float columns are shown
                # with 3 significant digits. Any "natural sequence " substring
                # is dropped from the displayed title.
                tooltip=[
                    alt.Tooltip(c, format=".3g", title=c.replace("natural sequence ", ""))
                    if merged_df[c].dtype == float
                    else alt.Tooltip(c, title=c.replace("natural sequence ", ""))
                    for c in merged_df.columns
                ],
                # The hovered mutation is opaque, orange, larger and outlined;
                # every other point is faint black.
                opacity=alt.condition(selection_mutation, alt.value(1), alt.value(0.15)),
                color=alt.condition(selection_mutation, alt.value("orange"), alt.value("black")),
                size=alt.condition(selection_mutation, alt.value(55), alt.value(35)),
                strokeWidth=alt.condition(selection_mutation, alt.value(1.5), alt.value(0)),
            )
            .mark_circle(stroke="black")
            .properties(
                title=alt.TitleParams(
                    f"R={r:.2g}, N={n}", fontWeight="normal", fontSize=11, offset=-1,
                ),
                width=200,
                height=200,
            )
            .add_selection(selection_mutation)
        )
        corr_charts[(col1, col2)] = chart
In [11]:
# Arrange the correlation charts in a grid: up to `charts_per_row` charts are
# concatenated horizontally per row, and the rows are stacked vertically.
charts_per_row = 4
ordered_charts = list(corr_charts.values())
chart_rows = [
    alt.hconcat(*ordered_charts[start: start + charts_per_row])
    for start in range(0, len(ordered_charts), charts_per_row)
]
all_charts = alt.vconcat(*chart_rows)
# Bare last expression so the notebook renders the combined chart.
all_charts
Out[11]:
In [ ]: