# tagged as papermill params
# Placeholder defaults for the three input CSV paths; each name is reassigned
# in the "Parameters" section directly below before it is used.
SA23_csv = None
SA26_csv = None
c293T_csv = None

# Parameters
# Concrete input paths used for this run (presumably the values injected by
# papermill -- confirm against the workflow that executes the notebook).
# Each path is read with pandas further down in the file.
SA23_csv = "results/func_effects/averages/293_SA23_entry_func_effects.csv"
SA26_csv = "results/func_effects/averages/293_SA26_entry_func_effects.csv"
c293T_csv = "results/func_effects/averages/293T_entry_func_effects.csv"

import os
import pickle
import itertools
import warnings

import altair as alt

import functools as ft

import pandas as pd

import scipy

import yaml


# Turn off Altair's max-rows check on chart data; the return value is bound to
# `_` only so that it is discarded.
_ = alt.data_transformers.disable_max_rows()
# NOTE(review): `Warning` is the base class of every warning category, so this
# filter silences all warnings raised from here on (pandas, scipy, ...), not
# only the Altair deprecation warning it was added for.
warnings.filterwarnings("ignore", category=Warning)  # altair deprecation warning

# Import datasets
# The three CSV paths (SA23_csv, SA26_csv, c293T_csv) are set in the
# parameters section at the top of the file.


def _load_entry_effects(csv_path, effect_label):
    """Read one functional-effects CSV and reduce it to per-mutation effects.

    Parameters
    ----------
    csv_path : str
        Path to a CSV with the columns ``wildtype``, ``site``, ``mutant``,
        ``effect``, ``effect_std``, ``n_selections`` and ``times_seen``.
    effect_label : str
        Name given to the ``effect`` column in the returned data frame.

    Returns
    -------
    pandas.DataFrame
        Data frame with the columns ``mutation``, `effect_label` and
        ``effect_std``, restricted to the rows that pass the quality filter.
    """
    df = pd.read_csv(csv_path)

    # drop poor measurements: keep rows with more than one selection that
    # were seen at least twice
    well_measured = (df["n_selections"] > 1) & (df["times_seen"] >= 2)
    # copy so the column assignment below acts on an independent frame rather
    # than on a slice of the frame that was read from disk
    df = df.loc[well_measured].copy()

    # create mutation column; `site` is cast to str because pandas parses an
    # all-numeric site column as integers, which cannot be concatenated with
    # the wildtype / mutant strings
    df["mutation"] = df["wildtype"] + df["site"].astype(str) + df["mutant"]

    # drop the remaining columns and give the effect a data-set specific name
    return df[["mutation", "effect", "effect_std"]].rename(
        columns={"effect": effect_label}
    )


SA23 = _load_entry_effects(SA23_csv, "2,3-linked entry")
SA26 = _load_entry_effects(SA26_csv, "2,6-linked entry")
c293T = _load_entry_effects(c293T_csv, "293T entry")

# data frames and columns with variables to correlate
dfs_to_correlate = [
    (SA23, ["2,3-linked entry"]),
    (SA26, ["2,6-linked entry"]),
    (c293T, ["293T entry"]),
]

# Build one interactive scatter plot per pair of data sets in
# `dfs_to_correlate`, titled with the Pearson correlation (R) and the number
# of mutations measured in both (N), then lay the plots out in a grid.

# Import the submodule explicitly: a bare `import scipy` does not guarantee
# that `scipy.stats` is available as an attribute on every SciPy version.
import scipy.stats

corr_charts = {}

# Shared across all charts so that hovering over a mutation in one panel
# highlights the same mutation in every other panel.
selection_mutation = alt.selection_single(
    on="mouseover", fields=["mutation"], empty="none",
)

for (df1, cols1), (df2, cols2) in itertools.combinations(dfs_to_correlate, 2):

    # validate="one_to_one" makes the merge raise if a mutation occurs more
    # than once in either data frame
    merged_df = df1.merge(df2, on="mutation", validate="one_to_one")

    # Tooltips depend only on the merged frame, so build them once per pair:
    # float columns are shown with 3 significant digits, all others as-is.
    tooltips = [
        alt.Tooltip(c, format=".3g", title=c.replace("natural sequence ", ""))
        if merged_df[c].dtype == float
        else alt.Tooltip(c, title=c.replace("natural sequence ", ""))
        for c in merged_df.columns
    ]

    for col1, col2 in itertools.product(cols1, cols2):

        # Use the same rows for N and for R: only mutations with a value in
        # both columns. Passing NaN-containing columns to pearsonr would give
        # a NaN (or an error) that does not match the reported N.
        paired = merged_df[[col1, col2]].dropna()
        n = len(paired)

        # a correlation needs at least two points
        if n < 2:
            continue

        r, _pvalue = scipy.stats.pearsonr(paired[col1], paired[col2])

        chart = (
            alt.Chart(merged_df)
            .encode(
                x=alt.X(col2, axis=alt.Axis(grid=False)),
                y=alt.Y(col1, axis=alt.Axis(grid=False)),
                tooltip=tooltips,
                # the hovered mutation is drawn opaque, orange, larger and
                # outlined; all other points are faint black
                opacity=alt.condition(selection_mutation, alt.value(1), alt.value(0.15)),
                color=alt.condition(selection_mutation, alt.value("orange"), alt.value("black")),
                size=alt.condition(selection_mutation, alt.value(55), alt.value(35)),
                strokeWidth=alt.condition(selection_mutation, alt.value(1.5), alt.value(0)),
            )
            .mark_circle(stroke="black")
            .properties(
                title=alt.TitleParams(
                    f"R={r:.2g}, N={n}", fontWeight="normal", fontSize=11, offset=-1,
                ),
                width=200,
                height=200,
            )
            .add_selection(selection_mutation)
        )

        corr_charts[(col1, col2)] = chart

# Arrange the charts in rows of `charts_per_row`, stacked vertically.
charts_per_row = 4
charts = list(corr_charts.values())
chart_rows = [
    alt.hconcat(*charts[i: i + charts_per_row])
    for i in range(0, len(charts), charts_per_row)
]

all_charts = alt.vconcat(*chart_rows)

all_charts

# Compare cell entry effects for cells expressing alpha2,3 or alpha2,6 sialic acids