Compare binding to human versus mouse Mxra8¶

In [1]:
import itertools

import altair as alt

import pandas as pd

# Lift Altair's default cap on the number of rows a chart may embed. The return
# value is bound to `_` so the cell does not echo it as output.
_ = alt.data_transformers.disable_max_rows()
In [2]:
# this cell is tagged parameters for `papermill` parameterization
#
# Every name below is a placeholder; `papermill` is expected to override each
# one in an injected cell before the analysis cells run.

# --- input files ---
# `entry_csv` is the name the analysis cells actually read for cell-entry effects.
entry_csv = None
# Earlier placeholder name for the cell-entry input. It is kept so existing
# parameterizations that still pass it do not break, but no visible cell reads it.
entry_293T_human_Mxra8 = None
binding_human_Mxra8 = None
binding_mouse_Mxra8 = None
addtl_site_annotations = None
mxra8_dists_csv = None
site_numbering_map = None

# --- output files ---
site_csv = None
mut_corr_chart_html = None
site_corr_chart_html = None
site_chart_html = None
dist_corr_chart_html = None
In [3]:
# Parameters
# Concrete values for this run, grouped by role.

# input files
entry_csv = "results/func_effects/averages/293T-Mxra8_entry_func_effects.csv"
binding_mouse_Mxra8 = "results/receptor_affinity/averages/mouse_Mxra8_mut_effect.csv"
binding_human_Mxra8 = "results/receptor_affinity/averages/human_Mxra8_mut_effect.csv"
site_numbering_map = "data/site_numbering_map.csv"
addtl_site_annotations = "data/addtl_site_annotations.csv"
mxra8_dists_csv = "results/mxra8_distances/mxra8_dists.csv"

# output files
site_csv = "results/compare_human_mouse_mxra8/site_binding.csv"
site_chart_html = "results/compare_human_mouse_mxra8/mxra8_site_chart.html"
mut_corr_chart_html = "results/compare_human_mouse_mxra8/mxra8_mut_binding_corr.html"
site_corr_chart_html = "results/compare_human_mouse_mxra8/mxra8_site_binding_corr.html"
dist_corr_chart_html = "results/compare_human_mouse_mxra8/mxra8_site_binding_dist_corr.html"
In [4]:
# Additional hardcoded parameters

# --- filters on the cell-entry measurements ---
entry_name = "entry in 293T-Mxra8 cells"
min_entry = -4
# NOTE(review): despite the name, this value is applied as an upper bound on
# `effect_std` when the entry data are read.
min_entry_std = 2.25
min_times_seen = 2

# --- ligands: key -> display name ---
# Key order matters: pairwise charts put the first key on x and the second on y.
ligands = dict(mouse_Mxra8="mouse Mxra8", human_Mxra8="human Mxra8")

# Per-ligand settings, all keyed the same way as `ligands`.
binding_csvs = dict(
    human_Mxra8=binding_human_Mxra8,
    mouse_Mxra8=binding_mouse_Mxra8,
)
binding_csv_col_names = dict(human_Mxra8="Mxra8", mouse_Mxra8="Mxra8")
max_binding_stds = dict(human_Mxra8=2.5, mouse_Mxra8=2.25)

# Column in the annotations CSV -> column name used in this notebook.
addtl_site_annotations_cols = dict(domain="domain", contacts="Mxra8 contact")

# Each correlation chart is saved to a single fixed path, so more than one
# ligand pair would overwrite the file.
assert len(ligands) == 2, "saving for corr charts only works for 2 ligands currently"

Read the data¶

In [5]:
# read the data
#
# Builds `data_df` with one row per mutation: the cell-entry effect, the binding
# value and a text label for each ligand, the sequential site and region, and
# the extra site annotations.

# Cell-entry effects: keep mutations that pass the `times_seen` and `effect_std`
# filters, then keep only the merge keys and the effect itself. The `mutation`
# label is added once at the end of this cell, not here, because this column
# selection would discard it anyway.
print(f"Reading cell entry from {entry_csv=}")
data_df = (
    pd.read_csv(entry_csv)
    .query("times_seen >= @min_times_seen")
    # NOTE(review): `min_entry_std` is used as an upper bound despite its name
    .query("effect_std <= @min_entry_std")
    [["site", "wildtype", "mutant", "effect"]]
    .rename(columns={"effect": "entry"})
)

for ligand in ligands:
    print(f"Reading binding to {ligand=} from {binding_csvs[ligand]=}")
    max_std = max_binding_stds[ligand]
    col_name = binding_csv_col_names[ligand]
    bind_df = (
        pd.read_csv(binding_csvs[ligand])
        .query("times_seen >= @min_times_seen")
        .query("frac_models == 1")
        .query(f"`{col_name} binding_std` <= @max_std")
        .rename(columns={f"{col_name} binding_median": ligand})
    )
    # NOTE(review): presumably the per-replicate measurement columns start at
    # position 11 of the averages CSV; this is positional, so confirm it if the
    # CSV layout changes.
    bind_rep_cols = bind_df.columns[11:].tolist()
    bind_df = (
        bind_df
        .assign(
            # label text: "<median> (<rep 1>, <rep 2>, ...)"
            label=lambda x: x.apply(
                lambda r: f"{r[ligand]:.2f} ({', '.join(str(round(r[c], 2)) for c in bind_rep_cols)})",
                axis=1,
            )
        )
        .rename(columns={"label": f"{ligand}_label"})
        [["site", "wildtype", "mutant", ligand, f"{ligand}_label"]]
    )
    # Left merge keeps every entry mutation even when binding was not measured;
    # `validate` raises if either side has duplicate mutation keys.
    data_df = data_df.merge(
        bind_df, how="left", on=["site", "mutant", "wildtype"], validate="1:1"
    )

# Default (inner) merge: rows whose site is missing from the numbering map are dropped.
print(f"Adding sequential site from {site_numbering_map=}")
data_df = data_df.merge(
    pd.read_csv(site_numbering_map).rename(columns={"reference_site": "site"})[
        ["site", "sequential_site", "region"]
    ],
    on="site",
    validate="many_to_one",
)

print(f"Adding site annotations from {addtl_site_annotations=}")
data_df = data_df.merge(
    (
        pd.read_csv(addtl_site_annotations)
        [["sequential_site"] + list(addtl_site_annotations_cols)]
        .rename(columns=addtl_site_annotations_cols)
    ),
    on="sequential_site",
    validate="many_to_one",
    how="left",
)

# Drop wildtype "mutations", add the mutation label, and treat sites without a
# contact annotation as non-contacts.
data_df = (
    data_df
    .query("wildtype != mutant")
    .assign(
        mutation=lambda x: x["wildtype"] + x["site"].astype(str) + x["mutant"],
        **{"Mxra8 contact": lambda x: x["Mxra8 contact"].fillna("no")},
    )
    .sort_values(["sequential_site", "mutant"])
    .reset_index(drop=True)
)
Reading cell entry from entry_csv='results/func_effects/averages/293T-Mxra8_entry_func_effects.csv'
Reading binding to ligand='mouse_Mxra8' from binding_csvs[ligand]='results/receptor_affinity/averages/mouse_Mxra8_mut_effect.csv'
Reading binding to ligand='human_Mxra8' from binding_csvs[ligand]='results/receptor_affinity/averages/human_Mxra8_mut_effect.csv'
Adding sequential site from site_numbering_map='data/site_numbering_map.csv'
Adding site annotations from addtl_site_annotations='data/addtl_site_annotations.csv'

Simple correlation of binding to different ligands across all mutations¶

In [6]:
# plot the data

# Hover selections: one highlights every point at the hovered site, the other
# only the hovered mutation.
site_selection = alt.selection_point(fields=["site"], on="mouseover", empty=False)
mut_selection = alt.selection_point(fields=["mutation"], on="mouseover", empty=False)

# Slider for the minimum entry effect a mutation needs in order to be drawn.
min_entry_slider = alt.param(
    name="min_entry_slider",
    value=min_entry,
    bind=alt.binding_range(
        name=f"minimum {entry_name}",
        min=data_df["entry"].min(),
        max=0,
    ),
)

# Only pass the chart the columns it actually uses.
mut_corr_cols = [
    "mutation",
    "entry",
    "site",
    *ligands,
    *(f"{lig}_label" for lig in ligands),
]
mut_corr_base = alt.Chart(data_df[mut_corr_cols])

for ligand1, ligand2 in itertools.combinations(ligands, 2):

    x_binding = alt.X(
        ligand1,
        title=f"binding to {ligands[ligand1]}",
        scale=alt.Scale(nice=False, padding=5),
    )
    y_binding = alt.Y(
        ligand2,
        title=f"binding to {ligands[ligand2]}",
        scale=alt.Scale(nice=False, padding=5),
    )
    point_tooltip = [
        "mutation",
        alt.Tooltip("entry", format=".2f", title=entry_name),
        alt.Tooltip(f"{ligand1}_label", title=ligands[ligand1]),
        alt.Tooltip(f"{ligand2}_label", title=ligands[ligand2]),
    ]

    mut_corr_chart = (
        mut_corr_base
        .add_params(site_selection, mut_selection, min_entry_slider)
        .transform_filter(alt.datum["entry"] >= min_entry_slider)
        .encode(
            x_binding,
            y_binding,
            color=alt.condition(site_selection, alt.value("red"), alt.value("gray")),
            opacity=alt.condition(site_selection, alt.value(0.9), alt.value(0.15)),
            size=alt.condition(site_selection, alt.value(55), alt.value(40)),
            strokeWidth=alt.condition(mut_selection, alt.value(3), alt.value(0.6)),
            tooltip=point_tooltip,
        )
        .mark_circle(stroke="black")
        .properties(width=175, height=175)
        .configure_axis(grid=False)
    )

    display(mut_corr_chart)

    print(f"Saving to {mut_corr_chart_html}")
    mut_corr_chart.save(mut_corr_chart_html)
Saving to results/compare_human_mouse_mxra8/mxra8_mut_binding_corr.html

Plot site effects on binding¶

We pre-filter on the entry cutoff, and then get the summed positive and negative effects at each site for each ligand:

In [7]:
# Apply the fixed entry cutoff before summarizing effects per site.
data_filtered_df = data_df.query("entry >= @min_entry")

# Columns that identify a site; together with the ligand they form the group key.
site_id_cols = ["site", "sequential_site", "wildtype", "region", "Mxra8 contact"]

site_df = (
    data_filtered_df
    # long format: one row per (mutation, ligand) with the binding value in `effect`
    .melt(
        id_vars=site_id_cols,
        value_vars=list(ligands),
        var_name="ligand",
        value_name="effect",
    )
    .groupby(["ligand", *site_id_cols], as_index=False, dropna=False)
    .aggregate(
        positive_effect=("effect", lambda s: s.clip(lower=0).sum()),
        negative_effect=("effect", lambda s: s.clip(upper=0).sum()),
        absolute_effect=("effect", lambda s: s.abs().sum()),
        summed_effect=("effect", "sum"),
        n_aas_w_measurement=("effect", "count"),
    )
    .sort_values(["ligand", "sequential_site"])
)

# The exported CSV also carries structure-related columns from the annotations file.
print(f"Writing site values to {site_csv=}")
export_annotation_cols = ["sequential_site", "protein_site", "6nk6_chain", "6nk7_chain"]
site_export_df = site_df.merge(
    pd.read_csv(addtl_site_annotations)[export_annotation_cols],
    on="sequential_site",
    how="left",
    validate="many_to_one",
)
site_export_df.to_csv(site_csv, index=False, float_format="%.3f")
Writing site values to site_csv='results/compare_human_mouse_mxra8/site_binding.csv'
In [8]:
chart_width = 950

# x-axis tick labels: every 130th row of the site table (ordered by sequential
# site), starting at row 50.
site_tick_values = (
    site_df[["sequential_site", "site"]]
    .sort_values("sequential_site")
    ["site"]
    .iloc[50::130]
)

# Facet label, e.g. "binding to mouse Mxra8".
site_binding_data = site_df.assign(
    ligand_name=lambda x: "binding to " + x["ligand"].map(ligands)
)

contact_color_scale = alt.Scale(
    domain=["no", "wrapped", "intraspike", "interspike"],
    range=["gray", "red", "purple", "orange"],
)

# One bar per site spanning from the summed negative to the summed positive
# effect, with one row of bars per ligand.
site_binding_chart = (
    alt.Chart(site_binding_data)
    .encode(
        alt.X(
            "site",
            sort=alt.SortField("sequential_site"),
            axis=alt.Axis(values=site_tick_values, labelAngle=0),
        ),
        alt.Y("positive_effect", title=None, scale=alt.Scale(nice=False, padding=4)),
        alt.Y2("negative_effect", title=None),
        alt.Color("Mxra8 contact", scale=contact_color_scale),
        alt.Row(
            "ligand_name",
            title=None,
            header=alt.Header(labelFontStyle="bold", labelPadding=2),
            spacing=5,
        ),
        tooltip=[
            "site",
            "wildtype",
            alt.Tooltip("positive_effect", format=".2f"),
            alt.Tooltip("negative_effect", format=".2f"),
            "Mxra8 contact",
        ],
    )
    .mark_bar(opacity=1, width=2)
    .properties(width=chart_width, height=0.23 * chart_width)
    .resolve_scale(y="independent")
)

Make overlay bar with regions:

In [9]:
# Colored strip with one rectangle per sequential site, colored by region.
region_sites = site_df[["sequential_site", "region"]].drop_duplicates()
region_color_scale = alt.Scale(
    range=["AliceBlue", "CadetBlue", "CadetBlue", "AliceBlue"]
)

region_chart = (
    alt.Chart(region_sites)
    .mark_rect(opacity=0.75, strokeWidth=0)
    .encode(
        alt.X("sequential_site:O", axis=None),
        alt.Color("region", legend=None, scale=region_color_scale),
    )
    .properties(width=chart_width)
)

# Each region name is drawn at the mean sequential site of that region.
text_df = site_df.groupby("region", as_index=False).aggregate(
    x=("sequential_site", "mean")
)

sequential_sites = site_df["sequential_site"]
text_x_scale = alt.Scale(domain=(sequential_sites.min(), sequential_sites.max()))

text_chart = (
    alt.Chart(text_df)
    .mark_text(fontWeight="bold", fontSize=18)
    .encode(
        alt.X("x:Q", title=None, scale=text_x_scale, axis=None),
        alt.Text("region"),
    )
    .properties(width=chart_width, height=21)
)

# Layer the region names on top of the colored strip.
overlay_chart = region_chart + text_chart

Combine overlay and site chart:

In [10]:
# Stack the region overlay above the per-site bars. The two charts get
# independent color scales because they color by different fields.
stacked_site_charts = alt.vconcat(
    overlay_chart, site_binding_chart, spacing=1
).resolve_scale(color="independent")

# Apply the shared styling, then enable x-only interactive binding.
site_chart = (
    stacked_site_charts
    .configure_axis(grid=False, titleFontSize=18, labelFontSize=14)
    .configure_header(labelFontSize=18)
    .configure_view(stroke="black", strokeOpacity=1, strokeWidth=1)
    .configure_legend(labelFontSize=18, titleFontSize=18)
    .interactive(bind_x=True, bind_y=False)
)

print(f"Saving to {site_chart_html}")
site_chart.save(site_chart_html)

site_chart
Saving to results/compare_human_mouse_mxra8/mxra8_site_chart.html
Out[10]:

Correlations of site effects with distance from Mxra8 for mouse Mxra8¶

In [11]:
# Distance table with `site` rewritten as "<site>(<region>)" so it can be merged
# with `site_df` on region and site.
mxra8_dists_df = pd.read_csv(mxra8_dists_csv).assign(
    site=lambda x: x["site"].astype(str) + "(" + x["region"] + ")"
)

site_dist_cols = [
    "ligand",
    "site",
    "region",
    "absolute_effect",
    "PDB",
    "distance_to_Mxra8",
]

# Only the mouse Mxra8 site effects are compared with distance.
site_dist_df = (
    site_df
    .merge(mxra8_dists_df, on=["region", "site"])
    .query("ligand == 'mouse_Mxra8'")
    [site_dist_cols]
    .assign(PDB=lambda x: "PDB " + x["PDB"])
)

site_dist_title = alt.TitleParams(
    "Effects on mouse Mxra8 binding vs distance to Mxra8 in structure",
    anchor="middle",
    fontSize=18,
)

# One scatter panel per PDB entry; hovering a point highlights that site.
site_dist_chart = (
    alt.Chart(site_dist_df)
    .add_params(site_selection)
    .mark_circle(fill="gray", fillOpacity=0.4)
    .encode(
        alt.X("absolute_effect", title="absolute effect of mutations at site"),
        alt.Y("distance_to_Mxra8", title="distance to Mxra8 in structure"),
        alt.Column("PDB", title=None),
        size=alt.condition(site_selection, alt.value(120), alt.value(60)),
        strokeWidth=alt.condition(site_selection, alt.value(4), alt.value(1)),
        stroke=alt.condition(site_selection, alt.value("red"), alt.value("black")),
        tooltip=[
            "site",
            alt.Tooltip("absolute_effect", title="effect", format=".1f"),
            alt.Tooltip("distance_to_Mxra8", title="distance", format=".1f"),
        ],
    )
    .properties(width=260, height=260, title=site_dist_title)
    .configure_axis(grid=False, titleFontSize=16, labelFontSize=12)
    .configure_header(labelFontSize=18, labelPadding=1)
)

print(f"Saving to {dist_corr_chart_html=}")
site_dist_chart.save(dist_corr_chart_html)

site_dist_chart
Saving to dist_corr_chart_html='results/compare_human_mouse_mxra8/mxra8_site_binding_dist_corr.html'
Out[11]:

Plot correlations in site effects¶

In [12]:
# Reshape the per-site summaries so each row is a (site, metric) pair and each
# ligand is its own column, ready for ligand-vs-ligand scatter plots.
site_key_cols = ["site", "wildtype", "region", "Mxra8 contact"]
site_metric_cols = ["positive_effect", "negative_effect", "absolute_effect"]

# long format: one row per (ligand, site, metric)
site_metric_long_df = site_df.melt(
    id_vars=["ligand", *site_key_cols],
    value_vars=site_metric_cols,
    var_name="metric",
    value_name="effect",
)

# wide format: one column per ligand
site_corr_df = site_metric_long_df.pivot_table(
    index=[*site_key_cols, "metric"],
    columns="ligand",
    values="effect",
).reset_index()
In [13]:
tooltip_cols = ["site", "wildtype", "region", "Mxra8 contact"]

for ligand1, ligand2 in itertools.combinations(ligands, 2):

    # Correlation between the two ligands within each metric, as {metric: r}.
    corr_by_metric = site_corr_df.groupby("metric")[[ligand1, ligand2]].corr()
    corrs = (
        corr_by_metric
        .reset_index(level=1)
        .query("ligand == @ligand1")
        [ligand2]
        .to_dict()
    )

    # Facet titles show the metric name together with its correlation.
    metric_titles = {
        metric: f"{metric.replace('_', ' ')} at site (r = {corrs[metric]:.2f})"
        for metric in site_corr_df["metric"].unique()
    }
    site_corr_plot_df = site_corr_df[
        [*tooltip_cols, ligand1, ligand2, "metric"]
    ].assign(metric=lambda x: x["metric"].map(metric_titles))

    axis_scale_opts = dict(nice=False, padding=6)

    site_corr_chart = (
        alt.Chart(site_corr_plot_df)
        .add_params(site_selection)
        .encode(
            alt.X(ligand1, title=ligands[ligand1], scale=alt.Scale(**axis_scale_opts)),
            alt.Y(ligand2, title=ligands[ligand2], scale=alt.Scale(**axis_scale_opts)),
            alt.Column(
                "metric",
                title=None,
                header=alt.Header(
                    labelFontStyle="bold", labelFontSize=11, labelPadding=2
                ),
            ),
            color=alt.condition(site_selection, alt.value("red"), alt.value("gray")),
            strokeWidth=alt.condition(site_selection, alt.value(3), alt.value(1)),
            size=alt.condition(site_selection, alt.value(60), alt.value(35)),
            opacity=alt.condition(site_selection, alt.value(1), alt.value(0.25)),
            tooltip=[
                *tooltip_cols,
                alt.Tooltip(ligand1, title=ligands[ligand1], format=".2f"),
                alt.Tooltip(ligand2, title=ligands[ligand2], format=".2f"),
            ],
        )
        .mark_circle(stroke="black")
        .resolve_scale(x="independent", y="independent")
        .configure_axis(grid=False)
        .properties(width=140, height=140)
    )

    display(site_corr_chart)

    print(f"Saving to {site_corr_chart_html}")
    site_corr_chart.save(site_corr_chart_html)
Saving to results/compare_human_mouse_mxra8/mxra8_site_binding_corr.html
In [ ]: