ephrin_neut_curves.ipynb¶

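This notebook analyzes neutralization of Nipah pseudovirus by soluble bat ephrin receptors (ephrin-B2 and ephrin-B3), and compares neutralization of single mutants with their receptor-binding scores from deep mutational scanning (DMS).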

  • Written by Brendan Larsen
In [1]:
# this cell is tagged as parameters for `papermill` parameterization
# Every name below defaults to None here; the cells further down fall back to
# hard-coded input paths and skip saving plots when `EFNB2_neut_corr` is None.

# configuration files
altair_config = None
nipah_config = None

# input CSVs with neutralization measurements
ephrin_binding_neuts_file = None
ephrin_validation_curves = None

# input CSV with measured IC50s for the validation mutants
validation_ic50s_file = None

# input CSVs with DMS receptor-binding effects
e2_monomeric_binding_file = None
e3_dimeric_binding_file = None

# output paths for the saved charts
ephrin_curve_plot = None
e2_curve_plot = None
e3_curve_plot = None
e2_corr_plot = None
e3_corr_plot = None
EFNB2_neut_corr = None
EFNB3_neut_corr = None
all_ephrin_neut_plots = None
In [2]:
# Parameters
# Values for this run; they override the None defaults declared in the
# parameters cell above.

# input data (neutralization measurements and measured IC50s)
ephrin_binding_neuts_file = (
    "data/custom_analyses_data/experimental_data/bat_ephrin_neuts.csv"
)
ephrin_validation_curves = (
    "data/custom_analyses_data/experimental_data/binding_single_mutant_validations.csv"
)
validation_ic50s_file = (
    "data/custom_analyses_data/experimental_data/receptor_IC_validations.csv"
)
# configuration files
nipah_config = "nipah_config.yaml"
altair_config = "data/custom_analyses_data/theme.py"
# DMS receptor-binding effects
e2_monomeric_binding_file = (
    "results/receptor_affinity/averages/bEFNB2_monomeric_mut_effect.csv"
)
e3_dimeric_binding_file = (
    "results/receptor_affinity/averages/bEFNB3_dimeric_mut_effect.csv"
)
# output chart paths (HTML)
ephrin_curve_plot = "results/images/ephrin_neut_curve.html"
e2_curve_plot = "results/images/ephrinB2_neut_curve.html"
e3_curve_plot = "results/images/ephrinB3_neut_curve.html"
e2_corr_plot = "results/images/e2_corr_plot.html"
e3_corr_plot = "results/images/e3_corr_plot.html"
EFNB2_neut_corr = "results/images/EFNB2_neut_corr.html"
EFNB3_neut_corr = "results/images/EFNB3_neut_corr.html"
all_ephrin_neut_plots = "results/images/all_ephrin_neut_plots.html"

Import packages¶

In [3]:
import warnings
import math
import os
from IPython.display import display, HTML, SVG
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import neutcurve
from neutcurve.colorschemes import CBPALETTE
from neutcurve.colorschemes import CBMARKERS
import scipy.stats
import yaml
import altair as alt
import re
print(f"Using `neutcurve` version {neutcurve.__version__}")
import sys

# allow more rows for Altair
_ = alt.data_transformers.disable_max_rows()

# setup working directory
# `os.getcwd()` returns the directory without a trailing slash, so both paths
# are normalized before comparing; comparing against the raw string (which
# ends in "/") could never match and the first branch was unreachable.
project_dir = "/fh/fast/bloom_j/computational_notebooks/blarsen/2023/Nipah_Malaysia_RBP_DMS/"
if os.path.normpath(os.getcwd()) == os.path.normpath(project_dir):
    print("Already in correct directory")
else:
    os.chdir(project_dir)
    print("Setup in correct directory")

#import altair themes from /data/custom_analyses_data/theme.py and enable
# (the relative path below is resolved against the working directory set above)
sys.path.append('data/custom_analyses_data/')
import theme
alt.themes.register('main_theme', theme.main_theme)
alt.themes.enable('main_theme')
Using `neutcurve` version 0.5.7
Setup in correct directory
Out[3]:
ThemeRegistry.enable('main_theme')

Setup path¶

For running notebook interactively¶

In [4]:
# When no output path was injected (EFNB2_neut_corr is still None), the
# notebook is being run by hand: fall back to the default input locations.
# The output plot paths stay None; later cells only save charts when
# EFNB2_neut_corr is set.
if EFNB2_neut_corr is None:
    #altair_config = "data/custom_analyses_data/theme.py"
    nipah_config = "nipah_config.yaml"
    ephrin_binding_neuts_file = (
        "data/custom_analyses_data/experimental_data/bat_ephrin_neuts.csv"
    )
    ephrin_validation_curves = "data/custom_analyses_data/experimental_data/binding_single_mutant_validations.csv"
    validation_ic50s_file = (
        "data/custom_analyses_data/experimental_data/receptor_IC_validations.csv"
    )
    e2_monomeric_binding_file = (
        "results/receptor_affinity/averages/bEFNB2_monomeric_mut_effect.csv"
    )
    e3_dimeric_binding_file = (
        "results/receptor_affinity/averages/bEFNB3_dimeric_mut_effect.csv"
    )

Read in config file¶

In [5]:
# Parse the YAML configuration file named by `nipah_config`
with open(nipah_config) as config_handle:
    config = yaml.safe_load(config_handle)

First plot neuts with WT virus¶

In [6]:
# Read the neutralization measurements and preview the labels as stored on disk
df = pd.read_csv(ephrin_binding_neuts_file)
display(df.head(5))

# Shorthand receptor labels in the file -> names used in the figures
receptor_labels = {
    "E2-dimeric": "dimeric-bEFNB2",
    "E2-monomeric": "monomeric-bEFNB2",
    "E3-dimeric": "dimeric-bEFNB3",
    "E3-monomeric": "monomeric-bEFNB3",
}
df["virus"] = df["virus"].replace(receptor_labels)

# Preview again to confirm the relabelling
display(df.head(4))
serum virus replicate concentration fraction infectivity
0 CHO-EFNB3 E2-dimeric 1 0.098039 0.00001
1 CHO-EFNB3 E2-dimeric 1 0.032680 0.00001
2 CHO-EFNB3 E2-dimeric 1 0.010893 0.00000
3 CHO-EFNB3 E2-dimeric 1 0.003631 0.00001
4 CHO-EFNB3 E2-dimeric 1 0.001210 0.00001
serum virus replicate concentration fraction infectivity
0 CHO-EFNB3 dimeric-bEFNB2 1 0.098039 0.00001
1 CHO-EFNB3 dimeric-bEFNB2 1 0.032680 0.00001
2 CHO-EFNB3 dimeric-bEFNB2 1 0.010893 0.00000
3 CHO-EFNB3 dimeric-bEFNB2 1 0.003631 0.00001

Get neut curves and plot¶

In [7]:
# Fit neutralization curves to the relabelled data, passing `fixbottom=0`
fits = neutcurve.curvefits.CurveFits(data=df, fixbottom=0)

# Table of fit parameters, including the requested inhibitory concentrations
fitParams = fits.fitParams(
    ics=[50, 90, 95, 97, 98, 99],
)


def extract_dataframe_from_neutcurve(serum, viruses, replicate="average"):
    """Stack the curve data frames for several viruses into one frame.

    Curves are looked up in the module-level `fits` object at call time.

    Parameters
    ----------
    serum : str
        Serum label passed to `fits.getCurve`.
    viruses : iterable of str
        Virus labels to retrieve; each becomes the value of the `virus` column.
    replicate : str, optional
        Replicate passed to `fits.getCurve` (default "average").

    Returns
    -------
    pandas.DataFrame
        Row-wise concatenation of the per-virus curve frames.
    """

    def labelled_curve(virus):
        # Fetch one curve's data and tag it with the virus it belongs to
        frame = fits.getCurve(
            serum=serum, virus=virus, replicate=replicate
        ).dataframe()
        frame["virus"] = virus
        return frame

    return pd.concat([labelled_curve(virus) for virus in viruses], axis=0)


# pull out the neuts that were done on CHO-EFNB3 cells, not E2
serum = "CHO-EFNB3"
viruses = [
    "dimeric-bEFNB2",
    "monomeric-bEFNB2",
    "dimeric-bEFNB3",
    "monomeric-bEFNB3",
]
curve = extract_dataframe_from_neutcurve(serum, viruses)

# Error-bar bounds: measurement plus/minus its standard error
measurement, stderr = curve["measurement"], curve["stderr"]
curve["upper"] = measurement + stderr
curve["lower"] = measurement - stderr


def plot_neut_curve(df):
    """Layer fitted curves, measured points and error bars into one chart.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns `concentration`, `fit`, `measurement`,
        `lower`, `upper` and `virus`.

    Returns
    -------
    Layered Altair chart (lines + circles + error bars), coloured by `virus`.
    """
    panel_size = {"width": 300, "height": 200}
    x_title = "Concentration (μM)"
    y_title = "Fraction Infectivity"

    # Fitted curve: one line per receptor on a log concentration axis
    fit_layer = (
        alt.Chart(df)
        .mark_line(size=1.5)
        .encode(
            x=alt.X(
                "concentration:Q",
                scale=alt.Scale(type="log"),
                axis=alt.Axis(format=".0e"),
                title=x_title,
            ),
            y=alt.Y("fit:Q", title=y_title),
            color=alt.Color("virus", title="Receptor"),
        )
        .properties(**panel_size)
    )

    # Measured values drawn as filled circles
    point_layer = (
        alt.Chart(df)
        .mark_circle(size=40, opacity=1)
        .encode(
            x=alt.X(
                "concentration",
                scale=alt.Scale(type="log"),
                axis=alt.Axis(format=".0e", tickCount=3),
                title=x_title,
            ),
            y=alt.Y("measurement:Q", title=y_title),
            color=alt.Color("virus", title="Receptor"),
        )
        .properties(**panel_size)
    )

    # Error bars spanning the precomputed `lower`..`upper` columns
    errorbar_layer = (
        alt.Chart(df)
        .mark_errorbar(opacity=1)
        .encode(
            x="concentration",
            y=alt.Y("lower", title=y_title),
            y2="upper",
            color="virus",
        )
        .properties(**panel_size)
    )

    return fit_layer + point_layer + errorbar_layer


# Draw the curves for the soluble receptors; the chart is only written to
# disk when output paths were supplied (EFNB2_neut_corr is not None).
ephrin_curve = plot_neut_curve(curve)
ephrin_curve.display()
if EFNB2_neut_corr is not None:
    ephrin_curve.save(ephrin_curve_plot)

# Rename the neutcurve columns to what they mean in this experiment
fitParams = fitParams.rename(
    columns={"serum": "target_cells", "virus": "soluble_receptor"}
)
# Multiply by 1000 for the column labelled nM (the plot axis above is labelled μM)
fitParams["ic50_nM"] = fitParams["ic50"] * 1000
subset = fitParams[fitParams["target_cells"] == "CHO-EFNB3"]
display(subset[["soluble_receptor", "ic50_nM"]].round(3))
soluble_receptor ic50_nM
0 dimeric-bEFNB2 0.045
1 monomeric-bEFNB2 0.892
2 dimeric-bEFNB3 0.610
3 monomeric-bEFNB3 434.780

Pull in binding validation neut data¶

In [8]:
# Read the single-mutant validation measurements, then relabel the wild-type
# virus ("WT") as "Unmutated"
validation_curves = pd.read_csv(ephrin_validation_curves)
unmutated_labels = validation_curves['virus'].str.replace('WT','Unmutated')
validation_curves['virus'] = unmutated_labels
In [9]:
# Re-bind `fits` to curves fitted on the validation data (again `fixbottom=0`)
fits = neutcurve.curvefits.CurveFits(data=validation_curves, fixbottom=0)

# Fit-parameter table with columns renamed to what they mean in this experiment
fitParams = fits.fitParams(
    ics=[50, 90, 95, 97, 98, 99],
).rename(
    columns={"serum": "target_cells", "virus": "mutant"},
)


def extract_dataframe_from_neutcurve(serum, viruses, replicate="average"):
    """Stack the curve data frames for several viruses into one frame.

    Re-declared in this cell. `fits` is read from module scope when the
    function is called, so it uses the validation fits created just above.

    Parameters
    ----------
    serum : str
        Serum label passed to `fits.getCurve`.
    viruses : iterable of str
        Virus labels to retrieve; each becomes the value of the `virus` column.
    replicate : str, optional
        Replicate passed to `fits.getCurve` (default "average").

    Returns
    -------
    pandas.DataFrame
        Row-wise concatenation of the per-virus curve frames.
    """

    def labelled_curve(virus):
        # Fetch one curve's data and tag it with the virus it belongs to
        frame = fits.getCurve(
            serum=serum, virus=virus, replicate=replicate
        ).dataframe()
        frame["virus"] = virus
        return frame

    return pd.concat([labelled_curve(virus) for virus in viruses], axis=0)


def get_curves(df, serum):
    """Collect the averaged curves for the fixed panel of validation viruses.

    Parameters
    ----------
    df : pandas.DataFrame
        Accepted but not read by this function; the curves come from
        `extract_dataframe_from_neutcurve`.
    serum : str
        Serum label whose curves are retrieved.

    Returns
    -------
    pandas.DataFrame
        Curve data with added `upper` / `lower` columns
        (measurement plus/minus its standard error).
    """
    panel = ["H333Q", "Q492R", "V507I", "Q530F", "S553W", "D555K", "Unmutated"]
    curves = extract_dataframe_from_neutcurve(serum, panel)
    measurement, stderr = curves["measurement"], curves["stderr"]
    curves["upper"] = measurement + stderr
    curves["lower"] = measurement - stderr
    return curves


# One curve table per soluble receptor; the second argument is the serum label
# looked up in the fits.
e2_validation_curves = get_curves(validation_curves, "EFNB2-monomeric")
e3_validation_curves = get_curves(validation_curves, "EFNB3-dimeric")


# Sorting function to put the unmutated reference ('Unmutated', or the older
# 'WT' label) on top of the legend, followed by mutants in numerical order
def custom_sort_order(array):
    """Order labels for the legend: reference virus first, then by site number.

    Parameters
    ----------
    array : iterable of str
        Virus labels such as 'Q530F' or 'Unmutated'.

    Returns
    -------
    list
        Reference labels ('Unmutated' / 'WT') first, then the remaining labels
        sorted by the first run of digits they contain (labels without digits
        sort as 0). Ties keep their input order.
    """
    reference_labels = ("Unmutated", "WT")

    # Sort based on the numerical part in mutation strings, e.g., '530' in 'Q530F'
    def extract_number(virus):
        num = re.search(r"\d+", virus)
        return int(num.group()) if num else 0

    ordered = sorted(array, key=extract_number)

    # Move the reference label(s) to the front. The previous check looked for
    # 'WT' but removed 'Unmutated', so it was skipped for the current labels
    # and raised ValueError if a 'WT' label was present.
    references = [label for label in ordered if label in reference_labels]
    mutants = [label for label in ordered if label not in reference_labels]
    return references + mutants


def plot_validation_curves(df, name):
    """Layer fitted curves, measured points and error bars for the mutants.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns `concentration`, `fit`, `measurement`,
        `lower`, `upper` and `virus`.
    name : str
        Receptor label inserted into the x-axis title.

    Returns
    -------
    Layered Altair chart (lines + circles + error bars), coloured by `virus`.
    """
    # Manually listed palette used for the mutants
    category10_colors = [
        "#4E79A5",
        "#F18F3B",
        "#E0585B",
        "#77B7B2",
        "#5AA155",
        "#EDC958",
        "#AF7AA0",
        "#FE9EA8",
        "#9C7561",
        "#BAB0AC",
    ]

    # First legend entry is drawn in black, the rest take palette colours
    viruses = df["virus"].unique()
    colors = ["black"] + category10_colors[: len(viruses) - 1]
    legend_order = custom_sort_order(viruses)

    panel_size = {"width": 300, "height": 200}
    x_title = f"{name} conc. (μg/mL)"
    y_title = "Fraction Infectivity"

    def virus_color():
        # Fresh colour encoding with the explicit legend order and palette
        return alt.Color(
            "virus",
            title="Virus",
            scale=alt.Scale(domain=legend_order, range=colors),
        )

    def log_x(field):
        # Log-scaled concentration axis with sparse scientific-notation ticks
        return alt.X(
            field,
            scale=alt.Scale(type="log"),
            axis=alt.Axis(format=".0e", tickCount=3),
            title=x_title,
        )

    # Fitted curve per virus
    fit_layer = (
        alt.Chart(df)
        .mark_line(size=1.5)
        .encode(
            x=log_x("concentration:Q"),
            y=alt.Y("fit:Q", title=y_title, axis=alt.Axis(tickCount=3)),
            color=virus_color(),
        )
        .properties(**panel_size)
    )

    # Measured values drawn as filled circles
    point_layer = (
        alt.Chart(df)
        .mark_circle(opacity=1, size=40)
        .encode(
            x=log_x("concentration"),
            y=alt.Y("measurement:Q", title=y_title, axis=alt.Axis(tickCount=3)),
            color=virus_color(),
        )
        .properties(**panel_size)
    )

    # Error bars spanning the precomputed `lower`..`upper` columns
    errorbar_layer = (
        alt.Chart(df)
        .mark_errorbar(opacity=1)
        .encode(
            x="concentration",
            y=alt.Y("lower", title=y_title),
            y2="upper",
            color="virus",
        )
        .properties(**panel_size)
    )

    return fit_layer + point_layer + errorbar_layer


# Validation curves against monomeric bat ephrin-B2; saved only when output
# paths were supplied (EFNB2_neut_corr is not None).
ephrin_b2_neut_curve = plot_validation_curves(e2_validation_curves, "monomeric-bEFNB2")
ephrin_b2_neut_curve.display()
if EFNB2_neut_corr is not None:
    ephrin_b2_neut_curve.save(e2_curve_plot)

# Same for dimeric bat ephrin-B3
ephrin_b3_neut_curve = plot_validation_curves(e3_validation_curves, "dimeric-bEFNB3")
ephrin_b3_neut_curve.display()
if EFNB2_neut_corr is not None:
    ephrin_b3_neut_curve.save(e3_curve_plot)

# Find the IC50 values in ng/mL and relative IC50 to WT
fitParams["ic50_ng"] = (fitParams["ic50"] * 1000).round(1)

# Each receptor is normalised to its OWN unmutated IC50. Previously every row
# (including the EFNB3-dimeric ones) was divided by the EFNB2-monomeric value.
wt_ic50_by_target = fitParams.loc[
    fitParams["mutant"] == "Unmutated"
].set_index("target_cells")["ic50_ng"]

fitParams["relative_ic50"] = (
    fitParams["ic50_ng"] / fitParams["target_cells"].map(wt_ic50_by_target)
).round(1)
display(fitParams[["target_cells", "mutant", "ic50_ng", "relative_ic50"]])
target_cells mutant ic50_ng relative_ic50
0 EFNB2-monomeric H333Q 92.4 1.6
1 EFNB2-monomeric Q492R 527.8 9.2
2 EFNB2-monomeric V507I 36.9 0.6
3 EFNB2-monomeric Q530F 1077.4 18.8
4 EFNB2-monomeric S553W 20.6 0.4
5 EFNB2-monomeric D555K 1799.8 31.4
6 EFNB2-monomeric Unmutated 57.4 1.0
7 EFNB3-dimeric H333Q 88.5 1.5
8 EFNB3-dimeric Q492R 39.0 0.7
9 EFNB3-dimeric V507I 78.6 1.4
10 EFNB3-dimeric Q530F 29.8 0.5
11 EFNB3-dimeric S553W 32.8 0.6
12 EFNB3-dimeric D555K 178.3 3.1
13 EFNB3-dimeric Unmutated 57.5 1.0

Now plot correlations between IC50 and DMS binding score¶

Pull in the measured IC50s and the DMS binding data, and prepare them for plotting.

In [10]:
# Read the measured IC50s (NA detection switched off via `na_filter`), then
# relabel the wild-type entry ("WT") as "Unmutated"
validation_ic50s = pd.read_csv(validation_ic50s_file, na_filter=None)
unmutated_labels = validation_ic50s['mutation'].str.replace('WT','Unmutated')
validation_ic50s['mutation'] = unmutated_labels
In [11]:
# Read in filtered DMS binding data
# (one table per receptor: monomeric bEFNB2 and dimeric bEFNB3)
e2_monomeric_binding = pd.read_csv(e2_monomeric_binding_file)
e3_dimeric_binding = pd.read_csv(e3_dimeric_binding_file)


def make_df(df, name):
    """Join measured IC50s with DMS binding scores for one receptor.

    Parameters
    ----------
    df : pandas.DataFrame
        DMS binding table; must have a `mutation` column to merge on.
    name : str
        Value of the `antibody` column to keep (e.g. "EFNB2-monomeric").

    Returns
    -------
    pandas.DataFrame
        Rows of the module-level `validation_ic50s` merged with `df` on
        `mutation`, plus the "Unmutated" rows with `Ephrin binding_mean` set
        to 0, restricted to `antibody == name`. The result is an independent
        copy, so callers can add columns to it.
    """
    merged = validation_ic50s.merge(df, on=["mutation"])
    # The unmutated virus is appended separately (the inner merge above only
    # keeps mutations present in `df`) and is given a binding score of 0.
    wt_rows = validation_ic50s[validation_ic50s["mutation"] == "Unmutated"].copy()
    wt_rows["Ephrin binding_mean"] = 0.00000
    merged = pd.concat([merged, wt_rows], ignore_index=True)
    # Explicit copy: the boolean filter otherwise hands back a slice, and
    # assigning a new column to it later raises SettingWithCopyWarning.
    return merged[merged["antibody"] == name].copy()


# One merged IC50/binding table per receptor; the second argument is the
# `antibody` value to keep.
e2_df_out = make_df(e2_monomeric_binding, "EFNB2-monomeric")
e3_df_out = make_df(e3_dimeric_binding, "EFNB3-dimeric")

Plot correlation data:

In [12]:
# Sorting function to put the unmutated reference ('Unmutated', or the older
# 'WT' label) on top of the legend, followed by mutants in numerical order
def custom_sort_order(array):
    """Order labels for the legend: reference virus first, then by site number.

    Parameters
    ----------
    array : iterable of str
        Mutation labels such as 'Q530F' or 'Unmutated'.

    Returns
    -------
    list
        Reference labels ('Unmutated' / 'WT') first, then the remaining labels
        sorted by the first run of digits they contain (labels without digits
        sort as 0). Ties keep their input order.
    """
    reference_labels = ("Unmutated", "WT")

    # Sort based on the numerical part in mutation strings, e.g., '530' in 'Q530F'
    def extract_number(mutation):
        num = re.search(r"\d+", mutation)
        return int(num.group()) if num else 0

    ordered = sorted(array, key=extract_number)

    # Move the reference label(s) to the front. The previous check looked for
    # 'WT' but removed 'Unmutated', so it was skipped for the current labels
    # and raised ValueError if a 'WT' label was present.
    references = [label for label in ordered if label in reference_labels]
    mutants = [label for label in ordered if label not in reference_labels]
    return references + mutants


def make_corr_chart(df, name):
    """Scatter DMS receptor-binding scores against NT50 with the Pearson r.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns `measured IC50`, `Ephrin binding_mean` and
        `mutation`. The frame is not modified.
    name : str
        Receptor label used in the y-axis title. The EFNB3 dimeric receptor
        (either "EFNB3-dimeric" or "dimeric-bEFNB3") gets a fixed y domain.

    Returns
    -------
    Layered Altair chart: the scatter points plus an "r = ..." annotation.
    """
    # Work on a copy so the caller's frame does not gain an `NT50` column
    # (and so no SettingWithCopyWarning is raised for filtered inputs).
    df = df.copy()

    # calculate NT50
    df["NT50"] = 1 / df["measured IC50"]

    # calculate R value:
    r_value = scipy.stats.linregress(df["Ephrin binding_mean"], df["NT50"]).rvalue
    print(f"The r-value is: {r_value:.2f}")

    # Manually listed palette used for the mutants
    category10_colors = [
        "#4E79A5",
        "#F18F3B",
        "#E0585B",
        "#77B7B2",
        "#5AA155",
        "#EDC958",
        "#AF7AA0",
        "#FE9EA8",
        "#9C7561",
        "#BAB0AC",
    ]

    # The callers pass "dimeric-bEFNB3", so comparing only against
    # 'EFNB3-dimeric' left this branch unreachable; accept both spellings.
    if name in ("EFNB3-dimeric", "dimeric-bEFNB3"):
        scale_y = alt.Scale(type="log", domain=[5, 100])
        axis_y = alt.Axis(values=[1, 10, 100])
    else:
        scale_y = alt.Scale(type="log")
        axis_y = alt.Axis(values=[0.1, 1, 10, 100])

    # First legend entry is drawn in black, the rest take palette colours
    mutations = df["mutation"].unique()
    colors = ["black"] + category10_colors[: len(mutations) - 1]
    corr_chart = (
        alt.Chart(df)
        .encode(
            x=alt.X(
                "Ephrin binding_mean",
                title="Receptor binding in DMS",
                axis=alt.Axis(tickCount=4),
            ),
            y=alt.Y(
                "NT50",
                title=f"{name} NT50 (μg/ml-1)",
                scale=scale_y,
                axis=axis_y,
            ),
            color=alt.Color(
                "mutation",
                title="Virus",
                scale=alt.Scale(
                    domain=custom_sort_order(mutations), range=colors
                ),
            ),
        )
        .mark_point(size=120, filled=True, opacity=1)
    )

    # Annotation anchored at the smallest x / largest y data value, then
    # nudged by dx/dy
    text = (
        alt.Chart(
            {
                "values": [
                    {
                        "x": df["Ephrin binding_mean"].min(),
                        "y": df["NT50"].max(),
                        "text": f"r = {r_value:.2f}",
                    }
                ]
            }
        )
        .mark_text(align="left", baseline="top", dx=-8, dy=-20)
        .encode(x=alt.X("x:Q"), y=alt.Y("y:Q"), text="text:N")
    )
    chart = corr_chart + text
    return chart


# Correlation chart for monomeric bat ephrin-B2; every save below happens only
# when output paths were supplied (EFNB2_neut_corr is not None).
e2_corr = make_corr_chart(e2_df_out, "monomeric-bEFNB2")
e2_corr.display()
if EFNB2_neut_corr is not None:
    e2_corr.save(e2_corr_plot)

# Correlation chart for dimeric bat ephrin-B3
e3_corr = make_corr_chart(e3_df_out, "dimeric-bEFNB3")
e3_corr.display()
if EFNB2_neut_corr is not None:
    e3_corr.save(e3_corr_plot)

# Per-receptor panels: neutralization curves side by side with the correlation
if EFNB2_neut_corr is not None:
    (ephrin_b2_neut_curve | e2_corr).save(EFNB2_neut_corr)
    (ephrin_b3_neut_curve | e3_corr).save(EFNB3_neut_corr)

# Combined figure: one row per receptor (`|` = side by side, `&` = stacked)
all_ephrin_neuts = (ephrin_b2_neut_curve | e2_corr) & (ephrin_b3_neut_curve | e3_corr)
all_ephrin_neuts.display()
if EFNB2_neut_corr is not None:
    all_ephrin_neuts.save(all_ephrin_neut_plots)
The r-value is: 0.90
The r-value is: 0.70
In [ ]: