IC50_validations¶

This notebook will read in experimentally determined fraction infectivity curves, plot, and then make correlations with DMS data

  • Written by Brendan Larsen
In [1]:
# this cell is tagged as parameters for `papermill` parameterization
altair_config = None
nipah_config = None

neut = None
escape_file = None

nah1_validation_neut_curves = None
IC50_validation_plot = None
combined_ic50_neut_curve_plot = None
In [2]:
# Parameters
nipah_config = "nipah_config.yaml"
altair_config = "data/custom_analyses_data/theme.py"
neut = "data/custom_analyses_data/experimental_data/nAH1_3_mab_validation_neuts.csv"
escape_file = "results/antibody_escape/averages/nAH1.3_mut_effect.csv"
nah1_validation_neut_curves = "results/images/nah1_validation_neut_curves.html"
IC50_validation_plot = "results/images/IC50_validation_plot.html"
combined_ic50_neut_curve_plot = "results/images/combined_ic50_neut_curve_plot.html"
In [3]:
import math
import os
import re
import altair as alt
import numpy as np
import pandas as pd
import scipy.stats
import yaml
import neutcurve
import scipy.stats
print(f"Using `neutcurve` version {neutcurve.__version__}")
import sys

# allow more rows for Altair
_ = alt.data_transformers.disable_max_rows()

# setup working directory
if os.getcwd() == "/fh/fast/bloom_j/computational_notebooks/blarsen/2023/Nipah_Malaysia_RBP_DMS/":
    pass
    print("Already in correct directory")
else:
    os.chdir("/fh/fast/bloom_j/computational_notebooks/blarsen/2023/Nipah_Malaysia_RBP_DMS/")
    print("Setup in correct directory")

#import altair themes from /data/custom_analyses_data/theme.py and enable
sys.path.append('data/custom_analyses_data/')
import theme
alt.themes.register('main_theme', theme.main_theme)
alt.themes.enable('main_theme')
Using `neutcurve` version 0.5.7
Setup in correct directory
Out[3]:
ThemeRegistry.enable('main_theme')

For running interactively¶

In [4]:
if nah1_validation_neut_curves is None:
    #altair_config = "data/custom_analyses_data/theme.py"
    nipah_config = "nipah_config.yaml"
    escape_file = "results/antibody_escape/averages/nAH1.3_mut_effect.csv"
    neut = "data/custom_analyses_data/experimental_data/nAH1_3_mab_validation_neuts.csv"

    ephrin_binding_neuts_file = (
        "data/custom_analyses_data/experimental_data/bat_ephrin_neuts.csv"
    )
    ephrin_validation_curves = "data/custom_analyses_data/experimental_data/binding_single_mutant_validations.csv"
    validation_ic50s_file = (
        "data/custom_analyses_data/experimental_data/receptor_IC_validations.csv"
    )
    e2_monomeric_binding_file = (
        "results/receptor_affinity/averages/EFNB2_monomeric_mut_effect.csv"
    )
    e3_dimeric_binding_file = (
        "results/receptor_affinity/averages/EFNB3_dimeric_mut_effect.csv"
    )

Read in config files¶

In [5]:
with open(nipah_config) as f:
    config = yaml.safe_load(f)

Function for getting fitting neut curve from experimental data¶

In [6]:
# Estimate neutralization curves using the `curvefits` module from `neutcurve` package.
def get_neutcurve(df, replicate="average"):
    # estimate neut curves
    fits = neutcurve.curvefits.CurveFits(
        data=df,  # Input DataFrame containing the experimental data.
        serum_col="antibody",
        virus_col="virus",
        replicate_col="replicate",
        conc_col="concentration",
        fracinf_col="fraction infectivity",
        fixbottom=0,
    )
    # get neutcurve parameters for different IC values
    fitParams = fits.fitParams(ics=[50, 90, 95, 97, 98, 99])

    # Prepare lists of unique antibodies and viruses for looping.
    antibody_list = list(df["antibody"].unique())
    virus_list = list(df["virus"].unique())

    curve_list = (
        []
    )  # Initialize an empty list to store neutralization curve data frames.
    # Loop through each combination of antibody and virus to get their neutralization curve.
    for antibody in antibody_list:
        for virus in virus_list:
            # get curves and combine
            curve = fits.getCurve(serum=antibody, virus=virus, replicate=replicate)
            neut_df = curve.dataframe()
            neut_df["antibody"] = antibody
            neut_df["virus"] = virus
            curve_list.append(neut_df)

    neut_curve_df = pd.concat(curve_list)
    # Calculate upper and lower bounds for the measurements using standard error (stderr).
    neut_curve_df["upper"] = neut_curve_df["measurement"] + neut_curve_df["stderr"]
    neut_curve_df["lower"] = neut_curve_df["measurement"] - neut_curve_df["stderr"]

    return (
        fitParams,
        neut_curve_df,
    )  # Return the fit parameters and the neut fitted neut curves

Make neut curve plot¶

In [7]:
# Sorting function to organize mutation strings with 'WT' at the start and the rest numerically.
def custom_sort_order(array):
    # Helper function to extract numerical part from mutation strings.
    def extract_number(virus):
        num = re.search(
            r"\d+", virus
        )  # Find the first sequence of digits in the string.
        return (
            int(num.group()) if num else 0
        )  # Convert digits to integer, or 0 if none found.

    array = sorted(
        array, key=extract_number
    )  # Sort array by the numerical value extracted.

    # Ensure 'WT' (wild type) is the first element in the list if it exists.
    if "WT" in array:
        array.remove("Unmutated")  # Remove 'WT' from its current position.
        array.insert(0, "Unmutated")  # Insert 'WT' at the beginning of the list.
    return array


# Function to plot neutralization curves with customized color coding for each virus mutation.
def plot_neut_curves(df, neut_curve_name, color_name):
    # Manually define colors corresponding to the category10 color scheme plus black.
    category10_colors = [
        "#4E79A5",
        "#F18F3B",
        "#E0585B",
        "#77B7B2",
        "#5AA155",
        "#EDC958",
        "#AF7AA0",
        "#FE9EA8",
        "#9C7561",
        "#BAB0AC",
    ]

    # Assign colors to mutations, starting with black for the first mutation.
    colors = ["black"] + category10_colors[: len(df["virus"].unique()) - 1]
    # Construct a line chart for the neutralization curves.
    chart = (
        alt.Chart(df)
        .mark_line(size=1.5, opacity=1)
        .encode(
            x=alt.X(
                "concentration:Q",
                scale=alt.Scale(type="log"),
                axis=alt.Axis(format=".0e", tickCount=3),
                title=neut_curve_name,
            ),
            y=alt.Y("fit:Q", title="Fraction Infectivity", axis=alt.Axis(tickCount=3)),
            color=alt.Color(
                "virus",
                title=color_name,
                scale=alt.Scale(
                    domain=custom_sort_order(df["virus"].unique()), range=colors
                ),
            ),
        )
        .properties(
            height=config["neut_curve_height"],
            width=config["neut_curve_width"],
        )
    )
    # Add data points as circles on the plot for actual measurements.
    circle = (
        alt.Chart(df)
        .mark_circle(size=40, opacity=1)
        .encode(
            x=alt.X(
                "concentration",
                scale=alt.Scale(type="log"),
                axis=alt.Axis(format=".0e", tickCount=3),
                title=neut_curve_name,
            ),
            y=alt.Y(
                "measurement:Q",
                title="Fraction Infectivity",
                axis=alt.Axis(tickCount=3),
            ),
            color=alt.Color(
                "virus",
                title=color_name,
                scale=alt.Scale(
                    domain=custom_sort_order(df["virus"].unique()), range=colors
                ),
            ),
        )
        .properties(
            height=config["neut_curve_height"],
            width=config["neut_curve_width"],
        )
    )
    # Add error bars
    error = (
        alt.Chart(df)
        .mark_errorbar(opacity=1)
        .encode(
            x="concentration",
            y=alt.Y("lower", title="Fraction Infectivity"),
            y2="upper",
            color="virus",
        )
    )
    plot = (
        chart + circle + error
    )  # Combine line chart, circles, and error bars into one plot.
    return plot

Now calculate correlations with DMS data and plot¶

In [8]:
def plot_ic50_correlations(fitparams, name, escape):
    # Merge IC50 and DMS escape dataframes and append WT so it has escape score of 0
    fitparams["lower_bound"] = fitparams["ic50_bound"].apply(lambda x: x == "lower")
    fitparams["mutation"] = fitparams["virus"]
    # Merge with DMS escape data
    merged = fitparams.merge(escape, on=["mutation"])
    wt_rows = fitparams[fitparams["mutation"] == "Unmutated"].copy()
    wt_rows["escape_mean"] = 0
    merged = pd.concat([merged, wt_rows], ignore_index=True)
    display(merged)
    # calculate R value:
    slope, intercept, r_value, p_value, std_err = scipy.stats.linregress(
        merged["escape_mean"], merged["ic50"]
    )

    # Define the category10 colors manually
    category10_colors = [
        "#4E79A5",
        "#F18F3B",
        "#E0585B",
        "#77B7B2",
        "#5AA155",
        "#EDC958",
        "#AF7AA0",
        "#FE9EA8",
        "#9C7561",
        "#BAB0AC",
    ]

    # Adjust colors based on the unique mutations
    colors = ["black"] + category10_colors[: len(merged["mutation"].unique()) - 1]

    corr_chart = (
        alt.Chart(merged)
        .mark_point(size=150,filled=True,opacity=1)
        .encode(
            x=alt.X(
                "escape_mean", title="Antibody escape from DMS", axis=alt.Axis(grid=False,tickCount=3)
            ),
            y=alt.Y(
                "ic50",
                title=f"{name}",
                scale=alt.Scale(type="log"),
                axis=alt.Axis(grid=False,tickCount=4),
            ),
            color=alt.Color(
                "mutation",
                title="Virus",
                scale=alt.Scale(
                    domain=custom_sort_order(merged["mutation"].unique()), range=colors
                ),
            ),
            #shape=alt.Shape("lower_bound", title="Lower Bound"),
        )
    )
    text = (
        alt.Chart(
            {
                "values": [
                    {
                        "x": merged["escape_median"].min(),
                        "y": merged["ic50"].max(),
                        "text": f"r = {r_value:.2f}",
                    }
                ]
            }
        )
        .mark_text(align="left", baseline="top", dx=-10)  # Adjust this for position
        .encode(x=alt.X("x:Q"), y=alt.Y("y:Q"), text="text:N")
    )
    final_chart = corr_chart + text
    return final_chart

Combine all the data from above in single function¶

In [9]:
# Function to calculate IC50 correlations and generate plots for neutralization curves and IC50 correlations.
def get_neut_curve_ic50_correlations(
    raw_data, escape, neut_curve_name, name, color_name
):
    # Process raw data to separate fitted curve data and neutralization curve data.
    fit_df, neut_df = get_neutcurve(raw_data)

    # Convert IC50 values from µg to ng/mL for better readability.
    fit_df["ic50_ng"] = fit_df["ic50"] * 1000
    # Display IC50 values for each serum-virus combination, rounded for clarity.
    print("Here are the ic50 values:")
    display(fit_df[["serum", "virus", "ic50_ng"]].round(2))

    # Generate a plot of the neutralization curves using the provided data and color scheme.
    neut_curves_plot = plot_neut_curves(neut_df, neut_curve_name, color_name)

    # Create a plot showing correlations between IC50 values and a specified variable (e.g., escape).
    ic50_correlations = plot_ic50_correlations(fit_df, name, escape)

    # Combine the neutralization curve plot and the IC50 correlation plot side by side.
    combined_neut_curve = neut_curves_plot | ic50_correlations

    # Return the individual plots as well as the combined plot for further use or display.
    return neut_curves_plot, ic50_correlations, combined_neut_curve

Do for antibody validations¶

In [10]:
# Load escape mutation data from a CSV file
escape = pd.read_csv(escape_file)
# Load neutralization data from another CSV file
neuts = pd.read_csv(neut)

# Get rid of Y455M because its not present in escape data
neuts = neuts[neuts["virus"] != "Y455M"]


def fix_neut_df(df):
    df = df[["serum", "virus", "replicate", "concentration", "fraction infectivity"]]
    df = df.rename(columns={"serum": "antibody"})
    return df


# Fix naming in dataframe
nAH1_validation_neuts = fix_neut_df(neuts)

# Calculate neut curve, ic50 correlations with DMS data, and combine figures
nAH1_curve_plot, nAH1_ic50_correlations, nAH1_combined = (
    get_neut_curve_ic50_correlations(
        nAH1_validation_neuts,  # Single mutant validations
        escape,  # Filtered DMS escape file
        "nAH1.3 Concentration (μg/ml)",  # Title for the neutralization curve
        "Validation IC₅₀ (μg/ml)",  # Title for the IC50 correlation plot
        "Virus",  # Color legend title
    )
)

nAH1_curve_plot.display()
if combined_ic50_neut_curve_plot is not None:
    nAH1_curve_plot.save(nah1_validation_neut_curves)

nAH1_ic50_correlations.display()
if combined_ic50_neut_curve_plot is not None:
    nAH1_ic50_correlations.save(IC50_validation_plot)

nAH1_combined.display()
if combined_ic50_neut_curve_plot is not None:
    nAH1_combined.save(combined_ic50_neut_curve_plot)
Here are the ic50 values:
serum virus ic50_ng
0 nAH1.3 L184H 10.80
1 nAH1.3 Q450S 6.54
2 nAH1.3 W519T 7.92
3 nAH1.3 I520T 8725.69
4 nAH1.3 D468V 487.70
5 nAH1.3 I517K 10000.00
6 nAH1.3 P185D 10000.00
7 nAH1.3 Unmutated 6.83
serum virus replicate nreplicates ic50 ic50_bound ic50_str ic90 ic90_bound ic90_str ... mutant escape_mean escape_median escape_std n_models times_seen frac_models LibA-230725-nAH1.3 LibB-230630-nAH1.3 LibB-230720-nAH1.3
0 nAH1.3 L184H average 2 0.010802 interpolated 0.0108 0.043326 interpolated 0.0433 ... H 0.3322 0.3080 0.24800 3.0 4.667 1.0 0.3080 0.09721 0.5915
1 nAH1.3 Q450S average 2 0.006540 interpolated 0.00654 0.032273 interpolated 0.0323 ... S 0.3267 0.3306 0.04523 3.0 3.333 1.0 0.2797 0.33060 0.3699
2 nAH1.3 W519T average 2 0.007923 interpolated 0.00792 0.026042 interpolated 0.026 ... T 0.2210 0.2147 0.05903 3.0 10.000 1.0 0.2147 0.28290 0.1654
3 nAH1.3 I520T average 2 8.725689 interpolated 8.73 10.000000 lower >10 ... T 1.2640 1.2170 0.60140 3.0 8.000 1.0 0.6875 1.21700 1.8880
4 nAH1.3 D468V average 2 0.487700 interpolated 0.488 10.000000 lower >10 ... V 1.8670 1.5540 1.13600 3.0 6.667 1.0 0.9211 1.55400 3.1270
5 nAH1.3 I517K average 2 10.000000 lower >10 10.000000 lower >10 ... K 2.1900 2.5660 1.12800 3.0 7.667 1.0 0.9221 2.56600 3.0810
6 nAH1.3 P185D average 2 10.000000 lower >10 10.000000 lower >10 ... D 1.9620 1.8280 1.23900 3.0 7.333 1.0 0.7950 1.82800 3.2620
7 nAH1.3 Unmutated average 2 0.006834 interpolated 0.00683 0.019276 interpolated 0.0193 ... NaN 0.0000 NaN NaN NaN NaN NaN NaN NaN NaN

8 rows × 42 columns

In [ ]:
 
In [ ]: