IC50_validations¶
This notebook will read in experimentally determined fraction infectivity curves, plot, and then make correlations with DMS data
- Written by Brendan Larsen
In [1]:
# this cell is tagged as parameters for `papermill` parameterization
altair_config = None
nipah_config = None
neut = None
escape_file = None
nah1_validation_neut_curves = None
IC50_validation_plot = None
combined_ic50_neut_curve_plot = None
In [2]:
# Parameters
nipah_config = "nipah_config.yaml"
altair_config = "data/custom_analyses_data/theme.py"
neut = "data/custom_analyses_data/experimental_data/nAH1_3_mab_validation_neuts.csv"
escape_file = "results/antibody_escape/averages/nAH1.3_mut_effect.csv"
nah1_validation_neut_curves = "results/images/nah1_validation_neut_curves.html"
IC50_validation_plot = "results/images/IC50_validation_plot.html"
combined_ic50_neut_curve_plot = "results/images/combined_ic50_neut_curve_plot.html"
In [3]:
import math
import os
import re
import altair as alt
import numpy as np
import pandas as pd
import scipy.stats
import yaml
import neutcurve
import scipy.stats
print(f"Using `neutcurve` version {neutcurve.__version__}")
import sys
# allow more rows for Altair
_ = alt.data_transformers.disable_max_rows()
# setup working directory
if os.getcwd() == "/fh/fast/bloom_j/computational_notebooks/blarsen/2023/Nipah_Malaysia_RBP_DMS/":
pass
print("Already in correct directory")
else:
os.chdir("/fh/fast/bloom_j/computational_notebooks/blarsen/2023/Nipah_Malaysia_RBP_DMS/")
print("Setup in correct directory")
#import altair themes from /data/custom_analyses_data/theme.py and enable
sys.path.append('data/custom_analyses_data/')
import theme
alt.themes.register('main_theme', theme.main_theme)
alt.themes.enable('main_theme')
Using `neutcurve` version 0.5.7 Setup in correct directory
Out[3]:
ThemeRegistry.enable('main_theme')
For running interactively¶
In [4]:
if nah1_validation_neut_curves is None:
#altair_config = "data/custom_analyses_data/theme.py"
nipah_config = "nipah_config.yaml"
escape_file = "results/antibody_escape/averages/nAH1.3_mut_effect.csv"
neut = "data/custom_analyses_data/experimental_data/nAH1_3_mab_validation_neuts.csv"
ephrin_binding_neuts_file = (
"data/custom_analyses_data/experimental_data/bat_ephrin_neuts.csv"
)
ephrin_validation_curves = "data/custom_analyses_data/experimental_data/binding_single_mutant_validations.csv"
validation_ic50s_file = (
"data/custom_analyses_data/experimental_data/receptor_IC_validations.csv"
)
e2_monomeric_binding_file = (
"results/receptor_affinity/averages/EFNB2_monomeric_mut_effect.csv"
)
e3_dimeric_binding_file = (
"results/receptor_affinity/averages/EFNB3_dimeric_mut_effect.csv"
)
Read in config files¶
In [5]:
with open(nipah_config) as f:
config = yaml.safe_load(f)
Function for getting fitting neut curve from experimental data¶
In [6]:
# Estimate neutralization curves using the `curvefits` module from `neutcurve` package.
def get_neutcurve(df, replicate="average"):
# estimate neut curves
fits = neutcurve.curvefits.CurveFits(
data=df, # Input DataFrame containing the experimental data.
serum_col="antibody",
virus_col="virus",
replicate_col="replicate",
conc_col="concentration",
fracinf_col="fraction infectivity",
fixbottom=0,
)
# get neutcurve parameters for different IC values
fitParams = fits.fitParams(ics=[50, 90, 95, 97, 98, 99])
# Prepare lists of unique antibodies and viruses for looping.
antibody_list = list(df["antibody"].unique())
virus_list = list(df["virus"].unique())
curve_list = (
[]
) # Initialize an empty list to store neutralization curve data frames.
# Loop through each combination of antibody and virus to get their neutralization curve.
for antibody in antibody_list:
for virus in virus_list:
# get curves and combine
curve = fits.getCurve(serum=antibody, virus=virus, replicate=replicate)
neut_df = curve.dataframe()
neut_df["antibody"] = antibody
neut_df["virus"] = virus
curve_list.append(neut_df)
neut_curve_df = pd.concat(curve_list)
# Calculate upper and lower bounds for the measurements using standard error (stderr).
neut_curve_df["upper"] = neut_curve_df["measurement"] + neut_curve_df["stderr"]
neut_curve_df["lower"] = neut_curve_df["measurement"] - neut_curve_df["stderr"]
return (
fitParams,
neut_curve_df,
) # Return the fit parameters and the neut fitted neut curves
Make neut curve plot¶
In [7]:
# Sorting function to organize mutation strings with 'WT' at the start and the rest numerically.
def custom_sort_order(array):
# Helper function to extract numerical part from mutation strings.
def extract_number(virus):
num = re.search(
r"\d+", virus
) # Find the first sequence of digits in the string.
return (
int(num.group()) if num else 0
) # Convert digits to integer, or 0 if none found.
array = sorted(
array, key=extract_number
) # Sort array by the numerical value extracted.
# Ensure 'WT' (wild type) is the first element in the list if it exists.
if "WT" in array:
array.remove("Unmutated") # Remove 'WT' from its current position.
array.insert(0, "Unmutated") # Insert 'WT' at the beginning of the list.
return array
# Function to plot neutralization curves with customized color coding for each virus mutation.
def plot_neut_curves(df, neut_curve_name, color_name):
# Manually define colors corresponding to the category10 color scheme plus black.
category10_colors = [
"#4E79A5",
"#F18F3B",
"#E0585B",
"#77B7B2",
"#5AA155",
"#EDC958",
"#AF7AA0",
"#FE9EA8",
"#9C7561",
"#BAB0AC",
]
# Assign colors to mutations, starting with black for the first mutation.
colors = ["black"] + category10_colors[: len(df["virus"].unique()) - 1]
# Construct a line chart for the neutralization curves.
chart = (
alt.Chart(df)
.mark_line(size=1.5, opacity=1)
.encode(
x=alt.X(
"concentration:Q",
scale=alt.Scale(type="log"),
axis=alt.Axis(format=".0e", tickCount=3),
title=neut_curve_name,
),
y=alt.Y("fit:Q", title="Fraction Infectivity", axis=alt.Axis(tickCount=3)),
color=alt.Color(
"virus",
title=color_name,
scale=alt.Scale(
domain=custom_sort_order(df["virus"].unique()), range=colors
),
),
)
.properties(
height=config["neut_curve_height"],
width=config["neut_curve_width"],
)
)
# Add data points as circles on the plot for actual measurements.
circle = (
alt.Chart(df)
.mark_circle(size=40, opacity=1)
.encode(
x=alt.X(
"concentration",
scale=alt.Scale(type="log"),
axis=alt.Axis(format=".0e", tickCount=3),
title=neut_curve_name,
),
y=alt.Y(
"measurement:Q",
title="Fraction Infectivity",
axis=alt.Axis(tickCount=3),
),
color=alt.Color(
"virus",
title=color_name,
scale=alt.Scale(
domain=custom_sort_order(df["virus"].unique()), range=colors
),
),
)
.properties(
height=config["neut_curve_height"],
width=config["neut_curve_width"],
)
)
# Add error bars
error = (
alt.Chart(df)
.mark_errorbar(opacity=1)
.encode(
x="concentration",
y=alt.Y("lower", title="Fraction Infectivity"),
y2="upper",
color="virus",
)
)
plot = (
chart + circle + error
) # Combine line chart, circles, and error bars into one plot.
return plot
Now calculate correlations with DMS data and plot¶
In [8]:
def plot_ic50_correlations(fitparams, name, escape):
# Merge IC50 and DMS escape dataframes and append WT so it has escape score of 0
fitparams["lower_bound"] = fitparams["ic50_bound"].apply(lambda x: x == "lower")
fitparams["mutation"] = fitparams["virus"]
# Merge with DMS escape data
merged = fitparams.merge(escape, on=["mutation"])
wt_rows = fitparams[fitparams["mutation"] == "Unmutated"].copy()
wt_rows["escape_mean"] = 0
merged = pd.concat([merged, wt_rows], ignore_index=True)
display(merged)
# calculate R value:
slope, intercept, r_value, p_value, std_err = scipy.stats.linregress(
merged["escape_mean"], merged["ic50"]
)
# Define the category10 colors manually
category10_colors = [
"#4E79A5",
"#F18F3B",
"#E0585B",
"#77B7B2",
"#5AA155",
"#EDC958",
"#AF7AA0",
"#FE9EA8",
"#9C7561",
"#BAB0AC",
]
# Adjust colors based on the unique mutations
colors = ["black"] + category10_colors[: len(merged["mutation"].unique()) - 1]
corr_chart = (
alt.Chart(merged)
.mark_point(size=150,filled=True,opacity=1)
.encode(
x=alt.X(
"escape_mean", title="Antibody escape from DMS", axis=alt.Axis(grid=False,tickCount=3)
),
y=alt.Y(
"ic50",
title=f"{name}",
scale=alt.Scale(type="log"),
axis=alt.Axis(grid=False,tickCount=4),
),
color=alt.Color(
"mutation",
title="Virus",
scale=alt.Scale(
domain=custom_sort_order(merged["mutation"].unique()), range=colors
),
),
#shape=alt.Shape("lower_bound", title="Lower Bound"),
)
)
text = (
alt.Chart(
{
"values": [
{
"x": merged["escape_median"].min(),
"y": merged["ic50"].max(),
"text": f"r = {r_value:.2f}",
}
]
}
)
.mark_text(align="left", baseline="top", dx=-10) # Adjust this for position
.encode(x=alt.X("x:Q"), y=alt.Y("y:Q"), text="text:N")
)
final_chart = corr_chart + text
return final_chart
Combine all the data from above in single function¶
In [9]:
# Function to calculate IC50 correlations and generate plots for neutralization curves and IC50 correlations.
def get_neut_curve_ic50_correlations(
raw_data, escape, neut_curve_name, name, color_name
):
# Process raw data to separate fitted curve data and neutralization curve data.
fit_df, neut_df = get_neutcurve(raw_data)
# Convert IC50 values from µg to ng/mL for better readability.
fit_df["ic50_ng"] = fit_df["ic50"] * 1000
# Display IC50 values for each serum-virus combination, rounded for clarity.
print("Here are the ic50 values:")
display(fit_df[["serum", "virus", "ic50_ng"]].round(2))
# Generate a plot of the neutralization curves using the provided data and color scheme.
neut_curves_plot = plot_neut_curves(neut_df, neut_curve_name, color_name)
# Create a plot showing correlations between IC50 values and a specified variable (e.g., escape).
ic50_correlations = plot_ic50_correlations(fit_df, name, escape)
# Combine the neutralization curve plot and the IC50 correlation plot side by side.
combined_neut_curve = neut_curves_plot | ic50_correlations
# Return the individual plots as well as the combined plot for further use or display.
return neut_curves_plot, ic50_correlations, combined_neut_curve
Do for antibody validations¶
In [10]:
# Load escape mutation data from a CSV file
escape = pd.read_csv(escape_file)
# Load neutralization data from another CSV file
neuts = pd.read_csv(neut)
# Get rid of Y455M because its not present in escape data
neuts = neuts[neuts["virus"] != "Y455M"]
def fix_neut_df(df):
df = df[["serum", "virus", "replicate", "concentration", "fraction infectivity"]]
df = df.rename(columns={"serum": "antibody"})
return df
# Fix naming in dataframe
nAH1_validation_neuts = fix_neut_df(neuts)
# Calculate neut curve, ic50 correlations with DMS data, and combine figures
nAH1_curve_plot, nAH1_ic50_correlations, nAH1_combined = (
get_neut_curve_ic50_correlations(
nAH1_validation_neuts, # Single mutant validations
escape, # Filtered DMS escape file
"nAH1.3 Concentration (μg/ml)", # Title for the neutralization curve
"Validation IC₅₀ (μg/ml)", # Title for the IC50 correlation plot
"Virus", # Color legend title
)
)
nAH1_curve_plot.display()
if combined_ic50_neut_curve_plot is not None:
nAH1_curve_plot.save(nah1_validation_neut_curves)
nAH1_ic50_correlations.display()
if combined_ic50_neut_curve_plot is not None:
nAH1_ic50_correlations.save(IC50_validation_plot)
nAH1_combined.display()
if combined_ic50_neut_curve_plot is not None:
nAH1_combined.save(combined_ic50_neut_curve_plot)
Here are the ic50 values:
serum | virus | ic50_ng | |
---|---|---|---|
0 | nAH1.3 | L184H | 10.80 |
1 | nAH1.3 | Q450S | 6.54 |
2 | nAH1.3 | W519T | 7.92 |
3 | nAH1.3 | I520T | 8725.69 |
4 | nAH1.3 | D468V | 487.70 |
5 | nAH1.3 | I517K | 10000.00 |
6 | nAH1.3 | P185D | 10000.00 |
7 | nAH1.3 | Unmutated | 6.83 |
serum | virus | replicate | nreplicates | ic50 | ic50_bound | ic50_str | ic90 | ic90_bound | ic90_str | ... | mutant | escape_mean | escape_median | escape_std | n_models | times_seen | frac_models | LibA-230725-nAH1.3 | LibB-230630-nAH1.3 | LibB-230720-nAH1.3 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | nAH1.3 | L184H | average | 2 | 0.010802 | interpolated | 0.0108 | 0.043326 | interpolated | 0.0433 | ... | H | 0.3322 | 0.3080 | 0.24800 | 3.0 | 4.667 | 1.0 | 0.3080 | 0.09721 | 0.5915 |
1 | nAH1.3 | Q450S | average | 2 | 0.006540 | interpolated | 0.00654 | 0.032273 | interpolated | 0.0323 | ... | S | 0.3267 | 0.3306 | 0.04523 | 3.0 | 3.333 | 1.0 | 0.2797 | 0.33060 | 0.3699 |
2 | nAH1.3 | W519T | average | 2 | 0.007923 | interpolated | 0.00792 | 0.026042 | interpolated | 0.026 | ... | T | 0.2210 | 0.2147 | 0.05903 | 3.0 | 10.000 | 1.0 | 0.2147 | 0.28290 | 0.1654 |
3 | nAH1.3 | I520T | average | 2 | 8.725689 | interpolated | 8.73 | 10.000000 | lower | >10 | ... | T | 1.2640 | 1.2170 | 0.60140 | 3.0 | 8.000 | 1.0 | 0.6875 | 1.21700 | 1.8880 |
4 | nAH1.3 | D468V | average | 2 | 0.487700 | interpolated | 0.488 | 10.000000 | lower | >10 | ... | V | 1.8670 | 1.5540 | 1.13600 | 3.0 | 6.667 | 1.0 | 0.9211 | 1.55400 | 3.1270 |
5 | nAH1.3 | I517K | average | 2 | 10.000000 | lower | >10 | 10.000000 | lower | >10 | ... | K | 2.1900 | 2.5660 | 1.12800 | 3.0 | 7.667 | 1.0 | 0.9221 | 2.56600 | 3.0810 |
6 | nAH1.3 | P185D | average | 2 | 10.000000 | lower | >10 | 10.000000 | lower | >10 | ... | D | 1.9620 | 1.8280 | 1.23900 | 3.0 | 7.333 | 1.0 | 0.7950 | 1.82800 | 3.2620 |
7 | nAH1.3 | Unmutated | average | 2 | 0.006834 | interpolated | 0.00683 | 0.019276 | interpolated | 0.0193 | ... | NaN | 0.0000 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
8 rows × 42 columns
In [ ]:
In [ ]: