ephrin_neut_curves.ipynb¶
This notebook analyzes neutralization of Nipah pseudovirus by soluble ephrin receptors and compares the measured IC50s with receptor-binding scores from DMS.
- Written by Brendan Larsen
In [1]:
# this cell is tagged as parameters for `papermill` parameterization
# Every name starts as None; a later cell assigns the real paths.

# --- configuration files ---
nipah_config = None
altair_config = None

# --- input data ---
ephrin_binding_neuts_file = None
ephrin_validation_curves = None
validation_ic50s_file = None
e2_monomeric_binding_file = None
e3_dimeric_binding_file = None

# --- output plots ---
ephrin_curve_plot = None
e2_curve_plot = None
e3_curve_plot = None
e2_corr_plot = None
e3_corr_plot = None
EFNB2_neut_corr = None
EFNB3_neut_corr = None
all_ephrin_neut_plots = None
In [2]:
# Parameters
# Concrete values for the placeholders declared in the parameters cell.

# --- configuration files ---
nipah_config = "nipah_config.yaml"
altair_config = "data/custom_analyses_data/theme.py"

# --- input data: experimental neutralization measurements ---
ephrin_binding_neuts_file = "data/custom_analyses_data/experimental_data/bat_ephrin_neuts.csv"
ephrin_validation_curves = "data/custom_analyses_data/experimental_data/binding_single_mutant_validations.csv"
validation_ic50s_file = "data/custom_analyses_data/experimental_data/receptor_IC_validations.csv"

# --- input data: averaged receptor-affinity results ---
e2_monomeric_binding_file = "results/receptor_affinity/averages/bEFNB2_monomeric_mut_effect.csv"
e3_dimeric_binding_file = "results/receptor_affinity/averages/bEFNB3_dimeric_mut_effect.csv"

# --- output plots ---
ephrin_curve_plot = "results/images/ephrin_neut_curve.html"
e2_curve_plot = "results/images/ephrinB2_neut_curve.html"
e3_curve_plot = "results/images/ephrinB3_neut_curve.html"
e2_corr_plot = "results/images/e2_corr_plot.html"
e3_corr_plot = "results/images/e3_corr_plot.html"
EFNB2_neut_corr = "results/images/EFNB2_neut_corr.html"
EFNB3_neut_corr = "results/images/EFNB3_neut_corr.html"
all_ephrin_neut_plots = "results/images/all_ephrin_neut_plots.html"
Import packages¶
In [3]:
import warnings
import math
import os
from IPython.display import display, HTML, SVG
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import neutcurve
from neutcurve.colorschemes import CBPALETTE
from neutcurve.colorschemes import CBMARKERS
import scipy.stats
import yaml
import altair as alt
import re
print(f"Using `neutcurve` version {neutcurve.__version__}")
import sys

# allow more rows for Altair
_ = alt.data_transformers.disable_max_rows()

# setup working directory
# NOTE(review): this is a machine-specific absolute path; consider making it a
# notebook parameter so the notebook can run from other checkouts.
PROJECT_DIR = "/fh/fast/bloom_j/computational_notebooks/blarsen/2023/Nipah_Malaysia_RBP_DMS/"

# Compare normalized paths: `os.getcwd()` does not report a trailing slash, so
# comparing it to the raw string above could never match and the notebook
# always took the `chdir` branch.
if os.path.normpath(os.getcwd()) == os.path.normpath(PROJECT_DIR):
    print("Already in correct directory")
else:
    os.chdir(PROJECT_DIR)
    print("Setup in correct directory")

# import altair themes from /data/custom_analyses_data/theme.py and enable
# (the path is relative, so this must run after the working directory is set)
sys.path.append('data/custom_analyses_data/')
import theme

alt.themes.register('main_theme', theme.main_theme)
alt.themes.enable('main_theme')
Using `neutcurve` version 0.5.7 Setup in correct directory
Out[3]:
ThemeRegistry.enable('main_theme')
Setup path¶
For running notebook interactively¶
In [4]:
# Fallback input paths for interactive use: `EFNB2_neut_corr` is still None
# when the placeholder values from the parameters cell were never replaced.
# Only inputs are set here; the plot-saving steps are skipped in that case.
if EFNB2_neut_corr is None:
    #altair_config = "data/custom_analyses_data/theme.py"
    nipah_config = "nipah_config.yaml"

    # experimental neutralization measurements
    ephrin_binding_neuts_file = "data/custom_analyses_data/experimental_data/bat_ephrin_neuts.csv"
    ephrin_validation_curves = (
        "data/custom_analyses_data/experimental_data/binding_single_mutant_validations.csv"
    )
    validation_ic50s_file = "data/custom_analyses_data/experimental_data/receptor_IC_validations.csv"

    # averaged receptor-affinity results
    e2_monomeric_binding_file = "results/receptor_affinity/averages/bEFNB2_monomeric_mut_effect.csv"
    e3_dimeric_binding_file = "results/receptor_affinity/averages/bEFNB3_dimeric_mut_effect.csv"
Read in config file¶
In [5]:
# Parse the YAML configuration named by `nipah_config` into `config`.
with open(nipah_config) as config_file:
    config = yaml.safe_load(config_file)
First plot neuts with WT virus¶
In [6]:
# Load the neutralization measurements and preview them as read from disk.
df = pd.read_csv(ephrin_binding_neuts_file)
display(df.head(5))

# The `virus` column holds shorthand receptor labels in this dataset;
# swap them for the names used in the plots, then preview again.
df = df.assign(
    virus=df["virus"].replace(
        {
            "E2-dimeric": "dimeric-bEFNB2",
            "E2-monomeric": "monomeric-bEFNB2",
            "E3-dimeric": "dimeric-bEFNB3",
            "E3-monomeric": "monomeric-bEFNB3",
        }
    )
)
display(df.head(4))
serum | virus | replicate | concentration | fraction infectivity | |
---|---|---|---|---|---|
0 | CHO-EFNB3 | E2-dimeric | 1 | 0.098039 | 0.00001 |
1 | CHO-EFNB3 | E2-dimeric | 1 | 0.032680 | 0.00001 |
2 | CHO-EFNB3 | E2-dimeric | 1 | 0.010893 | 0.00000 |
3 | CHO-EFNB3 | E2-dimeric | 1 | 0.003631 | 0.00001 |
4 | CHO-EFNB3 | E2-dimeric | 1 | 0.001210 | 0.00001 |
serum | virus | replicate | concentration | fraction infectivity | |
---|---|---|---|---|---|
0 | CHO-EFNB3 | dimeric-bEFNB2 | 1 | 0.098039 | 0.00001 |
1 | CHO-EFNB3 | dimeric-bEFNB2 | 1 | 0.032680 | 0.00001 |
2 | CHO-EFNB3 | dimeric-bEFNB2 | 1 | 0.010893 | 0.00000 |
3 | CHO-EFNB3 | dimeric-bEFNB2 | 1 | 0.003631 | 0.00001 |
Get neut curves and plot¶
In [7]:
# Fit the curves with `neutcurve`, passing `fixbottom=0`, and tabulate the
# fit parameters for the requested `ics` levels.
fits = neutcurve.curvefits.CurveFits(data=df, fixbottom=0)
fitParams = fits.fitParams(
    ics=[50, 90, 95, 97, 98, 99],
)
def extract_dataframe_from_neutcurve(serum, viruses, replicate="average"):
    """Stack the curve dataframes of several viruses for one serum.

    Reads the module-level `fits` object at call time.

    Args:
        serum: `serum` value passed to `fits.getCurve`.
        viruses: iterable of `virus` values; one curve is fetched per entry.
        replicate: `replicate` value passed to `fits.getCurve`.

    Returns:
        Row-wise concatenation of each curve's dataframe, with a `virus`
        column set to the virus the rows came from.
    """

    def labelled_curve(virus):
        # Fetch one curve and tag its rows with the virus name.
        frame = fits.getCurve(serum=serum, virus=virus, replicate=replicate).dataframe()
        frame["virus"] = virus
        return frame

    return pd.concat([labelled_curve(virus) for virus in viruses], axis=0)
serum = "CHO-EFNB3"  # pull out the neuts that were done on CHO-EFNB3 cells, not E2
viruses = [
    "dimeric-bEFNB2",
    "monomeric-bEFNB2",
    "dimeric-bEFNB3",
    "monomeric-bEFNB3",
]
curve = extract_dataframe_from_neutcurve(serum, viruses)

# Error-bar bounds: measurement plus/minus its standard error.
curve = curve.assign(
    upper=curve["measurement"] + curve["stderr"],
    lower=curve["measurement"] - curve["stderr"],
)
def plot_neut_curve(df):
    """Layer fitted lines, measured points and error bars into one chart.

    Args:
        df: frame with `concentration`, `fit`, `measurement`, `lower`,
            `upper` and `virus` columns.

    Returns:
        Layered Altair chart (line + points + error bars), colored by `virus`.
    """
    panel_size = {"width": 300, "height": 200}
    x_title = "Concentration (μM)"
    y_title = "Fraction Infectivity"

    # Fitted curve.
    fitted_line = (
        alt.Chart(df)
        .mark_line(size=1.5)
        .encode(
            x=alt.X(
                "concentration:Q",
                scale=alt.Scale(type="log"),
                axis=alt.Axis(format=".0e"),
                title=x_title,
            ),
            y=alt.Y("fit:Q", title=y_title),
            color=alt.Color("virus", title="Receptor"),
        )
        .properties(**panel_size)
    )

    # Measured values.
    measured_points = (
        alt.Chart(df)
        .mark_circle(size=40, opacity=1)
        .encode(
            x=alt.X(
                "concentration",
                scale=alt.Scale(type="log"),
                axis=alt.Axis(format=".0e", tickCount=3),
                title=x_title,
            ),
            y=alt.Y("measurement:Q", title=y_title),
            color=alt.Color("virus", title="Receptor"),
        )
        .properties(**panel_size)
    )

    # Bars spanning `lower` to `upper`.
    error_bars = (
        alt.Chart(df)
        .mark_errorbar(opacity=1)
        .encode(
            x="concentration",
            y=alt.Y("lower", title=y_title),
            y2="upper",
            color="virus",
        )
        .properties(**panel_size)
    )

    return fitted_line + measured_points + error_bars
ephrin_curve = plot_neut_curve(curve)
ephrin_curve.display()
# Output paths are only defined when the parameters were filled in.
if EFNB2_neut_corr is not None:
    ephrin_curve.save(ephrin_curve_plot)

# Rename the generic neutcurve columns to what they mean in this experiment
# and add the IC50 scaled by 1000 as `ic50_nM`.
fitParams = fitParams.rename(
    columns={"serum": "target_cells", "virus": "soluble_receptor"}
).assign(ic50_nM=lambda params: params["ic50"] * 1000)

subset = fitParams.loc[fitParams["target_cells"] == "CHO-EFNB3"]
display(subset[["soluble_receptor", "ic50_nM"]].round(3))
soluble_receptor | ic50_nM | |
---|---|---|
0 | dimeric-bEFNB2 | 0.045 |
1 | monomeric-bEFNB2 | 0.892 |
2 | dimeric-bEFNB3 | 0.610 |
3 | monomeric-bEFNB3 | 434.780 |
Pull in binding validation neut data¶
In [8]:
# Read the single-mutant validation neuts and relabel 'WT' as 'Unmutated'.
validation_curves = pd.read_csv(ephrin_validation_curves).assign(
    virus=lambda frame: frame['virus'].str.replace('WT','Unmutated')
)
In [9]:
# Refit on the validation data; this rebinds `fits` and `fitParams`.
fits = neutcurve.curvefits.CurveFits(data=validation_curves, fixbottom=0)
fitParams = fits.fitParams(
    ics=[50, 90, 95, 97, 98, 99],
).rename(
    columns={"serum": "target_cells", "virus": "mutant"},
)
def extract_dataframe_from_neutcurve(serum, viruses, replicate="average"):
    """Stack the curve dataframes of several viruses for one serum.

    This redefines the function of the same name from an earlier cell with
    the same logic. `fits` is looked up at call time, so calls made after
    this cell use the `fits` object built from the validation data.

    Args:
        serum: `serum` value passed to `fits.getCurve`.
        viruses: iterable of `virus` values; one curve is fetched per entry.
        replicate: `replicate` value passed to `fits.getCurve`.

    Returns:
        Row-wise concatenation of each curve's dataframe, with a `virus`
        column set to the virus the rows came from.
    """

    def labelled_curve(virus):
        # Fetch one curve and tag its rows with the virus name.
        frame = fits.getCurve(serum=serum, virus=virus, replicate=replicate).dataframe()
        frame["virus"] = virus
        return frame

    return pd.concat([labelled_curve(virus) for virus in viruses], axis=0)
def get_curves(df, serum, viruses=None):
    """Collect the fitted curves for one receptor and add error-bar bounds.

    Args:
        df: unused; kept so existing calls keep working. The curves come from
            `extract_dataframe_from_neutcurve`, not from this frame.
        serum: `serum` value whose curves are extracted.
        viruses: optional list of virus names to extract. Defaults to the six
            validation mutants plus "Unmutated".

    Returns:
        Dataframe from `extract_dataframe_from_neutcurve` with added `upper`
        and `lower` columns (measurement plus/minus `stderr`).
    """
    if viruses is None:
        viruses = ["H333Q", "Q492R", "V507I", "Q530F", "S553W", "D555K", "Unmutated"]
    curve = extract_dataframe_from_neutcurve(serum, viruses)
    curve["upper"] = curve["measurement"] + curve["stderr"]
    curve["lower"] = curve["measurement"] - curve["stderr"]
    return curve
# One curve table per receptor, in the order EFNB2-monomeric, EFNB3-dimeric.
e2_validation_curves, e3_validation_curves = (
    get_curves(validation_curves, receptor)
    for receptor in ("EFNB2-monomeric", "EFNB3-dimeric")
)
# Sorting function to put 'Unmutated' on top of the legend, followed by numerical order
def custom_sort_order(array):
    """Order labels for the legend: 'Unmutated' first, then by site number.

    Args:
        array: iterable of label strings such as 'Q530F' or 'Unmutated'.

    Returns:
        A new list sorted by the first run of digits in each label (labels
        without digits sort as 0), with 'Unmutated' moved to the front when
        it is present.
    """

    # Sort based on the numerical part in mutation strings, e.g., '530' in 'Q530F'
    def extract_number(virus):
        num = re.search(r"\d+", virus)
        return int(num.group()) if num else 0

    array = sorted(array, key=extract_number)
    # Test for the label that is actually moved. The previous check looked for
    # "WT" but removed "Unmutated", which raised ValueError for a "WT" label
    # and never ran for "Unmutated".
    if "Unmutated" in array:
        array.remove("Unmutated")
        array.insert(0, "Unmutated")
    return array
def plot_validation_curves(df, name):
    """Draw the validation neutralization curves for one receptor.

    Layers the fitted curve (`fit`), the measured points (`measurement`) and
    error bars (`lower` to `upper`) against `concentration` on a log x axis,
    colored by `virus`.

    Args:
        df: frame with `concentration`, `fit`, `measurement`, `lower`,
            `upper` and `virus` columns.
        name: receptor label used in the x-axis title.

    Returns:
        The layered Altair chart.
    """
    # Hard-coded palette for the legend entries after the first one.
    palette = [
        "#4E79A5",
        "#F18F3B",
        "#E0585B",
        "#77B7B2",
        "#5AA155",
        "#EDC958",
        "#AF7AA0",
        "#FE9EA8",
        "#9C7561",
        "#BAB0AC",
    ]
    variants = df["virus"].unique()
    # The first entry of the sorted legend domain is drawn in black.
    colors = ["black"] + palette[: len(variants) - 1]

    panel_size = {"width": 300, "height": 200}
    x_title = f"{name} conc. (μg/mL)"
    y_title = "Fraction Infectivity"

    def log_x(field):
        # Log-scaled concentration axis used by the line and point layers.
        return alt.X(
            field,
            scale=alt.Scale(type="log"),
            axis=alt.Axis(format=".0e", tickCount=3),
            title=x_title,
        )

    def variant_color():
        # Color encoding with an explicit legend order and palette.
        return alt.Color(
            "virus",
            title="Virus",
            scale=alt.Scale(domain=custom_sort_order(variants), range=colors),
        )

    fitted_line = (
        alt.Chart(df)
        .mark_line(size=1.5)
        .encode(
            x=log_x("concentration:Q"),
            y=alt.Y("fit:Q", title=y_title, axis=alt.Axis(tickCount=3)),
            color=variant_color(),
        )
        .properties(**panel_size)
    )

    measured_points = (
        alt.Chart(df)
        .mark_circle(opacity=1, size=40)
        .encode(
            x=log_x("concentration"),
            y=alt.Y("measurement:Q", title=y_title, axis=alt.Axis(tickCount=3)),
            color=variant_color(),
        )
        .properties(**panel_size)
    )

    error_bars = (
        alt.Chart(df)
        .mark_errorbar(opacity=1)
        .encode(
            x="concentration",
            y=alt.Y("lower", title=y_title),
            y2="upper",
            color="virus",
        )
        .properties(**panel_size)
    )

    return fitted_line + measured_points + error_bars
# Build both validation panels, then show each one and save it when output
# paths were supplied.
ephrin_b2_neut_curve = plot_validation_curves(e2_validation_curves, "monomeric-bEFNB2")
ephrin_b3_neut_curve = plot_validation_curves(e3_validation_curves, "dimeric-bEFNB3")

for neut_chart, chart_path in (
    (ephrin_b2_neut_curve, e2_curve_plot),
    (ephrin_b3_neut_curve, e3_curve_plot),
):
    neut_chart.display()
    if EFNB2_neut_corr is not None:
        neut_chart.save(chart_path)
# Find the IC50 values in ng/mL and relative IC50 to WT
fitParams["ic50_ng"] = (fitParams["ic50"] * 1000).round(1)

# Normalize each receptor against its OWN unmutated virus. The previous version
# divided every row, including the EFNB3-dimeric ones, by the EFNB2-monomeric
# unmutated IC50.
wt_ic50_by_target = (
    fitParams.loc[fitParams["mutant"] == "Unmutated"]
    .groupby("target_cells")["ic50_ng"]
    .first()
)
fitParams["relative_ic50"] = (
    fitParams["ic50_ng"] / fitParams["target_cells"].map(wt_ic50_by_target)
).round(1)
display(fitParams[["target_cells", "mutant", "ic50_ng", "relative_ic50"]])
target_cells | mutant | ic50_ng | relative_ic50 | |
---|---|---|---|---|
0 | EFNB2-monomeric | H333Q | 92.4 | 1.6 |
1 | EFNB2-monomeric | Q492R | 527.8 | 9.2 |
2 | EFNB2-monomeric | V507I | 36.9 | 0.6 |
3 | EFNB2-monomeric | Q530F | 1077.4 | 18.8 |
4 | EFNB2-monomeric | S553W | 20.6 | 0.4 |
5 | EFNB2-monomeric | D555K | 1799.8 | 31.4 |
6 | EFNB2-monomeric | Unmutated | 57.4 | 1.0 |
7 | EFNB3-dimeric | H333Q | 88.5 | 1.5 |
8 | EFNB3-dimeric | Q492R | 39.0 | 0.7 |
9 | EFNB3-dimeric | V507I | 78.6 | 1.4 |
10 | EFNB3-dimeric | Q530F | 29.8 | 0.5 |
11 | EFNB3-dimeric | S553W | 32.8 | 0.6 |
12 | EFNB3-dimeric | D555K | 178.3 | 3.1 |
13 | EFNB3-dimeric | Unmutated | 57.5 | 1.0 |
Now plot correlations between IC50 and DMS binding score¶
Pull in data and prepare for plotting
In [10]:
# Read the validation IC50s with NA detection switched off. `na_filter` is a
# boolean option, so spell the intent as False instead of relying on None
# being falsy.
validation_ic50s = pd.read_csv(validation_ic50s_file, na_filter=False)
# Relabel 'WT' as 'Unmutated'.
validation_ic50s['mutation'] = validation_ic50s['mutation'].str.replace('WT','Unmutated')
In [11]:
# Read in filtered DMS binding data (one table per receptor)
e2_monomeric_binding, e3_dimeric_binding = (
    pd.read_csv(binding_path)
    for binding_path in (e2_monomeric_binding_file, e3_dimeric_binding_file)
)
def make_df(df, name):
    """Join validation IC50s with DMS binding scores for one receptor.

    Reads the module-level `validation_ic50s` frame.

    Args:
        df: DMS binding table with a `mutation` column to merge on.
        name: value of the `antibody` column to keep.

    Returns:
        An independent dataframe holding the merged rows for `name` plus the
        'Unmutated' rows, whose `Ephrin binding_mean` is set to 0.
    """
    merged = validation_ic50s.merge(df, on=["mutation"])
    # The unmutated virus is not merged from the DMS table here; add it back
    # with a binding score of 0.
    wt_rows = validation_ic50s[validation_ic50s["mutation"] == "Unmutated"].copy()
    wt_rows["Ephrin binding_mean"] = 0.00000
    merged = pd.concat([merged, wt_rows], ignore_index=True)
    # Return a copy, not a boolean-mask slice, so callers can add columns
    # without chained-assignment warnings.
    return merged[merged["antibody"] == name].copy()
# Per-receptor tables pairing each DMS binding table with its `antibody` label.
e2_df_out, e3_df_out = (
    make_df(binding_table, receptor)
    for binding_table, receptor in (
        (e2_monomeric_binding, "EFNB2-monomeric"),
        (e3_dimeric_binding, "EFNB3-dimeric"),
    )
)
Plot correlation data:
In [12]:
# Sorting function to put 'Unmutated' on top of the legend, followed by numerical order
def custom_sort_order(array):
    """Order labels for the legend: 'Unmutated' first, then by site number.

    Args:
        array: iterable of label strings such as 'Q530F' or 'Unmutated'.

    Returns:
        A new list sorted by the first run of digits in each label (labels
        without digits sort as 0), with 'Unmutated' moved to the front when
        it is present.
    """

    # Sort based on the numerical part in mutation strings, e.g., '530' in 'Q530F'
    def extract_number(mutation):
        num = re.search(r"\d+", mutation)
        return int(num.group()) if num else 0

    array = sorted(array, key=extract_number)
    # Test for the label that is actually moved. The previous check looked for
    # "WT" but removed "Unmutated", which raised ValueError for a "WT" label
    # and never ran for "Unmutated".
    if "Unmutated" in array:
        array.remove("Unmutated")
        array.insert(0, "Unmutated")
    return array
def make_corr_chart(df, name):
    """Scatter DMS binding scores against NT50 and annotate the r-value.

    NT50 is computed as 1 / `measured IC50`. The r-value comes from
    `scipy.stats.linregress` on `Ephrin binding_mean` versus NT50; it is
    printed and drawn as a text label at the top-left of the data.

    Args:
        df: frame with `measured IC50`, `Ephrin binding_mean` and `mutation`
            columns. It is not modified.
        name: receptor label used in the y-axis title; it also selects the
            y-axis settings.

    Returns:
        Layered Altair chart (points + r-value label).
    """
    # calculate NT50 on a copy so the caller's frame is left untouched
    df = df.assign(NT50=1 / df["measured IC50"])

    # calculate R value (only the correlation coefficient is used)
    _slope, _intercept, r_value, _p_value, _std_err = scipy.stats.linregress(
        df["Ephrin binding_mean"], df["NT50"]
    )
    print(f"The r-value is: {r_value:.2f}")

    # Hard-coded palette for the legend entries after the first one.
    category10_colors = [
        "#4E79A5",
        "#F18F3B",
        "#E0585B",
        "#77B7B2",
        "#5AA155",
        "#EDC958",
        "#AF7AA0",
        "#FE9EA8",
        "#9C7561",
        "#BAB0AC",
    ]

    # Callers pass the display label "dimeric-bEFNB3", so the old comparison
    # against 'EFNB3-dimeric' alone never matched and these axis settings were
    # never applied. Accept both spellings.
    if name in ("EFNB3-dimeric", "dimeric-bEFNB3"):
        scale_y = alt.Scale(type="log", domain=[5, 100])
        axis_y = alt.Axis(values=[1, 10, 100])
    else:
        scale_y = alt.Scale(type="log")
        axis_y = alt.Axis(values=[0.1, 1, 10, 100])

    # Adjust colors based on the unique mutations; the first entry of the
    # sorted legend domain is drawn in black.
    mutations = df["mutation"].unique()
    colors = ["black"] + category10_colors[: len(mutations) - 1]

    corr_chart = (
        alt.Chart(df)
        .encode(
            x=alt.X(
                "Ephrin binding_mean",
                title="Receptor binding in DMS",
                axis=alt.Axis(tickCount=4),
            ),
            y=alt.Y(
                "NT50",
                title=f"{name} NT50 (μg/ml-1)",
                scale=scale_y,
                axis=axis_y,
            ),
            color=alt.Color(
                "mutation",
                title="Virus",
                scale=alt.Scale(domain=custom_sort_order(mutations), range=colors),
            ),
        )
        .mark_point(size=120, filled=True, opacity=1)
    )

    # Single-row inline dataset anchoring the label at (min x, max y).
    text = (
        alt.Chart(
            {
                "values": [
                    {
                        "x": df["Ephrin binding_mean"].min(),
                        "y": df["NT50"].max(),
                        "text": f"r = {r_value:.2f}",
                    }
                ]
            }
        )
        .mark_text(align="left", baseline="top", dx=-8, dy=-20)
        .encode(x=alt.X("x:Q"), y=alt.Y("y:Q"), text="text:N")
    )

    return corr_chart + text
# EFNB2: correlation panel.
e2_corr = make_corr_chart(e2_df_out, "monomeric-bEFNB2")
e2_corr.display()
if EFNB2_neut_corr is not None:
    e2_corr.save(e2_corr_plot)

# EFNB3: correlation panel, plus the side-by-side curve|correlation figures.
e3_corr = make_corr_chart(e3_df_out, "dimeric-bEFNB3")
e3_corr.display()
if EFNB2_neut_corr is not None:
    e3_corr.save(e3_corr_plot)
    (ephrin_b2_neut_curve | e2_corr).save(EFNB2_neut_corr)
    (ephrin_b3_neut_curve | e3_corr).save(EFNB3_neut_corr)

# Combined figure: one row per receptor, curves on the left.
all_ephrin_neuts = (
    (ephrin_b2_neut_curve | e2_corr)
    & (ephrin_b3_neut_curve | e3_corr)
)
all_ephrin_neuts.display()
if EFNB2_neut_corr is not None:
    all_ephrin_neuts.save(all_ephrin_neut_plots)
The r-value is: 0.90
The r-value is: 0.70
In [ ]: