Escape sites related to antibody contact distance
This notebook plots the summed escape at each site stratified by distance to antibody residues for each antibody.
# Imports
import os
import warnings
import seaborn as sns
import pandas as pd
import numpy as np
import altair as alt
import matplotlib.pyplot as plt
# Plotting colors
# Custom color list installed as the default seaborn palette below.
# Several of these hex codes are also used further down as per-antibody
# title colors and as the "strong escape" highlight color.
tol_muted_adjusted = [
    "#000000",
    "#CC6677",
    "#1f78b4",
    "#DDCC77",
    "#117733",
    "#882255",
    "#88CCEE",
    "#44AA99",
    "#999933",
    "#AA4499",
    "#EE7733",
    "#CC3311",
    "#DDDDDD",
]
# Seaborn style settings
# The three calls below run in this order (rc overrides, then the "ticks"
# style, then the palette); keep the order when editing.
sns.set(rc={
    "figure.dpi":300,
    "savefig.dpi":300,
    "svg.fonttype":"none",
})
sns.set_style("ticks")
sns.set_palette(tol_muted_adjusted)
# Suppress warnings
# NOTE(review): this silences every warning category for the rest of the
# notebook, including pandas/matplotlib deprecation warnings.
warnings.simplefilter("ignore")
# Allow more rows for Altair
_ = alt.data_transformers.disable_max_rows()
# this cell is tagged as `parameters` for papermill parameterization
# Every parameter is declared here with a None placeholder; the values that
# are actually used are assigned in the `# Parameters` section that follows.
# contacts_* and filtered_escape_* hold one CSV path per antibody.
contacts_89F = None
contacts_377H = None
contacts_256A = None
contacts_2510C = None
contacts_121F = None
contacts_372D = None
filtered_escape_377H = None
filtered_escape_89F = None
filtered_escape_2510C = None
filtered_escape_121F = None
filtered_escape_256A = None
filtered_escape_372D = None
# Path to the functional-effects CSV and the two thresholds used to filter it
func_scores = None
min_times_seen = None
n_selections = None
# Output directory and output file paths
out_dir = None
func_vs_escape = None
func_vs_escape_svg = None
saved_image_path = None
func_distance_image_path = None
# Parameters
# Concrete values for the placeholders declared above.
# NOTE(review): presumably this section is injected by papermill when the
# notebook is executed, so manual edits here may be overwritten — confirm.
contacts_89F = "data/antibody_contacts/antibody_contacts_89F.csv"
contacts_377H = "data/antibody_contacts/antibody_contacts_377H.csv"
contacts_256A = "data/antibody_contacts/antibody_contacts_256A.csv"
contacts_2510C = "data/antibody_contacts/antibody_contacts_2510C.csv"
contacts_121F = "data/antibody_contacts/antibody_contacts_121F.csv"
contacts_372D = "data/antibody_contacts/antibody_contacts_372D.csv"
filtered_escape_377H = (
    "results/filtered_antibody_escape_CSVs/377H_filtered_mut_effect.csv"
)
filtered_escape_89F = (
    "results/filtered_antibody_escape_CSVs/89F_filtered_mut_effect.csv"
)
filtered_escape_2510C = (
    "results/filtered_antibody_escape_CSVs/2510C_filtered_mut_effect.csv"
)
filtered_escape_121F = (
    "results/filtered_antibody_escape_CSVs/121F_filtered_mut_effect.csv"
)
filtered_escape_256A = (
    "results/filtered_antibody_escape_CSVs/256A_filtered_mut_effect.csv"
)
filtered_escape_372D = (
    "results/filtered_antibody_escape_CSVs/372D_filtered_mut_effect.csv"
)
func_scores = "results/func_effects/averages/293T_entry_func_effects.csv"
out_dir = "results/antibody_escape_profiles/"
# Functional-score rows are kept only when times_seen >= min_times_seen
# and n_selections >= n_selections (see the filtering code below).
min_times_seen = 2
n_selections = 8
func_vs_escape = "results/antibody_escape_profiles/antibody_escape_vs_func_effect.html"
func_vs_escape_svg = (
    "results/antibody_escape_profiles/antibody_escape_vs_func_effect.svg"
)
saved_image_path = "results/antibody_escape_profiles/antibody_escape_by_distance.svg"
func_distance_image_path = (
    "results/antibody_escape_profiles/func_effect_by_distance.svg"
)
# Uncomment the assignments below to run this notebook interactively;
# they re-assign every parameter above with a "../"-prefixed path.
# contacts_89F = "../data/antibody_contacts/antibody_contacts_89F.csv"
# contacts_377H = "../data/antibody_contacts/antibody_contacts_377H.csv"
# contacts_256A = "../data/antibody_contacts/antibody_contacts_256A.csv"
# contacts_2510C = "../data/antibody_contacts/antibody_contacts_2510C.csv"
# contacts_121F = "../data/antibody_contacts/antibody_contacts_121F.csv"
# contacts_372D = "../data/antibody_contacts/antibody_contacts_372D.csv"
# filtered_escape_377H = "../results/filtered_antibody_escape_CSVs/377H_filtered_mut_effect.csv"
# filtered_escape_89F = "../results/filtered_antibody_escape_CSVs/89F_filtered_mut_effect.csv"
# filtered_escape_2510C = "../results/filtered_antibody_escape_CSVs/2510C_filtered_mut_effect.csv"
# filtered_escape_121F = "../results/filtered_antibody_escape_CSVs/121F_filtered_mut_effect.csv"
# filtered_escape_256A = "../results/filtered_antibody_escape_CSVs/256A_filtered_mut_effect.csv"
# filtered_escape_372D = "../results/filtered_antibody_escape_CSVs/372D_filtered_mut_effect.csv"
# func_scores = "../results/func_effects/averages/293T_entry_func_effects.csv"
# min_times_seen = 2
# n_selections = 8
# out_dir = "../results/antibody_escape_profiles/"
# func_vs_escape = "../results/antibody_escape_profiles/antibody_escape_vs_func_effect.html"
# func_vs_escape_svg = "../results/antibody_escape_profiles/antibody_escape_vs_func_effect.svg"
# saved_image_path = "../results/antibody_escape_profiles/antibody_escape_by_distance.svg"
# func_distance_image_path = "../results/antibody_escape_profiles/func_effect_by_distance.svg"
# Per-antibody input files, collected into two parallel lists.
# Both lists must stay in the same antibody order: later code pairs
# contacts[i] with escape[i], and the panel order of every figure follows
# the order used here.
contacts = [
    contacts_2510C,
    contacts_121F,
    contacts_377H,
    contacts_256A,
    contacts_372D,
    contacts_89F,
]
escape = [
    filtered_escape_2510C,
    filtered_escape_121F,
    filtered_escape_377H,
    filtered_escape_256A,
    filtered_escape_372D,
    filtered_escape_89F,
]
# Load functional scores as dataframe
functional_scores = pd.read_csv(func_scores)

# Calculate site averages for functional scores but filter
# for stop codons, min times seen, and min selections
merged_df = (
    functional_scores
    .loc[
        (functional_scores["mutant"] != "*")
        & (functional_scores["times_seen"] >= min_times_seen)
        & (functional_scores["n_selections"] >= n_selections)
    ]
    .groupby(["site", "wildtype"])
    .aggregate({"effect": "mean"})
    .reset_index()
)

# Add escape to dataframe for each antibody
for antibody_file in escape:
    # The antibody name is the part of the file name before the first "_"
    antibody_name = antibody_file.split("/")[-1].split("_")[0]
    escape_col = "escape_" + antibody_name

    escape_df = (
        pd.read_csv(antibody_file)
        # Filter escape df for low functional score mutations
        .query("poor_cell_entry == False")
        # Clip lower scores to 0. Done with .assign so the filtered frame is
        # never written to in place (no chained-assignment ambiguity).
        .assign(escape_median=lambda df: df["escape_median"].clip(lower=0))
        # Calculate site summed escape
        .groupby(["site", "wildtype"])
        .aggregate({"escape_median": "sum"})
        .reset_index()
        # Rename escape column to include antibody name
        .rename(columns={"escape_median": escape_col})
    )

    # Left merge keeps every site that has a functional score; sites without
    # escape data for this antibody get NaN in the new column.
    merged_df = merged_df.merge(
        escape_df[["site", "wildtype", escape_col]],
        how="left",
        on=["site", "wildtype"],
        validate="one_to_one",
    )

# Add contacts to dataframe for each antibody
for contacts_file in contacts:
    # The antibody name is the last "_"-separated part of the file name,
    # minus the 4-character extension (".csv")
    antibody_name = contacts_file.split("/")[-1].split("_")[-1][:-4]

    contacts_df = (
        pd.read_csv(contacts_file)
        # One row per position, keeping the first distance listed for it
        .groupby(["position"])
        .aggregate({"distance": "first"})
        .reset_index()
        .sort_values(by=["distance", "position"])
        .rename(columns={
            "position": "site",
            "distance": antibody_name + "_distance",
        })
        .reset_index(drop=True)
    )

    # Merge escape and contacts dataframes
    merged_df = merged_df.merge(
        contacts_df,
        how="left",
        on="site",
        validate="one_to_one",
    )
First look at antibody escape vs functional effects for all antibody contact sites.
# x-axis (site escape) limits for each antibody
# NOTE(review): the 377H limits used here differ from the ones used by
# plot_func_scores_vs_escape (-9.375, 84.375) — confirm which is intended.
escape_axis_limits = {
    "2510C": (-7.5, 67.5),
    "121F": (-3.75, 33.75),
    "377H": (-8.75, 78.75),
    "256A": (-5, 45),
    "372D": (-1.875, 16.875),
    "89F": (-5, 45),
}

subplots = []
for antibody_file in escape:
    antibody_name = antibody_file.split("/")[-1].split("_")[0]

    # Set lim for each antibody; unknown antibodies fall back to no limits
    if antibody_name in escape_axis_limits:
        fixed_min, fixed_max = escape_axis_limits[antibody_name]
    else:
        fixed_min = fixed_max = None
        print(f"Error! No x-axis limits set for antibody {antibody_name}!")

    curr_subplot = (
        alt.Chart(
            # Contact sites only: rows whose distance value is 4
            merged_df.loc[merged_df[antibody_name + "_distance"] == 4],
            title=antibody_name,
        )
        .mark_point(
            filled=True,
            color="black",
            size=75,
            opacity=0.15,
        )
        .encode(
            alt.X(
                "escape_" + antibody_name,
                axis=alt.Axis(
                    title="site escape",
                    domainWidth=1,
                    domainColor="black",
                    tickColor="black",
                ),
                scale=alt.Scale(domain=[fixed_min, fixed_max]),
            ),
            alt.Y(
                "effect",
                axis=alt.Axis(
                    title=["site mean", "effect on cell entry"],
                    values=[-4, -3, -2, -1, 0, 1],
                    domainWidth=1,
                    domainColor="black",
                    tickColor="black",
                ),
                scale=alt.Scale(domain=[-4.1, 1.1]),
            ),
            tooltip=[
                "site",
                "wildtype",
                "effect",
                "escape_" + antibody_name,
                antibody_name + "_distance",
            ],
        )
        .properties(
            width=150,
            height=150,
        )
    )
    subplots.append(curr_subplot)

# Concatenate however many panels were built (one per entry in `escape`)
escape_vs_effect = (
    alt.hconcat(
        *subplots,
        spacing=5,
        title=["Functional effect vs antibody escape", "for antibody contact sites"],
    )
    .configure_axis(
        grid=False,
        labelFontSize=16,
        titleFontSize=16,
        labelFontWeight="normal",
        titleFontWeight="normal",
    )
    .configure_title(
        fontSize=24,
    )
)

# Make output dir if doesn't exist. makedirs also creates missing parent
# directories and does not fail when the directory is already there.
os.makedirs(out_dir, exist_ok=True)
escape_vs_effect.save(func_vs_escape)
escape_vs_effect
Recreate same plot as above but formatted for paper.
# Functions
def plot_func_scores_vs_escape(merged_df, ax, i, antibody_name):
    """
    Scatter site mean functional effect (y) against summed site
    escape (x) for one antibody, drawn on ``ax``.

    Only antibody contact sites are plotted, i.e. rows whose
    ``<antibody_name>_distance`` value equals 4. A few hand-picked
    sites per antibody are annotated with ``<wildtype><site>``.

    Parameters
    ----------
    merged_df : pandas.DataFrame
        Per-site table with the columns ``site``, ``wildtype``,
        ``effect``, ``escape_<antibody_name>`` and
        ``<antibody_name>_distance``.
    ax : matplotlib.axes.Axes
        Axes to draw on.
    i : int
        Panel index. Not used by this function; kept so existing
        calls keep working.
    antibody_name : str
        Antibody identifier as used in the column names, e.g. ``"121F"``.

    Notes
    -----
    ``sns.despine()`` is called without arguments at the end, so it acts
    on the current figure rather than on ``ax`` alone.
    """
    # antibody -> (x-axis limits, x ticks, displayed title, title color)
    panel_style = {
        "2510C": ((-7.5, 67.5), [0, 30, 60], "25.10C", "#44AA99"),
        "121F": ((-3.75, 33.75), [0, 15, 30], "12.1F", "#999933"),
        "377H": ((-9.375, 84.375), [0, 35, 70], "37.7H", "#AA4499"),
        "256A": ((-5, 45), [0, 20, 40], "25.6A", "#AA4499"),
        "372D": ((-1.875, 16.875), [0, 7.5, 15], "37.2D", "#AA4499"),
        "89F": ((-5, 45), [0, 20, 40], "8.9F", "#117733"),
    }
    # antibody -> {site: (x offset, y offset, horizontal alignment, suffix)}
    # Offsets are in data units and nudge the label away from its point.
    site_labels = {
        "89F": {
            125: (0, 0.2, "left", ""),
            258: (4, -0.1, "left", ""),
        },
        "377H": {
            399: (5, -0.2, "left", ""),
            401: (0, 0.2, "center", ""),
            404: (0, 0.2, "center", ""),
        },
        "256A": {
            399: (3, -0.2, "left", ""),
            # The trailing space shifts this centered label slightly left
            401: (0, 0.2, "center", " "),
            404: (0, 0.2, "center", ""),
        },
        "2510C": {
            100: (0, 0.2, "left", ""),
            228: (0, 0.2, "center", ""),
        },
        "121F": {
            161: (0, 0.2, "left", ""),
            107: (3, -0.2, "left", ""),
            92: (0, 0.2, "left", ""),
        },
        "372D": {
            264: (1.5, -0.2, "left", ""),
            395: (0, 0.2, "center", ""),
            397: (0, 0.2, "center", ""),
        },
    }

    escape_col = "escape_" + antibody_name
    style = panel_style.get(antibody_name)
    if style is None:
        # Unknown antibody: keep plotting, but with automatic x-limits
        print(f"Error! No x-axis limits set for antibody {antibody_name}!")
        fixed_min = fixed_max = None
    else:
        fixed_min, fixed_max = style[0]

    # Plot escape vs functional score
    chart = sns.scatterplot(
        data=merged_df.loc[merged_df[antibody_name + "_distance"] == 4],
        y="effect",
        x=escape_col,
        edgecolor=None,
        linewidth=0.5,
        s=20,
        ax=ax,
        alpha=0.15,
        color="#000000",
    )

    if style is not None:
        _, xticks, title_text, title_color = style
        chart.set_title(title_text, fontsize=8, color=title_color)
        chart.set_xticks(xticks)
        chart.set_xticklabels(labels=[str(tick) for tick in xticks], fontsize=8)

    chart.set_ylabel("site mean\neffect on cell entry", fontsize=8)
    chart.set_xlim(fixed_min, fixed_max)
    chart.set_ylim(-4.1, 1.1)
    yticks = [-4, -3, -2, -1, 0, 1]
    chart.set_yticks(yticks)
    chart.set_yticklabels(labels=[str(tick) for tick in yticks], fontsize=8)
    chart.set(xlabel=None)

    # Change all spines
    for side in ["top", "bottom", "left", "right"]:
        chart.spines[side].set_linewidth(1)
    chart.tick_params(axis="both", length=3, width=1)
    chart.grid(False)
    sns.despine()

    # Label points on each scatter plot. Only the rows for the labelled
    # sites are visited, selected by value so the result does not depend
    # on the dataframe's index labels.
    labels = site_labels.get(antibody_name, {})
    labelled_rows = merged_df.loc[merged_df["site"].isin(list(labels))]
    for site, wt, x_pos, y_pos in zip(
        labelled_rows["site"],
        labelled_rows["wildtype"],
        labelled_rows[escape_col],
        labelled_rows["effect"],
    ):
        # A label without a finite position cannot be drawn
        if pd.isna(x_pos) or pd.isna(y_pos):
            continue
        x_shift, y_shift, alignment, suffix = labels[site]
        chart.text(
            x_pos + x_shift,
            y_pos + y_shift,
            f"{wt}{site}{suffix}",
            fontsize=6,
            horizontalalignment=alignment,
            color="#000000",
        )
# One panel per antibody, sharing the y-axis (site mean effect)
fig, axes = plt.subplots(
    1,
    6,
    figsize=(6.4, 2),
    sharey=True,
)
for i, antibody_file in enumerate(escape):
    antibody_name = antibody_file.split("/")[-1].split("_")[0]
    plot_func_scores_vs_escape(merged_df, axes[i], i, antibody_name)

# Common x-axis label, centered below all panels
fig.text(0.5, 0, "site escape", ha="center", rotation="horizontal", fontsize=8)
fig.tight_layout(w_pad=0.5)

# Make output dir if doesn't exist. makedirs also creates missing parent
# directories and does not fail when the directory is already there.
os.makedirs(out_dir, exist_ok=True)

# Save fig
fig.savefig(func_vs_escape_svg)
Look at escape scores across glycosylation motifs (N - X(X!=P) - S/T) and RBD binding regions for DG and LAMP1, specifically for 12.1F and 8.9F
# Load functional scores as dataframe
functional_scores = pd.read_csv(func_scores)

# Keep one row per mutation (no site averaging here) but filter
# for stop codons, min times seen, and min selections
merged_df = functional_scores.loc[
    (functional_scores["mutant"] != "*")
    & (functional_scores["times_seen"] >= min_times_seen)
    & (functional_scores["n_selections"] >= n_selections)
]

# Add escape to dataframe for each antibody
for antibody_file in escape:
    # The antibody name is the part of the file name before the first "_"
    antibody_name = antibody_file.split("/")[-1].split("_")[0]
    escape_col = "escape_" + antibody_name

    escape_df = (
        pd.read_csv(antibody_file)
        # Filter escape df for low functional score mutations
        .query("poor_cell_entry == False")
        # Clip lower scores to 0. Done with .assign so the filtered frame is
        # never written to in place (no chained-assignment ambiguity).
        .assign(escape_median=lambda df: df["escape_median"].clip(lower=0))
        # Rename escape column to include antibody name
        .rename(columns={"escape_median": escape_col})
    )

    # Merge on the full mutation key; mutations without escape data for
    # this antibody get NaN in the new column.
    merged_df = merged_df.merge(
        escape_df[["site", "wildtype", "mutant", escape_col]],
        how="left",
        on=["site", "wildtype", "mutant"],
        validate="one_to_one",
    )

# Add contacts to dataframe for each antibody
for contacts_file in contacts:
    # The antibody name is the last "_"-separated part of the file name,
    # minus the 4-character extension (".csv")
    antibody_name = contacts_file.split("/")[-1].split("_")[-1][:-4]

    contacts_df = (
        pd.read_csv(contacts_file)
        # One row per position, keeping the first distance listed for it
        .groupby(["position"])
        .aggregate({"distance": "first"})
        .reset_index()
        .sort_values(by=["distance", "position"])
        .rename(columns={
            "position": "site",
            "distance": antibody_name + "_distance",
        })
        .reset_index(drop=True)
    )

    # Many mutation rows share one site, hence many_to_one
    merged_df = merged_df.merge(
        contacts_df,
        how="left",
        on="site",
        validate="many_to_one",
    )
# Sites belonging to a glycosylation motif, listed three consecutive
# sites per motif (one motif per line)
glycan_sites = [
    79, 80, 81,
    89, 90, 91,
    99, 100, 101,
    109, 110, 111,
    119, 120, 121,
    167, 168, 169,
    224, 225, 226,
    365, 366, 367,
    373, 374, 375,
    390, 391, 392,
    395, 396, 397,
]

# Boolean flag per row: does the row's site belong to any motif above?
merged_df["glycan_site"] = merged_df["site"].isin(glycan_sites)
# Maps each group of sites to the region label used on the plots.
# NOTE(review): sites 120 and 121 are listed both under "N119" and under
# the alpha-DG binding sites. site_to_glycan_num returns the first match in
# insertion order, so those two sites are always labelled "N119".
glycan_numbers = {
    (79, 80, 81): "N79",
    (89, 90, 91): "N89",
    (99, 100, 101): "N99",
    (109, 110, 111): "N109",
    (119, 120, 121): "N119",
    (167, 168, 169): "N167",
    (224, 225, 226): "N224",
    (365, 366, 367): "N365",
    (373, 374, 375): "N373",
    (390, 391, 392): "N390",
    (395, 396, 397): "N395",
    (
        120,
        121,
        125,
        150,  # supported by 151 and 125 interactions
        151,
        256,
        257,
        258,
    ): "\u03B1-DG binding sites",
    (
        92,  # histidine triad
        93,  # histidine triad
        172,
        173,
        188,
        192,
        195,
        197,
        198,
        200,
        201,
        202,
        204,
        206,
        207,
        211,
        216,
        230,  # histidine triad
    ): "LAMP1 binding sites",
}


def site_to_glycan_num(site):
    """Return the region label for ``site``, or NaN if it is in no region."""
    matching_labels = (
        label
        for members, label in glycan_numbers.items()
        if site in members
    )
    # First match wins; NaN marks sites outside every listed region
    return next(matching_labels, float("NaN"))
# Label each site if its glycan or RBD binding site
merged_df["glycan_number"] = merged_df["site"].apply(site_to_glycan_num)


def region_escape_chart(
    data,
    antibody,
    display_name,
    title_color,
    x_title,
    point_size=75,
    width=400,
    height=600,
):
    """
    Build a jittered strip chart of per-mutation escape for one antibody,
    with one row per labelled GPC region.

    Parameters
    ----------
    data : pandas.DataFrame
        Needs the columns ``glycan_number``, ``site``, ``wildtype``,
        ``mutant``, ``effect`` and ``escape_<antibody>``.
    antibody : str
        Antibody identifier as used in the escape column name, e.g. "121F".
    display_name : str
        Antibody name shown in the title and tooltip, e.g. "12.1F".
    title_color : str
        Color of the chart title.
    x_title : str or None
        Title of the x-axis; None draws no title.
    point_size, width, height : numbers
        Mark size and chart dimensions.

    Returns
    -------
    altair.Chart
    """
    escape_col = "escape_" + antibody
    # Row order on the y-axis
    region_order = [
        "N79",
        "N89",
        "N99",
        "N109",
        "N119",
        "N167",
        "N224",
        "N365",
        "N373",
        "N390",
        "N395",
        "\u03B1-DG binding sites",
        "LAMP1 binding sites",
    ]
    return (
        alt.Chart(
            # NaN is never equal to itself, so this keeps labelled rows only
            data.query("glycan_number == glycan_number"),
            title=alt.TitleParams(
                text="antibody " + display_name,
                color=title_color,
            ),
        )
        .mark_circle(opacity=0.15, size=point_size, color="#000000")
        .encode(
            y=alt.Y(
                "glycan_number:N",
                title="GPC site",
                sort=region_order,
                axis=alt.Axis(
                    domainWidth=1,
                    domainColor="black",
                    tickColor="black",
                ),
            ),
            x=alt.X(
                escape_col + ":Q",
                title=x_title,
                axis=alt.Axis(
                    values=[0, 1, 2, 3, 4, 5],
                    domainWidth=1,
                    domainColor="black",
                    tickColor="black",
                ),
                scale=alt.Scale(domain=[-0.1, 5.1]),
            ),
            yOffset="jitter:Q",
            tooltip=[
                "site",
                "wildtype",
                "mutant",
                alt.Tooltip(
                    escape_col, format=".2f", title=display_name + " escape"
                ),
                alt.Tooltip(
                    "effect", format=".2f", title="effect on cell entry"
                ),
            ],
        )
        .transform_calculate(
            # Generate Gaussian jitter with a Box-Muller transform
            jitter="sqrt(-2*log(random()))*cos(2*PI*random())"
        )
        .properties(
            width=width,
            height=height,
        )
    )


# Plot score distributions for each region
plot_121F = region_escape_chart(
    merged_df, "121F", "12.1F", "#999933", x_title=None
)
plot_89F = region_escape_chart(
    merged_df, "89F", "8.9F", "#117733", x_title="escape"
)

combined_plot = (
    alt.vconcat(
        plot_121F,
        plot_89F,
    )
    .configure_axis(
        grid=False,
        labelFontSize=16,
        titleFontSize=16,
        labelFontWeight="normal",
        titleFontWeight="normal",
    )
    .configure_title(
        fontSize=24,
    )
    .configure_view(
        stroke=None
    )
)
combined_plot
Recreate same plot as above but formatted for paper
# Plot score distributions for each region (compact, paper-sized panels)

# Row order on the y-axis, shared by both panels
region_order = [
    "N79",
    "N89",
    "N99",
    "N109",
    "N119",
    "N167",
    "N224",
    "N365",
    "N373",
    "N390",
    "N395",
    "\u03B1-DG binding sites",
    "LAMP1 binding sites",
]

# What differs between the two panels
panel_specs = [
    {
        "key": "121F",
        "title_text": "antibody 12.1F",
        "title_color": "#999933",
        "x_field": "escape_121F:Q",
        "x_title": None,
        "tooltip_field": "escape_121F",
        "tooltip_title": "12.1F escape",
    },
    {
        "key": "89F",
        "title_text": "antibody 8.9F",
        "title_color": "#117733",
        "x_field": "escape_89F:Q",
        "x_title": "escape",
        "tooltip_field": "escape_89F",
        "tooltip_title": "8.9F escape",
    },
]

paper_panels = {}
for spec in panel_specs:
    base = alt.Chart(
        # NaN is never equal to itself, so this keeps labelled rows only
        merged_df.query("glycan_number == glycan_number"),
        title=alt.TitleParams(
            text=spec["title_text"],
            color=spec["title_color"],
        ),
    )
    points = base.mark_circle(opacity=0.15, size=5, color="#000000")
    y_encoding = alt.Y(
        "glycan_number:N",
        title="GPC site",
        sort=region_order,
        axis=alt.Axis(
            domainWidth=1,
            domainColor="black",
            tickColor="black",
        ),
    )
    x_encoding = alt.X(
        spec["x_field"],
        title=spec["x_title"],
        axis=alt.Axis(
            values=[0, 1, 2, 3, 4, 5],
            domainWidth=1,
            domainColor="black",
            tickColor="black",
        ),
        scale=alt.Scale(domain=[-0.1, 5.1]),
    )
    hover = [
        "site",
        "wildtype",
        "mutant",
        alt.Tooltip(
            spec["tooltip_field"], format=".2f", title=spec["tooltip_title"]
        ),
        alt.Tooltip(
            "effect", format=".2f", title="effect on cell entry"
        ),
    ]
    encoded = points.encode(
        y=y_encoding,
        x=x_encoding,
        yOffset="jitter:Q",
        tooltip=hover,
    )
    jittered = encoded.transform_calculate(
        # Generate Gaussian jitter with a Box-Muller transform
        jitter="sqrt(-2*log(random()))*cos(2*PI*random())"
    )
    paper_panels[spec["key"]] = jittered.properties(
        width=75,
        height=115,
    )

plot_121F = paper_panels["121F"]
plot_89F = paper_panels["89F"]

# Stack the two panels and apply the small-font styling used for the paper
stacked = alt.vconcat(
    plot_121F,
    plot_89F,
    spacing=5,
)
combined_plot = stacked.configure_axis(
    grid=False,
    labelFontSize=8,
    titleFontSize=8,
    labelFontWeight="normal",
    titleFontWeight="normal",
).configure_title(
    fontSize=8,
    fontWeight="bold",
).configure_view(
    stroke=None
)
combined_plot
# Functions
def plot_func_scores_vs_distance(contacts_file, escape_file, ax, i, func_scores_file):
    """
    Draw site mean functional effect (y) stratified by distance to the
    antibody (x: contact / proximal / distal) for one antibody on ``ax``,
    highlighting sites of strong escape.

    A site counts as strong escape when its summed (floored at 0) escape
    is more than 10 times the median summed escape over all sites. Sites
    without strong escape are only drawn in the contact category.

    Parameters
    ----------
    contacts_file : str
        CSV with ``position`` and ``distance`` columns. The antibody name
        is taken from the third "_"-separated part of the file name.
    escape_file : str
        CSV with ``site``, ``escape_median`` and ``poor_cell_entry``.
    ax : matplotlib.axes.Axes
        Axes to draw on.
    i : int
        Panel index: panel 0 keeps its y-axis, panel 5 keeps the legend.
    func_scores_file : str
        CSV with ``site``, ``mutant``, ``times_seen``, ``n_selections``
        and ``effect``.

    Notes
    -----
    Reads the module-level thresholds ``min_times_seen`` and
    ``n_selections``. Reseeds numpy's global random generator with 0 so
    the jitter is identical on every run. ``sns.despine()`` acts on the
    current figure.
    """
    antibody_name = contacts_file.split("/")[-1].split("_")[2][:-4]

    # One row per position, keeping the first distance listed for it
    contacts_df = (
        pd.read_csv(contacts_file)
        .groupby(["position"])
        .aggregate({"distance": "first"})
        .reset_index()
        .sort_values(by=["distance", "position"])
        .rename(columns={"position": "site"})
        .reset_index(drop=True)
    )

    # Site summed escape, excluding mutations with poor cell entry and
    # flooring escape scores at 0
    escape_df = (
        pd.read_csv(escape_file)
        .query("poor_cell_entry == False")
        .assign(escape_median=lambda df: df["escape_median"].clip(lower=0))
        .groupby(["site"])
        .aggregate({"escape_median": "sum"})
        .reset_index()
    )

    # Calculate site averages for functional scores but filter
    # for stop codons, min times seen, and min selections
    all_effects = pd.read_csv(func_scores_file)
    site_effects = (
        all_effects
        .loc[
            (all_effects["mutant"] != "*")
            & (all_effects["times_seen"] >= min_times_seen)
            & (all_effects["n_selections"] >= n_selections)
        ]
        .groupby(["site"])
        .aggregate({"effect": "mean"})
        .reset_index()
    )

    merged_df = (
        escape_df
        .merge(site_effects, how="left", on=["site"], validate="one_to_one")
        .merge(contacts_df, how="left", on="site", validate="one_to_one")
        # Sites missing from the contacts table get the sentinel distance
        # 100. Only the distance column is filled so that a missing
        # functional effect stays missing instead of becoming 100.
        .fillna({"distance": 100})
    )

    # Mark sites with strong escape
    cutoff = merged_df["escape_median"].median() * 10
    merged_df["strong escape"] = merged_df["escape_median"] > cutoff

    # Re-map distance for plotting and sort
    merged_df["distance"] = merged_df["distance"].map({
        100: 3,
        4: 0,
        8: 1.5,
    })
    merged_df = merged_df.sort_values(by="distance")

    # Add jitter to x values; seeded like plot_escape_vs_contact_distance
    # so the saved figure is reproducible
    np.random.seed(0)
    merged_df["jittered_x"] = (
        merged_df["distance"]
        + np.random.normal(0, 0.1, merged_df["distance"].shape)
    )

    # Keep all contact sites, plus strong escape sites at any distance
    merged_df = merged_df.loc[
        (merged_df["distance"] == 0) | merged_df["strong escape"]
    ]

    # Plot escape vs functional score
    chart = sns.scatterplot(
        data=merged_df,
        x="jittered_x",
        y="effect",
        hue="strong escape",
        edgecolor=None,
        linewidth=0.5,
        palette={False: "#00000026", True: "#EE7733CC"},
        s=20,
        ax=ax,
    )

    # antibody -> (displayed title, title color)
    title_style = {
        "2510C": ("25.10C", "#44AA99"),
        "121F": ("12.1F", "#999933"),
        "377H": ("37.7H", "#AA4499"),
        "256A": ("25.6A", "#AA4499"),
        "372D": ("37.2D", "#AA4499"),
        "89F": ("8.9F", "#117733"),
    }
    if antibody_name in title_style:
        title_text, title_color = title_style[antibody_name]
        chart.set_title(title_text, fontsize=8, color=title_color)

    xticks = [0, 1.5, 3]
    chart.set_xticks(xticks)
    x_labels = [
        "contact",
        "proximal",
        "distal",
    ]
    chart.set_xticklabels(
        labels=x_labels, rotation=90, horizontalalignment="center", fontsize=8
    )
    chart.set_ylabel("site mean\neffect on cell entry", fontsize=8)
    chart.set_ylim(-5, 1)
    chart.set_xlim(-0.75, 3.75)
    yticks = [-4, -2, 0]
    chart.set_yticks(yticks)
    chart.set_yticklabels(labels=["-4", "-2", "0"], fontsize=8)
    chart.set(xlabel=None)

    # Make only one legend
    if i == 5:
        sns.move_legend(
            chart,
            "upper left",
            bbox_to_anchor=(1, 1),
            fontsize=8,
            markerscale=1,
            handletextpad=0.1,
            title="site of\nstrong\nescape",
            title_fontproperties={
                "size": 8,
            },
            frameon=False,
            borderaxespad=0.1,
            reverse=True,
        )
        # Add edges to legend markers to match scatter plot. Newer
        # matplotlib exposes the handles as `legend_handles`; older
        # releases only have `legendHandles`.
        legend = chart.legend_
        handles = getattr(legend, "legend_handles", None)
        if handles is None:
            handles = legend.legendHandles
        for handle in handles:
            handle.set_edgecolor(None)
            handle.set_linewidths(0.5)
    else:
        # No legend is created when there is nothing to plot
        legend = ax.get_legend()
        if legend is not None:
            legend.remove()

    # Change all spines
    for side in ["top", "bottom", "left", "right"]:
        chart.spines[side].set_linewidth(1)

    # Only keep the first y-axis
    if i != 0:
        chart.spines["left"].set_linewidth(0)
        chart.set_yticks([])
        chart.set_yticklabels([])
        chart.set_ylabel("")

    chart.tick_params(axis="both", length=4, width=1)
    chart.grid(False)
    sns.despine()
Plot functional scores stratified by distance to antibody while highlighting strong escape sites (i.e., sites whose summed escape is more than 10-fold greater than the median summed escape across all sites) for all antibodies. Sites without strong escape are only shown for contact sites, because there are too many such sites in the proximal and distal categories.
# One panel per antibody; contacts and escape are parallel lists
fig, axes = plt.subplots(1, 6, figsize=(6, 2))
for i, (contacts_file, escape_file) in enumerate(zip(contacts, escape)):
    plot_func_scores_vs_distance(contacts_file, escape_file, axes[i], i, func_scores)

# Make output dir if doesn't exist. makedirs also creates missing parent
# directories and does not fail when the directory is already there.
os.makedirs(out_dir, exist_ok=True)

# Save fig
fig.savefig(func_distance_image_path)
# Functions
# Size of LASV protein; sites without a row in the escape data are
# counted as distal sites without strong escape
_PROTEIN_LENGTH = 491

# Plotted x position of each distance class (these are also the x ticks)
_CONTACT_X = 0
_PROXIMAL_X = 1.5
_DISTAL_X = 3

# Displayed title and title color for each antibody
_ANTIBODY_TITLES = {
    "2510C" : ("25.10C", "#44AA99"),
    "121F" : ("12.1F", "#999933"),
    "377H" : ("37.7H", "#AA4499"),
    "256A" : ("25.6A", "#AA4499"),
    "372D" : ("37.2D", "#AA4499"),
    "89F" : ("8.9F", "#117733"),
}

# Sites that get a text label in each antibody panel, stored as
# site : (x offset, y offset) of the label relative to the plotted point
_SITE_LABEL_OFFSETS = {
    "89F" : {
        119 : (0.05, 2),
        125 : (0.2, 1),
        129 : (0.05, 2),
        138 : (0.05, 2),
        150 : (0.05, 2),
    },
    "377H" : {
        398 : (0.25, 0),
        401 : (0.05, 2),
        402 : (0.05, 2),
        404 : (0.05, 2),
    },
    "256A" : {
        401 : (0.05, 2),
        404 : (0.05, 2),
    },
    "2510C" : {
        76 : (0.05, 2),
        99 : (0.05, 2),
        101 : (-0.75, 2),
        228 : (0.05, 2),
    },
    "121F" : {
        89 : (0.05, 2),
        92 : (0.05, 2),
        111 : (-1.25, 1),
        127 : (0.1, 2),
        135 : (0, 3),
        160 : (-1.25, 1),
    },
    "372D" : {
        149 : (0.05, 2),
        395 : (0.05, 2),
        397 : (0.05, 2),
        398 : (-1.25, 2),
    },
}


def _percent_strong(strong, total):
    """
    Return the percent of `total` sites that are strong escape
    sites, or NaN when there are no sites in the class.
    """
    if total == 0:
        return float("nan")
    return (strong / total) * 100


def _annotate_site_counts(chart, x, strong, total):
    """
    Write the number of strong escape sites (orange) and of the
    remaining sites (black) at the top of the column at `x`.
    """
    chart.text(
        x,
        91,
        f"{strong}",
        fontsize=7,
        horizontalalignment="center",
        color="#EE7733",
    )
    chart.text(
        x,
        85,
        f"{total-strong}",
        fontsize=7,
        horizontalalignment="center",
        color="#000000"
    )


def plot_escape_vs_contact_distance(contacts_file, escape_file, ax, i):
    """
    This function creates a plot of
    site escape stratified by contact.

    Parameters
    ----------
    contacts_file : str
        Path to a CSV with `position` and `distance` columns. The
        antibody name is parsed from the file name, which must look
        like `antibody_contacts_<name>.csv`.
    escape_file : str
        Path to a CSV with `site`, `escape_median` and
        `poor_cell_entry` columns.
    ax : matplotlib axes
        Axes the panel is drawn on.
    i : int
        Index of the panel. Panel 0 keeps its y-axis and panel 5
        keeps the legend; both are removed from every other panel.

    Returns
    -------
    None
        The panel is drawn on `ax` and summary statistics are
        printed. `sns.despine()` is called without arguments.
    """
    antibody_name = contacts_file.split("/")[-1].split("_")[2][:-4]

    # Load data as dataframe
    contacts_df = pd.read_csv(contacts_file)
    escape_df = pd.read_csv(escape_file)

    # Filter contacts df to one distance per site
    contacts_df = (
        contacts_df
        .groupby(["position"])
        .aggregate({"distance" : "first"})
        .reset_index()
        .sort_values(by=["distance", "position"])
        .rename(columns={"position" : "site"})
        .reset_index(drop=True)
    )

    # Drop low functional score mutations, floor escape scores at 0 and
    # calculate site sums. `assign` builds a new frame rather than writing
    # into the filtered result of `query` (chained assignment).
    escape_df = (
        escape_df
        .query("poor_cell_entry == False")
        .assign(escape_median=lambda df: df["escape_median"].clip(lower=0))
        .groupby(["site"])
        .aggregate({"escape_median" : "sum"})
        .reset_index()
    )

    # Merge escape and contacts dataframes; every missing value in the
    # merged frame (sites absent from the contacts file) is filled with 100
    merged_df = (
        escape_df.merge(
            contacts_df,
            how="left",
            on="site",
            validate="one_to_one",
        )
        .fillna(100)
    )

    # Mark sites with strong escape
    cutoff = escape_df["escape_median"].median() * 10
    merged_df["strong escape"] = merged_df["escape_median"] > cutoff

    # Re-map distance for plotting and sort; any distance other than
    # 4, 8 or the fill value 100 becomes NaN
    merged_df["distance"] = merged_df["distance"].map({
        100 : _DISTAL_X,
        4 : _CONTACT_X,
        8 : _PROXIMAL_X,
    })
    merged_df = merged_df.sort_values(by="distance")

    # Add jitter to x values. A local generator seeded with 0 draws the same
    # values as `np.random.seed(0)` + `np.random.normal` without resetting
    # the global random state as a side effect.
    jitter = np.random.RandomState(0).normal(0, 0.1, merged_df["distance"].shape)
    merged_df["jittered_x"] = merged_df["distance"] + jitter

    chart = sns.scatterplot(
        data=merged_df,
        x="jittered_x",
        y="escape_median",
        hue="strong escape",
        edgecolor=None,
        linewidth=0.5,
        palette={False : "#00000026", True : "#EE7733CC"},
        s=20,
        ax=ax,
    )

    # Antibodies without an entry get no title
    if antibody_name in _ANTIBODY_TITLES:
        title, title_color = _ANTIBODY_TITLES[antibody_name]
        chart.set_title(title, fontsize=8, color=title_color)

    chart.set_xticks([_CONTACT_X, _PROXIMAL_X, _DISTAL_X])
    chart.set_xticklabels(
        labels=["contact", "proximal", "distal"],
        rotation=90,
        horizontalalignment="center",
        fontsize=8,
    )
    chart.set_ylabel("site escape", fontsize=8)
    chart.set_ylim(-2.5, 95)
    chart.set_xlim(-0.75, 3.75)
    chart.set_yticks([0, 20, 40, 60, 80])
    chart.set_yticklabels(labels=["0", "20", "40", "60", "80"], fontsize=8)
    chart.set(xlabel=None)

    # Make only one legend
    if i == 5:
        sns.move_legend(
            chart,
            "upper left",
            bbox_to_anchor=(1, 1),
            fontsize=8,
            markerscale=1,
            handletextpad=0.1,
            title="site of\nstrong\nescape",
            title_fontproperties={"size" : 8},
            frameon=False,
            borderaxespad=0.1,
            reverse=True,
        )
        # Add edges to legend markers to match scatter plot.
        # `legendHandles` was renamed to `legend_handles` in matplotlib 3.7
        # and removed in 3.9, so prefer the new name when it exists.
        legend = chart.legend_
        handles = getattr(legend, "legend_handles", None)
        if handles is None:
            handles = legend.legendHandles
        for handle in handles:
            handle.set_edgecolor(None)
            handle.set_linewidths(0.5)
    else:
        ax.get_legend().remove()

    # Change all spines
    for side in ["top", "bottom", "left", "right"]:
        chart.spines[side].set_linewidth(1)
    # Only keep the first y-axis
    if i != 0:
        chart.spines["left"].set_linewidth(0)
        chart.set_yticks([])
        chart.set_yticklabels([])
        chart.set_ylabel("")
    chart.tick_params(axis="both", length=4, width=1)

    chart.grid(False)
    sns.despine()

    # Plot cutoff line
    chart.axhline(
        y = cutoff,
        color = "#000000",
        linestyle = "--",
        alpha=0.5,
        linewidth=1,
    )

    # Count all sites and strong escape sites in each distance class
    is_strong = merged_df["strong escape"]
    n_sites = {}
    n_strong = {}
    for x in (_CONTACT_X, _PROXIMAL_X, _DISTAL_X):
        in_class = merged_df["distance"] == x
        n_sites[x] = int(in_class.sum())
        n_strong[x] = int((in_class & is_strong).sum())
    # The distal total is every site of the protein that is neither contact
    # nor proximal, which includes sites with no row in the escape data
    distal_total = _PROTEIN_LENGTH - n_sites[_PROXIMAL_X] - n_sites[_CONTACT_X]

    # Report the counts and add them to top of plot
    print(antibody_name)
    for label, x, total in (
        ("Contacts", _CONTACT_X, n_sites[_CONTACT_X]),
        ("Proximal", _PROXIMAL_X, n_sites[_PROXIMAL_X]),
        ("Distal", _DISTAL_X, distal_total),
    ):
        print(f"{label} with strong escape: {_percent_strong(n_strong[x], total):.1f}%")
        _annotate_site_counts(chart, x, n_strong[x], total)

    # Calculate stats for summed escape across all sites
    print(f"Cumulative escape across all sites: {escape_df['escape_median'].sum()}")
    print(f"Median escape across all sites: {escape_df['escape_median'].median()}")
    print(f"Mean escape across all sites: {escape_df['escape_median'].mean()}")
    print()

    # Label points on each scatter plot; rows are visited in index order so
    # the labels are drawn in the same order as a loop over row labels
    label_offsets = _SITE_LABEL_OFFSETS.get(antibody_name, {})
    rows = merged_df.sort_index()
    for site, x_pos, y_pos in zip(rows["site"], rows["jittered_x"], rows["escape_median"]):
        if site not in label_offsets:
            continue
        x_offset, y_offset = label_offsets[site]
        chart.text(
            x_pos + x_offset,
            y_pos + y_offset,
            f"{site}",
            fontsize=6,
            horizontalalignment="left",
            color="#EE7733",
        )
Plot summed site escape scores stratified by distance to the antibody while highlighting sites of strong escape (i.e., sites whose summed escape is more than 10-fold greater than the median across all sites) for all antibodies.
# One row of six panels, one panel per antibody
fig, axes = plt.subplots(1, 6, figsize=(6,2))
# NOTE(review): `contacts` and `escape` are presumably parallel lists of the
# contact / filtered escape CSV paths built in an earlier cell -- confirm
for i in range(len(contacts)):
    plot_escape_vs_contact_distance(contacts[i], escape[i], axes[i], i)
# Make output dir if doesn't exist; `makedirs` also creates missing parent
# directories and `exist_ok` avoids the check-then-create race of
# `os.path.exists` followed by `os.mkdir`
os.makedirs(out_dir, exist_ok=True)
# Save fig
plt.savefig(saved_image_path)
2510C Contacts with strong escape: 23.1% Proximal with strong escape: 14.3% Distal with strong escape: 0.2% Cumulative escape across all sites: 555.38503226 Median escape across all sites: 0.7075149999999999 Mean escape across all sites: 1.303720733004695 121F Contacts with strong escape: 33.3% Proximal with strong escape: 10.0% Distal with strong escape: 6.1% Cumulative escape across all sites: 683.2671576500001 Median escape across all sites: 0.560305 Mean escape across all sites: 1.6039135156103288 377H Contacts with strong escape: 14.3% Proximal with strong escape: 0.0% Distal with strong escape: 0.7% Cumulative escape across all sites: 791.4568553070001 Median escape across all sites: 1.0570673 Mean escape across all sites: 1.7946867467278913 256A Contacts with strong escape: 9.1% Proximal with strong escape: 0.0% Distal with strong escape: 0.9% Cumulative escape across all sites: 318.95216001250003 Median escape across all sites: 0.3497 Mean escape across all sites: 0.7504756706176471 372D Contacts with strong escape: 18.2% Proximal with strong escape: 0.0% Distal with strong escape: 4.3% Cumulative escape across all sites: 171.768081009 Median escape across all sites: 0.13061999999999999 Mean escape across all sites: 0.3859956876606742
89F Contacts with strong escape: 77.8% Proximal with strong escape: 66.7% Distal with strong escape: 1.5% Cumulative escape across all sites: 792.37621389 Median escape across all sites: 0.646014 Mean escape across all sites: 1.800855031568182