Analyze mutational accessibility for GPC to escape antibodies
In [1]:
# Imports
import os
import warnings
import pandas as pd
import altair as alt
import seaborn as sns
import matplotlib.pyplot as plt
from Bio import SeqIO, AlignIO
# Hex colors used as the default plotting palette
# (order re-arranged for plot)
tol_muted_adjusted = [
    "#AA4499", "#88CCEE", "#EE7733", "#44AA99", "#1f78b4",
    "#CC6677", "#117733", "#999933", "#DDCC77", "#CC3311",
    "#882255", "#000000", "#DDDDDD",
]

# Seaborn setup: rc overrides first, then the "ticks" style, then the palette
sns.set(rc={"figure.dpi": 300, "savefig.dpi": 300, "svg.fonttype": "none"})
sns.set_style("ticks")
sns.set_palette(tol_muted_adjusted)

# Ignore all warnings from here on
warnings.simplefilter("ignore")

# Remove Altair's cap on the number of rows in a chart's data
_ = alt.data_transformers.disable_max_rows()
In [2]:
# this cell is tagged as `parameters` for papermill parameterization
# Every name below is a None placeholder; the following cell assigns the
# actual file paths used by the rest of the notebook.
# Filtered antibody-escape CSVs, one per antibody
filtered_escape_377H = None
filtered_escape_89F = None
filtered_escape_2510C = None
filtered_escape_121F = None
filtered_escape_256A = None
filtered_escape_372D = None
# Filtered functional-effect CSV (293T)
filtered_func_293T = None
# FASTA file holding the Josiah nucleotide reference sequence
Josiah_sequence = None
# Output directory and the HTML / SVG figure files written by this notebook
out_dir = None
func_vs_escape = None
func_vs_escape_svg = None
In [3]:
# Parameters
# NOTE(review): presumably this cell is injected by papermill and overrides
# the None placeholders of the tagged `parameters` cell - confirm before
# editing by hand.
# All paths are relative (no leading "/" or "../").

# Per-antibody filtered escape CSVs
filtered_escape_377H = (
"results/filtered_antibody_escape_CSVs/377H_filtered_mut_effect.csv"
)
filtered_escape_89F = (
"results/filtered_antibody_escape_CSVs/89F_filtered_mut_effect.csv"
)
filtered_escape_2510C = (
"results/filtered_antibody_escape_CSVs/2510C_filtered_mut_effect.csv"
)
filtered_escape_121F = (
"results/filtered_antibody_escape_CSVs/121F_filtered_mut_effect.csv"
)
filtered_escape_256A = (
"results/filtered_antibody_escape_CSVs/256A_filtered_mut_effect.csv"
)
filtered_escape_372D = (
"results/filtered_antibody_escape_CSVs/372D_filtered_mut_effect.csv"
)
# Functional effects (293T) and the Josiah reference sequence
filtered_func_293T = "results/filtered_func_effect_CSVs/293T_filtered_func_effects.csv"
Josiah_sequence = "data/Josiah_nucleotide_reference_NC_004296.fasta"
# Output directory plus the interactive (HTML) and static (SVG) figure files
out_dir = "results/antibody_escape_profiles/"
func_vs_escape = (
"results/antibody_escape_profiles/antibody_escape_vs_func_effect_all_muts.html"
)
func_vs_escape_svg = (
"results/antibody_escape_profiles/antibody_escape_vs_func_effect_all_muts.svg"
)
In [4]:
# Uncomment the assignments below when running this notebook interactively (they use `../`-relative paths)
# filtered_escape_377H = "../results/filtered_antibody_escape_CSVs/377H_filtered_mut_effect.csv"
# filtered_escape_89F = "../results/filtered_antibody_escape_CSVs/89F_filtered_mut_effect.csv"
# filtered_escape_2510C = "../results/filtered_antibody_escape_CSVs/2510C_filtered_mut_effect.csv"
# filtered_escape_121F = "../results/filtered_antibody_escape_CSVs/121F_filtered_mut_effect.csv"
# filtered_escape_256A = "../results/filtered_antibody_escape_CSVs/256A_filtered_mut_effect.csv"
# filtered_escape_372D = "../results/filtered_antibody_escape_CSVs/372D_filtered_mut_effect.csv"
# filtered_func_293T = "../results/filtered_func_effect_CSVs/293T_filtered_func_effects.csv"
# Josiah_sequence = "../data/Josiah_nucleotide_reference_NC_004296.fasta"
# out_dir = "../results/antibody_escape_profiles/"
# func_vs_escape = "../results/antibody_escape_profiles/antibody_escape_vs_func_effect_all_muts.html"
# func_vs_escape_svg = "../results/antibody_escape_profiles/antibody_escape_vs_func_effect_all_muts.svg"
In [5]:
# Dictionary of codon values and corresponding amino acids
# (keys are upper-case DNA codons; "*" marks a stop codon)
codon_chart = {
    'ATA':'I', 'ATC':'I', 'ATT':'I', 'ATG':'M',
    'ACA':'T', 'ACC':'T', 'ACG':'T', 'ACT':'T',
    'AAC':'N', 'AAT':'N', 'AAA':'K', 'AAG':'K',
    'AGC':'S', 'AGT':'S', 'AGA':'R', 'AGG':'R',
    'CTA':'L', 'CTC':'L', 'CTG':'L', 'CTT':'L',
    'CCA':'P', 'CCC':'P', 'CCG':'P', 'CCT':'P',
    'CAC':'H', 'CAT':'H', 'CAA':'Q', 'CAG':'Q',
    'CGA':'R', 'CGC':'R', 'CGG':'R', 'CGT':'R',
    'GTA':'V', 'GTC':'V', 'GTG':'V', 'GTT':'V',
    'GCA':'A', 'GCC':'A', 'GCG':'A', 'GCT':'A',
    'GAC':'D', 'GAT':'D', 'GAA':'E', 'GAG':'E',
    'GGA':'G', 'GGC':'G', 'GGG':'G', 'GGT':'G',
    'TCA':'S', 'TCC':'S', 'TCG':'S', 'TCT':'S',
    'TTC':'F', 'TTT':'F', 'TTA':'L', 'TTG':'L',
    'TAC':'Y', 'TAT':'Y', 'TAA':'*', 'TAG':'*',
    'TGC':'C', 'TGT':'C', 'TGA':'*', 'TGG':'W',
}


def check_if_AA_one_mutation_away(wt_codon, mutant_AA):
    """
    Check whether an amino acid can be reached from a codon by changing
    at most one nucleotide.

    Parameters
    ----------
    wt_codon : str or str-convertible sequence object
        Three-letter DNA codon. It is converted with ``str()`` and
        upper-cased before being looked up in ``codon_chart``, so the
        lookup does not depend on how the passed object hashes.
    mutant_AA : str
        One-letter amino acid code ("*" for stop) to test.

    Returns
    -------
    bool
        True if ``wt_codon`` already encodes ``mutant_AA`` or if any
        single-nucleotide substitution of it does; False otherwise.

    Raises
    ------
    KeyError
        If the normalized codon is not a key of ``codon_chart`` (e.g. it
        is empty, has the wrong length, or contains a non-ACGT base).
    """
    codon = str(wt_codon).upper()
    if codon not in codon_chart:
        raise KeyError(
            f"'{codon}' is not a valid DNA codon (expected three bases from ACGT)"
        )

    # The wildtype codon itself may already encode the amino acid
    if codon_chart[codon] == mutant_AA:
        return True

    # Try every base at every one of the three codon positions
    for i in range(3):
        for new_base in "ATGC":
            mutated_codon = codon[:i] + new_base + codon[i + 1:]
            if codon_chart[mutated_codon] == mutant_AA:
                return True

    # No single-nucleotide change produces the amino acid
    return False
# Escape CSVs, in the order the antibodies are plotted
escape = [
    filtered_escape_2510C,
    filtered_escape_121F,
    filtered_escape_377H,
    filtered_escape_256A,
    filtered_escape_372D,
    filtered_escape_89F,
]

# Start from the functional effects and add one escape column per antibody
merged_df = pd.read_csv(filtered_func_293T)

for antibody_file in escape:
    # Antibody name is the file-name prefix before the first underscore
    antibody_name = antibody_file.split("/")[-1].split("_")[0]
    escape_col = "escape_" + antibody_name

    # Drop mutations flagged as poor cell entry
    escape_df = pd.read_csv(antibody_file).query("poor_cell_entry == False")

    # Clip negative escape to 0 and name the column after the antibody.
    # `.assign` builds a new frame instead of assigning into the filtered one.
    escape_df = (
        escape_df
        .assign(escape_median=escape_df["escape_median"].clip(lower=0))
        .rename(columns={"escape_median": escape_col})
    )

    # Left merge keeps every functional-effect row; `validate` raises if a
    # (site, wildtype, mutant) key is duplicated on either side
    merged_df = merged_df.merge(
        escape_df[["site", "wildtype", "mutant", escape_col]],
        how="left",
        on=["site", "wildtype", "mutant"],
        validate="one_to_one",
    )

# Clip functional effects above 0 down to 0
merged_df = merged_df.assign(effect=merged_df["effect"].clip(upper=0))

# Read josiah sequence
josiah_seq = SeqIO.read(Josiah_sequence, "fasta").seq


def _josiah_wt_codon(site):
    """Return the josiah codon for a 1-based `site` as a plain string."""
    # NOTE(review): assumes the FASTA starts at the first codon of the protein
    # and that `site` is sequential 1-based numbering - confirm.
    start = (site - 1) * 3
    return str(josiah_seq[start:start + 3])


# Mark each mutation as being one nucleotide accessible
# from the josiah wt codon
merged_df["single_nucleotide_accessible"] = [
    check_if_AA_one_mutation_away(_josiah_wt_codon(site), mutant)
    for site, mutant in zip(merged_df["site"], merged_df["mutant"])
]
Compare antibody escape vs functional effects
In [6]:
# Fixed x-axis (escape) limits per antibody for the interactive plots
altair_escape_xlims = {
    "2510C": (-0.4, 7),
    "121F": (-0.34, 6),
    "377H": (-0.43, 7.5),
    "256A": (-0.4, 7),
    "372D": (-0.2, 3.5),
    "89F": (-0.4, 7),
}


def _escape_vs_effect_row(plot_df, opacity, row_title):
    """
    Build one horizontally concatenated row of escape-vs-effect scatter
    plots, one panel per antibody file in `escape`.

    Parameters
    ----------
    plot_df : pandas.DataFrame
        Rows to plot; needs the columns "site", "wildtype", "mutant",
        "effect", "single_nucleotide_accessible" and one
        "escape_<antibody>" column per antibody.
    opacity : float
        Point opacity used for every panel in the row.
    row_title : str
        Title of the concatenated row.

    Returns
    -------
    The Altair chart returned by `alt.hconcat`.
    """
    subplots = []
    for antibody_file in escape:
        antibody_name = antibody_file.split("/")[-1].split("_")[0]
        escape_col = "escape_" + antibody_name

        # Set x-axis limits for each antibody
        if antibody_name in altair_escape_xlims:
            fixed_min, fixed_max = altair_escape_xlims[antibody_name]
        else:
            fixed_min = fixed_max = None
            print(f"Error! No xlims set for {antibody_name}!")

        curr_subplot = (
            alt.Chart(plot_df, title=antibody_name)
            .mark_point(
                filled=True,
                size=75,
                opacity=opacity,
            )
            .encode(
                alt.X(
                    escape_col,
                    axis=alt.Axis(
                        title="escape",
                        domainWidth=1,
                        domainColor="black",
                        tickColor="black",
                    ),
                    scale=alt.Scale(domain=[fixed_min, fixed_max]),
                ),
                alt.Y(
                    "effect",
                    axis=alt.Axis(
                        title="effect on cell entry",
                        # Effects were clipped at 0 upstream, so label 0 as "≥0"
                        labelExpr=(
                            "datum.label == 0 ? '≥0' : datum.label"
                        ),
                        values=[-1.5, -1, -0.5, 0],
                        domainWidth=1,
                        domainColor="black",
                        tickColor="black",
                    ),
                    scale=alt.Scale(domain=[-1.6, 0.1]),
                ),
                tooltip=[
                    "site",
                    "wildtype",
                    "mutant",
                    "effect",
                    escape_col,
                    "single_nucleotide_accessible",
                ],
                color=alt.Color(
                    "single_nucleotide_accessible:N",
                    scale=alt.Scale(
                        domain=[True, False],
                        range=["#EE7733", "#000000"],
                    ),
                    legend=alt.Legend(
                        title=["single nucleotide", "accessible mutation"],
                    ),
                ),
            )
            .properties(
                width=150,
                height=150,
            )
        )
        subplots.append(curr_subplot)

    # Create row of plots
    return alt.hconcat(*subplots, spacing=5, title=row_title)


# Top row: mutations reachable by a single nucleotide change
row_1 = _escape_vs_effect_row(
    merged_df.query("single_nucleotide_accessible == True"),
    opacity=0.25,
    row_title="single nucleotide accessible mutations",
)

# Bottom row: mutations that need more than one nucleotide change
row_2 = _escape_vs_effect_row(
    merged_df.query("single_nucleotide_accessible == False"),
    opacity=0.15,
    row_title="multi-nucleotide accessible mutations",
)

escape_vs_effect = alt.vconcat(
    row_1,
    row_2,
    spacing=10,
    title="Functional effect vs antibody escape",
).configure_axis(
    grid=False,
    labelFontSize=16,
    titleFontSize=16,
    labelFontWeight="normal",
    titleFontWeight="normal",
).configure_title(
    fontSize=24,
).configure_view(
    stroke=None
)

# Make output dir (including missing parents) if it doesn't exist
os.makedirs(out_dir, exist_ok=True)

escape_vs_effect.save(func_vs_escape)
escape_vs_effect
Out[6]:
Recreate same plot formatted for paper
In [7]:
# Functions
# Per-antibody settings for the paper figure: x-axis limits, displayed
# title, title color and x tick positions
_paper_plot_settings = {
    "2510C": {
        "xlim": (-0.6, 7),
        "title": "25.10C",
        "title_color": "#44AA99",
        "xticks": [0, 2, 4, 6],
    },
    "121F": {
        "xlim": (-0.6, 7),
        "title": "12.1F",
        "title_color": "#999933",
        "xticks": [0, 2, 4, 6],
    },
    "377H": {
        "xlim": (-0.64, 7.5),
        "title": "37.7H",
        "title_color": "#AA4499",
        "xticks": [0, 2, 4, 6],
    },
    "256A": {
        "xlim": (-0.6, 7),
        "title": "25.6A",
        "title_color": "#AA4499",
        "xticks": [0, 2, 4, 6],
    },
    "372D": {
        "xlim": (-0.3, 3.5),
        "title": "37.2D",
        "title_color": "#AA4499",
        "xticks": [0, 1, 2, 3],
    },
    "89F": {
        "xlim": (-0.6, 7),
        "title": "8.9F",
        "title_color": "#117733",
        "xticks": [0, 2, 4, 6],
    },
}


def plot_func_scores_vs_escape(merged_df, ax, i, antibody_name):
    """
    Draw a scatter plot of functional effect ("effect", y) against
    antibody escape ("escape_<antibody_name>", x) on the given axes.

    Parameters
    ----------
    merged_df : pandas.DataFrame
        Data to plot; needs the columns "effect",
        "escape_<antibody_name>" and "single_nucleotide_accessible".
    ax : matplotlib axes
        Axes to draw on.
    i : int
        Column index of the panel. Only the panel with ``i == 5`` keeps
        its legend (moved outside the axes); all others drop theirs.
    antibody_name : str
        Antibody identifier, e.g. "2510C". Names missing from the
        settings table get no title, no custom x ticks and no fixed
        x limits (a message is printed).

    Returns
    -------
    None
    """
    # Set x-axis limits for each antibody
    settings = _paper_plot_settings.get(antibody_name)
    if settings is None:
        fixed_min = fixed_max = None
        print(f"Error! No xlims set for {antibody_name}!")
    else:
        fixed_min, fixed_max = settings["xlim"]

    # Plot escape vs functional score
    chart = sns.scatterplot(
        data=merged_df,
        y="effect",
        x="escape_" + antibody_name,
        hue="single_nucleotide_accessible",
        edgecolor=None,
        linewidth=0,
        # 8-digit hex: the last two digits are the alpha channel
        palette={True: "#EE773340", False: "#00000026"},
        s=20,
        ax=ax,
    )

    # Title and x ticks for known antibodies
    if settings is not None:
        chart.set_title(
            settings["title"],
            fontsize=8,
            color=settings["title_color"],
        )
        xticks = settings["xticks"]
        chart.set_xticks(xticks)
        chart.set_xticklabels(labels=[str(tick) for tick in xticks], fontsize=8)

    chart.set_ylabel("effect on cell entry", fontsize=8)
    chart.set_xlim(fixed_min, fixed_max)
    chart.set_ylim(-1.6, 0.1)
    yticks = [-1.5, -1, -0.5, 0]
    chart.set_yticks(yticks)
    # Effects are clipped at 0 upstream, hence the "≥0" label
    chart.set_yticklabels(labels=["-1.5", "-1.0", "-0.5", "≥0"], fontsize=8)
    chart.set(xlabel=None)

    # Keep a legend only on the last column
    if i == 5:
        sns.move_legend(
            chart,
            "upper left",
            bbox_to_anchor=(1, 1),
            fontsize=8,
            markerscale=1,
            handletextpad=0.1,
            title="single\nnucleotide\naccessible\nmutation",
            title_fontproperties={
                "size": 8,
            },
            frameon=False,
            borderaxespad=0.1,
            reverse=True,
        )
        # `legendHandles` was renamed to `legend_handles` in Matplotlib 3.7
        # and the old name was later removed; support both spellings
        legend = chart.legend_
        handles = getattr(legend, "legend_handles", None)
        if handles is None:
            handles = legend.legendHandles
        # Adjust legend marker edges
        # NOTE(review): assumes the handles expose `set_linewidths`
        # (collection artists) - confirm against the installed seaborn.
        for ha in handles:
            ha.set_edgecolor(None)
            ha.set_linewidths(0.5)
    else:
        # A legend may be absent (e.g. nothing was drawn), so guard the removal
        legend = ax.get_legend()
        if legend is not None:
            legend.remove()

    # Change all spines
    for axis in ["top", "bottom", "left", "right"]:
        chart.spines[axis].set_linewidth(1)
    chart.tick_params(axis="both", length=3, width=1)
    chart.grid(False)
    sns.despine()
In [8]:
# 2 x 6 grid: top row = single-nucleotide-accessible mutations,
# bottom row = all other mutations; one column per antibody
fig, axes = plt.subplots(
    2,
    6,
    figsize=(7, 3.5),
    sharey=True,
)

# Split the data once instead of re-querying for every antibody
accessibility_rows = [
    merged_df.query("single_nucleotide_accessible == True"),
    merged_df.query("single_nucleotide_accessible == False"),
]

for i, antibody_file in enumerate(escape):
    antibody_name = antibody_file.split("/")[-1].split("_")[0]
    for row, row_df in enumerate(accessibility_rows):
        plot_func_scores_vs_escape(row_df, axes[row][i], i, antibody_name)

# Common X axis label
fig.text(0.5, 0, "escape", ha="center", rotation="horizontal", fontsize=8)
fig.tight_layout(w_pad=0.5)

# Make output dir (including missing parents) if it doesn't exist
os.makedirs(out_dir, exist_ok=True)

# Save fig
fig.savefig(func_vs_escape_svg)