Create filtered antibody escape CSVs based on configuration for plotting
This notebook filters the antibody escape data with the same filters that are applied when plotting the data and writes the filtered escape CSVs. In addition, escape profiles and logo plots are created for each antibody selection.
# Imports
import os
import warnings
import dmslogo
import pandas as pd
import seaborn as sns
import matplotlib.colors
import matplotlib.pyplot as plt
# Plotting colors: the default seaborn palette for every figure in this
# notebook (the name suggests an adjusted version of Paul Tol's "muted" scheme).
tol_muted_adjusted = [
    "#000000", "#CC6677", "#1f78b4", "#DDCC77",
    "#117733", "#882255", "#88CCEE", "#44AA99",
    "#999933", "#AA4499", "#DDDDDD",
]
# Create color palette
def color_gradient_hex(start, end, n):
    """Return `n` hex colors forming a gradient from `start` to `end`.

    Color function from polyclonal. A linear segmented colormap with `n`
    levels is built between the two colors and sampled at the integer
    indices 0 .. n-1.
    """
    gradient = matplotlib.colors.LinearSegmentedColormap.from_list(
        "_", [start, end], N=n
    )
    sampled_rgba = gradient(list(range(n)))
    return [matplotlib.colors.rgb2hex(rgba) for rgba in sampled_rgba]
# Seaborn style settings: 300 dpi on screen and on disk, and SVG text kept
# as text ("svg.fonttype": "none")
figure_rc = {
    "figure.dpi": 300,
    "savefig.dpi": 300,
    "svg.fonttype": "none",
}
sns.set(rc=figure_rc)
sns.set_style("ticks")
sns.set_palette(tol_muted_adjusted)

# Suppress warnings (every category, for the rest of the notebook)
warnings.simplefilter("ignore")
File paths for data:
# this cell is tagged as `parameters` for papermill parameterization
# Every parameter defaults to None here; the "# Parameters" cell that follows
# assigns the actual values.

# Input CSVs of averaged functional effects
func_scores_293T = None
func_scores_human_293T = None
func_scores_mastomys_293T = None
# Input CSVs of averaged antibody escape, one per antibody
escape_377H = None
escape_89F = None
escape_2510C = None
escape_121F = None
escape_256A = None
escape_372D = None
# Filter thresholds
min_times_seen = None
min_func_score = None
n_selections = None
frac_models = None
# Output directories
out_dir = None
out_dir_simplified = None
out_dir_images = None
out_dir_func = None
# Output paths of the filtered escape CSVs
filtered_escape_377H = None
filtered_escape_89F = None
filtered_escape_2510C = None
filtered_escape_121F = None
filtered_escape_256A = None
filtered_escape_372D = None
# Output paths of the simplified filtered escape CSVs
simple_filtered_escape_377H = None
simple_filtered_escape_89F = None
simple_filtered_escape_2510C = None
simple_filtered_escape_121F = None
simple_filtered_escape_256A = None
simple_filtered_escape_372D = None
# Output paths of the filtered functional effect CSVs
filtered_func_293T = None
filtered_func_human_293T = None
filtered_func_mastomys_293T = None
# Input CSVs of antibody contact sites, one per antibody
contacts_89F = None
contacts_377H = None
contacts_256A = None
contacts_2510C = None
contacts_121F = None
contacts_372D = None
# Output paths of the saved figures
func_effect_scale_bar = None
escape_scale_bar = None
saved_image_path = None
validation_image_path = None
arevirumab_escape_profile = None
# Parameters
# Values for the names declared in the `parameters` cell above; all paths are
# relative (they start with "results/" or "data/").

# Input CSVs of averaged functional effects
func_scores_293T = "results/func_effects/averages/293T_entry_func_effects.csv"
func_scores_human_293T = (
"results/func_effects/averages/human_293T_entry_func_effects.csv"
)
func_scores_mastomys_293T = (
"results/func_effects/averages/mastomys_293T_entry_func_effects.csv"
)
# Input CSVs of averaged antibody escape, one per antibody
escape_377H = "results/antibody_escape/averages/377H_mut_effect.csv"
escape_89F = "results/antibody_escape/averages/89F_mut_effect.csv"
escape_2510C = "results/antibody_escape/averages/2510C_mut_effect.csv"
escape_121F = "results/antibody_escape/averages/121F_mut_effect.csv"
escape_256A = "results/antibody_escape/averages/256A_mut_effect.csv"
escape_372D = "results/antibody_escape/averages/372D_mut_effect.csv"
# Input CSVs of antibody contact sites, one per antibody
contacts_89F = "data/antibody_contacts/antibody_contacts_89F.csv"
contacts_377H = "data/antibody_contacts/antibody_contacts_377H.csv"
contacts_256A = "data/antibody_contacts/antibody_contacts_256A.csv"
contacts_2510C = "data/antibody_contacts/antibody_contacts_2510C.csv"
contacts_121F = "data/antibody_contacts/antibody_contacts_121F.csv"
contacts_372D = "data/antibody_contacts/antibody_contacts_372D.csv"
# Filter thresholds
min_times_seen = 2
min_func_score = -1.5
n_selections = 8
frac_models = 1
# Output directories
out_dir = "results/filtered_antibody_escape_CSVs/"
out_dir_simplified = "results/simplified_filtered_antibody_escape_CSVs/"
out_dir_images = "results/antibody_escape_profiles/"
out_dir_func = "results/filtered_func_effect_CSVs/"
# Output paths of the filtered escape CSVs
filtered_escape_377H = (
"results/filtered_antibody_escape_CSVs/377H_filtered_mut_effect.csv"
)
filtered_escape_89F = (
"results/filtered_antibody_escape_CSVs/89F_filtered_mut_effect.csv"
)
filtered_escape_2510C = (
"results/filtered_antibody_escape_CSVs/2510C_filtered_mut_effect.csv"
)
filtered_escape_121F = (
"results/filtered_antibody_escape_CSVs/121F_filtered_mut_effect.csv"
)
filtered_escape_256A = (
"results/filtered_antibody_escape_CSVs/256A_filtered_mut_effect.csv"
)
filtered_escape_372D = (
"results/filtered_antibody_escape_CSVs/372D_filtered_mut_effect.csv"
)
# Output paths of the simplified filtered escape CSVs
simple_filtered_escape_377H = (
"results/simplified_filtered_antibody_escape_CSVs/377H_filtered_mut_effect.csv"
)
simple_filtered_escape_89F = (
"results/simplified_filtered_antibody_escape_CSVs/89F_filtered_mut_effect.csv"
)
simple_filtered_escape_2510C = (
"results/simplified_filtered_antibody_escape_CSVs/2510C_filtered_mut_effect.csv"
)
simple_filtered_escape_121F = (
"results/simplified_filtered_antibody_escape_CSVs/121F_filtered_mut_effect.csv"
)
simple_filtered_escape_256A = (
"results/simplified_filtered_antibody_escape_CSVs/256A_filtered_mut_effect.csv"
)
simple_filtered_escape_372D = (
"results/simplified_filtered_antibody_escape_CSVs/372D_filtered_mut_effect.csv"
)
# Output paths of the filtered functional effect CSVs
filtered_func_293T = "results/filtered_func_effect_CSVs/293T_filtered_func_effects.csv"
filtered_func_human_293T = (
"results/filtered_func_effect_CSVs/human_293T_filtered_func_effects.csv"
)
filtered_func_mastomys_293T = (
"results/filtered_func_effect_CSVs/mastomys_293T_filtered_func_effects.csv"
)
# Output paths of the saved figures
func_effect_scale_bar = "results/antibody_escape_profiles/func_effect_scale_bar.svg"
escape_scale_bar = "results/antibody_escape_profiles/escape_scale_bar.svg"
saved_image_path = "results/antibody_escape_profiles/antibody_escape_profiles.svg"
validation_image_path = "results/antibody_escape_profiles/validation_escape_profile.svg"
arevirumab_escape_profile = (
"results/antibody_escape_profiles/arevirumab_escape_profile.svg"
)
# # Uncomment to run interactively
# func_scores_293T = "../results/func_effects/averages/293T_entry_func_effects.csv"
# func_scores_human_293T = "../results/func_effects/averages/human_293T_entry_func_effects.csv"
# func_scores_mastomys_293T = "../results/func_effects/averages/mastomys_293T_entry_func_effects.csv"
# escape_377H = "../results/antibody_escape/averages/377H_mut_effect.csv"
# escape_89F = "../results/antibody_escape/averages/89F_mut_effect.csv"
# escape_2510C = "../results/antibody_escape/averages/2510C_mut_effect.csv"
# escape_121F = "../results/antibody_escape/averages/121F_mut_effect.csv"
# escape_256A = "../results/antibody_escape/averages/256A_mut_effect.csv"
# escape_372D = "../results/antibody_escape/averages/372D_mut_effect.csv"
# min_times_seen = 2
# min_func_score = -1.5
# n_selections = 8
# frac_models = 1
# out_dir = "../results/filtered_antibody_escape_CSVs/"
# out_dir_simplified = "../results/simplified_filtered_antibody_escape_CSVs/"
# out_dir_images = "../results/antibody_escape_profiles/"
# out_dir_func = "../results/filtered_func_effect_CSVs/"
# filtered_escape_377H = "../results/filtered_antibody_escape_CSVs/377H_filtered_mut_effect.csv"
# filtered_escape_89F = "../results/filtered_antibody_escape_CSVs/89F_filtered_mut_effect.csv"
# filtered_escape_2510C = "../results/filtered_antibody_escape_CSVs/2510C_filtered_mut_effect.csv"
# filtered_escape_121F = "../results/filtered_antibody_escape_CSVs/121F_filtered_mut_effect.csv"
# filtered_escape_256A = "../results/filtered_antibody_escape_CSVs/256A_filtered_mut_effect.csv"
# filtered_escape_372D = "../results/filtered_antibody_escape_CSVs/372D_filtered_mut_effect.csv"
# simple_filtered_escape_377H = "../results/simplified_filtered_antibody_escape_CSVs/377H_filtered_mut_effect.csv"
# simple_filtered_escape_89F = "../results/simplified_filtered_antibody_escape_CSVs/89F_filtered_mut_effect.csv"
# simple_filtered_escape_2510C = "../results/simplified_filtered_antibody_escape_CSVs/2510C_filtered_mut_effect.csv"
# simple_filtered_escape_121F = "../results/simplified_filtered_antibody_escape_CSVs/121F_filtered_mut_effect.csv"
# simple_filtered_escape_256A = "../results/simplified_filtered_antibody_escape_CSVs/256A_filtered_mut_effect.csv"
# simple_filtered_escape_372D = "../results/simplified_filtered_antibody_escape_CSVs/372D_filtered_mut_effect.csv"
# filtered_func_293T = "../results/filtered_func_effect_CSVs/293T_filtered_func_effects.csv"
# filtered_func_human_293T = "../results/filtered_func_effect_CSVs/human_293T_filtered_func_effects.csv"
# filtered_func_mastomys_293T = "../results/filtered_func_effect_CSVs/mastomys_293T_filtered_func_effects.csv"
# contacts_89F = "../data/antibody_contacts/antibody_contacts_89F.csv"
# contacts_377H = "../data/antibody_contacts/antibody_contacts_377H.csv"
# contacts_256A = "../data/antibody_contacts/antibody_contacts_256A.csv"
# contacts_2510C = "../data/antibody_contacts/antibody_contacts_2510C.csv"
# contacts_121F = "../data/antibody_contacts/antibody_contacts_121F.csv"
# contacts_372D = "../data/antibody_contacts/antibody_contacts_372D.csv"
# func_effect_scale_bar = "../results/antibody_escape_profiles/func_effect_scale_bar.svg"
# escape_scale_bar = "../results/antibody_escape_profiles/escape_scale_bar.svg"
# saved_image_path = "../results/antibody_escape_profiles/antibody_escape_profiles.svg"
# validation_image_path = "../results/antibody_escape_profiles/validation_escape_profile.svg"
# arevirumab_escape_profile = "../results/antibody_escape_profiles/arevirumab_escape_profile.svg"
The function below creates filtered CSVs of the antibody escape and plots escape profiles as summed site escape line plots and logo plots whose letters are colored by functional score. Unless negative escape is requested, escape scores less than 0 are clipped to 0; functional scores greater than 0 are clipped to 0, so the 0 tick of the functional scale bar must be labeled ≥0. Antibody contact sites are shaded in the logo plots.
# Cocktail escape sites: one list of top escape sites is appended for each
# call of get_filtered_csv() that picks its own sites for a cocktail antibody
cocktail_escape_sites = []

# Antibodies whose automatically chosen top sites are pooled above
_COCKTAIL_ANTIBODIES = ("121F", "372D", "89F")

# Columns kept for plotting, in the order used for the plotting data frame
_PLOT_COLUMNS = [
    "site",
    "wildtype",
    "mutant",
    "escape_median",
    "show_site",
    "shade_site",
    "shade_alpha",
    "site_escape",
]

# Per-antibody plot settings: title, title color, fixed y-limits and y ticks
_ANTIBODY_PLOT_STYLES = {
    "2510C": {
        "title": "antibody 25.10C",
        "color": "#44AA99",
        "ylim": (-7.5, 67.5),
        "yticks": [0, 25, 50],
        "yticklabels": ["0", "25", "50"],
    },
    "121F": {
        "title": "antibody 12.1F",
        "color": "#999933",
        "ylim": (-3.75, 33.75),
        "yticks": [0, 15, 30],
        "yticklabels": ["0", "15", "30"],
    },
    "377H": {
        "title": "antibody 37.7H",
        "color": "#AA4499",
        "ylim": (-9.375, 84.375),
        "yticks": [0, 35, 70],
        "yticklabels": ["0", "35", "70"],
    },
    "256A": {
        "title": "antibody 25.6A",
        "color": "#AA4499",
        "ylim": (-5, 45),
        "yticks": [0, 20, 40],
        "yticklabels": ["0", "20", "40"],
    },
    "372D": {
        "title": "antibody 37.2D",
        "color": "#AA4499",
        "ylim": (-1.875, 16.875),
        "yticks": [0, 7.5, 15],
        "yticklabels": ["0", "7.5", "15"],
    },
    "89F": {
        "title": "antibody 8.9F",
        "color": "#117733",
        "ylim": (-5, 45),
        "yticks": [0, 20, 40],
        "yticklabels": ["0", "20", "40"],
    },
}


def _style_axes(ax):
    """Give all four spines and the ticks of `ax` the shared thin styling."""
    for side in ("top", "bottom", "left", "right"):
        ax.spines[side].set_linewidth(1)
    ax.tick_params(axis="both", length=2, width=1, pad=1)


def get_filtered_csv(
    escape_file,
    contacts_file,
    func_scores,
    min_times_seen,
    min_func_score,
    n_selections,
    frac_models,
    line_plot,
    logo_plot,
    output_file=None,
    simplified_output_file=None,
    sites=None,
    negative=False,
    color_contact_sites=False,
    n_sites=491,
):
    """Filter an antibody escape CSV, optionally write it, and plot it.

    The escape data are filtered, optionally written to CSV, and drawn as a
    summed site escape line plot and a logo plot of the selected sites.

    Parameters
    ----------
    escape_file : str
        Path of the antibody escape CSV. The antibody name is the part of the
        file name before the first underscore.
    contacts_file : str
        Path of the antibody contacts CSV (columns `distance`, `position`).
    func_scores : str
        Path of the functional effects CSV.
    min_times_seen : number
        Minimum `times_seen` for an escape measurement to be kept.
    min_func_score : number
        Mutations whose functional `effect` is below this value (or that have
        no functional measurement) are flagged `poor_cell_entry`.
    n_selections : number
        Accepted for signature compatibility; not used by this function.
    frac_models : number
        Minimum `frac_models` for an escape measurement to be kept.
    line_plot, logo_plot : matplotlib axes
        Axes that receive the line plot and the logo plot.
    output_file : str, optional
        If given, the filtered escape table is written to this CSV.
    simplified_output_file : str, optional
        If given, a reduced table sorted by escape is written to this CSV.
    sites : list, optional
        Sites to show in the logo plot. If None, the 15 sites with the largest
        summed escape are used, and for the cocktail antibodies that list is
        also appended to the module-level `cocktail_escape_sites`.
    negative : bool
        If false, escape values below 0 are clipped to 0 before plotting.
    color_contact_sites : bool
        If true, contact sites are shaded "#DDCC77", otherwise "#FFFFFF".
    n_sites : int
        Number of sites on the x axis (1 .. n_sites). Sites without any
        remaining measurement get a zero-escape placeholder row.

    Returns
    -------
    None
    """
    antibody_name = escape_file.split("/")[-1].split("_")[0]

    # Read data
    escape_df = pd.read_csv(escape_file)
    func_df = pd.read_csv(func_scores)
    contacts_df = pd.read_csv(contacts_file)

    # Create mutation column to match antibody df
    # **
    # Pipeline does not filter functional scores
    # prior to filtering antibody selections
    # **
    func_df["site"] = func_df["site"].astype(str)
    func_df["mutation"] = func_df["wildtype"] + func_df["site"] + func_df["mutant"]
    tolerated_mutations = set(
        func_df.loc[func_df["effect"] >= min_func_score, "mutation"]
    )

    # Filter escape df for min times seen,
    # min fraction of models, and no stop codons
    keep_rows = (
        (escape_df["times_seen"] >= min_times_seen)
        & (escape_df["frac_models"] >= frac_models)
        & (escape_df["mutant"] != "*")
    )
    escape_df = escape_df.loc[keep_rows].reset_index(drop=True)

    # Mark mutations that are below functional cutoff
    # and replace measurements with NaN
    escape_df["poor_cell_entry"] = ~escape_df["mutation"].isin(tolerated_mutations)
    for column in ("escape_median", "escape_std"):
        escape_df[column] = escape_df[column].mask(escape_df["poor_cell_entry"])

    # Write filtered escape to csv
    if output_file is not None:
        exported_csv = escape_df[[
            "site",
            "wildtype",
            "mutant",
            "mutation",
            "escape_median",
            "escape_std",
            "times_seen",
            "n_models",
            "frac_models",
            "poor_cell_entry",
        ]].copy()
        # Add column with escape floored at 0
        exported_csv["escape_median_floor_zero"] = (
            exported_csv["escape_median"].clip(lower=0)
        )
        exported_csv.to_csv(output_file, index=False)

    # Write simplified filtered escape to csv
    if simplified_output_file is not None:
        (
            escape_df[[
                "site",
                "wildtype",
                "mutant",
                "mutation",
                "escape_median",
                "poor_cell_entry",
            ]]
            .rename(columns={"escape_median": "escape"})
            .sort_values("escape", ascending=False)
            .to_csv(simplified_output_file, index=False)
        )

    # Filter escape df for low functional score mutations; copy so that the
    # column assignments below act on an independent frame
    escape_df = escape_df.loc[~escape_df["poor_cell_entry"]].copy()

    # Clip lower scores to 0
    if not negative:
        escape_df["escape_median"] = escape_df["escape_median"].clip(lower=0)

    # Summed escape to get top escape sites to show
    summed_df = (
        escape_df
        .groupby(["site", "wildtype"])
        .aggregate({"escape_median": "sum"})
        .rename(columns={"escape_median": "site_escape"})
        .reset_index()
    )

    # Top escape sites for each antibody combined to show
    if sites is None:
        sites = sorted(summed_df.nlargest(15, "site_escape")["site"].tolist())
        if antibody_name in _COCKTAIL_ANTIBODIES:
            cocktail_escape_sites.append(sites)
    escape_df["show_site"] = escape_df["site"].isin(sites)

    # Shade contact sites in logo plot: positions whose `distance` equals 4
    # in the contacts file
    contact_sites = set(
        contacts_df.loc[contacts_df["distance"] == 4, "position"].unique()
    )
    shade_color = "#DDCC77" if color_contact_sites else "#FFFFFF"
    escape_df["shade_site"] = [
        shade_color if site in contact_sites else None
        for site in escape_df["site"]
    ]
    escape_df["shade_alpha"] = 0.35

    escape_df = escape_df.merge(
        summed_df,
        how="left",
        on=["site", "wildtype"],
        validate="many_to_one",
    )

    # Drop extra columns
    escape_df = escape_df[_PLOT_COLUMNS]

    # Fill in missing sites with zero-escape placeholder rows. The rows are
    # built by column name so they do not depend on the column order.
    missing_sites = sorted(set(range(1, n_sites + 1)) - set(escape_df["site"]))
    if missing_sites:
        placeholder_rows = pd.DataFrame({
            "site": missing_sites,
            "wildtype": "X",
            "mutant": "X",
            "escape_median": 0.0,
            "show_site": False,
            "shade_site": None,
            "shade_alpha": float("nan"),
            "site_escape": 0.0,
        })
        escape_df = pd.concat([escape_df, placeholder_rows], ignore_index=True)

    # Sort by site
    escape_df = (
        escape_df
        .sort_values(by="site")
        .astype({"mutant": "str"})
        .reset_index(drop=True)
    )

    # Merge functional and escape dfs
    min_plotted_effect = -1.75
    func_df["site"] = func_df["site"].astype("int")
    func_df = func_df.loc[func_df["mutant"] != "*"].copy()  # remove stop codons
    func_df["effect"] = func_df["effect"].clip(upper=0, lower=min_plotted_effect)
    escape_df = escape_df.merge(
        func_df,
        how="left",
        on=["site", "wildtype", "mutant"],
        validate="one_to_one",
    )
    # Missing functional values are filled as min to make less visible
    escape_df["effect"] = escape_df["effect"].fillna(min_plotted_effect)

    # Add color column for logo plots
    func_color_map = dmslogo.colorschemes.ValueToColorMap(
        minvalue=func_df["effect"].min(),
        maxvalue=func_df["effect"].max(),
        cmap=matplotlib.colors.ListedColormap(
            color_gradient_hex("white", "#000000", n=20)
        ),
    )
    escape_df = escape_df.assign(
        color=lambda df: df["effect"].map(func_color_map.val_to_color)
    )

    # Add wildtype to each site for logo plot
    escape_df["wt_site"] = escape_df["wildtype"] + escape_df["site"].map(str)

    # Set ylim for each antibody
    plot_style = _ANTIBODY_PLOT_STYLES.get(antibody_name)
    if plot_style is None:
        fixed_ymin = fixed_ymax = None
        print("Error! No ylims set!")
    else:
        fixed_ymin, fixed_ymax = plot_style["ylim"]

    # Plot escape profiles and logo plots
    _, lineplot = dmslogo.draw_line(
        escape_df,
        x_col="site",
        height_col="site_escape",
        show_col="show_site",
        ax=line_plot,
        show_color="#CC6677",
        linewidth=0.5,
        fixed_ymin=fixed_ymin,
        fixed_ymax=fixed_ymax,
    )
    lineplot.set(ylabel=None, xlabel=None)
    lineplot.set_xlim(1, n_sites)
    lineplot.set_xticks([100, 200, 300, 400])
    lineplot.set_xticklabels(
        labels=["100", "200", "300", "400"],
        rotation=90,
        horizontalalignment="center",
        fontsize=6,
    )
    _style_axes(lineplot)

    # A zero line is only drawn when negative escape values are kept
    _, logoplot = dmslogo.draw_logo(
        escape_df.loc[escape_df["show_site"]],
        x_col="site",
        letter_col="mutant",
        letter_height_col="escape_median",
        ax=logo_plot,
        xtick_col="wt_site",
        color_col="color",
        shade_color_col="shade_site",
        shade_alpha_col="shade_alpha",
        draw_line_at_zero="if_negative" if negative else "never",
        fixed_ymin=fixed_ymin,
        fixed_ymax=fixed_ymax,
    )
    logoplot.set(ylabel=None, xlabel=None)
    logoplot.set_xticklabels(
        labels=logoplot.get_xticklabels(),
        rotation=90,
        horizontalalignment="center",
        fontsize=6,
    )
    _style_axes(logoplot)

    # Set antibody specific y axis ticks and titles
    if plot_style is not None:
        for ax in (lineplot, logoplot):
            ax.set_yticks(plot_style["yticks"])
            ax.set_yticklabels(labels=plot_style["yticklabels"], fontsize=6)
            ax.set_title(
                plot_style["title"],
                fontsize=8,
                color=plot_style["color"],
            )
# Per-antibody inputs and outputs. The three lists are index-aligned: entry i
# of each list belongs to the same antibody.
antibody_input_files = [
    escape_2510C,
    escape_121F,
    escape_377H,
    escape_256A,
    escape_372D,
    escape_89F,
]
antibody_output_files = [
    [filtered_escape_2510C, simple_filtered_escape_2510C],
    [filtered_escape_121F, simple_filtered_escape_121F],
    [filtered_escape_377H, simple_filtered_escape_377H],
    [filtered_escape_256A, simple_filtered_escape_256A],
    [filtered_escape_372D, simple_filtered_escape_372D],
    [filtered_escape_89F, simple_filtered_escape_89F],
]
contacts_files = [
    contacts_2510C,
    contacts_121F,
    contacts_377H,
    contacts_256A,
    contacts_372D,
    contacts_89F,
]

# Make output dirs if they don't exist (also creates missing parents and
# does not fail when a directory appears between check and creation)
for directory in (out_dir, out_dir_simplified, out_dir_images):
    os.makedirs(directory, exist_ok=True)

# Set figure size and subplots: one row per antibody with a narrow line plot
# on the left and a wide logo plot on the right
fig, axes = plt.subplots(
    len(antibody_input_files),
    2,
    gridspec_kw={"width_ratios": [1, 4]},
    figsize=(4.5, 7),
)

# Adjust spacing of subplots
fig.subplots_adjust(
    bottom=0,
    top=1,
    wspace=0.1,
    hspace=0.7,
)

# Filter, write and plot every antibody in its own subplot row
for axes_row, escape_file, contacts_file, output_pair in zip(
    axes, antibody_input_files, contacts_files, antibody_output_files
):
    filtered_csv, simplified_csv = output_pair
    get_filtered_csv(
        escape_file,
        contacts_file,
        func_scores_293T,
        min_times_seen,
        min_func_score,
        n_selections,
        frac_models,
        axes_row[0],
        axes_row[1],
        output_file=filtered_csv,
        simplified_output_file=simplified_csv,
        color_contact_sites=True,
    )

# Common Y axis label
fig.text(0.05, 0.5, "site escape", va="center", rotation="vertical", fontsize=8)

# Save fig
fig.savefig(saved_image_path)
Create escape profile for 8.9F single mutant antibody validations
# One row of two panels: line plot (left) and logo plot (right)
fig, axes = plt.subplots(
    1, 2, gridspec_kw={"width_ratios": [2, 1.5]}, figsize=(3.5, 1)
)

# Adjust spacing of subplots
fig.subplots_adjust(
    left=0.05, right=1, bottom=0.075, top=1, wspace=0.15, hspace=1
)

# Plot 8.9F at a fixed set of sites; no CSVs are written by this call
validation_line_ax, validation_logo_ax = axes
get_filtered_csv(
    escape_89F,
    contacts_89F,
    func_scores_293T,
    min_times_seen,
    min_func_score,
    n_selections,
    frac_models,
    validation_line_ax,
    validation_logo_ax,
    sites=[89, 119, 125, 126, 129, 135, 138, 148, 149],
)

# Common X and Y axis labels
fig.text(0.5, -0.3, "site", ha="center", fontsize=8)
fig.text(
    -0.075, 0.5, "site escape", va="center", rotation="vertical", fontsize=8
)

# Save fig
fig.savefig(validation_image_path)
Create stacked logoplot for antibodies in Arevirumab-3
# Escape and contacts files of the three cocktail antibodies (index-aligned)
cocktail_files = [
    escape_89F,
    escape_121F,
    escape_372D,
]
cocktail_contacts_files = [
    contacts_89F,
    contacts_121F,
    contacts_372D,
]

# Set figure size and subplots: one row per cocktail antibody, x axes shared
# within each column
fig, axes = plt.subplots(
    len(cocktail_files),
    2,
    gridspec_kw={"width_ratios": [1, 4.5]},
    figsize=(7.5, 4),
    sharex="col",
)

# Adjust spacing of subplots
fig.subplots_adjust(
    bottom=0,
    top=1,
    wspace=0.1,
    hspace=0.25,
)

# Combined top sites from each antibody. `cocktail_escape_sites` (a list of
# per-antibody site lists) is read but not overwritten, so this cell can be
# re-run safely; sorting makes the result deterministic.
sites_to_show = sorted(
    {site for top_sites in cocktail_escape_sites for site in top_sites}
)

# Plot every cocktail antibody at the same combined set of sites
for axes_row, escape_file, contacts_file in zip(
    axes, cocktail_files, cocktail_contacts_files
):
    get_filtered_csv(
        escape_file,
        contacts_file,
        func_scores_293T,
        min_times_seen,
        min_func_score,
        n_selections,
        frac_models,
        axes_row[0],
        axes_row[1],
        sites=sites_to_show,
        color_contact_sites=True,
    )

# Common X and Y axis labels
fig.text(0.5, -0.1, "site", ha="center", fontsize=8)
fig.text(0.05, 0.5, "site escape", va="center", rotation="vertical", fontsize=8)

# Save fig
fig.savefig(arevirumab_escape_profile)
def get_filtered_func_CSV(input_file, output_file, min_times_seen, n_selections):
    """Write a filtered copy of a functional effects CSV.

    A row of `input_file` is kept when it meets both minimums
    (`n_selections` and `times_seen`) or when its `wildtype` equals its
    `mutant`. The kept rows are written to `output_file` without the index.
    """
    table = pd.read_csv(input_file)

    # The three conditions the row filter is built from
    enough_selections = table["n_selections"] >= n_selections
    enough_observations = table["times_seen"] >= min_times_seen
    is_wildtype = table["wildtype"] == table["mutant"]

    kept_rows = table[(enough_selections & enough_observations) | is_wildtype]
    kept_rows.reset_index(drop=True).to_csv(output_file, index=False)
# Functional effect inputs and their filtered outputs (index-aligned)
func_effect_files = [
    func_scores_293T,
    func_scores_human_293T,
    func_scores_mastomys_293T,
]
filtered_func_effect_files = [
    filtered_func_293T,
    filtered_func_human_293T,
    filtered_func_mastomys_293T,
]

# Make output dir if it doesn't exist (also creates missing parents and does
# not fail when the directory appears between check and creation)
os.makedirs(out_dir_func, exist_ok=True)

# Write one filtered CSV per functional effects file
for input_csv, output_csv in zip(func_effect_files, filtered_func_effect_files):
    get_filtered_func_CSV(input_csv, output_csv, min_times_seen, n_selections)
Create scale bars for functional effect and escape
# Scale bar for the functional effect: 20-step white-to-black gradient over
# the value range -1.75 to 0
func_effect_cmap = matplotlib.colors.ListedColormap(
    color_gradient_hex("white", "#000000", n=20)
)
func_effect_color_map = dmslogo.colorschemes.ValueToColorMap(
    minvalue=-1.75, maxvalue=0, cmap=func_effect_cmap
)
func_effect_fig_scale, ax_scale = func_effect_color_map.scale_bar(
    orientation="vertical", label="effect on cell entry"
)

# The top tick is labeled "≥0" rather than "0"
scale_labels = ["-1.5", "-1.0", "-0.5", "≥0"]
ax_scale.set_yticks([-1.5, -1.0, -0.5, 0])
ax_scale.set_yticklabels(labels=scale_labels)

# Save fig
func_effect_fig_scale.savefig(func_effect_scale_bar)

# Scale bar for escape: 20-step white-to-blue gradient over 0 to 1 with only
# the low and high ticks drawn
escape_cmap = matplotlib.colors.ListedColormap(
    color_gradient_hex("white", "#0072B2", n=20)
)
escape_color_map = dmslogo.colorschemes.ValueToColorMap(
    minvalue=0, maxvalue=1, cmap=escape_cmap
)
escape_fig_scale, ax_scale = escape_color_map.scale_bar(
    orientation="vertical", label="escape", low_high_ticks_only=True
)

# Save fig
escape_fig_scale.savefig(escape_scale_bar)