Create filtered antibody escape CSVs based on configuration for plotting¶

This notebook filters the antibody escape data using the same filters that are applied when plotting the data and writes the filtered escape values to CSV files. In addition, escape profiles and logo plots are created for each antibody selection.

In [1]:
# Imports 
import os
import warnings
import dmslogo
import pandas as pd
import seaborn as sns
import matplotlib.colors
import matplotlib.pyplot as plt

# Plotting colors
# Default categorical palette for the notebook (registered with seaborn below)
tol_muted_adjusted = [
    "#000000", "#CC6677", "#1f78b4", "#DDCC77",
    "#117733", "#882255", "#88CCEE", "#44AA99",
    "#999933", "#AA4499", "#DDDDDD",
]

# Create color palette
def color_gradient_hex(start, end, n):
    """Return a list of `n` hex colors running from `start` to `end`.

    Color function from polyclonal.

    A two-color linear segmented colormap with `n` levels is built from
    `start` and `end`, evaluated at the integer indices 0..n-1, and each
    resulting color is converted to a hex string.
    """
    gradient = matplotlib.colors.LinearSegmentedColormap.from_list(
        name="_",
        colors=[start, end],
        N=n,
    )
    level_colors = gradient(list(range(0, n)))
    hex_colors = []
    for level_color in level_colors:
        hex_colors.append(matplotlib.colors.rgb2hex(level_color))
    return hex_colors

# Seaborn style settings
figure_rc = {
    "figure.dpi": 300,
    "savefig.dpi": 300,
    "svg.fonttype": "none",
}
sns.set(rc=figure_rc)
sns.set_style("ticks")
sns.set_palette(tol_muted_adjusted)

# Suppress warnings
# NOTE: this ignores every warning category for the rest of the session,
# including pandas/matplotlib deprecation and copy warnings.
warnings.simplefilter("ignore")

File paths for data:

In [2]:
# this cell is tagged as `parameters` for papermill parameterization
# Every name below is a placeholder set to None; the real values are
# assigned in a later cell. Keep these as plain `name = None` assignments.

# Input CSVs of functional (cell entry) effects
func_scores_293T = None
func_scores_human_293T = None
func_scores_mastomys_293T = None

# Input CSVs of antibody escape, one per antibody
escape_377H = None
escape_89F = None
escape_2510C = None
escape_121F = None
escape_256A = None
escape_372D = None

# Filter thresholds passed to the filtering functions below
min_times_seen = None
min_func_score = None
n_selections = None
frac_models = None

# Output directories (created by later cells if they do not exist)
out_dir = None
out_dir_simplified = None
out_dir_images = None
out_dir_func = None

# Output paths for the full filtered escape CSVs
filtered_escape_377H = None
filtered_escape_89F = None
filtered_escape_2510C = None
filtered_escape_121F = None
filtered_escape_256A = None
filtered_escape_372D = None

# Output paths for the simplified filtered escape CSVs
simple_filtered_escape_377H = None
simple_filtered_escape_89F = None
simple_filtered_escape_2510C = None
simple_filtered_escape_121F = None
simple_filtered_escape_256A = None
simple_filtered_escape_372D = None

# Output paths for the filtered functional effect CSVs
filtered_func_293T = None
filtered_func_human_293T = None
filtered_func_mastomys_293T = None

# Input CSVs of antibody contact sites, one per antibody
contacts_89F = None
contacts_377H = None
contacts_256A = None
contacts_2510C = None
contacts_121F = None
contacts_372D = None

# Output paths for the two scale bar images
func_effect_scale_bar = None
escape_scale_bar = None

# Output paths for the three escape profile figures
saved_image_path = None
validation_image_path = None
arevirumab_escape_profile = None
In [3]:
# Parameters
# Concrete values for the placeholders declared in the `parameters` cell.
# All paths are written relative to the directory the notebook is run from.
# NOTE(review): this looks like the cell that papermill injects at run time;
# if so, manual edits here are overwritten on the next run -- confirm.
func_scores_293T = "results/func_effects/averages/293T_entry_func_effects.csv"
func_scores_human_293T = (
    "results/func_effects/averages/human_293T_entry_func_effects.csv"
)
func_scores_mastomys_293T = (
    "results/func_effects/averages/mastomys_293T_entry_func_effects.csv"
)
escape_377H = "results/antibody_escape/averages/377H_mut_effect.csv"
escape_89F = "results/antibody_escape/averages/89F_mut_effect.csv"
escape_2510C = "results/antibody_escape/averages/2510C_mut_effect.csv"
escape_121F = "results/antibody_escape/averages/121F_mut_effect.csv"
escape_256A = "results/antibody_escape/averages/256A_mut_effect.csv"
escape_372D = "results/antibody_escape/averages/372D_mut_effect.csv"
contacts_89F = "data/antibody_contacts/antibody_contacts_89F.csv"
contacts_377H = "data/antibody_contacts/antibody_contacts_377H.csv"
contacts_256A = "data/antibody_contacts/antibody_contacts_256A.csv"
contacts_2510C = "data/antibody_contacts/antibody_contacts_2510C.csv"
contacts_121F = "data/antibody_contacts/antibody_contacts_121F.csv"
contacts_372D = "data/antibody_contacts/antibody_contacts_372D.csv"
min_times_seen = 2
min_func_score = -1.5
n_selections = 8
frac_models = 1
out_dir = "results/filtered_antibody_escape_CSVs/"
out_dir_simplified = "results/simplified_filtered_antibody_escape_CSVs/"
out_dir_images = "results/antibody_escape_profiles/"
out_dir_func = "results/filtered_func_effect_CSVs/"
filtered_escape_377H = (
    "results/filtered_antibody_escape_CSVs/377H_filtered_mut_effect.csv"
)
filtered_escape_89F = (
    "results/filtered_antibody_escape_CSVs/89F_filtered_mut_effect.csv"
)
filtered_escape_2510C = (
    "results/filtered_antibody_escape_CSVs/2510C_filtered_mut_effect.csv"
)
filtered_escape_121F = (
    "results/filtered_antibody_escape_CSVs/121F_filtered_mut_effect.csv"
)
filtered_escape_256A = (
    "results/filtered_antibody_escape_CSVs/256A_filtered_mut_effect.csv"
)
filtered_escape_372D = (
    "results/filtered_antibody_escape_CSVs/372D_filtered_mut_effect.csv"
)
simple_filtered_escape_377H = (
    "results/simplified_filtered_antibody_escape_CSVs/377H_filtered_mut_effect.csv"
)
simple_filtered_escape_89F = (
    "results/simplified_filtered_antibody_escape_CSVs/89F_filtered_mut_effect.csv"
)
simple_filtered_escape_2510C = (
    "results/simplified_filtered_antibody_escape_CSVs/2510C_filtered_mut_effect.csv"
)
simple_filtered_escape_121F = (
    "results/simplified_filtered_antibody_escape_CSVs/121F_filtered_mut_effect.csv"
)
simple_filtered_escape_256A = (
    "results/simplified_filtered_antibody_escape_CSVs/256A_filtered_mut_effect.csv"
)
simple_filtered_escape_372D = (
    "results/simplified_filtered_antibody_escape_CSVs/372D_filtered_mut_effect.csv"
)
filtered_func_293T = "results/filtered_func_effect_CSVs/293T_filtered_func_effects.csv"
filtered_func_human_293T = (
    "results/filtered_func_effect_CSVs/human_293T_filtered_func_effects.csv"
)
filtered_func_mastomys_293T = (
    "results/filtered_func_effect_CSVs/mastomys_293T_filtered_func_effects.csv"
)
func_effect_scale_bar = "results/antibody_escape_profiles/func_effect_scale_bar.svg"
escape_scale_bar = "results/antibody_escape_profiles/escape_scale_bar.svg"
saved_image_path = "results/antibody_escape_profiles/antibody_escape_profiles.svg"
validation_image_path = "results/antibody_escape_profiles/validation_escape_profile.svg"
arevirumab_escape_profile = (
    "results/antibody_escape_profiles/arevirumab_escape_profile.svg"
)
In [4]:
# # Uncomment the assignments below to run this notebook interactively (paths are relative to the notebook's own directory)
# func_scores_293T = "../results/func_effects/averages/293T_entry_func_effects.csv"
# func_scores_human_293T = "../results/func_effects/averages/human_293T_entry_func_effects.csv"
# func_scores_mastomys_293T = "../results/func_effects/averages/mastomys_293T_entry_func_effects.csv"

# escape_377H = "../results/antibody_escape/averages/377H_mut_effect.csv"
# escape_89F = "../results/antibody_escape/averages/89F_mut_effect.csv"
# escape_2510C = "../results/antibody_escape/averages/2510C_mut_effect.csv"
# escape_121F = "../results/antibody_escape/averages/121F_mut_effect.csv"
# escape_256A = "../results/antibody_escape/averages/256A_mut_effect.csv"
# escape_372D = "../results/antibody_escape/averages/372D_mut_effect.csv"

# min_times_seen = 2
# min_func_score = -1.5
# n_selections = 8
# frac_models = 1

# out_dir = "../results/filtered_antibody_escape_CSVs/"
# out_dir_simplified = "../results/simplified_filtered_antibody_escape_CSVs/"
# out_dir_images = "../results/antibody_escape_profiles/"
# out_dir_func = "../results/filtered_func_effect_CSVs/"

# filtered_escape_377H = "../results/filtered_antibody_escape_CSVs/377H_filtered_mut_effect.csv"
# filtered_escape_89F = "../results/filtered_antibody_escape_CSVs/89F_filtered_mut_effect.csv"
# filtered_escape_2510C = "../results/filtered_antibody_escape_CSVs/2510C_filtered_mut_effect.csv"
# filtered_escape_121F = "../results/filtered_antibody_escape_CSVs/121F_filtered_mut_effect.csv"
# filtered_escape_256A = "../results/filtered_antibody_escape_CSVs/256A_filtered_mut_effect.csv"
# filtered_escape_372D = "../results/filtered_antibody_escape_CSVs/372D_filtered_mut_effect.csv"

# simple_filtered_escape_377H = "../results/simplified_filtered_antibody_escape_CSVs/377H_filtered_mut_effect.csv"
# simple_filtered_escape_89F = "../results/simplified_filtered_antibody_escape_CSVs/89F_filtered_mut_effect.csv"
# simple_filtered_escape_2510C = "../results/simplified_filtered_antibody_escape_CSVs/2510C_filtered_mut_effect.csv"
# simple_filtered_escape_121F = "../results/simplified_filtered_antibody_escape_CSVs/121F_filtered_mut_effect.csv"
# simple_filtered_escape_256A = "../results/simplified_filtered_antibody_escape_CSVs/256A_filtered_mut_effect.csv"
# simple_filtered_escape_372D = "../results/simplified_filtered_antibody_escape_CSVs/372D_filtered_mut_effect.csv"

# filtered_func_293T = "../results/filtered_func_effect_CSVs/293T_filtered_func_effects.csv"
# filtered_func_human_293T = "../results/filtered_func_effect_CSVs/human_293T_filtered_func_effects.csv"
# filtered_func_mastomys_293T = "../results/filtered_func_effect_CSVs/mastomys_293T_filtered_func_effects.csv"

# contacts_89F = "../data/antibody_contacts/antibody_contacts_89F.csv"
# contacts_377H = "../data/antibody_contacts/antibody_contacts_377H.csv"
# contacts_256A = "../data/antibody_contacts/antibody_contacts_256A.csv"
# contacts_2510C = "../data/antibody_contacts/antibody_contacts_2510C.csv"
# contacts_121F = "../data/antibody_contacts/antibody_contacts_121F.csv"
# contacts_372D = "../data/antibody_contacts/antibody_contacts_372D.csv"

# func_effect_scale_bar = "../results/antibody_escape_profiles/func_effect_scale_bar.svg"
# escape_scale_bar = "../results/antibody_escape_profiles/escape_scale_bar.svg"

# saved_image_path = "../results/antibody_escape_profiles/antibody_escape_profiles.svg"
# validation_image_path = "../results/antibody_escape_profiles/validation_escape_profile.svg"
# arevirumab_escape_profile = "../results/antibody_escape_profiles/arevirumab_escape_profile.svg"

The function below writes filtered CSVs of the antibody escape and plots escape profiles both as summed site-escape line plots and as logo plots whose letters are colored by functional score. Escape scores less than 0 are clipped to 0 (unless `negative=True` is passed) and functional scores greater than 0 are clipped to 0. Because of this clipping, the 0 tick on the functional scale bar must be relabeled as ≥0 afterwards. Contact sites are highlighted gray in logoplots.

In [5]:
# Cocktail escape sites
# Filled as a side effect of get_filtered_csv: one list of top sites is
# appended per call made for a cocktail antibody without explicit `sites`.
cocktail_escape_sites = []

# Antibodies whose automatically selected top sites are recorded above
_COCKTAIL_ANTIBODIES = ("121F", "372D", "89F")

# Sites 1.._N_SITES are always present on the x axis of the line plot;
# sites in that range without any measurement get a zero-height placeholder
_N_SITES = 491

# Number of top sites (by summed escape) shown when `sites` is not given
_N_TOP_SITES = 15

# Functional effects are clipped to [_MIN_FUNC_EFFECT, 0] before coloring
_MIN_FUNC_EFFECT = -1.75

# Per-antibody plot settings keyed by the prefix of the escape file name:
# title label, (ymin, ymax) passed to dmslogo, y tick positions, title color
_ANTIBODY_PLOT_STYLE = {
    "2510C": {"label": "25.10C", "ylim": (-7.5, 67.5), "yticks": [0, 25, 50], "color": "#44AA99"},
    "121F": {"label": "12.1F", "ylim": (-3.75, 33.75), "yticks": [0, 15, 30], "color": "#999933"},
    "377H": {"label": "37.7H", "ylim": (-9.375, 84.375), "yticks": [0, 35, 70], "color": "#AA4499"},
    "256A": {"label": "25.6A", "ylim": (-5, 45), "yticks": [0, 20, 40], "color": "#AA4499"},
    "372D": {"label": "37.2D", "ylim": (-1.875, 16.875), "yticks": [0, 7.5, 15], "color": "#AA4499"},
    "89F": {"label": "8.9F", "ylim": (-5, 45), "yticks": [0, 20, 40], "color": "#117733"},
}


def _style_profile_axis(ax):
    """Apply the spine width and tick geometry shared by line and logo plots."""
    for side in ["top", "bottom", "left", "right"]:
        ax.spines[side].set_linewidth(1)
    ax.tick_params(axis="both", length=2, width=1, pad=1)


def get_filtered_csv(
    escape_file,  
    contacts_file,
    func_scores, 
    min_times_seen, 
    min_func_score, 
    n_selections, 
    frac_models,
    line_plot,
    logo_plot,
    output_file = None,
    simplified_output_file =None,
    sites = None,
    negative = False,
    color_contact_sites = False,
):
    """
    Function that filters and writes an antibody escape csv.
    Also creates summed escape profiles and logoplots.

    Parameters
    ----------
    escape_file : str
        Path to the escape CSV. The file name up to the first "_" is used as
        the antibody name. Columns read: site, wildtype, mutant, mutation,
        escape_median, escape_std, times_seen, frac_models (and n_models
        when `output_file` is given).
    contacts_file : str
        Path to a CSV with `distance` and `position` columns. Positions whose
        `distance` equals 4 are shaded in the logo plot.
    func_scores : str
        Path to the functional effects CSV (columns site, wildtype, mutant,
        effect).
    min_times_seen : number
        Minimum `times_seen` for an escape measurement to be kept.
    min_func_score : number
        Mutations whose functional `effect` is below this value (or that are
        absent from the functional effects CSV) are flagged as
        `poor_cell_entry`; their escape values are set to NaN in the written
        CSVs and they are left out of the plots.
    n_selections : int
        Not used by this function; kept so existing callers keep working.
    frac_models : number
        Minimum `frac_models` for an escape measurement to be kept.
    line_plot, logo_plot : matplotlib axes
        Axes the line plot and the logo plot are drawn on.
    output_file : str, optional
        If given, the filtered escape table is written here with an extra
        `escape_median_floor_zero` column.
    simplified_output_file : str, optional
        If given, a reduced table (escape_median renamed to `escape`, sorted
        by descending escape) is written here.
    sites : list of int, optional
        Sites to show in the logo plot and highlight in the line plot. When
        omitted, the top sites by summed escape are used, and for antibodies
        in the cocktail that list is appended to the module-level
        `cocktail_escape_sites`.
    negative : bool
        If false, escape values below 0 are clipped to 0 before plotting.
        If true, negative values are kept and the logo plot draws a zero
        line when negative values are present.
    color_contact_sites : bool
        If true, contact sites are shaded "#DDCC77"; otherwise they are
        shaded white.

    Returns
    -------
    None
    """

    # os.path.basename also copes with platform-specific path separators
    antibody_name = os.path.basename(escape_file).split("_")[0]

    # Read data
    escape_df = pd.read_csv(escape_file)
    func_scores = pd.read_csv(func_scores)
    contacts_df = pd.read_csv(contacts_file)

    # Create mutation column to match antibody df
    # **
    # Pipeline does not filter functional scores 
    # prior to filtering antibody selections
    # **
    func_scores["mutation"] = (
        func_scores["wildtype"] + func_scores["site"].astype(str) + func_scores["mutant"]
    )
    # A set keeps the membership test below linear in the number of mutations
    functional_mutations = set(
        func_scores.loc[func_scores["effect"] >= min_func_score, "mutation"]
    )

    # Filter escape df for min times seen
    # min fraction of models, and no stop codons
    escape_df = (
        escape_df.loc[
            (escape_df["times_seen"] >= min_times_seen)
            &
            (escape_df["frac_models"] >= frac_models)
            &
            (escape_df["mutant"] != "*")
        ]
        .reset_index(drop=True)
    )

    # Mark mutations that are below functional cutoff
    # and replace measurements with NaN
    escape_df["poor_cell_entry"] = ~escape_df["mutation"].isin(functional_mutations)
    escape_df.loc[
        escape_df["poor_cell_entry"], ["escape_median", "escape_std"]
    ] = float("NaN")

    # Write filtered escape to csv
    if output_file is not None:
        exported_csv = escape_df[[
            "site", 
            "wildtype", 
            "mutant", 
            "mutation", 
            "escape_median", 
            "escape_std", 
            "times_seen", 
            "n_models", 
            "frac_models",
            "poor_cell_entry",
        ]].copy()
        # Add column with escape floored at 0
        exported_csv["escape_median_floor_zero"] = exported_csv["escape_median"].clip(lower=0)
        exported_csv.to_csv(output_file, index=False)

    # Write simplified filtered escape to csv
    if simplified_output_file is not None:
        (
            escape_df[[
                "site", 
                "wildtype", 
                "mutant", 
                "mutation", 
                "escape_median",
                "poor_cell_entry",
            ]]
            .rename(columns={"escape_median" : "escape"})
            .sort_values("escape", ascending=False)
            .to_csv(simplified_output_file, index=False)
        )

    # Filter escape df for low functional score mutations
    # (copy so the column assignments below act on an independent frame)
    escape_df = escape_df.loc[~escape_df["poor_cell_entry"]].copy()

    # Clip lower scores to 0
    if not negative:
        escape_df["escape_median"] = escape_df["escape_median"].clip(lower=0)

    # Summed escape to get top escape sites to show
    summed_df = (
        escape_df
        .groupby(["site", "wildtype"])
        .aggregate({
            "escape_median" : "sum",
        })
        .rename(columns={"escape_median" : "site_escape"})
        .reset_index()
    )

    # Top escape sites for each antibody combined to show
    if sites is None:
        sites = sorted(summed_df.nlargest(_N_TOP_SITES, "site_escape")["site"].tolist())
        if antibody_name in _COCKTAIL_ANTIBODIES:
            cocktail_escape_sites.append(sites)
    escape_df["show_site"] = escape_df["site"].isin(sites)

    # Shade contact sites in logo plot
    # NOTE(review): only rows with distance exactly equal to 4 are used --
    # confirm the contacts files encode the cutoff this way.
    shade_sites = list(contacts_df.loc[contacts_df["distance"] == 4]["position"].unique())

    # **
    # # Uncomment to show antibody contacts
    # print(antibody_name)
    # print(f"Contact sites: {shade_sites}")
    # print()
    # **

    shade_color = "#DDCC77" if color_contact_sites else "#FFFFFF"
    escape_df["shade_site"] = [
        shade_color if is_contact else None
        for is_contact in escape_df["site"].isin(shade_sites)
    ]
    escape_df["shade_alpha"] = 0.35

    # Attach the per-site sums and keep only the columns needed for plotting
    plot_columns = [
        "site", 
        "wildtype",
        "mutant", 
        "escape_median", 
        "show_site", 
        "shade_site",
        "shade_alpha",
        "site_escape", 
    ]
    escape_df = (
        escape_df.merge(
            summed_df,
            how="left",
            on=["site", "wildtype"],
            validate="many_to_one",
        )
    )[plot_columns]

    # Fill in missing sites
    # Values are keyed by column name, so the placeholders do not depend on
    # the column order of the input CSV, and they are appended in one step.
    observed_sites = set(escape_df["site"].tolist())
    missing_sites = [
        site for site in range(1, _N_SITES + 1) if site not in observed_sites
    ]
    if missing_sites:
        placeholder_df = pd.DataFrame({
            "site" : missing_sites,
            "wildtype" : "X",
            "mutant" : "X",
            "escape_median" : 0.0,
            "show_site" : False,
            "shade_site" : None,
            "shade_alpha" : float("NaN"),
            "site_escape" : 0.0,
        })
        escape_df = pd.concat([escape_df, placeholder_df], ignore_index=True)

    # Sort by site
    escape_df = (
        escape_df
        .sort_values(by="site")
        .astype({"mutant" : "str"})
        .reset_index(drop=True)
    )

    # Merge functional and escape dfs
    func_scores = func_scores.loc[func_scores["mutant"] != "*"].copy() # remove stop codons
    func_scores["site"] = func_scores["site"].astype("int")
    func_scores["effect"] = func_scores["effect"].clip(upper=0, lower=_MIN_FUNC_EFFECT) # clip scores 
    escape_df = (
        escape_df.merge(
            func_scores,
            how="left",
            on=["site", "wildtype", "mutant"],
            validate="one_to_one",
        )
    )
    # missing functional values are filled as min to make less visible
    escape_df["effect"] = escape_df["effect"].fillna(_MIN_FUNC_EFFECT)

    # Add color column for logo plots
    func_color_map = dmslogo.colorschemes.ValueToColorMap(
        minvalue=func_scores["effect"].min(),
        maxvalue=func_scores["effect"].max(),
        cmap=matplotlib.colors.ListedColormap(color_gradient_hex("white", "#000000", n=20))
    )
    escape_df = (
        escape_df.assign(
            color=lambda x: x["effect"].map(func_color_map.val_to_color)
        )
    )

    # Add wildtype to each site for logo plot
    escape_df["wt_site"] = escape_df["wildtype"] + escape_df["site"].map(str)

    # Set ylim for each antibody
    plot_style = _ANTIBODY_PLOT_STYLE.get(antibody_name)
    if plot_style is None:
        fixed_ymin = None
        fixed_ymax = None
        print("Error! No ylims set!")
    else:
        fixed_ymin, fixed_ymax = plot_style["ylim"]

    # Plot escape profiles and logo plots
    _, lineplot = dmslogo.draw_line(
        escape_df,
        x_col="site",
        height_col="site_escape",
        show_col="show_site",
        ax=line_plot,
        show_color="#CC6677",
        linewidth=0.5,
        fixed_ymin=fixed_ymin,
        fixed_ymax=fixed_ymax,
    )
    lineplot.set(ylabel=None, xlabel=None)
    lineplot.set_xlim(1, _N_SITES)
    xticks = [100, 200, 300, 400]
    lineplot.set_xticks(xticks)
    lineplot.set_xticklabels(
        labels=[str(tick) for tick in xticks],
        rotation=90,
        horizontalalignment="center",
        fontsize=6,
    )
    _style_profile_axis(lineplot)

    # The two modes differ only in whether a line is drawn at zero
    _, logoplot = dmslogo.draw_logo(
        escape_df.query("show_site == True"),
        x_col="site",
        letter_col="mutant",
        letter_height_col="escape_median",
        ax=logo_plot,
        xtick_col="wt_site",
        color_col="color",
        shade_color_col="shade_site",
        shade_alpha_col="shade_alpha",
        draw_line_at_zero="if_negative" if negative else "never",
        fixed_ymin=fixed_ymin,
        fixed_ymax=fixed_ymax,
    )
    logoplot.set(ylabel=None, xlabel=None)
    x_labels = logoplot.get_xticklabels()
    logoplot.set_xticklabels(labels=x_labels, rotation=90, horizontalalignment="center", fontsize=6)
    _style_profile_axis(logoplot)

    # Set antibody specific y axis ticks and titles
    if plot_style is not None:
        yticks = plot_style["yticks"]
        ytick_labels = [f"{tick:g}" for tick in yticks]
        title = "antibody " + plot_style["label"]
        for ax in (lineplot, logoplot):
            ax.set_yticks(yticks)
            ax.set_yticklabels(labels=ytick_labels, fontsize=6)
            ax.set_title(
                title,
                fontsize=8,
                color=plot_style["color"],
            )
In [6]:
# The three lists below are parallel: entry i of each belongs to the
# antibody drawn in row i of the figure.
antibody_input_files = [
    escape_2510C,
    escape_121F,
    escape_377H,
    escape_256A,
    escape_372D,
    escape_89F,
]

# [full filtered CSV, simplified filtered CSV] per antibody
antibody_output_files = [
    [filtered_escape_2510C, simple_filtered_escape_2510C],
    [filtered_escape_121F, simple_filtered_escape_121F],
    [filtered_escape_377H, simple_filtered_escape_377H],
    [filtered_escape_256A, simple_filtered_escape_256A],
    [filtered_escape_372D, simple_filtered_escape_372D],
    [filtered_escape_89F, simple_filtered_escape_89F],
]

contacts_files = [
    contacts_2510C,
    contacts_121F,
    contacts_377H,
    contacts_256A,
    contacts_372D,
    contacts_89F,
]

# Make output dir if doesn't exist
# makedirs also creates missing parent directories and does not fail when
# the directory already exists
for directory in (out_dir, out_dir_simplified, out_dir_images):
    os.makedirs(directory, exist_ok=True)

# Set figure size and subplots: one row per antibody,
# line plot on the left and logo plot on the right
fig, axes = plt.subplots(
    len(antibody_input_files), 
    2, 
    gridspec_kw={"width_ratios":[1,4]},
    figsize=(4.5, 7), 
    squeeze=False,  # keep `axes` two-dimensional for any number of rows
)

# Adjust spacing of subplots
fig.subplots_adjust(
    bottom=0, 
    top=1, 
    wspace=0.1, 
    hspace=0.7,
)

# Iterate through list of antibody files
for (line_ax, logo_ax), escape_path, contacts_path, (filtered_path, simple_path) in zip(
    axes, antibody_input_files, contacts_files, antibody_output_files
):
    get_filtered_csv(
        escape_path, 
        contacts_path,
        func_scores_293T, 
        min_times_seen, 
        min_func_score, 
        n_selections, 
        frac_models,
        line_ax,
        logo_ax,
        output_file = filtered_path,
        simplified_output_file = simple_path,
        color_contact_sites = True,
    )

# Common X and Y axis labels
fig.text(0.05, 0.5, "site escape", va="center", rotation="vertical", fontsize=8)

# Save fig
fig.savefig(saved_image_path)
No description has been provided for this image

Create escape profile for 8.9F single mutant antibody validations

In [7]:
# One row: summed-escape line plot on the left, logo plot on the right
fig, axes = plt.subplots(
    nrows=1,
    ncols=2,
    figsize=(3.5, 1),
    gridspec_kw={"width_ratios": [2, 1.5]},
)
fig.subplots_adjust(left=0.05, right=1, bottom=0.075, top=1, wspace=0.15, hspace=1)

# Sites shown in the logo plot for the single mutant validations
validation_sites = [89, 119, 125, 126, 129, 135, 138, 148, 149]

# No output paths are passed, so this call only draws the plots
validation_line_ax, validation_logo_ax = axes[0], axes[1]
get_filtered_csv(
    escape_file=escape_89F,
    contacts_file=contacts_89F,
    func_scores=func_scores_293T,
    min_times_seen=min_times_seen,
    min_func_score=min_func_score,
    n_selections=n_selections,
    frac_models=frac_models,
    line_plot=validation_line_ax,
    logo_plot=validation_logo_ax,
    sites=validation_sites,
)

# Shared axis labels, positioned in figure coordinates
fig.text(0.5, -0.3, "site", ha="center", fontsize=8)
fig.text(-0.075, 0.5, "site escape", va="center", rotation="vertical", fontsize=8)

# Write the figure to disk
fig.savefig(validation_image_path)
No description has been provided for this image

Create stacked logoplot for antibodies in Arevirumab-3

In [8]:
# Parallel lists: entry i of each belongs to the antibody drawn in row i
cocktail_files = [
    escape_89F,
    escape_121F,
    escape_372D,
]

cocktail_contacts_files = [
    contacts_89F,
    contacts_121F,
    contacts_372D,
]

# Set figure size and subplots
fig, axes = plt.subplots(
    len(cocktail_files), 
    2, 
    gridspec_kw={"width_ratios":[1,4.5]},
    figsize=(7.5, 4), 
    sharex="col",
    squeeze=False,  # keep `axes` two-dimensional for any number of rows
)

# Adjust spacing of subplots
fig.subplots_adjust(
    bottom=0, 
    top=1, 
    wspace=0.1, 
    hspace=0.25,
)

# Combined top sites from each antibody
# `cocktail_escape_sites` is a list of per-antibody site lists collected by
# get_filtered_csv. It is flattened into a new name instead of being
# overwritten, so this cell can be run again without failing.
sites_to_show = sorted(
    {site for top_sites in cocktail_escape_sites for site in top_sites}
)
if not sites_to_show:
    raise ValueError(
        "cocktail_escape_sites is empty; run the cell that plots all "
        "antibodies before this one"
    )

# Iterate through list of antibody files
for (line_ax, logo_ax), escape_path, contacts_path in zip(
    axes, cocktail_files, cocktail_contacts_files
):
    get_filtered_csv(
        escape_path, 
        contacts_path,
        func_scores_293T, 
        min_times_seen, 
        min_func_score, 
        n_selections, 
        frac_models,
        line_ax,
        logo_ax,
        sites = sites_to_show,
        color_contact_sites = True,
    )

# Common X and Y axis labels
fig.text(0.5, -0.1, "site", ha="center", fontsize=8)
fig.text(0.05, 0.5, "site escape", va="center", rotation="vertical", fontsize=8)

# Save fig
fig.savefig(arevirumab_escape_profile)
No description has been provided for this image
In [9]:
def get_filtered_func_CSV(input_file,  output_file, min_times_seen, n_selections):
    """Filter a functional effects CSV and write the kept rows to a new CSV.

    A row is kept when it has at least `n_selections` selections and was
    seen at least `min_times_seen` times, or when its `wildtype` equals its
    `mutant`. The kept rows are written to `output_file` without the index.
    Returns None.
    """
    all_scores = pd.read_csv(input_file)

    # Build each condition separately, then combine them
    enough_selections = all_scores["n_selections"] >= n_selections
    enough_observations = all_scores["times_seen"] >= min_times_seen
    is_wildtype = all_scores["wildtype"] == all_scores["mutant"]
    keep_row = (enough_selections & enough_observations) | is_wildtype

    kept_scores = all_scores[keep_row].reset_index(drop=True)
    kept_scores.to_csv(output_file, index=False)
In [10]:
# Parallel lists: input functional effects CSV and the filtered CSV to write
func_effect_files = [
    func_scores_293T,
    func_scores_human_293T,
    func_scores_mastomys_293T,
]

filtered_func_effect_files = [
    filtered_func_293T,
    filtered_func_human_293T,
    filtered_func_mastomys_293T,
]

# Make output dir if doesn't exist
# makedirs also creates missing parent directories and does not fail when
# the directory already exists
os.makedirs(out_dir_func, exist_ok=True)

for func_effect_file, filtered_file in zip(func_effect_files, filtered_func_effect_files):
    get_filtered_func_CSV(func_effect_file, filtered_file, min_times_seen, n_selections)

Create scale bars for functional effect and escape

In [11]:
# Functional effect scale bar: white-to-black gradient over [-1.75, 0]
white_to_black = matplotlib.colors.ListedColormap(
    color_gradient_hex("white", "#000000", n=20)
)
func_effect_color_map = dmslogo.colorschemes.ValueToColorMap(
    minvalue=-1.75, maxvalue=0, cmap=white_to_black
)
func_effect_fig_scale, ax_scale = func_effect_color_map.scale_bar(
    label="effect on cell entry",
    orientation="vertical",
)
# The top tick is labeled "≥0" rather than "0"
scale_ticks = [-1.5, -1.0, -0.5, 0]
scale_labels = ["-1.5", "-1.0", "-0.5", "≥0"]
ax_scale.set_yticks(scale_ticks)
ax_scale.set_yticklabels(labels=scale_labels)

# Write the functional effect scale bar to disk
func_effect_fig_scale.savefig(func_effect_scale_bar)

# Escape scale bar: white-to-blue gradient over [0, 1]
white_to_blue = matplotlib.colors.ListedColormap(
    color_gradient_hex("white", "#0072B2", n=20)
)
escape_color_map = dmslogo.colorschemes.ValueToColorMap(
    minvalue=0, maxvalue=1, cmap=white_to_blue
)
escape_fig_scale, ax_scale = escape_color_map.scale_bar(
    label="escape",
    orientation="vertical",
    low_high_ticks_only=True,
)

# Write the escape scale bar to disk
escape_fig_scale.savefig(escape_scale_bar)
No description has been provided for this image
No description has been provided for this image