# Imports
import os
import warnings
import pandas as pd
import altair as alt
import seaborn as sns
import matplotlib.pyplot as plt
from Bio import SeqIO, AlignIO 

# Hex colors used for plotting, re-arranged into the order the plots need
tol_muted_adjusted = [
    "#AA4499", "#88CCEE", "#EE7733", "#44AA99", "#1f78b4",
    "#CC6677", "#117733", "#999933", "#DDCC77", "#CC3311",
    "#882255", "#000000", "#DDDDDD",
]

# Seaborn defaults: 300 dpi both on screen and when saving, and the
# "none" SVG font type; then the "ticks" style and the palette above.
# The three calls are kept separate and in this order on purpose.
sns.set(
    rc={
        "figure.dpi": 300,
        "savefig.dpi": 300,
        "svg.fonttype": "none",
    }
)
sns.set_style("ticks")
sns.set_palette(tol_muted_adjusted)

# Silence all warnings for the rest of the run
warnings.simplefilter("ignore")

# Remove Altair's cap on the number of rows a chart may embed
_ = alt.data_transformers.disable_max_rows()

# this cell is tagged as `parameters` for papermill parameterization
# Every name below is a placeholder (None) that is re-assigned further down.

# Paths to the filtered antibody-escape CSVs, one per antibody
filtered_escape_377H = None
filtered_escape_89F = None
filtered_escape_2510C = None
filtered_escape_121F = None
filtered_escape_256A = None
filtered_escape_372D = None

# Path to the filtered functional-effect CSV (293T)
filtered_func_293T = None

# Path to the Josiah nucleotide reference FASTA
Josiah_sequence = None

# Output directory and the two output figure paths (HTML and SVG)
out_dir = None
func_vs_escape = None
func_vs_escape_svg = None

# Parameters
# Concrete values overriding the placeholders above (presumably the cell
# injected by papermill -- confirm against the pipeline configuration).
# All paths are relative to the working directory.
filtered_escape_377H = (
    "results/filtered_antibody_escape_CSVs/377H_filtered_mut_effect.csv"
)
filtered_escape_89F = (
    "results/filtered_antibody_escape_CSVs/89F_filtered_mut_effect.csv"
)
filtered_escape_2510C = (
    "results/filtered_antibody_escape_CSVs/2510C_filtered_mut_effect.csv"
)
filtered_escape_121F = (
    "results/filtered_antibody_escape_CSVs/121F_filtered_mut_effect.csv"
)
filtered_escape_256A = (
    "results/filtered_antibody_escape_CSVs/256A_filtered_mut_effect.csv"
)
filtered_escape_372D = (
    "results/filtered_antibody_escape_CSVs/372D_filtered_mut_effect.csv"
)
filtered_func_293T = "results/filtered_func_effect_CSVs/293T_filtered_func_effects.csv"
Josiah_sequence = "data/Josiah_nucleotide_reference_NC_004296.fasta"
out_dir = "results/antibody_escape_profiles/"
func_vs_escape = (
    "results/antibody_escape_profiles/antibody_escape_vs_func_effect_all_muts.html"
)
func_vs_escape_svg = (
    "results/antibody_escape_profiles/antibody_escape_vs_func_effect_all_muts.svg"
)

# # Uncomment for running interactive
# (same paths as above, prefixed with "../")
# filtered_escape_377H = "../results/filtered_antibody_escape_CSVs/377H_filtered_mut_effect.csv"
# filtered_escape_89F = "../results/filtered_antibody_escape_CSVs/89F_filtered_mut_effect.csv"
# filtered_escape_2510C = "../results/filtered_antibody_escape_CSVs/2510C_filtered_mut_effect.csv"
# filtered_escape_121F = "../results/filtered_antibody_escape_CSVs/121F_filtered_mut_effect.csv"
# filtered_escape_256A = "../results/filtered_antibody_escape_CSVs/256A_filtered_mut_effect.csv"
# filtered_escape_372D = "../results/filtered_antibody_escape_CSVs/372D_filtered_mut_effect.csv"

# filtered_func_293T = "../results/filtered_func_effect_CSVs/293T_filtered_func_effects.csv"

# Josiah_sequence = "../data/Josiah_nucleotide_reference_NC_004296.fasta"

# out_dir = "../results/antibody_escape_profiles/"
# func_vs_escape = "../results/antibody_escape_profiles/antibody_escape_vs_func_effect_all_muts.html"
# func_vs_escape_svg = "../results/antibody_escape_profiles/antibody_escape_vs_func_effect_all_muts.svg"

# Dictionary of codon values and corresponding amino acids
# (all 64 DNA codons; '*' marks a stop codon)
codon_chart = {
    'ATA': 'I', 'ATC': 'I', 'ATT': 'I', 'ATG': 'M',
    'ACA': 'T', 'ACC': 'T', 'ACG': 'T', 'ACT': 'T',
    'AAC': 'N', 'AAT': 'N', 'AAA': 'K', 'AAG': 'K',
    'AGC': 'S', 'AGT': 'S', 'AGA': 'R', 'AGG': 'R',
    'CTA': 'L', 'CTC': 'L', 'CTG': 'L', 'CTT': 'L',
    'CCA': 'P', 'CCC': 'P', 'CCG': 'P', 'CCT': 'P',
    'CAC': 'H', 'CAT': 'H', 'CAA': 'Q', 'CAG': 'Q',
    'CGA': 'R', 'CGC': 'R', 'CGG': 'R', 'CGT': 'R',
    'GTA': 'V', 'GTC': 'V', 'GTG': 'V', 'GTT': 'V',
    'GCA': 'A', 'GCC': 'A', 'GCG': 'A', 'GCT': 'A',
    'GAC': 'D', 'GAT': 'D', 'GAA': 'E', 'GAG': 'E',
    'GGA': 'G', 'GGC': 'G', 'GGG': 'G', 'GGT': 'G',
    'TCA': 'S', 'TCC': 'S', 'TCG': 'S', 'TCT': 'S',
    'TTC': 'F', 'TTT': 'F', 'TTA': 'L', 'TTG': 'L',
    'TAC': 'Y', 'TAT': 'Y', 'TAA': '*', 'TAG': '*',
    'TGC': 'C', 'TGT': 'C', 'TGA': '*', 'TGG': 'W',
}


def check_if_AA_one_mutation_away(wt_codon, mutant_AA):
    """
    Check whether an amino acid is reachable from a codon by at most
    one nucleotide substitution.

    Parameters
    ----------
    wt_codon : str or str-like
        Three-letter DNA codon. It is converted with ``str()`` and
        upper-cased first, so sequence objects and lower-case input
        are accepted.
    mutant_AA : str
        One-letter amino acid code ('*' for stop) to look for.

    Returns
    -------
    bool
        True if ``wt_codon`` already encodes ``mutant_AA`` or if any
        single-base substitution of it does, otherwise False.

    Raises
    ------
    KeyError
        If ``wt_codon`` is not one of the 64 codons in ``codon_chart``
        (wrong length or a base other than A/C/G/T).
    """
    # Normalise to a plain upper-case string so the dictionary lookup does
    # not depend on how the caller's sequence type hashes or on its case
    codon = str(wt_codon).upper()

    # Zero substitutions needed: the codon already encodes the amino acid
    if codon_chart[codon] == mutant_AA:
        return True

    # Try every single-base substitution at each of the three positions;
    # replacing a base with itself is skipped because it was checked above
    return any(
        codon_chart[codon[:pos] + new_base + codon[pos + 1:]] == mutant_AA
        for pos in range(3)
        for new_base in "ATGC"
        if new_base != codon[pos]
    )

# Escape CSVs, in the order the antibodies appear in the figures
escape = [
    filtered_escape_2510C,
    filtered_escape_121F,
    filtered_escape_377H,
    filtered_escape_256A,
    filtered_escape_372D,
    filtered_escape_89F,
]

# Start from the functional effects; escape columns are merged onto it
merged_df = pd.read_csv(filtered_func_293T)

# Clip upper scores to 0. This does not depend on the escape data, so it
# is done once here instead of once per antibody.
merged_df["effect"] = merged_df["effect"].clip(upper=0)

# Columns that identify a mutation in both the functional and escape tables
merge_keys = ["site", "wildtype", "mutant"]

# Add escape to dataframe for each antibody
for antibody_file in escape:

    # The antibody name is the file-name prefix before the first underscore
    antibody_name = os.path.basename(antibody_file).split("_")[0]
    escape_col = "escape_" + antibody_name

    # Load, drop mutations flagged as poor cell entry, clip negative escape
    # to 0 and rename the escape column to include the antibody name.
    # `.assign` builds a new frame, avoiding assignment into the filtered
    # result of `.query`.
    escape_df = (
        pd.read_csv(antibody_file)
        .query("poor_cell_entry == False")
        .assign(escape_median=lambda df: df["escape_median"].clip(lower=0))
        .rename(columns={"escape_median": escape_col})
    )

    # Left merge keeps every functional-effect row; mutations with no escape
    # measurement for this antibody get NaN. `validate` raises if a mutation
    # appears more than once on either side.
    merged_df = merged_df.merge(
        escape_df[merge_keys + [escape_col]],
        how="left",
        on=merge_keys,
        validate="one_to_one",
    )

# Read josiah sequence
josiah_seq = SeqIO.read(Josiah_sequence, "fasta").seq

# Mark each mutation as being one nucleotide accessible from the josiah
# wt codon. Sites are 1-based, so site s covers nucleotides
# [(s - 1) * 3, s * 3) of the reference.
merged_df["single_nucleotide_accessible"] = [
    check_if_AA_one_mutation_away(
        str(josiah_seq[(site - 1) * 3:site * 3]),
        mutant,
    )
    for site, mutant in zip(merged_df["site"], merged_df["mutant"])
]

# Fixed x-axis (escape) domain for each antibody's interactive panel
escape_x_domains = {
    "2510C": (-0.4, 7),
    "121F": (-0.34, 6),
    "377H": (-0.43, 7.5),
    "256A": (-0.4, 7),
    "372D": (-0.2, 3.5),
    "89F": (-0.4, 7),
}


def _escape_vs_effect_row(accessible, opacity, row_title):
    """
    Build one row of escape-vs-effect scatter panels, one per antibody.

    Reads the module-level ``merged_df`` and ``escape`` when called.

    Parameters
    ----------
    accessible : bool
        Value of ``single_nucleotide_accessible`` selecting the rows of
        ``merged_df`` to plot.
    opacity : float
        Point opacity used for every panel in the row.
    row_title : str
        Title placed on the concatenated row.

    Returns
    -------
    An Altair horizontally concatenated chart with one panel per entry
    of ``escape``, in that order.
    """
    row_data = merged_df[merged_df["single_nucleotide_accessible"] == accessible]

    # Axis line and tick styling shared by the x and y axes
    axis_style = {"domainWidth": 1, "domainColor": "black", "tickColor": "black"}

    panels = []
    for antibody_file in escape:
        antibody_name = antibody_file.split("/")[-1].split("_")[0]
        escape_col = "escape_" + antibody_name

        # Unknown antibodies are reported but still plotted, with no fixed
        # x-axis domain
        if antibody_name not in escape_x_domains:
            print(f"Error! No xlims set for antibody {antibody_name}!")
        fixed_min, fixed_max = escape_x_domains.get(antibody_name, (None, None))

        panel = (
            alt.Chart(row_data, title=antibody_name)
            .mark_point(filled=True, size=75, opacity=opacity)
            .encode(
                alt.X(
                    escape_col,
                    axis=alt.Axis(title="escape", **axis_style),
                    scale=alt.Scale(domain=[fixed_min, fixed_max]),
                ),
                alt.Y(
                    "effect",
                    axis=alt.Axis(
                        title="effect on cell entry",
                        # Effects were clipped at 0, so the top tick is
                        # labelled as "≥0"
                        labelExpr="datum.label == 0 ? '≥0' : datum.label",
                        values=[-1.5, -1, -0.5, 0],
                        **axis_style,
                    ),
                    scale=alt.Scale(domain=[-1.6, 0.1]),
                ),
                tooltip=[
                    "site",
                    "wildtype",
                    "mutant",
                    "effect",
                    escape_col,
                    "single_nucleotide_accessible",
                ],
                color=alt.Color(
                    "single_nucleotide_accessible:N",
                    scale=alt.Scale(
                        domain=[True, False],
                        range=["#EE7733", "#000000"],
                    ),
                    legend=alt.Legend(
                        title=["single nucleotide", "accessible mutation"],
                    ),
                ),
            )
            .properties(width=150, height=150)
        )
        panels.append(panel)

    # Create row of plots; unpacking avoids hard-coding the panel count
    return alt.hconcat(*panels, spacing=5, title=row_title)


# Top row: mutations reachable by a single nucleotide change
row_1 = _escape_vs_effect_row(
    accessible=True,
    opacity=0.25,
    row_title="single nucleotide accessible mutations",
)

# Bottom row: mutations needing more than one nucleotide change; drawn
# more transparent than the top row
row_2 = _escape_vs_effect_row(
    accessible=False,
    opacity=0.15,
    row_title="multi-nucleotide accessible mutations",
)

# Stack the two rows and apply figure-wide axis, title and view styling
escape_vs_effect = (
    alt.vconcat(
        row_1,
        row_2,
        spacing=10,
        title="Functional effect vs antibody escape",
    )
    .configure_axis(
        grid=False,
        labelFontSize=16,
        titleFontSize=16,
        labelFontWeight="normal",
        titleFontWeight="normal",
    )
    .configure_title(
        fontSize=24,
    )
    .configure_view(
        stroke=None
    )
)

# Make output dir if doesn't exist. `makedirs` also creates missing parent
# directories and, with exist_ok=True, does not fail if the directory
# already exists.
os.makedirs(out_dir, exist_ok=True)

escape_vs_effect.save(func_vs_escape)

# Bare expression so the chart is rendered as the notebook cell output
escape_vs_effect

# Functions
def plot_func_scores_vs_escape(merged_df, ax, i, antibody_name):
    """
    Draw a scatter plot of functional scores vs antibody escape on ``ax``.

    Parameters
    ----------
    merged_df : pandas.DataFrame
        Must contain the columns ``effect``, ``escape_<antibody_name>``
        and ``single_nucleotide_accessible``.
    ax : matplotlib Axes
        Axes to draw on.
    i : int
        Column index of the panel. Only the panel with ``i == 5`` keeps its
        legend (moved outside the axes); the legend is removed from all
        other panels.
    antibody_name : str
        One of "2510C", "121F", "377H", "256A", "372D" or "89F". For any
        other name an error is printed and the panel is drawn without a
        title, custom x ticks or fixed x limits.

    Returns
    -------
    None
        The plot is drawn on ``ax`` in place.
    """
    # Per-antibody settings:
    # (x-axis limits, title color, x ticks, index where "." goes in the title)
    panel_settings = {
        "2510C": ((-0.6, 7), "#44AA99", [0, 2, 4, 6], 2),
        "121F": ((-0.6, 7), "#999933", [0, 2, 4, 6], 2),
        "377H": ((-0.64, 7.5), "#AA4499", [0, 2, 4, 6], 2),
        "256A": ((-0.6, 7), "#AA4499", [0, 2, 4, 6], 2),
        "372D": ((-0.3, 3.5), "#AA4499", [0, 1, 2, 3], 2),
        "89F": ((-0.6, 7), "#117733", [0, 2, 4, 6], 1),
    }
    settings = panel_settings.get(antibody_name)
    if settings is None:
        print(f"Error! No xlims set for antibody {antibody_name}!")
        fixed_min, fixed_max = None, None
    else:
        fixed_min, fixed_max = settings[0]

    # Plot escape vs functional score. The palette colors carry their own
    # alpha channel (the last two hex digits).
    chart = sns.scatterplot(
        data=merged_df,
        y="effect",
        x="escape_" + antibody_name,
        hue="single_nucleotide_accessible",
        edgecolor=None,
        linewidth=0,
        palette={True: "#EE773340", False: "#00000026"},
        s=20,
        ax=ax,
    )

    if settings is not None:
        _, title_color, xticks, dot_index = settings
        # Title is the antibody name with a "." inserted, e.g. "2510C" ->
        # "25.10C" and "89F" -> "8.9F"
        chart.set_title(
            antibody_name[:dot_index] + "." + antibody_name[dot_index:],
            fontsize=8,
            color=title_color,
        )
        chart.set_xticks(xticks)
        chart.set_xticklabels(labels=[str(tick) for tick in xticks], fontsize=8)

    chart.set_ylabel("effect on cell entry", fontsize=8)
    chart.set_xlim(fixed_min, fixed_max)
    chart.set_ylim(-1.6, 0.1)
    yticks = [-1.5, -1, -0.5, 0]
    chart.set_yticks(yticks)
    # The top tick is labelled "≥0" rather than "0"
    chart.set_yticklabels(labels=["-1.5", "-1.0", "-0.5", "≥0"], fontsize=8)
    chart.set(xlabel=None)

    # Make only one legend
    if i == 5:
        sns.move_legend(
            chart,
            "upper left",
            bbox_to_anchor=(1, 1),
            fontsize=8,
            markerscale=1,
            handletextpad=0.1,
            title="single\nnucleotide\naccessible\nmutation",
            title_fontproperties={
                "size": 8,
            },
            frameon=False,
            borderaxespad=0.1,
            reverse=True,
        )
        # Matplotlib renamed Legend.legendHandles to legend_handles (3.7)
        # and later removed the old name; support both.
        legend = chart.legend_
        handles = getattr(legend, "legend_handles", None)
        if handles is None:
            handles = legend.legendHandles
        # Add edges to legend markers to match scatter plot
        # NOTE(review): assumes the handles are collection artists exposing
        # set_edgecolor / set_linewidths -- confirm for the seaborn version
        # in use.
        for handle in handles:
            handle.set_edgecolor(None)
            handle.set_linewidths(0.5)
    else:
        # No legend may exist (e.g. nothing was plotted); only remove one
        # that is actually there
        legend = ax.get_legend()
        if legend is not None:
            legend.remove()

    # Change all spines
    for axis in ["top", "bottom", "left", "right"]:
        chart.spines[axis].set_linewidth(1)
    chart.tick_params(axis="both", length=3, width=1)

    chart.grid(False)
    sns.despine()

# 2 x 6 grid: top row holds single-nucleotide-accessible mutations, bottom
# row the remaining mutations; one column per antibody in `escape`
fig, axes = plt.subplots(
    2,
    6,
    figsize=(7, 3.5),
    sharey=True,
)

# The two subsets do not depend on the antibody, so filter once up front
accessible_df = merged_df.query("single_nucleotide_accessible == True")
inaccessible_df = merged_df.query("single_nucleotide_accessible == False")

for i, antibody_file in enumerate(escape):

    # The antibody name is the file-name prefix before the first underscore
    antibody_name = antibody_file.split("/")[-1].split("_")[0]

    plot_func_scores_vs_escape(accessible_df, axes[0][i], i, antibody_name)
    plot_func_scores_vs_escape(inaccessible_df, axes[1][i], i, antibody_name)

# Common x axis label (the per-panel x labels are removed by the plot function)
fig.text(0.5, 0, "escape", ha="center", rotation="horizontal", fontsize=8)

fig.tight_layout(w_pad=0.5)

# Make output dir if doesn't exist. `makedirs` also creates missing parent
# directories and does not fail if the directory already exists.
os.makedirs(out_dir, exist_ok=True)

# Save fig
fig.savefig(func_vs_escape_svg)

# Analyze mutational accessibility for GPC to escape antibodies