# Imports
import os
import warnings
import seaborn as sns
import pandas as pd
import numpy as np
import altair as alt
import matplotlib.pyplot as plt

# Plotting colors
# Thirteen hex colors installed below as the default seaborn/matplotlib
# color cycle. The first entry is black and the last is a light grey.
# NOTE(review): the name suggests an adjusted version of Paul Tol's
# "muted" palette -- confirm the provenance before citing it.
tol_muted_adjusted = [
    "#000000",
    "#CC6677", 
    "#1f78b4", 
    "#DDCC77", 
    "#117733", 
    "#882255", 
    "#88CCEE",
    "#44AA99", 
    "#999933", 
    "#AA4499", 
    "#EE7733",
    "#CC3311",
    "#DDDDDD",
]

# Seaborn style settings
# The three calls are order dependent: `sns.set` applies the base theme
# plus the rc overrides, after which the style and the palette are
# replaced individually.
sns.set(rc={
    "figure.dpi":300, 
    "savefig.dpi":300,
    # "none" asks matplotlib's SVG backend to emit text as text
    # instead of converting glyphs to paths.
    "svg.fonttype":"none",
})
sns.set_style("ticks")
sns.set_palette(tol_muted_adjusted)

# Suppress warnings
# NOTE(review): this silences every warning category for the rest of the
# run, including pandas and matplotlib deprecation warnings.
warnings.simplefilter("ignore")

# Allow more rows for Altair
# The return value is discarded by binding it to `_`.
_ = alt.data_transformers.disable_max_rows()

# this cell is tagged as `parameters` for papermill parameterization
# Every setting is first declared as None; the "# Parameters" section
# below then assigns the concrete values used by the rest of the script.

# Per-antibody antibody-contact CSV paths
contacts_89F = None
contacts_377H = None
contacts_256A = None
contacts_2510C = None
contacts_121F = None
contacts_372D = None

# Per-antibody filtered escape CSV paths
filtered_escape_377H = None
filtered_escape_89F = None
filtered_escape_2510C = None
filtered_escape_121F = None
filtered_escape_256A = None
filtered_escape_372D = None

# Functional (cell entry) effects CSV path
func_scores = None

# Thresholds applied to the functional scores
# (`times_seen >= min_times_seen`, `n_selections >= n_selections`)
min_times_seen = None
n_selections = None

# Output directory and output file paths
out_dir = None
func_vs_escape = None
func_vs_escape_svg = None
saved_image_path = None
func_distance_image_path = None

# Parameters
# NOTE(review): presumably the values papermill injected for this run --
# confirm against the workflow that executes the notebook. All paths are
# relative, so they resolve against the current working directory.
contacts_89F = "data/antibody_contacts/antibody_contacts_89F.csv"
contacts_377H = "data/antibody_contacts/antibody_contacts_377H.csv"
contacts_256A = "data/antibody_contacts/antibody_contacts_256A.csv"
contacts_2510C = "data/antibody_contacts/antibody_contacts_2510C.csv"
contacts_121F = "data/antibody_contacts/antibody_contacts_121F.csv"
contacts_372D = "data/antibody_contacts/antibody_contacts_372D.csv"
filtered_escape_377H = (
    "results/filtered_antibody_escape_CSVs/377H_filtered_mut_effect.csv"
)
filtered_escape_89F = (
    "results/filtered_antibody_escape_CSVs/89F_filtered_mut_effect.csv"
)
filtered_escape_2510C = (
    "results/filtered_antibody_escape_CSVs/2510C_filtered_mut_effect.csv"
)
filtered_escape_121F = (
    "results/filtered_antibody_escape_CSVs/121F_filtered_mut_effect.csv"
)
filtered_escape_256A = (
    "results/filtered_antibody_escape_CSVs/256A_filtered_mut_effect.csv"
)
filtered_escape_372D = (
    "results/filtered_antibody_escape_CSVs/372D_filtered_mut_effect.csv"
)
func_scores = "results/func_effects/averages/293T_entry_func_effects.csv"
out_dir = "results/antibody_escape_profiles/"
min_times_seen = 2
n_selections = 8
# Interactive (HTML) and static (SVG) versions of the
# escape-vs-functional-effect figure
func_vs_escape = "results/antibody_escape_profiles/antibody_escape_vs_func_effect.html"
func_vs_escape_svg = (
    "results/antibody_escape_profiles/antibody_escape_vs_func_effect.svg"
)
saved_image_path = "results/antibody_escape_profiles/antibody_escape_by_distance.svg"
func_distance_image_path = (
    "results/antibody_escape_profiles/func_effect_by_distance.svg"
)

# # Uncomment for running interactive
# contacts_89F = "../data/antibody_contacts/antibody_contacts_89F.csv"
# contacts_377H = "../data/antibody_contacts/antibody_contacts_377H.csv"
# contacts_256A = "../data/antibody_contacts/antibody_contacts_256A.csv"
# contacts_2510C = "../data/antibody_contacts/antibody_contacts_2510C.csv"
# contacts_121F = "../data/antibody_contacts/antibody_contacts_121F.csv"
# contacts_372D = "../data/antibody_contacts/antibody_contacts_372D.csv"

# filtered_escape_377H = "../results/filtered_antibody_escape_CSVs/377H_filtered_mut_effect.csv"
# filtered_escape_89F = "../results/filtered_antibody_escape_CSVs/89F_filtered_mut_effect.csv"
# filtered_escape_2510C = "../results/filtered_antibody_escape_CSVs/2510C_filtered_mut_effect.csv"
# filtered_escape_121F = "../results/filtered_antibody_escape_CSVs/121F_filtered_mut_effect.csv"
# filtered_escape_256A = "../results/filtered_antibody_escape_CSVs/256A_filtered_mut_effect.csv"
# filtered_escape_372D = "../results/filtered_antibody_escape_CSVs/372D_filtered_mut_effect.csv"

# func_scores = "../results/func_effects/averages/293T_entry_func_effects.csv"

# min_times_seen = 2
# n_selections = 8

# out_dir = "../results/antibody_escape_profiles/"
# func_vs_escape = "../results/antibody_escape_profiles/antibody_escape_vs_func_effect.html"
# func_vs_escape_svg = "../results/antibody_escape_profiles/antibody_escape_vs_func_effect.svg"
# saved_image_path = "../results/antibody_escape_profiles/antibody_escape_by_distance.svg"
# func_distance_image_path = "../results/antibody_escape_profiles/func_effect_by_distance.svg"

# Antibody contact files, one per antibody.
# Both lists below use the same antibody order (2510C, 121F, 377H, 256A,
# 372D, 89F). Keep them aligned: later code pairs `contacts[i]` with
# `escape[i]`, and the order also fixes the left-to-right panel order of
# the figures.
contacts = [
    contacts_2510C,
    contacts_121F,
    contacts_377H,
    contacts_256A,
    contacts_372D,
    contacts_89F,
]

# Filtered escape files, in the same antibody order as `contacts`.
escape = [
    filtered_escape_2510C,
    filtered_escape_121F,
    filtered_escape_377H,
    filtered_escape_256A,
    filtered_escape_372D,
    filtered_escape_89F,
]

# Build a site-level table: one row per (site, wildtype) holding the mean
# functional effect, the summed escape for every antibody and the contact
# distance for every antibody.

# Load functional scores as dataframe
functional_scores = pd.read_csv(func_scores)

# Calculate site averages for functional scores but filter
# for stop codons, min times seen, and min selections
passes_filters = (
    (functional_scores["mutant"] != "*")
    & (functional_scores["times_seen"] >= min_times_seen)
    & (functional_scores["n_selections"] >= n_selections)
)
merged_df = (
    functional_scores.loc[passes_filters]
    .groupby(["site", "wildtype"])
    .aggregate({"effect": "mean"})
    .reset_index()
)

# Add escape to dataframe for each antibody
for antibody_file in escape:

    # File names look like "<antibody>_filtered_mut_effect.csv"
    antibody_name = antibody_file.split("/")[-1].split("_")[0]
    escape_col = "escape_" + antibody_name

    escape_df = (
        pd.read_csv(antibody_file)
        # Filter escape df for low functional score mutations
        .query("poor_cell_entry == False")
        # Clip lower scores to 0. `assign` builds a new frame instead of
        # writing into the result of `query`.
        .assign(escape_median=lambda df: df["escape_median"].clip(lower=0))
        # Calculate site summed escape
        .groupby(["site", "wildtype"])
        .aggregate({"escape_median": "sum"})
        .reset_index()
        # Rename escape column to include antibody name
        .rename(columns={"escape_median": escape_col})
    )

    # Left merge keeps every site that has a functional effect, even when
    # the antibody has no escape measurement for it (escape becomes NaN).
    merged_df = merged_df.merge(
        escape_df[["site", "wildtype", escape_col]],
        how="left",
        on=["site", "wildtype"],
        validate="one_to_one",
    )

# Add contacts to dataframe for each antibody
for contacts_file in contacts:

    # File names look like "antibody_contacts_<antibody>.csv"
    antibody_name = contacts_file.split("/")[-1].split("_")[-1][:-4]

    # Keep the first listed distance for every position
    contacts_df = (
        pd.read_csv(contacts_file)
        .groupby(["position"])
        .aggregate({"distance": "first"})
        .reset_index()
        .sort_values(by=["distance", "position"])
        .rename(columns={
            "position": "site",
            "distance": antibody_name + "_distance",
        })
        .reset_index(drop=True)
    )

    # Merge escape and contacts dataframes
    merged_df = merged_df.merge(
        contacts_df,
        how="left",
        on="site",
        validate="one_to_one",
    )

# Fixed x-axis (site escape) limits for each antibody panel.
# NOTE(review): the 377H limits here (-8.75, 78.75) differ from the ones
# used by `plot_func_scores_vs_escape` (-9.375, 84.375) -- confirm which
# pair is intended.
escape_axis_limits = {
    "2510C": (-7.5, 67.5),
    "121F": (-3.75, 33.75),
    "377H": (-8.75, 78.75),
    "256A": (-5, 45),
    "372D": (-1.875, 16.875),
    "89F": (-5, 45),
}

# One interactive scatter panel per antibody: summed site escape (x) vs
# site mean functional effect (y) for sites with a contact distance of 4.
subplots = []
for antibody_file in escape:

    antibody_name = antibody_file.split("/")[-1].split("_")[0]

    # Set lim for each antibody
    if antibody_name in escape_axis_limits:
        fixed_min, fixed_max = escape_axis_limits[antibody_name]
    else:
        # Best effort: keep plotting without fixed limits
        fixed_min = None
        fixed_max = None
        print("Error! No xlims set!")

    contact_sites = merged_df.loc[merged_df[antibody_name + "_distance"] == 4]

    curr_subplot = alt.Chart(contact_sites, title=antibody_name).mark_point(
        filled=True,
        color="black",
        size=75,
        opacity=0.15,
    ).encode(
        alt.X(
            "escape_" + antibody_name,
            axis=alt.Axis(
                title="site escape",
                domainWidth=1,
                domainColor="black",
                tickColor="black",
            ),
            scale=alt.Scale(domain=[fixed_min, fixed_max]),
        ),
        alt.Y(
            "effect",
            axis=alt.Axis(
                title=["site mean", "effect on cell entry"],
                values=[-4, -3, -2, -1, 0, 1],
                domainWidth=1,
                domainColor="black",
                tickColor="black",
            ),
            scale=alt.Scale(domain=[-4.1, 1.1]),
        ),
        tooltip=[
            "site",
            "wildtype",
            "effect",
            "escape_" + antibody_name,
            antibody_name + "_distance",
        ],
    ).properties(
        width=150,
        height=150,
    )

    subplots.append(curr_subplot)

# Concatenate however many panels were built instead of indexing six
escape_vs_effect = alt.hconcat(
    *subplots,
    spacing=5,
    title=["Functional effect vs antibody escape", "for antibody contact sites"],
).configure_axis(
    grid=False,
    labelFontSize=16,
    titleFontSize=16,
    labelFontWeight="normal",
    titleFontWeight="normal",
).configure_title(
    fontSize=24,
)

# Make output dir if doesn't exist. `makedirs` also creates missing parent
# directories and does not fail when the directory is already there.
os.makedirs(out_dir, exist_ok=True)

escape_vs_effect.save(func_vs_escape)

# Bare expression so a notebook renders the chart
escape_vs_effect

# Functions
def plot_func_scores_vs_escape(merged_df, ax, i, antibody_name):
    """
    This function creates a plot of
    functional scores vs antibody escape
    for all antibody contact sites (within 4 A).

    Parameters
    ----------
    merged_df : pandas.DataFrame
        Site-level table with the columns "site", "wildtype", "effect",
        "escape_<antibody_name>" and "<antibody_name>_distance".
    ax : matplotlib.axes.Axes
        Axes the panel is drawn on.
    i : int
        Panel index. Not used by this function; kept so that existing
        callers keep working.
    antibody_name : str
        Antibody identifier as used in the column names (e.g. "89F").

    Returns
    -------
    None
        The panel is drawn on `ax` as a side effect.
    """

    # antibody -> (panel title, title color, (x min, x max), x ticks)
    panel_settings = {
        "2510C": ("25.10C", "#44AA99", (-7.5, 67.5), [0, 30, 60]),
        "121F": ("12.1F", "#999933", (-3.75, 33.75), [0, 15, 30]),
        "377H": ("37.7H", "#AA4499", (-9.375, 84.375), [0, 35, 70]),
        "256A": ("25.6A", "#AA4499", (-5, 45), [0, 20, 40]),
        "372D": ("37.2D", "#AA4499", (-1.875, 16.875), [0, 7.5, 15]),
        "89F": ("8.9F", "#117733", (-5, 45), [0, 20, 40]),
    }

    # antibody -> {site: (x offset, y offset, horizontal alignment, suffix)}
    # Hand-tuned positions of the site labels relative to their point.
    # The suffix only pads the label text to nudge a centered label.
    label_settings = {
        "89F": {
            125: (0, 0.2, "left", ""),
            258: (4, -0.1, "left", ""),
        },
        "377H": {
            399: (5, -0.2, "left", ""),
            401: (0, 0.2, "center", ""),
            404: (0, 0.2, "center", ""),
        },
        "256A": {
            399: (3, -0.2, "left", ""),
            401: (0, 0.2, "center", "  "),
            404: (0, 0.2, "center", ""),
        },
        "2510C": {
            100: (0, 0.2, "left", ""),
            228: (0, 0.2, "center", ""),
        },
        "121F": {
            161: (0, 0.2, "left", ""),
            107: (3, -0.2, "left", ""),
            92: (0, 0.2, "left", ""),
        },
        "372D": {
            264: (1.5, -0.2, "left", ""),
            395: (0, 0.2, "center", ""),
            397: (0, 0.2, "center", ""),
        },
    }

    escape_col = "escape_" + antibody_name
    settings = panel_settings.get(antibody_name)

    # Plot escape vs functional score for the contact sites only
    chart = sns.scatterplot(
        data=merged_df.loc[merged_df[antibody_name + "_distance"] == 4],
        y="effect",
        x=escape_col,
        edgecolor=None,
        linewidth=0.5,
        s=20,
        ax=ax,
        alpha=0.15,
        color="#000000",
    )

    if settings is None:
        # Best effort: draw the panel with automatic x limits and no title
        print("Error! No xlims set!")
        fixed_min = None
        fixed_max = None
    else:
        title, title_color, (fixed_min, fixed_max), xticks = settings
        chart.set_title(title, fontsize=8, color=title_color)
        chart.set_xticks(xticks)
        chart.set_xticklabels(labels=[str(tick) for tick in xticks], fontsize=8)

    chart.set_ylabel("site mean\neffect on cell entry", fontsize=8)
    chart.set_xlim(fixed_min, fixed_max)
    chart.set_ylim(-4.1, 1.1)
    yticks = [-4, -3, -2, -1, 0, 1]
    chart.set_yticks(yticks)
    chart.set_yticklabels(labels=["-4", "-3", "-2", "-1", "0", "1"], fontsize=8)
    chart.set(xlabel=None)

    # Change all spines
    for axis in ["top", "bottom", "left", "right"]:
        chart.spines[axis].set_linewidth(1)
    chart.tick_params(axis="both", length=3, width=1)

    chart.grid(False)
    sns.despine()

    # Label selected points on each scatter plot. Rows are selected by site
    # value, so this does not depend on the dataframe having a 0..n-1 index.
    site_labels = label_settings.get(antibody_name, {})
    labelled_rows = merged_df.loc[merged_df["site"].isin(list(site_labels))]
    for site, wt, x_pos, y_pos in zip(
        labelled_rows["site"],
        labelled_rows["wildtype"],
        labelled_rows[escape_col],
        labelled_rows["effect"],
    ):
        # A label without a finite position cannot be placed
        if pd.isna(x_pos) or pd.isna(y_pos):
            continue
        x_offset, y_offset, alignment, suffix = site_labels[site]
        chart.text(
            x_pos + x_offset,
            y_pos + y_offset,
            f"{wt}{site}{suffix}",
            fontsize=6,
            horizontalalignment=alignment,
            color="#000000",
        )

# Static version of the escape-vs-functional-effect figure:
# one panel per antibody, all sharing the y axis.
fig, axes = plt.subplots(
    1,
    6,
    figsize=(6.4,2),
    sharey=True,
)
for i, antibody_file in enumerate(escape):

    # File names look like "<antibody>_filtered_mut_effect.csv"
    antibody_name = antibody_file.split("/")[-1].split("_")[0]

    plot_func_scores_vs_escape(merged_df, axes[i], i, antibody_name)

# Common x-axis label, centered below the panels
fig.text(0.5, 0, "site escape", ha="center", rotation="horizontal", fontsize=8)

fig.tight_layout(w_pad=0.5)

# Make output dir if doesn't exist. `makedirs` also creates missing parent
# directories and does not fail when the directory is already there.
os.makedirs(out_dir, exist_ok=True)

# Save fig
plt.savefig(func_vs_escape_svg)

# Rebuild `merged_df` at mutation level: one row per filtered functional
# score, with the escape of every antibody and the per-site contact
# distance of every antibody attached.

# Load functional scores as dataframe
functional_scores = pd.read_csv(func_scores)

# Filter functional scores for stop codons, min times seen,
# and min selections (no site averaging here)
passes_filters = (
    (functional_scores["mutant"] != "*")
    & (functional_scores["times_seen"] >= min_times_seen)
    & (functional_scores["n_selections"] >= n_selections)
)
merged_df = functional_scores.loc[passes_filters]

# Add escape to dataframe for each antibody
for antibody_file in escape:

    # File names look like "<antibody>_filtered_mut_effect.csv"
    antibody_name = antibody_file.split("/")[-1].split("_")[0]
    escape_col = "escape_" + antibody_name

    escape_df = (
        pd.read_csv(antibody_file)
        # Filter escape df for low functional score mutations
        .query("poor_cell_entry == False")
        # Clip lower scores to 0. `assign` builds a new frame instead of
        # writing into the result of `query`.
        .assign(escape_median=lambda df: df["escape_median"].clip(lower=0))
        # Rename escape column to include antibody name
        .rename(columns={"escape_median": escape_col})
    )

    # Merge dataframes
    merged_df = merged_df.merge(
        escape_df[["site", "wildtype", "mutant", escape_col]],
        how="left",
        on=["site", "wildtype", "mutant"],
        validate="one_to_one",
    )

# Add contacts to dataframe for each antibody
for contacts_file in contacts:

    # File names look like "antibody_contacts_<antibody>.csv"
    antibody_name = contacts_file.split("/")[-1].split("_")[-1][:-4]

    # Keep the first listed distance for every position
    contacts_df = (
        pd.read_csv(contacts_file)
        .groupby(["position"])
        .aggregate({"distance": "first"})
        .reset_index()
        .sort_values(by=["distance", "position"])
        .rename(columns={
            "position": "site",
            "distance": antibody_name + "_distance",
        })
        .reset_index(drop=True)
    )

    # Several mutation rows share a site, hence many-to-one
    merged_df = merged_df.merge(
        contacts_df,
        how="left",
        on="site",
        validate="many_to_one",
    )

# Sites flagged as glycan sites, listed as eleven runs of three
# consecutive positions.
glycan_sites = [
    79, 80, 81,
    89, 90, 91,
    99, 100, 101,
    109, 110, 111,
    119, 120, 121,
    167, 168, 169,
    224, 225, 226,
    365, 366, 367,
    373, 374, 375,
    390, 391, 392,
    395, 396, 397,
]

# Mark each site if it's a glycan site (vectorised membership test
# instead of a per-row Python lambda)
merged_df["glycan_site"] = merged_df["site"].isin(glycan_sites)

# Maps a group of sites to the label used for that group in the plots.
# Insertion order matters: lookups return the label of the first group
# that contains the site.
# NOTE(review): sites 120 and 121 appear both in the N119 group and in the
# alpha-DG binding group, so they are always labelled "N119" -- confirm
# this is intended.
glycan_numbers = {
    (79, 80, 81): "N79",
    (89, 90, 91): "N89",
    (99, 100, 101): "N99",
    (109, 110, 111): "N109",
    (119, 120, 121): "N119",
    (167, 168, 169): "N167",
    (224, 225, 226): "N224",
    (365, 366, 367): "N365",
    (373, 374, 375): "N373",
    (390, 391, 392): "N390",
    (395, 396, 397): "N395",
    (
        120,
        121,
        125,
        150,  # supported by 151 and 125 interactions
        151,
        256,
        257,
        258,
    ): "\u03B1-DG binding sites",
    (
        92,  # histidine triad
        93,  # histidine triad
        172,
        173,
        188,
        192,
        195,
        197,
        198,
        200,
        201,
        202,
        204,
        206,
        207,
        211,
        216,
        230,  # histidine triad
    ): "LAMP1 binding sites",
}


def site_to_glycan_num(site):
    """
    Return the `glycan_numbers` label of the first site group that
    contains `site`, or NaN when no group contains it.
    """
    matching_labels = (
        label
        for group, label in glycan_numbers.items()
        if site in group
    )
    return next(matching_labels, float("NaN"))

# Attach the site-group label (glycan or binding site) to every row;
# rows whose site belongs to no group get NaN.
site_group_labels = merged_df["site"].apply(site_to_glycan_num)
merged_df["glycan_number"] = site_group_labels

# Row order of the categorical y-axis shared by all strip plots below
gpc_site_order = [
    "N79",
    "N89",
    "N99",
    "N109",
    "N119",
    "N167",
    "N224",
    "N365",
    "N373",
    "N390",
    "N395",
    "\u03B1-DG binding sites",
    "LAMP1 binding sites",
]


def _escape_by_gpc_site_chart(
    data,
    antibody_name,
    antibody_label,
    title_color,
    x_title,
    point_size,
    width,
    height,
):
    """
    Build a jittered strip plot of per-mutation escape for each labelled
    GPC site group.

    Parameters
    ----------
    data : pandas.DataFrame
        Mutation-level table with the columns "site", "wildtype", "mutant",
        "effect", "glycan_number" and "escape_<antibody_name>".
    antibody_name : str
        Antibody identifier used in the escape column name (e.g. "121F").
    antibody_label : str
        Display name used in the title and tooltip (e.g. "12.1F").
    title_color : str
        Color of the chart title.
    x_title : str or None
        Title of the x-axis; None hides it.
    point_size : int
        Mark size of the circles.
    width, height : int
        Chart size in pixels.

    Returns
    -------
    altair.Chart
    """
    escape_column = "escape_" + antibody_name
    return alt.Chart(
        # NaN != NaN, so this keeps only rows that have a site-group label
        data.query("glycan_number == glycan_number"),
        title=alt.TitleParams(
            text="antibody " + antibody_label,
            color=title_color,
        ),
    ).mark_circle(opacity=0.15, size=point_size, color="#000000").encode(
        y=alt.Y(
            "glycan_number:N",
            title="GPC site",
            sort=gpc_site_order,
            axis=alt.Axis(
                domainWidth=1,
                domainColor="black",
                tickColor="black",
            ),
        ),
        x=alt.X(
            escape_column + ":Q",
            title=x_title,
            axis=alt.Axis(
                values=[0, 1, 2, 3, 4, 5],
                domainWidth=1,
                domainColor="black",
                tickColor="black",
            ),
            scale=alt.Scale(domain=[-0.1, 5.1]),
        ),
        yOffset="jitter:Q",
        tooltip=[
            "site",
            "wildtype",
            "mutant",
            alt.Tooltip(
                escape_column, format=".2f", title=antibody_label + " escape"
            ),
            alt.Tooltip(
                "effect", format=".2f", title="effect on cell entry"
            ),
        ],
    ).transform_calculate(
        # Generate Gaussian jitter with a Box-Muller transform
        jitter="sqrt(-2*log(random()))*cos(2*PI*random())"
    ).properties(
        width=width,
        height=height,
    )


# Plot score distributions for each region (large, interactive size)
plot_121F = _escape_by_gpc_site_chart(
    merged_df, "121F", "12.1F", "#999933",
    x_title=None, point_size=75, width=400, height=600,
)
plot_89F = _escape_by_gpc_site_chart(
    merged_df, "89F", "8.9F", "#117733",
    x_title="escape", point_size=75, width=400, height=600,
)

combined_plot = alt.vconcat(
    plot_121F,
    plot_89F,
).configure_axis(
    grid=False,
    labelFontSize=16,
    titleFontSize=16,
    labelFontWeight="normal",
    titleFontWeight="normal",
).configure_title(
    fontSize=24,
).configure_view(
    stroke=None
)

# Bare expression so a notebook renders the chart
combined_plot

# Plot score distributions for each region (small, figure-panel size)
plot_121F = _escape_by_gpc_site_chart(
    merged_df, "121F", "12.1F", "#999933",
    x_title=None, point_size=5, width=75, height=115,
)
plot_89F = _escape_by_gpc_site_chart(
    merged_df, "89F", "8.9F", "#117733",
    x_title="escape", point_size=5, width=75, height=115,
)

combined_plot = alt.vconcat(
    plot_121F,
    plot_89F,
    spacing=5,
).configure_axis(
    grid=False,
    labelFontSize=8,
    titleFontSize=8,
    labelFontWeight="normal",
    titleFontWeight="normal",
).configure_title(
    fontSize=8,
    fontWeight="bold",
).configure_view(
    stroke=None
)

# Bare expression so a notebook renders the chart
combined_plot

# Functions
def plot_func_scores_vs_distance(contacts_file, escape_file, ax, i, func_scores_file):
    """
    This function creates a plot of
    functional scores stratified by antibody
    distance.

    Only contact sites and sites of strong escape are drawn. A site counts
    as strong escape when its summed escape exceeds ten times the median
    summed site escape.

    Parameters
    ----------
    contacts_file : str
        Path to "antibody_contacts_<antibody>.csv" with the columns
        "position" and "distance".
    escape_file : str
        Path to the filtered escape CSV with the columns "site",
        "escape_median" and "poor_cell_entry".
    ax : matplotlib.axes.Axes
        Axes the panel is drawn on.
    i : int
        Panel index. Panel 0 keeps its y-axis and panel 5 keeps the legend.
    func_scores_file : str
        Path to the functional effects CSV with the columns "site",
        "mutant", "times_seen", "n_selections" and "effect".

    Returns
    -------
    None
        The panel is drawn on `ax` as a side effect.

    Notes
    -----
    Reads the module-level `min_times_seen` and `n_selections` thresholds
    and reseeds NumPy's global random generator so the jitter is the same
    on every run.
    """

    # antibody -> (panel title, title color)
    title_styles = {
        "2510C": ("25.10C", "#44AA99"),
        "121F": ("12.1F", "#999933"),
        "377H": ("37.7H", "#AA4499"),
        "256A": ("25.6A", "#AA4499"),
        "372D": ("37.2D", "#AA4499"),
        "89F": ("8.9F", "#117733"),
    }

    antibody_name = contacts_file.split("/")[-1].split("_")[2][:-4]

    # Keep the first listed distance for every position
    contacts_df = (
        pd.read_csv(contacts_file)
        .groupby(["position"])
        .aggregate({"distance": "first"})
        .reset_index()
        .sort_values(by=["distance", "position"])
        .rename(columns={"position": "site"})
        .reset_index(drop=True)
    )

    escape_df = (
        pd.read_csv(escape_file)
        # Filter escape df for low functional score mutations
        .query("poor_cell_entry == False")
        # Floor escape scores at 0
        .assign(escape_median=lambda df: df["escape_median"].clip(lower=0))
        # Calculate site sums
        .groupby(["site"])
        .aggregate({"escape_median": "sum"})
        .reset_index()
    )

    # Calculate site averages for functional scores but filter
    # for stop codons, min times seen, and min selections
    func_scores = pd.read_csv(func_scores_file)
    func_scores = (
        func_scores.loc[
            (func_scores["mutant"] != "*")
            & (func_scores["times_seen"] >= min_times_seen)
            & (func_scores["n_selections"] >= n_selections)
        ]
        .groupby(["site"])
        .aggregate({"effect": "mean"})
        .reset_index()
    )

    # Merge functional and escape dfs, then add the contact distances.
    # Sites absent from the contacts file get the placeholder distance 100.
    # Only "distance" is filled, so a missing functional effect stays NaN
    # instead of turning into an effect of 100.
    merged_df = (
        escape_df
        .merge(func_scores, how="left", on=["site"], validate="one_to_one")
        .merge(contacts_df, how="left", on="site", validate="one_to_one")
        .fillna({"distance": 100})
    )

    # Mark sites with strong escape
    cutoff = escape_df["escape_median"].median() * 10
    merged_df["strong escape"] = merged_df["escape_median"] > cutoff

    # Re-map distance for plotting and sort
    # (4 -> contact, 8 -> proximal, placeholder 100 -> distal)
    merged_df["distance"] = merged_df["distance"].map({
        100: 3,
        4: 0,
        8: 1.5,
    })
    merged_df = merged_df.sort_values(by="distance")

    # Add jitter to x values. Seeded like plot_escape_vs_contact_distance
    # so the saved figure is reproducible.
    np.random.seed(0)
    merged_df["jittered_x"] = (
        merged_df["distance"]
        + np.random.normal(0, 0.1, merged_df["distance"].shape)
    )

    # Keep contact sites plus every site of strong escape
    merged_df = merged_df.loc[
        (merged_df["distance"] == 0)
        | merged_df["strong escape"]
    ]

    # Plot functional score by distance class
    chart = sns.scatterplot(
        data=merged_df,
        x="jittered_x",
        y="effect",
        hue="strong escape",
        edgecolor=None,
        linewidth=0.5,
        palette={False : "#00000026", True : "#EE7733CC"},
        s=20,
        ax=ax,
    )

    if antibody_name in title_styles:
        title, title_color = title_styles[antibody_name]
        chart.set_title(title, fontsize=8, color=title_color)

    xticks = [0, 1.5, 3]
    chart.set_xticks(xticks)
    x_labels = [
        "contact",
        "proximal",
        "distal",
    ]
    chart.set_xticklabels(
        labels=x_labels,
        rotation=90,
        horizontalalignment="center",
        fontsize=8,
    )
    chart.set_ylabel("site mean\neffect on cell entry", fontsize=8)
    chart.set_ylim(-5, 1)
    chart.set_xlim(-0.75, 3.75)
    yticks = [-4, -2, 0]
    chart.set_yticks(yticks)
    chart.set_yticklabels(labels=["-4", "-2", "0"], fontsize=8)
    chart.set(xlabel=None)

    # Make only one legend
    if i == 5:
        sns.move_legend(
            chart,
            "upper left",
            bbox_to_anchor=(1, 1),
            fontsize=8,
            markerscale=1,
            handletextpad=0.1,
            title="site of\nstrong\nescape",
            title_fontproperties={"size": 8},
            frameon=False,
            borderaxespad=0.1,
            reverse=True,
        )
        # Add edges to legend markers to match scatter plot.
        # Newer matplotlib exposes `legend_handles`; older releases only
        # have `legendHandles`.
        legend = chart.legend_
        handles = getattr(legend, "legend_handles", None)
        if handles is None:
            handles = legend.legendHandles
        for handle in handles:
            handle.set_edgecolor(None)
            handle.set_linewidths(0.5)
    else:
        # No legend exists when seaborn had nothing to put in it
        panel_legend = ax.get_legend()
        if panel_legend is not None:
            panel_legend.remove()

    # Change all spines
    for axis in ["top", "bottom", "left", "right"]:
        chart.spines[axis].set_linewidth(1)
    chart.tick_params(axis="both", length=4, width=1)

    # Only keep the first y-axis
    if i != 0:
        chart.spines["left"].set_linewidth(0)
        chart.set_yticks([])
        chart.set_yticklabels([])
        chart.set_ylabel("")

    chart.grid(False)
    sns.despine()

# One panel per antibody; `contacts` and `escape` are in the same order
fig, axes = plt.subplots(1, 6, figsize=(6,2))
for i, (contacts_path, escape_path) in enumerate(zip(contacts, escape)):
    plot_func_scores_vs_distance(contacts_path, escape_path, axes[i], i, func_scores)

# Make output dir if doesn't exist. `makedirs` also creates missing parent
# directories and does not fail when the directory is already there.
os.makedirs(out_dir, exist_ok=True)

# Save fig
plt.savefig(func_distance_image_path)

# Functions
# Title text and title color for each antibody panel.
# Antibody names that are not listed here get no title.
_PANEL_TITLES = {
    "2510C": ("25.10C", "#44AA99"),
    "121F": ("12.1F", "#999933"),
    "377H": ("37.7H", "#AA4499"),
    "256A": ("25.6A", "#AA4499"),
    "372D": ("37.2D", "#AA4499"),
    "89F": ("8.9F", "#117733"),
}

# Sites that get a text label on each antibody panel, mapped to the
# (x, y) offset of the label relative to the plotted point.
_SITE_LABEL_OFFSETS = {
    "89F": {
        119: (0.05, 2),
        125: (0.2, 1),
        129: (0.05, 2),
        138: (0.05, 2),
        150: (0.05, 2),
    },
    "377H": {
        398: (0.25, 0),
        401: (0.05, 2),
        402: (0.05, 2),
        404: (0.05, 2),
    },
    "256A": {
        401: (0.05, 2),
        404: (0.05, 2),
    },
    "2510C": {
        76: (0.05, 2),
        99: (0.05, 2),
        101: (-0.75, 2),
        228: (0.05, 2),
    },
    "121F": {
        89: (0.05, 2),
        92: (0.05, 2),
        111: (-1.25, 1),
        127: (0.1, 2),
        135: (0, 3),
        160: (-1.25, 1),
    },
    "372D": {
        149: (0.05, 2),
        395: (0.05, 2),
        397: (0.05, 2),
        398: (-1.25, 2),
    },
}


def _percent_or_na(count, total):
    """
    Format count/total as a percentage with one decimal,
    or "n/a" when total is 0 (instead of dividing by zero).
    """
    if total == 0:
        return "n/a"
    return f"{(count/total)*100:.1f}%"


def _count_sites(merged_df, x_position):
    """
    Return (number of sites, number of strong escape sites)
    plotted at the given re-mapped distance position.
    """
    in_category = merged_df["distance"] == x_position
    n_sites = int(in_category.sum())
    n_strong = int((in_category & merged_df["strong escape"]).sum())
    return n_sites, n_strong


def plot_escape_vs_contact_distance(
    contacts_file, escape_file, ax, i, protein_length=491
):
    """
    This function creates a plot of
    site escape stratified by contact.

    Per-site escape is the sum of the mutation-level `escape_median`
    values (floored at 0) after dropping mutations flagged as
    `poor_cell_entry`. Sites whose summed escape exceeds 10x the
    median site escape are marked as "strong escape". Summary
    statistics are printed to stdout as a side effect, and the global
    numpy random state is re-seeded with 0 for the jitter.

    Parameters
    ----------
    contacts_file : str
        Path to a CSV with `position` and `distance` columns. The
        antibody name is parsed from the file name, expected to look
        like `antibody_contacts_<name>.csv`. Distance values 4 and 8
        are plotted as "contact" and "proximal" (presumably distance
        cutoffs - confirm against the contacts files); any other
        value is mapped to NaN and therefore not drawn.
    escape_file : str
        Path to a CSV with `site`, `escape_median` and
        `poor_cell_entry` columns.
    ax : matplotlib axes
        Axes to draw the panel on.
    i : int
        Panel index. Panel 0 keeps its y-axis and panel 5 keeps
        the legend; all others have them removed.
    protein_length : int, optional
        Total number of sites in the protein, used as the denominator
        for the distal category so that unmeasured sites are counted
        as distal without strong escape. Defaults to 491, which the
        original code annotated as the size of the LASV protein.
    """

    antibody_name = contacts_file.split("/")[-1].split("_")[2][:-4]

    # Load contacts and keep one distance per site
    contacts_df = (
        pd.read_csv(contacts_file)
        .groupby(["position"])
        .aggregate({"distance" : "first"})
        .reset_index()
        .sort_values(by=["distance", "position"])
        .rename(columns={"position" : "site"})
        .reset_index(drop=True)
    )

    # Load escape, drop mutations with poor cell entry, floor escape
    # scores at 0 and sum per site. `assign` works on the filtered
    # frame directly, so no column is set on a slice of the raw data.
    escape_df = (
        pd.read_csv(escape_file)
        .query("poor_cell_entry == False")
        .assign(escape_median=lambda df: df["escape_median"].clip(lower=0))
        .groupby(["site"])
        .aggregate({"escape_median" : "sum"})
        .reset_index()
    )

    # Merge escape and contacts dataframes; sites that are absent from
    # the contacts file get the placeholder distance 100 (distal)
    merged_df = (
        escape_df.merge(
            contacts_df,
            how="left",
            on="site",
            validate="one_to_one",
        )
        .fillna({"distance" : 100})
    )

    # Mark sites with strong escape
    median_site_escape = escape_df["escape_median"].median()
    cutoff = median_site_escape * 10
    merged_df["strong escape"] = merged_df["escape_median"] > cutoff

    # Re-map distance for plotting and sort
    merged_df["distance"] = merged_df["distance"].map({
        100 : 3,
        4 : 0,
        8 : 1.5,
    })
    merged_df = merged_df.sort_values(by="distance")

    # Add jitter to x values (fixed seed so the figure is reproducible)
    np.random.seed(0)
    merged_df["jittered_x"] = merged_df["distance"] + np.random.normal(0,0.1,merged_df["distance"].shape)

    chart = sns.scatterplot(
        data=merged_df,
        x="jittered_x",
        y="escape_median",
        hue="strong escape",
        edgecolor=None,
        linewidth=0.5,
        palette={False : "#00000026", True : "#EE7733CC"},
        s=20,
        ax=ax,
    )

    # Title only for the antibodies with a configured style
    if antibody_name in _PANEL_TITLES:
        title_text, title_color = _PANEL_TITLES[antibody_name]
        chart.set_title(title_text, fontsize=8, color=title_color)

    xticks = [0, 1.5, 3]
    chart.set_xticks(xticks)
    x_labels = [
        "contact",
        "proximal",
        "distal",
    ]
    chart.set_xticklabels(labels=x_labels, rotation=90, horizontalalignment="center", fontsize=8)
    chart.set_ylabel("site escape", fontsize=8)
    chart.set_ylim(-2.5, 95)
    chart.set_xlim(-0.75, 3.75)
    yticks = [0, 20, 40, 60, 80]
    chart.set_yticks(yticks)
    chart.set_yticklabels(labels=["0", "20", "40", "60", "80"], fontsize=8)
    chart.set(xlabel=None)

    # Make only one legend
    if i == 5:
        sns.move_legend(
            chart,
            "upper left",
            bbox_to_anchor=(1, 1),
            fontsize=8,
            markerscale=1,
            handletextpad=0.1,
            title="site of\nstrong\nescape",
            title_fontproperties = {
                "size" : 8,
            },
            frameon=False,
            borderaxespad=0.1,
            reverse=True,
        )
        # Add edges to legend markers to match scatter plot.
        # `legendHandles` was renamed to `legend_handles` in matplotlib 3.7
        # and the old name was later removed, so prefer the new attribute.
        legend = chart.legend_
        handles = getattr(legend, "legend_handles", None)
        if handles is None:
            handles = legend.legendHandles
        for handle in handles:
            handle.set_edgecolor(None)
            handle.set_linewidths(0.5)
    else:
        ax.get_legend().remove()

    # Change all spines
    for side in ["top", "bottom", "left", "right"]:
        chart.spines[side].set_linewidth(1)
    # Only keep the first y-axis
    if i != 0:
        chart.spines["left"].set_linewidth(0)
        chart.set_yticks([])
        chart.set_yticklabels([])
        chart.set_ylabel("")
    chart.tick_params(axis="both", length=4, width=1)

    chart.grid(False)
    sns.despine()

    # Plot cutoff line
    chart.axhline(
        y = cutoff,
        color = "#000000",
        linestyle = "--",
        alpha=0.5,
        linewidth=1,
    )

    # Calculate counts for each distance and add to top of plot
    n_contact, strong_contact = _count_sites(merged_df, 0)
    n_proximal, strong_proximal = _count_sites(merged_df, 1.5)
    _, strong_distal = _count_sites(merged_df, 3)
    # Every site of the protein that is neither contact nor proximal is
    # counted as distal, including sites with no escape measurement
    n_distal = protein_length - n_proximal - n_contact
    category_counts = [
        ("Contacts", 0, strong_contact, n_contact),
        ("Proximal", 1.5, strong_proximal, n_proximal),
        ("Distal", 3, strong_distal, n_distal),
    ]

    print(antibody_name)
    for label, x_position, n_strong, n_total in category_counts:
        print(f"{label} with strong escape: {_percent_or_na(n_strong, n_total)}")
        # Strong escape count on top, remaining sites below it
        chart.text(
            x_position,
            91,
            f"{n_strong}",
            fontsize=7,
            horizontalalignment="center",
            color="#EE7733",
        )
        chart.text(
            x_position,
            85,
            f"{n_total-n_strong}",
            fontsize=7,
            horizontalalignment="center",
            color="#000000"
        )

    # Calculate stats for summed escape across all sites
    print(f"Cumulative escape across all sites: {escape_df['escape_median'].sum()}")
    print(f"Median escape across all sites: {median_site_escape}")
    print(f"Mean escape across all sites: {escape_df['escape_median'].mean()}")
    print()

    # Label selected points on each scatter plot. Rows are visited in
    # index order so labels are added in the same order as the merge.
    label_offsets = _SITE_LABEL_OFFSETS.get(antibody_name, {})
    if label_offsets:
        rows = merged_df.sort_index()
        for site, x_pos, y_pos in zip(
            rows["site"], rows["jittered_x"], rows["escape_median"]
        ):
            offset = label_offsets.get(site)
            if offset is None:
                continue
            x_offset, y_offset = offset
            chart.text(
                x_pos + x_offset,
                y_pos + y_offset,
                f"{site}",
                fontsize=6,
                horizontalalignment="left",
                color="#EE7733",
            )

# One panel per antibody, laid out side by side in a single row
fig, axes = plt.subplots(1, 6, figsize=(6,2))
# Indexing `escape` and `axes` (rather than zipping) keeps a length
# mismatch between the lists loud instead of silently truncating
for i, contacts_file in enumerate(contacts):
    plot_escape_vs_contact_distance(contacts_file, escape[i], axes[i], i)

# Make output dir if it doesn't exist. `makedirs` also creates any missing
# parent directories and does not fail if the directory is already there,
# which avoids the check-then-create race of `exists` + `mkdir`.
os.makedirs(out_dir, exist_ok=True)

# Save fig
plt.savefig(saved_image_path)

2510C
Contacts with strong escape: 23.1%
Proximal with strong escape: 14.3%
Distal with strong escape: 0.2%
Cumulative escape across all sites: 555.38503226
Median escape across all sites: 0.7075149999999999
Mean escape across all sites: 1.303720733004695

121F
Contacts with strong escape: 33.3%
Proximal with strong escape: 10.0%
Distal with strong escape: 6.1%
Cumulative escape across all sites: 683.2671576500001
Median escape across all sites: 0.560305
Mean escape across all sites: 1.6039135156103288

377H
Contacts with strong escape: 14.3%
Proximal with strong escape: 0.0%
Distal with strong escape: 0.7%
Cumulative escape across all sites: 791.4568553070001
Median escape across all sites: 1.0570673
Mean escape across all sites: 1.7946867467278913

256A
Contacts with strong escape: 9.1%
Proximal with strong escape: 0.0%
Distal with strong escape: 0.9%
Cumulative escape across all sites: 318.95216001250003
Median escape across all sites: 0.3497
Mean escape across all sites: 0.7504756706176471

372D
Contacts with strong escape: 18.2%
Proximal with strong escape: 0.0%
Distal with strong escape: 4.3%
Cumulative escape across all sites: 171.768081009
Median escape across all sites: 0.13061999999999999
Mean escape across all sites: 0.3859956876606742

89F
Contacts with strong escape: 77.8%
Proximal with strong escape: 66.7%
Distal with strong escape: 1.5%
Cumulative escape across all sites: 792.37621389
Median escape across all sites: 0.646014
Mean escape across all sites: 1.800855031568182

Escape sites related to antibody contact distance