# this cell is tagged as parameters for `papermill` parameterization
# input configs
altair_config = None
nipah_config = None

# input files
entropy_file = None

func_scores_E2_file = None
binding_E2_file = None
e2_low_func_file = None

func_scores_E3_file = None
binding_E3_file = None
e3_low_func_file = None

# output images
E2_binding_heatmap = None
E3_binding_heatmap = None
E2_entry_heatmap = None
E3_entry_heatmap = None
combined_entry_binding_contact = None
E3_entry_AA_prop_heatmap = None
E2_entry_AA_prop_heatmap = None
glycan_sites_img_heatmap = None
nipah_poly_sites_img_heatmap = None
HENV103_heatmap_plot = None
nAH1_heatmap_plot = None
HENV117_heatmap_plot = None
HENV32_heatmap_plot = None
HENV26_heatmap_plot = None
m102_heatmap_plot = None

# Parameters
nipah_config = "nipah_config.yaml"
altair_config = "data/custom_analyses_data/heatmap_theme.py"
entropy_file = "results/entropy/entropy.csv"
func_scores_E2_file = "results/filtered_data/entry/e2_entry_filtered.csv"
func_scores_E3_file = "results/filtered_data/entry/e3_entry_filtered.csv"
binding_E2_file = "results/filtered_data/binding/e2_binding_filtered.csv"
binding_E3_file = "results/filtered_data/binding/e3_binding_filtered.csv"
e2_low_func_file = "results/filtered_data/binding/e2_low_binding_effect_filter.csv"
e3_low_func_file = "results/filtered_data/binding/e3_low_binding_effect_filter.csv"
E2_binding_heatmap = "results/images/E2_binding_heatmap.html"
E3_binding_heatmap = "results/images/E3_binding_heatmap.html"
E2_entry_heatmap = "results/images/E2_entry_heatmap.html"
E3_entry_heatmap = "results/images/E3_entry_heatmap.html"
combined_entry_binding_contact = (
    "results/images/combined_entry_binding_contact_heatmaps.html"
)
E3_entry_AA_prop_heatmap = "results/images/E3_entry_AA_prop_heatmap.html"
E2_entry_AA_prop_heatmap = "results/images/E2_entry_AA_prop_heatmap.html"
glycan_sites_img_heatmap = "results/images/glycan_sites_img_heatmap.html"
nipah_poly_sites_img_heatmap = "results/images/nipah_poly_sites_img_heatmap.html"
HENV103_heatmap_plot = "results/images/HENV103_heatmap_plot.html"
nAH1_heatmap_plot = "results/images/nAH1_heatmap_plot.html"
HENV117_heatmap_plot = "results/images/HENV117_heatmap_plot.html"
HENV32_heatmap_plot = "results/images/HENV32_heatmap_plot.html"
HENV26_heatmap_plot = "results/images/HENV26_heatmap_plot.html"
m102_heatmap_plot = "results/images/m102_heatmap_plot.html"

import math
import os
import re
import altair as alt
import numpy as np
import pandas as pd
import scipy.stats
import yaml
import sys

# allow more rows for Altair
_ = alt.data_transformers.disable_max_rows()

# setup working directory
if os.getcwd() == "/fh/fast/bloom_j/computational_notebooks/blarsen/2023/Nipah_Malaysia_RBP_DMS/":
    pass
    print("Already in correct directory")
else:
    os.chdir("/fh/fast/bloom_j/computational_notebooks/blarsen/2023/Nipah_Malaysia_RBP_DMS/")
    print("Setup in correct directory")

#import altair themes from /data/custom_analyses_data/theme.py and enable
sys.path.append('data/custom_analyses_data/')
import heatmap_theme
alt.themes.register('heatmap_theme', heatmap_theme.heatmap_theme)
alt.themes.enable('heatmap_theme')

Setup in correct directory

ThemeRegistry.enable('heatmap_theme')

if nipah_config is None:
    ##hard paths in case don't want to run with snakemake
    print("loading hard paths")
    #config files or .csv of entropy values that can be plotted with the heatmaps
    nipah_config = "nipah_config.yaml"
    entropy_file = "results/entropy/entropy.csv"

    # input files
    func_scores_E2_file = "results/filtered_data/entry/e2_entry_filtered.csv"
    binding_E2_file = "results/filtered_data/binding/e2_binding_filtered.csv"
    e2_low_func_file = "results/filtered_data/binding/e2_low_binding_effect_filter.csv"

    func_scores_E3_file = "results/filtered_data/entry/e3_entry_filtered.csv"
    binding_E3_file = "results/filtered_data/binding/e3_binding_filtered.csv"
    e3_low_func_file = "results/filtered_data/binding/e3_low_binding_effect_filter.csv"

    e3_low_func_file_ab = "results/filtered_data/escape/e3_low_mab_effect_filter.csv"
    antibody_file = "results/filtered_data/escape/mab_filter_concat.csv"

""" this config file contains information that may be needed for generating heatmaps. None are necessary to make heatmaps.

contact_sites: [239, 240, etc] # sites that are in contact with the receptor, can specify multiple sites to be plotted in the heatmap.
effect_color: default is 'redblue'
entropy_color: default is 'browns', not necessary, only if you want to plot entropy above the heatmaps
strokewidth_size: default is 0.5. Can customize the size of the stroke width in the heatmaps.
background_color: I use #d1d3d4 for the background color of the heatmaps. Can specify custom colors

"""

with open(nipah_config) as f:
    config = yaml.safe_load(f)

# import filtered data
e3_low_func_file_ab = "results/filtered_data/escape/e3_low_mab_effect_filter.csv"
antibody_file = "results/filtered_data/escape/mab_filter_concat.csv"

e2_binding = pd.read_csv(binding_E2_file).round(2) #Dataframe with effects of RBP mutations on binding to bEFNB2
e2_func = pd.read_csv(func_scores_E2_file).round(2) #Dataframe with effects of RBP mutations on entry in CHO-bEFNB2 cells
e2_low_func = pd.read_csv(e2_low_func_file).round(2) #Dataframe with effects of RBP mutations that are below the threshold for cell entry (will be masked in the heatmaps)

e3_binding = pd.read_csv(binding_E3_file).round(2)
e3_func = pd.read_csv(func_scores_E3_file).round(2)
e3_low_func = pd.read_csv(e3_low_func_file).round(2)

e3_low_func_ab = pd.read_csv(e3_low_func_file_ab).round(2)
antibody_escape = pd.read_csv(antibody_file).round(2)

def prepare_distance(distance_file):
    # read in residue distance data, calculated in different notebook
    df = distance_file[["site", "distance"]]
    df = df.dropna(subset=["site"])
    df["site"] = df["site"].astype("Int64")
    df["distance"] = df["distance"].round(2)
    df = df[["site", "distance"]].drop_duplicates()
    df["mutant"] = "distance"
    df["wildtype"] = ""
    df["type"] = "distance"
    df.rename(columns={"distance": "value"}, inplace=True)
    return df
    
def prepare_entropy():  # need to prepare entropy data for plotting on heatmap
    # read in entropy data, calculated in different notebook
    entropy = pd.read_csv(entropy_file)
    df = entropy[["site", "henipavirus_entropy"]]
    df = df.dropna(subset=["site"])
    df["site"] = df["site"].astype("Int64")
    df = df.rename(columns={"henipavirus_entropy": "entropy"})
    df["entropy"] = df["entropy"].round(2)
    df = df[["site", "entropy"]].drop_duplicates()
    df["mutant"] = "entropy"
    df["wildtype"] = ""
    df["type"] = "entropy"
    df.rename(columns={"entropy": "value"}, inplace=True)
    return df


def make_contact():
    df = pd.DataFrame(
        {
            "site": config["contact_sites"],
            "contact": [0.0] * len(config["contact_sites"]),
        }
    )
    # Renaming and restructuring the dataframe
    df["mutant"] = "receptor contact"
    df["wildtype"] = ""
    df["type"] = "receptor contact"
    df.rename(columns={"contact": "value"}, inplace=True)
    return df


# This gets called during heatmap generation. Makes a dataframe with the site, mutant, and value for each site, regardless if it was measured in DMS or not.
def make_empty_df(
    df,
    contact_df=None,
    entropy_df=None,
    contact_flag=None,
    entropy_flag=None,
    low_entry_df=None,
    binding_flag=None,
    escape_flag=None,
    distance_df=None,
):
    sites = range(71, 603)
    amino_acids = [
        "R",
        "K",
        "H",
        "D",
        "E",
        "Q",
        "N",
        "S",
        "T",
        "Y",
        "W",
        "F",
        "A",
        "I",
        "L",
        "M",
        "V",
        "G",
        "P",
        "C",
    ]
    # Create the combination of each site with each amino acid
    data = [{"site": site, "mutant": aa} for site in sites for aa in amino_acids]

    # Create the DataFrame
    empty_df = pd.DataFrame(data)

    if binding_flag:
        print("plotting binding")
        if low_entry_df is None:
            print("You indicated binding but did not provide a low_entry_df")
        all_sites_df = pd.merge(empty_df, df, on=["site", "mutant"], how="left")
        df_test = all_sites_df.melt(
            id_vars=["site", "mutant", "wildtype"],
            value_vars=["binding_mean"],
            var_name="type",
            value_name="value",
        )
        low_entry_df = low_entry_df.rename(columns={"effect": "low_effect"})
        df_filter = low_entry_df.melt(
            id_vars=["site", "mutant", "wildtype"],
            value_vars=["low_effect"],
            var_name="type",
            value_name="value",
        )
        df_test = pd.concat([df_test, df_filter])
    elif escape_flag:
        print("plotting escape")
        if low_entry_df is None:
            print("You indicated escape but did not provide a low_entry_df")
        all_sites_df = pd.merge(empty_df, df, on=["site", "mutant"], how="left")
        df_test = all_sites_df.melt(
            id_vars=["site", "mutant", "wildtype"],
            value_vars=["escape_mean"],
            var_name="type",
            value_name="value",
        )
        low_entry_df = low_entry_df.rename(columns={"effect": "low_effect"})
        df_filter = low_entry_df.melt(
            id_vars=["site", "mutant", "wildtype"],
            value_vars=["low_effect"],
            var_name="type",
            value_name="value",
        )
        df_test = pd.concat([df_test, df_filter])
    else:
        print("plotting entry")
        all_sites_df = pd.merge(empty_df, df, on=["site", "mutant"], how="left")
        df_test = all_sites_df.melt(
            id_vars=["site", "mutant", "wildtype"],
            value_vars=["effect"],
            var_name="type",
            value_name="value",
        )

    if contact_flag and entropy_flag is None:
        df_test = pd.concat([df_test], ignore_index=True)
    if contact_flag is True:
        df_test = pd.concat([df_test, contact_df], ignore_index=True)
    if entropy_flag is True:
        df_test = pd.concat([df_test, entropy_df], ignore_index=True)
    if entropy_flag and contact_flag is True:
        df_test = pd.concat([df_test, entropy_df, contact_df], ignore_index=True)
    if distance_df is not None:
        print('making a distance df')
        df_test = pd.concat([df_test,distance_df], ignore_index=True)
    return df_test

# Make the base heatmap. This contains information about the x_axis and heatmap_sites which are important for sorting them correctly.
def make_base_heatmap(df, heatmap_sites, x_axis):
    base = (
        alt.Chart(df)
        .encode(
            x=alt.X("site:O", title="Site", sort=heatmap_sites, axis=x_axis),
            y=alt.Y(
                "mutant",
                title="Amino Acid",
                sort=alt.EncodingSortField(field="mutant_rank", order="ascending"),
                axis=alt.Axis(grid=False),
            ),
        )
        .properties(
            width=alt.Step(10),
            height=alt.Step(10),
        )
    )
    return base


# This makes an 'empty' heatmap that shows sites that were not observed as some color (default:gray)
def make_empty_heatmap(base, background_color):
    chart_empty = (
        base.mark_rect(color=background_color)
        .encode(tooltip=["site", "mutant"])
        .transform_filter(
            ((alt.datum.type == "effect") | (alt.datum.type == "binding_mean") | (alt.datum.type == "escape_mean"))
            & (alt.datum.value == None)
        )
    )
    return chart_empty


# This makes the white squares and X for the wildtype amino acids
def make_wildtype_heatmap(unique_wildtypes_df, strokewidth_size, heatmap_sites):
    wildtype_layer_box = (
        alt.Chart(unique_wildtypes_df)
        .mark_rect(color="white", stroke="#000000", strokeWidth=strokewidth_size)
        .encode(
            x=alt.X("site:O", sort=heatmap_sites),
            y=alt.Y(
                "wildtype",
                sort=alt.EncodingSortField(field="mutant_rank", order="ascending"),
            ),
            tooltip=["site", "wildtype"],
        )
        .transform_filter(
            ((alt.datum.type == "effect") | (alt.datum.type == "binding_mean") | (alt.datum.type == 'low_effect') | (alt.datum.type == "escape_mean"))
            & (alt.datum.wildtype != None)
            & (alt.datum.value != None)
        )
    )
    wildtype_layer = (
        alt.Chart(unique_wildtypes_df)
        .mark_text(color="black", text="X", size=8, align="center", baseline="middle",dy=1)
        .encode(
            x=alt.X("site:O", sort=heatmap_sites),
            y=alt.Y(
                "wildtype",
                sort=alt.EncodingSortField(field="mutant_rank", order="ascending"),
            ),
            tooltip=["site", "wildtype"],
        )
        .transform_filter(
            ((alt.datum.type == "effect") | (alt.datum.type == "binding_mean") | (alt.datum.type == 'low_effect') | (alt.datum.type == "escape_mean"))
            & (alt.datum.wildtype != None)
            & (alt.datum.value != None)
        )
    )
    return wildtype_layer_box, wildtype_layer


# This makes the actual effect heatmap, and adds a bar for the legend if its the first time through the loop
def create_effect_chart(
    base, color_scale_effect, strokewidth_size, legend_title=None, effect_legend=None
):
    chart = (
        base.mark_rect(stroke="#000000", strokeWidth=strokewidth_size)
        .encode(
            color=alt.condition(
                '(datum.type == "effect" | datum.type == "binding_mean" | datum.type == "escape_mean")',
                alt.Color("value:Q", scale=color_scale_effect, legend=effect_legend),
                alt.value("transparent"),
            ),
            tooltip=["site", "mutant", "wildtype", "value"],
        )
        .transform_filter((alt.datum.wildtype != "") & (alt.datum.wildtype != None))
    )
    return chart


# This makes a chart for the entropy values
def create_entropy_chart(
    base, color_scale_entropy, strokewidth_size, legend_title=None, entropy_legend=None
):
    chart = base.mark_rect(stroke="#000000", strokeWidth=strokewidth_size).encode(
        color=alt.condition(
            'datum.mutant == "entropy"',
            alt.Color("value:Q", scale=color_scale_entropy, legend=entropy_legend),
            alt.value("transparent"),
        ),
        tooltip=["site", "mutant", "wildtype", "value"],
    )
    return chart
    
def create_distance_chart(
    base, color_scale_entropy, strokewidth_size, legend_title=None, distance_legend=None
):
    chart = base.mark_rect(stroke="#000000", strokeWidth=strokewidth_size).encode(
        color=alt.condition(
            'datum.mutant == "distance"',
            alt.Color("value:Q", scale=alt.Scale(scheme="purples", domain=[0, 10], reverse=True),legend=distance_legend),
            alt.value("transparent"),
        ),
        tooltip=["site", "mutant", "wildtype", "value"],
    )
    return chart

# This makes a chart for the contact sites
def create_contact_chart(base):
    chart_contact = (
        base.mark_rect(color="black")
        .encode(tooltip=["site"])
        .transform_filter((alt.datum.mutant == "receptor contact"))
    )
    return chart_contact

# Masks sites with low cell entry scores for binding or escape
def make_low_effect_heatmap(base, strokewidth_size, heatmap_sites):
    chart_filtered = (
        base.mark_rect(color="#808285", stroke="#000000", strokeWidth=strokewidth_size) ##939598
        .encode()
        .transform_filter(alt.datum.type == "low_effect")
    )
    return chart_filtered


# This compiles all the different charts and returns a single chart
def compile_chart(
    df,
    heatmap_sites,
    unique_wildtypes_df,
    x_axis,
    background_color,
    color_scale_effect,
    color_scale_entropy,
    strokewidth_size=None,
    legend_title=None,
    effect_legend=None,
    entropy_legend=None,
    binding_flag=None,
    escape_flag=None,
    distance_df=None,
    distance_legend=None
):
    base = make_base_heatmap(df, heatmap_sites, x_axis)
    chart_empty = make_empty_heatmap(base, background_color)
    chart_contact = create_contact_chart(base)
    chart_effect = create_effect_chart(
        base, color_scale_effect, strokewidth_size, legend_title, effect_legend
    )
    chart_entropy = create_entropy_chart(
        base, color_scale_entropy, strokewidth_size, legend_title, entropy_legend
    )
    chart_distance = create_distance_chart(
        base, color_scale_entropy, strokewidth_size, legend_title, distance_legend
    )
    wildtype_layer_box, wildtype_layer = make_wildtype_heatmap(
        unique_wildtypes_df, strokewidth_size, heatmap_sites
    )
    if binding_flag:
        print("Now making dataframe for binding")
        low_entry_heatmap = make_low_effect_heatmap(
            base, strokewidth_size, heatmap_sites
        )
        chart = alt.layer(
            chart_empty,
            chart_effect,
            low_entry_heatmap,
            chart_entropy,
            chart_contact,
            wildtype_layer_box,
            wildtype_layer,
        ).resolve_scale(y="shared", x="shared", color="independent")
    elif escape_flag:
        print("Now making dataframe for escape")
        low_entry_heatmap = make_low_effect_heatmap(
            base, strokewidth_size, heatmap_sites
        )
        chart = alt.layer(
            chart_empty,
            chart_effect,
            low_entry_heatmap,
            chart_distance,
            chart_entropy,
            chart_contact,
            wildtype_layer_box,
            wildtype_layer,
        ).resolve_scale(y="shared", x="shared", color="independent")
    else:
        chart = alt.layer(
            chart_empty,
            chart_effect,
            chart_entropy,
            chart_contact,
            wildtype_layer_box,
            wildtype_layer,
        ).resolve_scale(y="shared", x="shared", color="independent")

    return chart

def plot_entry_heatmap(
    df,
    legend_title,
    null_color=None,
    ranges=None,
    effect_color=None,
    entropy_color=None,
    strokewidth_size=None,
    custom_y_axis_order=None,
    entropy_flag=None,
    contact_flag=None,
    specific_sites=None,
    specific_sites_name=None,
    low_entry_df=None,
    binding_flag=None,
    escape_flag=None,
    custom_domain=None,
    escape_distance=None,
    subtitle_str=None,
):
    """
    Generates a customizable heatmap for deep mutational scanning (DMS) data visualization.

    Parameters:
    - df (DataFrame): The data frame containing the data to be visualized. It must include the columns 'site', 'mutant', 'value', and 'wildtype'.
    - legend_title (str): The title of the heatmap legend.
    - null_color (str, optional): Color for mutants with no observations. Default is 'gray'.
    - ranges (list of tuples, optional): Defines the ranges for site wrapping on the heatmap. If not provided, a default range is used.
    - effect_color (str, optional): Color scheme for effect values. Default is 'red-blue'.
    - entropy_color (str, optional): Color scheme for entropy values. Default is 'purples'.
    - strokewidth_size (float, optional): The width of the stroke used in the heatmap. Default size is not specified.
    - custom_y_axis_order (list, optional): Specifies a custom order for the y-axis, overriding the default amino acid order.
    - entropy_flag (bool, optional): If True, sequence entropy is included in the heatmap. Default is False.
    - contact_flag (bool, optional): If True, contact sites are included in the heatmap. Default is False.
    - specific_sites (list, optional): Specifies a subset of sites to be plotted. If None, all sites are plotted using wrapping. Default is None.
    - specific_sites_name (str, optional): A title to display at the top of the heatmap for specific sites. Default is None.
    - low_entry_df (DataFrame,optional): If given, will use different color to show sites with low entry scores (Used for Binding Score Heatmaps)
    - binding_flag (bool, optional): If True, will plot binding instead of entry. Must be used with low_entry_df to mask low cell entry mutants
    - custom_domain (list, optional): Give custom domain used for coloring. If None, will use default [-4,2.5]
    - escape_flag (bool,optional): If True, will plot escape instead of entry. Must be used with low_entry_df to mask low cell entry mutants
    - escape_distance (DataFrame, optional): give distance file for residues
    - subtitle_str (str, optional): Subtitle for the heatmap. Default is None.

    Returns:
    An Altair chart object representing the generated heatmap. This chart can be further customized or directly displayed in Jupyter notebooks or other compatible environments.
    """

    if contact_flag:
        contact_df = make_contact()
    else:
        contact_df = None
    if entropy_flag is True:
        entropy_df = prepare_entropy()
    else:
        entropy_df = None
    if escape_distance is not None:
        distance_df = prepare_distance(escape_distance)
    else: 
        distance_df = None

    # Make the dataframes for plotting. This calls the code in the cell above.
    empty_df = make_empty_df(
        df,
        contact_df=contact_df,
        entropy_df=entropy_df,
        distance_df=distance_df,
        contact_flag=contact_flag,
        entropy_flag=entropy_flag,
        low_entry_df=low_entry_df,
        binding_flag=binding_flag,
        escape_flag=escape_flag,
    )

    # Define the base order list. What order do you want the amino acids to appear?
    base_order = [
        "R",
        "K",
        "H",
        "D",
        "E",
        "Q",
        "N",
        "S",
        "T",
        "Y",
        "W",
        "F",
        "A",
        "I",
        "L",
        "M",
        "V",
        "G",
        "P",
        "C",
    ]

    # Initialize custom_order with custom_y_axis_order or base_order based on custom_y_axis_order's value
    custom_order = (
        custom_y_axis_order if custom_y_axis_order is not None else base_order
    )
    # Prepend conditions based on flags
    if entropy_flag and contact_flag:
        # Both flags are true, prepend both "contact" and "entropy"
        custom_order = ["contact", "entropy"] + custom_order
    elif entropy_flag:
        # Only entropy_flag is true, prepend "entropy"
        custom_order = ["entropy"] + custom_order
    elif contact_flag:
        # Only contact_flag is true, prepend "contact"
        custom_order = ["contact"] + custom_order
    elif distance_df is not None:
        custom_order = ["distance"] + custom_order

    # Optional parameters
    if null_color is None:
        background_color = "#d1d3d4"
    else:
        background_color = null_color

    # Sites for wrapping heatmap correctly. In this case, I did DMS on sites 71-603, so I wrap the heatmap with the specified sites.
    if ranges is None:
        full_ranges = [
            list(range(start, end))
            for start, end in [
                (71, 181),
                (181, 291),
                (291, 401),
                (401, 511),
                (511, 603),
            ]
        ]
    else:
        full_ranges = ranges

    if subtitle_str:
        subtitle_str = subtitle_str
    else:
        subtitle_str = ""

    # effect_color
    if custom_domain:
        if effect_color is None:
            color_scale_effect = alt.Scale(
                scheme="redblue", domainMid=0, domain=custom_domain
            )
        else:
            color_scale_effect = alt.Scale(
                scheme=effect_color, domainMid=0, domain=custom_domain
            )
    else:
        if effect_color is None:
            color_scale_effect = alt.Scale(
                scheme="redblue", domainMid=0, domain=[-4, 2.5] #default color scale for effects
            )
        else:
            color_scale_effect = alt.Scale(
                scheme=effect_color, domainMid=0, domain=[-4, 2.5] 
            )

    # entropy_color
    if entropy_color is None:
        color_scale_entropy = alt.Scale(
            scheme="purples", domain=[0, 1.5], reverse=True) #default color scale for entropy
    else:
        color_scale_entropy = alt.Scale(
            scheme=entropy_color, domain=[0, 1.5], reverse=True
        )
    
    # strokewidth size
    if strokewidth_size is None:
        strokewidth_size = 0.25 #default stroke width size
    else:
        strokewidth_size = strokewidth_size

    def determine_sorting_order(df):
        # Sort the dataframe by 'site' to ensure that duplicates are detected correctly.
        final_df = df.sort_values("site")
        sort_order = {mutant: i for i, mutant in enumerate(custom_order)}
        final_df["mutant_rank"] = final_df["mutant"].map(sort_order)
        # Map the 'mutant' column to these ranks
        # Now sort the dataframe by this rank
        final_df = final_df.sort_values("mutant_rank")
        sites = sorted(final_df["site"].unique(), key=lambda x: float(x))
        return final_df, sites, sort_order

    heatmap_df, heatmap_sites, sort_order = determine_sorting_order(empty_df) #call function to sort the dataframe.

    # container to hold the charts during the loop
    charts = []

    if specific_sites: #if specific sites are given, plot only those sites in the heatmaps
        # Filter the heatmap to only show certain sites
        subset_df = heatmap_df[heatmap_df["site"].isin(specific_sites)]

        # Need to do independently for wildtype here for individual sites
        unique_wildtypes_df = subset_df.drop_duplicates(
            subset=["site", "wildtype"])
        unique_wildtypes_df = unique_wildtypes_df.sort_values("site")
        sort_order = {mutant: i for i, mutant in enumerate(custom_order)}
        unique_wildtypes_df["mutant_rank"] = unique_wildtypes_df["wildtype"].map(
            sort_order
        )
        unique_wildtypes_df = unique_wildtypes_df.sort_values("mutant_rank")

        # Setup the legend
        effect_legend = alt.Legend(
            title=legend_title,
            titleAlign="left",
            tickCount=3,
            gradientLength=75,
            titleAnchor="start",
            offset=5,
            titlePadding=2,
            titleOrient='top',
            gradientThickness=10,
        )
        entropy_legend = (
            alt.Legend(title="Henipavirus Entropy", values=[0, 1, 2])
            if entropy_flag is True
            else None
        )

        # Setup x-axis labeling. We want the labels to be vertical.
        x_axis = alt.Axis(
            labelAngle=-90,
            title="Site",
            labels=True,
        )
        # Run the main heatmap compiler function. This is where the magic happens. Code for this function in the cell above.
        chart = compile_chart(
            subset_df,
            heatmap_sites,
            unique_wildtypes_df,
            x_axis,
            background_color,
            color_scale_effect=color_scale_effect,
            color_scale_entropy=color_scale_entropy,
            strokewidth_size=strokewidth_size,
            legend_title=legend_title,
            effect_legend=effect_legend,
            entropy_legend=entropy_legend,
            binding_flag=binding_flag,
            escape_flag=escape_flag,
        )
        # Since this is a single chart, I don't know why I need to do this, but I seem to get errors if I don't append and then do alt.vconcat below. I get why I need to do this for multiple heatmaps in a for loop, but not here. Leaving in.
        charts.append(chart)
        if specific_sites_name:
            specific_sites_name = specific_sites_name
        else:
            specific_sites_name = ""
        combined_charts = alt.vconcat(*charts, title=specific_sites_name).resolve_scale(
            y="shared", x="shared", color="shared"
        )
        return combined_charts
    else:
        for idx, subset in enumerate(full_ranges): #if we didnt specify specific sites to be plotted, then it assumes you want the full range of sites to be wrapped in the heatmap.
            # Flags for showing the legend only the first time
            subset_df = heatmap_df[
                heatmap_df["site"].isin(subset)
            ]  # for the wrapping of sites
            unique_wildtypes_df = subset_df.drop_duplicates(
                subset=["site", "wildtype"]
            )  # for the wildtype mapping

            # Keep track of where in the loop we are for plotting
            is_last_plot = idx == len(full_ranges) - 1

            #define the legend for the effect values
            effect_legend = (
                alt.Legend(
                    title=legend_title,
                    direction="horizontal",
                    gradientLength=100,
                    titleAnchor="middle",
                    tickCount=3,
                    labelAlign="left",
                    titleAlign='center',
                    
                )
                if is_last_plot
                else None
            )
            entropy_legend = (
                alt.Legend(
                    title="Henipavirus Entropy",
                    direction="horizontal",
                    gradientLength=100,
                    titleAnchor="middle",
                    values=[0, 1, 2],
                    labelAlign="center",
                )
                if is_last_plot and entropy_flag is True
                else None
            )
            if distance_df is not None:
                distance_legend = (
                    alt.Legend(
                        title="Antibody distance",
                        direction="horizontal",
                        gradientLength=100,
                        tickCount=3,
                        titleAlign="center",
                        titleAnchor='middle',
                        labelAlign="left",
                    )
                    if is_last_plot is True
                    else None
                )
            else:
                distance_legend = None
            # Setup x-axis labeling. We want the labels to be vertical. Only show the labels every 10 sites. Only show the axis title on the last row.
            x_axis = alt.Axis(
                labelAngle=-90,
                labelExpr="datum.value % 10 === 0 ? datum.value : ''",
                title="Site" if is_last_plot else None,
                labels=True,
            )
            #This is where the magic happens. Code for this function in the cell above.
            chart = compile_chart( 
                subset_df,
                heatmap_sites,
                unique_wildtypes_df,
                x_axis,
                background_color,
                color_scale_effect=color_scale_effect,
                color_scale_entropy=color_scale_entropy,
                strokewidth_size=strokewidth_size,
                legend_title=legend_title,
                effect_legend=effect_legend,
                entropy_legend=entropy_legend,
                distance_legend=distance_legend,
                binding_flag=binding_flag,
                escape_flag=escape_flag
            )
            charts.append(chart)
        combined_chart = alt.vconcat(
            *charts, spacing=3, title=alt.Title(f"{legend_title}",subtitle=subtitle_str)
        ).resolve_scale(y="shared", x="independent", color="shared").configure_axisY(titlePadding=-50)
        return combined_chart

E2_binding_heatmap_full = plot_entry_heatmap(
    df=e2_binding,
    legend_title="bEFNB2 Binding",
    null_color=config["background_color"],
    effect_color=config["effect_color"],
    entropy_color=config["entropy_color"],
    strokewidth_size=config["strokewidth_size"],
    contact_flag=True,
    low_entry_df=e2_low_func,
    binding_flag=True,
    custom_domain=[-6, 2.5],
    subtitle_str='Interactive heatmap of Nipah RBP mutational effects relative to the unmutated reference sequence',
)
E2_binding_heatmap_full.display()
if combined_entry_binding_contact is not None:
    E2_binding_heatmap_full.save(E2_binding_heatmap)

plotting binding
Now making dataframe for binding
Now making dataframe for binding
Now making dataframe for binding
Now making dataframe for binding
Now making dataframe for binding

E3_binding_heatmap_full = plot_entry_heatmap(
    df=e3_binding,
    legend_title="bEFNB3 Binding",
    null_color=config["background_color"],
    effect_color=config["effect_color"],
    entropy_color=config["entropy_color"],
    strokewidth_size=config["strokewidth_size"],
    contact_flag=True,
    low_entry_df=e3_low_func,
    binding_flag=True,
    custom_domain=[-2, 2],
    subtitle_str='Interactive heatmap of Nipah RBP mutational effects relative to the unmutated reference sequence',
)
E3_binding_heatmap_full.display()
if combined_entry_binding_contact is not None:
    E3_binding_heatmap_full.save(E3_binding_heatmap)

plotting binding
Now making dataframe for binding
Now making dataframe for binding
Now making dataframe for binding
Now making dataframe for binding
Now making dataframe for binding

E2_entry_heatmap_full = plot_entry_heatmap(
    df=e2_func,
    legend_title="CHO-bEFNB2 Entry",
    null_color=config["background_color"],
    effect_color=config["effect_color"],
    strokewidth_size=config["strokewidth_size"],
    contact_flag=True,
    subtitle_str='Interactive heatmap of Nipah RBP mutational effects relative to the unmutated reference sequence',
)
E2_entry_heatmap_full.display()
if combined_entry_binding_contact is not None:
    E2_entry_heatmap_full.save(E2_entry_heatmap)

plotting entry

E3_entry_heatmap_full = plot_entry_heatmap(
    df=e3_func,
    legend_title="CHO-bEFNB3 Entry",
    null_color=config["background_color"],
    effect_color=config["effect_color"],
    strokewidth_size=config["strokewidth_size"],
    contact_flag=True,
    subtitle_str='Interactive heatmap of Nipah RBP mutational effects relative to the unmutated reference sequence',
)
E3_entry_heatmap_full.display()
if combined_entry_binding_contact is not None:
    E3_entry_heatmap_full.save(E3_entry_heatmap)

plotting entry

hydrophobic_AA = ["A", "V", "L", "I", "M"]
aromatic_AA = ["Y", "W", "F"]
positive_AA = ["K", "R", "H"]
negative_AA = ["E", "D"]
hydrophilic_AA = ["S", "T", "N", "Q"]
special_AA = ["C","G","P"]

def find_aa_wildtype_sites(df, aa_type):
    aa_list = list(df[df["wildtype"].isin(aa_type)]["site"].unique())
    return aa_list


def make_AA_lists(df):
    hydrophobic_AA_list = find_aa_wildtype_sites(df, hydrophobic_AA)
    aromatic_AA_list = find_aa_wildtype_sites(df, aromatic_AA)
    positive_AA_list = find_aa_wildtype_sites(df, positive_AA)
    negative_AA_list = find_aa_wildtype_sites(df, negative_AA)
    hydrophilic_AA_list = find_aa_wildtype_sites(df, hydrophilic_AA)
    special_AA_list = find_aa_wildtype_sites(df, special_AA)

    all_AA = [
        hydrophobic_AA_list,
        aromatic_AA_list,
        positive_AA_list,
        negative_AA_list,
        hydrophilic_AA_list,
        special_AA_list,
    ]
    return all_AA


AA_names = ["Hydrophobic", "Aromatic", "Positive", "Negative", "Hydrophilic","Special"]

def make_aa_property_charts(
    df, df_low, sites_list, sites_name, custom_domain, legend_name
):
    empty_charts = []
    for aa_type, name in zip(sites_list, sites_name):
        aa_property = plot_entry_heatmap(
            df=df,
            legend_title=legend_name,
            null_color=config["background_color"],
            effect_color=config["effect_color"],
            entropy_color=config["entropy_color"],
            strokewidth_size=config["strokewidth_size"],
            specific_sites=aa_type,
            specific_sites_name=name,
            low_entry_df=df_low,
            binding_flag=True,
            custom_domain=custom_domain,
        )
        empty_charts.append(aa_property)
    combined_chart = alt.vconcat(*empty_charts, spacing=3)
    return combined_chart


E2_binding_AA_prop = make_aa_property_charts(
    e2_binding, e2_low_func, make_AA_lists(e2_binding), AA_names, [-6, 2.5], "bEFNB2 Binding"
)

E2_binding_AA_prop.display()

plotting binding
Now making dataframe for binding
plotting binding
Now making dataframe for binding
plotting binding
Now making dataframe for binding
plotting binding
Now making dataframe for binding
plotting binding
Now making dataframe for binding
plotting binding
Now making dataframe for binding

E3_binding_AA_prop = make_aa_property_charts(
    e3_binding, e3_low_func, make_AA_lists(e3_binding), AA_names, [-2, 2], "bEFNB3 Binding"
)

E3_binding_AA_prop.display()

plotting binding
Now making dataframe for binding
plotting binding
Now making dataframe for binding
plotting binding
Now making dataframe for binding
plotting binding
Now making dataframe for binding
plotting binding
Now making dataframe for binding
plotting binding
Now making dataframe for binding

def make_aa_property_charts_entry(
    df, sites_list, sites_name, legend_name
):
    empty_charts = []
    for aa_type, name in zip(sites_list, sites_name):
        aa_property = plot_entry_heatmap(
            df=df,
            legend_title=legend_name,
            null_color=config["background_color"],
            effect_color=config["effect_color"],
            entropy_color=config["entropy_color"],
            strokewidth_size=config["strokewidth_size"],
            specific_sites=aa_type,
            specific_sites_name=name,
            contact_flag = True,
        )
        empty_charts.append(aa_property)
    combined_chart = alt.vconcat(*empty_charts, spacing=3)
    return combined_chart

E2_entry_AA_prop = make_aa_property_charts_entry(
    e2_func, make_AA_lists(e2_func),AA_names, "CHO-bEFNB2 entry"
).configure_title(offset=5)
E2_entry_AA_prop.display()
if combined_entry_binding_contact is not None:
    E2_entry_AA_prop.save(E2_entry_AA_prop_heatmap)

plotting entry
plotting entry
plotting entry
plotting entry
plotting entry
plotting entry

E3_entry_AA_prop = make_aa_property_charts_entry(
    e3_func, make_AA_lists(e2_func), AA_names, "CHO-bEFNB3 entry"
).configure_title(offset=5)
E3_entry_AA_prop.display()
if combined_entry_binding_contact is not None:
    E3_entry_AA_prop.save(E3_entry_AA_prop_heatmap)

plotting entry
plotting entry
plotting entry
plotting entry
plotting entry
plotting entry

def make_entry_binding_heatmaps_for_specific_sites(title,sites_list):
    specific_sites_E2_binding = plot_entry_heatmap(
        df=e2_binding,
        legend_title="B2",
        null_color=config["background_color"],
        effect_color=config["effect_color"],
        entropy_color=config["entropy_color"],
        strokewidth_size=config["strokewidth_size"],
        specific_sites=sites_list,
        low_entry_df=e2_low_func,
        binding_flag=True,
        custom_domain=[-6, 2.5],
    )
    specific_sites_E3_binding = plot_entry_heatmap(
        df=e3_binding,
        legend_title="B3",
        null_color=config["background_color"],
        effect_color=config["effect_color"],
        entropy_color=config["entropy_color"],
        strokewidth_size=config["strokewidth_size"],
        specific_sites=sites_list,
        low_entry_df=e3_low_func,
        binding_flag=True,
        custom_domain=[-2, 2],
    )
    specific_sites_E2_entry = plot_entry_heatmap(
        df=e2_func,
        legend_title="B2",
        null_color=config["background_color"],
        effect_color=config["effect_color"],
        entropy_color=config["entropy_color"],
        strokewidth_size=config["strokewidth_size"],
        specific_sites=sites_list,
    )
    specific_sites_E3_entry = plot_entry_heatmap(
        df=e3_func,
        legend_title="B3",
        null_color=config["background_color"],
        effect_color=config["effect_color"],
        entropy_color=config["entropy_color"],
        strokewidth_size=config["strokewidth_size"],
        specific_sites=sites_list,
    
    )
    combo_binding_img = alt.vconcat(specific_sites_E2_binding,specific_sites_E3_binding).properties(title='Binding')
    combo_entry_img = alt.vconcat(specific_sites_E2_entry,specific_sites_E3_entry).properties(title='Entry')
    combo_combo = alt.hconcat(combo_entry_img,combo_binding_img).properties(title=title).configure_title(offset=15)
    return combo_combo

dimer_assm_chainA = make_entry_binding_heatmaps_for_specific_sites('ChainA Dimer Interface',[160, 162, 163, 164, 165, 166, 169, 171, 205, 206, 208, 247, 249, 256, 258, 259, 260, 261, 266, 267, 268, 270, 323, 324, 325, 331])
dimer_assm_chainA.display()

plotting binding
Now making dataframe for binding
plotting binding
Now making dataframe for binding
plotting entry
plotting entry

dimer_assm_chainB = make_entry_binding_heatmaps_for_specific_sites('ChainB Dimer Interface',[167, 168, 170, 171, 172, 202, 203, 204, 205, 208, 210, 212, 237, 238, 239, 240, 242, 258, 584, 589, 591]) 
dimer_assm_chainB.display()

plotting binding
Now making dataframe for binding
plotting binding
Now making dataframe for binding
plotting entry
plotting entry

BLI_mutant_sites = make_entry_binding_heatmaps_for_specific_sites('Sites Chosen for BLI',[244,305,492,507,530,553,555,559,588]) 
BLI_mutant_sites.display()

plotting binding
Now making dataframe for binding
plotting binding
Now making dataframe for binding
plotting entry
plotting entry

contact_sites_img = make_entry_binding_heatmaps_for_specific_sites('Contact Sites',config['contact_sites'])
contact_sites_img.display()
if combined_entry_binding_contact is not None:
    contact_sites_img.save(combined_entry_binding_contact)

plotting binding
Now making dataframe for binding
plotting binding
Now making dataframe for binding
plotting entry
plotting entry

stalk_sites_img = make_entry_binding_heatmaps_for_specific_sites('Stalk',list(range(72, 148)))
stalk_sites_img.display()

plotting binding
Now making dataframe for binding
plotting binding
Now making dataframe for binding
plotting entry
plotting entry

neck_sites_img = make_entry_binding_heatmaps_for_specific_sites('Neck',list(range(148, 166)))
neck_sites_img.display()

plotting binding
Now making dataframe for binding
plotting binding
Now making dataframe for binding
plotting entry
plotting entry

beta_sheet_sites_img = make_entry_binding_heatmaps_for_specific_sites('Beta Sheets',config['beta_sheet'])
beta_sheet_sites_img.display()

plotting binding
Now making dataframe for binding
plotting binding
Now making dataframe for binding
plotting entry
plotting entry

nipah_poly_sites_img = make_entry_binding_heatmaps_for_specific_sites('Nipah polymorphic sites',config['nipah_poly'])
nipah_poly_sites_img.display()
if combined_entry_binding_contact is not None:
    nipah_poly_sites_img.save(nipah_poly_sites_img_heatmap)

plotting binding
Now making dataframe for binding
plotting binding
Now making dataframe for binding
plotting entry
plotting entry

glycan_sites_img = make_entry_binding_heatmaps_for_specific_sites('Glycosylation sites',config['glycans'])
glycan_sites_img.display()
if combined_entry_binding_contact is not None:
    glycan_sites_img.save(glycan_sites_img_heatmap)

plotting binding
Now making dataframe for binding
plotting binding
Now making dataframe for binding
plotting entry
plotting entry

m102 = antibody_escape[antibody_escape['ab'] == 'm102.4']
m102_dist = pd.read_csv("results/distances/df_m102_atomic_distances.csv")

HENV26 = antibody_escape[antibody_escape['ab'] == 'HENV-26']
HENV26_dist = pd.read_csv("results/distances/df_HENV26_atomic_distances.csv")

HENV32 = antibody_escape[antibody_escape['ab'] == 'HENV-32']
HENV32_dist = pd.read_csv("results/distances/df_HENV32_atomic_distances.csv")

nAH1 = antibody_escape[antibody_escape['ab'] == 'nAH1.3']
nAH1_dist = pd.read_csv("results/distances/df_nAH_atomic_distances.csv")

HENV117 = antibody_escape[antibody_escape['ab'] == 'HENV-117']


HENV103 = antibody_escape[antibody_escape['ab'] == 'HENV-103']

def make_escape_heatmaps(df,dist,title):
    escape_heatmap = plot_entry_heatmap(
        df=df,
        legend_title=f"{title} escape",
        null_color=config["background_color"],
        effect_color=config["effect_color"],
        entropy_color=config["entropy_color"],
        strokewidth_size=config["strokewidth_size"],
        low_entry_df=e3_low_func_ab,
        escape_flag=True,
        custom_domain=[-2, 2],
        contact_flag=False,
        escape_distance = dist,
        subtitle_str='Interactive heatmap of Nipah RBP mutational effects relative to the unmutated reference sequence',
    )
    escape_heatmap = escape_heatmap.configure_axisY(titlePadding=-10)
    return escape_heatmap

m102_heatmap = make_escape_heatmaps(m102,m102_dist,'m102.4')
#m102_heatmap.display()
m102_heatmap.save(m102_heatmap_plot)

plotting escape
making a distance df
Now making dataframe for escape
Now making dataframe for escape
Now making dataframe for escape
Now making dataframe for escape
Now making dataframe for escape

HENV26_heatmap = make_escape_heatmaps(HENV26,HENV26_dist,'HENV-26')
HENV26_heatmap.save(HENV26_heatmap_plot)

plotting escape
making a distance df
Now making dataframe for escape
Now making dataframe for escape
Now making dataframe for escape
Now making dataframe for escape
Now making dataframe for escape

HENV32_heatmap = make_escape_heatmaps(HENV32,HENV32_dist,'HENV-32')
HENV32_heatmap.save(HENV32_heatmap_plot)

plotting escape
making a distance df
Now making dataframe for escape
Now making dataframe for escape
Now making dataframe for escape
Now making dataframe for escape
Now making dataframe for escape

nAH1_heatmap = make_escape_heatmaps(nAH1,nAH1_dist,'nAH1.3')
nAH1_heatmap.save(nAH1_heatmap_plot)

plotting escape
making a distance df
Now making dataframe for escape
Now making dataframe for escape
Now making dataframe for escape
Now making dataframe for escape
Now making dataframe for escape

HENV117_heatmap = make_escape_heatmaps(HENV117,dist=None,title='HENV-117')
HENV117_heatmap = HENV117_heatmap.configure_axisY(titlePadding=4)
HENV117_heatmap.save(HENV117_heatmap_plot)

plotting escape
Now making dataframe for escape
Now making dataframe for escape
Now making dataframe for escape
Now making dataframe for escape
Now making dataframe for escape

HENV103_heatmap = make_escape_heatmaps(HENV103,dist=None,title='HENV-103')
HENV103_heatmap = HENV103_heatmap.configure_axisY(titlePadding=4)
HENV103_heatmap.save(HENV103_heatmap_plot)

plotting escape
Now making dataframe for escape
Now making dataframe for escape
Now making dataframe for escape
Now making dataframe for escape
Now making dataframe for escape

plot_heatmaps.ipynb¶

Import modules, set working directory, and import altair theme to be used for all heatmaps¶

If running notebook interactively, this ensures the data is correctly loaded from hard paths¶

Read configs¶

Import DMS data. In this case, import data that has already been filtered.¶

Start of heatmap code¶

Next, we make heatmaps from different variables, then stitch them all together with alt.layer¶

Below is the main heatmap function 'plot_entry_heatmap'¶

Now that we have heatmap function setup, make heatmaps:¶

First, do binding heatmaps¶

Then, make entry heatmaps:¶

Then, make entry and binding heatmaps for contact sites:¶

Make heatmaps by amino acid property for binding¶

Now entry by amino acid property¶

Now for specific sites I want to focus on¶

Make antibody escape heatmaps¶