plot_heatmaps.ipynb¶
This notebook makes heatmaps of entry, binding, and antibody escape data from pre-filtered DMS data.
- Written by Brendan Larsen
In [1]:
# this cell is tagged as parameters for `papermill` parameterization
# input configs
altair_config = None
nipah_config = None
# input files
entropy_file = None
func_scores_E2_file = None
binding_E2_file = None
e2_low_func_file = None
func_scores_E3_file = None
binding_E3_file = None
e3_low_func_file = None
# output images
E2_binding_heatmap = None
E3_binding_heatmap = None
E2_entry_heatmap = None
E3_entry_heatmap = None
combined_entry_binding_contact = None
E3_entry_AA_prop_heatmap = None
E2_entry_AA_prop_heatmap = None
glycan_sites_img_heatmap = None
nipah_poly_sites_img_heatmap = None
HENV103_heatmap_plot = None
nAH1_heatmap_plot = None
HENV117_heatmap_plot = None
HENV32_heatmap_plot = None
HENV26_heatmap_plot = None
m102_heatmap_plot = None
In [2]:
# Parameters
nipah_config = "nipah_config.yaml"
altair_config = "data/custom_analyses_data/heatmap_theme.py"
entropy_file = "results/entropy/entropy.csv"
func_scores_E2_file = "results/filtered_data/entry/e2_entry_filtered.csv"
func_scores_E3_file = "results/filtered_data/entry/e3_entry_filtered.csv"
binding_E2_file = "results/filtered_data/binding/e2_binding_filtered.csv"
binding_E3_file = "results/filtered_data/binding/e3_binding_filtered.csv"
e2_low_func_file = "results/filtered_data/binding/e2_low_binding_effect_filter.csv"
e3_low_func_file = "results/filtered_data/binding/e3_low_binding_effect_filter.csv"
E2_binding_heatmap = "results/images/E2_binding_heatmap.html"
E3_binding_heatmap = "results/images/E3_binding_heatmap.html"
E2_entry_heatmap = "results/images/E2_entry_heatmap.html"
E3_entry_heatmap = "results/images/E3_entry_heatmap.html"
combined_entry_binding_contact = (
"results/images/combined_entry_binding_contact_heatmaps.html"
)
E3_entry_AA_prop_heatmap = "results/images/E3_entry_AA_prop_heatmap.html"
E2_entry_AA_prop_heatmap = "results/images/E2_entry_AA_prop_heatmap.html"
glycan_sites_img_heatmap = "results/images/glycan_sites_img_heatmap.html"
nipah_poly_sites_img_heatmap = "results/images/nipah_poly_sites_img_heatmap.html"
HENV103_heatmap_plot = "results/images/HENV103_heatmap_plot.html"
nAH1_heatmap_plot = "results/images/nAH1_heatmap_plot.html"
HENV117_heatmap_plot = "results/images/HENV117_heatmap_plot.html"
HENV32_heatmap_plot = "results/images/HENV32_heatmap_plot.html"
HENV26_heatmap_plot = "results/images/HENV26_heatmap_plot.html"
m102_heatmap_plot = "results/images/m102_heatmap_plot.html"
Import modules, set working directory, and import altair theme to be used for all heatmaps¶
In [3]:
import math
import os
import re
import altair as alt
import numpy as np
import pandas as pd
import scipy.stats
import yaml
import sys
# allow more rows for Altair
_ = alt.data_transformers.disable_max_rows()
# setup working directory
if os.getcwd() == "/fh/fast/bloom_j/computational_notebooks/blarsen/2023/Nipah_Malaysia_RBP_DMS/":
pass
print("Already in correct directory")
else:
os.chdir("/fh/fast/bloom_j/computational_notebooks/blarsen/2023/Nipah_Malaysia_RBP_DMS/")
print("Setup in correct directory")
#import altair themes from /data/custom_analyses_data/theme.py and enable
sys.path.append('data/custom_analyses_data/')
import heatmap_theme
alt.themes.register('heatmap_theme', heatmap_theme.heatmap_theme)
alt.themes.enable('heatmap_theme')
Setup in correct directory
Out[3]:
ThemeRegistry.enable('heatmap_theme')
If running notebook interactively, this ensures the data is correctly loaded from hard paths¶
In [4]:
if nipah_config is None:
##hard paths in case don't want to run with snakemake
print("loading hard paths")
#config files or .csv of entropy values that can be plotted with the heatmaps
nipah_config = "nipah_config.yaml"
entropy_file = "results/entropy/entropy.csv"
# input files
func_scores_E2_file = "results/filtered_data/entry/e2_entry_filtered.csv"
binding_E2_file = "results/filtered_data/binding/e2_binding_filtered.csv"
e2_low_func_file = "results/filtered_data/binding/e2_low_binding_effect_filter.csv"
func_scores_E3_file = "results/filtered_data/entry/e3_entry_filtered.csv"
binding_E3_file = "results/filtered_data/binding/e3_binding_filtered.csv"
e3_low_func_file = "results/filtered_data/binding/e3_low_binding_effect_filter.csv"
e3_low_func_file_ab = "results/filtered_data/escape/e3_low_mab_effect_filter.csv"
antibody_file = "results/filtered_data/escape/mab_filter_concat.csv"
Read configs¶
In [5]:
""" this config file contains information that may be needed for generating heatmaps. None are necessary to make heatmaps.
contact_sites: [239, 240, etc] # sites that are in contact with the receptor, can specify multiple sites to be plotted in the heatmap.
effect_color: default is 'redblue'
entropy_color: default is 'browns', not necessary, only if you want to plot entropy above the heatmaps
strokewidth_size: default is 0.5. Can customize the size of the stroke width in the heatmaps.
background_color: I use #d1d3d4 for the background color of the heatmaps. Can specify custom colors
"""
with open(nipah_config) as f:
config = yaml.safe_load(f)
Import DMS data. In this case, import data that has already been filtered.¶
In [6]:
# import filtered data
e3_low_func_file_ab = "results/filtered_data/escape/e3_low_mab_effect_filter.csv"
antibody_file = "results/filtered_data/escape/mab_filter_concat.csv"
e2_binding = pd.read_csv(binding_E2_file).round(2) #Dataframe with effects of RBP mutations on binding to bEFNB2
e2_func = pd.read_csv(func_scores_E2_file).round(2) #Dataframe with effects of RBP mutations on entry in CHO-bEFNB2 cells
e2_low_func = pd.read_csv(e2_low_func_file).round(2) #Dataframe with effects of RBP mutations that are below the threshold for cell entry (will be masked in the heatmaps)
e3_binding = pd.read_csv(binding_E3_file).round(2)
e3_func = pd.read_csv(func_scores_E3_file).round(2)
e3_low_func = pd.read_csv(e3_low_func_file).round(2)
e3_low_func_ab = pd.read_csv(e3_low_func_file_ab).round(2)
antibody_escape = pd.read_csv(antibody_file).round(2)
Start of heatmap code¶
In [7]:
def prepare_distance(distance_file):
# read in residue distance data, calculated in different notebook
df = distance_file[["site", "distance"]]
df = df.dropna(subset=["site"])
df["site"] = df["site"].astype("Int64")
df["distance"] = df["distance"].round(2)
df = df[["site", "distance"]].drop_duplicates()
df["mutant"] = "distance"
df["wildtype"] = ""
df["type"] = "distance"
df.rename(columns={"distance": "value"}, inplace=True)
return df
def prepare_entropy(): # need to prepare entropy data for plotting on heatmap
# read in entropy data, calculated in different notebook
entropy = pd.read_csv(entropy_file)
df = entropy[["site", "henipavirus_entropy"]]
df = df.dropna(subset=["site"])
df["site"] = df["site"].astype("Int64")
df = df.rename(columns={"henipavirus_entropy": "entropy"})
df["entropy"] = df["entropy"].round(2)
df = df[["site", "entropy"]].drop_duplicates()
df["mutant"] = "entropy"
df["wildtype"] = ""
df["type"] = "entropy"
df.rename(columns={"entropy": "value"}, inplace=True)
return df
def make_contact():
df = pd.DataFrame(
{
"site": config["contact_sites"],
"contact": [0.0] * len(config["contact_sites"]),
}
)
# Renaming and restructuring the dataframe
df["mutant"] = "receptor contact"
df["wildtype"] = ""
df["type"] = "receptor contact"
df.rename(columns={"contact": "value"}, inplace=True)
return df
# This gets called during heatmap generation. Makes a dataframe with the site, mutant, and value for each site, regardless if it was measured in DMS or not.
def make_empty_df(
df,
contact_df=None,
entropy_df=None,
contact_flag=None,
entropy_flag=None,
low_entry_df=None,
binding_flag=None,
escape_flag=None,
distance_df=None,
):
sites = range(71, 603)
amino_acids = [
"R",
"K",
"H",
"D",
"E",
"Q",
"N",
"S",
"T",
"Y",
"W",
"F",
"A",
"I",
"L",
"M",
"V",
"G",
"P",
"C",
]
# Create the combination of each site with each amino acid
data = [{"site": site, "mutant": aa} for site in sites for aa in amino_acids]
# Create the DataFrame
empty_df = pd.DataFrame(data)
if binding_flag:
print("plotting binding")
if low_entry_df is None:
print("You indicated binding but did not provide a low_entry_df")
all_sites_df = pd.merge(empty_df, df, on=["site", "mutant"], how="left")
df_test = all_sites_df.melt(
id_vars=["site", "mutant", "wildtype"],
value_vars=["binding_mean"],
var_name="type",
value_name="value",
)
low_entry_df = low_entry_df.rename(columns={"effect": "low_effect"})
df_filter = low_entry_df.melt(
id_vars=["site", "mutant", "wildtype"],
value_vars=["low_effect"],
var_name="type",
value_name="value",
)
df_test = pd.concat([df_test, df_filter])
elif escape_flag:
print("plotting escape")
if low_entry_df is None:
print("You indicated escape but did not provide a low_entry_df")
all_sites_df = pd.merge(empty_df, df, on=["site", "mutant"], how="left")
df_test = all_sites_df.melt(
id_vars=["site", "mutant", "wildtype"],
value_vars=["escape_mean"],
var_name="type",
value_name="value",
)
low_entry_df = low_entry_df.rename(columns={"effect": "low_effect"})
df_filter = low_entry_df.melt(
id_vars=["site", "mutant", "wildtype"],
value_vars=["low_effect"],
var_name="type",
value_name="value",
)
df_test = pd.concat([df_test, df_filter])
else:
print("plotting entry")
all_sites_df = pd.merge(empty_df, df, on=["site", "mutant"], how="left")
df_test = all_sites_df.melt(
id_vars=["site", "mutant", "wildtype"],
value_vars=["effect"],
var_name="type",
value_name="value",
)
if contact_flag and entropy_flag is None:
df_test = pd.concat([df_test], ignore_index=True)
if contact_flag is True:
df_test = pd.concat([df_test, contact_df], ignore_index=True)
if entropy_flag is True:
df_test = pd.concat([df_test, entropy_df], ignore_index=True)
if entropy_flag and contact_flag is True:
df_test = pd.concat([df_test, entropy_df, contact_df], ignore_index=True)
if distance_df is not None:
print('making a distance df')
df_test = pd.concat([df_test,distance_df], ignore_index=True)
return df_test
Next, we make heatmaps from different variables, then stitch them all together with alt.layer¶
In [8]:
# Make the base heatmap. This contains information about the x_axis and heatmap_sites which are important for sorting them correctly.
def make_base_heatmap(df, heatmap_sites, x_axis):
base = (
alt.Chart(df)
.encode(
x=alt.X("site:O", title="Site", sort=heatmap_sites, axis=x_axis),
y=alt.Y(
"mutant",
title="Amino Acid",
sort=alt.EncodingSortField(field="mutant_rank", order="ascending"),
axis=alt.Axis(grid=False),
),
)
.properties(
width=alt.Step(10),
height=alt.Step(10),
)
)
return base
# This makes an 'empty' heatmap that shows sites that were not observed as some color (default:gray)
def make_empty_heatmap(base, background_color):
chart_empty = (
base.mark_rect(color=background_color)
.encode(tooltip=["site", "mutant"])
.transform_filter(
((alt.datum.type == "effect") | (alt.datum.type == "binding_mean") | (alt.datum.type == "escape_mean"))
& (alt.datum.value == None)
)
)
return chart_empty
# This makes the white squares and X for the wildtype amino acids
def make_wildtype_heatmap(unique_wildtypes_df, strokewidth_size, heatmap_sites):
wildtype_layer_box = (
alt.Chart(unique_wildtypes_df)
.mark_rect(color="white", stroke="#000000", strokeWidth=strokewidth_size)
.encode(
x=alt.X("site:O", sort=heatmap_sites),
y=alt.Y(
"wildtype",
sort=alt.EncodingSortField(field="mutant_rank", order="ascending"),
),
tooltip=["site", "wildtype"],
)
.transform_filter(
((alt.datum.type == "effect") | (alt.datum.type == "binding_mean") | (alt.datum.type == 'low_effect') | (alt.datum.type == "escape_mean"))
& (alt.datum.wildtype != None)
& (alt.datum.value != None)
)
)
wildtype_layer = (
alt.Chart(unique_wildtypes_df)
.mark_text(color="black", text="X", size=8, align="center", baseline="middle",dy=1)
.encode(
x=alt.X("site:O", sort=heatmap_sites),
y=alt.Y(
"wildtype",
sort=alt.EncodingSortField(field="mutant_rank", order="ascending"),
),
tooltip=["site", "wildtype"],
)
.transform_filter(
((alt.datum.type == "effect") | (alt.datum.type == "binding_mean") | (alt.datum.type == 'low_effect') | (alt.datum.type == "escape_mean"))
& (alt.datum.wildtype != None)
& (alt.datum.value != None)
)
)
return wildtype_layer_box, wildtype_layer
# This makes the actual effect heatmap, and adds a bar for the legend if its the first time through the loop
def create_effect_chart(
base, color_scale_effect, strokewidth_size, legend_title=None, effect_legend=None
):
chart = (
base.mark_rect(stroke="#000000", strokeWidth=strokewidth_size)
.encode(
color=alt.condition(
'(datum.type == "effect" | datum.type == "binding_mean" | datum.type == "escape_mean")',
alt.Color("value:Q", scale=color_scale_effect, legend=effect_legend),
alt.value("transparent"),
),
tooltip=["site", "mutant", "wildtype", "value"],
)
.transform_filter((alt.datum.wildtype != "") & (alt.datum.wildtype != None))
)
return chart
# This makes a chart for the entropy values
def create_entropy_chart(
base, color_scale_entropy, strokewidth_size, legend_title=None, entropy_legend=None
):
chart = base.mark_rect(stroke="#000000", strokeWidth=strokewidth_size).encode(
color=alt.condition(
'datum.mutant == "entropy"',
alt.Color("value:Q", scale=color_scale_entropy, legend=entropy_legend),
alt.value("transparent"),
),
tooltip=["site", "mutant", "wildtype", "value"],
)
return chart
def create_distance_chart(
base, color_scale_entropy, strokewidth_size, legend_title=None, distance_legend=None
):
chart = base.mark_rect(stroke="#000000", strokeWidth=strokewidth_size).encode(
color=alt.condition(
'datum.mutant == "distance"',
alt.Color("value:Q", scale=alt.Scale(scheme="purples", domain=[0, 10], reverse=True),legend=distance_legend),
alt.value("transparent"),
),
tooltip=["site", "mutant", "wildtype", "value"],
)
return chart
# This makes a chart for the contact sites
def create_contact_chart(base):
chart_contact = (
base.mark_rect(color="black")
.encode(tooltip=["site"])
.transform_filter((alt.datum.mutant == "receptor contact"))
)
return chart_contact
# Masks sites with low cell entry scores for binding or escape
def make_low_effect_heatmap(base, strokewidth_size, heatmap_sites):
chart_filtered = (
base.mark_rect(color="#808285", stroke="#000000", strokeWidth=strokewidth_size) ##939598
.encode()
.transform_filter(alt.datum.type == "low_effect")
)
return chart_filtered
# This compiles all the different charts and returns a single chart
def compile_chart(
df,
heatmap_sites,
unique_wildtypes_df,
x_axis,
background_color,
color_scale_effect,
color_scale_entropy,
strokewidth_size=None,
legend_title=None,
effect_legend=None,
entropy_legend=None,
binding_flag=None,
escape_flag=None,
distance_df=None,
distance_legend=None
):
base = make_base_heatmap(df, heatmap_sites, x_axis)
chart_empty = make_empty_heatmap(base, background_color)
chart_contact = create_contact_chart(base)
chart_effect = create_effect_chart(
base, color_scale_effect, strokewidth_size, legend_title, effect_legend
)
chart_entropy = create_entropy_chart(
base, color_scale_entropy, strokewidth_size, legend_title, entropy_legend
)
chart_distance = create_distance_chart(
base, color_scale_entropy, strokewidth_size, legend_title, distance_legend
)
wildtype_layer_box, wildtype_layer = make_wildtype_heatmap(
unique_wildtypes_df, strokewidth_size, heatmap_sites
)
if binding_flag:
print("Now making dataframe for binding")
low_entry_heatmap = make_low_effect_heatmap(
base, strokewidth_size, heatmap_sites
)
chart = alt.layer(
chart_empty,
chart_effect,
low_entry_heatmap,
chart_entropy,
chart_contact,
wildtype_layer_box,
wildtype_layer,
).resolve_scale(y="shared", x="shared", color="independent")
elif escape_flag:
print("Now making dataframe for escape")
low_entry_heatmap = make_low_effect_heatmap(
base, strokewidth_size, heatmap_sites
)
chart = alt.layer(
chart_empty,
chart_effect,
low_entry_heatmap,
chart_distance,
chart_entropy,
chart_contact,
wildtype_layer_box,
wildtype_layer,
).resolve_scale(y="shared", x="shared", color="independent")
else:
chart = alt.layer(
chart_empty,
chart_effect,
chart_entropy,
chart_contact,
wildtype_layer_box,
wildtype_layer,
).resolve_scale(y="shared", x="shared", color="independent")
return chart
Below is the main heatmap function 'plot_entry_heatmap'¶
In [9]:
def plot_entry_heatmap(
df,
legend_title,
null_color=None,
ranges=None,
effect_color=None,
entropy_color=None,
strokewidth_size=None,
custom_y_axis_order=None,
entropy_flag=None,
contact_flag=None,
specific_sites=None,
specific_sites_name=None,
low_entry_df=None,
binding_flag=None,
escape_flag=None,
custom_domain=None,
escape_distance=None,
subtitle_str=None,
):
"""
Generates a customizable heatmap for deep mutational scanning (DMS) data visualization.
Parameters:
- df (DataFrame): The data frame containing the data to be visualized. It must include the columns 'site', 'mutant', 'value', and 'wildtype'.
- legend_title (str): The title of the heatmap legend.
- null_color (str, optional): Color for mutants with no observations. Default is 'gray'.
- ranges (list of tuples, optional): Defines the ranges for site wrapping on the heatmap. If not provided, a default range is used.
- effect_color (str, optional): Color scheme for effect values. Default is 'red-blue'.
- entropy_color (str, optional): Color scheme for entropy values. Default is 'purples'.
- strokewidth_size (float, optional): The width of the stroke used in the heatmap. Default size is not specified.
- custom_y_axis_order (list, optional): Specifies a custom order for the y-axis, overriding the default amino acid order.
- entropy_flag (bool, optional): If True, sequence entropy is included in the heatmap. Default is False.
- contact_flag (bool, optional): If True, contact sites are included in the heatmap. Default is False.
- specific_sites (list, optional): Specifies a subset of sites to be plotted. If None, all sites are plotted using wrapping. Default is None.
- specific_sites_name (str, optional): A title to display at the top of the heatmap for specific sites. Default is None.
- low_entry_df (DataFrame,optional): If given, will use different color to show sites with low entry scores (Used for Binding Score Heatmaps)
- binding_flag (bool, optional): If True, will plot binding instead of entry. Must be used with low_entry_df to mask low cell entry mutants
- custom_domain (list, optional): Give custom domain used for coloring. If None, will use default [-4,2.5]
- escape_flag (bool,optional): If True, will plot escape instead of entry. Must be used with low_entry_df to mask low cell entry mutants
- escape_distance (DataFrame, optional): give distance file for residues
- subtitle_str (str, optional): Subtitle for the heatmap. Default is None.
Returns:
An Altair chart object representing the generated heatmap. This chart can be further customized or directly displayed in Jupyter notebooks or other compatible environments.
"""
if contact_flag:
contact_df = make_contact()
else:
contact_df = None
if entropy_flag is True:
entropy_df = prepare_entropy()
else:
entropy_df = None
if escape_distance is not None:
distance_df = prepare_distance(escape_distance)
else:
distance_df = None
# Make the dataframes for plotting. This calls the code in the cell above.
empty_df = make_empty_df(
df,
contact_df=contact_df,
entropy_df=entropy_df,
distance_df=distance_df,
contact_flag=contact_flag,
entropy_flag=entropy_flag,
low_entry_df=low_entry_df,
binding_flag=binding_flag,
escape_flag=escape_flag,
)
# Define the base order list. What order do you want the amino acids to appear?
base_order = [
"R",
"K",
"H",
"D",
"E",
"Q",
"N",
"S",
"T",
"Y",
"W",
"F",
"A",
"I",
"L",
"M",
"V",
"G",
"P",
"C",
]
# Initialize custom_order with custom_y_axis_order or base_order based on custom_y_axis_order's value
custom_order = (
custom_y_axis_order if custom_y_axis_order is not None else base_order
)
# Prepend conditions based on flags
if entropy_flag and contact_flag:
# Both flags are true, prepend both "contact" and "entropy"
custom_order = ["contact", "entropy"] + custom_order
elif entropy_flag:
# Only entropy_flag is true, prepend "entropy"
custom_order = ["entropy"] + custom_order
elif contact_flag:
# Only contact_flag is true, prepend "contact"
custom_order = ["contact"] + custom_order
elif distance_df is not None:
custom_order = ["distance"] + custom_order
# Optional parameters
if null_color is None:
background_color = "#d1d3d4"
else:
background_color = null_color
# Sites for wrapping heatmap correctly. In this case, I did DMS on sites 71-603, so I wrap the heatmap with the specified sites.
if ranges is None:
full_ranges = [
list(range(start, end))
for start, end in [
(71, 181),
(181, 291),
(291, 401),
(401, 511),
(511, 603),
]
]
else:
full_ranges = ranges
if subtitle_str:
subtitle_str = subtitle_str
else:
subtitle_str = ""
# effect_color
if custom_domain:
if effect_color is None:
color_scale_effect = alt.Scale(
scheme="redblue", domainMid=0, domain=custom_domain
)
else:
color_scale_effect = alt.Scale(
scheme=effect_color, domainMid=0, domain=custom_domain
)
else:
if effect_color is None:
color_scale_effect = alt.Scale(
scheme="redblue", domainMid=0, domain=[-4, 2.5] #default color scale for effects
)
else:
color_scale_effect = alt.Scale(
scheme=effect_color, domainMid=0, domain=[-4, 2.5]
)
# entropy_color
if entropy_color is None:
color_scale_entropy = alt.Scale(
scheme="purples", domain=[0, 1.5], reverse=True) #default color scale for entropy
else:
color_scale_entropy = alt.Scale(
scheme=entropy_color, domain=[0, 1.5], reverse=True
)
# strokewidth size
if strokewidth_size is None:
strokewidth_size = 0.25 #default stroke width size
else:
strokewidth_size = strokewidth_size
def determine_sorting_order(df):
# Sort the dataframe by 'site' to ensure that duplicates are detected correctly.
final_df = df.sort_values("site")
sort_order = {mutant: i for i, mutant in enumerate(custom_order)}
final_df["mutant_rank"] = final_df["mutant"].map(sort_order)
# Map the 'mutant' column to these ranks
# Now sort the dataframe by this rank
final_df = final_df.sort_values("mutant_rank")
sites = sorted(final_df["site"].unique(), key=lambda x: float(x))
return final_df, sites, sort_order
heatmap_df, heatmap_sites, sort_order = determine_sorting_order(empty_df) #call function to sort the dataframe.
# container to hold the charts during the loop
charts = []
if specific_sites: #if specific sites are given, plot only those sites in the heatmaps
# Filter the heatmap to only show certain sites
subset_df = heatmap_df[heatmap_df["site"].isin(specific_sites)]
# Need to do independently for wildtype here for individual sites
unique_wildtypes_df = subset_df.drop_duplicates(
subset=["site", "wildtype"])
unique_wildtypes_df = unique_wildtypes_df.sort_values("site")
sort_order = {mutant: i for i, mutant in enumerate(custom_order)}
unique_wildtypes_df["mutant_rank"] = unique_wildtypes_df["wildtype"].map(
sort_order
)
unique_wildtypes_df = unique_wildtypes_df.sort_values("mutant_rank")
# Setup the legend
effect_legend = alt.Legend(
title=legend_title,
titleAlign="left",
tickCount=3,
gradientLength=75,
titleAnchor="start",
offset=5,
titlePadding=2,
titleOrient='top',
gradientThickness=10,
)
entropy_legend = (
alt.Legend(title="Henipavirus Entropy", values=[0, 1, 2])
if entropy_flag is True
else None
)
# Setup x-axis labeling. We want the labels to be vertical.
x_axis = alt.Axis(
labelAngle=-90,
title="Site",
labels=True,
)
# Run the main heatmap compiler function. This is where the magic happens. Code for this function in the cell above.
chart = compile_chart(
subset_df,
heatmap_sites,
unique_wildtypes_df,
x_axis,
background_color,
color_scale_effect=color_scale_effect,
color_scale_entropy=color_scale_entropy,
strokewidth_size=strokewidth_size,
legend_title=legend_title,
effect_legend=effect_legend,
entropy_legend=entropy_legend,
binding_flag=binding_flag,
escape_flag=escape_flag,
)
# Since this is a single chart, I don't know why I need to do this, but I seem to get errors if I don't append and then do alt.vconcat below. I get why I need to do this for multiple heatmaps in a for loop, but not here. Leaving in.
charts.append(chart)
if specific_sites_name:
specific_sites_name = specific_sites_name
else:
specific_sites_name = ""
combined_charts = alt.vconcat(*charts, title=specific_sites_name).resolve_scale(
y="shared", x="shared", color="shared"
)
return combined_charts
else:
for idx, subset in enumerate(full_ranges): #if we didnt specify specific sites to be plotted, then it assumes you want the full range of sites to be wrapped in the heatmap.
# Flags for showing the legend only the first time
subset_df = heatmap_df[
heatmap_df["site"].isin(subset)
] # for the wrapping of sites
unique_wildtypes_df = subset_df.drop_duplicates(
subset=["site", "wildtype"]
) # for the wildtype mapping
# Keep track of where in the loop we are for plotting
is_last_plot = idx == len(full_ranges) - 1
#define the legend for the effect values
effect_legend = (
alt.Legend(
title=legend_title,
direction="horizontal",
gradientLength=100,
titleAnchor="middle",
tickCount=3,
labelAlign="left",
titleAlign='center',
)
if is_last_plot
else None
)
entropy_legend = (
alt.Legend(
title="Henipavirus Entropy",
direction="horizontal",
gradientLength=100,
titleAnchor="middle",
values=[0, 1, 2],
labelAlign="center",
)
if is_last_plot and entropy_flag is True
else None
)
if distance_df is not None:
distance_legend = (
alt.Legend(
title="Antibody distance",
direction="horizontal",
gradientLength=100,
tickCount=3,
titleAlign="center",
titleAnchor='middle',
labelAlign="left",
)
if is_last_plot is True
else None
)
else:
distance_legend = None
# Setup x-axis labeling. We want the labels to be vertical. Only show the labels every 10 sites. Only show the axis title on the last row.
x_axis = alt.Axis(
labelAngle=-90,
labelExpr="datum.value % 10 === 0 ? datum.value : ''",
title="Site" if is_last_plot else None,
labels=True,
)
#This is where the magic happens. Code for this function in the cell above.
chart = compile_chart(
subset_df,
heatmap_sites,
unique_wildtypes_df,
x_axis,
background_color,
color_scale_effect=color_scale_effect,
color_scale_entropy=color_scale_entropy,
strokewidth_size=strokewidth_size,
legend_title=legend_title,
effect_legend=effect_legend,
entropy_legend=entropy_legend,
distance_legend=distance_legend,
binding_flag=binding_flag,
escape_flag=escape_flag
)
charts.append(chart)
combined_chart = alt.vconcat(
*charts, spacing=3, title=alt.Title(f"{legend_title}",subtitle=subtitle_str)
).resolve_scale(y="shared", x="independent", color="shared").configure_axisY(titlePadding=-50)
return combined_chart
Now that we have heatmap function setup, make heatmaps:¶
First, do binding heatmaps¶
In [10]:
E2_binding_heatmap_full = plot_entry_heatmap(
df=e2_binding,
legend_title="bEFNB2 Binding",
null_color=config["background_color"],
effect_color=config["effect_color"],
entropy_color=config["entropy_color"],
strokewidth_size=config["strokewidth_size"],
contact_flag=True,
low_entry_df=e2_low_func,
binding_flag=True,
custom_domain=[-6, 2.5],
subtitle_str='Interactive heatmap of Nipah RBP mutational effects relative to the unmutated reference sequence',
)
E2_binding_heatmap_full.display()
if combined_entry_binding_contact is not None:
E2_binding_heatmap_full.save(E2_binding_heatmap)
plotting binding Now making dataframe for binding Now making dataframe for binding Now making dataframe for binding Now making dataframe for binding Now making dataframe for binding
In [11]:
E3_binding_heatmap_full = plot_entry_heatmap(
df=e3_binding,
legend_title="bEFNB3 Binding",
null_color=config["background_color"],
effect_color=config["effect_color"],
entropy_color=config["entropy_color"],
strokewidth_size=config["strokewidth_size"],
contact_flag=True,
low_entry_df=e3_low_func,
binding_flag=True,
custom_domain=[-2, 2],
subtitle_str='Interactive heatmap of Nipah RBP mutational effects relative to the unmutated reference sequence',
)
E3_binding_heatmap_full.display()
if combined_entry_binding_contact is not None:
E3_binding_heatmap_full.save(E3_binding_heatmap)
plotting binding Now making dataframe for binding Now making dataframe for binding Now making dataframe for binding Now making dataframe for binding Now making dataframe for binding
Then, make entry heatmaps:¶
In [12]:
E2_entry_heatmap_full = plot_entry_heatmap(
df=e2_func,
legend_title="CHO-bEFNB2 Entry",
null_color=config["background_color"],
effect_color=config["effect_color"],
strokewidth_size=config["strokewidth_size"],
contact_flag=True,
subtitle_str='Interactive heatmap of Nipah RBP mutational effects relative to the unmutated reference sequence',
)
E2_entry_heatmap_full.display()
if combined_entry_binding_contact is not None:
E2_entry_heatmap_full.save(E2_entry_heatmap)
plotting entry
In [13]:
E3_entry_heatmap_full = plot_entry_heatmap(
df=e3_func,
legend_title="CHO-bEFNB3 Entry",
null_color=config["background_color"],
effect_color=config["effect_color"],
strokewidth_size=config["strokewidth_size"],
contact_flag=True,
subtitle_str='Interactive heatmap of Nipah RBP mutational effects relative to the unmutated reference sequence',
)
E3_entry_heatmap_full.display()
if combined_entry_binding_contact is not None:
E3_entry_heatmap_full.save(E3_entry_heatmap)
plotting entry
Then, make entry and binding heatmaps for contact sites:¶
Make heatmaps by amino acid property for binding¶
In [14]:
hydrophobic_AA = ["A", "V", "L", "I", "M"]
aromatic_AA = ["Y", "W", "F"]
positive_AA = ["K", "R", "H"]
negative_AA = ["E", "D"]
hydrophilic_AA = ["S", "T", "N", "Q"]
special_AA = ["C","G","P"]
def find_aa_wildtype_sites(df, aa_type):
aa_list = list(df[df["wildtype"].isin(aa_type)]["site"].unique())
return aa_list
def make_AA_lists(df):
hydrophobic_AA_list = find_aa_wildtype_sites(df, hydrophobic_AA)
aromatic_AA_list = find_aa_wildtype_sites(df, aromatic_AA)
positive_AA_list = find_aa_wildtype_sites(df, positive_AA)
negative_AA_list = find_aa_wildtype_sites(df, negative_AA)
hydrophilic_AA_list = find_aa_wildtype_sites(df, hydrophilic_AA)
special_AA_list = find_aa_wildtype_sites(df, special_AA)
all_AA = [
hydrophobic_AA_list,
aromatic_AA_list,
positive_AA_list,
negative_AA_list,
hydrophilic_AA_list,
special_AA_list,
]
return all_AA
AA_names = ["Hydrophobic", "Aromatic", "Positive", "Negative", "Hydrophilic","Special"]
def make_aa_property_charts(
df, df_low, sites_list, sites_name, custom_domain, legend_name
):
empty_charts = []
for aa_type, name in zip(sites_list, sites_name):
aa_property = plot_entry_heatmap(
df=df,
legend_title=legend_name,
null_color=config["background_color"],
effect_color=config["effect_color"],
entropy_color=config["entropy_color"],
strokewidth_size=config["strokewidth_size"],
specific_sites=aa_type,
specific_sites_name=name,
low_entry_df=df_low,
binding_flag=True,
custom_domain=custom_domain,
)
empty_charts.append(aa_property)
combined_chart = alt.vconcat(*empty_charts, spacing=3)
return combined_chart
E2_binding_AA_prop = make_aa_property_charts(
e2_binding, e2_low_func, make_AA_lists(e2_binding), AA_names, [-6, 2.5], "bEFNB2 Binding"
)
E2_binding_AA_prop.display()
plotting binding Now making dataframe for binding plotting binding Now making dataframe for binding plotting binding Now making dataframe for binding plotting binding Now making dataframe for binding plotting binding Now making dataframe for binding plotting binding Now making dataframe for binding
In [15]:
E3_binding_AA_prop = make_aa_property_charts(
e3_binding, e3_low_func, make_AA_lists(e3_binding), AA_names, [-2, 2], "bEFNB3 Binding"
)
E3_binding_AA_prop.display()
plotting binding Now making dataframe for binding plotting binding Now making dataframe for binding plotting binding Now making dataframe for binding plotting binding Now making dataframe for binding plotting binding Now making dataframe for binding plotting binding Now making dataframe for binding
Now entry by amino acid property¶
In [16]:
def make_aa_property_charts_entry(
df, sites_list, sites_name, legend_name
):
empty_charts = []
for aa_type, name in zip(sites_list, sites_name):
aa_property = plot_entry_heatmap(
df=df,
legend_title=legend_name,
null_color=config["background_color"],
effect_color=config["effect_color"],
entropy_color=config["entropy_color"],
strokewidth_size=config["strokewidth_size"],
specific_sites=aa_type,
specific_sites_name=name,
contact_flag = True,
)
empty_charts.append(aa_property)
combined_chart = alt.vconcat(*empty_charts, spacing=3)
return combined_chart
In [17]:
E2_entry_AA_prop = make_aa_property_charts_entry(
e2_func, make_AA_lists(e2_func),AA_names, "CHO-bEFNB2 entry"
).configure_title(offset=5)
E2_entry_AA_prop.display()
if combined_entry_binding_contact is not None:
E2_entry_AA_prop.save(E2_entry_AA_prop_heatmap)
plotting entry plotting entry plotting entry plotting entry plotting entry plotting entry
In [18]:
E3_entry_AA_prop = make_aa_property_charts_entry(
e3_func, make_AA_lists(e2_func), AA_names, "CHO-bEFNB3 entry"
).configure_title(offset=5)
E3_entry_AA_prop.display()
if combined_entry_binding_contact is not None:
E3_entry_AA_prop.save(E3_entry_AA_prop_heatmap)
plotting entry plotting entry plotting entry plotting entry plotting entry plotting entry
Now for specific sites I want to focus on¶
In [19]:
def make_entry_binding_heatmaps_for_specific_sites(title,sites_list):
specific_sites_E2_binding = plot_entry_heatmap(
df=e2_binding,
legend_title="B2",
null_color=config["background_color"],
effect_color=config["effect_color"],
entropy_color=config["entropy_color"],
strokewidth_size=config["strokewidth_size"],
specific_sites=sites_list,
low_entry_df=e2_low_func,
binding_flag=True,
custom_domain=[-6, 2.5],
)
specific_sites_E3_binding = plot_entry_heatmap(
df=e3_binding,
legend_title="B3",
null_color=config["background_color"],
effect_color=config["effect_color"],
entropy_color=config["entropy_color"],
strokewidth_size=config["strokewidth_size"],
specific_sites=sites_list,
low_entry_df=e3_low_func,
binding_flag=True,
custom_domain=[-2, 2],
)
specific_sites_E2_entry = plot_entry_heatmap(
df=e2_func,
legend_title="B2",
null_color=config["background_color"],
effect_color=config["effect_color"],
entropy_color=config["entropy_color"],
strokewidth_size=config["strokewidth_size"],
specific_sites=sites_list,
)
specific_sites_E3_entry = plot_entry_heatmap(
df=e3_func,
legend_title="B3",
null_color=config["background_color"],
effect_color=config["effect_color"],
entropy_color=config["entropy_color"],
strokewidth_size=config["strokewidth_size"],
specific_sites=sites_list,
)
combo_binding_img = alt.vconcat(specific_sites_E2_binding,specific_sites_E3_binding).properties(title='Binding')
combo_entry_img = alt.vconcat(specific_sites_E2_entry,specific_sites_E3_entry).properties(title='Entry')
combo_combo = alt.hconcat(combo_entry_img,combo_binding_img).properties(title=title).configure_title(offset=15)
return combo_combo
In [20]:
dimer_assm_chainA = make_entry_binding_heatmaps_for_specific_sites('ChainA Dimer Interface',[160, 162, 163, 164, 165, 166, 169, 171, 205, 206, 208, 247, 249, 256, 258, 259, 260, 261, 266, 267, 268, 270, 323, 324, 325, 331])
dimer_assm_chainA.display()
plotting binding Now making dataframe for binding plotting binding Now making dataframe for binding plotting entry plotting entry
In [21]:
dimer_assm_chainB = make_entry_binding_heatmaps_for_specific_sites('ChainB Dimer Interface',[167, 168, 170, 171, 172, 202, 203, 204, 205, 208, 210, 212, 237, 238, 239, 240, 242, 258, 584, 589, 591])
dimer_assm_chainB.display()
plotting binding Now making dataframe for binding plotting binding Now making dataframe for binding plotting entry plotting entry
In [22]:
BLI_mutant_sites = make_entry_binding_heatmaps_for_specific_sites('Sites Chosen for BLI',[244,305,492,507,530,553,555,559,588])
BLI_mutant_sites.display()
plotting binding Now making dataframe for binding plotting binding Now making dataframe for binding plotting entry plotting entry
In [23]:
contact_sites_img = make_entry_binding_heatmaps_for_specific_sites('Contact Sites',config['contact_sites'])
contact_sites_img.display()
if combined_entry_binding_contact is not None:
contact_sites_img.save(combined_entry_binding_contact)
plotting binding Now making dataframe for binding plotting binding Now making dataframe for binding plotting entry plotting entry
In [24]:
stalk_sites_img = make_entry_binding_heatmaps_for_specific_sites('Stalk',list(range(72, 148)))
stalk_sites_img.display()
plotting binding Now making dataframe for binding plotting binding Now making dataframe for binding plotting entry plotting entry
In [25]:
neck_sites_img = make_entry_binding_heatmaps_for_specific_sites('Neck',list(range(148, 166)))
neck_sites_img.display()
plotting binding Now making dataframe for binding plotting binding Now making dataframe for binding plotting entry plotting entry
In [26]:
beta_sheet_sites_img = make_entry_binding_heatmaps_for_specific_sites('Beta Sheets',config['beta_sheet'])
beta_sheet_sites_img.display()
plotting binding Now making dataframe for binding plotting binding Now making dataframe for binding plotting entry plotting entry
In [27]:
nipah_poly_sites_img = make_entry_binding_heatmaps_for_specific_sites('Nipah polymorphic sites',config['nipah_poly'])
nipah_poly_sites_img.display()
if combined_entry_binding_contact is not None:
nipah_poly_sites_img.save(nipah_poly_sites_img_heatmap)
plotting binding Now making dataframe for binding plotting binding Now making dataframe for binding plotting entry plotting entry
In [28]:
glycan_sites_img = make_entry_binding_heatmaps_for_specific_sites('Glycosylation sites',config['glycans'])
glycan_sites_img.display()
if combined_entry_binding_contact is not None:
glycan_sites_img.save(glycan_sites_img_heatmap)
plotting binding Now making dataframe for binding plotting binding Now making dataframe for binding plotting entry plotting entry
Make antibody escape heatmaps¶
In [29]:
m102 = antibody_escape[antibody_escape['ab'] == 'm102.4']
m102_dist = pd.read_csv("results/distances/df_m102_atomic_distances.csv")
HENV26 = antibody_escape[antibody_escape['ab'] == 'HENV-26']
HENV26_dist = pd.read_csv("results/distances/df_HENV26_atomic_distances.csv")
HENV32 = antibody_escape[antibody_escape['ab'] == 'HENV-32']
HENV32_dist = pd.read_csv("results/distances/df_HENV32_atomic_distances.csv")
nAH1 = antibody_escape[antibody_escape['ab'] == 'nAH1.3']
nAH1_dist = pd.read_csv("results/distances/df_nAH_atomic_distances.csv")
HENV117 = antibody_escape[antibody_escape['ab'] == 'HENV-117']
HENV103 = antibody_escape[antibody_escape['ab'] == 'HENV-103']
def make_escape_heatmaps(df,dist,title):
escape_heatmap = plot_entry_heatmap(
df=df,
legend_title=f"{title} escape",
null_color=config["background_color"],
effect_color=config["effect_color"],
entropy_color=config["entropy_color"],
strokewidth_size=config["strokewidth_size"],
low_entry_df=e3_low_func_ab,
escape_flag=True,
custom_domain=[-2, 2],
contact_flag=False,
escape_distance = dist,
subtitle_str='Interactive heatmap of Nipah RBP mutational effects relative to the unmutated reference sequence',
)
escape_heatmap = escape_heatmap.configure_axisY(titlePadding=-10)
return escape_heatmap
In [30]:
m102_heatmap = make_escape_heatmaps(m102,m102_dist,'m102.4')
#m102_heatmap.display()
m102_heatmap.save(m102_heatmap_plot)
plotting escape making a distance df Now making dataframe for escape Now making dataframe for escape Now making dataframe for escape Now making dataframe for escape Now making dataframe for escape
In [31]:
HENV26_heatmap = make_escape_heatmaps(HENV26,HENV26_dist,'HENV-26')
HENV26_heatmap.save(HENV26_heatmap_plot)
plotting escape making a distance df Now making dataframe for escape Now making dataframe for escape Now making dataframe for escape Now making dataframe for escape Now making dataframe for escape
In [32]:
HENV32_heatmap = make_escape_heatmaps(HENV32,HENV32_dist,'HENV-32')
HENV32_heatmap.save(HENV32_heatmap_plot)
plotting escape making a distance df Now making dataframe for escape Now making dataframe for escape Now making dataframe for escape Now making dataframe for escape Now making dataframe for escape
In [33]:
nAH1_heatmap = make_escape_heatmaps(nAH1,nAH1_dist,'nAH1.3')
nAH1_heatmap.save(nAH1_heatmap_plot)
plotting escape making a distance df Now making dataframe for escape Now making dataframe for escape Now making dataframe for escape Now making dataframe for escape Now making dataframe for escape
In [34]:
HENV117_heatmap = make_escape_heatmaps(HENV117,dist=None,title='HENV-117')
HENV117_heatmap = HENV117_heatmap.configure_axisY(titlePadding=4)
HENV117_heatmap.save(HENV117_heatmap_plot)
plotting escape Now making dataframe for escape Now making dataframe for escape Now making dataframe for escape Now making dataframe for escape Now making dataframe for escape
In [35]:
HENV103_heatmap = make_escape_heatmaps(HENV103,dist=None,title='HENV-103')
HENV103_heatmap = HENV103_heatmap.configure_axisY(titlePadding=4)
HENV103_heatmap.save(HENV103_heatmap_plot)
plotting escape Now making dataframe for escape Now making dataframe for escape Now making dataframe for escape Now making dataframe for escape Now making dataframe for escape
In [ ]: