Visualize receptor binding regions and distribution of scores for different GPC regions¶
In [1]:
# Imports
import os
import polyclonal
import pandas as pd
import altair as alt
# Colour palette for the region plots. The order matters: it is passed as the
# colour-scale `range`, so the i-th colour is used for the i-th region.
tol_muted_adjusted = [
    "#AA4499", "#88CCEE", "#EE7733", "#44AA99",
    "#1f78b4", "#CC6677", "#117733", "#999933",
    "#DDCC77", "#CC3311", "#882255",
    "#000000", "#DDDDDD",
]
# Turn off Altair's max-rows check so the charts below can be built from the
# full functional-score table. The result is bound to `_` so the cell does not
# echo it as output.
_ = alt.data_transformers.disable_max_rows()
In [2]:
# this cell is tagged as `parameters` for papermill parameterization
# Every value is a placeholder; real values must be supplied before the cells
# below are run.
func_scores = None  # path of the CSV of functional effects read with `pd.read_csv`
min_times_seen = None  # minimum `times_seen` for a mutation to enter the region distributions
n_selections = None  # minimum `n_selections` for a mutation to enter the region distributions
html_dir = None  # directory created (if missing) before the HTML plot is saved
html_output = None  # path the combined distribution plot is saved to
In [3]:
# Parameters
# Concrete values for this run; they replace the `None` placeholders defined in
# the `parameters` cell above.
func_scores = "results/func_effects/averages/293T_entry_func_effects.csv"
min_times_seen = 2
n_selections = 8
html_dir = "results/func_scores_distributions/"
html_output = "results/func_scores_distributions/func_scores_distributions.html"
In [4]:
# # Uncomment to run this notebook interactively
# func_scores = "../results/func_effects/averages/293T_entry_func_effects.csv"
# min_times_seen = 2
# n_selections = 8
# html_dir = "../results/func_scores_distributions/"
# html_output = "../results/func_scores_distributions/func_scores_distributions.html"
In [5]:
# Load the functional effects table. No filtering happens here; the
# times-seen / selection-count filter is applied later, just before the
# region distributions are plotted.
functional_scores = (
    pd.read_csv(func_scores)
    # Dummy single-valued phenotype column, used as the category column of the heatmaps
    .assign(phenotype="functional_effect")
    # Give the effect column the human-readable name used in every plot
    .rename(columns={"effect": "effect on cell entry"})
)
In [6]:
# Plotting settings shared by every heatmap call below.

# Amino-acid characters passed as `alphabet` to the heatmaps; "*" is the stop codon.
alphabet = list("RKHDEQNSTYWFAILMVGPC*")

# Extra per-mutation statistics shown in the heatmap tooltips.
addtl_tooltip_stats = ["n_selections", "times_seen"]

# Extra statistics that get a slider, with their initial values. These come
# from the notebook parameters instead of being hard-coded, so the heatmap
# sliders start at the same thresholds used to filter the region distributions.
addtl_slider_stats = {
    "times_seen": min_times_seen,
    "n_selections": n_selections,
}
Heatmap of alpha-dystroglycan binding residues¶
In [7]:
# Sites treated as alpha-dystroglycan (α-DG) binding residues.
# This list is reused later to label the "α-DG binding sites" region.
DG_sites = [
    120, 121, 125,
    150,  # supported by 151 and 125 interactions
    151,
    256, 257, 258,
]

# Heatmap only (no line plot, no zoom bar), restricted to the α-DG sites.
DG_chart = polyclonal.plot.lineplot_and_heatmap(
    # data and which columns to plot
    data_df=functional_scores,
    stat_col="effect on cell entry",
    category_col="phenotype",
    sites=DG_sites,
    alphabet=alphabet,
    # interactive extras
    addtl_tooltip_stats=addtl_tooltip_stats,
    addtl_slider_stats=addtl_slider_stats,
    # initial state of the chart
    init_site_statistic="mean",
    init_floor_at_zero=False,
    # layout
    plot_title="\u03B1-DG binding residues",
    heatmap_max_at_least=2,
    show_lineplot=False,
    show_zoombar=False,
)
DG_chart
Out[7]:
Heatmap of LAMP1 binding residues¶
In [8]:
# Sites treated as LAMP1 binding residues.
# This list is reused later to label the "LAMP1 binding sites" region.
LAMP1_sites = [
    92, 93,  # histidine triad
    172, 173,
    188, 192, 195, 197, 198,
    200, 201, 202, 204, 206, 207,
    211, 216,
    230,  # histidine triad
]

# Heatmap only (no line plot, no zoom bar), restricted to the LAMP1 sites.
LAMP1_chart = polyclonal.plot.lineplot_and_heatmap(
    # data and which columns to plot
    data_df=functional_scores,
    stat_col="effect on cell entry",
    category_col="phenotype",
    sites=LAMP1_sites,
    alphabet=alphabet,
    # interactive extras
    addtl_tooltip_stats=addtl_tooltip_stats,
    addtl_slider_stats=addtl_slider_stats,
    # initial state of the chart
    init_site_statistic="mean",
    init_floor_at_zero=False,
    # layout
    plot_title="LAMP1 binding residues",
    heatmap_max_at_least=2,
    show_lineplot=False,
    show_zoombar=False,
)
LAMP1_chart
Out[8]:
Show entire heatmap separated by ranges¶
In [9]:
# Draw the full heatmap as several consecutive site windows, one chart per
# window, so each chart stays narrow enough to read.
# `segments` collects the charts in order. It was previously initialised but
# never filled, so it was always an empty list.
segments = []
for segment in [(1, 140), (141, 280), (281, 420), (421, 491)]:
    curr_segment = polyclonal.plot.lineplot_and_heatmap(
        data_df=functional_scores,
        stat_col="effect on cell entry",
        category_col="phenotype",
        alphabet=alphabet,
        addtl_tooltip_stats=addtl_tooltip_stats,
        addtl_slider_stats=addtl_slider_stats,
        init_floor_at_zero=False,
        init_site_statistic="mean",
        show_zoombar=False,
        show_lineplot=False,
        heatmap_max_at_least=2,
        # (first, last) site bounds of the window to display
        sites_to_show={"include_range": segment},
    )
    segments.append(curr_segment)
    # A bare expression inside a loop is not rendered, so display explicitly
    curr_segment.display()
Distributions of functional scores for different regions¶
Plot the distribution of averaged functional scores for each mutation (excluding stop codons), stratified by region of GPC, to show which regions are more or less mutationally tolerant.
In [10]:
# Keep only well-supported measurements: mutations with at least
# `min_times_seen` times seen and at least `n_selections` selections, and drop
# stop codons. `.copy()` makes this an independent frame, so adding the
# "region" column below does not write into a slice of the unfiltered table.
functional_scores = functional_scores.loc[
    (functional_scores["times_seen"] >= min_times_seen)
    & (functional_scores["n_selections"] >= n_selections)
    & (functional_scores["mutant"] != "*")
].copy()

# # **
# # Uncomment if want to compute site mean effects
# # Groupby site
# functional_scores = (
#     functional_scores
#     .groupby("site")
#     .aggregate({
#         "wildtype" : "first",
#         "effect on cell entry" : "mean",
#     })
#     .reset_index()
# )
# # **

# Primary region of every row, decided by site number:
# site <= 58 -> SSP, site <= 259 -> GP1, everything else -> GP2.
# The assignments run from the broadest to the narrowest bound so that later
# ones overwrite earlier ones.
functional_scores["region"] = "GP2"
functional_scores.loc[functional_scores["site"] <= 259, "region"] = "GP1"
functional_scores.loc[functional_scores["site"] <= 58, "region"] = "SSP"

# Glycosylation sites N - X - S/T (each row lists the three sites of one motif)
glycans = [
    79, 80, 81,
    89, 90, 91,
    99, 100, 101,
    109, 110, 111,
    119, 120, 121,
    167, 168, 169,
    224, 225, 226,
    365, 366, 367,
    373, 374, 375,
    390, 391, 392,
    395, 396, 397,
]

# Extra region labels that overlap the primary regions. Rows belonging to
# each of them are duplicated under the new label so they appear as their own
# row in the distribution plot. Dict order is the order the labels are appended.
overlay_regions = {
    "TM": range(428, 448),  # GP2 transmembrane domain (428 - 447)
    "CT": range(448, 492),  # GP2 cytoplasmic tail (448 - 491)
    "\u03B1-DG binding sites": DG_sites,
    "LAMP1 binding sites": LAMP1_sites,
    "N-glycosylation sites": glycans,
}

# Every overlay subset is taken from the primary-labelled rows only. Selecting
# from the growing, already-augmented table would copy a mutation more than
# once whenever its site belongs to two overlays (sites 120 and 121 are both
# α-DG binding and N-glycosylation sites), double-counting it in the later one.
primary_scores = functional_scores
functional_scores = pd.concat(
    [primary_scores]
    + [
        primary_scores.loc[primary_scores["site"].isin(sites)].assign(region=label)
        for label, sites in overlay_regions.items()
    ],
    ignore_index=True,
)
In [11]:
# Plot score distributions for each region: one jittered strip of points per
# region, one point per row of `functional_scores`.
distribution_plot = (
    alt.Chart(
        functional_scores, title="Effect on cell entry for different GPC regions"
    )
    .mark_circle(opacity=0.15, size=75)
    .encode(
        y=alt.Y(
            "region:N",
            title="GPC region",
            sort=None,  # no sorting: keep the order in which regions occur in the data
            axis=alt.Axis(
                domainWidth=1,
                domainColor="black",
                tickColor="black",
            ),
        ),
        x=alt.X(
            "effect on cell entry:Q",
            title="effect on cell entry",
            axis=alt.Axis(
                values=[-5, -4, -3, -2, -1, 0, 1],
                domainWidth=1,
                domainColor="black",
                tickColor="black",
            ),
        ),
        # Vertical offset within each region band, computed below
        yOffset="jitter:Q",
        color=alt.Color(
            "region:N",
            scale=alt.Scale(
                # Same region order as the y axis, paired with the palette in order
                domain=functional_scores["region"].unique().tolist(),
                range=tol_muted_adjusted,
            ),
        ).legend(None),
        tooltip=[
            "site",
            "wildtype",
            alt.Tooltip(
                "effect on cell entry", format=".2f", title="effect on cell entry"
            ),
        ],
    )
    .transform_calculate(
        # Generate Gaussian jitter with a Box-Muller transform
        jitter="sqrt(-2*log(random()))*cos(2*PI*random())"
    )
)

# Plot the median of each region as a black tick on top of the points
median_plot = (
    alt.Chart(functional_scores)
    .mark_tick(size=45, thickness=3, color="#000000")
    .encode(
        y=alt.Y(
            "region:N",
            title="GPC region",
            sort=None,
            axis=alt.Axis(
                domainWidth=1,
            ),
        ),
        x=alt.X(
            "median(effect on cell entry):Q",
            axis=alt.Axis(
                values=[-5, -4, -3, -2, -1, 0, 1],
                domainWidth=1,
            ),
        ),
        tooltip=[
            "region:N",
            alt.Tooltip(
                "median(effect on cell entry):Q",
                format=".2f",
                title="median effect on cell entry",
            ),
        ],
    )
)

# Combine strip plot and median
combined_plot = (
    (distribution_plot + median_plot)
    .configure_axis(
        grid=False,
        labelFontSize=16,
        titleFontSize=16,
        labelFontWeight="normal",
        titleFontWeight="normal",
    )
    .properties(
        width=400,
        height=400,
    )
    .configure_title(
        fontSize=24,
    )
    .configure_view(
        stroke=None
    )
)

# Make the output directory if it doesn't exist. `makedirs` also creates any
# missing parent directories, and `exist_ok=True` avoids an error when the
# directory is already there (including when another job creates it first).
os.makedirs(html_dir, exist_ok=True)

print(f"Saving to {html_output}")
combined_plot.save(html_output)
combined_plot
Saving to results/func_scores_distributions/func_scores_distributions.html
Out[11]:
Create the same plot as above, but with reduced font and mark sizes, for use as a manuscript figure.
In [12]:
# Small version of the region plot: same encodings as the interactive figure,
# but with small marks, small fonts and no chart title.

# Pieces shared by both layers
effect_ticks = [-5, -4, -3, -2, -1, 0, 1]
region_order = functional_scores["region"].unique().tolist()
region_colors = alt.Scale(domain=region_order, range=tol_muted_adjusted)

# Layer 1: jittered points, one per row of `functional_scores`
points_base = alt.Chart(
    functional_scores,
).mark_circle(opacity=0.15, size=5)
distribution_plot = points_base.encode(
    x=alt.X(
        "effect on cell entry:Q",
        title="effect on cell entry",
        axis=alt.Axis(values=effect_ticks, domainWidth=1),
    ),
    y=alt.Y(
        "region:N",
        title="GPC region",
        sort=None,
        axis=alt.Axis(domainWidth=1),
    ),
    yOffset="jitter:Q",
    color=alt.Color("region:N", scale=region_colors).legend(None),
    tooltip=[
        "site",
        "wildtype",
        alt.Tooltip(
            "effect on cell entry", format=".2f", title="effect on cell entry"
        ),
    ],
).transform_calculate(
    # Gaussian jitter from two uniform draws (Box-Muller transform)
    jitter="sqrt(-2*log(random()))*cos(2*PI*random())"
)

# Layer 2: black tick at the median of each region
ticks_base = alt.Chart(functional_scores).mark_tick(
    size=10, thickness=1, color="#000000"
)
median_plot = ticks_base.encode(
    x=alt.X(
        "median(effect on cell entry):Q",
        axis=alt.Axis(values=effect_ticks, domainWidth=1),
    ),
    y=alt.Y(
        "region:N",
        title="GPC region",
        sort=None,
        axis=alt.Axis(domainWidth=1),
    ),
    tooltip=[
        "region:N",
        alt.Tooltip(
            "median(effect on cell entry):Q",
            format=".2f",
            title="median effect on cell entry",
        ),
    ],
)

# Stack the two layers, then apply figure-wide styling
layered = distribution_plot + median_plot
combined_plot = (
    layered
    .configure_axis(
        grid=False,
        labelFontSize=8,
        titleFontSize=8,
        labelFontWeight="normal",
        titleFontWeight="normal",
    )
    .properties(width=150, height=150)
    .configure_view(stroke=None)
)
combined_plot
Out[12]: