Generate logo plots
This notebook generates logo plots for the escape maps for each antibody.
Import python modules:
In [1]:
import os
import altair as alt
import numpy as np
import pandas as pd
import matplotlib
matplotlib.rcParams['svg.fonttype'] = 'none'
import matplotlib.pyplot as plt
import matplotlib.colors as colors
import dmslogo
from dmslogo.colorschemes import ValueToColorMap
# Create the output directory for the logo plots. `makedirs` also creates any
# missing parent directory (e.g. `./results`) and, with `exist_ok=True`, is a
# no-op when the directory is already present, so the cell is safe to re-run.
os.makedirs('./results/logoplots/', exist_ok=True)
Set filtering parameters:
In [2]:
# Filtering parameters used by the cells below.

# Quantile (0-1) intended for the per-site *summed* escape: sites strictly above
# this quantile are selected, so 0.99 keeps roughly the top 1% of sites.
sums_threshold = 0.99
# Quantile (0-1) intended for the per-site *max* escape: sites strictly above
# this quantile are selected, so 0.99 keeps roughly the top 1% of sites.
max_threshold = 0.99
# Intended upper bound on the escape standard deviation of a mutation.
# NOTE(review): this value is not referenced by any filter in the cells shown;
# confirm whether an escape_std filter is missing or the parameter is obsolete.
escape_std_cutoff = 5
# Minimum `times_seen_func_effects` (observations of a mutation in the
# functional selections) required to keep a mutation; compared with `>=`.
func_times_seen_cutoff = 3
# Minimum `times_seen_ab` (observations of a mutation in the antibody escape
# selections) required to keep a mutation; compared with `>=`.
antibody_times_seen = 3
Load data:
In [3]:
# Antibodies to plot; each one maps to the CSV of its averaged per-mutation
# escape values. Note that `ab_dfs` holds file paths, not loaded dataframes.
antibody_names = ['007', '10-1074', 'PGT121', 'PGT128', 'BG18']
ab_dfs = {
    name: f'./results/antibody_escape/averages/{name}_mut_effect.csv'
    for name in antibody_names
}

# Functional effects of mutations (TZM-bl entry), merged onto the escape data later
func_scores = pd.read_csv('./results/func_effects/averages/TZM-bl_entry_func_effects.csv')

# Site numbering map; expose `reference_site` under the name `site` so it can
# be used as the merge key against the escape/functional tables.
site_numbering_map = (
    pd.read_csv('./data/site_numbering_map.csv')
    .assign(site=lambda numbering: numbering['reference_site'])
)
Process each antibody dataframe:
In [4]:
# Build one filtered dataframe per antibody, keyed by antibody name.
mutation_keys = ['site', 'wildtype', 'mutant']
processed_dfs = {}
for ab, escape_csv in ab_dfs.items():
    processed_dfs[ab] = (
        pd.read_csv(escape_csv)
        # Attach functional effects; overlapping columns get the suffixes
        # `_ab` (escape table) and `_func_effects` (functional table).
        .merge(func_scores, on=mutation_keys, how='left',
               suffixes=['_ab', '_func_effects'])
        # Attach the sequential site number used for the x-axis positions
        .merge(site_numbering_map[['site', 'sequential_site']], on=['site'])
        # Drop stop codons ("*") and gaps ("-")
        .loc[lambda df: df['mutant'] != '*']
        .loc[lambda df: df['mutant'] != '-']
        # (an `effect > -4.5` filter is disabled in the original cell)
        # Require enough observations in both selections; rows with no
        # functional measurement (NaN after the left merge) fail this test.
        .loc[lambda df: df['times_seen_func_effects'] >= func_times_seen_cutoff]
        .loc[lambda df: df['times_seen_ab'] >= antibody_times_seen]
    )
Get sites to show:
In [5]:
# Data-driven site selection: for every antibody, take the sites whose per-site
# max escape or per-site summed escape lies strictly above the configured
# quantile, and collect the union (first-seen order, no duplicates).
all_sites_to_show = []
seen_sites = set()  # O(1) membership test instead of scanning the list
for ab, processed_df in processed_dfs.items():
    escape_by_site = processed_df.groupby('site')['escape_median']

    # Sites whose strongest single mutation is in the top tail.
    # `max_threshold` is the quantile meant for this criterion.
    site_max = escape_by_site.max()
    sites_max_score = site_max[site_max > site_max.quantile(max_threshold)].index.tolist()

    # Sites whose total escape over all mutations is in the top tail.
    # `sums_threshold` is the quantile meant for this criterion.
    site_sum = escape_by_site.sum()
    sites_agg_sum = site_sum[site_sum > site_sum.quantile(sums_threshold)].index.tolist()

    for site in sites_max_score + sites_agg_sum:
        if site not in seen_sites:
            seen_sites.add(site)
            all_sites_to_show.append(site)

# Printed for reference only (string sort, so e.g. '56' sorts after '509')
print(sorted(all_sites_to_show))

# Curated list of sites that includes sites closeby. This REPLACES the
# data-driven selection above and is what the plotting cell actually uses.
all_sites_to_show = ['79', '136', '137', '138', '139', '140', '141', '142', '151', '160', '161',
                     '162', '186', '187', '188', '197', '198', '199', '299',
                     '301', '302', '303', '319', '320', '321', '322', '323', '323a', '324', '325', '326',
                     '327', '328', '329', '330', '331', '332', '333', '334', '375']
print(all_sites_to_show)
['115', '136', '137', '138', '139', '140', '142', '151', '160', '161', '162', '177', '178', '182', '186', '188', '199', '274', '299', '302', '303', '319', '325', '328', '330', '332', '334', '339', '347', '374', '415', '419', '423', '430', '436', '442', '455', '462', '487', '509', '518', '557', '559', '56', '565', '61', '674', '680', '690', '700', '75', '77'] ['79', '136', '137', '138', '139', '140', '141', '142', '151', '160', '161', '162', '186', '187', '188', '197', '198', '199', '299', '301', '302', '303', '319', '320', '321', '322', '323', '323a', '324', '325', '326', '327', '328', '329', '330', '331', '332', '333', '334', '375']
Make the logo plots:
In [6]:
# Make smaller color map, from here: https://stackoverflow.com/questions/18926031/how-to-extract-a-subset-of-a-colormap-as-a-new-colormap-in-matplotlib
def truncate_colormap(cmap, minval=0.0, maxval=1.0, n=100):
    """Build a colormap from the `[minval, maxval]` portion of `cmap`.

    Parameters
    ----------
    cmap : colormap
        Source colormap; it is called on an array of positions and its
        `name` attribute is used to label the result.
    minval, maxval : float
        Start and end positions within `cmap` to sample between.
    n : int
        Number of evenly spaced positions sampled from `cmap`.

    Returns
    -------
    matplotlib.colors.LinearSegmentedColormap
        Colormap named `trunc(<name>,<minval>,<maxval>)` built from the
        sampled colors.
    """
    sample_positions = np.linspace(minval, maxval, n)
    truncated_name = f'trunc({cmap.name},{minval:.2f},{maxval:.2f})'
    return colors.LinearSegmentedColormap.from_list(truncated_name, cmap(sample_positions))
# NOTE(review): `arr` is not referenced by any other cell shown here.
arr = np.linspace(0, 50, 100).reshape((10, 10))

# Use only the upper 80% of 'YlOrBr' for letter colors
cmap = plt.get_cmap('YlOrBr')
new_cmap = truncate_colormap(cmap, .2, 1)

# Make the figures for each antibody
for ab, filtered_df in processed_dfs.items():
    processed_df = filtered_df.assign(
        # x-tick label: wildtype residue followed by the site, e.g. "N332"
        label=lambda df: df['wildtype'] + df['site'],
        # Only the selected sites are drawn
        show_site=lambda df: df['site'].isin(all_sites_to_show),
        # Functional effect capped at 0, so all non-negative effects share a color
        clip=lambda df: np.clip(df['effect'], None, 0),
    )

    # Color scale spans the lowest functional effect up to the capped maximum.
    # The range is computed per antibody, from that antibody's filtered rows;
    # `map1` keeps its value from the last iteration after the loop ends.
    min_prop = processed_df['effect'].min()
    max_prop = processed_df['clip'].max()
    map1 = ValueToColorMap(minvalue=min_prop, maxvalue=max_prop, cmap=new_cmap)
    processed_df['color'] = processed_df['clip'].map(map1.val_to_color)

    # An empty antibody name would switch to blank x-tick labels; every key in
    # the antibody dict shown in this notebook is non-empty, so labels are used.
    if ab == '':
        processed_df['blanks'] = ''
        x_ticks = 'blanks'
    else:
        x_ticks = 'label'

    sites_to_draw = processed_df[processed_df['show_site']]
    fig, ax = dmslogo.draw_logo(
        data=sites_to_draw,
        x_col='sequential_site',
        xtick_col=x_ticks,
        letter_col='mutant',
        letter_height_col='escape_median',
        ylabel='Escape',
        xlabel='',
        color_col='color',
        title=ab,
        axisfontscale=1.5,
    )
    fig.savefig(f'./results/logoplots/{ab}_logoplot.svg', bbox_inches='tight', format='svg')
Make a figure for the color scale:
In [7]:
# Draw the color scale bar from `map1`, i.e. the value-to-color map left over
# from the last antibody processed in the plotting loop.
# NOTE(review): the color range is computed per antibody, so confirm this one
# scale bar is representative of all the logo plots.
fig, _ = map1.scale_bar(orientation='horizontal')
scalebar_path = './results/logoplots/scalebar_figure.svg'
fig.savefig(scalebar_path, bbox_inches='tight', format='svg')
In [ ]: