Comparison of escape across strains
This notebook makes correlation plots comparing the effects of mutations on antibody escape between two strains of HIV Envelope (BF520 and TRO11), for the antibodies 3BNC117 and 10-1074.
First, import python modules:
In [1]:
import pandas as pd
from scipy import stats
import altair as alt
import numpy as np
# Lift Altair's cap on the number of rows a chart's dataset may contain, since
# the merged per-mutation tables plotted below may exceed it.
# The return value is bound to `_`, presumably so it is not echoed as cell output.
_ = alt.data_transformers.disable_max_rows()
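Make plots. For each antibody (3BNC117, then 10-1074), merge the BF520 and TRO11 escape and functional-effect measurements per mutation, keep only mutations that pass the `times_seen` and functional-effect filters in both strains, and plot TRO11 escape against BF520 escape. Each antibody is analyzed twice: first with the raw escape values, then with negative escape values floored at zero. The printed correlations therefore appear in the order 3BNC117 raw, 3BNC117 floored, 10-1074 raw, 10-1074 floored: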
In [2]:
# Compare per-mutation antibody escape between the BF520 and TRO11 Env strains.
#
# For each antibody the cell:
#   1. loads the per-mutation escape averages for both strains,
#   2. merges them (plus the TZM-bl entry functional effects) on site + mutant,
#   3. filters to mutations passing QUALITY_FILTERS in both strains,
#   4. regresses TRO11 escape on BF520 escape and prints R^2 and R,
#   5. displays a scatter plot colored by whether the wildtype residue is the
#      same in both strains.
# Steps 3-5 run twice per antibody: on the raw escape values, then with
# negative escape values floored at zero.

# Results directories for the two deep mutational scanning projects.
BF520_RESULTS = '../HIV_Envelope_BF520_DMS_3BNC117_10-1074/results'
TRO11_RESULTS = 'results'
FUNC_EFFECTS_CSV = 'func_effects/averages/TZM-bl_entry_func_effects.csv'

# Filters applied (in order) before both the regression and the plot, so the
# reported correlation always describes exactly the points that are drawn.
# NOTE(review): the times_seen cutoffs differ between strains (>3 vs >2) —
# confirm this asymmetry is intentional.
QUALITY_FILTERS = [
    'times_seen_BF520>3 & times_seen_TRO11>2',
    'effect_BF520>-4 & effect_TRO11>-4',
]

MERGE_KEYS = ['site', 'mutant']

# The functional effects do not depend on the antibody, so read them once.
# Renaming `effect` up front gives the strain-specific column names explicitly
# instead of relying on a suffix collision between successive merges.
func_effects = {
    strain: (
        pd.read_csv(f'{results_dir}/{FUNC_EFFECTS_CSV}')
        [MERGE_KEYS + ['effect']]
        .rename(columns={'effect': f'effect_{strain}'})
    )
    for strain, results_dir in [('BF520', BF520_RESULTS), ('TRO11', TRO11_RESULTS)]
}

for antibody in ['3BNC117', '10-1074']:
    escape_BF520 = pd.read_csv(
        f'{BF520_RESULTS}/antibody_escape/averages/{antibody}_mut_effect.csv'
    )
    escape_TRO11 = pd.read_csv(
        f'{TRO11_RESULTS}/antibody_escape/averages/{antibody}_mut_effect.csv'
    )

    # Inner merges: only mutations measured in both strains, with functional
    # effects available in both, are retained.
    merged_df = (
        escape_BF520
        .merge(escape_TRO11, on=MERGE_KEYS, suffixes=('_BF520', '_TRO11'))
        .merge(func_effects['BF520'], on=MERGE_KEYS)
        .merge(func_effects['TRO11'], on=MERGE_KEYS)
        .assign(
            # Row-wise numpy max/min (NaN in either strain propagates);
            # these columns are only surfaced through the tooltip.
            max_std=lambda x: (
                x[['escape_std_BF520', 'escape_std_TRO11']].to_numpy().max(axis=1)
            ),
            min_func_effect=lambda x: (
                x[['effect_BF520', 'effect_TRO11']].to_numpy().min(axis=1)
            ),
            # True where both strains share the same wildtype amino acid.
            conserved=lambda x: x['wildtype_BF520'] == x['wildtype_TRO11'],
        )
    )

    for floored in [False, True]:
        if floored:
            # `.assign` returns a new frame, so `merged_df` is never mutated.
            plot_df = merged_df.assign(
                escape_median_BF520=lambda x: x['escape_median_BF520'].clip(lower=0),
                escape_median_TRO11=lambda x: x['escape_median_TRO11'].clip(lower=0),
            )
        else:
            plot_df = merged_df

        # Apply the quality filters once and reuse the result everywhere.
        filtered_df = plot_df
        for condition in QUALITY_FILTERS:
            filtered_df = filtered_df.query(condition)

        regression = stats.linregress(
            filtered_df['escape_median_BF520'].astype(float),
            filtered_df['escape_median_TRO11'].astype(float),
        )
        r_value = regression.rvalue

        corr_chart = (
            alt.Chart(filtered_df)
            .encode(
                x=alt.X('escape_median_BF520'),
                y=alt.Y('escape_median_TRO11'),
                color=alt.Color("conserved"),
                # Show every column on hover; floats get 3 significant digits.
                tooltip=[
                    alt.Tooltip(c, format=".3g") if filtered_df[c].dtype == float
                    else c
                    for c in filtered_df.columns
                ],
            )
            .mark_circle(filled=True, size=60, opacity=0.4)
            .configure_axis(grid=False)
            .resolve_scale(y="independent", x="independent")
            .properties(
                width=200,
                height=200,
                # Label each chart so the four outputs can be told apart.
                title=f"{antibody} ({'floored' if floored else 'raw'} escape)",
            )
        )

        print(f"R^2: {r_value**2}")
        print(f"R: {r_value}")
        corr_chart.display()
R^2: 0.058869542821923554 R: 0.24263046556836912
R^2: 0.02505079303290191 R: 0.15827442317981105
R^2: 0.29906373786543666 R: 0.5468672031356759
R^2: 0.4214133166189432 R: 0.6491635515175996
In [ ]: