Validation neutralization assays versus polyclonal fitsΒΆ
Compare actual measured neutralization values for specific mutants to the polyclonal fits.
Import Python modules:
[1]:
import os
import pickle
import altair as alt
import pandas as pd
import yaml
Read configuration and validation assay measurements:
[2]:
with open("config.yaml") as f:
config = yaml.safe_load(f)
validation_ic50s = pd.read_csv(config["validation_ic50s"], na_filter=None)
validation_ic50s
[2]:
| antibody | aa_substitutions | measured IC50 | lower_bound | |
|---|---|---|---|---|
| 0 | LyCoV-1404 | 0.00187 | False | |
| 1 | LyCoV-1404 | F486L | 0.00143 | False |
| 2 | LyCoV-1404 | N439Y | 1.12000 | False |
| 3 | LyCoV-1404 | K444N | 4.00000 | True |
| 4 | LyCoV-1404 | S446T | 0.48500 | False |
| 5 | LyCoV-1404 | G447D | 4.00000 | True |
| 6 | LyCoV-1404 | P499H | 4.00000 | True |
| 7 | CC67.105 | 2.19000 | False | |
| 8 | CC67.105 | D1146N | 300.00000 | True |
| 9 | CC67.105 | D1153Y | 300.00000 | True |
| 10 | CC67.105 | F1156L | 300.00000 | True |
| 11 | CC67.105 | D1163R | 9.43000 | False |
| 12 | NTD_5-7 | 0.28000 | False | |
| 13 | NTD_5-7 | G103F | 96.00000 | True |
| 14 | NTD_5-7 | L176K | 96.00000 | True |
| 15 | NTD_5-7 | S172N | 96.00000 | True |
| 16 | CC9.104 | 2.34000 | False | |
| 17 | CC9.104 | D1146N | 2.90000 | False |
| 18 | CC9.104 | D1153Y | 300.00000 | True |
| 19 | CC9.104 | F1156L | 269.00000 | False |
| 20 | CC9.104 | D1163R | 28.60000 | False |
Now get the predictions by the averaged polyclonal model fits:
[3]:
validation_vs_prediction = []
for antibody, antibody_df in validation_ic50s.groupby("antibody"):
with open(os.path.join(config["escape_dir"], f"{antibody}.pickle"), "rb") as f:
model = pickle.load(f)
validation_vs_prediction.append(model.icXX(antibody_df))
validation_vs_prediction = pd.concat(validation_vs_prediction, ignore_index=True)
validation_vs_prediction
[3]:
| antibody | aa_substitutions | measured IC50 | lower_bound | mean_IC50 | median_IC50 | std_IC50 | n_models | frac_models | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | CC67.105 | 2.19000 | False | 2.465804 | 2.465804 | 0.200788 | 2 | 1.0 | |
| 1 | CC67.105 | D1146N | 300.00000 | True | 1468.569818 | 1468.569818 | 648.994727 | 2 | 1.0 |
| 2 | CC67.105 | D1153Y | 300.00000 | True | 1232.814936 | 1232.814936 | 900.657655 | 2 | 1.0 |
| 3 | CC67.105 | D1163R | 9.43000 | False | 13.646795 | 13.646795 | 0.084466 | 2 | 1.0 |
| 4 | CC67.105 | F1156L | 300.00000 | True | 768.075469 | 768.075469 | 281.559387 | 2 | 1.0 |
| 5 | CC9.104 | 2.34000 | False | 6.545884 | 6.545884 | 1.483351 | 2 | 1.0 | |
| 6 | CC9.104 | D1146N | 2.90000 | False | 10.638997 | 10.638997 | 4.709676 | 2 | 1.0 |
| 7 | CC9.104 | D1153Y | 300.00000 | True | 404.286520 | 404.286520 | 159.859789 | 2 | 1.0 |
| 8 | CC9.104 | D1163R | 28.60000 | False | 49.436653 | 49.436653 | 6.927173 | 2 | 1.0 |
| 9 | CC9.104 | F1156L | 269.00000 | False | 579.525276 | 579.525276 | 72.243821 | 2 | 1.0 |
| 10 | LyCoV-1404 | 0.00187 | False | 0.014724 | 0.011228 | 0.008491 | 4 | 1.0 | |
| 11 | LyCoV-1404 | F486L | 0.00143 | False | 0.011964 | 0.009337 | 0.007061 | 4 | 1.0 |
| 12 | LyCoV-1404 | G447D | 4.00000 | True | 21.311019 | 8.213625 | 28.776157 | 4 | 1.0 |
| 13 | LyCoV-1404 | K444N | 4.00000 | True | 6.623210 | 1.338305 | 11.132054 | 4 | 1.0 |
| 14 | LyCoV-1404 | N439Y | 1.12000 | False | 6.436479 | 1.279219 | 11.179901 | 4 | 1.0 |
| 15 | LyCoV-1404 | P499H | 4.00000 | True | 43.618055 | 2.720276 | 82.627223 | 4 | 1.0 |
| 16 | LyCoV-1404 | S446T | 0.48500 | False | 1.278914 | 0.674567 | 1.431229 | 4 | 1.0 |
| 17 | NTD_5-7 | 0.28000 | False | 26.914560 | 26.914560 | 1.835849 | 2 | 1.0 | |
| 18 | NTD_5-7 | G103F | 96.00000 | True | 222.142356 | 222.142356 | 49.373198 | 2 | 1.0 |
| 19 | NTD_5-7 | L176K | 96.00000 | True | 320.491383 | 320.491383 | 85.772741 | 2 | 1.0 |
| 20 | NTD_5-7 | S172N | 96.00000 | True | 312.404917 | 312.404917 | 109.658037 | 2 | 1.0 |
Now plot the results. We will plot the median across the replicate polyclonal fits to different deep mutational scanning replicates. This is an interactive plot that you can mouse over for details:
[4]:
corr_chart = (
alt.Chart(validation_vs_prediction)
.encode(
x=alt.X(
"measured IC50",
title="measured IC50 (ug/ml)",
scale=alt.Scale(type="log"),
),
y=alt.Y(
"median_IC50",
title="predicted IC50 (arbitrary units)",
scale=alt.Scale(type="log"),
),
facet=alt.Facet("antibody", columns=4, title=None),
color=alt.Color("lower_bound", title="lower bound"),
tooltip=[
alt.Tooltip(c, format=".3g") if validation_vs_prediction[c].dtype == float
else c
for c in validation_vs_prediction.columns.tolist()
],
)
.mark_circle(filled=True, size=60, opacity=0.6)
.configure_axis(grid=False)
.resolve_scale(y="independent", x="independent")
.properties(width=150, height=150)
)
corr_chart
/fh/fast/bloom_j/software/miniconda3/envs/dms-vep-pipeline/lib/python3.9/site-packages/altair/utils/core.py:317: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead.
for col_name, dtype in df.dtypes.iteritems():
[4]:
Now also calculate the fold changes, using the median prediction:
[5]:
fold_changes = (
validation_vs_prediction
.rename(columns={"median_IC50": "predicted IC50"})
.query("aa_substitutions != ''")
[["antibody", "aa_substitutions", "measured IC50", "predicted IC50", "lower_bound"]]
.merge(
validation_vs_prediction
.rename(columns={"median_IC50": "predicted IC50"})
.query("aa_substitutions == ''")
[["antibody", "measured IC50", "predicted IC50"]],
on="antibody",
how="left",
validate="many_to_one",
suffixes=[" mutant", " unmutated"],
)
.assign(
measured_fold_change=lambda x: x["measured IC50 mutant"] / x["measured IC50 unmutated"],
predicted_fold_change=lambda x: x["predicted IC50 mutant"] / x["predicted IC50 unmutated"],
)
)
fold_changes
[5]:
| antibody | aa_substitutions | measured IC50 mutant | predicted IC50 mutant | lower_bound | measured IC50 unmutated | predicted IC50 unmutated | measured_fold_change | predicted_fold_change | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | CC67.105 | D1146N | 300.00000 | 1468.569818 | True | 2.19000 | 2.465804 | 136.986301 | 595.574434 |
| 1 | CC67.105 | D1153Y | 300.00000 | 1232.814936 | True | 2.19000 | 2.465804 | 136.986301 | 499.964692 |
| 2 | CC67.105 | D1163R | 9.43000 | 13.646795 | False | 2.19000 | 2.465804 | 4.305936 | 5.534420 |
| 3 | CC67.105 | F1156L | 300.00000 | 768.075469 | True | 2.19000 | 2.465804 | 136.986301 | 311.490885 |
| 4 | CC9.104 | D1146N | 2.90000 | 10.638997 | False | 2.34000 | 6.545884 | 1.239316 | 1.625296 |
| 5 | CC9.104 | D1153Y | 300.00000 | 404.286520 | True | 2.34000 | 6.545884 | 128.205128 | 61.761945 |
| 6 | CC9.104 | D1163R | 28.60000 | 49.436653 | False | 2.34000 | 6.545884 | 12.222222 | 7.552326 |
| 7 | CC9.104 | F1156L | 269.00000 | 579.525276 | False | 2.34000 | 6.545884 | 114.957265 | 88.532776 |
| 8 | LyCoV-1404 | F486L | 0.00143 | 0.009337 | False | 0.00187 | 0.011228 | 0.764706 | 0.831574 |
| 9 | LyCoV-1404 | G447D | 4.00000 | 8.213625 | True | 0.00187 | 0.011228 | 2139.037433 | 731.511692 |
| 10 | LyCoV-1404 | K444N | 4.00000 | 1.338305 | True | 0.00187 | 0.011228 | 2139.037433 | 119.190499 |
| 11 | LyCoV-1404 | N439Y | 1.12000 | 1.279219 | False | 0.00187 | 0.011228 | 598.930481 | 113.928225 |
| 12 | LyCoV-1404 | P499H | 4.00000 | 2.720276 | True | 0.00187 | 0.011228 | 2139.037433 | 242.269825 |
| 13 | LyCoV-1404 | S446T | 0.48500 | 0.674567 | False | 0.00187 | 0.011228 | 259.358289 | 60.077411 |
| 14 | NTD_5-7 | G103F | 96.00000 | 222.142356 | True | 0.28000 | 26.914560 | 342.857143 | 8.253613 |
| 15 | NTD_5-7 | L176K | 96.00000 | 320.491383 | True | 0.28000 | 26.914560 | 342.857143 | 11.907733 |
| 16 | NTD_5-7 | S172N | 96.00000 | 312.404917 | True | 0.28000 | 26.914560 | 342.857143 | 11.607283 |
Now plot the fold changes:
[6]:
fold_change_chart = (
alt.Chart(fold_changes)
.encode(
x=alt.X(
"measured_fold_change",
title="measured fold change IC50",
scale=alt.Scale(type="log"),
),
y=alt.Y(
"predicted_fold_change",
title="predicted fold change IC50",
scale=alt.Scale(type="log"),
),
facet=alt.Facet("antibody", columns=4, title=None),
color=alt.Color("lower_bound", title="lower bound"),
tooltip=[
alt.Tooltip(c, format=".3g") if fold_changes[c].dtype == float
else c
for c in fold_changes.columns.tolist()
],
)
.mark_circle(filled=True, size=60, opacity=0.6)
.configure_axis(grid=False)
.resolve_scale(y="independent", x="independent")
.properties(width=150, height=150)
)
fold_change_chart
/fh/fast/bloom_j/software/miniconda3/envs/dms-vep-pipeline/lib/python3.9/site-packages/altair/utils/core.py:317: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead.
for col_name, dtype in df.dtypes.iteritems():
[6]:
[ ]: