Validation neutralization assays versus polyclonal fitsΒΆ

Compare actual measured neutralization values for specific mutants to the polyclonal fits.

Import Python modules:

[1]:
import os
import pickle

import altair as alt

import pandas as pd

import yaml

Read configuration and validation assay measurements:

[2]:
with open("config.yaml") as f:
    config = yaml.safe_load(f)

validation_ic50s = pd.read_csv(config["validation_ic50s"], na_filter=None)

validation_ic50s
[2]:
antibody aa_substitutions measured IC50 lower_bound
0 LyCoV-1404 0.00187 False
1 LyCoV-1404 F486L 0.00143 False
2 LyCoV-1404 N439Y 1.12000 False
3 LyCoV-1404 K444N 4.00000 True
4 LyCoV-1404 S446T 0.48500 False
5 LyCoV-1404 G447D 4.00000 True
6 LyCoV-1404 P499H 4.00000 True
7 CC67.105 2.19000 False
8 CC67.105 D1146N 300.00000 True
9 CC67.105 D1153Y 300.00000 True
10 CC67.105 F1156L 300.00000 True
11 CC67.105 D1163R 9.43000 False
12 NTD_5-7 0.28000 False
13 NTD_5-7 G103F 96.00000 True
14 NTD_5-7 L176K 96.00000 True
15 NTD_5-7 S172N 96.00000 True
16 CC9.104 2.34000 False
17 CC9.104 D1146N 2.90000 False
18 CC9.104 D1153Y 300.00000 True
19 CC9.104 F1156L 269.00000 False
20 CC9.104 D1163R 28.60000 False

Now get the predictions by the averaged polyclonal model fits:

[3]:
validation_vs_prediction = []
for antibody, antibody_df in validation_ic50s.groupby("antibody"):
    with open(os.path.join(config["escape_dir"], f"{antibody}.pickle"), "rb") as f:
        model = pickle.load(f)
    validation_vs_prediction.append(model.icXX(antibody_df))

validation_vs_prediction = pd.concat(validation_vs_prediction, ignore_index=True)

validation_vs_prediction
[3]:
antibody aa_substitutions measured IC50 lower_bound mean_IC50 median_IC50 std_IC50 n_models frac_models
0 CC67.105 2.19000 False 2.465804 2.465804 0.200788 2 1.0
1 CC67.105 D1146N 300.00000 True 1468.569818 1468.569818 648.994727 2 1.0
2 CC67.105 D1153Y 300.00000 True 1232.814936 1232.814936 900.657655 2 1.0
3 CC67.105 D1163R 9.43000 False 13.646795 13.646795 0.084466 2 1.0
4 CC67.105 F1156L 300.00000 True 768.075469 768.075469 281.559387 2 1.0
5 CC9.104 2.34000 False 6.545884 6.545884 1.483351 2 1.0
6 CC9.104 D1146N 2.90000 False 10.638997 10.638997 4.709676 2 1.0
7 CC9.104 D1153Y 300.00000 True 404.286520 404.286520 159.859789 2 1.0
8 CC9.104 D1163R 28.60000 False 49.436653 49.436653 6.927173 2 1.0
9 CC9.104 F1156L 269.00000 False 579.525276 579.525276 72.243821 2 1.0
10 LyCoV-1404 0.00187 False 0.014724 0.011228 0.008491 4 1.0
11 LyCoV-1404 F486L 0.00143 False 0.011964 0.009337 0.007061 4 1.0
12 LyCoV-1404 G447D 4.00000 True 21.311019 8.213625 28.776157 4 1.0
13 LyCoV-1404 K444N 4.00000 True 6.623210 1.338305 11.132054 4 1.0
14 LyCoV-1404 N439Y 1.12000 False 6.436479 1.279219 11.179901 4 1.0
15 LyCoV-1404 P499H 4.00000 True 43.618055 2.720276 82.627223 4 1.0
16 LyCoV-1404 S446T 0.48500 False 1.278914 0.674567 1.431229 4 1.0
17 NTD_5-7 0.28000 False 26.914560 26.914560 1.835849 2 1.0
18 NTD_5-7 G103F 96.00000 True 222.142356 222.142356 49.373198 2 1.0
19 NTD_5-7 L176K 96.00000 True 320.491383 320.491383 85.772741 2 1.0
20 NTD_5-7 S172N 96.00000 True 312.404917 312.404917 109.658037 2 1.0

Now plot the results. We will plot the median across the replicate polyclonal fits to different deep mutational scanning replicates. This is an interactive plot that you can mouse over for details:

[4]:
corr_chart = (
    alt.Chart(validation_vs_prediction)
    .encode(
        x=alt.X(
            "measured IC50",
            title="measured IC50 (ug/ml)",
            scale=alt.Scale(type="log"),
        ),
        y=alt.Y(
            "median_IC50",
            title="predicted IC50 (arbitrary units)",
            scale=alt.Scale(type="log"),
        ),
        facet=alt.Facet("antibody", columns=4, title=None),
        color=alt.Color("lower_bound", title="lower bound"),
        tooltip=[
            alt.Tooltip(c, format=".3g") if validation_vs_prediction[c].dtype == float
            else c
            for c in validation_vs_prediction.columns.tolist()
        ],
    )
    .mark_circle(filled=True, size=60, opacity=0.6)
    .configure_axis(grid=False)
    .resolve_scale(y="independent", x="independent")
    .properties(width=150, height=150)
)

corr_chart
/fh/fast/bloom_j/software/miniconda3/envs/dms-vep-pipeline/lib/python3.9/site-packages/altair/utils/core.py:317: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead.
  for col_name, dtype in df.dtypes.iteritems():
[4]:

Now also calculate the fold changes, using the median prediction:

[5]:
fold_changes = (
    validation_vs_prediction
    .rename(columns={"median_IC50": "predicted IC50"})
    .query("aa_substitutions != ''")
    [["antibody", "aa_substitutions", "measured IC50", "predicted IC50", "lower_bound"]]
    .merge(
        validation_vs_prediction
        .rename(columns={"median_IC50": "predicted IC50"})
        .query("aa_substitutions == ''")
        [["antibody", "measured IC50", "predicted IC50"]],
        on="antibody",
        how="left",
        validate="many_to_one",
        suffixes=[" mutant", " unmutated"],
    )
    .assign(
        measured_fold_change=lambda x: x["measured IC50 mutant"] / x["measured IC50 unmutated"],
        predicted_fold_change=lambda x: x["predicted IC50 mutant"] / x["predicted IC50 unmutated"],
    )
)

fold_changes
[5]:
antibody aa_substitutions measured IC50 mutant predicted IC50 mutant lower_bound measured IC50 unmutated predicted IC50 unmutated measured_fold_change predicted_fold_change
0 CC67.105 D1146N 300.00000 1468.569818 True 2.19000 2.465804 136.986301 595.574434
1 CC67.105 D1153Y 300.00000 1232.814936 True 2.19000 2.465804 136.986301 499.964692
2 CC67.105 D1163R 9.43000 13.646795 False 2.19000 2.465804 4.305936 5.534420
3 CC67.105 F1156L 300.00000 768.075469 True 2.19000 2.465804 136.986301 311.490885
4 CC9.104 D1146N 2.90000 10.638997 False 2.34000 6.545884 1.239316 1.625296
5 CC9.104 D1153Y 300.00000 404.286520 True 2.34000 6.545884 128.205128 61.761945
6 CC9.104 D1163R 28.60000 49.436653 False 2.34000 6.545884 12.222222 7.552326
7 CC9.104 F1156L 269.00000 579.525276 False 2.34000 6.545884 114.957265 88.532776
8 LyCoV-1404 F486L 0.00143 0.009337 False 0.00187 0.011228 0.764706 0.831574
9 LyCoV-1404 G447D 4.00000 8.213625 True 0.00187 0.011228 2139.037433 731.511692
10 LyCoV-1404 K444N 4.00000 1.338305 True 0.00187 0.011228 2139.037433 119.190499
11 LyCoV-1404 N439Y 1.12000 1.279219 False 0.00187 0.011228 598.930481 113.928225
12 LyCoV-1404 P499H 4.00000 2.720276 True 0.00187 0.011228 2139.037433 242.269825
13 LyCoV-1404 S446T 0.48500 0.674567 False 0.00187 0.011228 259.358289 60.077411
14 NTD_5-7 G103F 96.00000 222.142356 True 0.28000 26.914560 342.857143 8.253613
15 NTD_5-7 L176K 96.00000 320.491383 True 0.28000 26.914560 342.857143 11.907733
16 NTD_5-7 S172N 96.00000 312.404917 True 0.28000 26.914560 342.857143 11.607283

Now plot the fold changes:

[6]:
fold_change_chart = (
    alt.Chart(fold_changes)
    .encode(
        x=alt.X(
            "measured_fold_change",
            title="measured fold change IC50",
            scale=alt.Scale(type="log"),
        ),
        y=alt.Y(
            "predicted_fold_change",
            title="predicted fold change IC50",
            scale=alt.Scale(type="log"),
        ),
        facet=alt.Facet("antibody", columns=4, title=None),
        color=alt.Color("lower_bound", title="lower bound"),
        tooltip=[
            alt.Tooltip(c, format=".3g") if fold_changes[c].dtype == float
            else c
            for c in fold_changes.columns.tolist()
        ],
    )
    .mark_circle(filled=True, size=60, opacity=0.6)
    .configure_axis(grid=False)
    .resolve_scale(y="independent", x="independent")
    .properties(width=150, height=150)
)

fold_change_chart
/fh/fast/bloom_j/software/miniconda3/envs/dms-vep-pipeline/lib/python3.9/site-packages/altair/utils/core.py:317: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead.
  for col_name, dtype in df.dtypes.iteritems():
[6]:
[ ]: