Validation neutralization assays versus polyclonal fits

Compare the experimentally measured neutralization values (IC50s) for specific mutants to the values predicted by the polyclonal fits.

Import Python modules:

[1]:
import os
import pickle

import altair as alt

import pandas as pd

import yaml

Read configuration and validation assay measurements:

[2]:
# Load the configuration, which supplies the file paths used in this notebook.
with open("config.yaml") as f:
    config = yaml.safe_load(f)

# Disable NA detection so that unmutated entries keep an empty-string
# `aa_substitutions` rather than NaN (later cells select them with
# `aa_substitutions == ''`). `na_filter` is documented as a bool, so pass
# `False` explicitly instead of relying on `None` being treated as falsy.
validation_ic50s = pd.read_csv(config["validation_ic50s"], na_filter=False)

validation_ic50s
[2]:
antibody aa_substitutions measured IC50 lower_bound
0 LyCoV-1404 0.00187 False
1 LyCoV-1404 F486L 0.00143 False
2 LyCoV-1404 N439Y 1.12000 False
3 LyCoV-1404 K444N 4.00000 True
4 LyCoV-1404 S446T 0.48500 False
5 LyCoV-1404 G447D 4.00000 True
6 LyCoV-1404 P499H 4.00000 True
7 CC67.105 2.19000 False
8 CC67.105 D1146N 300.00000 True
9 CC67.105 D1153Y 300.00000 True
10 CC67.105 F1156L 300.00000 True
11 CC67.105 D1163R 9.43000 False
12 NTD_5-7 0.28000 False
13 NTD_5-7 G103F 96.00000 True
14 NTD_5-7 L176K 96.00000 True
15 NTD_5-7 S172N 96.00000 True
16 CC9.104 2.34000 False
17 CC9.104 D1146N 2.90000 False
18 CC9.104 D1153Y 300.00000 True
19 CC9.104 F1156L 269.00000 False
20 CC9.104 D1163R 28.60000 False

Now get the IC50s predicted by the averaged polyclonal model fits for each antibody:

[3]:
def _predicted_ic50s(antibody, measurements):
    """Return the `icXX` output of an antibody's pickled model for `measurements`.

    The model is read from `<escape_dir>/<antibody>.pickle`, where `escape_dir`
    comes from the configuration.
    """
    model_path = os.path.join(config["escape_dir"], f"{antibody}.pickle")
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(model_path, "rb") as model_file:
        fitted_model = pickle.load(model_file)
    return fitted_model.icXX(measurements)


# One prediction table per antibody, stacked into a single data frame.
validation_vs_prediction = pd.concat(
    [
        _predicted_ic50s(antibody, measurements)
        for antibody, measurements in validation_ic50s.groupby("antibody")
    ],
    ignore_index=True,
)

validation_vs_prediction
[3]:
antibody aa_substitutions measured IC50 lower_bound mean_IC50 median_IC50 std_IC50 n_models frac_models
0 CC67.105 2.19000 False 2.468402 2.468402 0.198780 2 1.0
1 CC67.105 D1146N 300.00000 True 1467.517641 1467.517641 638.535106 2 1.0
2 CC67.105 D1153Y 300.00000 True 1232.753267 1232.753267 902.841544 2 1.0
3 CC67.105 D1163R 9.43000 False 13.725926 13.725926 0.130053 2 1.0
4 CC67.105 F1156L 300.00000 True 766.805532 766.805532 280.191446 2 1.0
5 CC9.104 2.34000 False 6.550473 6.550473 1.482620 2 1.0
6 CC9.104 D1146N 2.90000 False 10.674126 10.674126 4.723801 2 1.0
7 CC9.104 D1153Y 300.00000 True 404.157279 404.157279 159.941756 2 1.0
8 CC9.104 D1163R 28.60000 False 49.462293 49.462293 6.903959 2 1.0
9 CC9.104 F1156L 269.00000 False 578.763417 578.763417 71.288123 2 1.0
10 LyCoV-1404 0.00187 False 0.014739 0.011242 0.008504 4 1.0
11 LyCoV-1404 F486L 0.00143 False 0.011952 0.009339 0.007061 4 1.0
12 LyCoV-1404 G447D 4.00000 True 21.401612 8.185022 28.996188 4 1.0
13 LyCoV-1404 K444N 4.00000 True 6.591168 1.338001 11.068115 4 1.0
14 LyCoV-1404 N439Y 1.12000 False 6.421553 1.280491 11.148465 4 1.0
15 LyCoV-1404 P499H 4.00000 True 43.608651 2.722700 82.611823 4 1.0
16 LyCoV-1404 S446T 0.48500 False 1.278148 0.672045 1.432830 4 1.0
17 NTD_5-7 0.28000 False 26.911953 26.911953 1.859189 2 1.0
18 NTD_5-7 G103F 96.00000 True 222.361415 222.361415 48.521059 2 1.0
19 NTD_5-7 L176K 96.00000 True 320.594043 320.594043 85.134741 2 1.0
20 NTD_5-7 S172N 96.00000 True 312.500275 312.500275 110.120694 2 1.0

Now plot the results. We plot the median prediction across the polyclonal models fit to the different deep mutational scanning replicates, with points colored by whether the measured IC50 is only a lower bound. This is an interactive plot: mouse over the points for details.

[4]:
# Tooltip fields: float columns are shown with 3 significant digits, all other
# columns are shown as-is.
corr_tooltips = [
    alt.Tooltip(col, format=".3g")
    if validation_vs_prediction[col].dtype == float
    else col
    for col in validation_vs_prediction.columns
]

# Log-log scatter of measured versus median predicted IC50, one facet per
# antibody, with axis scales resolved independently for each facet.
log_scale = alt.Scale(type="log")
corr_chart = (
    alt.Chart(validation_vs_prediction)
    .mark_circle(filled=True, size=60, opacity=0.6)
    .encode(
        x=alt.X("measured IC50", title="measured IC50 (ug/ml)", scale=log_scale),
        y=alt.Y(
            "median_IC50",
            title="predicted IC50 (arbitrary units)",
            scale=log_scale,
        ),
        facet=alt.Facet("antibody", columns=4, title=None),
        color=alt.Color("lower_bound", title="lower bound"),
        tooltip=corr_tooltips,
    )
    .properties(width=150, height=150)
    .resolve_scale(x="independent", y="independent")
    .configure_axis(grid=False)
)

corr_chart
/fh/fast/bloom_j/computational_notebooks/jbloom/2022/SARS-CoV-2_Omicron_BA.1_spike_DMS_mAbs/.snakemake/conda/a73ad69c741ab6d85d86c04aa086afcd_/lib/python3.10/site-packages/altair/utils/core.py:317: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead.
  for col_name, dtype in df.dtypes.iteritems():
[4]:

Now also calculate the fold change in IC50 of each mutant relative to the unmutated protein for the same antibody, using the median prediction:

[5]:
# Treat the median across models as the predicted IC50.
predictions = validation_vs_prediction.rename(
    columns={"median_IC50": "predicted IC50"}
)
# Rows with an empty `aa_substitutions` are the unmutated reference.
is_unmutated = predictions["aa_substitutions"] == ""

mutant_ic50s = predictions.loc[
    ~is_unmutated,
    ["antibody", "aa_substitutions", "measured IC50", "predicted IC50", "lower_bound"],
]
unmutated_ic50s = predictions.loc[
    is_unmutated,
    ["antibody", "measured IC50", "predicted IC50"],
]

# Attach each antibody's unmutated IC50s to its mutants (`validate` raises if
# an antibody has more than one unmutated row), then take mutant / unmutated.
fold_changes = (
    mutant_ic50s
    .merge(
        unmutated_ic50s,
        on="antibody",
        how="left",
        validate="many_to_one",
        suffixes=[" mutant", " unmutated"],
    )
    .assign(
        measured_fold_change=lambda df: (
            df["measured IC50 mutant"] / df["measured IC50 unmutated"]
        ),
        predicted_fold_change=lambda df: (
            df["predicted IC50 mutant"] / df["predicted IC50 unmutated"]
        ),
    )
)

fold_changes
[5]:
antibody aa_substitutions measured IC50 mutant predicted IC50 mutant lower_bound measured IC50 unmutated predicted IC50 unmutated measured_fold_change predicted_fold_change
0 CC67.105 D1146N 300.00000 1467.517641 True 2.19000 2.468402 136.986301 594.521237
1 CC67.105 D1153Y 300.00000 1232.753267 True 2.19000 2.468402 136.986301 499.413415
2 CC67.105 D1163R 9.43000 13.725926 False 2.19000 2.468402 4.305936 5.560652
3 CC67.105 F1156L 300.00000 766.805532 True 2.19000 2.468402 136.986301 310.648513
4 CC9.104 D1146N 2.90000 10.674126 False 2.34000 6.550473 1.239316 1.629520
5 CC9.104 D1153Y 300.00000 404.157279 True 2.34000 6.550473 128.205128 61.698951
6 CC9.104 D1163R 28.60000 49.462293 False 2.34000 6.550473 12.222222 7.550950
7 CC9.104 F1156L 269.00000 578.763417 False 2.34000 6.550473 114.957265 88.354453
8 LyCoV-1404 F486L 0.00143 0.009339 False 0.00187 0.011242 0.764706 0.830738
9 LyCoV-1404 G447D 4.00000 8.185022 True 0.00187 0.011242 2139.037433 728.068939
10 LyCoV-1404 K444N 4.00000 1.338001 True 0.00187 0.011242 2139.037433 119.017040
11 LyCoV-1404 N439Y 1.12000 1.280491 False 0.00187 0.011242 598.930481 113.901413
12 LyCoV-1404 P499H 4.00000 2.722700 True 0.00187 0.011242 2139.037433 242.187879
13 LyCoV-1404 S446T 0.48500 0.672045 False 0.00187 0.011242 259.358289 59.779346
14 NTD_5-7 G103F 96.00000 222.361415 True 0.28000 26.911953 342.857143 8.262552
15 NTD_5-7 L176K 96.00000 320.594043 True 0.28000 26.911953 342.857143 11.912701
16 NTD_5-7 S172N 96.00000 312.500275 True 0.28000 26.911953 342.857143 11.611951

Now plot the fold changes:

[6]:
# Tooltip fields: float columns are shown with 3 significant digits, all other
# columns are shown as-is.
fold_change_tooltips = [
    alt.Tooltip(col, format=".3g") if fold_changes[col].dtype == float else col
    for col in fold_changes.columns
]

# Log-log scatter of measured versus predicted fold change in IC50, one facet
# per antibody, with axis scales resolved independently for each facet.
fold_change_log_scale = alt.Scale(type="log")
fold_change_chart = (
    alt.Chart(fold_changes)
    .mark_circle(filled=True, size=60, opacity=0.6)
    .encode(
        x=alt.X(
            "measured_fold_change",
            title="measured fold change IC50",
            scale=fold_change_log_scale,
        ),
        y=alt.Y(
            "predicted_fold_change",
            title="predicted fold change IC50",
            scale=fold_change_log_scale,
        ),
        facet=alt.Facet("antibody", columns=4, title=None),
        color=alt.Color("lower_bound", title="lower bound"),
        tooltip=fold_change_tooltips,
    )
    .properties(width=150, height=150)
    .resolve_scale(x="independent", y="independent")
    .configure_axis(grid=False)
)

fold_change_chart
/fh/fast/bloom_j/computational_notebooks/jbloom/2022/SARS-CoV-2_Omicron_BA.1_spike_DMS_mAbs/.snakemake/conda/a73ad69c741ab6d85d86c04aa086afcd_/lib/python3.10/site-packages/altair/utils/core.py:317: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead.
  for col_name, dtype in df.dtypes.iteritems():
[6]:
[ ]: