Validation neutralization assays versus `polyclonal` fits¶

Compare the experimentally measured neutralization values (IC50s) for specific mutants to the values predicted by the `polyclonal` fits.

Import Python modules:

[1]:

import os
import pickle

import altair as alt

import pandas as pd

import yaml

Read configuration and validation assay measurements:

[2]:

# Parse the pipeline configuration with the safe YAML loader.
with open("config.yaml") as config_handle:
    config = yaml.safe_load(config_handle)

# The CSV path comes from the `validation_ic50s` entry of the configuration.
# NOTE(review): `na_filter=None` is falsy and presumably behaves like
# `na_filter=False` (no NA detection), so an empty `aa_substitutions` field
# stays the empty string that later cells compare against -- confirm against
# the pandas documentation.
validation_csv_path = config["validation_ic50s"]
validation_ic50s = pd.read_csv(validation_csv_path, na_filter=None)

# Display the table as the cell output.
validation_ic50s

[2]:

	antibody	aa_substitutions	measured IC50	lower_bound
0	LyCoV-1404		0.00187	False
1	LyCoV-1404	F486L	0.00143	False
2	LyCoV-1404	N439Y	1.12000	False
3	LyCoV-1404	K444N	4.00000	True
4	LyCoV-1404	S446T	0.48500	False
5	LyCoV-1404	G447D	4.00000	True
6	LyCoV-1404	P499H	4.00000	True
7	CC67.105		2.19000	False
8	CC67.105	D1146N	300.00000	True
9	CC67.105	D1153Y	300.00000	True
10	CC67.105	F1156L	300.00000	True
11	CC67.105	D1163R	9.43000	False
12	NTD_5-7		0.28000	False
13	NTD_5-7	G103F	96.00000	True
14	NTD_5-7	L176K	96.00000	True
15	NTD_5-7	S172N	96.00000	True
16	CC9.104		2.34000	False
17	CC9.104	D1146N	2.90000	False
18	CC9.104	D1153Y	300.00000	True
19	CC9.104	F1156L	269.00000	False
20	CC9.104	D1163R	28.60000	False

Now get the predictions from the averaged `polyclonal` model fits:

[3]:

# Build one prediction table per antibody, then stack them into a single frame.
per_antibody_predictions = []
for antibody_name, antibody_measurements in validation_ic50s.groupby("antibody"):
    # Each antibody has its own pickled model file inside `escape_dir`.
    model_path = os.path.join(config["escape_dir"], f"{antibody_name}.pickle")
    # NOTE(review): unpickling can execute arbitrary code; this is only
    # appropriate if the pickle files are trusted outputs of this pipeline.
    with open(model_path, "rb") as model_handle:
        fitted_model = pickle.load(model_handle)
    # `icXX` is called on this antibody's measurement rows; the displayed
    # output suggests it returns them with IC50 summary columns added
    # (e.g. `median_IC50`) -- semantics are defined by the pickled model class.
    per_antibody_predictions.append(fitted_model.icXX(antibody_measurements))

# Concatenate the per-antibody tables and renumber the rows from zero.
validation_vs_prediction = pd.concat(per_antibody_predictions, ignore_index=True)

# Display the combined table as the cell output.
validation_vs_prediction

[3]:

	antibody	aa_substitutions	measured IC50	lower_bound	mean_IC50	median_IC50	std_IC50	n_models	frac_models
0	CC67.105		2.19000	False	2.468402	2.468402	0.198780	2	1.0
1	CC67.105	D1146N	300.00000	True	1467.517641	1467.517641	638.535106	2	1.0
2	CC67.105	D1153Y	300.00000	True	1232.753267	1232.753267	902.841544	2	1.0
3	CC67.105	D1163R	9.43000	False	13.725926	13.725926	0.130053	2	1.0
4	CC67.105	F1156L	300.00000	True	766.805532	766.805532	280.191446	2	1.0
5	CC9.104		2.34000	False	6.550473	6.550473	1.482620	2	1.0
6	CC9.104	D1146N	2.90000	False	10.674126	10.674126	4.723801	2	1.0
7	CC9.104	D1153Y	300.00000	True	404.157279	404.157279	159.941756	2	1.0
8	CC9.104	D1163R	28.60000	False	49.462293	49.462293	6.903959	2	1.0
9	CC9.104	F1156L	269.00000	False	578.763417	578.763417	71.288123	2	1.0
10	LyCoV-1404		0.00187	False	0.014739	0.011242	0.008504	4	1.0
11	LyCoV-1404	F486L	0.00143	False	0.011952	0.009339	0.007061	4	1.0
12	LyCoV-1404	G447D	4.00000	True	21.401612	8.185022	28.996188	4	1.0
13	LyCoV-1404	K444N	4.00000	True	6.591168	1.338001	11.068115	4	1.0
14	LyCoV-1404	N439Y	1.12000	False	6.421553	1.280491	11.148465	4	1.0
15	LyCoV-1404	P499H	4.00000	True	43.608651	2.722700	82.611823	4	1.0
16	LyCoV-1404	S446T	0.48500	False	1.278148	0.672045	1.432830	4	1.0
17	NTD_5-7		0.28000	False	26.911953	26.911953	1.859189	2	1.0
18	NTD_5-7	G103F	96.00000	True	222.361415	222.361415	48.521059	2	1.0
19	NTD_5-7	L176K	96.00000	True	320.594043	320.594043	85.134741	2	1.0
20	NTD_5-7	S172N	96.00000	True	312.500275	312.500275	110.120694	2	1.0

Now plot the results. We plot the median prediction across the `polyclonal` models fit to the different deep mutational scanning replicates. This is an interactive plot that you can mouse over for details:

[4]:

# Tooltip entries: every column is shown on mouse-over; columns whose dtype
# compares equal to `float` get the ".3g" number format, the rest are passed
# through by name.
corr_tooltips = []
for column_name in validation_vs_prediction.columns.tolist():
    if validation_vs_prediction[column_name].dtype == float:
        corr_tooltips.append(alt.Tooltip(column_name, format=".3g"))
    else:
        corr_tooltips.append(column_name)

# Measured IC50 on x, median predicted IC50 on y, both on log scales.
corr_x = alt.X(
    "measured IC50",
    title="measured IC50 (ug/ml)",
    scale=alt.Scale(type="log"),
)
corr_y = alt.Y(
    "median_IC50",
    title="predicted IC50 (arbitrary units)",
    scale=alt.Scale(type="log"),
)

# One facet per antibody (up to 4 per row), points colored by whether the
# measurement is flagged as a lower bound; x and y scales are resolved
# independently rather than shared.
corr_chart = (
    alt.Chart(validation_vs_prediction)
    .encode(
        x=corr_x,
        y=corr_y,
        facet=alt.Facet("antibody", columns=4, title=None),
        color=alt.Color("lower_bound", title="lower bound"),
        tooltip=corr_tooltips,
    )
    .mark_circle(filled=True, size=60, opacity=0.6)
    .configure_axis(grid=False)
    .resolve_scale(y="independent", x="independent")
    .properties(width=150, height=150)
)

# Display the chart as the cell output.
corr_chart

/fh/fast/bloom_j/computational_notebooks/jbloom/2022/SARS-CoV-2_Omicron_BA.1_spike_DMS_mAbs/.snakemake/conda/a73ad69c741ab6d85d86c04aa086afcd_/lib/python3.10/site-packages/altair/utils/core.py:317: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead.
  for col_name, dtype in df.dtypes.iteritems():

[4]:

Now also calculate the fold changes, using the median prediction:

[5]:

# Use the median across models as the "predicted IC50" for fold changes.
predictions_renamed = validation_vs_prediction.rename(
    columns={"median_IC50": "predicted IC50"}
)

# Rows with a non-empty `aa_substitutions` are the mutants ...
mutant_rows = predictions_renamed.query("aa_substitutions != ''")[
    ["antibody", "aa_substitutions", "measured IC50", "predicted IC50", "lower_bound"]
]
# ... and rows with an empty `aa_substitutions` are the unmutated reference.
unmutated_rows = predictions_renamed.query("aa_substitutions == ''")[
    ["antibody", "measured IC50", "predicted IC50"]
]

# Attach each antibody's unmutated IC50s to its mutant rows. `validate`
# makes pandas raise if an antibody has more than one unmutated row.
mutant_vs_unmutated = mutant_rows.merge(
    unmutated_rows,
    on="antibody",
    how="left",
    validate="many_to_one",
    suffixes=[" mutant", " unmutated"],
)

# Fold change = mutant IC50 / unmutated IC50, for measured and predicted values.
fold_changes = mutant_vs_unmutated.assign(
    measured_fold_change=(
        mutant_vs_unmutated["measured IC50 mutant"]
        / mutant_vs_unmutated["measured IC50 unmutated"]
    ),
    predicted_fold_change=(
        mutant_vs_unmutated["predicted IC50 mutant"]
        / mutant_vs_unmutated["predicted IC50 unmutated"]
    ),
)

# Display the fold-change table as the cell output.
fold_changes

[5]:

	antibody	aa_substitutions	measured IC50 mutant	predicted IC50 mutant	lower_bound	measured IC50 unmutated	predicted IC50 unmutated	measured_fold_change	predicted_fold_change
0	CC67.105	D1146N	300.00000	1467.517641	True	2.19000	2.468402	136.986301	594.521237
1	CC67.105	D1153Y	300.00000	1232.753267	True	2.19000	2.468402	136.986301	499.413415
2	CC67.105	D1163R	9.43000	13.725926	False	2.19000	2.468402	4.305936	5.560652
3	CC67.105	F1156L	300.00000	766.805532	True	2.19000	2.468402	136.986301	310.648513
4	CC9.104	D1146N	2.90000	10.674126	False	2.34000	6.550473	1.239316	1.629520
5	CC9.104	D1153Y	300.00000	404.157279	True	2.34000	6.550473	128.205128	61.698951
6	CC9.104	D1163R	28.60000	49.462293	False	2.34000	6.550473	12.222222	7.550950
7	CC9.104	F1156L	269.00000	578.763417	False	2.34000	6.550473	114.957265	88.354453
8	LyCoV-1404	F486L	0.00143	0.009339	False	0.00187	0.011242	0.764706	0.830738
9	LyCoV-1404	G447D	4.00000	8.185022	True	0.00187	0.011242	2139.037433	728.068939
10	LyCoV-1404	K444N	4.00000	1.338001	True	0.00187	0.011242	2139.037433	119.017040
11	LyCoV-1404	N439Y	1.12000	1.280491	False	0.00187	0.011242	598.930481	113.901413
12	LyCoV-1404	P499H	4.00000	2.722700	True	0.00187	0.011242	2139.037433	242.187879
13	LyCoV-1404	S446T	0.48500	0.672045	False	0.00187	0.011242	259.358289	59.779346
14	NTD_5-7	G103F	96.00000	222.361415	True	0.28000	26.911953	342.857143	8.262552
15	NTD_5-7	L176K	96.00000	320.594043	True	0.28000	26.911953	342.857143	11.912701
16	NTD_5-7	S172N	96.00000	312.500275	True	0.28000	26.911953	342.857143	11.611951

Now plot the fold changes:

[6]:

# Tooltip entries: every column is shown on mouse-over; columns whose dtype
# compares equal to `float` get the ".3g" number format, the rest are passed
# through by name.
fold_change_tooltips = []
for column_name in fold_changes.columns.tolist():
    if fold_changes[column_name].dtype == float:
        fold_change_tooltips.append(alt.Tooltip(column_name, format=".3g"))
    else:
        fold_change_tooltips.append(column_name)

# Measured fold change on x, predicted fold change on y, both on log scales.
fold_change_x = alt.X(
    "measured_fold_change",
    title="measured fold change IC50",
    scale=alt.Scale(type="log"),
)
fold_change_y = alt.Y(
    "predicted_fold_change",
    title="predicted fold change IC50",
    scale=alt.Scale(type="log"),
)

# One facet per antibody (up to 4 per row), points colored by whether the
# mutant measurement is flagged as a lower bound; x and y scales are resolved
# independently rather than shared.
fold_change_chart = (
    alt.Chart(fold_changes)
    .encode(
        x=fold_change_x,
        y=fold_change_y,
        facet=alt.Facet("antibody", columns=4, title=None),
        color=alt.Color("lower_bound", title="lower bound"),
        tooltip=fold_change_tooltips,
    )
    .mark_circle(filled=True, size=60, opacity=0.6)
    .configure_axis(grid=False)
    .resolve_scale(y="independent", x="independent")
    .properties(width=150, height=150)
)

# Display the chart as the cell output.
fold_change_chart

/fh/fast/bloom_j/computational_notebooks/jbloom/2022/SARS-CoV-2_Omicron_BA.1_spike_DMS_mAbs/.snakemake/conda/a73ad69c741ab6d85d86c04aa086afcd_/lib/python3.10/site-packages/altair/utils/core.py:317: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead.
  for col_name, dtype in df.dtypes.iteritems():

[6]:

[ ]:

Validation neutralization assays versus polyclonal fits¶

Validation neutralization assays versus `polyclonal` fits¶