Validation neutralization assays versus `polyclonal` fits¶

Compare actual measured neutralization values for specific mutants to the polyclonal fits.

Import Python modules:

[1]:

import os
import pickle

import altair as alt

import pandas as pd

import yaml

Read configuration and validation assay measurements:

[2]:

# Load the pipeline configuration; it supplies the path to the validation
# assay measurements read below.
with open("config.yaml") as config_file:
    config = yaml.safe_load(config_file)

# NA detection is switched off so that unmutated rows keep an empty-string
# `aa_substitutions` (as in the displayed table) rather than NaN.
validation_ic50s = pd.read_csv(
    config["validation_ic50s"],
    na_filter=None,
)

validation_ic50s

[2]:

	antibody	aa_substitutions	measured IC50	lower_bound
0	LyCoV-1404		0.00187	False
1	LyCoV-1404	F486L	0.00143	False
2	LyCoV-1404	N439Y	1.12000	False
3	LyCoV-1404	K444N	4.00000	True
4	LyCoV-1404	S446T	0.48500	False
5	LyCoV-1404	G447D	4.00000	True
6	LyCoV-1404	P499H	4.00000	True
7	CC67.105		2.19000	False
8	CC67.105	D1146N	300.00000	True
9	CC67.105	D1153Y	300.00000	True
10	CC67.105	F1156L	300.00000	True
11	CC67.105	D1163R	9.43000	False
12	NTD_5-7		0.28000	False
13	NTD_5-7	G103F	96.00000	True
14	NTD_5-7	L176K	96.00000	True
15	NTD_5-7	S172N	96.00000	True
16	CC9.104		2.34000	False
17	CC9.104	D1146N	2.90000	False
18	CC9.104	D1153Y	300.00000	True
19	CC9.104	F1156L	269.00000	False
20	CC9.104	D1163R	28.60000	False

Now get the predictions by the averaged polyclonal model fits:

[3]:

# Build one prediction table per antibody from that antibody's pickled model,
# then stack the per-antibody tables into a single data frame.
per_antibody_predictions = []
for antibody, antibody_df in validation_ic50s.groupby("antibody"):
    model_path = os.path.join(config["escape_dir"], f"{antibody}.pickle")
    # NOTE: unpickling can execute arbitrary code; only load model files you trust.
    with open(model_path, "rb") as model_file:
        model = pickle.load(model_file)
    # `icXX` is called on the measured rows for this antibody and its result
    # is kept as-is for concatenation.
    per_antibody_predictions.append(model.icXX(antibody_df))

validation_vs_prediction = pd.concat(per_antibody_predictions, ignore_index=True)

validation_vs_prediction

[3]:

	antibody	aa_substitutions	measured IC50	lower_bound	mean_IC50	median_IC50	std_IC50	n_models	frac_models
0	CC67.105		2.19000	False	2.465804	2.465804	0.200788	2	1.0
1	CC67.105	D1146N	300.00000	True	1468.569818	1468.569818	648.994727	2	1.0
2	CC67.105	D1153Y	300.00000	True	1232.814936	1232.814936	900.657655	2	1.0
3	CC67.105	D1163R	9.43000	False	13.646795	13.646795	0.084466	2	1.0
4	CC67.105	F1156L	300.00000	True	768.075469	768.075469	281.559387	2	1.0
5	CC9.104		2.34000	False	6.545884	6.545884	1.483351	2	1.0
6	CC9.104	D1146N	2.90000	False	10.638997	10.638997	4.709676	2	1.0
7	CC9.104	D1153Y	300.00000	True	404.286520	404.286520	159.859789	2	1.0
8	CC9.104	D1163R	28.60000	False	49.436653	49.436653	6.927173	2	1.0
9	CC9.104	F1156L	269.00000	False	579.525276	579.525276	72.243821	2	1.0
10	LyCoV-1404		0.00187	False	0.014724	0.011228	0.008491	4	1.0
11	LyCoV-1404	F486L	0.00143	False	0.011964	0.009337	0.007061	4	1.0
12	LyCoV-1404	G447D	4.00000	True	21.311019	8.213625	28.776157	4	1.0
13	LyCoV-1404	K444N	4.00000	True	6.623210	1.338305	11.132054	4	1.0
14	LyCoV-1404	N439Y	1.12000	False	6.436479	1.279219	11.179901	4	1.0
15	LyCoV-1404	P499H	4.00000	True	43.618055	2.720276	82.627223	4	1.0
16	LyCoV-1404	S446T	0.48500	False	1.278914	0.674567	1.431229	4	1.0
17	NTD_5-7		0.28000	False	26.914560	26.914560	1.835849	2	1.0
18	NTD_5-7	G103F	96.00000	True	222.142356	222.142356	49.373198	2	1.0
19	NTD_5-7	L176K	96.00000	True	320.491383	320.491383	85.772741	2	1.0
20	NTD_5-7	S172N	96.00000	True	312.404917	312.404917	109.658037	2	1.0

Now plot the results. We plot the median predicted IC50 across the polyclonal models fit to the different deep mutational scanning replicates. This is an interactive plot; mouse over the points for details:

[4]:

# Tooltip lists every column; float columns are shown with 3 significant digits.
corr_tooltips = [
    alt.Tooltip(col, format=".3g")
    if validation_vs_prediction[col].dtype == float
    else col
    for col in validation_vs_prediction.columns.tolist()
]

# Measured IC50 on x, median predicted IC50 on y, both on log scales.
corr_x = alt.X(
    "measured IC50",
    title="measured IC50 (ug/ml)",
    scale=alt.Scale(type="log"),
)
corr_y = alt.Y(
    "median_IC50",
    title="predicted IC50 (arbitrary units)",
    scale=alt.Scale(type="log"),
)

# One facet per antibody, each with its own x and y scale; points are colored
# by whether the measured value is a lower bound.
corr_chart = (
    alt.Chart(validation_vs_prediction)
    .mark_circle(filled=True, size=60, opacity=0.6)
    .encode(
        x=corr_x,
        y=corr_y,
        facet=alt.Facet("antibody", columns=4, title=None),
        color=alt.Color("lower_bound", title="lower bound"),
        tooltip=corr_tooltips,
    )
    .configure_axis(grid=False)
    .resolve_scale(y="independent", x="independent")
    .properties(width=150, height=150)
)

corr_chart

/fh/fast/bloom_j/software/miniconda3/envs/dms-vep-pipeline/lib/python3.9/site-packages/altair/utils/core.py:317: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead.
  for col_name, dtype in df.dtypes.iteritems():

[4]:

Now also calculate the fold change in IC50 of each mutant relative to the unmutated protein for the same antibody, using the median prediction:

[5]:

# Treat the median across models as the "predicted IC50".
renamed_predictions = validation_vs_prediction.rename(
    columns={"median_IC50": "predicted IC50"}
)

# Rows with an empty `aa_substitutions` are the unmutated reference for each antibody.
is_unmutated = renamed_predictions["aa_substitutions"] == ""

mutant_rows = renamed_predictions.loc[
    ~is_unmutated,
    ["antibody", "aa_substitutions", "measured IC50", "predicted IC50", "lower_bound"],
]
unmutated_rows = renamed_predictions.loc[
    is_unmutated,
    ["antibody", "measured IC50", "predicted IC50"],
]

# Attach the unmutated measured/predicted IC50 to every mutant of the same
# antibody; `validate` raises if an antibody has more than one unmutated row.
mutant_with_reference = mutant_rows.merge(
    unmutated_rows,
    on="antibody",
    how="left",
    validate="many_to_one",
    suffixes=[" mutant", " unmutated"],
)

# Fold change = mutant IC50 / unmutated IC50, for measured and predicted values.
fold_changes = mutant_with_reference.assign(
    measured_fold_change=(
        mutant_with_reference["measured IC50 mutant"]
        / mutant_with_reference["measured IC50 unmutated"]
    ),
    predicted_fold_change=(
        mutant_with_reference["predicted IC50 mutant"]
        / mutant_with_reference["predicted IC50 unmutated"]
    ),
)

fold_changes

[5]:

	antibody	aa_substitutions	measured IC50 mutant	predicted IC50 mutant	lower_bound	measured IC50 unmutated	predicted IC50 unmutated	measured_fold_change	predicted_fold_change
0	CC67.105	D1146N	300.00000	1468.569818	True	2.19000	2.465804	136.986301	595.574434
1	CC67.105	D1153Y	300.00000	1232.814936	True	2.19000	2.465804	136.986301	499.964692
2	CC67.105	D1163R	9.43000	13.646795	False	2.19000	2.465804	4.305936	5.534420
3	CC67.105	F1156L	300.00000	768.075469	True	2.19000	2.465804	136.986301	311.490885
4	CC9.104	D1146N	2.90000	10.638997	False	2.34000	6.545884	1.239316	1.625296
5	CC9.104	D1153Y	300.00000	404.286520	True	2.34000	6.545884	128.205128	61.761945
6	CC9.104	D1163R	28.60000	49.436653	False	2.34000	6.545884	12.222222	7.552326
7	CC9.104	F1156L	269.00000	579.525276	False	2.34000	6.545884	114.957265	88.532776
8	LyCoV-1404	F486L	0.00143	0.009337	False	0.00187	0.011228	0.764706	0.831574
9	LyCoV-1404	G447D	4.00000	8.213625	True	0.00187	0.011228	2139.037433	731.511692
10	LyCoV-1404	K444N	4.00000	1.338305	True	0.00187	0.011228	2139.037433	119.190499
11	LyCoV-1404	N439Y	1.12000	1.279219	False	0.00187	0.011228	598.930481	113.928225
12	LyCoV-1404	P499H	4.00000	2.720276	True	0.00187	0.011228	2139.037433	242.269825
13	LyCoV-1404	S446T	0.48500	0.674567	False	0.00187	0.011228	259.358289	60.077411
14	NTD_5-7	G103F	96.00000	222.142356	True	0.28000	26.914560	342.857143	8.253613
15	NTD_5-7	L176K	96.00000	320.491383	True	0.28000	26.914560	342.857143	11.907733
16	NTD_5-7	S172N	96.00000	312.404917	True	0.28000	26.914560	342.857143	11.607283

Now plot the fold changes:

[6]:

# Tooltip lists every column; float columns are shown with 3 significant digits.
fold_change_tooltips = [
    alt.Tooltip(col, format=".3g")
    if fold_changes[col].dtype == float
    else col
    for col in fold_changes.columns.tolist()
]

# Measured fold change on x, predicted fold change on y, both on log scales.
fold_change_x = alt.X(
    "measured_fold_change",
    title="measured fold change IC50",
    scale=alt.Scale(type="log"),
)
fold_change_y = alt.Y(
    "predicted_fold_change",
    title="predicted fold change IC50",
    scale=alt.Scale(type="log"),
)

# One facet per antibody, each with its own x and y scale; points are colored
# by whether the measured value is a lower bound.
fold_change_chart = (
    alt.Chart(fold_changes)
    .mark_circle(filled=True, size=60, opacity=0.6)
    .encode(
        x=fold_change_x,
        y=fold_change_y,
        facet=alt.Facet("antibody", columns=4, title=None),
        color=alt.Color("lower_bound", title="lower bound"),
        tooltip=fold_change_tooltips,
    )
    .configure_axis(grid=False)
    .resolve_scale(y="independent", x="independent")
    .properties(width=150, height=150)
)

fold_change_chart

/fh/fast/bloom_j/software/miniconda3/envs/dms-vep-pipeline/lib/python3.9/site-packages/altair/utils/core.py:317: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead.
  for col_name, dtype in df.dtypes.iteritems():

[6]:

[ ]:

Validation neutralization assays versus polyclonal fits¶

Validation neutralization assays versus `polyclonal` fits¶